Example #1
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
    }
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_steps.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor.experience_buffers[0]) == 0
Example #2
def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    mock_decision_steps_2, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
    )

    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4
    # Last trajectory should be the longest
    trajectory = tqueue.put.call_args_list[-1][0][-1]

    # Make sure trajectory has the right Groupmate Experiences
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents have died
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1
Example #3
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = _create_action_info(2, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5
    # Make sure ungrouped agents don't have team obs
    for step in trajectory.steps:
        assert len(step.group_status) == 0

    # Assert that the AgentProcessor is empty
    assert len(processor._experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor._experience_buffers[0]) == 0
Example #4
def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        "next_visual_obs0",
        "visual_obs0",
        "vector_obs",
        "next_vector_in",
        "memory",
        "masks",
        "done",
        "continuous_action",
        "discrete_action",
        "continuous_log_probs",
        "discrete_log_probs",
        "action_mask",
        "prev_action",
        "environment_rewards",
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_shapes=[(VEC_OBS_SIZE, ), (84, 84, 3)],
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)

    assert seen_keys == wanted_keys
Example #5
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BrainParameters object with parameters.
    """
    # Avoid using mutable object as default param
    if vector_action_space_type == "continuous":
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
        action_spec = ActionSpec.create_continuous(vector_action_space_size)
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2, )
        else:
            vector_action_space_size = tuple(vector_action_space_size)
        action_spec = ActionSpec.create_discrete(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size, )]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    obs_spec = create_observation_specs_with_shapes(obs_shapes)
    return BehaviorSpec(obs_spec, action_spec)
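
A minimal usage sketch of the helper above (not part of the original example). It assumes the observation_specs and action_spec fields of BehaviorSpec and the discrete_branches property of ActionSpec from mlagents_envs.base_env:

# Hedged sketch: exercising create_mock_group_spec with a discrete action space.
mock_spec = create_mock_group_spec(
    number_visual_observations=1,
    vector_action_space_type="discrete",
    vector_action_space_size=[3, 2],
)
# Two discrete branches, of sizes 3 and 2
assert mock_spec.action_spec.discrete_branches == (3, 2)
# One vector observation plus one (8, 8, 3) visual observation
assert len(mock_spec.observation_specs) == 2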
Example #6
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                        ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
    for agent_id in range(n_agents):
        if agent_id in decision_steps:
            # we set the reward equal to the agent id in generate_list_agent_proto
            assert decision_steps[agent_id].reward == agent_id
        elif agent_id in terminal_steps:
            assert terminal_steps[agent_id].reward == agent_id
        else:
            raise Exception("Missing agent from the steps")
    # We sort the AgentId since they are split between DecisionSteps and TerminalSteps
    combined_agent_id = list(decision_steps.agent_id) + list(
        terminal_steps.agent_id)
    combined_agent_id.sort()
    assert combined_agent_id == list(range(n_agents))
    for agent_id in range(n_agents):
        assert (agent_id in terminal_steps) == (agent_id % 2 == 0)
        if agent_id in terminal_steps:
            assert terminal_steps[agent_id].interrupted == (agent_id % 4 == 0)
    assert decision_steps.obs[0].shape[1] == shapes[0][0]
    assert decision_steps.obs[1].shape[1] == shapes[1][0]
    assert terminal_steps.obs[0].shape[1] == shapes[0][0]
    assert terminal_steps.obs[1].shape[1] == shapes[1][0]
Example #7
def test_empty_terminal_steps():
    specs = BehaviorSpec(observation_shapes=[(3, 2), (5, )],
                         action_spec=ActionSpec.create_continuous(3))
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
Example #8
def test_empty_decision_steps():
    specs = BehaviorSpec(observation_shapes=[(3, 2), (5, )],
                         action_spec=ActionSpec.create_continuous(3))
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
Example #9
def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        (ObservationKeyPrefix.OBSERVATION, 0),
        (ObservationKeyPrefix.OBSERVATION, 1),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 0),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 1),
        BufferKey.MEMORY,
        BufferKey.MASKS,
        BufferKey.DONE,
        BufferKey.CONTINUOUS_ACTION,
        BufferKey.DISCRETE_ACTION,
        BufferKey.CONTINUOUS_LOG_PROBS,
        BufferKey.DISCRETE_LOG_PROBS,
        BufferKey.ACTION_MASK,
        BufferKey.PREV_ACTION,
        BufferKey.ENVIRONMENT_REWARDS,
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_specs=create_observation_specs_with_shapes([
            (VEC_OBS_SIZE, ), (84, 84, 3)
        ]),
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)

    assert seen_keys == wanted_keys
Example #10
def test_action_generator():
    # Continuous
    action_len = 30
    specs = ActionSpec.create_continuous(action_len)
    zero_action = specs.empty_action(4).continuous
    assert np.array_equal(zero_action,
                          np.zeros((4, action_len), dtype=np.float32))
    print(specs.random_action(4))
    random_action = specs.random_action(4).continuous
    print(random_action)
    assert random_action.dtype == np.float32
    assert random_action.shape == (4, action_len)
    assert np.min(random_action) >= -1
    assert np.max(random_action) <= 1

    # Discrete
    action_shape = (10, 20, 30)
    specs = ActionSpec.create_discrete(action_shape)
    zero_action = specs.empty_action(4).discrete
    assert np.array_equal(zero_action,
                          np.zeros((4, len(action_shape)), dtype=np.int32))

    random_action = specs.random_action(4).discrete
    assert random_action.dtype == np.int32
    assert random_action.shape == (4, len(action_shape))
    assert np.min(random_action) >= 0
    for index, branch_size in enumerate(action_shape):
        assert np.max(random_action[:, index]) < branch_size
Example #11
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert masks is None
Example #12
def test_empty_decision_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
Example #13
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
Example #14
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        nan_observations=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example #15
def test_batched_step_result_from_proto_raises_on_infinite():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_sensor_specs_with_shapes(shapes),
                                 ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        infinite_rewards=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example #16
def test_mismatch_observations_raise_in_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                        ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    # Hack an observation to be larger; we should get an exception
    ap_list[0].observations[0].shape[0] += 1
    ap_list[0].observations[0].float_data.data.append(0.42)
    with pytest.raises(UnityObservationException):
        steps_from_proto(ap_list, spec)
Example #17
def setup_test_behavior_specs(
    use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
):
    if use_discrete:
        action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
    else:
        action_spec = ActionSpec.create_continuous(vector_action_space)
    observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
    obs_spec = create_observation_specs_with_shapes(observation_shapes)
    behavior_spec = BehaviorSpec(obs_spec, action_spec)
    return behavior_spec
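
A short hedged sketch of calling the helper above (illustrative values only; note that use_discrete=True requires an iterable vector_action_space, because the helper calls tuple() on it). The observation_specs and shape attributes are the ones from mlagents_envs.base_env and are assumptions relative to this snippet:

# Hedged sketch: building a discrete, vector-only behavior spec.
behavior_spec = setup_test_behavior_specs(
    use_discrete=True, use_visual=False, vector_action_space=[3, 2], vector_obs_space=8
)
assert behavior_spec.action_spec.discrete_branches == (3, 2)
# A single vector observation of size 8
assert behavior_spec.observation_specs[0].shape == (8,)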
Example #18
def test_end_episode():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
    }

    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
        num_agents=1,
        observation_shapes=[(8,)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1]])),
        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )

    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
    )
    # Run 3 trajectories, with different workers (to simulate different agents)
    remove_calls = []
    for _ep in range(3):
        remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))
        for _ in range(5):
            processor.add_experiences(
                mock_decision_step, mock_terminal_step, _ep, fake_action_info
            )
            # Make sure we don't add experiences from the prior agents after the done

    # Call end episode
    processor.end_episode()
    # Check that we removed every agent
    policy.remove_previous_action.assert_has_calls(remove_calls)
    # Check that there are no experiences left
    assert len(processor.experience_buffers.keys()) == 0
    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0
Example #19
def test_specs():
    specs = ActionSpec.create_continuous(3)
    assert specs.discrete_branches == ()
    assert specs.discrete_size == 0
    assert specs.continuous_size == 3
    assert specs.empty_action(5).shape == (5, 3)
    assert specs.empty_action(5).dtype == np.float32

    specs = ActionSpec.create_discrete((3,))
    assert specs.discrete_branches == (3,)
    assert specs.discrete_size == 1
    assert specs.continuous_size == 0
    assert specs.empty_action(5).shape == (5, 1)
    assert specs.empty_action(5).dtype == np.int32
Example #20
def test_trajectory_to_agentbuffer():
    length = 15
    # These keys should be of type np.ndarray
    wanted_keys = [
        (ObservationKeyPrefix.OBSERVATION, 0),
        (ObservationKeyPrefix.OBSERVATION, 1),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 0),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 1),
        BufferKey.MEMORY,
        BufferKey.MASKS,
        BufferKey.DONE,
        BufferKey.CONTINUOUS_ACTION,
        BufferKey.DISCRETE_ACTION,
        BufferKey.CONTINUOUS_LOG_PROBS,
        BufferKey.DISCRETE_LOG_PROBS,
        BufferKey.ACTION_MASK,
        BufferKey.PREV_ACTION,
        BufferKey.ENVIRONMENT_REWARDS,
        BufferKey.GROUP_REWARD,
    ]
    # These keys should be of type List
    wanted_group_keys = [
        BufferKey.GROUPMATE_REWARDS,
        BufferKey.GROUP_CONTINUOUS_ACTION,
        BufferKey.GROUP_DISCRETE_ACTION,
        BufferKey.GROUP_DONES,
        BufferKey.GROUP_NEXT_CONT_ACTION,
        BufferKey.GROUP_NEXT_DISC_ACTION,
    ]
    wanted_keys = set(wanted_keys + wanted_group_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_specs=create_observation_specs_with_shapes([
            (VEC_OBS_SIZE, ), (84, 84, 3)
        ]),
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
        num_other_agents_in_group=4,
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)

    assert seen_keys.issuperset(wanted_keys)

    for _key in wanted_group_keys:
        for step in agentbuffer[_key]:
            assert len(step) == 4
Example #21
    def __init__(
        self,
        brain_names,
        use_discrete,
        step_size=STEP_SIZE,
        num_visual=0,
        num_vector=1,
        vis_obs_size=VIS_OBS_SIZE,
        vec_obs_size=OBS_SIZE,
        action_size=1,
    ):
        super().__init__()
        self.discrete = use_discrete
        self.num_visual = num_visual
        self.num_vector = num_vector
        self.vis_obs_size = vis_obs_size
        self.vec_obs_size = vec_obs_size
        if use_discrete:
            action_spec = ActionSpec.create_discrete(
                tuple(2 for _ in range(action_size))
            )
        else:
            action_spec = ActionSpec.create_continuous(action_size)
        self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
        self.action_size = action_size
        self.names = brain_names
        self.positions: Dict[str, List[float]] = {}
        self.step_count: Dict[str, float] = {}
        self.random = random.Random(str(self.behavior_spec))
        self.goal: Dict[str, int] = {}
        self.action = {}
        self.rewards: Dict[str, float] = {}
        self.final_rewards: Dict[str, List[float]] = {}
        self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
        self.agent_id: Dict[str, int] = {}
        self.step_size = step_size  # defines the difficulty of the test

        for name in self.names:
            self.agent_id[name] = 0
            self.goal[name] = self.random.choice([-1, 1])
            self.rewards[name] = 0
            self.final_rewards[name] = []
            self._reset_agent(name)
            self.action[name] = None
            self.step_result[name] = None
Example #22
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None)
    obs_shapes = [(obs_size, )]
    act_size = [2]
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec.create_continuous(act_size[0])
    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], [],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_dist_and_value
    dists, value_out, mem_out = actor.get_dist_and_value([sample_obs], [],
                                                         memories=memories)
    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #23
def test_simple_actor(use_discrete):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, )]
    act_size = [2]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    # Test get_dist
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if use_discrete:
            assert isinstance(dist, CategoricalDistInstance)
        else:
            assert isinstance(dist, GaussianDistInstance)

    # Test sample_actions
    actions = actor.sample_action(dists)
    for act in actions:
        if use_discrete:
            assert act.shape == (1, 1)
        else:
            assert act.shape == (1, act_size[0])

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks)
    for act in actions:
        # This is different from above for ONNX export
        if use_discrete:
            assert act.shape == tuple(act_size)
        else:
            assert act.shape == (act_size[0], 1)

    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)
Example #24
from mlagents.trainers.buffer import BufferKey
import pytest
import numpy as np
from mlagents.trainers.torch.components.reward_providers import (
    ExtrinsicRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer, )
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_observation_specs_with_shapes([(10, )]),
                     ACTIONSPEC_CONTINUOUS),
        BehaviorSpec(create_observation_specs_with_shapes([(10, )]),
                     ACTIONSPEC_TWODISCRETE),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
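
The snippet above stops at the construction test. As a complement, here is a hedged sketch of evaluating the provider on a fake buffer; create_agent_buffer(behavior_spec, num, reward) from the utils module imported above and the evaluate() method are assumptions here, not shown on this page:

# Hedged sketch: the extrinsic provider should echo the environment rewards.
def check_extrinsic_reward(behavior_spec: BehaviorSpec) -> None:
    buffer = create_agent_buffer(behavior_spec, 16, 1.0)
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, RewardSignalSettings())
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == 1.0).all()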
Example #25
from mlagents_envs.base_env import ActionSpec


@pytest.fixture
def dummy_config():
    return ppo_dummy_config()


VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12

CONTINUOUS_ACTION_SPEC = ActionSpec.create_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.create_discrete(tuple(DISCRETE_ACTION_SPACE))


def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):
    mock_specs = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=DISCRETE_ACTION_SPACE
        if use_discrete else VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )

    trainer_settings = attr.evolve(dummy_config)
    trainer_settings.network_settings.memory = (NetworkSettings.MemorySettings(
        sequence_length=16, memory_size=10) if use_rnn else None)
Example #26
def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    _, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
        agent_ids=[2, 3],
    )
    # Make decision steps continue for other agents
    mock_decision_steps_2, _ = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=False,
        grouped=True,
        agent_ids=[0, 1],
    )

    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    # Continue to add for remaining live agents
    fake_action_info = _create_action_info(4, mock_decision_steps_2.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps_2, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4

    # Get the first trajectory, which should have been agent 2 (one of the killed agents)
    trajectory = tqueue.put.call_args_list[0][0][-1]
    assert len(trajectory.steps) == 3
    # Make sure trajectory has the right Groupmate Experiences.
    # All three steps should contain all agents
    for step in trajectory.steps:
        assert len(step.group_status) == 3

    # Last trajectory should be the longest. It should be that of agent 1, one of the surviving agents.
    trajectory = tqueue.put.call_args_list[-1][0][-1]
    assert len(trajectory.steps) == 5

    # Make sure trajectory has the right Groupmate Experiences.
    # The first 3 steps should contain all of the obs (the 3rd step is also the terminal step for 2 of the agents)
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents have died, there should only be 1 group status.
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1
Example #27
def basic_behavior_spec():
    dummy_actionspec = ActionSpec.create_continuous(1)
    dummy_groupspec = BehaviorSpec([(1, )], dummy_actionspec)
    return dummy_groupspec
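
Finally, a hedged sketch of what the continuous ActionSpec built above exposes (attribute names follow mlagents_envs.base_env and are assumptions relative to this snippet):

# Hedged sketch: inspecting the 1-dimensional continuous action spec.
dummy_spec = basic_behavior_spec()
assert dummy_spec.action_spec.continuous_size == 1
# A purely continuous spec has no discrete branches
assert dummy_spec.action_spec.discrete_branches == ()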