def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
    }
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_steps.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor.experience_buffers[0]) == 0
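# Several tests below call a _create_action_info helper that is not defined in
# this excerpt. A minimal sketch, assuming it simply generalizes the inline
# fake_action_outputs/ActionInfo construction above to num_agents agents; the
# exact field values and shapes are inferred from that pattern, not taken from
# the real module:
def _create_action_info(num_agents, agent_ids):
    fake_action_output = {
        "action": ActionTuple(continuous=np.array(num_agents * [[0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array(num_agents * [[0.1]])),
    }
    return ActionInfo(
        action=ActionTuple(continuous=np.array(num_agents * [[0.1]])),
        env_action=ActionTuple(continuous=np.array(num_agents * [[0.1]])),
        value=num_agents * [0.1],
        outputs=fake_action_output,
        agent_ids=agent_ids,
    )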
def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    mock_decision_steps_2, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
    )
    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4
    # The last trajectory should be the longest
    trajectory = tqueue.put.call_args_list[-1][0][-1]

    # Make sure the trajectory has the right Groupmate Experiences
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents have died
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = _create_action_info(2, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Make sure ungrouped agents don't have team obs
    for step in trajectory.steps:
        assert len(step.group_status) == 0

    # Assert that the AgentProcessor is empty
    assert len(processor._experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor._experience_buffers[0]) == 0
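# create_mock_policy is used throughout these tests but is also not defined in
# this excerpt. A minimal sketch, assuming all the AgentProcessor needs is a
# Mock with the memory/previous-action lookups stubbed out; the stubbed
# attribute names and shapes here are assumptions:
def create_mock_policy():
    mock_policy = mock.Mock()
    mock_policy.reward_signals = {}
    mock_policy.retrieve_memories.return_value = np.zeros((1, 1), dtype=np.float32)
    mock_policy.retrieve_previous_action.return_value = np.zeros(
        (1, 1), dtype=np.float32
    )
    return mock_policy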
def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        "next_visual_obs0",
        "visual_obs0",
        "vector_obs",
        "next_vector_in",
        "memory",
        "masks",
        "done",
        "continuous_action",
        "discrete_action",
        "continuous_log_probs",
        "discrete_log_probs",
        "action_mask",
        "prev_action",
        "environment_rewards",
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)
    assert seen_keys == wanted_keys
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BehaviorSpec object with the given parameters.
    """
    # Avoid using mutable object as default param
    if vector_action_space_type == "continuous":
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
        action_spec = ActionSpec.create_continuous(vector_action_space_size)
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2,)
        else:
            vector_action_space_size = tuple(vector_action_space_size)
        action_spec = ActionSpec.create_discrete(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size,)]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    obs_spec = create_observation_specs_with_shapes(obs_shapes)
    return BehaviorSpec(obs_spec, action_spec)
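# create_observation_specs_with_shapes (and the older-named
# create_sensor_specs_with_shapes seen in a few tests below, which appears to
# be the same helper before a rename) is not defined in this excerpt. A minimal
# sketch, assuming it wraps each shape in a default ObservationSpec; the field
# names, enum values, and the import below are assumptions based on
# mlagents_envs.base_env:
from mlagents_envs.base_env import (
    ObservationSpec,
    DimensionProperty,
    ObservationType,
)


def create_observation_specs_with_shapes(shapes):
    return [
        ObservationSpec(
            shape=shape,
            dimension_property=(DimensionProperty.UNSPECIFIED,) * len(shape),
            observation_type=ObservationType.DEFAULT,
            name=f"observation {i}",
        )
        for i, shape in enumerate(shapes)
    ]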
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3,), (4,)]
    spec = BehaviorSpec(
        create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
    )
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
    for agent_id in range(n_agents):
        if agent_id in decision_steps:
            # we set the reward equal to the agent id in generate_list_agent_proto
            assert decision_steps[agent_id].reward == agent_id
        elif agent_id in terminal_steps:
            assert terminal_steps[agent_id].reward == agent_id
        else:
            raise Exception("Missing agent from the steps")
    # We sort the AgentIds since they are split between DecisionSteps and TerminalSteps
    combined_agent_id = list(decision_steps.agent_id) + list(terminal_steps.agent_id)
    combined_agent_id.sort()
    assert combined_agent_id == list(range(n_agents))
    for agent_id in range(n_agents):
        assert (agent_id in terminal_steps) == (agent_id % 2 == 0)
        if agent_id in terminal_steps:
            assert terminal_steps[agent_id].interrupted == (agent_id % 4 == 0)
    assert decision_steps.obs[0].shape[1] == shapes[0][0]
    assert decision_steps.obs[1].shape[1] == shapes[1][0]
    assert terminal_steps.obs[0].shape[1] == shapes[0][0]
    assert terminal_steps.obs[1].shape[1] == shapes[1][0]
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_spec=ActionSpec.create_continuous(3),
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
def test_empty_decision_steps():
    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        (ObservationKeyPrefix.OBSERVATION, 0),
        (ObservationKeyPrefix.OBSERVATION, 1),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 0),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 1),
        BufferKey.MEMORY,
        BufferKey.MASKS,
        BufferKey.DONE,
        BufferKey.CONTINUOUS_ACTION,
        BufferKey.DISCRETE_ACTION,
        BufferKey.CONTINUOUS_LOG_PROBS,
        BufferKey.DISCRETE_LOG_PROBS,
        BufferKey.ACTION_MASK,
        BufferKey.PREV_ACTION,
        BufferKey.ENVIRONMENT_REWARDS,
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_specs=create_observation_specs_with_shapes(
            [(VEC_OBS_SIZE,), (84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)
    assert seen_keys == wanted_keys
def test_action_generator():
    # Continuous
    action_len = 30
    specs = ActionSpec.create_continuous(action_len)
    zero_action = specs.empty_action(4).continuous
    assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
    random_action = specs.random_action(4).continuous
    assert random_action.dtype == np.float32
    assert random_action.shape == (4, action_len)
    assert np.min(random_action) >= -1
    assert np.max(random_action) <= 1

    # Discrete
    action_shape = (10, 20, 30)
    specs = ActionSpec.create_discrete(action_shape)
    zero_action = specs.empty_action(4).discrete
    assert np.array_equal(
        zero_action, np.zeros((4, len(action_shape)), dtype=np.int32)
    )
    random_action = specs.random_action(4).discrete
    assert random_action.dtype == np.int32
    assert random_action.shape == (4, len(action_shape))
    assert np.min(random_action) >= 0
    for index, branch_size in enumerate(action_shape):
        assert np.max(random_action[:, index]) < branch_size
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3,), (4,)]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert masks is None
def test_empty_decision_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3,), (4,)]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
def test_batched_step_result_from_proto_raises_on_infinite():
    n_agents = 10
    shapes = [(3,), (4,)]
    behavior_spec = BehaviorSpec(
        create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
    )
    ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
def test_mismatch_observations_raise_in_step_result_from_proto():
    n_agents = 10
    shapes = [(3,), (4,)]
    spec = BehaviorSpec(
        create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
    )
    ap_list = generate_list_agent_proto(n_agents, shapes)
    # Hack an observation to be larger; we should get an exception
    ap_list[0].observations[0].shape[0] += 1
    ap_list[0].observations[0].float_data.data.append(0.42)
    with pytest.raises(UnityObservationException):
        steps_from_proto(ap_list, spec)
def setup_test_behavior_specs(
    use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
):
    # For discrete actions, callers are expected to pass a sequence of branch
    # sizes as vector_action_space (tuple() would fail on a bare int).
    if use_discrete:
        action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
    else:
        action_spec = ActionSpec.create_continuous(vector_action_space)
    observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
    obs_spec = create_observation_specs_with_shapes(observation_shapes)
    behavior_spec = BehaviorSpec(obs_spec, action_spec)
    return behavior_spec
def test_end_episode():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
    }
    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
        num_agents=1,
        observation_shapes=[(8,)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1]])),
        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
    )
    # Run 3 trajectories, with different workers (to simulate different agents)
    remove_calls = []
    for _ep in range(3):
        remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))
        for _ in range(5):
            processor.add_experiences(
                mock_decision_step, mock_terminal_step, _ep, fake_action_info
            )
            # Make sure we don't add experiences from the prior agents after the done

    # Call end_episode
    processor.end_episode()
    # Check that we removed every agent
    policy.remove_previous_action.assert_has_calls(remove_calls)
    # Check that there are no experiences left
    assert len(processor.experience_buffers.keys()) == 0
    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0
def test_specs():
    specs = ActionSpec.create_continuous(3)
    assert specs.discrete_branches == ()
    assert specs.discrete_size == 0
    assert specs.continuous_size == 3
    assert specs.empty_action(5).shape == (5, 3)
    assert specs.empty_action(5).dtype == np.float32

    specs = ActionSpec.create_discrete((3,))
    assert specs.discrete_branches == (3,)
    assert specs.discrete_size == 1
    assert specs.continuous_size == 0
    assert specs.empty_action(5).shape == (5, 1)
    assert specs.empty_action(5).dtype == np.int32
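# ActionSpec can also describe hybrid action spaces via its constructor. A
# minimal sketch, assuming the ActionSpec(continuous_size, discrete_branches)
# NamedTuple layout used by more recent versions of mlagents_envs; this test
# name and the exact layout are assumptions, not part of the original suite:
def test_specs_hybrid():
    specs = ActionSpec(3, (3, 2))
    assert specs.continuous_size == 3
    assert specs.discrete_branches == (3, 2)
    assert specs.discrete_size == 2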
def test_trajectory_to_agentbuffer():
    length = 15
    # These keys should be of type np.ndarray
    wanted_keys = [
        (ObservationKeyPrefix.OBSERVATION, 0),
        (ObservationKeyPrefix.OBSERVATION, 1),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 0),
        (ObservationKeyPrefix.NEXT_OBSERVATION, 1),
        BufferKey.MEMORY,
        BufferKey.MASKS,
        BufferKey.DONE,
        BufferKey.CONTINUOUS_ACTION,
        BufferKey.DISCRETE_ACTION,
        BufferKey.CONTINUOUS_LOG_PROBS,
        BufferKey.DISCRETE_LOG_PROBS,
        BufferKey.ACTION_MASK,
        BufferKey.PREV_ACTION,
        BufferKey.ENVIRONMENT_REWARDS,
        BufferKey.GROUP_REWARD,
    ]
    # These keys should be of type List
    wanted_group_keys = [
        BufferKey.GROUPMATE_REWARDS,
        BufferKey.GROUP_CONTINUOUS_ACTION,
        BufferKey.GROUP_DISCRETE_ACTION,
        BufferKey.GROUP_DONES,
        BufferKey.GROUP_NEXT_CONT_ACTION,
        BufferKey.GROUP_NEXT_DISC_ACTION,
    ]
    wanted_keys = set(wanted_keys + wanted_group_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_specs=create_observation_specs_with_shapes(
            [(VEC_OBS_SIZE,), (84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
        num_other_agents_in_group=4,
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)
    assert seen_keys.issuperset(wanted_keys)

    for _key in wanted_group_keys:
        for step in agentbuffer[_key]:
            assert len(step) == 4
def __init__(
    self,
    brain_names,
    use_discrete,
    step_size=STEP_SIZE,
    num_visual=0,
    num_vector=1,
    vis_obs_size=VIS_OBS_SIZE,
    vec_obs_size=OBS_SIZE,
    action_size=1,
):
    super().__init__()
    self.discrete = use_discrete
    self.num_visual = num_visual
    self.num_vector = num_vector
    self.vis_obs_size = vis_obs_size
    self.vec_obs_size = vec_obs_size
    if use_discrete:
        action_spec = ActionSpec.create_discrete(
            tuple(2 for _ in range(action_size))
        )
    else:
        action_spec = ActionSpec.create_continuous(action_size)
    self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
    self.action_size = action_size
    self.names = brain_names
    self.positions: Dict[str, List[float]] = {}
    self.step_count: Dict[str, float] = {}
    self.random = random.Random(str(self.behavior_spec))
    self.goal: Dict[str, int] = {}
    self.action = {}
    self.rewards: Dict[str, float] = {}
    self.final_rewards: Dict[str, List[float]] = {}
    self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
    self.agent_id: Dict[str, int] = {}
    self.step_size = step_size  # defines the difficulty of the test
    for name in self.names:
        self.agent_id[name] = 0
        self.goal[name] = self.random.choice([-1, 1])
        self.rewards[name] = 0
        self.final_rewards[name] = []
        self._reset_agent(name)
        self.action[name] = None
        self.step_result[name] = None
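# _make_obs_spec is called in __init__ above but is not part of this excerpt.
# A minimal sketch of the method, assuming it assembles num_vector vector
# shapes and num_visual visual shapes into observation specs (and that
# vis_obs_size is itself a shape tuple):
def _make_obs_spec(self):
    obs_shapes = []
    for _ in range(self.num_vector):
        obs_shapes.append((self.vec_obs_size,))
    for _ in range(self.num_visual):
        obs_shapes.append(self.vis_obs_size)
    return create_observation_specs_with_shapes(obs_shapes)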
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None
    )
    obs_shapes = [(obs_size,)]
    act_size = [2]
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec.create_continuous(act_size[0])
    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size)
        )
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size)
        )
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], [], memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length,
            )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1,)

    # Test get_dist_and_value
    dists, value_out, mem_out = actor.get_dist_and_value(
        [sample_obs], [], memories=memories
    )
    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length,
            )
        else:
            assert value_out[stream].shape == (1,)
def test_simple_actor(use_discrete):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    act_size = [2]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    # Test get_dists
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if use_discrete:
            assert isinstance(dist, CategoricalDistInstance)
        else:
            assert isinstance(dist, GaussianDistInstance)
    # Test sample_action
    actions = actor.sample_action(dists)
    for act in actions:
        if use_discrete:
            assert act.shape == (1, 1)
        else:
            assert act.shape == (1, act_size[0])
    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks
    )
    for act in actions:
        # This is different from above for ONNX export
        if use_discrete:
            assert act.shape == tuple(act_size)
        else:
            assert act.shape == (act_size[0], 1)
    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)
from mlagents.trainers.buffer import BufferKey
import pytest
import numpy as np
from mlagents.trainers.torch.components.reward_providers import (
    ExtrinsicRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(
            create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
        ),
        BehaviorSpec(
            create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
        ),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
from mlagents_envs.base_env import ActionSpec


@pytest.fixture
def dummy_config():
    return ppo_dummy_config()


VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12

CONTINUOUS_ACTION_SPEC = ActionSpec.create_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.create_discrete(tuple(DISCRETE_ACTION_SPACE))


def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):
    mock_specs = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=DISCRETE_ACTION_SPACE
        if use_discrete
        else VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )
    trainer_settings = attr.evolve(dummy_config)
    trainer_settings.network_settings.memory = (
        NetworkSettings.MemorySettings(sequence_length=16, memory_size=10)
        if use_rnn
        else None
    )
def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    _, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
        agent_ids=[2, 3],
    )
    # Make decision steps continue for the other agents
    mock_decision_steps_2, _ = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=False,
        grouped=True,
        agent_ids=[0, 1],
    )
    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    # Continue to add for the remaining live agents
    fake_action_info = _create_action_info(4, mock_decision_steps_2.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps_2, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4

    # Get the first trajectory, which should be that of agent 2 (one of the killed agents)
    trajectory = tqueue.put.call_args_list[0][0][-1]
    assert len(trajectory.steps) == 3
    # Make sure the trajectory has the right Groupmate Experiences.
    # All three steps should contain all agents.
    for step in trajectory.steps:
        assert len(step.group_status) == 3

    # The last trajectory should be the longest. It should be that of agent 1,
    # one of the surviving agents.
    trajectory = tqueue.put.call_args_list[-1][0][-1]
    assert len(trajectory.steps) == 5

    # Make sure the trajectory has the right Groupmate Experiences.
    # The first 3 steps should contain all of the obs (the 3rd step is also the
    # terminal step of 2 of the agents).
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents have died, there should be only 1 group status.
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1
def basic_behavior_spec():
    dummy_actionspec = ActionSpec.create_continuous(1)
    dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
    return dummy_groupspec