コード例 #1
0
def test_stat_reporter_add_summary_write():
    """Exercise writer registration, per-key summaries and write_stats fan-out."""
    # Start from an empty class-level writer list, then register two mocks.
    StatsReporter.writers.clear()
    first_writer, second_writer = mock.Mock(), mock.Mock()
    for registered in (first_writer, second_writer):
        StatsReporter.add_writer(registered)
    assert len(StatsReporter.writers) == 2

    # Feed the values 0.0 .. 9.0 into one key on each of two reporters.
    reporter_a = StatsReporter("category1")
    reporter_b = StatsReporter("category2")
    for sample in map(float, range(10)):
        reporter_a.add_stat("key1", sample)
        reporter_b.add_stat("key2", sample)

    summary_a = reporter_a.get_stats_summaries("key1")
    summary_b = reporter_b.get_stats_summaries("key2")

    # Both reporters received the same ten samples, so the summaries agree.
    for summary in (summary_a, summary_b):
        assert summary.num == 10
        assert summary.mean == 4.5
        assert summary.std == pytest.approx(2.9, abs=0.1)

    # Writing from the first reporter must reach every registered writer once.
    step = 10
    reporter_a.write_stats(step)
    expected_args = ("category1", {"key1": summary_a}, step)
    first_writer.write_stats.assert_called_once_with(*expected_args)
    second_writer.write_stats.assert_called_once_with(*expected_args)
コード例 #2
0
def test_agent_manager_stats():
    """Check that environment stats recorded via AgentManager reach the writers.

    Two batches of stats are recorded for worker 0. The test expects the
    "averaged" key to summarize both samples (mean 2.0, num 2) and the
    "most_recent" key to keep only the last sample (mean 4.0, num 1).

    The mock writer is removed from ``StatsReporter.writers`` in a ``finally``
    block so that a failing assertion cannot leave it in the global list.
    """
    policy = mock.Mock()
    stats_reporter = StatsReporter("FakeCategory")
    writer = mock.Mock()
    stats_reporter.add_writer(writer)
    try:
        manager = AgentManager(policy, "MyBehavior", stats_reporter)

        all_env_stats = [
            {
                "averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
                "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
            },
            {
                "averaged": [(3.0, StatsAggregationMethod.AVERAGE)],
                "most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)],
            },
        ]
        for env_stats in all_env_stats:
            manager.record_environment_stats(env_stats, worker_id=0)

        expected_stats = {
            # std of the averaged stat is not pinned by this test.
            "averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
            "most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
        }
        stats_reporter.write_stats(123)
        writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)
    finally:
        # Clean up our Mock from the global list, even when an assertion fails.
        StatsReporter.writers.remove(writer)
コード例 #3
0
def test_agent_deletion():
    """Per-agent processor state must be dropped once an agent's done step arrives."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _single_agent_step(**extra):
        # One agent, 8 vector observations, no visual observations.
        return mb.create_mock_batchedstep(
            num_agents=1,
            num_vector_observations=8,
            action_shape=[2],
            num_vis_observations=0,
            **extra,
        )

    action_outputs = {
        "action": [0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1],
        "log_probs": [0.1],
    }
    live_step = _single_agent_step()
    done_step = _single_agent_step(done=True)
    action_info = ActionInfo(
        action=[0.1],
        value=[0.1],
        outputs=action_outputs,
        agent_ids=live_step.agent_id,
    )

    processor.publish_trajectory_queue(trajectory_queue)
    # First call stands in for the state right after an environment reset.
    processor.add_experiences(live_step, 0, ActionInfo.empty())

    # Three episodes, each under its own worker id (simulating distinct agents):
    # five ordinary steps followed by one done step.
    expected_saves = []
    expected_removals = []
    for worker_id in range(3):
        for _ in range(5):
            processor.add_experiences(live_step, worker_id, action_info)
        expected_saves.extend(
            mock.call([get_global_agent_id(worker_id, 0)], [0.1]) for _ in range(5)
        )
        processor.add_experiences(done_step, worker_id, action_info)
        # The done step is expected to trigger removal of that agent's action.
        expected_removals.append(mock.call([get_global_agent_id(worker_id, 0)]))

    policy.save_previous_action.assert_has_calls(expected_saves)
    policy.remove_previous_action.assert_has_calls(expected_removals)

    # Nothing may remain in any of the per-agent bookkeeping mappings.
    assert len(processor.experience_buffers) == 0
    assert len(processor.last_take_action_outputs) == 0
    assert len(processor.episode_steps) == 0
    assert len(processor.episode_rewards) == 0
コード例 #4
0
ファイル: trainer.py プロジェクト: zouhunter/ml-agents
 def __init__(
     self,
     brain: BrainParameters,
     trainer_parameters: dict,
     training: bool,
     run_id: str,
     reward_buff_cap: int = 1,
 ):
     """
     Initialize the state shared by trainers that collect experiences and
     train a neural network model.
     :BrainParameters brain: Brain to be trained; only its brain_name is read here.
     :dict trainer_parameters: The parameters for the trainer (dictionary).
         Must contain the key "summary_path".
     :bool training: Whether the trainer is set for training.
     :str run_id: The identifier of the current run
     :int reward_buff_cap: Maximum length of the internal reward deque.
     """
     self.param_keys: List[str] = []

     # Identity of the brain and of the run.
     self.brain_name = brain.brain_name
     self.run_id = run_id

     # Configuration; the stats reporter is keyed on the summary path.
     self.trainer_parameters = trainer_parameters
     summary_path = trainer_parameters["summary_path"]
     self.summary_path = summary_path
     self.stats_reporter = StatsReporter(summary_path)

     # Mutable training state.
     self.cumulative_returns_since_policy_update: List[float] = []
     self.is_training = training
     self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
     self.policy: TFPolicy = None  # type: ignore  # this will always get set
     self.step: int = 0
コード例 #5
0
ファイル: trainer.py プロジェクト: syg19850728/ml-agents
 def __init__(
     self,
     brain_name: str,
     trainer_settings: TrainerSettings,
     training: bool,
     run_id: str,
     reward_buff_cap: int = 1,
 ):
     """
     Initialize the state shared by trainers that collect experiences and
     train a neural network model.
     :str brain_name: Name of the brain to be trained; also used as the
         StatsReporter category.
     :TrainerSettings trainer_settings: The settings for the trainer; the
         threaded and summary_freq attributes are read here.
     :bool training: Whether the trainer is set for training.
     :str run_id: The identifier of the current run
     :int reward_buff_cap: Maximum length of the internal reward deque.
     """
     # Identity of the brain and of the run.
     self.brain_name = brain_name
     self.run_id = run_id

     # Settings and the values derived from them.
     self.trainer_settings = trainer_settings
     self._threaded = trainer_settings.threaded
     self._stats_reporter = StatsReporter(brain_name)

     # Mutable training state and the queues filled in later.
     self.is_training = training
     self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
     self.policy_queues: List[AgentManagerQueue[Policy]] = []
     self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
     self.step: int = 0
     self.summary_freq = trainer_settings.summary_freq
コード例 #6
0
def test_agentprocessor(num_vis_obs):
    """Five steps for two agents should hand two length-5 trajectories to the trainer."""
    mock_policy = create_mock_policy()
    mock_trainer = mock.Mock()
    processor = AgentProcessor(
        mock_trainer,
        mock_policy,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    brain_info = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )

    # The same BrainInfo serves as both the current and the next info.
    for _ in range(5):
        processor.add_experiences(brain_info, brain_info, action_outputs)

    # Two trajectories must have been passed to the trainer ...
    submitted = mock_trainer.process_trajectory.call_args_list
    assert len(submitted) == 2

    # ... and the first of them must contain five steps.
    first_trajectory = submitted[0][0][0]
    assert len(first_trajectory.steps) == 5

    # The buffer stored under key 0 must be empty afterwards.
    assert len(processor.experience_buffers[0]) == 0
コード例 #7
0
ファイル: trainer.py プロジェクト: ximianglongchang/ml-agents
 def __init__(
     self,
     brain_name: str,
     trainer_parameters: dict,
     training: bool,
     run_id: str,
     reward_buff_cap: int = 1,
 ):
     """
     Initialize the state shared by trainers that collect experiences and
     train a neural network model.
     :str brain_name: Name of the brain to be trained.
     :dict trainer_parameters: The parameters for the trainer (dictionary).
         Must contain the keys "summary_path" and "summary_freq".
     :bool training: Whether the trainer is set for training.
     :str run_id: The identifier of the current run
     :int reward_buff_cap: Maximum length of the internal reward deque.
     """
     self.param_keys: List[str] = []

     # Identity of the brain and of the run.
     self.brain_name = brain_name
     self.run_id = run_id

     # Configuration; the stats reporter is keyed on the summary path.
     self.trainer_parameters = trainer_parameters
     summary_path = trainer_parameters["summary_path"]
     self.summary_path = summary_path
     self.stats_reporter = StatsReporter(summary_path)

     # Mutable training state and the queues filled in later.
     self.cumulative_returns_since_policy_update: List[float] = []
     self.is_training = training
     self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
     self.policy_queues: List[AgentManagerQueue[Policy]] = []
     self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
     self.step: int = 0
     self.training_start_time = time.time()

     # The first summary is due after one full summary interval.
     summary_freq = trainer_parameters["summary_freq"]
     self.summary_freq = summary_freq
     self.next_summary_step = summary_freq
コード例 #8
0
def test_agentprocessor(num_vis_obs):
    """Five steps for two agents should enqueue two length-5 trajectories."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _two_agent_values():
        # Fresh array on every call so no two tuples share storage.
        return np.array([[0.1], [0.1]])

    def _make_steps(agent_count):
        return mb.create_mock_steps(
            num_agents=agent_count,
            observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
            action_spec=ActionSpec.create_continuous(2),
        )

    action_outputs = {
        "action": ActionTuple(continuous=_two_agent_values()),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=_two_agent_values()),
    }
    decision_steps, terminal_steps = _make_steps(2)
    action_info = ActionInfo(
        action=ActionTuple(continuous=_two_agent_values()),
        env_action=ActionTuple(continuous=_two_agent_values()),
        value=[0.1, 0.1],
        outputs=action_outputs,
        agent_ids=decision_steps.agent_id,
    )
    processor.publish_trajectory_queue(trajectory_queue)

    # First call stands in for the state right after an environment reset.
    processor.add_experiences(decision_steps, terminal_steps, 0, ActionInfo.empty())
    for _ in range(5):
        processor.add_experiences(decision_steps, terminal_steps, 0, action_info)

    # Two trajectories must have been put on the queue ...
    queued = trajectory_queue.put.call_args_list
    assert len(queued) == 2

    # ... and the first of them must contain five steps.
    first_trajectory = queued[0][0][0]
    assert len(first_trajectory.steps) == 5

    # The buffer stored under key 0 must be empty afterwards.
    assert len(processor.experience_buffers[0]) == 0

    # Adding steps that contain no agents must leave that buffer empty.
    decision_steps, terminal_steps = _make_steps(0)
    processor.add_experiences(decision_steps, terminal_steps, 0, ActionInfo.empty())
    assert len(processor.experience_buffers[0]) == 0
コード例 #9
0
 def __init__(
     self,
     brain_name: str,
     trainer_settings: TrainerSettings,
     training: bool,
     load: bool,
     artifact_path: str,
     reward_buff_cap: int = 1,
 ):
     """
     Initialize the state shared by trainers that collect experiences and
     train a neural network model.
     :param brain_name: Brain name of brain to be trained; also used as the
         StatsReporter category.
     :param trainer_settings: The settings for the trainer; the threaded and
         summary_freq attributes are read here.
     :param training: Whether the trainer is set for training.
     :param load: Stored as self.load; not otherwise used in this constructor.
     :param artifact_path: The directory within which to store artifacts from this trainer
     :param reward_buff_cap: Maximum length of the internal reward deque.
     """
     self.brain_name = brain_name

     # Settings and the values derived from them.
     self.trainer_settings = trainer_settings
     self._threaded = trainer_settings.threaded
     self._stats_reporter = StatsReporter(brain_name)

     # Flags supplied by the caller.
     self.is_training = training
     self.load = load

     # Mutable training state and the containers filled in later.
     self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
     self.policy_queues: List[AgentManagerQueue[Policy]] = []
     self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
     self.step: int = 0
     self.artifact_path = artifact_path
     self.summary_freq = trainer_settings.summary_freq
     self.policies: Dict[str, Policy] = {}
コード例 #10
0
def test_group_statuses():
    """Group status entries on a trajectory should shrink once groupmates are done."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _grouped_steps(agent_count, **extra):
        # Grouped agents with a single (8,) observation and 2 continuous actions.
        return mb.create_mock_steps(
            num_agents=agent_count,
            observation_specs=create_observation_specs_with_shapes([(8,)]),
            action_spec=ActionSpec.create_continuous(2),
            grouped=True,
            **extra,
        )

    decision_steps, terminal_steps = _grouped_steps(4)
    action_info = _create_action_info(4, decision_steps.agent_id)
    processor.publish_trajectory_queue(trajectory_queue)

    # First call stands in for the state right after an environment reset.
    processor.add_experiences(decision_steps, terminal_steps, 0, ActionInfo.empty())
    for _ in range(2):
        processor.add_experiences(decision_steps, terminal_steps, 0, action_info)

    # A batch of two agents created with done=True.
    done_decision_steps, done_terminal_steps = _grouped_steps(2, done=True)
    processor.add_experiences(done_decision_steps, done_terminal_steps, 0, action_info)

    # Carry on with the original four-agent steps.
    action_info = _create_action_info(4, decision_steps.agent_id)
    for _ in range(3):
        processor.add_experiences(decision_steps, terminal_steps, 0, action_info)

    # Four trajectories must have been put on the queue.
    queued = trajectory_queue.put.call_args_list
    assert len(queued) == 4

    # NOTE(review): this inspects the FIRST put() call (index 0), although the
    # original comment described it as the last/longest trajectory - confirm
    # which trajectory was intended.
    trajectory = queued[0][0][-1]

    # The first three steps are expected to carry three group status entries,
    for step in trajectory.steps[:3]:
        assert len(step.group_status) == 3
    # and every later step only one.
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1
コード例 #11
0
ファイル: test_stats.py プロジェクト: solpaul/ml-agents
def test_stat_reporter_add_summary_write():
    """Exercise writer registration, on_add_stat callbacks, summaries and write_stats."""
    # Start from an empty class-level writer list, then register two mocks.
    StatsReporter.writers.clear()
    first_writer, second_writer = mock.Mock(), mock.Mock()
    for registered in (first_writer, second_writer):
        StatsReporter.add_writer(registered)
    assert len(StatsReporter.writers) == 2

    # Feed the values 0.0 .. 9.0 into one key on each of two reporters.
    reporter_a = StatsReporter("category1")
    reporter_b = StatsReporter("category2")
    for sample in map(float, range(10)):
        reporter_a.add_stat("key1", sample)
        reporter_b.add_stat("key2", sample)

    # Every add_stat is expected to be forwarded to on_add_stat, alternating
    # between the two categories in the order the stats were added.
    expected_add_calls = []
    for i in range(10):
        for j in (1, 2):
            expected_add_calls.append(
                mock.call(
                    f"category{j}",
                    f"key{j}",
                    float(i),
                    StatsAggregationMethod.AVERAGE,
                )
            )
    first_writer.on_add_stat.assert_has_calls(expected_add_calls)
    second_writer.on_add_stat.assert_has_calls(expected_add_calls)

    summary_a = reporter_a.get_stats_summaries("key1")
    summary_b = reporter_b.get_stats_summaries("key2")

    # Both reporters received the same ten samples, so the summaries agree.
    for summary in (summary_a, summary_b):
        assert summary.num == 10
        assert summary.mean == 4.5
        assert summary.std == pytest.approx(2.9, abs=0.1)

    # Writing from the first reporter must reach every registered writer once.
    step = 10
    reporter_a.write_stats(step)
    expected_args = ("category1", {"key1": summary_a}, step)
    first_writer.write_stats.assert_called_once_with(*expected_args)
    second_writer.write_stats.assert_called_once_with(*expected_args)
コード例 #12
0
def test_agentprocessor(num_vis_obs):
    """Five steps for two agents should enqueue two length-5 trajectories."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _make_step(agent_count):
        return mb.create_mock_batchedstep(
            num_agents=agent_count,
            num_vector_observations=8,
            action_shape=[2],
            num_vis_observations=num_vis_obs,
        )

    action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    batched_step = _make_step(2)
    action_info = ActionInfo(
        action=[0.1, 0.1],
        value=[0.1, 0.1],
        outputs=action_outputs,
        agent_ids=batched_step.agent_id,
    )
    processor.publish_trajectory_queue(trajectory_queue)

    # First call stands in for the state right after an environment reset.
    processor.add_experiences(batched_step, 0, ActionInfo.empty())
    for _ in range(5):
        processor.add_experiences(batched_step, 0, action_info)

    # Two trajectories must have been put on the queue ...
    queued = trajectory_queue.put.call_args_list
    assert len(queued) == 2

    # ... and the first of them must contain five steps.
    first_trajectory = queued[0][0][0]
    assert len(first_trajectory.steps) == 5

    # The buffer stored under key 0 must be empty afterwards.
    assert len(processor.experience_buffers[0]) == 0

    # Adding a step result that contains no agents must leave it empty.
    batched_step = _make_step(0)
    processor.add_experiences(batched_step, 0, ActionInfo([], [], {}, []))
    assert len(processor.experience_buffers[0]) == 0
コード例 #13
0
def test_agentprocessor(num_vis_obs):
    """Five steps for two ungrouped agents should enqueue two length-5 trajectories."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _make_steps(agent_count):
        # One (8,) observation plus num_vis_obs observations of shape (84, 84, 3).
        return mb.create_mock_steps(
            num_agents=agent_count,
            observation_specs=create_observation_specs_with_shapes(
                [(8,)] + num_vis_obs * [(84, 84, 3)]
            ),
            action_spec=ActionSpec.create_continuous(2),
        )

    decision_steps, terminal_steps = _make_steps(2)
    action_info = _create_action_info(2, decision_steps.agent_id)
    processor.publish_trajectory_queue(trajectory_queue)

    # First call stands in for the state right after an environment reset.
    processor.add_experiences(decision_steps, terminal_steps, 0, ActionInfo.empty())
    for _ in range(5):
        processor.add_experiences(decision_steps, terminal_steps, 0, action_info)

    # Two trajectories must have been put on the queue ...
    queued = trajectory_queue.put.call_args_list
    assert len(queued) == 2

    # ... and the first of them must contain five steps,
    first_trajectory = queued[0][0][0]
    assert len(first_trajectory.steps) == 5
    # none of which carries any group status (the agents are not grouped).
    for step in first_trajectory.steps:
        assert len(step.group_status) == 0

    # The buffer stored under key 0 must be empty afterwards.
    assert len(processor._experience_buffers[0]) == 0

    # Adding steps that contain no agents must leave that buffer empty.
    decision_steps, terminal_steps = _make_steps(0)
    processor.add_experiences(decision_steps, terminal_steps, 0, ActionInfo.empty())
    assert len(processor._experience_buffers[0]) == 0
コード例 #14
0
def test_end_episode():
    """end_episode() must remove every agent and clear all per-agent state."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _one_agent_values():
        # Fresh array on every call so no two tuples share storage.
        return np.array([[0.1]])

    action_outputs = {
        "action": ActionTuple(continuous=_one_agent_values()),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=_one_agent_values()),
    }
    decision_step, terminal_step = mb.create_mock_steps(
        num_agents=1,
        observation_shapes=[(8,)],
        action_spec=ActionSpec.create_continuous(2),
    )
    action_info = ActionInfo(
        action=ActionTuple(continuous=_one_agent_values()),
        env_action=ActionTuple(continuous=_one_agent_values()),
        value=[0.1],
        outputs=action_outputs,
        agent_ids=decision_step.agent_id,
    )

    processor.publish_trajectory_queue(trajectory_queue)
    # First call stands in for the state right after an environment reset.
    processor.add_experiences(decision_step, terminal_step, 0, ActionInfo.empty())

    # Three runs of five steps, each under its own worker id (simulating
    # distinct agents); one removal call is expected per worker.
    worker_ids = range(3)
    expected_removals = [
        mock.call([get_global_agent_id(worker_id, 0)]) for worker_id in worker_ids
    ]
    for worker_id in worker_ids:
        for _ in range(5):
            processor.add_experiences(
                decision_step, terminal_step, worker_id, action_info
            )

    processor.end_episode()

    # Every agent must have been removed from the policy ...
    policy.remove_previous_action.assert_has_calls(expected_removals)
    # ... and nothing may remain in the per-agent bookkeeping mappings.
    assert len(processor.experience_buffers) == 0
    assert len(processor.last_take_action_outputs) == 0
    assert len(processor.episode_steps) == 0
    assert len(processor.episode_rewards) == 0
コード例 #15
0
def test_agent_manager():
    """A new AgentManager should expose exactly one AgentManagerQueue for trajectories."""
    behavior_id = "test_brain_name"
    reporter = StatsReporter("testcat")
    manager = AgentManager(
        create_mock_policy(),
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=reporter,
    )

    queues = manager.trajectory_queues
    assert len(queues) == 1
    assert isinstance(queues[0], AgentManagerQueue)
コード例 #16
0
def test_stat_reporter_property():
    """add_property on a reporter must be forwarded to the registered writer."""
    # Register a single mock writer on an otherwise empty writer list.
    writer = mock.Mock()
    StatsReporter.writers.clear()
    StatsReporter.add_writer(writer)
    assert len(StatsReporter.writers) == 1

    reporter = StatsReporter("category1")

    # The writer must receive the reporter's category along with key and value.
    reporter.add_property("key", "this is a text")
    expected_args = ("category1", "key", "this is a text")
    writer.add_property.assert_called_once_with(*expected_args)
コード例 #17
0
def test_stat_reporter_text():
    """write_text on a reporter must be forwarded to the registered writer."""
    # Register a single mock writer on an otherwise empty writer list.
    writer = mock.Mock()
    StatsReporter.writers.clear()
    StatsReporter.add_writer(writer)
    assert len(StatsReporter.writers) == 1

    reporter = StatsReporter("category1")

    # The writer must receive the reporter's category along with text and step.
    step = 10
    reporter.write_text("this is a text", step)
    expected_args = ("category1", "this is a text", step)
    writer.write_text.assert_called_once_with(*expected_args)
コード例 #18
0
ファイル: test_stats.py プロジェクト: donlee90/ml-agents
def test_agent_manager_stats_report(aggregation_type):
    """The summary's aggregated_value must match the chosen aggregation method."""
    stats_reporter = StatsReporter("recorder_name")
    manager = AgentManager(None, "behaviorName", stats_reporter)

    values = range(5)

    # Record every value under a single stat, all with the same aggregation type.
    samples = [(value, aggregation_type) for value in values]
    manager.record_environment_stats({"stat": samples}, 0)
    summary = stats_reporter.get_stats_summaries("stat")

    # Expected aggregate per method; HISTOGRAM is expected to equal the average.
    total = sum(values)
    average = total / len(values)
    expected_by_method = {
        StatsAggregationMethod.AVERAGE: average,
        StatsAggregationMethod.MOST_RECENT: values[-1],
        StatsAggregationMethod.SUM: total,
        StatsAggregationMethod.HISTOGRAM: average,
    }

    assert summary.aggregated_value == expected_by_method[aggregation_type]
    stats_reporter.write_stats(0)
コード例 #19
0
def test_group_statuses():
    """Group status entries should shrink once two of four grouped agents terminate."""
    policy = create_mock_policy()
    trajectory_queue = mock.Mock()
    behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    def _grouped_steps(agent_count, **extra):
        # Grouped agents with a single (8,) observation and 2 continuous actions.
        return mb.create_mock_steps(
            num_agents=agent_count,
            observation_specs=create_observation_specs_with_shapes([(8,)]),
            action_spec=ActionSpec.create_continuous(2),
            grouped=True,
            **extra,
        )

    all_decision_steps, empty_terminal_steps = _grouped_steps(4)
    action_info = _create_action_info(4, all_decision_steps.agent_id)
    processor.publish_trajectory_queue(trajectory_queue)

    # First call stands in for the state right after an environment reset.
    processor.add_experiences(
        all_decision_steps, empty_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            all_decision_steps, empty_terminal_steps, 0, action_info
        )

    # Agents 2 and 3 terminate while agents 0 and 1 keep requesting decisions.
    _, dead_terminal_steps = _grouped_steps(2, done=True, agent_ids=[2, 3])
    live_decision_steps, _ = _grouped_steps(2, done=False, agent_ids=[0, 1])
    processor.add_experiences(
        live_decision_steps, dead_terminal_steps, 0, action_info
    )

    # Keep stepping with only the surviving agents.
    action_info = _create_action_info(4, live_decision_steps.agent_id)
    for _ in range(3):
        processor.add_experiences(
            live_decision_steps, empty_terminal_steps, 0, action_info
        )

    # Four trajectories must have been put on the queue.
    queued = trajectory_queue.put.call_args_list
    assert len(queued) == 4

    # First queued trajectory: expected to belong to a terminated agent, with
    # three steps that each carry three group status entries.
    first_trajectory = queued[0][0][-1]
    assert len(first_trajectory.steps) == 3
    for step in first_trajectory.steps:
        assert len(step.group_status) == 3

    # Last queued trajectory: expected to belong to a surviving agent and to be
    # the longest one, with five steps.
    last_trajectory = queued[-1][0][-1]
    assert len(last_trajectory.steps) == 5

    # Its first three steps carry three group status entries (the third step
    # coincides with the terminal step of the two agents that died),
    for step in last_trajectory.steps[:3]:
        assert len(step.group_status) == 3
    # and every later step only one.
    for step in last_trajectory.steps[3:]:
        assert len(step.group_status) == 1