Python AgentProcessor.add_experiences 예제들, mlagents.trainers.agent_processor.AgentProcessor.add_experiences Python 예제들

예제 #1

0

파일 보기

파일: test_agent_processor.py 프로젝트: zouhunter/ml-agents

def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    trainer = mock.Mock()
    processor = AgentProcessor(
        trainer,
        policy,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    mock_braininfo = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    for i in range(5):
        processor.add_experiences(mock_braininfo, mock_braininfo,
                                  fake_action_outputs)

    # Assert that two trajectories have been added to the Trainer
    assert len(trainer.process_trajectory.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = trainer.process_trajectory.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

예제 #2

0

파일 보기

def test_end_episode():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
    }

    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
        num_agents=1,
        observation_shapes=[(8,)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1]])),
        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )

    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
    )
    # Run 3 trajectories, with different workers (to simulate different agents)
    remove_calls = []
    for _ep in range(3):
        remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))
        for _ in range(5):
            processor.add_experiences(
                mock_decision_step, mock_terminal_step, _ep, fake_action_info
            )
            # Make sure we don't add experiences from the prior agents after the done

    # Call end episode
    processor.end_episode()
    # Check that we removed every agent
    policy.remove_previous_action.assert_has_calls(remove_calls)
    # Check that there are no experiences left
    assert len(processor.experience_buffers.keys()) == 0
    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0

예제 #3

0

파일 보기

파일: test_agent_processor.py 프로젝트: SancySwachitha/Drone

def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    mock_decision_steps_2, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
    )

    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4
    # Last trajectory should be the longest
    trajectory = tqueue.put.call_args_list[0][0][-1]

    # Make sure trajectory has the right Groupmate Experiences
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents has died
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1

예제 #4

0

파일 보기

def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    mock_step = mb.create_mock_batchedstep(
        num_agents=2,
        num_vector_observations=8,
        action_shape=[2],
        num_vis_observations=num_vis_obs,
    )
    fake_action_info = ActionInfo(
        action=[0.1, 0.1],
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_step.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(mock_step, 0, ActionInfo([], [], {}, []))
    for _ in range(5):
        processor.add_experiences(mock_step, 0, fake_action_info)

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

예제 #5

0

파일 보기

def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
    }
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_steps.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor.experience_buffers[0]) == 0

예제 #6

0

파일 보기

파일: test_agent_processor.py 프로젝트: schmitzmelissa/unity-ml-penguins

def test_agent_deletion():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": [0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1],
        "log_probs": [0.1],
    }
    mock_step = mb.create_mock_batchedstep(
        num_agents=1,
        num_vector_observations=8,
        action_shape=[2],
        num_vis_observations=0,
    )
    mock_done_step = mb.create_mock_batchedstep(
        num_agents=1,
        num_vector_observations=8,
        action_shape=[2],
        num_vis_observations=0,
        done=True,
    )
    fake_action_info = ActionInfo(
        action=[0.1],
        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_step.agent_id,
    )

    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(mock_step, 0, ActionInfo.empty())

    # Run 3 trajectories, with different workers (to simulate different agents)
    add_calls = []
    remove_calls = []
    for _ep in range(3):
        for _ in range(5):
            processor.add_experiences(mock_step, _ep, fake_action_info)
            add_calls.append(mock.call([get_global_agent_id(_ep, 0)], [0.1]))
        processor.add_experiences(mock_done_step, _ep, fake_action_info)
        # Make sure we don't add experiences from the prior agents after the done
        remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))

    policy.save_previous_action.assert_has_calls(add_calls)
    policy.remove_previous_action.assert_has_calls(remove_calls)
    # Check that there are no experiences left
    assert len(processor.experience_buffers.keys()) == 0
    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0

예제 #7

0

파일 보기

파일: test_agent_processor.py 프로젝트: SancySwachitha/Drone

def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = _create_action_info(2, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(5):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5
    # Make sure ungrouped agents don't have team obs
    for step in trajectory.steps:
        assert len(step.group_status) == 0

    # Assert that the AgentProcessor is empty
    assert len(processor._experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        observation_specs=create_observation_specs_with_shapes(
            [(8,)] + num_vis_obs * [(84, 84, 3)]
        ),
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    # Assert that the AgentProcessor is still empty
    assert len(processor._experience_buffers[0]) == 0

예제 #8

0

파일 보기

def test_group_statuses():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=4,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        grouped=True,
    )
    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
    )
    for _ in range(2):
        processor.add_experiences(
            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
        )

    # Make terminal steps for some dead agents
    _, mock_terminal_steps_2 = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=True,
        grouped=True,
        agent_ids=[2, 3],
    )
    # Make decision steps continue for other agents
    mock_decision_steps_2, _ = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
        done=False,
        grouped=True,
        agent_ids=[0, 1],
    )

    processor.add_experiences(
        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
    )
    # Continue to add for remaining live agents
    fake_action_info = _create_action_info(4, mock_decision_steps_2.agent_id)
    for _ in range(3):
        processor.add_experiences(
            mock_decision_steps_2, mock_terminal_steps, 0, fake_action_info
        )

    # Assert that four trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 4

    # Get the first trajectory, which should have been agent 2 (one of the killed agents)
    trajectory = tqueue.put.call_args_list[0][0][-1]
    assert len(trajectory.steps) == 3
    # Make sure trajectory has the right Groupmate Experiences.
    # All three steps should contain all agents
    for step in trajectory.steps:
        assert len(step.group_status) == 3

    # Last trajectory should be the longest. It should be that of agent 1, one of the surviving agents.
    trajectory = tqueue.put.call_args_list[-1][0][-1]
    assert len(trajectory.steps) == 5

    # Make sure trajectory has the right Groupmate Experiences.
    # THe first 3 steps should contain all of the obs (that 3rd step is also the terminal step of 2 of the agents)
    for step in trajectory.steps[0:3]:
        assert len(step.group_status) == 3
    # After 2 agents has died, there should only be 1 group status.
    for step in trajectory.steps[3:]:
        assert len(step.group_status) == 1