Code Example #1
File: test_buffer.py  Project: SimpleG20/ml-agents
def test_buffer():
    agent_1_buffer = construct_fake_buffer(1)
    agent_2_buffer = construct_fake_buffer(2)
    agent_3_buffer = construct_fake_buffer(3)
    a = agent_1_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=1,
                                                         sequential=True)
    assert_array(np.array(a), np.array([[171, 172, 173], [181, 182, 183]]))
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=3,
                                                         sequential=True)
    assert_array(
        np.array(a),
        np.array([
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]),
    )
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=3,
                                                         sequential=False)
    assert_array(
        np.array(a),
        np.array([
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]),
    )
    agent_1_buffer.reset_agent()
    assert agent_1_buffer.num_experiences == 0
    update_buffer = AgentBuffer()
    agent_2_buffer.resequence_and_append(update_buffer,
                                         batch_size=None,
                                         training_length=2)
    agent_3_buffer.resequence_and_append(update_buffer,
                                         batch_size=None,
                                         training_length=2)
    assert len(update_buffer[BufferKey.CONTINUOUS_ACTION]) == 20

    assert np.array(update_buffer[BufferKey.CONTINUOUS_ACTION]).shape == (20, 2)

    c = update_buffer.make_mini_batch(start=0, end=1)
    assert c.keys() == update_buffer.keys()
    assert np.array(c[BufferKey.CONTINUOUS_ACTION]).shape == (1, 2)
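The two `get_batch` modes exercised above are easiest to see on a tiny field. Below is a minimal sketch, assuming `AgentBufferField` is importable from `mlagents.trainers.buffer` (it was a nested class in older releases): with `sequential=True` you get the trailing `batch_size` non-overlapping sequences of `training_length` steps, while `sequential=False` returns overlapping windows ending at each of the last `batch_size` steps.

import numpy as np
from mlagents.trainers.buffer import AgentBufferField  # location varies by release

field = AgentBufferField()
for step in range(9):
    field.append(np.array([step], dtype=np.float32))

# sequential=True: the last 2 non-overlapping sequences of 3 -> steps 3..8
print(field.get_batch(batch_size=2, training_length=3, sequential=True))
# sequential=False: overlapping windows ending at steps 7 and 8 -> 5,6,7 and 6,7,8
print(field.get_batch(batch_size=2, training_length=3, sequential=False))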
Code Example #2
File: utils.py  Project: zereyak13/ml-agents
def create_agent_buffer(behavior_spec: BehaviorSpec,
                        number: int,
                        reward: float = 0.0) -> AgentBuffer:
    buffer = AgentBuffer()
    curr_obs = [
        np.random.normal(size=sen_spec.shape).astype(np.float32)
        for sen_spec in behavior_spec.sensor_specs
    ]
    next_obs = [
        np.random.normal(size=sen_spec.shape).astype(np.float32)
        for sen_spec in behavior_spec.sensor_specs
    ]
    action_buffer = behavior_spec.action_spec.random_action(1)
    action = {}
    if behavior_spec.action_spec.continuous_size > 0:
        action["continuous_action"] = action_buffer.continuous
    if behavior_spec.action_spec.discrete_size > 0:
        action["discrete_action"] = action_buffer.discrete

    for _ in range(number):
        for i, obs in enumerate(curr_obs):
            buffer[ObsUtil.get_name_at(i)].append(obs)
        for i, obs in enumerate(next_obs):
            buffer[ObsUtil.get_name_at_next(i)].append(obs)
        buffer["actions"].append(action)
        for _act_type, _act in action.items():
            buffer[_act_type].append(_act[0, :])
        buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
        buffer["masks"].append(np.ones(1, dtype=np.float32))
    buffer["done"] = np.zeros(number, dtype=np.float32)
    return buffer
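Note that this snapshot predates the typed buffer keys: observations come from `behavior_spec.sensor_specs`, and actions, rewards, and masks are stored under plain string keys. Example #4 below is the same helper after the rename to `observation_specs` and the switch to `BufferKey` enums; newer `AgentBuffer` versions validate key types, so this string-keyed variant only runs against the older library it was written for.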
Code Example #3
File: test_buffer.py  Project: terite/HexChess
def construct_fake_buffer(fake_agent_id):
    b = AgentBuffer()
    for step in range(9):
        b[ObsUtil.get_name_at(0)].append(
            np.array(
                [
                    100 * fake_agent_id + 10 * step + 1,
                    100 * fake_agent_id + 10 * step + 2,
                    100 * fake_agent_id + 10 * step + 3,
                ],
                dtype=np.float32,
            ))
        b[BufferKey.CONTINUOUS_ACTION].append(
            np.array(
                [
                    100 * fake_agent_id + 10 * step + 4,
                    100 * fake_agent_id + 10 * step + 5,
                ],
                dtype=np.float32,
            ))
        b[BufferKey.GROUP_CONTINUOUS_ACTION].append([
            np.array(
                [
                    100 * fake_agent_id + 10 * step + 4,
                    100 * fake_agent_id + 10 * step + 5,
                ],
                dtype=np.float32,
            )
        ] * 3)
    return b
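The fake values follow a fixed encoding, `100 * agent_id + 10 * step + column`, which makes the expected arrays in the tests easy to decode by hand:

# e.g. 253 in the assertions of Examples #1 and #7 is agent 2, step 5, column 3
value = 253
agent_id, step, column = value // 100, (value % 100) // 10, value % 10
assert (agent_id, step, column) == (2, 5, 3)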
Code Example #4
File: utils.py  Project: SancySwachitha/Drone
def create_agent_buffer(behavior_spec: BehaviorSpec,
                        number: int,
                        reward: float = 0.0) -> AgentBuffer:
    buffer = AgentBuffer()
    curr_obs = [
        np.random.normal(size=obs_spec.shape).astype(np.float32)
        for obs_spec in behavior_spec.observation_specs
    ]
    next_obs = [
        np.random.normal(size=obs_spec.shape).astype(np.float32)
        for obs_spec in behavior_spec.observation_specs
    ]
    action_buffer = behavior_spec.action_spec.random_action(1)
    action = {}
    if behavior_spec.action_spec.continuous_size > 0:
        action[BufferKey.CONTINUOUS_ACTION] = action_buffer.continuous
    if behavior_spec.action_spec.discrete_size > 0:
        action[BufferKey.DISCRETE_ACTION] = action_buffer.discrete

    for _ in range(number):
        for i, obs in enumerate(curr_obs):
            buffer[ObsUtil.get_name_at(i)].append(obs)
        for i, obs in enumerate(next_obs):
            buffer[ObsUtil.get_name_at_next(i)].append(obs)
        # TODO
        # buffer[AgentBufferKey.ACTIONS].append(action)
        for _act_type, _act in action.items():
            buffer[_act_type].append(_act[0, :])
        # TODO was "rewards"
        buffer[BufferKey.ENVIRONMENT_REWARDS].append(
            np.ones(1, dtype=np.float32) * reward)
        buffer[BufferKey.MASKS].append(np.ones(1, dtype=np.float32))
    buffer[BufferKey.DONE] = np.zeros(number, dtype=np.float32)
    return buffer
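A hedged usage sketch: rather than relying on the exact `BehaviorSpec`/`ObservationSpec` constructors (their signatures changed across ml-agents releases), the stand-in below fakes the only two attributes `create_agent_buffer` actually reads, `observation_specs` (for `.shape`) and `action_spec`. The `SimpleNamespace` spec is a hypothetical stand-in, not the real API:

from types import SimpleNamespace
from mlagents_envs.base_env import ActionSpec

spec = SimpleNamespace(
    observation_specs=[SimpleNamespace(shape=(4,))],  # hypothetical stand-in
    action_spec=ActionSpec.create_continuous(2),
)
buf = create_agent_buffer(spec, number=5, reward=1.0)
assert buf.num_experiences == 5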
Code Example #5
File: test_buffer.py  Project: SimpleG20/ml-agents
def construct_fake_buffer(fake_agent_id):
    b = AgentBuffer()
    for step in range(9):
        b[ObsUtil.get_name_at(0)].append([
            100 * fake_agent_id + 10 * step + 1,
            100 * fake_agent_id + 10 * step + 2,
            100 * fake_agent_id + 10 * step + 3,
        ])
        b[BufferKey.CONTINUOUS_ACTION].append([
            100 * fake_agent_id + 10 * step + 4,
            100 * fake_agent_id + 10 * step + 5
        ])
    return b
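This variant appends plain Python lists, while the terite/HexChess version in Example #3 stores `np.float32` arrays (and also fills `BufferKey.GROUP_CONTINUOUS_ACTION`). That is why the assertions in Example #7 can pin `dtype=np.float32` where Example #1 compares values only.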
Code Example #6
def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
    behavior_spec: BehaviorSpec,
    sequence_length: int,
) -> AgentBuffer:
    # Create and populate buffer using experiences
    demo_raw_buffer = AgentBuffer()
    demo_processed_buffer = AgentBuffer()
    for idx, current_pair_info in enumerate(pair_infos):
        if idx > len(pair_infos) - 2:
            break  # the last pair has no successor to build a transition from
        next_pair_info = pair_infos[idx + 1]
        current_decision_step, current_terminal_step = steps_from_proto(
            [current_pair_info.agent_info], behavior_spec
        )
        next_decision_step, next_terminal_step = steps_from_proto(
            [next_pair_info.agent_info], behavior_spec
        )
        # the first pair has no predecessor, so start from a zeroed previous action
        previous_action = (
            np.array(
                pair_infos[idx].action_info.vector_actions_deprecated, dtype=np.float32
            )
            * 0
        )
        if idx > 0:
            previous_action = np.array(
                pair_infos[idx - 1].action_info.vector_actions_deprecated,
                dtype=np.float32,
            )

        next_done = len(next_terminal_step) == 1
        next_reward = 0
        if len(next_terminal_step) == 1:
            next_reward = next_terminal_step.reward[0]
        else:
            next_reward = next_decision_step.reward[0]
        current_obs = None
        if len(current_terminal_step) == 1:
            current_obs = list(current_terminal_step.values())[0].obs
        else:
            current_obs = list(current_decision_step.values())[0].obs

        demo_raw_buffer["done"].append(next_done)
        demo_raw_buffer["rewards"].append(next_reward)
        for i, obs in enumerate(current_obs):
            demo_raw_buffer[ObsUtil.get_name_at(i)].append(obs)
        if (
            len(current_pair_info.action_info.continuous_actions) == 0
            and len(current_pair_info.action_info.discrete_actions) == 0
        ):
            if behavior_spec.action_spec.continuous_size > 0:
                demo_raw_buffer["continuous_action"].append(
                    current_pair_info.action_info.vector_actions_deprecated
                )
            else:
                demo_raw_buffer["discrete_action"].append(
                    current_pair_info.action_info.vector_actions_deprecated
                )
        else:
            if behavior_spec.action_spec.continuous_size > 0:
                demo_raw_buffer["continuous_action"].append(
                    current_pair_info.action_info.continuous_actions
                )
            if behavior_spec.action_spec.discrete_size > 0:
                demo_raw_buffer["discrete_action"].append(
                    current_pair_info.action_info.discrete_actions
                )
        demo_raw_buffer["prev_action"].append(previous_action)
        if next_done:
            demo_raw_buffer.resequence_and_append(
                demo_processed_buffer, batch_size=None, training_length=sequence_length
            )
            demo_raw_buffer.reset_agent()
    demo_raw_buffer.resequence_and_append(
        demo_processed_buffer, batch_size=None, training_length=sequence_length
    )
    return demo_processed_buffer
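A hedged usage sketch: in ml-agents, `make_demo_buffer` is normally fed by `load_demonstration` from the same `mlagents.trainers.demo_loader` module. The `.demo` path below is a placeholder, and the three-tuple return shown is an assumption that may differ across releases:

from mlagents.trainers.demo_loader import load_demonstration

# placeholder path; load_demonstration is assumed to return
# (behavior_spec, info_action_pairs, total_expected)
behavior_spec, pair_infos, _total_expected = load_demonstration("path/to/expert.demo")
demo_buffer = make_demo_buffer(pair_infos, behavior_spec, sequence_length=64)
print(demo_buffer.num_experiences)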
Code Example #7
File: test_buffer.py  Project: terite/HexChess
def test_buffer():
    agent_1_buffer = construct_fake_buffer(1)
    agent_2_buffer = construct_fake_buffer(2)
    agent_3_buffer = construct_fake_buffer(3)

    # Test get_batch
    a = agent_1_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=1,
                                                         sequential=True)
    assert_array(
        np.array(a),
        np.array([[171, 172, 173], [181, 182, 183]], dtype=np.float32))

    # Test get_batch
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=3,
                                                         sequential=True)
    assert_array(
        np.array(a),
        np.array(
            [
                [231, 232, 233],
                [241, 242, 243],
                [251, 252, 253],
                [261, 262, 263],
                [271, 272, 273],
                [281, 282, 283],
            ],
            dtype=np.float32,
        ),
    )
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=2,
                                                         training_length=3,
                                                         sequential=False)
    assert_array(
        np.array(a),
        np.array([
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]),
    )

    # Test padding
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(batch_size=None,
                                                         training_length=4,
                                                         sequential=True)
    assert_array(
        np.array(a),
        np.array([
            [201, 202, 203],
            [211, 212, 213],
            [221, 222, 223],
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ]),
    )
    # Test group entries return Lists of Lists. Make sure to pad properly!
    a = agent_2_buffer[BufferKey.GROUP_CONTINUOUS_ACTION].get_batch(
        batch_size=None, training_length=4, sequential=True)
    for _group_entry in a[:-3]:
        assert len(_group_entry) == 3
    for _group_entry in a[-3:]:
        assert len(_group_entry) == 0

    agent_1_buffer.reset_agent()
    assert agent_1_buffer.num_experiences == 0
    update_buffer = AgentBuffer()
    agent_2_buffer.resequence_and_append(update_buffer,
                                         batch_size=None,
                                         training_length=2)
    agent_3_buffer.resequence_and_append(update_buffer,
                                         batch_size=None,
                                         training_length=2)
    assert len(update_buffer[BufferKey.CONTINUOUS_ACTION]) == 20

    assert np.array(update_buffer[BufferKey.CONTINUOUS_ACTION]).shape == (20, 2)

    c = update_buffer.make_mini_batch(start=0, end=1)
    assert c.keys() == update_buffer.keys()
    # Make sure the values of c are AgentBufferField
    for val in c.values():
        assert isinstance(val, AgentBufferField)
    assert np.array(c[BufferKey.CONTINUOUS_ACTION]).shape == (1, 2)
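The magic number 20 in the `resequence_and_append` assertions follows from padding: each fake buffer holds 9 steps, `resequence_and_append` pads every agent's data up to a multiple of `training_length=2` before appending, and two agents therefore contribute 10 rows each. A quick check of that arithmetic:

import math

steps, training_length, n_agents = 9, 2, 2
padded = math.ceil(steps / training_length) * training_length  # 9 -> 10
assert padded * n_agents == 20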