Example #1
def external_brains(self) -> Dict[str, BrainParameters]:
    # Convert each agent group exposed by the wrapped environment
    # into the legacy BrainParameters format.
    result = {}
    for brain_name in self.env.get_agent_groups():
        result[brain_name] = group_spec_to_brain_parameters(
            brain_name, self.env.get_agent_group_spec(brain_name)
        )
    return result
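The conversion above can be exercised directly; a minimal sketch, assuming env is a UnityEnvironment constructed as in the test examples below, and using only the calls that appear in these examples:

# Sketch: dump the BrainParameters for every agent group. `env` is a
# UnityEnvironment built as in the tests further down (an assumption).
def dump_brain_parameters(env):
    env.reset()
    for brain_name in env.get_agent_groups():
        spec = env.get_agent_group_spec(brain_name)
        params = group_spec_to_brain_parameters(brain_name, spec)
        print(brain_name, params)
    env.close()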
Example #2
def external_brains():
    # Same conversion as Example #1, with env taken from the enclosing
    # scope instead of self.env.
    result = {}
    for brain_name in env.get_agent_groups():
        result[brain_name] = group_spec_to_brain_parameters(
            brain_name, env.get_agent_group_spec(brain_name)
        )
    return result
Example #3
def external_brains():
    result = {}
    # Single-brain case: use the gym environment id as the brain name
    # instead of iterating over env.get_agent_groups().
    brain_name = env.spec.id
    result[brain_name] = group_spec_to_brain_parameters(
        brain_name, {
            "observation_shapes": [(env.observation_space, )],
            "action_shape": (env.action_scheme.action_space.n, ),
            "action_type": 'DISCRETE'
        })
    return result
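For a concrete discrete-action gym environment, the hand-built spec dict above resolves roughly as follows; a sketch assuming gym's CartPole-v1, with observation_shapes holding the space's shape tuple rather than the Space object itself (the action_scheme indirection in Example #3 is specific to that codebase):

import gym

env = gym.make("CartPole-v1")
brain_name = env.spec.id  # "CartPole-v1"
spec = {
    "observation_shapes": [env.observation_space.shape],  # [(4,)]
    "action_shape": (env.action_space.n,),                # (2,)
    "action_type": "DISCRETE",
}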
Example #4
def demo_to_buffer(
    file_path: str, sequence_length: int
) -> Tuple[BrainParameters, AgentBuffer]:
    """
    Loads demonstration file and uses it to fill training buffer.
    :param file_path: Location of demonstration file (.demo).
    :param sequence_length: Length of trajectories to fill buffer.
    :return: A (BrainParameters, AgentBuffer) tuple built from the demo.
    """
    group_spec, info_action_pair, _ = load_demonstration(file_path)
    demo_buffer = make_demo_buffer(info_action_pair, group_spec, sequence_length)
    brain_params = group_spec_to_brain_parameters("DemoBrain", group_spec)
    return brain_params, demo_buffer
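A minimal usage sketch for demo_to_buffer; the .demo path here is illustrative only:

# Sketch: load a recorded demonstration into a training buffer.
brain_params, demo_buffer = demo_to_buffer(
    "demos/Expert.demo", sequence_length=64  # hypothetical path
)
print(brain_params.brain_name)  # "DemoBrain", as hard-coded above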
Example #5
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    env.reset()
    brain_name = env.get_agent_groups()[0]
    batched_step = env.get_step_result(brain_name)
    brain_params = group_spec_to_brain_parameters(
        brain_name, env.get_agent_group_spec(brain_name))

    trainer_parameters = dummy_config
    model_path = brain_name
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)
    run_out = policy.evaluate(batched_step, list(batched_step.agent_id))
    # 3 agents in the mock environment, each with 2 continuous actions
    assert run_out["action"].shape == (3, 2)
    env.close()
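The dummy_config fixture is not shown in either test; a sketch of the kind of PPO trainer parameters such a fixture typically contains (key names follow the ml-agents trainer config of this era, values are illustrative, and the dict is likely incomplete):

dummy_config = {
    "trainer": "ppo",
    "batch_size": 32,
    "buffer_size": 512,
    "hidden_units": 128,
    "learning_rate": 3.0e-4,
    "num_layers": 1,
    "time_horizon": 64,
    "sequence_length": 64,
    "use_recurrent": False,
    "normalize": False,
    "reward_signals": {"extrinsic": {"strength": 1.0, "gamma": 0.99}},
    # "model_path" and "keep_checkpoints" are filled in by the tests.
}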
Example #6
def test_ppo_get_value_estimates(mock_communicator, mock_launcher,
                                 dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    env.reset()
    brain_name = env.get_agent_groups()[0]
    brain_info = step_result_to_brain_info(
        env.get_step_result(brain_name), env.get_agent_group_spec(brain_name))
    brain_params = group_spec_to_brain_parameters(
        brain_name, env.get_agent_group_spec(brain_name))

    trainer_parameters = dummy_config
    model_path = brain_name
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    # With done=True, value estimates for terminal states should be zeroed
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()