예제 #1
0
def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
    """
    Make sure two policies have the same output for the same input.
    """
    policy1.actor = policy1.actor.to(default_device())
    policy2.actor = policy2.actor.to(default_device())

    decision_step, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    np_obs = decision_step.obs
    masks = policy1._extract_masks(decision_step)
    memories = torch.as_tensor(
        policy1.retrieve_memories(list(decision_step.agent_id))).unsqueeze(0)
    tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

    with torch.no_grad():
        _, log_probs1, _, _ = policy1.sample_actions(tensor_obs,
                                                     masks=masks,
                                                     memories=memories)
        _, log_probs2, _, _ = policy2.sample_actions(tensor_obs,
                                                     masks=masks,
                                                     memories=memories)
    np.testing.assert_array_equal(
        ModelUtils.to_numpy(log_probs1.all_discrete_tensor),
        ModelUtils.to_numpy(log_probs2.all_discrete_tensor),
    )
예제 #2
0
def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
    """
    Make sure two policies have the same output for the same input.
    """
    decision_step, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    vec_vis_obs, masks = policy1._split_decision_step(decision_step)
    vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
    vis_obs = [
        torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
    ]
    memories = torch.as_tensor(
        policy1.retrieve_memories(list(decision_step.agent_id))).unsqueeze(0)

    with torch.no_grad():
        _, log_probs1, _, _, _ = policy1.sample_actions(vec_obs,
                                                        vis_obs,
                                                        masks=masks,
                                                        memories=memories,
                                                        all_log_probs=True)
        _, log_probs2, _, _, _ = policy2.sample_actions(vec_obs,
                                                        vis_obs,
                                                        masks=masks,
                                                        memories=memories,
                                                        all_log_probs=True)

    np.testing.assert_array_equal(log_probs1, log_probs2)
예제 #3
0
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
    """
    Make sure two policies have the same output for the same input.
    """
    decision_step, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
    run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))

    np.testing.assert_array_equal(run_out2["log_probs"], run_out1["log_probs"])
예제 #4
0
def test_policy_evaluate(rnn, visual, discrete):
    # Test evaluate
    policy = create_policy_mock(TrainerSettings(),
                                use_rnn=rnn,
                                use_discrete=discrete,
                                use_visual=visual)
    decision_step, terminal_step = mb.create_steps_from_behavior_spec(
        policy.behavior_spec, num_agents=NUM_AGENTS)

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)