Exemple #1
0
def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
    """
    Assert that both policies yield identical "log_probs" on a shared input.

    A single-agent decision step is built from ``policy1.brain`` and fed to
    each policy in turn (``policy1`` first, then ``policy2``). The check
    fails via ``np.testing.assert_array_equal`` if the "log_probs" entries
    of the two evaluation outputs differ.
    """
    shared_step, _ = mb.create_steps_from_brainparams(policy1.brain, num_agents=1)

    # Each policy receives its own freshly built agent-id list.
    first_out, second_out = [
        policy.evaluate(shared_step, list(shared_step.agent_id))
        for policy in (policy1, policy2)
    ]

    np.testing.assert_array_equal(second_out["log_probs"], first_out["log_probs"])
def test_policy_evaluate(dummy_config, rnn, visual, discrete):
    """
    Check the shape of the "action" output returned by ``policy.evaluate``.

    A mock policy is built from ``dummy_config`` with the requested
    recurrent / visual / discrete options, evaluated on a decision step for
    ``NUM_AGENTS`` agents, and the resulting "action" array shape is checked:

    * discrete: ``(NUM_AGENTS, len(DISCRETE_ACTION_SPACE))``
    * continuous: ``(NUM_AGENTS, VECTOR_ACTION_SPACE[0])``

    NOTE(review): the arguments are presumably supplied by pytest fixtures
    and/or parametrization declared outside this block -- confirm.
    """
    # Reset the TF default graph before building a new policy.
    tf.reset_default_graph()
    policy = create_policy_mock(
        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    # Only the decision step is needed here; the second element is unused.
    decision_step, _ = mb.create_steps_from_brainparams(
        policy.brain, num_agents=NUM_AGENTS
    )

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        # This comparison previously lacked `assert`, so its result was
        # discarded and the discrete case was never actually verified.
        assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE[0])