Python create_steps_from_behavior_spec 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: mlagents.trainers.tests.mock_brain

메소드/함수: create_steps_from_behavior_spec

hotexamples.com에서의 예제들: 4

Python create_steps_from_behavior_spec - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 mlagents.trainers.tests.mock_brain.create_steps_from_behavior_spec에 대한 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

파일: test_saver.py 프로젝트: terite/HexChess

def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
    """
    Assert that both policies yield identical discrete log-probabilities
    when sampled with the same observations, masks, and memories.

    The inputs are built from ``policy1`` only (its behavior spec, masks
    and memories) and then fed unchanged to both policies.
    """
    # Move each actor to the default device before sampling.
    for policy in (policy1, policy2):
        policy.actor = policy.actor.to(default_device())

    decisions, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    raw_obs = decisions.obs
    action_masks = policy1._extract_masks(decisions)
    agent_ids = list(decisions.agent_id)
    recurrent_state = torch.as_tensor(
        policy1.retrieve_memories(agent_ids)).unsqueeze(0)
    obs_tensors = [ModelUtils.list_to_tensor(single) for single in raw_obs]

    collected = []
    with torch.no_grad():
        for policy in (policy1, policy2):
            _, log_probs, _, _ = policy.sample_actions(
                obs_tensors,
                masks=action_masks,
                memories=recurrent_state)
            collected.append(log_probs)

    first_log_probs, second_log_probs = collected
    first_array = ModelUtils.to_numpy(first_log_probs.all_discrete_tensor)
    second_array = ModelUtils.to_numpy(second_log_probs.all_discrete_tensor)
    np.testing.assert_array_equal(first_array, second_array)

예제 #2

파일 보기

파일: test_saver.py 프로젝트: rahzaazhar/ml-agents

def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
    """
    Assert that both policies return equal log-probabilities when sampled
    with the same vector/visual observations, masks, and memories.

    The inputs are built from ``policy1`` only and then fed unchanged to
    both policies.
    """
    decisions, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    split_obs, action_masks = policy1._split_decision_step(decisions)

    vector_inputs = [torch.as_tensor(split_obs.vector_observations)]
    visual_inputs = []
    for visual in split_obs.visual_observations:
        visual_inputs.append(torch.as_tensor(visual))

    agent_ids = list(decisions.agent_id)
    recurrent_state = torch.as_tensor(
        policy1.retrieve_memories(agent_ids)).unsqueeze(0)

    collected = []
    with torch.no_grad():
        for policy in (policy1, policy2):
            _, log_probs, _, _, _ = policy.sample_actions(
                vector_inputs,
                visual_inputs,
                masks=action_masks,
                memories=recurrent_state,
                all_log_probs=True)
            collected.append(log_probs)

    first_log_probs, second_log_probs = collected
    np.testing.assert_array_equal(first_log_probs, second_log_probs)

예제 #3

파일 보기

파일: test_nn_policy.py 프로젝트: zt1217396582/ml-agents

def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
    """
    Assert that both policies report equal ``"log_probs"`` when evaluated
    on the same decision step.

    The decision step is created from ``policy1``'s behavior spec with a
    single agent and passed to both policies.
    """
    steps = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1)
    decisions, _ = steps

    # Each evaluation receives its own freshly built list of agent ids.
    first_out, second_out = [
        policy.evaluate(decisions, list(decisions.agent_id))
        for policy in (policy1, policy2)
    ]

    np.testing.assert_array_equal(second_out["log_probs"],
                                  first_out["log_probs"])

예제 #4

파일 보기

def test_policy_evaluate(rnn, visual, discrete):
    """
    Check that ``policy.evaluate`` returns an ``"action"`` array with the
    expected shape.

    :param rnn: Forwarded to ``create_policy_mock`` as ``use_rnn``.
    :param visual: Forwarded to ``create_policy_mock`` as ``use_visual``.
    :param discrete: Forwarded to ``create_policy_mock`` as ``use_discrete``;
        also selects which action shape is expected.
    """
    policy = create_policy_mock(TrainerSettings(),
                                use_rnn=rnn,
                                use_discrete=discrete,
                                use_visual=visual)
    # Only the decision steps are needed; the terminal steps are discarded.
    decision_step, _ = mb.create_steps_from_behavior_spec(
        policy.behavior_spec, num_agents=NUM_AGENTS)

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        expected_shape = (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        expected_shape = (NUM_AGENTS, VECTOR_ACTION_SPACE)
    # The discrete branch previously evaluated this comparison without
    # ``assert``, so a wrong shape could never fail the test.
    assert run_out["action"].shape == expected_shape