def create_bc_trainer(dummy_config, is_discrete=False):
    """Build a BCTrainer wired to a fully mocked Unity environment.

    Args:
        dummy_config: base trainer-parameter dict. NOTE(review): it is
            mutated in place (summary/model/demo paths added) — callers
            sharing this fixture will see the changes; confirm intended.
        is_discrete: when True use the PushBlock (discrete-action) mock
            brain; otherwise the 3DBall (continuous-action) one.

    Returns:
        (trainer, env) — the BCTrainer and the mocked environment, with
        a 100-step simulated rollout preloaded as the demonstration buffer.
    """
    mock_env = mock.Mock()
    if is_discrete:
        brain = mb.create_mock_pushblock_brain()
        braininfo = mb.create_mock_braininfo(
            num_agents=12, num_vector_observations=70
        )
    else:
        brain = mb.create_mock_3dball_brain()
        braininfo = mb.create_mock_braininfo(
            num_agents=12, num_vector_observations=8
        )
    mb.setup_mock_unityenvironment(mock_env, brain, braininfo)
    env = mock_env()

    params = dummy_config
    params["summary_path"] = "tmp"
    params["model_path"] = "tmp"
    params["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/test.demo"
    )
    trainer = BCTrainer(
        brain, params, training=True, load=False, seed=0, run_id=0
    )
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)
    return trainer, env
# Example 2
def create_ppo_policy_mock(
    mock_env, dummy_config, reward_signal_config, use_rnn, use_discrete, use_visual
):
    """Construct a PPOPolicy backed by a mocked Unity environment.

    Args:
        mock_env: mock environment class to instrument and instantiate.
        dummy_config: base trainer-parameter dict (mutated in place).
        reward_signal_config: merged into the config's "reward_signals".
        use_rnn: sets "use_recurrent" in the config.
        use_discrete: discrete vs. continuous action space.
        use_visual: visual observations (no vector obs) vs. vector obs.

    Returns:
        (env, policy) — the instantiated mocked env and the PPOPolicy.
    """
    # Hoist the settings that are identical in both branches.
    action_type = "discrete" if use_discrete else "continuous"
    action_space = DISCRETE_ACTION_SPACE if use_discrete else VECTOR_ACTION_SPACE

    if use_visual:
        brain = mb.create_mock_brainparams(
            vector_action_space_type=action_type,
            vector_action_space_size=action_space,
            vector_observation_space_size=0,
            number_visual_observations=1,
        )
        braininfo = mb.create_mock_braininfo(
            num_agents=NUM_AGENTS,
            num_vis_observations=1,
            num_vector_acts=sum(action_space),
            discrete=use_discrete,
        )
    else:
        brain = mb.create_mock_brainparams(
            vector_action_space_type=action_type,
            vector_action_space_size=action_space,
            vector_observation_space_size=VECTOR_OBS_SPACE,
        )
        braininfo = mb.create_mock_braininfo(
            num_agents=NUM_AGENTS,
            num_vector_observations=VECTOR_OBS_SPACE,
            num_vector_acts=sum(action_space),
            discrete=use_discrete,
        )
    mb.setup_mock_unityenvironment(mock_env, brain, braininfo)
    env = mock_env()

    config = dummy_config
    config["model_path"] = env.brain_names[0]
    config["keep_checkpoints"] = 3
    config["reward_signals"].update(reward_signal_config)
    config["use_recurrent"] = use_rnn
    policy = PPOPolicy(0, brain, config, False, False)
    return env, policy
# Example 3
def create_ppo_policy_with_bc_mock(mock_env, mock_brain, dummy_config, use_rnn,
                                   demo_file):
    """Construct a PPOPolicy with behavioral-cloning pretraining enabled.

    Args:
        mock_env: mock environment class to instrument and instantiate.
        mock_brain: brain parameters for the policy.
        dummy_config: base trainer-parameter dict (mutated in place).
        use_rnn: sets "use_recurrent" in the config.
        demo_file: demo filename, resolved relative to this test module.

    Returns:
        (env, policy) — the instantiated mocked env and the PPOPolicy.
    """
    braininfo = mb.create_mock_braininfo(
        num_agents=12, num_vector_observations=8
    )
    mb.setup_mock_unityenvironment(mock_env, mock_brain, braininfo)
    env = mock_env()

    config = dummy_config
    config["model_path"] = env.brain_names[0]
    config["keep_checkpoints"] = 3
    config["use_recurrent"] = use_rnn
    here = os.path.dirname(os.path.abspath(__file__))
    config["pretraining"]["demo_path"] = here + "/" + demo_file
    policy = PPOPolicy(0, mock_brain, config, False, False)
    return env, policy
def create_policy_with_bc_mock(mock_env, mock_brain, trainer_config, use_rnn,
                               demo_file):
    """Construct a PPO or SAC policy (per trainer_config["trainer"]) with a
    behavioral-cloning demo wired into a mocked Unity environment.

    Args:
        mock_env: mock environment class to instrument and instantiate.
        mock_brain: brain parameters for the policy.
        trainer_config: trainer-parameter dict (mutated in place).
        use_rnn: sets "use_recurrent" in the config.
        demo_file: demo filename, resolved relative to this test module.

    Returns:
        (env, policy).
    """
    braininfo = mb.create_mock_braininfo(
        num_agents=12, num_vector_observations=8
    )
    mb.setup_mock_unityenvironment(mock_env, mock_brain, braininfo)
    env = mock_env()

    trainer_config["model_path"] = env.external_brain_names[0]
    trainer_config["keep_checkpoints"] = 3
    trainer_config["use_recurrent"] = use_rnn
    here = os.path.dirname(os.path.abspath(__file__))
    trainer_config["behavioral_cloning"]["demo_path"] = here + "/" + demo_file

    # Dispatch on trainer type; anything other than "ppo" gets SAC.
    if trainer_config["trainer"] == "ppo":
        policy = PPOPolicy(0, mock_brain, trainer_config, False, False)
    else:
        policy = SACPolicy(0, mock_brain, trainer_config, False, False)
    return env, policy
# Example 5
def test_bc_trainer(mock_env, dummy_config):
    """Smoke test: a BCTrainer on a mocked 3DBall env can update its policy
    (recording a cloning loss) and advance its step counter."""
    brain = mb.create_mock_3dball_brain()
    braininfo = mb.create_mock_braininfo(
        num_agents=12, num_vector_observations=8
    )
    mb.setup_mock_unityenvironment(mock_env, brain, braininfo)
    env = mock_env()

    config = dummy_config
    config["summary_path"] = "tmp"
    config["model_path"] = "tmp"
    config["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/test.demo"
    )
    trainer = BCTrainer(
        brain, config, training=True, load=False, seed=0, run_id=0
    )
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)

    trainer.update_policy()
    # An update must have produced at least one recorded cloning-loss value.
    assert len(trainer.stats["Losses/Cloning Loss"]) > 0
    trainer.increment_step(1)
    assert trainer.step == 1