Example #1
def test_load_save_policy(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params)
    model_saver = TorchModelSaver(trainer_params, path1)
    model_saver.register(policy)
    model_saver.initialize_or_load(policy)
    policy.set_step(2000)

    mock_brain_name = "MockBrain"
    model_saver.save_checkpoint(mock_brain_name, 2000)
    assert len(os.listdir(tmp_path)) > 0

    # Try loading from this path
    model_saver2 = TorchModelSaver(trainer_params, path1, load=True)
    policy2 = create_policy_mock(trainer_params)
    model_saver2.register(policy2)
    model_saver2.initialize_or_load(policy2)
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initializing from path 1
    trainer_params.init_path = path1
    model_saver3 = TorchModelSaver(trainer_params, path2)
    policy3 = create_policy_mock(trainer_params)
    model_saver3.register(policy3)
    model_saver3.initialize_or_load(policy3)
    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
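The helper functions used above (create_policy_mock and _compare_two_policies) are defined elsewhere in the test module. A minimal sketch of _compare_two_policies, assuming the policy exposes its torch network as actor_critic (as Example #21 below does), could look like this:

import torch

def _compare_two_policies(policy1, policy2) -> None:
    # Hypothetical sketch: the load/initialize paths above should leave both
    # policies with identical network weights.
    params1 = list(policy1.actor_critic.parameters())
    params2 = list(policy2.actor_critic.parameters())
    assert len(params1) == len(params2)
    for param1, param2 in zip(params1, params2):
        assert torch.equal(param1.data, param2.data)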
Example #2
def test_load_save_optimizer(tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer

    trainer_settings = TrainerSettings()
    trainer_settings.hyperparameters = HyperparametersClass()
    policy = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer = OptimizerClass(policy, trainer_settings)

    # save at path 1
    path1 = os.path.join(tmp_path, "runid1")
    model_saver = TorchModelSaver(trainer_settings, path1)
    model_saver.register(policy)
    model_saver.register(optimizer)
    model_saver.initialize_or_load()
    policy.set_step(2000)
    model_saver.save_checkpoint("MockBrain", 2000)

    # create a new policy and optimizer
    policy2 = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer2 = OptimizerClass(policy2, trainer_settings)

    # load weights
    model_saver2 = TorchModelSaver(trainer_settings, path1, load=True)
    model_saver2.register(policy2)
    model_saver2.register(optimizer2)
    model_saver2.initialize_or_load()  # This is to load the optimizers

    # Compare the two optimizers
    _compare_two_optimizers(optimizer, optimizer2)
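_compare_two_optimizers is likewise not shown on this page; a plausible sketch, reusing the get_modules() API that Example #6 below exercises, is:

import torch

def _compare_two_optimizers(opt1, opt2) -> None:
    # Hypothetical sketch: walk every registered module and check that the
    # loaded optimizer ended up with the same parameters as the saved one.
    modules1 = opt1.get_modules()
    modules2 = opt2.get_modules()
    assert sorted(modules1.keys()) == sorted(modules2.keys())
    for name, module1 in modules1.items():
        module2 = modules2[name]
        if hasattr(module1, "parameters"):
            for param1, param2 in zip(module1.parameters(), module2.parameters()):
                assert torch.equal(param1.data, param2.data)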
Example #3
def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses
    is a new instance (i.e. all factory methods are used properly.)
    """
    check_if_different(RunOptions(), RunOptions())
    check_if_different(TrainerSettings(), TrainerSettings())
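check_if_different does the real work in this test; a sketch, assuming the settings classes are attrs-based (which the docstring's mention of factory methods implies), might be:

import attr

def check_if_different(testobj1, testobj2) -> None:
    # Hypothetical sketch: the two objects must be distinct instances, and so
    # must any mutable or attrs-typed fields, so that a shared default
    # instance (the classic mutable-default pitfall) is caught.
    assert testobj1 is not testobj2
    if attr.has(testobj1.__class__) and attr.has(testobj2.__class__):
        for key, val in attr.asdict(testobj1, recurse=False).items():
            if isinstance(val, (dict, list)) or attr.has(type(val)):
                check_if_different(val, attr.asdict(testobj2, recurse=False)[key])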
Example #4
def test_load_save(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params, model_path=path1)
    policy.initialize_or_load()
    policy._set_step(2000)
    policy.save_model(2000)

    assert len(os.listdir(tmp_path)) > 0

    # Try loading from this path
    policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
    policy2.initialize_or_load()
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initializing from path 1
    trainer_params.output_path = path2
    trainer_params.init_path = path1
    policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
    policy3.initialize_or_load()

    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
Example #5
def test_load_save(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params, model_path=path1)
    policy.initialize_or_load()
    policy._set_step(2000)

    mock_brain_name = "MockBrain"
    checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
    serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
    policy.checkpoint(checkpoint_path, serialization_settings)

    assert len(os.listdir(tmp_path)) > 0

    # Try loading from this path
    policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
    policy2.initialize_or_load()
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initializing from path 1
    trainer_params.output_path = path2
    trainer_params.init_path = path1
    policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
    policy3.initialize_or_load()

    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
Example #6
def test_reward_provider_save(tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer

    trainer_settings = TrainerSettings()
    trainer_settings.hyperparameters = HyperparametersClass()
    trainer_settings.reward_signals = {
        RewardSignalType.CURIOSITY: CuriositySettings(),
        RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH),
        RewardSignalType.RND: RNDSettings(),
    }
    policy = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer = OptimizerClass(policy, trainer_settings)

    # save at path 1
    path1 = os.path.join(tmp_path, "runid1")
    model_saver = TorchModelSaver(trainer_settings, path1)
    model_saver.register(policy)
    model_saver.register(optimizer)
    model_saver.initialize_or_load()
    policy.set_step(2000)
    model_saver.save_checkpoint("MockBrain", 2000)

    # create a new policy and optimizer
    policy2 = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer2 = OptimizerClass(policy2, trainer_settings)

    # load weights
    model_saver2 = TorchModelSaver(trainer_settings, path1, load=True)
    model_saver2.register(policy2)
    model_saver2.register(optimizer2)
    model_saver2.initialize_or_load()  # This is to load the optimizers

    # assert the models have the same weights
    module_dict_1 = optimizer.get_modules()
    module_dict_2 = optimizer2.get_modules()
    assert "Module:GAIL" in module_dict_1
    assert "Module:GAIL" in module_dict_2
    assert "Module:Curiosity" in module_dict_1
    assert "Module:Curiosity" in module_dict_2
    assert "Module:RND-pred" in module_dict_1
    assert "Module:RND-pred" in module_dict_2
    assert "Module:RND-target" in module_dict_1
    assert "Module:RND-target" in module_dict_2
    for name, module1 in module_dict_1.items():
        assert name in module_dict_2
        module2 = module_dict_2[name]
        if hasattr(module1, "parameters"):
            for param1, param2 in zip(module1.parameters(),
                                      module2.parameters()):
                assert param1.data.ne(param2.data).sum() == 0

    # Run some rewards
    data = create_agent_buffer(policy.behavior_spec, 1)
    for reward_name in optimizer.reward_signals.keys():
        rp_1 = optimizer.reward_signals[reward_name]
        rp_2 = optimizer2.reward_signals[reward_name]
        assert np.array_equal(rp_1.evaluate(data), rp_2.evaluate(data))
Example #7
def test_checkpoint_conversion(tmpdir, rnn, visual, discrete):
    dummy_config = TrainerSettings()
    model_path = os.path.join(tmpdir, "Mock_Brain")
    policy = create_policy_mock(
        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    trainer_params = TrainerSettings()
    model_saver = TorchModelSaver(trainer_params, model_path)
    model_saver.register(policy)
    model_saver.save_checkpoint("Mock_Brain", 100)
    assert os.path.isfile(model_path + "/Mock_Brain-100.onnx")
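The assertion above only checks that the .onnx file exists. To sanity-check the exported graph itself, a short follow-up sketch (assuming the onnx package is installed; this is not part of the original test) could be:

import onnx

# Load the checkpoint exported by save_checkpoint above and validate it.
model = onnx.load(model_path + "/Mock_Brain-100.onnx")
onnx.checker.check_model(model)
print([inp.name for inp in model.graph.input])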
Example #8
def test_register(tmp_path):
    trainer_params = TrainerSettings()
    model_saver = TFModelSaver(trainer_params, tmp_path)

    opt = mock.Mock(spec=PPOOptimizer)
    model_saver.register(opt)
    assert model_saver.policy is None

    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params)
    model_saver.register(policy)
    assert model_saver.policy is not None
Example #9
def test_register(tmp_path):
    trainer_params = TrainerSettings()
    model_saver = TorchModelSaver(trainer_params, tmp_path)

    opt = mock.Mock(spec=TorchPPOOptimizer)
    opt.get_modules = mock.Mock(return_value={})
    model_saver.register(opt)
    assert model_saver.policy is None

    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params)
    opt.get_modules = mock.Mock(return_value={})
    model_saver.register(policy)
    assert model_saver.policy is not None
Example #10
def test_trainersettings_structure():
    """
    Test structuring method for TrainerSettings
    """
    trainersettings_dict = {
        "trainer_type": "sac",
        "hyperparameters": {
            "batch_size": 1024
        },
        "max_steps": 1.0,
        "reward_signals": {
            "curiosity": {
                "encoding_size": 64
            }
        },
    }
    trainer_settings = TrainerSettings.structure(trainersettings_dict,
                                                 TrainerSettings)
    assert isinstance(trainer_settings.hyperparameters, SACSettings)
    assert trainer_settings.trainer_type == TrainerType.SAC
    assert isinstance(trainer_settings.max_steps, int)
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals

    # Check invalid trainer type
    with pytest.raises(ValueError):
        trainersettings_dict = {
            "trainer_type": "puppo",
            "hyperparameters": {
                "batch_size": 1024
            },
            "max_steps": 1.0,
        }
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)

    # Check invalid hyperparameter
    with pytest.raises(TrainerConfigError):
        trainersettings_dict = {
            "trainer_type": "ppo",
            "hyperparameters": {
                "notahyperparam": 1024
            },
            "max_steps": 1.0,
        }
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)

    # Check non-dict
    with pytest.raises(TrainerConfigError):
        TrainerSettings.structure("notadict", TrainerSettings)

    # Check hyperparameters specified but trainer type left as default.
    # This shouldn't work as you could specify non-PPO hyperparameters.
    with pytest.raises(TrainerConfigError):
        trainersettings_dict = {"hyperparameters": {"batch_size": 1024}}
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)
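For comparison, a hand-built equivalent of the dict that structure() parses above (a sketch; the keyword names follow the test's own assertions) would be:

# Hypothetical direct construction; note max_steps is already an int here,
# whereas structure() coerces the YAML-style float 1.0.
trainer_settings = TrainerSettings(
    trainer_type=TrainerType.SAC,
    hyperparameters=SACSettings(batch_size=1024),
    max_steps=1,
    reward_signals={RewardSignalType.CURIOSITY: CuriositySettings(encoding_size=64)},
)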
Example #11
def test_normalization():
    behavior_spec = mb.setup_test_behavior_specs(use_discrete=True,
                                                 use_visual=False,
                                                 vector_action_space=[2],
                                                 vector_obs_space=1)

    time_horizon = 6
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1, )],
        action_space=[2],
    )
    # Change half of the obs to 0
    for i in range(3):
        trajectory.steps[i].obs[0] = np.zeros(1, dtype=np.float32)
    policy = NNPolicy(
        0,
        behavior_spec,
        TrainerSettings(network_settings=NetworkSettings(normalize=True)),
        False,
        "testdir",
        False,
    )

    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy.sess.run([
        policy.normalization_steps, policy.running_mean,
        policy.running_variance
    ])

    assert steps == 6
    assert mean[0] == 0.5
    # Note: variance is divided by number of steps, and initialized to 1 to avoid
    # divide by 0. The right answer is 0.25
    assert (variance[0] - 1) / steps == 0.25

    # Make another update, this time with all 1's
    time_horizon = 10
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1, )],
        action_space=[2],
    )
    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy.sess.run([
        policy.normalization_steps, policy.running_mean,
        policy.running_variance
    ])

    assert steps == 16
    assert mean[0] == 0.8125
    assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
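The expected values follow from plain running statistics: the first update sees three 0s and three 1s, so the mean is 3/6 = 0.5 and the variance 0.25; after ten more 1.0 observations the mean is 13/16 = 0.8125 and the variance about 0.152. A standalone numpy check of that arithmetic (independent of the policy):

import numpy as np

obs = np.array([0, 0, 0, 1, 1, 1], dtype=np.float32)
assert obs.mean() == 0.5
assert obs.var() == 0.25  # matches (variance[0] - 1) / steps above

obs = np.concatenate([obs, np.ones(10, dtype=np.float32)])
assert obs.mean() == 0.8125
assert abs(obs.var() - 0.152) < 0.01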
Example #12
def test_evaluate_actions(rnn, visual, discrete):
    policy = create_policy_mock(
        TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
    act_masks = ModelUtils.list_to_tensor(buffer[BufferKey.ACTION_MASK])
    agent_action = AgentAction.from_buffer(buffer)
    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
    tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

    memories = [
        ModelUtils.list_to_tensor(buffer[BufferKey.MEMORY][i])
        for i in range(0, len(buffer[BufferKey.MEMORY]), policy.sequence_length)
    ]
    if len(memories) > 0:
        memories = torch.stack(memories).unsqueeze(0)

    log_probs, entropy, values = policy.evaluate_actions(
        tensor_obs,
        masks=act_masks,
        actions=agent_action,
        memories=memories,
        seq_len=policy.sequence_length,
    )
    if discrete:
        _size = policy.behavior_spec.action_spec.discrete_size
    else:
        _size = policy.behavior_spec.action_spec.continuous_size

    assert log_probs.flatten().shape == (64, _size)
    assert entropy.shape == (64,)
    for val in values.values():
        assert val.shape == (64,)
Example #13
def test_sample_actions(rnn, visual, discrete):
    policy = create_policy_mock(
        TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
    act_masks = ModelUtils.list_to_tensor(buffer[BufferKey.ACTION_MASK])

    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
    tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

    memories = [
        ModelUtils.list_to_tensor(buffer[BufferKey.MEMORY][i])
        for i in range(0, len(buffer[BufferKey.MEMORY]), policy.sequence_length)
    ]
    if len(memories) > 0:
        memories = torch.stack(memories).unsqueeze(0)

    (sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
        tensor_obs, masks=act_masks, memories=memories, seq_len=policy.sequence_length
    )
    if discrete:
        assert log_probs.all_discrete_tensor.shape == (
            64,
            sum(policy.behavior_spec.action_spec.discrete_branches),
        )
    else:
        assert log_probs.continuous_tensor.shape == (
            64,
            policy.behavior_spec.action_spec.continuous_size,
        )
    assert entropies.shape == (64,)

    if rnn:
        assert memories.shape == (1, 1, policy.m_size)
Example #14
def test_normalizer_after_load(tmp_path):
    behavior_spec = mb.setup_test_behavior_specs(
        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
    )
    time_horizon = 6
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
        action_spec=behavior_spec.action_spec,
    )
    # Change half of the obs to 0
    for i in range(3):
        trajectory.steps[i].obs[0] = np.zeros(1, dtype=np.float32)

    trainer_params = TrainerSettings(network_settings=NetworkSettings(normalize=True))
    policy = TFPolicy(0, behavior_spec, trainer_params)

    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy.sess.run(
        [policy.normalization_steps, policy.running_mean, policy.running_variance]
    )

    assert steps == 6
    assert mean[0] == 0.5
    assert variance[0] / steps == pytest.approx(0.25, abs=0.01)
    # Save ckpt and load into another policy
    path1 = os.path.join(tmp_path, "runid1")
    model_saver = TFModelSaver(trainer_params, path1)
    model_saver.register(policy)
    mock_brain_name = "MockBrain"
    model_saver.save_checkpoint(mock_brain_name, 6)
    assert len(os.listdir(tmp_path)) > 0
    policy1 = TFPolicy(0, behavior_spec, trainer_params)
    model_saver = TFModelSaver(trainer_params, path1, load=True)
    model_saver.register(policy1)
    model_saver.initialize_or_load(policy1)

    # Make another update to new policy, this time with all 1's
    time_horizon = 10
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
        action_spec=behavior_spec.action_spec,
    )
    trajectory_buffer = trajectory.to_agentbuffer()
    policy1.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy1.sess.run(
        [policy1.normalization_steps, policy1.running_mean, policy1.running_variance]
    )

    assert steps == 16
    assert mean[0] == 0.8125
    assert variance[0] / steps == pytest.approx(0.152, abs=0.01)
Example #15
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    # model_path = env.external_brain_names[0]
    trainer_config = TrainerSettings()
    trainer_config.network_settings.memory = (NetworkSettings.MemorySettings()
                                              if use_rnn else None)
    policy = NNPolicy(
        0,
        mock_behavior_specs,
        trainer_config,
        False,
        "test",
        False,
        tanhresample,
        tanhresample,
    )
    with policy.graph.as_default():
        bc_module = BCModule(
            policy,
            policy_learning_rate=trainer_config.hyperparameters.learning_rate,
            default_batch_size=trainer_config.hyperparameters.batch_size,
            default_num_epoch=3,
            settings=bc_settings,
        )
    # Normally the optimizer calls this after the BCModule is created
    policy.initialize_or_load()
    return bc_module
Example #16
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    behavior_spec = basic_behavior_spec()
    policy = FakePolicy(test_seed, behavior_spec, TrainerSettings(), "output")
    no_agent_step = DecisionSteps.empty(behavior_spec)
    result = policy.get_action(no_agent_step)
    assert result == ActionInfo.empty()
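FakePolicy is also defined in the test module; a plausible sketch (names assumed) is a thin TFPolicy subclass that stubs out the abstract graph-building methods so get_action() can run without a real TensorFlow graph:

class FakePolicy(TFPolicy):
    # Hypothetical stub: no graph is built and there are no trainable
    # variables, so only the bookkeeping in get_action() is exercised.
    def create_tf_graph(self):
        pass

    def get_trainable_variables(self):
        return []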
Example #17
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
    # Doesn't really matter what this is
    dummy_groupspec = BehaviorSpec([(1,)], "continuous", 1)
    no_agent_step = DecisionSteps.empty(dummy_groupspec)
    result = policy.get_action(no_agent_step)
    assert result == ActionInfo.empty()
Example #18
    def _sanitize_trainer_settings(cls, config: TrainerSettings) -> Dict[str, Any]:
        config_dict = copy.deepcopy(config.as_dict())
        if "init_path" in config_dict and config_dict["init_path"] is not None:
            hashed_path = cls._hash(config_dict["init_path"])
            config_dict["init_path"] = hashed_path
        if "demo_path" in config_dict and config_dict["demo_path"] is not None:
            hashed_path = cls._hash(config_dict["demo_path"])
            config_dict["demo_path"] = hashed_path
        return config_dict
Example #19
def test_take_action_returns_nones_on_missing_values():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
    policy.evaluate = MagicMock(return_value={})
    policy.save_memories = MagicMock()
    step_with_agents = DecisionSteps([], np.array([], dtype=np.float32),
                                     np.array([0]), None)
    result = policy.get_action(step_with_agents, worker_id=0)
    assert result == ActionInfo(None, None, {}, [0])
Example #20
def create_rl_trainer():
    trainer = FakeTrainer(
        "test_trainer",
        TrainerSettings(max_steps=100, checkpoint_interval=10,
                        summary_freq=20),
        True,
        0,
    )
    trainer.set_is_policy_updating(True)
    return trainer
Example #21
def test_sample_actions(rnn, visual, discrete):
    policy = create_policy_mock(TrainerSettings(),
                                use_rnn=rnn,
                                use_discrete=discrete,
                                use_visual=visual)
    buffer = mb.simulate_rollout(64,
                                 policy.behavior_spec,
                                 memory_size=policy.m_size)
    vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
    act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])

    vis_obs = []
    for idx, _ in enumerate(
            policy.actor_critic.network_body.visual_processors):
        vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
        vis_obs.append(vis_ob)

    memories = [
        ModelUtils.list_to_tensor(buffer["memory"][i])
        for i in range(0, len(buffer["memory"]), policy.sequence_length)
    ]
    if len(memories) > 0:
        memories = torch.stack(memories).unsqueeze(0)

    (
        sampled_actions,
        clipped_actions,
        log_probs,
        entropies,
        memories,
    ) = policy.sample_actions(
        vec_obs,
        vis_obs,
        masks=act_masks,
        memories=memories,
        seq_len=policy.sequence_length,
        all_log_probs=not policy.use_continuous_act,
    )
    if discrete:
        assert log_probs.shape == (
            64,
            sum(policy.behavior_spec.action_spec.discrete_branches),
        )
    else:
        assert log_probs.shape == (
            64, policy.behavior_spec.action_spec.continuous_size)
        assert clipped_actions.shape == (
            64,
            policy.behavior_spec.action_spec.continuous_size,
        )
    assert entropies.shape == (64, )

    if rnn:
        assert memories.shape == (1, 1, policy.m_size)
Example #22
def create_rl_trainer():
    mock_brainparams = create_mock_brain()
    trainer = FakeTrainer(
        mock_brainparams,
        TrainerSettings(max_steps=100, checkpoint_interval=10,
                        summary_freq=20),
        True,
        0,
    )
    trainer.set_is_policy_updating(True)
    return trainer
Example #23
def test_bad_config(dummy_config):
    brain_params = make_brain_parameters(discrete_action=False,
                                         visual_inputs=0,
                                         vec_obs_size=6)
    # Test that we throw an error if we have sequence length greater than batch size
    with pytest.raises(TrainerConfigError):
        TrainerSettings(
            network_settings=NetworkSettings(
                memory=NetworkSettings.MemorySettings(sequence_length=64)),
            hyperparameters=PPOSettings(batch_size=32),
        )
        _ = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
Example #24
def test_policy_evaluate(rnn, visual, discrete):
    # Test evaluate
    policy = create_policy_mock(TrainerSettings(),
                                use_rnn=rnn,
                                use_discrete=discrete,
                                use_visual=visual)
    decision_step, terminal_step = mb.create_steps_from_behavior_spec(
        policy.behavior_spec, num_agents=NUM_AGENTS)

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
Example #25
def test_bcmodule_defaults():
    # See if default values match
    mock_specs = mb.create_mock_3dball_behavior_specs()
    bc_settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH)
    bc_module = create_bc_module(mock_specs, bc_settings, False, False)
    assert bc_module.num_epoch == 3
    assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
    # Assign strange values and see if it overrides properly
    bc_settings = BehavioralCloningSettings(
        demo_path=CONTINUOUS_DEMO_PATH, num_epoch=100, batch_size=10000
    )
    bc_module = create_bc_module(mock_specs, bc_settings, False, False)
    assert bc_module.num_epoch == 100
    assert bc_module.batch_size == 10000
Example #26
def create_rl_trainer(framework=FrameworkType.TENSORFLOW):
    trainer = FakeTrainer(
        "test_trainer",
        TrainerSettings(max_steps=100,
                        checkpoint_interval=10,
                        summary_freq=20,
                        framework=framework),
        True,
        False,
        "mock_model_path",
        0,
    )
    trainer.set_is_policy_updating(True)
    return trainer
Example #27
    def test_version_compare(self):
        # Test _check_model_version
        with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
            path1 = tempfile.mkdtemp()
            trainer_params = TrainerSettings()
            policy = create_policy_mock(trainer_params, model_path=path1)
            policy.initialize_or_load()
            policy._check_model_version(
                "0.0.0"
            )  # This is definitely not the right version
            # Assert that 1 warning has been thrown with the incorrect version
            assert len(cm.output) == 1
            policy._check_model_version(__version__)  # This should be the right version
            # Assert that no additional warnings have been thrown with the correct version
            assert len(cm.output) == 1
Example #28
def test_take_action_returns_action_info_when_available():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
    policy_eval_out = {
        "action": np.array([1.0], dtype=np.float32),
        "memory_out": np.array([[2.5]], dtype=np.float32),
        "value": np.array([1.1], dtype=np.float32),
    }
    policy.evaluate = MagicMock(return_value=policy_eval_out)
    step_with_agents = DecisionSteps([], np.array([], dtype=np.float32),
                                     np.array([0]), None)
    result = policy.get_action(step_with_agents)
    expected = ActionInfo(policy_eval_out["action"], policy_eval_out["value"],
                          policy_eval_out, [0])
    assert result == expected
Example #29
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    # model_path = env.external_brain_names[0]
    trainer_config = TrainerSettings()
    trainer_config.network_settings.memory = (NetworkSettings.MemorySettings()
                                              if use_rnn else None)
    policy = TorchPolicy(0, mock_behavior_specs, trainer_config, tanhresample,
                         tanhresample)
    bc_module = BCModule(
        policy,
        settings=bc_settings,
        policy_learning_rate=trainer_config.hyperparameters.learning_rate,
        default_batch_size=trainer_config.hyperparameters.batch_size,
        default_num_epoch=3,
    )
    return bc_module
Example #30
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
    mock_brain = basic_mock_brain()
    test_seed = 4  # moving up in the world
    policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
    n_steps = 5
    policy.get_current_step = MagicMock(return_value=n_steps)
    policy.saver = MagicMock()
    serialization_settings = SerializationSettings("output",
                                                   mock_brain.brain_name)
    checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
    policy.checkpoint(checkpoint_path, serialization_settings)
    policy.saver.save.assert_called_once_with(policy.sess,
                                              f"{checkpoint_path}.ckpt")
    export_policy_model_mock.assert_called_once_with(checkpoint_path,
                                                     serialization_settings,
                                                     policy.graph, policy.sess)