Example #1
0
def test_take_step_if_not_training(trainer_controller_with_take_step_mocks):
    """Advance once with ``train_model`` set to ``False`` and check the calls.

    After a single ``advance`` call the environment mock must have been
    stepped exactly once and never reset, the ``managers`` entry keyed by the
    brain name must have received the new step's brain info and action info,
    and the trainer mock's ``advance`` must have been called once.
    """
    controller, trainer = trainer_controller_with_take_step_mocks
    controller.train_model = False

    name = "testbrain"
    actions_by_brain = {name: MagicMock()}

    infos_by_brain = {name: Mock()}
    # Two separate step objects built from the same dictionaries: one is
    # handed out by reset(), the other by step().
    reset_step = EnvironmentStep(infos_by_brain, actions_by_brain)
    next_step = EnvironmentStep(infos_by_brain, actions_by_brain)

    trainer._is_ready_update = MagicMock(return_value=False)

    env = MagicMock()
    env.step.return_value = [next_step]
    env.reset.return_value = [reset_step]

    controller.brain_name_to_identifier[name].add(name)

    controller.advance(env)

    # Environment interaction: a single step, no reset.
    env.reset.assert_not_called()
    env.step.assert_called_once()

    # Experiences handed to the manager come from the step() result.
    manager = controller.managers[name]
    expected_info = next_step.current_all_brain_info[name]
    expected_action = next_step.brain_name_to_action_info[name]
    manager.add_experiences.assert_called_once_with(
        expected_info, expected_action
    )

    trainer.advance.assert_called_once()
def test_take_step_adds_experiences_to_trainer_and_trains():
    """Advance once with the trainer reporting it is ready to update.

    After a single ``advance`` call the environment mock must have been
    stepped exactly once and never reset, and the trainer mock must have
    received the new step's previous/current brain info (plus the action
    outputs for ``add_experiences``), followed by exactly one policy update
    and one step increment.
    """
    controller, trainer = trainer_controller_with_take_step_mocks()

    name = "testbrain"
    actions_by_brain = {name: MagicMock()}

    infos_by_brain = {name: Mock()}
    # The same brain-info dictionary is passed for both of the first two
    # arguments of each step object.
    reset_step = EnvironmentStep(
        infos_by_brain, infos_by_brain, actions_by_brain
    )
    next_step = EnvironmentStep(
        infos_by_brain, infos_by_brain, actions_by_brain
    )
    trainer.is_ready_update = MagicMock(return_value=True)

    env = MagicMock()
    env.step.return_value = [next_step]
    env.reset.return_value = [reset_step]

    controller.advance(env)

    # Environment interaction: a single step, no reset.
    env.reset.assert_not_called()
    env.step.assert_called_once()

    # Experiences recorded by the trainer come from the step() result.
    previous_info = next_step.previous_all_brain_info[name]
    current_info = next_step.current_all_brain_info[name]
    action_outputs = next_step.brain_name_to_action_info[name].outputs
    trainer.add_experiences.assert_called_once_with(
        previous_info, current_info, action_outputs
    )

    trainer.process_experiences.assert_called_once_with(
        next_step.previous_all_brain_info[name],
        next_step.current_all_brain_info[name],
    )

    trainer.update_policy.assert_called_once()
    trainer.increment_step.assert_called_once()