def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses
    is a new instance (i.e. all factory methods are used properly.)
    """
    check_if_different(RunOptions(), RunOptions())
    check_if_different(TrainerSettings(), TrainerSettings())
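# check_if_different() is not part of this excerpt. A minimal sketch of such a
# helper, assuming the settings classes are attrs-based (illustrative only,
# not the actual ml-agents helper):
import attr


def check_if_different(testobj1, testobj2):
    # The two objects must never be the same instance.
    assert testobj1 is not testobj2
    if attr.has(type(testobj1)):
        # Recurse into nested settings and mutable containers so that shared
        # default instances (i.e. a missing default factory) are caught.
        for field in attr.fields(type(testobj1)):
            val1 = getattr(testobj1, field.name)
            val2 = getattr(testobj2, field.name)
            if isinstance(val1, (dict, list)) or attr.has(type(val1)):
                check_if_different(val1, val2)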
Example #2
    def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
        manager.step_queue = Mock()
        manager.step_queue.get_nowait.side_effect = [
            EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
            EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
            EmptyQueue(),
        ]
        step_mock = Mock()
        last_steps = [Mock(), Mock(), Mock()]
        manager.env_workers[0].previous_step = last_steps[0]
        manager.env_workers[1].previous_step = last_steps[1]
        manager.env_workers[2].previous_step = last_steps[2]
        manager.env_workers[2].waiting = True
        manager._take_step = Mock(return_value=step_mock)
        res = manager._step()
        for i, env in enumerate(manager.env_workers):
            if i < 2:
                env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
                manager.step_queue.get_nowait.assert_called()
                # Check that the "last steps" are set to the value returned for each step
                self.assertEqual(
                    manager.env_workers[i].previous_step.current_all_step_result, i
                )
        assert res == [
            manager.env_workers[0].previous_step,
            manager.env_workers[1].previous_step,
        ]
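# The SubprocessEnvManager tests in these examples rely on a few helpers that
# are not shown here: mock_env_factory, MockEnvWorker and create_worker_mock.
# A rough sketch of what they could look like (an assumption for readability,
# not the actual ml-agents fixtures). EnvironmentCommand and EnvironmentResponse
# are the same types the tests above already use.
from unittest import mock
from unittest.mock import Mock


def mock_env_factory(worker_id):
    # Stand-in environment factory; these tests never step a real environment.
    return mock.Mock()


class MockEnvWorker:
    def __init__(self, worker_id, resp=None):
        self.worker_id = worker_id
        self.process = None
        self.conn = None
        self.send = Mock()
        self.recv = Mock(return_value=resp)
        self.waiting = False
        self.previous_step = None


def create_worker_mock(worker_id, step_queue, env_factory, run_options):
    # Replacement for SubprocessEnvManager.create_worker: no subprocess is
    # spawned; each fake worker just answers RESET with its own worker_id.
    return MockEnvWorker(
        worker_id, EnvironmentResponse(EnvironmentCommand.RESET, worker_id, worker_id)
    )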
Example #3
    def test_reset_passes_reset_params(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 1)
        params = {"test": "params"}
        manager._reset_env(params)
        manager.env_workers[0].send.assert_called_with(
            EnvironmentCommand.RESET, (params))
Example #4
    def test_environments_are_created(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        run_options = RunOptions()
        env = SubprocessEnvManager(mock_env_factory, run_options, 2)
        # Creates two processes
        env.create_worker.assert_has_calls([
            mock.call(0, env.step_queue, mock_env_factory, run_options),
            mock.call(1, env.step_queue, mock_env_factory, run_options),
        ])
        self.assertEqual(len(env.env_workers), 2)
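# The method-style tests above and below (taking self plus a mock_create_worker
# argument) presumably live on a test class whose create_worker is patched out,
# roughly along these lines (an assumed framing, not shown in the excerpt):
import unittest
from unittest import mock


@mock.patch(
    "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
)
class SubprocessEnvManagerTest(unittest.TestCase):
    # Each test method then receives the patched create_worker as
    # mock_create_worker.
    pass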
def test_no_configuration():
    """
    Verify that a new config will have a PPO trainer with extrinsic rewards.
    """
    blank_runoptions = RunOptions()
    blank_runoptions.behaviors.set_config_specified(False)
    assert isinstance(blank_runoptions.behaviors["test"], TrainerSettings)
    assert isinstance(blank_runoptions.behaviors["test"].hyperparameters,
                      PPOSettings)
    assert (RewardSignalType.EXTRINSIC
            in blank_runoptions.behaviors["test"].reward_signals)
Example #6
    def test_crashed_env_restarts(self, mock_create_worker):
        crashing_worker = MockEnvWorker(
            0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
        )
        restarting_worker = MockEnvWorker(
            0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
        )
        healthy_worker = MockEnvWorker(
            1, EnvironmentResponse(EnvironmentCommand.RESET, 1, 1)
        )
        mock_create_worker.side_effect = [
            crashing_worker,
            healthy_worker,
            restarting_worker,
        ]
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 2)
        manager.step_queue = Mock()
        manager.step_queue.get_nowait.side_effect = [
            EnvironmentResponse(
                EnvironmentCommand.ENV_EXITED,
                0,
                UnityCommunicationException("Test msg"),
            ),
            EnvironmentResponse(EnvironmentCommand.CLOSED, 0, None),
            EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(0, None, {})),
            EmptyQueue(),
            EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(1, None, {})),
            EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(2, None, {})),
            EmptyQueue(),
        ]
        step_mock = Mock()
        last_steps = [Mock(), Mock(), Mock()]
        assert crashing_worker is manager.env_workers[0]
        assert healthy_worker is manager.env_workers[1]
        crashing_worker.previous_step = last_steps[0]
        crashing_worker.waiting = True
        healthy_worker.previous_step = last_steps[1]
        healthy_worker.waiting = True
        manager._take_step = Mock(return_value=step_mock)
        manager._step()
        healthy_worker.send.assert_has_calls(
            [
                call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
                call(EnvironmentCommand.RESET, ANY),
                call(EnvironmentCommand.STEP, ANY),
            ]
        )
        restarting_worker.send.assert_has_calls(
            [
                call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
                call(EnvironmentCommand.RESET, ANY),
                call(EnvironmentCommand.STEP, ANY),
            ]
        )
Example #7
    def test_reset_collects_results_from_all_envs(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)

        params = {"test": "params"}
        res = manager._reset_env(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with(EnvironmentCommand.RESET, (params))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result, i
            )
        assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
Example #8
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.5)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory, RunOptions(), num_envs)
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
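# The num_envs argument taken by this test (and by the other tests below that
# accept num_envs) suggests a pytest parametrization; something like the
# decorator below is assumed but not shown in the excerpt, and the exact values
# are a guess:
import pytest


@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_raises_errors_parametrized(num_envs):
    # Same body as test_subprocess_env_raises_errors above.
    ...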
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_name)
Example #10
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory, RunOptions())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    # Pretend this was created without a YAML file
    no_default_config.set_config_specified(False)

    trainer_factory = TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer_factory.generate(brain_name)
Example #12
    def test_training_behaviors_collects_results_from_all_envs(
            self, mock_create_worker):
        def create_worker_mock(worker_id, step_queue, env_factory, engine_c):
            return MockEnvWorker(
                worker_id,
                EnvironmentResponse(EnvironmentCommand.RESET, worker_id,
                                    {f"key{worker_id}": worker_id}),
            )

        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)

        res = manager.training_behaviors
        for env in manager.env_workers:
            env.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
            env.recv.assert_called()
        for worker_id in range(4):
            assert f"key{worker_id}" in res
            assert res[f"key{worker_id}"] == worker_id
Example #13
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
        return env

    env_manager = SubprocessEnvManager(simple_env_factory, RunOptions(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
Example #14
    def test_advance(self, mock_create_worker, training_behaviors_mock,
                     step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        mock_create_worker.side_effect = create_worker_mock
        env_manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
        training_behaviors_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.side_effect = [
            mock_policy,
            mock_policy,
            AgentManagerQueue.Empty(),
        ]
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: (Mock(), Mock())}
        env_stats = {
            "averaged": (1.0, StatsAggregationMethod.AVERAGE),
            "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
        }
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict,
                                    env_stats)
        step_mock.return_value = [step_info]
        env_manager.process_steps(env_manager.get_steps())

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name][0],
            step_info.current_all_step_result[brain_name][1],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
Example #15
def test_handles_no_config_provided(BrainParametersMock):
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_parameters.brain_name)
Example #16
def poca_config():
    return RunOptions(behaviors={"test_brain": poca_dummy_config()})
def test_pickle():
    # Make sure RunOptions is pickle-able.
    run_options = RunOptions()
    p = pickle.dumps(run_options)
    pickle.loads(p)
def test_exportable_settings(use_defaults):
    """
    Test that structuring and unstructuring a RunOptions object results in the same
    configuration representation.
    """
    # Try to enable as many features as possible in this test YAML to hit all the
    # edge cases. Set as much as possible as non-default values to ensure no flukes.
    test_yaml = """
    behaviors:
        3DBall:
            trainer_type: sac
            hyperparameters:
                learning_rate: 0.0004
                learning_rate_schedule: constant
                batch_size: 64
                buffer_size: 200000
                buffer_init_steps: 100
                tau: 0.006
                steps_per_update: 10.0
                save_replay_buffer: true
                init_entcoef: 0.5
                reward_signal_steps_per_update: 10.0
            network_settings:
                normalize: false
                hidden_units: 256
                num_layers: 3
                vis_encode_type: nature_cnn
                memory:
                    memory_size: 1288
                    sequence_length: 12
            reward_signals:
                extrinsic:
                    gamma: 0.999
                    strength: 1.0
                curiosity:
                    gamma: 0.999
                    strength: 1.0
            keep_checkpoints: 5
            max_steps: 500000
            time_horizon: 1000
            summary_freq: 12000
            checkpoint_interval: 1
            threaded: true
    env_settings:
        env_path: test_env_path
        env_args:
            - test_env_args1
            - test_env_args2
        base_port: 12345
        num_envs: 8
        seed: 12345
    engine_settings:
        width: 12345
        height: 12345
        quality_level: 12345
        time_scale: 12345
        target_frame_rate: 12345
        capture_frame_rate: 12345
        no_graphics: true
    checkpoint_settings:
        run_id: test_run_id
        initialize_from: test_directory
        load_model: false
        resume: true
        force: true
        train_model: false
        inference: false
    debug: true
    environment_parameters:
        big_wall_height:
            curriculum:
              - name: Lesson0
                completion_criteria:
                    measure: progress
                    behavior: BigWallJump
                    signal_smoothing: true
                    min_lesson_length: 100
                    threshold: 0.1
                value:
                    sampler_type: uniform
                    sampler_parameters:
                        min_value: 0.0
                        max_value: 4.0
              - name: Lesson1
                completion_criteria:
                    measure: reward
                    behavior: BigWallJump
                    signal_smoothing: true
                    min_lesson_length: 100
                    threshold: 0.2
                value:
                    sampler_type: gaussian
                    sampler_parameters:
                        mean: 4.0
                        st_dev: 7.0
              - name: Lesson2
                completion_criteria:
                    measure: progress
                    behavior: BigWallJump
                    signal_smoothing: true
                    min_lesson_length: 20
                    threshold: 0.3
                value:
                    sampler_type: multirangeuniform
                    sampler_parameters:
                        intervals: [[1.0, 2.0],[4.0, 5.0]]
              - name: Lesson3
                value: 8.0
        small_wall_height: 42.0
        other_wall_height:
            sampler_type: multirangeuniform
            sampler_parameters:
                intervals: [[1.0, 2.0],[4.0, 5.0]]
    """
    if not use_defaults:
        loaded_yaml = yaml.safe_load(test_yaml)
        run_options = RunOptions.from_dict(yaml.safe_load(test_yaml))
    else:
        run_options = RunOptions()
    dict_export = run_options.as_dict()

    if not use_defaults:  # Don't need to check if no yaml
        check_dict_is_at_least(loaded_yaml,
                               dict_export,
                               exceptions=["environment_parameters"])
    # Re-import and verify has same elements
    run_options2 = RunOptions.from_dict(dict_export)
    second_export = run_options2.as_dict()

    check_dict_is_at_least(dict_export, second_export)
    # Should be able to use equality instead of back-and-forth once environment_parameters
    # is working
    check_dict_is_at_least(second_export, dict_export)
    # Check that the two exports are the same
    assert dict_export == second_export
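# check_dict_is_at_least() is used by both exportable-settings tests but is not
# part of this excerpt. A plausible sketch, assuming it verifies that every
# entry of the first dict appears (recursively) in the second, minus any
# explicitly excepted keys (illustrative only):
def check_dict_is_at_least(dict1, dict2, exceptions=None):
    exceptions = exceptions or []
    for key, val in dict1.items():
        if key in exceptions:
            continue
        assert key in dict2
        if isinstance(val, dict):
            check_dict_is_at_least(val, dict2[key], exceptions)
        else:
            assert dict2[key] == val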
def test_exportable_settings(use_defaults):
    """
    Test that structuring and unstructuring a RunOptions object results in the same
    configuration representation.
    """
    # Try to enable as many features as possible in this test YAML to hit all the
    # edge cases. Set as much as possible as non-default values to ensure no flukes.
    test_yaml = """
    behaviors:
        3DBall:
            trainer_type: sac
            hyperparameters:
                learning_rate: 0.0004
                learning_rate_schedule: constant
                batch_size: 64
                buffer_size: 200000
                buffer_init_steps: 100
                tau: 0.006
                steps_per_update: 10.0
                save_replay_buffer: true
                init_entcoef: 0.5
                reward_signal_steps_per_update: 10.0
            network_settings:
                normalize: false
                hidden_units: 256
                num_layers: 3
                vis_encode_type: nature_cnn
                memory:
                    memory_size: 1288
                    sequence_length: 12
            reward_signals:
                extrinsic:
                    gamma: 0.999
                    strength: 1.0
                curiosity:
                    gamma: 0.999
                    strength: 1.0
            keep_checkpoints: 5
            max_steps: 500000
            time_horizon: 1000
            summary_freq: 12000
            checkpoint_interval: 1
            threaded: true
    env_settings:
        env_path: test_env_path
        env_args:
            - test_env_args1
            - test_env_args2
        base_port: 12345
        num_envs: 8
        seed: 12345
    engine_settings:
        width: 12345
        height: 12345
        quality_level: 12345
        time_scale: 12345
        target_frame_rate: 12345
        capture_frame_rate: 12345
        no_graphics: true
    checkpoint_settings:
        run_id: test_run_id
        initialize_from: test_directory
        load_model: false
        resume: true
        force: true
        train_model: false
        inference: false
    debug: true
    """
    if not use_defaults:
        loaded_yaml = yaml.safe_load(test_yaml)
        run_options = RunOptions.from_dict(yaml.safe_load(test_yaml))
    else:
        run_options = RunOptions()
    dict_export = run_options.as_dict()

    if not use_defaults:  # Don't need to check if no yaml
        check_dict_is_at_least(loaded_yaml, dict_export)

    # Re-import and verify has same elements
    run_options2 = RunOptions.from_dict(dict_export)
    second_export = run_options2.as_dict()

    # Check that the two exports are the same
    assert dict_export == second_export
Example #20
def dummy_config():
    return RunOptions(behaviors={"testbrain": ppo_dummy_config()})
def dummy_config():
    return RunOptions(behaviors={"testbrain": PPO_CONFIG})
Example #22
def test_register_stats_writers():
    # Make sure that the ExampleStatsWriter gets returned from the list of all StatsWriters
    stats_writers = register_stats_writer_plugins(RunOptions())
    assert any(isinstance(sw, ExampleStatsWriter) for sw in stats_writers)
Example #23
def sac_config():
    return RunOptions(behaviors={"test_brain": sac_dummy_config()})