def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses
    is a new instance (i.e. all factory methods are used properly.)
    """
    check_if_different(RunOptions(), RunOptions())
    check_if_different(TrainerSettings(), TrainerSettings())
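
check_if_different is a helper defined elsewhere in the test module; the sketch below only approximates what it is assumed to do (recursively assert that two attrs-based settings objects do not share mutable sub-objects) and is not the actual implementation.

import attr


def check_if_different(obj1, obj2) -> None:
    # The two objects must not be the same instance.
    assert obj1 is not obj2
    if attr.has(obj1.__class__) and attr.has(obj2.__class__):
        for key, val in attr.asdict(obj1, recurse=False).items():
            if isinstance(val, (dict, list)) or attr.has(val.__class__):
                # Recurse into mutable fields; only identity is checked, not contents.
                check_if_different(val, getattr(obj2, key))
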
Example #2
def test_config_specified():
    # Test requiring all behavior names to be specified (or not)
    # Remove any pre-set defaults
    TrainerSettings.default_override = None
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Don't require all behavior names
    ro.behaviors.set_config_specified(False)
    # Test that we can grab an entry that is not in the dict.
    assert isinstance(ro.behaviors["test2"], TrainerSettings)

    # Create strict RunOptions with no default_settings
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Require all behavior names
    ro.behaviors.set_config_specified(True)
    with pytest.raises(TrainerConfigError):
        # Variable must be accessed otherwise Python won't query the dict
        print(ro.behaviors["test2"])

    # Create strict RunOptions with default settings
    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Require all behavior names
    ro.behaviors.set_config_specified(True)
    # Test that we can grab an entry that is not in the dict.
    assert isinstance(ro.behaviors["test2"], TrainerSettings)
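
set_config_specified toggles whether unknown behavior names raise or fall back to defaults. The snippet below is only a sketch of that pattern under assumed semantics, not the ml-agents implementation (which raises TrainerConfigError, as the test above shows).

from collections import defaultdict


class BehaviorDefaultsSketch(defaultdict):
    """A defaultdict that can be switched into a strict mode."""

    def __init__(self, default_factory=dict):
        super().__init__(default_factory)
        self._config_specified = True

    def set_config_specified(self, require_all_behaviors: bool) -> None:
        self._config_specified = require_all_behaviors

    def __missing__(self, key):
        if self._config_specified:
            # The real settings code raises TrainerConfigError here.
            raise KeyError(f"Behavior name {key} was not specified in the config.")
        # Non-strict mode: synthesize a default entry, as defaultdict normally does.
        return super().__missing__(key)
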
def write_run_options(output_dir: str, run_options: RunOptions) -> None:
    run_options_path = os.path.join(output_dir, "configuration.yaml")
    try:
        with open(run_options_path, "w") as f:
            try:
                yaml.dump(run_options.as_dict(), f, sort_keys=False)
            except TypeError:  # Older versions of pyyaml don't support sort_keys
                yaml.dump(run_options.as_dict(), f)
    except FileNotFoundError:
        logger.warning(
            f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
        )
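
A hypothetical call site for write_run_options, assuming a populated RunOptions (the behavior name and output directory here are made up for illustration):

opts = RunOptions.from_dict({"behaviors": {"MyBehavior": {"trainer_type": "ppo"}}})
write_run_options("./results/my_run", opts)  # writes ./results/my_run/configuration.yaml
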
Example #4
 def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
         EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     manager.env_workers[0].previous_step = last_steps[0]
     manager.env_workers[1].previous_step = last_steps[1]
     manager.env_workers[2].previous_step = last_steps[2]
     manager.env_workers[2].waiting = True
     manager._take_step = Mock(return_value=step_mock)
     res = manager._step()
     for i, env in enumerate(manager.env_workers):
         if i < 2:
             env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
             manager.step_queue.get_nowait.assert_called()
             # Check that the "last steps" are set to the value returned for each step
             self.assertEqual(
                 manager.env_workers[i].previous_step.current_all_step_result, i
             )
     assert res == [
         manager.env_workers[0].previous_step,
         manager.env_workers[1].previous_step,
     ]
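
Several of the SubprocessEnvManager tests in this listing rely on test doubles (mock_env_factory, MockEnvWorker, create_worker_mock) defined elsewhere in the test module. The sketch below approximates their shape so the tests read on their own; it is an assumption, not the actual helper code.

from unittest import mock

from mlagents.trainers.subprocess_env_manager import (
    EnvironmentCommand,
    EnvironmentResponse,
)


def mock_env_factory(worker_id):
    # Stands in for a factory that would normally create a UnityEnvironment.
    return mock.Mock()


class MockEnvWorker:
    def __init__(self, worker_id, resp=None):
        self.worker_id = worker_id
        self.process = None
        self.conn = None
        self.send = mock.Mock()
        self.recv = mock.Mock(return_value=resp)
        self.waiting = False
        self.previous_step = None


def create_worker_mock(worker_id, step_queue, env_factory, run_options):
    # Each fake worker immediately reports a RESET response carrying its own id.
    return MockEnvWorker(
        worker_id, EnvironmentResponse(EnvironmentCommand.RESET, worker_id, worker_id)
    )
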
def test_curriculum_raises_all_completion_criteria_conversion():
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )

        param_manager = EnvironmentParameterManager(
            run_options.environment_parameters, 1337, False
        )
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (False, False)
        assert param_manager.get_current_lesson_number() == {"param_1": 2}
Example #6
 def test_reset_passes_reset_params(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 1)
     params = {"test": "params"}
     manager._reset_env(params)
     manager.env_workers[0].send.assert_called_with(
         EnvironmentCommand.RESET, (params))
def test_curriculum_conversion():
    run_options = RunOptions.from_dict(
        yaml.safe_load(test_curriculum_config_yaml))
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 3
    # First lesson
    lesson = lessons[0]
    assert lesson.completion_criteria is not None
    assert (lesson.completion_criteria.measure ==
            CompletionCriteriaSettings.MeasureType.REWARD)
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 30.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 1
    # Second lesson
    lesson = lessons[1]
    assert lesson.completion_criteria is not None
    assert (lesson.completion_criteria.measure ==
            CompletionCriteriaSettings.MeasureType.REWARD)
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 60.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert not lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 2
    # Last lesson
    lesson = lessons[2]
    assert lesson.completion_criteria is None
    assert isinstance(lesson.value, UniformSettings)
    assert lesson.value.min_value == 1
    assert lesson.value.max_value == 3
def test_default_settings():
    # Make default settings, one nested and one not.
    default_settings = {
        "max_steps": 1,
        "network_settings": {
            "num_layers": 1000
        }
    }
    behaviors = {
        "test1": {
            "max_steps": 2,
            "network_settings": {
                "hidden_units": 2000
            }
        }
    }
    run_options_dict = {
        "default_settings": default_settings,
        "behaviors": behaviors
    }
    run_options = RunOptions.from_dict(run_options_dict)

    # Check that a new behavior has the default settings
    default_settings_cls = cattr.structure(default_settings, TrainerSettings)
    check_if_different(default_settings_cls, run_options.behaviors["test2"])

    # Check that an existing behavior overrides the defaults in specified fields
    test1_settings = run_options.behaviors["test1"]
    assert test1_settings.max_steps == 2
    assert test1_settings.network_settings.hidden_units == 2000
    assert test1_settings.network_settings.num_layers == 1000
    # Change the overridden fields back, and check if the rest are equal.
    test1_settings.max_steps = 1
    test1_settings.network_settings.hidden_units = (
        default_settings_cls.network_settings.hidden_units
    )
    check_if_different(test1_settings, default_settings_cls)
Example #9
def run_cli(options: RunOptions) -> None:
    try:
        print(
            """

                        ▄▄▄▓▓▓▓
                   ╓▓▓▓▓▓▓█▓▓▓▓▓
              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌
            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄
          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌
        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌
        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓
          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`
            '▀▓▓▓▄      ^▓▓▓  ▓▓▓       └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌
               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀
                   `▀█▓▓▓▓▓▓▓▓▓▌
                        ¬`▀▀▀█▓

        """
        )
    except Exception:
        print("\n\n\tUnity Technologies\n")
    print(get_version_string())

    if options.debug:
        log_level = logging_util.DEBUG
    else:
        log_level = logging_util.INFO
        # disable noisy warnings from tensorflow
        tf_utils.set_warnings_enabled(False)

    logging_util.set_log_level(log_level)

    logger.debug("Configuration for this run:")
    logger.debug(json.dumps(options.as_dict(), indent=4))

    # Options deprecation warnings
    if options.checkpoint_settings.load_model:
        logger.warning(
            "The --load option has been deprecated. Please use the --resume option instead."
        )
    if options.checkpoint_settings.train_model:
        logger.warning(
            "The --train option has been deprecated. Train mode is now the default. Use "
            "--inference to run in inference mode."
        )

    run_seed = options.env_settings.seed

    # Add some timer metadata
    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
    add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
    add_timer_metadata("numpy_version", np.__version__)

    if options.env_settings.seed == -1:
        run_seed = np.random.randint(0, 10000)
        logger.info(f"run_seed set to {run_seed}")
    run_training(run_seed, options)
Example #10
def run_cli(options: RunOptions) -> None:
    try:
        print(
            """
            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        """
        )
    except Exception:
        print("\n\n\tUnity Technologies\n")
    print(get_version_string())

    if options.debug:
        log_level = logging_util.DEBUG
    else:
        log_level = logging_util.INFO

    logging_util.set_log_level(log_level)

    logger.debug("Configuration for this run:")
    logger.debug(json.dumps(options.as_dict(), indent=4))

    # Options deprecation warnings
    if options.checkpoint_settings.load_model:
        logger.warning(
            "The --load option has been deprecated. Please use the --resume option instead."
        )
    if options.checkpoint_settings.train_model:
        logger.warning(
            "The --train option has been deprecated. Train mode is now the default. Use "
            "--inference to run in inference mode."
        )

    run_seed = options.env_settings.seed
    num_areas = options.env_settings.num_areas

    # Add some timer metadata
    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
    add_timer_metadata("pytorch_version", torch_utils.torch.__version__)
    add_timer_metadata("numpy_version", np.__version__)

    if options.env_settings.seed == -1:
        run_seed = np.random.randint(0, 10000)
        logger.debug(f"run_seed set to {run_seed}")
    run_training(run_seed, options, num_areas)
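
For reference, run_cli can also be driven from an in-memory dict rather than argparse; a minimal hypothetical example (the run_id and seed values are made up):

options = RunOptions.from_dict(
    {
        "checkpoint_settings": {"run_id": "my_run"},
        "env_settings": {"seed": 1337},
    }
)
run_cli(options)
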
def test_create_manager():
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
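    # update_lessons appears to return an (updated, must_reset) pair; the
    # assertions below exercise both flags for each scenario.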
    # Not enough episodes completed
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 99},
    ) == (False, False)
    # Enough episodes, but the reward is too low
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1] * 101},
    ) == (False, False)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )
    # Restoring from the global training status should preserve the lesson numbers
    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # No reset required
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 700},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [0] * 101},
    ) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    # Pretend this was created without a YAML file
    no_default_config.set_config_specified(False)

    trainer_factory = TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer_factory.generate(brain_name)
def main():
    """
    Provides an alternative CLI interface to mlagents-learn, 'mlagents-run-experiment'.
    Accepts a JSON/YAML formatted mlagents.trainers.learn.RunOptions object, and executes
    the run loop as defined in mlagents.trainers.learn.run_cli.
    """
    args = parse_command_line()
    expt_config = load_config(args.experiment_config_path)
    run_cli(RunOptions.from_dict(expt_config))
Example #14
 def test_environments_are_created(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     run_options = RunOptions()
     env = SubprocessEnvManager(mock_env_factory, run_options, 2)
     # Creates two processes
     env.create_worker.assert_has_calls([
         mock.call(0, env.step_queue, mock_env_factory, run_options),
         mock.call(1, env.step_queue, mock_env_factory, run_options),
     ])
     self.assertEqual(len(env.env_workers), 2)
def test_sampler_conversion():
    run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml))
    assert run_options.environment_parameters is not None
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 1
    assert lessons[0].completion_criteria is None
    assert isinstance(lessons[0].value, UniformSettings)
    assert lessons[0].value.min_value == 0.5
    assert lessons[0].value.max_value == 10
Example #16
    def _sanitize_run_options(cls, config: RunOptions) -> Dict[str, Any]:
        res = copy.deepcopy(config.as_dict())

        # Filter potentially PII behavior names
        if "behaviors" in res and res["behaviors"]:
            res["behaviors"] = {cls._hash(k): v for (k, v) in res["behaviors"].items()}
            for (k, v) in res["behaviors"].items():
                if "init_path" in v and v["init_path"] is not None:
                    hashed_path = cls._hash(v["init_path"])
                    res["behaviors"][k]["init_path"] = hashed_path
                if "demo_path" in v and v["demo_path"] is not None:
                    hashed_path = cls._hash(v["demo_path"])
                    res["behaviors"][k]["demo_path"] = hashed_path

        # Filter potentially PII curriculum and behavior names from Checkpoint Settings
        if "environment_parameters" in res and res["environment_parameters"]:
            res["environment_parameters"] = {
                cls._hash(k): v for (k, v) in res["environment_parameters"].items()
            }
            for (curriculumName, curriculum) in res["environment_parameters"].items():
                updated_lessons = []
                for lesson in curriculum["curriculum"]:
                    new_lesson = copy.deepcopy(lesson)
                    if "name" in lesson:
                        new_lesson["name"] = cls._hash(lesson["name"])
                    if (
                        "completion_criteria" in lesson
                        and lesson["completion_criteria"] is not None
                    ):
                        new_lesson["completion_criteria"]["behavior"] = cls._hash(
                            new_lesson["completion_criteria"]["behavior"]
                        )
                    updated_lessons.append(new_lesson)
                res["environment_parameters"][curriculumName][
                    "curriculum"
                ] = updated_lessons

        # Filter potentially PII filenames from Checkpoint Settings
        if "checkpoint_settings" in res and res["checkpoint_settings"] is not None:
            if (
                "initialize_from" in res["checkpoint_settings"]
                and res["checkpoint_settings"]["initialize_from"] is not None
            ):
                res["checkpoint_settings"]["initialize_from"] = cls._hash(
                    res["checkpoint_settings"]["initialize_from"]
                )
            if (
                "results_dir" in res["checkpoint_settings"]
                and res["checkpoint_settings"]["results_dir"] is not None
            ):
                res["checkpoint_settings"]["results_dir"] = hash(
                    res["checkpoint_settings"]["results_dir"]
                )

        return res
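
cls._hash is not shown in these snippets; a plausible stand-in (an assumption, not the actual ml-agents helper, which may also salt its input) is a one-way digest:

import hashlib


def _hash(value: str) -> str:
    # One-way digest so behavior names and file paths are not sent in clear text.
    return hashlib.sha256(value.encode("utf-8")).hexdigest()
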
Example #17
 def test_crashed_env_restarts(self, mock_create_worker):
     crashing_worker = MockEnvWorker(
         0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
     )
     restarting_worker = MockEnvWorker(
         0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
     )
     healthy_worker = MockEnvWorker(
         1, EnvironmentResponse(EnvironmentCommand.RESET, 1, 1)
     )
     mock_create_worker.side_effect = [
         crashing_worker,
         healthy_worker,
         restarting_worker,
     ]
     manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 2)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse(
             EnvironmentCommand.ENV_EXITED,
             0,
             UnityCommunicationException("Test msg"),
         ),
         EnvironmentResponse(EnvironmentCommand.CLOSED, 0, None),
         EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(0, None, {})),
         EmptyQueue(),
         EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(1, None, {})),
         EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(2, None, {})),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     assert crashing_worker is manager.env_workers[0]
     assert healthy_worker is manager.env_workers[1]
     crashing_worker.previous_step = last_steps[0]
     crashing_worker.waiting = True
     healthy_worker.previous_step = last_steps[1]
     healthy_worker.waiting = True
     manager._take_step = Mock(return_value=step_mock)
     manager._step()
     healthy_worker.send.assert_has_calls(
         [
             call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
             call(EnvironmentCommand.RESET, ANY),
             call(EnvironmentCommand.STEP, ANY),
         ]
     )
     restarting_worker.send.assert_has_calls(
         [
             call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
             call(EnvironmentCommand.RESET, ANY),
             call(EnvironmentCommand.STEP, ANY),
         ]
     )
def test_no_configuration():
    """
    Verify that a new config will have a PPO trainer with extrinsic rewards.
    """
    blank_runoptions = RunOptions()
    blank_runoptions.behaviors.set_config_specified(False)
    assert isinstance(blank_runoptions.behaviors["test"], TrainerSettings)
    assert isinstance(blank_runoptions.behaviors["test"].hyperparameters,
                      PPOSettings)
    assert (RewardSignalType.EXTRINSIC
            in blank_runoptions.behaviors["test"].reward_signals)
Example #19
    def test_reset_collects_results_from_all_envs(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)

        params = {"test": "params"}
        res = manager._reset_env(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with(EnvironmentCommand.RESET, (params))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result, i
            )
        assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
def test_sampler_and_constant_conversion():
    run_options = RunOptions.from_dict(
        yaml.safe_load(test_sampler_and_constant_config_yaml))
    assert "param_1" in run_options.environment_parameters
    assert "param_2" in run_options.environment_parameters
    lessons_1 = run_options.environment_parameters["param_1"].curriculum
    lessons_2 = run_options.environment_parameters["param_2"].curriculum
    # gaussian
    assert isinstance(lessons_1[0].value, GaussianSettings)
    assert lessons_1[0].value.mean == 4
    assert lessons_1[0].value.st_dev == 5
    # constant
    assert isinstance(lessons_2[0].value, ConstantSettings)
    assert lessons_2[0].value.value == 20
Example #21
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.5)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory, RunOptions(), num_envs)
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_name)
Example #23
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory, RunOptions())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
Example #24
    def test_training_behaviors_collects_results_from_all_envs(
            self, mock_create_worker):
        def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
            return MockEnvWorker(
                worker_id,
                EnvironmentResponse(EnvironmentCommand.RESET, worker_id,
                                    {f"key{worker_id}": worker_id}),
            )

        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)

        res = manager.training_behaviors
        for env in manager.env_workers:
            env.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
            env.recv.assert_called()
        for worker_id in range(4):
            assert f"key{worker_id}" in res
            assert res[f"key{worker_id}"] == worker_id
Example #25
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
        return env

    env_manager = SubprocessEnvManager(simple_env_factory, RunOptions(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def test_setup_init_path(tmpdir, dir_exists):
    """

    :return:
    """
    test_yaml = """
    behaviors:
        BigWallJump:
            init_path: BigWallJump-6540981.pt #full path
            trainer_type: ppo
        MediumWallJump:
            init_path: {}/test_setup_init_path_results/test_run_id/MediumWallJump/checkpoint.pt
            trainer_type: ppo
        SmallWallJump:
            trainer_type: ppo
    checkpoint_settings:
        run_id: test_run_id
        initialize_from: test_run_id
    """.format(tmpdir)
    run_options = RunOptions.from_dict(yaml.safe_load(test_yaml))
    if dir_exists:
        init_path = tmpdir.mkdir("test_setup_init_path_results").mkdir(
            "test_run_id")
        big = init_path.mkdir("BigWallJump").join("BigWallJump-6540981.pt")
        big.write("content")
        med = init_path.mkdir("MediumWallJump").join("checkpoint.pt")
        med.write("content")
        small = init_path.mkdir("SmallWallJump").join("checkpoint.pt")
        small.write("content")

        setup_init_path(run_options.behaviors, init_path)
        assert run_options.behaviors["BigWallJump"].init_path == big
        assert run_options.behaviors["MediumWallJump"].init_path == med
        assert run_options.behaviors["SmallWallJump"].init_path == small
    else:
        # Don't create the directories, so setup should fail
        with pytest.raises(UnityTrainerException):
            setup_init_path(run_options.behaviors,
                            run_options.checkpoint_settings.maybe_init_path)
Example #27
    def test_advance(self, mock_create_worker, training_behaviors_mock,
                     step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        mock_create_worker.side_effect = create_worker_mock
        env_manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
        training_behaviors_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.side_effect = [
            mock_policy,
            mock_policy,
            AgentManagerQueue.Empty(),
        ]
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: (Mock(), Mock())}
        env_stats = {
            "averaged": (1.0, StatsAggregationMethod.AVERAGE),
            "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
        }
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict,
                                    env_stats)
        step_mock.return_value = [step_info]
        env_manager.process_steps(env_manager.get_steps())

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name][0],
            step_info.current_all_step_result[brain_name][1],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
Example #28
def test_handles_no_config_provided(BrainParametersMock):
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_parameters.brain_name)
Example #29
def get_run_options(config_path: str, run_id: str) -> RunOptions:
    configured_dict: Dict[str, Any] = {
        "checkpoint_settings": {},
        "env_settings": {},
        "engine_settings": {},
    }

    if config_path is not None:
        config = mlagents.trainers.cli_utils.load_config(config_path)
        configured_dict.update(config)

    # Use the YAML file values for all values not specified in the CLI.
    for key in configured_dict.keys():
        # Detect bad config options
        if key not in attr.fields_dict(RunOptions):
            raise TrainerConfigError(
                "The option {} was specified in your YAML file, but is invalid.".format(
                    key
                )
            )

    configured_dict["checkpoint_settings"]["run_id"] = run_id

    return RunOptions.from_dict(configured_dict)
def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
    args = parser.parse_args(argv)
    return RunOptions.from_argparse(args)
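
Tying the last two snippets together, a minimal entry point could wire the parsed options straight into the run loop; this mirrors what the earlier main() does for mlagents-run-experiment and is shown here only as a sketch.

def main() -> None:
    # Parse CLI arguments into a RunOptions object and hand it to the run loop.
    run_cli(parse_command_line())


if __name__ == "__main__":
    main()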