def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses is a new instance
    (i.e. all factory methods are used properly.)
    """
    check_if_different(RunOptions(), RunOptions())
    check_if_different(TrainerSettings(), TrainerSettings())
def test_config_specified():
    # Test require all behavior names to be specified (or not)
    # Remove any pre-set defaults
    TrainerSettings.default_override = None
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Don't require all behavior names
    ro.behaviors.set_config_specified(False)
    # Test that we can grab an entry that is not in the dict.
    assert isinstance(ro.behaviors["test2"], TrainerSettings)

    # Create strict RunOptions with no default_settings
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Require all behavior names
    ro.behaviors.set_config_specified(True)
    with pytest.raises(TrainerConfigError):
        # Variable must be accessed otherwise Python won't query the dict
        print(ro.behaviors["test2"])

    # Create strict RunOptions with default settings
    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Require all behavior names
    ro.behaviors.set_config_specified(True)
    # Test that we can grab an entry that is not in the dict.
    assert isinstance(ro.behaviors["test2"], TrainerSettings)
def write_run_options(output_dir: str, run_options: RunOptions) -> None:
    run_options_path = os.path.join(output_dir, "configuration.yaml")
    try:
        with open(run_options_path, "w") as f:
            try:
                yaml.dump(run_options.as_dict(), f, sort_keys=False)
            except TypeError:  # Older versions of pyyaml don't support sort_keys
                yaml.dump(run_options.as_dict(), f)
    except FileNotFoundError:
        logger.warning(
            f"Unable to save configuration to {run_options_path}. "
            f"Make sure the directory exists"
        )
def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
    manager.step_queue = Mock()
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
        EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    manager.env_workers[0].previous_step = last_steps[0]
    manager.env_workers[1].previous_step = last_steps[1]
    manager.env_workers[2].previous_step = last_steps[2]
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager._step()
    for i, env in enumerate(manager.env_workers):
        if i < 2:
            env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result, i
            )
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def test_curriculum_raises_all_completion_criteria_conversion():
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )

    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (False, False)
    assert param_manager.get_current_lesson_number() == {"param_1": 2}
def test_reset_passes_reset_params(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 1)
    params = {"test": "params"}
    manager._reset_env(params)
    manager.env_workers[0].send.assert_called_with(EnvironmentCommand.RESET, (params))
def test_curriculum_conversion():
    run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml))
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 3
    # First lesson
    lesson = lessons[0]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 30.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 1
    # Second lesson
    lesson = lessons[1]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 60.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert not lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 2
    # Last lesson
    lesson = lessons[2]
    assert lesson.completion_criteria is None
    assert isinstance(lesson.value, UniformSettings)
    assert lesson.value.min_value == 1
    assert lesson.value.max_value == 3
def test_default_settings():
    # Make default settings, one nested and one not.
    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    run_options = RunOptions.from_dict(run_options_dict)

    # Check that a new behavior has the default settings
    default_settings_cls = cattr.structure(default_settings, TrainerSettings)
    check_if_different(default_settings_cls, run_options.behaviors["test2"])

    # Check that an existing behavior overrides the defaults in specified fields
    test1_settings = run_options.behaviors["test1"]
    assert test1_settings.max_steps == 2
    assert test1_settings.network_settings.hidden_units == 2000
    assert test1_settings.network_settings.num_layers == 1000

    # Change the overridden fields back, and check if the rest are equal.
    test1_settings.max_steps = 1
    test1_settings.network_settings.hidden_units = (
        default_settings_cls.network_settings.hidden_units
    )
    check_if_different(test1_settings, default_settings_cls)
def run_cli(options: RunOptions) -> None:
    try:
        print(
            """
                        ▄▄▄▓▓▓▓
                   ╓▓▓▓▓▓▓█▓▓▓▓▓
              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌
            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄
          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌
        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌
        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓
          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`
            '▀▓▓▓▄      ^▓▓▓  ▓▓▓       └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌
               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀
                   `▀█▓▓▓▓▓▓▓▓▓▌
                        ¬`▀▀▀█▓
        """
        )
    except Exception:
        print("\n\n\tUnity Technologies\n")
    print(get_version_string())

    if options.debug:
        log_level = logging_util.DEBUG
    else:
        log_level = logging_util.INFO
        # disable noisy warnings from tensorflow
        tf_utils.set_warnings_enabled(False)
    logging_util.set_log_level(log_level)

    logger.debug("Configuration for this run:")
    logger.debug(json.dumps(options.as_dict(), indent=4))

    # Options deprecation warnings
    if options.checkpoint_settings.load_model:
        logger.warning(
            "The --load option has been deprecated. Please use the --resume option instead."
        )
    if options.checkpoint_settings.train_model:
        logger.warning(
            "The --train option has been deprecated. Train mode is now the default. Use "
            "--inference to run in inference mode."
        )

    run_seed = options.env_settings.seed

    # Add some timer metadata
    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
    add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
    add_timer_metadata("numpy_version", np.__version__)

    if options.env_settings.seed == -1:
        run_seed = np.random.randint(0, 10000)
        logger.info(f"run_seed set to {run_seed}")

    run_training(run_seed, options)
def run_cli(options: RunOptions) -> None:
    try:
        print(
            """
            ┐  ╖
        ╓╖╬│╡  ││╬╖╖
    ╓╖╬│││││┘  ╬│││││╬╖
 ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗
 ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣
 ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣
 ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣
 ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣ ╣╣╣┌╣╣╜
 ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣
 ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣
   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣
      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜
          ╙╬╬╬╣╣╣╜
             ╙
        """
        )
    except Exception:
        print("\n\n\tUnity Technologies\n")
    print(get_version_string())

    if options.debug:
        log_level = logging_util.DEBUG
    else:
        log_level = logging_util.INFO
    logging_util.set_log_level(log_level)

    logger.debug("Configuration for this run:")
    logger.debug(json.dumps(options.as_dict(), indent=4))

    # Options deprecation warnings
    if options.checkpoint_settings.load_model:
        logger.warning(
            "The --load option has been deprecated. Please use the --resume option instead."
        )
    if options.checkpoint_settings.train_model:
        logger.warning(
            "The --train option has been deprecated. Train mode is now the default. Use "
            "--inference to run in inference mode."
        )

    run_seed = options.env_settings.seed
    num_areas = options.env_settings.num_areas

    # Add some timer metadata
    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
    add_timer_metadata("pytorch_version", torch_utils.torch.__version__)
    add_timer_metadata("numpy_version", np.__version__)

    if options.env_settings.seed == -1:
        run_seed = np.random.randint(0, 10000)
        logger.debug(f"run_seed set to {run_seed}")

    run_training(run_seed, options, num_areas)
def test_create_manager():
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
    # Not enough episodes completed
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 99},
    ) == (False, False)
    # Not enough episodes reward
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1] * 101},
    ) == (False, False)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )
    # The use of global status should make it so that the lesson numbers are maintained
    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # No reset required
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 700},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [0] * 101},
    ) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    # Pretend this was created without a YAML file
    no_default_config.set_config_specified(False)
    trainer_factory = TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer_factory.generate(brain_name)
def main():
    """
    Provides an alternative CLI interface to mlagents-learn, 'mlagents-run-experiment'.
    Accepts a JSON/YAML formatted mlagents.trainers.learn.RunOptions object, and executes
    the run loop as defined in mlagents.trainers.learn.run_cli.
    """
    args = parse_command_line()
    expt_config = load_config(args.experiment_config_path)
    run_cli(RunOptions.from_dict(expt_config))
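# Illustrative sketch (not part of the original source): a minimal experiment config of the
# kind main() above loads and hands to RunOptions.from_dict(). The behavior name "test1",
# the run_id, and the field values are assumptions chosen to mirror the dicts used in the
# surrounding tests; any mapping accepted by RunOptions.from_dict() would work here.
example_experiment_config = {
    "behaviors": {
        "test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}
    },
    "checkpoint_settings": {"run_id": "test_run_id"},
}
# run_cli(RunOptions.from_dict(example_experiment_config))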
def test_environments_are_created(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    run_options = RunOptions()
    env = SubprocessEnvManager(mock_env_factory, run_options, 2)
    # Creates two processes
    env.create_worker.assert_has_calls(
        [
            mock.call(0, env.step_queue, mock_env_factory, run_options),
            mock.call(1, env.step_queue, mock_env_factory, run_options),
        ]
    )
    self.assertEqual(len(env.env_workers), 2)
def test_sampler_conversion():
    run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml))
    assert run_options.environment_parameters is not None
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 1
    assert lessons[0].completion_criteria is None
    assert isinstance(lessons[0].value, UniformSettings)
    assert lessons[0].value.min_value == 0.5
    assert lessons[0].value.max_value == 10
def _sanitize_run_options(cls, config: RunOptions) -> Dict[str, Any]:
    res = copy.deepcopy(config.as_dict())
    # Filter potentially PII behavior names
    if "behaviors" in res and res["behaviors"]:
        res["behaviors"] = {cls._hash(k): v for (k, v) in res["behaviors"].items()}
        for (k, v) in res["behaviors"].items():
            if "init_path" in v and v["init_path"] is not None:
                hashed_path = cls._hash(v["init_path"])
                res["behaviors"][k]["init_path"] = hashed_path
            if "demo_path" in v and v["demo_path"] is not None:
                hashed_path = cls._hash(v["demo_path"])
                res["behaviors"][k]["demo_path"] = hashed_path
    # Filter potentially PII curriculum and behavior names
    if "environment_parameters" in res and res["environment_parameters"]:
        res["environment_parameters"] = {
            cls._hash(k): v for (k, v) in res["environment_parameters"].items()
        }
        for (curriculumName, curriculum) in res["environment_parameters"].items():
            updated_lessons = []
            for lesson in curriculum["curriculum"]:
                new_lesson = copy.deepcopy(lesson)
                if "name" in lesson:
                    new_lesson["name"] = cls._hash(lesson["name"])
                if (
                    "completion_criteria" in lesson
                    and lesson["completion_criteria"] is not None
                ):
                    new_lesson["completion_criteria"]["behavior"] = cls._hash(
                        new_lesson["completion_criteria"]["behavior"]
                    )
                updated_lessons.append(new_lesson)
            res["environment_parameters"][curriculumName][
                "curriculum"
            ] = updated_lessons
    # Filter potentially PII filenames from Checkpoint Settings
    if "checkpoint_settings" in res and res["checkpoint_settings"] is not None:
        if (
            "initialize_from" in res["checkpoint_settings"]
            and res["checkpoint_settings"]["initialize_from"] is not None
        ):
            res["checkpoint_settings"]["initialize_from"] = cls._hash(
                res["checkpoint_settings"]["initialize_from"]
            )
        if (
            "results_dir" in res["checkpoint_settings"]
            and res["checkpoint_settings"]["results_dir"] is not None
        ):
            res["checkpoint_settings"]["results_dir"] = cls._hash(
                res["checkpoint_settings"]["results_dir"]
            )
    return res
def test_crashed_env_restarts(self, mock_create_worker):
    crashing_worker = MockEnvWorker(
        0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
    )
    restarting_worker = MockEnvWorker(
        0, EnvironmentResponse(EnvironmentCommand.RESET, 0, 0)
    )
    healthy_worker = MockEnvWorker(
        1, EnvironmentResponse(EnvironmentCommand.RESET, 1, 1)
    )
    mock_create_worker.side_effect = [
        crashing_worker,
        healthy_worker,
        restarting_worker,
    ]
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 2)
    manager.step_queue = Mock()
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse(
            EnvironmentCommand.ENV_EXITED,
            0,
            UnityCommunicationException("Test msg"),
        ),
        EnvironmentResponse(EnvironmentCommand.CLOSED, 0, None),
        EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(0, None, {})),
        EmptyQueue(),
        EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(1, None, {})),
        EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(2, None, {})),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    assert crashing_worker is manager.env_workers[0]
    assert healthy_worker is manager.env_workers[1]
    crashing_worker.previous_step = last_steps[0]
    crashing_worker.waiting = True
    healthy_worker.previous_step = last_steps[1]
    healthy_worker.waiting = True
    manager._take_step = Mock(return_value=step_mock)
    manager._step()
    healthy_worker.send.assert_has_calls(
        [
            call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
            call(EnvironmentCommand.RESET, ANY),
            call(EnvironmentCommand.STEP, ANY),
        ]
    )
    restarting_worker.send.assert_has_calls(
        [
            call(EnvironmentCommand.ENVIRONMENT_PARAMETERS, ANY),
            call(EnvironmentCommand.RESET, ANY),
            call(EnvironmentCommand.STEP, ANY),
        ]
    )
def test_no_configuration():
    """
    Verify that a new config will have a PPO trainer with extrinsic rewards.
    """
    blank_runoptions = RunOptions()
    blank_runoptions.behaviors.set_config_specified(False)
    assert isinstance(blank_runoptions.behaviors["test"], TrainerSettings)
    assert isinstance(blank_runoptions.behaviors["test"].hyperparameters, PPOSettings)
    assert (
        RewardSignalType.EXTRINSIC
        in blank_runoptions.behaviors["test"].reward_signals
    )
def test_reset_collects_results_from_all_envs(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)
    params = {"test": "params"}
    res = manager._reset_env(params)
    for i, env in enumerate(manager.env_workers):
        env.send.assert_called_with(EnvironmentCommand.RESET, (params))
        env.recv.assert_called()
        # Check that the "last steps" are set to the value returned for each step
        self.assertEqual(
            manager.env_workers[i].previous_step.current_all_step_result, i
        )
    assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
def test_sampler_and_constant_conversion():
    run_options = RunOptions.from_dict(
        yaml.safe_load(test_sampler_and_constant_config_yaml)
    )
    assert "param_1" in run_options.environment_parameters
    assert "param_2" in run_options.environment_parameters
    lessons_1 = run_options.environment_parameters["param_1"].curriculum
    lessons_2 = run_options.environment_parameters["param_2"].curriculum
    # gaussian
    assert isinstance(lessons_1[0].value, GaussianSettings)
    assert lessons_1[0].value.mean == 4
    assert lessons_1[0].value.st_dev == 5
    # constant
    assert isinstance(lessons_2[0].value, ConstantSettings)
    assert lessons_2[0].value.value == 20
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response. We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.5)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory, RunOptions(), num_envs)
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_name)
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory, RunOptions())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
def test_training_behaviors_collects_results_from_all_envs(self, mock_create_worker):
    def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
        return MockEnvWorker(
            worker_id,
            EnvironmentResponse(
                EnvironmentCommand.RESET, worker_id, {f"key{worker_id}": worker_id}
            ),
        )

    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)
    res = manager.training_behaviors
    for env in manager.env_workers:
        env.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
        env.recv.assert_called()
    for worker_id in range(4):
        assert f"key{worker_id}" in res
        assert res[f"key{worker_id}"] == worker_id
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
        return env

    env_manager = SubprocessEnvManager(simple_env_factory, RunOptions(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate
    # processes) so we check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def test_setup_init_path(tmpdir, dir_exists):
    """
    :return:
    """
    test_yaml = """
    behaviors:
        BigWallJump:
            init_path: BigWallJump-6540981.pt #full path
            trainer_type: ppo
        MediumWallJump:
            init_path: {}/test_setup_init_path_results/test_run_id/MediumWallJump/checkpoint.pt
            trainer_type: ppo
        SmallWallJump:
            trainer_type: ppo
    checkpoint_settings:
        run_id: test_run_id
        initialize_from: test_run_id
    """.format(
        tmpdir
    )
    run_options = RunOptions.from_dict(yaml.safe_load(test_yaml))
    if dir_exists:
        init_path = tmpdir.mkdir("test_setup_init_path_results").mkdir("test_run_id")
        big = init_path.mkdir("BigWallJump").join("BigWallJump-6540981.pt")
        big.write("content")
        med = init_path.mkdir("MediumWallJump").join("checkpoint.pt")
        med.write("content")
        small = init_path.mkdir("SmallWallJump").join("checkpoint.pt")
        small.write("content")
        setup_init_path(run_options.behaviors, init_path)
        assert run_options.behaviors["BigWallJump"].init_path == big
        assert run_options.behaviors["MediumWallJump"].init_path == med
        assert run_options.behaviors["SmallWallJump"].init_path == small
    else:
        # don't make dirs and fail
        with pytest.raises(UnityTrainerException):
            setup_init_path(
                run_options.behaviors, run_options.checkpoint_settings.maybe_init_path
            )
def test_advance(self, mock_create_worker, training_behaviors_mock, step_mock):
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    mock_create_worker.side_effect = create_worker_mock
    env_manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
    training_behaviors_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    mock_policy = mock.Mock()
    agent_manager_mock.policy_queue.get_nowait.side_effect = [
        mock_policy,
        mock_policy,
        AgentManagerQueue.Empty(),
    ]
    env_manager.set_agent_manager(brain_name, agent_manager_mock)

    step_info_dict = {brain_name: (Mock(), Mock())}
    env_stats = {
        "averaged": (1.0, StatsAggregationMethod.AVERAGE),
        "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
    }
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict, env_stats)
    step_mock.return_value = [step_info]
    env_manager.process_steps(env_manager.get_steps())

    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name][0],
        step_info.current_all_step_result[brain_name][1],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )
    # Test policy queue
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def test_handles_no_config_provided(BrainParametersMock):
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_parameters.brain_name)
def get_run_options(config_path: str, run_id: str) -> RunOptions:
    configured_dict: Dict[str, Any] = {
        "checkpoint_settings": {},
        "env_settings": {},
        "engine_settings": {},
    }
    if config_path is not None:
        config = mlagents.trainers.cli_utils.load_config(config_path)
        configured_dict.update(config)
    # Use the YAML file values for all values not specified in the CLI.
    for key in configured_dict.keys():
        # Detect bad config options
        if key not in attr.fields_dict(RunOptions):
            raise TrainerConfigError(
                "The option {} was specified in your YAML file, but is invalid.".format(
                    key
                )
            )
    configured_dict["checkpoint_settings"]["run_id"] = run_id
    return RunOptions.from_dict(configured_dict)
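# Hedged usage sketch (an assumption, not from the original source): get_run_options()
# merges an optional YAML config with a run id, and write_run_options() above persists the
# result. The path "config.yaml", run id "my_run", and output directory "results/my_run"
# are placeholders.
# options = get_run_options("config.yaml", "my_run")
# write_run_options("results/my_run", options)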
def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
    args = parser.parse_args(argv)
    return RunOptions.from_argparse(args)