def basic_trainer_controller():
    """Return a TrainerController wired to a mocked trainer factory.

    The factory is a MagicMock except for its ghost_controller, which is a
    real GhostController, since TrainerController reads it directly.
    """
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller = TrainerController(
        trainer_factory=factory,
        output_path="test_model_path",
        run_id="test_run_id",
        param_manager=EnvironmentParameterManager(),
        train=True,
        training_seed=99,
    )
    return controller
def test_curriculum_raises_all_completion_criteria_conversion():
    """A curriculum where every lesson has completion criteria should warn,
    then still convert and advance lessons until the curriculum is exhausted."""
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )

    def advance_lesson():
        # Full reward buffer of high rewards, halfway through training.
        return param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        )

    # Two lessons advance (updated, reset), then no further lessons remain.
    assert advance_lesson() == (True, True)
    assert advance_lesson() == (True, True)
    assert advance_lesson() == (False, False)
    assert param_manager.get_current_lesson_number() == {"param_1": 2}
def check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    env_parameter_manager=None,
    success_threshold=0.9,
    env_manager=None,
    training_seed=None,
):
    """
    Run a full training session on `env` and assert that it learned.

    :param env: The simple test environment to train on.
    :param trainer_config: Behavior settings passed to the TrainerFactory.
    :param reward_processor: Reduces each agent's final-reward list to a scalar.
    :param env_parameter_manager: Optional manager; a fresh default one is
        created when None (avoids a shared mutable default).
    :param success_threshold: Minimum processed reward required per agent, or
        None to only check that setup/training runs without asserting reward.
    :param env_manager: Optional pre-built env manager; defaults to a
        SimpleEnvManager wrapping `env`.
    :param training_seed: Seed for training; defaults to 1337.
    """
    if env_parameter_manager is None:
        env_parameter_manager = EnvironmentParameterManager()
    # Create controller and begin training. Renamed `dir` -> `output_dir`:
    # the original name shadowed the `dir` builtin.
    with tempfile.TemporaryDirectory() as output_dir:
        run_id = "id"
        seed = 1337 if training_seed is None else training_seed
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            output_path=output_dir,
            train_model=True,
            load_model=False,
            seed=seed,
            param_manager=env_parameter_manager,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=output_dir,
            run_id=run_id,
            param_manager=env_parameter_manager,
            train=True,
            training_seed=seed,
        )
        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:
            # For tests where we are just checking setup and not reward,
            # success_threshold is None and this block is skipped.
            processed_rewards = [
                reward_processor(rewards) for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
def test_handles_no_config_provided():
    """Trainer setup should work when no behavior configs are provided at all."""
    empty_behaviors = RunOptions().behaviors
    factory = trainer_util.TrainerFactory(
        trainer_config=empty_behaviors,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    # Generating a trainer for a brain with no explicit config must not raise.
    factory.generate("testbrain")
def test_sac_trainer_update_normalization(sac_config):
    """advance() on a SAC trainer should trigger update_normalization exactly
    once on both the policy and the critic (ValueNetwork)."""
    behavior_id = "test_brain?team=0"
    parsed_id = BehaviorIdentifiers.from_name_behavior_id(behavior_id)
    specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    factory = TrainerFactory(
        trainer_config=sac_config.behaviors,
        output_path="results_dir",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer = factory.generate(parsed_id.brain_name)
    trainer.add_policy(parsed_id, trainer.create_policy(parsed_id, specs))

    # Feed one completed trajectory through the trainer's queue.
    queue = AgentManagerQueue(behavior_id)
    trainer.subscribe_trajectory_queue(queue)
    queue.put(
        make_fake_trajectory(
            length=15,
            max_step_complete=True,
            observation_specs=create_observation_specs_with_shapes([(1,)]),
            action_spec=specs.action_spec,
        )
    )
    # mocking out update_normalization in both the policy and critic
    with patch(
        "mlagents.trainers.torch.networks.ValueNetwork.update_normalization"
    ) as critic_norm_mock, patch(
        "mlagents.trainers.policy.torch_policy.TorchPolicy.update_normalization"
    ) as policy_norm_mock:
        trainer.advance()
        critic_norm_mock.assert_called_once()
        policy_norm_mock.assert_called_once()
def test_handles_no_config_provided():
    """Trainer setup should tolerate a run that supplies no behavior configs."""
    behaviors = RunOptions().behaviors
    # Pretend this was created without a YAML file
    behaviors.set_config_specified(False)
    factory = TrainerFactory(
        trainer_config=behaviors,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    # Must fall back to default settings instead of raising.
    factory.generate("testbrain")
def test_initialize_ppo_trainer(BehaviorSpecMock, dummy_config):
    """TrainerFactory.generate should construct a PPOTrainer with the expected
    constructor arguments.

    Fixes two latent bugs in the original test:
    - `assert seed == seed` inside the mocked constructor compared the shadowing
      parameter to itself (always True); the seed is now checked against a
      captured `expected_seed`.
    - the generation loop rebound `brain_name`, which the mocked constructor's
      closure also asserts against; the loop now uses its own variable.
    """
    brain_name = "testbrain"
    training_behaviors = {"testbrain": BehaviorSpecMock()}
    output_path = "results_dir"
    train_model = True
    load_model = False
    expected_seed = 11
    expected_reward_buff_cap = 1
    base_config = dummy_config.behaviors
    expected_config = ppo_dummy_config()

    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_settings,
        training,
        load,
        seed,
        artifact_path,
    ):
        assert brain == brain_name
        assert trainer_settings == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == train_model
        assert load == load_model
        # Compare against the captured value; the `seed` parameter shadows
        # the outer variable, so `seed == seed` would be a tautology.
        assert seed == expected_seed
        assert artifact_path == os.path.join(output_path, brain_name)

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        trainer_factory = trainer_util.TrainerFactory(
            trainer_config=base_config,
            output_path=output_path,
            train_model=train_model,
            load_model=load_model,
            seed=expected_seed,
            param_manager=EnvironmentParameterManager(),
        )
        trainers = {}
        # Do NOT reuse `brain_name` here: mock_constructor closes over it.
        for behavior in training_behaviors:
            trainers[behavior] = trainer_factory.generate(behavior)
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], PPOTrainer)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        # Only resolve an init path when --initialize-from was given.
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists (fails fast on resume/force conflicts).
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states (previous training status) when resuming.
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure Tensorboard Writers and StatsReporter.
        # Past TB data is cleared unless this run is a resume.
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)
        # No env executable means in-Editor training: port must be None.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always close the env and persist run artifacts, even on error/interrupt.
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def _initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    param_manager: EnvironmentParameterManager,
    init_path: Optional[str] = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters, as well as
    some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param output_path: Path to save the model and summary statistics
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length
        for PPOTrainer
    :param init_path: Path from which to load model, if different from model_path.
    :param multi_gpu: Accepted for API compatibility; not referenced in this function.
    :return: The constructed Trainer, wrapped in a GhostTrainer when self-play is configured.
    :raises TrainerConfigError: If the configured trainer type is neither PPO nor SAC.
    """
    trainer_artifact_path = os.path.join(output_path, brain_name)
    # When initializing from another run, point this brain at its own subfolder.
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)
    min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)
    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type
    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )
    # Self-play wraps the base trainer in a GhostTrainer coordinated by ghost_controller.
    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            trainer_artifact_path,
        )
    return trainer
def test_create_manager():
    """End-to-end check of EnvironmentParameterManager lesson bookkeeping:
    initial samplers, lesson advancement rules, and restore-from-global-status."""
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }

    def report(steps, rewards):
        # One update_lessons call for "fake_behavior" with max_steps fixed at 1000.
        return param_manager.update_lessons(
            trainer_steps={"fake_behavior": steps},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": rewards},
        )

    # Not enough episodes completed
    assert report(500, [1000] * 99) == (False, False)
    # Not enough episodes reward
    assert report(500, [1] * 101) == (False, False)
    # Enough episodes with enough reward: lesson advances and env resets.
    assert report(500, [1000] * 101) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )
    # The use of global status should make it so that the lesson numbers are maintained
    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # No reset required
    assert report(700, [0] * 101) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Check if directory exists (fails fast on resume/force conflicts).
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        # No env executable means in-Editor training: port must be None.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always close the env and persist run artifacts, even on error/interrupt.
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def test_curriculum_no_behavior():
    """Parsing a curriculum whose completion criteria name no behavior must
    raise a TypeError."""
    with pytest.raises(TypeError):
        parsed = RunOptions.from_dict(
            yaml.safe_load(test_curriculum_no_behavior_yaml)
        )
        EnvironmentParameterManager(parsed.environment_parameters, 1337, False)