def __init__(
    self,
    settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
    run_seed: int = -1,
    restore: bool = False,
):
    """
    Set up the manager that tracks every environment parameter of a
    training session, i.e. which lesson each parameter is in and which
    sampler is currently active for it.
    :param settings: Mapping from environment parameter name to its
    EnvironmentParameterSettings. None is treated as an empty mapping.
    :param run_seed: Seed forwarded to the samplers (used for parameters
    that do not define their own seed).
    :param restore: When true, lesson numbers already stored in
    GlobalTrainingStatus are kept; otherwise every parameter starts at
    lesson 0.
    """
    self._dict_settings = {} if settings is None else settings

    for name in self._dict_settings:
        stored_lesson = GlobalTrainingStatus.get_parameter_state(
            name, StatusType.LESSON_NUM
        )
        # Keep the stored lesson only when restoring AND one exists.
        if restore and stored_lesson is not None:
            continue
        GlobalTrainingStatus.set_parameter_state(name, StatusType.LESSON_NUM, 0)

    # Every parameter starts with a smoothed progress value of zero.
    self._smoothed_values: Dict[str, float] = defaultdict(float)
    self._smoothed_values.update(dict.fromkeys(self._dict_settings, 0.0))

    # Propagate the run seed to the samplers.
    self._set_sampler_seeds(run_seed)
def test_model_management(tmpdir):
    """
    Check the checkpoint bookkeeping of ModelCheckpointManager.

    Three checkpoints are seeded into GlobalTrainingStatus, two more are
    added with a limit of four, and a final model is tracked. The number of
    stored checkpoints must stay at four, and both the checkpoint list and
    the final checkpoint must be recorded for the behavior.
    """
    results_path = os.path.join(tmpdir, "results")
    brain_name = "Mock_brain"
    final_model_path = os.path.join(results_path, brain_name)
    test_checkpoint_list = [
        {
            "steps": steps,
            "file_path": os.path.join(final_model_path, f"{brain_name}-{steps}.nn"),
            "reward": reward,
            "creation_time": time.time(),
            "auxillary_file_paths": [],
        }
        for steps, reward in ((1, 1.312), (2, 1.912), (3, 2.312))
    ]
    GlobalTrainingStatus.set_parameter_state(
        brain_name, StatusType.CHECKPOINTS, test_checkpoint_list
    )

    # Fourth checkpoint fits within the limit of 4.
    new_checkpoint_4 = ModelCheckpoint(
        4, os.path.join(final_model_path, f"{brain_name}-4.nn"), 2.678, time.time()
    )
    ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    # Fifth checkpoint exceeds the limit; the stored count must stay at 4.
    new_checkpoint_5 = ModelCheckpoint(
        5, os.path.join(final_model_path, f"{brain_name}-5.nn"), 3.122, time.time()
    )
    ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    final_model_path = f"{final_model_path}.nn"
    final_model_time = time.time()
    current_step = 6
    final_model = ModelCheckpoint(
        current_step, final_model_path, 3.294, final_model_time
    )

    ModelCheckpointManager.track_final_checkpoint(brain_name, final_model)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    check_checkpoints = GlobalTrainingStatus.saved_state[brain_name][
        StatusType.CHECKPOINTS.value
    ]
    assert check_checkpoints is not None

    # The final checkpoint is stored under the behavior name, so it has to be
    # looked up there. Indexing saved_state by the status key alone addresses
    # a category of that name and never inspects this behavior's entry.
    stored_final_model = GlobalTrainingStatus.get_parameter_state(
        brain_name, StatusType.FINAL_CHECKPOINT
    )
    assert stored_final_model is not None
def log_current_lesson(self, parameter_name: Optional[str] = None) -> None:
    """
    Write the current lesson name and sampler value to the log.
    :param parameter_name: Name of the single parameter to report. When
    omitted, every known parameter is reported in turn.
    """
    if parameter_name is None:
        names_to_report = list(self._dict_settings)
    else:
        names_to_report = [parameter_name]

    for name in names_to_report:
        # Unknown names raise KeyError here, as a direct lookup would.
        current_lesson = self._dict_settings[name].curriculum[
            GlobalTrainingStatus.get_parameter_state(name, StatusType.LESSON_NUM)
        ]
        lesson_name = current_lesson.name
        lesson_value = current_lesson.value
        logger.info(
            f"Parameter '{name}' is in lesson '{lesson_name}' "
            f"and has value '{lesson_value}'."
        )
def save_model(self) -> None:
    """
    Record the current ELO in GlobalTrainingStatus, then delegate the
    actual model saving to the wrapped trainer.
    """
    behavior = self.brain_name
    elo = self.current_elo
    GlobalTrainingStatus.set_parameter_state(behavior, StatusType.ELO, elo)
    self.trainer.save_model()
def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
    """
    Return the checkpoint list stored for a behavior.
    :param behavior_name: Behavior whose checkpoints are requested.
    :return: The stored list. When nothing (or an empty list) is stored, a
    fresh empty list is registered in GlobalTrainingStatus and returned.
    """
    stored = GlobalTrainingStatus.get_parameter_state(
        behavior_name, StatusType.CHECKPOINTS
    )
    if stored:
        return stored

    fresh: List[Dict[str, Any]] = []
    GlobalTrainingStatus.set_parameter_state(
        behavior_name, StatusType.CHECKPOINTS, fresh
    )
    return fresh
def test_globaltrainingstatus(tmpdir):
    """
    Round-trip a lesson number through save_state/load_state and check that
    unknown categories and unknown status keys read back as None.
    """
    status_file = os.path.join(tmpdir, "test.json")

    GlobalTrainingStatus.set_parameter_state("Category1", StatusType.LESSON_NUM, 3)
    GlobalTrainingStatus.save_state(status_file)

    with open(status_file) as handle:
        saved = json.load(handle)

    lesson_key = StatusType.LESSON_NUM.value
    assert "Category1" in saved
    assert lesson_key in saved["Category1"]
    assert saved["Category1"][lesson_key] == 3
    assert "metadata" in saved

    GlobalTrainingStatus.load_state(status_file)
    assert (
        GlobalTrainingStatus.get_parameter_state("Category1", StatusType.LESSON_NUM)
        == 3
    )

    # Unknown categories and unknown status types (keys) yield None.
    missing_category = GlobalTrainingStatus.get_parameter_state(
        "Category3", StatusType.LESSON_NUM
    )

    class FakeStatusType(Enum):
        NOTAREALKEY = "notarealkey"

    missing_key = GlobalTrainingStatus.get_parameter_state(
        "Category1", FakeStatusType.NOTAREALKEY
    )
    assert missing_category is None
    assert missing_key is None
def update_lessons(
    self,
    trainer_steps: Dict[str, int],
    trainer_max_steps: Dict[str, int],
    trainer_reward_buffer: Dict[str, List[float]],
) -> Tuple[bool, bool]:
    """
    Advance each environment parameter to its next lesson when its
    completion criteria say so.
    :param trainer_steps: Mapping from behavior_name to the number of
    training steps performed by that behavior's trainer.
    :param trainer_max_steps: Mapping from behavior_name to the step count
    used as denominator when computing training progress.
    :param trainer_reward_buffer: Mapping from behavior_name to the most
    recent episode returns of that behavior's trainer.
    :returns: A tuple of two booleans: (True if any lesson has changed,
    True if a changed lesson requires the environment to reset).
    """
    any_updated = False
    reset_needed = False

    for param_name, settings in self._dict_settings.items():
        current_num = GlobalTrainingStatus.get_parameter_state(
            param_name, StatusType.LESSON_NUM
        )
        upcoming_num = current_num + 1
        lesson = settings.curriculum[current_num]
        criteria = lesson.completion_criteria

        # Nothing to do without criteria or when already in the last lesson.
        if criteria is None or len(settings.curriculum) <= upcoming_num:
            continue

        behavior = criteria.behavior
        if behavior not in trainer_steps:
            continue

        progress = float(trainer_steps[behavior]) / float(
            trainer_max_steps[behavior]
        )
        must_increment, new_smoothing = criteria.need_increment(
            progress,
            trainer_reward_buffer[behavior],
            self._smoothed_values[param_name],
        )
        # The smoothed value is stored even when the lesson does not change.
        self._smoothed_values[param_name] = new_smoothing
        if not must_increment:
            continue

        GlobalTrainingStatus.set_parameter_state(
            param_name, StatusType.LESSON_NUM, upcoming_num
        )
        next_lesson = settings.curriculum[upcoming_num]
        new_lesson_name = next_lesson.name
        new_lesson_value = next_lesson.value
        logger.info(
            f"Parameter '{param_name}' has been updated to {new_lesson_value}."
            f" Now in lesson '{new_lesson_name}'"
        )
        any_updated = True
        if criteria.require_reset:
            reset_needed = True

    return any_updated, reset_needed
def track_final_checkpoint(cls, behavior_name: str,
                           final_checkpoint: NNCheckpoint) -> None:
    """
    Store the information about the final model (or an intermediate model
    if training is interrupted) in GlobalTrainingStatus.
    :param behavior_name: Behavior name of the model.
    :param final_checkpoint: Checkpoint information for the final model; it
    is converted to a plain dict before being stored.
    """
    GlobalTrainingStatus.set_parameter_state(
        behavior_name,
        StatusType.FINAL_CHECKPOINT,
        attr.asdict(final_checkpoint),
    )
def add_checkpoint(cls, behavior_name: str, new_checkpoint: NNCheckpoint,
                   keep_checkpoints: int) -> None:
    """
    Record a new checkpoint for a behavior and trim the stored list.
    :param behavior_name: Behavior name for the checkpoint.
    :param new_checkpoint: The new checkpoint to be recorded.
    :param keep_checkpoints: Number of checkpoints to record (user-defined);
    passed to the cleanup step after the new entry is appended.
    """
    as_dict = attr.asdict(new_checkpoint)
    tracked = cls.get_checkpoints(behavior_name)
    tracked.append(as_dict)
    # Cleanup runs after the append, so the new entry counts toward the limit.
    cls._cleanup_extra_checkpoints(tracked, keep_checkpoints)
    GlobalTrainingStatus.set_parameter_state(
        behavior_name, StatusType.CHECKPOINTS, tracked
    )
def get_current_lesson_number(self) -> Dict[str, int]:
    """
    Build a mapping from environment parameter name to the lesson number
    currently stored for it in GlobalTrainingStatus.
    """
    return {
        name: GlobalTrainingStatus.get_parameter_state(name, StatusType.LESSON_NUM)
        for name in self._dict_settings
    }
def advance(self, env: EnvManager) -> int:
    """
    Step the environment once, publish the current lesson of each
    curriculum-driven behavior, and advance all non-threaded trainers.
    :param env: Environment manager to step.
    :return: The value returned by env.advance().
    """
    # Step the environment.
    with hierarchical_timer("env_step"):
        num_steps = env.advance()

    # Report and persist the current lesson of every trained behavior.
    if self.meta_curriculum:
        curricula = self.meta_curriculum.brains_to_curricula
        for brain_name, curriculum in curricula.items():
            if brain_name not in self.trainers:
                continue
            self.trainers[brain_name].stats_reporter.set_stat(
                "Environment/Lesson", curriculum.lesson_num
            )
            GlobalTrainingStatus.set_parameter_state(
                brain_name, StatusType.LESSON_NUM, curriculum.lesson_num
            )

    # Threaded trainers are skipped here; only the others are advanced.
    for trainer in self.trainers.values():
        if trainer.threaded:
            continue
        with hierarchical_timer("trainer_advance"):
            trainer.advance()

    return num_steps
def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]:
    """
    Build a mapping from environment parameter name to the
    ParameterRandomizationSettings of the lesson that parameter is
    currently in.
    """
    active: Dict[str, ParameterRandomizationSettings] = {}
    for name, param_settings in self._dict_settings.items():
        lesson_index = GlobalTrainingStatus.get_parameter_state(
            name, StatusType.LESSON_NUM
        )
        active[name] = param_settings.curriculum[lesson_index].value
    return active
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches a meta-learning training session.

    An initial controller is built only to obtain the starting weights.
    Then up to 200 meta updates are run; each one creates a fresh
    environment, trainer factory and controller, trains from the current
    weights, and carries the resulting weights into the next update.
    Learning rate, beta and epsilon are annealed linearly towards zero over
    the meta updates.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments. The run id, the
    environment path and the 'Brain' behavior settings are overridden below.
    """
    # NOTE(review): the run id and executable paths are hard-coded experiment
    # settings that override whatever the caller supplied.
    find_target_env = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    carry_object_env = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"

    options.checkpoint_settings.run_id = "test8"
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        if env_settings.env_path is None:
            port = None

        # Begin training
        env_settings.env_path = find_target_env
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )
        # Temporarily shorten training while the initial weights are
        # collected; the configured value is restored after that.
        max_steps = options.behaviors['Brain'].max_steps
        options.behaviors['Brain'].max_steps = 10
        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=0,
        )
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    try:
        # Get inital weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        # NOTE(review): random_sample() draws from [0.0, 1.0), so this
        # condition is never true and the Carry Object stage is never
        # selected. Left unchanged because the intended threshold is not
        # visible here -- confirm with the author.
        if sample > 1:
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = carry_object_env
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = find_target_env

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )
        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=step,
        )
        # Linearly anneal the hyperparameters over the meta updates.
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate = 0.0005 * (1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.beta = 0.005 * (1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.epsilon = 0.2 * (1 - counter / max_meta_updates)

        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

        try:
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager, inital_weights)
            print(tc.trainers['Brain'].optimizer)
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # Carry the trained weights into the next meta update. This is
            # done on the success path only: if start_learning raises, there
            # are no new weights, and touching them in `finally` would mask
            # the real error and skip the cleanup below.
            print(len(weights_after_train), len(inital_weights))
            for i, weight in enumerate(weights_after_train):
                inital_weights[i] = weight
        finally:
            # Always release the environment and persist run state.
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
def write_training_status(output_dir: str) -> None:
    """
    Save the global training status to TRAINING_STATUS_FILE_NAME inside
    the given directory.
    :param output_dir: Directory that receives the status file.
    """
    status_path = os.path.join(output_dir, TRAINING_STATUS_FILE_NAME)
    GlobalTrainingStatus.save_state(status_path)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments (checkpoint, environment
    and engine settings, behaviors and environment parameters).
    """
    with hierarchical_timer("run_training.setup"):
        ckpt = options.checkpoint_settings
        env_cfg = options.env_settings
        engine_cfg = options.engine_settings

        results_root = "results"
        output_dir = os.path.join(results_root, ckpt.run_id)
        init_dir = None
        if ckpt.initialize_from is not None:
            init_dir = os.path.join(results_root, ckpt.initialize_from)
        logs_dir = os.path.join(output_dir, "run_logs")
        port: Optional[int] = env_cfg.base_port

        # Validate the output/initialization directories before creating any.
        validate_existing_directories(output_dir, ckpt.resume, ckpt.force, init_dir)
        os.makedirs(logs_dir, exist_ok=True)

        # A resumed run first reloads the previously saved training status.
        if ckpt.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(logs_dir, "training_status.json")
            )

        # Register the stats writers: Tensorboard, gauge, console.
        writers = (
            TensorboardWriter(output_dir, clear_past_data=not ckpt.resume),
            GaugeWriter(),
            ConsoleWriter(),
        )
        for writer in writers:
            StatsReporter.add_writer(writer)

        # No port is passed along when no environment executable is given.
        if env_cfg.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_cfg.env_path,
            engine_cfg.no_graphics,
            run_seed,
            port,
            env_cfg.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_cfg.width,
            height=engine_cfg.height,
            quality_level=engine_cfg.quality_level,
            time_scale=engine_cfg.time_scale,
            target_frame_rate=engine_cfg.target_frame_rate,
            capture_frame_rate=engine_cfg.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_cfg.num_envs
        )
        param_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=ckpt.resume
        )
        should_train = not ckpt.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=output_dir,
            train_model=should_train,
            load_model=ckpt.resume,
            seed=run_seed,
            param_manager=param_manager,
            init_path=init_dir,
            multi_gpu=False,
        )
        controller = TrainerController(
            trainer_factory,
            output_dir,
            ckpt.run_id,
            param_manager,
            should_train,
            run_seed,
        )

    # Train; the environment is closed and run artifacts are written even
    # when training raises.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(output_dir, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)
def __init__(
    self,
    trainer,
    brain_name,
    controller,
    reward_buff_cap,
    trainer_settings,
    training,
    artifact_path,
):
    """
    Creates a GhostTrainer.
    :param trainer: The trainer of the policy/policies being trained with self_play
    :param brain_name: The name of the brain associated with trainer config
    :param controller: GhostController that coordinates all ghost trainers and calculates ELO
    :param reward_buff_cap: Max reward history to track in the reward buffer
    :param trainer_settings: The parameters for the trainer.
    :param training: Whether the trainer is set for training.
    :param artifact_path: Path to store artifacts from this trainer.
    """
    super().__init__(
        brain_name, trainer_settings, training, artifact_path, reward_buff_cap
    )

    self.trainer = trainer
    self.controller = controller

    self._internal_trajectory_queues: Dict[str, AgentManagerQueue[Trajectory]] = {}
    self._internal_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}
    self._team_to_name_to_policy_queue: DefaultDict[
        int, Dict[str, AgentManagerQueue[Policy]]
    ] = defaultdict(dict)

    self._name_to_parsed_behavior_id: Dict[str, BehaviorIdentifiers] = {}

    # assign ghost's stats collection to wrapped trainer's
    self._stats_reporter = self.trainer.stats_reporter
    # Set the logging to print ELO in the console
    self._stats_reporter.add_property(StatsPropertyType.SELF_PLAY, True)

    self_play_parameters = trainer_settings.self_play
    self.window = self_play_parameters.window
    self.play_against_latest_model_ratio = (
        self_play_parameters.play_against_latest_model_ratio
    )
    # An out-of-range ratio is only reported, not corrected.
    if (
        self.play_against_latest_model_ratio > 1.0
        or self.play_against_latest_model_ratio < 0.0
    ):
        logger.warning("The play_against_latest_model_ratio is not between 0 and 1.")

    self.steps_between_save = self_play_parameters.save_steps
    self.steps_between_swap = self_play_parameters.swap_steps
    self.steps_to_train_team = self_play_parameters.team_change
    # NOTE(review): get_max_steps is compared without being called, so it is
    # presumably a property of the base trainer -- confirm.
    if self.steps_to_train_team > self.get_max_steps:
        # Built from adjacent literals: a backslash continuation inside a
        # string literal would embed the source indentation in the message.
        logger.warning(
            "The max steps of the GhostTrainer for behavior name {} is less "
            "than team change. This team will not face opposition that has "
            "been trained if the opposition is managed by a different "
            "GhostTrainer as in an asymmetric game.".format(self.brain_name)
        )

    # Counts the number of steps of the ghost policies. Snapshot swapping
    # depends on this counter whereas snapshot saving and team switching depends
    # on the wrapped. This ensures that all teams train for the same number of trainer
    # steps.
    self.ghost_step: int = 0

    # A list of dicts from brain name to a single snapshot for this trainer's policies
    self.policy_snapshots: List[Dict[str, List[float]]] = []

    # A dict from brain name to the current snapshot of this trainer's policies
    self.current_policy_snapshot: Dict[str, List[float]] = {}

    self.snapshot_counter: int = 0

    # wrapped_training_team and learning team need to be separate
    # in the situation where new agents are created destroyed
    # after learning team switches. These agents need to be added
    # to trainers properly.
    self._learning_team: int = None
    self.wrapped_trainer_team: int = None
    self.last_save: int = 0
    self.last_swap: int = 0
    self.last_team_change: int = 0

    # Prefer an ELO stored in GlobalTrainingStatus for this brain; fall
    # back to the configured initial ELO when none is stored.
    self.initial_elo = GlobalTrainingStatus.get_parameter_state(
        self.brain_name, StatusType.ELO
    )
    if self.initial_elo is None:
        self.initial_elo = self_play_parameters.initial_elo
    self.policy_elos: List[float] = [self.initial_elo] * (
        self.window + 1
    )  # for learning policy
    self.current_opponent: int = 0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments (checkpoint, environment
    and engine settings, behaviors, curriculum and parameter randomization).
    """
    with hierarchical_timer("run_training.setup"):
        ckpt = options.checkpoint_settings
        env_cfg = options.env_settings
        engine_cfg = options.engine_settings

        results_root = "results"
        output_dir = os.path.join(results_root, ckpt.run_id)
        # Any falsy initialize_from (None or empty) means "no init path".
        init_dir = None
        if ckpt.initialize_from:
            init_dir = os.path.join(results_root, ckpt.initialize_from)
        logs_dir = os.path.join(output_dir, "run_logs")
        port: Optional[int] = env_cfg.base_port

        # Check the output/initialization directories before creating any.
        handle_existing_directories(output_dir, ckpt.resume, ckpt.force, init_dir)
        os.makedirs(logs_dir, exist_ok=True)

        # A resumed run first reloads the previously saved training status.
        if ckpt.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(logs_dir, "training_status.json")
            )

        # Reward and episode length are assumed to be needed in the CSV.
        csv_writer = CSVWriter(
            output_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(output_dir, clear_past_data=not ckpt.resume)
        # Writers are registered in this order: Tensorboard, CSV, gauge, console.
        for writer in (tb_writer, csv_writer, GaugeWriter(), ConsoleWriter()):
            StatsReporter.add_writer(writer)

        # No port is passed along when no environment executable is given.
        if env_cfg.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_cfg.env_path,
            engine_cfg.no_graphics,
            run_seed,
            port,
            env_cfg.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_cfg.width,
            height=engine_cfg.height,
            quality_level=engine_cfg.quality_level,
            time_scale=engine_cfg.time_scale,
            target_frame_rate=engine_cfg.target_frame_rate,
            capture_frame_rate=engine_cfg.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_cfg.num_envs
        )
        meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=ckpt.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)

        should_train = not ckpt.inference
        trainer_factory = TrainerFactory(
            options.behaviors,
            output_dir,
            should_train,
            ckpt.resume,
            run_seed,
            init_dir,
            meta_curriculum,
            False,
        )
        controller = TrainerController(
            trainer_factory,
            output_dir,
            ckpt.run_id,
            meta_curriculum,
            should_train,
            run_seed,
        )

    # Train; the environment is closed and run artifacts are written even
    # when training raises.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(output_dir, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    :param num_areas: Number of training areas to instantiate; forwarded to
    the environment factory.
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        ckpt = options.checkpoint_settings
        env_cfg = options.env_settings
        engine_cfg = options.engine_settings

        logs_dir = ckpt.run_logs_dir
        port: Optional[int] = env_cfg.base_port

        # Validate the output/initialization directories before creating any.
        validate_existing_directories(
            ckpt.write_path, ckpt.resume, ckpt.force, ckpt.maybe_init_path
        )
        os.makedirs(logs_dir, exist_ok=True)

        if ckpt.resume:
            # Resuming: reload the previously saved training status.
            GlobalTrainingStatus.load_state(
                os.path.join(logs_dir, "training_status.json")
            )
        elif ckpt.maybe_init_path is not None:
            # Initializing from another run: set the init path on all behaviors.
            setup_init_path(options.behaviors, ckpt.maybe_init_path)

        # Register every stats writer provided by the plugins.
        for stats_writer in register_stats_writer_plugins(options):
            StatsReporter.add_writer(stats_writer)

        # No port is passed along when no environment executable is given.
        if env_cfg.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_cfg.env_path,
            engine_cfg.no_graphics,
            run_seed,
            num_areas,
            port,
            env_cfg.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(env_factory, options, env_cfg.num_envs)
        param_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=ckpt.resume
        )
        should_train = not ckpt.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=ckpt.write_path,
            train_model=should_train,
            load_model=ckpt.resume,
            seed=run_seed,
            param_manager=param_manager,
            init_path=ckpt.maybe_init_path,
            multi_gpu=False,
        )
        controller = TrainerController(
            trainer_factory,
            ckpt.write_path,
            ckpt.run_id,
            param_manager,
            should_train,
            run_seed,
        )

    # Train; the environment is closed and run artifacts are written even
    # when training raises.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(ckpt.write_path, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)