def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
    """
    Decode an encoded observation image into a numpy array of pixel values
    divided by 255.0, optionally collapsed to a single grayscale channel.

    :param image_bytes: input byte array corresponding to the encoded image
    :param gray_scale: Whether to average over axis 2 and return a single
        trailing channel.
    :return: processed numpy array of the observation; when gray_scale is
        True its shape is (rows, cols, 1)
    """
    with hierarchical_timer("image_decompress"):
        stream = io.BytesIO(bytearray(image_bytes))
        decoded = Image.open(stream)
        # Image.open is lazy; loading here keeps the decode cost inside
        # the timer scope.
        decoded.load()
    pixels = np.array(decoded) / 255.0
    if not gray_scale:
        return pixels
    averaged = np.mean(pixels, axis=2)
    rows, cols = averaged.shape[0], averaged.shape[1]
    return np.reshape(averaged, [rows, cols, 1])
def advance(self, env: EnvManager) -> int:
    """
    Collect and process one round of experiences, report the current
    curriculum lesson for known trainers, and advance every trainer.

    :param env: environment manager passed to the experience collection step
    :return: the value returned by ``_get_and_process_experiences``
    """
    step_count = self._get_and_process_experiences(env)

    # Publish the lesson number of every curriculum that has a trainer.
    curricula = self.meta_curriculum
    if curricula:
        for name, curriculum in curricula.brains_to_curricula.items():
            if name not in self.trainers:
                continue
            reporter = self.trainers[name].stats_reporter
            reporter.set_stat("Environment/Lesson", curriculum.lesson_num)

    # Advance trainers. This can be done in a separate loop in the future.
    with hierarchical_timer("trainer_advance"):
        for active_trainer in self.trainers.values():
            active_trainer.advance()
    return step_count
def step(self) -> None:
    """
    Send the pending actions to the environment and read back the new state.

    On the very first message this delegates to ``reset()`` instead. Groups
    that have no action queued receive an empty action sized to the number
    of agents currently known for that group.

    :raises UnityEnvironmentException: if no environment is loaded
    :raises UnityCommunicationException: if the communicator returns None
    """
    if self._is_first_message:
        return self.reset()
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")

    # Fill the blanks for missing actions.
    for group in self._env_specs:
        if group in self._env_actions:
            continue
        agent_count = (
            len(self._env_state[group][0]) if group in self._env_state else 0
        )
        spec = self._env_specs[group]
        self._env_actions[group] = spec.create_empty_action(agent_count)

    request = self._generate_step_input(self._env_actions)
    with hierarchical_timer("communicator.exchange"):
        outputs = self.communicator.exchange(request)
    if outputs is None:
        raise UnityCommunicationException("Communicator has stopped.")

    self._update_behavior_specs(outputs)
    self._update_state(outputs.rl_output)
    self._env_actions.clear()
def join_threads(self, timeout_seconds: float = 1.0) -> None:
    """
    Signal the trainer threads to stop, wait for them, and merge their
    timer information into the main thread.

    :param timeout_seconds: maximum time to wait for each individual thread
        (the timeout is passed to every ``join`` call separately)
    :return: None
    """
    self.kill_trainers = True
    for worker in self.trainer_threads:
        try:
            worker.join(timeout_seconds)
        except Exception:
            # Best effort: a thread that cannot be joined must not prevent
            # the remaining threads from being joined and merged.
            pass

    with hierarchical_timer("trainer_threads") as main_timer_node:
        for worker in self.trainer_threads:
            stack = get_timer_stack_for_thread(worker)
            if not stack:
                continue
            main_timer_node.merge(
                stack.root, root_name="thread_root", is_parallel=True
            )
            merge_gauges(stack.gauges)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Set up stats writers, the environment manager, the trainer factory and
    the trainer controller, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        if options.initialize_from:
            maybe_init_path = f"./models/{options.initialize_from}"
        else:
            maybe_init_path = None
        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter.
        # We assume reward and episode length are needed in the CSV.
        csv_fields = ["Environment/Cumulative Reward", "Environment/Episode Length"]
        csv_writer = CSVWriter(summaries_dir, required_fields=csv_fields)
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        stats_writers = [
            TensorboardWriter(summaries_dir, clear_past_data=not options.resume),
            csv_writer,
            GaugeWriter(),
            ConsoleWriter(),
        ]
        for stats_writer in stats_writers:
            StatsReporter.add_writer(stats_writer)

        # Without an executable path, fall back to the default editor port.
        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        train_model = not options.inference
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            train_model,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create the controller that drives training.
        controller = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            train_model,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training; always shut the environment down and dump timings.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches a meta-learning training session.

    A short warm-up run is used only to obtain the initial network weights.
    Afterwards up to ``max_meta_updates`` independent training runs are
    executed; each run starts from the weights produced by the previous
    one and uses linearly decayed learning rate, beta and epsilon.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    # NOTE(review): the run id is hard-coded and overrides whatever the
    # caller configured - confirm this is intended outside of experiments.
    options.checkpoint_settings.run_id = "test8"
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port

        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        if env_settings.env_path is None:
            port = None

        # Warm-up run, used only to obtain the initial weights.
        # NOTE(review): machine-specific absolute path - should come from
        # configuration.
        env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )
        # Temporarily shorten the run; the real limit is restored afterwards.
        max_steps = options.behaviors['Brain'].max_steps
        options.behaviors['Brain'].max_steps = 10
        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=0,
        )
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    try:
        # Get inital weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        # NOTE(review): random_sample() draws from [0.0, 1.0), so
        # `sample > 1` is never true and the Carry Object stage is never
        # selected. Left unchanged in case the stage was disabled on
        # purpose - confirm the intended threshold.
        if sample > 1:
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )
        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=step,
        )
        # Linearly decay the hyperparameters over the meta updates.
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.learning_rate = 0.0005 * (1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.beta = 0.005 * (1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'
        ].hyperparameters.epsilon = 0.2 * (1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

        # Reset every pass so the cleanup below can tell whether this run
        # actually produced weights. Previously the name was unbound (first
        # pass) or stale (later passes) when start_learning raised, and the
        # resulting error hid the real exception and skipped
        # env_manager.close().
        weights_after_train = None
        try:
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager, inital_weights)
            print(tc.trainers['Brain'].optimizer)
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
        finally:
            if weights_after_train is not None:
                print(len(weights_after_train), len(inital_weights))
                # Carry the trained weights over into the next meta update.
                for i, weight in enumerate(weights_after_train):
                    inital_weights[i] = weight
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Prepare output directories, stats writers, the environment manager and
    the trainers, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        if checkpoint_settings.initialize_from is not None:
            maybe_init_path = os.path.join(
                base_path, checkpoint_settings.initialize_from
            )
        else:
            maybe_init_path = None
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port

        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            status_file = os.path.join(run_logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = [
            TensorboardWriter(
                write_path, clear_past_data=not checkpoint_settings.resume
            ),
            GaugeWriter(),
            ConsoleWriter(),
        ]
        for stats_writer in stats_writers:
            StatsReporter.add_writer(stats_writer)

        # No executable configured: do not pass an explicit port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters,
            run_seed,
            restore=checkpoint_settings.resume,
        )
        train_model = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=train_model,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create the controller that drives training.
        controller = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            train_model,
            run_seed,
        )

    # Begin training; cleanup and run artifacts are written in every case.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def test_timers() -> None:
    """
    Run nested, repeated and raising timer scopes against a private
    TimerStack and check both the node hierarchy and the exported tree.
    """
    test_timer = timers.TimerStack()
    with mock.patch(
        "mlagents_envs.timers._get_thread_timer", return_value=test_timer
    ):
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for value in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(value, value)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {"multiple", "raises", "post_raise"}

        # make sure the scope was closed properly when the exception was raised
        assert top_level.children["raises"].count == 1
        assert top_level.children["multiple"].count == 3

        def node(count, children=None):
            # Expected timer node; the timing values themselves are not checked.
            entry = {"total": mock.ANY, "count": count, "self": mock.ANY}
            if children is not None:
                entry["children"] = children
            return entry

        top_level_children = {
            "multiple": node(3, {"decorated_func": node(3)}),
            "raises": node(1),
            "post_raise": node(1),
        }
        expected_tree = node(1, {"top_level": node(1, top_level_children)})
        expected_tree["name"] = "root"
        expected_tree["gauges"] = {
            "my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}
        }
        expected_tree["metadata"] = {
            "timer_format_version": timers.TIMER_FORMAT_VERSION,
            "start_time_seconds": mock.ANY,
            "end_time_seconds": mock.ANY,
            "python_version": mock.ANY,
            "command_line_arguments": mock.ANY,
        }

        timer_tree = test_timer.get_timing_tree()
        assert timer_tree == expected_tree
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Prepare output directories, stats writers, the environment manager,
    curriculum/samplers and trainers, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        if checkpoint_settings.initialize_from:
            maybe_init_path = os.path.join(
                base_path, checkpoint_settings.initialize_from
            )
        else:
            maybe_init_path = None
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port

        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            status_file = os.path.join(run_logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_fields = ["Environment/Cumulative Reward", "Environment/Episode Length"]
        csv_writer = CSVWriter(write_path, required_fields=csv_fields)
        stats_writers = [
            TensorboardWriter(
                write_path, clear_past_data=not checkpoint_settings.resume
            ),
            csv_writer,
            GaugeWriter(),
            ConsoleWriter(),
        ]
        for stats_writer in stats_writers:
            StatsReporter.add_writer(stats_writer)

        # No executable configured: do not pass an explicit port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        train_model = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            train_model,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create the controller that drives training.
        controller = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            train_model,
            run_seed,
        )

    # Begin training; cleanup and run artifacts are written in every case.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def trainer_update_func(self, trainer: Trainer) -> None:
    """
    Repeatedly advance ``trainer`` until ``self.kill_trainers`` becomes true.

    :param trainer: the trainer whose ``advance()`` is called in a loop
    """
    while True:
        # The flag is re-read before every iteration.
        if self.kill_trainers:
            break
        with hierarchical_timer("trainer_advance"):
            trainer.advance()
def run_training_aai(run_seed: int, options: RunOptionsAAI) -> None:
    """
    Set up stats writers, the AnimalAI environment manager, the trainer
    factory and the controller, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: training parameters
    """
    with hierarchical_timer("run_training.setup"):
        # Docker volume handling is disabled here; outputs always go to the
        # local ./models and ./summaries directories.
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_fields = ["Environment/Cumulative Reward", "Environment/Episode Length"]
        csv_writer = CSVWriter(summaries_dir, required_fields=csv_fields)
        stats_writers = [TensorboardWriter(summaries_dir), csv_writer, GaugeWriter()]
        for stats_writer in stats_writers:
            StatsReporter.add_writer(stats_writer)

        # Without an executable path, fall back to the default editor port.
        if options.env_path is None:
            port = AnimalAIEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory_aai(
            options.env_path,
            run_seed,
            port,
            options.n_arenas_per_env,
            options.arena_config,
            options.resolution,
        )

        # Training uses the configured window size with the "train" engine
        # presets; otherwise every engine value comes from the "play" presets.
        if options.train_model:
            engine_args = (
                options.width,
                options.height,
                AnimalAIEnvironment.QUALITY_LEVEL.train,
                AnimalAIEnvironment.TIMESCALE.train,
                AnimalAIEnvironment.TARGET_FRAME_RATE.train,
            )
        else:
            engine_args = (
                AnimalAIEnvironment.WINDOW_WIDTH.play,
                AnimalAIEnvironment.WINDOW_HEIGHT.play,
                AnimalAIEnvironment.QUALITY_LEVEL.play,
                AnimalAIEnvironment.TIMESCALE.play,
                AnimalAIEnvironment.TARGET_FRAME_RATE.play,
            )
        engine_config = EngineConfig(*engine_args)

        env_manager = SubprocessEnvManagerAAI(
            env_factory, engine_config, options.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
        )
        # Create the controller that drives training.
        controller = TrainerControllerAAI(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
        )

    # Begin training; always shut the environment down and dump timings.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Configure torch, output directories, stats writers, the environment
    manager and the trainers, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param num_areas: Number of training areas to instantiate
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port

        # Check if directory exists
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)

        if checkpoint_settings.resume:
            # Load any needed states in case of resume
            status_file = os.path.join(run_logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)
        elif checkpoint_settings.maybe_init_path is not None:
            # In case of initialization, set full init_path for all behaviors
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        for stats_writer in register_stats_writer_plugins(options):
            StatsReporter.add_writer(stats_writer)

        # No executable configured: do not pass an explicit port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters,
            run_seed,
            restore=checkpoint_settings.resume,
        )
        train_model = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=train_model,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create the controller that drives training.
        controller = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            train_model,
            run_seed,
        )

    # Begin training; cleanup and run artifacts are written in every case.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file, or every '.demo' file found
    directly inside a directory.

    :param file_path: Location of a demonstration file (.demo) or of a
        directory containing .demo files.
    :return: BrainParameters, the list of AgentInfoActionPairProto
        containing the demonstration data, and the total number of steps
        announced by the meta-data of the file(s) read.
    :raises ValueError: if the file does not have the '.demo' extension, or
        the directory contains no '.demo' file.
    :raises FileNotFoundError: if file_path is neither a file nor a directory.
    :raises RuntimeError: if no BrainParameters could be built from the data.
    """
    # Position at which parsing resumes after the meta-data record.
    # NOTE(review): the original comment said "first 32 bytes of file
    # dedicated to meta-data" while the constant is 33 - confirm against the
    # code that writes .demo files.
    INITIAL_POS = 33

    if os.path.isdir(file_path):
        file_paths = [
            os.path.join(file_path, _file)
            for _file in os.listdir(file_path)
            if _file.endswith(".demo")
        ]
        # Check the filtered list, not the raw directory listing: a
        # directory holding only non-demo files must be rejected here
        # instead of failing later with a misleading RuntimeError.
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        if pathlib.Path(file_path).suffix != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
        file_paths = [file_path]
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                # Each record is prefixed with its length as a varint.
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    # First record: meta-data carrying the number of steps.
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    # Second record: the brain parameters.
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    # Remaining records: agent info / action pairs.
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if brain_params is None:
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    # Stop once every announced step has been read.
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    if not brain_params:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return brain_params, info_action_pairs, total_expected
def test_timers() -> None:
    """
    Run nested, repeated and raising timer scopes against a patched global
    timer stack and check both the node hierarchy and the exported tree.
    """
    with mock.patch(
        "mlagents_envs.timers._global_timer_stack", new_callable=timers.TimerStack
    ) as test_timer:
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for value in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(value, value)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {"multiple", "raises", "post_raise"}

        # make sure the scope was closed properly when the exception was raised
        assert top_level.children["raises"].count == 1
        assert top_level.children["multiple"].count == 3

        def node(count, children=None):
            # Expected timer node; the timing values themselves are not checked.
            entry = {"total": mock.ANY, "count": count, "self": mock.ANY}
            if children is not None:
                entry["children"] = children
            return entry

        top_level_children = {
            "multiple": node(3, {"decorated_func": node(3)}),
            "raises": node(1),
            "post_raise": node(1),
        }
        expected_tree = node(1, {"top_level": node(1, top_level_children)})
        expected_tree["name"] = "root"
        expected_tree["gauges"] = {
            "my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}
        }

        timer_tree = test_timer.get_timing_tree()
        assert timer_tree == expected_tree
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Set up stats writers, the environment manager, the trainer factory and
    the trainer controller, then run the training session.

    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        # Recognize and use docker volume if one is passed as an argument
        docker_target = options.docker_target_name
        if docker_target:
            model_path = f"/{docker_target}/models/{options.run_id}"
            summaries_dir = f"/{docker_target}/summaries"
        else:
            model_path = f"./models/{options.run_id}"
            summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_fields = ["Environment/Cumulative Reward", "Environment/Episode Length"]
        csv_writer = CSVWriter(summaries_dir, required_fields=csv_fields)
        stats_writers = [TensorboardWriter(summaries_dir), csv_writer, GaugeWriter()]
        for stats_writer in stats_writers:
            StatsReporter.add_writer(stats_writer)

        # Without an executable path, fall back to the default editor port.
        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path,
            options.docker_target_name,
            options.no_graphics,
            run_seed,
            port,
            options.env_args,
        )
        engine_config = EngineConfig(
            options.width,
            options.height,
            options.quality_level,
            options.time_scale,
            options.target_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create the controller that drives training.
        controller = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training; the environment is always shut down afterwards.
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()