def test_step_takes_steps_for_all_non_waiting_envs(self):
    """step() should dispatch new steps only to workers that are not
    already waiting on a result, and fold finished results into each
    worker's previous_step."""
    # Replace worker creation so each worker is a MockEnvWorker that
    # echoes its own worker_id in a canned "step" response.
    SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory: MockEnvWorker(
        worker_id, EnvironmentResponse("step", worker_id, worker_id)
    )
    manager = SubprocessEnvManager(mock_env_factory, 3)
    manager.step_queue = Mock()
    # The step queue yields results for workers 0 and 1, then signals empty.
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse("step", 0, StepResponse(0, None)),
        EnvironmentResponse("step", 1, StepResponse(1, None)),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    manager.env_workers[0].previous_step = last_steps[0]
    manager.env_workers[1].previous_step = last_steps[1]
    manager.env_workers[2].previous_step = last_steps[2]
    # Worker 2 is mid-step, so it must NOT receive a new "step" command.
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager.step()
    for i, env in enumerate(manager.env_workers):
        if i < 2:
            env.send.assert_called_with("step", step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i
            )
            self.assertEqual(
                manager.env_workers[i].previous_step.previous_all_brain_info,
                last_steps[i].current_all_brain_info,
            )
    # Only the two non-waiting workers contribute results.
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def advance(self, env: SubprocessEnvManager) -> int:
    """Step all environments once, feed the resulting experiences to each
    trainer, and trigger policy updates for trainers that are ready.

    :param env: The environment manager to step.
    :return: The number of step infos collected this call.
    """
    with hierarchical_timer("env_step"):
        # Time the (possibly parallel) environment step so trainer
        # metrics can record how long experience collection took.
        time_start_step = time()
        new_step_infos = env.step()
        delta_time_step = time() - time_start_step
    # Distribute every collected step to every trainer.
    for step_info in new_step_infos:
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(
                    delta_time_step)
            trainer.add_experiences(
                step_info.previous_all_brain_info,
                step_info.current_all_brain_info,
                step_info.brain_name_to_action_info[brain_name].outputs,
            )
            trainer.process_experiences(step_info.previous_all_brain_info,
                                        step_info.current_all_brain_info)
    # Second pass: advance step counts and run policy updates.
    for brain_name, trainer in self.trainers.items():
        if brain_name in self.trainer_metrics:
            # NOTE(review): delta_time_step was already added once per
            # step_info in the loop above; adding it again here may
            # double-count the env-step time — confirm this is intended.
            self.trainer_metrics[brain_name].add_delta_step(
                delta_time_step)
        if self.train_model and trainer.get_step <= trainer.get_max_steps:
            trainer.increment_step(len(new_step_infos))
            if trainer.is_ready_update():
                # Perform gradient descent with experience buffer
                with hierarchical_timer("update_policy"):
                    trainer.update_policy()
                # Push the refreshed policy back to the environment workers.
                env.set_policy(brain_name, trainer.policy)
    return len(new_step_infos)
def test_reset_passes_reset_params(self):
    """A reset call must forward its config tuple to the single worker."""

    def fake_create_worker(em, worker_id, step_queue, env_factory):
        return MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )

    SubprocessEnvManager.create_worker = fake_create_worker
    env_manager = SubprocessEnvManager(mock_env_factory, 1)
    reset_config = {"test": "params"}
    env_manager.reset(reset_config, False)
    env_manager.env_workers[0].send.assert_called_with(
        "reset", (reset_config, False, None)
    )
def test_reset_collects_results_from_all_envs(self):
    """reset() must fan out to every worker and gather each worker's result."""

    def fake_create_worker(em, worker_id, step_queue, env_factory):
        return MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )

    SubprocessEnvManager.create_worker = fake_create_worker
    env_manager = SubprocessEnvManager(mock_env_factory, 4)
    reset_config = {"test": "params"}
    collected = env_manager.reset(reset_config)
    for rank, worker in enumerate(env_manager.env_workers):
        worker.send.assert_called_with("reset", (reset_config, True, None))
        worker.recv.assert_called()
        # Each worker's "last step" should hold the value that worker returned.
        self.assertEqual(worker.previous_step.current_all_brain_info, rank)
    assert collected == [w.previous_step for w in env_manager.env_workers]
def test_environments_are_created(self):
    """Constructing a manager with n envs must spawn one worker per env."""
    SubprocessEnvManager.create_worker = MagicMock()
    manager = SubprocessEnvManager(mock_env_factory, 2)  # Creates two processes
    expected_calls = [
        mock.call(rank, manager.step_queue, mock_env_factory) for rank in range(2)
    ]
    manager.create_worker.assert_has_calls(expected_calls)
    self.assertEqual(len(manager.env_workers), 2)
def run_training(sub_id: int, run_seed: int, run_options: Dict[str, Any], process_queue: Queue) -> None:
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param process_queue: Queue used to send signal back to main.
    """
    # Docker Parameters ("None" string means the flag was not supplied)
    docker_target_name = (run_options["--docker-target-name"]
                          if run_options["--docker-target-name"] != "None" else None)

    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (run_options["--curriculum"]
                         if run_options["--curriculum"] != "None" else None)
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (run_options["--sampler"]
                         if run_options["--sampler"] != "None" else None)

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        # Re-root config/curriculum/model/summary paths inside the docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id)
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name)

    trainer_config = load_config(trainer_config_path)
    # Each concurrent training session gets its own contiguous port range.
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
        [str(x) for t in run_options.items() for x in t],  # NOTE passes all arguments to Unity
    )
    env = SubprocessEnvManager(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters)

    # Create controller and begin training.
    tc = TrainerController(
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        load_model,
        train_model,
        keep_checkpoints,
        lesson,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training; always tear down the subprocess env workers, even if
    # training raises, so no orphaned Unity processes are left behind.
    try:
        tc.start_learning(env, trainer_config)
    finally:
        env.close()
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param process_queue: Queue used to send signal back to main.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        # Re-root config/curriculum/model/summary paths inside the docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Each concurrent training session gets its own contiguous port range.
    port = options.base_port + (sub_id * options.num_envs)
    if options.env_path is None:
        port = 5004  # This is the in Editor Training Port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed
    )
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training; the env manager is closed even if training raises.
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
def run_training(
    sub_id: int,
    run_seed: int,
    run_options: Dict[str, Any],
    process_queue: Queue,
    inject_create_environment_factory=None,
) -> None:
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param process_queue: Queue used to send signal back to main.
    :param inject_create_environment_factory: Optional callable replacing
        create_environment_factory (e.g. for tests); when None, the
        module-level factory is used.
    """
    # Docker Parameters ("None" string means the flag was not supplied)
    docker_target_name = (
        run_options["--docker-target-name"]
        if run_options["--docker-target-name"] != "None"
        else None
    )
    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (
        run_options["--curriculum"] if run_options["--curriculum"] != "None" else None
    )
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (
        run_options["--sampler"] if run_options["--sampler"] != "None" else None
    )

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        # Re-root config/curriculum/model/summary paths inside the docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Both branches previously called the factory with identical arguments;
    # pick the factory once instead of duplicating the call.
    factory_fn = inject_create_environment_factory or create_environment_factory
    env_factory = factory_fn(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
    )
    # HACK for debug use SimpleEnvManager
    if num_envs > 1:
        # create a mock env for parsing examples (kill internal)
        mock_env = env_factory(9999)
        # close inner minerl environment
        try:
            # Close the wrapped inner environments directly when present.
            for env_list in mock_env._envs.values():
                for inner_env in env_list:
                    inner_env.unwrapped.close()
        except AttributeError:
            # No _envs attribute: fall back to the env's own close().
            mock_env.close()
        env = SubprocessEnvManager(env_factory, num_envs)
    else:
        env = env_factory(0)
        env = SimpleEnvManager(env)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )
    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    tc.start_learning(env)
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param process_queue: Queue used to send signal back to main.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        # Re-root config/curriculum/model/summary paths inside the docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Each concurrent training session gets its own contiguous port range.
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        options.base_port + (sub_id * options.num_envs),
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )
    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    tc.start_learning(env)