def _create_engine_channel(self):
    engine_channel = EngineConfigurationChannel()
    engine_config = (
        EngineConfig(80, 80, 1, 4.0, 30 * 4)
        if self.train_mode
        else EngineConfig(1280, 720, 1, 1.0, 60)
    )
    engine_channel.set_configuration(engine_config)
    return engine_channel
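# Usage sketch (an assumption, not part of the original source): attaching the
# channel returned by _create_engine_channel to a UnityEnvironment. The build
# path is a hypothetical placeholder.
from mlagents_envs.environment import UnityEnvironment

def _connect_example(self):
    engine_channel = self._create_engine_channel()
    # The side channel must be passed at construction time; the settings sent on
    # it are applied by the Unity player once the connection is established.
    return UnityEnvironment(file_name="path/to/build", side_channels=[engine_channel])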
def test_environments_are_created(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    env = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 2)
    # Creates two processes
    env.create_worker.assert_has_calls(
        [
            mock.call(0, env.step_queue, mock_env_factory, EngineConfig.default_config()),
            mock.call(1, env.step_queue, mock_env_factory, EngineConfig.default_config()),
        ]
    )
    self.assertEqual(len(env.env_workers), 2)
def test_step_takes_steps_for_all_non_waiting_envs(self):
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 3)
    manager.step_queue = Mock()
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse("step", 0, StepResponse(0, None)),
        EnvironmentResponse("step", 1, StepResponse(1, None)),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    manager.env_workers[0].previous_step = last_steps[0]
    manager.env_workers[1].previous_step = last_steps[1]
    manager.env_workers[2].previous_step = last_steps[2]
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager.step()
    for i, env in enumerate(manager.env_workers):
        if i < 2:
            env.send.assert_called_with("step", step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i
            )
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def make_chickAI_unity_env(options):
    """Build ChickAI UnityEnvironment from command line options."""
    engine_config = EngineConfig(
        width=options.width,
        height=options.height,
        quality_level=options.quality_level,
        time_scale=options.time_scale,
        target_frame_rate=options.target_frame_rate,
        capture_frame_rate=options.capture_frame_rate,
    )
    env_args = _build_chickAI_env_args(
        input_resolution=options.input_resolution,
        episode_steps=options.episode_steps,
        video_1_path=options.video1,
        video_2_path=options.video2,
        log_dir=options.log_dir,
        test_mode=options.test_mode,
    )
    # Set up FloatPropertiesChannel to receive auxiliary agent information.
    agent_info_channel = FloatPropertiesChannel()
    unity_env = make_unity_env(
        env_path=options.env_path,
        port=options.base_port,
        seed=options.seed,
        env_args=env_args,
        engine_config=engine_config,
        side_channels=[agent_info_channel],
    )
    return unity_env, agent_info_channel
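# Usage sketch (assumed, not from the original source): assembling the options
# namespace that make_chickAI_unity_env reads. The attribute names mirror the
# fields accessed above; the default values here are illustrative only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--env_path", default=None)
parser.add_argument("--base_port", type=int, default=5005)
parser.add_argument("--seed", type=int, default=-1)
parser.add_argument("--width", type=int, default=80)
parser.add_argument("--height", type=int, default=80)
parser.add_argument("--quality_level", type=int, default=1)
parser.add_argument("--time_scale", type=float, default=20.0)
parser.add_argument("--target_frame_rate", type=int, default=-1)
parser.add_argument("--capture_frame_rate", type=int, default=60)
parser.add_argument("--input_resolution", type=int, default=64)
parser.add_argument("--episode_steps", type=int, default=1000)
parser.add_argument("--video1", default=None)
parser.add_argument("--video2", default=None)
parser.add_argument("--log_dir", default=None)
parser.add_argument("--test_mode", action="store_true")
options = parser.parse_args()

unity_env, agent_info_channel = make_chickAI_unity_env(options)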
def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 3)
    manager.step_queue = Mock()
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
        EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    manager.env_workers[0].previous_step = last_steps[0]
    manager.env_workers[1].previous_step = last_steps[1]
    manager.env_workers[2].previous_step = last_steps[2]
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager._step()
    for i, env in enumerate(manager.env_workers):
        if i < 2:
            env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result, i
            )
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def test_reset_passes_reset_params(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 1)
    params = {"test": "params"}
    manager._reset_env(params)
    manager.env_workers[0].send.assert_called_with("reset", (params))
def test_advance(self, external_brains_mock, step_mock):
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
    )
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    external_brains_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    env_manager.set_agent_manager(brain_name, agent_manager_mock)
    step_info_dict = {brain_name: Mock()}
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
    step_mock.return_value = [step_info]
    env_manager.advance()

    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )

    # Test policy queue
    mock_policy = mock.Mock()
    agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
    env_manager.advance()
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def test_engine_configuration():
    sender = EngineConfigurationChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    config = EngineConfig.default_config()
    sender.set_configuration(config)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    received_data = receiver.get_and_clear_received_messages()
    assert len(received_data) == 5  # 5 different messages, one for each setting

    sent_time_scale = 4.5
    sender.set_configuration_parameters(time_scale=sent_time_scale)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    message.read_int32()
    time_scale = message.read_float32()
    assert time_scale == sent_time_scale

    with pytest.raises(UnitySideChannelException):
        sender.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # try to send data to the EngineConfigurationChannel
        sender.set_configuration_parameters(time_scale=sent_time_scale)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
def test_reset_passes_reset_params(self):
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 1)
    params = {"test": "params"}
    manager.reset(params)
    manager.env_workers[0].send.assert_called_with("reset", (params))
def create_engine_config_side_channel(self) -> EngineConfigurationChannel:
    if self.play or self.inference:
        engine_configuration = EngineConfig(
            width=self.WINDOW_WIDTH.play,
            height=self.WINDOW_HEIGHT.play,
            quality_level=self.QUALITY_LEVEL.play,
            time_scale=self.TIMESCALE.play,
            target_frame_rate=self.TARGET_FRAME_RATE.play,
        )
    else:
        engine_configuration = EngineConfig(
            width=self.WINDOW_WIDTH.train,
            height=self.WINDOW_HEIGHT.train,
            quality_level=self.QUALITY_LEVEL.train,
            time_scale=self.TIMESCALE.train,
            target_frame_rate=self.TARGET_FRAME_RATE.train,
        )
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    return engine_configuration_channel
def _make_unity_env(
    env_path: Optional[str] = None,
    port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
    seed: int = -1,
    env_args: Optional[List[str]] = None,
    engine_config: Optional[EngineConfig] = None,
    side_channels: Optional[List[SideChannel]] = None,
) -> UnityEnvironment:
    """Create a UnityEnvironment."""
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        launch_string = UnityEnvironment.validate_environment_path(env_path)
        if launch_string is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. "
                "Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)
    if side_channels is None:
        side_channels = [engine_configuration_channel]
    else:
        side_channels.append(engine_configuration_channel)

    # Find an available port to connect to the Unity environment.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=side_channels,
            )
        except UnityWorkerInUseException:
            logger.debug(f"Port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
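# Usage sketch (assumed, not from the original source): with env_path=None,
# _make_unity_env falls back to the editor port and waits for a Unity Editor in
# play mode; a full-size, real-time EngineConfig makes the run watchable. The
# values below are illustrative.
viewer_config = EngineConfig(
    width=1280,
    height=720,
    quality_level=5,
    time_scale=1.0,
    target_frame_rate=60,
    capture_frame_rate=60,
)
env = _make_unity_env(env_path=None, engine_config=viewer_config)
env.reset()
env.close()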
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response. We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.1)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(
        failing_env_factory, EngineConfig.default_config(), num_envs
    )
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
def test_reset_collects_results_from_all_envs(self):
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 4)
    params = {"test": "params"}
    res = manager.reset(params)
    for i, env in enumerate(manager.env_workers):
        env.send.assert_called_with("reset", (params))
        env.recv.assert_called()
        # Check that the "last steps" are set to the value returned for each step
        self.assertEqual(
            manager.env_workers[i].previous_step.current_all_brain_info, i
        )
    assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
def test_reset_collects_results_from_all_envs(self, mock_create_worker):
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 4)
    params = {"test": "params"}
    res = manager._reset_env(params)
    for i, env in enumerate(manager.env_workers):
        env.send.assert_called_with(EnvironmentCommand.RESET, (params))
        env.recv.assert_called()
        # Check that the "last steps" are set to the value returned for each step
        self.assertEqual(
            manager.env_workers[i].previous_step.current_all_step_result, i
        )
    assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
def test_subprocess_env_endtoend(num_envs):
    env_manager = SubprocessEnvManager(
        simple_env_factory, EngineConfig.default_config(), num_envs
    )
    trainer_config = generate_config(PPO_CONFIG)
    # Run PPO using env_manager
    _check_environment_trains(
        simple_env_factory(0, []),
        trainer_config,
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate
    # processes) so we check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.99 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def __init__(
    self,
    env_path: Optional[str] = None,
    imprint_video: Optional[str] = None,
    test_video: Optional[str] = None,
    log_dir: Optional[str] = None,
    input_resolution: int = 64,
    episode_steps: int = 1000,
    seed: int = 0,
    test_mode: bool = False,
    base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
    time_scale: int = 20,
    capture_frame_rate: int = 60,
    width: int = 80,
    height: int = 80,
    use_visual: bool = True,
    **kwargs,
):
    engine_config = EngineConfig(
        width=width,
        height=height,
        quality_level=5,
        time_scale=time_scale,
        target_frame_rate=-1,
        capture_frame_rate=capture_frame_rate,
    )
    env_args = _build_chickAI_env_args(
        input_resolution=input_resolution,
        episode_steps=episode_steps,
        imprint_video=imprint_video,
        test_video=test_video,
        log_dir=log_dir,
        test_mode=test_mode,
    )
    agent_info_channel = FloatPropertiesChannel()
    unity_env = _make_unity_env(
        env_path=env_path,
        port=base_port,
        seed=seed,
        env_args=env_args,
        engine_config=engine_config,
        side_channels=[agent_info_channel],
    )
    env = UnityToGymWrapper(unity_env, flatten_branched=True, use_visual=use_visual)
    super().__init__(env)
    self.env = env
    self.agent_info_channel = agent_info_channel
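# Usage sketch (assumed): driving the wrapped environment with the standard gym
# loop. The class name ChickAIEnv is hypothetical and stands for whichever class
# the __init__ above belongs to.
env = ChickAIEnv(env_path="path/to/build", time_scale=20)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()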
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )
        return env

    env_manager = SubprocessEnvManager(
        failing_step_env_factory, EngineConfig.default_config()
    )
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], use_discrete=True)
        return env

    env_manager = SubprocessEnvManager(
        simple_env_factory, EngineConfig.default_config(), num_envs
    )
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate
    # processes) so we check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def test_advance(self, mock_create_worker, training_behaviors_mock, step_mock):
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    mock_create_worker.side_effect = create_worker_mock
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    training_behaviors_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    mock_policy = mock.Mock()
    agent_manager_mock.policy_queue.get_nowait.side_effect = [
        mock_policy,
        mock_policy,
        AgentManagerQueue.Empty(),
    ]
    env_manager.set_agent_manager(brain_name, agent_manager_mock)

    step_info_dict = {brain_name: (Mock(), Mock())}
    env_stats = {
        "averaged": (1.0, StatsAggregationMethod.AVERAGE),
        "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
    }
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict, env_stats)
    step_mock.return_value = [step_info]
    env_manager.process_steps(env_manager.get_steps())

    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name][0],
        step_info.current_all_step_result[brain_name][1],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )

    # Test policy queue
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
    else:
        model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        summaries_dir = f"/{options.docker_target_name}/summaries"
    port = options.base_port

    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    if options.env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum_config, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed
    )
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id,
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        maybe_init_path = (
            f"./models/{options.initialize_from}" if options.initialize_from else None
        )
        summaries_dir = "./summaries"
        port = options.base_port
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            not options.inference,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            not options.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    options.checkpoint_settings.run_id = "test8"
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )

        if env_settings.env_path is None:
            port = None

        # Begin training
        env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )

        max_steps = options.behaviors["Brain"].max_steps
        options.behaviors["Brain"].max_steps = 10

        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=0,
        )
        trainer_factory.trainer_config[
            "Brain"
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    try:
        # Get initial weights
        tc.init_weights(env_manager)
        initial_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors["Brain"].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        if sample > 1:
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        )
        trainer_factory = TrainerFactory(
            options,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
            total_steps=step,
        )
        trainer_factory.trainer_config[
            "Brain"
        ].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config["Brain"].hyperparameters.learning_rate = 0.0005 * (
            1 - counter / max_meta_updates
        )
        trainer_factory.trainer_config["Brain"].hyperparameters.beta = 0.005 * (
            1 - counter / max_meta_updates
        )
        trainer_factory.trainer_config["Brain"].hyperparameters.epsilon = 0.2 * (
            1 - counter / max_meta_updates
        )
        print(
            "Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
                trainer_factory.trainer_config["Brain"].hyperparameters.learning_rate,
                trainer_factory.trainer_config["Brain"].hyperparameters.beta,
                trainer_factory.trainer_config["Brain"].hyperparameters.epsilon,
            )
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            print("Start learning at step: " + str(step) + " meta_step: " + str(counter))
            print("Initial weights: " + str(initial_weights[8]))
            weights_after_train = tc.start_learning(env_manager, initial_weights)
            print(tc.trainers["Brain"].optimizer)
            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors["Brain"].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #     equal.append(np.array_equal(initial_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            print(len(weights_after_train), len(initial_weights))
            for i, weight in enumerate(weights_after_train):
                initial_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    run_options: RunOptions,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(restricted_loads(pickled_env_factory))
    env_parameters = EnvironmentParametersChannel()
    engine_config = EngineConfig(
        width=run_options.engine_settings.width,
        height=run_options.engine_settings.height,
        quality_level=run_options.engine_settings.quality_level,
        time_scale=run_options.engine_settings.time_scale,
        target_frame_rate=run_options.engine_settings.target_frame_rate,
        capture_frame_rate=run_options.engine_settings.capture_frame_rate,
    )
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)
    stats_channel = StatsSideChannel()
    training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
    if worker_id == 0:
        training_analytics_channel = TrainingAnalyticsSideChannel()
    env: Optional[UnityEnvironment] = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        side_channels = [env_parameters, engine_configuration_channel, stats_channel]
        if training_analytics_channel is not None:
            side_channels.append(training_analytics_channel)

        env = env_factory(worker_id, side_channels)
        if (
            not env.academy_capabilities
            or not env.academy_capabilities.trainingAnalytics
        ):
            # Make sure we don't try to send training analytics if the environment
            # doesn't know how to process them. This wouldn't be catastrophic, but
            # would result in unknown SideChannel UUIDs being used.
            training_analytics_channel = None
        if training_analytics_channel:
            training_analytics_channel.environment_initialized(run_options)

        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.agent_ids) > 0:
                        env.set_actions(brain_name, action_info.env_action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process.
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(all_step_result, get_timer_root(), env_stats)
                step_queue.put(
                    EnvironmentResponse(EnvironmentCommand.STEP, worker_id, step_response)
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
                behavior_name, trainer_config = req.payload
                if training_analytics_channel:
                    training_analytics_channel.training_started(
                        behavior_name, trainer_config
                    )
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    except Exception as ex:
        logger.exception(
            f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
        )
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
        parent_conn.close()
        step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
        step_queue.close()
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    port = options.base_port + (sub_id * options.num_envs)

    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    if options.env_path is None:
        port = 5004  # This is the in-Editor training port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed
    )
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
def apply_config(self):
    # set FloatProperties
    grid_size_x = self.config.get("grid_size_x")
    if not isinstance(grid_size_x, list) or len(grid_size_x) != 2:
        raise ValueError(
            "The provided grid_size_x parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    grid_size_y = self.config.get("grid_size_y")
    if not isinstance(grid_size_y, list) or len(grid_size_y) != 2:
        raise ValueError(
            "The provided grid_size_y parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    vis_obs_size = self.config.get("vis_obs_size")
    if not isinstance(vis_obs_size, list) or len(vis_obs_size) != 2:
        raise ValueError(
            "The provided vis_obs_size parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    base_size_x = self.config.get("base_size_x")
    if not isinstance(base_size_x, list) or len(base_size_x) != 2:
        raise ValueError(
            "The provided base_size_x parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    base_size_y = self.config.get("base_size_y")
    if not isinstance(base_size_y, list) or len(base_size_y) != 2:
        raise ValueError(
            "The provided base_size_y parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    num_per_base_type = self.config.get("num_per_base_type")
    if not isinstance(num_per_base_type, list) or len(num_per_base_type) != 2:
        raise ValueError(
            "The provided num_per_base_type parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    num_per_item = self.config.get("num_per_item")
    if not isinstance(num_per_item, list) or len(num_per_item) != 2:
        raise ValueError(
            "The provided num_per_item parameter is not a list of the form "
            "[min, max]. Please correct this."
        )
    color_pool = self.config.get("color_pool")
    if not isinstance(color_pool, list):
        raise ValueError(
            "The provided color_pool parameter is not of type list. "
            "Please correct this."
        )
    camera_type = self.config.get("camera_type")
    camera_type_f: float = CAMERA_TYPES.get(camera_type, 0.0)

    # set properties in reset channel
    self.env_param_channel.set_float_parameter("minGridSizeX", grid_size_x[0])
    self.env_param_channel.set_float_parameter("maxGridSizeX", grid_size_x[1])
    self.env_param_channel.set_float_parameter("minGridSizeY", grid_size_y[0])
    self.env_param_channel.set_float_parameter("maxGridSizeY", grid_size_y[1])
    self.env_param_channel.set_float_parameter("cameraType", camera_type_f)

    # area settings
    # check if num train areas should be set
    if self.is_already_initialized:
        print(
            "You're trying to change the number of train areas during runtime. "
            "This is only possible at initialization."
        )
    else:
        self.env_param_channel.set_float_parameter(
            "numTrainAreas", self.config.get("num_train_areas")
        )
    self.env_param_channel.set_float_parameter(
        "numBaseTypesToUse", self.config.get("num_base_types")
    )
    self.env_param_channel.set_float_parameter("numberPerBaseTypeMax", num_per_base_type[1])
    self.env_param_channel.set_float_parameter("numberPerBaseTypeMin", num_per_base_type[0])
    self.env_param_channel.set_float_parameter("baseSizeXMax", base_size_x[1])
    self.env_param_channel.set_float_parameter("baseSizeXMin", base_size_x[0])
    self.env_param_channel.set_float_parameter("baseSizeZMax", base_size_y[1])
    self.env_param_channel.set_float_parameter("baseSizeZMin", base_size_y[0])
    self.env_param_channel.set_float_parameter(
        "baseInCornersOnly", 1 if self.config.get("base_in_corners_only") else 0
    )
    self.env_param_channel.set_float_parameter(
        "boxesVanish", 1 if self.config.get("boxes_vanish") else 0
    )
    self.env_param_channel.set_float_parameter(
        "boxesNeedDrop", 1 if self.config.get("boxes_need_drop") else 0
    )
    self.env_param_channel.set_float_parameter(
        "sparseReward", 1 if self.config.get("sparse_reward_only") else 0
    )
    # color settings
    self.env_param_channel.set_float_parameter(
        "noBaseFillColor", 1 if self.config.get("no_base_fill_color") else 0
    )
    self.env_param_channel.set_float_parameter(
        "brighterBases", 1 if self.config.get("brighter_bases") else 0
    )
    self.env_param_channel.set_float_parameter(
        "fullBaseLine", 1 if self.config.get("full_base_line") else 0
    )
    # item settings
    self.env_param_channel.set_float_parameter(
        "numItemTypesToUse", self.config.get("num_item_types")
    )
    self.env_param_channel.set_float_parameter("numberPerItemTypeMax", num_per_item[1])
    self.env_param_channel.set_float_parameter("numberPerItemTypeMin", num_per_item[0])
    # general settings
    self.env_param_channel.set_float_parameter(
        "noDisplay", 1 if self.config.get("no_display") else 0
    )
    self.env_param_channel.set_float_parameter("visObsWidth", vis_obs_size[0])
    self.env_param_channel.set_float_parameter("visObsHeight", vis_obs_size[1])
    self.env_param_channel.set_float_parameter(
        "useVisual",
        1
        if self.config.get("use_visual")
        and not self.config.get("use_object_property_camera")
        else 0,
    )
    self.env_param_channel.set_float_parameter(
        "useRayPerception", 1 if self.config.get("use_ray_perception") else 0
    )
    self.env_param_channel.set_float_parameter(
        "useObjectPropertyCamera",
        1 if self.config.get("use_object_property_camera") else 0,
    )
    self.env_param_channel.set_float_parameter("maxSteps", self.config.get("max_steps"))
    self.env_param_channel.set_float_parameter("taskLevel", self.config.get("task_level"))

    # Read engine config
    engine_config_dict = self.config.get("engine_config")
    # Configure the Engine
    engine_config = EngineConfig(
        width=engine_config_dict.get("window_width"),
        height=engine_config_dict.get("window_height"),
        quality_level=engine_config_dict.get("quality_level"),
        time_scale=engine_config_dict.get("sim_speed"),
        target_frame_rate=engine_config_dict.get("target_frame_rate"),
        capture_frame_rate=60,
    )
    self.engine_channel.set_configuration(engine_config)

    # set list properties
    self.color_pool_channel.set_property("colorPool", self.config.get("color_pool"))
    self.is_already_initialized = True
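# Config sketch (assumed, values illustrative): the keys apply_config reads from
# self.config, grouped as in the method above. Range parameters are [min, max]
# lists; booleans toggle the corresponding float parameters. The "orthographic"
# camera_type key is a hypothetical entry of CAMERA_TYPES.
config = {
    "grid_size_x": [4, 10],
    "grid_size_y": [4, 10],
    "vis_obs_size": [84, 84],
    "base_size_x": [1, 3],
    "base_size_y": [1, 3],
    "num_per_base_type": [1, 2],
    "num_per_item": [1, 4],
    "color_pool": [0, 1, 2],
    "camera_type": "orthographic",
    "num_train_areas": 8,
    "num_base_types": 2,
    "base_in_corners_only": False,
    "boxes_vanish": False,
    "boxes_need_drop": True,
    "sparse_reward_only": False,
    "no_base_fill_color": False,
    "brighter_bases": True,
    "full_base_line": False,
    "num_item_types": 2,
    "no_display": False,
    "use_visual": True,
    "use_ray_perception": False,
    "use_object_property_camera": False,
    "max_steps": 1000,
    "task_level": 1,
    "engine_config": {
        "window_width": 80,
        "window_height": 80,
        "quality_level": 1,
        "sim_speed": 20.0,
        "target_frame_rate": -1,
    },
}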
def run_training_aai(run_seed: int, options: RunOptionsAAI) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: training parameters
    """
    with hierarchical_timer("run_training.setup"):
        # Recognize and use docker volume if one is passed as an argument
        # if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
        # else:
        #     model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        #     summaries_dir = f"/{options.docker_target_name}/summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(summaries_dir)
        gauge_write = GaugeWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)

        if options.env_path is None:
            port = AnimalAIEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory_aai(
            options.env_path,
            # options.docker_target_name,
            run_seed,
            port,
            options.n_arenas_per_env,
            options.arena_config,
            options.resolution,
        )
        if options.train_model:
            engine_config = EngineConfig(
                options.width,
                options.height,
                AnimalAIEnvironment.QUALITY_LEVEL.train,
                AnimalAIEnvironment.TIMESCALE.train,
                AnimalAIEnvironment.TARGET_FRAME_RATE.train,
            )
        else:
            engine_config = EngineConfig(
                AnimalAIEnvironment.WINDOW_WIDTH.play,
                AnimalAIEnvironment.WINDOW_HEIGHT.play,
                AnimalAIEnvironment.QUALITY_LEVEL.play,
                AnimalAIEnvironment.TIMESCALE.play,
                AnimalAIEnvironment.TARGET_FRAME_RATE.play,
            )
        env_manager = SubprocessEnvManagerAAI(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
            # options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerControllerAAI(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)