def make_chickAI_unity_env(options):
    """
    Build the ChickAI UnityEnvironment from command line options.

    :param options: parsed options object; the attributes read here are
        width, height, quality_level, time_scale, target_frame_rate,
        capture_frame_rate, input_resolution, episode_steps, video1, video2,
        log_dir, test_mode, env_path, base_port and seed.
    :returns: tuple ``(unity_env, agent_info_channel)`` where the channel is
        the FloatPropertiesChannel registered with the environment.
    """
    # Engine (rendering / timing) settings are forwarded unchanged.
    engine_settings = dict(
        width=options.width,
        height=options.height,
        quality_level=options.quality_level,
        time_scale=options.time_scale,
        target_frame_rate=options.target_frame_rate,
        capture_frame_rate=options.capture_frame_rate,
    )
    engine_cfg = EngineConfig(**engine_settings)

    chick_env_args = _build_chickAI_env_args(
        input_resolution=options.input_resolution,
        episode_steps=options.episode_steps,
        video_1_path=options.video1,
        video_2_path=options.video2,
        log_dir=options.log_dir,
        test_mode=options.test_mode,
    )

    # Side channel used to receive auxiliary agent information.
    info_channel = FloatPropertiesChannel()

    environment = make_unity_env(
        env_path=options.env_path,
        port=options.base_port,
        seed=options.seed,
        env_args=chick_env_args,
        engine_config=engine_cfg,
        side_channels=[info_channel],
    )
    return environment, info_channel
def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    """
    Train on ``env`` inside a temporary directory and check the final rewards.

    :param env: environment handed to SimpleEnvManager (when no manager is
        given); its ``final_rewards`` mapping is read after training.
    :param trainer_config: trainer configuration mapping. NOTE: it is
        modified in place, its ``"threading"`` entry is set to ``False``.
    :param reward_processor: callable applied to every value of
        ``env.final_rewards`` before the checks.
    :param meta_curriculum: forwarded to TrainerFactory and TrainerController.
    :param success_threshold: every processed reward must be strictly greater
        than this; ``None`` skips the reward checks entirely.
    :param env_manager: environment manager to train with; a SimpleEnvManager
        wrapping ``env`` is created when ``None``.
    :raises AssertionError: if a processed reward is NaN or not above the
        threshold.
    """
    run_id = "id"
    save_freq = 99999
    seed = 1337

    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as work_dir:
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        StatsReporter.add_writer(DebugWriter())

        # Make sure threading is turned off for determinism
        trainer_config["threading"] = False

        if env_manager is None:
            env_manager = SimpleEnvManager(env, FloatPropertiesChannel())

        factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=work_dir,
            run_id=run_id,
            model_path=work_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        controller = TrainerController(
            trainer_factory=factory,
            summaries_dir=work_dir,
            model_path=work_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        controller.start_learning(env_manager)

        # For tests where we are just checking setup and not reward
        if success_threshold is None:
            return

        final_scores = list(map(reward_processor, env.final_rewards.values()))
        assert not any(math.isnan(score) for score in final_scores)
        assert all(score > success_threshold for score in final_scores)
def initialise_environment(self, base_port=5004):
    """
    Initialise and reset unity environment

    Creates the engine-configuration and float-properties side channels,
    opens the UnityEnvironment at ``self.env_path``, resets it, selects the
    first agent group as the one to work with and finally queues the
    configured time scale on the engine configuration channel.

    :param base_port: port passed to UnityEnvironment. Defaults to 5004, the
        value that used to be hard-coded, so existing callers are unaffected;
        pass a different port to run several environments side by side.
    """
    engine_configuration_channel = EngineConfigurationChannel()
    self.float_properties_channel = FloatPropertiesChannel()
    self.env = UnityEnvironment(
        file_name=self.env_path,
        base_port=base_port,
        side_channels=[
            engine_configuration_channel,
            self.float_properties_channel,
        ],
    )

    # Reset the environment
    self.env.reset()

    # Set the default brain to work with (the first agent group reported)
    self.group_name = self.env.get_agent_groups()[0]
    self.group_spec = self.env.get_agent_group_spec(self.group_name)

    # Set the time scale of the engine
    engine_configuration_channel.set_configuration_parameters(
        time_scale=self.time_scale)
def _check_environment_trains(env,
                              config,
                              meta_curriculum=None,
                              success_threshold=0.99):
    """
    Train on ``env`` inside a temporary directory and check the mean rewards.

    :param env: environment wrapped in a SimpleEnvManager for training.
    :param config: YAML text of the trainer configuration; parsed with
        ``yaml.safe_load``.
    :param meta_curriculum: forwarded to TrainerFactory and TrainerController.
    :param success_threshold: every value reported by the controller's
        ``_get_measure_vals()`` must be strictly greater than this; ``None``
        skips the reward checks.
    :raises AssertionError: if a mean reward is NaN or not above the
        threshold.
    """
    run_id = "id"
    save_freq = 99999
    seed = 1337

    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as work_dir:
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()

        parsed_config = yaml.safe_load(config)
        manager = SimpleEnvManager(env, FloatPropertiesChannel())

        factory = TrainerFactory(
            trainer_config=parsed_config,
            summaries_dir=work_dir,
            run_id=run_id,
            model_path=work_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        controller = TrainerController(
            trainer_factory=factory,
            summaries_dir=work_dir,
            model_path=work_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        controller.start_learning(manager)
        print(controller._get_measure_vals())

        # For tests where we are just checking setup and not reward
        if success_threshold is None:
            return

        for measured in controller._get_measure_vals().values():
            assert not math.isnan(measured)
            assert measured > success_threshold
def __init__(
    self,
    env_path: Optional[str] = None,
    imprint_video: Optional[str] = None,
    test_video: Optional[str] = None,
    log_dir: Optional[str] = None,
    input_resolution: int = 64,
    episode_steps: int = 1000,
    seed: int = 0,
    test_mode: bool = False,
    base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
    time_scale: int = 20,
    capture_frame_rate: int = 60,
    width: int = 80,
    height: int = 80,
    use_visual: bool = True,
    **kwargs,
):
    """
    Launch the ChickAI Unity environment and wrap it as a gym environment.

    The engine is configured with the given width, height, time scale and
    capture frame rate (quality level 5, target frame rate -1). The
    environment arguments are built from the resolution, episode length,
    video paths, log directory and test-mode flag. The resulting Unity
    environment is wrapped in UnityToGymWrapper with branched actions
    flattened.

    Extra keyword arguments are accepted but are not read by this
    constructor.

    After construction ``self.env`` holds the gym wrapper and
    ``self.agent_info_channel`` the FloatPropertiesChannel registered with
    the Unity environment.
    """
    engine_settings = EngineConfig(
        width=width,
        height=height,
        quality_level=5,
        time_scale=time_scale,
        target_frame_rate=-1,
        capture_frame_rate=capture_frame_rate,
    )
    chick_env_args = _build_chickAI_env_args(
        input_resolution=input_resolution,
        episode_steps=episode_steps,
        imprint_video=imprint_video,
        test_video=test_video,
        log_dir=log_dir,
        test_mode=test_mode,
    )

    info_channel = FloatPropertiesChannel()
    raw_env = _make_unity_env(
        env_path=env_path,
        port=base_port,
        seed=seed,
        env_args=chick_env_args,
        engine_config=engine_settings,
        side_channels=[info_channel],
    )
    gym_env = UnityToGymWrapper(
        raw_env,
        flatten_branched=True,
        use_visual=use_visual,
    )

    super().__init__(gym_env)
    self.env = gym_env
    self.agent_info_channel = info_channel
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
) -> None:
    """
    Run one Unity environment and serve commands received on ``parent_conn``.

    The environment is built by the unpickled factory with three side
    channels (float properties, engine configuration, stats). Commands are
    then handled in a loop until ``"close"`` is received:

    * ``"step"``: apply the non-empty actions, step the environment and put
      a StepResponse on ``step_queue``.
    * ``"external_brains"``: reply on ``parent_conn`` with the brain
      parameters of every agent group.
    * ``"get_properties"``: reply with a copy of the float properties.
    * ``"reset"``: set the float properties from the payload, reset the
      environment and reply with the step results.

    Commands with any other name are ignored. On KeyboardInterrupt or a
    Unity communication / timeout exception an ``"env_close"`` response is
    put on ``step_queue``. The queue and the environment are always closed
    on exit.
    """
    env_factory: Callable[[int, List[SideChannel]], UnityEnvironment] = cloudpickle.loads(
        pickled_env_factory)

    float_properties = FloatPropertiesChannel()
    engine_channel = EngineConfigurationChannel()
    engine_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = env_factory(
        worker_id,
        [float_properties, engine_channel, stats_channel],
    )

    def _reply(cmd_name, payload):
        # Answer the parent over the pipe.
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _collect_step_results() -> AllStepResult:
        # Latest step result of every agent group, keyed by group name.
        return {
            group: env.get_step_result(group)
            for group in env.get_agent_groups()
        }

    def _collect_brain_parameters():
        # Brain parameters of every agent group, keyed by group name.
        return {
            group: group_spec_to_brain_parameters(
                group, env.get_agent_group_spec(group))
            for group in env.get_agent_groups()
        }

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "close":
                break

            if cmd.name == "step":
                for group, action_info in cmd.payload.items():
                    if len(action_info.action) == 0:
                        continue
                    env.set_actions(group, action_info.action)
                env.step()
                step_results = _collect_step_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                response = StepResponse(step_results, get_timer_root(),
                                        env_stats)
                step_queue.put(
                    EnvironmentResponse("step", worker_id, response))
                reset_timers()
            elif cmd.name == "external_brains":
                _reply("external_brains", _collect_brain_parameters())
            elif cmd.name == "get_properties":
                _reply("get_properties",
                       float_properties.get_property_dict_copy())
            elif cmd.name == "reset":
                for key, value in cmd.payload.items():
                    float_properties.set_property(key, value)
                env.reset()
                _reply("reset", _collect_step_results())
    except (KeyboardInterrupt, UnityCommunicationException,
            UnityTimeOutException):
        logger.info(
            f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
def test_float_properties():
    """
    Properties set on one FloatPropertiesChannel reach another channel once
    the side channel data generated for the sender is parsed for the
    receiver.
    """
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    def _deliver():
        # Serialise what the sender has queued and feed it to the receiver.
        payload = UnityEnvironment._generate_side_channel_data(
            {sender.channel_id: sender})
        UnityEnvironment._parse_side_channel_message(
            {receiver.channel_id: receiver}, payload)

    # First property: only prop1 is known to the receiver.
    sender.set_property("prop1", 1.0)
    _deliver()
    assert receiver.get_property("prop1") == 1.0
    assert receiver.get_property("prop2") is None

    # Second property: both are known afterwards.
    sender.set_property("prop2", 2.0)
    _deliver()
    assert receiver.get_property("prop1") == 1.0
    assert receiver.get_property("prop2") == 2.0

    assert len(receiver.list_properties()) == 2
    for expected_name in ("prop1", "prop2"):
        assert expected_name in receiver.list_properties()

    # The sender keeps its own copy of what it set.
    assert sender.get_property("prop1") == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()
def test_float_properties():
    """
    Properties set on one FloatPropertiesChannel reach another channel once
    the messages generated by a SideChannelManager for the sender are
    processed by a SideChannelManager for the receiver.
    """
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    def _deliver():
        # Serialise what the sender has queued and feed it to the receiver.
        payload = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([receiver]).process_side_channel_message(payload)

    # First property: only prop1 is known to the receiver.
    sender.set_property("prop1", 1.0)
    _deliver()
    assert receiver.get_property("prop1") == 1.0
    assert receiver.get_property("prop2") is None

    # Second property: both are known afterwards.
    sender.set_property("prop2", 2.0)
    _deliver()
    assert receiver.get_property("prop1") == 1.0
    assert receiver.get_property("prop2") == 2.0

    assert len(receiver.list_properties()) == 2
    for expected_name in ("prop1", "prop2"):
        assert expected_name in receiver.list_properties()

    # The sender keeps its own copy of what it set.
    assert sender.get_property("prop1") == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()
class UnityContainer:
    """
    Class encapsulating the ML-Agents connection with Unity allowing easy interaction with the simulated environment
    """

    # Positions of the visual and vector observations in a step result's obs
    VISUAL_OBSERVATION_INDEX = 0
    VECTOR_OBSERVATIONS_INDEX = 1

    # First action column selects what the remaining columns mean
    _ACTION_MODE_VELOCITY = 0
    _ACTION_MODE_SET_ROTATION = 1
    _ACTION_MODE_SET_RUBBER_ARM_ROTATION = 2

    # Path of the pre-build environment
    BUILD_PATH = os.path.join(
        os.path.dirname(__file__),
        "../../Unity Environment/build/deep active inference agent environment"
    )

    def __init__(self, use_editor, time_scale=1):
        """
        Set up the Unity environment

        Nothing is launched here; call initialise_environment() to connect.

        :param use_editor: Set to true to connect directly to the Unity editor,
            set to false to use the pre-build environment at BUILD_PATH
        :param time_scale: time_scale of the environment (1 is normal time)
        """
        self.env_path = None if use_editor else self.BUILD_PATH
        self.time_scale = time_scale

        # Populated by initialise_environment()
        self.env = None
        self.float_properties_channel = None
        self.group_name = None
        self.group_spec = None

    def initialise_environment(self, base_port=5004):
        """
        Initialise and reset unity environment

        :param base_port: port passed to UnityEnvironment. Defaults to 5004,
            the value that used to be hard-coded; pass a different port to
            run several environments side by side.
        """
        engine_configuration_channel = EngineConfigurationChannel()
        self.float_properties_channel = FloatPropertiesChannel()
        self.env = UnityEnvironment(
            file_name=self.env_path,
            base_port=base_port,
            side_channels=[
                engine_configuration_channel,
                self.float_properties_channel,
            ],
        )

        # Reset the environment
        self.env.reset()

        # Set the default brain to work with (the first agent group reported)
        self.group_name = self.env.get_agent_groups()[0]
        self.group_spec = self.env.get_agent_group_spec(self.group_name)

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(
            time_scale=self.time_scale)

    def set_condition(self, condition: Condition):
        """Sets the experimental condition setting"""
        self.float_properties_channel.set_property("condition",
                                                   condition.value)

    def set_visible_arm(self, visible_arm: VisibleArm):
        """Sets the visible arm setting"""
        self.float_properties_channel.set_property("visiblearm",
                                                   visible_arm.value)

    def set_stimulation(self, stimulation: Stimulation):
        """Sets the stimulation setting"""
        self.float_properties_channel.set_property("stimulation",
                                                   stimulation.value)

    def _observations(self):
        """:returns the obs sequence of the current step result of the agent group"""
        return self.env.get_step_result(self.group_name).obs

    def _vector_observations(self):
        """:returns the vector observation array of the current step result"""
        return self._observations()[self.VECTOR_OBSERVATIONS_INDEX]

    def _send_action(self, mode, values):
        """
        Prefix values with the action mode column, send it and step the environment

        :param mode: one of the _ACTION_MODE_* constants
        :param values: 2D array-like with a single row; the mode is prepended along axis 1
        """
        self.env.set_actions(self.group_name,
                             np.append([[mode]], values, axis=1))
        self.env.step()

    def get_joint_observation(self):
        """:returns joint angles of the agent"""
        return self._vector_observations()[:, :2]

    def get_touch_observation(self):
        """:returns last visual and tactile touch events"""
        return self._vector_observations()[:, 2:4]

    def get_current_env_time(self):
        """:returns current env time"""
        return self._vector_observations()[:, 4]

    def get_cartesian_distance(self):
        """:returns cartesian euclidean (absolute) distance between real hand and rubber hand"""
        return self._vector_observations()[0, 5]

    def get_horizontal_distance(self):
        """:returns horizontal euclidean distance between real hand and rubber hand"""
        return self._vector_observations()[0, 6]

    def get_rubber_joint_observation(self):
        """:returns joint angles of the rubber arm"""
        return self._vector_observations()[:, 7:9]

    def get_visual_observation(self):
        """:returns visual perception of the agent"""
        return self._observations()[self.VISUAL_OBSERVATION_INDEX][0]

    def act(self, action):
        """Make the agent perform an action (velocity) in the environment"""
        self._send_action(self._ACTION_MODE_VELOCITY, action)

    def set_rotation(self, rotation):
        """Manually set the joint angles to a particular rotation"""
        self._send_action(self._ACTION_MODE_SET_ROTATION, rotation)

    def set_rubber_arm_rotation(self, rotation):
        """Manually set the joint angles of the rubber arm to a particular rotation"""
        self._send_action(self._ACTION_MODE_SET_RUBBER_ARM_ROTATION, rotation)

    def reset(self):
        """Reset the environment"""
        self.env.reset()

    def close(self):
        """
        Gracefully close the environment

        Does nothing when initialise_environment() was never called, so
        cleanup code can call this unconditionally.
        """
        if self.env is None:
            return
        self.env.close()