def _parse_side_channel_message(
    side_channels: Dict[int, SideChannel], data: bytes
) -> None:
    """
    Split a raw side-channel payload into messages and dispatch each one.

    ``data`` is read as a sequence of records. Each record starts with two
    little-endian 32-bit signed integers (channel type, message length) and
    is followed by ``message length`` bytes of message body.

    :param side_channels: Registered side channels, keyed by channel type.
    :param data: The concatenated side-channel records.
    :raises UnityEnvironmentException: If a record header cannot be unpacked
        or a message body is shorter than the length its header declares.
    """
    offset = 0
    while offset < len(data):
        # Only the header unpack can fail here; slicing never raises, so the
        # try body is limited to this call and the original cause is chained.
        try:
            channel_type, message_len = struct.unpack_from("<ii", data, offset)
        except struct.error as e:
            raise UnityEnvironmentException(
                "There was a problem reading a message in a SideChannel. "
                "Please make sure the version of MLAgents in Unity is "
                "compatible with the Python version."
            ) from e
        offset = offset + 8
        message_data = data[offset : offset + message_len]
        offset = offset + message_len
        # A truncated body (or a negative declared length) shows up as a
        # length mismatch, because slicing silently clamps to the buffer.
        if len(message_data) != message_len:
            raise UnityEnvironmentException(
                "The message received by the side channel {0} was "
                "unexpectedly short. Make sure your Unity Environment "
                "sending side channel data properly.".format(channel_type)
            )
        if channel_type in side_channels:
            side_channels[channel_type].on_message_received(message_data)
        else:
            logger.warning(
                "Unknown side channel data received. Channel type "
                ": {0}.".format(channel_type)
            )
def load_config(config_path: str) -> Dict[str, Any]:
    """
    Open the config file at ``config_path`` and parse it with ``_load_config``.

    The file is opened explicitly as UTF-8, which is the encoding the decode
    error message below asks users to save with. Relying on the platform
    default encoding could mis-read a valid UTF-8 file.

    :param config_path: Path of the config file to read.
    :return: The parsed configuration.
    :raises UnityEnvironmentException: If the file cannot be opened or its
        bytes cannot be decoded as UTF-8.
    """
    try:
        with open(config_path, encoding="utf-8") as data_file:
            return _load_config(data_file)
    except IOError as e:
        raise UnityEnvironmentException(
            f"Config file could not be found at {config_path}."
        ) from e
    except UnicodeDecodeError as e:
        raise UnityEnvironmentException(
            f"There was an error decoding Config file from {config_path}. "
            f"Make sure your file is save using UTF-8"
        ) from e
def load_config(trainer_config_path):
    """
    Read the trainer configuration YAML file and return its parsed content.

    :param trainer_config_path: Path of the trainer configuration file.
    :return: The parsed YAML document.
    :raises UnityEnvironmentException: If the file cannot be opened or
        decoded.
    """
    try:
        with open(trainer_config_path) as data_file:
            # safe_load instead of yaml.load: calling yaml.load without an
            # explicit Loader can construct arbitrary Python objects and is
            # rejected outright by recent PyYAML releases.
            return yaml.safe_load(data_file)
    except IOError as e:
        raise UnityEnvironmentException('Parameter file could not be found '
                                        'at {}.'.format(trainer_config_path)) from e
    except UnicodeDecodeError as e:
        raise UnityEnvironmentException(
            'There was an error decoding '
            'Trainer Config from this path : {}'.format(trainer_config_path)) from e
def load_config(trainer_config_path: str) -> Dict[str, Any]:
    """
    Parse the trainer configuration YAML file at ``trainer_config_path``.

    :param trainer_config_path: Path of the trainer configuration file.
    :return: Whatever ``yaml.safe_load`` produces for the file.
    :raises UnityEnvironmentException: If the file cannot be opened or
        decoded.
    """
    try:
        with open(trainer_config_path) as config_file:
            return yaml.safe_load(config_file)
    except IOError:
        not_found_msg = "Parameter file could not be found at {}.".format(
            trainer_config_path
        )
        raise UnityEnvironmentException(not_found_msg)
    except UnicodeDecodeError:
        decode_msg = (
            "There was an error decoding Trainer Config from this path : {}"
        ).format(trainer_config_path)
        raise UnityEnvironmentException(decode_msg)
def close(self):
    """
    Shut the environment down by delegating to ``self._close()``.

    :raises UnityEnvironmentException: If no environment is currently loaded.
    """
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")
    self._close()
def _create_model_path(model_path):
    """
    Ensure the directory that will hold the generated model exists.

    Missing parent directories are created as well. The call is a no-op when
    the directory already exists.

    :param model_path: Directory path to create.
    :raises UnityEnvironmentException: If the directory cannot be created,
        including the case where ``model_path`` exists but is not a directory.
    """
    try:
        # exist_ok=True replaces the separate "exists?" check, which was racy:
        # another process could create the folder between the check and the
        # makedirs call and trigger a spurious failure.
        os.makedirs(model_path, exist_ok=True)
    except Exception as e:
        # Kept broad on purpose: every failure here is reported to callers as
        # the same domain error, as before. The cause is chained for debugging.
        raise UnityEnvironmentException(
            "The folder {} containing the "
            "generated model could not be "
            "accessed. Please make sure the "
            "permissions are set correctly.".format(model_path)) from e
def _load_config(fp: TextIO) -> Dict[str, Any]:
    """
    Load the yaml config from the file-like object.

    :param fp: Open text stream containing the YAML document.
    :return: The parsed YAML document.
    :raises UnityEnvironmentException: If the document is not valid YAML.
    """
    try:
        return yaml.safe_load(fp)
    except yaml.YAMLError as e:
        # YAMLError is the common base of PyYAML's parser, scanner, composer
        # and constructor errors. Catching only ParserError let other
        # formatting problems escape without the helpful message below.
        raise UnityEnvironmentException(
            "Error parsing yaml file. Please check for formatting errors. "
            "A tool such as http://www.yamllint.com/ can be helpful with this."
        ) from e
def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
    """
    Initialization of the trainers

    Builds one parameter dict per external brain (the 'default' section
    overlaid with the brain's own section, following string aliases), then
    instantiates the trainer named by each brain's 'trainer' entry and stores
    it in ``self.trainers``.

    :param trainer_config: The configurations of the trainers
    :raises UnityEnvironmentException: If a brain's 'trainer' entry is not
        one of 'offline_bc', 'online_bc' or 'ppo'.
    """
    # Diagnostics go through logging at debug level instead of unconditional
    # print calls, so normal runs are not flooded with debugging output.
    # Imported locally so the method does not rely on the module's imports.
    import logging

    debug_log = logging.getLogger(__name__)
    debug_log.debug("External brains: %s", self.external_brains)

    trainer_parameters_dict = {}
    for brain_name in self.external_brains:
        debug_log.debug("Building trainer parameters for brain %s", brain_name)
        trainer_parameters = trainer_config['default'].copy()
        trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
            basedir=self.summaries_dir,
            name=str(self.run_id) + '_' + brain_name)
        trainer_parameters['model_path'] = '{basedir}/{name}'.format(
            basedir=self.model_path,
            name=brain_name)
        trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
        if brain_name in trainer_config:
            # A non-dict value is an alias naming another section; follow the
            # chain until an actual section (dict) is reached.
            _brain_key = brain_name
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    for brain_name in self.external_brains:
        if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
            self.trainers[brain_name] = OfflineBCTrainer(
                self.external_brains[brain_name],
                trainer_parameters_dict[brain_name], self.train_model,
                self.load_model, self.seed, self.run_id)
        elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
            self.trainers[brain_name] = OnlineBCTrainer(
                self.external_brains[brain_name],
                trainer_parameters_dict[brain_name], self.train_model,
                self.load_model, self.seed, self.run_id)
        elif trainer_parameters_dict[brain_name]['trainer'] == 'ppo':
            debug_log.debug(
                "Creating PPO trainer for brain %s with parameters %s",
                brain_name, self.external_brains[brain_name])
            self.trainers[brain_name] = PPOTrainer(
                self.external_brains[brain_name],
                self.meta_curriculum.brains_to_curriculums[brain_name].
                min_lesson_length if self.meta_curriculum else 0,
                trainer_parameters_dict[brain_name],
                self.train_model, self.load_model, self.seed, self.run_id)
            # Metrics are only registered for PPO trainers.
            self.trainer_metrics[brain_name] = self.trainers[
                brain_name].trainer_metrics
        else:
            raise UnityEnvironmentException('The trainer config contains '
                                            'an unknown trainer type for '
                                            'brain {}'.format(brain_name))
def reset(self) -> None:
    """
    Send a reset request through the communicator and refresh local state.

    On success the group specs and state are updated from the reply, the
    first-message flag is cleared and any pending actions are discarded.

    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityCommunicationException: If the communicator returns ``None``.
    """
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")

    reset_input = self._generate_reset_input()
    outputs = self.communicator.exchange(reset_input)
    if outputs is None:
        raise UnityCommunicationException("Communicator has stopped.")

    self._update_group_specs(outputs)
    self._update_state(outputs.rl_output)
    self._is_first_message = False
    self._env_actions.clear()
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    """
    Repackage a ``BatchedStepResult`` as a ``BrainInfo``.

    Observations whose shape has two dimensions are concatenated along axis 1
    into one vector observation. Observations with four dimensions are kept
    as a list of visual observations. Any other rank is rejected.

    :param step_result: The batched step result to convert.
    :param group_spec: Spec used to size the action mask.
    :param agent_id_prefix: When given, agent ids are rendered as
        ``"$<prefix>-<id>"``; otherwise as ``str(id)``.
    :return: The equivalent ``BrainInfo``.
    :raises UnityEnvironmentException: If an observation is neither 2-D nor
        4-D.
    """
    n_agents = step_result.n_agents()

    # Bucket observation positions by rank.
    vector_slots = []
    visual_slots = []
    for slot, obs in enumerate(step_result.obs):
        rank = len(obs.shape)
        if rank == 2:
            vector_slots.append(slot)
            continue
        if rank == 4:
            visual_slots.append(slot)
            continue
        raise UnityEnvironmentException(
            "Invalid input received from the environment, the observation should "
            "either be a vector of float or a PNG image"
        )

    if vector_slots:
        vec_obs = np.concatenate(
            [step_result.obs[slot] for slot in vector_slots], axis=1
        )
    else:
        # No vector observations: zero-width array with one row per agent.
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    vis_obs = [step_result.obs[slot] for slot in visual_slots]

    # Default mask is all ones. For discrete actions it is sized by the
    # branches and, when the step carries a mask, replaced by its inverse.
    mask = np.ones((n_agents, np.sum(group_spec.action_size)), dtype=np.float32)
    if group_spec.is_action_discrete():
        branch_total = np.sum(group_spec.discrete_action_branches)
        mask = np.ones((n_agents, branch_total), dtype=np.float32)
        if step_result.action_mask is not None:
            mask = 1 - np.concatenate(step_result.action_mask, axis=1)

    if agent_id_prefix is not None:
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]
    else:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]

    rewards = list(step_result.reward)
    dones = list(step_result.done)
    max_steps = list(step_result.max_step)
    return BrainInfo(vis_obs, vec_obs, rewards, agent_ids, dones, max_steps, mask)
def _initialize_trainers(self, trainer_config, sess):
    """
    Rebuild ``self.trainers`` with one trainer per external brain.

    Each brain gets the 'default' section of ``trainer_config`` overlaid with
    its own section (string values are followed as aliases to other
    sections). With more than one external brain, a sanitized brain name is
    used as graph scope and as a suffix of the summary path.

    :param trainer_config: Trainer configuration, keyed by section name.
    :param sess: Session object handed to every trainer constructor.
    :raises UnityEnvironmentException: If a brain's 'trainer' entry is neither
        'imitation' nor 'ppo'.
    """
    per_brain_params = {}
    # TODO: This probably doesn't need to be reinitialized.
    self.trainers = {}

    for brain_name in self.env.external_brain_names:
        params = trainer_config['default'].copy()

        if len(self.env.external_brain_names) > 1:
            # Collapse runs of non-alphanumeric characters to a single dash.
            scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
            summary_name = str(self.run_id) + '_' + scope
        else:
            scope = ''
            summary_name = str(self.run_id)
        params['graph_scope'] = scope
        params['summary_path'] = '{}/{}'.format(self.summaries_dir, summary_name)

        if brain_name in trainer_config:
            section = brain_name
            while not isinstance(trainer_config[section], dict):
                section = trainer_config[section]
            params.update(trainer_config[section])

        per_brain_params[brain_name] = params.copy()

    for brain_name in self.env.external_brain_names:
        params = per_brain_params[brain_name]
        trainer_type = params['trainer']

        if trainer_type == 'imitation':
            self.trainers[brain_name] = BehavioralCloningTrainer(
                sess, self.env.brains[brain_name], params,
                self.train_model, self.seed, self.run_id)
        elif trainer_type == 'ppo':
            # External brain becomes internal brain in here.
            if self.meta_curriculum:
                curriculum = self.meta_curriculum.brains_to_curriculums[brain_name]
                lesson_length = curriculum.min_lesson_length
            else:
                lesson_length = 0
            self.trainers[brain_name] = PPOTrainer(
                sess, self.env.brains[brain_name], lesson_length, params,
                self.train_model, self.seed, self.run_id)
        else:
            raise UnityEnvironmentException('The trainer config contains '
                                            'an unknown trainer type for '
                                            'brain {}'.format(brain_name))
def step(self) -> None:
    """
    Advance the environment by one step.

    The very first call is turned into a reset. Otherwise every group that
    has no action queued receives an empty action sized to its current agent
    count (zero if the group has no state yet), the actions are exchanged
    with the communicator, and local specs/state are refreshed from the
    reply. Queued actions are cleared afterwards.

    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityCommunicationException: If the communicator returns ``None``.
    """
    if self._is_first_message:
        return self.reset()
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")

    # fill the blanks for missing actions
    for group_name in self._env_specs:
        if group_name in self._env_actions:
            continue
        n_agents = (
            self._env_state[group_name].n_agents()
            if group_name in self._env_state
            else 0
        )
        spec = self._env_specs[group_name]
        self._env_actions[group_name] = spec.create_empty_action(n_agents)

    step_input = self._generate_step_input(self._env_actions)
    with hierarchical_timer("communicator.exchange"):
        outputs = self.communicator.exchange(step_input)
    if outputs is None:
        raise UnityCommunicationException("Communicator has stopped.")

    self._update_group_specs(outputs)
    self._update_state(outputs.rl_output)
    self._env_actions.clear()
def executable_launcher(self, file_name, docker_training, no_graphics, args):
    """
    Locate the Unity player binary for the current platform and start it.

    Known extensions (``.app``, ``.exe``, ``.x86_64``, ``.x86``) are removed
    from ``file_name``, then platform-specific glob patterns are tried in
    order, first relative to the current working directory and then as
    given. The first match is launched and stored in ``self.proc1``.

    :param file_name: Name or path of the environment binary.
    :param docker_training: If true, launch through ``xvfb-run`` in a shell.
    :param no_graphics: If true, pass ``-nographics -batchmode`` (only used
        when ``docker_training`` is false).
    :param args: Extra command line arguments (only used when
        ``docker_training`` is false).
    :raises UnityEnvironmentException: If no binary matches ``file_name`` or
        the binary cannot be executed because of missing permissions.
    """
    # Imported locally so the method does not rely on the module's imports.
    import shlex

    cwd = os.getcwd()
    file_name = (
        file_name.strip()
        .replace(".app", "")
        .replace(".exe", "")
        .replace(".x86_64", "")
        .replace(".x86", "")
    )
    true_filename = os.path.basename(os.path.normpath(file_name))
    logger.debug("The true file name is {}".format(true_filename))

    # Glob patterns for the current platform, highest priority first.
    if platform == "linux" or platform == "linux2":
        patterns = [
            os.path.join(cwd, file_name) + ".x86_64",
            os.path.join(cwd, file_name) + ".x86",
            file_name + ".x86_64",
            file_name + ".x86",
        ]
    elif platform == "darwin":
        patterns = [
            os.path.join(cwd, file_name + ".app", "Contents", "MacOS", true_filename),
            os.path.join(file_name + ".app", "Contents", "MacOS", true_filename),
            os.path.join(cwd, file_name + ".app", "Contents", "MacOS", "*"),
            os.path.join(file_name + ".app", "Contents", "MacOS", "*"),
        ]
    elif platform == "win32":
        patterns = [
            os.path.join(cwd, file_name + ".exe"),
            file_name + ".exe",
        ]
    else:
        patterns = []

    launch_string = None
    for pattern in patterns:
        candidates = glob.glob(pattern)
        if candidates:
            launch_string = candidates[0]
            break

    if launch_string is None:
        self._close()
        raise UnityEnvironmentException(
            "Couldn't launch the {0} environment. "
            "Provided filename does not match any environments.".format(
                true_filename
            )
        )

    logger.debug("This is the launch string {}".format(launch_string))
    # Launch Unity environment
    if not docker_training:
        subprocess_args = [launch_string]
        if no_graphics:
            subprocess_args += ["-nographics", "-batchmode"]
        subprocess_args += ["--port", str(self.port)]
        subprocess_args += args
        try:
            self.proc1 = subprocess.Popen(
                subprocess_args,
                # start_new_session=True means that signals to the parent python process
                # (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
                # This is generally good since we want the environment to have a chance to shutdown,
                # but may be undesirable in some cases; if so, we'll add a command-line toggle.
                # Note that on Windows, the CTRL_C signal will still be sent.
                start_new_session=True,
            )
        except PermissionError as perm:
            # This is likely due to missing read or execute permissions on file.
            raise UnityEnvironmentException(
                f"Error when trying to launch environment - make sure "
                f"permissions are set correctly. For example "
                f'"chmod -R 755 {launch_string}"'
            ) from perm
    else:
        # Comments for future maintenance:
        # xvfb-run is a wrapper around Xvfb, a virtual xserver where all
        # rendering is done to virtual memory. It automatically creates a
        # new virtual server automatically picking a server number `auto-servernum`.
        # The server is passed the arguments using `server-args`, we are telling
        # Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
        # Note that 640 X 480 are the default width and height. The main reason for
        # us to add this is because we'd like to change the depth from the default
        # of 8 bits to 24.
        # Unfortunately, this means that we will need to pass the arguments through
        # a shell which is why we set `shell=True`. Now, this adds its own
        # complications. E.g SIGINT can bounce off the shell and not get propagated
        # to the child processes. This is why we add `exec`, so that the shell gets
        # launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
        # we created with `xvfb`.
        #
        # The binary path is shell-quoted because the command is interpreted by
        # a shell: a path with spaces or shell metacharacters would otherwise be
        # split or executed as shell syntax. Paths made only of safe characters
        # are passed through unchanged.
        # NOTE(review): `args` and `no_graphics` are not forwarded in this
        # branch; left as-is to preserve behavior - confirm whether intended.
        docker_ls = (
            "exec xvfb-run --auto-servernum"
            " --server-args='-screen 0 640x480x24'"
            " {0} --port {1}"
        ).format(shlex.quote(launch_string), str(self.port))
        self.proc1 = subprocess.Popen(
            docker_ls,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
        )
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
    some general training session options.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: Mapping from brain name to the trainer created for it
    :raises UnityEnvironmentException: If a brain's section aliases form a
        cycle, or its 'trainer' entry is not a known trainer type.
    """
    trainers = {}
    trainer_parameters_dict = {}
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=str(run_id) + "_" + brain_name
        )
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name
        )
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            # A non-dict value is an alias naming another section. Follow the
            # chain, remembering visited keys so that a cyclic alias (e.g.
            # A -> B -> A) is reported instead of looping forever.
            _brain_key: Any = brain_name
            visited_keys = {_brain_key}
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular reference "
                        "for brain {}".format(brain_name)
                    )
                visited_keys.add(_brain_key)
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    for brain_name in external_brains:
        if trainer_parameters_dict[brain_name]["trainer"] == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                external_brains[brain_name],
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                external_brains[brain_name],
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
            trainers[brain_name] = PPOTrainer(
                external_brains[brain_name],
                meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if meta_curriculum
                else 1,
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
                multi_gpu,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "sac":
            trainers[brain_name] = SACTrainer(
                external_brains[brain_name],
                meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if meta_curriculum
                else 1,
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )
    return trainers
def __init__(
    self,
    file_name: Optional[str] = None,
    worker_id: int = 0,
    base_port: int = 5005,
    seed: int = 0,
    docker_training: bool = False,
    no_graphics: bool = False,
    timeout_wait: int = 60,
    args: Optional[List[str]] = None,
    side_channels: Optional[List[SideChannel]] = None,
):
    """
    Starts a new unity environment and establishes a connection with the environment.
    Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
    Ensure that the network where training takes place is secure.

    :string file_name: Name of Unity environment binary. If None, no binary is launched and the
        communicator waits for an already-running environment (worker_id must then be 0).
    :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
    :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
    :int seed: Seed sent to the environment in the initialization message.
    :bool docker_training: Informs this class whether the process is being run within a container.
    :bool no_graphics: Whether to run the Unity simulator in no-graphics mode
    :int timeout_wait: Time (in seconds) to wait for connection from environment.
    :list args: Addition Unity command line arguments
    :list side_channels: Additional side channel for no-rl communication with Unity
    :raises UnityEnvironmentException: If two side channels share a channel type, if file_name is
        None while worker_id is not 0, or if the API versions of Unity and Python differ.
    """
    args = args or []
    atexit.register(self._close)
    self.port = base_port + worker_id
    self._buffer_size = 12000
    self._version_ = UnityEnvironment.API_VERSION
    # If true, this means the environment was successfully loaded
    self._loaded = False
    # The process that is started. If None, no process was started
    self.proc1 = None
    self.timeout_wait: int = timeout_wait
    self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
    self.worker_id = worker_id
    self.side_channels: Dict[int, SideChannel] = {}
    if side_channels is not None:
        for _sc in side_channels:
            if _sc.channel_type in self.side_channels:
                raise UnityEnvironmentException(
                    "There cannot be two side channels with the same channel type {0}.".format(
                        _sc.channel_type
                    )
                )
            self.side_channels[_sc.channel_type] = _sc

    # If the environment name is None, a new environment will not be launched
    # and the communicator will directly try to connect to an existing unity environment.
    # If the worker-id is not 0 and the environment name is None, an error is thrown
    if file_name is None and worker_id != 0:
        raise UnityEnvironmentException(
            "If the environment name is None, "
            "the worker-id must be 0 in order to connect with the Editor."
        )
    if file_name is not None:
        self.executable_launcher(file_name, docker_training, no_graphics, args)
    else:
        logger.info(
            f"Listening on port {self.port}. "
            f"Start training by pressing the Play button in the Unity Editor."
        )
    self._loaded = True

    rl_init_parameters_in = UnityRLInitializationInputProto(seed=seed)
    try:
        aca_output = self.send_academy_parameters(rl_init_parameters_in)
        aca_params = aca_output.rl_initialization_output
    except UnityTimeOutException:
        self._close()
        raise
    # TODO : think of a better way to expose the academyParameters
    self._unity_version = aca_params.version
    if self._unity_version != self._version_:
        self._close()
        # A space now follows the URL: the adjacent literals previously fused
        # into "...latest_releaseto download", producing a broken link.
        raise UnityEnvironmentException(
            f"The API number is not compatible between Unity and python. "
            f"Python API: {self._version_}, Unity API: {self._unity_version}.\n"
            f"Please go to https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release "
            f"to download the latest version of ML-Agents."
        )
    self._env_state: Dict[str, BatchedStepResult] = {}
    self._env_specs: Dict[str, AgentGroupSpec] = {}
    self._env_actions: Dict[str, np.ndarray] = {}
    self._is_first_message = True
    self._update_group_specs(aca_output)
def initialize_trainers(
    self,
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
) -> None:
    """
    Create a trainer for every brain in ``external_brains``.

    For each brain the 'default' section of ``trainer_config`` is copied,
    given summary/model paths and the checkpoint count, then overlaid with
    the brain's own section (string values are followed as aliases). The
    'trainer' entry of the result selects the trainer class; trainers are
    stored in ``self.trainers``.

    :param trainer_config: The configurations of the trainers
    :param external_brains: Brain parameters keyed by brain name
    :raises UnityEnvironmentException: If a brain's 'trainer' entry is not
        one of 'offline_bc', 'online_bc' or 'ppo'.
    """
    params_by_brain = {}
    for brain_name in external_brains:
        params = trainer_config["default"].copy()
        summary_name = str(self.run_id) + "_" + brain_name
        params["summary_path"] = "{}/{}".format(self.summaries_dir, summary_name)
        params["model_path"] = "{}/{}".format(self.model_path, brain_name)
        params["keep_checkpoints"] = self.keep_checkpoints

        if brain_name in trainer_config:
            section_key: Any = brain_name
            while not isinstance(trainer_config[section_key], dict):
                section_key = trainer_config[section_key]
            for option, value in trainer_config[section_key].items():
                params[option] = value

        params_by_brain[brain_name] = params.copy()

    for brain_name in external_brains:
        params = params_by_brain[brain_name]
        trainer_type = params["trainer"]

        if trainer_type == "offline_bc":
            self.trainers[brain_name] = OfflineBCTrainer(
                brain=external_brains[brain_name],
                trainer_parameters=params,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
        elif trainer_type == "online_bc":
            self.trainers[brain_name] = OnlineBCTrainer(
                brain=external_brains[brain_name],
                trainer_parameters=params,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
        elif trainer_type == "ppo":
            # Find lesson length based on the form of learning
            lesson_length = (
                self.meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if self.meta_curriculum
                else 1
            )
            self.trainers[brain_name] = PPOTrainer(
                brain=external_brains[brain_name],
                reward_buff_cap=lesson_length,
                trainer_parameters=params,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
            # Metrics are only registered for PPO trainers.
            self.trainer_metrics[brain_name] = self.trainers[brain_name].trainer_metrics
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )
def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
    """
    Create a trainer for every brain in ``self.external_brains``.

    Per brain, the 'default' section is copied, extended with summary/model
    paths and the checkpoint count, and overlaid with the brain's own section
    (string values are followed as aliases to other sections). The resulting
    'trainer' entry picks the trainer class stored in ``self.trainers``.

    :param trainer_config: The configurations of the trainers
    :raises UnityEnvironmentException: If a brain's 'trainer' entry is not
        one of 'offline_bc', 'online_bc' or 'ppo'.
    """
    resolved_params = {}
    for brain_name in self.external_brains:
        settings = trainer_config["default"].copy()
        run_label = str(self.run_id) + "_" + brain_name
        settings["summary_path"] = "{}/{}".format(self.summaries_dir, run_label)
        settings["model_path"] = "{}/{}".format(self.model_path, brain_name)
        settings["keep_checkpoints"] = self.keep_checkpoints

        if brain_name in trainer_config:
            target = brain_name
            while not isinstance(trainer_config[target], dict):
                target = trainer_config[target]
            settings.update(trainer_config[target])

        resolved_params[brain_name] = settings.copy()

    for brain_name in self.external_brains:
        settings = resolved_params[brain_name]
        kind = settings["trainer"]
        brain = self.external_brains[brain_name]

        if kind == "offline_bc":
            self.trainers[brain_name] = OfflineBCTrainer(
                brain, settings, self.train_model, self.load_model,
                self.seed, self.run_id,
            )
        elif kind == "online_bc":
            self.trainers[brain_name] = OnlineBCTrainer(
                brain, settings, self.train_model, self.load_model,
                self.seed, self.run_id,
            )
        elif kind == "ppo":
            if self.meta_curriculum:
                curriculum = self.meta_curriculum.brains_to_curriculums[brain_name]
                min_lesson_length = curriculum.min_lesson_length
            else:
                min_lesson_length = 0
            self.trainers[brain_name] = PPOTrainer(
                brain, min_lesson_length, settings, self.train_model,
                self.load_model, self.seed, self.run_id,
            )
            # Metrics are only registered for PPO trainers.
            self.trainer_metrics[brain_name] = self.trainers[brain_name].trainer_metrics
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )