def training_started(self, behavior_name: str, config: TrainerSettings) -> None:
    """
    Queues a TrainingBehaviorInitialized message built from the trainer
    settings of the given behavior.

    :param behavior_name: Name of the behavior whose training is starting.
    :param config: Trainer settings that the message fields are read from.
    """
    reward_signals = config.reward_signals
    network = config.network_settings

    behavior_msg = TrainingBehaviorInitialized(
        behavior_name=behavior_name,
        trainer_type=config.trainer_type.value,
        # One flag per reward signal type, set when it appears in the config.
        extrinsic_reward_enabled=(RewardSignalType.EXTRINSIC in reward_signals),
        gail_reward_enabled=(RewardSignalType.GAIL in reward_signals),
        curiosity_reward_enabled=(RewardSignalType.CURIOSITY in reward_signals),
        rnd_reward_enabled=(RewardSignalType.RND in reward_signals),
        # Optional config sections count as "enabled" when they are not None.
        behavioral_cloning_enabled=config.behavioral_cloning is not None,
        recurrent_enabled=network.memory is not None,
        visual_encoder=network.vis_encode_type.value,
        num_network_layers=network.num_layers,
        num_network_hidden_units=network.hidden_units,
        trainer_threaded=config.threaded,
        self_play_enabled=config.self_play is not None,
        curriculum_enabled=self._behavior_uses_curriculum(behavior_name),
    )

    # Pack the message into an Any container and queue its serialized bytes.
    packed = Any()
    packed.Pack(behavior_msg)

    outgoing = OutgoingMessage()
    outgoing.set_raw_bytes(packed.SerializeToString())
    super().queue_message_to_send(outgoing)
def send_raw_data(self, data: bytearray) -> None:
    """
    Wraps the given bytes in an OutgoingMessage and queues it, so that it is
    sent by the environment at the next call to step.

    :param data: Raw payload to place in the outgoing message unchanged.
    """
    outgoing = OutgoingMessage()
    outgoing.set_raw_bytes(data)
    super().queue_message_to_send(outgoing)
def serialize_int_list_prop(key: str, value: List[int]) -> OutgoingMessage:
    """
    Builds an OutgoingMessage holding a key and a list of integers.

    Byte layout (all little-endian, no padding):
      * int32   - length of the ASCII-encoded key
      * bytes   - the ASCII-encoded key
      * int32   - number of elements in ``value``
      * uint32s - each element of ``value``, in order

    :param key: Property name; encoded as ASCII.
    :param value: Integers to serialize; each is packed as an unsigned 32-bit value.
    :return: An OutgoingMessage whose raw bytes are the layout above.
    """
    key_bytes = key.encode("ascii")
    count = len(value)

    payload = bytearray(struct.pack("<i", len(key_bytes)))
    payload += key_bytes
    payload += struct.pack("<i", count)
    # A repeat count packs every element in one call; "<" disables alignment
    # padding, so the bytes match packing each element separately.
    payload += struct.pack(f"<{count}I", *value)

    outgoing = OutgoingMessage()
    outgoing.set_raw_bytes(payload)
    return outgoing
def environment_initialized(self) -> None:
    """
    Queues a TrainingEnvironmentInitialized message reporting the running
    Python version and the mlagents_envs version. The mlagents and torch
    fields are filled with fixed placeholder strings.
    """
    # First three entries of sys.version_info are (major, minor, patch).
    python_version = ".".join(str(part) for part in sys.version_info[:3])

    init_msg = TrainingEnvironmentInitialized(
        python_version=python_version,
        mlagents_version="Custom",
        mlagents_envs_version=mlagents_envs.__version__,
        torch_version="Unknown",
        torch_device_type="Unknown",
    )

    # Pack the message into an Any container and queue its serialized bytes.
    packed = Any()
    packed.Pack(init_msg)

    outgoing = OutgoingMessage()
    outgoing.set_raw_bytes(packed.SerializeToString())  # type: ignore
    super().queue_message_to_send(outgoing)
def environment_initialized(self, run_options: RunOptions) -> None:
    """
    Stores the run options on the instance and queues a
    TrainingEnvironmentInitialized message describing the runtime.

    The message carries the Python, mlagents, mlagents_envs and torch
    versions, the default torch device type, the number of environments and
    the number of environment parameters.

    :param run_options: Options for this run; kept as ``self.run_options``.
    """
    self.run_options = run_options

    # First three entries of sys.version_info are (major, minor, patch).
    python_version = ".".join(str(part) for part in sys.version_info[:3])

    # A missing or empty environment-parameter collection counts as zero.
    env_params = run_options.environment_parameters
    if env_params:
        num_env_params = len(env_params)
    else:
        num_env_params = 0

    init_msg = TrainingEnvironmentInitialized(
        python_version=python_version,
        mlagents_version=mlagents.trainers.__version__,
        mlagents_envs_version=mlagents_envs.__version__,
        torch_version=torch_utils.torch.__version__,
        torch_device_type=torch_utils.default_device().type,
        num_envs=run_options.env_settings.num_envs,
        num_environment_parameters=num_env_params,
    )

    # Pack the message into an Any container and queue its serialized bytes.
    packed = Any()
    packed.Pack(init_msg)

    outgoing = OutgoingMessage()
    outgoing.set_raw_bytes(packed.SerializeToString())
    super().queue_message_to_send(outgoing)