def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The corresponding BehaviorSpec for this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    """
    self.m_size = 0
    self.trainer_settings = trainer_settings
    self.network_settings: NetworkSettings = trainer_settings.network_settings
    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.sequence_length = 1
    self.seed = seed
    self.behavior_spec = behavior_spec
    self.act_size = (
        list(behavior_spec.discrete_action_branches)
        if behavior_spec.is_action_discrete()
        else [behavior_spec.action_size]
    )
    self.vec_obs_size = sum(
        shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
    )
    self.vis_obs_size = sum(
        1 for shape in behavior_spec.observation_shapes if len(shape) == 3
    )
    self.use_recurrent = self.network_settings.memory is not None
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = self.behavior_spec.action_size
    self.previous_action_dict: Dict[str, np.ndarray] = {}
    self.normalize = self.network_settings.normalize
    self.use_continuous_act = behavior_spec.is_action_continuous()
    self.model_path = model_path
    self.initialize_path = self.trainer_settings.init_path
    self.keep_checkpoints = self.trainer_settings.keep_checkpoints
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self.saver: Optional[tf.Operation] = None
    if self.network_settings.memory is not None:
        self.m_size = self.network_settings.memory.memory_size
        self.sequence_length = self.network_settings.memory.sequence_length
    self._initialize_tensorflow_references()
    self.load = load

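# A minimal, self-contained sketch of how the derived observation sizes above
# come out of behavior_spec.observation_shapes. The example shapes are
# hypothetical: two vector observations and one 84x84 RGB visual observation.
observation_shapes = [(8,), (4,), (84, 84, 3)]
vec_obs_size = sum(shape[0] for shape in observation_shapes if len(shape) == 1)
vis_obs_size = sum(1 for shape in observation_shapes if len(shape) == 3)
assert vec_obs_size == 12 and vis_obs_size == 1
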
def __init__(self, seed, brain, trainer_parameters, load=False):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_parameters: The trainer parameters.
    :param load: If True, load the model from the output path. Otherwise, create a new model.
    """
    self._version_number_ = 2
    self.m_size = 0
    # for ghost trainer save/load snapshots
    self.assign_phs = []
    self.assign_ops = []
    self.inference_dict = {}
    self.update_dict = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain
    self.act_size = brain.vector_action_space_size
    self.vec_obs_size = brain.vector_observation_space_size
    self.vis_obs_size = brain.number_visual_observations
    self.use_recurrent = trainer_parameters["use_recurrent"]
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = len(self.brain.vector_action_space_size)
    self.previous_action_dict: Dict[str, np.ndarray] = {}
    self.normalize = trainer_parameters.get("normalize", False)
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        self.num_branches = self.brain.vector_action_space_size[0]
    self.model_path = trainer_parameters["output_path"]
    self.initialize_path = trainer_parameters.get("init_path", None)
    self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self.saver = None
    if self.use_recurrent:
        self.m_size = trainer_parameters["memory_size"]
        self.sequence_length = trainer_parameters["sequence_length"]
        if self.m_size == 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is 0 even "
                "though the trainer uses recurrent.".format(brain.brain_name)
            )
        elif self.m_size % 2 != 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is {1} "
                "but it must be divisible by 2.".format(
                    brain.brain_name, self.m_size
                )
            )
    self._initialize_tensorflow_references()
    self.load = load

def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    tanh_squash: bool = False,
    reparameterize: bool = False,
    condition_sigma_on_obs: bool = True,
    create_tf_graph: bool = True,
):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The corresponding BehaviorSpec for this policy.
    :param trainer_settings: The trainer parameters.
    :param tanh_squash: Whether to use a tanh function on the continuous output, or a clipped output.
    :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
    :param condition_sigma_on_obs: Whether the standard deviation of the policy is conditioned on the observations.
    :param create_tf_graph: Whether to immediately build the TensorFlow graph on construction.
    """
    super().__init__(
        seed,
        behavior_spec,
        trainer_settings,
        tanh_squash,
        reparameterize,
        condition_sigma_on_obs,
    )
    if (
        self.behavior_spec.action_spec.continuous_size > 0
        and self.behavior_spec.action_spec.discrete_size > 0
    ):
        raise UnityPolicyException(
            "TensorFlow does not support mixed action spaces. Please run with the Torch framework."
        )
    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.first_normalization_update: bool = False
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self._initialize_tensorflow_references()
    self.grads = None
    self.update_batch: Optional[tf.Operation] = None
    self.trainable_variables: List[tf.Variable] = []
    self.rank = get_rank()
    if create_tf_graph:
        self.create_tf_graph()

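# Hedged illustration of the mixed-action guard above. ActionSpecStub is a
# stand-in for the real mlagents ActionSpec, kept only to show the check: a
# spec with both continuous and discrete branches must be rejected.
from dataclasses import dataclass

@dataclass
class ActionSpecStub:
    continuous_size: int
    discrete_size: int

def is_mixed_action_space(action_spec: ActionSpecStub) -> bool:
    return action_spec.continuous_size > 0 and action_spec.discrete_size > 0

assert is_mixed_action_space(ActionSpecStub(continuous_size=2, discrete_size=3))
assert not is_mixed_action_space(ActionSpecStub(continuous_size=2, discrete_size=0))
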
def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
    tanh_squash: bool = False,
    reparameterize: bool = False,
    condition_sigma_on_obs: bool = True,
    create_tf_graph: bool = True,
):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The corresponding BehaviorSpec for this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    :param tanh_squash: Whether to use a tanh function on the continuous output, or a clipped output.
    :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
    :param condition_sigma_on_obs: Whether the standard deviation of the policy is conditioned on the observations.
    :param create_tf_graph: Whether to immediately build the TensorFlow graph on construction.
    """
    super().__init__(
        seed,
        behavior_spec,
        trainer_settings,
        model_path,
        load,
        tanh_squash,
        reparameterize,
        condition_sigma_on_obs,
    )
    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.first_normalization_update: bool = False
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self.saver: Optional[tf.Operation] = None
    self._initialize_tensorflow_references()
    self.grads = None
    self.update_batch: Optional[tf.Operation] = None
    self.trainable_variables: List[tf.Variable] = []
    if create_tf_graph:
        self.create_tf_graph()

def _dict_to_tensorboard(self, name: str, input_dict: Dict[str, Any]) -> str:
    """
    Convert a dict to a Tensorboard-encoded string.
    :param name: The name of the text.
    :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
    :return: The serialized text summary, or an empty string if it could not be written.
    """
    try:
        with tf.Session(config=generate_session_config()) as sess:
            s_op = tf.summary.text(
                name,
                tf.convert_to_tensor(
                    [[str(x), str(input_dict[x])] for x in input_dict]
                ),
            )
            s = sess.run(s_op)
            return s
    except Exception:
        logger.warning("Could not write text summary for Tensorboard.")
        return ""

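# Standalone TF1 sketch of the same pattern as _dict_to_tensorboard above,
# plus how its output can be consumed: tf.summary.text yields a serialized
# Summary protobuf that a tf.summary.FileWriter accepts directly. The run
# directory "results/run_logs" and the hyperparameter values are hypothetical.
import tensorflow as tf

def dict_to_text_summary(name, input_dict):
    # Encode the dict as a 2-column table; Tensorboard renders it as Markdown.
    with tf.Session() as sess:
        s_op = tf.summary.text(
            name,
            tf.convert_to_tensor([[str(k), str(v)] for k, v in input_dict.items()]),
        )
        return sess.run(s_op)

encoded = dict_to_text_summary(
    "Hyperparameters", {"learning_rate": 3e-4, "batch_size": 1024}
)
writer = tf.summary.FileWriter("results/run_logs")
writer.add_summary(encoded, global_step=0)
writer.close()
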
def __init__(self, seed, brain, trainer_parameters):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_parameters: The trainer parameters.
    """
    self.m_size = None
    self.model = None
    self.inference_dict = {}
    self.update_dict = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain
    self.use_recurrent = trainer_parameters["use_recurrent"]
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.reward_signals: Dict[str, "RewardSignal"] = {}
    self.num_branches = len(self.brain.vector_action_space_size)
    self.previous_action_dict: Dict[str, np.ndarray] = {}
    self.normalize = trainer_parameters.get("normalize", False)
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        self.num_branches = self.brain.vector_action_space_size[0]
    self.model_path = trainer_parameters["model_path"]
    self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self.saver = None
    if self.use_recurrent:
        self.m_size = trainer_parameters["memory_size"]
        self.sequence_length = trainer_parameters["sequence_length"]
        if self.m_size == 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is 0 even "
                "though the trainer uses recurrent.".format(brain.brain_name)
            )
        elif self.m_size % 4 != 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is {1} "
                "but it must be divisible by 4.".format(brain.brain_name, self.m_size)
            )

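# Pure-Python sketch of the recurrent-memory validation above, extracted as a
# reusable helper. The divisor is 4 in this older version and 2 in the newer
# constructors; ValueError stands in for UnityPolicyException here.
def validate_memory_size(brain_name: str, m_size: int, divisor: int = 4) -> None:
    if m_size == 0:
        raise ValueError(
            "The memory size for brain {0} is 0 even "
            "though the trainer uses recurrent.".format(brain_name)
        )
    if m_size % divisor != 0:
        raise ValueError(
            "The memory size for brain {0} is {1} "
            "but it must be divisible by {2}.".format(brain_name, m_size, divisor)
        )

validate_memory_size("3DBall", 128)  # passes: 128 % 4 == 0
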
def write_tensorboard_text(self, key: str, input_dict: Dict[str, Any]) -> None:
    """
    Saves text to Tensorboard. Note: Only works on tensorflow r1.2 or above.
    :param key: The name of the text.
    :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
    """
    try:
        with tf.Session(config=tf_utils.generate_session_config()) as sess:
            s_op = tf.summary.text(
                key,
                tf.convert_to_tensor(
                    [[str(x), str(input_dict[x])] for x in input_dict]
                ),
            )
            s = sess.run(s_op)
            self.stats_reporter.write_text(s, self.get_step)
    except Exception:
        LOGGER.info("Could not write text summary for Tensorboard.")

def __init__(
    self,
    seed: int,
    brain: BrainParameters,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
):
    """
    Initialize the policy.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    """
    self.m_size = 0
    self.trainer_settings = trainer_settings
    self.network_settings: NetworkSettings = trainer_settings.network_settings
    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain
    self.act_size = brain.vector_action_space_size
    self.vec_obs_size = brain.vector_observation_space_size
    self.vis_obs_size = brain.number_visual_observations
    self.use_recurrent = self.network_settings.memory is not None
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = len(self.brain.vector_action_space_size)
    self.previous_action_dict: Dict[str, np.ndarray] = {}
    self.normalize = self.network_settings.normalize
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        self.num_branches = self.brain.vector_action_space_size[0]
    self.model_path = model_path
    self.initialize_path = self.trainer_settings.init_path
    self.keep_checkpoints = self.trainer_settings.keep_checkpoints
    self.graph = tf.Graph()
    self.sess = tf.Session(
        config=tf_utils.generate_session_config(), graph=self.graph
    )
    self.saver: Optional[tf.Operation] = None
    if self.network_settings.memory is not None:
        self.m_size = self.network_settings.memory.memory_size
        self.sequence_length = self.network_settings.memory.sequence_length
        if self.m_size == 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is 0 even "
                "though the trainer uses recurrent.".format(brain.brain_name)
            )
        elif self.m_size % 2 != 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is {1} "
                "but it must be divisible by 2.".format(brain.brain_name, self.m_size)
            )
    self._initialize_tensorflow_references()
    self.load = load