def test_min_visual_size():
    """
    Check each encoder type at, and just below, its minimum visual resolution.

    At the minimum resolution the resolution check and the encoder construction
    are both expected to succeed. One pixel below it, the resolution check must
    raise UnityTrainerException and building the encoder must raise as well.
    """

    def _make_visual_input(size):
        # Square, 3-channel visual input of the requested side length.
        resolution = Tensor3DShape(width=size, height=size, num_channels=3)
        return ModelUtils.create_visual_input(resolution, "test_min_visual_size")

    def _build_encoder(visual_input, enc_type):
        encoder_fn = ModelUtils.get_encoder_for_type(enc_type)
        encoder_fn(visual_input, 32, ModelUtils.swish, 1, "test", False)

    # Make sure each EncoderType has an entry in MIN_RESOLUTION_FOR_ENCODER
    assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)

    for encoder_type in EncoderType:
        with tf.Graph().as_default():
            min_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
            vis_input = _make_visual_input(min_size)
            ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
            _build_encoder(vis_input, encoder_type)

        # Anything under the min size should raise an exception.
        # If not, decrease the min size!
        with pytest.raises(Exception):
            with tf.Graph().as_default():
                too_small = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
                vis_input = _make_visual_input(too_small)

                # Make sure we'd hit a friendly error during model setup time.
                with pytest.raises(UnityTrainerException):
                    ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)

                _build_encoder(vis_input, encoder_type)
def test_tanh_distribution():
    """
    Sample from a tanh-squashed GaussianDistribution and check its outputs.

    Verifies that the sample and log-prob outputs have VECTOR_ACTION_SPACE[0]
    columns, that every sampled action component lies in [-1, 1], and that
    total_log_probs has a leading dimension of 1.
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
        distribution = GaussianDistribution(
            logits,
            act_size=VECTOR_ACTION_SPACE,
            reparameterize=False,
            tanh_squash=True,
        )
        # Open exactly one session, as a context manager, so it is closed on
        # exit even when an assertion fails.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Run a single sample once before the repeated checks.
            sess.run(distribution.sample)
            for _ in range(10):
                output = sess.run([distribution.sample, distribution.log_probs])
                for out in output:
                    assert out.shape[1] == VECTOR_ACTION_SPACE[0]
                # Assert action never exceeds [-1,1]
                action = output[0][0]
                for act in action:
                    assert -1 <= act <= 1
            output = sess.run([distribution.total_log_probs])
            assert output[0].shape[0] == 1
def convert_frozen_to_onnx(
    settings: SerializationSettings, frozen_graph_def: tf.GraphDef
) -> Any:
    """
    Build an ONNX model from a frozen TensorFlow graph definition.

    :param settings: Serialization settings; ``onnx_opset`` selects the opset
        and ``brain_name`` is passed to ``make_model``.
    :param frozen_graph_def: The frozen GraphDef to convert.
    :return: The model produced by ``make_model`` on the optimized ONNX graph.
    """
    # This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py
    inputs = _get_input_node_names(frozen_graph_def)
    outputs = _get_output_node_names(frozen_graph_def)
    logger.info(f"onnx export - inputs:{inputs} outputs:{outputs}")

    optimized_graph_def = tf_optimize(
        inputs, outputs, frozen_graph_def, fold_constant=True
    )

    with tf.Graph().as_default() as tf_graph:
        tf.import_graph_def(optimized_graph_def, name="")
    # The conversion runs with a session bound to the imported graph.
    with tf.Session(graph=tf_graph):
        converted_graph = process_tf_graph(
            tf_graph,
            input_names=inputs,
            output_names=outputs,
            opset=settings.onnx_opset,
        )

    optimized_onnx_graph = optimizer.optimize_graph(converted_graph)
    return optimized_onnx_graph.make_model(settings.brain_name)
def test_gaussian_distribution():
    """
    Sample from an unsquashed GaussianDistribution and check shapes and entropy.

    Verifies that the sample and log-prob outputs have VECTOR_ACTION_SPACE[0]
    columns, that total_log_probs has a leading dimension of 1, and that the
    entropy is about 2.42 when the "log_std/BiasAdd:0" tensor is fed 1.0.
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[1, 1]], trainable=True, dtype=tf.float32)
        distribution = GaussianDistribution(
            logits,
            act_size=VECTOR_ACTION_SPACE,
            reparameterize=False,
            tanh_squash=False,
        )
        # Open exactly one session, as a context manager, so it is closed on
        # exit even when an assertion fails.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Run a single sample once before the repeated checks.
            sess.run(distribution.sample)
            for _ in range(10):
                output = sess.run([distribution.sample, distribution.log_probs])
                for out in output:
                    assert out.shape[1] == VECTOR_ACTION_SPACE[0]
            output = sess.run([distribution.total_log_probs])
            assert output[0].shape[0] == 1

            # Test entropy is correct: override the log-std tensor by feeding
            # it directly.
            log_std_tensor = tf.get_default_graph().get_tensor_by_name(
                "log_std/BiasAdd:0"
            )
            feed_dict = {log_std_tensor: [[1.0, 1.0]]}
            entropy = sess.run([distribution.entropy], feed_dict=feed_dict)
            # Entropy with log_std of 1.0 should be 2.42
            assert pytest.approx(entropy[0], 0.01) == 2.42
def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
):
    """
    Initialize the policy's bookkeeping state, TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The behavior spec describing the observations and
        actions of this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    """
    self.m_size = 0
    self.trainer_settings = trainer_settings
    self.network_settings: NetworkSettings = trainer_settings.network_settings

    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []

    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.sequence_length = 1
    self.seed = seed
    self.behavior_spec = behavior_spec

    # Discrete policies get one entry per branch; continuous ones a single size.
    if behavior_spec.is_action_discrete():
        self.act_size = list(behavior_spec.discrete_action_branches)
    else:
        self.act_size = [behavior_spec.action_size]

    # Rank-1 shapes are vector observations (summed element count);
    # rank-3 shapes are visual observations (counted).
    obs_shapes = behavior_spec.observation_shapes
    self.vec_obs_size = sum(shape[0] for shape in obs_shapes if len(shape) == 1)
    self.vis_obs_size = len([shape for shape in obs_shapes if len(shape) == 3])

    self.use_recurrent = self.network_settings.memory is not None
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = self.behavior_spec.action_size
    self.previous_action_dict: Dict[str, np.array] = {}
    self.normalize = self.network_settings.normalize
    self.use_continuous_act = behavior_spec.is_action_continuous()
    self.model_path = model_path
    self.initialize_path = self.trainer_settings.init_path
    self.keep_checkpoints = self.trainer_settings.keep_checkpoints

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver: Optional[tf.Operation] = None

    # Memory settings, when present, override the non-recurrent defaults above.
    memory_settings = self.network_settings.memory
    if memory_settings is not None:
        self.m_size = memory_settings.memory_size
        self.sequence_length = memory_settings.sequence_length

    self._initialize_tensorflow_references()
    self.load = load
def __init__(self, seed, brain, trainer_parameters, load=False):
    """
    Initialize the policy's bookkeeping state, TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_parameters: The trainer parameters.
    :param load: Stored on the instance as ``self.load``.
    :raises UnityPolicyException: If the trainer is recurrent and the memory
        size is 0 or not divisible by 2.
    """
    self._version_number_ = 2
    self.m_size = 0

    # for ghost trainer save/load snapshots
    self.assign_phs = []
    self.assign_ops = []

    self.inference_dict = {}
    self.update_dict = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain

    action_space = brain.vector_action_space_size
    self.act_size = action_space
    self.vec_obs_size = brain.vector_observation_space_size
    self.vis_obs_size = brain.number_visual_observations
    self.use_recurrent = trainer_parameters["use_recurrent"]
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = len(action_space)
    self.previous_action_dict: Dict[str, np.array] = {}
    self.normalize = trainer_parameters.get("normalize", False)
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        # Continuous control: the first entry of the action space is used.
        self.num_branches = action_space[0]

    self.model_path = trainer_parameters["output_path"]
    self.initialize_path = trainer_parameters.get("init_path", None)
    self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver = None

    if self.use_recurrent:
        memory_size = trainer_parameters["memory_size"]
        self.m_size = memory_size
        self.sequence_length = trainer_parameters["sequence_length"]
        if memory_size == 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is 0 even "
                "though the trainer uses recurrent.".format(brain.brain_name)
            )
        if memory_size % 2 != 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is {1} "
                "but it must be divisible by 2.".format(
                    brain.brain_name, memory_size
                )
            )

    self._initialize_tensorflow_references()
    self.load = load
def __init__(self, seed, brain, trainer_parameters):
    """
    Initialize the policy's bookkeeping state, TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_parameters: The trainer parameters.
    :raises UnityPolicyException: If the trainer is recurrent and the memory
        size is 0 or not divisible by 4.
    """
    self.m_size = None
    self.model = None
    self.inference_dict = {}
    self.update_dict = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain
    self.use_recurrent = trainer_parameters["use_recurrent"]
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.reward_signals: Dict[str, "RewardSignal"] = {}

    action_space = brain.vector_action_space_size
    self.num_branches = len(action_space)
    self.previous_action_dict: Dict[str, np.array] = {}
    self.normalize = trainer_parameters.get("normalize", False)
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        # Continuous control: the first entry of the action space is used.
        self.num_branches = action_space[0]

    self.model_path = trainer_parameters["model_path"]
    self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)

    self.graph = tf.Graph()
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    # For multi-GPU training, set allow_soft_placement to True to allow
    # placing the operation into an alternative device automatically
    # to prevent exceptions if the device doesn't support the operation
    # or the device does not exist
    session_config.allow_soft_placement = True
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver = None

    # Memory settings are only read and validated for recurrent trainers.
    if not self.use_recurrent:
        return

    memory_size = trainer_parameters["memory_size"]
    self.m_size = memory_size
    self.sequence_length = trainer_parameters["sequence_length"]
    if memory_size == 0:
        raise UnityPolicyException(
            "The memory size for brain {0} is 0 even "
            "though the trainer uses recurrent.".format(brain.brain_name)
        )
    if memory_size % 4 != 0:
        raise UnityPolicyException(
            "The memory size for brain {0} is {1} "
            "but it must be divisible by 4.".format(brain.brain_name, memory_size)
        )
def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    tanh_squash: bool = False,
    reparameterize: bool = False,
    condition_sigma_on_obs: bool = True,
    create_tf_graph: bool = True,
):
    """
    Initialize the policy, its TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The behavior spec for this policy; its action spec
        must not mix continuous and discrete actions.
    :param trainer_settings: The trainer parameters.
    :param tanh_squash: Forwarded to the parent initializer.
    :param reparameterize: Forwarded to the parent initializer.
    :param condition_sigma_on_obs: Forwarded to the parent initializer.
    :param create_tf_graph: If True, call ``create_tf_graph()`` at the end of
        initialization.
    :raises UnityPolicyException: If the action spec has both continuous and
        discrete actions.
    """
    super().__init__(
        seed,
        behavior_spec,
        trainer_settings,
        tanh_squash,
        reparameterize,
        condition_sigma_on_obs,
    )

    action_spec = self.behavior_spec.action_spec
    if action_spec.continuous_size > 0 and action_spec.discrete_size > 0:
        raise UnityPolicyException(
            "TensorFlow does not support mixed action spaces. Please run with the Torch framework."
        )

    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []

    self.update_dict: Dict[str, tf.Tensor] = {}
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.first_normalization_update: bool = False

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self._initialize_tensorflow_references()

    self.grads = None
    self.update_batch: Optional[tf.Operation] = None
    self.trainable_variables: List[tf.Variable] = []
    self.rank = get_rank()

    if create_tf_graph:
        self.create_tf_graph()
def __init__(
    self,
    seed: int,
    behavior_spec: BehaviorSpec,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
    tanh_squash: bool = False,
    reparameterize: bool = False,
    condition_sigma_on_obs: bool = True,
    create_tf_graph: bool = True,
):
    """
    Initialize the policy, its TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param behavior_spec: The behavior spec for this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    :param tanh_squash: Forwarded to the parent initializer.
    :param reparameterize: Forwarded to the parent initializer.
    :param condition_sigma_on_obs: Forwarded to the parent initializer.
    :param create_tf_graph: If True, call ``create_tf_graph()`` at the end of
        initialization.
    """
    super().__init__(
        seed,
        behavior_spec,
        trainer_settings,
        model_path,
        load,
        tanh_squash,
        reparameterize,
        condition_sigma_on_obs,
    )

    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []

    self.update_dict: Dict[str, tf.Tensor] = {}
    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.first_normalization_update: bool = False

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver: Optional[tf.Operation] = None
    self._initialize_tensorflow_references()

    self.grads = None
    self.update_batch: Optional[tf.Operation] = None
    self.trainable_variables: List[tf.Variable] = []

    if create_tf_graph:
        self.create_tf_graph()
def test_multicategorical_distribution():
    """
    Sample from a MultiCategoricalDistribution with and without action masks.

    Unmasked: checks the log-prob width, the number of sampled branches, the
    per-branch action bounds, the total_log_probs leading dimension and a
    lower bound on the entropy. Masked: feeds a mask that enables only the
    first action of every branch and checks the sampled actions stay small.
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
        action_masks = tf.Variable(
            initial_value=[[1 for _ in range(sum(DISCRETE_ACTION_SPACE))]],
            trainable=True,
            dtype=tf.float32,
        )
        distribution = MultiCategoricalDistribution(
            logits, act_size=DISCRETE_ACTION_SPACE, action_masks=action_masks
        )
        # Open exactly one session, as a context manager, so it is closed on
        # exit even when an assertion fails.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Run a single sample once before the repeated checks.
            sess.run(distribution.sample)
            for _ in range(10):
                sample, log_probs, entropy = sess.run(
                    [distribution.sample, distribution.log_probs, distribution.entropy]
                )
                assert len(log_probs[0]) == sum(DISCRETE_ACTION_SPACE)
                # One sampled action per branch, each within its branch size.
                # NOTE(review): the upper bound is inclusive here; a strict
                # `<` may be intended if actions are 0-based indices - confirm.
                assert len(sample[0]) == len(DISCRETE_ACTION_SPACE)
                for i, act in enumerate(sample[0]):
                    assert 0 <= act <= DISCRETE_ACTION_SPACE[i]
                output = sess.run([distribution.total_log_probs])
                assert output[0].shape[0] == 1
                # Make sure entropy is correct
                assert entropy[0] > 3.8

            # Test masks: enable only the first action of each branch.
            mask = []
            for space in DISCRETE_ACTION_SPACE:
                mask.extend([1] + [0] * (space - 1))
            for _ in range(10):
                sample, log_probs = sess.run(
                    [distribution.sample, distribution.log_probs],
                    feed_dict={action_masks: [mask]},
                )
                for act in sample[0]:
                    assert 0 <= act <= 1
                # Make sure total_log_probs still evaluates after masked runs.
                sess.run([distribution.total_log_probs])
def convert_frozen_to_onnx(
    settings: SerializationSettings, frozen_graph_def: tf.GraphDef
) -> Any:
    """
    Build an ONNX model from a frozen TensorFlow graph definition.

    The integer values of nodes named in MODEL_CONSTANTS are captured before
    optimization and appended to the resulting model's graph initializer.

    :param settings: Serialization settings; ``onnx_opset`` selects the opset
        and ``brain_name`` is passed to ``make_model``.
    :param frozen_graph_def: The frozen GraphDef to convert.
    :return: The model produced by ``make_model``, with the saved constants
        added to ``graph.initializer``.
    """
    # This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py

    # Some constants in the graph need to be read by the inference system.
    # These aren't used by the model anywhere, so trying to make sure they
    # propagate through conversion and import is a losing battle. Instead,
    # save them now, so that we can add them back later.
    constant_values = {
        node.name: node.attr["value"].tensor.int_val[0]
        for node in frozen_graph_def.node
        if node.name in MODEL_CONSTANTS
    }

    inputs = _get_input_node_names(frozen_graph_def)
    outputs = _get_output_node_names(frozen_graph_def)
    logger.info(f"onnx export - inputs:{inputs} outputs:{outputs}")

    optimized_graph_def = tf_optimize(
        inputs, outputs, frozen_graph_def, fold_constant=True
    )

    with tf.Graph().as_default() as tf_graph:
        tf.import_graph_def(optimized_graph_def, name="")
    # The conversion runs with a session bound to the imported graph.
    with tf.Session(graph=tf_graph):
        converted_graph = process_tf_graph(
            tf_graph,
            input_names=inputs,
            output_names=outputs,
            opset=settings.onnx_opset,
        )

    optimized_onnx_graph = optimizer.optimize_graph(converted_graph)
    model_proto = optimized_onnx_graph.make_model(settings.brain_name)

    # Save the constant values back to the graph initializer.
    # This will ensure the importer gets them as global constants.
    model_proto.graph.initializer.extend(
        [
            _make_onnx_node_for_constant(name, value)
            for name, value in constant_values.items()
        ]
    )
    return model_proto
def __init__(self, seed, brain, trainer_parameters):
    """
    Initialize the policy's bookkeeping state, TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_parameters: The trainer parameters.
    :raises UnityPolicyException: If the trainer is recurrent and the memory
        size is 0 or not divisible by 4.
    """
    self.m_size = None
    self.model = None
    self.inference_dict = {}
    self.update_dict = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain
    self.use_recurrent = trainer_parameters["use_recurrent"]
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.reward_signals: Dict[str, "RewardSignal"] = {}

    action_space = brain.vector_action_space_size
    self.num_branches = len(action_space)
    self.previous_action_dict: Dict[str, np.array] = {}
    self.normalize = trainer_parameters.get("normalize", False)
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        # Continuous control: the first entry of the action space is used.
        self.num_branches = action_space[0]

    self.model_path = trainer_parameters["model_path"]
    self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver = None

    # Memory settings are only read and validated for recurrent trainers.
    if not self.use_recurrent:
        return

    memory_size = trainer_parameters["memory_size"]
    self.m_size = memory_size
    self.sequence_length = trainer_parameters["sequence_length"]
    if memory_size == 0:
        raise UnityPolicyException(
            "The memory size for brain {0} is 0 even "
            "though the trainer uses recurrent.".format(brain.brain_name)
        )
    if memory_size % 4 != 0:
        raise UnityPolicyException(
            "The memory size for brain {0} is {1} "
            "but it must be divisible by 4.".format(brain.brain_name, memory_size)
        )
def __init__(
    self,
    seed: int,
    brain: BrainParameters,
    trainer_settings: TrainerSettings,
    model_path: str,
    load: bool = False,
):
    """
    Initialize the policy's bookkeeping state, TensorFlow graph and session.
    :param seed: Random seed to use for TensorFlow.
    :param brain: The corresponding Brain for this policy.
    :param trainer_settings: The trainer parameters.
    :param model_path: Where to load/save the model.
    :param load: If True, load model from model_path. Otherwise, create new model.
    :raises UnityPolicyException: If memory settings are present and the
        memory size is 0 or not divisible by 2.
    """
    self.m_size = 0
    self.trainer_settings = trainer_settings
    self.network_settings: NetworkSettings = trainer_settings.network_settings

    # for ghost trainer save/load snapshots
    self.assign_phs: List[tf.Tensor] = []
    self.assign_ops: List[tf.Operation] = []

    self.inference_dict: Dict[str, tf.Tensor] = {}
    self.update_dict: Dict[str, tf.Tensor] = {}
    self.sequence_length = 1
    self.seed = seed
    self.brain = brain

    action_space = brain.vector_action_space_size
    self.act_size = action_space
    self.vec_obs_size = brain.vector_observation_space_size
    self.vis_obs_size = brain.number_visual_observations
    self.use_recurrent = self.network_settings.memory is not None
    self.memory_dict: Dict[str, np.ndarray] = {}
    self.num_branches = len(action_space)
    self.previous_action_dict: Dict[str, np.array] = {}
    self.normalize = self.network_settings.normalize
    self.use_continuous_act = brain.vector_action_space_type == "continuous"
    if self.use_continuous_act:
        # Continuous control: the first entry of the action space is used.
        self.num_branches = action_space[0]

    self.model_path = model_path
    self.initialize_path = self.trainer_settings.init_path
    self.keep_checkpoints = self.trainer_settings.keep_checkpoints

    self.graph = tf.Graph()
    session_config = tf_utils.generate_session_config()
    self.sess = tf.Session(config=session_config, graph=self.graph)
    self.saver: Optional[tf.Operation] = None

    # Memory settings, when present, override the defaults and are validated.
    memory_settings = self.network_settings.memory
    if memory_settings is not None:
        memory_size = memory_settings.memory_size
        self.m_size = memory_size
        self.sequence_length = memory_settings.sequence_length
        if memory_size == 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is 0 even "
                "though the trainer uses recurrent.".format(brain.brain_name)
            )
        if memory_size % 2 != 0:
            raise UnityPolicyException(
                "The memory size for brain {0} is {1} "
                "but it must be divisible by 2.".format(
                    brain.brain_name, memory_size
                )
            )

    self._initialize_tensorflow_references()
    self.load = load