def make_inputs(self) -> None:
    """
    Builds the input tensors consumed by the discriminator.

    Sets on ``self``: the done-flag placeholders and their expanded forms,
    the expert action placeholder with its processed ``expert_action``
    tensor, the expert observation placeholders, and finally the
    concatenated ``encoded_expert`` / ``encoded_policy`` observation tensors.
    """
    policy = self.policy

    # Done flags arrive as flat vectors; a trailing axis is appended to each.
    self.done_expert_holder = tf.placeholder(shape=[None], dtype=tf.float32)
    self.done_policy_holder = tf.placeholder(shape=[None], dtype=tf.float32)
    self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
    self.done_policy = tf.expand_dims(self.done_policy_holder, -1)

    # Continuous actions are fed as floats and passed through unchanged;
    # otherwise one int32 column per entry of act_size is one-hot encoded
    # and the encodings are concatenated along axis 1.
    is_continuous = policy.brain.vector_action_space_type == "continuous"
    action_width = policy.act_size[0] if is_continuous else len(policy.act_size)
    self.action_in_expert = tf.placeholder(
        shape=[None, action_width],
        dtype=tf.float32 if is_continuous else tf.int32,
    )
    if is_continuous:
        self.expert_action = tf.identity(self.action_in_expert)
    else:
        per_branch_one_hot = [
            tf.one_hot(self.action_in_expert[:, branch], branch_size)
            for branch, branch_size in enumerate(policy.act_size)
        ]
        self.expert_action = tf.concat(per_branch_one_hot, axis=1)

    policy_streams = []
    expert_streams = []

    if policy.vec_obs_size > 0:
        self.obs_in_expert = tf.placeholder(
            shape=[None, policy.vec_obs_size], dtype=tf.float32
        )
        if policy.normalize:
            # Expert observations are normalized with the policy's running
            # statistics and paired with the policy's processed vector input.
            expert_vector = ModelUtils.normalize_vector_obs(
                self.obs_in_expert,
                policy.running_mean,
                policy.running_variance,
                policy.normalization_steps,
            )
            policy_vector = policy.processed_vector_in
        else:
            expert_vector = self.obs_in_expert
            policy_vector = policy.vector_in
        expert_streams.append(expert_vector)
        policy_streams.append(policy_vector)

    if policy.vis_obs_size > 0:
        self.expert_visual_in: List[tf.Tensor] = []
        policy_visual_feats = []
        expert_visual_feats = []
        for cam in range(policy.vis_obs_size):
            # Input op for the expert's visual observation from this camera.
            expert_frame = ModelUtils.create_visual_input(
                policy.brain.camera_resolutions[cam],
                name="gail_visual_observation_" + str(cam),
            )
            self.expert_visual_in.append(expert_frame)

            # Both encoders are built under the same scope name; the last
            # positional flag is False for the policy call and True for the
            # expert call (presumably a variable-reuse switch -- confirm
            # against ModelUtils). The policy encoder must be created first.
            scope = "gail_stream_{}_visual_obs_encoder".format(cam)
            policy_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    policy.visual_in[cam],
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    False,
                )
            )
            expert_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    expert_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    True,
                )
            )
        policy_streams.append(tf.concat(policy_visual_feats, axis=1))
        expert_streams.append(tf.concat(expert_visual_feats, axis=1))

    self.encoded_expert = tf.concat(expert_streams, axis=1)
    self.encoded_policy = tf.concat(policy_streams, axis=1)
def make_inputs(self) -> None:
    """
    Sets up the tensors that feed the discriminator.

    Sets on ``self``: the done-flag placeholders and their expanded forms,
    the expert action placeholder with its processed ``expert_action``
    tensor, the expert observation placeholders (created with the "gail_"
    name prefix), and the concatenated ``encoded_expert`` /
    ``encoded_policy`` observation tensors.
    """
    policy = self.policy

    # Done flags arrive as flat vectors; a trailing axis is appended to each.
    self.done_expert_holder = tf.placeholder(shape=[None], dtype=tf.float32)
    self.done_policy_holder = tf.placeholder(shape=[None], dtype=tf.float32)
    self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
    self.done_policy = tf.expand_dims(self.done_policy_holder, -1)

    # Continuous actions are fed as floats and passed through unchanged;
    # otherwise one int32 column per entry of act_size is one-hot encoded
    # and the encodings are concatenated along axis 1.
    is_continuous = policy.behavior_spec.is_action_continuous()
    action_width = policy.act_size[0] if is_continuous else len(policy.act_size)
    self.action_in_expert = tf.placeholder(
        shape=[None, action_width],
        dtype=tf.float32 if is_continuous else tf.int32,
    )
    if is_continuous:
        self.expert_action = tf.identity(self.action_in_expert)
    else:
        per_branch_one_hot = [
            tf.one_hot(self.action_in_expert[:, branch], branch_size)
            for branch, branch_size in enumerate(policy.act_size)
        ]
        self.expert_action = tf.concat(per_branch_one_hot, axis=1)

    # Expert-side vector and visual placeholders, built from the policy's
    # observation shapes.
    expert_placeholders = ModelUtils.create_input_placeholders(
        policy.behavior_spec.observation_shapes, "gail_"
    )
    self.obs_in_expert, self.expert_visual_in = expert_placeholders

    policy_streams = []
    expert_streams = []

    if policy.vec_obs_size > 0:
        if policy.normalize:
            # Expert observations are normalized with the policy's running
            # statistics and paired with the policy's processed vector input.
            expert_vector = ModelUtils.normalize_vector_obs(
                self.obs_in_expert,
                policy.running_mean,
                policy.running_variance,
                policy.normalization_steps,
            )
            policy_vector = policy.processed_vector_in
        else:
            expert_vector = self.obs_in_expert
            policy_vector = policy.vector_in
        expert_streams.append(expert_vector)
        policy_streams.append(policy_vector)

    if self.expert_visual_in:
        policy_visual_feats = []
        expert_visual_feats = []
        paired_frames = zip(policy.visual_in, self.expert_visual_in)
        for stream, (policy_frame, expert_frame) in enumerate(paired_frames):
            # Both encoders are built under the same scope name; the last
            # positional flag is False for the policy call and True for the
            # expert call (presumably a variable-reuse switch -- confirm
            # against ModelUtils). The policy encoder must be created first.
            scope = "gail_stream_{}_visual_obs_encoder".format(stream)
            policy_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    policy_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    False,
                )
            )
            expert_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    expert_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    True,
                )
            )
        policy_streams.append(tf.concat(policy_visual_feats, axis=1))
        expert_streams.append(tf.concat(expert_visual_feats, axis=1))

    self.encoded_expert = tf.concat(expert_streams, axis=1)
    self.encoded_policy = tf.concat(policy_streams, axis=1)
def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Builds the encoders for the current and the next (t+1) observations.
    Part of the implementation of Curiosity-driven Exploration by
    Self-supervised Prediction; see https://arxiv.org/abs/1705.05363.

    Also stores the next-observation placeholders on ``self.next_vector_in``
    and ``self.next_visual_in``.
    :return: tuple of (current state encoding, next state encoding) tensors.
    """
    policy = self.policy
    current_features = []
    next_features = []

    # Input ops for the next (t+1) vector and visual observations.
    next_placeholders = ModelUtils.create_input_placeholders(
        policy.behavior_spec.observation_shapes, name_prefix="curiosity_next_"
    )
    self.next_vector_in, self.next_visual_in = next_placeholders

    if self.next_visual_in:
        current_visual_feats = []
        next_visual_feats = []
        paired_frames = zip(policy.visual_in, self.next_visual_in)
        for stream, (current_frame, next_frame) in enumerate(paired_frames):
            # Siamese encoders: the same scope name is used for both calls,
            # with the trailing flag False for the current frame and True
            # for the next frame. The current-frame encoder goes first.
            scope = "curiosity_stream_{}_visual_obs_encoder".format(stream)
            current_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    current_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    False,
                )
            )
            next_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    next_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    True,
                )
            )
        current_features.append(tf.concat(current_visual_feats, axis=1))
        next_features.append(tf.concat(next_visual_feats, axis=1))

    if policy.vec_obs_size > 0:
        # Same pairing for the vector observations: one scope name, trailing
        # flag False for the current input and True for the next input.
        vector_scope = "curiosity_vector_obs_encoder"
        current_features.append(
            ModelUtils.create_vector_observation_encoder(
                policy.vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                vector_scope,
                False,
            )
        )
        next_features.append(
            ModelUtils.create_vector_observation_encoder(
                self.next_vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                vector_scope,
                True,
            )
        )

    # Tuple elements are evaluated left to right, so the current-state
    # concat op is created before the next-state one.
    return (
        tf.concat(current_features, axis=1),
        tf.concat(next_features, axis=1),
    )
def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Builds the encoders for the current and the next (t+1) observations.
    Part of the implementation of Curiosity-driven Exploration by
    Self-supervised Prediction; see https://arxiv.org/abs/1705.05363.

    When the corresponding observations exist, also stores the
    next-observation placeholders on ``self.next_visual_in`` and
    ``self.next_vector_in``.
    :return: tuple of (current state encoding, next state encoding) tensors.
    """
    policy = self.policy
    current_features = []
    next_features = []

    if policy.vis_obs_size > 0:
        self.next_visual_in = []
        current_visual_feats = []
        next_visual_feats = []
        for cam in range(policy.vis_obs_size):
            # Input op for the next (t+1) visual observation of this camera.
            next_frame = ModelUtils.create_visual_input(
                policy.brain.camera_resolutions[cam],
                name="curiosity_next_visual_observation_" + str(cam),
            )
            self.next_visual_in.append(next_frame)

            # Siamese encoders: the same scope name is used for both calls,
            # with the trailing flag False for the current frame and True
            # for the next frame. The current-frame encoder goes first.
            scope = "curiosity_stream_{}_visual_obs_encoder".format(cam)
            current_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    policy.visual_in[cam],
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    False,
                )
            )
            next_visual_feats.append(
                ModelUtils.create_visual_observation_encoder(
                    next_frame,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    scope,
                    True,
                )
            )
        current_features.append(tf.concat(current_visual_feats, axis=1))
        next_features.append(tf.concat(next_visual_feats, axis=1))

    if policy.vec_obs_size > 0:
        # Input op for the next (t+1) vector observation.
        self.next_vector_in = tf.placeholder(
            shape=[None, policy.vec_obs_size],
            dtype=tf.float32,
            name="curiosity_next_vector_observation",
        )

        # Siamese encoders for the current and next vector inputs: one scope
        # name, trailing flag False for the current and True for the next.
        vector_scope = "curiosity_vector_obs_encoder"
        current_features.append(
            ModelUtils.create_vector_observation_encoder(
                policy.vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                vector_scope,
                False,
            )
        )
        next_features.append(
            ModelUtils.create_vector_observation_encoder(
                self.next_vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                vector_scope,
                True,
            )
        )

    # Tuple elements are evaluated left to right, so the current-state
    # concat op is created before the next-state one.
    return (
        tf.concat(current_features, axis=1),
        tf.concat(next_features, axis=1),
    )