def _head(self, neck_outputs):
  """Computes policy logits and a baseline value from the neck outputs.

  Args:
    neck_outputs: Input forwarded to `self._policy_logits` and
      `self._baseline`.

  Returns:
    A `common.AgentOutput` holding the policy logits and the baseline with
    its last axis squeezed away.
  """
  policy_logits = self._policy_logits(neck_outputs)
  debug_writer = self._debug_writer
  if debug_writer:
    # Only materialize the logits as a numpy array when a writer is attached.
    debug_writer.log_named_tensor('action_logits', policy_logits.numpy())
  baseline = tf.squeeze(self._baseline(neck_outputs), axis=-1)
  return common.AgentOutput(policy_logits=policy_logits, baseline=baseline)
def _head(self, neck_outputs):
  """Scores each connection and estimates a value from the neck outputs.

  Args:
    neck_outputs: Dict read at keys 'hidden_state', 'c_text',
      `constants.PANO_ENC`, `constants.CONN_ENC` and
      `constants.VALID_CONN_MASK`.

  Returns:
    A `common.AgentOutput` with masked connection logits as `policy_logits`
    and the squeezed value-network output as `baseline`.
  """
  # The shape of hidden_state is [batch_size * time, 1, hidden_size]
  hidden_state = neck_outputs['hidden_state']
  c_text = neck_outputs['c_text']
  pano_features = neck_outputs[constants.PANO_ENC]

  # c_visual has shape [batch_size * time, 1, self._c_visual_attention_size]
  attention_query = self._visual_attention_project_ctext(c_text)
  attention_memory = self._visual_attention_project_feature(pano_features)
  c_visual = self._visual_attention([attention_query, attention_memory])

  # Concatenating the h_t, c_text and c_visual as described in RCM paper.
  input_feature = tf.concat([hidden_state, c_text, c_visual], axis=2)

  connection_encoding = neck_outputs[constants.CONN_ENC]
  projected_feature = self._project_feature(input_feature)
  projected_action = self._project_action(connection_encoding)
  raw_logits = self._dot_product([projected_feature, projected_action])
  # The final shape of logits is [batch_size * time, num_connections]
  raw_logits = tf.squeeze(raw_logits, axis=1)

  # Mask out invalid connections by pushing their logits towards -infinity.
  valid_conn_mask = neck_outputs[constants.VALID_CONN_MASK]
  masked_logits = raw_logits + (1. - valid_conn_mask) * -_INFINITY

  value = self._value_network(tf.squeeze(c_text, axis=1))
  value = tf.squeeze(value, axis=1)
  return common.AgentOutput(policy_logits=masked_logits, baseline=value)
def __init__(self, unroll_length=1):
  """Sets up the mock agent's sizes and its output spec.

  Args:
    unroll_length: Number of unroll steps; the specs use a leading dimension
      of `unroll_length + 1`.
  """
  super(MockAgent, self).__init__(name='mock_agent')
  self._state_size = 5
  # This matches the action space of MockEnv
  self._action_space_size = 2

  num_steps = unroll_length + 1
  logits_spec = tf.TensorSpec(
      shape=[num_steps, self._action_space_size], dtype=tf.float32)
  baseline_spec = tf.TensorSpec(shape=[num_steps], dtype=tf.float32)
  self._agent_spec = common.AgentOutput(
      policy_logits=logits_spec, baseline=baseline_spec)
def __init__(self, unroll_length=1):
  """Sets up the mock agent's sizes, its logits layer and its output spec.

  Args:
    unroll_length: Number of unroll steps; the specs use a leading dimension
      of `unroll_length + 1`.
  """
  super(MockAgent, self).__init__(name='mock_agent')
  self._state_size = 5
  # This matches the action space of MockEnv
  self._action_space_size = 2

  # Dense layer producing one logit per action, with L2 weight regularization.
  l2_regularizer = tf.keras.regularizers.l2(0.0001)
  self._logits_layer = tf.keras.layers.Dense(
      self._action_space_size, kernel_regularizer=l2_regularizer)

  num_steps = unroll_length + 1
  logits_spec = tf.TensorSpec(
      shape=[num_steps, self._action_space_size], dtype=tf.float32)
  baseline_spec = tf.TensorSpec(shape=[num_steps], dtype=tf.float32)
  self._agent_spec = common.AgentOutput(
      policy_logits=logits_spec, baseline=baseline_spec)
def _head(self, neck_output):
  """Verifies the neck output and returns a constant `AgentOutput`.

  The neck output is expected to have `total_timesteps * batch_size` rows of
  width 6, where every row belonging to step `i` (0-based) is filled with
  the value `i + 1`.

  Args:
    neck_output: Tensor exposing `.shape` and `.numpy()`.

  Returns:
    A `common.AgentOutput` with all-zero logits of width 4 and an all-ones
    baseline, one row per (timestep, batch) entry.
  """
  num_rows = self._total_timesteps * self._batch_size

  # Verify neck_output
  np.testing.assert_equal((num_rows, 6), neck_output.shape)
  per_step_blocks = [
      np.ones((self._batch_size, 6)) * (step + 1)
      for step in range(self._total_timesteps)
  ]
  expected_neck_output = np.concatenate(per_step_blocks, axis=0)
  np.testing.assert_array_almost_equal(expected_neck_output,
                                       neck_output.numpy())

  zero_logits = tf.zeros(shape=[num_rows, 4])
  unit_baseline = tf.ones(shape=[num_rows])
  return common.AgentOutput(policy_logits=zero_logits, baseline=unit_baseline)
def _head(self, neck_outputs):
  """Scores connections, estimates a value and emits classifier logits.

  Args:
    neck_outputs: Dict read at keys 'hidden_state', 'c_text',
      'ins_classifier_logits', `constants.PANO_ENC`, `constants.CONN_ENC`
      and `constants.VALID_CONN_MASK`.

  Returns:
    A `common.AgentOutput` whose `policy_logits` are the masked connection
    logits and whose `baseline` is a dict with keys 'value',
    'ins_classifier_logits' and 'scan_classifier_logits'.
  """
  # The shape of hidden_state is [batch_size * time, 1, hidden_size]
  hidden_state = neck_outputs['hidden_state']
  c_text = neck_outputs['c_text']

  if self._scan_classifier is None:
    # No classifier configured: emit all-zero logits of width 61.
    scan_classifier_logits = tf.zeros(
        shape=(tf.shape(hidden_state)[0], 61))
  else:
    scan_classifier_logits = self._scan_classifier(hidden_state)

  pano_features = tf.cast(neck_outputs[constants.PANO_ENC], tf.float32)
  # c_visual has shape [batch_size * time, 1, self._c_visual_attention_size]
  attention_query = self._visual_attention_project_ctext(c_text)
  attention_memory = self._visual_attention_project_feature(pano_features)
  c_visual = self._visual_attention([attention_query, attention_memory])

  # Concatenating the h_t, c_text and c_visual as described in RCM paper.
  input_feature = tf.concat([hidden_state, c_text, c_visual], axis=2)

  connection_encoding = tf.cast(neck_outputs[constants.CONN_ENC], tf.float32)
  projected_feature = self._project_feature(input_feature)
  projected_action = self._project_action(connection_encoding)
  raw_logits = self._dot_product([projected_feature, projected_action])
  # The final shape of logits is [batch_size * time, num_connections]
  raw_logits = tf.squeeze(raw_logits, axis=1)

  # Mask out invalid connections by pushing their logits towards -infinity.
  valid_conn_mask = tf.cast(neck_outputs[constants.VALID_CONN_MASK],
                            tf.float32)
  masked_logits = raw_logits + (1. - valid_conn_mask) * -_INFINITY

  value = self._value_network(tf.squeeze(c_text, axis=1))
  value = tf.squeeze(value, axis=1)

  baseline = {
      'value': value,
      'ins_classifier_logits': neck_outputs['ins_classifier_logits'],
      'scan_classifier_logits': scan_classifier_logits,
  }
  return common.AgentOutput(policy_logits=masked_logits, baseline=baseline)
def setUp(self):
  """Builds a fixed three-example `AgentOutput` with a leading time axis."""
  super(LossFnsTest, self).setUp()

  # Shape = [batch, batch].
  self._similarity_logits = tf.constant(
      [
          [-0.5, -0.3, 0.8],
          [-0.3, 0.4, 0.7],
          [0.8, 0.7, 1.0],
      ],
      dtype=tf.float32)
  # Shape = [batch, batch].
  self._similarity_mask = tf.constant([
      [True, True, False],
      [True, True, True],
      [False, True, True],
  ])
  # Shape = [batch].
  self._labels = tf.constant([0., 1.0, 1.0], dtype=tf.float32)
  # Shape = [batch].
  self._baseline_logits = tf.constant([-0.4, -.1, 0.9], dtype=tf.float32)

  policy_outputs = {
      'similarity': self._similarity_logits,
      'similarity_mask': self._similarity_mask,
      'labels': self._labels
  }
  time_free_output = common.AgentOutput(
      policy_logits=policy_outputs, baseline=self._baseline_logits)

  def _prepend_time_axis(tensor):
    return tf.expand_dims(tensor, 0)

  # Add time dim as required by actor that shape must be [time, batch, ...]
  self._agent_output = tf.nest.map_structure(_prepend_time_axis,
                                             time_free_output)
def _head(self, env_output, neck_outputs):
  """Builds similarity and classification logits for a batch of paths.

  For every batch entry one text state and one image state are selected (or
  the image states are averaged over time), projected, L2-normalized and
  compared through a [batch, batch] dot-product similarity matrix.

  Args:
    env_output: Object whose `observation` is read at `constants.PATH_ID`
      and 'label'.
    neck_outputs: Dict read at `constants.DISC_MASK`, 'text_state' and
      'visual_state'.

  Returns:
    A `common.AgentOutput` whose `policy_logits` is a dict with keys
    'similarity', 'similarity_mask' and 'labels', and whose `baseline` holds
    the affine-transformed diagonal of the similarity matrix.
  """
  num_steps = self._current_num_timesteps
  batch_size = self._current_batch_size

  def _to_batch_major(state):
    return tf.transpose(state, perm=[1, 0, 2])

  disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK],
                         [num_steps, batch_size])

  # Get first_true time step for text states as it's the same for all steps
  # in a path.
  # Shape = [time, batch] for both disc_mask and first_true
  first_true = utils.get_first_true_column(disc_mask)
  # Transpose to [batch, time] to ensure correct batch order for boolean_mask.
  first_true = tf.transpose(first_true, perm=[1, 0])

  # Transpose a list of n_lstm_layers (h, c) states to batch major.
  raw_text_state = tf.nest.map_structure(_to_batch_major,
                                         neck_outputs['text_state'])
  tf.debugging.assert_equal(raw_text_state[0][0].shape,
                            [batch_size, num_steps, 512])
  # Take the first step's text state since it's the same for all steps.
  # Selected state has shape [batch, hidden]
  text_state = self._select_by_mask(raw_text_state, first_true)
  # Projected shape: [batch, hidden_dim].
  text_feature = self._get_final_projection(
      self._instruction_feature_projection, text_state)

  # Get last_true mask for image states, i.e., state at end of sequence.
  # Shape = [time, batch] for both disc_mask and last_true
  last_true = utils.get_last_true_column(disc_mask)
  last_true = tf.transpose(last_true, perm=[1, 0])
  # Sanity check: ensure the first and last text states in a path are same.
  text_state_last_true = self._select_by_mask(raw_text_state, last_true)
  tf.debugging.assert_equal(text_state[-1][0], text_state_last_true[-1][0])

  # Transpose image states, a list of (h, c) states, into batch major. Each
  # state has shape [batch, time_step, hidden_dim]
  raw_image_state = tf.nest.map_structure(_to_batch_major,
                                          neck_outputs['visual_state'])
  if self._average_image_states_of_all_steps:
    # Shape = [batch, time_step, 1]
    float_disc_mask = tf.expand_dims(
        tf.cast(tf.transpose(disc_mask), tf.float32), axis=2)
    # Shape of each reduced state: [batch, hidden_dim]
    image_state = tf.nest.map_structure(
        lambda x: tf.reduce_mean(x * float_disc_mask, 1), raw_image_state)
  else:
    # Selected state has shape [batch, hidden_dim].
    image_state = self._select_by_mask(raw_image_state, last_true)
  # Projected shape: [batch, hidden].
  visual_feature = self._get_final_projection(self._image_feature_projection,
                                              image_state)

  # Normalize features.
  visual_feature = tf.nn.l2_normalize(visual_feature, axis=-1)
  text_feature = tf.nn.l2_normalize(text_feature, axis=-1)

  # Select path_ids for current batch.
  # Transposed shape = [batch, time].
  raw_path_ids = tf.transpose(env_output.observation[constants.PATH_ID])
  # Shape = [batch].
  path_ids = self._select_by_mask(raw_path_ids, first_true)
  # Asserts first true and last true are referring to the same path.
  path_ids_last_true = self._select_by_mask(raw_path_ids, last_true)
  tf.debugging.assert_equal(path_ids, path_ids_last_true)

  # Shape = [time, batch] before the transpose, [batch, time] after it.
  raw_labels = tf.transpose(
      tf.cast(env_output.observation['label'], tf.float32))
  # Shape = [batch]
  labels = self._select_by_mask(raw_labels, first_true)
  tf.debugging.assert_equal(labels,
                            self._select_by_mask(raw_labels, last_true))
  # Add time dimension as required by actor. Shape = [1, batch]
  labels = tf.expand_dims(labels, axis=0)

  # Shape: [batch, batch]
  similarity = tf.matmul(visual_feature,
                         tf.transpose(text_feature, perm=[1, 0]))
  # Add time dim as required by actor. Shape = [1, batch, batch]
  similarity = tf.expand_dims(similarity, axis=0)

  # Make similarity mask to exclude multiple positive matching labels
  as_column = [batch_size, 1]
  as_row = [1, batch_size]
  off_diagonal = tf.logical_not(tf.eye(batch_size, dtype=tf.bool))
  # path_id mask where matching col-row pairs are 1 except diagonal pairs.
  # The [batch, 1] and [1, batch] views broadcast to [batch, batch].
  same_path = tf.equal(
      tf.reshape(path_ids, as_column), tf.reshape(path_ids, as_row))
  path_id_mask = tf.logical_and(same_path, off_diagonal)
  # Filter the mask by label. Positive labels are 1.
  row_is_positive = tf.cast(tf.reshape(labels, as_column), tf.bool)
  col_is_positive = tf.cast(tf.reshape(labels, as_row), tf.bool)
  label_mask = tf.logical_and(row_is_positive, col_is_positive)

  # M[i, j]=0 (i!=j) if path_id_mask[i,j] is True and label_mask[i, j] is True
  similarity_mask = tf.logical_not(tf.logical_and(path_id_mask, label_mask))
  # Add timestep dim as required by actor. Shape = [1, batch, batch]
  similarity_mask = tf.expand_dims(similarity_mask, axis=0)

  # Computes logits by transforming similarity from [-1, 1] to unbound.
  # Shape: [time, batch, batch]
  similarity_logits = self.similarity_scaler * similarity
  output_logits = {
      'similarity': similarity_logits,
      'similarity_mask': similarity_mask,
      'labels': labels
  }

  # Logits for classification loss. Shape = [time, batch]
  matched_similarity = tf.linalg.diag_part(similarity)
  classification_logits = self.affine_a * matched_similarity + self.affine_b

  return common.AgentOutput(
      policy_logits=output_logits, baseline=classification_logits)
def _head(self, neck_outputs):
  """Computes one alignment score per path and tiles it over time.

  Visual features of every step are compared against the token features of
  the path's instruction; the per-step scores are combined with softmin
  weights over the valid steps and passed through an affine transform.

  Args:
    neck_outputs: Dict read at `constants.PATH_ID`, `constants.DISC_MASK`,
      'visual_feature' and 'text_feature'.

  Returns:
    A `common.AgentOutput` whose `policy_logits` are the flattened
    [time * batch] similarities and whose `baseline` is the tuple
    `(affine_a, affine_b)`, each tiled to the same flattened shape.
  """
  num_steps = self._current_num_timesteps
  batch_size = self._current_batch_size
  time_batch = [num_steps, batch_size]

  # Flat shape: [time * batch]; rearranged to [batch, time].
  path_ids = tf.transpose(
      tf.reshape(neck_outputs[constants.PATH_ID], time_batch))

  # <tf.float32>[time * batch_size, 1, hidden_dim]
  visual_feature = neck_outputs['visual_feature']
  # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
  flat_text_feature = neck_outputs['text_feature']
  raw_text_feature = tf.reshape(
      flat_text_feature, time_batch + flat_text_feature.shape[1:].as_list())
  # Shape = [batch_size, time, num_tokens, hidden_dim]
  raw_text_feature = tf.transpose(raw_text_feature, perm=[1, 0, 2, 3])

  # <tf.float32>[time, batch_size, 1, hidden_dim]
  visual_feature = tf.reshape(
      visual_feature, time_batch + visual_feature.shape[1:].as_list())
  # <tf.float32>[batch_size, time, hidden_dim]
  visual_feature = tf.squeeze(visual_feature, axis=2)
  visual_feature = tf.transpose(visual_feature, [1, 0, 2])

  # Shape = [time, batch]; shared by the first/last-true and weight masks.
  disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
  first_true = tf.transpose(utils.get_first_true_column(disc_mask))
  last_true = tf.transpose(utils.get_last_true_column(disc_mask))
  has_first_true = tf.keras.backend.any(first_true)
  has_last_true = tf.keras.backend.any(last_true)

  # Sanity Check: path_ids are consistent for first_true and last_true.
  # When a mask has no True entry, fall back to the first time step.
  path_ids_first_true = tf.cond(
      has_first_true,
      lambda: tf.boolean_mask(path_ids, first_true),
      lambda: path_ids[:, 0])
  path_ids_last_true = tf.cond(
      has_last_true,
      lambda: tf.boolean_mask(path_ids, last_true),
      lambda: path_ids[:, 0])
  tf.debugging.assert_equal(path_ids_first_true, path_ids_last_true)

  # <tf.float32>[batch_size, num_tokens, hidden_dim]
  text_feature = tf.cond(
      has_first_true,
      lambda: tf.boolean_mask(raw_text_feature, first_true),
      lambda: raw_text_feature[:, 0, :, :])
  text_feature_last_true = tf.cond(
      has_last_true,
      lambda: tf.boolean_mask(raw_text_feature, last_true),
      lambda: raw_text_feature[:, 0, :, :])
  tf.debugging.assert_equal(text_feature, text_feature_last_true)

  # Features are used without L2 normalization here.
  # <tf.float32>[batch_size, time, num_tokens]
  alpha_i_j = tf.matmul(visual_feature,
                        tf.transpose(text_feature, perm=[0, 2, 1]))
  # <tf.float32>[batch, time, num_tokens]
  c_i_j = tf.nn.softmax(alpha_i_j)
  # <tf.float32>[batch_size, time]
  mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
  # <tf.float32>[batch, time]
  score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

  # Compute softmin(x) = softmax(-x)
  # Use stable softmax since softmax(x) = softmax(x+c) for any constant c.
  # Here we use constant c = max(-x).
  negative_score = -1.0 * score
  escore = tf.exp(negative_score - tf.reduce_max(negative_score)) * mask
  # Per-path normalizer, kept as [batch, 1] so it broadcasts over time.
  sum_escore = tf.reduce_sum(escore, 1, keepdims=True)
  score_weight = tf.divide(escore, sum_escore)

  similarities = tf.reduce_sum(mask * score * score_weight, 1)
  similarities = tf.expand_dims(similarities, axis=0)
  # shape: [time * batch_size]
  similarities = tf.reshape(tf.tile(similarities, [num_steps, 1]), [-1])
  # Apply an affine transform.
  similarities = similarities * self.affine_a + self.affine_b

  def _tile_over_time_and_batch(scalar):
    # shape: [time * batch]
    as_matrix = tf.reshape(tf.convert_to_tensor(scalar), [1, 1])
    return tf.reshape(tf.tile(as_matrix, time_batch), [-1])

  output_a = _tile_over_time_and_batch(self.affine_a)
  output_b = _tile_over_time_and_batch(self.affine_b)

  return common.AgentOutput(
      policy_logits=similarities, baseline=(output_a, output_b))
def _head(self, neck_output):
  """Returns all-zero logits and an all-ones baseline.

  Args:
    neck_output: Tensor whose leading dimension sets the number of rows.

  Returns:
    A `common.AgentOutput` with zeros of shape
    [rows, self._action_space_size] and ones of shape [rows].
  """
  num_rows = tf.shape(neck_output)[0]
  zero_logits = tf.zeros(shape=[num_rows, self._action_space_size])
  unit_baseline = tf.ones(shape=[num_rows])
  return common.AgentOutput(policy_logits=zero_logits, baseline=unit_baseline)
def _head(self, neck_output):
  """Applies the logits layer and pairs it with an all-ones baseline.

  Args:
    neck_output: Tensor fed to `self._logits_layer`; its leading dimension
      sets the baseline length.

  Returns:
    A `common.AgentOutput` with the layer output as `policy_logits` and ones
    of shape [rows] as `baseline`.
  """
  policy_logits = self._logits_layer(neck_output)
  num_rows = tf.shape(neck_output)[0]
  unit_baseline = tf.ones(shape=[num_rows])
  return common.AgentOutput(
      policy_logits=policy_logits, baseline=unit_baseline)
def _head(self, neck_outputs):
  """Computes one alignment score per path and tiles it over time.

  Visual features of every step are compared against the token features of
  the path's instruction; the per-step scores are combined with softmin
  weights over the valid steps and passed through an affine transform.

  Args:
    neck_outputs: Dict read at `constants.DISC_MASK`, 'visual_feature' and
      'text_feature'.

  Returns:
    A `common.AgentOutput` whose `policy_logits` are the [time, batch]
    similarities and whose `baseline` is the tuple `(affine_a, affine_b)`,
    each tiled to [time, batch].
  """
  num_steps = self._current_num_timesteps
  batch_size = self._current_batch_size
  time_batch = [num_steps, batch_size]

  # <tf.float32>[time * batch_size, 1, hidden_dim]
  visual_feature = neck_outputs['visual_feature']
  # <tf.float32>[time, batch_size, 1, hidden_dim]
  visual_feature = tf.reshape(
      visual_feature, time_batch + visual_feature.shape[1:].as_list())
  # <tf.float32>[batch_size, time, hidden_dim]
  visual_feature = tf.squeeze(visual_feature, axis=2)
  visual_feature = tf.transpose(visual_feature, [1, 0, 2])

  # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
  flat_text_feature = neck_outputs['text_feature']
  raw_text_feature = tf.reshape(
      flat_text_feature, time_batch + flat_text_feature.shape[1:].as_list())
  # Shape = [batch_size, time, num_tokens, hidden_dim]
  raw_text_feature = tf.transpose(raw_text_feature, perm=[1, 0, 2, 3])

  # Shape = [time, batch].
  disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
  # Select in batch-major order. Masking the time-major flattened tensor
  # would return rows ordered by time step, which no longer lines up with
  # the batch-major visual features whenever the first-true step differs
  # between batch entries.
  first_true = tf.transpose(utils.get_first_true_column(disc_mask))
  # <tf.float32>[batch_size, num_tokens, hidden_dim]
  # When the mask has no True entry, fall back to the first time step.
  text_feature = tf.cond(
      tf.keras.backend.any(first_true),
      lambda: tf.boolean_mask(raw_text_feature, first_true),
      lambda: raw_text_feature[:, 0, :, :])

  # Features are used without L2 normalization here.
  # <tf.float32>[batch_size, time, num_tokens]
  alpha_i_j = tf.matmul(visual_feature,
                        tf.transpose(text_feature, perm=[0, 2, 1]))
  # <tf.float32>[batch, time, num_tokens]
  # tf.nn.softmax normalizes over the last (token) axis and, unlike a
  # hand-written exp / sum(exp), does not overflow for large scores.
  c_i_j = tf.nn.softmax(alpha_i_j)

  # <tf.float32>[batch_size, time]
  mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
  # <tf.float32>[batch, time]
  score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

  # Softmin weights over the valid time steps of each path.
  escore = tf.exp(-1 * score) * mask
  # Per-path normalizer, kept as [batch, 1] so it broadcasts over time.
  sum_escore = tf.reduce_sum(escore, 1, keepdims=True)
  score_weight = tf.divide(escore, sum_escore)

  similarities = tf.reduce_sum(mask * score * score_weight, 1)
  similarities = tf.expand_dims(similarities, axis=0)
  # [time_step, batch_size]
  similarities = tf.tile(similarities, [num_steps, 1])
  # Apply an affine transform.
  similarities = similarities * self.affine_a + self.affine_b

  output_a = tf.tile(
      tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1]), time_batch)
  output_b = tf.tile(
      tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1]), time_batch)

  return common.AgentOutput(
      policy_logits=similarities, baseline=(output_a, output_b))