Example #1
 def _head(self, neck_outputs):
     logits = self._policy_logits(neck_outputs)
     if self._debug_writer:
         self._debug_writer.log_named_tensor('action_logits',
                                             logits.numpy())
     value = tf.squeeze(self._baseline(neck_outputs), axis=-1)
     return common.AgentOutput(policy_logits=logits, baseline=value)
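Every snippet on this page constructs a `common.AgentOutput`. For experimenting outside the framework, a plain namedtuple stand-in is enough; a minimal sketch, assuming the real definition is a two-field namedtuple (it may carry more structure):

import collections

# Hypothetical stand-in for common.AgentOutput; check the framework's
# actual definition before relying on this.
AgentOutput = collections.namedtuple('AgentOutput', ['policy_logits', 'baseline'])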
Example #2
    def _head(self, neck_outputs):
        # The shape of hidden_state is [batch_size * time, 1, hidden_size]
        hidden_state = neck_outputs['hidden_state']
        image_features = neck_outputs[constants.PANO_ENC]

        # c_visual has shape [batch_size * time, 1, self._c_visual_attention_size]
        c_visual = self._visual_attention([
            self._visual_attention_project_ctext(neck_outputs['c_text']),
            self._visual_attention_project_feature(image_features),
        ])

        # Concatenate h_t, c_text and c_visual as described in the RCM paper.
        input_feature = tf.concat(
            [hidden_state, neck_outputs['c_text'], c_visual], axis=2)
        connection_encoding = neck_outputs[constants.CONN_ENC]
        logits = self._dot_product([
            self._project_feature(input_feature),
            self._project_action(connection_encoding)
        ])
        # The final shape of logits is [batch_size * time, num_connections]
        logits = tf.squeeze(logits, axis=1)
        # Mask out invalid connections.
        valid_conn_mask = neck_outputs[constants.VALID_CONN_MASK]
        logits += (1. - valid_conn_mask) * -_INFINITY
        value = self._value_network(tf.squeeze(neck_outputs['c_text'], axis=1))
        value = tf.squeeze(value, axis=1)
        return common.AgentOutput(policy_logits=logits, baseline=value)
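The masking step above adds a large negative constant to invalid logits so that a downstream softmax drives their probability to zero. A minimal sketch of the trick, assuming `_INFINITY` is simply a large float (the module defines its own constant):

import tensorflow as tf

_INFINITY = 1e9  # assumed value, for illustration only
logits = tf.constant([[2.0, 0.5, -1.0]])
valid_conn_mask = tf.constant([[1.0, 1.0, 0.0]])  # third connection invalid
masked = logits + (1. - valid_conn_mask) * -_INFINITY
probs = tf.nn.softmax(masked)  # invalid connection gets ~0 probability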
Example #3
 def __init__(self, unroll_length=1):
     super(MockAgent, self).__init__(name='mock_agent')
     self._state_size = 5
     # This matches the action space of MockEnv
     self._action_space_size = 2
     self._agent_spec = common.AgentOutput(
         policy_logits=tf.TensorSpec(shape=[unroll_length + 1, 2],
                                     dtype=tf.float32),
         baseline=tf.TensorSpec(shape=[unroll_length + 1],
                                dtype=tf.float32))
Example #4
 def __init__(self, unroll_length=1):
     super(MockAgent, self).__init__(name='mock_agent')
     self._state_size = 5
     # This matches the action space of MockEnv
     self._action_space_size = 2
     self._logits_layer = tf.keras.layers.Dense(
         self._action_space_size,
         kernel_regularizer=tf.keras.regularizers.l2(0.0001))
     self._agent_spec = common.AgentOutput(
         policy_logits=tf.TensorSpec(shape=[unroll_length + 1, 2],
                                     dtype=tf.float32),
         baseline=tf.TensorSpec(shape=[unroll_length + 1],
                                dtype=tf.float32))
Example #5
 def _head(self, neck_output):
     # Verify neck_output
     np.testing.assert_equal((self._total_timesteps * self._batch_size, 6),
                             neck_output.shape)
     arrays = []
     for i in range(self._total_timesteps):
         arrays.append(np.ones((self._batch_size, 6)) * (i + 1))
     expected_neck_output = np.concatenate(arrays, axis=0)
     np.testing.assert_array_almost_equal(expected_neck_output,
                                          neck_output.numpy())
     return common.AgentOutput(
         policy_logits=tf.zeros(
             shape=[self._total_timesteps * self._batch_size, 4]),
         baseline=tf.ones(shape=[self._total_timesteps * self._batch_size]))
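A neck output that satisfies this check can be built the same way the expectation is, as time-major blocks of constants; a self-contained sketch with assumed sizes:

import tensorflow as tf

total_timesteps, batch_size = 3, 2  # sizes assumed for illustration
neck_output = tf.concat(
    [tf.ones([batch_size, 6]) * (i + 1) for i in range(total_timesteps)],
    axis=0)  # Shape = [total_timesteps * batch_size, 6]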
Example #6
    def _head(self, neck_outputs):
        # The shape of hidden_state is [batch_size * time, 1, hidden_size]
        hidden_state = neck_outputs['hidden_state']
        if self._scan_classifier is not None:
            scan_classifier_logits = self._scan_classifier(hidden_state)
        else:
            # Zero placeholder logits; 61 presumably matches the number of
            # scan classes.
            scan_classifier_logits = tf.zeros(
                shape=(tf.shape(hidden_state)[0], 61))
        image_features = tf.cast(neck_outputs[constants.PANO_ENC], tf.float32)

        # c_visual has shape [batch_size * time, 1, self._c_visual_attention_size]
        c_visual = self._visual_attention([
            self._visual_attention_project_ctext(neck_outputs['c_text']),
            self._visual_attention_project_feature(image_features),
        ])

        # Concatenate h_t, c_text and c_visual as described in the RCM paper.
        input_feature = tf.concat(
            [hidden_state, neck_outputs['c_text'], c_visual], axis=2)
        connection_encoding = neck_outputs[constants.CONN_ENC]
        connection_encoding = tf.cast(connection_encoding, tf.float32)
        logits = self._dot_product([
            self._project_feature(input_feature),
            self._project_action(connection_encoding)
        ])
        # The final shape of logits is [batch_size * time, num_connections]
        logits = tf.squeeze(logits, axis=1)
        # Mask out invalid connections.
        valid_conn_mask = tf.cast(neck_outputs[constants.VALID_CONN_MASK],
                                  tf.float32)
        logits += (1. - valid_conn_mask) * -_INFINITY
        value = self._value_network(tf.squeeze(neck_outputs['c_text'], axis=1))
        value = tf.squeeze(value, axis=1)
        return common.AgentOutput(
            policy_logits=logits,
            baseline={
                'value': value,
                'ins_classifier_logits': neck_outputs['ins_classifier_logits'],
                'scan_classifier_logits': scan_classifier_logits,
            })
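Unlike Example #2, `baseline` here is a dict carrying auxiliary classifier logits alongside the value. `tf.nest` descends into dicts, so downstream code can still transform the whole output in one call; a sketch, mirroring the time-axis pattern used in Example #7:

import tensorflow as tf

baseline = {'value': tf.zeros([4]), 'scan_classifier_logits': tf.zeros([4, 61])}
# One map_structure call adds a leading time axis to every tensor.
baseline = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0), baseline)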
Example #7
 def setUp(self):
     super(LossFnsTest, self).setUp()
     # Shape = [batch, batch].
     self._similarity_logits = tf.constant(
         [[-0.5, -0.3, 0.8], [-0.3, 0.4, 0.7], [0.8, 0.7, 1.0]],
         dtype=tf.float32)
     # Shape = [batch, batch].
     self._similarity_mask = tf.constant([[True, True, False],
                                          [True, True, True],
                                          [False, True, True]])
     # Shape = [batch].
     self._labels = tf.constant([0., 1.0, 1.0], dtype=tf.float32)
     # Shape = [batch].
     self._baseline_logits = tf.constant([-0.4, -.1, 0.9], dtype=tf.float32)
     output_logits = {
         'similarity': self._similarity_logits,
         'similarity_mask': self._similarity_mask,
         'labels': self._labels
     }
     agent_output = common.AgentOutput(policy_logits=output_logits,
                                       baseline=self._baseline_logits)
     # Add a time dim as required by the actor; shapes must be [time, batch, ...].
     self._agent_output = tf.nest.map_structure(
         lambda t: tf.expand_dims(t, 0), agent_output)
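The boolean `similarity_mask` marks which logit pairs a loss should consider. One common way to apply such a mask (an assumption for illustration, not necessarily the loss under test here) is to push masked-out logits to a large negative value before softmax:

import tensorflow as tf

logits = tf.constant([[-0.5, -0.3, 0.8]])
mask = tf.constant([[True, True, False]])
masked_logits = tf.where(mask, logits, tf.fill(tf.shape(logits), -1e9))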
Example #8
    def _head(self, env_output, neck_outputs):
        disc_mask = tf.reshape(
            neck_outputs[constants.DISC_MASK],
            [self._current_num_timesteps, self._current_batch_size])
        # Get the first_true time step for text states, since the text state is
        # the same for all steps in a path.
        # Shape = [time, batch] for both disc_mask and first_true.
        first_true = utils.get_first_true_column(disc_mask)
        # Transpose to [batch, time] to ensure correct batch order for boolean_mask.
        first_true = tf.transpose(first_true, perm=[1, 0])

        # Transpose a list of n_lstm_layers (h, c) states to batch major.
        raw_text_state = tf.nest.map_structure(
            lambda t: tf.transpose(t, perm=[1, 0, 2]),
            neck_outputs['text_state'])
        tf.debugging.assert_equal(
            raw_text_state[0][0].shape,
            [self._current_batch_size, self._current_num_timesteps, 512])
        # Take the first step's text state since it's the same for all steps.
        # Selected state has shape [batch, hidden]
        text_state = self._select_by_mask(raw_text_state, first_true)

        # Projected shape: [batch, hidden_dim].
        text_feature = self._get_final_projection(
            self._instruction_feature_projection, text_state)

        # Get last_true mask for image states, i.e., state at end of sequence.
        # Shape = [time, batch] for both disc_mask and last_true
        last_true = utils.get_last_true_column(disc_mask)
        last_true = tf.transpose(last_true, perm=[1, 0])
        # Sanity check: ensure the first and last text states in a path are the same.
        text_state_last_true = self._select_by_mask(raw_text_state, last_true)
        tf.debugging.assert_equal(text_state[-1][0],
                                  text_state_last_true[-1][0])

        # Transpose image states, a list of (h, c) states, into batch major. Each
        # state has shape [batch, time_step, hidden_dim]
        raw_image_state = tf.nest.map_structure(
            lambda t: tf.transpose(t, perm=[1, 0, 2]),
            neck_outputs['visual_state'])
        if self._average_image_states_of_all_steps:
            # Shape = [batch, time_step, 1]
            float_disc_mask = tf.expand_dims(tf.cast(tf.transpose(disc_mask),
                                                     tf.float32),
                                             axis=2)
            # Shape of each reduced state: [batch, hidden_dim]
            image_state = tf.nest.map_structure(
                lambda x: tf.reduce_mean(x * float_disc_mask, 1),
                raw_image_state)
        else:
            # Selected state has shape [batch, hidden_dim].
            image_state = self._select_by_mask(raw_image_state, last_true)
        # Projected shape: [batch, hidden].
        visual_feature = self._get_final_projection(
            self._image_feature_projection, image_state)

        # Normalize features.
        visual_feature = tf.nn.l2_normalize(visual_feature, axis=-1)
        text_feature = tf.nn.l2_normalize(text_feature, axis=-1)

        # Select path_ids for current batch.
        # Transposed shape = [batch, time].
        raw_path_ids = tf.transpose(env_output.observation[constants.PATH_ID])
        # Shape = [batch].
        path_ids = self._select_by_mask(raw_path_ids, first_true)
        # Assert that first_true and last_true refer to the same path.
        path_ids_last_true = self._select_by_mask(raw_path_ids, last_true)
        tf.debugging.assert_equal(path_ids, path_ids_last_true)

        # Shape = [time, batch]
        raw_labels = tf.cast(env_output.observation['label'], tf.float32)
        raw_labels = tf.transpose(raw_labels)
        # Shape = [batch]
        labels = self._select_by_mask(raw_labels, first_true)
        tf.debugging.assert_equal(labels,
                                  self._select_by_mask(raw_labels, last_true))
        # Add a time dimension, as required by the actor. Shape = [1, batch]
        labels = tf.expand_dims(labels, axis=0)

        # Shape: [batch, batch]
        similarity = tf.matmul(visual_feature,
                               tf.transpose(text_feature, perm=[1, 0]))
        # Add a time dim, as required by the actor. Shape = [1, batch, batch]
        similarity = tf.expand_dims(similarity, axis=0)

        # Build a similarity mask to exclude duplicate positive matches
        # (same path, positive label).
        diag_mask = tf.eye(self._current_batch_size, dtype=tf.bool)
        # path_id mask where matching col-row pairs are 1, except diagonal pairs.
        rows = tf.tile(tf.reshape(path_ids, [self._current_batch_size, 1]),
                       [1, self._current_batch_size])
        cols = tf.tile(tf.reshape(path_ids, [1, self._current_batch_size]),
                       [self._current_batch_size, 1])
        path_id_mask = tf.logical_and(tf.equal(rows, cols),
                                      tf.logical_not(diag_mask))
        # Filter the mask by label. Positive labels are 1.
        row_labels = tf.tile(tf.reshape(labels, [self._current_batch_size, 1]),
                             [1, self._current_batch_size])
        col_labels = tf.tile(tf.reshape(labels, [1, self._current_batch_size]),
                             [self._current_batch_size, 1])
        label_mask = tf.logical_and(tf.cast(row_labels, tf.bool),
                                    tf.cast(col_labels, tf.bool))

        # M[i, j] = 0 (i != j) where both path_id_mask[i, j] and
        # label_mask[i, j] are True.
        similarity_mask = tf.logical_not(
            tf.logical_and(path_id_mask, label_mask))
        # Add a timestep dim, as required by the actor. Shape = [1, batch, batch]
        similarity_mask = tf.expand_dims(similarity_mask, axis=0)

        # Compute logits by scaling similarity from [-1, 1] to an unbounded range.
        # Shape: [time, batch, batch]
        similarity_logits = self.similarity_scaler * similarity

        output_logits = {
            'similarity': similarity_logits,
            'similarity_mask': similarity_mask,
            'labels': labels
        }

        # Logits for classification loss. Shape = [time, batch]
        classification_logits = (
            self.affine_a * tf.linalg.diag_part(similarity) + self.affine_b)

        return common.AgentOutput(policy_logits=output_logits,
                                  baseline=classification_logits)
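The row/column tiling above turns a 1-D vector of path ids into a pairwise mask; a tiny standalone sketch of the same trick with an assumed batch of 3:

import tensorflow as tf

path_ids = tf.constant([7, 7, 9])  # first two entries share a path
rows = tf.tile(tf.reshape(path_ids, [3, 1]), [1, 3])
cols = tf.tile(tf.reshape(path_ids, [1, 3]), [3, 1])
diag_mask = tf.eye(3, dtype=tf.bool)
path_id_mask = tf.logical_and(tf.equal(rows, cols),
                              tf.logical_not(diag_mask))
# path_id_mask[0, 1] and [1, 0] are True: duplicate positives to exclude.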
Example #9
    def _head(self, neck_outputs):
        # Shape = [time * batch].
        path_ids = neck_outputs[constants.PATH_ID]
        # Reshape to [time, batch] and transpose to [batch, time].
        path_ids = tf.transpose(
            tf.reshape(
                path_ids,
                [self._current_num_timesteps, self._current_batch_size]))

        # <tf.float32>[time * batch_size, 1, hidden_dim]
        visual_feature = neck_outputs['visual_feature']
        # Input shape = [time * batch_size, num_tokens, hidden_dim], reshaped
        # to [time, batch_size, num_tokens, hidden_dim].
        raw_text_feature = tf.reshape(
            neck_outputs['text_feature'],
            [self._current_num_timesteps, self._current_batch_size] +
            neck_outputs['text_feature'].shape[1:].as_list())
        # Shape = [batch_size, time, num_tokens, hidden_dim]
        raw_text_feature = tf.transpose(raw_text_feature, perm=[1, 0, 2, 3])

        # <tf.float32>[time, batch_size, 1, hidden_dim]
        visual_feature = tf.reshape(
            visual_feature,
            [self._current_num_timesteps, self._current_batch_size] +
            visual_feature.shape[1:].as_list())

        # <tf.float32>[batch_size, time, hidden_dim]
        visual_feature = tf.squeeze(visual_feature, axis=2)
        visual_feature = tf.transpose(visual_feature, [1, 0, 2])

        first_true = utils.get_first_true_column(
            tf.reshape(
                neck_outputs[constants.DISC_MASK],
                [self._current_num_timesteps, self._current_batch_size]))
        first_true = tf.transpose(first_true)

        # Sanity check: path_ids are consistent for first_true and last_true.
        last_true = utils.get_last_true_column(
            tf.reshape(
                neck_outputs[constants.DISC_MASK],
                [self._current_num_timesteps, self._current_batch_size]))
        last_true = tf.transpose(last_true)
        path_ids_first_true = tf.cond(
            tf.keras.backend.any(first_true),
            lambda: tf.boolean_mask(path_ids, first_true),
            lambda: path_ids[:, 0])
        path_ids_last_true = tf.cond(
            tf.keras.backend.any(last_true),
            lambda: tf.boolean_mask(path_ids, last_true),
            lambda: path_ids[:, 0])
        tf.debugging.assert_equal(path_ids_first_true, path_ids_last_true)

        # <tf.float32>[batch_size, num_tokens, hidden_dim]
        text_feature = tf.cond(
            tf.keras.backend.any(first_true),
            lambda: tf.boolean_mask(raw_text_feature, first_true),
            lambda: raw_text_feature[:, 0, :, :])

        text_feature_last_true = tf.cond(
            tf.keras.backend.any(last_true),
            lambda: tf.boolean_mask(raw_text_feature, last_true),
            lambda: raw_text_feature[:, 0, :, :])
        tf.debugging.assert_equal(text_feature, text_feature_last_true)
        # visual_feature = tf.nn.l2_normalize(visual_feature, axis=2)
        # text_feature = tf.nn.l2_normalize(text_feature, axis=2)

        # <tf.float32>[batch_size, time, num_tokens]
        alpha_i_j = tf.matmul(visual_feature,
                              tf.transpose(text_feature, perm=[0, 2, 1]))
        # <tf.float32>[batch, time, num_tokens]
        c_i_j = tf.nn.softmax(alpha_i_j)
        # <tf.float32>[batch_size, time, num_tokens]
        mask = tf.cast(
            tf.transpose(tf.reshape(
                neck_outputs[constants.DISC_MASK],
                [self._current_num_timesteps, self._current_batch_size]),
                         perm=[1, 0]), tf.float32)

        # <tf.float32>[batch, time]
        score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

        # Compute softmin(x) = softmax(-x).
        # Use a stable softmax since softmax(x) = softmax(x + c) for any
        # constant c; here c = -max(-x).
        negative_score = -1.0 * score
        escore = tf.exp(negative_score - tf.reduce_max(negative_score)) * mask
        sum_escore = tf.tile(tf.expand_dims(tf.reduce_sum(escore, 1), 1),
                             [1, tf.shape(escore)[1]])
        score_weight = tf.divide(escore, sum_escore)

        similarities = tf.reduce_sum(mask * score * score_weight, 1)
        similarities = tf.expand_dims(similarities, axis=0)
        # Shape = [time * batch_size].
        similarities = tf.reshape(
            tf.tile(similarities, [self._current_num_timesteps, 1]), [-1])

        # Apply an affine transform.
        similarities = similarities * self.affine_a + self.affine_b

        output_a = tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1])
        output_b = tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1])

        # Shape = [time * batch].
        output_a = tf.reshape(
            tf.tile(output_a,
                    [self._current_num_timesteps, self._current_batch_size]),
            [-1])
        output_b = tf.reshape(
            tf.tile(output_b,
                    [self._current_num_timesteps, self._current_batch_size]),
            [-1])

        return common.AgentOutput(policy_logits=similarities,
                                  baseline=(output_a, output_b))
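The softmin block above is the numerically stabilized form; a standalone sketch of the same computation with toy values, using the identity softmax(x) = softmax(x + c):

import tensorflow as tf

score = tf.constant([[0.2, 1.5, -0.3]])
mask = tf.constant([[1.0, 1.0, 0.0]])  # last step masked out
negative_score = -1.0 * score
escore = tf.exp(negative_score - tf.reduce_max(negative_score)) * mask
score_weight = escore / tf.reduce_sum(escore, axis=1, keepdims=True)
# Lower scores receive higher weight; masked steps receive zero weight.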
Example #10
 def _head(self, neck_output):
     return common.AgentOutput(
         policy_logits=tf.zeros(
             shape=[tf.shape(neck_output)[0], self._action_space_size]),
         baseline=tf.ones(shape=[tf.shape(neck_output)[0]]))
Example #11
 def _head(self, neck_output):
     return common.AgentOutput(
         policy_logits=self._logits_layer(neck_output),
         baseline=tf.ones(shape=[tf.shape(neck_output)[0]]))
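Both mock heads size their outputs with `tf.shape(neck_output)[0]` rather than the static `neck_output.shape[0]`; the dynamic form keeps working under `tf.function` when the batch dimension is unknown. A sketch:

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=[None, 5], dtype=tf.float32)])
def head(neck_output):
    return tf.ones(shape=[tf.shape(neck_output)[0]])

head(tf.zeros([3, 5]))  # Shape = [3], and works for any batch size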
Example #12
  def _head(self, neck_outputs):
    # <tf.float32>[time * batch_size, 1, hidden_dim]
    visual_feature = neck_outputs['visual_feature']
    # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
    text_feature = neck_outputs['text_feature']

    # <tf.float32>[time, batch_size, 1, hidden_dim]
    visual_feature = tf.reshape(
        visual_feature,
        [self._current_num_timesteps, self._current_batch_size] +
        visual_feature.shape[1:].as_list())

    # <tf.float32>[batch_size, time, hidden_dim]
    visual_feature = tf.squeeze(visual_feature, axis=2)
    visual_feature = tf.transpose(visual_feature, [1, 0, 2])

    first_true = utils.get_first_true_column(
        tf.reshape(neck_outputs[constants.DISC_MASK],
                   [self._current_num_timesteps, self._current_batch_size]))

    # <tf.float32>[batch_size, num_tokens, hidden_dim]
    text_feature = tf.cond(
        tf.keras.backend.any(first_true),
        lambda: tf.boolean_mask(text_feature, tf.reshape(first_true, [-1])),
        lambda: tf.reshape(text_feature, [
            self._current_num_timesteps, self._current_batch_size
        ] + text_feature.shape[1:].as_list())[0, :, :, :])
    # visual_feature = tf.nn.l2_normalize(visual_feature, axis=2)
    # text_feature = tf.nn.l2_normalize(text_feature, axis=2)

    # <tf.float32>[batch_size, time, num_tokens]
    alpha_i_j = tf.matmul(visual_feature,
                          tf.transpose(text_feature, perm=[0, 2, 1]))
    # <tf.float32>[batch_size, time, num_tokens]
    ealpha_i_j = tf.exp(alpha_i_j)
    sum_i_j = tf.tile(
        tf.expand_dims(tf.reduce_sum(ealpha_i_j, 2), 2),
        [1, 1, tf.shape(ealpha_i_j)[2]])
    mask = tf.cast(
        tf.transpose(
            tf.reshape(neck_outputs[constants.DISC_MASK],
                       [self._current_num_timesteps, self._current_batch_size]),
            perm=[1, 0]), tf.float32)
    # <tf.float32>[batch, time, num_tokens]
    c_i_j = tf.divide(ealpha_i_j, sum_i_j)
    # <tf.float32>[batch, time]
    score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

    # Softmin weights via softmax(-score); note this variant omits the
    # max-subtraction stabilization used in Example #9.
    escore = tf.exp(-1 * score) * mask
    sum_escore = tf.tile(
        tf.expand_dims(tf.reduce_sum(escore, 1), 1), [1, tf.shape(escore)[1]])
    score_weight = tf.divide(escore, sum_escore)
    similarities = tf.reduce_sum(mask * score * score_weight, 1)
    similarities = tf.expand_dims(similarities, axis=0)
    # Shape = [time, batch_size].
    similarities = tf.tile(similarities, [self._current_num_timesteps, 1])

    # Apply an affine transform.
    similarities = similarities * self.affine_a + self.affine_b

    output_a = tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1])
    output_b = tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1])

    output_a = tf.tile(output_a,
                       [self._current_num_timesteps, self._current_batch_size])
    output_b = tf.tile(output_b,
                       [self._current_num_timesteps, self._current_batch_size])

    return common.AgentOutput(
        policy_logits=similarities, baseline=(output_a, output_b))
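Two details worth noting against Example #9: the exp/tile/divide sequence here is an unfused softmax over the token axis, and `tf.exp(-1 * score)` skips the max-subtraction stabilization that Example #9 adds. A sketch of the compact, stabilized equivalents (same math, relying on broadcasting):

import tensorflow as tf

alpha_i_j = tf.random.normal([2, 4, 6])      # [batch, time, num_tokens]
c_i_j = tf.nn.softmax(alpha_i_j, axis=2)     # replaces the exp/tile/divide steps
score = tf.reduce_sum(c_i_j * alpha_i_j, 2)  # [batch, time]
negative_score = -score
# Stabilized softmin numerator, as in Example #9.
escore = tf.exp(negative_score - tf.reduce_max(negative_score))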