Example #1
def _sparse_intersect_indices(sp_tensor, required_sp_tensor):
  """Filters timestamps in sp_tensor to those present in required_sp_tensor."""
  # We extend both sp_tensor and required_sp_tensor with each other's indices
  # so that they have the same indices.
  # E.g. their dense representation of one batch entry could be:
  # [dummy, dummy, 1 ]
  dummy_value = 'n/a'
  dummy_required_sp_tensor = _extend_with_dummy(
      sp_tensor, required_sp_tensor, dummy_value)
  dummy_sp_tensor = _extend_with_dummy(required_sp_tensor, sp_tensor,
                                       dummy_value)
  # We get rid of dummy values both for indices in the required_sp_tensor and
  # the sp_tensor.
  # First get rid of indices with dummy values in dummy_required_sp_tensor.
  in_required = tf.sparse_retain(
      dummy_sp_tensor,
      tf.logical_not(tf.equal(dummy_required_sp_tensor.values, dummy_value)))
  # Remove empty timesteps so that the timesteps align with the original
  # required_sp_tensor.
  # Then remove the indices with dummy values.
  in_required = tf.sparse_retain(
      _remove_empty_timesteps(in_required),
      tf.logical_not(tf.equal(in_required.values, dummy_value)))
  if sp_tensor.values.dtype != tf.string:
    in_required = tf.SparseTensor(
        indices=in_required.indices, dense_shape=in_required.dense_shape,
        values=tf.strings.to_number(
            in_required.values, out_type=sp_tensor.values.dtype))
  return in_required
Example #2
    def get_scheduled_sample_inputs(self, done_warm_start, groundtruth_items,
                                    generated_items, scheduled_sampling_func):
        """Scheduled sampling.

    Args:
      done_warm_start: whether we are done with warm start or not.
      groundtruth_items: list of ground truth items.
      generated_items: list of generated items.
      scheduled_sampling_func: scheduled sampling function to choose between
        groundtruth items and generated items.

    Returns:
      A mixed list of ground truth and generated items.
    """
        def sample():
            """Calculate the scheduled sampling params based on iteration number."""
            with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
                return [
                    scheduled_sampling_func(item_gt, item_gen) for item_gt,
                    item_gen in zip(groundtruth_items, generated_items)
                ]

        cases = [
            (tf.logical_not(done_warm_start), lambda: groundtruth_items),
            (tf.logical_not(self.is_training), lambda: generated_items),
        ]
        output_items = tf.case(cases, default=sample, strict=True)

        return output_items
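A minimal sketch of the tf.case dispatch used above, with made-up scalar predicates standing in for done_warm_start and self.is_training:

import tensorflow as tf

done_warm_start = tf.constant(False)
is_training = tf.constant(True)
cases = [
    (tf.logical_not(done_warm_start), lambda: tf.constant(0)),  # warm start: ground truth
    (tf.logical_not(is_training), lambda: tf.constant(1)),      # eval: generated
]
# tf.case returns the branch of the first predicate that is True; here the
# warm-start predicate fires, so the result is 0 (the ground-truth branch).
output = tf.case(cases, default=lambda: tf.constant(2), strict=True)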
Example #3
def _get_triplet_mask(labels):
    """Return a 3D mask where mask[a, p, n] is True iff the triplet (a, p, n) is valid.
    A triplet (i, j, k) is valid if:
        - i, j, k are distinct
        - labels[i] == labels[j] and labels[i] != labels[k]
    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    """
    # Check that i, j and k are distinct
    indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    indices_not_equal = tf.logical_not(indices_equal)
    i_not_equal_j = tf.expand_dims(indices_not_equal, 2)
    i_not_equal_k = tf.expand_dims(indices_not_equal, 1)
    j_not_equal_k = tf.expand_dims(indices_not_equal, 0)

    distinct_indices = tf.logical_and(
        tf.logical_and(i_not_equal_j, i_not_equal_k), j_not_equal_k)

    # Check if labels[i] == labels[j] and labels[i] != labels[k]
    label_equal = tf.equal(tf.expand_dims(labels, 0),
                           tf.expand_dims(labels, 1))
    i_equal_j = tf.expand_dims(label_equal, 2)
    i_equal_k = tf.expand_dims(label_equal, 1)

    valid_labels = tf.logical_and(i_equal_j, tf.logical_not(i_equal_k))

    # Combine the two masks
    mask = tf.logical_and(distinct_indices, valid_labels)

    return mask
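A minimal usage sketch, assuming eager-mode TensorFlow and a toy batch:

import tensorflow as tf

labels = tf.constant([0, 0, 1, 1], dtype=tf.int32)
mask = _get_triplet_mask(labels)  # shape [4, 4, 4], dtype bool
# mask[0, 1, 2] is True: the indices are distinct, labels[0] == labels[1],
# and labels[0] != labels[2].
# mask[0, 0, 2] is False: the anchor and positive indices coincide.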
Example #4
    def _loop_cond(i, unused_alive_seq, alive_log_probs, unused_finished_seq,
                   finished_scores, finished_in_finished):
        """Checking termination condition.

    We terminate when we have decoded up to decode_length or the lowest-scoring
    item in finished has a greater score than the highest-prob item in alive
    divided by the max length penalty. Optionally, we also terminate if all
    alive scores are below the lower bound.

    Args:
      i: loop index
      alive_log_probs: probabilities of the beams. [batch_size, beam_size]
      finished_scores: scores for each of these sequences.
        [batch_size, beam_size]
      finished_in_finished: finished bools for each of these sequences.
        [batch_size, beam_size]

    Returns:
      True to continue the loop, False to stop.
    """
        max_length_penalty = tf.pow(((5. + tf.to_float(decode_length)) / 6.),
                                    alpha)
        # The best possible score of the most likely alive sequence.
        lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty

        # Now compute the lowest score of a finished sequence in finished.
        # If a sequence isn't finished, we multiply its score by 0. Since
        # scores are all negative, taking the min gives us the score of the
        # lowest finished item.
        lowest_score_of_finished_in_finished = tf.reduce_min(
            finished_scores * tf.to_float(finished_in_finished), axis=1)
        # If none of the sequences have finished, then the min will be 0 and
        # we have to replace it by -ve INF if it is. The score of any seq in alive
        # will be much higher than -ve INF and the termination condition will not
        # be met.
        lowest_score_of_finished_in_finished = _apply_negative_infinity_mask(
            lowest_score_of_finished_in_finished,
            tf.logical_not(tf.reduce_any(finished_in_finished, 1)))

        # Will terminate beam search early if bound_is_met is True.
        bound_is_met = tf.reduce_all(
            tf.greater(lowest_score_of_finished_in_finished,
                       lower_bound_alive_scores))

        # Check if all alive scores are below minimum.
        if minimum_score:
            minimum_score_log = tf.log(minimum_score)
            bound_is_met = tf.logical_or(
                bound_is_met,
                tf.reduce_all(
                    tf.less(lower_bound_alive_scores, minimum_score_log)))

        return tf.logical_and(tf.less(i, decode_length),
                              tf.logical_not(bound_is_met))
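For intuition, the length penalty above can be evaluated by hand (a sketch; decode_length=20 and alpha=0.6 are made-up values, and tf.cast stands in for the TF1 tf.to_float):

import tensorflow as tf

decode_length, alpha = 20, 0.6
max_length_penalty = tf.pow((5. + tf.cast(decode_length, tf.float32)) / 6., alpha)
# ((5 + 20) / 6) ** 0.6 ~= 2.35. Dividing the (negative) alive log-probs by
# this raises the lower bound, so termination gets harder for long decodes.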
Example #5
def sequence_accuracy(gt_seqs,
                      decode_seqs,
                      gt_seq_lengths,
                      pr_seq_lengths,
                      debug=False,
                      name=""):
    """Computes the complete and the partial sequence accuracy."""
    gt_shape = common_layers.shape_list(gt_seqs)
    pr_shape = common_layers.shape_list(decode_seqs)
    batch_size = gt_shape[0]
    depth = gt_shape[-1]
    gt_len = gt_shape[1]
    pr_len = pr_shape[1]
    max_len = tf.maximum(gt_len, pr_len)
    gt_seqs = tf.pad(gt_seqs, [[0, 0], [0, max_len - gt_len], [0, 0]])
    decode_seqs = tf.pad(decode_seqs, [[0, 0], [0, max_len - pr_len], [0, 0]])
    gt_seqs = tf.where(
        tf.tile(
            tf.expand_dims(tf.sequence_mask(gt_seq_lengths, maxlen=max_len),
                           2), [1, 1, depth]), gt_seqs,
        tf.fill(tf.shape(gt_seqs), -1))
    decode_seqs = tf.where(
        tf.tile(
            tf.expand_dims(tf.sequence_mask(pr_seq_lengths, maxlen=max_len),
                           2), [1, 1, depth]), decode_seqs,
        tf.fill(tf.shape(decode_seqs), -1))
    # [batch_size, decode_length]
    corrects = tf.reduce_all(tf.equal(gt_seqs, decode_seqs), -1)
    correct_mask = tf.reduce_all(corrects, -1)
    # [batch_size]
    if debug:
        incorrect_mask = tf.logical_not(correct_mask)
        incorrect_gt = tf.boolean_mask(gt_seqs, incorrect_mask)
        incorrect_pr = tf.boolean_mask(decode_seqs, incorrect_mask)
        with tf.control_dependencies([
                tf.print(name + "_mismatch",
                         incorrect_gt,
                         incorrect_pr,
                         summarize=1000)
        ]):
            correct_mask = tf.identity(correct_mask)
    correct_seqs = tf.to_float(correct_mask)
    total_correct_seqs = tf.reduce_sum(correct_seqs)
    mean_complete_accuracy = total_correct_seqs / tf.to_float(batch_size)
    # Compute partial accuracy
    errors = tf.logical_not(corrects)
    errors = tf.cast(tf.cumsum(tf.to_float(errors), axis=-1), tf.bool)
    # [batch_size]
    correct_steps = tf.reduce_sum(tf.to_float(tf.logical_not(errors)), axis=-1)
    mean_partial_accuracy = tf.reduce_mean(
        tf.div(tf.minimum(correct_steps, gt_seq_lengths), gt_seq_lengths))
    return mean_complete_accuracy, mean_partial_accuracy
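The partial-accuracy computation relies on a cumsum trick: cumulative-summing the error indicator marks every step at or after the first mismatch. A sketch with made-up values:

import tensorflow as tf

corrects = tf.constant([[True, True, False, True]])
errors = tf.cast(tf.cumsum(tf.cast(tf.logical_not(corrects), tf.float32),
                           axis=-1), tf.bool)
# [[False, False, True, True]] -- the first error "poisons" all later steps
correct_steps = tf.reduce_sum(tf.cast(tf.logical_not(errors), tf.float32),
                              axis=-1)
# [2.0] -- only the prefix before the first error counts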
Example #6
    def preprocess_device_grads(self, device_grads):
        compact_grads = (self.benchmark_cnn.params.use_fp16 and
                         self.benchmark_cnn.params.compact_gradient_transfer)
        defer_grads = (
            self.benchmark_cnn.params.variable_consistency == 'relaxed')

        grads_to_reduce = [[g for g, _ in grad_vars]
                           for grad_vars in device_grads]
        algorithm = batch_allreduce.algorithm_from_params(
            self.benchmark_cnn.params)
        reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
            grads_to_reduce, self.benchmark_cnn.params.gradient_repacking,
            compact_grads, defer_grads, self.benchmark_cnn.params.xla_compile)
        if self.benchmark_cnn.enable_auto_loss_scale:
            # Check for infs or nans
            is_finite_list = []
            with tf.name_scope('check_for_inf_and_nan'):
                for tower_grads in reduced_grads:
                    with tf.colocate_with(tower_grads[0]):
                        # TODO(tanmingxing): Create fused op that takes in a list of tensors
                        # as input and returns scalar boolean True if there are any
                        # infs/nans.
                        is_finite_list.append(
                            tf.reduce_all([
                                tf.reduce_all(tf.is_finite(g))
                                for g in tower_grads
                            ]))
                self.grad_has_inf_nan = tf.logical_not(
                    tf.reduce_all(is_finite_list))
        reduced_device_grads = [[
            (g, v) for g, (_, v) in zip(grads, grad_vars)
        ] for grads, grad_vars in zip(reduced_grads, device_grads)]
        return self.benchmark_cnn.devices, reduced_device_grads
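The finite-check at the core of the auto-loss-scale branch, isolated (a sketch; tf.math.is_finite is the TF2 spelling of tf.is_finite):

import tensorflow as tf

tower_grads = [tf.constant([1.0, float('inf')]), tf.constant([2.0, 3.0])]
is_finite = tf.reduce_all([tf.reduce_all(tf.math.is_finite(g))
                           for g in tower_grads])
grad_has_inf_nan = tf.logical_not(is_finite)  # True: one inf poisons the batch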
Example #7
    def _match(self, similarity_matrix, valid_rows):
        """Bipartite matches a collection rows and columns. A greedy bi-partite.

    TODO(rathodv): Add num_valid_columns options to match only that many columns
    with all the rows.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher values mean more similar.
      valid_rows: A boolean tensor of shape [N] indicating the rows that are
        valid.

    Returns:
      match_results: int32 tensor of shape [M] with match_results[i]=-1
        meaning that column i is not matched and otherwise that it is matched to
        row match_results[i].
    """
        valid_row_sim_matrix = tf.gather(
            similarity_matrix, tf.squeeze(tf.where(valid_rows), axis=-1))
        invalid_row_sim_matrix = tf.gather(
            similarity_matrix,
            tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
        similarity_matrix = tf.concat(
            [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
        # Convert similarity matrix to distance matrix as tf.image.bipartite tries
        # to find minimum distance matches.
        distance_matrix = -1 * similarity_matrix
        num_valid_rows = tf.reduce_sum(tf.cast(valid_rows, dtype=tf.float32))
        _, match_results = image_ops.bipartite_match(
            distance_matrix, num_valid_rows=num_valid_rows)
        match_results = tf.reshape(match_results, [-1])
        match_results = tf.cast(match_results, tf.int32)
        return match_results
Example #8
def _compute_head_weights_with_time_prior(weights, paddings, time_deltas,
                                          num_heads, time_exp_base,
                                          overlapping_chunks):
  """Computes head-specific attention weights with time prior.

  This function simply masks out the weights for items if they don't belong to a
  certain chunk. Here, chunks are allocated based on time information. We use
  an exponential function, pow(time_exp_base, i), to allocate segment boundaries.
  Note that time delta values represent number of days.

  Example 1: Let overlapping_chunks=False, time_exp_base=3 and num_heads=3.
  1st head focuses on the items within time interval [0, pow(3,0)],
  2nd head focuses on the items within time interval (pow(3,0), pow(3,1)],
  3rd (last) head focuses on the items within time interval (pow(3,1), inf]

  Example 2: Let overlapping_chunks=True, time_exp_base=3 and num_heads=3.
  1st head focuses on the items within time interval [0, pow(3,0)],
  2nd head focuses on the items within time interval [0, pow(3,1)],
  3rd (last) head focuses on the items within time interval [0, inf]

  Args:
    weights: A 3d tensor with shape of [h*N, T_q, T_k].
    paddings: A 3d tensor with shape of [h*N, T_q, T_k].
    time_deltas: A 3d tensor with shape of [N, T_q, T_k].
    num_heads: An integer denoting number of chunks.
    time_exp_base: A scalar. Base for exponential time intervals.
    overlapping_chunks: Boolean. Whether to use overlapping chunks.

  Returns:
    A list of h tensors (each shaped [N, T_q, T_k]) where tensors correspond to
    chunk specific weights.
  """
  tf.logging.info(
      "Computing with time_exp_base:{} and overlapping_chunks:{}".format(
          time_exp_base, overlapping_chunks))
  chunk_outputs_list = []
  weights_split = tf.split(weights, num_heads, axis=0)
  paddings_split = tf.split(paddings, num_heads, axis=0)
  ones_tensor = tf.ones_like(time_deltas)  # (N, T_q, T_k)

  # False in previous items and True in future items.
  mask_previous_head = time_deltas < 0  # (N, T_q, T_k)
  for i in range(num_heads):
    if i == (num_heads - 1):  # Last chunk considers all the remaining items.
      # All True.
      mask_next_head = tf.ones_like(time_deltas, dtype=bool)  # (N, T_q, T_k)
    else:
      mask_next_head = tf.math.less_equal(
          time_deltas, (time_exp_base**i) * ones_tensor)  # (N, T_q, T_k)
    mask = tf.logical_and(tf.logical_not(mask_previous_head),
                          mask_next_head)  # (N, T_q, T_k)
    output = tf.where(mask, weights_split[i],
                      paddings_split[i])  # (N, T_q, T_k)
    chunk_outputs_list.append(output)

    # Update previous mask for non-overlapping chunks.
    if not overlapping_chunks:
      mask_previous_head = mask_next_head

  return chunk_outputs_list
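The interval logic, written out directly for one query with made-up time deltas (num_heads=3, time_exp_base=3, non-overlapping chunks):

import tensorflow as tf

time_deltas = tf.constant([[[-3., 0., 1., 2., 5., 10.]]])  # (N=1, T_q=1, T_k=6)
in_head0 = tf.logical_and(time_deltas >= 0., time_deltas <= 3.**0)
# [[[False, True, True, False, False, False]]] -> interval [0, 1]
in_head1 = tf.logical_and(time_deltas > 3.**0, time_deltas <= 3.**1)
# [[[False, False, False, True, False, False]]] -> interval (1, 3]
in_head2 = time_deltas > 3.**1  # the last head takes all remaining items
# [[[False, False, False, False, True, True]]] -> interval (3, inf]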
Example #9
  def __init__(self,
               sess,
               reward_scale,
               ipd_scale,
               observation_shape=NATURE_DQN_OBSERVATION_SHAPE,
               resize_shape=PSEUDO_COUNT_OBSERVATION_SHAPE,
               quantization_factor=PSEUDO_COUNT_QUANTIZATION_FACTOR,
               tf_device='/cpu:*',
               optimizer=tf.train.RMSPropOptimizer(
                   learning_rate=0.0001,
                   momentum=0.9,
                   epsilon=0.0001)):
    self._sess = sess
    self.reward_scale = reward_scale
    self.ipd_scale = ipd_scale
    self.observation_shape = observation_shape
    self.resize_shape = resize_shape
    self.quantization_factor = quantization_factor
    self.optimizer = optimizer

    with tf.device(tf_device), tf.name_scope('intrinsic_pixelcnn'):
      observation_shape = (1,) + observation_shape + (1,)
      self.obs_ph = tf.placeholder(tf.uint8, shape=observation_shape,
                                   name='obs_ph')
      self.preproccessed_obs = self._preprocess(self.obs_ph, resize_shape)
      self.iter_ph = tf.placeholder(tf.uint32, shape=[], name='iter_num')
      self.eval_ph = tf.placeholder(tf.bool, shape=[], name='eval_mode')
      self.network = tf.make_template('PixelCNN', self._network_template)
      self.ipd = tf.cond(tf.logical_not(self.eval_ph),
                         self.update,
                         self.virtual_update)
      self.reward = self.ipd_to_reward(self.ipd, self.iter_ph)
Example #10
    def get_gan_loss(self, true_frames, gen_frames, name):
        """Get the discriminator + generator loss at every step.

    This performs a 1:1 update of the discriminator and generator at every
    step.

    Args:
      true_frames: 5-D Tensor of shape (num_steps, batch_size, H, W, C)
                   Assumed to be ground truth.
      gen_frames: 5-D Tensor of shape (num_steps, batch_size, H, W, C)
                  Assumed to be fake.
      name: discriminator scope.
    Returns:
      loss: 0-D Tensor, with d_loss + g_loss
    """
        # D - STEP
        with tf.variable_scope("%s_discriminator" % name, reuse=tf.AUTO_REUSE):
            gan_d_loss, _, fake_logits_stop = self.d_step(
                true_frames, gen_frames)

        # G - STEP
        with tf.variable_scope("%s_discriminator" % name, reuse=True):
            gan_g_loss_pos_d, gan_g_loss_neg_d = self.g_step(
                gen_frames, fake_logits_stop)
        gan_g_loss = gan_g_loss_pos_d + gan_g_loss_neg_d
        tf.summary.scalar("gan_loss_%s" % name, gan_g_loss_pos_d + gan_d_loss)

        if self.hparams.gan_optimization == "joint":
            gan_loss = gan_g_loss + gan_d_loss
        else:
            curr_step = self.get_iteration_num()
            # Note: `curr_step % 2 == 0` compares a tensor by Python identity
            # in TF1; use tf.equal so the predicate is a real boolean tensor.
            gan_loss = tf.cond(tf.logical_not(tf.equal(curr_step % 2, 0)),
                               lambda: gan_g_loss, lambda: gan_d_loss)
        return gan_loss
Example #11
def prune_completely_outside_window(boxes, window):
    """
    Prunes bounding boxes that fall completely outside of the given window.
    This function does not clip partially overflowing boxes.

    Arguments:
        boxes: a float tensor with shape [M_in, 4].
        window: a float tensor with shape [4] representing [ymin, xmin, ymax, xmax]
            of the window.
    Returns:
        boxes: a float tensor with shape [M_out, 4] where 0 <= M_out <= M_in.
        valid_indices: a long tensor with shape [M_out] indexing the valid bounding boxes
            in the input 'boxes' tensor.
    """
    y_min, x_min, y_max, x_max = tf.split(boxes, num_or_size_splits=4, axis=1)
    # they have shape [None, 1]
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # they have shape []

    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max),
        tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min),
        tf.less_equal(x_max, win_x_min)
    ],
                                      axis=1)
    valid_indices = tf.squeeze(tf.where(
        tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
                               axis=1)
    boxes = tf.gather(boxes, valid_indices)
    return boxes, valid_indices
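A usage sketch, assuming eager-mode TensorFlow and a unit window:

import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.4, 0.4],    # fully inside -> kept
                     [0.9, 0.9, 1.5, 1.5],    # partially overflows -> kept
                     [1.1, 1.1, 1.6, 1.6]])   # completely outside -> pruned
window = tf.constant([0.0, 0.0, 1.0, 1.0])
pruned, valid_indices = prune_completely_outside_window(boxes, window)
# pruned has shape [2, 4]; valid_indices == [0, 1]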
Example #12
def prune_outside_window(boxlist, window, scope=None):
    """Prunes bounding boxes that fall outside a given window.

  This function prunes bounding boxes that even partially fall outside the given
  window. See also clip_to_window which only prunes bounding boxes that fall
  completely outside the window, and clips any bounding boxes that partially
  overflow.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
    with tf.name_scope(scope, 'PruneOutsideWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        coordinate_violations = tf.concat([
            tf.less(y_min, win_y_min),
            tf.less(x_min, win_x_min),
            tf.greater(y_max, win_y_max),
            tf.greater(x_max, win_x_max)
        ], 1)
        valid_indices = tf.reshape(
            tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
            [-1])
        return gather(boxlist, valid_indices), valid_indices
Example #13
def prune_completely_outside_window(boxlist, window, scope=None):
    """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
      the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
    with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        coordinate_violations = tf.concat([
            tf.greater_equal(y_min, win_y_max),
            tf.greater_equal(x_min, win_x_max),
            tf.less_equal(y_max, win_y_min),
            tf.less_equal(x_max, win_x_min)
        ], 1)
        valid_indices = tf.reshape(
            tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
            [-1])
        return gather(boxlist, valid_indices), valid_indices
Example #14
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_confidences
      fields.InputDataFields.groundtruth_keypoints
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have bounding
    boxes.
  """
  groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  nan_indicator_vector = tf.greater(tf.reduce_sum(tf.cast(
      tf.is_nan(groundtruth_boxes), dtype=tf.int32), reduction_indices=[1]), 0)
  valid_indicator_vector = tf.logical_not(nan_indicator_vector)
  valid_indices = tf.where(valid_indicator_vector)

  return retain_groundtruth(tensor_dict, valid_indices)
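The NaN-indicator core, isolated from the fields/retain_groundtruth machinery (a sketch; tf.math.is_nan is the TF2 spelling of tf.is_nan):

import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [float('nan'), 0.2, 0.6, 0.6]])
nan_rows = tf.greater(
    tf.reduce_sum(tf.cast(tf.math.is_nan(boxes), tf.int32), axis=1), 0)
valid = tf.logical_not(nan_rows)      # [True, False]
kept = tf.boolean_mask(boxes, valid)  # only the NaN-free row survives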
Example #15
    def correct_keypoints(image_shape, keypoints):
        """
        Arguments:
            image_shape: an int tensor with shape [3].
            keypoints: an int tensor with shape [num_persons, 17, 3].
        Returns:
            an int tensor with shape [num_persons, 17, 3].
        """
        y, x, v = tf.split(keypoints, 3, axis=2)

        height = image_shape[0]
        width = image_shape[1]

        coordinate_violations = tf.concat([
            tf.less(y, 0),
            tf.less(x, 0),
            tf.greater_equal(y, height),
            tf.greater_equal(x, width)
        ],
                                          axis=2)  # shape [num_persons, 17, 4]

        valid_indicator = tf.logical_not(
            tf.reduce_any(coordinate_violations, axis=2))
        valid_indicator = tf.expand_dims(valid_indicator, 2)
        # it has shape [num_persons, 17, 1]

        v *= tf.to_int32(valid_indicator)
        keypoints = tf.concat([y, x, v], axis=2)
        return keypoints
Example #16
    def call(self, similarity, mask=None):
        """
            Args:
                  similarity: a Tensor with shape [batch_size, heads (optional), q/k_length, q/k_length]
                  mask: a Tensor with shape [batch_size, q/k_length, q/k_length]

            Returns:
                masked_similarity: a Tensor with shape [batch_size, heads (optional), q/k_length, q/k_length]
        """
        if mask is None:
            return similarity

        similarity_rank_assert = tf.assert_rank_in(similarity, (3, 4))
        mask_rank_assert = tf.assert_rank(mask, 3)

        # There are so many different reasons a mask might be constructed a particular manner.
        # Because of this we don't want to infer a particular construction.
        with tf.control_dependencies(
            [similarity_rank_assert, mask_rank_assert]):
            # If shapes don't match, then similarity has been split for multi-headed attention
            if len(mask.shape) != len(similarity.shape):
                similarity[:, 0].shape.assert_is_compatible_with(mask.shape)
                mask = mask[:, None]
            else:
                similarity.shape.assert_is_compatible_with(mask.shape)

            # We know that we're passing this through a softmax later, so adding a
            # relatively large negative value masks the output and avoids a Hadamard
            # product (though it's arguably no more efficient operation-wise).
            bias = -1e9 * tf.cast(tf.logical_not(mask), tf.float32)
            masked_similarity = similarity + bias
            return masked_similarity
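The additive-bias masking trick in isolation (a toy sketch):

import tensorflow as tf

similarity = tf.constant([[1.0, 2.0, 3.0]])
mask = tf.constant([[True, True, False]])  # False marks positions to hide
bias = -1e9 * tf.cast(tf.logical_not(mask), tf.float32)
weights = tf.nn.softmax(similarity + bias)
# ~[[0.27, 0.73, 0.00]] -- the masked logit sinks to -1e9 and vanishes
# after the softmax, with no elementwise product needed.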
Example #17
 def cond(ctx, cache, probs):
     # ctx = tf.Print(ctx,[tf.shape(ctx)])
     is_eos = tf.reduce_all(
         tf.reduce_any(tf.equal(ctx[:, -1:], eos_token), axis=1))
     is_max_len = tf.greater_equal(get_shape_list(probs)[1], max_len)
     is_min_len = tf.greater_equal(get_shape_list(probs)[1], min_len)
     first_cond = tf.logical_and(is_eos, is_min_len)
     return tf.logical_not(first_cond)
Example #18
 def cond_sufficient_descent(learning_rate_action,
                             cond_sufficient_descent,
                             cost_perturbed):
     del cost_perturbed
     cond_1 = tf.math.greater(learning_rate_action,
                              self.learning_rate_action)
     return tf.math.logical_and(
         cond_1, tf.logical_not(cond_sufficient_descent))
Example #19
def get_attention_bias(sequence_length):
  """Create attention bias so attention is not applied at padding position."""
  # attention_bias: [batch, 1, 1, memory_length]
  invert_sequence_mask = tf.to_float(tf.logical_not(tf.sequence_mask(
      sequence_length)))
  attention_bias = common_attention.attention_bias_ignore_padding(
      invert_sequence_mask)
  return attention_bias
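A sketch of the mask inversion with made-up lengths (in tensor2tensor, attention_bias_ignore_padding then essentially scales this by -1e9 and reshapes it to [batch, 1, 1, memory_length]):

import tensorflow as tf

sequence_length = tf.constant([2, 3])
memory_padding = tf.cast(
    tf.logical_not(tf.sequence_mask(sequence_length)), tf.float32)
# [[0., 0., 1.], [0., 0., 0.]] -- 1.0 marks the padded positions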
Example #20
  def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size. If None, keeps all positive samples and
        randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is True.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
          (=False) examples.
      scope: name scope.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    if len(indicator.get_shape().as_list()) != 1:
      raise ValueError('indicator must be 1 dimensional, got a tensor of '
                       'shape %s' % indicator.get_shape())
    if len(labels.get_shape().as_list()) != 1:
      raise ValueError('labels must be 1 dimensional, got a tensor of '
                       'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
      raise ValueError('labels should be of type bool. Received: %s' %
                       labels.dtype)
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be of type bool. Received: %s' %
                       indicator.dtype)
    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
      if self._is_static:
        return self._static_subsample(indicator, batch_size, labels)

      else:
        # Only sample from indicated samples
        negative_idx = tf.logical_not(labels)
        positive_idx = tf.logical_and(labels, indicator)
        negative_idx = tf.logical_and(negative_idx, indicator)

        # Sample positive and negative samples separately
        if batch_size is None:
          max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
        else:
          max_num_pos = int(self._positive_fraction * batch_size)
        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
        num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
        if batch_size is None:
          negative_positive_ratio = (
              1 - self._positive_fraction) / self._positive_fraction
          max_num_neg = tf.to_int32(
              negative_positive_ratio * tf.to_float(num_sampled_pos))
        else:
          max_num_neg = batch_size - num_sampled_pos
        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)

        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
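The indicator partition at the heart of the sampler, isolated (a sketch with made-up flags):

import tensorflow as tf

labels = tf.constant([True, False, True, False])
indicator = tf.constant([True, True, False, True])
positive_idx = tf.logical_and(labels, indicator)
# [True, False, False, False] -- positives that may be sampled
negative_idx = tf.logical_and(tf.logical_not(labels), indicator)
# [False, True, False, True] -- negatives that may be sampled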
Example #21
  def next_inputs(self, time, outputs, state, sample_ids, name=None):
    with tf.name_scope(name, "ScheduledOutputTrainingHelperNextInputs",
                       [time, outputs, state, sample_ids]):
      (finished, base_next_inputs, state) = (
          super(ScheduledOutputTrainingHelper, self).next_inputs(
              time=time,
              outputs=outputs,
              state=state,
              sample_ids=sample_ids,
              name=name))
      sample_ids = tf.cast(sample_ids, tf.bool)

      def maybe_sample():
        """Perform scheduled sampling."""

        def maybe_concatenate_auxiliary_inputs(outputs_, indices=None):
          """Concatenate outputs with auxiliary inputs, if they exist."""
          if self._auxiliary_input_tas is None:
            return outputs_

          next_time = time + 1
          auxiliary_inputs = tf.nest.map_structure(
              lambda ta: ta.read(next_time), self._auxiliary_input_tas)
          if indices is not None:
            auxiliary_inputs = tf.gather_nd(auxiliary_inputs, indices)
          return tf.nest.map_structure(
              lambda x, y: tf.concat((x, y), -1),
              outputs_, auxiliary_inputs)

        if self._next_inputs_fn is None:
          return tf.where(
              sample_ids, maybe_concatenate_auxiliary_inputs(outputs),
              base_next_inputs)

        where_sampling = tf.cast(
            tf.where(sample_ids), tf.int32)
        where_not_sampling = tf.cast(
            tf.where(tf.logical_not(sample_ids)), tf.int32)
        outputs_sampling = tf.gather_nd(outputs, where_sampling)
        inputs_not_sampling = tf.gather_nd(base_next_inputs,
                                           where_not_sampling)
        sampled_next_inputs = maybe_concatenate_auxiliary_inputs(
            self._next_inputs_fn(outputs_sampling), where_sampling)

        base_shape = tf.shape(base_next_inputs)
        return (tf.scatter_nd(indices=where_sampling,
                              updates=sampled_next_inputs,
                              shape=base_shape)
                + tf.scatter_nd(indices=where_not_sampling,
                                updates=inputs_not_sampling,
                                shape=base_shape))

      all_finished = tf.reduce_all(finished)
      no_samples = tf.logical_not(tf.reduce_any(sample_ids))
      next_inputs = tf.cond(
          tf.logical_or(all_finished, no_samples),
          lambda: base_next_inputs, maybe_sample)
      return (finished, next_inputs, state)
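The two-sided scatter_nd merge used in maybe_sample, reduced to a toy sketch: rows come from one tensor where sample_ids is True and from the other where it is False, and the two disjoint scatters are summed back together.

import tensorflow as tf

sample_ids = tf.constant([True, False, True])
where_sampling = tf.cast(tf.where(sample_ids), tf.int32)
where_not_sampling = tf.cast(tf.where(tf.logical_not(sample_ids)), tf.int32)
sampled = tf.constant([[1.], [2.], [3.]])
base = tf.constant([[10.], [20.], [30.]])
merged = (
    tf.scatter_nd(where_sampling, tf.gather_nd(sampled, where_sampling),
                  tf.shape(sampled)) +
    tf.scatter_nd(where_not_sampling, tf.gather_nd(base, where_not_sampling),
                  tf.shape(base)))
# [[1.], [20.], [3.]] -- the two scatters touch disjoint rows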
Example #22
def lengths_to_area_mask(feature_length, length, max_area_size):
  """Generates a non-padding mask for areas based on lengths.

  Args:
    feature_length: a tensor of [batch_size]
    length: the length of the batch
    max_area_size: the maximum area size considered
  Returns:
    mask: a tensor in shape of [batch_size, num_areas]
  """

  paddings = tf.cast(tf.expand_dims(
      tf.logical_not(
          tf.sequence_mask(feature_length, maxlen=length)), 2), tf.float32)
  _, _, area_sum, _, _ = compute_area_features(paddings,
                                               max_area_width=max_area_size)
  mask = tf.squeeze(tf.logical_not(tf.cast(area_sum, tf.bool)), [2])
  return mask
Example #23
    def build_graph(self):
        input_dim_with_batch = (self.batchsize,
                                self.num_frame_stack) + self.pic_size
        input_dim_general = (None, self.num_frame_stack) + self.pic_size

        self.input_prev_state = tf.placeholder(tf.float32, input_dim_general,
                                               "prev_state")
        self.input_next_state = tf.placeholder(tf.float32,
                                               input_dim_with_batch,
                                               "next_state")
        self.input_reward = tf.placeholder(tf.float32, self.batchsize,
                                           "reward")
        self.input_actions = tf.placeholder(tf.int32, self.batchsize,
                                            "actions")
        self.input_done_mask = tf.placeholder(tf.int32, self.batchsize,
                                              "done_mask")

        # These are the state action values for all states
        # The target Q-values come from the fixed network
        with tf.variable_scope("fixed"):
            qsa_targets = self.create_network(self.input_next_state,
                                              trainable=False)

        with tf.variable_scope("train"):
            qsa_estimates = self.create_network(self.input_prev_state,
                                                trainable=True)

        self.best_action = tf.argmax(qsa_estimates, axis=1)

        not_done = tf.cast(
            tf.logical_not(tf.cast(self.input_done_mask, "bool")), "float32")
        q_target = tf.reduce_max(
            qsa_targets, -1) * self.gamma * not_done + self.input_reward
        # select the chosen action from each row
        # in numpy this is qsa_estimates[range(batchsize), self.input_actions]
        action_slice = tf.stack(
            [tf.range(0, self.batchsize), self.input_actions], axis=1)
        q_estimates_for_input_action = tf.gather_nd(qsa_estimates,
                                                    action_slice)

        training_loss = tf.nn.l2_loss(
            q_target - q_estimates_for_input_action) / self.batchsize

        optimizer = tf.train.AdamOptimizer(**(self.optimizer_params))

        reg_loss = tf.add_n(tf.losses.get_regularization_losses())
        self.train_op = optimizer.minimize(reg_loss + training_loss)

        train_params = self.get_variables("train")
        fixed_params = self.get_variables("fixed")

        assert (len(train_params) == len(fixed_params))
        self.copy_network_ops = [
            tf.assign(fixed_v, train_v)
            for train_v, fixed_v in zip(train_params, fixed_params)
        ]
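The done-mask conversion in build_graph, isolated (a sketch): an int "done" flag becomes a float multiplier that zeroes the bootstrap term on terminal transitions.

import tensorflow as tf

done_mask = tf.constant([0, 1, 0])
not_done = tf.cast(tf.logical_not(tf.cast(done_mask, tf.bool)), tf.float32)
# [1., 0., 1.] -- q_target then reduces to the immediate reward where done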
Example #24
        def infer_step(result, length):
            """Inference step."""
            def print_info(result, length, new_length):
                vocab = self.problem_hparams.vocabulary["targets"]
                tf.logging.info(
                    "length=%s new_length=%s length_diff=%s new_suffix=%s",
                    length,
                    new_length,
                    new_length - length,
                    str([
                        vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
                        for index in result[0, -block_size:, 0,
                                            0][:new_length - length]
                    ]).decode("unicode-escape"),
                )

            features["targets"] = tf.pad(result,
                                         [[0, 0], [0, 1], [0, 0], [0, 0]])
            samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

            _, top_k_indices = tf.nn.top_k(
                logits[:, :-1, :1, :, :],
                k=self._decode_hparams.guess_and_check_top_k)
            in_top_k = tf.reduce_any(tf.equal(tf.to_int64(top_k_indices),
                                              tf.expand_dims(result, 4)),
                                     axis=4)

            eos_cumsum = tf.cumsum(tf.to_int32(
                tf.equal(result, text_encoder.EOS_ID)),
                                   axis=1)
            after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)

            correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))
            correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
            perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
            for axis in [0, 2, 3]:
                perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

            new_length = tf.reduce_sum(tf.to_int32(
                tf.equal(correct_cumsum, perfect_cumsum)),
                                       axis=1)
            new_length = tf.squeeze(new_length, axis=[0, 1, 2])
            new_length = tf.minimum(new_length, decode_length)

            new_result = tf.concat([
                result[:, :new_length, :, :],
                tf.reshape(samples[:, new_length, :block_size, :],
                           [1, block_size, 1, 1])
            ],
                                   axis=1)

            with tf.control_dependencies(
                [tf.py_func(print_info, [result, length, new_length], [])]):
                new_result = tf.identity(new_result)

            return new_result, new_length
Example #25
def get_cross_block_att(block_ids,
                        block_pos,
                        all_block_ids,
                        all_block_pos,
                        cross_block_attention_mode,
                        cast_to_int32=True):
    """Computes attention mask between blocks based on their document IDs."""
    # [batch_size, 1]
    block_ids_expanded = tf.expand_dims(block_ids, 1)
    # [1, global_batch_size]
    all_block_ids_expanded = tf.expand_dims(all_block_ids, 0)

    # [batch_size, 1]
    block_pos_expanded = tf.expand_dims(block_pos, 1)
    # [1, global_batch_size]
    all_block_pos_expanded = tf.expand_dims(all_block_pos, 0)

    # [batch_size, global_batch_size]
    cross_block_attention = tf.logical_and(
        tf.not_equal(block_ids_expanded, 0),
        tf.not_equal(all_block_ids_expanded, 0))

    if cross_block_attention_mode == "doc":
        # [batch_size, global_batch_size]
        cross_block_attention = tf.logical_and(
            tf.equal(block_ids_expanded, all_block_ids_expanded),
            cross_block_attention)
    elif cross_block_attention_mode == "block":
        # [batch_size, global_batch_size]
        cross_block_attention = tf.logical_and(
            tf.equal(block_ids_expanded, all_block_ids_expanded),
            cross_block_attention)
        cross_block_attention = tf.logical_and(
            tf.equal(block_pos_expanded, all_block_pos_expanded),
            cross_block_attention)
    elif cross_block_attention_mode == "other_blocks":
        is_the_same_doc = tf.equal(block_ids_expanded, all_block_ids_expanded)
        is_the_same_block = tf.logical_and(
            tf.equal(block_pos_expanded, all_block_pos_expanded),
            is_the_same_doc)
        is_the_same_doc_but_not_block = tf.logical_and(
            is_the_same_doc, tf.logical_not(is_the_same_block))
        cross_block_attention = tf.logical_and(is_the_same_doc_but_not_block,
                                               cross_block_attention)
    elif cross_block_attention_mode == "batch":
        pass
    else:
        raise ValueError("Unknown cross_block_attention_mode: " +
                         cross_block_attention_mode)

    if cast_to_int32:
        cross_block_attention = tf.cast(cross_block_attention, dtype=tf.int32)
    return cross_block_attention
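The "doc" mode logic, reduced to a toy sketch (block_ids made up; 0 marks a padding block):

import tensorflow as tf

block_ids = tf.constant([1, 1, 2, 0])
same_doc = tf.equal(block_ids[:, None], block_ids[None, :])
non_padding = tf.logical_and(tf.not_equal(block_ids[:, None], 0),
                             tf.not_equal(block_ids[None, :], 0))
doc_mask = tf.logical_and(same_doc, non_padding)
# blocks 0 and 1 attend to each other, block 2 only to itself, and the
# padding block attends to (and is attended by) nothing.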
Example #26
def verb_refs_to_lengths(task, verb_refs, include_eos=True):
  """Computes the length of a sequence."""
  eos_positions = tf.to_int32(tf.expand_dims(
      tf.where(tf.equal(task, 1))[:, 1], 1))
  seq_mask = tf.logical_not(tf.cast(tf.cumsum(tf.to_int32(
      tf.logical_and(
          tf.equal(verb_refs[:, :, 0], eos_positions),
          tf.equal(verb_refs[:, :, 1], eos_positions + 1))), axis=-1), tf.bool))
  lengths = tf.reduce_sum(tf.to_float(seq_mask), axis=-1)
  if include_eos:
    lengths = lengths + 1
  return lengths
Example #27
def _top_p_sample(logits, ignore_ids=None, num_samples=1, p=0.9):
    """
    Does top-p sampling. If ignore_ids is on, then we will zero out those logits.
    :param logits: [batch_size, vocab_size] tensor
    :param ignore_ids: [vocab_size] one-hot representation of the indices we'd like to ignore and never predict,
                        like padding maybe
    :param p: top-p threshold to use, either a float or a [batch_size] vector
    :return: [batch_size, num_samples] samples

    # TODO FIGURE OUT HOW TO DO THIS ON TPUS. IT'S HELLA SLOW RIGHT NOW, DUE TO ARGSORT I THINK
    """
    with tf.variable_scope('top_p_sample'):
        batch_size, vocab_size = get_shape_list(logits, expected_rank=2)

        probs = tf.nn.softmax(logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,
                              axis=-1)

        if isinstance(p, float) and p > 0.999999:
            # Don't do top-p sampling in this case
            print("Top-p sampling DISABLED", flush=True)
            return {
                'probs': probs,
                'sample': tf.random.categorical(
                    logits=logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,
                    num_samples=num_samples, dtype=tf.int32),
            }

        # [batch_size, vocab_perm]
        indices = tf.argsort(probs, direction='DESCENDING')
        cumulative_probabilities = tf.math.cumsum(tf.batch_gather(probs, indices), axis=-1, exclusive=False)

        # Find the top-p index to cut off. Careful: we don't want to cut off everything!
        # result will be [batch_size, vocab_perm]
        p_expanded = p if isinstance(p, float) else p[:, None]
        exclude_mask = tf.logical_not(
            tf.logical_or(cumulative_probabilities < p_expanded, tf.range(vocab_size)[None] < 1))

        # OPTION A - sample in the sorted space, then unsort.
        logits_to_use = tf.batch_gather(logits, indices) - tf.cast(exclude_mask, tf.float32) * 1e10
        sample_perm = tf.random.categorical(logits=logits_to_use, num_samples=num_samples)
        sample = tf.batch_gather(indices, sample_perm)

        # OPTION B - unsort first - Indices need to go back to 0 -> N-1 -- then sample
        # unperm_indices = tf.argsort(indices, direction='ASCENDING')
        # include_mask_unperm = tf.batch_gather(include_mask, unperm_indices)
        # logits_to_use = logits - (1 - tf.cast(include_mask_unperm, tf.float32)) * 1e10
        # sample = tf.random.categorical(logits=logits_to_use, num_samples=num_samples, dtype=tf.int32)

    return {
        'probs': probs,
        'sample': sample,
    }
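The exclude-mask construction in OPTION A, isolated on a pre-sorted toy distribution:

import tensorflow as tf

sorted_probs = tf.constant([[0.5, 0.3, 0.15, 0.05]])  # already descending
cumulative = tf.cumsum(sorted_probs, axis=-1)         # [[0.5, 0.8, 0.95, 1.0]]
keep = tf.logical_or(cumulative < 0.9, tf.range(4)[None] < 1)
exclude_mask = tf.logical_not(keep)
# [[False, False, True, True]] -- the top token is always kept, so sampling
# never collapses to an empty candidate set even for tiny p.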
Example #28
 def metric_fn(query_mask, block_mask, labels, predictions, mask_query):
     masked_accuracy = tf.metrics.accuracy(labels=labels,
                                           predictions=predictions,
                                           weights=mask_query)
     unmasked_accuracy = tf.metrics.accuracy(
         labels=labels,
         predictions=predictions,
         weights=tf.logical_not(mask_query))
     return dict(query_non_padding=tf.metrics.mean(query_mask),
                 block_non_padding=tf.metrics.mean(block_mask),
                 actual_mask_ratio=tf.metrics.mean(mask_query),
                 masked_accuracy=masked_accuracy,
                 unmasked_accuracy=unmasked_accuracy)
Example #29
 def expand_labels(relation_tensor, confidence_value):
   """Expand to ancestors or descendants depending on arguments."""
   mask = tf.equal(image_confidences, confidence_value)
   target_image_classes = tf.boolean_mask(image_classes, mask)
   expanded_indices = tf.reduce_any((tf.gather(
       relation_tensor, target_image_classes - _LABEL_OFFSET, axis=0) > 0),
                                    axis=0)
   expanded_indices = tf.where(expanded_indices)[:, 0] + _LABEL_OFFSET
   new_groundtruth_image_classes = (
       tf.concat([
           tf.boolean_mask(image_classes, tf.logical_not(mask)),
           expanded_indices,
       ],
                 axis=0))
   new_groundtruth_image_confidences = (
       tf.concat([
           tf.boolean_mask(image_confidences, tf.logical_not(mask)),
           tf.ones([tf.shape(expanded_indices)[0]],
                   dtype=image_confidences.dtype) * confidence_value,
       ],
                 axis=0))
   return new_groundtruth_image_classes, new_groundtruth_image_confidences
Example #30
 def do_process_boundary(start_points, end_points, input_length, t1_id,
                         t2_id, all_tokenized_diag):
     """function that contains the majority of the logic to proess boundary."""
     masks_start = tf.sequence_mask(start_points, input_length)
     masks_end = tf.sequence_mask(end_points, input_length)
     xor_masks = tf.logical_xor(masks_start, masks_end)
     mask1 = tf.reduce_any(xor_masks, axis=0)
     mask2 = tf.logical_not(mask1)
     all_turn1 = tf.equal(all_tokenized_diag, t1_id)
     all_turn2 = tf.equal(all_tokenized_diag, t2_id)
     turn_point = tf.logical_or(all_turn1, all_turn2)
     turn_point = tf.cast(turn_point, dtype=tf.float32)
     return mask1, mask2, turn_point
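The XOR-of-prefix-masks trick above carves out a half-open index span; a minimal sketch:

import tensorflow as tf

masks_start = tf.sequence_mask([2], 6)  # [[True, True, False, False, False, False]]
masks_end = tf.sequence_mask([4], 6)    # [[True, True, True, True, False, False]]
span = tf.logical_xor(masks_start, masks_end)
# [[False, False, True, True, False, False]] -- exactly positions [2, 4)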