Example #1
    def _MaybePadSourceInputs(self, src_inputs, src_paddings):
        p = self.params
        if not p.append_eos_frame:
            return src_inputs, src_paddings

        per_src_len = tf.reduce_sum(1 - src_paddings, 1)
        per_src_len += 1
        max_src_len = tf.reduce_max(per_src_len)
        input_shape = tf.shape(src_inputs)
        input_len = tf.maximum(input_shape[1], tf.cast(max_src_len, tf.int32))
        pad_steps = input_len - input_shape[1]
        src_inputs = tf.concat([
            src_inputs,
            tf.zeros(inplace_ops.inplace_update(input_shape, 1, pad_steps),
                     src_inputs.dtype)
        ], 1)
        src_paddings = 1 - tf.sequence_mask(
            tf.reshape(per_src_len, [input_shape[0]]), tf.reshape(
                input_len, []), src_paddings.dtype)
        return src_inputs, src_paddings
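
A minimal standalone sketch of the same pattern on toy shapes (the names are illustrative, not part of Lingvo): lengths are recomputed from the paddings, one extra frame is reserved per sequence, and the paddings are rebuilt with tf.sequence_mask.

import tensorflow as tf

src_inputs = tf.ones([2, 3, 1])                 # [batch, time, dim]
src_paddings = tf.constant([[0., 0., 1.],       # length 2
                            [0., 0., 0.]])      # length 3

per_src_len = tf.reduce_sum(1. - src_paddings, 1) + 1.        # lengths after the EOS frame
max_src_len = tf.cast(tf.reduce_max(per_src_len), tf.int32)
pad_steps = tf.maximum(max_src_len - tf.shape(src_inputs)[1], 0)

# Append zero frames so every sequence has room for its EOS frame.
src_inputs = tf.concat(
    [src_inputs, tf.zeros([2, pad_steps, 1], src_inputs.dtype)], axis=1)
# Rebuild the paddings: positions at or beyond the new length are padded.
src_paddings = 1. - tf.sequence_mask(
    tf.cast(per_src_len, tf.int32), max_src_len, src_paddings.dtype)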
Example #2
  def _TrimIfPossibleThenTranspose(self, input_batch):
    paddings = input_batch['paddings']
    max_seq_len = None
    if not py_utils.use_tpu():
      max_seq_len = tf.cast(
          tf.reduce_max(tf.reduce_sum(1.0 - paddings, 1)), tf.int32)

    def _TrimAndTranspose(name):
      x = input_batch[name]
      if name not in ('ids', 'paddings', 'labels', 'weights', 'segment_ids',
                      'segment_pos'):
        return x
      with tf.name_scope(f'trim_and_transpose_{name}'):
        x = py_utils.HasShape(x, tf.shape(paddings))
        if max_seq_len is not None:
          x = x[:, :max_seq_len]
        return tf.transpose(x)

    return py_utils.NestedMap(
        {name: _TrimAndTranspose(name) for name in input_batch})
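
A hedged standalone sketch of the trim-then-transpose step with toy values (only valid off TPU, where dynamic shapes are allowed):

import tensorflow as tf

paddings = tf.constant([[0., 0., 1., 1.],
                        [0., 0., 0., 1.]])      # [batch, time]
ids = tf.constant([[5, 6, 0, 0],
                   [7, 8, 9, 0]])               # [batch, time]

# Longest unpadded length in the batch.
max_seq_len = tf.cast(tf.reduce_max(tf.reduce_sum(1. - paddings, 1)), tf.int32)
# Drop trailing columns that are padding for every example, then go time-major.
ids = tf.transpose(ids[:, :max_seq_len])        # [time, batch] = [3, 2]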
Example #3
  def _CreateDynamicShapeInputs(self, batch_dim, length_dim, input_dim):
    inputs = tf.random.normal([batch_dim, length_dim, input_dim],
                              seed=92837472)
    # Create segment_ids with a random number of 1s followed by 0s at the end.
    num_ones = tf.random.uniform(
        shape=(), minval=1, maxval=length_dim, dtype=tf.int32)
    segment_ids = tf.concat([
        tf.ones([batch_dim, num_ones]),
        tf.zeros([batch_dim, length_dim - num_ones])
    ], axis=1)
    # Remove padded positions from the end.
    max_seq_len = tf.cast(
        tf.reduce_max(tf.reduce_sum(segment_ids, -1)), tf.int32)
    inputs = inputs[:, :max_seq_len, :]
    segment_ids = segment_ids[:, :max_seq_len]
    unused_segment_pos = tf.zeros_like(segment_ids)
    return inputs, segment_ids, unused_segment_pos
Example #4
    def ExtractUsingExtractors(self, record):
        """Extracts Tensors from a tf.Example record using self.extractors.

    Args:
      record: A tf.Example input to pass to tf.parse_single_example.

    Returns:
      bucket_id: A scalar int Tensor.
      extracted: a NestedMap of Tensors extracted.
    """
        feature_map = {}
        self._extractors.Transform(
            lambda e: feature_map.update(e.FeatureMap()))

        if self.params.record_type not in _PARSING_FUNCTIONS:
            raise ValueError('Invalid record_type: {}'.format(
                self.params.record_type))
        parsing_fn = _PARSING_FUNCTIONS[self.params.record_type]
        features = parsing_fn(record, feature_map)

        def ExtractAndFilter(e):
            with tf.name_scope(e.params.name):
                with tf.name_scope('extract'):
                    extracted = e.Extract(features)
                with tf.name_scope('filter'):
                    bucket = e.Filter(extracted)
            return bucket, extracted

        bucket_extracted = self._extractors.Transform(ExtractAndFilter)
        buckets = bucket_extracted.Transform(lambda x: x[0])
        extracted = bucket_extracted.Transform(lambda x: x[1])

        # Return the maximum bucket id so that any extractor can decide whether
        # to filter the entire example.

        for key, preprocessor in zip(self.params.preprocessors_order,
                                     self.preprocessors):
            with tf.name_scope(key), tf.name_scope(preprocessor.params.name):
                extracted = preprocessor.TransformFeatures(extracted)

        return tf.reduce_max(buckets.Flatten()), extracted
Example #5
        def _PaddedMaxFn(inp):
            """Apply padded max using reduce_max with paddings replaced by neginf."""
            # Replace all padded features with -inf.
            neginf_padding = tf.where(inp.padding > 0, -np.inf * inp.padding,
                                      inp.padding)
            features = inp.features + neginf_padding[..., tf.newaxis]
            features = tf.reduce_max(features, axis=-2)

            # Replace features of all padded points by zeros. If a batch of points are
            # all padded, then reduce_min over the padding will be 1. We set the
            # features to be zero, so that we don't get any downstream issue with
            # NaNs. Note that inf * 0 = NaN.
            padding = tf.reduce_min(inp.padding, axis=-1)
            features = tf.where_v2(tf.cast(padding[..., tf.newaxis], tf.bool),
                                   tf.zeros_like(features), features)
            features = py_utils.CheckNumerics(features)

            if nested_output:
                return py_utils.NestedMap(features=features, padding=padding)
            else:
                return features
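
The padded-max trick in isolation, as a hedged sketch with toy values (tensor names are illustrative):

import numpy as np
import tensorflow as tf

features = tf.constant([[[1., 2.], [5., 0.], [9., 9.]]])   # [batch, points, dims]
padding = tf.constant([[0., 0., 1.]])                      # last point is padded

# Push padded points to -inf so they can never win the max.
neginf_padding = tf.where(padding > 0, -np.inf * padding, padding)
pooled = tf.reduce_max(features + neginf_padding[..., tf.newaxis], axis=-2)  # [[5., 2.]]

# If every point in a row were padded, the max would be -inf; zero those rows.
all_padded = tf.reduce_min(padding, axis=-1)               # 1.0 iff all points padded
pooled = tf.where(tf.cast(all_padded[..., tf.newaxis], tf.bool),
                  tf.zeros_like(pooled), pooled)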
Example #6
  def _PadAndReshapeSpec(self, mel_spectrogram, mel_spectrogram_paddings):
    p = self.params
    batch_size = py_utils.GetShape(mel_spectrogram)[0]
    # Stack and sub-sample. Only subsampling with a stride of the stack size
    # is supported.
    if p.stack_left_context > 0:
      # Since left context is leading, pad the left by duplicating the first
      # frame.
      stack_size = 1 + p.stack_left_context
      mel_spectrogram = tf.concat(
          [mel_spectrogram[:, 0:1, :]] * p.stack_left_context +
          [mel_spectrogram],
          axis=1)
      mel_spectrogram_paddings = tf.concat(
          [mel_spectrogram_paddings[:, 0:1]] * p.stack_left_context +
          [mel_spectrogram_paddings],
          axis=1)

      # Note that this is the maximum number of frames. Actual frame count
      # depends on padding.
      stacked_frame_dim = tf.shape(mel_spectrogram)[1] // stack_size
      mel_spectrogram = tf.reshape(
          mel_spectrogram[:, 0:(stack_size) * stacked_frame_dim, :],
          [batch_size, stacked_frame_dim, stack_size * p.num_bins])
      # After stacking paddings, pad if any source frame was padded.
      # Stacks into [batch_size, stacked_frame_dim, stack_size] like the
      # spectrogram stacking above, and then reduces the stack_size dim
      # to the max (effectively, making padding = 1.0 if any of the pre-stacked
      # frames were 1.0). Final shape is [batch_size, stacked_frame_dim].
      mel_spectrogram_paddings = tf.reshape(
          mel_spectrogram_paddings[:, 0:(stack_size) * stacked_frame_dim],
          [batch_size, stacked_frame_dim, stack_size])
      mel_spectrogram_paddings = tf.reduce_max(mel_spectrogram_paddings, axis=2)

    # Add feature dim. Shape = [batch, time, features, 1]
    mel_spectrogram = tf.expand_dims(mel_spectrogram, -1)
    return mel_spectrogram, mel_spectrogram_paddings
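
A hedged sketch of the stack-and-subsample step on toy shapes (left context of 2, so each stacked frame covers 3 source frames; names are illustrative):

import tensorflow as tf

spec = tf.random.normal([1, 5, 4])              # [batch, time, num_bins]
pads = tf.constant([[0., 0., 0., 1., 1.]])      # [batch, time]
left_context = 2
stack_size = 1 + left_context

# Lead with copies of the first frame so early frames still get a full left context.
spec = tf.concat([spec[:, 0:1, :]] * left_context + [spec], axis=1)     # [1, 7, 4]
pads = tf.concat([pads[:, 0:1]] * left_context + [pads], axis=1)        # [1, 7]

stacked_frames = tf.shape(spec)[1] // stack_size                        # 7 // 3 = 2
spec = tf.reshape(spec[:, :stack_size * stacked_frames, :],
                  [1, stacked_frames, stack_size * 4])                  # [1, 2, 12]
# A stacked frame is padded if any of its source frames was padded.
pads = tf.reduce_max(
    tf.reshape(pads[:, :stack_size * stacked_frames],
               [1, stacked_frames, stack_size]), axis=2)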
Example #7
    def FProp(self, theta, input_batch):
        """Embeds source ids and transforms with TransformerStack.

    Args:
      theta: A `.NestedMap` object containing weights' values of this
        layer and its children layers.
      input_batch: A `.NestedMap` with fields:

        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].
        - task_ids: If p.task_emb is provided, must contain per-token task
            ids of shape [batch, time].

    Returns:
      A NestedMap containing

      - encoded: The encoded features, either a tensor of shape
        [time, batch, depth], or a list of tensors if is_transparent is set in
        transformer_stack.
      - padding: of shape [time, batch]
      - segment_id: [time, batch] if packed inputs are supported by the model
        (and all layers), or None otherwise.
      - embedded_inputs: [time, batch, depth] embedded inputs tokens without
        positional encodings.
    """

        p = self.params
        with tf.name_scope(p.name):
            src_segment_id = None
            src_segment_pos = None
            input_ids = py_utils.with_dependencies([
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            tf.shape(input_batch.paddings)),
                py_utils.assert_equal(tf.rank(input_batch.ids), 2)
            ], input_batch.ids)

            if (not py_utils.use_tpu()
                    and tf.flags.FLAGS.transformer_encoder_truncates_inputs):
                max_seq_length = tf.cast(
                    tf.reduce_max(tf.reduce_sum(1.0 - input_batch.paddings,
                                                1)), tf.int32)
                paddings = py_utils.with_dependencies([
                    py_utils.assert_equal(
                        tf.constant(True, tf.bool),
                        tf.reduce_all(
                            input_batch.paddings[:, max_seq_length:] > 0.5))
                ], input_batch.paddings)
                input_ids = input_ids[:, :max_seq_length]
                paddings = paddings[:, :max_seq_length]
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids[:, :max_seq_length]
                    src_segment_pos = input_batch.segment_pos[:, :max_seq_length]
            else:
                paddings = input_batch.paddings
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids
                    src_segment_pos = input_batch.segment_pos

            max_time = tf.shape(input_ids)[1]

            # Input token embeddings + positional embeddings
            if not p.shared_emb:
                input_embs = self.token_emb.EmbLookup(
                    theta.token_emb, tf.reshape(input_ids, [-1]))
            else:
                input_embs = self.softmax.EmbLookup(
                    theta.softmax, tf.reshape(input_ids, [-1]))

            input_embs = tf.reshape(input_embs,
                                    [-1, max_time, p.token_emb.embedding_dim])
            # [time, batch, dim]
            orig_input_embs = tf.transpose(input_embs, [1, 0, 2])

            if p.packed_input:
                position_embs = self.position_emb.FPropWithPosition(
                    theta.position_emb, src_segment_pos)
            else:
                position_embs = self.position_emb.FProp(
                    theta.position_emb, max_time)
                position_embs = tf.reshape(
                    position_embs, [1, max_time, p.token_emb.embedding_dim])
            input_embs += position_embs
            if p.task_emb:
                input_embs += self.task_emb.EmbLookup(theta.task_emb,
                                                      input_batch.task_ids)

            if p.model_dim != p.token_emb.embedding_dim:
                input_embs = self.emb_proj.FProp(theta.emb_proj, input_embs)

            paddings = tf.cast(tf.transpose(paddings), py_utils.FPropDtype(p))
            if p.packed_input:
                src_segment_id = tf.transpose(src_segment_id)
            input_embs = self.input_dropout.FProp(theta.input_dropout,
                                                  input_embs)

            # [time, batch, dim]
            transformer_input = tf.transpose(input_embs, [1, 0, 2])

        if not self.do_eval and p.apply_source_mask:
            # Augment padding for masked source word positions.
            dtype = paddings.dtype
            source_mask = tf.where(tf.equal(input_ids, p.source_mask_id),
                                   tf.ones_like(input_ids, dtype=dtype),
                                   tf.zeros_like(input_ids, dtype=dtype))
            # Make sure padding is between 0 and 1.
            paddings = tf.clip_by_value(paddings + tf.transpose(source_mask),
                                        0.0, 1.0)

        encoded, padding, segment_id = self.transformer_stack.FProp(
            theta.transformer_stack, transformer_input, paddings,
            src_segment_id)
        return py_utils.NestedMap(encoded=encoded,
                                  padding=padding,
                                  segment_id=segment_id,
                                  embedded_inputs=orig_input_embs)
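
The source-mask step at the end is easy to see in isolation; a hedged batch-major sketch (the real code works on time-major paddings, hence the transpose above):

import tensorflow as tf

source_mask_id = 4
input_ids = tf.constant([[7, 4, 9]])       # one masked token (id 4)
paddings = tf.constant([[0., 0., 0.]])

# 1.0 wherever the token is the special mask id, 0.0 elsewhere.
source_mask = tf.cast(tf.equal(input_ids, source_mask_id), paddings.dtype)
# Treat masked positions as padding, keeping values in [0, 1].
paddings = tf.clip_by_value(paddings + source_mask, 0.0, 1.0)   # [[0., 1., 0.]]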
Example #8
  def _iter_body(i, mat_m, mat_h, unused_old_mat_h, error, unused_run_step):
    mat_m_i = (1 - alpha) * identity + alpha * mat_m
    new_mat_m = tf.matmul(mat_power(mat_m_i, exponent), mat_m)
    new_mat_h = tf.matmul(mat_h, mat_m_i)
    new_error = tf.reduce_max(tf.abs(new_mat_m - identity))
    return (i + 1, new_mat_m, new_mat_h, mat_h, new_error, new_error < error)
Example #9
    def ExtractUsingExtractors(self, record):
        """Extracts Tensors from a tf.Example record using self.extractors.

    Args:
      record: A tf.Example input to pass to tf.parse_single_example.

    Returns:
      A tuple of tensors:

      - bucket_id: A scalar int Tensor.
      - extracted: a NestedMap of Tensors extracted.
    """
        feature_map = {}
        context_map = {}
        self._extractors.Transform(
            lambda e: feature_map.update(e.FeatureMap()))
        if self.params.record_type == 'SEQUENCE_EXAMPLE':
            self._extractors.Transform(
                lambda e: context_map.update(e.ContextMap()))

        if self.params.record_type not in _PARSING_FUNCTIONS:
            raise ValueError('Invalid record_type: {}'.format(
                self.params.record_type))
        parsing_fn = _PARSING_FUNCTIONS[self.params.record_type]
        if self.params.record_type == 'SEQUENCE_EXAMPLE':
            features = parsing_fn(record, feature_map, context_map)
        else:
            features = parsing_fn(record, feature_map)

        def ExtractAndFilter(e):
            with tf.name_scope(e.params.name):
                with tf.name_scope('extract'):
                    extracted = e.Extract(features)
                with tf.name_scope('filter'):
                    bucket = e.Filter(extracted)
            return bucket, extracted

        bucket_extracted = self._extractors.Transform(ExtractAndFilter)
        buckets = bucket_extracted.Transform(lambda x: x[0])
        extracted = bucket_extracted.Transform(lambda x: x[1])

        # Return the maximum bucket id so that any extractor can decide whether
        # to filter the entire example.
        max_bucket = tf.reduce_max(buckets.Flatten())

        def NullLike():
            """A function to return the same Tensor signature as Preprocess.

      This is necessary for the tf.cond() to avoid executing the preprocessor
      for examples that are going to be dropped because they exceed the bucket
      limit; tf.cond() requires that the outputs of both branches have the same
      structure.

      Returns:
        A structure with the same Tensor dtype and shape as the output of
        Preprocess.
      """
            shapes = self.Shape()
            rets = [
                tf.zeros(dtype=dtype, shape=shape)
                for (dtype,
                     shape) in zip(self.DType().Flatten(), shapes.Flatten())
            ]
            return shapes.Pack(rets)

        def Preprocess(extracted):
            for key, preprocessor in zip(self.params.preprocessors_order,
                                         self.preprocessors):
                with tf.name_scope(key), tf.name_scope(
                        preprocessor.params.name):
                    extracted = preprocessor.TransformFeatures(extracted)
            return extracted

        # If the extractor wants to filter the example, don't run the preprocessor.
        #
        # Preprocessors can then assume that only examples that pass filtering will
        # be executed.
        final_output = tf.cond(tf.less(max_bucket, BUCKET_UPPER_BOUND),
                               lambda: Preprocess(extracted), NullLike)

        return max_bucket, final_output
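
The NullLike/tf.cond pattern in miniature, as a hedged sketch (BUCKET_UPPER_BOUND and both branch bodies are stand-ins): tf.cond only works if the two branches return the same structure, dtypes and, ideally, shapes.

import tensorflow as tf

BUCKET_UPPER_BOUND = 1000000   # stand-in for the module-level constant

def Preprocess():
  # Stand-in for the real preprocessing pipeline.
  return {'ids': tf.constant([1, 2, 3]), 'score': tf.constant(0.5)}

def NullLike():
  # Must mirror Preprocess() in structure and dtype.
  return {'ids': tf.zeros([3], tf.int32), 'score': tf.zeros([], tf.float32)}

bucket_id = tf.constant(7)
out = tf.cond(tf.less(bucket_id, BUCKET_UPPER_BOUND), Preprocess, NullLike)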
Example #10
    def AssignAnchors(self,
                      anchor_bboxes,
                      gt_bboxes,
                      gt_bboxes_labels,
                      gt_bboxes_mask,
                      foreground_assignment_threshold=0.5,
                      background_assignment_threshold=0.35,
                      background_class_id=0,
                      force_match=True,
                      similarity_fn=None):
        """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:

      - Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      - Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      - Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      - Use assigned_cls_mask for weighting the classification loss. The mask
        is set such that the loss applies to foreground and background
        assignments only - ignored anchors will be set to 0.
      - Use assigned_reg_mask for weighting the regression loss. The mask is set
        such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying if force matching is enabled. If
        force matching is enabled, then matched anchors which are also the
        highest scoring with a ground-truth box are considered foreground
        matches as long as their similarity score > 0.
      similarity_fn: Function that computes a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes. The
        score must be non-negative, with greater scores indicating more similar
        boxes. The fore/background_assignment_thresholds will be applied to
        this score to determine if an anchor is foreground, background or
        ignored. If set to None, the function will default to IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys

      - assigned_gt_idx: shape [A] index corresponding to the index of the
        assigned ground truth box. Anchors not assigned to a ground truth box
        will have the index set to -1.
      - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
      - assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.
      - assigned_gt_labels: shape [A] label assigned to bbox.
      - assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.
      - assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.
    """
        if similarity_fn is None:
            similarity_fn = self.IOU2DRotatedBoxes

        # Shape validation.
        anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
        num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
        gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
        num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

        # Compute similarity score and reduce max by anchors and by ground-truth.
        similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
        similarity_score = py_utils.HasShape(
            similarity_score, [num_anchor_bboxes, num_gt_bboxes])

        # Reduce over ground-truth boxes, so we have the max score per anchor.
        anchor_max_score = tf.reduce_max(similarity_score, axis=1)
        anchor_max_idx = tf.argmax(similarity_score, axis=1)

        if force_match:
            # Reduce over anchors, so we have the max score per ground truth box.
            gt_max_score = tf.reduce_max(similarity_score,
                                         axis=0,
                                         keepdims=True)

            # Force matches occur when the top matching gt bbox for an anchor is the
            # top matching anchor for the gt bbox. When force matching, we match
            # these boxes as long as their similarity score exceeds 0.
            force_matches = (
                tf.equal(similarity_score, gt_max_score)
                & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
                & tf.greater(similarity_score, 0.)
                & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
            force_match_indicator = tf.reduce_any(force_matches, axis=1)
            force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32),
                                        axis=1)

            # In assigning foreground/background anchors later, force_match_indicator
            # is used to determine which anchors are force foreground, and the index
            # assigned will be taken from anchor_max_idx.

            # Force matches must also be the max-scoring gt bbox per anchor.
            # We overwrite anchor_max_idx to ensure that the right match is made.
            anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                      anchor_max_idx)

        # Ensure that max score boxes are not padded boxes by setting score to 0
        # for boxes that are padded.
        gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
        anchor_max_score = tf.where(tf.equal(gathered_mask, 1),
                                    anchor_max_score,
                                    tf.zeros_like(anchor_max_score))

        # Boolean tensors corresponding to whether an anchor is background or
        # foreground based on thresholding.
        background_anchors = tf.less_equal(anchor_max_score,
                                           background_assignment_threshold)
        foreground_anchors = tf.greater_equal(anchor_max_score,
                                              foreground_assignment_threshold)
        if force_match:
            # Background anchors are below threshold and not force matches.
            background_anchors &= ~force_match_indicator
            # Foreground anchors are above thresholds or force matches.
            foreground_anchors |= force_match_indicator

        # Add dummy background bbox to gt_boxes to facilitate batch gather.
        dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

        # Since we are concatenating the dummy bbox, the index corresponds to the
        # number of boxes.
        dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

        gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
        gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                     axis=0)

        # Gather indices so that all foreground boxes are gathered from gt_bboxes,
        # while all background and ignore boxes gather the dummy_bbox.
        anchor_gather_idx = tf.where(
            foreground_anchors, anchor_max_idx,
            tf.constant(dummy_bbox_idx,
                        shape=py_utils.GetShape(anchor_max_idx),
                        dtype=anchor_max_idx.dtype))

        # Gather the bboxes and weights.
        assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
        assigned_gt_labels = tf.batch_gather(gt_bboxes_labels,
                                             anchor_gather_idx)

        # Set masks for classification and regression losses.
        assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                                    tf.float32)
        assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

        # Set assigned_gt_idx such that dummy boxes have idx = -1.
        assigned_gt_idx = tf.where(tf.equal(anchor_gather_idx, dummy_bbox_idx),
                                   tf.ones_like(anchor_gather_idx) * -1,
                                   anchor_gather_idx)
        assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

        return py_utils.NestedMap(
            assigned_gt_idx=assigned_gt_idx,
            assigned_gt_bbox=assigned_gt_bbox,
            assigned_gt_similarity_score=anchor_max_score,
            assigned_gt_labels=assigned_gt_labels,
            assigned_cls_mask=assigned_cls_mask,
            assigned_reg_mask=assigned_reg_mask)
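
The dummy-row gather trick used for background anchors, reduced to a hedged unbatched sketch (tf.gather stands in for tf.batch_gather; values are illustrative):

import tensorflow as tf

gt_labels = tf.constant([3, 5])                  # [G] ground-truth labels
background_class_id = 0
foreground = tf.constant([True, False, True])    # [A] per-anchor foreground decision
anchor_max_idx = tf.constant([1, 0, 0])          # [A] best-matching gt index per anchor

# Append a dummy background entry; its index equals the original number of rows.
dummy_idx = tf.shape(gt_labels)[0]
gt_labels = tf.concat([gt_labels, [background_class_id]], axis=0)

# Background/ignored anchors gather the dummy row; foreground anchors their match.
gather_idx = tf.where(foreground, anchor_max_idx,
                      tf.fill(tf.shape(anchor_max_idx), dummy_idx))
assigned_labels = tf.gather(gt_labels, gather_idx)   # [5, 0, 3]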
Example #11
  def ProcessFeatures(self, features):
    """Process extracted features.

    Args:
      features: A dict of extracted Tensors from the records.

    Returns:
      A tuple of tensors:

      - bucket_id: A scalar int Tensor.
      - extracted: a NestedMap of Tensors extracted.
    """
    def ExtractAndFilter(e):
      with tf.name_scope(e.params.name):
        with tf.name_scope('extract'):
          extracted = e.Extract(features)
        with tf.name_scope('filter'):
          bucket = e.Filter(extracted)
      return bucket, extracted

    bucket_extracted = self._extractors.Transform(ExtractAndFilter)
    buckets = bucket_extracted.Transform(lambda x: x[0])
    extracted = bucket_extracted.Transform(lambda x: x[1])

    # Return the maximum bucket id so that any extractor can decide whether
    # to filter the entire example.
    max_bucket = tf.reduce_max(buckets.Flatten())

    def NullLike():
      """A function to return the same Tensor signature as Preprocess.

      This is necessary for the tf.cond() to avoid executing the preprocessor
      for examples that are going to be dropped because they exceed the bucket
      limit; tf.cond() requires that the outputs of both branches have the same
      structure.

      Returns:
        A structure with the same Tensor dtype as the output of
        Preprocess.
      """
      shapes = self.Shape()
      rets = []
      for dtype, shape in zip(self.DType().Flatten(), shapes.Flatten()):
        if shape.is_fully_defined():
          rets += [tf.zeros(dtype=dtype, shape=shape)]
        else:
          rets += [tf.zeros(dtype=dtype, shape=[])]  # Our best guess.
      return shapes.Pack(rets)

    def Preprocess(extracted):
      for key, preprocessor in zip(self.params.preprocessors_order,
                                   self.preprocessors):
        with tf.name_scope(key), tf.name_scope(preprocessor.params.name):
          extracted = preprocessor.TransformFeatures(extracted)
      return extracted

    # If the extractor wants to filter the example, don't run the preprocessor.
    #
    # Preprocessors can then assume that only examples that pass filtering will
    # be executed.
    #
    # Note that the NullLike branch may return tensors with shapes different
    # from self.Shape().
    final_output = tf.cond(
        tf.less(max_bucket, BUCKET_UPPER_BOUND), lambda: Preprocess(extracted),
        NullLike)

    return max_bucket, final_output
Example #12
def MakeLocalMask(seq_len,
                  block_size,
                  left_context,
                  right_context,
                  query_stride=1,
                  dtype=tf.float32):
    """Makes the mask tensor for a full sequence.

  The returned mask reflects the given context sizes, where position i
  attends to tokens in the range [i - (left_context-1), i + right_context].

  For example, given seq_len=4, block_size=2, left_context=3, right_context=0,
  the result mask is
  [[[0., 0., 1., 0.],   1st query in 1st block attends to the 1st key.
    [0., 0., 1., 1.]],  2nd query in 1st block attends to the 2nd and left keys.
   [[1., 1., 1., 0.],   1st query in 2nd block attends to the 1st and left keys.
    [0., 1., 1., 1.]]]  2nd query in 2nd block attends to the 2nd and left keys.

  The position i can move by stride, which means queries are pooled by stride.
  For example, given the same params and stride=2, the result mask is
  [[[0., 0., 1., 1.]],  The pooled query in the 1st block attends to the 1st and 2nd keys.
   [[1., 1., 1., 1.]]]  The pooled query in the 2nd block attends to the 1st, 2nd and left keys.

  Args:
    seq_len: int or scalar int tensor. Sequence length.
    block_size: int. Number of time frames in a block.
    left_context: int. Left context size.
    right_context: int. Right context size.
    query_stride: int. Query stride for funnel pool.
    dtype: tf.dtype, default is tf.float32.

  Returns:
    A tensor of [num_blocks, block_size//stride, context_size] taking values in
    {0, 1}, where context_size = block_size + (left_context - 1) + right_context.
    Element (b, i, j) is 1 if, in the b-th block, the i-th frame can access
    the j-th frame in the context.
  """
    assert block_size % query_stride == 0, (
        f'block_size({block_size}) must be a multiple of '
        f'query_stride({query_stride}).')
    seq_len = py_utils.with_dependencies([
        py_utils.assert_greater_equal(
            seq_len, 1, message='seq_len must be at least 1')
    ], seq_len)

    num_blocks = (seq_len + block_size - 1) // block_size
    context_size = block_size + (left_context - 1) + right_context

    # [num_blocks, block_size]: source positions in the original sequence.
    src_positions = tf.reshape(tf.range(num_blocks * block_size),
                               [num_blocks, block_size])
    # [num_blocks,]: source positions at the start of each block.
    block_start_positions = tf.range(0, num_blocks * block_size, block_size)
    # [context_size]:  positions relative to the block start.
    relative_context_positions = tf.range(context_size) - (left_context - 1)

    # [num_blocks, context_size]: target positions in the original sequence.
    tgt_positions = (block_start_positions[:, tf.newaxis] +
                     relative_context_positions[tf.newaxis, :])
    # [num_blocks, block_size, context_size]: position differences between source-
    # target pairs.
    position_diff = src_positions[:, :,
                                  tf.newaxis] - tgt_positions[:, tf.newaxis, :]
    # [num_blocks, block_size, context_size]: if attention is allowed between
    # source-target pairs.
    valid_atten = tf.math.logical_and(-right_context <= position_diff,
                                      position_diff < left_context)

    # [num_blocks, block_size]: if the source position is valid, not padded.
    valid_src = src_positions < seq_len
    # [num_blocks, context_size]: if the target position is valid, not padded.
    valid_tgt = tf.math.logical_and(0 <= tgt_positions,
                                    tgt_positions < seq_len)

    valid_atten &= tf.math.logical_and(valid_src[:, :, tf.newaxis],
                                       valid_tgt[:, tf.newaxis, :])
    valid_atten = tf.cast(valid_atten, dtype=dtype)

    if query_stride:
        valid_atten = tf.reshape(valid_atten, [
            num_blocks, block_size // query_stride, query_stride, context_size
        ])
        valid_atten = tf.reduce_max(valid_atten, axis=-2)

    return valid_atten
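
A hedged usage sketch matching the docstring example above (assumes the surrounding module and its py_utils dependency are importable, and eager execution):

mask = MakeLocalMask(seq_len=4, block_size=2, left_context=3, right_context=0)
print(mask.shape)   # (2, 2, 4): [num_blocks, block_size, context_size]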
Example #13
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
    """Perform NMS on predicted bounding boxes / associated logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold to use when determining whether two boxes
      overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
    utils_3d = detection_3d_lib.Utils3D()
    predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
    batch_size, num_predicted_boxes, _ = py_utils.GetShape(predicted_bboxes)
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_predicted_boxes, -1])
    _, _, num_classes = py_utils.GetShape(classification_scores)

    if not isinstance(nms_iou_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`nms_iou_threshold`.')
    if not isinstance(score_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`score_threshold`.')

    if max_boxes_per_class is None:
        max_boxes_per_class = num_predicted_boxes

    # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS, and
    # per-class thresholding.
    # [batch, num_predicted_boxes]
    nms_scores = tf.reduce_max(classification_scores, axis=-1)

    # Compute the most likely label by computing the highest class score from
    # the output of the sigmoid.
    likely_labels = tf.argmax(classification_scores, axis=-1)

    # When background is the most likely class for the box, mask out the scores
    # of that box from NMS scoring so the background boxes don't dominate the
    # NMS.
    nms_scores *= tf.cast(likely_labels > 0, tf.float32)

    # Compute NMS for every sample in the batch.
    nms_indices, valid_mask = utils_3d.BatchedNMSIndices(
        predicted_bboxes,
        nms_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_num_boxes=max_boxes_per_class)

    # Reorder the box data and logits according to NMS scoring.
    predicted_bboxes = tf.array_ops.batch_gather(predicted_bboxes, nms_indices)
    classification_scores = tf.array_ops.batch_gather(classification_scores,
                                                      nms_indices)

    # Now reformat the output of NMS to match the format of
    # MultiClassOrientedDecodeWithNMS, which outputs a per-class NMS result.
    # Its outputs all share the leading shape
    # [batch_size, num_classes, max_boxes_per_class]; since this NMS is not
    # class specific, we tile the outputs num_classes times (or reorder the
    # data so that it is [batch, num_classes, ...]).
    predicted_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                               [1, num_classes, 1, 1])
    classification_scores = tf.transpose(classification_scores, (0, 2, 1))
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_classes, max_boxes_per_class])
    valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, num_classes, 1])
    return predicted_bboxes, classification_scores, valid_mask
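
A simplified, hedged analogue of the score-masking step, using TF's built-in 2-D axis-aligned NMS in place of the 3-D rotated-box helper (single image, toy boxes):

import tensorflow as tf

boxes = tf.constant([[0., 0., 1., 1.],
                     [0., 0., 1.05, 1.05],
                     [2., 2., 3., 3.]])          # [num_boxes, 4], (y1, x1, y2, x2)
class_scores = tf.constant([[0.1, 0.8],
                            [0.2, 0.6],
                            [0.9, 0.05]])        # [num_boxes, num_classes]

nms_scores = tf.reduce_max(class_scores, axis=-1)
likely_labels = tf.argmax(class_scores, axis=-1)
# Zero out boxes whose most likely class is background (class 0) so they
# cannot dominate the NMS.
nms_scores *= tf.cast(likely_labels > 0, tf.float32)

keep = tf.image.non_max_suppression(
    boxes, nms_scores, max_output_size=10, iou_threshold=0.5, score_threshold=0.01)
kept_boxes = tf.gather(boxes, keep)              # keeps only the first box here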
Example #14
  def FProp(self, theta, input_batch, interpolation_batch=None, lambdas=None):
    # pyformat: disable
    """Interpolates source ids in input_batch and interpolation_batch.

    Refer to Eq. (4) in the paper https://arxiv.org/abs/2106.04060.
    It reduces to a standard Transformer encoder if interpolation_batch is None.

    Args:
      theta: A `.NestedMap` object containing weights values of this layer and
        its children layers.
      input_batch: A `.NestedMap` with fields:

        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].
        - task_ids: If p.task_emb is provided, must contain per-token task ids
          of shape [batch, time].
      interpolation_batch: A `.NestedMap` with fields:

        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].
        - task_ids: If p.task_emb is provided, must contain per-token task ids
          of shape [batch, time].
        - embs: Embeddings of ids.
      lambdas: A pair of tensors to combine embeddings of ids in input_batch and
        interpolation_batch.

    Returns:
      A NestedMap of

        - encoded: The encoded features, either a tensor of shape
          [time, batch, depth], or a list of tensors if is_transparent is set in
          transformer_stack.
        - padding: of shape [time, batch]
        - segment_id: [time, batch] if packed inputs are supported by the model
          (and all layers), or None otherwise.
        - embedded_inputs: [time, batch, depth] embedded inputs tokens without
          positional encodings.
    """
    # pyformat: enable

    p = self.params
    with tf.name_scope(p.name):
      src_segment_id = None
      src_segment_pos = None
      input_ids = py_utils.with_dependencies([
          py_utils.assert_shape_match(
              tf.shape(input_batch.ids), tf.shape(input_batch.paddings)),
          py_utils.assert_equal(tf.rank(input_batch.ids), 2)
      ], input_batch.ids)

      max_seq_length = None
      if (not py_utils.use_tpu() and
          FLAGS.transformer_encoder_truncates_inputs):
        max_seq_length = tf.cast(
            tf.reduce_max(tf.reduce_sum(1.0 - input_batch.paddings, 1)),
            tf.int32)
        paddings = py_utils.with_dependencies([
            py_utils.assert_equal(
                tf.constant(True, tf.bool),
                tf.reduce_all(input_batch.paddings[:, max_seq_length:] > 0.5))
        ], input_batch.paddings)
        input_ids = input_ids[:, :max_seq_length]
        paddings = paddings[:, :max_seq_length]
        if p.packed_input:
          src_segment_id = input_batch.segment_ids[:, :max_seq_length]
          src_segment_pos = input_batch.segment_pos[:, :max_seq_length]
      else:
        paddings = input_batch.paddings
        if p.packed_input:
          src_segment_id = input_batch.segment_ids
          src_segment_pos = input_batch.segment_pos

      max_time = tf.shape(input_ids)[1]

      # Input token embeddings + positional embeddings
      if not p.shared_emb:
        input_embs = self.token_emb.EmbLookup(theta.token_emb,
                                              tf.reshape(input_ids, [-1]))
      else:
        input_embs = self.softmax.EmbLookup(theta.softmax,
                                            tf.reshape(input_ids, [-1]))

      if interpolation_batch is not None:
        other_input_ids = interpolation_batch.ids
        if not p.shared_emb:
          other_input_embs = self.token_emb.EmbLookup(
              theta.token_emb, tf.reshape(other_input_ids, [-1]))
        else:
          other_input_embs = self.softmax.EmbLookup(
              theta.softmax, tf.reshape(other_input_ids, [-1]))
        lambdas = [tf.expand_dims(a, -1) for a in lambdas]
        if 'embs' in input_batch and input_batch.embs is not None:
          input_embs = input_batch.embs
        if 'embs' in interpolation_batch and interpolation_batch.embs is not None:
          other_input_embs = interpolation_batch.embs
        else:
          input_embs = tf.reshape(
              input_embs,
              [-1, tf.shape(input_ids)[1], p.token_emb.embedding_dim])
          other_input_embs = tf.reshape(
              other_input_embs,
              [-1, tf.shape(other_input_ids)[1], p.token_emb.embedding_dim])
        input_embs = lambdas[0] * input_embs + lambdas[1] * other_input_embs
        paddings = paddings + interpolation_batch.paddings - 1.0
        paddings = tf.clip_by_value(paddings, 0.0, 1.0)

      input_embs = tf.reshape(input_embs,
                              [-1, max_time, p.token_emb.embedding_dim])

      orig_input_embs = input_embs
      if p.task_emb:
        if interpolation_batch is None:
          input_embs += self.task_emb.EmbLookup(theta.task_emb,
                                                input_batch.task_ids)
        else:
          task_embs = self.task_emb.EmbLookup(theta.task_emb,
                                              input_batch.task_ids)
          other_task_embs = self.task_emb.EmbLookup(
              theta.task_emb, interpolation_batch.task_ids)
          task_embs = lambdas[0] * task_embs + lambdas[1] * other_task_embs
          input_embs += task_embs

      if p.packed_input:
        position_embs = self.position_emb.FPropWithPosition(
            theta.position_emb, src_segment_pos)
      else:
        position_embs = self.position_emb.FProp(theta.position_emb, max_time)
        position_embs = tf.reshape(position_embs,
                                   [1, max_time, p.token_emb.embedding_dim])
      input_embs += position_embs

      if p.model_dim != p.token_emb.embedding_dim:
        input_embs = self.emb_proj.FProp(theta.emb_proj, input_embs)

      paddings = tf.cast(tf.transpose(paddings), py_utils.FPropDtype(p))
      if p.packed_input:
        src_segment_id = tf.transpose(src_segment_id)

      input_embs = self.input_dropout.FProp(theta.input_dropout, input_embs)

      # [time, batch, dim]
      transformer_input = tf.transpose(input_embs, [1, 0, 2])

    if not self.do_eval and p.apply_source_mask:
      # Augment padding for masked source word positions.
      dtype = paddings.dtype
      source_mask = tf.where(
          tf.equal(input_ids, p.source_mask_id),
          tf.ones_like(input_ids, dtype=dtype),
          tf.zeros_like(input_ids, dtype=dtype))
      # Make sure padding is between 0 and 1.
      paddings = tf.clip_by_value(paddings + tf.transpose(source_mask), 0.0,
                                  1.0)

    encoded, padding, segment_id = self.transformer_stack.FProp(
        theta.transformer_stack, transformer_input, paddings, src_segment_id)

    return py_utils.NestedMap(
        encoded=encoded,
        padding=padding,
        segment_id=segment_id,
        embedded_inputs=orig_input_embs)
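
The interpolation itself is just a per-token convex combination plus a padding merge; a hedged sketch with toy values (names are illustrative):

import tensorflow as tf

emb_a = tf.ones([1, 3, 2])                      # [batch, time, dim]
emb_b = tf.zeros([1, 3, 2])
pad_a = tf.constant([[0., 0., 1.]])
pad_b = tf.constant([[0., 1., 1.]])
lambdas = (tf.constant([[0.7, 0.7, 0.7]]), tf.constant([[0.3, 0.3, 0.3]]))

lambdas = [tf.expand_dims(lam, -1) for lam in lambdas]   # [batch, time, 1]
mixed = lambdas[0] * emb_a + lambdas[1] * emb_b          # per-token convex combination
# A mixed position counts as padding only where both inputs are padded.
mixed_pad = tf.clip_by_value(pad_a + pad_b - 1.0, 0.0, 1.0)   # [[0., 0., 1.]]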
Example #15
  def ProcessFeatures(self, features):
    """Process extracted features.

    Args:
      features: A dict of extracted Tensors from the records.

    Returns:
      A tuple of tensors:

      - bucket_id: A scalar int Tensor.
      - extracted: a NestedMap of Tensors extracted.
    """
    def ExtractAndFilter(e):
      with tf.name_scope(e.params.name):
        with tf.name_scope('extract'):
          # Filter out extracted features from other extractors.
          filtered_features = {}
          if self.params.record_type == 'TEXT':
            # Text extractors only produce {'line': record} and their
            # FeatureMap() is empty, so don't do any filtering.
            filtered_features = features
          else:
            filtered_keys = e.FeatureMap().keys() | e.ContextMap().keys()
            filtered_features = {
                k: v for k, v in features.items() if k in filtered_keys
            }
          try:
            if self.params.batched_input:
              extracted = e.ExtractBatch(filtered_features)
            else:
              extracted = e.Extract(filtered_features)
          except Exception as exc:  # pylint:disable=broad-except
            # Raise exception with context about which extractor failed.
            raise RuntimeError('Failed running extractor '
                               f'{e.params.name}. '
                               'See above exception for details.') from exc
        with tf.name_scope('filter'):
          if self.params.batched_input:
            bucket = e.FilterBatch(extracted)
          else:
            bucket = e.Filter(extracted)
      return bucket, extracted

    bucket_extracted = self._extractors.Transform(ExtractAndFilter)
    buckets = bucket_extracted.Transform(lambda x: x[0])
    extracted = bucket_extracted.Transform(lambda x: x[1])

    # Return the maximum bucket id so that any extractor can decide whether
    # to filter the entire example.
    max_bucket = tf.reduce_max(buckets.Flatten())

    def NullLike():
      """A function to return the same Tensor signature as Preprocess.

      This is necessary for the tf.cond() to avoid executing the preprocessor
      for examples that are going to be dropped because they exceed the bucket
      limit; tf.cond() requires that the outputs of both branches have the same
      structure.

      Returns:
        A structure with the same Tensor dtype as the output of
        Preprocess.
      """
      shapes = self.Shape()
      rets = []
      for dtype, shape in zip(self.DType().Flatten(), shapes.Flatten()):
        if shape.is_fully_defined():
          rets += [tf.zeros(dtype=dtype, shape=shape)]
        else:
          rets += [tf.zeros(dtype=dtype, shape=[])]  # Our best guess.
      return shapes.Pack(rets)

    def Preprocess(extracted):
      for key, preprocessor in zip(self.params.preprocessors_order,
                                   self.preprocessors):
        with tf.name_scope(key), tf.name_scope(preprocessor.params.name):
          if self.params.batched_input:
            extracted = preprocessor.TransformBatchedFeatures(extracted)
          else:
            extracted = preprocessor.TransformFeatures(extracted)
      return extracted

    # If the extractor wants to filter the example, don't run the preprocessor.
    #
    # Preprocessors can then assume that only examples that pass filtering will
    # be executed.
    #
    # Note that the NullLike branch may return tensors with shapes different
    # from self.Shape().
    final_output = tf.cond(
        tf.less(max_bucket, BUCKET_UPPER_BOUND), lambda: Preprocess(extracted),
        NullLike)

    return max_bucket, final_output
Example #16
    def FProp(self,
              theta,
              x,
              x_paddings=None,
              eos_id=1,
              force_sample_last_token=True):
        """Applies SymbolInsertionLayer.

    We take in a `x`, which represents the groundtruth sequence (i.e., English
    sequence). We return a sampled rollin (observed) canvas (i.e., random subset
    of the English sequence), as well as the target (indices) for an
    insertion-based model (i.e., the targets given the random observed subset).

    Args:
      theta: Ignored, this can be None.
      x: The symbol ids of shape `[batch_size, time_dim]`.
      x_paddings: The paddings (1 or 0) of shape `[batch_size, time_dim]` where
        0 is valid and 1 is invalid.
      eos_id: The <eos> token id to represent end-of-slot.
      force_sample_last_token: Set True to force sample the last token of `x`.

    Returns:
      A `NestedMap`.
        - canvas: The canvas (based off of the `rollin_policy`) of shape
          [batch_size, c_dim]. Note that `c_dim` <= `time_dim`, but they need
          not be equal.
        - canvas_indices: The canvas indices (into `x`).
        - canvas_paddings: The paddings of `canvas_indices`.
        - target_indices: The target indices of shape [num_targets, 3].
          `num_targets` is the number of total targets in the entire batch.
          [:, 0] captures the batch, [:, 1] captures the slot, and [:, 2]
          captures the token. Each row [batch, slot, vocab] represents the
          indices of the target -- i.e., the batch, slot and vocab combination
          of the target. Typical usage of these indices is to tf.gather_nd
          the log-probs (from the softmax layer).
        - target_weights: The target weights.

    Raises:
      ValueError: If invalid params.
    """
        p = self.params

        batch_size = py_utils.GetShape(x)[0]
        time_dim = py_utils.GetShape(x)[1]

        if x_paddings is None:
            x_paddings = tf.zeros([batch_size, time_dim], tf.float32)

        oracle_policy = p.oracle_policy
        rollin_policy = (oracle_policy
                         if p.rollin_policy == 'oracle' else p.rollin_policy)

        if rollin_policy != 'uniform':
            raise ValueError('Unknown or unsupported rollin policy: %s' %
                             rollin_policy)
        if oracle_policy != 'uniform':
            raise ValueError('Unknown or unsupported oracle policy: %s' %
                             oracle_policy)

        x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)

        # Compute the desired length per example in the batch.
        ratio = tf.random.uniform([batch_size], 0.0, 1.0, seed=p.random_seed)
        if force_sample_last_token:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len, tf.float32), tf.int32),
                x_len - 1) + 1
        else:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len + 1, tf.float32), tf.int32),
                x_len)
        # Compute the maximum length across the batch.
        c_len_max = tf.reduce_max(c_len)

        # Grab subset of random valid indices per example.
        z_logits = tf.cast(
            tf.expand_dims(tf.range(time_dim), 0) >= tf.expand_dims(x_len, 1),
            tf.float32) * -1e9
        if force_sample_last_token:
            # Force sample the last token -- i.e., as indexed by `x_len - 1`. We can
            # accomplish this by adding a large constant (+1e9) to its logits.
            z_logits += tf.cast(
                tf.equal(tf.expand_dims(tf.range(time_dim), 0),
                         tf.expand_dims(x_len - 1, 1)), tf.float32) * 1e9
        # Gumbel-max trick to sample (we only sample valid positions per sample in
        # the batch).
        z = -tf.math.log(-tf.math.log(
            tf.random.uniform([batch_size, time_dim], seed=p.random_seed)))
        unused_c_values, c_indices = tf.nn.top_k(z_logits + z, time_dim)

        # Trim everything > c_len_max.
        c_indices = c_indices[:, :c_len_max]

        # Invalidate any indices >= c_len, we use the last index as the default
        # invalid index.
        c_indices = tf.where(
            tf.expand_dims(tf.range(c_len_max), 0) < tf.expand_dims(c_len, 1),
            c_indices, tf.fill(py_utils.GetShape(c_indices), time_dim - 1))

        # Materialize the canvas.
        c_indices = tf.sort(c_indices)
        c = tf.gather_nd(
            x,
            tf.stack([
                tf.reshape(
                    tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                            [1, c_len_max]), [-1]),
                tf.reshape(c_indices, [-1])
            ], 1))
        c = tf.reshape(c, [batch_size, c_len_max])

        # Compute the paddings.
        c_paddings = 1 - tf.sequence_mask(
            c_len, c_len_max, dtype=x_paddings.dtype)
        c *= tf.cast(1 - c_paddings, tf.int32)

        indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, c_len_max]), [batch_size * c_len_max, 1]),
            tf.reshape(c_indices, [batch_size * c_len_max, 1])
        ], 1)
        x_token_is_observed = tf.scatter_nd(
            indices, tf.ones([batch_size * c_len_max], tf.int32),
            py_utils.GetShape(x))
        # `x_segments` captures which slot each `x` belongs to (both observed and
        # tokens that need to be observed).
        x_segments = tf.cumsum(x_token_is_observed, 1, exclusive=True)

        x_token_is_observed = tf.cast(x_token_is_observed, tf.bool)
        prev_x_token_is_observed = tf.pad(x_token_is_observed[:, :-1],
                                          [[0, 0], [1, 0]],
                                          constant_values=True)
        x_token_is_observed = tf.reshape(x_token_is_observed, [-1])
        prev_x_token_is_observed = tf.reshape(prev_x_token_is_observed, [-1])
        x_is_valid = tf.cast(1 - x_paddings, tf.bool)
        x_is_valid = tf.reshape(x_is_valid, [-1])

        # Remap all the observed to <eos>, note some of these need a zero weight
        # (or else there would be <eos> and valid token in the same slot).
        target_indices = tf.cast(tf.reshape(x, [-1, 1]), tf.int32)
        target_indices = tf.where(
            x_token_is_observed,
            tf.fill(py_utils.GetShape(target_indices), eos_id), target_indices)

        # TODO(williamchan): We give uniform 1.0 weight, however, math suggests
        # we may want to weigh this term by the original sequence length.
        target_weights = tf.ones_like(target_indices, tf.float32)

        # We need to set all the weights for <eos> which actually have valid tokens
        # in the slot to zero.
        target_weights = tf.where(
            x_token_is_observed & ~prev_x_token_is_observed,
            tf.zeros_like(target_weights), target_weights)

        # TODO(williamchan): Consider dropping the entries w/ weight zero.

        # Add the batch and slot indices.
        target_indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, time_dim]), [batch_size * time_dim, 1]),
            tf.reshape(x_segments, [-1, 1]), target_indices
        ], 1)

        # Select only the valid indices. The selected valid ones include slots w/
        # <eos>.
        target_indices = target_indices[x_is_valid]
        target_weights = target_weights[x_is_valid]

        return py_utils.NestedMap(canvas=c,
                                  canvas_indices=c_indices,
                                  canvas_paddings=c_paddings,
                                  target_indices=target_indices,
                                  target_weights=target_weights)
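
The core sampling step above is the Gumbel-top-k trick: adding Gumbel noise to masked logits and taking the top k samples k valid positions without replacement. A hedged standalone sketch:

import tensorflow as tf

batch_size, time_dim, k = 2, 6, 3
valid_len = tf.constant([4, 6])                  # valid tokens per example

# -1e9 on invalid positions so they can never be selected.
logits = tf.cast(
    tf.range(time_dim)[tf.newaxis, :] >= valid_len[:, tf.newaxis], tf.float32) * -1e9
# Gumbel noise; top-k of (logits + noise) samples k positions without replacement.
gumbel = -tf.math.log(-tf.math.log(tf.random.uniform([batch_size, time_dim])))
_, sampled_indices = tf.nn.top_k(logits + gumbel, k)   # [batch, k], indices < valid_len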
Example #17
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
    """Get indices to k-neighbors of query_points in points.

  Padding is returned alongside indices. Non-padded points are guaranteed to
  be unique (non-repeated) points from original non-padded points.

  Padded points arise either from a lack of points (k exceeds the number of
  original non-padded points) or from points being too far away (exceeding
  max_distance).

  Note: Padded point indices may refer to padded points from the original, or
  may be duplicates of the closest point.

  TODO(weihan,jngiam): PointCNN implementation makes an assumption that padded
  points are repeated points from the original points. This behavior is
  maintained here, but we should update PointCNN to respect indices paddings.

  Args:
    points: tensor of shape [N, P1, dims].
    query_points: tensor of shape [N, P2, dims].
    k: Integer.
    points_padding: optional tensor of shape [N, P1] containing True/1.0 iff the
      point is a padded point. If None, then all points are considered real
      points.
    max_distance: float representing the maximum distance a neighbor may be from
      its query point. If there are no points within this distance, then the
      closest point is returned (regardless of distance). If this is set to
      None, then no filtering by distance is performed.
    sample_neighbors_uniformly: boolean specifying whether to sample neighbors
      uniformly if they are within max distance.

  Returns:
    A pair of tensors:

    - indices: tensor of shape [N, P2, k].
    - padding: tensor of shape [N, P2, k] where 1 represents a padded point, and
      0 represents an unpadded (real) point.

  """
    n, p1 = py_utils.GetShape(points, 2)
    query_points = py_utils.HasShape(query_points, [n, -1, -1])
    _, p2 = py_utils.GetShape(query_points, 2)

    # Compute pair-wise squared distances.
    # Note that dist_mat contains the squared distance (without sqrt). Thus, when
    # using max_distance, we will need to square max_distance to make sure it's
    # in the same units.
    dist_mat = SquaredDistanceMatrix(query_points, points)
    dist_mat = py_utils.HasShape(dist_mat, [n, p2, p1])

    # Add a large scalar to the distances for padded points.
    # dist_mat[i, j, k] will be:
    #   if k < valid_num[i]: distance between points[i, k] and query_points[i, j]
    #   otherwise:           a large scalar added to dist_mat[i, j, k]
    if points_padding is not None:
        points_padding = tf.cast(tf.expand_dims(points_padding, 1), tf.float32)
        points_padding = py_utils.HasShape(points_padding, [n, 1, p1])
        large_scalar = tf.reduce_max(dist_mat) + 1
        dist_mat += points_padding * large_scalar

    # To perform sampling neighbors uniformly efficiently, we set all neighbors
    # that are within the distance threshold to have distances be drawn uniformly
    # at random. Using top_k with this enables selecting a random set quickly
    # without replacement.
    if sample_neighbors_uniformly:
        if max_distance is not None:
            mask_by_distance = tf.less_equal(dist_mat, max_distance**2)
            dist_mat = tf.where(
                mask_by_distance,
                tf.square(max_distance) *
                tf.random_uniform(tf.shape(dist_mat)), dist_mat)
        else:
            raise ValueError(
                'Uniform sampling requires specifying max_distance.')

    top_k_dist, indices = tf.nn.top_k(-dist_mat, k=k,
                                      sorted=True)  # N x P2 x K

    # Set padding using top_k_dist; padded points will have distance exceeding
    # the large_scalar.
    if points_padding is not None:
        paddings = tf.greater_equal(-top_k_dist, large_scalar)
    else:
        paddings = tf.zeros_like(top_k_dist, dtype=tf.bool)

    # Filter by max_distance by setting all indices that exceed the max_distance
    # to the closest point.
    if max_distance is not None:
        # Mask is true for points that are further than max_distance.
        mask_by_distance = tf.greater(-top_k_dist, tf.square(max_distance))
        closest_idx = tf.tile(indices[:, :, :1], [1, 1, k])
        indices = tf.where(mask_by_distance, closest_idx, indices)
        paddings |= mask_by_distance

    indices = tf.reshape(indices, [n, p2, k])
    paddings = tf.cast(paddings, tf.float32)

    return indices, paddings
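A hypothetical usage sketch for NeighborhoodIndices (shapes and values are made up; it assumes the TensorFlow import of the module this snippet comes from):

points = tf.random.uniform([2, 128, 3])       # Two point clouds of 128 3-D points.
query_points = tf.random.uniform([2, 16, 3])  # 16 query points per cloud.
indices, paddings = NeighborhoodIndices(
    points, query_points, k=8, max_distance=0.5)
# indices: [2, 16, 8] gather indices into `points` for the 8 nearest neighbors.
# paddings: [2, 16, 8], with 1.0 marking neighbors that are padded or lie
# beyond max_distance.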
Example No. 18
    def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
        """Takes a tensor of strings and returns id/padding tensors.

    This generates `token_ids`, `target_ids`, and `paddings` in the format that
    is expected for tokenizers. This performs padding to a fixed length and
    appends the end-of-sentence token as appropriate.

    Args:
      strs: a string Tensor.
      max_length: a python integer. The second dimension of the returned arrays.
        All sequences are padded or truncated to that length.
      append_eos: a python bool. See `BaseTokenizer` for explanation.
      languages: A vector of strings with the same length as `strs`.

    Returns:
      A tuple of 3 tensors:

      - token_ids: a tensor of sequences of WPM ids starting with SOS. Sequences
        always end with EOS unless the sequence exceeds the maximum length.
        Always padded with EOS.
      - target_ids: a tensor of sequences of WPM ids not starting with SOS
        but ending with EOS. Always padded with EOS.
      - paddings: a tensor of floats indicating, for each position, whether it
        is padded.
    """
        p = self.params
        if append_eos is None:
            append_eos = p.append_eos

        batch_size = py_utils.GetShape(strs)[0]
        token_ids_ta = tf.TensorArray(tf.int32, batch_size)
        target_ids_ta = tf.TensorArray(tf.int32, batch_size)
        paddings_ta = tf.TensorArray(tf.float32, batch_size)

        def _TokenizeOneSentence(i, strs, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Tokenizes a single sentence."""
            ids, _ = self._wpm_encoder.Encode(strs[i])

            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)

            # This truncates after the eos is added, so some sentences might
            # not have </s> at the end.
            token_ids_ta = token_ids_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.concat([[self.sos_id], ids], axis=0),
                                     [max_length], self.eos_id))
            target_ids_ta = target_ids_ta.write(
                i, py_utils.PadOrTrimTo(ids, [max_length], self.eos_id))
            paddings_ta = paddings_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.zeros_like(ids, dtype=tf.float32),
                                     [max_length], 1.))

            return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta

        _, _, token_ids_ta, target_ids_ta, paddings_ta = tf.while_loop(
            lambda i, *_: i < batch_size,
            _TokenizeOneSentence,
            loop_vars=(tf.constant(0, tf.int32), strs, token_ids_ta,
                       target_ids_ta, paddings_ta),
            parallel_iterations=30,
            back_prop=False)

        token_ids = token_ids_ta.stack()
        target_ids = target_ids_ta.stack()
        paddings = paddings_ta.stack()

        if not p.pad_to_max_length:
            maxlen = tf.cast(
                tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))),
                tf.int32)
            token_ids = token_ids[:, :maxlen]
            target_ids = target_ids[:, :maxlen]
            paddings = paddings[:, :maxlen]

        return token_ids, target_ids, paddings
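For illustration (hypothetical ids, assuming sos_id = 1, eos_id = 2, append_eos = True and max_length = 6): a sentence tokenized to [10, 11, 12] yields token_ids = [1, 10, 11, 12, 2, 2], target_ids = [10, 11, 12, 2, 2, 2], and paddings = [0., 0., 0., 0., 1., 1.]. The trailing EOS ids double as padding, and paddings marks the padded positions with 1.0.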
Example No. 19
def inlined_matrix_inverse_pth_root(mat_g,
                                    mat_g_size,
                                    alpha,
                                    iter_count=100,
                                    error_tolerance=1e-6,
                                    ridge_epsilon=1e-6):
    """Computes mat_g^alpha, where alpha = -1/p, p is one of 2, 4, or 8.

  We use an iterative Schur-Newton method from equation 3.2 on page 9 of:

  A Schur-Newton Method for the Matrix p-th Root and its Inverse
  by Chun-Hua Guo and Nicholas J. Higham
  SIAM Journal on Matrix Analysis and Applications,
  2006, Vol. 28, No. 3 : pp. 788-804
  https://pdfs.semanticscholar.org/0abe/7f77433cf5908bfe2b79aa91af881da83858.pdf

  Args:
    mat_g: the symmetric PSD matrix whose power is to be computed.
    mat_g_size: size of mat_g.
    alpha: exponent, must be -1/p for p a positive integer.
    iter_count: Maximum number of iterations.
    error_tolerance: Error indicator, useful for early termination.
    ridge_epsilon: Ridge epsilon added to make the matrix positive definite.

  Returns:
    mat_g^alpha
  """
    alpha = tf.cast(alpha, tf.float64)
    neg_alpha = -1.0 * alpha
    exponent = 1.0 / neg_alpha
    identity = tf.eye(tf.cast(mat_g_size, tf.int32), dtype=tf.float64)

    def _unrolled_mat_pow_2(mat_m):
        """Computes mat_m^2."""
        return tf.matmul(mat_m, mat_m)

    def _unrolled_mat_pow_4(mat_m):
        """Computes mat_m^4."""
        mat_pow_2 = _unrolled_mat_pow_2(mat_m)
        return tf.matmul(mat_pow_2, mat_pow_2)

    def _unrolled_mat_pow_8(mat_m):
        """Computes mat_m^4."""
        mat_pow_4 = _unrolled_mat_pow_4(mat_m)
        return tf.matmul(mat_pow_4, mat_pow_4)

    def mat_power(mat_m, p):
        """Computes mat_m^p, for p == 2 or 4 or 8.

    Args:
      mat_m: a square matrix
      p: a positive integer

    Returns:
      mat_m^p
    """
        branch_index = tf.cast(p / 2 - 1, tf.int32)
        return tf.switch_case(
            branch_index, {
                0: functools.partial(_unrolled_mat_pow_2, mat_m),
                1: functools.partial(_unrolled_mat_pow_4, mat_m),
                2: functools.partial(_unrolled_mat_pow_8, mat_m),
            })

    def _iter_condition(i, unused_mat_m, unused_mat_h, unused_old_mat_h, error,
                        run_step):
        return tf.math.logical_and(
            tf.math.logical_and(i < iter_count, error > error_tolerance),
            run_step)

    def _iter_body(i, mat_m, mat_h, unused_old_mat_h, error, unused_run_step):
        mat_m_i = (1 - alpha) * identity + alpha * mat_m
        new_mat_m = tf.matmul(mat_power(mat_m_i, exponent), mat_m)
        new_mat_h = tf.matmul(mat_h, mat_m_i)
        new_error = tf.reduce_max(tf.abs(new_mat_m - identity))
        return (i + 1, new_mat_m, new_mat_h, mat_h, new_error,
                new_error < error)

    if mat_g_size == 1:
        # 1x1 case: compute the power directly (no iteration needed).
        resultant_mat_h = tf.pow(mat_g + ridge_epsilon, alpha)
        error = tf.constant(0., tf.float64)
    else:
        damped_mat_g = mat_g + ridge_epsilon * identity
        z = (1 - 1 / alpha) / (2 * tf.norm(damped_mat_g))
        # The best value for z is
        # (1 - 1/alpha) * (c_max^{-alpha} - c_min^{-alpha}) /
        #                 (c_max^{1-alpha} - c_min^{1-alpha})
        # where c_max and c_min are the largest and smallest singular values of
        # damped_mat_g.
        # The above estimate assumes that c_max > c_min * 2^p. (p = -1/alpha)
        # Can replace above line by the one below, but it is less accurate,
        # hence needs more iterations to converge.
        # z = (1 - 1/alpha) / tf.trace(damped_mat_g)
        # If we want the method to always converge, use z = 1 / norm(damped_mat_g)
        # or z = 1 / tf.trace(damped_mat_g), but these can result in many
        # extra iterations.
        new_mat_m_0 = damped_mat_g * z
        new_error = tf.reduce_max(tf.abs(new_mat_m_0 - identity))
        new_mat_h_0 = identity * tf.pow(z, neg_alpha)
        _, mat_m, mat_h, old_mat_h, error, convergence = tf.while_loop(
            _iter_condition, _iter_body,
            [0, new_mat_m_0, new_mat_h_0, new_mat_h_0, new_error, True])
        error = tf.reduce_max(tf.abs(mat_m - identity))
        is_converged = tf.cast(convergence, old_mat_h.dtype)
        resultant_mat_h = is_converged * mat_h + (1 - is_converged) * old_mat_h
    return resultant_mat_h, error
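For reference, a minimal NumPy sketch of the same coupled Schur-Newton iteration (written against the formulas above as an illustration, not the library routine; names and the test matrix are made up):

import numpy as np

def inverse_pth_root(mat_g, p, iter_count=100, error_tolerance=1e-6,
                     ridge_epsilon=1e-6):
    """Approximates mat_g^(-1/p) for a symmetric PSD matrix mat_g."""
    alpha = -1.0 / p
    identity = np.eye(mat_g.shape[0])
    damped = mat_g + ridge_epsilon * identity
    z = (1.0 - 1.0 / alpha) / (2.0 * np.linalg.norm(damped))  # Frobenius norm.
    mat_m = z * damped
    mat_h = identity * z ** (1.0 / p)
    for _ in range(iter_count):
        mat_m_i = (1.0 - alpha) * identity + alpha * mat_m
        mat_m = np.linalg.matrix_power(mat_m_i, p) @ mat_m
        mat_h = mat_h @ mat_m_i
        if np.max(np.abs(mat_m - identity)) < error_tolerance:
            break
    return mat_h

a = np.diag([1.0, 100.0])
print(inverse_pth_root(a, p=2))  # Approximately diag(1.0, 0.1), i.e. a^(-1/2).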
Example No. 20
    def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
        del languages
        p = self.params
        if append_eos is None:
            append_eos = p.append_eos

        batch_size = py_utils.GetShape(strs)[0]
        token_ids_ta = tf.TensorArray(tf.int32, batch_size)
        target_ids_ta = tf.TensorArray(tf.int32, batch_size)
        paddings_ta = tf.TensorArray(tf.float32, batch_size)

        def _TokenizeOneSentence(i, text, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Tokenizes a single sentence."""
            if tf.is_tensor(i):
                text_i = tf.gather(text, i)
            else:
                text_i = text[i]
            ids = self._tokenizer.tokenize(text_i).merge_dims(0, -1)
            ids.set_shape([None])

            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)
            sos_ids = tf.concat([[self.sos_id], ids], axis=0)
            if p.prepend_sos:
                ids = sos_ids

            # This truncates after the EOS is added, so some sentences might
            # not have EOS at the end.
            token_ids_ta = token_ids_ta.write(
                i, py_utils.PadOrTrimTo(sos_ids, [max_length], 0))
            target_ids_ta = target_ids_ta.write(
                i, py_utils.PadOrTrimTo(ids, [max_length], 0))
            paddings_ta = paddings_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.zeros_like(ids, dtype=tf.float32),
                                     [max_length], 1.))

            return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta

        _, _, token_ids_ta, target_ids_ta, paddings_ta = tf.while_loop(
            lambda i, *_: i < batch_size,
            _TokenizeOneSentence,
            loop_vars=(tf.constant(0, tf.int32), strs, token_ids_ta,
                       target_ids_ta, paddings_ta),
            parallel_iterations=30,
            back_prop=False)

        token_ids = token_ids_ta.stack()
        target_ids = target_ids_ta.stack()
        paddings = paddings_ta.stack()

        if not p.pad_to_max_length:
            maxlen = tf.cast(
                tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))),
                tf.int32)
            token_ids = token_ids[:, :maxlen]
            target_ids = target_ids[:, :maxlen]
            paddings = paddings[:, :maxlen]

        return token_ids, target_ids, paddings
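For comparison with the WPM tokenizer above (again with hypothetical ids, assuming sos_id = 1, eos_id = 2, append_eos = True, prepend_sos = False and max_length = 6): a sentence tokenized to [10, 11, 12] yields token_ids = [1, 10, 11, 12, 2, 0], target_ids = [10, 11, 12, 2, 0, 0], and paddings = [0., 0., 0., 0., 1., 1.]. Here padding uses id 0 rather than the EOS id, and setting prepend_sos = True would make target_ids start with SOS as well.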