Example #1
  def _GetWarpMatrix(self,
                     batch_size,
                     choose_range,
                     matrix_size,
                     global_seed,
                     max_warp_frames=None,
                     dtype=tf.float32,
                     max_ratio=1.0):
    """Returns warp matrices starting from random positions.

    When max_warp_frames is not None:
      1) Sample random warp displacements from the interval
         [-max_warp_frames, max_warp_frames] to yield a shift tensor
         with shape (batch_size,).
      2) Truncate shifts to a maximum magnitude of (choose_range * max_ratio),
         so that each shift is fully contained within the
         corresponding sequence.
      3) Randomly sample origin points of shape (batch_size,) within
         [shift, choose_range - shift).
      4) Return a batch of 1-D linear maps that fix the boundary points and
         move the origin point by the shift.

    When max_warp_frames is None:
      1) Sample random warp displacements with magnitudes less than
         (choose_range * max_ratio) to yield a shift tensor with
         shape (batch_size,).
      2) Proceed through steps 3) and 4) above.

    Args:
      batch_size: Batch size. Integer number.
      choose_range: Range within which the warp reference points must lie.
        Tensor of shape (batch_size,).
      matrix_size: Dimension of vector space warp matrix is applied to. Integer
        number.
      global_seed: an integer seed tensor for stateless random ops.
      max_warp_frames: Upper-bound on the warp distance. Integer or None.
      dtype: Data type.
      max_ratio: Maximum ratio between the shift distance and choose_range.
        Float number.

    Returns:
      warp_matrix: A batch of fixed-size warp matrices with shape
      (batch_size, matrix_size, matrix_size).
    """
    p = self.params
    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_3, seed_4, and seed_5 are set separately to
    # avoid correlation of warp magnitude and origin position.
    if p.use_input_dependent_random_seed:
      seed_3 = global_seed + 3
      seed_4 = global_seed + 4
      seed_5 = global_seed + 5
    elif p.random_seed:
      seed_3 = p.random_seed - 1
      seed_4 = p.random_seed - 1
      seed_5 = 2 * p.random_seed + 1
    else:
      seed_3 = p.random_seed
      seed_4 = p.random_seed
      seed_5 = p.random_seed

    choose_range_dtype = tf.cast(choose_range, dtype=dtype)
    length_upper_bound = tf.cast(max_ratio * choose_range_dtype, dtype=tf.int32)
    # Set shift length.

    random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)

    if max_warp_frames and max_warp_frames > 0:
      shift = random_uniform(
          shape=(batch_size,),
          minval=-1 * max_warp_frames,
          maxval=max_warp_frames + 1,
          dtype=tf.int32,
          seed=seed_3)
    else:
      random_ratio = random_uniform(
          shape=(batch_size,),
          minval=-1.0,
          maxval=1.0,
          dtype=dtype,
          seed=seed_4)
      shift = tf.cast(random_ratio * tf.cast(length_upper_bound, dtype=dtype),
                      tf.int32)
    # Make sure the sampled shift magnitude is no larger than
    # max_ratio * choose_range. Note that sampling in this way is biased
    # (shorter sequences may be over-masked).
    final_shift = tf.maximum(-length_upper_bound,
                             tf.minimum(shift, length_upper_bound))
    # Choose origin anchor point.
    mid_range = tf.maximum(tf.cast(choose_range, dtype=tf.int32) - 2, 0)
    random_origin = random_uniform(shape=(batch_size,), maxval=1.0, seed=seed_5)
    origin_within_valid_range = random_origin * tf.cast(mid_range, dtype=dtype)
    origin = tf.cast(origin_within_valid_range, tf.int32) + 1
    # Set destination point of the origin anchor point under the warp map.
    destination = origin + final_shift
    # Cast origin and destination.
    origin = tf.cast(origin, dtype=dtype)
    destination = tf.cast(destination, dtype=dtype)

    return self._ConstructWarpMatrix(
        batch_size=batch_size,
        matrix_size=matrix_size,
        origin=origin,
        destination=destination,
        choose_range=choose_range_dtype,
        dtype=dtype)
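A minimal standalone sketch of steps 1) and 2) above (the helper name and conventions here are hypothetical, not part of the class): sample integer shifts per batch element, then clip each one to max_ratio * choose_range so the warp stays inside its sequence.

import numpy as np

def sample_clipped_shifts(batch_size, choose_range, max_warp_frames,
                          max_ratio=1.0, seed=0):
  rng = np.random.default_rng(seed)
  # Step 1): sample shifts from [-max_warp_frames, max_warp_frames].
  shift = rng.integers(-max_warp_frames, max_warp_frames + 1,
                       size=(batch_size,))
  # Step 2): clip to +/- (max_ratio * choose_range), as final_shift does above.
  upper_bound = (max_ratio * choose_range).astype(np.int32)
  return np.clip(shift, -upper_bound, upper_bound)

print(sample_clipped_shifts(4, np.array([50, 3, 20, 2]), max_warp_frames=5))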
Example #2
    def _XYZFromRangeImage(self,
                           lidar_image,
                           lidar_image_mask,
                           extrinsics,
                           inclinations,
                           pixel_pose=None,
                           frame_pose=None):
        """Extract the cartesian coordinates from the range image.

    Args:
       lidar_image: [H, W, C] range image Tensor.
       lidar_image_mask: [H, W] boolean indicating which 2d coordinates in the
         lidar image are present.
       extrinsics: [4, 4] float matrix representing transformation matrix to
         world coordinates.
       inclinations: [V] beam inclinations vector.
       pixel_pose: [64, 2650, 4, 4] tensor representing per pixel pose of GBR.
       frame_pose: [4, 4] matrix representing vehicle to world transformation.

    Returns:
      [H, W, 3] range image cartesian coordinates.
    """
        height, width, channels = py_utils.GetShape(lidar_image, 3)

        conversion_dtype = tf.float32
        lidar_image = tf.cast(lidar_image, conversion_dtype)
        extrinsics = tf.cast(extrinsics, conversion_dtype)
        inclinations = tf.cast(inclinations, conversion_dtype)
        inclinations = tf.reverse(inclinations, axis=[-1])

        az_correction = py_utils.HasShape(
            tf.atan2(extrinsics[1, 0], extrinsics[0, 0]), [])
        ratios = (tf.cast(tf.range(width, 0, -1), dtype=conversion_dtype) -
                  .5) / tf.cast(width, conversion_dtype)
        ratios = py_utils.HasShape(ratios, [width])

        azimuth = (ratios * 2. - 1.) * np.pi - az_correction[..., tf.newaxis]
        azimuth = py_utils.HasShape(azimuth, [width])

        lidar_image_mask = lidar_image_mask[..., tf.newaxis]
        lidar_image_mask = tf.tile(lidar_image_mask, [1, 1, channels])
        lidar_image = tf.where(lidar_image_mask, lidar_image,
                               tf.zeros_like(lidar_image))
        lidar_image_range = lidar_image[..., 0]

        azimuth = py_utils.HasShape(azimuth[tf.newaxis, ...], [1, width])
        inclinations = py_utils.HasShape(inclinations[..., tf.newaxis],
                                         [height, 1])

        cos_azimuth = tf.cos(azimuth)
        sin_azimuth = tf.sin(azimuth)
        cos_incl = tf.cos(inclinations)
        sin_incl = tf.sin(inclinations)

        x = cos_azimuth * cos_incl * lidar_image_range
        y = sin_azimuth * cos_incl * lidar_image_range
        z = sin_incl * lidar_image_range

        lidar_image_points = tf.stack([x, y, z], -1)
        lidar_image_points = py_utils.HasShape(lidar_image_points,
                                               [height, width, 3])
        rotation = extrinsics[0:3, 0:3]
        translation = extrinsics[0:3, 3][tf.newaxis, ...]

        # Transform the image points in cartesian coordinates to
        # the world coordinate system using the extrinsics matrix.
        #
        # We first flatten the points, apply rotation, then
        # reshape to restore the original input and then apply
        # translation.
        lidar_image_points = tf.matmul(tf.reshape(lidar_image_points, [-1, 3]),
                                       rotation,
                                       transpose_b=True)
        lidar_image_points = tf.reshape(lidar_image_points, [height, width, 3])
        lidar_image_points += translation

        lidar_image_points = py_utils.HasShape(lidar_image_points,
                                               [height, width, 3])
        # GBR uses per pixel pose.
        if pixel_pose is not None:
            pixel_pose_rotation = pixel_pose[..., 0:3, 0:3]
            pixel_pose_translation = pixel_pose[..., 0:3, 3]
            lidar_image_points = tf.einsum(
                'hwij,hwj->hwi', pixel_pose_rotation,
                lidar_image_points) + pixel_pose_translation
            if frame_pose is None:
                raise ValueError(
                    'frame_pose must be set when pixel_pose is set.')
            # To vehicle frame corresponding to the given frame_pose
            # [4, 4]
            world_to_vehicle = tf.linalg.inv(frame_pose)
            world_to_vehicle_rotation = world_to_vehicle[0:3, 0:3]
            world_to_vehicle_translation = world_to_vehicle[0:3, 3]
            # [H, W, 3]
            lidar_image_points = tf.einsum(
                'ij,hwj->hwi', world_to_vehicle_rotation, lidar_image_points
            ) + world_to_vehicle_translation[tf.newaxis, tf.newaxis, :]

        return lidar_image_points
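As a hedged illustration of the core polar-to-Cartesian step above (sensor frame only; the azimuth correction, extrinsics, per-pixel poses and masking are omitted, and the helper name is hypothetical), a numpy sketch:

import numpy as np

def range_image_to_xyz(range_image, azimuth, inclination):
  # range_image: [H, W] ranges; azimuth: [W] radians; inclination: [H] radians.
  cos_az, sin_az = np.cos(azimuth)[None, :], np.sin(azimuth)[None, :]
  cos_in, sin_in = np.cos(inclination)[:, None], np.sin(inclination)[:, None]
  x = cos_az * cos_in * range_image
  y = sin_az * cos_in * range_image
  z = sin_in * range_image
  return np.stack([x, y, z], axis=-1)  # [H, W, 3]

print(range_image_to_xyz(np.ones([2, 3]), np.zeros(3), np.zeros(2))[0, 0])
# -> [1. 0. 0.]: a unit range at zero azimuth/inclination points along +x.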
Example #3
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
    """Perform NMS on predicted bounding boxes / associated logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold to use when determining whether two boxes
      overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    nms_indices: Indices of the boxes selected after NMS. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
    predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
    utils_3d = detection_3d_lib.Utils3D()
    predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
    batch_size, num_predicted_boxes, _ = py_utils.GetShape(predicted_bboxes)
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_predicted_boxes, -1])
    _, _, num_classes = py_utils.GetShape(classification_scores)

    if not isinstance(nms_iou_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`nms_iou_threshold`.')
    if not isinstance(score_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`score_threshold`.')

    if max_boxes_per_class is None:
        max_boxes_per_class = num_predicted_boxes

    # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS, and
    # per-class thresholding.
    # [batch, num_predicted_boxes]
    nms_scores = tf.reduce_max(classification_scores, axis=-1)

    # Compute the most likely label by computing the highest class score from
    # the output of the sigmoid.
    likely_labels = tf.argmax(classification_scores, axis=-1)

    # When background is the most likely class for the box, mask out the scores
    # of that box from NMS scoring so the background boxes don't dominate the
    # NMS.
    nms_scores *= tf.cast(likely_labels > 0, tf.float32)

    # Compute NMS for every sample in the batch.
    nms_indices, valid_mask = utils_3d.BatchedNMSIndices(
        predicted_bboxes,
        nms_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_num_boxes=max_boxes_per_class)

    # Reorder the box data and logits according to NMS scoring.
    predicted_bboxes = tf.array_ops.batch_gather(predicted_bboxes, nms_indices)
    classification_scores = tf.array_ops.batch_gather(classification_scores,
                                                      nms_indices)

    # Now reformat the output of NMS to match the format of
    # MultiClassOrientedDecodeWithNMS, which outputs a per-class NMS result.
    # All outputs take the leading shape
    # [batch_size, num_classes, max_boxes_per_class], so since this NMS is not
    # class-specific we tile the outputs num_classes times or transpose the
    # data to [batch, num_classes, ...].
    predicted_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                               [1, num_classes, 1, 1])
    classification_scores = tf.transpose(classification_scores, (0, 2, 1))
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_classes, max_boxes_per_class])
    valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, num_classes, 1])
    return nms_indices, predicted_bboxes, classification_scores, valid_mask
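The batch_gather reordering above has (to my understanding) the same semantics as tf.gather with batch_dims=1: out[b, i, ...] = params[b, indices[b, i], ...]. A small sketch:

import tensorflow as tf

params = tf.constant([[[1.], [2.], [3.]],
                      [[4.], [5.], [6.]]])  # [batch=2, num_boxes=3, 1]
indices = tf.constant([[2, 0],
                       [1, 1]])             # [batch=2, k=2]
print(tf.gather(params, indices, batch_dims=1))
# [[[3.], [1.]], [[5.], [5.]]]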
Example #4
    def FProp(self, theta, input_batch):
        """Embeds source ids and transforms with TransformerStack.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      input_batch: A `.NestedMap` object containing: ids, the input ids tensor
        of shape [batch, time]; and paddings, the ids' paddings of shape
        [batch, time].

    Returns:
      A `.NestedMap` object containing:
        encoded - The encoded features of shape [time, batch, dim] or [batch,
          time, dim], depending on p.output_data_format.
        padding - The encoded features' padding of shape [time, batch] or
          [batch, time].
        segment_id - The segmentation of packed inputs of shape [time, batch] or
          [batch, time] if it is supported by the model, or None otherwise.
        embedded_inputs - The embedded inputs tokens without positional
          encodings of shape [time, batch, dim] or [batch, time, dim].
    """

        p = self.params
        with tf.name_scope(p.name):
            # [batch, time]
            input_ids = input_batch.ids
            # [batch, time]
            paddings = input_batch.paddings

            # [batch, time]
            segment_ids = input_batch.segment_ids if p.packed_input else None

            batch = py_utils.GetShape(input_ids)[0]
            time = py_utils.GetShape(input_ids)[1]

            # Embedding layer.
            # [batch, time, dim]
            if not p.shared_emb:
                input_embs = self.token_emb.EmbLookup(theta.token_emb,
                                                      input_ids)
            else:
                input_embs = self.softmax.EmbLookup(theta.softmax, input_ids)
            orig_input_embs = input_embs

            # [1, time, dim]
            if p.packed_input:
                positions = input_batch.segment_pos
                position_embs = tf.expand_dims(
                    self.position_emb.FPropWithPosition(
                        theta.position_emb, positions), 0)
            else:
                position_embs = tf.expand_dims(
                    self.position_emb.FProp(theta.position_emb, time), 0)

            # [batch, time, dim]
            input_embs += position_embs

            if p.input_dropout_tpl.fprop_dtype:
                input_embs = tf.cast(input_embs,
                                     p.input_dropout_tpl.fprop_dtype)
                paddings = tf.cast(paddings, p.input_dropout_tpl.fprop_dtype)

            input_embs = self.input_dropout.FProp(theta.input_dropout,
                                                  input_embs)
            # [batch, time, dim]
            transformer_input = input_embs
            # Explicitly set the input shape of the Transformer layers, to
            # avoid unknown-shape errors from tf.einsum on non-TPU devices.
            transformer_input = tf.reshape(transformer_input,
                                           [batch, time, p.model_dim])

            # Compute self-attention segment mask once.
            if p.packed_input:
                segment_mask = batch_major_attention.SegmentMask(
                    segment_ids, segment_ids, dtype=transformer_input.dtype)
            else:
                segment_mask = tf.zeros([batch, 1, time, time])

            encoded, padding = self.transformer_stack.FProp(
                theta.transformer_stack, transformer_input, paddings,
                segment_mask)

            if p.final_layer_norm:
                encoded = self.final_ln.FProp(theta.final_ln, encoded)

            seq_lengths = tf.cast(tf.reduce_sum(1. - padding, axis=1),
                                  tf.int32)

            if p.output_data_format == 'TBC':
                encoded = tf.transpose(encoded,
                                       [1, 0, 2])  # [time, batch, dim]
                padding = tf.transpose(padding)  # [time, batch]
                segment_ids = tf.transpose(
                    segment_ids) if p.packed_input else None
                orig_input_embs = tf.transpose(orig_input_embs, [1, 0, 2])

            return py_utils.NestedMap(
                encoded=encoded,
                padding=padding,
                seq_lengths=seq_lengths,  # used by beam_search_helper.
                segment_id=segment_ids,
                embedded_inputs=orig_input_embs)
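For packed inputs, the segment mask blocks attention between tokens that come from different examples packed into the same row. A sketch of the assumed semantics of batch_major_attention.SegmentMask (not verified here): an additive mask, 0 where two positions share a segment id and a large negative value elsewhere, broadcastable over attention heads.

import numpy as np

def segment_mask(segment_ids, neg=-1e9):
  # segment_ids: [batch, time] int array; returns [batch, 1, time, time].
  same = (segment_ids[:, None, :, None] == segment_ids[:, None, None, :])
  return np.where(same, 0.0, neg)

print(segment_mask(np.array([[1, 1, 2, 2]]))[0, 0])
# Positions 0-1 attend only to 0-1; positions 2-3 only to 2-3.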
Example #5
    def BeamSearchDecode(self,
                         theta,
                         encoder_outputs,
                         num_hyps_per_beam_override=0,
                         init_beam_search_state=None,
                         pre_beam_search_step_callback=None,
                         post_beam_search_step_callback=None,
                         max_steps=None):
        """Performs beam-search based decoding.

    Args:
      theta: A NestedMap object containing weights' values of the decoder layer
        and its children layers.
      encoder_outputs: A NestedMap containing encoder outputs to be passed to
        the callbacks. Mostly opaque to BeamSearchHelper, except that it should
        contain either a 'seq_lengths' field of shape [source_batch_size] or
        a 'paddings' field of shape [source_max_lengths, source_batch_size].
      num_hyps_per_beam_override: If set to a value <= 0, this parameter is
        ignored. If set to a value > 0, then this value will be used to override
        `p.num_hyps_per_beam`.
      init_beam_search_state: The `InitBeamSearchState` callback. Please refer
        to the class header comments for more details.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      max_steps: maximum beam search steps. If None, use
        self.params.target_seq_len.

    Returns:
      A `BeamSearchDecodeOutput`.
    """
        p = self.params
        num_hyps_per_beam = p.num_hyps_per_beam
        if num_hyps_per_beam_override > 0:
            num_hyps_per_beam = num_hyps_per_beam_override
        if max_steps is None:
            max_steps = p.target_seq_len

        initial_results, other_states = init_beam_search_state(
            theta, encoder_outputs, num_hyps_per_beam)

        num_hyps = tf.shape(initial_results.log_probs)[0]
        num_beams = num_hyps // num_hyps_per_beam

        if 'step_ids' in initial_results:
            # [num_hyps, 1]
            step_ids = tf.ensure_shape(initial_results.step_ids, [None, 1])
        else:
            step_ids = tf.fill([num_hyps, 1],
                               tf.constant(p.target_sos_id, dtype=tf.int32))

        min_score = -1e36
        best_scores = (tf.zeros(shape=[num_beams], dtype=p.dtype) + min_score)
        cumulative_scores = tf.zeros(shape=[num_hyps], dtype=p.dtype)
        in_scores = tf.zeros([max_steps, num_hyps], dtype=p.dtype)
        in_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_prev_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_done_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.string)
        bs_atten_probs = tf.zeros(
            [max_steps, num_hyps,
             tf.shape(initial_results.atten_probs)[1]],
            dtype=p.dtype)
        cur_step = tf.constant(0, dtype=tf.int32)
        all_done = tf.constant(False, dtype=tf.bool)
        core_bs_states = (best_scores, cumulative_scores, in_scores, in_hyps,
                          in_prev_hyps, in_done_hyps, bs_atten_probs)

        def LoopContinue(cur_step, all_done, unused_step_ids,
                         unused_core_bs_states, unused_other_states_list):
            return tf.math.logical_and(cur_step < max_steps,
                                       tf.math.logical_not(all_done))

        def LoopBody(cur_step, unused_all_done, step_ids, core_bs_states,
                     other_states_list):
            (cur_step, all_done, new_step_ids, new_bs_states,
             new_other_states) = self._BeamSearchStep(
                 theta, encoder_outputs, cur_step, step_ids, core_bs_states,
                 other_states.Pack(other_states_list), num_hyps_per_beam,
                 pre_beam_search_step_callback, post_beam_search_step_callback)
            return (cur_step, all_done, new_step_ids, new_bs_states,
                    new_other_states.Flatten())

        flat_other_states = other_states.Flatten()
        _, _, _, final_bs_states, flat_final_other_states = tf.while_loop(
            LoopContinue,
            LoopBody,
            loop_vars=(cur_step, all_done, step_ids, core_bs_states,
                       flat_other_states),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            shape_invariants=(tf.TensorShape(cur_step.get_shape()),
                              tf.TensorShape(all_done.get_shape()),
                              tf.TensorShape(step_ids.get_shape()),
                              _GetShapes(core_bs_states),
                              _GetShapes(flat_other_states, none_shapes=True)))
        # [target_seq_len, num_beams * num_hyps_per_beam].
        final_done_hyps = final_bs_states[5]
        final_other_states = other_states.Pack(flat_final_other_states)

        # Assume that `paddings` has shape [source_max_lengths, source_batch_size]
        # by default, and compute `encoded_seq_lengths` accordingly. This can be
        # overridden by directly passing `seq_lengths` in the `encoder_outputs`
        # NestedMap.
        encoded_seq_lengths = getattr(encoder_outputs, 'seq_lengths', None)
        if encoded_seq_lengths is None:
            source_paddings = encoder_outputs.padding
            if isinstance(source_paddings, py_utils.NestedMap):
                encoded_seq_lengths = tf.cast(
                    tf.round(
                        tf.reduce_sum(
                            1.0 - tf.transpose(source_paddings.Flatten()[0]),
                            1)), tf.int32)
            else:
                encoded_seq_lengths = tf.cast(
                    tf.round(
                        tf.reduce_sum(
                            1.0 -
                            tf.cast(tf.transpose(source_paddings), tf.float32),
                            1)), tf.int32)

        # [num_beams, num_hyps_per_beam].
        topk_hyps = ops.top_k_terminated_hyps(
            final_done_hyps,
            encoded_seq_lengths,
            k=num_hyps_per_beam,
            num_hyps_per_beam=num_hyps_per_beam,
            length_normalization=p.length_normalization,
            coverage_penalty=p.coverage_penalty,
            target_seq_length_ratio=p.target_seq_length_ratio)
        # [num_beams * num_hyps_per_beam, ...].
        max_seq_length = 0 if isinstance(max_steps, tf.Tensor) else max_steps
        topk_ids, topk_lens, topk_scores = ops.unpack_hyp(
            tf.reshape(topk_hyps, [-1]), max_seq_length=max_seq_length)
        # [num_beams, num_hyps_per_beam].
        topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

        return BeamSearchDecodeOutput(final_done_hyps, topk_hyps, topk_ids,
                                      topk_lens, topk_scores, None,
                                      final_other_states)
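The fallback length computation above simply counts non-padded positions: with a time-major paddings tensor, length = sum over time of (1 - padding). A minimal sketch:

import tensorflow as tf

paddings = tf.constant([[0., 0.],
                        [0., 1.],
                        [1., 1.]])  # [source_max_lengths=3, source_batch_size=2]
lengths = tf.cast(tf.round(tf.reduce_sum(1.0 - paddings, axis=0)), tf.int32)
print(lengths)  # [2, 1]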
Example #6
  def _resource_apply_dense(self, grad, var):
    if grad is None:
      tf.logging.warning('Gradient is None for variable %s', var.name)
      return []

    grad_dtype = var.dtype  # TODO(lepikhin): add to params
    grad = tf.cast(grad, grad_dtype)
    factored_dims = self._factored_dims(var.shape.as_list())
    if factored_dims:
      vr = self.get_slot(var, 'vr')
      vc = self.get_slot(var, 'vc')
    else:
      v = self.get_slot(var, 'v')
    if self._beta1:
      m = self.get_slot(var, 'm')

    cond = tf.constant(True)

    def _Upd(c, x):
      if not self._cond_is_finite:
        return c
      c = tf.math.logical_and(c, tf.reduce_all(tf.math.is_finite(x)))
      c = tf.math.logical_and(
          c, tf.reduce_all(tf.math.logical_not(tf.math.is_inf(x))))
      return c

    def _Wrap(fn, x, y):
      if not self._cond_is_finite:
        return fn(x, y)
      return tf.cond(cond, lambda: fn(x, y), lambda: x)

    with tf.variable_scope(var.name[:-2] + '/Adafactor'):
      grad_squared = tf.math.square(grad) + tf.cast(self._epsilon1, grad_dtype)
      cond = _Upd(cond, grad_squared)
      decay_rate = tf.cast(self._decay_rate, var.dtype)
      old_val = tf.identity(var)  # TODO(lepikhin): introduce gradient dtype
      if self._multiply_by_parameter_scale:
        update_scale = self._parameter_scale(old_val) * tf.cast(
            self._learning_rate, grad_dtype)
      else:
        update_scale = self._learning_rate
      mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
      update_scale = tf.cast(update_scale, grad_dtype)
      updates = []
      if factored_dims:
        d0, d1 = factored_dims
        vr_axis, vc_axis = d0, d1
        grad_squared_row_mean = tf.reduce_mean(grad_squared, axis=vr_axis)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, axis=vc_axis)
        # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
        # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
        cond = _Upd(cond, new_vr)
        cond = _Upd(cond, new_vc)
        vr_update = _Wrap(tf.assign, vr, new_vr)
        vc_update = _Wrap(tf.assign, vc, new_vc)
        updates.extend([vr_update, vc_update])
        long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
        r_factor = tf.math.rsqrt(new_vr / long_term_mean)
        c_factor = tf.math.rsqrt(new_vc)
        x = grad * tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
            c_factor, vc_axis)
      else:
        new_v = v * decay_rate + grad_squared * mixing_rate
        cond = _Upd(cond, new_v)
        v_update = _Wrap(tf.assign, v, new_v)
        updates.append(v_update)
        x = grad * tf.math.rsqrt(new_v)
      if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(
            tf.constant(1.0, grad_dtype),
            py_utils.ReduceRms(x) /
            tf.constant(self._clipping_threshold, grad_dtype))
        x /= clipping_denom
      subtrahend = x * update_scale
      if self._beta1:
        new_m = (
            m * tf.constant(self._beta1, dtype=grad_dtype) +
            subtrahend * tf.constant(1.0 - self._beta1, dtype=grad_dtype))
        subtrahend = new_m
        cond = _Upd(cond, new_m)
        updates.append(_Wrap(tf.assign, m, new_m))
      # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
      #  for the case of bfloat16 activations, so as to avoid repeatedly
      #  rounding the slice value, which results in poor quality.
      cond = _Upd(cond, subtrahend)
      var_update = _Wrap(tf.assign_sub, var, subtrahend)
      updates.append(var_update)
      return tf.group(*updates)
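A hedged numpy sketch of the factored branch for a 2-D parameter (using its own conventions, not the optimizer's API): only row and column means of the squared gradients are stored, and the per-element rsqrt preconditioner is rebuilt from them.

import numpy as np

def factored_update_direction(grad, vr, vc, decay_rate=0.999, epsilon1=1e-30):
  grad_squared = grad**2 + epsilon1
  # Keep running row/column means instead of the full second-moment matrix.
  new_vr = vr * decay_rate + grad_squared.mean(axis=1) * (1 - decay_rate)
  new_vc = vc * decay_rate + grad_squared.mean(axis=0) * (1 - decay_rate)
  r_factor = 1.0 / np.sqrt(new_vr / new_vr.mean())  # normalized row factor
  c_factor = 1.0 / np.sqrt(new_vc)
  return grad * r_factor[:, None] * c_factor[None, :], new_vr, new_vc

g = np.ones([3, 4])
x, vr, vc = factored_update_direction(g, np.zeros(3), np.zeros(4))
print(x.shape, vr.shape, vc.shape)  # (3, 4) (3,) (4,)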
Example #7
 def ApplyClipping(self, theta, x):
     p = self.params
     if not p.cc_schedule:
         return x
     cap = tf.cast(self.cc_schedule.GetState(theta.cc_schedule), x.dtype)
     return tf.clip_by_value(x, -cap, cap)
Example #8
 def FProp(self, theta, current_step):
     p = self.params
      num_decays = tf.floor(
          tf.cast(current_step, tf.float32) / float(p.num_steps_per_decay))
     return tf.pow(p.decay, num_decays)
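For example, with p.decay = 0.5 and p.num_steps_per_decay = 1000, step 2500 has floor(2500 / 1000) = 2 completed decay intervals, so the schedule returns 0.5**2 = 0.25.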
Example #9
 def FProp(self, theta, current_step):
     p = self.params
     step_num = tf.cast(current_step, tf.float32)
     learning_rate = tf.math.rsqrt(tf.maximum(step_num, p.warmup_steps))
     learning_rate *= p.multiplier
     return learning_rate
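With p.warmup_steps = 10000 and p.multiplier = 1.0, for instance, the rate holds at 10000**-0.5 = 0.01 through warmup and then decays as step**-0.5.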
Example #10
 def _CastFloats(v):
     if v is None:
         return None
     return tf.cast(
         v, py_utils.FPropDtype(p)) if v.dtype.is_floating else v
Example #11
 def FProp(self, theta, current_step):
     return self._exp(tf.cast(current_step, dtype=self.params.dtype))
Example #12
    def _Gradient(inputs, _, original_grad):

      # Compute the gradients for each loss w.r.t. the inputs.
      # TODO(jngiam): Look into whether TF dedups this computation.
      per_loss_grads = []
      for loss, _ in self._losses:
        per_loss_grad = tf.gradients(loss, self._output_tensor)[0]
        if per_loss_grad is None:
          tf.logging.warn(
              'Loss %s did not result in a gradient during '
              'GradDrop computation.', loss)
        else:
          per_loss_grads.append(per_loss_grad)

      if not per_loss_grads:
        raise ValueError('No valid gradients for GradDrop.')

      # Multiply the gradients with the inputs.
      grads = per_loss_grads
      if p.use_input_sign_only:
        input_abs = tf.abs(
            tf.cast(tf.abs(inputs) <= p.epsilon, tf.float32) + inputs)
        grads = [grad * ((inputs) / (input_abs)) for grad in grads]
      else:
        grads = [grad * inputs for grad in grads]

      # Sum gradient over batch, assuming that batch is always on dim 0.
      if p.marginalize_batch_dim:
        grads = [tf.reduce_sum(grad, axis=0, keepdims=True) for grad in grads]

      # First discretize all gradients into their sign values.
      grad_sign_positive = [tf.cast(grad > 0.0, tf.float32) for grad in grads]
      grad_sign_negative = [tf.cast(grad < 0.0, tf.float32) for grad in grads]

      # Calculate the probability of positive gradients based on equation (1)
      # in the GradDrop paper.
      grad_abs_sum = tf.add_n([tf.abs(grad) for grad in grads])
      prob_pos = (tf.add_n(grads) / (2. * grad_abs_sum + p.epsilon))
      # Implementation of different scales for the keep function. Larger
      # scales result in steeper keep functions.
      prob_pos *= p.keep_prob_function_scale

      if p.keep_prob_function == 'sigmoid':
        # Standard sigmoid has derivative of 0.25 at 0 so the factor of 4.0
        # allows the function scale in sigmoid to be compatible with the
        # function scale in the linear case.
        prob_pos = tf.sigmoid(4.0 * prob_pos)
      elif p.keep_prob_function == 'linear':
        prob_pos += 0.5

      # The main, default mode of GradDrop. Only gradients of one sign are kept,
      # and which sign is calculated via equation (1) of the main paper.
      prob_pos = tf.cast(prob_pos >= tf.random.uniform(prob_pos.shape),
                         tf.float32) - 0.5
      grad_masks = [(gsp - gsn) * prob_pos >= 0
                    for (gsn,
                         gsp) in zip(grad_sign_negative, grad_sign_positive)]

      # This diag value gives us the percentage of grads which are kept.
      gradmask_diag = [tf.cast(gm, tf.float32) for gm in grad_masks]
      diag = tf.reduce_mean(tf.add_n(gradmask_diag) / len(grad_masks))
      summary_utils.scalar('average_grad_mask', diag)
      leak_ratios = [leak_ratio for _, leak_ratio in self._losses]
      transformed_per_loss_grads = [
          grad * (leak + (1.0 - leak) * tf.cast(grad_mask, tf.float32))
          for (leak, grad,
               grad_mask) in zip(leak_ratios, per_loss_grads, grad_masks)
      ]

      transformed_grad = tf.cast(
          tf.add_n(transformed_per_loss_grads), original_grad.dtype)

      if not p.keep_gradnorm_constant:
        return transformed_grad

      transformed_grad_norm = tf.sqrt(tf.reduce_sum(transformed_grad**2))
      original_grad_norm = tf.sqrt(tf.reduce_sum(original_grad**2))
      return transformed_grad * transformed_grad_norm / (
          original_grad_norm + p.epsilon)
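A hedged numpy sketch of equation (1) in its 'linear' form (assuming p.keep_prob_function == 'linear' and keep_prob_function_scale == 1.0, as the code above then adds 0.5): the probability of keeping the positive sign is 0.5 plus the signed gradient sum over twice the absolute sum, giving 1.0 when all per-loss gradients agree positively and 0.0 when they agree negatively.

import numpy as np

def prob_positive(per_loss_grads, epsilon=1e-7):
  grad_abs_sum = np.sum([np.abs(g) for g in per_loss_grads], axis=0)
  return 0.5 + np.sum(per_loss_grads, axis=0) / (2. * grad_abs_sum + epsilon)

print(prob_positive([np.array([1., -1., 2.]),
                     np.array([1., -3., -2.])]))  # [1., 0., 0.5]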
Example #13
  def _TimeMask(self,
                inputs,
                seq_lengths,
                global_seed,
                noisify=False,
                gaussian_noise=False,
                dtype=tf.float32,
                domain_id_index=0):
    """Applies time masking with given degree to inputs.

    Args:
      inputs: Batch of input features of shape (batch_size, time_length,
        num_freq, channels).
      seq_lengths: The actual sequence lengths from which the masks are
        sampled. Tensor of shape (batch_size,).
      global_seed: an integer seed tensor for stateless random ops.
      noisify: Whether to noisify the masked out regions.
      gaussian_noise: Whether to use gaussian noise when noisifying.
      dtype: Data type.
      domain_id_index: domain id index.

    Returns:
      Inputs with random time masking applied.
    """
    p = self.params

    # Get time masking parameters.
    time_mask_max_frames = p.time_mask_max_frames[domain_id_index]
    time_masks_per_frame = p.time_masks_per_frame[domain_id_index]
    use_dynamic_time_mask_max_frames = (
        p.use_dynamic_time_mask_max_frames[domain_id_index])
    multiplicity = p.time_mask_count[domain_id_index]
    max_ratio = p.time_mask_max_ratio[domain_id_index]

    # If maximum mask length is zero, do nothing.
    if ((time_mask_max_frames == 0 and not use_dynamic_time_mask_max_frames) or
        max_ratio <= 0.0):
      return inputs
    if multiplicity == 0:
      return inputs
    seq_lengths = tf.cast(seq_lengths, tf.int32)
    batch_size, time_length, _, _ = py_utils.GetShape(inputs)

    # When using dynamic time mask size, discard upper-bound on
    # maximum allowed frames for time mask.
    if use_dynamic_time_mask_max_frames:
      time_mask_max_frames = None
    # Create masks in time direction and apply.
    block_arrays = self._GetMask(
        batch_size,
        choose_range=seq_lengths,
        mask_size=time_length,
        global_seed=global_seed,
        max_length=time_mask_max_frames,
        masks_per_frame=time_masks_per_frame,
        multiplicity=multiplicity,
        dtype=dtype,
        max_ratio=max_ratio)

    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_6 and seed_7 are set separately to avoid
    # correlation between the noise magnitude and the noise values.
    if p.use_input_dependent_random_seed:
      seed_6 = global_seed + 6
      seed_7 = global_seed + 7
    else:
      seed_6 = p.random_seed
      seed_7 = p.random_seed

    outputs = self.EinsumBxycBxBxyc(
        inputs, block_arrays, name='einsum_formasking')
    if noisify:
      # Sample noise with standard deviation factor * 0.1 + 0.0001.
      # TODO(ngyuzh): Make sure this won't affect EOS.
      if gaussian_noise:
        stddev = 1.0
      else:
        random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)
        factor = random_uniform(
            shape=(), minval=1.0, maxval=2.0, dtype=dtype, seed=seed_6)
        stddev = factor * 0.1 + 0.0001
      random_normal = _random_normal_op(p.use_input_dependent_random_seed)
      noise = random_normal(
          shape=[tf.shape(inputs)[0],
                 tf.shape(inputs)[1],
                 tf.shape(inputs)[2]],
          stddev=stddev,
          seed=seed_7)
      if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
        noise = tf.cast(noise, p.fprop_dtype)
      outputs_mask = self.EinsumBxyBxBxy(
          noise, 1.0 - block_arrays, name='einsum_fornoisymasking')
      outputs = outputs + tf.expand_dims(outputs_mask, -1)

    return outputs
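The masking einsum above presumably contracts as 'bxyc,bx->bxyc' (the EinsumBxycBxBxyc wrapper is assumed to do exactly this), scaling every (frequency, channel) slice at time x by the per-time mask value. A minimal sketch of that contraction:

import tensorflow as tf

inputs = tf.ones([2, 4, 3, 1])                   # [batch, time, freq, channels]
block_arrays = tf.constant([[1., 1., 0., 0.],
                            [1., 0., 0., 0.]])   # [batch, time] keep-mask
outputs = tf.einsum('bxyc,bx->bxyc', inputs, block_arrays)
print(tf.reduce_sum(outputs, axis=[1, 2, 3]))    # [6. 3.]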
Example #14
  def _ConstructWarpMatrix(self, batch_size, matrix_size, origin, destination,
                           choose_range, dtype):
    """Returns warp matrices according to origin, destination and choose_range.

    This function constructs a batch of warp matrices which maps the batch
    of origin points to the batch of destination points with fixed boundary
    coordinates at 0 and choose_range.

    The warping function, defined by the origin anchor point `origin`,
    the destination of the origin anchor point `destination` and the
    length of the domain in the warping axis `choose_range` is a piecewise
    linear map that fixes the points 0 and `choose_range` and maps
    `origin` to `destination`.

    For the warping matrix to be non-singular, destination must lie in the
    range 1 <= destination <= choose_range - 1, so a destination
    out of this range is adjusted to be in this range before the warping
    matrix is constructed.

    The warping map can be explicitly written by first defining the slopes:
      1) slope_0 = origin / destination.
      2) slope_1 = (choose_range - origin) / (choose_range - destination).
      3) slope_2 = 1.0.

    Then the origin point orig_i of the mapped coordinate i is given by:
      1) i < destination: orig_i = slope_0 * i.
      2) destination <= i < choose_range:
         orig_i = slope_1 * i - (slope_1 - slope_0) * destination.
      3) i >= choose_range: orig_i = i.

    Denoting n_i = ceil(orig_i), the warp matrix element warp[i][j] is given by:
      1) j = n_i: 1 - n_i + orig_i.
      2) j = n_i - 1: n_i - orig_i.
      3) Otherwise: 0.

    Applying the warp matrix to an array of pixels, i.e.,
    warped_pixel[i] = sum_j warp[i][j] * pixel[j], one would get
    warped_pixel[i] = (n_i-orig_i) pixel[n_i-1] + (1-n_i+orig_i) pixel[n_i].

    Args:
      batch_size: Batch size. Integer number.
      matrix_size: Dimension of the vector space the warp matrix is applied to.
        Integer number.
      origin: Origin anchor point for warping. Tensor of shape (batch_size,) and
        data type dtype.
      destination: Destination of the origin anchor point upon warping. Tensor
        of shape (batch_size,) and data type dtype.
      choose_range: Range within which the warp reference points must lie.
        Tensor of shape (batch_size,) data type dtype.
      dtype: Data type of origin, destination, choose_range and the output warp
        matrix.

    Returns:
      warp_matrix: A batch of fixed-size warp matrices with shape
      (batch_size, matrix_size, matrix_size).
    """
    p = self.params

    # Entries of destination must be in the range
    # 1 <= destination <= choose_range - 1
    # for warp matrix to have non-singular values.
    destination = tf.minimum(tf.maximum(destination, 1.0), choose_range - 1.0)

    # Construct a piecewise linear function that fixes the boundary points
    # specified by zero, choose_range and matrix_size, and maps the origin
    # anchor point to the destination.
    destination_bc = tf.broadcast_to(destination, (matrix_size, batch_size))
    destination_bc = tf.transpose(destination_bc)
    choose_range_bc = tf.broadcast_to(choose_range, (matrix_size, batch_size))
    choose_range_bc = tf.transpose(choose_range_bc)

    # Slopes of piece-wise linear function.
    slope_0 = origin / destination
    slope_1 = (choose_range - origin) / (choose_range - destination)
    slope_2 = 1.0

    # x is a batch of origin-coordinate vectors such that
    # x[i] = the origin coordinate of coordinate i under the warp map.
    # Denoting the destination of the origin anchor point in the
    # warp map as "dest," the origin coordinate of point i is given by:
    # 1) i < dest: slope_0 * i.
    # 2) dest <= i < choose_range: slope_1 * i - (slope_1 - slope_0) * dest.
    # 3) i >= choose_range: i.
    x = tf.broadcast_to(
        tf.cast(tf.range(matrix_size), dtype=dtype), (batch_size, matrix_size))
    x = (
        self.EinsumBBmBm(slope_0, x) +
        self.EinsumBBmBm(slope_1 - slope_0, tf.nn.relu(x - destination_bc)) +
        self.EinsumBBmBm(slope_2 - slope_1, tf.nn.relu(x - choose_range_bc)))
    x = tf.broadcast_to(x, (matrix_size, batch_size, matrix_size))
    x = tf.transpose(x, perm=[1, 2, 0])

    # y is a batch of coordinate matrices.
    # A coordinate matrix is a matrix such that
    # coordinate[i][j] = j.
    y = tf.broadcast_to(
        tf.cast(tf.range(matrix_size), dtype=dtype),
        (batch_size, matrix_size, matrix_size))
    # Warp matrix is obtained by applying hat function element-wise to (x-y).
    # Denoting the origin point of i under the warp map as orig_i,
    # and n_i = ceil(orig_i), the warp matrix element warp[i][j] is given by:
    # 1) j = n_i: 1 - n_i + orig_i.
    # 2) j = n_i - 1: n_i - orig_i.
    # 3) Otherwise: 0.
    # Applying the warp matrix to pixels, i.e.,
    # warped_pixel[i] = sum_j warp[i][j] * original_pixel[j], one would get
    # warped_pixel[i] = (n_i - orig_i) * original_pixel[n_i-1]
    #                   + (1 - n_i + orig_i) * original_pixel[n_i].
    warp_matrix = x - y
    warp_matrix = _hat(warp_matrix)
    if p.fprop_dtype is not None and p.fprop_dtype != dtype:
      warp_matrix = tf.cast(warp_matrix, p.fprop_dtype)

    return warp_matrix
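A small numpy check of the hat-function construction (a sketch, with _hat assumed to be the unit triangular bump max(0, 1 - |t|)): each row of the warp matrix carries weight (n_i - orig_i) at column n_i - 1 and (1 - n_i + orig_i) at column n_i, so applying it linearly interpolates neighboring pixels.

import numpy as np

def hat(t):
  return np.maximum(0.0, 1.0 - np.abs(t))

orig_i = 2.3                         # origin coordinate of warped pixel i
row = hat(orig_i - np.arange(6))     # one row of the warp matrix
print(row)                           # [0.  0.  0.7 0.3 0.  0. ]
pixels = np.arange(6, dtype=float)
print(row @ pixels)                  # 2.3 = 0.7 * 2 + 0.3 * 3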
Example #15
def _StepNum():
  return tf.cast(tf.train.get_or_create_global_step(), tf.float32)
Example #16
def AddAttentionSummaryBatchMajor(name,
                                  attention_tensors,
                                  src_paddings,
                                  tgt_paddings,
                                  transcripts=None,
                                  max_outputs=3):
    """Adds an image summary showing the attention probability matrix and state.

  Unlike AddAttentionSummary(), this function takes all tensors with the
  batch dimension in axis 0.

  Args:
    name: Summary name.
    attention_tensors: A list of 3D tensors shaped [batch_size, target_len,
      source_len] where attention[b, i, j] is the probability for the i-th
      output attending to the j-th input for element b in the batch.
    src_paddings: A tensor of binary paddings shaped [batch, source_len] for
      the source sequence, or a list of such tensors of the same length as
      attention_tensors with separate paddings for each entry.
    tgt_paddings: A tensor of binary paddings shaped [batch, target_len] for
      the target sequence, or a list of such tensors of the same length as
      attention_tensors with separate paddings for each entry.
    transcripts: Optional, transcripts shaped [batch, source_len] for the source
      sequence.
    max_outputs: Integer maximum number of elements of the batch to plot.
  """
    def VerifyLen(paddings):
        length = len(paddings) if isinstance(paddings, list) else 1
        if length != 1 and length != len(attention_tensors):
            raise ValueError('Bad length of paddings list {}'.format(length))

    VerifyLen(src_paddings)
    VerifyLen(tgt_paddings)

    # Verify shapes.
    for i, attention_tensor in enumerate(attention_tensors):
        src, tgt = src_paddings, tgt_paddings
        src = src[0 if len(src) == 1 else i] if isinstance(src, list) else src
        tgt = tgt[0 if len(tgt) == 1 else i] if isinstance(tgt, list) else tgt
        tgt_shape = py_utils.GetShape(tgt)
        attention_tensors[i] = tf.identity(
            py_utils.with_dependencies([
                py_utils.assert_equal(
                    py_utils.GetShape(attention_tensor), tgt_shape[:2] +
                    [py_utils.GetShape(src)[1]] + tgt_shape[2:])
            ], attention_tensor), re.sub(':.*$', '', attention_tensor.name))

    if not _ShouldAddSummary():
        return

    def ToLengths(paddings):
        paddings = paddings if isinstance(paddings, list) else [paddings]
        return [SequenceLength(p) for p in paddings]

    def Get(lengths, i):
        return lengths[0 if len(lengths) == 1 else i]

    src_lens = ToLengths(src_paddings)
    tgt_lens = ToLengths(tgt_paddings)

    with plot.MatplotlibFigureSummary(name + '/Attention',
                                      max_outputs=max_outputs,
                                      gridspec_kwargs={'hspace': 0.3}) as fig:
        for n, atten in enumerate(attention_tensors):
            # Diagnostic metric that decreases as attention picks up.
            max_entropy = tf.math.log(tf.cast(Get(src_lens, n), tf.float32))
            max_entropy = tf.expand_dims(tf.expand_dims(max_entropy, -1), -1)
            atten_normalized_entropy = -atten * tf.math.log(
                atten + 1e-10) / max_entropy
            scalar(name + '/Attention/average_normalized_entropy/%d' % n,
                   tf.reduce_mean(atten_normalized_entropy))
            args = [atten, Get(src_lens, n), Get(tgt_lens, n)]
            if transcripts is not None and n == 0:
                args.append(transcripts)
            fig.AddSubplot(args,
                           TrimPaddingAndPlotAttention,
                           title=atten.name,
                           xlabel='Input',
                           ylabel='Output')
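The normalized-entropy diagnostic above divides attention entropy by log(source_len), its maximum value, so a uniform attention row scores about 1.0 and a sharply peaked one near 0.0. A small numpy sketch (full-row entropy; the summary above averages the elementwise terms instead):

import numpy as np

atten = np.full(4, 0.25)  # uniform attention over 4 source positions
max_entropy = np.log(4.)
print(np.sum(-atten * np.log(atten + 1e-10)) / max_entropy)  # ~1.0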
Example #17
  def try_apply_dense(self, grad, var):
    assert grad is not None

    cond = tf.constant(True)
    is_finite_checks = []
    stats = {}

    grad_dtype = var.dtype  # TODO(lepikhin): add to params
    grad = tf.cast(grad, grad_dtype)
    factored_dims = self._factored_dims(var.shape.as_list())
    if factored_dims:
      vr = self.get_slot(var, 'vr')
      vc = self.get_slot(var, 'vc')
    else:
      v = self.get_slot(var, 'v')
    if self._beta1:
      m = self.get_slot(var, 'm')

    def _Upd(c, k, x):
      stats[k] = x
      is_finite_checks.append(tf.reduce_all(tf.math.is_finite(x)))
      return c

    with tf.variable_scope(var.name[:-2] + '/Adafactor'):
      grad_squared = tf.math.square(grad) + tf.cast(self._epsilon1, grad_dtype)
      cond = _Upd(cond, 'grad_squared', grad_squared)  # 0 (factored)
      decay_rate = tf.cast(self._decay_rate, var.dtype)
      old_val = tf.identity(var)  # TODO(lepikhin): introduce gradient dtype
      assert self._multiply_by_parameter_scale
      if self._multiply_by_parameter_scale:
        parameter_scale = self._parameter_scale(old_val)
        cond = _Upd(cond, 'parameter_scale', parameter_scale)  # 1 (factored)
        update_scale = self._parameter_scale(old_val) * tf.cast(
            self._learning_rate, grad_dtype)

      else:
        update_scale = self._learning_rate
      mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
      update_scale = tf.cast(update_scale, grad_dtype)
      if factored_dims:
        d0, d1 = factored_dims
        vr_axis, vc_axis = d0, d1
        grad_squared_row_mean = tf.reduce_mean(grad_squared, axis=vr_axis)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, axis=vc_axis)
        # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
        # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
        cond = _Upd(cond, 'new_vr', new_vr)  # 2 (factored)
        cond = _Upd(cond, 'new_vc', new_vc)  # 3 (factored)
        # vr_update = _Wrap(tf.assign, vr, new_vr)
        # vc_update = _Wrap(tf.assign, vc, new_vc)
        # updates.extend([vr_update, vc_update])
        long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
        r_factor = tf.math.rsqrt(new_vr / long_term_mean)
        c_factor = tf.math.rsqrt(new_vc)
        mult = tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
            c_factor, vc_axis)
        cond = _Upd(cond, 'mult', mult)  # 4 (factored)
        x = grad * mult
      else:
        new_v = v * decay_rate + grad_squared * mixing_rate
        cond = _Upd(cond, 'new_v', new_v)
        # v_update = _Wrap(tf.assign, v, new_v)
        # updates.append(v_update)
        x = grad * tf.math.rsqrt(new_v)

      assert self._clipping_threshold is not None

      if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(
            tf.constant(1.0, grad_dtype),
            py_utils.ReduceRms(x) /
            tf.constant(self._clipping_threshold, grad_dtype))
        x /= clipping_denom
      cond = _Upd(cond, 'x', x)
      subtrahend = x * update_scale
      if self._beta1:
        new_m = (
            m * tf.constant(self._beta1, dtype=grad_dtype) +
            subtrahend * tf.constant(1.0 - self._beta1, dtype=grad_dtype))
        subtrahend = new_m
        cond = _Upd(cond, 'new_m', new_m)
        # updates.append(_Wrap(tf.assign, m, new_m))

      # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
      #  for the case of bfloat16 activations, so as to avoid repeatedly
      #  rounding the slice value, which results in poor quality.
      cond = _Upd(cond, 'subtrahend', subtrahend)  # 5 (factored)

      # var_update = _Wrap(tf.assign_sub, var, subtrahend)
      # updates.append(var_update)

      return is_finite_checks, stats
Example #18
    def _BuildMetric(self, feed_data, classid):
        """Construct tensors and the feed_dict for Waymo metric op.

    Args:
      feed_data: a NestedMap returned by _GetData().
      classid: integer.

    Returns:
      A tuple of 3 dicts:

      - scalar_metrics: a dict mapping all the metric names to fetch tensors.
      - curves: a dict mapping all the curve names to fetch tensors.
      - feed_dict: a dict mapping the tensors in feed_tensors to feed values.
    """
        breakdown_names = config_util.get_breakdown_names_from_config(
            self._waymo_metric_config)
        if feed_data is None:
            dummy_scalar = tf.constant(np.nan)
            dummy_curve = tf.zeros(
                [self.metadata.NumberOfPrecisionRecallPoints(), 2], tf.float32)
            scalar_metrics = {
                'ap': dummy_scalar,
                'ap_ha_weighted': dummy_scalar
            }
            curve_metrics = {'pr': dummy_curve, 'pr_ha_weighted': dummy_curve}

            for metric in breakdown_names:
                scalar_metrics['ap_%s' % metric] = dummy_scalar
                scalar_metrics['ap_ha_weighted_%s' % metric] = dummy_scalar
                curve_metrics['pr_%s' % metric] = dummy_curve
                curve_metrics['pr_ha_weighted_%s' % metric] = dummy_curve
            return scalar_metrics, curve_metrics, {}

        feed_dict = {}

        f_gt_bbox = tf.placeholder(tf.float32)
        feed_dict[f_gt_bbox] = feed_data.gt.bbox

        f_gt_imgid = tf.placeholder(tf.int32)
        feed_dict[f_gt_imgid] = feed_data.gt.imgid

        f_pd_bbox = tf.placeholder(tf.float32)
        feed_dict[f_pd_bbox] = feed_data.pd.bbox

        f_pd_imgid = tf.placeholder(tf.int32)
        feed_dict[f_pd_imgid] = feed_data.pd.imgid

        f_pd_score = tf.placeholder(tf.float32)
        feed_dict[f_pd_score] = feed_data.pd.score

        num_gt_bboxes = feed_data.gt.imgid.shape[0]
        num_pd_bboxes = feed_data.pd.imgid.shape[0]
        gt_class_ids = tf.constant(classid,
                                   dtype=tf.uint8,
                                   shape=[num_gt_bboxes])
        pd_class_ids = tf.constant(classid,
                                   dtype=tf.uint8,
                                   shape=[num_pd_bboxes])
        ap, ap_ha, pr, pr_ha, _ = py_metrics_ops.detection_metrics(
            prediction_bbox=f_pd_bbox,
            prediction_type=pd_class_ids,
            prediction_score=f_pd_score,
            prediction_frame_id=tf.cast(f_pd_imgid, tf.int64),
            prediction_overlap_nlz=tf.zeros_like(f_pd_imgid, dtype=tf.bool),
            ground_truth_bbox=f_gt_bbox,
            ground_truth_type=gt_class_ids,
            ground_truth_frame_id=tf.cast(f_gt_imgid, tf.int64),
            ground_truth_difficulty=tf.zeros_like(f_gt_imgid, dtype=tf.uint8),
            config=self._waymo_metric_config.SerializeToString())

        # All tensors returned by Waymo's metric op have a leading dimension
        # B=number of breakdowns. At the moment we always use B=1 to keep it
        # compatible with the Python code.
        scalar_metrics = {'ap': ap[0], 'ap_ha_weighted': ap_ha[0]}
        curve_metrics = {'pr': pr[0], 'pr_ha_weighted': pr_ha[0]}

        for i, metric in enumerate(breakdown_names):
            # There is a scalar / curve for every breakdown.
            scalar_metrics['ap_%s' % metric] = ap[i]
            scalar_metrics['ap_ha_weighted_%s' % metric] = ap_ha[i]
            curve_metrics['pr_%s' % metric] = pr[i]
            curve_metrics['pr_ha_weighted_%s' % metric] = pr_ha[i]
        return scalar_metrics, curve_metrics, feed_dict
Example #19
    def AssignAnchors(self,
                      anchor_bboxes,
                      gt_bboxes,
                      gt_bboxes_labels,
                      gt_bboxes_mask,
                      foreground_assignment_threshold=0.5,
                      background_assignment_threshold=0.35,
                      background_class_id=0,
                      force_match=True,
                      similarity_fn=None):
        """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:

      - Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      - Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      - Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      - Use assigned_cls_mask for weighting the classification loss. The mask
        is set such that the loss applies to foreground and background
        assignments only - ignored anchors will be set to 0.
      - Use assigned_reg_mask for weighting the regression loss. The mask is set
        such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying if force matching is enabled. If
        force matching is enabled, then matched anchors which are also the
        highest scoring with a ground-truth box are considered foreground
        matches as long as their similarity score > 0.
      similarity_fn: Function that computes a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes. The
        score must be non-negative, with greater scores representing more
        similar. The fore/background_assignment_thresholds will be applied to
        this score to determine if the an anchor is foreground, background or
        ignored. If set to None, the function will default to IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys

      - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
      - assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.
      - assigned_gt_labels: shape [A] label assigned to bbox.
      - assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.
      - assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.
    """
        if similarity_fn is None:
            similarity_fn = self.IOU2DRotatedBoxes

        # Shape validation.
        anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
        num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
        gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
        num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

        # Compute similarity score and reduce max by anchors and by ground-truth.
        similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
        similarity_score = py_utils.HasShape(
            similarity_score, [num_anchor_bboxes, num_gt_bboxes])

        # Reduce over ground-truth boxes, so we have the max score per anchor.
        anchor_max_score = tf.reduce_max(similarity_score, axis=1)
        anchor_max_idx = tf.argmax(similarity_score, axis=1)

        if force_match:
            # Reduce over anchors, so we have the max score per ground truth box.
            gt_max_score = tf.reduce_max(similarity_score,
                                         axis=0,
                                         keepdims=True)

            # Force matches occur when the top matching gt bbox for an anchor is the
            # top matching anchor for the gt bbox. When force matching, we match
            # these boxes as long as their similarity score exceeds 0.
            force_matches = (
                tf.equal(similarity_score, gt_max_score)
                & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
                & tf.greater(similarity_score, 0.)
                & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
            force_match_indicator = tf.reduce_any(force_matches, axis=1)
            force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32),
                                        axis=1)

            # In assigning foreground/background anchors later, force_match_indicator
            # is used to determine which anchors are force foreground, and the index
            # assigned will be taken from anchor_max_idx.

            # Force matches must also be the max scoring gt bbox per anchor.
            # We overwrite anchor_max_idx to ensure that the right match is done.
            anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                      anchor_max_idx)

        # Ensure that max score boxes are not padded boxes by setting score to 0
        # for boxes that are padded.
        gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
        anchor_max_score = tf.where(tf.equal(gathered_mask, 1),
                                    anchor_max_score,
                                    tf.zeros_like(anchor_max_score))

        # Boolean tensors corresponding to whether an anchor is background or
        # foreground based on thresholding.
        background_anchors = tf.less_equal(anchor_max_score,
                                           background_assignment_threshold)
        foreground_anchors = tf.greater_equal(anchor_max_score,
                                              foreground_assignment_threshold)
        if force_match:
            # Background anchors are below threshold and not force matches.
            background_anchors &= ~force_match_indicator
            # Foreground anchors are above thresholds or force matches.
            foreground_anchors |= force_match_indicator

        # Add dummy background bbox to gt_boxes to facilitate batch gather.
        dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

        # Since we are concatenating the dummy bbox, the index corresponds to the
        # number of boxes.
        dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

        gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
        gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                     axis=0)

        # Gather indices so that all foreground boxes are gathered from gt_bboxes,
        # while all background and ignore boxes gather the dummy_bbox.
        anchor_gather_idx = tf.where(
            foreground_anchors, anchor_max_idx,
            tf.constant(dummy_bbox_idx,
                        shape=py_utils.GetShape(anchor_max_idx),
                        dtype=anchor_max_idx.dtype))

        # Gather the bboxes and weights.
        assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
        assigned_gt_labels = tf.batch_gather(gt_bboxes_labels,
                                             anchor_gather_idx)

        # Set masks for classification and regression losses.
        assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                                    tf.float32)
        assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

        return py_utils.NestedMap(
            assigned_gt_bbox=assigned_gt_bbox,
            assigned_gt_similarity_score=anchor_max_score,
            assigned_gt_labels=assigned_gt_labels,
            assigned_cls_mask=assigned_cls_mask,
            assigned_reg_mask=assigned_reg_mask)
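
A minimal NumPy sketch of the three assignment outcomes described in the
docstring above, using hypothetical per-anchor similarity scores and the
default thresholds (not part of the original example):

import numpy as np

# Hypothetical max similarity score per anchor (e.g., IoU against the
# best-matching ground-truth box).
anchor_max_score = np.array([0.7, 0.4, 0.1])

foreground = anchor_max_score >= 0.5   # positive assignment
background = anchor_max_score <= 0.35  # background assignment
# Anchors in neither bucket (the 0.4 one here) are ignored.
assigned_cls_mask = (foreground | background).astype(np.float32)  # [1., 0., 1.]
assigned_reg_mask = foreground.astype(np.float32)                 # [1., 0., 0.]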
Example #20
0
    def ComputeAndUpdateMoments(self, theta, inputs, paddings=None, **kwargs):
        """Computes moments and updates state.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      inputs: The inputs tensor.  Shaped [..., dim].
      paddings: The paddings tensor.  Shaped [..., 1], with the same rank as the
        input tensor.
      **kwargs: Additional inputs.

    Returns:
      Tuple of (mean, variance, beta, gamma).
    """
        p = self.params
        if paddings is None:
            paddings = self._GetDefaultPaddings(inputs)
        inputs = py_utils.with_dependencies([
            py_utils.assert_shape_match([tf.shape(paddings)[-1]], [1]),
        ], inputs)
        with tf.name_scope(p.name):
            if self.do_eval or p.freeze_bn_stats:
                # The mean and variance used for normalization.
                norm_mean, norm_variance = (self.vars.moving_mean,
                                            self.vars.moving_variance)
            else:
                rank = tf.rank(paddings)
                reduce_over_dims = tf.range(0, rank - 1)
                mean, variance = ComputeMoments(
                    inputs, paddings, reduce_over_dims, None,
                    p.enable_cross_replica_sum_on_tpu)

                py_utils.UpdateBatchNormVars(self.vars.moving_mean, mean,
                                             self._decay)
                py_utils.UpdateBatchNormVars(self.vars.moving_variance,
                                             variance, self._decay)
                # Add some summaries for visualization.
                summary_utils.histogram('%s_mean' % p.name,
                                        tf.cast(mean, tf.float32))
                summary_utils.histogram('%s_variance' % p.name,
                                        tf.cast(variance, tf.float32))
                summary_utils.histogram(
                    '%s_moving_mean' % p.name,
                    tf.cast(self.vars.moving_mean, tf.float32))
                summary_utils.histogram(
                    '%s_moving_variance' % p.name,
                    tf.cast(self.vars.moving_variance, tf.float32))
                summary_utils.histogram(
                    '%s_mean_diff' % p.name,
                    tf.cast(
                        tf.cast(mean, self.vars.moving_mean.dtype.base_dtype) -
                        self.vars.moving_mean, tf.float32))
                summary_utils.histogram(
                    '%s_variance_diff' % p.name,
                    tf.cast(
                        tf.cast(variance,
                                self.vars.moving_variance.dtype.base_dtype) -
                        self.vars.moving_variance, tf.float32))
                if p.use_moving_avg_in_training:
                    # Use the global statistics for normalization.
                    # Control dependencies on mean and variance ensure that
                    # moving_mean and moving_variance are updated every
                    # training step.
                    norm_mean = py_utils.with_dependencies(
                        [mean], self.vars.moving_mean)
                    norm_variance = py_utils.with_dependencies(
                        [variance], self.vars.moving_variance)
                else:
                    # Use the batch statistics for normalization.
                    norm_mean = mean
                    norm_variance = variance

            norm_mean = py_utils.CheckNumerics(
                norm_mean, 'mean of %s failed numeric check' % p.name)
            norm_variance = py_utils.CheckNumerics(
                norm_variance, 'variance of %s failed numeric check' % p.name)

            beta, gamma = self._GetBetaGamma(theta, inputs, **kwargs)
            return norm_mean, norm_variance, beta, gamma
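
As a rough sketch of the padding-aware moments that ComputeMoments is assumed
to produce (an illustration of the idea, not the library implementation),
masked positions are excluded from both the sum and the count:

import tensorflow as tf

# Toy batch [B=2, T=3, dim=1]; a padding of 1.0 marks an invalid frame.
inputs = tf.constant([[[1.], [2.], [3.]], [[4.], [5.], [6.]]])
paddings = tf.constant([[[0.], [0.], [1.]], [[0.], [1.], [1.]]])

mask = 1.0 - paddings
count = tf.reduce_sum(mask)                  # 3 valid frames
mean = tf.reduce_sum(inputs * mask) / count  # (1 + 2 + 4) / 3
variance = tf.reduce_sum(((inputs - mean)**2) * mask) / count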
Example #21
0
    def FProp(self, theta, input_batch):
        """Embeds source ids and transforms with TransformerStack.

    Args:
      theta: A `.NestedMap` object containing weights' values of this
        layer and its children layers.
      input_batch: A `.NestedMap` with fields:

        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].
        - task_ids: If p.task_emb is provided, must contain per-token task
            ids of shape [batch, time].

    Returns:
      A NestedMap containing

      - encoded: The encoded features, either a tensor of shape
        [time, batch, depth], or a list of tensors if is_transparent is set in
        transformer_stack.
      - padding: of shape [time, batch]
      - segment_id: [time, batch] if packed inputs are supported by the model
        (and all layers), or None otherwise.
      - embedded_inputs: [time, batch, depth] embedded inputs tokens without
        positional encodings.
    """

        p = self.params
        with tf.name_scope(p.name):
            src_segment_id = None
            src_segment_pos = None
            input_ids = py_utils.with_dependencies([
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            tf.shape(input_batch.paddings)),
                py_utils.assert_equal(tf.rank(input_batch.ids), 2)
            ], input_batch.ids)

            if (not py_utils.use_tpu()
                    and tf.flags.FLAGS.transformer_encoder_truncates_inputs):
                max_seq_length = tf.cast(
                    tf.reduce_max(tf.reduce_sum(1.0 - input_batch.paddings,
                                                1)), tf.int32)
                paddings = py_utils.with_dependencies([
                    py_utils.assert_equal(
                        tf.constant(True, tf.bool),
                        tf.reduce_all(
                            input_batch.paddings[:, max_seq_length:] > 0.5))
                ], input_batch.paddings)
                input_ids = input_ids[:, :max_seq_length]
                paddings = paddings[:, :max_seq_length]
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids[:, :max_seq_length]
                    src_segment_pos = input_batch.segment_pos[:, :max_seq_length]
            else:
                paddings = input_batch.paddings
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids
                    src_segment_pos = input_batch.segment_pos

            max_time = tf.shape(input_ids)[1]

            # Input token embeddings + positional embeddings
            if not p.shared_emb:
                input_embs = self.token_emb.EmbLookup(
                    theta.token_emb, tf.reshape(input_ids, [-1]))
            else:
                input_embs = self.softmax.EmbLookup(
                    theta.softmax, tf.reshape(input_ids, [-1]))

            input_embs = tf.reshape(input_embs,
                                    [-1, max_time, p.token_emb.embedding_dim])
            # [time, batch, dim]
            orig_input_embs = tf.transpose(input_embs, [1, 0, 2])

            if p.packed_input:
                position_embs = self.position_emb.FPropWithPosition(
                    theta.position_emb, src_segment_pos)
            else:
                position_embs = self.position_emb.FProp(
                    theta.position_emb, max_time)
                position_embs = tf.reshape(
                    position_embs, [1, max_time, p.token_emb.embedding_dim])
            input_embs += position_embs
            if p.task_emb:
                input_embs += self.task_emb.EmbLookup(theta.task_emb,
                                                      input_batch.task_ids)

            if p.model_dim != p.token_emb.embedding_dim:
                input_embs = self.emb_proj.FProp(theta.emb_proj, input_embs)

            paddings = tf.cast(tf.transpose(paddings), py_utils.FPropDtype(p))
            if p.packed_input:
                src_segment_id = tf.transpose(src_segment_id)
            input_embs = self.input_dropout.FProp(theta.input_dropout,
                                                  input_embs)

            # [time, batch, dim]
            transformer_input = tf.transpose(input_embs, [1, 0, 2])

        if not self.do_eval and p.apply_source_mask:
            # Augment padding for masked source word positions.
            dtype = paddings.dtype
            source_mask = tf.where(tf.equal(input_ids, p.source_mask_id),
                                   tf.ones_like(input_ids, dtype=dtype),
                                   tf.zeros_like(input_ids, dtype=dtype))
            # Make sure padding is between 0 and 1.
            paddings = tf.clip_by_value(paddings + tf.transpose(source_mask),
                                        0.0, 1.0)

        encoded, padding, segment_id = self.transformer_stack.FProp(
            theta.transformer_stack, transformer_input, paddings,
            src_segment_id)
        return py_utils.NestedMap(encoded=encoded,
                                  padding=padding,
                                  segment_id=segment_id,
                                  embedded_inputs=orig_input_embs)
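
A small shape-flow sketch of the embedding path above, with a hypothetical
vocabulary size and tf.gather standing in for EmbLookup:

import tensorflow as tf

batch, time, dim = 2, 5, 8
ids = tf.zeros([batch, time], tf.int32)   # [batch, time] token ids
emb_table = tf.random.normal([100, dim])  # hypothetical vocab of 100

input_embs = tf.gather(emb_table, tf.reshape(ids, [-1]))  # [batch*time, dim]
input_embs = tf.reshape(input_embs, [-1, time, dim])      # [batch, time, dim]
transformer_input = tf.transpose(input_embs, [1, 0, 2])   # [time, batch, dim]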
Example #22
0
 def Update(self, value):
     """Adds value to the accumulator."""
     self.SetValue(self.GetValue() + tf.cast(value, self.dtype))
Example #23
0
    def _BeamSearchStep(self, theta, encoder_outputs, cur_step, step_ids,
                        core_bs_states, other_states, num_hyps_per_beam,
                        pre_beam_search_step_callback,
                        post_beam_search_step_callback):
        """Extend beam search hyps for one step.

      | num_beams = Number of source sequences to be decoded.
      | num_hyps_per_beam = Number of hyps to keep per source sequence.
      | num_hyps = num_beams * num_hyps_per_beam
      | src_seq_len = Number of time steps in the source sequence.
      | src_batch = Number of examples in the source sequence.
      | tgt_seq_len = Maximum allowed time steps in the target sequence.
      | tgt_batch = num_hyps_per_beam * src_batch

    Args:
      theta: A `.NestedMap` object containing weights' values of the decoder
        layer and its children layers.
      encoder_outputs: A `.NestedMap` containing encoder outputs to be passed to
        the callbacks.
      cur_step: A scalar int tensor, the current time step, 0-based.
      step_ids: An int tensor of shape [num_hyps, 1]. The input ids to the
        current search step.
      core_bs_states: A tuple of core beam search states. This list is
        maintained by this helper class.
      other_states: A `.NestedMap` of other beam search states. This
        `.NestedMap` is managed and updated by the client. It is expected that
        each of its member tensors is of rank >= 1. t[i, ...] is the state of
        the i-th hyp at the beginning of this search step.
      num_hyps_per_beam: Num of hyps to keep per beam.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        See class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        See class header comments for more details.

    Returns:
      A tuple of the following elements for the next beam search step:
      (next step, all_done, step_ids, core_bs_states, other_states)
    """
        p = self.params

        bs_results, other_states = pre_beam_search_step_callback(
            theta, encoder_outputs, step_ids, other_states, num_hyps_per_beam)

        (best_scores, cumulative_scores, in_scores, in_hyps, in_prev_hyps,
         in_done_hyps, in_atten_probs) = core_bs_states

        (out_best_scores, out_cumulative_scores, out_scores, out_hyps,
         out_prev_hyps, out_done_hyps, out_atten_probs,
         all_done) = ops.beam_search_step(
             tf.cast(bs_results.log_probs, dtype=p.dtype),
             tf.cast(bs_results.atten_probs, dtype=p.dtype),
             best_scores,
             cumulative_scores,
             in_scores,
             in_hyps,
             in_prev_hyps,
             in_done_hyps,
             in_atten_probs,
             bs_results.is_last_chunk if self._model_uses_eoc_id else [],
             cur_step,
             eoc_id=p.target_eoc_id,
             eos_id=p.target_eos_id,
             beam_size=p.beam_size,
             num_hyps_per_beam=num_hyps_per_beam,
             valid_eos_max_logit_delta=p.valid_eos_max_logit_delta,
             merge_paths=p.merge_paths,
             allow_empty_terminated_hyp=p.allow_empty_terminated_hyp,
             ensure_full_beam=p.ensure_full_beam,
             force_eos_in_last_step=p.force_eos_in_last_step,
             local_eos_threshold=p.local_eos_threshold)

        new_step_ids = tf.reshape(out_hyps[cur_step, :], tf.shape(step_ids))
        new_step_ids.set_shape(step_ids.get_shape())

        # [num_hyps_per_beam * num_beams].
        old_hyp_ids = tf.reshape(
            tf.slice(out_prev_hyps, begin=[cur_step, 0], size=[1, -1]), [-1])

        if p.batch_major_compute:
            # Transform the indices into the key/value cache for fast decoding
            # (prefix_states in other_states), because the num_hyps dimension
            # of the cache is laid out as num_beams x num_hyps_per_beam, which
            # differs from the old_hyp_ids layout (num_hyps_per_beam x
            # num_beams). Both a transpose and a recomputation are required to
            # correct the indices.
            num_beams = tf.shape(best_scores)[0]
            # [num_beams * num_hyps_per_beam].
            old_hyp_ids_in_cache_order = tf.reshape(
                tf.transpose(tf.reshape(old_hyp_ids, [num_hyps_per_beam, -1])),
                [-1])
            old_hyp_ids_in_cache_order = (
                (old_hyp_ids_in_cache_order % num_beams) * num_hyps_per_beam +
                old_hyp_ids_in_cache_order // num_beams)

        new_bs_states = (out_best_scores, out_cumulative_scores, out_scores,
                         out_hyps, out_prev_hyps, out_done_hyps,
                         out_atten_probs)

        def ReOrderHyps(key, x_in):
            """Reorders x_in based on prev hyp ids."""
            correct_old_hyp_ids = (old_hyp_ids_in_cache_order
                                   if p.batch_major_compute else old_hyp_ids)
            if (isinstance(x_in, tf.Tensor) and x_in.shape.ndims):
                if x_in.shape.ndims > 2 and not p.batch_major_state:
                    # For batch-major compute, use the corrected indices only
                    # here, since the key/value caches are the states affected.
                    x_out = tf.gather(x_in, correct_old_hyp_ids, axis=1)
                elif key in POSSIBLY_TIME_MAJOR_STATE_KEYS:
                    x_out = tf.gather(x_in, old_hyp_ids, axis=-1)
                else:
                    x_out = tf.gather(x_in, correct_old_hyp_ids)
                x_out.set_shape(x_in.get_shape())
                return x_out
            else:
                return x_in

        new_other_states = other_states.TransformWithKey(ReOrderHyps)

        final_other_states = post_beam_search_step_callback(
            theta, encoder_outputs, new_step_ids, new_other_states)

        return (cur_step + 1, all_done, new_step_ids, new_bs_states,
                final_other_states)
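
The batch-major index arithmetic above converts a hyp id expressed as
h * num_beams + b into cache order b * num_hyps_per_beam + h. A NumPy sketch
of just that arithmetic (the surrounding reshape/transpose is omitted):

import numpy as np

num_beams, num_hyps_per_beam = 2, 3
old_hyp_ids = np.arange(num_beams * num_hyps_per_beam)  # h * num_beams + b
cache_order = ((old_hyp_ids % num_beams) * num_hyps_per_beam
               + old_hyp_ids // num_beams)
# id 3 is hyp 1 of beam 1, so it lands in cache slot 1 * 3 + 1 == 4.
print(cache_order)  # [0 3 1 4 2 5]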
Example #24
0
    def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                       cached_var):
        """Computes mean and variance over the valid data points in inputs.

    Args:
      inputs: [B, T, F, N, G] or [B, T, N, G]
      paddings: [B, T, 1, 1, 1] or [B, T, 1, 1]
      cached_sum: [B, 1, 1, N, 1] or [B, 1, N, 1]
      cached_count: same shape as cached_sum.
      cached_var: same shape as cached_sum.

    Returns:
      mean: [B, T, 1, N, 1] or [B, T, N, 1]
      variance: same shape as mean.
      new_cached_sum: same shape as cached_sum.
      new_cached_count: same shape as cached_count.
      new_cached_var: same shape as cached_var.
    """
        tf.logging.vlog(1, 'inputs: %r', inputs)
        tf.logging.vlog(1, 'paddings: %r', paddings)
        tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
        tf.logging.vlog(1, 'cached_count: %r', cached_count)

        mask = 1.0 - paddings
        inputs *= tf.cast(mask, inputs.dtype)

        input_rank = py_utils.GetRank(inputs)
        assert input_rank is not None, (f'inputs rank must be static for '
                                        f'{repr(inputs)}')
        reduce_over_dims = list(range(input_rank))
        # Skip B, T, and N. Reduce {F,G} or just G.
        reduce_over_dims = reduce_over_dims[2:-2] + reduce_over_dims[-1:]
        tf.logging.vlog(1, 'reduce_over_dims: %s', reduce_over_dims)

        # [B, T, 1, N, 1] or [B, T, N, 1]
        sum_v = tf.reduce_sum(inputs, reduce_over_dims, keepdims=True)
        sum_v = tf.math.cumsum(sum_v, axis=1)
        sum_v += cached_sum

        # [B, T, 1, 1, 1] or [B, T, 1, 1]
        count_v = tf.reduce_sum(mask, reduce_over_dims, keepdims=True)
        count_v = tf.math.cumsum(count_v, axis=1)
        input_shape = py_utils.GetShape(inputs)
        if input_rank == 4:
            # F * G
            multiplier = input_shape[-1] * input_shape[-3]
        else:
            # G
            multiplier = input_shape[-1]
        count_v *= multiplier
        count_v += cached_count
        count_v = tf.maximum(count_v, 1.0)

        tf.logging.vlog(1, 'sum_v: %r', sum_v)
        tf.logging.vlog(1, 'count_v: %r', count_v)

        mean = sum_v / count_v
        sum_vv = tf.reduce_sum((inputs - mean)**2 * mask,
                               reduce_over_dims,
                               keepdims=True)
        sum_vv = tf.math.cumsum(sum_vv, axis=1)
        sum_vv += cached_var

        cached_sum = sum_v[:, -1:]
        cached_count = count_v[:, -1:]
        cached_var = sum_vv[:, -1:]

        variance = py_utils.with_dependencies([
            py_utils.assert_greater_equal(sum_vv, tf.cast(0, sum_vv.dtype)),
        ], sum_vv / count_v)
        return mean, variance, cached_sum, cached_count, cached_var
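
A stripped-down sketch of the cumulative-sum trick above, on a toy
[B=1, T=4, N=1, G=1] stream with no padding and no cache (an illustration
only, not the full method):

import tensorflow as tf

x = tf.reshape(tf.constant([1., 2., 3., 4.]), [1, 4, 1, 1])
sum_v = tf.math.cumsum(tf.reduce_sum(x, axis=-1, keepdims=True), axis=1)
count_v = tf.math.cumsum(tf.ones_like(sum_v), axis=1)
mean = sum_v / count_v  # running means per step: [1.0, 1.5, 2.0, 2.5]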
Example #25
0
    def _Extract(self, features):
        p = self.params
        ri_outputs = {}
        outputs = {}
        frame_pose = tf.reshape(_Dense(features['pose']), [4, 4])
        for laser in p.cbr_laser_names + p.gbr_laser_names:
            # Extract range images.
            for returns in p.returns:
                ri_shape = tf.reshape(
                    _Dense(features['%s_%s_shape' % (laser, returns)]), [-1])
                range_image = tf.reshape(
                    _Dense(features['%s_%s' % (laser, returns)]), ri_shape)

                shape_to_check = (p.cbr_ri_shape if laser in p.cbr_laser_names
                                  else p.gbr_ri_shape)
                range_image = py_utils.HasShape(range_image, shape_to_check)

                ri_outputs['%s_%s' % (laser, returns)] = range_image

            # Extract beam inclinations and extrinsics
            outputs['%s_extrinsics' % laser] = tf.reshape(
                _Dense(features['%s_extrinsics' % laser]), [4, 4])

        # CBRs have uniform inclination
        for laser in p.cbr_laser_names:
            beam_inclination_min = tf.reshape(
                _Dense(features['%s_beam_inclination_min' % laser]), [])
            beam_inclination_max = tf.reshape(
                _Dense(features['%s_beam_inclination_max' % laser]), [])
            outputs['%s_beam_inclinations' % laser] = tf.stack(
                [beam_inclination_min, beam_inclination_max], axis=0)

        # GBRs have non-uniform inclinations defined by 64 floats.
        for laser in p.gbr_laser_names:
            outputs['%s_beam_inclinations' % laser] = tf.reshape(
                _Dense(features['%s_beam_inclinations' % laser]), [64])

        # Embed xyz onto each range image pixel.
        for laser in p.cbr_laser_names + p.gbr_laser_names:
            extrinsics = outputs['%s_extrinsics' % laser]
            inclinations = outputs['%s_beam_inclinations' % laser]
            if laser in p.cbr_laser_names:
                ri_shape = p.cbr_ri_shape

                # Convert from 2-tuple range inclination to the full range
                # via linear interpolation.
                #
                # CBR lasers currently always have uniform inclinations
                # specified by a length-2 vector.
                height = ri_shape[0]
                min_inclination = inclinations[0]
                max_inclination = inclinations[1]
                diff = max_inclination - min_inclination
                ratio = (.5 + tf.cast(tf.range(
                    0, height), tf.float32)) / tf.cast(height, tf.float32)
                # interpolate from min to max inclination.
                inclinations = (ratio * diff) + min_inclination
            else:
                ri_shape = p.gbr_ri_shape

            pixel_pose = None
            if laser in p.gbr_laser_names:
                pixel_pose = tf.reshape(_Dense(features['%s_pose' % laser]),
                                        shape=p.gbr_ri_shape[0:2] + [4, 4])
                outputs['%s_pose' % laser] = pixel_pose

            for returns in p.returns:
                range_image = ri_outputs['%s_%s' % (laser, returns)]
                range_image = tf.reshape(range_image, ri_shape)
                range_image_mask = range_image[..., 0] >= 0
                ri_xyz = tf.cast(
                    self._XYZFromRangeImage(range_image, range_image_mask,
                                            extrinsics, inclinations,
                                            pixel_pose, frame_pose),
                    tf.float32)

                # Produce the NestedMap of xyz, features, mask.
                ri_result = py_utils.NestedMap({
                    'xyz': ri_xyz,
                    'features': range_image,
                    'mask': tf.cast(range_image_mask, tf.float32),
                })

                outputs['%s_%s' % (laser, returns)] = ri_result

        return py_utils.NestedMap(outputs)
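
The CBR inclination interpolation above can be sketched in isolation with
hypothetical min/max values:

import tensorflow as tf

height = 4
min_inclination, max_inclination = -0.3, 0.2
ratio = (0.5 + tf.cast(tf.range(height), tf.float32)) / float(height)
inclinations = ratio * (max_inclination - min_inclination) + min_inclination
# Beam centers evenly spaced in (-0.3, 0.2):
# [-0.2375, -0.1125, 0.0125, 0.1375]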
Example #26
0
    def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
        """Takes a tensor of strings and returns id/padding tensors.

    This generates `token_ids`, `target_ids`, and `paddings` in the format that
    is expected for tokenizers. This performs padding to a fixed length and
    appends the end-of-sentence token as appropriate.

    Args:
      strs: a string Tensor.
      max_length: a python integer. The second dimension of the returned arrays.
        All sequences are padded or truncated to that length.
      append_eos: a python bool. See `BaseTokenizer` for explanation.
      languages: A vector of strings with the same length as `strs`.

    Returns:
      A tuple of 3 tensors:

      - token_ids: a tensor of sequences of WPM ids starting with SOS. Sequences
        always end with EOS unless the sequence exceeds the maximum length.
        Always padded with EOS.
      - target_ids: a tensor of sequences of WPM ids not starting with SOS
        but ending with EOS. Always padded with EOS.
      - paddings: a tensor of floats indicating, at each position, whether
        the corresponding position is padded.
    """
        p = self.params
        if append_eos is None:
            append_eos = p.append_eos

        batch_size = py_utils.GetShape(strs)[0]
        token_ids_ta = tf.TensorArray(tf.int32, batch_size)
        target_ids_ta = tf.TensorArray(tf.int32, batch_size)
        paddings_ta = tf.TensorArray(tf.float32, batch_size)

        def _TokenizeOneSentence(i, strs, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Tokenizes a single sentence."""
            ids, _ = self._wpm_encoder.Encode(strs[i])

            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)

            # This truncates after the eos is added, so some sentences might
            # not have </s> at the end.
            token_ids_ta = token_ids_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.concat([[self.sos_id], ids], axis=0),
                                     [max_length], self.eos_id))
            target_ids_ta = target_ids_ta.write(
                i, py_utils.PadOrTrimTo(ids, [max_length], self.eos_id))
            paddings_ta = paddings_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.zeros_like(ids, dtype=tf.float32),
                                     [max_length], 1.))

            return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta

        _, _, token_ids_ta, target_ids_ta, paddings_ta = tf.while_loop(
            lambda i, *_: i < batch_size,
            _TokenizeOneSentence,
            loop_vars=(tf.constant(0, tf.int32), strs, token_ids_ta,
                       target_ids_ta, paddings_ta),
            parallel_iterations=30,
            back_prop=False)

        token_ids = token_ids_ta.stack()
        target_ids = target_ids_ta.stack()
        paddings = paddings_ta.stack()

        if not p.pad_to_max_length:
            maxlen = tf.cast(
                tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))),
                tf.int32)
            token_ids = token_ids[:, :maxlen]
            target_ids = target_ids[:, :maxlen]
            paddings = paddings[:, :maxlen]

        return token_ids, target_ids, paddings
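
The padding behavior hinges on py_utils.PadOrTrimTo; a minimal 1-D stand-in
(an assumption about its contract, not the real helper) behaves like this:

import tensorflow as tf

def _PadOrTrimTo1D(x, length, pad_val):
  """Hypothetical 1-D stand-in for py_utils.PadOrTrimTo."""
  x = x[:length]
  return tf.pad(x, [[0, length - tf.shape(x)[0]]], constant_values=pad_val)

ids = tf.constant([5, 6, 7])              # hypothetical WPM ids
print(_PadOrTrimTo1D(ids, 5, 2).numpy())  # [5 6 7 2 2], with eos_id == 2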
Example #27
0
    def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
        del languages
        p = self.params
        if append_eos is None:
            append_eos = p.append_eos

        batch_size = py_utils.GetShape(strs)[0]
        token_ids_ta = tf.TensorArray(tf.int32, batch_size)
        target_ids_ta = tf.TensorArray(tf.int32, batch_size)
        paddings_ta = tf.TensorArray(tf.float32, batch_size)

        def _TokenizeOneSentence(i, text, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Tokenizes a single sentence."""
            if tf.is_tensor(i):
                text_i = tf.gather(text, i)
            else:
                text_i = text[i]
            ids = self._tokenizer.tokenize(text_i).merge_dims(0, -1)
            ids.set_shape([None])

            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)
            sos_ids = tf.concat([[self.sos_id], ids], axis=0)
            if p.prepend_sos:
                ids = sos_ids

            # This truncates after the EOS is added, so some sentences might
            # not have EOS at the end.
            token_ids_ta = token_ids_ta.write(
                i, py_utils.PadOrTrimTo(sos_ids, [max_length], 0))
            target_ids_ta = target_ids_ta.write(
                i, py_utils.PadOrTrimTo(ids, [max_length], 0))
            paddings_ta = paddings_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.zeros_like(ids, dtype=tf.float32),
                                     [max_length], 1.))

            return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta

        _, _, token_ids_ta, target_ids_ta, paddings_ta = tf.while_loop(
            lambda i, *_: i < batch_size,
            _TokenizeOneSentence,
            loop_vars=(tf.constant(0, tf.int32), strs, token_ids_ta,
                       target_ids_ta, paddings_ta),
            parallel_iterations=30,
            back_prop=False)

        token_ids = token_ids_ta.stack()
        target_ids = target_ids_ta.stack()
        paddings = paddings_ta.stack()

        if not p.pad_to_max_length:
            maxlen = tf.cast(
                tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))),
                tf.int32)
            token_ids = token_ids[:, :maxlen]
            target_ids = target_ids[:, :maxlen]
            paddings = paddings[:, :maxlen]

        return token_ids, target_ids, paddings
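
The final trimming step (shared with the previous example) derives maxlen from
the paddings; a toy check:

import tensorflow as tf

paddings = tf.constant([[0., 0., 1., 1.],
                        [0., 1., 1., 1.]])
maxlen = tf.cast(
    tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))), tf.int32)
print(maxlen.numpy())  # 2: the longest sequence has two non-padded tokens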
Example #28
0
 def MaybeCastToFPropDtype(x):
     if x is not None and x.dtype == self._params.dtype:
         return tf.cast(x, self._params.fprop_dtype)
     else:
         return x
Example #29
0
def _ComputePaddings(ids, eos_id):
    is_eos = tf.cast(tf.equal(ids, eos_id), tf.int32)
    # eos_in_prefix[i, j] = any(ids[i, k] == eos_id for k in range(j))
    eos_in_prefix = tf.cumsum(is_eos, axis=-1, exclusive=True)
    return tf.where(tf.equal(eos_in_prefix, 0), tf.zeros_like(ids),
                    tf.ones_like(ids))
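
A quick trace of the exclusive cumsum above, with a hypothetical eos_id of 2:

import tensorflow as tf

ids = tf.constant([[3, 4, 2, 0, 0]])
is_eos = tf.cast(tf.equal(ids, 2), tf.int32)                # [[0, 0, 1, 0, 0]]
eos_in_prefix = tf.cumsum(is_eos, axis=-1, exclusive=True)  # [[0, 0, 0, 1, 1]]
# Positions strictly after the first EOS become padding; the EOS itself
# does not.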
Example #30
0
  def _GetMask(self,
               batch_size,
               choose_range,
               mask_size,
               global_seed,
               max_length=None,
               masks_per_frame=0.0,
               multiplicity=1,
               dtype=tf.float32,
               max_ratio=1.0):
    """Returns fixed size multi-masks starting from random positions.

    A multi-mask is a mask obtained by applying multiple masks.

    When max_length is given:
      1) Sample random mask lengths less than max_length with shape
         (batch_size, multiplicity).
      2) Truncate lengths to a max of (choose_range * max_ratio),
         so that each mask is fully contained within the corresponding
         sequence.
      3) Randomly sample start points of shape (batch_size, multiplicity)
         within [0, choose_range - length].
      4) For each batch, multiple masks (whose number is given by the
         multiplicity) are constructed.
      5) Return a mask of shape (batch_size, mask_size) where masks are
         obtained by composing the masks constructed in step 4).
         If masks_per_frame > 0, the number is given by
         min(masks_per_frame * choose_range, multiplicity).
         If not, all the masks are composed. The masked regions are set to zero.

    When max_length is not given:
      1) Sample random mask lengths less than (choose_range * max_ratio)
         with shape (batch_size, multiplicity).
      2) Proceed to steps 3), 4) and 5) of the above.

    Args:
      batch_size: Batch size. Integer number.
      choose_range: Range within which the masked entries must lie. Tensor of
        shape (batch_size,).
      mask_size: Size of the mask. Integer number.
      global_seed: an integer seed tensor for stateless random ops.
      max_length: Maximum number of allowed consecutive masked entries. Integer
        number or None.
      masks_per_frame: Number of masks per frame. Float number. If > 0, the
        multiplicity of the mask is set to be masks_per_frame * choose_range.
      multiplicity: Maximum number of total masks. Integer number.
      dtype: Data type.
      max_ratio: Maximum portion of the entire range allowed to be masked. Float
        number.

    Returns:
      mask: a fixed size multi-mask starting from a random position with shape
      (batch_size, mask_size).
    """
    p = self.params
    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_1 and seed_2 are set separately to avoid
    # correlation of mask size and mask position.
    if p.use_input_dependent_random_seed:
      seed_1 = global_seed + 1
      seed_2 = global_seed + 2
    elif p.random_seed:
      seed_1 = p.random_seed + 1
      seed_2 = 2 * p.random_seed
    else:
      seed_1 = p.random_seed
      seed_2 = p.random_seed
    # Sample lengths for multiple masks.
    if max_length and max_length > 0:
      max_length = tf.broadcast_to(tf.cast(max_length, dtype), (batch_size,))
    else:
      max_length = tf.cast(choose_range, dtype=dtype) * max_ratio
    random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)
    masked_portion = random_uniform(
        shape=(batch_size, multiplicity),
        minval=0.0,
        maxval=1.0,
        dtype=dtype,
        seed=seed_1)
    masked_frame_size = self.EinsumBBmBm(max_length, masked_portion)
    masked_frame_size = tf.cast(masked_frame_size, dtype=tf.int32)
    # Make sure the sampled length is smaller than max_ratio * length_bound.
    # Note that sampling in this way is biased
    # (shorter sequences may be over-masked).
    choose_range = tf.expand_dims(choose_range, -1)
    choose_range = tf.tile(choose_range, [1, multiplicity])
    length_bound = tf.cast(choose_range, dtype=dtype)
    length_bound = tf.cast(max_ratio * length_bound, dtype=tf.int32)
    length = tf.minimum(masked_frame_size, tf.maximum(length_bound, 1))

    # Choose starting point.
    random_start = random_uniform(
        shape=(batch_size, multiplicity), maxval=1.0, seed=seed_2)
    start_with_in_valid_range = random_start * tf.cast(
        (choose_range - length + 1), dtype=dtype)
    start = tf.cast(start_with_in_valid_range, tf.int32)
    end = start + length - 1

    # Shift starting and end point by small value.
    delta = tf.constant(0.1)
    start = tf.expand_dims(tf.cast(start, dtype) - delta, -1)
    start = tf.tile(start, [1, 1, mask_size])
    end = tf.expand_dims(tf.cast(end, dtype) + delta, -1)
    end = tf.tile(end, [1, 1, mask_size])

    # Construct pre-mask of shape (batch_size, multiplicity, mask_size).
    diagonal = tf.expand_dims(
        tf.expand_dims(tf.cast(tf.range(mask_size), dtype=dtype), 0), 0)
    diagonal = tf.tile(diagonal, [batch_size, multiplicity, 1])
    pre_mask = tf.cast(
        tf.math.logical_and(diagonal < end, diagonal > start), dtype=dtype)

    # Sum masks with appropriate multiplicity.
    if masks_per_frame > 0:
      multiplicity_weights = tf.tile(
          tf.expand_dims(tf.range(multiplicity, dtype=dtype), 0),
          [batch_size, 1])
      multiplicity_tensor = masks_per_frame * tf.cast(choose_range, dtype=dtype)
      multiplicity_weights = tf.cast(
          multiplicity_weights < multiplicity_tensor, dtype=dtype)
      pre_mask = self.EinsumBmtBmBt(pre_mask, multiplicity_weights)
    else:
      pre_mask = tf.reduce_sum(pre_mask, 1)
    mask = tf.cast(1.0 - tf.cast(pre_mask > 0, dtype=dtype), dtype=dtype)

    if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
      mask = tf.cast(mask, p.fprop_dtype)

    return mask
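
The 0.1 delta above turns an inclusive integer span into strict inequalities
against integer positions. A sketch for a single hypothetical mask covering
positions 2..3:

import tensorflow as tf

mask_size = 6
delta = 0.1
start, end = 2, 3  # masked positions, inclusive
positions = tf.cast(tf.range(mask_size), tf.float32)
pre_mask = tf.cast((positions > start - delta) & (positions < end + delta),
                   tf.float32)  # [0, 0, 1, 1, 0, 0]
mask = 1.0 - pre_mask           # zeros out the masked span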