Example 1
def _RelPositionBiasCausal(query, abs_pos_emb):
    """Computes relative position bias for causal self attention."""
    _, t, n, h = py_utils.GetShape(query)

    abs_pos_emb = py_utils.HasShape(abs_pos_emb, [2 * t - 1, n, h])

    # abs_pos_emb is [-(T-1), -(T-2), ... 0, 1, 2, ... T-1]
    # Retain only half and change order to [T-1, T-2, ... 0]
    # [T, N, H]
    abs_pos_emb = tf.reverse(abs_pos_emb, [0])[:t]

    # [B, N, T, L=T]
    term_bd = tf.einsum('BTNH,LNH->BNTL', query, abs_pos_emb)

    # Perform shifting.
    term_bd = tf.reverse(term_bd, [2, 3])
    term_bd = RelShift(term_bd)
    return tf.reverse(term_bd, [2, 3])
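A shape walk-through of the routine above (an addition, not part of the original example): a self-contained TF 2.x sketch with random placeholder tensors, inlining the pad/reshape trick from RelShift (Example 8) so it runs without lingvo.

import tensorflow as tf

def _rel_shift(x):
    # Same pad/reshape/slice trick as RelShift in Example 8.
    b, n, w, _ = x.shape
    x = tf.pad(x, ((0, 0), (0, 0), (0, 0), (0, 1)))
    return tf.reshape(x, [b, n, w + 1, w])[:, :, :w, :]

b, t, n, h = 2, 5, 4, 8
query = tf.random.normal([b, t, n, h])
abs_pos_emb = tf.random.normal([2 * t - 1, n, h])  # positions -(T-1) .. T-1
emb = tf.reverse(abs_pos_emb, [0])[:t]             # keep [T-1, ..., 0]
term_bd = tf.einsum('BTNH,LNH->BNTL', query, emb)
term_bd = tf.reverse(_rel_shift(tf.reverse(term_bd, [2, 3])), [2, 3])
print(term_bd.shape)  # (2, 4, 5, 5) == [B, N, T, T]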
Example 2
    def ComputeLoss(self, theta, input_batch, predicted):
        diff = predicted - input_batch.tgt_ids
        per_example_loss = diff * diff
        batch_dim = py_utils.GetShape(per_example_loss)[0]

        def replicate_var(name):
            return tf.convert_to_tensor([self._private_vars[name]] * batch_dim,
                                        dtype=tf.float32)

        metrics = {'loss': (tf.reduce_sum(per_example_loss), batch_dim)}
        per_example_tensors = {
            'input': input_batch.src_ids,
            'loss': per_example_loss,
            'diff': diff,
            'm': replicate_var('m'),
            'b': replicate_var('b'),
        }
        return metrics, per_example_tensors
Example 3
    def _BuildCrossBatchMixingDataSource(self):
        """Read and return input batch from a p.file_pattern list.

    `p.file_pattern` should be a list of (file_pattern, weight,
    optional_bprop_filter) tuples. Every batch returned will be filled from one
    source only and batches will be mixed proportionally to the weights.
    Additionally some backprop filters may be applied for different input
    sources.

    Returns:
      A tuple which contains the output of `self._DataSourceFromFilePattern()`
      and a tensor of size [batch_size, number of data sources] which contains
      the source selected for each element in the input batch. With cross batch
      mixing the complete input batch comes from the same source.

    Raises:
      ValueError: If unknown token type.
    """
        p = self.params
        input_file_pattern = p.file_pattern

        def _MakeDataSourceFromFilePatternFunc(file_pattern):
            # It's important to invoke self._DataSourceFromFilePattern() inside
            # the lambda to make sure that a record is drawn from the data
            # source only if it will be used.
            return lambda: self._DataSourceFromFilePattern(file_pattern)

        inputs = []
        weights = []
        self._bprop_variable_filters = []
        for input_entry in input_file_pattern:
            file_pattern, weight = input_entry[:2]
            inputs.append(_MakeDataSourceFromFilePatternFunc(file_pattern))
            weights.append(weight)
            bprop_variable_filter = (
                input_entry[2] if len(input_entry) > 2 else '')
            self._bprop_variable_filters.append(bprop_variable_filter)
        data_source, selected_bprop = py_utils.MixByWeight(inputs, weights)
        # TODO(neerajgaur): Remove _bprop_onehot and change code that uses it to
        # use source_selected from input_batch.
        self._bprop_onehot = selected_bprop
        batch_size = py_utils.GetShape(tf.nest.flatten(data_source)[0])[0]
        return data_source, tf.tile(tf.expand_dims(selected_bprop, 0),
                                    [batch_size, 1])
Example 4
    def FProp(self, theta, input_batch):
        """Perform signal processing on a sequence of PCM data.

    NOTE: This implementation does not currently support paddings, and they
    are accepted for compatibility with the super-class.

    TODO(laurenzo): Rework this to support paddings.

    Args:
      theta: Layer theta.
      input_batch: PCM input map:

        - 'src_inputs': int16 or float32 tensor of PCM audio data, scaled to
          +/-32768 (versus [-1..1)!). Shaped: [batch, frame_count].
        - 'paddings': per-frame 0/1 paddings. Shaped: [batch, frame].

    Returns:
      NestedMap of encoder inputs which can be passed directly to a
      compatible encoder and contains:

        - 'src_inputs': inputs to the encoder, minimally of shape
          [batch, time, ...].
        - 'paddings': a 0/1 tensor of shape [batch, time].
    """
        p = self.params
        pcm_audio_data = input_batch.src_inputs
        batch_size, frame_count = py_utils.GetShape(pcm_audio_data, 2)
        mel_spectrogram_norm = self._FPropChunk(theta, pcm_audio_data)

        # Stacking across the whole sequence.
        assert p.left_context == 2, 'Only p.left_context == 2 is implemented.'
        first_frame = mel_spectrogram_norm[:, 0:1, :]
        padded_mel_spectrogram = tf.concat(
            (first_frame, first_frame, mel_spectrogram_norm), axis=1)
        stacked_frame_count = tf.shape(padded_mel_spectrogram)[1] // 3
        triple_mel = tf.reshape(
            padded_mel_spectrogram[:, 0:3 * stacked_frame_count, :],
            [batch_size, stacked_frame_count, 3 * p.num_bins])
        # Paddings are unsupported (see docstring), so emit all-zero paddings
        # of shape [batch, time] derived from the stacked features.
        output_padding = 0 * tf.reduce_sum(triple_mel, axis=2)

        # Add feature dim. Shape = [batch, time, features, 1]
        outputs = tf.expand_dims(triple_mel, -1)

        return py_utils.NestedMap(src_inputs=outputs, paddings=output_padding)
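A small numeric sketch of the left-context-2 stacking above (an addition; assumes TF 2.x eager mode and num_bins = 2): four mel frames plus two copies of the first frame reshape into two stacked frames of 3 * num_bins features.

import tensorflow as tf

mel = tf.reshape(tf.range(8.), [1, 4, 2])        # [batch, frames, num_bins=2]
first = mel[:, 0:1, :]
padded = tf.concat((first, first, mel), axis=1)  # duplicate the first frame
frames = tf.shape(padded)[1] // 3                # 6 // 3 == 2
stacked = tf.reshape(padded[:, :3 * frames, :], [1, frames, 6])
print(stacked.shape)  # (1, 2, 6)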
Example 5
    def _FrequencyMask(self,
                       inputs,
                       global_seed,
                       dtype=tf.float32,
                       domain_id_index=0):
        """Applies frequency masking with given degree to inputs.

    Args:
      inputs: Batch of input features of shape (batch_size, time_length,
        num_freq, channels).
      global_seed: an integer seed tensor for stateless random ops.
      dtype: Data type.
      domain_id_index: domain id index.

    Returns:
      Inputs with random frequency masking applied.
    """
        p = self.params

        # Mask parameters.
        freq_mask_max_bins = p.freq_mask_max_bins[domain_id_index]
        multiplicity = p.freq_mask_count[domain_id_index]

        # If masking length or count is zero, do nothing.
        if freq_mask_max_bins == 0 or multiplicity == 0:
            return inputs

        # Arguments to pass to mask generator.
        batch_size, _, num_freq, _ = py_utils.GetShape(inputs)
        choose_range = tf.cast(tf.broadcast_to(num_freq, (batch_size, )),
                               dtype=tf.int32)
        # Create masks in frequency direction and apply.
        block_arrays = self._GetMask(batch_size,
                                     choose_range=choose_range,
                                     mask_size=num_freq,
                                     global_seed=global_seed,
                                     max_length=freq_mask_max_bins,
                                     masks_per_frame=0.0,
                                     multiplicity=multiplicity,
                                     dtype=dtype,
                                     max_ratio=1.0)
        outputs = tf.einsum('bxyc,by->bxyc', inputs, block_arrays)

        return outputs
Example 6
def SequenceTrimLastToken(x, x_paddings):
    """Trims the last token off of sequence `x`, and set trimmed elements to 0.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max].
      - The new paddings, Tensor of shape [batch_size, x_len_max].
  """
    x_len = tf.reduce_sum(1 - x_paddings, 1)
    x_len_max = py_utils.GetShape(x)[1]
    x_trimmed_len = tf.maximum(x_len - 1, 0)
    x_trimmed_paddings = tf.sequence_mask(x_trimmed_len, x_len_max,
                                          x_paddings.dtype)
    x_trimmed = x * tf.cast(x_trimmed_paddings, x.dtype)
    return x_trimmed, 1 - x_trimmed_paddings
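A worked example (an addition; assumes TF 2.x eager mode and that lingvo.core.py_utils is importable so the function runs as written): the unpadded sequence [7, 8, 9] loses its last token and the freed slot becomes padding.

import tensorflow as tf

x = tf.constant([[7, 8, 9]], dtype=tf.int32)
x_paddings = tf.zeros([1, 3], tf.float32)
x_trimmed, new_paddings = SequenceTrimLastToken(x, x_paddings)
print(x_trimmed.numpy())     # [[7 8 0]]
print(new_paddings.numpy())  # [[0. 0. 1.]]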
Example 7
 def _Slice(tensor):
     """Return a slice of this tensor at time=state0.t."""
     shape = py_utils.GetShape(tensor)
     # All zeros except for t in the time dimension.
     # e.g. if params.axis=1, begin is [0, t, 0, 0, 0, ...]
     begin = tf.one_hot(self.params.axis,
                        tf.rank(tensor),
                        on_value=state0.t)
     # Same as shape, but with a 1 in the time dimension.
     # e.g. if params.axis=1, shape is [shape[0], 1, shape[2], shape[3], ...]
     size = tf.concat([
         shape[0:self.params.axis],
         tf.constant([1], dtype=tf.int32), shape[self.params.axis + 1:]
     ],
                      axis=0)
     # Make a slice where the time dimension is fixed at state0.t.
     time_slice = tf.slice(tensor, begin, size)
     # Remove the time dimension.
     return tf.squeeze(time_slice, axis=self.params.axis)
Example 8
def RelShift(x):
  """Performs relative shift on 4D tensor (first 2 axis are batching dims).

  Given input of shape [?, ?, W, W], this does "relative shifting" for the
  last two dims, s.t. output[b, n, i, j] = input[b, n, i, j-i] for j >= i,
  while entries below the diagonal (i > j) are leftovers of the shift and are
  expected to be masked out by the caller.

  Args:
    x: A Tensor of shape [?, ?, W, W]

  Returns:
    A Tensor of the same shape as input with its content shifted (as described
    above).
  """
  b, n, w, _ = py_utils.GetShape(x)
  x = py_utils.HasShape(x, [-1, -1, w, w])
  x = tf.pad(x, ((0, 0), (0, 0), (0, 0), (0, 1)))
  x = tf.reshape(x, [b, n, w + 1, w])
  x = x[:, :, :w, :]
  return x
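A tiny numeric check of the shift (an addition; assumes TF 2.x eager mode and lingvo.core.py_utils): row i of the output is row i of the input shifted right by i, and the lower triangle holds shift leftovers that the caller is expected to mask.

import tensorflow as tf

x = tf.reshape(tf.range(1., 10.), [1, 1, 3, 3])
print(RelShift(x)[0, 0].numpy())
# [[1. 2. 3.]
#  [0. 4. 5.]
#  [6. 0. 7.]]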
Example 9
    def IsSpecialExample(task_ids, special_task_ids):
      """A utility function indicates whether inputs belong to specific tasks.

      Args:
        task_ids: Task ids for the input batch. Tensor of shape [batch].
        special_task_ids: A list of specified task ids.

      Returns:
        A tensor of size [batch] indicating whether each sample in the batch
        belongs to one of the specified tasks.
      """
      batch_size = py_utils.GetShape(task_ids)[0]
      return tf.reduce_any(
          tf.equal(
              tf.expand_dims(task_ids, -1),
              tf.cast(
                  tf.broadcast_to(
                      special_task_ids,
                      [batch_size, len(special_task_ids)]), tf.int32)), -1)
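A usage sketch (an addition; the function above is defined as a nested helper in its source, so this treats it as standalone and assumes TF 2.x eager mode plus lingvo.core.py_utils): mark examples whose task id is 2 or 5.

import tensorflow as tf

task_ids = tf.constant([1, 2, 3, 5], dtype=tf.int32)
print(IsSpecialExample(task_ids, [2, 5]).numpy())  # [False  True False  True]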
Example 10
    def StreamStep(self, theta, inputs, paddings, state0):
        """Apply a singele step of convolution to input_tensor.

    Only supports 1d causal convolution. Doesn't support dilation.

    Args:
      theta: A NestedMap of layer params.
      inputs: A Tensor of shape [b, t, 1, c]
      paddings: A 0/1 valued tensor of shape [b, t].
      state0: A NestedMap of tensors of the same struct as returned by
        zero_state().

    Returns:
      outputs: A Tensor of shape [b, t, 1, c * channel_multiplier]
      padding: the same as input paddings.
      state1: A NestedMap of the same struct as input state
    """
        p = self.params
        assert p.filter_shape[1] == 1, (
            'StreamStep only supports 1d causal convolution.')
        assert p.filter_stride[0] == 1, (
            'StreamStep doesn\'t support striding.')
        assert p.dilation_rate == (1, 1), (
            'StreamStep doesn\'t support dilation.')

        with tf.name_scope(p.name):
            inputs = py_utils.HasShape(inputs, [-1, -1, 1, p.filter_shape[2]])
            paddings = py_utils.HasShape(paddings,
                                         py_utils.GetShape(inputs)[:2])

            concat_inputs = tf.concat(
                [state0.context,
                 inputs * (1 - py_utils.AppendDims(paddings, 2))],
                axis=1)
            outputs = tf.nn.depthwise_conv2d(concat_inputs,
                                             self._GetWeight(theta),
                                             strides=(1, 1, 1, 1),
                                             dilations=(1, 1),
                                             data_format='NHWC',
                                             padding='VALID')
            new_context = concat_inputs[:, -(p.filter_shape[0] - 1):]
            return outputs, paddings, py_utils.NestedMap(context=new_context)
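A standalone sketch of the streaming pattern above (an addition; assumes TF 2.x eager mode, ignores paddings, and uses a made-up kernel of size k = 3): keeping the last k - 1 frames as state and running a VALID depthwise conv over [state, chunk] reproduces a causally left-padded conv over the full sequence.

import tensorflow as tf

k, c, b, t = 3, 2, 1, 6
kernel = tf.random.normal([k, 1, c, 1])  # [H, W, in_channels, multiplier]
x = tf.random.normal([b, t, 1, c])       # [B, T, 1, C]

# Non-streaming reference: left-pad by k - 1 zeros, then a VALID conv.
ref = tf.nn.depthwise_conv2d(
    tf.pad(x, [[0, 0], [k - 1, 0], [0, 0], [0, 0]]),
    kernel, strides=[1, 1, 1, 1], padding='VALID')

# Streaming: two chunks, carrying the last k - 1 frames between them.
state = tf.zeros([b, k - 1, 1, c])
outs = []
for chunk in (x[:, :3], x[:, 3:]):
    buf = tf.concat([state, chunk], axis=1)
    outs.append(tf.nn.depthwise_conv2d(
        buf, kernel, strides=[1, 1, 1, 1], padding='VALID'))
    state = buf[:, -(k - 1):]
print(bool(tf.reduce_all(tf.abs(tf.concat(outs, 1) - ref) < 1e-5)))  # True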
Example 11
  def _InitBeamSearchStateCallback(self,
                                   theta,
                                   source_encs,
                                   source_paddings,
                                   num_hyps_per_beam,
                                   additional_source_info=None):
    """Returns initial beams search states.

    Args:
      theta: A `.NestedMap` of layer parameters.
      source_encs: A tensor of shape [src_len, src_batch, source_dim].
      source_paddings: A tensor of shape [src_len, src_batch].
      num_hyps_per_beam: An int, the number of hyps to keep per source
        sentence.
      additional_source_info: a `.NestedMap` of tensors containing extra
        context information about the source that may be useful for decoding.
    Returns:
      A tuple (initial_results, states).
        initial_results: a `.NestedMap` of initial results.
          atten_probs:
            The initial attention probs, of shape [tgt_batch, src_len].
        states: a `.NestedMap` of initial model states.
          rnn_states:
            Initial state of the RNN.
          atten_context:
            Initial attention context vector.
          atten_states:
            Initial attention state.
    """
    # additional_source_info is currently not used.
    del additional_source_info
    num_beams = py_utils.GetShape(source_encs)[1]
    num_hyps = num_beams * num_hyps_per_beam
    rnn_states, init_atten_context, atten_probs, atten_states = (
        self._InitDecoder(theta, source_encs, source_paddings, num_hyps))

    initial_results = py_utils.NestedMap({'atten_probs': atten_probs})

    return initial_results, py_utils.NestedMap({
        'rnn_states': rnn_states,
        'atten_context': init_atten_context,
        'atten_probs': atten_probs,
        'atten_states': atten_states,
    })
Example 12
    def _InitBeamSearchStateCallback(self, theta, encoder_outputs,
                                     num_hyps_per_beam):
        """Returns initial beams search states.

    Args:
      theta: a NestedMap of parameters.
      encoder_outputs: a NestedMap computed by encoder.
      num_hyps_per_beam: An int, the number of hyps to keep per source sentence.

    Returns:
      A tuple (initial_results, states).
        initial_results: a `.NestedMap` of initial results.
          atten_probs:
            The initial attention probs, of shape [tgt_batch, src_len].
        states: a `.NestedMap` of initial model states.
          rnn_states:
            Initial state of the RNN.
          atten_context:
            Initial attention context vector.
          atten_states:
            Initial attention state.
    """
        p = self.params
        num_beams = py_utils.GetShape(encoder_outputs.padding)[1]
        num_hyps = num_beams * num_hyps_per_beam
        rnn_states, init_atten_context, atten_probs, atten_states = (
            self._InitDecoder(theta, encoder_outputs, num_hyps))

        initial_results = py_utils.NestedMap(
            log_probs=tf.zeros([num_hyps, p.softmax.num_classes],
                               dtype=py_utils.FPropDtype(p)),
            atten_probs=atten_probs)

        return initial_results, py_utils.NestedMap({
            'rnn_states': rnn_states,
            'atten_context': init_atten_context,
            'atten_probs': atten_probs,
            'atten_states': atten_states,
        })
Example 13
    def _Normalize(self, theta, grouped_inputs, group_mean, group_variance):
        p = self.params
        group_mean = py_utils.CheckNumerics(
            group_mean, f'mean of {p.name} failed numeric check.')
        group_variance = py_utils.CheckNumerics(
            group_variance, f'variance of {p.name} failed numeric check.')

        input_shape = py_utils.GetShape(grouped_inputs)
        moment_shape = list(input_shape)
        if p.input_rank == 4:
            moment_shape[2] = 1
        moment_shape[-1] = 1
        if not p.cumulative:
            # If not cumulative, the seqlen dimension is also reduced.
            moment_shape[1] = 1

        group_mean = py_utils.HasShape(group_mean, moment_shape)
        group_variance = py_utils.HasShape(group_variance, moment_shape)
        group_variance = py_utils.with_dependencies([
            py_utils.assert_greater_equal(group_variance,
                                          tf.cast(0, group_variance.dtype))
        ], group_variance)

        if group_variance.dtype == tf.bfloat16:
            # tf.rsqrt is not implemented for bfloat16, hence we always cast into
            # tf.float32.
            group_stddev_inv = tf.cast(
                tf.math.rsqrt(
                    tf.cast(group_variance + self._epsilon, tf.float32)),
                group_mean.dtype)
        else:
            group_stddev_inv = tf.math.rsqrt(group_variance + self._epsilon)

        grouped_inputs = (grouped_inputs - group_mean) * group_stddev_inv
        # Merges the last two dims.
        grouped_inputs = tf.reshape(grouped_inputs, input_shape[:-2] + [-1])

        # Note: the real gamma to use is 1 + gamma.
        outputs = grouped_inputs * (theta.gamma + 1) + theta.beta
        return outputs
Example 14
    def _PaddedMeanFn(inp):
      """Apply padded mean using reduce_sum and dividing by # real points."""
      # Replace all padded features with 0 by masking the padded features out.
      mask = 1 - inp.padding
      features = inp.features * mask[..., tf.newaxis]
      features = tf.reduce_sum(features, axis=-2)
      num_real_points = tf.reduce_sum(mask, axis=-1, keepdims=True)
      # Prevent the divisor of our padded mean from ever being 0, so that
      # the gradient flowing back through this op doesn't give us NaNs.
      num_real_points = tf.maximum(num_real_points, 1)
      features = features / num_real_points

      # Replace features of all padded points by zeros. If a batch of points are
      # all padded, then num_real_points will be zero. We set the features to be
      # zero, so that we don't get any downstream issue with NaNs.
      # Note that inf * 0 = NaN.
      all_padded = tf.equal(num_real_points, 0.)
      all_padded = tf.broadcast_to(all_padded, py_utils.GetShape(features))
      features = tf.where(all_padded, tf.zeros_like(features), features)
      return py_utils.CheckNumerics(features)
Example 15
    def FProp(self, theta, inputs):
        """Apply dropout to inputs.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      inputs: The inputs tensor.

    Returns:
      inputs with dropout applied at training time.
    """
        p = self.params
        if p.keep_prob >= 1.0 or p.is_eval:
            return inputs

        with tf.name_scope(p.name):
            mb_tensor = gpipe.GetOverWriteGlobalStep()
            if p.burn_in_steps > 0:
                current_step = tf.cast(mb_tensor // p.num_micro_batches,
                                       inputs.dtype)
                current_ratio = current_step / tf.cast(p.burn_in_steps,
                                                       inputs.dtype)
                current_ratio = tf.minimum(tf.cast(1.0, inputs.dtype),
                                           current_ratio)
                keep_prob = (1 - current_ratio * (1 - p.keep_prob))
            else:
                keep_prob = tf.cast(p.keep_prob, inputs.dtype)

            seeds = gpipe.GenerateStepSeedPair(p)
            noise_shape = py_utils.GetShape(inputs)
            if p.noise_shape_dim and p.noise_shape_dim < inputs.shape.ndims:
                for d in range(inputs.shape.ndims):
                    if d != p.noise_shape_dim:
                        noise_shape[d] = 1
            random_tensor = (tf.cast(keep_prob, tf.float32) +
                             tf.contrib.stateless.stateless_random_uniform(
                                 noise_shape, seed=seeds, dtype=tf.float32))
            binary_tensor = tf.cast(tf.floor(random_tensor), inputs.dtype)
            ret = tf.div(inputs, keep_prob) * binary_tensor
            ret.set_shape(inputs.get_shape())
            return ret
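The burn-in branch above anneals dropout in over p.burn_in_steps; a scalar sketch of the schedule (an addition, plain Python, with keep_prob = 0.7 and burn_in_steps = 100 as made-up values):

for step in (0, 50, 100, 200):
    ratio = min(1.0, step / 100)
    print(step, 1 - ratio * (1 - 0.7))  # -> 1.0, 0.85, 0.7, 0.7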
Example 16
def ComputeConvOutputPadding(paddings,
                             window,
                             stride,
                             padding_algorithm='SAME',
                             v2_padding=False):
    """Computes paddings for convolution and pooling output.

  WARNING: This implementation is buggy; prefer ComputeConvOutputPaddingV2.

  out_padding[i] == 1 iff any in_padding corresponding to that output is 1.

  Args:
    paddings: The paddings tensor. It is expected to be of shape [batch, time].
    window: The size of the windows.
    stride: The time-stride between adjacent windows.
    padding_algorithm: 'SAME' or 'VALID'.
    v2_padding: Prefer setting to True. The default implementation is buggy
      for strided convolutions.

  Returns:
    out_padding: the new padding tensor of size [batch, ceil(time / stride)].
  """
    if v2_padding:
        return _ComputeConvOutputPaddingV2(paddings, window, stride,
                                           padding_algorithm)

    if stride == 1:
        return paddings

    # Pad so that stride divides input_length.
    input_length = py_utils.GetShape(paddings)[1]
    pad_len = (input_length + stride - 1) // stride * stride - input_length
    paddings = tf.pad(paddings, [[0, 0], [0, pad_len]], constant_values=1.0)
    out_padding = tf.nn.pool(
        tf.expand_dims(paddings, -1),
        [window],
        'MAX',
        padding=padding_algorithm,
        strides=[stride],
    )
    return tf.squeeze(out_padding, -1)
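A quick check of the legacy stride > 1 branch (an addition; assumes TF 2.x eager mode): paddings of length 5 with window = stride = 2 get right-padded to length 6, then MAX-pooled.

import tensorflow as tf

paddings = tf.constant([[0., 0., 0., 1., 1.]])
window, stride = 2, 2
pad_len = (5 + stride - 1) // stride * stride - 5   # -> 1
padded = tf.pad(paddings, [[0, 0], [0, pad_len]], constant_values=1.0)
out = tf.nn.pool(padded[..., None], [window], 'MAX',
                 strides=[stride], padding='SAME')
print(tf.squeeze(out, -1).numpy())  # [[0. 1. 1.]]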
Example 17
    def _GetBetaGamma(self, theta, inputs, **kwargs):
        assert 'class_emb' in kwargs
        class_emb = kwargs['class_emb']

        # class_emb is a one-hot vector of shape [batch, class_emb_dim=num_classes].
        class_ids = tf.math.argmax(class_emb, axis=-1, output_type=tf.int32)
        # [batch, dim]
        # Not using matmul/einsum to avoid potential precision problem on TPU with
        # sparse inputs.
        beta = tf.gather(theta.beta, class_ids)
        gamma = tf.gather(theta.gamma, class_ids)

        # Extend to [batch, 1, ... 1, dim]
        batch = py_utils.GetShape(inputs)[0]
        to_shape = tf.concat([[batch],
                              tf.ones([py_utils.GetRank(inputs) - 2],
                                      tf.int32), [self.params.dim]],
                             axis=0)
        beta = tf.reshape(beta, to_shape)
        gamma = tf.reshape(gamma, to_shape)
        return beta, gamma
Example 18
def PrepareSequenceForPlot(tensor, padding, name):
    """Prepares a sequence feature for plotting.

  The sequence feature is transposed and channels are flattened.

  Args:
    tensor: An n-D Tensor of shape [batch, time, ...].
    padding: A Tensor of shape [batch, time].
    name: A string as the name of the reshaped Tensor, which will be used as the
      subcaption for plotting.

  Returns:
    A tuple of:
      reshaped_tensor: A 3-D Tensor of shape [batch, dim, time].
      sequence_length: A 1-D Tensor of shape [batch].
  """
    # Flatten any dimensions beyond the third into the third.
    batch_size, max_len = py_utils.GetShape(tensor, 2)
    plot_tensor = tf.reshape(tensor, [batch_size, max_len, -1])
    plot_tensor = tf.transpose(plot_tensor, [0, 2, 1], name=name)
    return (plot_tensor, SequenceLength(padding))
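A quick shape check (an addition; assumes TF 2.x eager mode and lingvo's summary_utils module, which also provides the SequenceLength helper used above): a [2, 5, 3, 4] feature flattens its trailing dims and transposes to [2, 12, 5].

import tensorflow as tf

feats = tf.zeros([2, 5, 3, 4])  # [batch, time, ...]
padding = tf.zeros([2, 5])
plot, seq_len = PrepareSequenceForPlot(feats, padding, 'feat')
print(plot.shape, seq_len.numpy())  # (2, 12, 5) [5 5]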
Example 19
  def _Extract(self, features):
    """Returns the laser Tensor."""
    p = self.params
    ret = super()._Extract(features)

    all_vxyz = []
    all_classes = []
    for lidar in p.lidar_names:
      for ri in p.lidar_returns:
        feature_name = 'laser_%s_%s' % (lidar, ri)
        laser_data = tf.reshape(
            _Dense(features[feature_name]), [-1, 3 + p.num_features])
        num = py_utils.GetShape(laser_data)[0]
        # We expect lidar_$lidar_$ri and lidar_$lidar_$ri_flow to have the
        # same number of points.
        feature_name += '_flow'
        laser_data = tf.reshape(_Dense(features[feature_name]), [num, 3 + 1])
        points_vxyz = laser_data[..., 0:3]
        points_classes = laser_data[..., 3]

        all_vxyz += [points_vxyz]
        all_classes += [points_classes]

    # Stack all of the points along the major dimension
    points_vxyz = tf.concat(all_vxyz, axis=0)
    points_class = tf.concat(all_classes, axis=0)

    # The precomputed class uses -1 to mean 5 in our current code.
    points_class = tf.where(
        tf.less(points_class, 0), 5. * tf.ones_like(points_class), points_class)

    if p.max_num_points is not None:
      assert 'points_padding' in ret
      points_vxyz = py_utils.PadOrTrimTo(points_vxyz, [p.max_num_points, 3])
      points_class = py_utils.PadOrTrimTo(points_class, [p.max_num_points])

    assert 'points_xyz' in ret
    ret.world_flow = points_vxyz
    ret.pointwise_class = tf.cast(points_class, tf.int32)
    return ret
Example 20
def BBoxCorners2D(bboxes):
    """Extract the corner points from a 5-DOF bbox representation.

  Args:
    bboxes: A [..., 5] floating point bounding box representation ([x, y, dx,
      dy, phi]).

  Returns:
    A [..., 4, 2] floating point Tensor containing
      the corner (x, y) points for every bounding box.
  """
    corners = tf.constant([
        [0.5, 0.5],
        [-0.5, 0.5],
        [-0.5, -0.5],
        [0.5, -0.5],
    ])

    leading_shape = py_utils.GetShape(bboxes)[:-1]

    # Extract location, dimension, and rotation.
    location = bboxes[..., :2]
    dimensions = bboxes[..., 2:4]
    phi_world = bboxes[..., 4]

    # Convert rotation_phis into rotation matrices along unit z.
    cos = tf.cos(phi_world)
    sin = tf.sin(phi_world)
    rotations_world = tf.reshape(tf.stack([cos, -sin, sin, cos], axis=-1),
                                 leading_shape + [2, 2])

    # Create axis-aligned corners from length/width/height.
    corners = tf.einsum('...i,ji->...ji', dimensions, corners)

    # Rotate the corners coordinates to the rotated world frame.
    corners = tf.einsum('...ij,...kj->...ki', rotations_world, corners)

    # Translate corners to the world location.
    corners = corners + tf.reshape(location, leading_shape + [1, 2])
    return corners
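A quick numeric check (an addition; assumes TF 2.x eager mode and lingvo.core.py_utils so the function above runs as written): a unit square centered at (1, 2) with phi = 0 yields its four axis-aligned corners in the template's order.

import tensorflow as tf

bboxes = tf.constant([[1., 2., 1., 1., 0.]])  # [x, y, dx, dy, phi]
print(BBoxCorners2D(bboxes).numpy())
# [[[1.5 2.5]
#   [0.5 2.5]
#   [0.5 1.5]
#   [1.5 1.5]]]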
Example 21
  def _BBoxesAndLogits(self, input_batch):
    """Decode an input batch, computing predicted bboxes from residuals."""
    _, per_example_dict = self.FPropTower(self.theta, input_batch)

    # Decode residuals.
    predicted_bboxes = self._utils.ResidualsToBBoxes(
        input_batch.anchor_bboxes, per_example_dict['residuals'])

    # predicted_bboxes is a [batch, nx, ny, nz, na, 7] Tensor.
    batch_size, nx, ny, nz, na, _ = py_utils.GetShape(predicted_bboxes, 6)
    num_boxes = nx * ny * nz * na

    # Reshape to [batch_size, num_boxes, 7]
    predicted_bboxes = tf.reshape(predicted_bboxes, [batch_size, num_boxes, 7])

    classification_logits = tf.reshape(
        per_example_dict['classification_logits'], [batch_size, num_boxes, -1])

    return py_utils.NestedMap({
        'predicted_bboxes': predicted_bboxes,
        'classification_logits': classification_logits
    })
Example 22
    def _FrequencyWarp(self,
                       inputs,
                       global_seed,
                       dtype=tf.float32,
                       domain_id_index=0):
        """Applies frequency warping with given degree to inputs.

    Args:
      inputs: Batch of input features of shape (batch_size, time_length,
        num_freq, channels).
      global_seed: an integer seed tensor for stateless random ops.
      dtype: Data type.
      domain_id_index: Domain ID index.

    Returns:
      Inputs with random frequency warping applied.
    """
        p = self.params
        batch_size, _, num_freq, _ = py_utils.GetShape(inputs)

        # Get parameters for warping.
        freq_warp_max_bins = p.freq_warp_max_bins[domain_id_index]

        # If maximum warp length is zero, do nothing.
        if freq_warp_max_bins == 0:
            return inputs
        choose_range = tf.ones((batch_size, ), dtype=tf.int32) * num_freq

        # Create the warping matrix in the frequency direction and apply it.
        warp_matrix = self._GetWarpMatrix(batch_size,
                                          choose_range=choose_range,
                                          matrix_size=num_freq,
                                          global_seed=global_seed,
                                          max_warp_frames=freq_warp_max_bins,
                                          dtype=dtype)

        return self.EinsumBxycBzyBxzc(inputs,
                                      warp_matrix,
                                      name='einsum_forfreqwarping')
Example 23
  def testMoEFPropDynamicShapes(self):
    """Test to verify MoEBuilder.MoE() supports dynamic shapes.

    Test without this change fails.
    """
    batch_dim = 2
    length_dim = 4
    input_dim = 4
    builder = gshard_builder.MoEBuilder.Params().Set(
        model_dim=input_dim, num_devices=2, moe_hidden_dim=16, e_dim=2, c_dim=2)
    p = builder.Instantiate().MoE('moe')
    with self.session(graph=tf.Graph()) as sess:
      tf.random.set_seed(2019)
      # we will reduce the length_dim by 2 dynamically.
      layer = p.Instantiate()
      inputs, segment_ids, segment_pos = self._CreateDynamicShapeInputs(
          batch_dim, length_dim, input_dim)
      # Verify the length dimension's shape is dynamic (a Tensor).
      self.assertIsInstance(py_utils.GetShape(inputs)[1], tf.Tensor)
      out, aux_loss = layer.FPropDefaultTheta(inputs, segment_ids, segment_pos)
      sess.run(tf.global_variables_initializer())
      _ = sess.run([out, aux_loss])
Example 24
    def _PadAndReshapeSpec(self, mel_spectrogram, mel_spectrogram_paddings):
        p = self.params
        batch_size = py_utils.GetShape(mel_spectrogram)[0]
        # Stack and sub-sample. Only subsampling with a stride of the stack size
        # is supported.
        if p.stack_left_context > 0:
            # Since left context is leading, pad the left by duplicating the first
            # frame.
            stack_size = 1 + p.stack_left_context
            mel_spectrogram = tf.concat(
                [mel_spectrogram[:, 0:1, :]] * p.stack_left_context +
                [mel_spectrogram],
                axis=1)
            mel_spectrogram_paddings = tf.concat(
                [mel_spectrogram_paddings[:, 0:1]] * p.stack_left_context +
                [mel_spectrogram_paddings],
                axis=1)

            # Note that this is the maximum number of frames. Actual frame count
            # depends on padding.
            stacked_frame_dim = tf.shape(mel_spectrogram)[1] // stack_size
            mel_spectrogram = tf.reshape(
                mel_spectrogram[:, 0:(stack_size) * stacked_frame_dim, :],
                [batch_size, stacked_frame_dim, stack_size * p.num_bins])
            # After stacking paddings, pad if any source frame was padded.
            # Stacks into [batch_size, stacked_frame_dim, stack_size] like the
            # spectrogram stacking above, and then reduces the stack_size dim
            # to the max (effectively, making padding = 1.0 if any of the pre-stacked
            # frames were 1.0). Final shape is [batch_size, stacked_frame_dim].
            mel_spectrogram_paddings = tf.reshape(
                mel_spectrogram_paddings[:,
                                         0:(stack_size) * stacked_frame_dim],
                [batch_size, stacked_frame_dim, stack_size])
            mel_spectrogram_paddings = tf.reduce_max(mel_spectrogram_paddings,
                                                     axis=2)

        # Add feature dim. Shape = [batch, time, features, 1]
        mel_spectrogram = tf.expand_dims(mel_spectrogram, -1)
        return mel_spectrogram, mel_spectrogram_paddings
Example 25
    def FProp(self, theta, inputs):
        """Apply projection to inputs.

    Args:
      theta: A NestedMap object containing weights' values of this layer and its
        children layers.
      inputs: The inputs tensor.  Shaped [..., input_dims].

    Returns:
      Projected inputs.
    """
        p = self.params
        with tf.name_scope(p.name):
            computation_cost.Add(
                self, 'flops',
                tf.reduce_prod(tf.cast(tf.shape(inputs)[:-1], tf.int64)) *
                tf.cast(
                    symbolic.EvalExpr(symbolic.TENSOR_VALUES, p.input_dims *
                                      p.output_dims), tf.int64) * 2)
            use_tpu = py_utils.use_tpu()
            shape = inputs.shape
            if use_tpu and (shape is not None and shape.rank is not None
                            and shape.rank < 26):
                # Avoids reshape if feasible and uses Einsum.
                if shape.rank == 2:
                    return tf.matmul(inputs, theta.w)
                else:
                    s = ''.join([chr(x) for x in range(97, 123)])  # abc...xyz
                    r = shape.rank
                    return tf.einsum('{0}y,yz->{0}z'.format(s[:r - 1]), inputs,
                                     theta.w)

            input_dim = py_utils.GetShape(inputs)[-1]
            act = tf.matmul(tf.reshape(inputs, [-1, input_dim]), theta.w)
            output_dim = tf.shape(theta.w)[-1]
            act = tf.reshape(
                act, tf.concat([tf.shape(inputs)[:-1], [output_dim]], axis=0))
            return act
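A standalone sketch of the reshape-free einsum path above (an addition; assumes TF 2.x): for a rank-3 input the generated equation is 'aby,yz->abz', which applies the projection without flattening.

import tensorflow as tf

inputs = tf.random.normal([2, 3, 4])
w = tf.random.normal([4, 5])
s = ''.join(chr(x) for x in range(97, 123))  # abc...xyz
r = inputs.shape.rank
out = tf.einsum('{0}y,yz->{0}z'.format(s[:r - 1]), inputs, w)
print(out.shape)  # (2, 3, 5)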
Example 26
    def FProp(self, theta, inputs, paddings, class_emb):
        """Apply batch normalization.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      inputs: The inputs tensor.  Shaped [batch, ..., dim].
      paddings: The paddings tensor.  Shaped [batch, ..., 1], with the same rank
        as the input tensor.
      class_emb: The conditioning inputs, Shaped [batch, emb_dim].

    Returns:
      Output after applying batch normalization, with the same shape as
      'inputs'.
    """
        if py_utils.testonly_skip_norm_layers():
            return inputs

        p = self.params
        batch = py_utils.GetShape(inputs)[0]
        class_emb = py_utils.HasShape(class_emb, [batch, p.class_emb_dim])
        if not py_utils.use_tpu():
            class_emb = py_utils.with_dependencies([
                py_utils.assert_less_equal(
                    tf.cast(class_emb, tf.int32), 1, name='one_hot_assert1'),
                py_utils.assert_greater_equal(
                    tf.cast(class_emb, tf.int32), 0, name='one_hot_assert2'),
                py_utils.assert_equal(tf.ones([batch], tf.int32),
                                      tf.cast(tf.reduce_sum(class_emb, -1),
                                              tf.int32),
                                      name='one_hot_assert3'),
            ], class_emb)

        with tf.name_scope(p.name):
            norm_mean, norm_variance, beta, gamma = self.ComputeAndUpdateMoments(
                theta, inputs, paddings=paddings, class_emb=class_emb)
            return self._ComputeBN(inputs, paddings, gamma, beta, norm_mean,
                                   norm_variance)
Example 27
def ConvertToBlocks(x, block_size, padding_val=0.0):
  """Turns a sequence to non overlapping blocks.

  Args:
    x: a tensor of [batch, time, ...].
    block_size: int. Number of time frames in a block.
    padding_val: float. Value used for the padded frames.

  Returns:
    A tensor of [batch, num_blocks, block_size, ...], with necessary paddings,
    where output[:, i, ...] are x[:, i*block_size:(i+1)*block_size, ...].
  """
  shape = py_utils.GetShape(x)
  b, t = shape[:2]
  if block_size < 1:
    raise ValueError('block_size must be at least 1, got {}'.format(block_size))
  w = block_size
  # Pad t to be a multiple of w.
  num_blocks = (t + w - 1) // w
  pad_to_length = num_blocks * w
  padded = py_utils.PadSequenceDimension(x, pad_to_length, padding_val)
  reshaped = tf.reshape(padded, [b, num_blocks, w] + shape[2:])
  return reshaped
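A quick usage example (an addition; assumes TF 2.x eager mode and lingvo.core.py_utils, whose PadSequenceDimension does the padding): a length-5 sequence with block_size = 2 pads to length 6 and yields 3 blocks.

import tensorflow as tf

x = tf.reshape(tf.range(10.), [2, 5])
print(ConvertToBlocks(x, block_size=2).shape)  # (2, 3, 2)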
Example 28
    def _CreateFrustumMask(self, bbox_corners_image,
                           bbox2d_corners_image_clipped, image_height,
                           image_width):
        """Creates a box mask for boxes whose projections fall outside of image."""
        p = self.params
        batch_size, num_boxes = py_utils.GetShape(bbox_corners_image, 2)
        if not p.filter_predictions_outside_frustum:
            return tf.ones(shape=(batch_size, num_boxes), dtype=tf.float32)

        def _MinMax(bbox_corners):
            """Computes the min and max over corners."""
            bbox_min = tf.reduce_min(bbox_corners, axis=-1)
            bbox_max = tf.reduce_max(bbox_corners, axis=-1)
            bbox_min = py_utils.HasShape(bbox_min, [batch_size, num_boxes])
            bbox_max = py_utils.HasShape(bbox_max, [batch_size, num_boxes])
            return bbox_min, bbox_max

        bbox_min_x, bbox_max_x = _MinMax(bbox_corners_image[:, :, :, 0])
        bbox_min_y, bbox_max_y = _MinMax(bbox_corners_image[:, :, :, 1])

        # Compute the fraction of the clipped 2d image projection and the
        # full 2d image projection.  We simply need to divide the area
        # of each cropped box by the area of the full box to get the
        # overlap fraction.
        original_area = (bbox_max_x - bbox_min_x) * (bbox_max_y - bbox_min_y)
        bbox_clipped_x_min = bbox2d_corners_image_clipped[..., 0]
        bbox_clipped_y_min = bbox2d_corners_image_clipped[..., 1]
        bbox_clipped_x_max = bbox2d_corners_image_clipped[..., 2]
        bbox_clipped_y_max = bbox2d_corners_image_clipped[..., 3]
        clipped_area = (bbox_clipped_x_max - bbox_clipped_x_min) * (
            bbox_clipped_y_max - bbox_clipped_y_min)
        fraction = clipped_area / original_area

        frustum_mask = (fraction > p.truncation_threshold)
        frustum_mask = py_utils.HasShape(frustum_mask, [batch_size, num_boxes])
        frustum_mask = tf.cast(frustum_mask, tf.float32)
        return frustum_mask
Example 29
    def _ReshapeToMono2D(self, pcm_audio_data, paddings):
        """Reshapes a 3D or 4D input to 2D.

    Since the input to FProp can be 3D or 4D (see class comments), this will
    collapse it back to a 2D, mono shape for internal processing.

    Args:
      pcm_audio_data: 2D, 3D or 4D audio input. See class comments. Must have a
        rank.
      paddings: Original paddings shaped to the first two dims of
        pcm_audio_data.

    Returns:
      Tuple of 2D [batch_size, timestep] mono audio data, new paddings.
    """
        shape = py_utils.GetShape(pcm_audio_data)
        rank = len(shape)
        if rank == 2:
            return pcm_audio_data, paddings
        elif rank == 3:
            # [batch, time, channel]
            with tf.control_dependencies([tf.assert_equal(shape[2], 1)]):
                return tf.squeeze(pcm_audio_data, axis=2), paddings
        elif rank == 4:
            # [batch, time, packet, channel]
            batch_size, orig_time, orig_packet_size, channel = shape
            time = orig_time * orig_packet_size
            with tf.control_dependencies([tf.assert_equal(channel, 1)]):
                pcm_audio_data = tf.reshape(pcm_audio_data, (batch_size, time))
                # Transform paddings into the new time base with a padding per time
                # step vs per packet by duplicating each packet.
                paddings = tf.reshape(
                    tf.tile(tf.expand_dims(paddings, axis=2),
                            [1, 1, orig_packet_size]), (batch_size, time))
                return pcm_audio_data, paddings
        else:
            raise ValueError('Illegal pcm_audio_data shape')
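A shape walk-through of the rank-4 branch above (an addition; assumes TF 2.x eager mode): [batch, time, packet, 1] flattens to [batch, time * packet], and each padding value is repeated once per packet.

import tensorflow as tf

pcm = tf.zeros([2, 3, 4, 1])                     # [batch, time, packet, 1]
pad = tf.constant([[0., 0., 1.], [0., 1., 1.]])  # [batch, time]
b, t, k, _ = pcm.shape
flat = tf.reshape(pcm, (b, t * k))
pad2 = tf.reshape(tf.tile(pad[:, :, None], [1, 1, k]), (b, t * k))
print(flat.shape)       # (2, 12)
print(pad2.numpy()[0])  # [0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1.]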
Example 30
    def CornersToImagePlane(self, corners, velo_to_image_plane):
        """Project 3d box corners to the image plane.

    Args:
      corners: A [batch, num_boxes, 8, 3] floating point tensor containing the 8
        corners points for each 3d bounding box.
      velo_to_image_plane: A [batch, 3, 4] batch set of projection matrices from
        velo xyz to image plane xy. After multiplication, you need to divide by
        last coordinate to recover 2D pixel locations.

    Returns:
      A [batch, num_boxes, 8, 2] floating point Tensor containing the 3D
      bounding box corners projected to the image plane.
    """
        batch_size, num_boxes, _, _ = py_utils.GetShape(corners, 4)

        def CornersToPlaneBody(args):
            """Body of function to convert each bounding box to the image plane."""
            (corners, velo_to_image_plane) = args
            # corners[i] is [num_boxes, 8, 3]: flatten the points in this batch and do
            # the conversion in one call.
            bbox_corners = tf.reshape(corners, [-1, 3])
            image_plane_corners = geometry.PointsToImagePlane(
                bbox_corners, velo_to_image_plane)
            image_plane_corners = tf.reshape(image_plane_corners, [-1, 8, 2])
            return image_plane_corners

        corners_in_image_plane = tf.map_fn(fn=CornersToPlaneBody,
                                           elems=(corners,
                                                  velo_to_image_plane),
                                           dtype=tf.float32,
                                           back_prop=False)

        corners_in_image_plane = py_utils.HasShape(
            corners_in_image_plane, [batch_size, num_boxes, 8, 2])
        return corners_in_image_plane