コード例 #1
0
    def NMSIndices(self,
                   bboxes,
                   scores,
                   max_output_size,
                   nms_iou_threshold=0.3,
                   score_threshold=0.01):
        """Runs padded NMS over 7-DOF 3d boxes using their x/y footprint.

        Args:
          bboxes: A [num_boxes, 7] floating point Tensor of bounding boxes in
            [x, y, z, dx, dy, dz, phi] format.
          scores: A [num_boxes] floating point Tensor of box scores.
          max_output_size: Maximum number of boxes to keep per input.
          nms_iou_threshold: IoU threshold above which two boxes are treated
            as overlapping for suppression purposes.
          score_threshold: Score threshold forwarded to NMS so that it can
            quickly discard irrelevant boxes.

        Returns:
          An (indices, mask) pair. `indices` are the NMS indices padded to
          `max_output_size`, with padded slots set to 0. `mask` is 1 for
          valid slots and 0 for padded slots.
        """
        bboxes = py_utils.HasShape(bboxes, [-1, 7])

        # Only (x, y, dx, dy) are kept: the rotation angle is dropped because
        # no rotation-aware NMS operation is available here.
        footprint_xywh = tf.stack(
            [bboxes[:, 0], bboxes[:, 1], bboxes[:, 3], bboxes[:, 4]], axis=-1)
        footprint_extrema = geometry.XYWHToBBoxes(footprint_xywh)

        # The padded NMS variant is used so that this function can run inside
        # a map_fn; it also returns the scalar count of valid boxes. The
        # default IoU threshold is 0.3 because anchor boxes are rotated, which
        # makes the usual 0.5 threshold possibly too high.
        nms_kwargs = dict(
            iou_threshold=nms_iou_threshold,
            max_output_size=max_output_size,
            score_threshold=score_threshold,
            pad_to_max_output_size=True)
        selected, valid_count = tf.image.non_max_suppression_padded(
            footprint_extrema, scores, **nms_kwargs)

        # Expand the scalar count into a per-slot validity mask.
        valid_part = tf.ones([valid_count])
        padded_part = tf.zeros([max_output_size - valid_count])
        mask = tf.concat([valid_part, padded_part], axis=0)

        # Zero out the indices stored in padded slots.
        is_valid = mask > 0
        selected = tf.where(is_valid, selected, tf.zeros_like(selected))
        return selected, mask
コード例 #2
0
    def _Extract(self, features):
        """Extracts padded per-object label tensors from parsed features.

        Every per-object tensor is padded or trimmed to `p.max_num_objects`
        along its first dimension. When `p.filter_labels` is set,
        `bboxes_3d_mask` is additionally multiplied by a 0/1 indicator of
        whether each label is listed in it; `unfiltered_bboxes_3d_mask` keeps
        the mask from before that filtering.

        Args:
          features: Mapping from feature name to the parsed feature value.

        Returns:
          A `py_utils.NestedMap` with labels, label ids, difficulty levels,
          3d boxes with their masks and point counts, and `speed` /
          `acceleration` taken from the first two / remaining columns of the
          label metadata.
        """
        p = self.params
        max_objects = p.max_num_objects

        def _PaddedInt32(key):
            """Densifies `features[key]` as int32 and pads it to max_objects."""
            dense = tf.cast(_Dense(features[key]), tf.int32)
            return py_utils.PadOrTrimTo(dense, [max_objects])

        # Label values match the proto enum car.open_dataset.Label.Type; the
        # non-background labels lie in [1..4].
        labels = _PaddedInt32('labels')
        label_ids = py_utils.PadOrTrimTo(
            tf.reshape(_Dense(features['label_ids'], ''), [-1]),
            [max_objects], '')

        raw_bboxes = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
        # The mask is built from the unpadded row count and then padded the
        # same way as the boxes themselves.
        raw_mask = tf.ones([tf.shape(raw_bboxes)[0]])
        bboxes_3d_num_points = _PaddedInt32('bboxes_3d_num_points')
        bboxes_3d = py_utils.PadOrTrimTo(raw_bboxes, [max_objects, 7])
        unfiltered_bboxes_3d_mask = py_utils.PadOrTrimTo(
            raw_mask, [max_objects])
        label_metadata = py_utils.PadOrTrimTo(
            tf.reshape(_Dense(features['label_metadata']), [-1, 4]),
            [max_objects, 4])

        detection_difficulties = _PaddedInt32('detection_difficulties')
        combined_detection_difficulties = _PaddedInt32(
            'combined_detection_difficulties')
        tracking_difficulties = _PaddedInt32('tracking_difficulties')

        bboxes_3d_mask = unfiltered_bboxes_3d_mask
        if p.filter_labels:
            allowed_labels = tf.constant([p.filter_labels])
            is_allowed = tf.reduce_any(
                tf.equal(tf.expand_dims(labels, 1), allowed_labels), axis=1)
            bboxes_3d_mask = bboxes_3d_mask * tf.cast(is_allowed, tf.float32)

        return py_utils.NestedMap(
            labels=labels,
            label_ids=label_ids,
            detection_difficulties=detection_difficulties,
            combined_detection_difficulties=combined_detection_difficulties,
            tracking_difficulties=tracking_difficulties,
            bboxes_3d=bboxes_3d,
            bboxes_3d_mask=bboxes_3d_mask,
            bboxes_3d_num_points=bboxes_3d_num_points,
            unfiltered_bboxes_3d_mask=unfiltered_bboxes_3d_mask,
            speed=label_metadata[:, :2],
            acceleration=label_metadata[:, 2:])
コード例 #3
0
 def _InputBatch(self):
     """Builds a constant all-ones target batch of shape [batch_size, 1024].

     Returns:
       A NestedMap whose `tgt` entry holds `ids`, `labels`, `segment_ids`
       and `segment_pos`, each with its static shape pinned to
       (batch_size, 1024).
     """
     seq_len = 1024
     ones = tf.ones([self.params.batch_size, seq_len], dtype=tf.int32)
     batch = py_utils.NestedMap(
         tgt=py_utils.NestedMap(
             ids=tf.roll(ones, 1, axis=1),
             labels=ones,
             segment_ids=tf.minimum(ones, 1),
             segment_pos=ones))

     def _PinShape(t):
         return tf.ensure_shape(t, (self.params.batch_size, seq_len))

     return batch.Transform(_PinShape)
コード例 #4
0
ファイル: plot_test.py プロジェクト: galv/lingvo-copy
 def testLimitsOutputImagesIfBatchIsSmall(self):
   """A batch of one yields one image even when max_outputs is larger."""
   batch_size = 1
   subplot_data = (tf.zeros((batch_size, 3, 5)), tf.ones((batch_size, 2, 2)))
   with self.session() as sess:
     figure = plot.MatplotlibFigureSummary(
         'summary', self.FIGSIZE, max_outputs=3)
     for data in subplot_data:
       figure.AddSubplot([data])
     image_summary = figure.Finalize()
     serialized = sess.run(image_summary)
   parsed = tf.summary.Summary.FromString(serialized)
   self.assertEqual(len(parsed.value), 1)
コード例 #5
0
    def testRepeatMoEFProp(self):
        """Smoke-tests FProp through RecurrentDenseBuilder.DecoderLayerStack().

        The stack repeats a decoder self-attention layer followed by a MoE
        layer; the test only checks that the forward pass can be built and
        run.
        """
        batch_dim, length_dim, input_dim = 2, 4, 4
        builder_params = gshard_builder.RecurrentDenseBuilder.Params().Set(
            model_dim=input_dim,
            num_devices=2,
            moe_hidden_dim=16,
            e_dim=2,
            attention_key_value_dim=input_dim,
            attention_num_heads=1,
            c_dim=2,
            emh_split=[-1, 0, -1, -1],
            ehm_split=[-1, 0, -1, -1])
        b = builder_params.Instantiate()
        sub_layers = [
            b.DecSelfAttention('dec_self_attention'),
            b.MoE('moe', decoder=True),
        ]
        stack_params = b.DecoderLayerStack('rep', sub_layers, 2)
        with self.session(graph=tf.Graph()) as sess:
            tf.random.set_seed(2019)
            layer = stack_params.Instantiate()
            vec = tf.ones([batch_dim, length_dim, input_dim])
            seg_ids = tf.ones([batch_dim, length_dim])
            seg_pos = tf.ones([batch_dim, length_dim])
            # The same tensors serve as both the decoder and encoder side.
            layer_inputs = py_utils.NestedMap(
                vec=vec,
                segment_id=seg_ids,
                segment_pos=seg_pos,
                encoder_output=vec,
                encoder_segment_id=seg_ids,
                encoder_segment_pos=seg_pos,
                aux_loss=tf.zeros([]))
            fprop_out = layer.FPropDefaultTheta(layer_inputs)
            sess.run(tf.global_variables_initializer())
            sess.run(fprop_out)
コード例 #6
0
        def _DerivePaddingsAndIds(src_ids, tgt_labels):
            """Derives target ids, all-zero paddings, weights and a bucket key.

            tgt_ids is tgt_labels shifted right by one position with the SOS
            id prepended (the last label is dropped).
            """
            shifted_labels = tgt_labels[:-1]
            tgt_ids = tf.concat([[p.sos_id], shifted_labels], axis=0)

            src_paddings = tf.zeros(tf.shape(src_ids), dtype=tf.float32)
            tgt_paddings = tf.zeros(tf.shape(tgt_ids), dtype=tf.float32)
            tgt_weights = tf.ones(tf.shape(tgt_ids), dtype=tf.float32)

            # The bucket key is the larger of the source and target counts of
            # non-padded positions.
            src_len = tf.reduce_sum(1.0 - src_paddings)
            tgt_len = tf.reduce_sum(1.0 - tgt_paddings)
            bucket_key = tf.cast(tf.maximum(src_len, tgt_len), tf.int32)

            return src_paddings, tgt_ids, tgt_paddings, tgt_weights, bucket_key
コード例 #7
0
ファイル: model.py プロジェクト: tensorflow/lingvo
  def FPropTower(self, theta, input_batch):
    """Runs the language model on one tower and computes its metrics.

    Args:
      theta: NestedMap of weights; `theta.lm` is forwarded to `self.lm.FProp`.
      input_batch: NestedMap with `ids`, `paddings`, `labels`, `weights` and
        `word_count`. When it also contains `segment_ids`, `segment_pos` and
        `num_sentences` are read from it as well.

    Returns:
      A (metrics, per_example) pair. `metrics` maps metric names to
      (value, weight) tuples. `per_example` holds `loss`, the per-sequence
      loss; it is 0 (rather than NaN) for sequences whose weights sum to 0.
    """
    p = self.params
    tf.logging.info('input_batch=%r', input_batch)
    ids, paddings, labels_ids, weights = self._TrimIfPossible(
        input_batch.ids, input_batch.paddings, input_batch.labels,
        input_batch.weights)
    fprop_dtype = py_utils.FPropDtype(p)
    paddings = tf.cast(paddings, fprop_dtype)
    weights = tf.cast(weights, fprop_dtype)
    # Lazy %-formatting: the message is only rendered if the record is emitted.
    tf.logging.info('inputs=%s', (ids, paddings, labels_ids, weights))

    batch_size = tf.shape(ids)[0]
    state0 = None
    labels = py_utils.NestedMap(class_ids=labels_ids, class_weights=weights)
    has_segments = 'segment_ids' in input_batch
    fprop_kwargs = {}
    if has_segments:
      fprop_kwargs.update(
          segment_ids=input_batch.segment_ids,
          segment_pos=input_batch.segment_pos)
    xent_output, _ = self.lm.FProp(theta.lm, ids, paddings, state0, labels,
                                   **fprop_kwargs)

    if has_segments:
      num_sentences = input_batch.num_sentences
    else:
      # Without segment information each row counts as one sentence.
      num_sentences = tf.ones(shape=[batch_size], dtype=tf.int32)
    # +num_sentences to account for the end of sequence symbol.
    num_words = tf.cast(
        tf.reduce_sum(input_batch.word_count + num_sentences), fprop_dtype)
    predicted_labels = tf.cast(xent_output.per_example_argmax, labels_ids.dtype)

    num_preds = xent_output.total_weight
    mean_acc = tf.reduce_sum(
        tf.cast(tf.equal(labels_ids, predicted_labels), fprop_dtype) *
        weights) / tf.math.maximum(num_preds, 1)
    loss = xent_output.avg_xent
    per_sequence_loss = tf.reduce_sum(
        xent_output.per_example_xent * weights, axis=1)
    if p.train.sum_loss_across_tokens_in_batch:
      loss = xent_output.total_xent
    else:
      # A row whose weights are all zero would otherwise produce 0 / 0 = NaN;
      # divide_no_nan yields 0 there and is an ordinary division elsewhere.
      per_sequence_loss = tf.math.divide_no_nan(
          per_sequence_loss, tf.reduce_sum(weights, axis=1))
    return {
        'loss': (loss, num_preds),
        'fraction_of_correct_next_step_preds': (mean_acc, num_preds),
        'log_pplx': (xent_output.avg_xent, num_preds),
        'log_pplx_per_word': (xent_output.total_xent / num_words, num_words),
        'num_predictions': (num_preds, 1),
        'num_words': (num_words, 1),
        'num_sentences': (tf.reduce_sum(num_sentences), 1),
    }, {
        'loss': per_sequence_loss,
    }
コード例 #8
0
ファイル: favor_attention.py プロジェクト: sailfish009/lingvo
def create_projection_matrix(nb_random_projections, dim, seed=0, scaling=0):
    r"""Constructs the matrix of random projections.

    Constructs a matrix of random orthogonal projections. Each projection
    vector has direction chosen uniformly at random and either deterministic
    length \sqrt{dim} or length taken from the \chi(dim) distribution (in the
    latter case marginal distributions of the projections are dim-dimensional
    Gaussian vectors with associated identity covariance matrix).

    Args:
      nb_random_projections: number of random projections.
      dim: dimensionality of each random projection.
      seed: random seed used to construct projections.
      scaling: 1 if all the random projections need to be renormalized to have
        length \sqrt{dim}, 0 if the lengths of random projections should
        follow \chi(dim) distribution.

    Returns:
      The matrix of random projections of the shape
      [nb_random_projections, dim], or None if nb_random_projections is 0.

    Raises:
      ValueError: if `scaling` is neither 0 nor 1 (and nb_random_projections
        is non-zero).
    """
    if nb_random_projections == 0:
        return None
    # Validate before creating any ops so that a bad argument fails fast.
    if scaling not in (0, 1):
        raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling)

    def _orthogonal_block(block_seed):
        """Returns a [dim, dim] block taken from the QR of a Gaussian matrix."""
        unstructured_block = tf.random.normal((dim, dim), seed=block_seed)
        q, _ = tf.linalg.qr(unstructured_block)
        return tf.transpose(q)

    nb_full_blocks, remaining_rows = divmod(nb_random_projections, dim)
    block_list = []
    current_seed = seed
    for _ in range(nb_full_blocks):
        block_list.append(_orthogonal_block(current_seed))
        current_seed = next_seed(current_seed)
    if remaining_rows > 0:
        # The last, partial block contributes only its first rows.
        block_list.append(_orthogonal_block(current_seed)[0:remaining_rows])
    final_matrix = tf.concat(block_list, 0)
    current_seed = next_seed(current_seed)

    if scaling == 0:
        # Row lengths are the norms of independent Gaussian vectors.
        squares = tf.math.square(
            tf.random.normal((nb_random_projections, dim), seed=current_seed))
        squared_lengths = tf.math.reduce_sum(squares, axis=1)
        multiplier = tf.math.sqrt(squared_lengths)
    else:
        multiplier = tf.math.sqrt(float(dim)) * tf.ones(
            (nb_random_projections))

    # Scale row i by multiplier[i]. This equals diag(multiplier) @ matrix but
    # does not materialize an N x N diagonal matrix.
    return multiplier[:, None] * final_matrix
コード例 #9
0
 def testBasic(self):
     """Checks RelPositionBias on an identity-like content tensor."""
     with self.session():
         t = 3
         num_rel_positions = 2 * t - 1
         # [BTNH].
         identity = tf.linalg.diag(tf.ones([t]))
         content = identity[None, :, None, :]
         # [LNH].
         flat_emb = tf.range(t * num_rel_positions, dtype=tf.float32)
         abs_pos_emb = tf.reshape(flat_emb, [num_rel_positions, 1, t])
         tf.logging.info('content=%s abs_pos_emb=%s', content.eval(),
                         abs_pos_emb.eval())
         expected = [[[[6., 3., 0.], [10., 7., 4.], [14., 11., 8.]]]]
         actual = attention_util.RelPositionBias(content, abs_pos_emb).eval()
         self.assertAllClose(expected, actual)
コード例 #10
0
ファイル: favor_attention.py プロジェクト: rxhmdia/lingvo
def noncausal_denominator(qs, ks):
    """Computes FAVOR normalizer in noncausal attention.

    Args:
      qs: query_prime tensor of the shape [L,B,H,M].
      ks: key_prime tensor of the shape [L,B,H,M].

    Returns:
      FAVOR normalizer in noncausal attention, of shape [L,B,H].
    """
    # Summing over the length axis equals contracting with a vector of ones,
    # but does not need a statically known L and does not force float32.
    ks_sum = tf.reduce_sum(ks, axis=0)
    return tf.einsum("lbhm,bhm->lbh", qs, ks_sum)
コード例 #11
0
ファイル: plot_test.py プロジェクト: galv/lingvo-copy
 def testDoesNotDieOnMatplotlibError(self):
   """Data with invalid dimensions still produces a 1x1 placeholder image."""
   bad_rank_data = tf.ones((5,))
   with self.session() as sess:
     figure = plot.MatplotlibFigureSummary(
         'summary', self.FIGSIZE, max_outputs=1)
     figure.AddSubplot([bad_rank_data])
     image_summary = figure.Finalize()
     serialized = sess.run(image_summary)
   parsed = tf.summary.Summary.FromString(serialized)
   self.assertEqual(len(parsed.value), 1)
   # A dummy 1-pixel image is expected.
   image = parsed.value[0].image
   self.assertEqual(image.width, 1)
   self.assertEqual(image.height, 1)
コード例 #12
0
ファイル: plot_test.py プロジェクト: zhangdong1992/lingvo
 def testLargerBatch(self):
     """Each of the batch elements gets its own rendered image summary."""
     batch_size = 4
     subplot_data = (tf.ones((batch_size, 3, 5)),
                     tf.ones((batch_size, 2, 2)))
     with self.session() as sess:
         figure = plot.MatplotlibFigureSummary(
             'larger_batch', self.FIGSIZE, max_outputs=batch_size)
         for data in subplot_data:
             figure.AddSubplot([data])
         image_summary = figure.Finalize()
         serialized = sess.run(image_summary)
     parsed = tf.summary.Summary.FromString(serialized)
     self.assertEqual(len(parsed.value), batch_size)
     for index, entry in enumerate(parsed.value):
         image = entry.image
         self.assertEqual(entry.tag, u'larger_batch/image/%d' % index)
         self.assertEqual(image.width, self.EXPECTED_DPI * self.FIGSIZE[0])
         self.assertEqual(image.height, self.EXPECTED_DPI * self.FIGSIZE[1])
         self.assertEqual(image.colorspace, 3)
         self.assertNotEqual(image.encoded_image_string,
                             self.default_encoded_image)
コード例 #13
0
 def testSpectrumAugmenterWithPerDomainPolicyFreqMask(self):
   """Checks frequency masking when each domain has its own max mask width."""
   with self.session(use_gpu=False, graph=tf.Graph()):
     tf.random.set_seed(1234)
     inputs = tf.ones([6, 5, 4, 2], dtype=tf.float32)
     input_domain_ids = tf.constant(
         [[1] * 5, [2] * 5, [0] * 5, [2] * 5, [0] * 5, [1] * 5],
         dtype=tf.float32)
     paddings = tf.zeros([3, 5])
     p = spectrum_augmenter.SpectrumAugmenter.Params()
     p.name = 'specAug_layers'
     p.domain_ids = [0, 1, 2]
     p.freq_mask_max_bins = [0, 3, 8]
     p.time_mask_max_frames = 0
     p.random_seed = 1234
     specaug_layer = p.Instantiate()
     # One row per example: 1. where a frequency bin is expected to be kept,
     # 0. where it is expected to be masked. The same pattern holds for all 5
     # time steps and both channels of an example.
     kept_bins_per_example = [
         [0., 0., 1., 1.],
         [1., 0., 0., 0.],
         [1., 1., 1., 1.],
         [0., 0., 0., 0.],
         [1., 1., 1., 1.],
         [1., 0., 0., 1.],
     ]
     expected_output = np.array(
         [[[[kept, kept] for kept in bins] for _ in range(5)]
          for bins in kept_bins_per_example])
     h, _ = specaug_layer.FPropDefaultTheta(
         inputs, paddings, domain_ids=input_domain_ids)
     actual_layer_output = self.evaluate(h)
     print(np.array_repr(actual_layer_output))
     self.assertAllClose(actual_layer_output, expected_output)
コード例 #14
0
ファイル: input_generator.py プロジェクト: tensorflow/lingvo
    def _InputPaddingValue(self, key, tensorspec):
        """Returns the scalar used to pad the tensor stored under `key`.

        The TFDatasetBatchBySequenceLength DataSource calls this to find out
        which value to pad with.

        Args:
          key: The NestedMap key to return padding value for.
          tensorspec: a tf.TensorSpec describing the tensor to be padded.

        Returns:
          A scalar of dtype `tensorspec.dtype`: one when `key` ends with
          '_paddings', zero otherwise.
        """
        make_scalar = tf.ones if key.endswith('_paddings') else tf.zeros
        return make_scalar([], dtype=tensorspec.dtype)
コード例 #15
0
    def FProp(self, theta, inputs, paddings=None):
        """Apply global spatial pooling to inputs.

        Args:
          theta: A `.NestedMap` object containing weights' values of this
            layer and its children layers.
          inputs: The inputs tensor. It is expected to be of shape [batch,
            time, frequency, channel]. The time dimension corresponds to the
            height dimension as in images and the frequency dimension
            corresponds to the width dimension as in images.
          paddings: The paddings tensor. It is expected to be of shape [batch,
            time]. Defaults to None, which means there are no paddings.

        Returns:
          outputs, out_paddings pair.
           - outputs: has shape [batch, 1, 1, channel].
           - out_paddings: None or has shape [batch, 1].
        """
        p = self.params
        assert p.pooling_type in ['MAX', 'AVG'], p.pooling_type
        b, t, f = py_utils.GetShape(inputs, ndims=3)

        # mask is 1 at positions that take part in pooling, 0 at padded ones.
        if paddings is None:
            mask = tf.ones([b, t, 1, 1], p.dtype)
        else:
            paddings = py_utils.HasShape(paddings, [b, t])
            mask = 1.0 - paddings[..., tf.newaxis, tf.newaxis]

        if p.pooling_type == 'AVG':
            global_sum = tf.reduce_sum(inputs * mask,
                                       axis=[1, 2],
                                       keepdims=True)
            f = tf.cast(tf.convert_to_tensor(f), p.dtype)
            # Number of unpadded (time, frequency) cells per example; clamped
            # to at least 1 below to avoid dividing by zero.
            count = f * tf.reduce_sum(mask, axis=[1, 2], keepdims=True)
            out_feature = global_sum / tf.maximum(1.0, count)
        elif p.pooling_type == 'MAX':
            # Padded positions are replaced by a very negative value so that
            # they never win the max.
            large_negative = (tf.ones_like(inputs) * p.dtype.max *
                              tf.constant(-0.7, dtype=p.dtype))
            padded_inputs = tf.where_v2(mask > 0.0, inputs, large_negative)
            out_feature = tf.reduce_max(padded_inputs,
                                        axis=[1, 2],
                                        keepdims=True)
        if paddings is None:
            out_paddings = None
        else:
            # An example counts as padded only if all of its frames are
            # padded; its pooled feature is then zeroed.
            out_paddings = tf.reduce_min(paddings, axis=1, keepdims=True)
            out_feature *= 1.0 - out_paddings[..., tf.newaxis, tf.newaxis]
        return out_feature, out_paddings
コード例 #16
0
    def forward(inputs, alpha):
        """Computes probabilities by bisection on a threshold, plus a grad fn.

        NOTE(review): `axis`, `n_iter`, `ensure_sum_one` and
        `_calculate_probability` are not defined in this block; presumably
        they come from the enclosing scope -- confirm.

        Args:
          inputs: Tensor of scores. Its static shape is read through
            `get_shape().as_list()`; presumably it has to be fully defined so
            that `tf.fill` can use it -- TODO confirm.
          alpha: Value that is filled into a tensor shaped like `inputs` with
            dimension `axis` set to 1, then cast to `inputs.dtype`.

        Returns:
          A `(p_m, grad_fn)` pair: `p_m` is the probability tensor from the
          last bisection step (renormalized along `axis` when
          `ensure_sum_one` is set) and `grad_fn` maps upstream gradients to a
          pair of gradients.
        """
        with tf.name_scope("entmax_loss"):
            alpha_shape = inputs.get_shape().as_list()

            # alpha gets size 1 along `axis` so it broadcasts against inputs.
            alpha_shape[axis] = 1
            alpha = tf.fill(alpha_shape, alpha)
            alpha = tf.cast(alpha, dtype=inputs.dtype)

            # d is the static size of the dimension that is normalized over.
            d = inputs.get_shape().as_list()[axis]
            alpha_m1 = alpha - 1.0

            # From here on `inputs` holds the scores scaled by (alpha - 1).
            inputs = inputs * alpha_m1

            # Initial bracket for the threshold tau:
            #   tau_lo = max - 1,  tau_hi = max - (1 / d) ** (alpha - 1).
            max_val = tf.math.reduce_max(inputs, axis=axis, keepdims=True)
            tau_lo = max_val - tf.ones(alpha.get_shape().as_list(),
                                       dtype=inputs.dtype)
            tau_hi = max_val - tf.math.pow(
                tf.cast((1.0 / d), dtype=inputs.dtype), alpha_m1)

            # f(tau) = sum of probabilities along `axis` minus 1, evaluated at
            # the initial lower end of the bracket.
            f_lo = tf.math.reduce_sum(
                _calculate_probability(tf.math.subtract(inputs, tau_lo),
                                       alpha), axis) - 1.0

            dm = tau_hi - tau_lo

            # Bisection: every step halves the bracket width and evaluates f
            # at tau_lo + dm.
            for _ in range(n_iter):
                dm /= 2
                tau_m = tau_lo + dm
                p_m = _calculate_probability(inputs - tau_m, alpha)
                f_m = tf.math.reduce_sum(p_m, axis) - 1.0

                # Where f(tau_m) has the same sign as f_lo, move the lower end
                # up to tau_m; elsewhere tau_lo is kept.
                mask = tf.expand_dims(tf.math.greater(f_m * f_lo, 0), axis)
                tau_lo = tf.where(mask, tau_m, tau_lo)

            # NOTE(review): p_m is only assigned inside the loop above, so
            # n_iter presumably has to be >= 1 -- confirm.
            if ensure_sum_one:
                p_m /= tf.expand_dims(tf.math.reduce_sum(p_m, axis), axis)

        def grad_fn(d_outputs):
            """Maps the upstream gradient `d_outputs` to the returned pair."""
            with tf.name_scope("entmax_grad"):
                # gppr = p_m ** (2 - alpha) where p_m > 0, and 0 elsewhere.
                gppr = tf.where(p_m > 0, tf.math.pow(p_m, 2.0 - alpha),
                                tf.zeros_like(p_m))
                d_inputs = d_outputs * gppr
                # q is the gppr-weighted mean of d_outputs along `axis`; it is
                # subtracted (scaled by gppr) from the elementwise product.
                q = tf.math.reduce_sum(d_inputs, axis) / tf.math.reduce_sum(
                    gppr, axis)
                q = tf.expand_dims(q, axis)
                d_inputs -= q * gppr
                # NOTE(review): the same tensor is returned for both
                # positions; the second presumably stands in for the gradient
                # w.r.t. `alpha` -- confirm this is intended.
                return d_inputs, d_inputs

        return p_m, grad_fn
コード例 #17
0
def _ComputeConvOutputPaddingV2(paddings,
                                window,
                                stride,
                                padding_algorithm='SAME'):
  """Computes paddings for convolution and pooling output.

  - If padding_algorithm='SAME': out_padding[i] == 0 if the in_padding
    corresponding to that output is 0. This prevents the output from shrinking
    unnecessarily when striding.
  - If padding algorithm='VALID': out_padding[i] == 1 iff any in_padding
    corresponding to that output is 1.

  Args:
    paddings: The paddings tensor. It is expected to be of shape [batch, time].
    window: The size of the windows.
    stride: The time-stride between adjacent windows.
    padding_algorithm: 'SAME' or 'VALID'.

  Returns:
    out_padding, The new padding tensor of size [batch, ceil(time / stride)].
  """
  if stride == 1 and padding_algorithm == 'SAME':
    return paddings

  paddings, slice_len = _PadForLengthCompatibleStridesV2(
      paddings, stride, padding_algorithm, 1.0)

  expanded_paddings = tf.expand_dims(paddings, -1)

  if padding_algorithm == 'SAME':
    # Using a strided conv1d of size 1x1 we find all non-padded positions for
    # the specified stride.
    out_paddings = tf.nn.conv1d(
        expanded_paddings,
        filters=tf.ones([1, 1, 1], paddings.dtype),
        stride=stride,
        padding='SAME',
        name='padding_conv')
  elif padding_algorithm == 'VALID':
    out_paddings = tf.nn.pool(
        expanded_paddings, [window],
        'MAX',
        padding=padding_algorithm,
        strides=[stride])
  out_paddings = tf.squeeze(out_paddings, -1)
  if stride > 1:
    slice_end = py_utils.GetShape(out_paddings)[1] - slice_len
    out_paddings = out_paddings[:, :slice_end]
  return out_paddings
コード例 #18
0
 def testBasic(self):
   """Checks PositionalAttenLogits.RelPositionBias on identity-like content."""
   with self.session():
     t = 3
     num_rel_positions = 2 * t - 1
     # [BTNH].
     identity = tf.linalg.diag(tf.ones([t]))
     content = identity[None, :, None, :]
     # [LNH].
     flat_emb = tf.range(t * num_rel_positions, dtype=tf.float32)
     abs_pos_emb = tf.reshape(flat_emb, [num_rel_positions, 1, t])
     tf.logging.info('content=%s abs_pos_emb=%s', content.eval(),
                     abs_pos_emb.eval())
     layer_params = attention_util.PositionalAttenLogits.Params().Set(
         name='rel_pos_bias')
     pos_atten_logits = layer_params.Instantiate()
     expected = [[[[6., 3., 0.], [10., 7., 4.], [14., 11., 8.]]]]
     actual = pos_atten_logits.RelPositionBias(content, abs_pos_emb).eval()
     self.assertAllClose(expected, actual)
コード例 #19
0
        def _GetFurthestPoint():
            """Returns, per batch row, the index of the furthest point.

            Sampling is biased towards real points: as long as real points
            remain, padded points are given a distance of -1 so that they are
            not picked by the argmax.
            """
            minus_one = -1.0 * tf.ones(
                (batch_size, num_points), dtype=tf.float32)
            is_real_point = tf.equal(padding, 0.0)
            real_only_distance = tf.where(is_real_point, distance_to_selected,
                                          minus_one)
            # Once the real points are used up, fall back to the raw distances.
            has_real_points_left = tf.less(curr_idx, num_valid_points)
            effective_distance = tf.where(has_real_points_left,
                                          real_only_distance,
                                          distance_to_selected)
            return tf.argmax(effective_distance,
                             axis=-1,
                             output_type=tf.int32)
コード例 #20
0
 def _CreateDynamicShapeInputs(self, batch_dim, length_dim, input_dim):
   """Creates inputs whose time dimension is only known at run time.

   Args:
     batch_dim: Batch size.
     length_dim: Upper bound of the sequence length before trimming.
     input_dim: Feature dimension of the inputs.

   Returns:
     An (inputs, segment_ids, segment_pos) tuple, all trimmed along time to
     the longest run of ones in segment_ids; segment_pos is all zeros.
   """
   full_inputs = tf.random.normal(
       [batch_dim, length_dim, input_dim], seed=92837472)
   # segment_ids is a random number of leading 1s followed by 0s.
   num_ones = tf.random.uniform(
       shape=(), minval=1, maxval=length_dim, dtype=tf.int32)
   leading_ones = tf.ones([batch_dim, num_ones])
   trailing_zeros = tf.zeros([batch_dim, length_dim - num_ones])
   full_segment_ids = tf.concat([leading_ones, trailing_zeros], axis=1)
   # Trim the trailing positions beyond the longest sequence in the batch.
   longest = tf.reduce_max(tf.reduce_sum(full_segment_ids, -1))
   max_seq_len = tf.cast(longest, tf.int32)
   inputs = full_inputs[:, :max_seq_len, :]
   segment_ids = full_segment_ids[:, :max_seq_len]
   unused_segment_pos = tf.zeros_like(segment_ids)
   return inputs, segment_ids, unused_segment_pos
コード例 #21
0
ファイル: frontend_test.py プロジェクト: galv/lingvo-copy
    def testMelFeaturesPaddedRightStacked(self):
        """Checks output paddings of the frontend with right-context stacking."""
        self._CreateFrontendParams()
        p = self.params
        p.stack_right_context = 2
        p.frame_stride = p.stack_right_context + 1
        mel_frontend = p.Instantiate()
        sample_rate, pcm = self._GetPcm()
        pcm *= 32768

        # Convert to 4D [batch, time, packet, channels].
        packet_size = 11  # A non-round number.
        num_unpadded_packets = 455
        num_whole_packets = tf.shape(pcm)[1] // packet_size
        trimmed_pcm = pcm[:, 0:num_whole_packets * packet_size]
        src_inputs = tf.reshape(trimmed_pcm, (1, -1, packet_size, 1))

        # Only the first 455 packets are unpadded; the rest are padded.
        num_padded_packets = tf.shape(src_inputs)[1] - num_unpadded_packets
        paddings = tf.concat([
            tf.zeros([1, num_unpadded_packets], dtype=tf.float32),
            tf.ones([1, num_padded_packets], dtype=tf.float32),
        ], axis=1)
        # frame_step=240, frame_size=600, +1200 right padded frames
        # 455 packets * 11 frames rounds = 5005 frames, rounds down to 21 mel
        # frames. Divide by 3 for stacking = 7.
        # TODO(talremez): Make sure with this makes sense.
        expected_unpadded = 6

        outputs = mel_frontend.FPropDefaultTheta(
            py_utils.NestedMap(src_inputs=src_inputs, paddings=paddings))

        with self.session():
            pcm_val = self.evaluate(pcm)
            tf.logging.info('pcm: ~ %s = %s', pcm_val.shape, pcm_val)
            self.assertGreater(33000, np.amax(pcm_val))
            self.assertGreater(np.amax(pcm_val), 2.)
            log_mel_val, paddings_val, sample_rate_val = self.evaluate(
                [outputs.src_inputs, outputs.paddings, sample_rate])
            self.assertEqual(sample_rate_val, p.sample_rate)
            self.assertEqual(paddings_val.shape, log_mel_val.shape[0:2])
            num_padded = paddings_val.shape[1] - expected_unpadded
            self.assertAllEqual(paddings_val[:, 0:expected_unpadded],
                                np.zeros([1, expected_unpadded]))
            self.assertAllEqual(paddings_val[:, expected_unpadded:],
                                np.ones([1, num_padded]))
コード例 #22
0
 def testMassLayer(self):
     """Checks the number of masked tokens and weights produced by MASS."""
     with self.session(use_gpu=False) as sess:
         batch_size, seq_len = 3, 10
         mass_layer = data_augmenter.MASS(self._MassParams())
         seq_ids = tf.fill([batch_size, seq_len], 4)
         weights = tf.ones([batch_size, seq_len])
         actual_seq_len = tf.fill([batch_size], 10)
         mass_out = mass_layer.Mask(seq_ids, weights, actual_seq_len)
         fetches = [
             mass_out.src.ids, mass_out.tgt.ids, mass_out.tgt.labels,
             mass_out.tgt.weights
         ]
         src_ids, tgt_ids, tgt_labels, tgt_weights = sess.run(fetches)
         # Id 3 is expected exactly 5 times per row on both sides.
         for ids in (src_ids, tgt_ids):
             self.assertAllEqual(np.sum(ids == 3, axis=1), [5, 5, 5])
         expected_labels = 4 * np.ones([batch_size, seq_len], dtype=np.int32)
         self.assertAllEqual(tgt_labels, expected_labels)
         self.assertAllEqual(np.sum(tgt_weights, axis=1), [5., 5., 5.])
コード例 #23
0
  def _Extract(self, features):
    """Extracts point coordinates and features, optionally padded.

    Args:
      features: Mapping from feature name to the parsed feature value.

    Returns:
      A NestedMap with `points_xyz` ([-1, 3]) and `points_feature`
      ([-1, p.num_features]). When `p.max_num_points` is set, both are padded
      or trimmed to that many points and `points_padding` is added, computed
      as 1.0 minus the padded all-ones mask of the original points.
    """
    p = self.params
    points_xyz = tf.reshape(_Dense(features['pointcloud/xyz']), [-1, 3])
    points_feature = tf.reshape(
        _Dense(features['pointcloud/reflectance']), [-1, p.num_features])

    if p.max_num_points is None:
      return py_utils.NestedMap(
          points_xyz=points_xyz, points_feature=points_feature)

    max_points = p.max_num_points
    num_real_points = tf.shape(points_xyz)[0]
    padded_xyz = py_utils.PadOrTrimTo(points_xyz, [max_points, 3])
    padded_feature = py_utils.PadOrTrimTo(points_feature,
                                          [max_points, p.num_features])
    real_point_mask = py_utils.PadOrTrimTo(
        tf.ones([num_real_points]), [max_points])
    points_padding = 1.0 - real_point_mask

    ret = py_utils.NestedMap(
        points_xyz=padded_xyz, points_feature=padded_feature)
    ret.points_padding = points_padding
    return ret
コード例 #24
0
 def testRematerialize(self):
     """Tests dropout consistency between fprop and bprop under remat.

     Builds one leading dropout layer followed by four dropout layers
     grouped two per cell, each cell wrapped by the builder's
     _Rematerialize. The loss is the sum of the output, so with all-ones
     input and weights the gradient w.r.t. the weights should equal the
     forward activation if the same dropout decisions are used in both
     passes.
     """
     b = builder.Base.Params().Instantiate()
     keep_prob = 0.7
     start_block = layers.DeterministicDropoutLayer.Params().Set(
         name='start_dropout', keep_prob=keep_prob)

     # Four dropout layers, grouped into cells of two.
     num_blocks = 4
     blocks_per_cell = 2
     dropouts = [
         layers.DeterministicDropoutLayer.Params().Set(
             name='dropout_{}'.format(idx), keep_prob=keep_prob)
         for idx in range(num_blocks)
     ]
     cells = []
     for first in range(0, num_blocks, blocks_per_cell):
         cell_name = 'cell_{}'.format(len(cells))
         members = dropouts[first:first + blocks_per_cell]
         cells.append(
             b._Rematerialize(name=cell_name,
                              body=b._Seq(cell_name, *members)))

     with self.session(use_gpu=False, graph=tf.Graph()) as sess:
         tf.random.set_seed(12345)
         mdl = b._Seq('test', start_block, *cells).Instantiate()
         # y = mdl.FProp(x * w), with an all-ones fake input.
         x = tf.ones([4, 5])
         # Weights are initialized to all ones as well.
         w = tf.get_variable('w',
                             shape=[4, 5],
                             initializer=tf.constant_initializer([[1] * 5] *
                                                                 4))
         y = mdl.FPropDefaultTheta(x * w)
         # The loss is built so that the gradient equals the final
         # activation: dy/dw = y = mdl.FProp(x * w) when w is 1.
         loss = tf.reduce_sum(y)
         grads = py_utils.ComputeGradients(loss, py_utils.NestedMap(w=w))
         tf.global_variables_initializer().run()
         y_val, grads_val = sess.run([y, grads.Transform(tuple)])
         # Index 1 of the fetched tuple for 'w' is compared against y.
         self.assertAllClose(y_val, grads_val['w'][1])
         self.assertEqual(py_utils.GetStepSeed().eval(), 1553244033)
コード例 #25
0
        def Callback(theta, encoder_outputs, num_hyps_per_beam):
            """Initializes beam search states, adding optional extra fields.

            Delegates to self._InitBeamSearchStateCallback and then, depending
            on the `biased` and `stochastic` flags captured from the enclosing
            scope, attaches additional tensors to the returned states.

            Args:
              theta: Passed through to _InitBeamSearchStateCallback.
              encoder_outputs: Object with a `padding` attribute that is
                either a tensor or a mapping of tensors; dimension 1 of the
                (first) padding tensor is used as the batch size.
              num_hyps_per_beam: Number of hypotheses per beam.

            Returns:
              A tuple (initial_results, states).
            """
            initial_results, states = self._InitBeamSearchStateCallback(
                theta, encoder_outputs, num_hyps_per_beam)
            assert hasattr(states, 'time_step')

            padding = encoder_outputs.padding
            if not tf.is_tensor(padding):
                # Required for multisource models: use the first entry of
                # the padding mapping.
                padding = list(padding.values())[0]
            total_hyps = tf.shape(padding)[1] * num_hyps_per_beam

            if biased:
                # states.consistent is initially all True.
                states.consistent = tf.ones([total_hyps], dtype=tf.bool)

            if not stochastic:
                return initial_results, states

            float_dtype = py_utils.FPropDtype(self.params)

            def _Zeros(width, dtype=float_dtype):
                # Zero tensor with one row per hypothesis.
                return tf.zeros([total_hyps, width], dtype=dtype)

            states.cumulative_log_probs = _Zeros(1)
            states.perturbed_cumulative_log_probs = _Zeros(1)
            # Scratch tensors carrying information from
            # PreBeamSearchStepCallback to PostBeamSearchStepCallback. They
            # are needed to update states.cumulative_log_probs and
            # states.perturbed_cumulative_log_probs for the next step, since
            # the chosen IDs only become known after
            # PreBeamSearchStepCallback.
            states.tmp_states = py_utils.NestedMap(
                # Top-k (non-perturbed) log-probs; used to update
                # `cumulative_log_probs` in PostBeamSearchStepCallback.
                top_k_log_probs=_Zeros(k),
                # Vocab ID of each entry of `top_k_log_probs`.
                top_k_ids=_Zeros(k, dtype=tf.int32),
                # Perturbed cumulative log-probs of the top-k IDs; used to
                # update `perturbed_cumulative_log_probs` in
                # PostBeamSearchStepCallback.
                new_perturbed_cumulative_log_probs=_Zeros(k),
            )
            return initial_results, states
コード例 #26
0
    def _Extract(self, features):
        """Collects laser points and flow features across lidars and returns.

        For every (lidar, return) pair in p.lidar_names x p.lidar_returns,
        reads 'laser_<lidar>_<ri>' (reshaped to [-1, 3 + p.num_features]) and
        'laser_<lidar>_<ri>_flow' (reshaped to [-1, 4]). The first three laser
        columns become xyz; the remaining laser columns are concatenated with
        the flow columns to form the per-point features.

        Args:
          features: Mapping holding the laser and flow entries described
            above; each is passed through _Dense before being reshaped.

        Returns:
          A NestedMap with points_xyz and points_feature concatenated over
          all lidars/returns along axis 0. When p.max_num_points is set, both
          are padded or trimmed to that many rows and points_padding is
          added, computed as 1.0 minus a padded/trimmed vector of ones (so,
          assuming PadOrTrimTo pads with zeros, 1.0 marks padded rows).
        """
        p = self.params
        xyz_parts = []
        feature_parts = []

        for lidar in p.lidar_names:
            for ri in p.lidar_returns:
                laser_key = 'laser_%s_%s' % (lidar, ri)
                # The laser entry and its '_flow' counterpart are expected
                # to contain the same number of points.
                flow_key = laser_key + '_flow'
                laser = tf.reshape(_Dense(features[laser_key]),
                                   [-1, 3 + p.num_features])
                flow = tf.reshape(_Dense(features[flow_key]), [-1, 3 + 1])

                xyz_parts.append(laser[..., 0:3])
                feature_parts.append(
                    tf.concat([laser[..., 3:], flow], axis=1))

        # Join the points of every lidar/return along the major dimension.
        xyz = tf.concat(xyz_parts, axis=0)
        feats = tf.concat(feature_parts, axis=0)

        limit = p.max_num_points
        if limit is None:
            return py_utils.NestedMap(points_xyz=xyz, points_feature=feats)

        # Record the point count before padding/trimming changes it.
        num_points = tf.shape(xyz)[0]
        xyz = py_utils.PadOrTrimTo(xyz, [limit, 3])
        # The feature width is the laser features plus the 4 flow columns.
        feats = py_utils.PadOrTrimTo(feats, [limit, p.num_features + 4])
        valid = py_utils.PadOrTrimTo(tf.ones([num_points]), [limit])

        ret = py_utils.NestedMap(points_xyz=xyz, points_feature=feats)
        ret.points_padding = 1.0 - valid
        return ret
コード例 #27
0
 def testSpectrumAugmenterWithFrequencyMask(self):
   """Checks both augmenter variants agree when only frequency masking runs.

   SpectrumAugmenter and SpectrumAugmenterOnDevice are configured with the
   same frequency-mask settings and random seed, with time masking
   disabled, and their outputs on an all-ones input are compared.
   """
   layer_classes = (
       spectrum_augmenter.SpectrumAugmenter,
       spectrum_augmenter_on_device.SpectrumAugmenterOnDevice,
   )
   with self.session(use_gpu=False, graph=tf.Graph()):
     tf.random.set_seed(1234)
     inputs = tf.ones([3, 5, 10, 1], dtype=tf.float32)
     paddings = tf.zeros([3, 5])
     outputs = []
     for layer_cls in layer_classes:
       layer_params = layer_cls.Params()
       layer_params.name = 'specAug_layers'
       layer_params.freq_mask_max_bins = 6
       layer_params.freq_mask_count = 2
       # A zero maximum disables time masking.
       layer_params.time_mask_max_frames = 0
       layer_params.random_seed = 34567
       layer = layer_params.Instantiate()
       augmented, _ = layer.FPropDefaultTheta(inputs, paddings)
       outputs.append(augmented)
     reference, on_device = self.evaluate(outputs)
     self.assertAllClose(reference, on_device)
コード例 #28
0
    def _GetBetaGamma(self, theta, inputs, **kwargs):
        """Selects per-example beta/gamma by class and reshapes to broadcast.

        Args:
          theta: Object providing `beta` and `gamma`, gathered along their
            first dimension by class id.
          inputs: Tensor whose leading dimension is the batch size and whose
            rank determines how many singleton dimensions are inserted.
          **kwargs: Must contain 'class_emb', described by the original
            author as a one-hot vector of shape
            [batch, class_emb_dim=num_classes].

        Returns:
          A tuple (beta, gamma), each reshaped to [batch, 1, ..., 1, dim]
          where dim is self.params.dim.
        """
        assert 'class_emb' in kwargs

        # Recover integer class ids from the one-hot embedding.
        class_ids = tf.math.argmax(
            kwargs['class_emb'], axis=-1, output_type=tf.int32)

        # [batch, dim] each. A gather is used rather than matmul/einsum to
        # avoid potential precision problems on TPU with sparse inputs.
        selected = [
            tf.gather(table, class_ids)
            for table in (theta.beta, theta.gamma)
        ]

        # Target shape [batch, 1, ..., 1, dim]: one singleton dimension for
        # every input dimension between the first and the last.
        batch = py_utils.GetShape(inputs)[0]
        num_singletons = py_utils.GetRank(inputs) - 2
        to_shape = tf.concat(
            [[batch],
             tf.ones([num_singletons], tf.int32), [self.params.dim]],
            axis=0)

        beta, gamma = [tf.reshape(value, to_shape) for value in selected]
        return beta, gamma
コード例 #29
0
ファイル: plot_test.py プロジェクト: zhangdong1992/lingvo
    def testAddMultiCurveSubplot(self):
        """Checks AddMultiCurveSubplot emits one non-default image per example.

        Two identical curves are plotted for a batch of two examples; the
        finalized summary must hold one image per example, tagged
        'XXX/image/<index>', with positive dimensions and contents that
        differ from self.default_encoded_image.
        """
        num_examples = 2
        with self.session(graph=tf.Graph(), use_gpu=False) as sess:
            fig = plot.MatplotlibFigureSummary('XXX')
            curve = tf.ones([num_examples, 3])
            paddings = tf.constant([[0., 0., 0.], [0., 1., 1.]])
            plot.AddMultiCurveSubplot(
                fig, [curve, curve],
                paddings,
                labels=['label1', 'label2'],
                xlabels=tf.constant(['a', 'b']),
                title='Title',
                ylabel='Ylabel')
            serialized = sess.run(fig.Finalize())

        summary = tf.Summary.FromString(serialized)
        self.assertEqual(len(summary.value), num_examples)
        for idx, entry in enumerate(summary.value):
            image = entry.image
            self.assertEqual(entry.tag, 'XXX/image/%d' % idx)
            self.assertGreater(image.width, 0)
            self.assertGreater(image.height, 0)
            self.assertNotEqual(image.encoded_image_string,
                                self.default_encoded_image)
コード例 #30
0
 def testSpectrumAugmenterUnstacking(self):
     """Checks both augmenter variants agree when unstacking is enabled.

     SpectrumAugmenter and SpectrumAugmenterOnDevice receive identical
     settings (unstack on, stack height 2, frequency and time masking, same
     random seed) and their outputs on an all-ones input are compared.
     """
     # Applied in this order to each layer's params.
     overrides = {
         'name': 'specAug_layers',
         'unstack': True,
         'stack_height': 2,
         'freq_mask_max_bins': 5,
         'time_mask_max_frames': 8,
         'random_seed': 12345,
     }
     with self.session(use_gpu=False, graph=tf.Graph()) as sess:
         tf.random.set_seed(1234)
         inputs = tf.ones([3, 5, 10, 1], dtype=tf.float32)
         paddings = tf.zeros([3, 5])
         outputs = []
         all_params = [
             spectrum_augmenter.SpectrumAugmenter.Params(),
             spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params(),
         ]
         for layer_params in all_params:
             for key, value in overrides.items():
                 setattr(layer_params, key, value)
             layer = layer_params.Instantiate()
             augmented, _ = layer.FPropDefaultTheta(inputs, paddings)
             outputs.append(augmented)
         layer_output, layer_output_on_device = sess.run(outputs)
         self.assertAllClose(layer_output, layer_output_on_device)