Example #1
  def _GetBetaGamma(self, theta, inputs, **kwargs):
    p = self.params

    assert 'class_emb' in kwargs
    class_emb = kwargs['class_emb']

    # class_emb is a one-hot vector of shape [batch, class_emb_dim=num_classes].
    class_ids = tf.math.argmax(class_emb, axis=-1, output_type=tf.int32)
    # [batch, dim]
    # Not using matmul/einsum to avoid potential precision problem on TPU with
    # sparse inputs.
    beta = tf.gather(theta.beta, class_ids)
    gamma = tf.gather(theta.gamma, class_ids)
    if not p.gamma_zero_init and not p.gamma_one_init:
      # Note: the real gamma to use is 1 + gamma.
      gamma = 1.0 + gamma

    # Extend to [batch, 1, ... 1, dim]
    batch = py_utils.GetShape(inputs)[0]
    to_shape = tf.concat(
        [[batch],
         tf.ones([py_utils.GetRank(inputs) - 2], tf.int32), [self.params.dim]],
        axis=0)
    beta = tf.reshape(beta, to_shape)
    gamma = tf.reshape(gamma, to_shape)
    return beta, gamma
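
A standalone sketch of the one-hot trick used above: gathering rows at the argmax of a one-hot class_emb gives the same result as a dense matmul with the table, while avoiding the sparse matmul the comment flags as a TPU precision risk. Plain TF2 eager, hypothetical shapes, outside lingvo:

import tensorflow as tf

num_classes, dim = 10, 4
table = tf.random.normal([num_classes, dim])        # stands in for theta.beta / theta.gamma
class_emb = tf.one_hot([3, 7], depth=num_classes)   # one-hot [batch, num_classes]

# Row lookup via argmax of the one-hot vector ...
class_ids = tf.math.argmax(class_emb, axis=-1, output_type=tf.int32)
gathered = tf.gather(table, class_ids)              # [batch, dim]

# ... matches the dense matmul formulation.
via_matmul = tf.matmul(class_emb, table)            # [batch, dim]
tf.debugging.assert_near(gathered, via_matmul)
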
Example #2
  def ComputePredictions(self, theta, input_batch):
    # Forward through layers.
    act = self.extract.FProp(theta.extract, input_batch.data)
    # Avg pool
    if py_utils.GetRank(act) == 4:
      act = tf.reduce_mean(act, axis=[1, 2])
    act = py_utils.HasRank(act, 2)
    logits = self.softmax.Logits(theta.softmax, act)
    return py_utils.NestedMap(act=act, logits=logits)
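
The pooling step above collapses a rank-4 [batch, height, width, channels] activation to [batch, channels] by averaging over the two spatial axes. A minimal illustration with made-up shapes (plain TensorFlow, not lingvo):

import tensorflow as tf

act = tf.random.normal([2, 7, 7, 64])       # [batch, height, width, channels]
pooled = tf.reduce_mean(act, axis=[1, 2])   # global average pool
assert pooled.shape == (2, 64)              # [batch, channels]
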
Example #3
  def _TestStreamStepHelper(self, **kwargs):
    """Main helper method."""
    batch_size, max_seqlen, input_dim = 2, 32, kwargs['input_dim']

    stride = kwargs.get('stride', 1)
    # max_seqlen is divisible by stride.
    assert max_seqlen % stride == 0

    right_context = kwargs.get('right_context', 0)

    # Prepares inputs.
    inputs, paddings = self._GetInputs(batch_size, max_seqlen, input_dim)

    # Gets params
    p = self._GetParams(**kwargs)

    # Builds graph.
    with self.session(use_gpu=False) as sess:
      l = p.Instantiate()
      init_op = tf.global_variables_initializer()

      fprop_out = self._FProp(l, inputs, paddings)
      base_outputs = self._GetFPropOutput(fprop_out)
      out_rank = py_utils.GetRank(base_outputs)
      base_outputs *= py_utils.AppendDims(1. - paddings, out_rank - 2)

      try:
        state = l.zero_state(batch_size)
      except TypeError:
        state = l.zero_state(l.theta, batch_size)
      outputs = []
      for i in range(max_seqlen // stride +
                     int(math.ceil(right_context / stride))):
        if i < max_seqlen // stride:
          step_inputs = inputs[:, stride * i:stride * (i + 1)]
          step_paddings = paddings[:, stride * i:stride * (i + 1)]
        else:
          step_inputs = tf.zeros_like(inputs[:, 0:stride])
          step_paddings = tf.ones_like(paddings[:, 0:stride])
        output, _, state = l.StreamStep(l.theta, step_inputs, step_paddings,
                                        state)
        outputs.append(output)

      outputs = tf.concat(outputs, axis=1)
      outputs = self._NormalizeStreamStepOutput(outputs, paddings,
                                                right_context, max_seqlen)

      sess.run(init_op)

      expected, actual = sess.run([base_outputs, outputs])
      print(f'expected: {repr(expected)}, {expected.shape}')
      print(f'actual: {repr(actual)}, {actual.shape}')
      print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
      print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
      tol = kwargs.get('tol', 1e-6)
      self.assertAllClose(expected, actual, atol=tol, rtol=tol)
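
The loop above runs max_seqlen // stride steps over real frames plus ceil(right_context / stride) extra steps of zero inputs and all-one paddings to flush the layer's right context. A quick check of that step count with hypothetical stride and right_context values:

import math

max_seqlen, stride, right_context = 32, 4, 6    # stride and right_context are made up
num_steps = max_seqlen // stride + int(math.ceil(right_context / stride))
assert num_steps == 10    # 8 steps of real frames + 2 fully padded flush steps
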
Example #4
  def _NormalizeStreamStepOutput(self,
                                 outputs,
                                 paddings,
                                 right_context,
                                 max_seqlen,
                                 num_layers=1):
    # outputs is delayed from inputs by right_context * num_layers frames.
    outputs = outputs[:, right_context * num_layers:]
    # The trailing outputs correspond to the padded inputs fed in to complete
    # the last frame's right context.
    outputs = outputs[:, :max_seqlen]
    out_rank = py_utils.GetRank(outputs)
    paddings = paddings[:, :max_seqlen]
    return outputs * py_utils.AppendDims(1. - paddings, out_rank - 2)
Example #5
  def ComputeLoss(self, theta, predictions, input_batch):
    p = self.params
    batch = tf.shape(input_batch.data)[0]
    act = predictions.act
    with tf.ops.colocate_with(act):
      tf.logging.info("{}'s device: {}".format(act, act.device))
      # Softmax
      if py_utils.GetRank(input_batch.label) == 1:
        # Create one_hot labels if rank is 1.
        labels = tf.cast(input_batch.label, tf.int64)
        onehot_labels = tf.one_hot(labels, p.softmax.num_classes)
      else:
        onehot_labels = input_batch.label
        labels = tf.math.argmax(onehot_labels, axis=-1)
      if p.label_smoothing > 0:
        smooth_positives = 1.0 - p.label_smoothing
        smooth_negatives = p.label_smoothing / p.softmax.num_classes
        onehot_labels = onehot_labels * smooth_positives + smooth_negatives

      xent = self.softmax.FProp(
          theta=theta.softmax,
          inputs=act,
          class_weights=input_batch.weight,
          class_probabilities=onehot_labels)

    self._AddSummary(input_batch, xent.per_example_argmax)

    rets = {
        'loss': (xent.avg_xent, batch),
        'log_pplx': (xent.avg_xent, batch),
        'num_preds': (batch, 1),
    }
    if self.do_eval or p.compute_accuracy_for_training:
      acc1 = self._Accuracy(1, xent.logits, labels, input_batch.weight)
      acc5 = self._Accuracy(5, xent.logits, labels, input_batch.weight)
      rets.update(
          accuracy=(acc1, batch),
          acc5=(acc5, batch),
          error=(1. - acc1, batch),
          error5=(1. - acc5, batch))
    return rets, {'loss': xent.per_example_xent}
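
The label-smoothing branch mixes the one-hot targets toward the uniform distribution: each row becomes onehot * (1 - label_smoothing) + label_smoothing / num_classes and still sums to 1. A quick standalone check with made-up values:

import numpy as np

num_classes, label_smoothing = 5, 0.1
onehot = np.eye(num_classes, dtype=np.float32)[2]         # class 2 as a one-hot row
smooth_positives = 1.0 - label_smoothing
smooth_negatives = label_smoothing / num_classes
smoothed = onehot * smooth_positives + smooth_negatives   # [0.02, 0.02, 0.92, 0.02, 0.02]
assert np.isclose(smoothed.sum(), 1.0)
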
Example #6
    def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                       cached_var):
        """Computes mean and variance over the valid data points in inputs.

    Args:
      inputs: [B, T, F, N, G] or [B, T, N, G]
      paddings: [B, T, 1, 1, 1] or [B, T, 1, 1]
      cached_sum: [B, 1, 1, N, 1] or [B, 1, N, 1]
      cached_count: same shape as cached_sum.
      cached_var: same shape as cached_sum.

    Returns:
      mean: [B, T, 1, N, 1] or [B, T, N, 1]
      variance: same shape as mean.
      new_cached_sum: same shape as cached_sum.
      new_cached_count: same shape as cached_count.
      new_cached_var: same shape as cached_var.
    """
        tf.logging.vlog(1, 'inputs: %r', inputs)
        tf.logging.vlog(1, 'paddings: %r', paddings)
        tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
        tf.logging.vlog(1, 'cached_count: %r', cached_count)

        inputs = py_utils.ApplyPadding(paddings, inputs, use_select=False)

        input_rank = py_utils.GetRank(inputs)
        assert input_rank is not None, (f'inputs rank must be static for '
                                        f'{repr(inputs)}')
        reduce_over_dims = list(range(input_rank))
        # Skip B, T, and N. Reduce {F,G} or just G.
        reduce_over_dims = reduce_over_dims[2:-2] + reduce_over_dims[-1:]
        tf.logging.vlog(1, 'reduce_over_dims: %s', reduce_over_dims)

        # [B, T, 1, N, 1] or [B, T, N, 1]
        sum_v = tf.reduce_sum(inputs, reduce_over_dims, keepdims=True)
        sum_v = tf.math.cumsum(sum_v, axis=1)
        sum_v += cached_sum

        # [B, T, 1, 1, 1] or [B, T, 1, 1]
        mask = tf.cast(1.0 - paddings, inputs.dtype)
        count_v = tf.reduce_sum(mask, reduce_over_dims, keepdims=True)
        count_v = tf.math.cumsum(count_v, axis=1)
        input_shape = py_utils.GetShape(inputs)
        if input_rank == 4:
            # F * G
            multiplier = input_shape[-1] * input_shape[-3]
        else:
            # G
            multiplier = input_shape[-1]
        count_v *= multiplier
        count_v += cached_count

        tf.logging.vlog(1, 'sum_v: %r', sum_v)
        tf.logging.vlog(1, 'count_v: %r', count_v)

        mean = sum_v / tf.maximum(count_v, 1.0)

        sum_vv = tf.reduce_sum(py_utils.ApplyPadding(
            paddings,
            tf.math.squared_difference(inputs, mean),
            use_select=False),
                               reduce_over_dims,
                               keepdims=True)
        sum_vv = tf.math.cumsum(sum_vv, axis=1)
        sum_vv += cached_var

        cached_sum = sum_v[:, -1:]
        cached_count = count_v[:, -1:]
        cached_var = sum_vv[:, -1:]

        variance = py_utils.with_dependencies([
            py_utils.assert_greater_equal(sum_vv, tf.cast(0, sum_vv.dtype)),
        ], sum_vv / tf.maximum(count_v, 1.0))
        return mean, variance, cached_sum, cached_count, cached_var
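
The heart of _StreamMoments is a cumulative masked mean over time: padded frames are zeroed, per-frame sums and valid counts are accumulated with cumsum, and the trailing values become the cache for the next chunk. A minimal NumPy sketch of just the mean for a [B, T, N, G] input (made-up shapes, no caching):

import numpy as np

np.random.seed(0)
B, T, N, G = 2, 5, 3, 4
inputs = np.random.randn(B, T, N, G).astype(np.float32)
paddings = np.zeros([B, T, 1, 1], np.float32)
paddings[:, -2:] = 1.0                        # the last two frames are padding

masked = inputs * (1.0 - paddings)            # zero out padded frames
sum_v = np.cumsum(masked.sum(axis=-1, keepdims=True), axis=1)                  # [B, T, N, 1]
count_v = np.cumsum((1.0 - paddings).sum(axis=-1, keepdims=True) * G, axis=1)  # [B, T, 1, 1]
mean = sum_v / np.maximum(count_v, 1.0)       # cumulative mean at every step t

# At step t this equals the plain mean over all valid values seen so far.
t = 2
expected = inputs[0, :t + 1, 0].mean()        # frames 0..2 of batch 0, group 0 are all valid
assert np.isclose(mean[0, t, 0, 0], expected)
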
Example #7
    def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                       cached_var):
        """Computes mean and variance over the valid data points in inputs.

    Args:
      inputs: [B, T, F, N, G] or [B, T, N, G]
      paddings: [B, T, 1, 1, 1] or [B, T, 1, 1]
      cached_sum: [B, 1, 1, N, 1] or [B, 1, N, 1]
      cached_count: same shape as cached_sum.
      cached_var: same shape as cached_sum.

    Returns:
      mean: [B, T, 1, N, 1] or [B, T, N, 1]
      variance: same shape as mean.
      new_cached_sum: same shape as cached_sum.
      new_cached_count: same shape as cached_count.
      new_cached_var: same shape as cached_var.
    """
        tf.logging.vlog(1, 'inputs: %r', inputs)
        tf.logging.vlog(1, 'paddings: %r', paddings)
        tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
        tf.logging.vlog(1, 'cached_count: %r', cached_count)

        mask = tf.cast(1.0 - paddings, inputs.dtype)
        inputs *= tf.cast(mask, inputs.dtype)

        input_rank = py_utils.GetRank(inputs)
        assert input_rank is not None, (f'inputs rank must be static for '
                                        f'{repr(inputs)}')
        reduce_over_dims = list(range(input_rank))
        # Skip B, T, and N. Reduce {F,G} or just G.
        reduce_over_dims = reduce_over_dims[2:-2] + reduce_over_dims[-1:]
        tf.logging.vlog(1, 'reduce_over_dims: %s', reduce_over_dims)

        # [B, T, 1, N, 1] or [B, T, N, 1]
        sum_v = tf.reduce_sum(inputs, reduce_over_dims, keepdims=True)
        sum_v = tf.math.cumsum(sum_v, axis=1)
        sum_v += cached_sum

        # [B, T, 1, 1, 1] or [B, T, 1, 1]
        count_v = tf.reduce_sum(mask, reduce_over_dims, keepdims=True)
        count_v = tf.math.cumsum(count_v, axis=1)
        input_shape = py_utils.GetShape(inputs)
        if input_rank == 4:
            # F * G
            multiplier = input_shape[-1] * input_shape[-3]
        else:
            # G
            multiplier = input_shape[-1]
        count_v *= multiplier
        count_v += cached_count
        count_v = tf.maximum(count_v, 1.0)

        tf.logging.vlog(1, 'sum_v: %r', sum_v)
        tf.logging.vlog(1, 'count_v: %r', count_v)

        mean = sum_v / count_v
        if py_utils.FLAGS.tflite_compatible:
            # TfLite doesn't support broadcasting with 5D tensors.
            inputs_shape = py_utils.GetShape(inputs)
            if len(inputs_shape) == 4:
                tiled_mean = tf.tile(mean, [1, 1, 1, inputs_shape[3]])
            else:
                tiled_mean = tf.tile(
                    mean, [1, 1, inputs_shape[2], 1, inputs_shape[4]])
            sum_vv = tf.reduce_sum(tf.math.square(inputs - tiled_mean) * mask,
                                   reduce_over_dims,
                                   keepdims=True)
        else:
            sum_vv = tf.reduce_sum((inputs - mean)**2 * mask,
                                   reduce_over_dims,
                                   keepdims=True)
        sum_vv = tf.math.cumsum(sum_vv, axis=1)
        sum_vv += cached_var

        cached_sum = sum_v[:, -1:]
        cached_count = count_v[:, -1:]
        cached_var = sum_vv[:, -1:]

        variance = py_utils.with_dependencies([
            py_utils.assert_greater_equal(sum_vv, tf.cast(0, sum_vv.dtype)),
        ], sum_vv / count_v)
        return mean, variance, cached_sum, cached_count, cached_var
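
The tflite_compatible branch above swaps implicit broadcasting of the 5-D mean for an explicit tf.tile, since TfLite will not broadcast 5-D tensors; the result is numerically the same. A small equivalence check with made-up shapes (plain TensorFlow):

import tensorflow as tf

B, T, F, N, G = 2, 3, 4, 5, 6
inputs = tf.random.normal([B, T, F, N, G])
mean = tf.reduce_mean(inputs, axis=[2, 4], keepdims=True)   # [B, T, 1, N, 1]

tiled_mean = tf.tile(mean, [1, 1, F, 1, G])                 # explicit [B, T, F, N, G]
tf.debugging.assert_equal(inputs - mean, inputs - tiled_mean)
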
Example #8
    def FProp(self, theta, inputs, paddings=None):
        """Apply group normalization.

    Args:
      theta: A NestedMap object containing weights' values of this layer and its
        children layers.
      inputs: The inputs tensor with shape [batch_size, height, width, channel].
      paddings: The paddings tensor with shape [batch_size, height]. Intended to
        be used for sequence processing where `height` is `time`.

    Returns:
      A single tensor with the same shape as 'inputs', containing the output of
      group normalization; or an (output, output_paddings) pair if the input
      paddings is not None.
    """
        p = self.params
        inputs = py_utils.with_dependencies([
            py_utils.assert_greater_equal(py_utils.GetRank(inputs),
                                          p.input_rank)
        ], inputs)

        min_group_size = min(p.min_group_size, p.dim)
        group_size = max(p.dim // p.num_groups, min_group_size)
        num_groups = p.dim // group_size

        input_shape = py_utils.GetShape(inputs)
        with tf.name_scope(p.name):
            x = tf.reshape(inputs, input_shape[:-1] + [num_groups, group_size])
            expanded_rank = p.input_rank + 1
            all_dims = list(range(expanded_rank))
            if paddings is None:
                # Skip d0, d[-2]
                axes = all_dims[1:-2] + all_dims[-1:]
                counts, means_ss, variance_ss, _, = tf.nn.sufficient_statistics(
                    x, axes=axes, keepdims=True)
                norm_mean, norm_variance = tf.nn.normalize_moments(
                    counts, means_ss, variance_ss, None)
            else:
                expanded_paddings = tf.reshape(
                    paddings, input_shape[:2] + [1] * (expanded_rank - 2))
                # skip the batching and group dim
                if p.cumulative:
                    # Skip d0, d1 and d[-2]
                    reduce_over_dims = all_dims[2:-2] + all_dims[-1:]
                    norm_mean, norm_variance = ComputeMomentsWithPadding(
                        x,
                        expanded_paddings,
                        reduce_over_dims=reduce_over_dims,
                        cumulative_axis=1,
                        keepdims=True)
                else:
                    # Skip d0, d[-2]
                    reduce_over_dims = all_dims[1:-2] + all_dims[-1:]
                    norm_mean, norm_variance = ComputeMomentsWithPadding(
                        x, expanded_paddings, reduce_over_dims, keepdims=True)

            norm_mean = py_utils.CheckNumerics(
                norm_mean, 'mean of %s failed numeric check' % p.name)
            norm_variance = py_utils.CheckNumerics(
                norm_variance, 'variance of %s failed numeric check' % p.name)

            beta = theta.beta
            gamma = theta.gamma
            n = input_shape[0]
            t = input_shape[1] if p.cumulative else 1
            norm_shape = [n, t, 1, num_groups, 1
                          ] if p.input_rank == 4 else [n, t, num_groups, 1]
            with tf.control_dependencies([
                    py_utils.assert_greater_equal(
                        norm_variance, tf.cast(0., norm_variance.dtype)),
                    py_utils.assert_shape_match(norm_shape,
                                                tf.shape(norm_mean)),
                    py_utils.assert_shape_match(norm_shape,
                                                tf.shape(norm_variance)),
            ]):
                x = (x - norm_mean) / tf.sqrt(norm_variance + self._epsilon)
                x = tf.reshape(x, input_shape)
                gn_output = x * gamma + beta
                gn_output = tf.reshape(gn_output, input_shape)
                if paddings is None:
                    return gn_output
                else:
                    return gn_output, paddings
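
Stripped of paddings and the learned beta/gamma, group normalization as in FProp above is: reshape the channel axis into [num_groups, group_size], normalize with per-group moments, and reshape back. A compact sketch with made-up shapes (plain TensorFlow, not the lingvo layer):

import tensorflow as tf

B, H, W, C, num_groups, epsilon = 2, 8, 8, 16, 4, 1e-3
group_size = C // num_groups
inputs = tf.random.normal([B, H, W, C])

x = tf.reshape(inputs, [B, H, W, num_groups, group_size])
# Reduce over everything except batch and group: H, W and group_size.
mean, variance = tf.nn.moments(x, axes=[1, 2, 4], keepdims=True)
x = (x - mean) * tf.math.rsqrt(variance + epsilon)
gn_output = tf.reshape(x, [B, H, W, C])      # same shape as inputs
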
Example #9
def IsWithinBBox3D(points_3d, bboxes_3d):
    """Checks if points are within a 3-d bbox.

  Args:
    points_3d: [..., num_points, 3] float32 Tensor specifying points in 3-d
      space as [x, y, z] coordinates.
    bboxes_3d: [..., num_bboxes, 7] float32 Tensor specifying 3-d bboxes as
      [x, y, z, dx, dy, dz, phi], where x, y and z specify the center of each
      box.

  Returns:
    boolean Tensor of shape [..., num_points, num_bboxes] indicating whether
    each point lies within each box.
  """
    # Check that points_3d and bboxes_3d have the same rank.
    bboxes_rank = py_utils.GetRank(bboxes_3d)
    points_3d = py_utils.HasRank(points_3d, bboxes_rank)
    leading_shape = py_utils.GetShape(bboxes_3d)[:-2]

    # Check that both points_3d and bboxes_3d have the same leading shape.
    points_3d = py_utils.HasShape(points_3d, leading_shape + [-1, 3])
    bboxes_3d = py_utils.HasShape(bboxes_3d, leading_shape + [-1, 7])

    num_points = py_utils.GetShape(points_3d)[-2]
    num_bboxes = py_utils.GetShape(bboxes_3d)[-2]

    bbox_corners = BBoxCorners(bboxes_3d)
    bbox_corners = py_utils.HasShape(bbox_corners,
                                     leading_shape + [num_bboxes, 8, 3])
    # First four points are the top of the bounding box.
    # Counter-clockwise arrangement of points specifying 2-d Euclidean box.
    #   (x0, y1) <--- (x1, y1)
    #                    ^
    #                    |
    #                    |
    #   (x0, y0) ---> (x1, y0)
    bboxes_2d_corners = bbox_corners[..., 0:4, 0:2]
    # Determine if points lie within 2-D (x, y) plane for all bounding boxes.
    points_2d = points_3d[..., :2]
    is_inside_2d = IsWithinBBox(points_2d, bboxes_2d_corners)

    is_inside_2d = py_utils.HasShape(is_inside_2d,
                                     leading_shape + [num_points, num_bboxes])

    # Determine if points lie within the z-dimension for all bounding boxes.
    [_, _, z, _, _, dz, _] = tf.split(bboxes_3d, 7, axis=-1)

    def _ComputeLimits(center, width):
        left = center - width / 2.0
        right = center + width / 2.0
        return left, right

    z0, z1 = _ComputeLimits(z[..., 0], dz[..., 0])
    z_points = points_3d[..., 2:]

    is_inside_z = tf.math.logical_and(
        tf.less_equal(z_points, z1[..., tf.newaxis, :]),
        tf.greater_equal(z_points, z0[..., tf.newaxis, :]))
    is_inside_z = py_utils.HasShape(is_inside_z,
                                    leading_shape + [num_points, num_bboxes])

    return tf.math.logical_and(is_inside_z, is_inside_2d)
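
The z test above simply brackets each box's center with half its height: a point is inside along z when z - dz / 2 <= p_z <= z + dz / 2, and the final mask ANDs that with the 2-D (x, y) test. A tiny NumPy illustration of the z part with made-up numbers:

import numpy as np

z_points = np.array([[0.5], [2.5]], np.float32)     # two points, z only
z, dz = 1.0, 2.0                                    # one box: center z, height dz
z0, z1 = z - dz / 2.0, z + dz / 2.0                 # [0.0, 2.0]
is_inside_z = (z_points >= z0) & (z_points <= z1)
assert is_inside_z.tolist() == [[True], [False]]
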
Example #10
  def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                     cached_var):
    """Computes mean and variance over the valid data points in inputs.

    Args:
      inputs: [B, T, F, N, G] or [B, T, N, G]
      paddings: [B, T, 1, 1, 1] or [B, T, 1, 1] (same rank as inputs)
      cached_sum: [B, N]
      cached_count: [B, 1]
      cached_var: [B, N]

    Returns:
      mean: [B, T, 1, N, 1] or [B, T, N, 1] (same rank as inputs)
      variance: same shape as mean.
      new_cached_sum: same shape as cached_sum.
      new_cached_count: same shape as cached_count.
      new_cached_var: same shape as cached_var.
    """
    tf.logging.vlog(1, 'inputs: %r', inputs)
    tf.logging.vlog(1, 'paddings: %r', paddings)
    tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
    tf.logging.vlog(1, 'cached_count: %r', cached_count)
    tf.logging.vlog(1, 'cached_var: %r', cached_var)

    input_rank = py_utils.GetRank(inputs)
    paddings = py_utils.HasRank(paddings, input_rank)
    cached_sum = py_utils.HasRank(cached_sum, 2)
    cached_count = py_utils.HasRank(cached_count, 2)
    cached_var = py_utils.HasRank(cached_var, 2)

    input_shape = py_utils.GetShape(inputs)
    output_shape = input_shape[:]
    if input_rank == 4:
      # Skip {B,T,N}. Reduce just G.
      reduce_over_dims = [3]
      multiplier = input_shape[3]
      output_shape[3] = 1
    else:
      assert input_rank == 5
      # Skip {B,T,N}. Reduce {F,G}.
      reduce_over_dims = [2, 4]
      multiplier = input_shape[2] * input_shape[4]
      output_shape[2] = 1
      output_shape[4] = 1

    # [B, T, N]
    sum_v = tf.reduce_sum(
        py_utils.ApplyPadding(paddings, inputs),
        reduce_over_dims,
        keepdims=False)
    sum_v = tf.math.cumsum(sum_v, axis=1)
    sum_v += cached_sum[:, tf.newaxis, :]

    # [B, T, 1]
    count_v = tf.reduce_sum(
        py_utils.ApplyPadding(
            paddings, tf.cast(multiplier, inputs.dtype), ensure_shape=False),
        reduce_over_dims,
        keepdims=False)
    count_v = tf.math.cumsum(count_v, axis=1)
    count_v += cached_count[:, tf.newaxis, :]

    # [B, T, 1, N, 1] or [B, T, N, 1]
    mean = tf.reshape(sum_v / tf.maximum(count_v, 1.0), output_shape)

    # [B, T, N]
    sum_vv = tf.reduce_sum(
        py_utils.ApplyPadding(paddings,
                              tf.math.squared_difference(inputs, mean)),
        reduce_over_dims,
        keepdims=False)
    sum_vv = tf.math.cumsum(sum_vv, axis=1)
    sum_vv += cached_var[:, tf.newaxis, :]

    # [B, N]
    cached_sum = sum_v[:, -1]
    # [B, 1]
    cached_count = count_v[:, -1]
    # [B, N]
    cached_var = sum_vv[:, -1]

    # [B, T, 1, N, 1] or [B, T, N, 1]
    variance = tf.reshape(sum_vv / tf.maximum(count_v, 1.0), output_shape)

    tf.logging.vlog(1, 'sum_v: %r', sum_v)
    tf.logging.vlog(1, 'count_v: %r', count_v)
    tf.logging.vlog(1, 'sum_vv: %r', sum_vv)

    return mean, variance, cached_sum, cached_count, cached_var
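
Because this version accumulates running sums and counts, splitting a sequence into chunks and feeding the trailing cached_sum / cached_count into the next call reproduces the full-sequence cumulative mean exactly. A NumPy sketch of that state hand-off for the mean only (made-up shapes, no padding, not the lingvo API itself):

import numpy as np

np.random.seed(1)
B, T, N, G = 2, 6, 3, 4
inputs = np.random.randn(B, T, N, G).astype(np.float32)

def chunk_mean(chunk, cached_sum, cached_count):
  # chunk: [B, t, N, G]; cached_sum: [B, N]; cached_count: [B, 1]
  sum_v = np.cumsum(chunk.sum(axis=-1), axis=1) + cached_sum[:, None, :]            # [B, t, N]
  count_v = np.cumsum(np.full([B, chunk.shape[1], 1], float(G), np.float32), axis=1)
  count_v = count_v + cached_count[:, None, :]                                      # [B, t, 1]
  mean = sum_v / np.maximum(count_v, 1.0)
  return mean, sum_v[:, -1], count_v[:, -1]

# The whole sequence in one call ...
full_mean, _, _ = chunk_mean(inputs, np.zeros([B, N], np.float32),
                             np.zeros([B, 1], np.float32))
# ... versus two chunks with the cached state carried across the boundary.
m1, s, c = chunk_mean(inputs[:, :3], np.zeros([B, N], np.float32),
                      np.zeros([B, 1], np.float32))
m2, _, _ = chunk_mean(inputs[:, 3:], s, c)
assert np.allclose(np.concatenate([m1, m2], axis=1), full_mean)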