Esempio n. 1
0
 def _ExpandedSquaredDistanceMatrix(pa, pb):
   """Pairwise squared distances via the |a|^2 - 2 a.b + |b|^2 expansion.

   The reductions run over axis 2 and the product is a batched matmul, so both
   inputs are presumably rank-3 [batch, num_points, dims] -- TODO confirm.

   Args:
     pa: Tensor holding the first set of points.
     pb: Tensor holding the second set of points.

   Returns:
     Tensor of non-negative squared distances between every point in `pa` and
     every point in `pb`.
   """
   pa_sq_norm = tf.reduce_sum(tf.square(pa), axis=2, keepdims=True)
   pb_sq_norm = tf.reduce_sum(tf.square(pb), axis=2, keepdims=True)
   # Move the pb norms onto the last axis so they broadcast across columns.
   pb_sq_norm = tf.transpose(pb_sq_norm, perm=[0, 2, 1])
   cross_term = tf.matmul(pa, pb, transpose_b=True)
   expanded = pa_sq_norm - 2 * cross_term + pb_sq_norm
   # Round-off in the expanded form can yield slightly negative entries;
   # clamp them to zero.
   return tf.maximum(expanded, 0.0)
Esempio n. 2
0
  def _MelSpectrogram(self, signal):
    """Projects the FFT magnitude (or energy) of `signal` onto mel bins.

    Args:
      signal: f32 Tensor. The FFT is taken over the innermost axis and the
        result is unpacked as rank 3 below, so this is presumably framed audio
        shaped [batch_size, num_frames, frame_size] -- TODO confirm against
        the caller.

    Returns:
      f32 features Tensor, shaped [batch_size, num_frames, p.num_bins].
    """
    p = self.params

    # Magnitude spectrum; squared when energy features are requested.
    spectrum = tf.abs(tf.signal.rfft(signal, [self._fft_size]))
    if p.compute_energy:
      spectrum = tf.square(spectrum)

    # Filterbank of shape [num_spectrogram_bins, num_mel_bins], where
    # num_spectrogram_bins is fft_size // 2 + 1.
    mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=p.num_bins,
        num_spectrogram_bins=self._fft_size // 2 + 1,
        sample_rate=p.sample_rate,
        lower_edge_hertz=p.lower_edge_hertz,
        upper_edge_hertz=p.upper_edge_hertz,
        dtype=tf.float32)

    # Collapse batch and frame axes so a single 2-D matmul applies the
    # filterbank, then restore the leading axes.
    batch_size, num_frames, fft_channels = py_utils.GetShape(spectrum, 3)
    flat_spectrum = tf.reshape(spectrum,
                               [batch_size * num_frames, fft_channels])
    flat_mel = tf.matmul(flat_spectrum, mel_filterbank)
    return tf.reshape(flat_mel, [batch_size, num_frames, p.num_bins])
Esempio n. 3
0
    def testFetchGrad(self):
        """Checks that a `_Save` layer exposes its activation and gradient."""
        # pyformat: disable
        layer_builder = builder.Base.Params().Instantiate()
        net_params = layer_builder._Seq(
            'seq',
            layer_builder._Linear('l', 16, 32),
            layer_builder._Bias('b', 32),
            layer_builder._Save('fetch'),
            layer_builder._Activation('a'))
        # pyformat: enable

        graph = tf.Graph()
        with graph.as_default():
            net = net_params.Instantiate()
            inputs = tf.random.normal(shape=[4, 16])
            outputs = net.FPropDefaultTheta(inputs)
            loss = tf.reduce_sum(tf.square(outputs))
            # The gradient tensors themselves are not needed; the backward
            # graph is built so the saved gradient can be fetched below.
            tf.gradients(ys=loss, xs=inputs)

            saved = net.fetch
            act, dact = saved.activation, saved.gradient

        with self.session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            act_v, dact_v = sess.run([act, dact])

        # Downstream of the saved tensor the graph is sum(square(relu(act))),
        # so the gradient w.r.t. act is 2 * relu(act).
        expected_dact = 2 * np.maximum(0, act_v)
        self.assertAllClose(expected_dact, dact_v)
Esempio n. 4
0
 def _process(record):
     """Parses `record` into a number and pairs it with its square.

     Args:
       record: Input passed to `str_to_num` through `tf.py_func`.

     Returns:
       A `(payload, 1)` tuple. `payload` is a NestedMap with `record` and `num`
       when `use_nested_map` is set, otherwise the list `[record, num]`. The
       constant 1 is presumably a bucket key -- TODO confirm against caller.
     """
     [value] = tf.py_func(str_to_num, [record], [tf.float32])
     value_and_square = tf.stack([value, tf.square(value)])
     if not use_nested_map:
         return [record, value_and_square], 1
     return py_utils.NestedMap(record=record, num=value_and_square), 1
Esempio n. 5
0
 def _process(source_id, record):
   """Parses `record` into a number, pairs it with its square, and buckets it.

   Args:
     source_id: Identifier forwarded unchanged into the payload.
     record: Input passed to `str_to_num` through `tf.py_func`.

   Returns:
     A `(payload, bucket)` tuple. `payload` is a NestedMap with `source_id`,
     `record` and `num` when `use_nested_map` is set, otherwise the list
     `[source_id, record, num]`. `bucket` is `bucket_fn(num)`.
   """
   [value] = tf.py_func(str_to_num, [record], [tf.float32])
   value_and_square = tf.stack([value, tf.square(value)])
   if use_nested_map:
     payload = py_utils.NestedMap(
         source_id=source_id, record=record, num=value_and_square)
   else:
     payload = [source_id, record, value_and_square]
   return payload, bucket_fn(value_and_square)
Esempio n. 6
0
def NeighborSquaredDistanceMatrix(points, neighbor_points):
    """Squared distance from every point to each of its K neighbors.

    Args:
      points: A float tf.Tensor of shape [N, P1, 3] with point positions.
      neighbor_points: A float tf.Tensor of shape [N, P1, K, 3] with neighbor
        positions.

    Returns:
      A float tf.Tensor of shape [N, P1, K] holding the squared distance
      between each point and each of its K neighbors.
    """
    points = py_utils.HasShape(points, [-1, -1, 3])
    n, p1 = py_utils.GetShape(points, 2)
    neighbor_points = py_utils.HasShape(neighbor_points, [n, p1, -1, 3])
    k = py_utils.GetShape(neighbor_points, 3)[2]

    # Insert a singleton neighbor axis so each point broadcasts against its
    # K neighbors.
    centers = tf.reshape(points, [n, p1, 1, 3])
    offsets = neighbor_points - centers
    sq_dist = tf.reduce_sum(tf.square(offsets), axis=3)
    return py_utils.HasShape(sq_dist, [n, p1, k])
Esempio n. 7
0
def SphericalCoordinatesTransform(points_xyz):
    """Converts xyz coordinates into spherical (dist, theta, phi) coordinates.

    See
    https://en.wikipedia.org/wiki/Spherical_coordinate_system#Coordinate_system_conversions
    for the definitions of the transformations.

    Args:
      points_xyz: A floating point tensor with shape [..., 3], where the
        innermost dimension holds the xyz coordinates.

    Returns:
      A floating point tensor with the same shape [..., 3], where the
      innermost dimension holds (dist, theta, phi). phi is the azimuth/yaw
      (rotation around z) and theta is the pitch/inclination (rotation around
      y).
    """
    x = points_xyz[..., 0]
    y = points_xyz[..., 1]
    z = points_xyz[..., 2]

    dist = tf.sqrt(tf.reduce_sum(tf.square(points_xyz), axis=-1))
    # Clamp the denominator so a point at the origin does not divide by zero.
    safe_dist = tf.maximum(dist, 1e-7)
    theta = tf.acos(z / safe_dist)
    # tf.atan2 takes its arguments in (y, x) order.
    phi = tf.atan2(y, x)
    return tf.stack([dist, theta, phi], axis=-1)
Esempio n. 8
0
 def _NonExpandedSquaredDistanceMatrix(pa, pb):
     """Pairwise squared distances computed directly from coordinate deltas.

     `pa` gains a singleton axis at position 2 and `pb` at position 1, so the
     subtraction broadcasts to every (pa point, pb point) pair; the squared
     deltas are then summed over axis 3. Inputs are presumably
     [batch, num_points, dims] -- TODO confirm.
     """
     pa_rows = tf.expand_dims(pa, axis=2)
     pb_cols = tf.expand_dims(pb, axis=1)
     pairwise_delta = pa_rows - pb_cols
     return tf.reduce_sum(tf.square(pairwise_delta), axis=3)
Esempio n. 9
0
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
    """Finds the k nearest `points` for every entry of `query_points`.

    A padding mask is returned alongside the indices. Non-padded results are
    guaranteed to be unique (non-repeated) points drawn from the original
    non-padded points.

    A result slot is padded when there are not enough real points (k exceeds
    the number of original non-padded points) or when the candidate is
    further away than max_distance.

    Note: padded indices may refer to padded points from the original input,
    or may be duplicates of the closest point.

    TODO(weihan,jngiam): PointCNN implementation makes an assumption that
    padded points are repeated points from the original points. This behavior
    is maintained here, but we should update PointCNN to respect indices
    paddings.

    Args:
      points: tensor of shape [N, P1, dims].
      query_points: tensor of shape [N, P2, dims].
      k: Integer.
      points_padding: optional tensor of shape [N, P1] containing True/1.0 iff
        the point is a padded point. If None, all points are considered real.
      max_distance: float, the maximum distance a neighbor may be at. If no
        point lies within that distance, the closest point is returned
        regardless of distance. If None, no distance filtering is performed.
      sample_neighbors_uniformly: boolean, whether to sample uniformly among
        the neighbors that are within max_distance.

    Returns:
      A pair of tensors:

      - indices: tensor of shape [N, P2, k].
      - padding: tensor of shape [N, P2, k] where 1 represents a padded point
        and 0 represents an unpadded (real) point.

    Raises:
      ValueError: if sample_neighbors_uniformly is set without max_distance.
    """
    has_padding = points_padding is not None
    has_max_distance = max_distance is not None

    n, p1 = py_utils.GetShape(points, 2)
    query_points = py_utils.HasShape(query_points, [n, -1, -1])
    p2 = py_utils.GetShape(query_points, 2)[1]

    # dist_mat holds squared distances (no sqrt), so max_distance has to be
    # squared wherever the two are compared.
    dist_mat = SquaredDistanceMatrix(query_points, points)
    dist_mat = py_utils.HasShape(dist_mat, [n, p2, p1])

    if has_padding:
        # Push padded points beyond every real distance so that top_k only
        # selects them once the real points are exhausted.
        padding_mask = tf.expand_dims(points_padding, 1)
        padding_mask = tf.cast(padding_mask, tf.float32)
        padding_mask = py_utils.HasShape(padding_mask, [n, 1, p1])
        large_scalar = tf.reduce_max(dist_mat) + 1
        dist_mat += padding_mask * large_scalar

    if sample_neighbors_uniformly:
        if not has_max_distance:
            raise ValueError(
                'Uniform sampling requires specifying max_distance.')
        # Replace every in-range distance with a uniform random value below
        # the threshold; top_k then draws a random in-range subset without
        # replacement.
        within_range = tf.less_equal(dist_mat, max_distance**2)
        random_dist = (tf.square(max_distance) *
                       tf.random_uniform(tf.shape(dist_mat)))
        dist_mat = tf.where(within_range, random_dist, dist_mat)

    # top_k picks the largest values, so negate to get the smallest distances.
    # Both outputs are N x P2 x K.
    neg_top_k_dist, indices = tf.nn.top_k(-dist_mat, k=k, sorted=True)

    if has_padding:
        # Anything at or beyond large_scalar can only be a padded point.
        paddings = tf.greater_equal(-neg_top_k_dist, large_scalar)
    else:
        paddings = tf.zeros_like(neg_top_k_dist, dtype=tf.bool)

    if has_max_distance:
        # Neighbors further than max_distance are replaced with the closest
        # point's index and flagged as padding.
        too_far = tf.greater(-neg_top_k_dist, tf.square(max_distance))
        closest_idx = tf.tile(indices[:, :, :1], [1, 1, k])
        indices = tf.where(too_far, closest_idx, indices)
        paddings = paddings | too_far

    indices = tf.reshape(indices, [n, p2, k])
    return indices, tf.cast(paddings, tf.float32)
Esempio n. 10
0
def _SmoothL1Norm(a):
    """Element-wise smooth L1 norm: 0.5 * a^2 where |a| < 1, else |a| - 0.5."""
    # Formula (3) of the Fast and Furious paper:
    # http://openaccess.thecvf.com/content_cvpr_2018/papers/Luo_Fast_and_Furious_CVPR_2018_paper.pdf
    magnitude = tf.abs(a)
    quadratic_branch = 0.5 * tf.square(a)
    linear_branch = magnitude - 0.5
    return tf.where(magnitude < 1, quadratic_branch, linear_branch)
Esempio n. 11
0
    def ResidualsToBBoxes(self,
                          anchor_bboxes,
                          residuals,
                          min_angle_rad=-np.pi,
                          max_angle_rad=np.pi):
        r"""Decodes predicted residuals into bboxes relative to their anchors.

        With ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)`` the decoding is::

          x_predicted = x_a + x_residual * diagonal_xy
          y_predicted = y_a + y_residual * diagonal_xy
          z_predicted = z_a + z_residual * dz_a

          dx_predicted = dx_a * exp(dx_residual)
          dy_predicted = dy_a * exp(dy_residual)
          dz_predicted = dz_a * exp(dz_residual)

          # The summed angle is wrapped into [min_angle_rad, max_angle_rad].
          phi_predicted = WrapAngleRad(phi_a + phi_residual,
                                       min_angle_rad, max_angle_rad)

        These are the LocalizationResiduals equations solved for the \*_gt
        variables.

        Args:
          anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy,
            dz, phi), corresponding to each anchor bbox parameters.
          residuals: tf.float32 of the same shape as anchor_bboxes containing
            predicted residuals at each anchor location.
          min_angle_rad: Scalar with the minimum angle allowed (before
            wrapping) in radians.
          max_angle_rad: Scalar with the maximum angle allowed (before
            wrapping) in radians. This value usually should be pi.

        Returns:
          A tf.float32 tensor of the same shape as anchor_bboxes with predicted
          bboxes.
        """
        bbox_shape = py_utils.GetShape(anchor_bboxes)
        last_dim_is_7 = py_utils.assert_equal(bbox_shape[-1], 7)
        anchor_bboxes = py_utils.with_dependencies([last_dim_is_7],
                                                   anchor_bboxes)
        residuals = py_utils.HasShape(residuals, bbox_shape)

        x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(
            anchor_bboxes, num=7, axis=-1)
        x_res, y_res, z_res, dx_res, dy_res, dz_res, phi_res = tf.unstack(
            residuals, num=7, axis=-1)

        diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

        # Centers: x/y offsets are scaled by the anchor's xy diagonal, z by
        # the anchor height.
        x_predicted = x_a + x_res * diagonal_xy
        y_predicted = y_a + y_res * diagonal_xy
        z_predicted = z_a + z_res * dz_a

        # Sizes: residuals are log-ratios, so exponentiate and rescale.
        dx_predicted = dx_a * tf.exp(dx_res)
        dy_predicted = dy_a * tf.exp(dy_res)
        dz_predicted = dz_a * tf.exp(dz_res)

        # The heading range depends on the calling model. A loss built on
        # sine(delta_phi) cannot distinguish direction, so [0, np.pi] should
        # be passed in; a directional heading encoding most likely wants
        # [-np.pi, np.pi].
        phi_predicted = geometry.WrapAngleRad(phi_a + phi_res, min_angle_rad,
                                              max_angle_rad)

        predicted = [
            x_predicted, y_predicted, z_predicted,
            dx_predicted, dy_predicted, dz_predicted,
            phi_predicted,
        ]  # pyformat: disable
        return tf.stack(predicted, axis=-1)
Esempio n. 12
0
    def LocalizationResiduals(self, anchor_bboxes, assigned_gt_bboxes):
        """Encodes ground-truth bboxes as residuals relative to their anchors.

        Let ``anchor_bbox = (x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a)`` and
        ``assigned_gt_bbox = (x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt)``,
        and define ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)``. The residuals
        are::

          x_residual = (x_gt - x_a) / (diagonal_xy)
          y_residual = (y_gt - y_a) / (diagonal_xy)
          z_residual = (z_gt - z_a) / (dz_a)

          dx_residual = log(dx_gt / dx_a)
          dy_residual = log(dy_gt / dy_a)
          dz_residual = log(dz_gt / dz_a)

          phi_residual = phi_gt - phi_a

        Normalizing the x and y residuals by the diagonal was first proposed
        by [1]. Intuitively, objects usually move freely in the x-y plane,
        including diagonally, whereas movement along z (up and down) can be
        considered orthogonal to x-y.

        For phi_residual, one way to frame the loss is
        SmoothL1(sine(phi_residual - phi_predicted)). Wrapping the phi
        residual with sine was proposed by [2]: bboxes at phi and phi + pi
        fully overlap in 3D space and differ only in direction. With a
        symmetric loss such as SmoothL1, the sine makes this residual
        invariant to direction. Wrapping the predicted phi into the range the
        model expects is left to ResidualsToBBoxes.

        The Huber (SmoothL1) loss can then be applied to the delta between
        these target residuals and the model predicted residuals.

        [1] VoxelNet: End-to-End Learning for Point Cloud Based 3D Object
            Detection
            https://arxiv.org/abs/1711.06396

        [2] SECOND: Sparsely Embedded Convolutional Detection
            https://pdfs.semanticscholar.org/5125/a16039cabc6320c908a4764f32596e018ad3.pdf

        Args:
          anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy,
            dz, phi), corresponding to each anchor bbox parameters.
          assigned_gt_bboxes: tf.float32 of the same shape as anchor_bboxes
            containing the corresponding assigned ground-truth bboxes.

        Returns:
          A tf.float32 tensor of the same shape as anchor_bboxes with target
          residuals for every corresponding bbox.
        """
        bbox_shape = py_utils.GetShape(anchor_bboxes)
        last_dim_is_7 = py_utils.assert_equal(bbox_shape[-1], 7)
        anchor_bboxes = py_utils.with_dependencies([last_dim_is_7],
                                                   anchor_bboxes)
        assigned_gt_bboxes = py_utils.HasShape(assigned_gt_bboxes, bbox_shape)

        x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(
            anchor_bboxes, num=7, axis=-1)
        x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt = tf.unstack(
            assigned_gt_bboxes, num=7, axis=-1)

        diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

        # Anchor dimensions are usually hard-coded params of the input
        # generator and should never be 0; CheckNumerics enforces that the
        # divisions and logs below stay finite.
        check = py_utils.CheckNumerics

        x_residual = check((x_gt - x_a) / diagonal_xy)
        y_residual = check((y_gt - y_a) / diagonal_xy)
        z_residual = check((z_gt - z_a) / dz_a)

        dx_residual = check(tf.log(dx_gt / dx_a))
        dy_residual = check(tf.log(dy_gt / dy_a))
        dz_residual = check(tf.log(dz_gt / dz_a))

        phi_residual = phi_gt - phi_a

        target_residuals = [
            x_residual, y_residual, z_residual,
            dx_residual, dy_residual, dz_residual,
            phi_residual,
        ]  # pyformat: disable
        return tf.stack(target_residuals, axis=-1)
Esempio n. 13
0
    def _BuildStackedRecurrentElman(self, seqlen, trailing_pad_len, batch,
                                    dims, layers):
        """Builds a stacked Elman RNN and a layer-by-layer reference for it.

        Random seeds are fixed, so the generated weights, initial states and
        inputs are reproducible.

        Args:
          seqlen: Number of steps before the trailing padding is appended.
          trailing_pad_len: Number of padded steps appended at the end of the
            sequence. 0 appends none.
          batch: Batch size.
          dims: Hidden (and input) dimensionality of each layer.
          layers: Number of stacked Elman layers.

        Returns:
          A tuple (ref_out, stacked_out, loss, xs, dxs): the output of the
          reference built by chaining recurrent.Recurrent, the output of
          recurrent.StackedRecurrent, the scalar loss computed from the
          stacked output, the flattened list of thetas plus inputs.x, and the
          gradients of the loss with respect to that list.
        """
        tf.set_random_seed(342462)
        np.random.seed(32540)

        seqlen += trailing_pad_len
        dtype = tf.float64

        def CreateTheta():
            return py_utils.NestedMap(
                w=tf.constant(np.random.uniform(0, 0.2, (2 * dims, dims)),
                              dtype=dtype),
                b=tf.constant(np.random.uniform(0, 0.2, (dims, )),
                              dtype=dtype))

        def CreateState0():
            return py_utils.NestedMap(h=tf.constant(np.random.uniform(
                0, 0.2, (batch, dims)),
                                                    dtype=dtype),
                                      padding=tf.constant([[0]] * batch,
                                                          dtype=dtype))

        devices = ['/cpu:0'] * layers
        cell_fns = [self.Elman] * layers
        cell_grads = [self.ElmanGrad] * layers
        cell_outs = [self.ElmanOut] * layers
        cell_out_grads = [self.ElmanOutGrad] * layers
        thetas = [CreateTheta() for _ in range(layers)]
        init_states = [CreateState0() for _ in range(layers)]
        padding = np.zeros((seqlen, batch, 1))
        # `seqlen` already includes the trailing pad, so index from the front.
        # A `-trailing_pad_len:` slice would degenerate to `0:` when the pad
        # length is 0 and mark the whole sequence as padded.
        padding[seqlen - trailing_pad_len:, :, :] = 1.
        # Also pad two steps just before the trailing pad, leaving one real
        # step in between, to exercise padding in the middle of a sequence.
        padding[-trailing_pad_len - 3:-trailing_pad_len - 1, :, :] = 1.
        inputs = py_utils.NestedMap(x=tf.constant(np.random.uniform(
            0, 0.2, (seqlen, batch, dims)),
                                                  dtype=dtype),
                                    padding=tf.constant(padding, dtype=dtype))
        output, _ = recurrent.StackedRecurrent(devices=devices,
                                               cell_fns=cell_fns,
                                               cell_grads=cell_grads,
                                               cell_outs=cell_outs,
                                               cell_out_grads=cell_out_grads,
                                               thetas=thetas,
                                               init_states=init_states,
                                               inputs=inputs)
        o = output.x
        if 'padding' in inputs:
            # Zero out padded steps so they do not contribute to the loss.
            o *= (1 - inputs.padding)
        loss = tf.reduce_sum(tf.square(o))

        xs = recurrent.Flatten(thetas + [py_utils.NestedMap(x=inputs.x)])
        dxs = tf.gradients(ys=loss, xs=xs)

        # Reference implementation using Recurrent().
        ref = inputs
        for i in range(layers):
            ref = self.ElmanOut(
                recurrent.Recurrent(cell_fn=cell_fns[i],
                                    cell_grad=cell_grads[i],
                                    theta=thetas[i],
                                    state0=init_states[i],
                                    inputs=ref)[0])
        return ref.x, output.x, loss, xs, dxs
Esempio n. 14
0
    def FProp(self, theta, x, paddings=None, update=False):
        """Computes distances of the given input 'x' to all centroids.

        When p.apply_layer_norm is set, layer normalization is applied to 'x'
        first and the returned 'dists' is computed from the normalized 'x'.

        Args:
          theta: A `.NestedMap` of weights' values of this layer.
          x: A tensor of shape [B, L, N, H].
          paddings: If not None, a tensor of shape [B, L].
          update: bool, whether to update centroids using x.

        Returns:
          dists: "distances" of the given input 'x' to all centroids.
                 Shape [B, L, N, K]. When `update` is True this tensor carries
                 a control dependency on the centroid update.
          k_means_loss: the average squared Euclidean distances to the closest
                        centroid, a scalar.
        """
        p = self.params
        x = tf.cast(x, theta.means.dtype)
        if paddings is None:
            paddings = tf.zeros_like(x[:, :, 0, 0])
        # Shape [B, L, 1, 1]
        paddings_4d = paddings[:, :, None, None]

        if p.apply_layer_norm:
            x = KMeansClusteringForAtten.LayerNorm(x, p.epsilon)

        # 'x' is normalized (but theta.means is not), we use negative dot product to
        # approximate the Euclidean distance here.
        dists = -2 * tf.einsum('BLNH, NKH -> BLNK', x, theta.means)
        if not p.apply_layer_norm:
            # If entries are not normalized, compute norms here.
            x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
            means_norm_sq = tf.reduce_sum(tf.square(theta.means),
                                          axis=-1,
                                          keepdims=False)
            # Lift [N, K] to [1, 1, N, K] so it broadcasts against 'dists'.
            means_norm_sq = tf.expand_dims(means_norm_sq, axis=0)
            means_norm_sq = tf.expand_dims(means_norm_sq, axis=0)
            dists += x_norm_sq + means_norm_sq

        # For padded positions we update the distances to very large numbers.
        very_large_dists = tf.ones_like(dists) * tf.constant(
            0.1, dtype=dists.dtype) * dists.dtype.max
        paddings_tiled = tf.tile(paddings_4d,
                                 [1, 1, p.num_heads, p.num_clusters])
        dists = tf.where(paddings_tiled > 0.0, very_large_dists, dists)

        # Shape [B, L, N, K], the same as 'dists' above.
        nearest_one_hot = tf.one_hot(tf.math.argmin(dists, axis=-1),
                                     p.num_clusters,
                                     dtype=theta.means.dtype)
        # Same shape as the input 'x'.
        nearest_centroid = tf.einsum('BLNK, NKH -> BLNH', nearest_one_hot,
                                     theta.means)
        diff = tf.math.squared_difference(x,
                                          tf.stop_gradient(nearest_centroid))
        diff = py_utils.ApplyPadding(paddings_4d, diff)
        diff = tf.math.reduce_mean(diff, axis=2)

        # The commitment loss which, when backpropagated, encourages the 'x'
        # values to commit to their chosen centroids.
        diff = tf.cast(diff, tf.float32)
        paddings = tf.cast(paddings, tf.float32)
        k_means_loss = tf.math.reduce_sum(diff) / tf.math.reduce_sum(1.0 -
                                                                     paddings)
        summary_utils.scalar('k_means/squared_distance_loss', k_means_loss)

        # TODO(zhouwk): investigate normalizing theta.means after each update.
        # theta.means is laid out [N, K, H] (see the einsum subscripts above).
        # Reduce over H only so that the min/mean summaries below are taken
        # over per-centroid L2 norms; without an axis tf.norm returns a single
        # scalar and both summaries would report the same value.
        means_norm = tf.norm(theta.means, axis=-1)
        summary_utils.scalar('k_means/centroid_l2_norm/min',
                             tf.math.reduce_min(means_norm))
        summary_utils.scalar('k_means/centroid_l2_norm/mean',
                             tf.math.reduce_mean(means_norm))

        if not update:
            return dists, k_means_loss

        # To update the centroids (self.vars.means), we apply gradient descent on
        # the mini-batch of input 'x', which yields the following:
        #   new_centroid = centroid + (1 - decay) * (x_mean - centroid)
        # where x_mean is the average over all the input vectors closest to this
        # centroid.
        #
        # Note that this approach is equivalent with backprop via
        #    loss = tf.math.reduce_mean(
        #        tf.math.squared_difference(tf.stop_gradient(x), nearest_centroid)))
        # , except that here the learning rate is independently set via 'decay'.

        # Ensure that the padded positions are not used to update the centroids.
        nearest_one_hot = py_utils.ApplyPadding(paddings_4d, nearest_one_hot)

        # Sum away batch and sequence length dimensions to get per cluster count.
        # Shape: [N, K]
        per_cluster_count = tf.reduce_sum(nearest_one_hot, axis=[0, 1])
        summary_utils.histogram('k_means/per_cluster_vec_count',
                                per_cluster_count)

        # Sum of the input 'x' per each closest centroid.
        sum_x = tf.einsum('BLNK, BLNH -> NKH', nearest_one_hot, x)

        if py_utils.use_tpu():
            # Aggregate the statistics across replicas before updating.
            per_cluster_count = tf.tpu.cross_replica_sum(per_cluster_count)
            sum_x = tf.tpu.cross_replica_sum(sum_x)

        if p.use_ema:
            updated_ema_count = moving_averages.assign_moving_average(
                self.vars.ema_count,
                tf.cast(per_cluster_count, self.vars.ema_count.dtype),
                p.decay,
                zero_debias=False)
            updated_ema_means = moving_averages.assign_moving_average(
                self.vars.ema_means,
                tf.cast(sum_x, self.vars.ema_means.dtype),
                p.decay,
                zero_debias=False)
            # Smooth the counts with epsilon so that no cluster count is zero
            # in the division below, while keeping their total equal to n.
            n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
            updated_ema_count = ((updated_ema_count + p.epsilon) /
                                 (n + p.num_clusters * p.epsilon) * n)
            updated_ema_means = updated_ema_means / tf.expand_dims(
                updated_ema_count, axis=-1)
            updated_ema_means = tf.cast(updated_ema_means,
                                        self.vars.means.dtype)
            means = tf.cast(theta.means, updated_ema_means.dtype)
            update_means_diff = updated_ema_means - means
        else:
            # If per_cluster_count for a cluster is 0, then 'nearest_one_hot' in that
            # cluster's position will always be 0, hence 'sum_x' in that dimension
            # will be 0.
            new_means = sum_x / tf.maximum(
                tf.constant(1.0, dtype=per_cluster_count.dtype),
                tf.expand_dims(per_cluster_count, axis=-1))
            # Note that we intentionally do not normalize the means after this update
            # as empirically this works better.
            update_means_diff = tf.cast(
                (1.0 - p.decay) * (new_means - theta.means),
                self.vars.means.dtype)
        # Tie the centroid update to 'dists' so that it runs whenever the
        # returned distances are evaluated.
        return py_utils.with_dependencies(
            [tf.assign_add(self.vars.means, update_means_diff)],
            dists), k_means_loss
Esempio n. 15
0
 def _l2_norm(v):
     """Returns the L2 norm taken over all elements of `v`."""
     sum_of_squares = tf.reduce_sum(tf.square(v))
     return tf.sqrt(sum_of_squares)
Esempio n. 16
0
  def ResidualsToBBoxes(self, anchor_bboxes, residuals):
    r"""Decodes predicted residuals into bboxes relative to their anchors.

    With ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)`` the decoding is:

      x_predicted = x_a + x_residual \* diagonal_xy
      y_predicted = y_a + y_residual \* diagonal_xy
      z_predicted = z_a + z_residual \* dz_a

      dx_predicted = dx_a \* exp(dx_residual)
      dy_predicted = dy_a \* exp(dy_residual)
      dz_predicted = dz_a \* exp(dz_residual)

      phi_predicted = floormod(phi_a + phi_residual, pi)

    These are the LocalizationResiduals equations solved for the \*_gt
    variables.

    Args:
      anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
        phi), corresponding to each anchor bbox parameters.
      residuals: tf.float32 of the same shape as anchor_bboxes containing
        predicted residuals at each anchor location.

    Returns:
      A tf.float32 tensor of the same shape as anchor_bboxes with predicted
      bboxes.
    """
    bbox_shape = py_utils.GetShape(anchor_bboxes)
    last_dim_is_7 = py_utils.assert_equal(bbox_shape[-1], 7)
    anchor_bboxes = py_utils.with_dependencies([last_dim_is_7], anchor_bboxes)
    residuals = py_utils.HasShape(residuals, bbox_shape)

    x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(
        anchor_bboxes, num=7, axis=-1)
    x_res, y_res, z_res, dx_res, dy_res, dz_res, phi_res = tf.unstack(
        residuals, num=7, axis=-1)

    diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

    # Centers: x/y offsets are scaled by the anchor's xy diagonal, z by the
    # anchor height.
    x_predicted = x_a + x_res * diagonal_xy
    y_predicted = y_a + y_res * diagonal_xy
    z_predicted = z_a + z_res * dz_a

    # Sizes: residuals are log-ratios, so exponentiate and rescale.
    dx_predicted = dx_a * tf.exp(dx_res)
    dy_predicted = dy_a * tf.exp(dy_res)
    dz_predicted = dz_a * tf.exp(dz_res)

    # A loss built on sine(delta_phi) cannot distinguish direction, so the
    # predicted phi is folded into [0, np.pi) with floormod for consistency.
    # A separate direction classifier should be added to the model if needed.
    phi_predicted = tf.floormod(phi_a + phi_res, np.pi)

    predicted = [
        x_predicted, y_predicted, z_predicted,
        dx_predicted, dy_predicted, dz_predicted,
        phi_predicted,
    ]  # pyformat: disable
    return tf.stack(predicted, axis=-1)