def _ExpandedSquaredDistanceMatrix(pa, pb):
  """Pairwise squared distances using the expanded quadratic form.

  Evaluates ||a||^2 - 2 * <a, b> + ||b||^2 for every pair of rows taken from
  `pa` and `pb`, with the result clamped at zero.

  Args:
    pa: float tensor of rank 3; coordinates are summed over axis 2.
    pb: float tensor of rank 3; coordinates are summed over axis 2. It is
      multiplied against `pa` with `transpose_b=True`, so its leading and
      trailing dimensions must be compatible with those of `pa`.

  Returns:
    A tensor indexed as [batch, row of pa, row of pb] holding non-negative
    squared distances.
  """
  norm_a = tf.reduce_sum(tf.square(pa), axis=2, keepdims=True)
  norm_b = tf.reduce_sum(tf.square(pb), axis=2, keepdims=True)
  # Move the norms of `pb` onto the last axis so they broadcast across the
  # rows of `pa`.
  norm_b_as_columns = tf.transpose(norm_b, perm=[0, 2, 1])
  inner_products = tf.matmul(pa, pb, transpose_b=True)
  expanded = norm_a - 2 * inner_products + norm_b_as_columns
  # The expanded form has been observed to produce slightly negative entries;
  # clamping at zero rules that out.
  return tf.maximum(expanded, 0.0)
def _MelSpectrogram(self, signal):
  """Projects the magnitude (or energy) spectrum of `signal` onto mel bins.

  Args:
    signal: f32 Tensor. The FFT is taken over the last axis and its output is
      required to be rank 3, so the input is presumably framed as
      [batch_size, num_frames, frame_size] -- TODO confirm against the caller
      (the previous docstring described it as [batch_size, num_samples]).

  Returns:
    f32 features Tensor, shaped [batch_size, num_frames, p.num_bins].
  """
  p = self.params

  # Real FFT over the last axis; keep only the magnitude, squared into energy
  # when requested by the params.
  spectrum = tf.signal.rfft(signal, [self._fft_size])
  spectrum_mag = tf.abs(spectrum)
  if p.compute_energy:
    spectrum_mag = tf.square(spectrum_mag)

  # mel_weights is [num_spectrogram_bins, num_mel_bins] with
  # num_spectrogram_bins == fft_size // 2 + 1.
  mel_weights = tf.signal.linear_to_mel_weight_matrix(
      num_mel_bins=p.num_bins,
      num_spectrogram_bins=self._fft_size // 2 + 1,
      sample_rate=p.sample_rate,
      lower_edge_hertz=p.lower_edge_hertz,
      upper_edge_hertz=p.upper_edge_hertz,
      dtype=tf.float32)

  # The weight matrix is applied in the magnitude domain: collapse batch and
  # frame axes into one so a single 2-D matmul handles every frame.
  batch_size, num_frames, fft_channels = py_utils.GetShape(spectrum_mag, 3)
  flat_frames = tf.reshape(spectrum_mag,
                           [batch_size * num_frames, fft_channels])
  flat_mel = tf.matmul(flat_frames, mel_weights)
  return tf.reshape(flat_mel, [batch_size, num_frames, p.num_bins])
def testFetchGrad(self):
  """Checks that a `_Save` layer exposes the backprop gradient it sees."""
  # pyformat: disable
  layer_builder = builder.Base.Params()
  layer_builder = layer_builder.Instantiate()
  seq_params = layer_builder._Seq(
      'seq',
      layer_builder._Linear('l', 16, 32),
      layer_builder._Bias('b', 32),
      layer_builder._Save('fetch'),
      layer_builder._Activation('a'))
  # pyformat: enable

  graph = tf.Graph()
  with graph.as_default():
    layer = seq_params.Instantiate()
    inputs = tf.random.normal(shape=[4, 16])
    outputs = layer.FPropDefaultTheta(inputs)
    loss = tf.reduce_sum(tf.square(outputs))
    # The returned gradient is unused; the call is only made so the backward
    # graph exists before the fetch tensors are read.
    _ = tf.gradients(ys=loss, xs=inputs)
    act, dact = layer.fetch.activation, layer.fetch.gradient

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    act_v, dact_v = sess.run([act, dact])

  # Downstream of the saved activation the graph is sum(square(relu(act))),
  # so the gradient at the fetch point is 2 * relu(act).
  expected_dact = 2 * np.maximum(0, act_v)
  self.assertAllClose(expected_dact, dact_v)
def _process(record):
  """Converts `record` to a number and pairs it with its square.

  Args:
    record: the raw record handed to `str_to_num`.

  Returns:
    A `(features, 1)` tuple. `features` is a NestedMap with `record` and `num`
    when `use_nested_map` is set in the enclosing scope, otherwise the list
    `[record, num]`. `num` stacks the parsed value and its square.
  """
  (parsed,) = tf.py_func(str_to_num, [record], [tf.float32])
  num = tf.stack([parsed, tf.square(parsed)])
  if not use_nested_map:
    return [record, num], 1
  return py_utils.NestedMap(record=record, num=num), 1
def _process(source_id, record):
  """Converts `record` to a number, squares it, and assigns a bucket.

  Args:
    source_id: identifier passed through unchanged into the output.
    record: the raw record handed to `str_to_num`.

  Returns:
    A `(features, bucket)` tuple. `features` is a NestedMap with `source_id`,
    `record` and `num` when `use_nested_map` is set in the enclosing scope,
    otherwise the list `[source_id, record, num]`. `num` stacks the parsed
    value and its square; `bucket` is `bucket_fn(num)`.
  """
  (parsed,) = tf.py_func(str_to_num, [record], [tf.float32])
  num = tf.stack([parsed, tf.square(parsed)])
  if not use_nested_map:
    return [source_id, record, num], bucket_fn(num)
  features = py_utils.NestedMap(source_id=source_id, record=record, num=num)
  return features, bucket_fn(num)
def NeighborSquaredDistanceMatrix(points, neighbor_points):
  """Squared distance from each point to each of its listed neighbors.

  Args:
    points: A float tf.Tensor of shape [N, P1, 3] with point positions.
    neighbor_points: A float tf.Tensor of shape [N, P1, K, 3] with neighbor
      positions.

  Returns:
    A float tf.Tensor of shape [N, P1, K] where entry [i, j, l] is the squared
    distance between points[i, j] and neighbor_points[i, j, l].
  """
  points = py_utils.HasShape(points, [-1, -1, 3])
  n, p1 = py_utils.GetShape(points, 2)
  neighbor_points = py_utils.HasShape(neighbor_points, [n, p1, -1, 3])
  k = py_utils.GetShape(neighbor_points, 3)[2]

  # Give every point a singleton neighbor axis so it broadcasts against its
  # K neighbors.
  centers = tf.reshape(points, [n, p1, 1, 3])
  offsets = neighbor_points - centers
  sq_dist = tf.reduce_sum(tf.square(offsets), axis=3)
  return py_utils.HasShape(sq_dist, [n, p1, k])
def SphericalCoordinatesTransform(points_xyz):
  """Maps cartesian xyz points to spherical (dist, theta, phi) coordinates.

  See
  https://en.wikipedia.org/wiki/Spherical_coordinate_system#Coordinate_system_conversions
  for the definitions of the transformations.

  Args:
    points_xyz: A floating point tensor with shape [..., 3], where the inner 3
      dimensions correspond to xyz coordinates.

  Returns:
    A floating point tensor with the same shape [..., 3], where the inner
    dimensions correspond to (dist, theta, phi), where phi corresponds to
    azimuth/yaw (rotation around z), and theta corresponds to
    pitch/inclination (rotation around y).
  """
  x = points_xyz[..., 0]
  y = points_xyz[..., 1]
  z = points_xyz[..., 2]

  dist = tf.sqrt(tf.reduce_sum(tf.square(points_xyz), axis=-1))
  # The denominator is floored at 1e-7 so a point at the origin does not
  # divide by zero.
  theta = tf.acos(z / tf.maximum(dist, 1e-7))
  # tf.atan2 takes its arguments in (y, x) order.
  phi = tf.atan2(y, x)
  return tf.stack([dist, theta, phi], axis=-1)
def _NonExpandedSquaredDistanceMatrix(pa, pb):
  """Pairwise squared distances computed from explicit differences.

  Broadcasts `pa` against `pb` to form every pairwise difference, then sums
  the squared differences over axis 3 of the broadcast result.

  Args:
    pa: float tensor; a new axis is inserted at position 2.
    pb: float tensor; a new axis is inserted at position 1.

  Returns:
    The squared differences summed over axis 3 -- presumably
    [batch, rows of pa, rows of pb] for rank-3 inputs.
  """
  pa_rows = tf.expand_dims(pa, axis=2)
  pb_cols = tf.expand_dims(pb, axis=1)
  return tf.reduce_sum(tf.square(pa_rows - pb_cols), axis=3)
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
  """Get indices to k-neighbors of query_points in points.

  Padding is returned along-side indices. Non-padded points are guaranteed to
  be unique (non-repeated) points from original non-padded points.

  Padded points arise due to either a lack of points (k exceeds the number of
  original non-padded points) or points are too far away (exceeds max
  distance).

  Note: Padded point indices may refer to padded points from the original, or
  may be duplicates of the closest point.

  TODO(weihan,jngiam): PointCNN implementation makes an assumption that padded
  points are repeated points from the original points. This behavior is
  maintained here, but we should update PointCNN to respect indices paddings.

  Args:
    points: tensor of shape [N, P1, dims].
    query_points: tensor of shape [N, P2, dims]
    k: Integer.
    points_padding: optional tensor of shape [N, P1] containing True/1.0 iff
      the point is a padded point. if None, then all points are considered
      real points.
    max_distance: float representing the maximum distance that each neighbor
      can be. If there are no points within the distance, then the closest
      point is returned (regardless of distance). If this is set to None, then
      no filtering by distance is performed.
    sample_neighbors_uniformly: boolean specifying whether to sample neighbors
      uniformly if they are within max distance.

  Returns:
    A pair of tensors:

    - indices: tensor of shape [N, P2, k].
    - padding: tensor of shape [N, P2, k] where 1 represents a padded point,
      and 0 represents an unpadded (real) point.

  Raises:
    ValueError: if sample_neighbors_uniformly is set without max_distance.
  """
  # Reject an invalid configuration before any ops are added to the graph.
  if sample_neighbors_uniformly and max_distance is None:
    raise ValueError('Uniform sampling requires specifying max_distance.')

  n, p1 = py_utils.GetShape(points, 2)
  query_points = py_utils.HasShape(query_points, [n, -1, -1])
  _, p2 = py_utils.GetShape(query_points, 2)

  # Compute pair-wise squared distances.
  # Note that dist_mat contains the squared distance (without sqrt). Thus, when
  # using max_distance, we will need to square max_distance to make sure it's
  # in the same units.
  dist_mat = SquaredDistanceMatrix(query_points, points)
  dist_mat = py_utils.HasShape(dist_mat, [n, p2, p1])

  # Add a large scalar to the distances for padded points.
  # dist_mat[i, j, k] will be:
  #   if k < valid_num[i]: distance between points[i, k] and query_points[i, j]
  #   otherwise:           a large scalar added to dist_mat[i, j, k]
  if points_padding is not None:
    points_padding = tf.cast(tf.expand_dims(points_padding, 1), tf.float32)
    points_padding = py_utils.HasShape(points_padding, [n, 1, p1])
    largest_real_dist = tf.reduce_max(dist_mat)
    if max_distance is not None:
      # The offset must also exceed max_distance**2. Otherwise a padded point
      # can land inside the distance threshold (and be resampled as if it were
      # real), and a real point that is assigned a random distance below
      # max_distance**2 can reach large_scalar and be reported as padding.
      largest_real_dist = tf.maximum(
          largest_real_dist,
          tf.square(tf.cast(max_distance, dist_mat.dtype)))
    large_scalar = largest_real_dist + 1
    dist_mat += points_padding * large_scalar

  # To perform sampling neighbors uniformly efficiently, we set all neighbors
  # that are within the distance threshold to have distances be drawn uniformly
  # at random. Using top_k with this enables selecting a random set quickly
  # without replacement.
  if sample_neighbors_uniformly:
    mask_by_distance = tf.less_equal(dist_mat, max_distance**2)
    dist_mat = tf.where(
        mask_by_distance,
        tf.square(max_distance) * tf.random_uniform(tf.shape(dist_mat)),
        dist_mat)

  # top_k selects the largest values, so negate to get the k smallest
  # distances, closest first.
  top_k_dist, indices = tf.nn.top_k(-dist_mat, k=k, sorted=True)  # N x P2 x K

  # Set padding using top_k_dist; padded points will have distance exceeding
  # the large_scalar.
  if points_padding is not None:
    paddings = tf.greater_equal(-top_k_dist, large_scalar)
  else:
    paddings = tf.zeros_like(top_k_dist, dtype=tf.bool)

  # Filter by max_distances by setting all indices that exceed the max_distance
  # to the closest point.
  if max_distance is not None:
    # Mask is true for points that are further than max_distance.
    mask_by_distance = tf.greater(-top_k_dist, tf.square(max_distance))
    closest_idx = tf.tile(indices[:, :, :1], [1, 1, k])
    indices = tf.where(mask_by_distance, closest_idx, indices)
    paddings |= mask_by_distance

  indices = tf.reshape(indices, [n, p2, k])
  paddings = tf.cast(paddings, tf.float32)

  return indices, paddings
def _SmoothL1Norm(a):
  """Element-wise smoothed L1 norm of `a`.

  Follows formula (3) of the Fast and Furious paper:
  http://openaccess.thecvf.com/content_cvpr_2018/papers/Luo_Fast_and_Furious_CVPR_2018_paper.pdf

  Args:
    a: a tensor of values.

  Returns:
    A tensor with 0.5 * a^2 where |a| < 1 and |a| - 0.5 everywhere else.
  """
  magnitude = tf.abs(a)
  quadratic_branch = 0.5 * tf.square(a)
  linear_branch = magnitude - 0.5
  return tf.where(magnitude < 1, quadratic_branch, linear_branch)
def ResidualsToBBoxes(self,
                      anchor_bboxes,
                      residuals,
                      min_angle_rad=-np.pi,
                      max_angle_rad=np.pi):
  r"""Decodes predicted residuals into bboxes relative to their anchors.

  The decoding applies the following formulae::

    x_predicted = x_a + x_residual * diagonal_xy
    y_predicted = y_a + y_residual * diagonal_xy
    z_predicted = z_a + z_residual * dz_a

    dx_predicted = dx_a * exp(dx_residual)
    dy_predicted = dy_a * exp(dy_residual)
    dz_predicted = dz_a * exp(dz_residual)

    # The residual is added and the result is wrapped into
    # [min_angle_rad, max_angle_rad].
    phi_predicted = WrapAngleRad(phi_a + phi_residual,
                                 min_angle_rad, max_angle_rad)

  with ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)``. These are the equations of
  LocalizationResiduals solved for the \*_gt variables.

  Args:
    anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
      phi), corresponding to each anchor bbox parameters.
    residuals: tf.float32 of the same shape as anchor_bboxes containing
      predicted residuals at each anchor location.
    min_angle_rad: Scalar with the minimum angle allowed (before wrapping) in
      radians.
    max_angle_rad: Scalar with the maximum angle allowed (before wrapping) in
      radians. This value usually should be pi.

  Returns:
    A tf.float32 tensor of the same shape as anchor_bboxes with predicted
    bboxes.
  """
  bbox_shape = py_utils.GetShape(anchor_bboxes)
  anchor_bboxes = py_utils.with_dependencies(
      [py_utils.assert_equal(bbox_shape[-1], 7)], anchor_bboxes)
  residuals = py_utils.HasShape(residuals, bbox_shape)

  anchor_fields = tf.unstack(anchor_bboxes, num=7, axis=-1)
  x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = anchor_fields
  residual_fields = tf.unstack(residuals, num=7, axis=-1)
  x_res, y_res, z_res, dx_res, dy_res, dz_res, phi_res = residual_fields

  # Center offsets in the x-y plane are expressed in units of the anchor's
  # footprint diagonal; the z offset is in units of the anchor height.
  xy_diagonal = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))
  centers = [
      x_a + x_res * xy_diagonal,
      y_a + y_res * xy_diagonal,
      z_a + z_res * dz_a,
  ]

  # Dimension residuals are log-ratios, so they are undone with exp.
  dimensions = [
      dx_a * tf.exp(dx_res),
      dy_a * tf.exp(dy_res),
      dz_a * tf.exp(dz_res),
  ]

  # The angle is bounded to [min_angle_rad, max_angle_rad]; the right range
  # depends on how the calling model handles heading. If the loss uses a
  # sine(delta_phi) transformation it cannot distinguish direction and
  # [0, np.pi] should be passed. With a directional heading encoding,
  # [-np.pi, np.pi] is most likely the right choice.
  heading = geometry.WrapAngleRad(phi_a + phi_res, min_angle_rad,
                                  max_angle_rad)

  return tf.stack(centers + dimensions + [heading], axis=-1)
def LocalizationResiduals(self, anchor_bboxes, assigned_gt_bboxes):
  """Encodes assigned ground-truth bboxes as residuals against their anchors.

  With ``anchor_bbox = (x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a)``,
  ``assigned_gt_bbox = (x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt)`` and
  ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)``, the residuals are::

    x_residual = (x_gt - x_a) / (diagonal_xy)
    y_residual = (y_gt - y_a) / (diagonal_xy)
    z_residual = (z_gt - z_a) / (dz_a)

    dx_residual = log(dx_gt / dx_a)
    dy_residual = log(dy_gt / dy_a)
    dz_residual = log(dz_gt / dz_a)

    phi_residual = phi_gt - phi_a

  Normalizing the x and y residuals by the diagonal was first proposed by
  [1]. Intuitively, objects can usually move freely in the x-y plane,
  including diagonally, whereas motion along the z-axis (up and down) can be
  considered orthogonal to x-y.

  For phi_residual, one way to frame the loss is with
  SmoothL1(sine(phi_residual - phi_predicted)). Wrapping the phi residual
  with sine was proposed by [2], based on the observation that bboxes at phi
  and phi + pi are the same bbox, fully overlapping in 3D space, except for
  their direction. Note that sine makes this residual invariant to direction
  when a symmetric loss like SmoothL1 is used. Bounding the predicted phi is
  left to ResidualsToBBoxes.

  The Huber (SmoothL1) loss can then be applied to the delta between these
  target residuals and the model predicted residuals.

  [1] VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection
      https://arxiv.org/abs/1711.06396

  [2] SECOND: Sparsely Embedded Convolutional Detection
      https://pdfs.semanticscholar.org/5125/a16039cabc6320c908a4764f32596e018ad3.pdf

  Args:
    anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
      phi), corresponding to each anchor bbox parameters.
    assigned_gt_bboxes: tf.float32 of the same shape as anchor_bboxes
      containing the corresponding assigned ground-truth bboxes.

  Returns:
    A tf.float32 tensor of the same shape as anchor_bboxes with target
    residuals for every corresponding bbox.
  """
  bbox_shape = py_utils.GetShape(anchor_bboxes)
  anchor_bboxes = py_utils.with_dependencies(
      [py_utils.assert_equal(bbox_shape[-1], 7)], anchor_bboxes)
  assigned_gt_bboxes = py_utils.HasShape(assigned_gt_bboxes, bbox_shape)

  anchor_fields = tf.unstack(anchor_bboxes, num=7, axis=-1)
  x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = anchor_fields
  gt_fields = tf.unstack(assigned_gt_bboxes, num=7, axis=-1)
  x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt = gt_fields

  xy_diagonal = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

  # Anchor dimensions are usually a hard-coded param given to the input
  # generator and should never be 0; CheckNumerics enforces that none of the
  # divisions or logs below produced a non-finite value.
  check = py_utils.CheckNumerics
  center_residuals = [
      check((x_gt - x_a) / xy_diagonal),
      check((y_gt - y_a) / xy_diagonal),
      check((z_gt - z_a) / dz_a),
  ]
  dimension_residuals = [
      check(tf.log(dx_gt / dx_a)),
      check(tf.log(dy_gt / dy_a)),
      check(tf.log(dz_gt / dz_a)),
  ]
  heading_residual = phi_gt - phi_a

  return tf.stack(
      center_residuals + dimension_residuals + [heading_residual], axis=-1)
def _BuildStackedRecurrentElman(self, seqlen, trailing_pad_len, batch, dims,
                                layers):
  """Builds a stacked Elman network plus a layer-by-layer reference.

  The same randomly initialized weights, initial states and inputs are fed to
  recurrent.StackedRecurrent and to a chain of recurrent.Recurrent calls so
  the caller can compare the two.

  Args:
    seqlen: number of non-trailing timesteps; `trailing_pad_len` is added to
      it to obtain the total sequence length.
    trailing_pad_len: number of fully padded timesteps at the end of the
      sequence. May be 0.
    batch: batch size.
    dims: hidden (and input) dimensionality of every layer.
    layers: number of stacked Elman layers.

  Returns:
    A tuple (ref_out, stacked_out, loss, xs, dxs): the reference output, the
    StackedRecurrent output, the sum-of-squares loss over the unpadded
    StackedRecurrent output, the flattened list of tensors differentiated
    against (all thetas plus the input x), and the gradients of the loss
    w.r.t. those tensors.
  """
  # Fixed seeds keep the generated values reproducible. The order of the
  # np.random calls below determines those values, so it must not change.
  tf.set_random_seed(342462)
  np.random.seed(32540)

  seqlen += trailing_pad_len
  dtype = tf.float64

  def CreateTheta():
    return py_utils.NestedMap(
        w=tf.constant(
            np.random.uniform(0, 0.2, (2 * dims, dims)), dtype=dtype),
        b=tf.constant(np.random.uniform(0, 0.2, (dims,)), dtype=dtype))

  def CreateState0():
    return py_utils.NestedMap(
        h=tf.constant(np.random.uniform(0, 0.2, (batch, dims)), dtype=dtype),
        padding=tf.constant([[0]] * batch, dtype=dtype))

  devices = ['/cpu:0'] * layers
  cell_fns = [self.Elman] * layers
  cell_grads = [self.ElmanGrad] * layers
  cell_outs = [self.ElmanOut] * layers
  cell_out_grads = [self.ElmanOutGrad] * layers
  thetas = [CreateTheta() for _ in range(layers)]
  init_states = [CreateState0() for _ in range(layers)]

  padding = np.zeros((seqlen, batch, 1))
  # `padding[-0:]` selects the WHOLE array, so an unguarded assignment would
  # mark every timestep as padded when there is no trailing padding and make
  # the comparison vacuous (all outputs and gradients zeroed).
  if trailing_pad_len > 0:
    padding[-trailing_pad_len:, :, :] = 1.
  # Additionally pad two timesteps shortly before the trailing padding to
  # exercise padding in the middle of the sequence.
  padding[-trailing_pad_len - 3:-trailing_pad_len - 1, :, :] = 1.
  inputs = py_utils.NestedMap(
      x=tf.constant(
          np.random.uniform(0, 0.2, (seqlen, batch, dims)), dtype=dtype),
      padding=tf.constant(padding, dtype=dtype))

  output, _ = recurrent.StackedRecurrent(
      devices=devices,
      cell_fns=cell_fns,
      cell_grads=cell_grads,
      cell_outs=cell_outs,
      cell_out_grads=cell_out_grads,
      thetas=thetas,
      init_states=init_states,
      inputs=inputs)
  o = output.x
  if 'padding' in inputs:
    # Padded timesteps must not contribute to the loss.
    o *= (1 - inputs.padding)
  loss = tf.reduce_sum(tf.square(o))

  xs = recurrent.Flatten(thetas + [py_utils.NestedMap(x=inputs.x)])
  dxs = tf.gradients(ys=loss, xs=xs)

  # Reference implementation using Recurrent(), one layer at a time.
  ref = inputs
  for i in range(layers):
    ref = self.ElmanOut(
        recurrent.Recurrent(
            cell_fn=cell_fns[i],
            cell_grad=cell_grads[i],
            theta=thetas[i],
            state0=init_states[i],
            inputs=ref)[0])
  return ref.x, output.x, loss, xs, dxs
def FProp(self, theta, x, paddings=None, update=False):
  """Computes distances of the given input 'x' to all centroids.

  This implementation applies layer normalization on 'x' internally first
  (when p.apply_layer_norm is set), and the returned 'dists' is computed using
  the normalized 'x'.

  Args:
    theta: A `.NestedMap` of weights' values of this layer.
    x: A tensor of shape [B, L, N, H].
    paddings: If not None, a tensor of shape [B, L].
    update: bool, whether to update centroids using x.

  Returns:
    dists: "distances" of the given input 'x' to all centroids.
           Shape [B, L, N, K]. When `update` is True the tensor carries a
           control dependency on the centroid update.
    k_means_loss: the average squared Euclidean distances to the closest
                  centroid, a scalar.
  """
  p = self.params
  x = tf.cast(x, theta.means.dtype)
  if paddings is None:
    paddings = tf.zeros_like(x[:, :, 0, 0])
  # Shape [B, L, 1, 1]
  paddings_4d = paddings[:, :, None, None]

  if p.apply_layer_norm:
    x = KMeansClusteringForAtten.LayerNorm(x, p.epsilon)

  # 'x' is normalized (but theta.means is not), we use negative dot product to
  # approximate the Euclidean distance here.
  dists = -2 * tf.einsum('BLNH, NKH -> BLNK', x, theta.means)

  if not p.apply_layer_norm:
    # If entries are not normalized, compute norms here.
    x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
    means_norm_sq = tf.reduce_sum(
        tf.square(theta.means), axis=-1, keepdims=False)
    # [N, K] -> [1, 1, N, K] so it broadcasts over batch and length.
    means_norm_sq = tf.expand_dims(means_norm_sq, axis=0)
    means_norm_sq = tf.expand_dims(means_norm_sq, axis=0)
    dists += x_norm_sq + means_norm_sq

  # For padded positions we update the distances to very large numbers.
  very_large_dists = tf.ones_like(dists) * tf.constant(
      0.1, dtype=dists.dtype) * dists.dtype.max
  paddings_tiled = tf.tile(paddings_4d, [1, 1, p.num_heads, p.num_clusters])
  dists = tf.where(paddings_tiled > 0.0, very_large_dists, dists)

  # Shape [B, L, N, K], the same as 'dists' above.
  nearest_one_hot = tf.one_hot(
      tf.math.argmin(dists, axis=-1),
      p.num_clusters,
      dtype=theta.means.dtype)
  # Same shape as the input 'x'.
  nearest_centroid = tf.einsum('BLNK, NKH -> BLNH', nearest_one_hot,
                               theta.means)
  diff = tf.math.squared_difference(x, tf.stop_gradient(nearest_centroid))
  diff = py_utils.ApplyPadding(paddings_4d, diff)
  diff = tf.math.reduce_mean(diff, axis=2)

  # The commitment loss which when back proped against encourages the 'x'
  # values to commit to their chosen centroids.
  diff = tf.cast(diff, tf.float32)
  paddings = tf.cast(paddings, tf.float32)
  # NOTE: the denominator is the number of non-padded positions; it is zero
  # when every position is padded.
  k_means_loss = tf.math.reduce_sum(diff) / tf.math.reduce_sum(1.0 - paddings)
  summary_utils.scalar('k_means/squared_distance_loss', k_means_loss)

  # TODO(zhouwk): investigate normalizing theta.means after each update.
  # Per-centroid L2 norms, shape [N, K]. Without `axis` tf.norm collapses the
  # whole tensor into one scalar, which would make the min and mean summaries
  # below identical.
  means_norm = tf.norm(theta.means, axis=-1)
  summary_utils.scalar('k_means/centroid_l2_norm/min',
                       tf.math.reduce_min(means_norm))
  summary_utils.scalar('k_means/centroid_l2_norm/mean',
                       tf.math.reduce_mean(means_norm))

  if not update:
    return dists, k_means_loss

  # To update the centroids (self.vars.means), we apply gradient descent on
  # the mini-batch of input 'x', which yields the following:
  #   new_centroid = centroid + (1 - decay) * (x_mean - centroid)
  # where x_mean is the average over all the input vectors closest to this
  # centroid.
  #
  # Note that this approach is equivalent with backprop via
  #    loss = tf.math.reduce_mean(
  #        tf.math.squared_difference(tf.stop_gradient(x), nearest_centroid)))
  # , except that here the learning rate is independently set via 'decay'.

  # Ensure that the padded positions are not used to update the centroids.
  nearest_one_hot = py_utils.ApplyPadding(paddings_4d, nearest_one_hot)

  # Sum away batch and sequence length dimensions to get per cluster count.
  # Shape: [N, K]
  per_cluster_count = tf.reduce_sum(nearest_one_hot, axis=[0, 1])
  summary_utils.histogram('k_means/per_cluster_vec_count', per_cluster_count)

  # Sum of the input 'x' per each closest centroid.
  sum_x = tf.einsum('BLNK, BLNH -> NKH', nearest_one_hot, x)

  if py_utils.use_tpu():
    # Aggregate the statistics across replicas so every replica applies the
    # same update.
    per_cluster_count = tf.tpu.cross_replica_sum(per_cluster_count)
    sum_x = tf.tpu.cross_replica_sum(sum_x)

  if p.use_ema:
    updated_ema_count = moving_averages.assign_moving_average(
        self.vars.ema_count,
        tf.cast(per_cluster_count, self.vars.ema_count.dtype),
        p.decay,
        zero_debias=False)
    updated_ema_means = moving_averages.assign_moving_average(
        self.vars.ema_means,
        tf.cast(sum_x, self.vars.ema_means.dtype),
        p.decay,
        zero_debias=False)
    # Smooth the counts with epsilon so that empty clusters do not divide by
    # zero, while keeping the total count per head equal to n.
    n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
    updated_ema_count = ((updated_ema_count + p.epsilon) /
                         (n + p.num_clusters * p.epsilon) * n)
    updated_ema_means = updated_ema_means / tf.expand_dims(
        updated_ema_count, axis=-1)
    updated_ema_means = tf.cast(updated_ema_means, self.vars.means.dtype)
    means = tf.cast(theta.means, updated_ema_means.dtype)
    update_means_diff = updated_ema_means - means
  else:
    # If per_cluster_count for a cluster is 0, then 'nearest_one_hot' in that
    # cluster's position will always be 0, hence 'sum_x' in that dimension
    # will be 0.
    new_means = sum_x / tf.maximum(
        tf.constant(1.0, dtype=per_cluster_count.dtype),
        tf.expand_dims(per_cluster_count, axis=-1))
    # Note that we intentionally do not normalize the means after this update
    # as empirically this works better.
    update_means_diff = tf.cast(
        (1.0 - p.decay) * (new_means - theta.means), self.vars.means.dtype)

  # Tie the variable update to 'dists' so that fetching the output also
  # applies the centroid update.
  return py_utils.with_dependencies(
      [tf.assign_add(self.vars.means, update_means_diff)],
      dists), k_means_loss
def _l2_norm(v):
  """Returns the L2 norm of `v` taken over all of its elements (a scalar)."""
  sum_of_squares = tf.reduce_sum(tf.square(v))
  return tf.sqrt(sum_of_squares)
def ResidualsToBBoxes(self, anchor_bboxes, residuals):
  r"""Decodes predicted residuals into bboxes relative to their anchors.

  The decoding applies the following formulae:

    x_predicted = x_a + x_residual \* diagonal_xy
    y_predicted = y_a + y_residual \* diagonal_xy
    z_predicted = z_a + z_residual \* dz_a

    dx_predicted = dx_a \* exp(dx_residual)
    dy_predicted = dy_a \* exp(dy_residual)
    dz_predicted = dz_a \* exp(dz_residual)

    phi_predicted = floormod(phi_a + phi_residual, pi)

  with diagonal_xy = sqrt(dx_a^2 + dy_a^2). These are the equations of
  LocalizationResiduals solved for the \*_gt variables.

  Args:
    anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
      phi), corresponding to each anchor bbox parameters.
    residuals: tf.float32 of the same shape as anchor_bboxes containing
      predicted residuals at each anchor location.

  Returns:
    A tf.float32 tensor of the same shape as anchor_bboxes with predicted
    bboxes.
  """
  bbox_shape = py_utils.GetShape(anchor_bboxes)
  anchor_bboxes = py_utils.with_dependencies(
      [py_utils.assert_equal(bbox_shape[-1], 7)], anchor_bboxes)
  residuals = py_utils.HasShape(residuals, bbox_shape)

  anchor_fields = tf.unstack(anchor_bboxes, num=7, axis=-1)
  x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = anchor_fields
  residual_fields = tf.unstack(residuals, num=7, axis=-1)
  x_res, y_res, z_res, dx_res, dy_res, dz_res, phi_res = residual_fields

  # Center offsets in the x-y plane are expressed in units of the anchor's
  # footprint diagonal; the z offset is in units of the anchor height.
  xy_diagonal = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))
  centers = [
      x_a + x_res * xy_diagonal,
      y_a + y_res * xy_diagonal,
      z_a + z_res * dz_a,
  ]

  # Dimension residuals are log-ratios, so they are undone with exp.
  dimensions = [
      dx_a * tf.exp(dx_res),
      dy_a * tf.exp(dy_res),
      dz_a * tf.exp(dz_res),
  ]

  # Assuming a sine(delta_phi) transformation is used in the loss, direction
  # cannot be distinguished, so floormod keeps the predicted phi in
  # [0, np.pi) for consistency. A separate direction classifier should be
  # added to the model if direction is needed.
  heading = tf.floormod(phi_a + phi_res, np.pi)

  return tf.stack(centers + dimensions + [heading], axis=-1)