def _True(anchor, bboxes):
  """Ranks bboxes by the signed angle between their centroid and the anchor.

  Branch used when the number of bboxes is non-zero.

  Args:
    anchor: a rank-1 tensor holding the 2-d anchor point.
    bboxes: a tensor whose leading dimension indexes boxes; it is handed to
      BBoxesCentroid to obtain one 2-d centroid per box.

  Returns:
    The indices returned by tf.nn.top_k over the angular score, i.e. box
    indices ordered from the highest score to the lowest.
  """
  num_boxes = tf.shape(bboxes)[0]
  centers = BBoxesCentroid(bboxes)

  # Cosine of the angle anchor--O--centroid: dot product divided by the product
  # of the norms. Where that product is zero the cosine is defined as zero.
  dots = tf.squeeze(tf.matmul(centers, tf.expand_dims(anchor, 1)), axis=1)
  norm_product = tf.norm(anchor) * tf.norm(centers, axis=1)
  cos_phi = tf.where(
      tf.greater(norm_product, 0), dots / norm_product,
      tf.zeros([num_boxes], norm_product.dtype))

  # The sign of the angle comes from the z component of anchor x centroid.
  # tf.linalg.cross needs 3-vectors, so a zero z coordinate is padded on, and
  # it does not broadcast, so the anchor is tiled to one row per box.
  anchor_xyz = tf.tile(
      tf.pad(tf.expand_dims(anchor, 0), [[0, 0], [0, 1]]), [num_boxes, 1])
  centers_xyz = tf.pad(centers, [[0, 0], [0, 1]])
  cross_z_is_positive = tf.greater(
      tf.linalg.cross(anchor_xyz, centers_xyz), 0)[:, 2]

  # Positive cross product: score = -1 - cos, which lies in [-2, 0].
  # Otherwise:              score =  1 + cos, which lies in [0, 2].
  # Sorting the scores in descending order therefore sweeps once around the
  # origin, starting at the anchor direction on the non-positive side.
  score = tf.where(cross_z_is_positive, -1 - cos_phi, 1 + cos_phi)
  _, order = tf.nn.top_k(score, num_boxes, sorted=True)
  return order
def _finish(self, update_ops, name_scope):
  """Finishes both child optimizers and optionally applies global grafting.

  Args:
    update_ops: list of update ops that must run before the child optimizers
      are finished.
    name_scope: name given to the returned grouped op; "_m" and "_d" are
      appended to it for the magnitude and direction optimizers.

  Returns:
    A grouped op. With `self.use_global_norm` it groups the per-variable
    grafted updates; otherwise it groups the two child `_finish` ops together
    with `update_ops`.
  """
  with tf.control_dependencies(update_ops):
    ops1 = self.magnitude_optimizer._finish([], name_scope + "_m")  # pylint: disable=protected-access
    ops2 = self.direction_optimizer._finish([], name_scope + "_d")  # pylint: disable=protected-access

    if self.use_global_norm:  # apply global grafting
      with tf.control_dependencies([ops1, ops2]):
        m_global_norm = tf.Variable(0.)
        d_global_norm = tf.Variable(0.)

        # Accumulate the squared per-variable step norms of both optimizers.
        # NOTE(review): the assign_add results are neither captured nor used
        # as control dependencies. Confirm that these accumulations actually
        # run before `multiplier` is evaluated in the execution mode this
        # optimizer is used in, and whether the accumulators need resetting
        # between steps.
        for var in self._variables:
          m_step_norm = self.get_slot(var, "m_step_norm")
          d_step_norm = self.get_slot(var, "d_step_norm")
          tf.assign_add(m_global_norm, m_step_norm**2)
          tf.assign_add(d_global_norm, d_step_norm**2)

        # Ratio of the global magnitude norm to the global direction norm;
        # the denominator is clamped to avoid a division by zero.
        multiplier = tf.sqrt(m_global_norm / tf.maximum(d_global_norm, 1e-30))

        step_ops = []
        for var in self._variables:
          d_step = self.get_slot(var, "scratch_copy")
          # Look up this variable's own direction-step norm. Previously the
          # guard reused `d_step_norm` left over from the last iteration of
          # the accumulation loop, so every variable was gated on the last
          # variable's norm.
          d_step_norm = self.get_slot(var, "d_step_norm")
          step = tf.where(
              tf.greater(d_step_norm, 0), multiplier * d_step,
              tf.zeros_like(d_step))
          step_op = tf.assign_add(var, self._learning_rate_tensor * step)
          step_ops.append(step_op)
        return tf.group(*step_ops, name=name_scope)

  return tf.group(*([ops1, ops2] + update_ops), name=name_scope)
def _internal_apply_dense(self, grad, var, magnitude_optimizer_apply_fn,
                          direction_optimizer_apply_fn):  # pylint: disable=g-doc-args
  """Main optimization logic of AdaGraft, which calls the child optimizers.

  Args:
    grad: Tensor containing gradients.
    var: Tensor containing parameter values.
    magnitude_optimizer_apply_fn: Apply magnitude optimizer.
    direction_optimizer_apply_fn: Apply direction optimizer.

  Returns:
    The final update op. Without global-norm grafting it assigns
    `old value + learning rate * grafted step` to var. When
    `self.use_global_norm` is set, var is restored to its old value and the
    returned op stores the direction optimizer's step in the 'scratch_copy'
    slot instead of updating var.

  Pseudocode:
    - Copy weights into scratch space 'scratch_copy'.
    - Run magnitude_optimizer in-place.
    - Use scratch copy to figure out how far we moved ('m_step').
    - Copy weights back.
    - Run direction_optimizer in-place.
    - Move weights along the line segment with scratch_copy.
  """

  # Remember the variable for the global-norm path.
  if self.use_global_norm:
    self._variables.append(var)

  # Slot with current parameter values
  scratch_slot = self.get_slot(var, "scratch_copy")
  old_var = tf.assign(scratch_slot, var)

  # Each stage below is chained to the previous one through control
  # dependencies, so the statement order here is the execution order.
  with tf.control_dependencies([old_var]):
    m_updated_var = magnitude_optimizer_apply_fn(grad, var)  # pylint: disable=protected-access

  # Run magnitude optimizer and compute the norm of the update.
  with tf.control_dependencies([m_updated_var]):
    m_step = var - old_var
    m_step_norm = tf.norm(m_step)
    if self.diagnostic or self.use_global_norm:
      # Persist the norm in a slot; the assign op replaces the plain tensor.
      m_step_norm = tf.assign(self.get_slot(var, "m_step_norm"), m_step_norm)

  # Run direction optimizer and compute its norm, and the direction.
  with tf.control_dependencies([m_step_norm]):
    # Undo the magnitude optimizer's in-place update.
    flushed_var = tf.assign(var, old_var)
  with tf.control_dependencies([flushed_var]):
    d_updated_var = direction_optimizer_apply_fn(grad, var)  # pylint: disable=protected-access

  # Run an update of the direction optimizer with magnitude optimizer norm.
  with tf.control_dependencies([d_updated_var]):
    d_step = var - old_var
    d_step_norm = tf.norm(d_step)
    if self.diagnostic or self.use_global_norm:
      d_step_norm = tf.assign(self.get_slot(var, "d_step_norm"), d_step_norm)
    if self.use_global_norm:
      # Global grafting: restore var and stash the direction step in the
      # scratch slot; the variable itself is not stepped here.
      flushed_var = tf.assign(var, old_var)
      with tf.control_dependencies([d_step_norm, flushed_var]):
        return tf.assign(scratch_slot, d_step)
    # Rescale the direction step to the magnitude step's norm. The step is
    # zero when the direction step has zero norm, and the denominator is
    # clamped to avoid a division by zero.
    step = tf.where(
        tf.greater(d_step_norm, 0),
        (m_step_norm / tf.maximum(d_step_norm, 1e-30)) * d_step,
        tf.zeros_like(d_step))
    return tf.assign(var, old_var + self._learning_rate_tensor * step)
def KeyFunc(batch):
  """Maps a batch to the bucket upper bound selected by its smallest key.

  Counts how many entries of `p.bucket_upper_bound` are strictly smaller than
  the minimum of `batch.bucket_keys` and uses that count as an index into the
  bounds. `p` comes from the enclosing scope.

  Args:
    batch: an object exposing a `bucket_keys` tensor.

  Returns:
    A scalar tf.int64 tensor holding the selected upper bound.
  """
  upper_bounds = p.bucket_upper_bound
  smallest_key = tf.reduce_min(batch.bucket_keys)
  # One for every bound the key exceeds; their sum is the bucket index.
  exceeded = tf.cast(tf.greater(smallest_key, upper_bounds), tf.int32)
  bucket_idx = tf.reduce_sum(exceeded)
  return tf.constant(upper_bounds, dtype=tf.int64)[bucket_idx]
def _preconditioned_update(self, var, partitioned_grads,
                           diagonal_grad_update):
  """Returns the matrix-preconditioned update rescaled to the diagonal norm.

  The direction is taken from the matrix preconditioner (optionally smoothed
  with momentum), while the step size is taken from the diagonal AdaGrad
  update.

  Args:
    var: Variable for which the preconditioned gradient is computed.
    partitioned_grads: Partitioned gradients.
    diagonal_grad_update: Update as given by diagonal adagrad.

  Returns:
    The preconditioned gradient multiplied by the ratio of the two L2 norms.
  """
  raw_direction = self._compute_preconditioned_raw_grad(var, partitioned_grads)

  if self._momentum > 0.0:
    # Exponential moving average of the preconditioned gradient, kept in the
    # "precond_grad_momentum" slot.
    momentum_slot = self.get_slot(var, "precond_grad_momentum")
    direction = state_ops.assign(
        momentum_slot,
        momentum_slot * self._momentum_tensor +
        raw_direction * (1.0 - self._momentum_tensor))
  else:
    direction = raw_direction

  direction_norm = tf.sqrt(tf.reduce_sum(tf.square(direction)))
  diagonal_norm = tf.sqrt(tf.reduce_sum(tf.square(diagonal_grad_update)))

  # Both norms are clamped away from zero; a zero-norm direction keeps a
  # multiplier of 1.
  scale = tf.where(
      tf.greater(direction_norm, 0.0),
      tf.maximum(diagonal_norm, 1e-30) / (tf.maximum(direction_norm, 1e-30)),
      1.0)
  return direction * scale
def _inverse_pth_root_graph(self, epsilon):
  """Builds a standalone inverse p-th root graph and returns it serialized.

  The graph has two float32 placeholders, "exponent" and "input", and two
  named outputs, "output" and "diff".

  Args:
    epsilon: forwarded unchanged to the specialized and the generalized
      inverse p-th root implementations.

  Returns:
    The serialized GraphDef of the constructed graph.
  """
  root_graph = tf.Graph()
  with root_graph.as_default():
    exponent = tf.reshape(
        tf.placeholder(dtype=tf.float32, name="exponent", shape=None), [])
    # Apply exponent multiplier.
    exponent = exponent * self._exponent_multiplier
    matrix = tf.placeholder(dtype=tf.float32, name="input", shape=None)

    # The iterative Newton-Schur method is only used for p = 2, 4 or 8, where
    # p = -1 / exponent.
    p_is_2_4_or_8 = tf.math.logical_or(
        tf.math.logical_or(
            tf.equal(-1.0 / exponent, 2), tf.equal(-1.0 / exponent, 4)),
        tf.equal(-1.0 / exponent, 8))
    # 4096 is the largest dimension for which SVD is considered tractable.
    exceeds_svd_limit = tf.greater(tf.shape(matrix)[0], 4096)
    use_specialized = tf.math.logical_and(exceeds_svd_limit, p_is_2_4_or_8)

    output, diff = tf.cond(
        use_specialized,
        lambda: self._specialized_inverse_pth_root(matrix, exponent, epsilon),
        lambda: self._generalized_inverse_pth_root(matrix, exponent, epsilon))

    # Give the results stable names so callers can fetch them by name.
    tf.identity(output, "output")
    tf.identity(tf.cast(diff, tf.float32), "diff")
  return root_graph.as_graph_def().SerializeToString()
def ReorderIndicesByPhi(anchor, bboxes):
  """Sorts bboxes based on their angles relative to the anchor point.

  Args:
    anchor: A vector of (x0, y0).
    bboxes: A matrix of shape [N, 4].

  Returns:
    A permutation of tf.range(n) which can be used to reshuffle bboxes into
    the sorted order (e.g., tf.gather(bboxes, indices)). An empty int32 tensor
    is returned when there are no bboxes.
  """

  @tf.Defun(anchor.dtype, bboxes.dtype)
  def _True(anchor, bboxes):
    """True branch when num of bboxes is non-zero."""
    num_boxes = tf.shape(bboxes)[0]
    centers = BBoxesCentroid(bboxes)

    # Cosine of the angle anchor--O--centroid: dot product divided by the
    # product of the norms, defined as zero where that product is zero.
    dots = tf.squeeze(tf.matmul(centers, tf.expand_dims(anchor, 1)), axis=1)
    norm_product = tf.norm(anchor) * tf.norm(centers, axis=1)
    cos_phi = tf.where(
        tf.greater(norm_product, 0), dots / norm_product,
        tf.zeros([num_boxes], norm_product.dtype))

    # The sign of the angle comes from the z component of anchor x centroid.
    # tf.linalg.cross needs 3-vectors, so a zero z coordinate is padded on,
    # and it does not broadcast, so the anchor is tiled to one row per box.
    anchor_xyz = tf.tile(
        tf.pad(tf.expand_dims(anchor, 0), [[0, 0], [0, 1]]), [num_boxes, 1])
    centers_xyz = tf.pad(centers, [[0, 0], [0, 1]])
    cross_z_is_positive = tf.greater(
        tf.linalg.cross(anchor_xyz, centers_xyz), 0)[:, 2]

    # Positive cross product: score = -1 - cos, which lies in [-2, 0].
    # Otherwise:              score =  1 + cos, which lies in [0, 2].
    # A descending sort of the scores therefore sweeps once around the origin.
    score = tf.where(cross_z_is_positive, -1 - cos_phi, 1 + cos_phi)
    _, order = tf.nn.top_k(score, num_boxes, sorted=True)
    return order

  @tf.Defun(anchor.dtype, bboxes.dtype)
  def _False(anchor, bboxes):
    """False branch: nothing to sort, so return an empty index vector."""
    del anchor, bboxes
    return tf.zeros([0], dtype=tf.int32)

  has_boxes = tf.greater(tf.shape(bboxes)[0], 0)
  return functional_ops.If(has_boxes, [anchor, bboxes], _True, _False)[0]
def PostTrainingStepUpdate(self, global_step):
  """Updates moving_mean and moving_variance after each training step.

  Args:
    global_step: not used by this method.

  Returns:
    An op grouping the moving-mean and moving-variance updates.
  """
  p = self.params
  accumulators = self.accumulators

  # Sufficient statistics accumulated over microbatches.
  counts = accumulators.counts.GetValue()
  mean_ss = accumulators.mean_ss.GetValue()
  variance_ss = accumulators.variance_ss.GetValue()

  # Batch mean and batch variance recovered from the sufficient statistics.
  mean, variance = tf.nn.normalize_moments(counts, mean_ss, variance_ss, None)
  decay = tf.convert_to_tensor(1.0 - p.decay, p.dtype)

  def _DecayedDelta(moving, batch_stat):
    """Amount to subtract from `moving`; zero unless counts exceeds 0.5."""
    return tf.where(
        tf.greater(counts, 0.5),
        (moving - tf.cast(batch_stat, p.dtype)) * decay,
        tf.zeros_like(moving))

  with tf.name_scope(p.name) as scope:
    moving_mean = self.vars.moving_mean
    with tf.colocate_with(moving_mean):
      mean_update = tf.assign_sub(
          moving_mean,
          _DecayedDelta(moving_mean, mean),
          name='moving_mean_update')

    moving_variance = self.vars.moving_variance
    with tf.colocate_with(moving_variance):
      var_update = tf.assign_sub(
          moving_variance,
          _DecayedDelta(moving_variance, variance),
          name='moving_variance_update')

    # NOTE(review): the results of these checks are not used by anything
    # below; confirm that they are still expected to take effect.
    py_utils.CheckNumerics(
        self.vars.moving_mean,
        'moving mean of {} failed numeric check'.format(scope))
    py_utils.CheckNumerics(
        self.vars.moving_variance,
        'moving variance of {} failed numeric check'.format(scope))

  # Start the next training step from empty statistics.
  accumulators.counts.Reset()
  accumulators.mean_ss.Reset()
  accumulators.variance_ss.Reset()
  return tf.group(mean_update, var_update)
def IsWithinBBox(points, bbox):
  """Checks if points are within a 2-d bbox.

  The function returns true if points are strictly inside the box. It also
  returns true when the points are exactly on the box edges.

  Args:
    points: a float Tensor of shape [..., 2] of points to be tested. The last
      coordinates are (x, y).
    bbox: a float Tensor of shape [..., 4, 2] of bboxes. The last coordinates
      are the four corners of the bbox and (x, y). The corners are assumed to
      be given in counter-clockwise order; this is asserted at run time.

  Returns:
    Tensor: If ``pshape = tf.shape(points)[:-1]`` and
    ``bshape = tf.shape(bbox)[:-2]``, returns a boolean tensor of shape
    ``tf.concat([pshape, bshape], axis=0)``, where each element is true if the
    point is inside the corresponding box. If a point falls exactly on an edge
    of the bbox, it is also true. Boxes with zero area never contain a point.
  """
  bshape = py_utils.GetShape(bbox)[:-2]
  pshape = py_utils.GetShape(points)[:-1]
  bbox = py_utils.HasShape(bbox, tf.concat([bshape, [4, 2]], axis=0))
  points = py_utils.HasShape(points, tf.concat([pshape, [2]], axis=0))

  # Enumerate all 4 corners; consecutive pairs form the 4 edges.
  v1, v2, v3, v4 = (bbox[..., 0, :], bbox[..., 1, :], bbox[..., 2, :],
                    bbox[..., 3, :])

  # Every triple of consecutive corners must turn counter-clockwise.
  v1v2v3_check = tf.reduce_all(_IsCounterClockwiseDirection(v1, v2, v3))
  v2v3v4_check = tf.reduce_all(_IsCounterClockwiseDirection(v2, v3, v4))
  v4v1v2_check = tf.reduce_all(_IsCounterClockwiseDirection(v4, v1, v2))
  v3v4v1_check = tf.reduce_all(_IsCounterClockwiseDirection(v3, v4, v1))
  with tf.control_dependencies([
      py_utils.Assert(v1v2v3_check, [v1, v2, v3]),
      # The reported data now matches the checked triple (v2, v3, v4); it
      # previously listed v3 twice and omitted v2.
      py_utils.Assert(v2v3v4_check, [v2, v3, v4]),
      py_utils.Assert(v4v1v2_check, [v4, v1, v2]),
      py_utils.Assert(v3v4v1_check, [v3, v4, v1])
  ]):
    # A point is inside when it is on the left of (or on) all four edges.
    is_inside = tf.math.logical_and(
        tf.math.logical_and(
            _IsOnLeftHandSideOrOn(points, v1, v2),
            _IsOnLeftHandSideOrOn(points, v2, v3)),
        tf.math.logical_and(
            _IsOnLeftHandSideOrOn(points, v3, v4),
            _IsOnLeftHandSideOrOn(points, v4, v1)))
  has_non_zero_area = tf.greater(_BBoxArea(bbox), 0)
  is_inside = tf.logical_and(tf.cast(is_inside, tf.bool), has_non_zero_area)
  # Swap the last two dimensions. The einsum is applied to an int32 copy and
  # the result is cast back to bool.
  is_inside = tf.einsum('...ij->...ji', tf.cast(is_inside, tf.int32))
  return tf.cast(is_inside, tf.bool)
def _BodyFn(curr_idx, distance_to_selected, sampled_idx, closest_idx):
  """Loop body for farthest point sampler.

  Selects one point per batch element, records it, and updates the running
  distance from every point to its nearest selected point.

  The names batch_size, num_points, dims, points, padding, num_valid_points,
  num_seeded_points, random_seed and precomputed_squared_distance are read
  from the enclosing scope, which is not shown here.

  Args:
    curr_idx: index of the current loop iteration.
    distance_to_selected: distance from each point to the closest point
      selected so far; reshaped/compared as [batch_size, num_points].
    sampled_idx: an object with a `write(index, value)` method (presumably a
      tf.TensorArray -- TODO confirm) that collects the selected indices.
    closest_idx: for each point, the loop iteration at which its currently
      closest selected point was picked.

  Returns:
    A tuple (curr_idx + 1, updated distance_to_selected, updated sampled_idx,
    updated closest_idx).
  """

  def _GetRandomRealPoint():
    """Select the first point.

    For the first point, we want any random real (non padded) point, so we
    create a random value per point, and then set all padded ones to
    some large value (more than the maxval). We then take the min per batch
    element to get the first points.

    Returns:
      Tensor containing the index of a random point selected for each example
      in the batch.
    """
    random_values = tf.random.uniform((batch_size, num_points),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32,
                                      seed=random_seed)
    # Padded entries get padding * 10, which is above maxval for a padding
    # value of 1, so argmin never picks them while a real point exists.
    random_values = tf.where(
        tf.equal(padding, 0.0), random_values, padding * 10)
    return tf.argmin(random_values, axis=1, output_type=tf.int32)

  def _GetFurthestPoint():
    """Get point that is furthest from those already selected.

    We also bias the sampling towards real points by setting the distance
    to padded points negative until we are out of real points.

    Returns:
      Tensor containing the index of the next farthest point selected for each
      example in the batch.
    """
    # Set padded points distance to negative so they aren't selected.
    padding_masked_distance_to_selected = tf.where(
        tf.equal(padding, 0.0), distance_to_selected,
        -1.0 * tf.ones((batch_size, num_points), dtype=tf.float32))
    # But only do this when we still have valid points left.
    padding_masked_distance_to_selected = tf.where(
        tf.less(curr_idx, num_valid_points),
        padding_masked_distance_to_selected, distance_to_selected)
    return tf.argmax(
        padding_masked_distance_to_selected, axis=-1, output_type=tf.int32)

  def _GetSeededPoint():
    """Select a seeded point.

    Seeded points are assumed to be at the beginning of the original points.

    Returns:
      Tensor containing the index of the next seeded point to select for each
      example in the batch.
    """
    return tf.ones((batch_size,), dtype=tf.int32) * curr_idx

  # Select indices for this loop iteration.
  def _Seeded():
    # With seeded points: take them in order first, then the farthest points.
    return tf.cond(
        tf.less(curr_idx, num_seeded_points), _GetSeededPoint,
        _GetFurthestPoint)

  def _Real():
    # Without seeded points: a random real point first, then farthest points.
    return tf.cond(
        tf.equal(curr_idx, 0), _GetRandomRealPoint, _GetFurthestPoint)

  new_selected = tf.cond(tf.greater(num_seeded_points, 0), _Seeded, _Real)
  sampled_idx = sampled_idx.write(curr_idx, new_selected)

  # Extract the distance to the latest point selected to update
  # distance_to_selected.
  new_selected_gather_idx = tf.stack([tf.range(batch_size), new_selected],
                                     axis=1)
  if precomputed_squared_distance is not None:
    # Look the distances up instead of recomputing them.
    new_distance = tf.gather_nd(precomputed_squared_distance,
                                new_selected_gather_idx)
  else:
    new_points = tf.reshape(
        tf.gather_nd(points, new_selected_gather_idx), [batch_size, 1, dims])
    new_distance = tf.reshape(
        SquaredDistanceMatrix(points, new_points), [batch_size, num_points])

  is_newly_closest = tf.less(new_distance, distance_to_selected)
  distance_to_selected = tf.minimum(distance_to_selected, new_distance)

  # Track the index to the closest selected point. Note that the value stored
  # is curr_idx (the loop iteration), not the index of the selected point.
  new_selected_tiled = tf.tile([[curr_idx]], [batch_size, num_points])
  closest_idx = tf.cond(
      tf.equal(curr_idx, 0),
      # At the first loop iteration, the init points are the closest.
      lambda: new_selected_tiled,
      # Otherwise, update with the new points based on the distances.
      lambda: tf.where(is_newly_closest, new_selected_tiled, closest_idx))
  return curr_idx + 1, distance_to_selected, sampled_idx, closest_idx
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
  """Finds the indices of the k neighbors in `points` of each query point.

  A padding tensor is returned alongside the indices. Entries that are not
  marked as padded refer to distinct real points. Entries are marked as padded
  either because the neighbor is itself a padded point or because it lies
  farther away than max_distance.

  Note: indices of padded entries may refer to padded points from the
  original input, or may repeat the first returned neighbor.

  TODO(weihan,jngiam): PointCNN implementation makes an assumption that padded
  points are repeated points from the original points. This behavior is
  maintained here, but we should update PointCNN to respect indices paddings.

  Args:
    points: tensor of shape [N, P1, dims].
    query_points: tensor of shape [N, P2, dims]
    k: Integer.
    points_padding: optional tensor of shape [N, P1] containing True/1.0 iff
      the point is a padded point. If None, all points are considered real.
    max_distance: float giving the maximum distance a neighbor may have.
      Neighbors beyond it are replaced by the first returned neighbor and
      marked as padded. If None, no filtering by distance is performed.
    sample_neighbors_uniformly: boolean specifying whether to sample neighbors
      uniformly among those within max_distance.

  Returns:
    A pair of tensors:

    - indices: tensor of shape [N, P2, k].
    - padding: float tensor of shape [N, P2, k] where 1 represents a padded
      point, and 0 represents an unpadded (real) point.

  Raises:
    ValueError: if sample_neighbors_uniformly is set without max_distance.
  """
  n, p1 = py_utils.GetShape(points, 2)
  query_points = py_utils.HasShape(query_points, [n, -1, -1])
  _, p2 = py_utils.GetShape(query_points, 2)

  # Pair-wise *squared* distances: max_distance has to be squared before it is
  # compared against them.
  sq_dist = SquaredDistanceMatrix(query_points, points)
  sq_dist = py_utils.HasShape(sq_dist, [n, p2, p1])

  has_padding = points_padding is not None
  if has_padding:
    # Push padded points beyond every real distance by adding a large scalar.
    points_padding = tf.cast(tf.expand_dims(points_padding, 1), tf.float32)
    points_padding = py_utils.HasShape(points_padding, [n, 1, p1])
    large_scalar = tf.reduce_max(sq_dist) + 1
    sq_dist += points_padding * large_scalar

  if sample_neighbors_uniformly:
    if max_distance is None:
      raise ValueError('Uniform sampling requires specifying max_distance.')
    # Replace the distance of every neighbor within range by a uniform random
    # value below max_distance^2; top_k then picks a random subset of them
    # without replacement.
    within_range = tf.less_equal(sq_dist, max_distance**2)
    sq_dist = tf.where(
        within_range,
        tf.square(max_distance) * tf.random_uniform(tf.shape(sq_dist)),
        sq_dist)

  # top_k selects the largest values, so negate to get the k smallest
  # distances. Both results have shape N x P2 x K.
  neg_top_k_dist, indices = tf.nn.top_k(-sq_dist, k=k, sorted=True)

  # Padded points are the ones whose distance reaches the large scalar.
  if has_padding:
    paddings = tf.greater_equal(-neg_top_k_dist, large_scalar)
  else:
    paddings = tf.zeros_like(neg_top_k_dist, dtype=tf.bool)

  if max_distance is not None:
    # Neighbors beyond max_distance are redirected to the first returned
    # neighbor and flagged as padded.
    too_far = tf.greater(-neg_top_k_dist, tf.square(max_distance))
    first_neighbor = tf.tile(indices[:, :, :1], [1, 1, k])
    indices = tf.where(too_far, first_neighbor, indices)
    paddings = paddings | too_far

  indices = tf.reshape(indices, [n, p2, k])
  paddings = tf.cast(paddings, tf.float32)
  return indices, paddings
def AssignAnchors(self,
                  anchor_bboxes,
                  gt_bboxes,
                  gt_bboxes_labels,
                  gt_bboxes_mask,
                  foreground_assignment_threshold=0.5,
                  background_assignment_threshold=0.35,
                  background_class_id=0,
                  force_match=True,
                  similarity_fn=None):
  """Assigns anchors to bboxes using a similarity function (SSD-based).

  Every anchor is matched to its best scoring ground truth box; one ground
  truth box may be matched by several anchors. Each anchor ends up in one of
  three states:

  - Foreground (score >= foreground_assignment_threshold, or a force match):
    assigned_gt_labels is the label of the matched box and both
    assigned_cls_mask and assigned_reg_mask are 1.0.
  - Background (score <= background_assignment_threshold and not a force
    match): assigned_gt_labels is background_class_id, assigned_cls_mask is
    1.0 and assigned_reg_mask is 0.0.
  - Ignored (everything else): assigned_gt_labels is background_class_id and
    both masks are 0.0.

  A detection loss would usually weight the classification loss by
  assigned_cls_mask (foreground and background anchors only) and the
  regression loss by assigned_reg_mask (foreground anchors only).

  The two thresholds should be tuned per dataset.

  TODO(jngiam): Consider having a separate threshold for regression boxes; a
  separate threshold is used in PointRCNN.

  Args:
    anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
      parameters (x, y, z, dx, dy, dz, r).
    gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
      box parameters (x, y, z, dx, dy, dz, r).
    gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
      bounding box.
    gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
      the gt_bbox is a real bbox.
    foreground_assignment_threshold: Similarity score threshold; scores
      >= this value make an anchor foreground.
    background_assignment_threshold: Similarity score threshold; scores
      <= this value make an anchor background.
    background_class_id: class id given to anchors that are not foreground.
    force_match: Boolean. If enabled, an anchor whose best ground truth box
      also has that anchor as its best anchor is treated as foreground as
      long as the similarity score is > 0 and the box is a real box.
    similarity_fn: Function computing a similarity score (e.g., IOU) between
      pairs of bounding boxes. It takes the anchor and ground-truth bboxes
      and returns a matrix [A, G] of non-negative scores, greater meaning
      more similar. Defaults to self.IOU2DRotatedBoxes when None.

  Returns:
    NestedMap with the following keys

    - assigned_gt_idx: shape [A] index of the assigned ground truth box, or
      -1 for anchors that are not foreground.
    - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
    - assigned_gt_similarity_score: shape [A] score between the anchor and
      its best ground truth box (0 if that box is padded).
    - assigned_gt_labels: shape [A] label assigned to bbox.
    - assigned_cls_mask: shape [A] mask for classification loss per anchor;
      1.0 for foreground or background anchors, 0.0 otherwise.
    - assigned_reg_mask: shape [A] mask for regression loss per anchor; 1.0
      for foreground anchors, 0.0 otherwise. Note: background anchors do not
      have regression targets.
  """
  if similarity_fn is None:
    similarity_fn = self.IOU2DRotatedBoxes

  # Shape validation.
  anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
  num_anchors, _ = py_utils.GetShape(anchor_bboxes, 2)
  gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
  num_gt, _ = py_utils.GetShape(gt_bboxes, 2)

  # [A, G] similarity between every anchor and every ground truth box.
  scores = similarity_fn(anchor_bboxes, gt_bboxes)
  scores = py_utils.HasShape(scores, [num_anchors, num_gt])

  # Best ground truth box for each anchor.
  best_score = tf.reduce_max(scores, axis=1)
  best_gt_idx = tf.argmax(scores, axis=1)

  if force_match:
    # Best anchor score for each ground truth box.
    best_score_per_gt = tf.reduce_max(scores, axis=0, keepdims=True)

    # A pair is force matched when the box is the anchor's best box, the
    # anchor is the box's best anchor, the score is positive and the box is
    # a real (unpadded) box.
    is_best_for_gt = tf.equal(scores, best_score_per_gt)
    is_best_for_anchor = tf.equal(scores, best_score[..., tf.newaxis])
    has_positive_score = tf.greater(scores, 0.)
    is_real_gt = tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool)
    force_matches = (
        is_best_for_gt & is_best_for_anchor & has_positive_score & is_real_gt)

    is_force_matched = tf.reduce_any(force_matches, axis=1)
    force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32), axis=1)

    # Force matched anchors take their index from the force match so that the
    # right box is gathered for them later on.
    best_gt_idx = tf.where(is_force_matched, force_match_idx, best_gt_idx)

  # An anchor whose best box is a padded box gets a score of 0.
  best_gt_is_real = tf.equal(tf.batch_gather(gt_bboxes_mask, best_gt_idx), 1)
  best_score = tf.where(best_gt_is_real, best_score,
                        tf.zeros_like(best_score))

  # Threshold the scores into background / foreground.
  is_background = tf.less_equal(best_score, background_assignment_threshold)
  is_foreground = tf.greater_equal(best_score,
                                   foreground_assignment_threshold)
  if force_match:
    # Force matches are always foreground and never background.
    is_background = is_background & ~is_force_matched
    is_foreground = is_foreground | is_force_matched

  # Append a dummy background bbox; its index equals the original number of
  # ground truth boxes.
  dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)
  dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]
  gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
  gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                               axis=0)

  # Foreground anchors gather their matched box; background and ignored
  # anchors gather the dummy box.
  dummy_idx_per_anchor = tf.constant(
      dummy_bbox_idx,
      shape=py_utils.GetShape(best_gt_idx),
      dtype=best_gt_idx.dtype)
  gather_idx = tf.where(is_foreground, best_gt_idx, dummy_idx_per_anchor)

  assigned_gt_bbox = tf.batch_gather(gt_bboxes, gather_idx)
  assigned_gt_labels = tf.batch_gather(gt_bboxes_labels, gather_idx)

  # Masks for the classification and regression losses.
  assigned_cls_mask = tf.cast(is_background | is_foreground, tf.float32)
  assigned_reg_mask = tf.cast(is_foreground, tf.float32)

  # Anchors that gathered the dummy box report an index of -1.
  assigned_gt_idx = tf.where(
      tf.equal(gather_idx, dummy_bbox_idx),
      tf.ones_like(gather_idx) * -1, gather_idx)
  assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

  return py_utils.NestedMap(
      assigned_gt_idx=assigned_gt_idx,
      assigned_gt_bbox=assigned_gt_bbox,
      assigned_gt_similarity_score=best_score,
      assigned_gt_labels=assigned_gt_labels,
      assigned_cls_mask=assigned_cls_mask,
      assigned_reg_mask=assigned_reg_mask)
def _AddNoise(self, batch):
  """Adds noise to the source (see https://arxiv.org/pdf/1711.00043).

  This function implements 3 types of noise (hyperparams defined in
  self.params.denoise):
  1) slightly shuffle the sentence following p.shuffle_tok_range
  2) randomly drop tokens with probability p.drop_tok_prob
  3) randomly mask tokens with probability p.blank_tok_prob

  The noise is applied to an example with probability p.noise_sent_prob, and
  only if the example belongs to one of the tasks in p.task_ids.

  The batch is modified in place: batch.src.ids, batch.src.paddings,
  batch.src.ids_indicator and batch.src.weights are overwritten. Nothing is
  returned.

  Args:
    batch: a `.NestedMap` of the input batch.
  """

  def IsSpecialExample(task_ids, special_task_ids):
    """A utility function indicating whether inputs belong to specific tasks.

    Args:
      task_ids: Task ids for the input batch. Tensor of shape [batch].
      special_task_ids: A list of specified task ids.

    Returns:
      A tensor indicating whether each sample in the batch belongs to one of
      the specified tasks. Returns a tensor of size [batch].
    """
    batch_size = py_utils.GetShape(task_ids)[0]
    # Compare every task id against every special id, then reduce over the
    # special ids.
    return tf.reduce_any(
        tf.equal(
            tf.expand_dims(task_ids, -1),
            tf.cast(
                tf.broadcast_to(
                    special_task_ids,
                    [batch_size, len(special_task_ids)]), tf.int32)), -1)

  p = self.params.denoise
  batch_size = tf.shape(batch.src.ids)[0]
  source_max_len = tf.shape(batch.src.ids)[1]

  # Shuffle tokens according to p.shuffle_tok_range: every position gets a
  # random offset and the tokens are re-ordered by position + offset.
  noise = tf.random.uniform([batch_size, source_max_len], 0,
                            p.shuffle_tok_range + 1)
  # Don't shuffle eos or padding: positions whose successor is padding (the
  # paddings are shifted left by one) get the fixed offset p.shuffle_tok_range.
  shuffle_tok_range = tf.fill([batch_size, source_max_len],
                              float(p.shuffle_tok_range))
  shifted_paddings = tf.pad(
      batch.src.paddings[:, 1:], [[0, 0], [0, 1]], constant_values=1)
  noise = tf.where(tf.equal(shifted_paddings, 0), noise, shuffle_tok_range)
  indices = tf.broadcast_to(
      tf.range(source_max_len, dtype=tf.int32), [batch_size, source_max_len])
  noisy_indices = tf.cast(indices, dtype=tf.float32) + noise
  permutations = tf.argsort(noisy_indices)
  # Apply each row's permutation to that row's ids.
  stacked = tf.stack([batch.src.ids, permutations], axis=1)
  denoise_src_ids = tf.stack(
      tf.map_fn(lambda x: tf.gather(x[0], x[1]), stacked), axis=0)

  # Select tokens to drop with probability=p.drop_tok_prob
  random_drop_tok = tf.random.uniform([batch_size, source_max_len])
  # Don't drop eos token
  is_keep_tok = tf.math.logical_or(
      tf.greater(random_drop_tok, p.drop_tok_prob),
      tf.equal(denoise_src_ids, self._src_tokenizer.eos_id))
  # Compact the kept tokens to the left and refill the tail back to the
  # original shape (ids with 0, paddings with 1).
  denoise_src_ids = tf.ragged.boolean_mask(
      denoise_src_ids,
      is_keep_tok).to_tensor(default_value=0, shape=tf.shape(batch.src.ids))
  denoise_src_paddings = tf.ragged.boolean_mask(
      batch.src.paddings,
      is_keep_tok).to_tensor(default_value=1, shape=tf.shape(batch.src.ids))

  # Select tokens to blank with probability=p.blank_tok_prob
  # Don't blank eos token
  random_blank_tok = tf.random.uniform([batch_size, source_max_len])
  shifted_paddings = tf.pad(
      denoise_src_paddings[:, 1:], [[0, 0], [0, 1]], constant_values=1)
  is_blank_tok = tf.math.logical_and(
      tf.less(random_blank_tok, p.blank_tok_prob),
      tf.equal(shifted_paddings, 0))
  blank_id = tf.fill([batch_size, source_max_len], p.blank_id)
  denoise_src_ids = tf.where(is_blank_tok, blank_id, denoise_src_ids)

  # Select denoising task examples with probability=p.noise_sent_prob
  random_uniform_sent = tf.random.uniform([batch_size])
  is_denoise_sent = tf.math.logical_and(
      tf.less(random_uniform_sent, p.noise_sent_prob),
      IsSpecialExample(
          self._GetTaskIds(batch.src.source_ids[:, 0]), p.task_ids))
  # Keep the noised version only for the selected examples.
  # NOTE(review): is_denoise_sent has shape [batch] while the other operands
  # are [batch, len]; this relies on tf.where selecting whole rows -- confirm
  # for the tf.where variant in use.
  batch.src.ids = tf.where(is_denoise_sent, denoise_src_ids, batch.src.ids)
  batch.src.paddings = tf.where(is_denoise_sent, denoise_src_paddings,
                                batch.src.paddings)
  batch.src.ids_indicator = 1 - batch.src.paddings
  batch.src.weights = batch.src.ids_indicator
def StochasticBeamSearchDecodeBiased(self,
                                     encoder_outputs,
                                     biased,
                                     stochastic,
                                     num_hyps_per_beam_override=0):
  """Performs beam search based decoding with optional advanced features.

  If `biased` is true, the target biasing feature is added. `encoder_outputs`
  must then include the following auxiliary inputs:

  - targets.labels: An int tensor of shape [batch, seq] that represents target
    labels to bias beam search towards.
  - targets.paddings: A 0/1 float tensor of shape [batch, seq] where 1 means
    that the corresponding element of targets.labels is a padding.
  - targets.weights: A float tensor of shape [batch, seq] that represents
    biasing weights. 1.0 means forced-decoding.

  If `stochastic` is true, the stochastic beam search feature
  (https://arxiv.org/pdf/1903.06059.pdf) is added. Top-p filtering (i.e.
  sampling only from the top-p probability mass of the token distribution) is
  also performed to ensure the quality of samples. There are slight
  differences from the original paper, e.g., length normalization and coverage
  penalty are applied to the perturbed probabilities. `encoder_outputs` must
  then include the following auxiliary inputs:

  - stochastic_beam_search.top_p_threshold: A float tensor of shape [batch]
    holding the thresholds of top-p filtering. Must satisfy
    0 < top_p_threshold <= 1. Low values give high quality but low diversity
    samples; high values give the opposite. Stochastic beam search is
    performed only if top_p_threshold > 0 for some batch items.
  - stochastic_beam_search.seed: An int tensor of shape [batch] holding the
    random seeds. Equal seeds draw equal samples.
  - stochastic_beam_search.src_ids: An int tensor of shape [batch, src_seq]
    that represents source IDs. Used for turning the random seed into a
    function of source IDs.
  - stochastic_beam_search.src_paddings: A 0/1 float tensor of shape
    [batch, src_seq] where 1 means that the corresponding element of
    stochastic_beam_search.src_ids is a padding.

  Note that `encoder_outputs` is updated in place: the targets are masked and
  padded, and `stochastic_beam_search.enable` is set.

  Args:
    encoder_outputs: a NestedMap computed by encoder.
    biased: If true, add the target decoding feature.
    stochastic: If true, add the stochastic beam search feature.
    num_hyps_per_beam_override: If set to a value <= 0, this parameter is
      ignored. If set to a value > 0, then this value will be used to override
      `p.num_hyps_per_beam`.

  Returns:
    BeamSearchDecodeOutput, a namedtuple containing the decode results.
  """
  p = self.params

  if biased:
    targets = encoder_outputs.targets
    # Padded positions must not bias the search.
    targets.weights *= (1.0 - targets.paddings)

    def _RightPadToTargetSeqLen(tensor, fill_value):
      """Pads dim 1 of `tensor` up to p.beam_search.target_seq_len."""
      current_len = tf.shape(tensor)[1]
      missing = tf.maximum(0, p.beam_search.target_seq_len - current_len)
      return tf.pad(
          tensor, [[0, 0], [0, missing]], constant_values=fill_value)

    targets.labels = _RightPadToTargetSeqLen(targets.labels, 0)
    targets.weights = _RightPadToTargetSeqLen(targets.weights, 0)

  if stochastic:
    # Stochastic beam search is enabled as soon as one batch item has a
    # positive top-p threshold.
    sbs = encoder_outputs.stochastic_beam_search
    sbs.enable = tf.reduce_any(tf.greater(sbs.top_p_threshold, 0.0))

  init_callback = self._WrapInitBeamSearchStateCallback(biased, stochastic)
  pre_step_callback = self._WrapPreBeamSearchStepCallback(biased, stochastic)
  post_step_callback = self._WrapPostBeamSearchStepCallback(stochastic)
  return self.beam_search.BeamSearchDecode(
      self.theta, encoder_outputs, num_hyps_per_beam_override, init_callback,
      pre_step_callback, post_step_callback)