def BatchedOrientedNMSIndices(self, bboxes, scores, nms_iou_threshold,
                              score_threshold, max_boxes_per_class):
  """Applies per-class oriented (7-DOF) 3D NMS to every example in a batch.

  Every returned tensor has shape
  [batch_size, num_classes, max_boxes_per_class].

  Args:
    bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
      boxes in [x, y, z, dx, dy, dz, phi] format.
    scores: A [batch_size, num_boxes, num_classes] floating point Tensor
      with the score of each box for each class.
    nms_iou_threshold: A float, or a list of num_classes floats, with the IoU
      threshold used to decide whether two boxes overlap for the purposes of
      suppression.
    score_threshold: A float, or a list of num_classes floats, with the score
      threshold that allows NMS to quickly ignore boxes.
    max_boxes_per_class: An integer scalar with the maximum number of boxes
      to emit per example for each class.

  Returns:
    A tuple (bbox_indices, bbox_scores, valid_mask):

    - bbox_indices: An int32 Tensor with the indices of the chosen boxes.
      Values are in sort order until the class_idx switches.
    - bbox_scores: A float32 Tensor with the score of each chosen box.
    - valid_mask: A float32 Tensor of 1/0 values, where 1 marks a valid
      entry and 0 an invalid one.
  """
  bboxes = py_utils.HasShape(bboxes, [-1, -1, 7])
  batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
  scores = py_utils.HasShape(scores, [batch_size, num_boxes, -1])
  num_classes = py_utils.GetShape(scores)[-1]

  def _PerClass(threshold):
    # Scalars and per-class lists both end up as a [num_classes] tensor.
    return tf.broadcast_to(tf.convert_to_tensor(threshold), [num_classes])

  per_class_iou_threshold = _PerClass(nms_iou_threshold)
  per_class_score_threshold = _PerClass(score_threshold)

  def _SingleExampleNMS(example):
    example_bboxes, example_scores = example
    picked_indices, picked_scores, picked_mask = ops.non_max_suppression_3d(
        example_bboxes,
        example_scores,
        nms_iou_threshold=per_class_iou_threshold,
        score_threshold=per_class_score_threshold,
        max_boxes_per_class=max_boxes_per_class)
    return picked_indices, picked_scores, picked_mask

  nms_outputs = tf.map_fn(
      fn=_SingleExampleNMS,
      elems=(bboxes, scores),
      dtype=(tf.int32, tf.float32, tf.float32),
      back_prop=False)

  # Pad or trim every output to the documented shape.
  output_shape = [batch_size, num_classes, max_boxes_per_class]
  bbox_indices, bbox_scores, valid_mask = [
      py_utils.PadOrTrimTo(output, output_shape) for output in nms_outputs
  ]
  return bbox_indices, bbox_scores, valid_mask
def FProp(self, _, encoded_images):
  """Decodes a batch of encoded jpegs and preprocesses each image.

  Args:
    _: Unused.
    encoded_images: Encoded jpeg images as a [batch_size] string Tensor.

  Returns:
    The decoded images as a float32 Tensor with shape
    [batch_size, height, width, num_channels=3].
  """
  params = self.params

  def _DecodeAndPreprocessOne(jpeg_bytes):
    decoded = tf.image.decode_jpeg(jpeg_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    # Eval and training use different preprocessing pipelines.
    preprocess = (
        self._PreprocessForEval
        if self.do_eval else self._PreprocessForTraining)
    return preprocess(as_float)

  return tf.map_fn(
      _DecodeAndPreprocessOne,
      encoded_images,
      back_prop=False,
      dtype=tf.float32,
      parallel_iterations=params.parallelism)
def IdsToStrings(self, ids, lens):
  """Decodes each row of an integer id matrix into a string.

  Args:
    ids: Integer matrix of ids, one sequence per row.
    lens: Per-row count of leading ids to decode; must have the same leading
      dimension as `ids`.

  Returns:
    A string Tensor with one decoded string per row of `ids`.
  """
  ids = py_utils.with_dependencies(
      [py_utils.assert_same_dim0([ids, lens])], ids)

  def _DecodeRow(row):
    row_ids, row_len = row
    # Only the first `row_len` ids of the row are decoded.
    return self._wpm_encoder.Decode(row_ids[:row_len])

  return tf.map_fn(
      _DecodeRow, (ids, lens),
      dtype=tf.string,
      parallel_iterations=30,
      back_prop=False)
def IdsToStrings(self, ids, lens):
  """Converts int32 token ids into approximate detokenized strings.

  Args:
    ids: Matrix of token ids, one sequence per row.
    lens: Per-row count of leading ids to detokenize; must have the same
      leading dimension as `ids`.

  Returns:
    A string Tensor with one space-joined string per row of `ids`.
  """
  ids = py_utils.with_dependencies(
      [py_utils.assert_same_dim0([ids, lens])], ids)

  def _DetokenizeRow(row):
    row_ids, row_len = row
    # The tokenizer is fed a batch of one: the valid prefix of this row.
    single_row_batch = tf.reshape(row_ids[:row_len], [1, -1])
    pieces = self._tokenizer.detokenize(single_row_batch)
    return tf.strings.reduce_join(pieces.flat_values, separator=' ')

  return tf.map_fn(
      _DetokenizeRow, (ids, lens),
      dtype=tf.string,
      parallel_iterations=30,
      back_prop=False)
def _ExtractBatch(self, features):
  """Default ExtractBatch() implementation: maps _Extract over the batch.

  Args:
    features: A dictionary of batched Tensors including tensors from this
      extractor.

  Returns:
    A NestedMap of output Tensors whose key names match self.Shape()'s keys.
  """
  # Run the per-example extractor on every element along the leading axis.
  extracted = tf.map_fn(
      fn=self._Extract,
      elems=features,
      dtype=self.DType(),
      back_prop=False)
  return py_utils.NestedMap(extracted)
def BatchedNMSIndices(self,
                      bboxes,
                      scores,
                      nms_iou_threshold=0.3,
                      score_threshold=0.01,
                      max_num_boxes=None):
  """Batched version of NMSIndices.

  Args:
    bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
      boxes in [x, y, z, dx, dy, dz, phi] format.
    scores: A [batch_size, num_boxes, num_classes] floating point Tensor
      containing box scores.
    nms_iou_threshold: IoU threshold to use when determining whether two
      boxes overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_num_boxes: The maximum number of boxes per example to emit. If None,
      this value is set to num_boxes from the shape of bboxes.

  Returns:
    A tuple (nms_indices, valid_mask), both padded or trimmed to
    [batch_size, max_output_size]: the int32 NMS indices and the float32 mask
    of the padded indices for each example in the batch.
  """
  batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
  max_output_size = num_boxes if max_num_boxes is None else max_num_boxes
  output_shape = [batch_size, max_output_size]

  def NMSBody(args):
    bbox, score = args
    return self.NMSIndices(bbox, score, max_output_size, nms_iou_threshold,
                           score_threshold)

  nms_indices, valid_mask = tf.map_fn(
      fn=NMSBody,
      elems=(bboxes, scores),
      dtype=(tf.int32, tf.float32),
      back_prop=False)

  # Give both outputs the same, explicit shape. Previously only the indices
  # were padded/trimmed, so the mask kept whatever shape map_fn produced.
  # Zero padding marks any extra mask slot as invalid.
  nms_indices = py_utils.PadOrTrimTo(nms_indices, output_shape)
  valid_mask = py_utils.PadOrTrimTo(valid_mask, output_shape)
  return nms_indices, valid_mask
def _BatchSampleGumbel(batch_seed, time_step, src_ids, src_paddings, shape,
                       dtype):
  """Draws (standard) Gumbel noise of a given shape for every batch item.

  The random seed of the i-th batch item is derived from batch_seed[i],
  time_step, and the sum of the non-padding elements of src_ids[i].

  Args:
    batch_seed: An int tensor of shape [batch] that holds a seed for each
      batch item.
    time_step: An int tensor used as a secondary seed.
    src_ids: An int tensor of shape [batch, src_seq] that represents source
      IDs. Used to make the random seed a function of the source IDs.
    src_paddings: A 0/1 float tensor of shape [batch, src_seq] where 1 means
      that the corresponding element of src_ids is a padding.
    shape: The shape of the Gumbel noise to sample for one batch item.
    dtype: The type of the Gumbel noise.

  Returns:
    A `dtype` tensor of shape [batch, ...] that holds Gumbel noises.
  """
  # Fold the source IDs into the seed. Otherwise the same noise pattern would
  # be reused for every source sequence, which systematically biases the
  # outputs obtained for a given seed value.
  non_padding = tf.cast(1.0 - src_paddings, dtype=src_ids.dtype)
  # Padding positions contribute 0 to the sum. Shape: [batch].
  per_item_seed = batch_seed + tf.math.reduce_sum(
      src_ids * non_padding, axis=1)

  def _SampleOne(item_seed):
    uniform = tf.random.stateless_uniform(
        shape=shape, dtype=dtype, seed=tf.stack([item_seed, time_step]))
    # Inverse-CDF transform of a uniform sample into a Gumbel sample.
    return -tf.math.log(-tf.math.log(uniform))

  return tf.map_fn(_SampleOne, per_item_seed, dtype=dtype)
def CornersToImagePlane(self, corners, velo_to_image_plane):
  """Projects the corners of 3D boxes onto the image plane.

  Args:
    corners: A [batch, num_boxes, 8, 3] floating point tensor containing the
      8 corner points of each 3D bounding box.
    velo_to_image_plane: A [batch, 3, 4] batch of projection matrices from
      velo xyz to image plane xy. After multiplication, you need to divide
      by the last coordinate to recover 2D pixel locations.

  Returns:
    A [batch, num_boxes, 8, 2] floating point Tensor containing the 3D
    bounding box corners projected to the image plane.
  """
  batch_size, num_boxes, _, _ = py_utils.GetShape(corners, 4)

  def _ProjectExample(example):
    """Projects all box corners of a single example."""
    example_corners, example_projection = example
    # Flatten [num_boxes, 8, 3] into a list of points so the whole example
    # is projected with a single call.
    flat_points = tf.reshape(example_corners, [-1, 3])
    projected = geometry.PointsToImagePlane(flat_points, example_projection)
    return tf.reshape(projected, [-1, 8, 2])

  projected_corners = tf.map_fn(
      fn=_ProjectExample,
      elems=(corners, velo_to_image_plane),
      dtype=tf.float32,
      back_prop=False)
  return py_utils.HasShape(projected_corners,
                           [batch_size, num_boxes, 8, 2])
def _EncodeToIds(self, word):
  """Encodes `word` into wordpiece ids by merging adjacent pieces.

  Args:
    word: The string to encode.

  Returns:
    The tokens left once no merge is possible, or once merging was aborted
    early according to self._merge_prob.
  """
  # Terminology:
  #  * a token is a wordpiece ID.
  #  * `tokens` shrinks by one element for every merge that is applied.
  #  * `candidates` has len(tokens) - 1 elements. candidates[i] is the token
  #    of the wordpiece obtained by merging tokens[i] and tokens[i + 1] if
  #    such a wordpiece exists, and NO_TOKEN otherwise.

  # Start from individual UTF-8 characters (letters).
  letters = tf.strings.unicode_split(word, 'UTF-8')
  tokens = self._StringToToken(letters)
  # Characters that have no token are mapped to the unknown id.
  tokens = tf.where(
      tf.equal(tokens, NO_TOKEN),
      tf.broadcast_to(self.unk_id, tf.shape(tokens)),
      tokens)

  # Initial merge candidates: one per pair of adjacent tokens.
  candidates = tf.map_fn(
      self._MergeTokens, (tokens[:-1], tokens[1:]), dtype=tokens.dtype)

  def _KeepMerging(unused_tokens, merge_candidates):
    """Continues while a merge exists and the random draw does not abort."""
    merge_exists = tf.reduce_any(tf.not_equal(merge_candidates, NO_TOKEN))
    not_aborted = tf.random.uniform([]) < self._merge_prob
    return tf.logical_and(merge_exists, not_aborted)

  def _MergePairAt(current_tokens, i):
    """Returns the rank-1 candidate for merging tokens i and i + 1."""
    merged = self._MergeTokens((current_tokens[i], current_tokens[i + 1]))
    return tf.expand_dims(merged, axis=-1)

  def _ApplyBestMerge(current_tokens, merge_candidates):
    """Applies the merge with the smallest candidate value."""
    # NOTE(review): picking the argmin presumably relies on NO_TOKEN
    # comparing greater than every real token id -- confirm against the
    # definition of NO_TOKEN.
    merge_pos = tf.argmin(merge_candidates, output_type=tf.int32)
    # Replace the pair (merge_pos, merge_pos + 1) with the merged token.
    current_tokens = tf.concat([
        current_tokens[:merge_pos],
        [merge_candidates[merge_pos]],
        current_tokens[merge_pos + 2:],
    ], axis=0)

    # Only the candidates adjacent to merge_pos have to be recomputed; the
    # others are copied over unchanged.
    no_candidates = tf.zeros([0], dtype=merge_candidates.dtype)

    def _LeftOfMerge():
      return tf.concat([
          merge_candidates[:merge_pos - 1],
          _MergePairAt(current_tokens, merge_pos - 1),
      ], axis=0)

    # Nothing lies to the left when the merge happened at the first token.
    left_part = tf.cond(
        tf.equal(merge_pos, 0), lambda: no_candidates, _LeftOfMerge)

    def _RightOfMerge():
      return tf.concat([
          _MergePairAt(current_tokens, merge_pos),
          merge_candidates[merge_pos + 2:],
      ], axis=0)

    # Nothing lies to the right when the merged token is now the last one.
    right_part = tf.cond(
        tf.greater_equal(merge_pos, tf.size(current_tokens) - 1),
        lambda: no_candidates, _RightOfMerge)

    return current_tokens, tf.concat([left_part, right_part], axis=0)

  merged_tokens = tf.while_loop(
      _KeepMerging,
      _ApplyBestMerge, (tokens, candidates),
      parallel_iterations=1,
      back_prop=False)[0]
  return merged_tokens
def SegmentPool3D(points,
                  point_features,
                  pooling_idx,
                  closest_idx,
                  pooling_method='max'):
  """Performs {min/max/average} pooling over a pointcloud given indices.

  This should be functionally identical to the MaxPool3D function when using
  max, except it turns out to be much more memory efficient on a TPU, and it
  supports min/max/mean.

  Args:
    points: A float tf.Tensor of shape [N, P1, 3] with point locations.
    point_features: A float tf.Tensor of shape [N, P1, C] with point
      features.
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of
      which points we want to keep. Each value should be in the range
      [0, P1).
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be
      in the range [0, P2).
    pooling_method: A string for which pooling function to use. Should be one
      of {'min', 'max', 'mean'}.

  Returns:
    pooled_points: A float tf.Tensor of shape [N, P2, 3] with the pooled
      point locations.
    pooled_features: A tf.Tensor of shape [N, P2, C] with the pooled
      features, in the dtype of point_features.

  Raises:
    ValueError: If pooling_method is not one of {min/max/mean}.
  """
  # Pooling method -> name of the TF segment op. Ops are resolved by name so
  # that an invalid method is rejected before any TF symbol is touched.
  segment_op_names = {
      'min': 'unsorted_segment_min',
      'max': 'unsorted_segment_max',
      'mean': 'unsorted_segment_mean',
  }
  if pooling_method not in segment_op_names:
    # sorted() yields a plain list; formatting dict.keys() directly would
    # print "dict_keys([...])" on Python 3.
    raise ValueError('`pooling_method` must be one of {}.'.format(
        sorted(segment_op_names)))
  segment_fn = getattr(tf, segment_op_names[pooling_method])

  points = py_utils.HasShape(points, [-1, -1, 3])
  n, p1 = py_utils.GetShape(points, 2)
  point_features = py_utils.HasShape(point_features, [n, p1, -1])
  _, _, c = py_utils.GetShape(point_features)
  pooling_idx = py_utils.HasShape(pooling_idx, [n, -1])
  _, p2 = py_utils.GetShape(pooling_idx)
  closest_idx = py_utils.HasShape(closest_idx, [n, p1])

  # Subselect our output points.
  pooled_points = tf.batch_gather(points, pooling_idx)

  # Loop over the batch dimension of our features/indices, as
  # unsorted_segment_X does not currently support a batch dimension.
  def _LoopFn(args):
    example_features, example_closest_idx = args
    return segment_fn(example_features, example_closest_idx, num_segments=p2)

  # The segment ops preserve the dtype of their data, so declare the map_fn
  # output with the feature dtype instead of assuming float32.
  pooled_features = tf.map_fn(
      fn=_LoopFn,
      elems=(point_features, closest_idx),
      dtype=point_features.dtype)

  return (py_utils.HasShape(pooled_points, [n, p2, 3]),
          py_utils.HasShape(pooled_features, [n, p2, c]))
def MaxPool3D(points, point_features, pooling_idx, closest_idx):
  """Apply max pooling to a point cloud with computed sampling indices.

  pooling_idx and closest_idx are the outputs of a sampler such as
  FurthestPointSampler.

  The pooling operation results in a point cloud with fewer points, where the
  pooled points are specified by pooling_idx. Each element of pooling_idx
  contains an integer in the range [0, P1) containing the index of the point
  in points/points_features.

  Max pooling is performed by assigning each point to its closest pooled
  point, and then taking a max over the features of points assigned. We
  assume that this mapping is provided by closest_idx, where each element
  should contain an integer in the range [0, P2) containing the index of the
  pooled point that each point is assigned to.

  Note: This logic for pooling assumes that there will be at least one value
  > 0 per sampled region for each feature, otherwise it will return 0.
  Additionally, it does a reduce over a masked version of the features, so
  mean and min would not work without a change in the logic.

  Args:
    points: a floating point tf.Tensor with shape [N, P1, 3]
    point_features: a floating point tf.Tensor with shape [N, P1, C]
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of
      which points we want to keep. Each value should be in the range
      [0, P1).
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be
      in the range [0, P2).

  Returns:
    A tuple of tf.Tensors (pooled_points, pooled_features).

    pooled_points has shape [N, P2, 3] representing the locations of each
    selected point. P2 corresponds to num_pooled_points.

    pooled_features has shape [N, P2, C] representing the pooled features at
    each point.
  """
  batch_size, num_points = py_utils.GetShape(points, 2)
  point_features = py_utils.HasShape(point_features,
                                     [batch_size, num_points, -1])
  pooling_idx = py_utils.HasShape(pooling_idx, [batch_size, -1])
  # Every input point must be assigned to exactly one pooled point.
  closest_idx = py_utils.HasShape(closest_idx, [batch_size, num_points])
  _, num_output_points = py_utils.GetShape(pooling_idx)

  # Gather new point locations.
  pooled_points = tf.batch_gather(points, pooling_idx)

  # Build the assignment mask in the feature dtype so that it can be
  # multiplied with the features whatever their float type is.
  mask = tf.one_hot(
      closest_idx, num_output_points,
      dtype=point_features.dtype)  # [N, P1, P2]
  mask = tf.transpose(mask, [2, 0, 1])  # [P2, N, P1]

  def _PartialPoolFeaturesFn(partial_mask):
    # [N, P1, 1]; broadcasting against the [N, P1, C] features replaces an
    # explicit tile of the mask over the feature dimension.
    partial_mask = tf.reshape(partial_mask, [batch_size, num_points, 1])
    # Note: This method of pooling assumes there will be a value > 0,
    # and will only work with max under this condition.
    return tf.reduce_max(partial_mask * point_features, axis=1)

  # Performing a map_fn over the pooled points is more memory efficient.
  pooled_point_features = tf.map_fn(_PartialPoolFeaturesFn,
                                    mask)  # [P2, N, C]
  pooled_point_features = tf.transpose(pooled_point_features,
                                       [1, 0, 2])  # [N, P2, C]
  return pooled_points, pooled_point_features
def _AddNoise(self, batch):
  """Adds noise to the source side of `batch`, in place.

  See https://arxiv.org/pdf/1711.00043. Three types of noise are implemented,
  with hyperparameters defined in self.params.denoise:

    1) slightly shuffle the sentence following p.shuffle_tok_range
    2) randomly drop tokens with probability p.drop_tok_prob
    3) randomly mask tokens (replace them with p.blank_id) with probability
       p.blank_tok_prob

  The noised version replaces the original with probability
  p.noise_sent_prob, and only for examples whose task id is in p.task_ids.

  Args:
    batch: a `.NestedMap` of the input batch. batch.src.ids,
      batch.src.paddings, batch.src.ids_indicator and batch.src.weights are
      overwritten; nothing is returned.
  """

  def IsSpecialExample(task_ids, special_task_ids):
    """A utility function indicating whether inputs belong to given tasks.

    Args:
      task_ids: Task ids for the input batch. Tensor of shape [batch].
      special_task_ids: A list of specified task ids.

    Returns:
      A tensor of size [batch] indicating whether each sample in the batch
      belongs to one of the specified tasks.
    """
    batch_size = py_utils.GetShape(task_ids)[0]
    return tf.reduce_any(
        tf.equal(
            tf.expand_dims(task_ids, -1),
            tf.cast(
                tf.broadcast_to(special_task_ids,
                                [batch_size, len(special_task_ids)]),
                tf.int32)), -1)

  p = self.params.denoise
  batch_size = tf.shape(batch.src.ids)[0]
  source_max_len = tf.shape(batch.src.ids)[1]

  # Shuffle tokens according to p.shuffle_tok_range: every position gets a
  # random offset and the tokens are re-ordered by position + offset.
  noise = tf.random.uniform([batch_size, source_max_len], 0,
                            p.shuffle_tok_range + 1)
  # Don't shuffle eos or padding: positions followed by padding (or by the
  # end of the sequence) get a fixed offset instead of a random one.
  shuffle_tok_range = tf.fill([batch_size, source_max_len],
                              float(p.shuffle_tok_range))
  shifted_paddings = tf.pad(
      batch.src.paddings[:, 1:], [[0, 0], [0, 1]], constant_values=1)
  noise = tf.where(tf.equal(shifted_paddings, 0), noise, shuffle_tok_range)
  indices = tf.broadcast_to(
      tf.range(source_max_len, dtype=tf.int32), [batch_size, source_max_len])
  noisy_indices = tf.cast(indices, dtype=tf.float32) + noise
  permutations = tf.argsort(noisy_indices)
  # Apply each row's permutation with a single batched gather rather than a
  # per-row map_fn: out[b, j] = ids[b, permutations[b, j]].
  denoise_src_ids = tf.gather(batch.src.ids, permutations, batch_dims=1)

  # Select tokens to drop with probability=p.drop_tok_prob
  random_drop_tok = tf.random.uniform([batch_size, source_max_len])
  # Don't drop eos token
  is_keep_tok = tf.math.logical_or(
      tf.greater(random_drop_tok, p.drop_tok_prob),
      tf.equal(denoise_src_ids, self._src_tokenizer.eos_id))
  # Dropping shortens rows; pad them back to the original shape with id 0
  # and padding 1.
  denoise_src_ids = tf.ragged.boolean_mask(
      denoise_src_ids, is_keep_tok).to_tensor(
          default_value=0, shape=tf.shape(batch.src.ids))
  denoise_src_paddings = tf.ragged.boolean_mask(
      batch.src.paddings, is_keep_tok).to_tensor(
          default_value=1, shape=tf.shape(batch.src.ids))

  # Select tokens to blank with probability=p.blank_tok_prob
  # Don't blank eos token
  random_blank_tok = tf.random.uniform([batch_size, source_max_len])
  shifted_paddings = tf.pad(
      denoise_src_paddings[:, 1:], [[0, 0], [0, 1]], constant_values=1)
  is_blank_tok = tf.math.logical_and(
      tf.less(random_blank_tok, p.blank_tok_prob),
      tf.equal(shifted_paddings, 0))
  blank_id = tf.fill([batch_size, source_max_len], p.blank_id)
  denoise_src_ids = tf.where(is_blank_tok, blank_id, denoise_src_ids)

  # Select denoising task examples with probability=p.noise_sent_prob
  random_uniform_sent = tf.random.uniform([batch_size])
  is_denoise_sent = tf.math.logical_and(
      tf.less(random_uniform_sent, p.noise_sent_prob),
      IsSpecialExample(
          self._GetTaskIds(batch.src.source_ids[:, 0]), p.task_ids))
  batch.src.ids = tf.where(is_denoise_sent, denoise_src_ids, batch.src.ids)
  batch.src.paddings = tf.where(is_denoise_sent, denoise_src_paddings,
                                batch.src.paddings)
  batch.src.ids_indicator = 1 - batch.src.paddings
  batch.src.weights = batch.src.ids_indicator