def unstack_batch(tensor_dict):
    """Unstack every tensor along the 0th dimension and strip box padding.

    The input dict is not modified; a shallow copy is used internally.

    Args:
        tensor_dict: dict of tensors with shape
            (batch_size, num_boxes, d1, .., dn), including: gt_labels,
            gt_boxes, num_gt_boxes. The "num_gt_boxes" entry is required.
            When present, the "gt_boxes" entry is indexed with [0] before
            unstacking (it presumably arrives wrapped in a tuple -- see TODO).

    Returns:
        dict mapping every key except "num_gt_boxes" to a list with one
        tensor per batch element, each sliced along its first dimension to
        the matching entry of "num_gt_boxes".
    """
    # Shallow copy so that replacing "gt_boxes" below does not leak into the
    # caller's dict (a second call on the same dict would otherwise index
    # the already-extracted tensor again).
    tensors = dict(tensor_dict)

    # extract tensor from tuple. TODO: figure out where box tuple comes from?
    if "gt_boxes" in tensors:
        tensors["gt_boxes"] = tensors["gt_boxes"][0]

    unbatched_tensor_dict = {
        key: tf.unstack(tensor) for key, tensor in tensors.items()
    }

    # remove padding along 'num_boxes' dimension of the gt tensors
    num_gt_list = unbatched_tensor_dict["num_gt_boxes"]
    unbatched_unpadded_tensor_dict = {}
    for key, padded_tensor_list in unbatched_tensor_dict.items():
        if key == "num_gt_boxes":
            continue
        unpadded_tensor_list = []
        for num_gt, padded_tensor in zip(num_gt_list, padded_tensor_list):
            tensor_shape = shape_utils.combined_static_and_dynamic_shape(
                padded_tensor)
            slice_begin = tf.zeros(len(tensor_shape), dtype=tf.int32)
            # Keep the first `num_gt` rows and the full extent of the rest.
            slice_size = tf.stack(
                [num_gt] +
                [-1 if dim is None else dim for dim in tensor_shape[1:]])
            unpadded_tensor_list.append(
                tf.slice(padded_tensor, slice_begin, slice_size))
        unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
    return unbatched_unpadded_tensor_dict
def _get_feature_map_shape(self, features):
    """Return the spatial dimensions of each feature map.

    Args:
        features: iterable of feature map tensors.

    Returns:
        A list with one tuple per feature map, built from entries 1 and 2 of
        that map's combined static/dynamic shape.
    """
    spatial_dims = []
    for feature_map in features:
        full_shape = shape_utils.combined_static_and_dynamic_shape(
            feature_map)
        spatial_dims.append((full_shape[1], full_shape[2]))
    return spatial_dims
def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

    Args:
        boxlist: A BoxList.
        default_box: A [1, 4] float32 tensor. If no boxes are present in
            `boxlist`, this default box will be returned. If None, will use
            a default box of [[-1., -1., -1., -1.]].
        seed: Random seed.
        scope: Name scope.

    Returns:
        bbox: A [1, 4] tensor with a random bounding box.
        valid: A bool tensor indicating whether a valid bounding box is
            returned (True) or whether the default box is returned (False).
    """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]

        # Compare against None explicitly: `default_box or ...` would ask a
        # tensor for its truth value whenever a caller supplies a default.
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            # Draw one index in [0, number_of_boxes) and keep a leading
            # dimension of 1 on the selected box.
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return (tf.expand_dims(bboxes[random_index], axis=0),
                    tf.constant(True))

        return tf.cond(tf.greater_equal(number_of_boxes, 1),
                       true_fn=select_box,
                       false_fn=lambda: (default_box, tf.constant(False)))
def _match_when_rows_are_empty():
    """Performs matching when the rows of similarity matrix are empty.

    With no rows there is nothing for a column to match to, so every column
    is assigned -1.

    Returns:
        matches: int32 tensor of -1's with one entry per column of the
            similarity matrix.
    """
    num_columns = shape_utils.combined_static_and_dynamic_shape(
        similarity_matrix)[1]
    per_column_ones = tf.ones([num_columns], dtype=tf.int32)
    return -1 * per_column_ones
def retinanet(images,
              num_classes,
              num_anchors_per_loc,
              resnet_arch='resnet50',
              is_training=True):
    """Build box and class predictions for a batch of images.

    Args:
        images: input batch of images with shape (batch_size, h, w, 3)
        num_classes: number of classes for prediction
        num_anchors_per_loc: number of anchors at each feature map spatial
            location
        resnet_arch: name of the resnet architecture to use; must be a key
            of RESNET_ARCH_BLOCK
        is_training: indicate training or not

    Returns:
        dict holding the following items:
            box_pred: per-level box outputs reshaped to
                (batch_size, -1, 4) and concatenated along axis 1
            cls_pred: per-level class outputs reshaped to
                (batch_size, -1, num_classes + 1) and concatenated along
                axis 1
            feature_map_list: list of the feature map tensors, one per level
    """
    assert resnet_arch in RESNET_ARCH_BLOCK, "resnet architecture not defined"

    with tf.variable_scope('retinanet'):
        batch_size = combined_static_and_dynamic_shape(images)[0]
        features = retinanet_fpn(images,
                                 block_layers=RESNET_ARCH_BLOCK[resnet_arch],
                                 is_training=is_training)
        class_pred = []
        box_pred = []
        feature_map_list = []
        # One extra slot on top of the object classes (documented upstream
        # as the background class).
        num_slots = num_classes + 1

        with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
            for level, feature in features.items():
                class_outputs = share_weight_class_net(
                    feature,
                    level,
                    num_slots,
                    num_anchors_per_loc,
                    is_training=is_training)
                class_outputs = tf.reshape(
                    class_outputs, shape=[batch_size, -1, num_slots])
                class_pred.append(class_outputs)
                feature_map_list.append(feature)

        with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
            for level, feature in features.items():
                box_outputs = share_weight_box_net(feature,
                                                   level,
                                                   num_anchors_per_loc,
                                                   is_training=is_training)
                box_outputs = tf.reshape(box_outputs,
                                         shape=[batch_size, -1, 4])
                box_pred.append(box_outputs)

        return dict(box_pred=tf.concat(box_pred, axis=1),
                    cls_pred=tf.concat(class_pred, axis=1),
                    feature_map_list=feature_map_list)
def _match_when_rows_are_non_empty():
    """Performs matching when the rows of similarity matrix are non empty.

    Returns:
        matches: int32 tensor indicating the row each column matches to.
    """
    # Row with the highest similarity for each column.
    matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

    # Apply the matched / unmatched thresholds, if configured.
    if self._matched_threshold is not None:
        column_max = tf.reduce_max(similarity_matrix, 0)
        is_below_unmatched = tf.greater(self._unmatched_threshold, column_max)
        is_between_thresholds = tf.logical_and(
            tf.greater_equal(column_max, self._unmatched_threshold),
            tf.greater(self._matched_threshold, column_max))

        # The flag only swaps which of the two markers each band receives.
        if self._negatives_lower_than_unmatched:
            below_marker, between_marker = -1, -2
        else:
            below_marker, between_marker = -2, -1
        matches = self._set_values_using_indicator(matches,
                                                   is_below_unmatched,
                                                   below_marker)
        matches = self._set_values_using_indicator(matches,
                                                   is_between_thresholds,
                                                   between_marker)

    if not self._force_match_for_each_row:
        return matches

    # Override the match of every column that is the argmax of some row.
    num_columns = shape_utils.combined_static_and_dynamic_shape(
        similarity_matrix)[1]
    best_column_per_row = tf.argmax(similarity_matrix, 1,
                                    output_type=tf.int32)
    column_indicators = tf.one_hot(best_column_per_row, depth=num_columns)
    forced_row_ids = tf.argmax(column_indicators, 0, output_type=tf.int32)
    forced_column_mask = tf.cast(tf.reduce_max(column_indicators, 0), tf.bool)
    return tf.where(forced_column_mask, forced_row_ids, matches)
def nearest_neighbor_upsampling(input_tensor, scale):
    """Upsample a 4-D tensor by repeating each spatial element `scale` times.

    NOTE: See TensorFlow Object Detection API utils.ops

    Args:
        input_tensor: A float32 tensor of size
            [batch, height_in, width_in, channels].
        scale: An integer multiple to scale resolution of input data.

    Returns:
        upsample_input: A float32 tensor of size
            [batch, height_in*scale, width_in*scale, channels].
    """
    with tf.name_scope('nearest_neighbor_upsampling'):
        batch_size, h, w, c = combined_static_and_dynamic_shape(input_tensor)
        # Insert unit axes after height and width, then broadcast against a
        # ones tensor so each element is repeated `scale` times per axis.
        expanded = tf.reshape(input_tensor, [batch_size, h, 1, w, 1, c])
        replicator = tf.ones([1, 1, scale, 1, scale, 1],
                             dtype=input_tensor.dtype)
        replicated = expanded * replicator
        return tf.reshape(replicated, [batch_size, h * scale, w * scale, c])
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
    """Gather along axis 0 using a one-hot matrix multiplication.

    TODO(rathodv, jonathanhuang): enable sparse matmul option.

    Args:
        params: A float32 Tensor. The tensor from which to gather values.
            Must be at least rank 1.
        indices: A Tensor. Must be one of the following types: int32, int64.
            Must be in range [0, params.shape[0])
        scope: A name for the operation (optional).

    Returns:
        A Tensor. Has the same type as params. Values from params gathered
        from indices given by indices, with shape
        indices.shape + params.shape[1:].
    """
    with tf.name_scope(scope, 'MatMulGather'):
        params_shape = shape_utils.combined_static_and_dynamic_shape(params)
        indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
        num_rows = params_shape[0]

        # Flatten everything past axis 0 so the gather is a single matmul.
        flat_params = tf.reshape(params, [num_rows, -1])
        row_selector = tf.one_hot(indices, num_rows)
        flat_gathered = tf.matmul(row_selector, flat_params)

        output_shape = tf.stack(indices_shape + params_shape[1:])
        return tf.reshape(flat_gathered, output_shape)
def _batch_decode(self, box_encodings):
    """Decode a batch of box encodings with respect to the stored anchors.

    Args:
        box_encodings: box prediction tensor with shape
            [batch_size, num_anchors, 4]

    Returns:
        decoded_boxes: decoded boxes reshaped to [batch_size, -1, 4]
    """
    batch_size = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)[0]

    # Repeat the anchors once per batch element, then flatten anchors and
    # encodings to 2-D so a single decode call covers the whole batch.
    batched_anchors = tf.tile(tf.expand_dims(self._anchors, 0),
                              [batch_size, 1, 1])
    flat_anchor_boxlist = box_list.BoxList(
        tf.reshape(batched_anchors, [-1, 4]))
    flat_encodings = tf.reshape(box_encodings,
                                [-1, self._box_coder.code_size])

    decoded = self._box_coder.decode(flat_encodings, flat_anchor_boxlist)
    return tf.reshape(decoded.get(), [batch_size, -1, 4])
def _match_when_rows_are_non_empty():
    """Match each column of the similarity matrix to a row.

    Returns:
        int32 tensor with one entry per column: the matched row index, or
        one of the markers -1 / -2 written by the threshold step.
    """
    result = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

    if self._matched_threshold is not None:
        best_scores = tf.reduce_max(similarity_matrix, 0)
        under_unmatched = tf.greater(self._unmatched_threshold, best_scores)
        in_between = tf.logical_and(
            tf.greater_equal(best_scores, self._unmatched_threshold),
            tf.greater(self._matched_threshold, best_scores))
        # Which band gets -1 and which gets -2 depends on the flag.
        negatives_low = self._negatives_lower_than_unmatched
        result = self._set_values_using_indicator(
            result, under_unmatched, -1 if negatives_low else -2)
        result = self._set_values_using_indicator(
            result, in_between, -2 if negatives_low else -1)

    if self._force_match_for_each_row:
        matrix_shape = shape_utils.combined_static_and_dynamic_shape(
            similarity_matrix)
        top_column_ids = tf.argmax(similarity_matrix, 1,
                                   output_type=tf.int32)
        top_column_one_hot = tf.one_hot(top_column_ids,
                                        depth=matrix_shape[1])
        row_for_column = tf.argmax(top_column_one_hot, 0,
                                   output_type=tf.int32)
        column_is_forced = tf.cast(tf.reduce_max(top_column_one_hot, 0),
                                   tf.bool)
        # Columns that are some row's argmax take that row instead.
        result = tf.where(column_is_forced, row_for_column, result)

    return result
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
    """Returns a regression target for each anchor.

    Args:
        anchors: a BoxList representing N anchors
        groundtruth_boxes: a BoxList representing M groundtruth_boxes
        match: a matcher.Match object

    Returns:
        reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
    # Gather the matched groundtruth box per anchor; unmatched and ignored
    # anchors receive an all-zero box.
    gathered_boxes = match.gather_based_on_match(
        groundtruth_boxes.get(),
        unmatched_value=tf.zeros(4),
        ignored_value=tf.zeros(4))
    gathered_boxlist = box_list.BoxList(gathered_boxes)

    if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
        gt_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
        per_box_shape = gt_keypoints.get_shape()[1:]
        gathered_keypoints = match.gather_based_on_match(
            gt_keypoints,
            unmatched_value=tf.zeros(per_box_shape),
            ignored_value=tf.zeros(per_box_shape))
        gathered_boxlist.add_field(KEYPOINTS_FIELD_NAME, gathered_keypoints)

    encoded_targets = self._box_coder.encode(gathered_boxlist, anchors)

    # Zero out the unmatched and ignored regression targets.
    num_results = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)[0]
    default_targets = tf.tile(self._default_regression_target(),
                              [num_results, 1])
    is_matched = match.matched_column_indicator()
    return tf.where(is_matched, encoded_targets, default_targets)
def predict(self, inputs):
    """Run the network on a batch of images and attach the anchors.

    Anchors are constructed here, so this must be called before
    post-processing or the loss function.

    Args:
        inputs: a [batch_size, height, width, channels] image tensor

    Returns:
        prediction_dict: dict with items:
            inputs: [batch_size, height, width, channels] image tensor
            box_pred: [batch_size, num_anchors, 4] tensor containing
                predicted boxes
            cls_pred: [batch_size, num_anchors, num_classes+1] tensor
                containing class predictions
            feature_map_list: a list of feature map tensors
            anchors: [num_anchors, 4] tensor containing anchors in
                normalized coordinates
    """
    num_scales = self._params.get("num_scales")
    aspect_ratios = self._params.get("aspect_ratios")
    num_anchors_per_loc = num_scales * len(aspect_ratios)

    prediction_dict = retinanet(inputs,
                                self._num_classes,
                                num_anchors_per_loc,
                                is_training=self._is_training)

    # generate anchors
    feature_map_shape_list = self._get_feature_map_shape(
        prediction_dict["feature_map_list"])
    image_shape = shape_utils.combined_static_and_dynamic_shape(inputs)

    # The anchor generator is built once, on the first call, and reused.
    if self._anchor_generator is None:
        self._anchor_generator = Anchor(
            feature_map_shape_list=feature_map_shape_list,
            img_size=(image_shape[1], image_shape[2]),
            anchor_scale=self._params.get("anchor_scale"),
            aspect_ratios=aspect_ratios,
            scales_per_octave=num_scales)
    # Read the boxes outside the guard so `_anchors` is set even when a
    # generator already exists.
    self._anchors = self._anchor_generator.boxes

    prediction_dict["inputs"] = inputs
    prediction_dict["anchors"] = self._anchors
    return prediction_dict
def _match_when_rows_are_empty():
    """Return a match of -1 for every column of the similarity matrix.

    Returns:
        int32 tensor of -1's whose length is the second dimension of the
        similarity matrix.
    """
    matrix_shape = shape_utils.combined_static_and_dynamic_shape(
        similarity_matrix)
    column_count = matrix_shape[1]
    ones_per_column = tf.ones([column_count], dtype=tf.int32)
    return -1 * ones_per_column
def assign(self,
           anchors,
           groundtruth_boxes,
           groundtruth_labels=None,
           groundtruth_weights=None,
           **params):
    """Assign classification and regression targets to each anchor.

    Anchors are matched against the groundtruth boxes, and the resulting
    match is used to build per-anchor classification targets, regression
    targets and the corresponding weights (specifying, e.g., which anchors
    should not contribute to training loss). Anchors that are not matched to
    anything are given a classification target of
    self._unmatched_cls_target, which can be specified via the constructor.

    Args:
        anchors: a BoxList representing N anchors
        groundtruth_boxes: a BoxList representing M groundtruth boxes
        groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels
            for each of the ground_truth boxes. The subshape [d_1, ... d_k]
            can be empty (corresponding to scalar inputs). When set to None,
            groundtruth_labels assumes a binary problem where all
            ground_truth boxes get a positive label (of 1).
        groundtruth_weights: a float tensor of shape [M] indicating the
            weight to assign to all anchors match to a particular
            groundtruth box. The weights must be in [0., 1.]. If None, all
            weights are set to 1.
        **params: Additional keyword arguments for specific implementations
            of the Matcher.

    Returns:
        cls_targets: a float32 tensor with shape
            [num_anchors, d_1, d_2 ... d_k], where the subshape
            [d_1, ..., d_k] is compatible with groundtruth_labels which has
            shape [num_gt_boxes, d_1, d_2, ... d_k].
        cls_weights: a float32 tensor with shape [num_anchors]
        reg_targets: a float32 tensor with shape
            [num_anchors, box_code_dimension]
        reg_weights: a float32 tensor with shape [num_anchors]
        match: a matcher.Match object encoding the match between anchors and
            groundtruth boxes, with rows corresponding to groundtruth boxes
            and columns corresponding to anchors.

    Raises:
        ValueError: if anchors or groundtruth_boxes are not of type
            box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
        raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
        raise ValueError('groundtruth_boxes must be an BoxList')

    if groundtruth_labels is None:
        # Default to a label of 1 per groundtruth box, with a trailing
        # dimension of size 1.
        ones_shape = tf.expand_dims(groundtruth_boxes.num_boxes(), 0)
        groundtruth_labels = tf.expand_dims(tf.ones(ones_shape), -1)

    # Per-box label shape must equal the unmatched class target's shape.
    per_box_label_shape = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[1:]
    unmatched_target_shape = shape_utils.combined_static_and_dynamic_shape(
        self._unmatched_cls_target)
    unmatched_shape_assert = shape_utils.assert_shape_equal(
        per_box_label_shape, unmatched_target_shape)

    # Labels and boxes must agree on the number of groundtruth entries.
    label_count_shape = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[:1]
    box_count_shape = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_boxes.get())[:1]
    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
        label_count_shape, box_count_shape)

    if groundtruth_weights is None:
        # Use the static box count when it is truthy, else the dynamic one.
        num_gt_boxes = (groundtruth_boxes.num_boxes_static()
                        or groundtruth_boxes.num_boxes())
        groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

    shape_checks = [unmatched_shape_assert, labels_and_box_shapes_assert]
    with tf.control_dependencies(shape_checks):
        match_quality_matrix = self._similarity_calc.compare(
            groundtruth_boxes, anchors)
        match = self._matcher.match(match_quality_matrix, **params)
        reg_targets = self._create_regression_targets(
            anchors, groundtruth_boxes, match)
        cls_targets = self._create_classification_targets(
            groundtruth_labels, match)
        reg_weights = self._create_regression_weights(
            match, groundtruth_weights)
        cls_weights = self._create_classification_weights(
            match, groundtruth_weights)

    num_anchors = anchors.num_boxes_static()
    if num_anchors is not None:
        # Re-apply the statically known anchor count to every output.
        reg_targets, cls_targets, reg_weights, cls_weights = [
            self._reset_target_shape(target, num_anchors)
            for target in (reg_targets, cls_targets, reg_weights,
                           cls_weights)
        ]

    return cls_targets, cls_weights, reg_targets, reg_weights, match