def scale(boxlist, y_scale, x_scale, scope=None):
    """Scale box coordinates in x and y dimensions.

    Args:
      boxlist: BoxList holding N boxes
      y_scale: (float) scalar tensor
      x_scale: (float) scalar tensor
      scope: name scope.

    Returns:
      boxlist: BoxList holding N boxes
    """
    with tf.name_scope(scope, 'Scale'):
        y_scale = tf.cast(y_scale, tf.float32)
        x_scale = tf.cast(x_scale, tf.float32)
        y_min, x_min, y_max, x_max = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        y_min = y_scale * y_min
        y_max = y_scale * y_max
        x_min = x_scale * x_min
        x_max = x_scale * x_max
        scaled_boxlist = box_list.BoxList(
            tf.concat([y_min, x_min, y_max, x_max], 1))
        return _copy_extra_fields(scaled_boxlist, boxlist)
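
# A minimal usage sketch of `scale` (illustrative, assuming the surrounding
# module's `box_list.BoxList` API): after resizing an image by factors
# (y_scale, x_scale), its boxes must be scaled by the same factors to stay
# aligned.
#
#   boxes = box_list.BoxList(
#       tf.constant([[0.1, 0.2, 0.4, 0.6]]))  # [ymin, xmin, ymax, xmax]
#   scaled = scale(boxes, y_scale=0.5, x_scale=2.0)
#   # scaled.get() -> [[0.05, 0.4, 0.2, 1.2]]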
def change_coordinate_frame(boxlist, window, scope=None):
    """Change coordinate frame of the boxlist to be relative to window's frame.

    Given a window of the form [ymin, xmin, ymax, xmax], changes bounding box
    coordinates from boxlist to be relative to this window (e.g., the min
    corner maps to (0,0) and the max corner maps to (1,1)).

    An example use case is data augmentation: we are given groundtruth boxes
    (boxlist) and would like to randomly crop the image to some window
    (window), in which case we need to change the coordinate frame of each
    groundtruth box to be relative to this new window.

    Args:
      boxlist: A BoxList object holding N boxes.
      window: A rank 1 tensor [4].
      scope: name scope.

    Returns:
      Returns a BoxList object with N boxes.
    """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        win_height = window[2] - window[0]
        win_width = window[3] - window[1]
        boxlist_new = scale(
            box_list.BoxList(boxlist.get() -
                             [window[0], window[1], window[0], window[1]]),
            1.0 / win_height, 1.0 / win_width)
        boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
        return boxlist_new
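
# An illustrative sketch of `change_coordinate_frame`: cropping to the
# window [0.2, 0.2, 0.8, 0.8] remaps a box covering the window's upper-left
# quarter to [0, 0, 0.5, 0.5] in the window's frame.
#
#   window = tf.constant([0.2, 0.2, 0.8, 0.8])
#   boxes = box_list.BoxList(tf.constant([[0.2, 0.2, 0.5, 0.5]]))
#   relative = change_coordinate_frame(boxes, window)
#   # relative.get() -> [[0.0, 0.0, 0.5, 0.5]]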
def boolean_mask(boxlist, indicator, fields=None, scope=None):
    """Select boxes from BoxList according to indicator and return new BoxList.

    `boolean_mask` returns the subset of boxes that are marked as "True" by
    the indicator tensor. By default, it also gathers all additional fields
    stored in the boxlist (indexing into the first dimension). However one
    can optionally only draw from a subset of fields.

    Args:
      boxlist: BoxList holding N boxes
      indicator: a rank-1 boolean tensor
      fields: (optional) list of fields to also gather from. If None
        (default), all fields are gathered from. Pass an empty fields list to
        only gather the box coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indicator

    Raises:
      ValueError: if `indicator` is not a rank-1 boolean tensor.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')
        subboxlist = box_list.BoxList(
            tf.boolean_mask(boxlist.get(), indicator))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = tf.boolean_mask(boxlist.get_field(field),
                                           indicator)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
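
# A minimal sketch of `boolean_mask`: the indicator keeps the first box and,
# by default, the matching entries of every extra field (here 'scores').
#
#   boxes = box_list.BoxList(
#       tf.constant([[0., 0., 1., 1.], [0., 0., 2., 2.]]))
#   boxes.add_field('scores', tf.constant([0.9, 0.1]))
#   kept = boolean_mask(boxes, tf.constant([True, False]))
#   # kept.get() -> [[0., 0., 1., 1.]]
#   # kept.get_field('scores') -> [0.9]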
def gather(boxlist, indices, fields=None, scope=None):
    """Gather boxes from BoxList according to indices and return new BoxList.

    By default, `gather` returns boxes corresponding to the input index list,
    as well as all additional fields stored in the boxlist (indexing into the
    first dimension). However one can optionally only gather from a subset of
    fields.

    Args:
      boxlist: BoxList holding N boxes
      indices: a rank-1 tensor of type int32 / int64
      fields: (optional) list of fields to also gather from. If None
        (default), all fields are gathered from. Pass an empty fields list to
        only gather the box coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indices

    Raises:
      ValueError: if specified field is not contained in boxlist or if the
        indices are not of type int32 / int64
    """
    with tf.name_scope(scope, 'Gather'):
        if len(indices.shape.as_list()) != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype != tf.int32 and indices.dtype != tf.int64:
            raise ValueError('indices should be an int32 / int64 tensor')
        subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = tf.gather(boxlist.get_field(field), indices)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
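
# A minimal sketch of `gather`: unlike `boolean_mask`, the index list may
# repeat and reorder entries, so it can duplicate boxes as well as subset
# them.
#
#   boxes = box_list.BoxList(
#       tf.constant([[0., 0., 1., 1.], [0., 0., 2., 2.]]))
#   reordered = gather(boxes, tf.constant([1, 0, 1], dtype=tf.int32))
#   # reordered.get() ->
#   #   [[0., 0., 2., 2.], [0., 0., 1., 1.], [0., 0., 2., 2.]]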
def build(self):
    rpn_model = self._rpn_model

    # Share the same prediction dict as RPN
    prediction_dict = rpn_model.build()

    top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
    ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]
    class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

    with tf.variable_scope('avod_projection'):

        if self._config.expand_proposals_xz > 0.0:
            expand_length = self._config.expand_proposals_xz

            # Expand anchors along x and z
            with tf.variable_scope('expand_xz'):
                expanded_dim_x = top_anchors[:, 3] + expand_length
                expanded_dim_z = top_anchors[:, 5] + expand_length

                expanded_anchors = tf.stack([
                    top_anchors[:, 0],
                    top_anchors[:, 1],
                    top_anchors[:, 2],
                    expanded_dim_x,
                    top_anchors[:, 4],
                    expanded_dim_z
                ], axis=1)

            avod_projection_in = expanded_anchors
        else:
            # Use the selected 3D bounding boxes as-is
            avod_projection_in = top_anchors

        with tf.variable_scope('img'):
            # image_shape should be the shape before the resizing operation
            image_shape = tf.cast(
                tf.shape(
                    rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                tf.float32)

            img_proposal_boxes = top_anchors
            img_proposal_boxes_norm = \
                anchor_encoder.tf_boxes_normalization(
                    img_proposal_boxes, image_shape)

            img_proposal_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    img_proposal_boxes)

            # Only reorder the normalized img boxes
            img_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    img_proposal_boxes_norm)

    # bev_feature_maps = rpn_model.bev_feature_maps
    ## benz: bev_feature_maps is unused in this image-only variant
    img_feature_maps = rpn_model.img_feature_maps
    img_mask = tf.constant(1.0)

    # ROI Pooling
    with tf.variable_scope('avod_roi_pooling'):

        def get_box_indices(boxes):
            ## benz: no bev_proposal_boxes here
            proposals_shape = boxes.get_shape().as_list()
            if any(dim is None for dim in proposals_shape):
                proposals_shape = tf.shape(boxes)
            ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
            multiplier = tf.expand_dims(
                tf.range(start=0, limit=proposals_shape[0]), 1)
            return tf.reshape(ones_mat * multiplier, [-1])

        # Do ROI Pooling on image
        img_proposal_boxes_norm_batches = tf.expand_dims(
            img_proposal_boxes_norm, axis=0)

        tf_box_indices = get_box_indices(img_proposal_boxes_norm_batches)

        img_rois = tf.image.crop_and_resize(
            img_feature_maps,
            img_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='img_rois')

    # Fully connected layers (Box Predictor)
    avod_layers_config = self.model_config.layers_config.avod_config

    fc_output_layers = \
        avod_fc_layers_builder.build(
            layers_config=avod_layers_config,
            # input_rois=[bev_rois, img_rois],  ## benz: image-only
            input_rois=[img_rois],
            input_weights=[img_mask],
            num_final_classes=self._num_final_classes,
            box_rep=self._box_rep,
            top_anchors=top_anchors,
            ground_plane=ground_plane,
            is_training=self._is_training)

    all_cls_logits = \
        fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
    all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

    # This may be None
    ## benz: 2D bounding boxes have no angle vector
    # all_angle_vectors = \
    #     fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

    with tf.variable_scope('softmax'):
        all_cls_softmax = tf.nn.softmax(all_cls_logits)

    ######################################################
    # Subsample mini_batch for the loss function
    ######################################################
    # Get the ground truth tensors
    boxes_2d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_2D_GT]

    with tf.variable_scope('avod_box_list'):  ## benz
        boxes_2d_gt_tf_order = \
            anchor_projector.reorder_projected_boxes(
                boxes_2d_gt)
        boxes_2d_list_gt = box_list.BoxList(boxes_2d_gt_tf_order)
        boxes_2d_list = box_list.BoxList(img_proposal_boxes_tf_order)

    # TODO(benz): figure out how mini-batch sampling works
    mb_mask, mb_class_label_indices, mb_gt_indices = \
        self.sample_mini_batch(
            anchor_box_list_gt=boxes_2d_list_gt,
            anchor_box_list=boxes_2d_list,
            class_labels=class_labels)

    # Create classification one_hot vector
    with tf.variable_scope('avod_one_hot_classes'):
        mb_classification_gt = tf.one_hot(
            mb_class_label_indices,
            depth=self._num_final_classes,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=(self._config.label_smoothing_epsilon /
                       self.dataset.num_classes))

    # TODO: Don't create a mini batch in test mode
    # Mask predictions
    with tf.variable_scope('avod_apply_mb_mask'):
        # Classification
        mb_classifications_logits = tf.boolean_mask(
            all_cls_logits, mb_mask)
        mb_classifications_softmax = tf.boolean_mask(
            all_cls_softmax, mb_mask)

        # Offsets
        mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

    # Encode anchor offsets
    with tf.variable_scope('avod_encode_mb_anchors'):
        # mb_anchors = tf.boolean_mask(top_anchors, mb_mask)
        mb_boxes_2d = tf.boolean_mask(img_proposal_boxes, mb_mask)

        if self._box_rep == 'box_2d':  ## benz
            mb_boxes_2d_gt = tf.gather(boxes_2d_gt, mb_gt_indices)
            mb_offsets_gt = anchor_encoder.tf_2d_box_to_offset(
                mb_boxes_2d, mb_boxes_2d_gt)
        else:
            raise NotImplementedError(
                'Anchor encoding not implemented for', self._box_rep)

    ######################################################
    # ROI summary images
    ######################################################
    avod_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size

    with tf.variable_scope('img_avod_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(
            img_proposal_boxes_norm_tf_order, mb_mask)
        mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._rpn_model._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_avod_rois',
                         img_input_rois,
                         max_outputs=avod_mini_batch_size)

    ######################################################
    # Final Predictions
    ######################################################
    # Apply offsets to regress proposals
    with tf.variable_scope('avod_regression'):
        if self._box_rep == 'box_2d':
            prediction_boxes = \
                anchor_encoder.tf_2d_offset_to_box(
                    img_proposal_boxes, all_offsets)
        else:
            raise NotImplementedError('Regression not implemented for',
                                      self._box_rep)

    # Apply non-oriented NMS to the 2D image-space boxes
    with tf.variable_scope('avod_nms'):
        # Get top score from second column onward
        all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

        prediction_boxes_tf_order = \
            anchor_projector.reorder_projected_boxes(
                prediction_boxes)

        # Apply NMS
        nms_indices = tf.image.non_max_suppression(
            prediction_boxes_tf_order,
            all_top_scores,
            max_output_size=self._nms_size,
            iou_threshold=self._nms_iou_threshold)

        # Gather predictions from NMS indices
        top_classification_logits = tf.gather(all_cls_logits,
                                              nms_indices)
        top_classification_softmax = tf.gather(all_cls_softmax,
                                               nms_indices)

        if self._box_rep == 'box_2d':
            top_prediction_boxes = tf.gather(prediction_boxes,
                                             nms_indices)
        else:
            raise NotImplementedError('NMS gather not implemented for',
                                      self._box_rep)

    if self._train_val_test in ['train', 'val']:
        # Additional entries are added to the shared prediction_dict

        # Mini batch predictions
        prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
            mb_classifications_logits
        prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
            mb_classifications_softmax
        prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

        # Mini batch ground truth
        prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
            mb_classification_gt
        prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

        # Top NMS predictions
        prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
            top_classification_logits
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_boxes

        # Mini batch predictions (for debugging)
        prediction_dict[self.PRED_MB_MASK] = mb_mask
        # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
        prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
            mb_class_label_indices

        # All predictions (for debugging)
        prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
            all_cls_logits
        prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

        # Path drop masks (for debugging)
        # prediction_dict['bev_mask'] = bev_mask
        prediction_dict['img_mask'] = img_mask

    else:  # self._train_val_test == 'test'
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_boxes

    if self._box_rep == 'box_2d':
        prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_boxes_2d_gt
    else:
        raise NotImplementedError('Prediction dict not implemented for',
                                  self._box_rep)

    # prediction_dict[self.PRED_MAX_IOUS] = max_ious
    # prediction_dict[self.PRED_ALL_IOUS] = all_ious

    return prediction_dict
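
# A hedged sketch of how `build` is typically consumed, following the usual
# AVOD training loop; `optimizer`, `model_config`, and `dataset` are assumed
# to be set up elsewhere (e.g., by the stock trainer), and the names here
# are illustrative:
#
#   model = AvodModel(model_config,
#                     train_val_test='train',
#                     dataset=dataset)
#   prediction_dict = model.build()
#   losses_dict, total_loss = model.loss(prediction_dict)
#   train_op = optimizer.minimize(total_loss)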