def test_prune_non_overlapping_boxes(self):
    """Check prune_non_overlapping_boxes in both argument orders.

    With a minimum overlap of 0.5, pruning the first list against the second
    is expected to keep both boxes (indices [0, 1]); pruning the second list
    against the first is expected to yield an empty BoxList.
    """
    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0]])
    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
                                  [14.0, 14.0, 15.0, 15.0],
                                  [0.0, 0.0, 20.0, 20.0]])
    first_boxes = box_list.BoxList(first_corners)
    second_boxes = box_list.BoxList(second_corners)
    overlap_threshold = 0.5

    # Nothing should be removed from the first list; everything from the
    # second.
    expected_first = first_boxes
    expected_second = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))

    pruned_first, kept_first = box_list_ops.prune_non_overlapping_boxes(
        first_boxes, second_boxes, min_overlap=overlap_threshold)
    pruned_second, kept_second = box_list_ops.prune_non_overlapping_boxes(
        second_boxes, first_boxes, min_overlap=overlap_threshold)

    with self.test_session() as sess:
        fetches = [
            pruned_first.get(), kept_first,
            pruned_second.get(), kept_second,
            expected_first.get(), expected_second.get(),
        ]
        (pruned_first_np, kept_first_np,
         pruned_second_np, kept_second_np,
         expected_first_np, expected_second_np) = sess.run(fetches)
        self.assertAllClose(pruned_first_np, expected_first_np)
        self.assertAllClose(pruned_second_np, expected_second_np)
        self.assertAllEqual(kept_first_np, [0, 1])
        self.assertAllEqual(kept_second_np, [])
def test_gather_with_invalid_inputs(self):
    """Check that gather raises ValueError for float or rank-2 indices."""
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])

    # Indices with a floating-point dtype must be rejected.
    float_indices = tf.constant([0, 2, 4], tf.float32)
    source_boxes = box_list.BoxList(corner_rows)
    with self.assertRaises(ValueError):
        box_list_ops.gather(source_boxes, float_indices)

    # Indices that are not rank 1 must be rejected as well.
    rank2_indices = tf.constant([[0, 2, 4]], tf.int32)
    source_boxes = box_list.BoxList(corner_rows)
    with self.assertRaises(ValueError):
        box_list_ops.gather(source_boxes, rank2_indices)
def test_matched_intersection(self):
    """Check matched_intersection on two box lists of equal length.

    The expected result holds one intersection value per box pair
    (first with first, second with second).
    """
    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0]])
    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
                                  [14.0, 14.0, 15.0, 15.0]])
    expected_areas = [2.0, 0.0]

    first_boxes = box_list.BoxList(first_corners)
    second_boxes = box_list.BoxList(second_corners)
    areas_op = box_list_ops.matched_intersection(first_boxes, second_boxes)

    with self.test_session() as sess:
        areas_np = sess.run(areas_op)
        self.assertAllClose(areas_np, expected_areas)
def test_pairwise_distances(self):
    """Check the 2 x 3 matrix returned by sq_dist for two box lists.

    Each expected entry equals the sum of squared coordinate differences
    between one box of the first list and one box of the second list.
    """
    coords_a = tf.constant([[0.0, 0.0, 0.0, 0.0],
                            [1.0, 1.0, 0.0, 2.0]])
    coords_b = tf.constant([[3.0, 4.0, 1.0, 0.0],
                            [-4.0, 0.0, 0.0, 3.0],
                            [0.0, 0.0, 0.0, 0.0]])
    expected_matrix = [[26, 25, 0],
                       [18, 27, 6]]

    boxes_a = box_list.BoxList(coords_a)
    boxes_b = box_list.BoxList(coords_b)
    sq_dist_op = box_list_ops.sq_dist(boxes_a, boxes_b)

    with self.test_session() as sess:
        sq_dist_np = sess.run(sq_dist_op)
        self.assertAllClose(sq_dist_np, expected_matrix)
def test_iou(self):
    """Check the pairwise IOU matrix between a 2-box and a 3-box list."""
    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0]])
    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
                                  [14.0, 14.0, 15.0, 15.0],
                                  [0.0, 0.0, 20.0, 20.0]])
    # One row per box of the first list, one column per box of the second.
    expected_iou = [
        [2.0 / 16.0, 0, 6.0 / 400.0],
        [1.0 / 16.0, 0.0, 5.0 / 400.0],
    ]

    first_boxes = box_list.BoxList(first_corners)
    second_boxes = box_list.BoxList(second_corners)
    iou_op = box_list_ops.iou(first_boxes, second_boxes)

    with self.test_session() as sess:
        iou_np = sess.run(iou_op)
        self.assertAllClose(iou_np, expected_iou)
def test_change_coordinate_frame(self):
    """Check that boxes are re-expressed relative to a given window."""
    input_corners = tf.constant([[0.25, 0.5, 0.75, 0.75],
                                 [0.5, 0.0, 1.0, 1.0]])
    crop_window = tf.constant([0.25, 0.25, 0.75, 0.75])
    input_boxes = box_list.BoxList(input_corners)

    target_corners = tf.constant([[0, 0.5, 1.0, 1.0],
                                  [0.5, -0.5, 1.5, 1.5]])
    target_boxes = box_list.BoxList(target_corners)

    reframed = box_list_ops.change_coordinate_frame(input_boxes, crop_window)

    with self.test_session() as sess:
        reframed_np, target_np = sess.run(
            [reframed.get(), target_boxes.get()])
        self.assertAllClose(reframed_np, target_np)
def scale(boxlist, y_scale, x_scale, scope=None):
    """Scale box coordinates along the y and x dimensions.

    Args:
      boxlist: BoxList holding N boxes.
      y_scale: (float) scalar tensor multiplied into the y coordinates.
      x_scale: (float) scalar tensor multiplied into the x coordinates.
      scope: name scope.

    Returns:
      A BoxList holding the N scaled boxes, passed through
      `_copy_extra_fields` together with the input `boxlist`.
    """
    with tf.name_scope(scope, 'Scale'):
        y_factor = tf.cast(y_scale, tf.float32)
        x_factor = tf.cast(x_scale, tf.float32)

        # Split the [N, 4] box tensor into its four [N, 1] columns.
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)

        scaled_top = y_factor * top
        scaled_bottom = y_factor * bottom
        scaled_left = x_factor * left
        scaled_right = x_factor * right

        scaled_corners = tf.concat(
            [scaled_top, scaled_left, scaled_bottom, scaled_right], 1)
        return _copy_extra_fields(box_list.BoxList(scaled_corners), boxlist)
def test_ioaworks_on_empty_inputs(self):
    """Check the output shapes of ioa when either box list is empty."""
    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0]])
    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
                                  [14.0, 14.0, 15.0, 15.0],
                                  [0.0, 0.0, 20.0, 20.0]])
    two_boxes = box_list.BoxList(first_corners)
    three_boxes = box_list.BoxList(second_corners)
    no_boxes = box_list.BoxList(tf.zeros((0, 4)))

    nonempty_vs_empty = box_list_ops.ioa(two_boxes, no_boxes)
    empty_vs_nonempty = box_list_ops.ioa(no_boxes, three_boxes)
    empty_vs_empty = box_list_ops.ioa(no_boxes, no_boxes)

    with self.test_session() as sess:
        result_1, result_2, result_3 = sess.run(
            [nonempty_vs_empty, empty_vs_nonempty, empty_vs_empty])
        self.assertAllEqual(result_1.shape, (2, 0))
        self.assertAllEqual(result_2.shape, (0, 3))
        self.assertAllEqual(result_3.shape, (0, 0))
def change_coordinate_frame(boxlist, window, scope=None):
    """Re-express the boxes of a BoxList relative to a window's frame.

    The window is given as [ymin, xmin, ymax, xmax]. Box coordinates are
    shifted by the window's min corner and divided by the window's height and
    width, so the window's min corner maps to (0, 0) and its max corner maps
    to (1, 1).

    A typical use is data augmentation: after randomly cropping an image to
    `window`, the groundtruth boxes have to be expressed in the frame of the
    cropped image.

    Args:
      boxlist: A BoxList object holding N boxes.
      window: A rank 1 tensor [4].
      scope: name scope.

    Returns:
      A BoxList object with N boxes.
    """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        window_height = window[2] - window[0]
        window_width = window[3] - window[1]

        # Translate so that the window's min corner becomes the origin.
        min_corner = [window[0], window[1], window[0], window[1]]
        shifted = box_list.BoxList(boxlist.get() - min_corner)

        # Normalize by the window size.
        rescaled = scale(shifted, 1.0 / window_height, 1.0 / window_width)

        # `shifted` was built from raw coordinates only, so the extra fields
        # are taken from the input boxlist here.
        return _copy_extra_fields(rescaled, boxlist)
def test_ioa(self):
    """Check ioa in both argument orders (the measure is not symmetric)."""
    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0]])
    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
                                  [14.0, 14.0, 15.0, 15.0],
                                  [0.0, 0.0, 20.0, 20.0]])
    expected_forward = [[2.0 / 12.0, 0, 6.0 / 400.0],
                        [1.0 / 12.0, 0.0, 5.0 / 400.0]]
    expected_reverse = [[2.0 / 6.0, 1.0 / 5.0],
                        [0, 0],
                        [6.0 / 6.0, 5.0 / 5.0]]

    first_boxes = box_list.BoxList(first_corners)
    second_boxes = box_list.BoxList(second_corners)
    forward_op = box_list_ops.ioa(first_boxes, second_boxes)
    reverse_op = box_list_ops.ioa(second_boxes, first_boxes)

    with self.test_session() as sess:
        forward_np, reverse_np = sess.run([forward_op, reverse_op])
        self.assertAllClose(forward_np, expected_forward)
        self.assertAllClose(reverse_np, expected_reverse)
def test_area(self):
    """Check the per-box areas computed by box_list_ops.area."""
    box_corners = tf.constant([[0.0, 0.0, 10.0, 20.0],
                               [1.0, 2.0, 3.0, 4.0]])
    expected_areas = [200.0, 4.0]

    area_op = box_list_ops.area(box_list.BoxList(box_corners))

    with self.test_session() as sess:
        area_np = sess.run(area_op)
        self.assertAllClose(area_np, expected_areas)
def test_gather_with_invalid_field(self):
    """Check that gather raises ValueError for fields the BoxList lacks."""
    box_corners = tf.constant([4 * [0.0], 4 * [1.0]])
    valid_indices = tf.constant([0, 1], tf.int32)
    box_weights = tf.constant([[.1], [.3]], tf.float32)

    source_boxes = box_list.BoxList(box_corners)
    source_boxes.add_field('weights', box_weights)

    # Only 'weights' was added, so neither requested field exists.
    missing_fields = ['foo', 'bar']
    with self.assertRaises(ValueError):
        box_list_ops.gather(source_boxes, valid_indices, missing_fields)
def test_height_width(self):
    """Check the per-box heights and widths from height_width."""
    box_corners = tf.constant([[0.0, 0.0, 10.0, 20.0],
                               [1.0, 2.0, 3.0, 4.0]])
    expected_heights = [10., 2.]
    expected_widths = [20., 2.]

    source_boxes = box_list.BoxList(box_corners)
    height_op, width_op = box_list_ops.height_width(source_boxes)

    with self.test_session() as sess:
        heights_np, widths_np = sess.run([height_op, width_op])
        self.assertAllClose(heights_np, expected_heights)
        self.assertAllClose(widths_np, expected_widths)
def test_gather(self):
    """Check that gather selects the boxes at the given int32 indices."""
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])
    picked_indices = tf.constant([0, 2, 4], tf.int32)
    expected_rows = [4 * [0.0], 4 * [2.0], 4 * [4.0]]

    source_boxes = box_list.BoxList(corner_rows)
    picked_boxes = box_list_ops.gather(source_boxes, picked_indices)

    with self.test_session() as sess:
        picked_np = sess.run(picked_boxes.get())
        self.assertAllClose(picked_np, expected_rows)
def test_boolean_mask(self):
    """Check that boolean_mask keeps exactly the boxes flagged True."""
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])
    keep_flags = tf.constant([True, False, True, False, True], tf.bool)
    expected_rows = [4 * [0.0], 4 * [2.0], 4 * [4.0]]

    source_boxes = box_list.BoxList(corner_rows)
    kept_boxes = box_list_ops.boolean_mask(source_boxes, keep_flags)

    with self.test_session() as sess:
        kept_np = sess.run(kept_boxes.get())
        self.assertAllClose(kept_np, expected_rows)
def test_prune_small_boxes(self):
    """Check prune_small_boxes with a minimum side of 3.

    Of the five input boxes, only the two whose height and width are both at
    least 3 are expected to remain.
    """
    input_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
                                 [5.0, 6.0, 10.0, 7.0],
                                 [3.0, 4.0, 6.0, 8.0],
                                 [14.0, 14.0, 15.0, 15.0],
                                 [0.0, 0.0, 20.0, 20.0]])
    expected_corners = [[3.0, 4.0, 6.0, 8.0],
                        [0.0, 0.0, 20.0, 20.0]]

    input_boxes = box_list.BoxList(input_corners)
    surviving_boxes = box_list_ops.prune_small_boxes(input_boxes, 3)

    with self.test_session() as sess:
        surviving_np = sess.run(surviving_boxes.get())
        self.assertAllEqual(surviving_np, expected_corners)
def test_scale(self):
    """Check that scale rescales coordinates and keeps extra fields."""
    pixel_corners = tf.constant([[0, 0, 100, 200],
                                 [50, 120, 100, 140]], dtype=tf.float32)
    pixel_boxes = box_list.BoxList(pixel_corners)
    pixel_boxes.add_field('extra_data', tf.constant([[1], [2]]))

    inv_height = tf.constant(1.0 / 100)
    inv_width = tf.constant(1.0 / 200)
    normalized_boxes = box_list_ops.scale(pixel_boxes, inv_height, inv_width)
    expected_corners = [[0, 0, 1, 1],
                        [0.5, 0.6, 1.0, 0.7]]

    with self.test_session() as sess:
        corners_np = sess.run(normalized_boxes.get())
        self.assertAllClose(corners_np, expected_corners)
        # The extra field must come through unchanged.
        extra_np = sess.run(normalized_boxes.get_field('extra_data'))
        self.assertAllEqual(extra_np, [[1], [2]])
def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
    """Check that prune_small_boxes also drops boxes with a negative side.

    The input extends the basic pruning case with two degenerate boxes (one
    with negative height, one with negative width); the expected output is
    unchanged, i.e. both degenerate boxes are removed.
    """
    input_corners = tf.constant([
        [4.0, 3.0, 7.0, 5.0],
        [5.0, 6.0, 10.0, 7.0],
        [3.0, 4.0, 6.0, 8.0],
        [14.0, 14.0, 15.0, 15.0],
        [0.0, 0.0, 20.0, 20.0],
        [2.0, 3.0, 1.5, 7.0],  # negative height
        [2.0, 3.0, 5.0, 1.7],  # negative width
    ])
    expected_corners = [[3.0, 4.0, 6.0, 8.0],
                        [0.0, 0.0, 20.0, 20.0]]

    input_boxes = box_list.BoxList(input_corners)
    surviving_boxes = box_list_ops.prune_small_boxes(input_boxes, 3)

    with self.test_session() as sess:
        surviving_np = sess.run(surviving_boxes.get())
        self.assertAllEqual(surviving_np, expected_corners)
def test_gather_with_field(self):
    """Check that gather also gathers an explicitly requested field."""
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])
    picked_indices = tf.constant([0, 2, 4], tf.int32)
    box_weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
    expected_rows = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
    expected_weights = [[.1], [.5], [.9]]

    source_boxes = box_list.BoxList(corner_rows)
    source_boxes.add_field('weights', box_weights)
    picked_boxes = box_list_ops.gather(
        source_boxes, picked_indices, ['weights'])

    with self.test_session() as sess:
        rows_np, weights_np = sess.run(
            [picked_boxes.get(), picked_boxes.get_field('weights')])
        self.assertAllClose(rows_np, expected_rows)
        self.assertAllClose(weights_np, expected_weights)
def test_boolean_mask_with_field(self):
    """Check that boolean_mask also masks an explicitly requested field."""
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])
    keep_flags = tf.constant([True, False, True, False, True], tf.bool)
    box_weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
    expected_rows = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
    expected_weights = [[.1], [.5], [.9]]

    source_boxes = box_list.BoxList(corner_rows)
    source_boxes.add_field('weights', box_weights)
    kept_boxes = box_list_ops.boolean_mask(
        source_boxes, keep_flags, ['weights'])

    with self.test_session() as sess:
        rows_np, weights_np = sess.run(
            [kept_boxes.get(), kept_boxes.get_field('weights')])
        self.assertAllClose(rows_np, expected_rows)
        self.assertAllClose(weights_np, expected_weights)
def test_gather_with_dynamic_indexing(self):
    """Check gather with indices computed in-graph via tf.where.

    The indices are the positions whose weight exceeds 0.4, flattened to a
    rank-1 tensor.
    """
    corner_rows = tf.constant([4 * [float(row)] for row in range(5)])
    box_weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
    above_threshold = tf.where(tf.greater(box_weights, 0.4))
    picked_indices = tf.reshape(above_threshold, [-1])
    expected_rows = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
    expected_weights = [.5, .7, .9]

    source_boxes = box_list.BoxList(corner_rows)
    source_boxes.add_field('weights', box_weights)
    picked_boxes = box_list_ops.gather(
        source_boxes, picked_indices, ['weights'])

    with self.test_session() as sess:
        rows_np, weights_np = sess.run(
            [picked_boxes.get(), picked_boxes.get_field('weights')])
        self.assertAllClose(rows_np, expected_rows)
        self.assertAllClose(weights_np, expected_weights)
def boolean_mask(boxlist, indicator, fields=None, scope=None):
    """Return a new BoxList with the boxes flagged True by `indicator`.

    Besides the box coordinates, the extra fields stored in `boxlist` are
    masked along their first dimension as well. By default every extra field
    is carried over; pass `fields` to restrict this to a subset.

    Args:
      boxlist: BoxList holding N boxes.
      indicator: a rank-1 boolean tensor.
      fields: (optional) list of fields to also mask. If None (default), all
        fields are masked. Pass an empty list to keep only the box
        coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList holding the subset of the input BoxList selected
        by `indicator`.

    Raises:
      ValueError: if `indicator` is not a rank-1 boolean tensor, or if a
        requested field is not contained in `boxlist`.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')

        masked_boxes = tf.boolean_mask(boxlist.get(), indicator)
        subboxlist = box_list.BoxList(masked_boxes)

        selected_fields = (
            boxlist.get_extra_fields() if fields is None else fields)
        for field_name in selected_fields:
            if not boxlist.has_field(field_name):
                raise ValueError('boxlist must contain all specified fields')
            subboxlist.add_field(
                field_name,
                tf.boolean_mask(boxlist.get_field(field_name), indicator))
        return subboxlist
def gather(boxlist, indices, fields=None, scope=None):
    """Gather boxes from BoxList according to indices and return new BoxList.

    By default, `gather` returns boxes corresponding to the input index list,
    as well as all additional fields stored in the boxlist (indexing into the
    first dimension). However one can optionally only gather from a subset of
    fields.

    Args:
      boxlist: BoxList holding N boxes.
      indices: a rank-1 tensor of type int32 / int64.
      fields: (optional) list of fields to also gather from. If None
        (default), all fields are gathered from. Pass an empty fields list to
        only gather the box coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indices.

    Raises:
      ValueError: if `indices` is not a rank-1 tensor (including when its
        rank is statically unknown), if `indices` is not of type int32 or
        int64, or if a specified field is not contained in `boxlist`.
    """
    with tf.name_scope(scope, 'Gather'):
        # Use `ndims` (as `boolean_mask` does) rather than
        # `len(shape.as_list())`: `as_list()` itself raises on a shape of
        # unknown rank, which would hide the intended error message.
        if indices.shape.ndims != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype != tf.int32 and indices.dtype != tf.int64:
            raise ValueError('indices should be an int32 / int64 tensor')

        subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = tf.gather(boxlist.get_field(field), indices)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
def build(self): rpn_model = self._rpn_model # Share the same prediction dict as RPN prediction_dict = rpn_model.build() top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS] ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE] class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES] depth_map = prediction_dict[RpnModel.PRED_DEPTH_MAP] with tf.variable_scope('mlod_projection'): if self._config.expand_proposals_xz > 0.0: expand_length = self._config.expand_proposals_xz # Expand anchors along x and z with tf.variable_scope('expand_xz'): expanded_dim_x = top_anchors[:, 3] + expand_length expanded_dim_z = top_anchors[:, 5] + expand_length expanded_anchors = tf.stack([ top_anchors[:, 0], top_anchors[:, 1], top_anchors[:, 2], expanded_dim_x, top_anchors[:, 4], expanded_dim_z ], axis=1) mlod_projection_in = expanded_anchors else: mlod_projection_in = top_anchors with tf.variable_scope('bev'): # Project top anchors into bev and image spaces bev_proposal_boxes, bev_proposal_boxes_norm = \ anchor_projector.project_to_bev( mlod_projection_in, self.dataset.kitti_utils.bev_extents) # Reorder projected boxes into [y1, x1, y2, x2] bev_proposal_boxes_tf_order = \ anchor_projector.reorder_projected_boxes( bev_proposal_boxes) bev_proposal_boxes_norm_tf_order = \ anchor_projector.reorder_projected_boxes( bev_proposal_boxes_norm) with tf.variable_scope('img'): if self.lidar_only: image_shape = tf.cast( tf.shape(rpn_model.placeholders[RpnModel.PL_IMG_INPUT]) [1:3], tf.float32) img_proposal_boxes_norm_tf_order = [] for i in range(self.num_views): img_proposal_boxes, img_proposal_boxes_norm = \ anchor_projector.tf_project_to_image_space( mlod_projection_in, rpn_model.placeholders[RpnModel.PL_CALIB_P2][i], image_shape) # Only reorder the normalized img img_proposal_boxes_norm_tf_order.append( anchor_projector.reorder_projected_boxes( img_proposal_boxes_norm)) else: image_shape = tf.cast( tf.shape(rpn_model.placeholders[RpnModel.PL_IMG_INPUT]) [0:2], tf.float32) 
img_proposal_boxes, img_proposal_boxes_norm = \ anchor_projector.tf_project_to_image_space( mlod_projection_in, rpn_model.placeholders[RpnModel.PL_CALIB_P2], image_shape) img_proposal_boxes_tf_order = \ anchor_projector.reorder_projected_boxes( img_proposal_boxes) img_proposal_boxes_norm_tf_order = \ anchor_projector.reorder_projected_boxes( img_proposal_boxes_norm) bev_feature_maps = rpn_model.bev_feature_maps img_feature_maps = rpn_model.img_feature_maps if not (self._path_drop_probabilities[0] == self._path_drop_probabilities[1] == 1.0) \ and (self.lidar_only and self.num_views > 0): with tf.variable_scope('mlod_path_drop'): img_mask = rpn_model.img_path_drop_mask bev_mask = rpn_model.bev_path_drop_mask if self.lidar_only: #img_feature_maps_list = [] for i in range(self.num_views): img_feature_maps[i] = tf.multiply( img_feature_maps[i], img_mask) #img_feature_maps = img_feature_maps_list else: img_feature_maps = tf.multiply(img_feature_maps, img_mask) bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask) else: bev_mask = tf.constant(1.0) img_mask = tf.constant(1.0) # ROI Pooling with tf.variable_scope('mlod_roi_pooling'): def get_box_indices(boxes): proposals_shape = boxes.get_shape().as_list() if any(dim is None for dim in proposals_shape): proposals_shape = tf.shape(boxes) ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) multiplier = tf.expand_dims( tf.range(start=0, limit=proposals_shape[0]), 1) return tf.reshape(ones_mat * multiplier, [-1]) bev_boxes_norm_batches = tf.expand_dims(bev_proposal_boxes_norm, axis=0) # These should be all 0's since there is only 1 image tf_box_indices = get_box_indices(bev_boxes_norm_batches) # Do ROI Pooling on BEV bev_rois = tf.image.crop_and_resize( bev_feature_maps, bev_proposal_boxes_norm_tf_order, tf_box_indices, self._proposal_roi_crop_size, name='bev_rois') # Do ROI Pooling on image if self.lidar_only: img_rois = [] for i in range(self.num_views): img_rois.append( tf.image.crop_and_resize( img_feature_maps[i], 
img_proposal_boxes_norm_tf_order[i], tf_box_indices, self._proposal_roi_crop_size, name='img_rois')) else: img_rois_list = [] if self.multi_scale_image: img_end_points = rpn_model.img_end_points #print(img_end_points) img_feature_maps_list = [ img_end_points[tensor_name] for tensor_name in self.feature_names ] else: two_features = False if two_features: img_end_points = rpn_model.img_end_points img_feature_maps_list = [ img_end_points[tensor_name] for tensor_name in self.feature_names ] else: img_feature_maps_list = [img_feature_maps] for img_feature_maps in img_feature_maps_list: img_rois_list.append( tf.image.crop_and_resize( img_feature_maps, img_proposal_boxes_norm_tf_order, tf_box_indices, self._proposal_roi_crop_size, name='img_rois')) # Occlusion masking if self.apply_occ_mask: ref_depth_min = mlod_projection_in[:, 2] - mlod_projection_in[:, 5] / 2 - 0.5 ref_depth_max = mlod_projection_in[:, 2] + mlod_projection_in[:, 5] / 2 + 0.5 #no background masking #ref_depth_max = tf.ones_like(ref_depth_min)*100 occ_mask = self._occ_mask_layer.build( depth_map, img_proposal_boxes_norm_tf_order, tf_box_indices, ref_depth_min, ref_depth_max, self._n_split, [8, 8], self._proposal_roi_crop_size, self.occ_quantile_level) img_rois_masked_list = [ tf.multiply(img_rois, occ_mask, name='masked_img') for img_rois in img_rois_list ] else: img_rois_masked_list = img_rois_list # Get anchors dimension boxes_3d_x_dim = tf.abs(bev_proposal_boxes[:, 0] - bev_proposal_boxes[:, 2]) boxes_3d_z_dim = tf.abs(bev_proposal_boxes[:, 1] - bev_proposal_boxes[:, 3]) boxes_3d_dim = tf.stack([ boxes_3d_x_dim, boxes_3d_x_dim, boxes_3d_x_dim, boxes_3d_x_dim, boxes_3d_z_dim, boxes_3d_z_dim, boxes_3d_z_dim, boxes_3d_z_dim, tf.ones_like(boxes_3d_z_dim), tf.ones_like(boxes_3d_z_dim) ], axis=1) boxes_2d_x_dim = tf.abs(img_proposal_boxes[:, 0] - img_proposal_boxes[:, 2]) boxes_2d_y_dim = tf.abs(img_proposal_boxes[:, 1] - img_proposal_boxes[:, 3]) boxes_2d_dim = tf.stack([ boxes_2d_x_dim, boxes_2d_y_dim, 
tf.ones_like(boxes_2d_x_dim), tf.ones_like(boxes_2d_y_dim) ], axis=1) # Fully connected layers (Box Predictor) mlod_layers_config = self.model_config.layers_config.mlod_config cls_input_weights = [ mlod_layers_config.cls_input_weights[0] * bev_mask, mlod_layers_config.cls_input_weights[1] * img_mask ] reg_input_weights = [ mlod_layers_config.reg_input_weights[0] * bev_mask, mlod_layers_config.reg_input_weights[1] * img_mask ] multi_check = rpn_model.multi_check cls_reg_separated = rpn_model.cls_reg_separated reg_var = self._config.reg_var if cls_reg_separated: fusion_cls_out = ['cls'] fusion_reg_out = ['offset', 'ang'] else: fusion_net_out = ['cls', 'offset', 'ang'] if self.lidar_only: img_rois_masked.append(bev_rois) mlod_mask = [img_mask] * self.num_views + [bev_mask] fc_output_layers = \ mlod_fc_layers_builder.build( layers_config=mlod_layers_config, input_rois=img_rois_masked, input_weights=mlod_mask, num_final_classes=self._num_final_classes, box_rep=self._box_rep, top_anchors=top_anchors, ground_plane=ground_plane, is_training=self._is_training, cls_reg_separated=cls_reg_separated) else: if two_features: rois_masked_list_cls = [img_rois_masked_list[0]] rois_masked_list_reg = [img_rois_masked_list[1]] rois_masked_list_cls.insert(0, bev_rois) rois_masked_list_reg.insert(0, bev_rois) else: rois_masked_list = img_rois_masked_list rois_masked_list.insert(0, bev_rois) rois_masked_list_cls = rois_masked_list rois_masked_list_reg = rois_masked_list if not cls_reg_separated: fc_output_layers = \ mlod_fc_layers_builder.build( layers_config=mlod_layers_config, input_rois=rois_masked_list, cls_input_weights=cls_input_weights, reg_input_weights=reg_input_weights, num_final_classes=self._num_final_classes, box_rep=self._box_rep, top_anchors=top_anchors, ground_plane=ground_plane, is_training=self._is_training, variables_name='box_classifier_regressor', multi_check=multi_check, net_out = fusion_net_out, img_idx = self.img_idx) all_cls_logits = \ 
fc_output_layers[mlod_fc_layers_builder.KEY_CLS_LOGITS] all_offsets = fc_output_layers[ mlod_fc_layers_builder.KEY_OFFSETS] all_angle_vectors = \ fc_output_layers.get(mlod_fc_layers_builder.KEY_ANGLE_VECTORS) if multi_check: sub_cls_logits_list = \ fc_output_layers[mlod_fc_layers_builder.KEY_SUB_CLS_LOGITS_LIST] sub_reg_offset_list = fc_output_layers[ mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST] else: sub_cls_logits_list = [] sub_reg_offset_list = [] else: fc_output_layers1 = \ mlod_fc_layers_builder.build( layers_config=mlod_layers_config, input_rois=rois_masked_list_cls, cls_input_weights=cls_input_weights, reg_input_weights=reg_input_weights, num_final_classes=self._num_final_classes, box_rep=self._box_rep, top_anchors=top_anchors, ground_plane=ground_plane, is_training=self._is_training, variables_name='box_classifier', multi_check=multi_check, net_out = fusion_cls_out, img_idx = self.img_idx) all_cls_logits = \ fc_output_layers1[mlod_fc_layers_builder.KEY_CLS_LOGITS] sub_cls_logits_list = \ fc_output_layers1[mlod_fc_layers_builder.KEY_SUB_CLS_LOGITS_LIST] selected_img_idx = fc_output_layers1[ mlod_fc_layers_builder.KEY_SELECTED_IMG_IDX] fc_output_layers2 = \ mlod_fc_layers_builder.build( layers_config=mlod_layers_config, input_rois=rois_masked_list_reg, cls_input_weights=cls_input_weights, reg_input_weights=reg_input_weights, num_final_classes=self._num_final_classes, box_rep=self._box_rep, top_anchors=top_anchors, ground_plane=ground_plane, is_training=self._is_training, variables_name='box_regressor', multi_check=multi_check, net_out = fusion_reg_out, img_idx = self.img_idx, selected_img_idx_in = selected_img_idx) if self.offsets_ratio: all_offsets_ratio = fc_output_layers2[ mlod_fc_layers_builder.KEY_OFFSETS] sub_reg_offset_ratio_list = fc_output_layers2[ mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST] all_offsets = all_offsets_ratio * boxes_3d_dim * reg_var sub_reg_offset_list = [] for branch_idx, sub_reg_offset_ratio in enumerate( 
sub_reg_offset_ratio_list): if branch_idx in self.img_idx: sub_reg_offset = sub_reg_offset_ratio * boxes_2d_dim * reg_var else: sub_reg_offset = sub_reg_offset_ratio * boxes_3d_dim * reg_var sub_reg_offset_list.append(sub_reg_offset) else: all_offsets_multi_classes = fc_output_layers2[ mlod_fc_layers_builder.KEY_OFFSETS] sub_reg_offset_multi_classes_list = fc_output_layers2[ mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST] all_angle_vectors = \ fc_output_layers2.get(mlod_fc_layers_builder.KEY_ANGLE_VECTORS) #select class in offsets print(all_offsets_multi_classes, all_cls_logits) all_offsets = self.class_selection(all_offsets_multi_classes, all_cls_logits, self.off_out_size) sub_reg_offset_list = [] for branch_idx, sub_offsets_mc in enumerate( sub_reg_offset_multi_classes_list): sub_logits = sub_cls_logits_list[branch_idx] if branch_idx in self.img_idx: sub_offsets = self.class_selection(sub_offsets_mc, sub_logits, 4) else: sub_offsets = self.class_selection(sub_offsets_mc, sub_logits, self.off_out_size) sub_reg_offset_list.append(sub_offsets) sub_cls_softmax_list = [] with tf.variable_scope('softmax'): all_cls_softmax = tf.nn.softmax(all_cls_logits) for sub_logits in sub_cls_logits_list: sub_cls_softmax = tf.nn.softmax(sub_logits) sub_cls_softmax_list.append(sub_cls_softmax) ###################################################### # Subsample mini_batch for the loss function ###################################################### # Get the ground truth tensors anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS] if self._box_rep in ['box_3d', 'box_4ca']: boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D] orientations_gt = boxes_3d_gt[:, 6] elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']: boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D] else: raise NotImplementedError('Ground truth tensors not implemented') boxes_2d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_2D] # Project anchor_gts to 2D bev with 
tf.variable_scope('mlod_gt_projection'): # TODO: (#140) fix kitti_util bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev( anchors_gt, self.dataset.kitti_utils.bev_extents) bev_anchor_boxes_gt_tf_order = \ anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt) img_anchor_boxes_gt_tf_order = \ anchor_projector.reorder_projected_boxes(boxes_2d_gt) with tf.variable_scope('mlod_box_list'): #bev # Convert to box_list format anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order) anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order) #img img_box_list_gt = box_list.BoxList(img_anchor_boxes_gt_tf_order) img_box_list = box_list.BoxList(img_proposal_boxes_tf_order) mb_mask, mb_bev_class_label_indices, mb_img_class_label_indices, \ mb_gt_indices, mb_img_gt_indices = \ self.sample_mini_batch( anchor_box_list_gt=anchor_box_list_gt, anchor_box_list=anchor_box_list, img_box_list_gt=img_box_list_gt, img_box_list=img_box_list, class_labels=class_labels) # Create classification one_hot vector with tf.variable_scope('mlod_one_hot_classes'): mb_classification_gt = tf.one_hot( mb_bev_class_label_indices, depth=self._num_final_classes, on_value=1.0 - self._config.label_smoothing_epsilon, off_value=(self._config.label_smoothing_epsilon / self.dataset.num_classes)) with tf.variable_scope('mlod_img_one_hot_classes'): mb_img_classification_gt = tf.one_hot( mb_img_class_label_indices, depth=self._num_final_classes, on_value=1.0 - self._config.label_smoothing_epsilon, off_value=(self._config.label_smoothing_epsilon / self.dataset.num_classes)) # Mask predictions with tf.variable_scope('mlod_apply_mb_mask'): # Classification mb_classifications_logits = tf.boolean_mask( all_cls_logits, mb_mask) mb_classifications_softmax = tf.boolean_mask( all_cls_softmax, mb_mask) sub_mb_classifications_logits_list = [] for br_idx, sub_cls_logits in enumerate(sub_cls_logits_list): sub_mb_classifications_logits = tf.boolean_mask( sub_cls_logits, mb_mask) 
sub_mb_classifications_logits_list.append( sub_mb_classifications_logits) mb_sub_classifications_softmax_list = [] for br_idx, sub_cls_softmax in enumerate(sub_cls_softmax_list): mb_sub_classifications_softmax = tf.boolean_mask( sub_cls_softmax, mb_mask) mb_sub_classifications_softmax_list.append( mb_sub_classifications_softmax) # Offsets mb_offsets = tf.boolean_mask(all_offsets, mb_mask) mb_sub_offsets_list = [] for sub_offset in sub_reg_offset_list: mb_sub_offsets = tf.boolean_mask(sub_offset, mb_mask) mb_sub_offsets_list.append(mb_sub_offsets) # Angle Vectors if all_angle_vectors is not None: mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask) else: mb_angle_vectors = None # Encode anchor offsets with tf.variable_scope('mlod_encode_mb_anchors'): mb_anchors = tf.boolean_mask(top_anchors, mb_mask) if self._box_rep == 'box_3d': # Gather corresponding ground truth anchors for each mb sample mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices) mb_offsets_gt = anchor_encoder.tf_anchor_to_offset( mb_anchors, mb_anchors_gt) # Gather corresponding ground truth orientation for each # mb sample mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices) elif self._box_rep in ['box_8c', 'box_8co']: # Get boxes_3d ground truth mini-batch and convert to box_8c mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices) if self._box_rep == 'box_8c': mb_boxes_8c_gt = \ box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt) elif self._box_rep == 'box_8co': mb_boxes_8c_gt = \ box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt) # Convert proposals: anchors -> box_3d -> box8c proposal_boxes_3d = \ box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True) proposal_boxes_8c = \ box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d) # Get mini batch offsets mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask) mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets( mb_boxes_8c, mb_boxes_8c_gt) # Flatten the offsets to a (N x 24) vector mb_offsets_gt = tf.reshape(mb_offsets_gt, 
[-1, 24]) elif self._box_rep in ['box_4c', 'box_4ca']: # Get ground plane for box_4c conversion ground_plane = self._rpn_model.placeholders[ self._rpn_model.PL_GROUND_PLANE] # Convert gt boxes_3d -> box_4c mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices) mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c( mb_boxes_3d_gt, ground_plane) # Convert proposals: anchors -> box_3d -> box_4c proposal_boxes_3d = \ box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True) proposal_boxes_4c = \ box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d, ground_plane) # Get mini batch mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask) mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets( mb_boxes_4c, mb_boxes_4c_gt) if self._box_rep == 'box_4ca': # Gather corresponding ground truth orientation for each # mb sample mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices) else: raise NotImplementedError( 'Anchor encoding not implemented for', self._box_rep) #2d bounding boxes offset # anchor projected 2d bounding box #mb_img_proposal_boxes_norm = tf.boolean_mask(img_proposal_boxes_norm, mb_mask) #mb_boxes_2d = mb_img_proposal_boxes_norm*[image_w,image_h,image_w,image_h] #mb_boxes_w = mb_boxes_2d[:,2] - mb_boxes_2d[:,0] #mb_boxes_h = mb_boxes_2d[:,3] - mb_boxes_2d[:,1] mb_boxes_2d = tf.boolean_mask(img_proposal_boxes, mb_mask) mb_boxes_2d_gt = tf.gather(boxes_2d_gt, mb_img_gt_indices) mb_offsets_2d_gt = anchor_encoder.tf_2d_box_to_offset( mb_boxes_2d, mb_boxes_2d_gt) ###################################################### # ROI summary images ###################################################### mlod_mini_batch_size = \ self.dataset.kitti_utils.mini_batch_utils.mlod_mini_batch_size with tf.variable_scope('bev_mlod_rois'): mb_bev_anchors_norm = tf.boolean_mask( bev_proposal_boxes_norm_tf_order, mb_mask) mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Show the ROIs of the BEV input density map # for the mini batch anchors bev_input_rois = 
tf.image.crop_and_resize( self._rpn_model._bev_preprocessed, mb_bev_anchors_norm, mb_bev_box_indices, (32, 32)) bev_input_roi_summary_images = tf.split(bev_input_rois, self._bev_depth, axis=3) tf.summary.image('bev_mlod_rois', bev_input_roi_summary_images[-1], max_outputs=mlod_mini_batch_size) with tf.variable_scope('img_mlod_rois'): if self.lidar_only: for i in range(self.num_views): # ROIs on image input mb_img_anchors_norm = tf.boolean_mask( img_proposal_boxes_norm_tf_order[i], mb_mask) mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Do test ROI pooling on mini batch img_input_rois = tf.image.crop_and_resize( tf.expand_dims(self._rpn_model._img_preprocessed[i], axis=0), mb_img_anchors_norm, mb_img_box_indices, (32, 32)) tf.summary.image('img_mlod_rois', img_input_rois, max_outputs=mlod_mini_batch_size) else: # ROIs on image input mb_img_anchors_norm = tf.boolean_mask( img_proposal_boxes_norm_tf_order, mb_mask) mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Do test ROI pooling on mini batch img_input_rois = tf.image.crop_and_resize( self._rpn_model._img_preprocessed, mb_img_anchors_norm, mb_img_box_indices, (32, 32)) tf.summary.image('img_mlod_rois', img_input_rois, max_outputs=mlod_mini_batch_size) ###################################################### # Final Predictions ###################################################### # Get orientations from angle vectors if all_angle_vectors is not None: with tf.variable_scope('mlod_orientation'): all_orientations = \ orientation_encoder.tf_angle_vector_to_orientation( all_angle_vectors) # Apply offsets to regress proposals with tf.variable_scope('mlod_regression'): if self._box_rep == 'box_3d': prediction_anchors = \ anchor_encoder.offset_to_anchor(top_anchors, all_offsets) elif self._box_rep in ['box_8c', 'box_8co']: # Reshape the 24-dim regressed offsets to (N x 3 x 8) reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8]) # Given the offsets, get the boxes_8c 
prediction_boxes_8c = \ box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c, reshaped_offsets) # Convert corners back to box3D prediction_boxes_3d = \ box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c) # Convert the box_3d to anchor format for nms prediction_anchors = \ box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d) elif self._box_rep in ['box_4c', 'box_4ca']: # Convert predictions box_4c -> box_3d prediction_boxes_4c = \ box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c, all_offsets) prediction_boxes_3d = \ box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c, ground_plane) # Convert to anchor format for nms prediction_anchors = \ box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d) else: raise NotImplementedError('Regression not implemented for', self._box_rep) # Apply Non-oriented NMS in BEV with tf.variable_scope('mlod_nms'): bev_extents = self.dataset.kitti_utils.bev_extents with tf.variable_scope('bev_projection'): # Project predictions into BEV mlod_bev_boxes, _ = anchor_projector.project_to_bev( prediction_anchors, bev_extents) mlod_bev_boxes_tf_order = \ anchor_projector.reorder_projected_boxes( mlod_bev_boxes) # Get top score from second column onward all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1) # Apply NMS in BEV nms_indices = tf.image.non_max_suppression( mlod_bev_boxes_tf_order, all_top_scores, max_output_size=self._nms_size, iou_threshold=self._nms_iou_threshold) # Gather predictions from NMS indices top_classification_logits = tf.gather(all_cls_logits, nms_indices) top_classification_softmax = tf.gather(all_cls_softmax, nms_indices) top_sub_classification_softmax_list = [] for sub_cls_softmax in sub_cls_softmax_list: top_sub_classification_softmax_list.append( tf.gather(sub_cls_softmax, nms_indices)) top_2d_proposal = tf.gather(img_proposal_boxes_norm, nms_indices) top_prediction_anchors = tf.gather(prediction_anchors, nms_indices) if self._box_rep == 'box_3d': top_orientations = tf.gather(all_orientations, 
nms_indices) elif self._box_rep in ['box_8c', 'box_8co']: top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_8c = tf.gather(prediction_boxes_8c, nms_indices) elif self._box_rep == 'box_4c': top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_4c = tf.gather(prediction_boxes_4c, nms_indices) elif self._box_rep == 'box_4ca': top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_4c = tf.gather(prediction_boxes_4c, nms_indices) top_orientations = tf.gather(all_orientations, nms_indices) else: raise NotImplementedError('NMS gather not implemented for', self._box_rep) if self._train_val_test in ['train', 'val']: # Additional entries are added to the shared prediction_dict # Mini batch predictions prediction_dict['cls_softmax'] = all_cls_softmax prediction_dict[self.PRED_SUB_MB_CLASSIFICATION_LOGITS_LIST] = \ sub_mb_classifications_logits_list prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \ mb_classifications_logits prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \ mb_classifications_softmax prediction_dict[self.PRED_MB_SUB_CLASSIFICATION_SOFTMAX_LIST] = \ mb_sub_classifications_softmax_list prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets prediction_dict[ self.PRED_MB_SUB_OFFSETS_LIST] = mb_sub_offsets_list prediction_dict['top_3d_proposal'] = top_2d_proposal prediction_dict['2d_gt_box_rescale'] = img_anchor_boxes_gt_tf_order prediction_dict['3d_gt_box_rescale'] = bev_anchor_boxes_gt_tf_order prediction_dict['2d_proposed_box'] = img_proposal_boxes_tf_order prediction_dict['3d_proposed_box'] = bev_proposal_boxes_tf_order # Mini batch ground truth prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \ mb_classification_gt prediction_dict[self.PRED_MB_IMG_CLASSIFICATIONS_GT] = \ mb_img_classification_gt prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt prediction_dict[self.PRED_MB_OFFSETS_2D_GT] = mb_offsets_2d_gt # Top NMS predictions 
prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \ top_classification_logits prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \ top_classification_softmax prediction_dict[self.PRED_TOP_SUB_CLASSIFICATION_SOFTMAX_LIST] = \ top_sub_classification_softmax_list prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \ top_prediction_anchors # Mini batch predictions (for debugging) prediction_dict[self.PRED_MB_MASK] = mb_mask prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \ mb_bev_class_label_indices prediction_dict['gt_indices'] = mb_gt_indices prediction_dict['img_gt_indices'] = mb_img_gt_indices prediction_dict[ 'img_gt_label_indices'] = mb_img_class_label_indices # All predictions (for debugging) prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \ all_cls_logits prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets # Path drop masks (for debugging) prediction_dict['bev_mask'] = bev_mask prediction_dict['img_mask'] = img_mask prediction_dict[ 'BEV_freeze_mask'] = self._rpn_model.bev_freeze_mask else: # self._train_val_test == 'test' prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \ top_classification_softmax prediction_dict[self.PRED_TOP_SUB_CLASSIFICATION_SOFTMAX_LIST] = \ top_sub_classification_softmax_list prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \ top_prediction_anchors prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets if self._box_rep == 'box_3d': prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations # For debugging prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors elif self._box_rep in ['box_8c', 'box_8co']: prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d # Store the corners before converting for visualization purposes prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c elif 
self._box_rep == 'box_4c': prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c elif self._box_rep == 'box_4ca': if self._train_val_test in ['train', 'val']: prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \ mb_orientations_gt prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations else: raise NotImplementedError('Prediction dict not implemented for', self._box_rep) return prediction_dict
def test_iou_mask_ops(self):
    # Exercises the IoU -> max IoU -> mini batch mask -> class label chain
    # on four predicted boxes and three ground truth boxes.

    # Box corners are given as [y1, x1, y2, x2]
    pred_corners = tf.constant([
        [4.0, 3.0, 7.0, 5.0],
        [14.0, 14.0, 16.0, 16.0],
        [0.0, 0.0, 21.0, 19.0],
        [3.0, 4.0, 5.0, 7.0],
    ])
    gt_corners = tf.constant([
        [4.0, 3.0, 7.0, 6.0],
        [14.0, 14.0, 15.0, 15.0],
        [0.0, 0.0, 20.0, 20.0],
    ])
    # 3 classes
    class_labels = tf.constant([1., 2., 3.])

    # Expected IoU matrix: one row per ground truth box, one column per
    # predicted box
    expected_ious = [
        [0.66666669, 0., 0.02255639, 0.15384616],
        [0., 0.25, 0.00250627, 0.],
        [0.015, 0.01, 0.90692127, 0.015],
    ]
    # Expected column-wise maximum and the ground truth row it came from
    expected_max_ious = np.array(
        [0.66666669, 0.25, 0.90692127, 0.15384616])
    expected_max_indices = np.array([0, 1, 2, 0])
    expected_pos_mask = np.array([True, False, True, False])
    expected_class_and_background_indices = np.array([1, 0, 3, 0])

    # Wrap the corner tensors as box lists and compute the pairwise IoU
    pred_boxes = box_list.BoxList(pred_corners)
    gt_boxes = box_list.BoxList(gt_corners)
    iou_tensor = box_list_ops.iou(gt_boxes, pred_boxes)

    # Reduce over the ground truth axis so the result has one entry per
    # evaluated (predicted) box
    max_iou_tensor = tf.reduce_max(iou_tensor, axis=0)
    max_index_tensor = tf.argmax(iou_tensor, axis=0)

    # Mini batch as large as the number of predicted boxes, sampled with
    # custom negative/positive IoU ranges
    batch_size = 4
    negative_iou_range = [0.0, 0.3]
    positive_iou_range = [0.6, 0.7]

    sampled = self.mb_utils.sample_mini_batch(
        max_iou_tensor, batch_size, negative_iou_range, positive_iou_range)
    mb_mask, mb_pos_mask = sampled

    mb_class_indices = self.mb_utils.mask_class_label_indices(
        mb_pos_mask, mb_mask, max_index_tensor, class_labels)

    with self.test_session() as sess:
        # Evaluate each stage with its own run call
        iou_result = sess.run(iou_tensor)
        max_iou_result, max_index_result = sess.run(
            [max_iou_tensor, max_index_tensor])
        _, pos_mask_result = sess.run([mb_mask, mb_pos_mask])
        class_indices_result = sess.run(mb_class_indices)

        self.assertAllClose(iou_result, expected_ious)
        self.assertAllClose(max_iou_result, expected_max_ious)
        self.assertAllEqual(max_index_result, expected_max_indices)
        self.assertAllEqual(expected_pos_mask, pos_mask_result)
        self.assertAllEqual(class_indices_result,
                            expected_class_and_background_indices)
def main():
    """Show example mini batch info for full MlodModel training.

    The visualization includes ground truth, ortho rotated ground truth,
    background/negative/middle/positive proposal anchors, and a sampled
    mini batch. The 2D iou thresholds in the options below can be modified
    to show the effect of changing the iou threshold for mini batch
    sampling.

    The proposals shown are read from a text file under the checkpoint's
    predictions folder, so `checkpoint_name` must be set.

    Keys (NOTE(review): seven actors are registered with the toggle
    interactor in the order below; the F-key numbering is assumed to follow
    that order - confirm against vis_utils.ToggleActorsInteractorStyle):
        F1: Toggle ground truth
        F2: Toggle ortho rotated ground truth
        F3: Toggle background proposal anchors
        F4: Toggle negative proposal anchors
        F5: Toggle middle proposal anchors
        F6: Toggle positive proposal anchors
        F7: Toggle mini batch anchors

    Raises:
        ValueError: if `checkpoint_name` is None, or if a computed IoU is
            not within [0.0, 1.0].
    """

    ##############################
    # Options
    ##############################
    # Config file folder, default (<mlod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    # checkpoint_name = None
    checkpoint_name = 'mlod_exp_example'

    data_split = 'val_half'

    # global_step = None
    global_step = 100000

    # # # Cars # # #
    # sample_name = "000050"
    sample_name = "000104"
    # sample_name = "000764"

    # # # People # # #
    # val_half
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    mini_batch_size = 512

    neg_proposal_2d_iou_hi = 0.6
    pos_proposal_2d_iou_lo = 0.65

    bkg_proposals_line_width = 0.5
    neg_proposals_line_width = 0.5
    mid_proposals_line_width = 0.5
    pos_proposals_line_width = 1.0

    ##############################
    # End of Options
    ##############################

    img_idx = int(sample_name)
    print("Showing mini batch for sample {}".format(sample_name))

    # NOTE(review): the previous fallback for checkpoint_name=None built the
    # KITTI val dataset and then concatenated checkpoint_name (None) into the
    # demo predictions path, which always failed with a TypeError. The
    # intended demo folder name is not visible here, so fail early with an
    # explicit message instead.
    if checkpoint_name is None:
        raise ValueError(
            'checkpoint_name must be set; the proposals folder is derived '
            'from it')

    if config_dir is None:
        config_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name

    # Parse experiment config
    pipeline_config_file = config_dir + '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    dataset_config.data_split = data_split
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Overwrite the mini batch iou thresholds with the demo options
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mini_batch_utils.mlod_neg_iou_range[1] = neg_proposal_2d_iou_hi
    mini_batch_utils.mlod_pos_iou_range[0] = pos_proposal_2d_iou_lo

    # Load proposals from outputs folder
    proposals_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + \
        '/predictions/proposals_and_scores/' + dataset.data_split

    # Get checkpoint steps; folder names are sorted numerically
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    proposals_and_scores = np.loadtxt(
        proposals_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name))

    # The first 7 columns of each row are used as the proposal box_3d
    proposal_boxes_3d = proposals_and_scores[:, 0:7]
    proposal_anchors = box_3d_encoder.box_3d_to_anchor(proposal_boxes_3d)

    # Get filtered ground truth
    obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx)
    filtered_objs = dataset.kitti_utils.filter_labels(obj_labels)

    # Convert ground truth to anchors
    gt_boxes_3d = np.asarray([
        box_3d_encoder.object_label_to_box_3d(obj_label)
        for obj_label in filtered_objs
    ])
    gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                 ortho_rotate=True)

    # Ortho rotate ground truth
    gt_ortho_boxes_3d = box_3d_encoder.anchors_to_box_3d(gt_anchors)
    gt_ortho_objs = [
        box_3d_encoder.box_3d_to_object_label(box_3d, obj_type='OrthoGt')
        for box_3d in gt_ortho_boxes_3d
    ]

    # Project gt and anchors into BEV
    bev_extents = dataset.kitti_utils.bev_extents
    gt_bev_anchors, _ = \
        anchor_projector.project_to_bev(gt_anchors, bev_extents)
    bev_anchors, _ = \
        anchor_projector.project_to_bev(proposal_anchors, bev_extents)

    # Reorder boxes into (y1, x1, y2, x2) order
    gt_bev_anchors_tf_order = anchor_projector.reorder_projected_boxes(
        gt_bev_anchors)
    bev_anchors_tf_order = anchor_projector.reorder_projected_boxes(
        bev_anchors)

    # Convert to box_list format for iou calculation
    gt_anchor_box_list = box_list.BoxList(
        tf.cast(gt_bev_anchors_tf_order, tf.float32))
    anchor_box_list = box_list.BoxList(
        tf.cast(bev_anchors_tf_order, tf.float32))

    # Get IoU for every anchor
    tf_all_ious = box_list_ops.iou(gt_anchor_box_list, anchor_box_list)

    # Make sure the calculated IoUs contain values. Since it is a [N, M]
    # tensor, if there are no gt's for instance, that dimension will be zero.
    valid_ious = True
    if tf_all_ious.shape[0] == 0 or tf_all_ious.shape[1] == 0:
        print('#################################################')
        print('Warning: This sample does not contain valid IoUs')
        print('#################################################')
        valid_ious = False

    if valid_ious:
        # Best IoU per proposal (reduced over the ground truth axis)
        tf_max_ious = tf.reduce_max(tf_all_ious, axis=0)

        # Overwrite mini batch size and sample a mini batch
        mini_batch_utils.mlod_mini_batch_size = mini_batch_size
        mb_mask_tf, _ = mini_batch_utils.sample_mlod_mini_batch(tf_max_ious)

        # Run the graph to get the max iou of every proposal and the mini
        # batch mask. The session is closed as soon as the values have been
        # fetched so it does not stay open while the viewer is running.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            max_ious, mb_mask = sess.run([tf_max_ious, mb_mask_tf])

        mb_anchors = proposal_anchors[mb_mask]
        mb_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(mb_anchors)
        mb_anchor_ious = max_ious[mb_mask]
    else:
        # We have no valid IoU's, so assume all IoUs are zeros
        # and the mini-batch contains all the anchors since we cannot
        # mask without IoUs.
        max_ious = np.zeros(proposal_boxes_3d.shape[0])
        mb_anchor_ious = max_ious
        mb_anchors = proposal_anchors
        mb_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(mb_anchors)

    # Create lists of background/negative/middle/positive proposals based
    # on each proposal's max iou
    pos_proposal_objs = []
    mid_proposal_objs = []
    neg_proposal_objs = []
    bkg_proposal_objs = []

    for box_3d, max_iou in zip(proposal_boxes_3d, max_ious):
        if max_iou == 0.0:
            # Background proposals
            obj_type, target_list = 'BackgroundProposal', bkg_proposal_objs
        elif max_iou < neg_proposal_2d_iou_hi:
            # Negative proposals
            obj_type, target_list = 'NegativeProposal', neg_proposal_objs
        elif max_iou < pos_proposal_2d_iou_lo:
            # Middle proposals (in between negative and positive)
            obj_type, target_list = 'MiddleProposal', mid_proposal_objs
        elif max_iou <= 1.0:
            # Positive proposals
            obj_type, target_list = 'PositiveProposal', pos_proposal_objs
        else:
            raise ValueError('Invalid IoU > 1.0')

        target_list.append(
            box_3d_encoder.box_3d_to_object_label(box_3d,
                                                  obj_type=obj_type))

    print('{} bkg, {} neg, {} mid, {} pos proposals:'.format(
        len(bkg_proposal_objs), len(neg_proposal_objs),
        len(mid_proposal_objs), len(pos_proposal_objs)))

    # Convert the mini_batch anchors to object list. Without valid IoUs
    # every mini batch anchor is labelled as negative.
    pos_iou_lo = mini_batch_utils.mlod_pos_iou_range[0]
    mb_obj_list = []
    for mb_box_3d, mb_iou in zip(mb_anchor_boxes_3d, mb_anchor_ious):
        if valid_ious and mb_iou > pos_iou_lo:
            obj_type = "Positive"
        else:
            obj_type = "Negative"

        mb_obj_list.append(
            box_3d_encoder.box_3d_to_object_label(mb_box_3d, obj_type))

    # Point cloud
    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    points, point_colours = demo_utils.get_filtered_pc_and_colours(
        dataset, image, img_idx)

    # Visualize from here
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    # VtkPointCloud
    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    def make_vtk_boxes(objects, line_width=None):
        # Build a VtkBoxes for `objects`, optionally with a line width
        vtk_boxes = VtkBoxes()
        vtk_boxes.set_objects(objects, COLOUR_SCHEME)
        if line_width is not None:
            vtk_boxes.set_line_width(line_width)
        return vtk_boxes

    # VtkBoxes for ground truth and ortho ground truth
    vtk_gt_boxes = make_vtk_boxes(filtered_objs)
    vtk_gt_ortho_boxes = make_vtk_boxes(gt_ortho_objs)

    # VtkBoxes for background/negative/middle/positive proposals
    vtk_bkg_proposal_boxes = make_vtk_boxes(bkg_proposal_objs,
                                            bkg_proposals_line_width)
    vtk_neg_proposal_boxes = make_vtk_boxes(neg_proposal_objs,
                                            neg_proposals_line_width)
    vtk_mid_proposal_boxes = make_vtk_boxes(mid_proposal_objs,
                                            mid_proposals_line_width)
    vtk_pos_proposal_boxes = make_vtk_boxes(pos_proposal_objs,
                                            pos_proposals_line_width)

    # VtkBoxes for mini batch anchors
    vtk_mb_boxes = make_vtk_boxes(mb_obj_list)

    # Actors that can be toggled from the keyboard, in display order
    toggle_actors = [
        vtk_gt_boxes.vtk_actor,
        vtk_gt_ortho_boxes.vtk_actor,
        vtk_bkg_proposal_boxes.vtk_actor,
        vtk_neg_proposal_boxes.vtk_actor,
        vtk_mid_proposal_boxes.vtk_actor,
        vtk_pos_proposal_boxes.vtk_actor,
        vtk_mb_boxes.vtk_actor,
    ]

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Add actors
    vtk_renderer.AddActor(axes)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    for actor in toggle_actors:
        vtk_renderer.AddActor(actor)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("MLOD Mini Batch")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle(toggle_actors))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()