def test_project_to_bev_tensors(self):
    anchors = np.asarray([[0, 0, 3, 2, 0, 6],
                          [3, 0, 3, 2, 0, 2]],
                         dtype=np.float64)
    tf_anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)

    bev_extents = [[-5, 5], [0, 10]]
    tf_bev_extents = tf.convert_to_tensor(bev_extents, dtype=tf.float32)

    bev_extents_range = np.diff(bev_extents, axis=1)
    bev_extents_range = np.stack([bev_extents_range,
                                  bev_extents_range]).flatten()

    expected_boxes = np.asarray(
        [[0 - (-5) - 1, 4, 0 - (-5) + 1, 10],
         [3 - (-5) - 1, 6, 3 - (-5) + 1, 8]],
        dtype=np.float64)
    expected_boxes_norm = expected_boxes / bev_extents_range

    tf_boxes, tf_boxes_norm = \
        anchor_projector.project_to_bev(tf_anchors, tf_bev_extents)
    np_boxes, np_boxes_norm = \
        anchor_projector.project_to_bev(anchors, bev_extents)

    sess = tf.Session()
    with sess.as_default():
        tf_boxes_out = tf_boxes.eval()
        tf_boxes_norm_out = tf_boxes_norm.eval()

        np.testing.assert_allclose(tf_boxes_out, expected_boxes)
        np.testing.assert_allclose(tf_boxes_norm_out, expected_boxes_norm)

        # Check that tensor calculations match numpy ones
        np.testing.assert_allclose(tf_boxes_out, np_boxes)
        np.testing.assert_allclose(tf_boxes_norm_out, np_boxes_norm)
def test_project_to_bev_outside_extents(self):
    anchors = np.asarray([[0, 0, 0, 10, 0, 2]], dtype=np.float64)
    bev_extents = [[-3, 3], [0, 10]]

    bev_extents_range = np.diff(bev_extents, axis=1)
    bev_extents_range = np.stack([bev_extents_range,
                                  bev_extents_range]).flatten()

    expected_boxes = np.asarray(
        [[0 - (-3) - 5, 9, 0 - (-3) + 5, 11]],
        dtype=np.float64)
    expected_boxes_norm = expected_boxes / bev_extents_range

    boxes, boxes_norm = \
        anchor_projector.project_to_bev(anchors, bev_extents)

    # Loop through cases to see errors separately
    for box, box_norm, exp_box, exp_box_norm in zip(
            boxes, boxes_norm, expected_boxes, expected_boxes_norm):
        np.testing.assert_allclose(box, exp_box, rtol=1E-5)
        np.testing.assert_allclose(box_norm, exp_box_norm, rtol=1E-5)
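# A minimal numpy sketch of the projection the two tests above exercise.
# This is an assumption reverse-engineered from the expected values, not the
# library code itself: anchors are [x, y, z, dim_x, dim_y, dim_z], and the
# returned BEV box is [x1, z1, x2, z2] measured from the extents' origin,
# with z flipped so that larger z (farther away) maps to a smaller row value.
import numpy as np

def project_to_bev_sketch(anchors, bev_extents):
    anchors = np.asarray(anchors, dtype=np.float64)
    (x_min, x_max), (z_min, z_max) = bev_extents
    x, z = anchors[:, 0], anchors[:, 2]
    half_dim_x, half_dim_z = anchors[:, 3] / 2.0, anchors[:, 5] / 2.0
    # Shift x into the extents' frame; flip z about z_max
    boxes = np.stack([x - half_dim_x - x_min,
                      z_max - (z + half_dim_z),
                      x + half_dim_x - x_min,
                      z_max - (z - half_dim_z)], axis=1)
    extents_range = [x_max - x_min, z_max - z_min] * 2
    return boxes, boxes / extents_range

# Reproduces the first test case above: anchor [0, 0, 3, 2, 0, 6] with
# extents [[-5, 5], [0, 10]] maps to [4, 4, 6, 10].
sketch_boxes, _ = project_to_bev_sketch([[0, 0, 3, 2, 0, 6]],
                                        [[-5, 5], [0, 10]])
assert sketch_boxes.tolist() == [[4, 4, 6, 10]]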
def _fill_anchor_pl_inputs(self,
                           anchors_info,
                           ground_plane,
                           image_shape,
                           stereo_calib_p2,
                           sample_name,
                           sample_augs):
    """Fills anchor placeholder inputs with corresponding data

    Args:
        anchors_info: anchor info from mini_batch_utils
        ground_plane: ground plane coefficients
        image_shape: image shape (h, w), used for projecting anchors
        stereo_calib_p2: stereo calibration p2 matrix, used for
            projecting anchors into image space
        sample_name: name of the sample, e.g. "000001"
        sample_augs: list of sample augmentations
    """

    # Lists for merging anchors info
    all_anchor_boxes_3d = []
    anchors_ious = []
    anchor_offsets = []
    anchor_classes = []

    # Create anchors for each class
    if len(self.dataset.classes) > 1:
        for class_idx in range(len(self.dataset.classes)):
            # Generate anchors for all classes
            grid_anchor_boxes_3d = self._anchor_generator.generate(
                area_3d=self._area_extents,
                anchor_3d_sizes=self._cluster_sizes[class_idx],
                anchor_stride=self._anchor_strides[class_idx],
                ground_plane=ground_plane)
            all_anchor_boxes_3d.append(grid_anchor_boxes_3d)
        all_anchor_boxes_3d = np.concatenate(all_anchor_boxes_3d)
    else:
        # Don't loop for a single class
        class_idx = 0
        grid_anchor_boxes_3d = self._anchor_generator.generate(
            area_3d=self._area_extents,
            anchor_3d_sizes=self._cluster_sizes[class_idx],
            anchor_stride=self._anchor_strides[class_idx],
            ground_plane=ground_plane)
        all_anchor_boxes_3d = grid_anchor_boxes_3d

    # Filter empty anchors
    # Skip if anchors_info is []
    sample_has_labels = True
    if self._train_val_test in ['train', 'val']:
        # Read in anchor info during training / validation
        if anchors_info:
            anchor_indices, anchors_ious, anchor_offsets, \
                anchor_classes = anchors_info

            anchor_boxes_3d_to_use = all_anchor_boxes_3d[anchor_indices]
        else:
            train_cond = (self._train_val_test == "train" and
                          self._train_on_all_samples)
            eval_cond = (self._train_val_test == "val" and
                         self._eval_all_samples)
            if train_cond or eval_cond:
                sample_has_labels = False
    else:
        sample_has_labels = False

    if not sample_has_labels:
        # During testing, or validation with no anchor info, manually
        # filter empty anchors
        # TODO: share voxel_grid_2d with BEV generation if possible
        voxel_grid_2d = \
            self.dataset.kitti_utils.create_sliced_voxel_grid_2d(
                sample_name, self.dataset.bev_source,
                image_shape=image_shape)

        # Convert to anchors and filter
        anchors_to_use = box_3d_encoder.box_3d_to_anchor(
            all_anchor_boxes_3d)
        empty_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors_to_use, voxel_grid_2d, density_threshold=1)

        anchor_boxes_3d_to_use = all_anchor_boxes_3d[empty_filter]

    # Convert lists to ndarrays
    anchor_boxes_3d_to_use = np.asarray(anchor_boxes_3d_to_use)
    anchors_ious = np.asarray(anchors_ious)
    anchor_offsets = np.asarray(anchor_offsets)
    anchor_classes = np.asarray(anchor_classes)

    # Flip anchors and centroid x offsets for augmented samples
    if kitti_aug.AUG_FLIPPING in sample_augs:
        anchor_boxes_3d_to_use = kitti_aug.flip_boxes_3d(
            anchor_boxes_3d_to_use, flip_ry=False)
        if anchors_info:
            anchor_offsets[:, 0] = -anchor_offsets[:, 0]

    # Convert to anchors
    anchors_to_use = box_3d_encoder.box_3d_to_anchor(
        anchor_boxes_3d_to_use)
    num_anchors = len(anchors_to_use)

    # Project anchors into bev
    bev_anchors, bev_anchors_norm = anchor_projector.project_to_bev(
        anchors_to_use, self._bev_extents)

    # Project box_3d anchors into image space
    img_anchors, img_anchors_norm = \
        anchor_projector.project_to_image_space(
            anchors_to_use, stereo_calib_p2, image_shape)

    # Reorder into [y1, x1, y2, x2] for tf.crop_and_resize op
    self._bev_anchors_norm = bev_anchors_norm[:, [1, 0, 3, 2]]
    self._img_anchors_norm = img_anchors_norm[:, [1, 0, 3, 2]]

    # Fill in placeholder inputs
    self._placeholder_inputs[self.PL_ANCHORS] = anchors_to_use

    # If we are in train/validation mode, and the anchor infos
    # are not empty, store them. Checking for just anchors_ious
    # to be non-empty should be enough.
    if self._train_val_test in ['train', 'val'] and \
            len(anchors_ious) > 0:
        self._placeholder_inputs[self.PL_ANCHOR_IOUS] = anchors_ious
        self._placeholder_inputs[self.PL_ANCHOR_OFFSETS] = anchor_offsets
        self._placeholder_inputs[self.PL_ANCHOR_CLASSES] = anchor_classes

    # During test, or val when there is no anchor info
    elif self._train_val_test in ['test'] or \
            len(anchors_ious) == 0:
        # During testing, or validation with no gt, fill these in with 0s
        self._placeholder_inputs[self.PL_ANCHOR_IOUS] = \
            np.zeros(num_anchors)
        self._placeholder_inputs[self.PL_ANCHOR_OFFSETS] = \
            np.zeros([num_anchors, 6])
        self._placeholder_inputs[self.PL_ANCHOR_CLASSES] = \
            np.zeros(num_anchors)
    else:
        raise ValueError(
            'Got run mode {}, and non-empty anchor info'.format(
                self._train_val_test))

    self._placeholder_inputs[self.PL_BEV_ANCHORS] = bev_anchors
    self._placeholder_inputs[self.PL_BEV_ANCHORS_NORM] = \
        self._bev_anchors_norm
    self._placeholder_inputs[self.PL_IMG_ANCHORS] = img_anchors
    self._placeholder_inputs[self.PL_IMG_ANCHORS_NORM] = \
        self._img_anchors_norm
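# A small illustrative check of the column shuffle above (hypothetical
# values, not part of the model): project_to_bev returns boxes as
# [x1, y1, x2, y2], while tf.image.crop_and_resize expects normalized
# [y1, x1, y2, x2], so indexing with [1, 0, 3, 2] swaps the axis order.
import numpy as np

boxes_norm = np.array([[0.1, 0.2, 0.3, 0.4]])   # [x1, y1, x2, y2]
boxes_tf_order = boxes_norm[:, [1, 0, 3, 2]]    # [y1, x1, y2, x2]
assert boxes_tf_order.tolist() == [[0.2, 0.1, 0.4, 0.3]]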
def build(self):

    # Setup input placeholders
    self._set_up_input_pls()

    # Setup feature extractors
    self._set_up_feature_extractors()

    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do
    # path drop.
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1],
                random_values)

            img_proposal_input = tf.multiply(img_proposal_input,
                                             img_mask)
            bev_proposal_input = tf.multiply(bev_proposal_input,
                                             bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor
            fusion_mean_div_factor = img_mask + bev_mask

    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):
            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input,
            self._bev_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input,
            self._img_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)

    with tf.variable_scope('proposal_roi_fusion'):
        rpn_fusion_out = None
        if self._fusion_method == 'mean':
            tf_features_sum = tf.add(bev_proposal_rois,
                                     img_proposal_rois)
            rpn_fusion_out = tf.divide(tf_features_sum,
                                       fusion_mean_div_factor)
        elif self._fusion_method == 'concat':
            rpn_fusion_out = tf.concat(
                [bev_proposal_rois, img_proposal_rois], axis=3)
        ###############################
        ## Chenye Netgate begin      ##
        ###############################
        elif self._fusion_method == 'netgate':
            rpn_fusion_out = chenye_netgate.netgate(
                bev_proposal_rois, img_proposal_rois, self._is_training)
        ###############################
        ## Chenye Netgate end        ##
        ###############################
        else:
            raise ValueError('Invalid fusion method', self._fusion_method)

    # TODO: move this section into a separate AnchorPredictor class
    with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
        tensor_in = rpn_fusion_out

        # Parse rpn layers config
        layers_config = self._config.layers_config.rpn_config
        l2_weight_decay = layers_config.l2_weight_decay

        if l2_weight_decay > 0:
            weights_regularizer = slim.l2_regularizer(l2_weight_decay)
        else:
            weights_regularizer = None

        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer):
            # Use conv2d instead of fully_connected layers.
            cls_fc6 = slim.conv2d(tensor_in,
                                  layers_config.cls_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='cls_fc6')
            cls_fc6_drop = slim.dropout(cls_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc6_drop')
            cls_fc7 = slim.conv2d(cls_fc6_drop,
                                  layers_config.cls_fc7,
                                  [1, 1],
                                  scope='cls_fc7')
            cls_fc7_drop = slim.dropout(cls_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc7_drop')
            cls_fc8 = slim.conv2d(cls_fc7_drop,
                                  2,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='cls_fc8')

            objectness = tf.squeeze(cls_fc8, [1, 2],
                                    name='cls_fc8/squeezed')

            # Use conv2d instead of fully_connected layers.
            reg_fc6 = slim.conv2d(tensor_in,
                                  layers_config.reg_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='reg_fc6')
            reg_fc6_drop = slim.dropout(reg_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc6_drop')
            reg_fc7 = slim.conv2d(reg_fc6_drop,
                                  layers_config.reg_fc7,
                                  [1, 1],
                                  scope='reg_fc7')
            reg_fc7_drop = slim.dropout(reg_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc7_drop')
            reg_fc8 = slim.conv2d(reg_fc7_drop,
                                  6,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='reg_fc8')

            offsets = tf.squeeze(reg_fc8, [1, 2],
                                 name='reg_fc8/squeezed')

    # Histogram summaries
    with tf.variable_scope('histograms_feature_extractor'):
        with tf.variable_scope('bev_vgg'):
            for end_point in self.bev_end_points:
                tf.summary.histogram(
                    end_point, self.bev_end_points[end_point])

        with tf.variable_scope('img_vgg'):
            for end_point in self.img_end_points:
                tf.summary.histogram(
                    end_point, self.img_end_points[end_point])

    with tf.variable_scope('histograms_rpn'):
        with tf.variable_scope('anchor_predictor'):
            fc_layers = [cls_fc6, cls_fc7, cls_fc8, objectness,
                         reg_fc6, reg_fc7, reg_fc8, offsets]
            for fc_layer in fc_layers:
                # Fix the name to avoid tf warnings
                tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                     fc_layer)

    # Return the proposals
    with tf.variable_scope('proposals'):
        anchors = self.placeholders[self.PL_ANCHORS]

        # Decode anchor regression offsets
        with tf.variable_scope('decoding'):
            regressed_anchors = anchor_encoder.offset_to_anchor(
                anchors, offsets)

        with tf.variable_scope('bev_projection'):
            _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                regressed_anchors, self._bev_extents)

        with tf.variable_scope('softmax'):
            objectness_softmax = tf.nn.softmax(objectness)

        with tf.variable_scope('nms'):
            objectness_scores = objectness_softmax[:, 1]

            # Do NMS on regressed anchors
            top_indices = tf.image.non_max_suppression(
                bev_proposal_boxes_norm, objectness_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_thresh)

            top_anchors = tf.gather(regressed_anchors, top_indices)
            top_objectness_softmax = tf.gather(objectness_scores,
                                               top_indices)
            # top_offsets = tf.gather(offsets, top_indices)
            # top_objectness = tf.gather(objectness, top_indices)

    # Get mini batch
    all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
    all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
    all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

    with tf.variable_scope('mini_batch'):
        mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
        mini_batch_mask, _ = \
            mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

    # ROI summary images
    rpn_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
    with tf.variable_scope('bev_rpn_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                              mini_batch_mask)
        mb_bev_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(
            self._bev_preprocessed,
            mb_bev_anchors_norm,
            mb_bev_box_indices,
            (32, 32))

        bev_input_roi_summary_images = tf.split(
            bev_input_rois, self._bev_depth, axis=3)
        tf.summary.image('bev_rpn_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=rpn_mini_batch_size)

    with tf.variable_scope('img_rpn_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                              mini_batch_mask)
        mb_img_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_rpn_rois',
                         img_input_rois,
                         max_outputs=rpn_mini_batch_size)

    # Ground Truth Tensors
    with tf.variable_scope('one_hot_classes'):

        # Anchor classification ground truth
        # Object / Not Object
        min_pos_iou = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

        objectness_classes_gt = tf.cast(
            tf.greater_equal(all_ious_gt, min_pos_iou),
            dtype=tf.int32)
        objectness_gt = tf.one_hot(
            objectness_classes_gt, depth=2,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=self._config.label_smoothing_epsilon)

    # Mask predictions for mini batch
    with tf.variable_scope('prediction_mini_batch'):
        objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
        offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

    with tf.variable_scope('ground_truth_mini_batch'):
        objectness_gt_masked = tf.boolean_mask(
            objectness_gt, mini_batch_mask)
        offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                            mini_batch_mask)

    # Specify the tensors to evaluate
    predictions = dict()

    # Temporary predictions for debugging
    # predictions['anchor_ious'] = anchor_ious
    # predictions['anchor_offsets'] = all_offsets_gt

    if self._train_val_test in ['train', 'val']:
        # All anchors
        predictions[self.PRED_ANCHORS] = anchors

        # Mini-batch masks
        predictions[self.PRED_MB_MASK] = mini_batch_mask
        # Mini-batch predictions
        predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
        predictions[self.PRED_MB_OFFSETS] = offsets_masked

        # Mini batch ground truth
        predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
        predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

        # Proposals after nms
        predictions[self.PRED_TOP_INDICES] = top_indices
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax
    else:
        # self._train_val_test == 'test'
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    return predictions, bev_proposal_rois, img_proposal_rois
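# A minimal standalone sketch of what get_box_indices computes (a
# hypothetical numpy version, for illustration only): for a
# [batch, num_boxes, 4] box tensor it returns a flat [batch * num_boxes]
# vector mapping every box to the index of the image it belongs to. With a
# single image in the batch, that is a vector of zeros, which is exactly the
# box_indices argument tf.image.crop_and_resize expects here.
import numpy as np

def get_box_indices_np(boxes):
    batch, num_boxes = boxes.shape[:2]
    return np.repeat(np.arange(batch), num_boxes)

boxes = np.zeros((1, 5, 4))    # 1 image, 5 proposals
assert get_box_indices_np(boxes).tolist() == [0, 0, 0, 0, 0]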
def build(self):
    rpn_model = self._rpn_model

    # Share the same prediction dict as RPN
    prediction_dict = rpn_model.build()

    top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
    ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

    class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

    with tf.variable_scope('avod_projection'):

        if self._config.expand_proposals_xz > 0.0:

            expand_length = self._config.expand_proposals_xz

            # Expand anchors along x and z
            with tf.variable_scope('expand_xz'):
                expanded_dim_x = top_anchors[:, 3] + expand_length
                expanded_dim_z = top_anchors[:, 5] + expand_length

                expanded_anchors = tf.stack([
                    top_anchors[:, 0],
                    top_anchors[:, 1],
                    top_anchors[:, 2],
                    expanded_dim_x,
                    top_anchors[:, 4],
                    expanded_dim_z
                ], axis=1)

            avod_projection_in = expanded_anchors

        else:
            avod_projection_in = top_anchors

        with tf.variable_scope('bev'):
            # Project top anchors into bev and image spaces
            bev_proposal_boxes, bev_proposal_boxes_norm = \
                anchor_projector.project_to_bev(
                    avod_projection_in,
                    self.dataset.kitti_utils.bev_extents)

            # Reorder projected boxes into [y1, x1, y2, x2]
            bev_proposal_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_proposal_boxes)
            bev_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_proposal_boxes_norm)

        with tf.variable_scope('img'):
            image_shape = tf.cast(tf.shape(
                rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                tf.float32)
            img_proposal_boxes, img_proposal_boxes_norm = \
                anchor_projector.tf_project_to_image_space(
                    avod_projection_in,
                    rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                    image_shape)
            # Only reorder the normalized img
            img_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    img_proposal_boxes_norm)

        with tf.variable_scope('img_r'):
            image_r_shape = tf.cast(tf.shape(
                rpn_model.placeholders[RpnModel.PL_IMG_R_INPUT])[0:2],
                tf.float32)
            img_r_proposal_boxes, img_r_proposal_boxes_norm = \
                anchor_projector.tf_project_to_image_space(
                    avod_projection_in,
                    rpn_model.placeholders[RpnModel.PL_CALIB_P3],
                    image_r_shape)
            # Only reorder the normalized img
            img_r_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    img_r_proposal_boxes_norm)

    # bev_feature_maps = rpn_model.bev_feature_maps
    img_feature_maps = rpn_model.img_feature_maps
    img_r_feature_maps = rpn_model.img_r_feature_maps

    """
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):

        with tf.variable_scope('avod_path_drop'):

            img_mask = rpn_model.img_path_drop_mask
            # bev_mask = rpn_model.bev_path_drop_mask
            img_r_mask = rpn_model.img_r_path_drop_mask

            img_feature_maps = tf.multiply(img_feature_maps, img_mask)
            # bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask)
            img_r_feature_maps = tf.multiply(img_r_feature_maps,
                                             img_r_mask)
    else:
        # bev_mask = tf.constant(1.0)
        img_mask = tf.constant(1.0)
        img_r_mask = tf.constant(1.0)
    """
    img_mask = tf.constant(1.0)
    img_r_mask = tf.constant(1.0)

    # ROI Pooling
    with tf.variable_scope('avod_roi_pooling'):
        def get_box_indices(boxes):
            proposals_shape = boxes.get_shape().as_list()
            if any(dim is None for dim in proposals_shape):
                proposals_shape = tf.shape(boxes)
            ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
            multiplier = tf.expand_dims(
                tf.range(start=0, limit=proposals_shape[0]), 1)
            return tf.reshape(ones_mat * multiplier, [-1])

        """
        bev_boxes_norm_batches = tf.expand_dims(
            bev_proposal_boxes_norm, axis=0)

        # These should be all 0's since there is only 1 image
        tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV
        bev_rois = tf.image.crop_and_resize(
            bev_feature_maps,
            bev_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='bev_rois')
        """
        img_boxes_norm_batches = tf.expand_dims(img_proposal_boxes_norm,
                                                axis=0)

        # These should be all 0's since there is only 1 image
        tf_box_indices = get_box_indices(img_boxes_norm_batches)

        # Do ROI Pooling on image
        img_rois = tf.image.crop_and_resize(
            img_feature_maps,
            img_proposal_boxes_norm_tf_order,
            tf_box_indices,
            (32, 32),
            name='img_rois')
        img_r_rois = tf.image.crop_and_resize(
            img_r_feature_maps,
            img_r_proposal_boxes_norm_tf_order,
            tf_box_indices,
            (32, 32),
            name='img_r_rois')

        img_rois = self._sub_mean(img_rois)
        img_r_rois = self._sub_mean(img_r_rois)

        # Cosine similarity between the left and right image ROIs,
        # used to gate the pooled features
        cos_simi = tf.reduce_sum(img_rois * img_r_rois,
                                 axis=[1, 2], keep_dims=True)
        cos_simi = cos_simi / (
            tf.norm(img_rois + 1e-5, axis=[1, 2], keep_dims=True) *
            tf.norm(img_r_rois + 1e-5, axis=[1, 2], keep_dims=True))
        cos_simi = tf.nn.relu(cos_simi)

        img_rois = tf.image.resize_bilinear(
            img_rois, self._proposal_roi_crop_size) * cos_simi
        img_r_rois = tf.image.resize_bilinear(
            img_r_rois, self._proposal_roi_crop_size) * cos_simi

    # Fully connected layers (Box Predictor)
    avod_layers_config = self.model_config.layers_config.avod_config

    fc_output_layers = \
        avod_fc_layers_builder.build(
            layers_config=avod_layers_config,
            input_rois=[img_rois, img_r_rois],
            input_weights=[img_mask, img_r_mask],
            num_final_classes=self._num_final_classes,
            box_rep=self._box_rep,
            top_anchors=top_anchors,
            ground_plane=ground_plane,
            is_training=self._is_training)

    all_cls_logits = \
        fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
    all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

    # This may be None
    all_angle_vectors = \
        fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

    with tf.variable_scope('softmax'):
        all_cls_softmax = tf.nn.softmax(all_cls_logits)

    ######################################################
    # Subsample mini_batch for the loss function
    ######################################################
    # Get the ground truth tensors
    anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
    if self._box_rep in ['box_3d', 'box_4ca']:
        boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        orientations_gt = boxes_3d_gt[:, 6]
    elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
        boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
    else:
        raise NotImplementedError('Ground truth tensors not implemented')

    # Project anchor_gts to 2D bev
    with tf.variable_scope('avod_gt_projection'):
        bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
            anchors_gt, self.dataset.kitti_utils.bev_extents)

        bev_anchor_boxes_gt_tf_order = \
            anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

    with tf.variable_scope('avod_box_list'):
        # Convert to box_list format
        anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
        anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

    mb_mask, mb_class_label_indices, mb_gt_indices = \
        self.sample_mini_batch(
            anchor_box_list_gt=anchor_box_list_gt,
            anchor_box_list=anchor_box_list,
            class_labels=class_labels)

    # Create classification one_hot vector
    with tf.variable_scope('avod_one_hot_classes'):
        mb_classification_gt = tf.one_hot(
            mb_class_label_indices,
            depth=self._num_final_classes,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=(self._config.label_smoothing_epsilon /
                       self.dataset.num_classes))

    # TODO: Don't create a mini batch in test mode
    # Mask predictions
    with tf.variable_scope('avod_apply_mb_mask'):
        # Classification
        mb_classifications_logits = tf.boolean_mask(
            all_cls_logits, mb_mask)
        mb_classifications_softmax = tf.boolean_mask(
            all_cls_softmax, mb_mask)

        # Offsets
        mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

        # Angle Vectors
        if all_angle_vectors is not None:
            mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
        else:
            mb_angle_vectors = None

    # Encode anchor offsets
    with tf.variable_scope('avod_encode_mb_anchors'):
        mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

        if self._box_rep == 'box_3d':
            # Gather corresponding ground truth anchors for each mb sample
            mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
            mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                mb_anchors, mb_anchors_gt)

            # Gather corresponding ground truth orientation for each
            # mb sample
            mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)

        elif self._box_rep in ['box_8c', 'box_8co']:

            # Get boxes_3d ground truth mini-batch and convert to box_8c
            mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
            if self._box_rep == 'box_8c':
                mb_boxes_8c_gt = \
                    box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
            elif self._box_rep == 'box_8co':
                mb_boxes_8c_gt = \
                    box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

            # Convert proposals: anchors -> box_3d -> box8c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
            proposal_boxes_8c = \
                box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

            # Get mini batch offsets
            mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
            mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                mb_boxes_8c, mb_boxes_8c_gt)

            # Flatten the offsets to a (N x 24) vector
            mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

        elif self._box_rep in ['box_4c', 'box_4ca']:

            # Get ground plane for box_4c conversion
            ground_plane = self._rpn_model.placeholders[
                self._rpn_model.PL_GROUND_PLANE]

            # Convert gt boxes_3d -> box_4c
            mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
            mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                mb_boxes_3d_gt, ground_plane)

            # Convert proposals: anchors -> box_3d -> box_4c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
            proposal_boxes_4c = \
                box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                   ground_plane)

            # Get mini batch
            mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
            mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                mb_boxes_4c, mb_boxes_4c_gt)

            if self._box_rep == 'box_4ca':
                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt,
                                               mb_gt_indices)

        else:
            raise NotImplementedError(
                'Anchor encoding not implemented for', self._box_rep)

    ######################################################
    # ROI summary images
    ######################################################
    avod_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size

    """
    with tf.variable_scope('bev_avod_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(
            bev_proposal_boxes_norm_tf_order, mb_mask)
        mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(
            self._rpn_model._bev_preprocessed,
            mb_bev_anchors_norm,
            mb_bev_box_indices,
            (32, 32))

        bev_input_roi_summary_images = tf.split(
            bev_input_rois, self._bev_depth, axis=3)
        tf.summary.image('bev_avod_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=avod_mini_batch_size)
    """

    with tf.variable_scope('img_avod_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(
            img_proposal_boxes_norm_tf_order, mb_mask)
        mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._rpn_model._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_avod_rois',
                         img_input_rois,
                         max_outputs=avod_mini_batch_size)

    with tf.variable_scope('img_r_avod_rois'):
        # ROIs on right image input
        mb_img_r_anchors_norm = tf.boolean_mask(
            img_r_proposal_boxes_norm_tf_order, mb_mask)
        mb_img_r_box_indices = tf.zeros_like(mb_gt_indices,
                                             dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_r_input_rois = tf.image.crop_and_resize(
            self._rpn_model._img_r_preprocessed,
            mb_img_r_anchors_norm,
            mb_img_r_box_indices,
            (32, 32))

        tf.summary.image('img_r_avod_rois',
                         img_r_input_rois,
                         max_outputs=avod_mini_batch_size)

    ######################################################
    # Final Predictions
    ######################################################
    # Get orientations from angle vectors
    if all_angle_vectors is not None:
        with tf.variable_scope('avod_orientation'):
            all_orientations = \
                orientation_encoder.tf_angle_vector_to_orientation(
                    all_angle_vectors)

    # Apply offsets to regress proposals
    with tf.variable_scope('avod_regression'):
        if self._box_rep == 'box_3d':
            prediction_anchors = \
                anchor_encoder.offset_to_anchor(top_anchors, all_offsets)

        elif self._box_rep in ['box_8c', 'box_8co']:
            # Reshape the 24-dim regressed offsets to (N x 3 x 8)
            reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
            # Given the offsets, get the boxes_8c
            prediction_boxes_8c = \
                box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                    reshaped_offsets)
            # Convert corners back to box3D
            prediction_boxes_3d = \
                box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)
            # Convert the box_3d to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        elif self._box_rep in ['box_4c', 'box_4ca']:
            # Convert predictions box_4c -> box_3d
            prediction_boxes_4c = \
                box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                    all_offsets)
            prediction_boxes_3d = \
                box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                   ground_plane)
            # Convert to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        else:
            raise NotImplementedError('Regression not implemented for',
                                      self._box_rep)

    # Apply Non-oriented NMS in BEV
    with tf.variable_scope('avod_nms'):
        bev_extents = self.dataset.kitti_utils.bev_extents

        with tf.variable_scope('bev_projection'):
            # Project predictions into BEV
            avod_bev_boxes, _ = anchor_projector.project_to_bev(
                prediction_anchors, bev_extents)
            avod_bev_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(avod_bev_boxes)

        # Get top score from second column onward
        all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

        # Apply NMS in BEV
        nms_indices = tf.image.non_max_suppression(
            avod_bev_boxes_tf_order,
            all_top_scores,
            max_output_size=self._nms_size,
            iou_threshold=self._nms_iou_threshold)

        # Gather predictions from NMS indices
        top_classification_logits = tf.gather(all_cls_logits, nms_indices)
        top_classification_softmax = tf.gather(all_cls_softmax,
                                               nms_indices)
        top_prediction_anchors = tf.gather(prediction_anchors,
                                           nms_indices)

        if self._box_rep == 'box_3d':
            top_orientations = tf.gather(all_orientations, nms_indices)

        elif self._box_rep in ['box_8c', 'box_8co']:
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                nms_indices)

        elif self._box_rep == 'box_4c':
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                nms_indices)

        elif self._box_rep == 'box_4ca':
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                nms_indices)
            top_orientations = tf.gather(all_orientations, nms_indices)

        else:
            raise NotImplementedError('NMS gather not implemented for',
                                      self._box_rep)

    if self._train_val_test in ['train', 'val']:
        # Additional entries are added to the shared prediction_dict
        # Mini batch predictions
        prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
            mb_classifications_logits
        prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
            mb_classifications_softmax
        prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

        # Mini batch ground truth
        prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
            mb_classification_gt
        prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

        # Top NMS predictions
        prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
            top_classification_logits
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

        # Mini batch predictions (for debugging)
        prediction_dict[self.PRED_MB_MASK] = mb_mask
        # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
        prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
            mb_class_label_indices

        # All predictions (for debugging)
        prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = all_cls_logits
        prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

        # Path drop masks (for debugging)
        # prediction_dict['bev_mask'] = bev_mask
        prediction_dict['img_mask'] = img_mask
        prediction_dict['img_r_mask'] = img_r_mask

    else:
        # self._train_val_test == 'test'
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

    if self._box_rep == 'box_3d':
        prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
        prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
        prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        # For debugging
        prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

    elif self._box_rep in ['box_8c', 'box_8co']:
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d

        # Store the corners before converting for visualization purposes
        prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

    elif self._box_rep == 'box_4c':
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

    elif self._box_rep == 'box_4ca':
        if self._train_val_test in ['train', 'val']:
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

    else:
        raise NotImplementedError('Prediction dict not implemented for',
                                  self._box_rep)

    # prediction_dict[self.PRED_MAX_IOUS] = max_ious
    # prediction_dict[self.PRED_ALL_IOUS] = all_ious

    return prediction_dict
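# A minimal numpy sketch (assumed semantics, not the library code) of the
# reorder + NMS pattern used in both build() methods above: BEV boxes come
# out as [x1, y1, x2, y2], tf.image.non_max_suppression takes boxes as
# [y1, x1, y2, x2], so the columns are swapped before suppression and the
# surviving indices are then used to gather every per-proposal tensor.
import numpy as np

def non_oriented_nms_sketch(boxes_xy, scores, iou_threshold=0.5):
    # Reorder [x1, y1, x2, y2] -> [y1, x1, y2, x2],
    # as reorder_projected_boxes does
    boxes = boxes_xy[:, [1, 0, 3, 2]]
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    order = np.argsort(-scores)
    keep = []
    while order.size > 0:
        i, order = order[0], order[1:]
        keep.append(i)
        # Intersection of the kept box with all remaining boxes
        y1 = np.maximum(boxes[i, 0], boxes[order, 0])
        x1 = np.maximum(boxes[i, 1], boxes[order, 1])
        y2 = np.minimum(boxes[i, 2], boxes[order, 2])
        x2 = np.minimum(boxes[i, 3], boxes[order, 3])
        inter = np.clip(y2 - y1, 0, None) * np.clip(x2 - x1, 0, None)
        iou = inter / (areas[i] + areas[order] - inter)
        order = order[iou <= iou_threshold]
    return np.asarray(keep)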
def build(self):
    rpn_model = self._rpn_model

    # Share the same prediction dict as RPN
    prediction_dict = rpn_model.build()

    top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
    ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

    class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

    with tf.variable_scope('avod_projection'):

        if self._config.expand_proposals_xz > 0.0:

            expand_length = self._config.expand_proposals_xz

            # Expand anchors along x and z
            with tf.variable_scope('expand_xz'):
                expanded_dim_x = top_anchors[:, 3] + expand_length
                expanded_dim_z = top_anchors[:, 5] + expand_length

                expanded_anchors = tf.stack([
                    top_anchors[:, 0],
                    top_anchors[:, 1],
                    top_anchors[:, 2],
                    expanded_dim_x,
                    top_anchors[:, 4],
                    expanded_dim_z
                ], axis=1)

            avod_projection_in = expanded_anchors

        else:
            avod_projection_in = top_anchors

        with tf.variable_scope('bev'):
            # Project top anchors into bev and image spaces.
            # bev_proposal_boxes hold the boxes' x and z coordinates
            # relative to bev_extents; bev_proposal_boxes_norm are the
            # same boxes normalized by the bev_extents range.
            bev_proposal_boxes, bev_proposal_boxes_norm = \
                anchor_projector.project_to_bev(
                    avod_projection_in,
                    self.dataset.kitti_utils.bev_extents)

            # Reorder projected boxes into [y1, x1, y2, x2]
            bev_proposal_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_proposal_boxes)
            bev_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_proposal_boxes_norm)

        with tf.variable_scope('img'):
            image_shape = tf.cast(tf.shape(
                rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                tf.float32)
            img_proposal_boxes, img_proposal_boxes_norm = \
                anchor_projector.tf_project_to_image_space(
                    avod_projection_in,
                    rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                    image_shape)
            # Only reorder the normalized img
            img_proposal_boxes_norm_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    img_proposal_boxes_norm)

    bev_feature_maps = rpn_model.bev_feature_maps
    img_feature_maps = rpn_model.img_feature_maps

    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):

        with tf.variable_scope('avod_path_drop'):

            img_mask = rpn_model.img_path_drop_mask
            bev_mask = rpn_model.bev_path_drop_mask

            img_feature_maps = tf.multiply(img_feature_maps, img_mask)
            bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask)
    else:
        bev_mask = tf.constant(1.0)
        img_mask = tf.constant(1.0)

    # ROI Pooling
    with tf.variable_scope('avod_roi_pooling'):
        def get_box_indices(boxes):
            proposals_shape = boxes.get_shape().as_list()
            if any(dim is None for dim in proposals_shape):
                proposals_shape = tf.shape(boxes)
            ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
            multiplier = tf.expand_dims(
                tf.range(start=0, limit=proposals_shape[0]), 1)
            return tf.reshape(ones_mat * multiplier, [-1])

        bev_boxes_norm_batches = tf.expand_dims(bev_proposal_boxes_norm,
                                                axis=0)

        # These should be all 0's since there is only 1 image
        tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV.
        # tf_box_indices is a 1D tensor of size [num_boxes]; each element
        # is the batch index of the image its box belongs to. Because the
        # batch size here is 1, they are all zeros.
        # bev_rois is a 4-D tensor of shape
        # [num_boxes, crop_height, crop_width, depth]
        ########################################################################
        # TODO PROJECT: set bev_feature_maps or img_feature_maps to zeros
        # for testing
        # bev_feature_maps = tf.zeros_like(bev_feature_maps)
        # self.bev_feature_maps = tf.zeros_like(bev_feature_maps)
        # bev_feature_maps = self.bev_feature_maps
        ########################################################################
        bev_rois = tf.image.crop_and_resize(
            bev_feature_maps,
            bev_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='bev_rois')
        # Do ROI Pooling on image
        img_rois = tf.image.crop_and_resize(
            img_feature_maps,
            img_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='img_rois')

        ########################################################################
        # TODO PROJECT: create member variables for accessing
        # bev_rois4moe = tf.image.crop_and_resize(
        #     bev_feature_maps,
        #     bev_proposal_boxes_norm_tf_order,
        #     tf_box_indices,
        #     [28, 28],
        #     name='bev_rois4moe')
        # # Do ROI Pooling on image
        # img_rois4moe = tf.image.crop_and_resize(
        #     img_feature_maps,
        #     img_proposal_boxes_norm_tf_order,
        #     tf_box_indices,
        #     [28, 28],
        #     name='img_rois4moe')
        ########################################################################

        ########################################################################
        # TODO PROJECT: create member variables for accessing
        # self.bev_rois = bev_rois
        # self.img_rois = img_rois
        self.bev_boxes = bev_proposal_boxes_tf_order
        self.bev_boxes_norm = bev_proposal_boxes_norm
        self.img_boxes = img_proposal_boxes
        self.img_boxes_norm = img_proposal_boxes_norm
        # self.bev_mask = rpn_model.bev_path_drop_mask
        # self.img_mask = rpn_model.img_path_drop_mask
        ########################################################################

        ########################################################################
        # TODO PROJECT: scale the features to features with larger
        # maximum values
        # self.max_img_feature_val = tf.reduce_max(img_rois, axis=None)
        # self.max_bev_feature_val = tf.reduce_max(bev_rois, axis=None)
        #
        # bev_rois_moe = tf.cond(
        #     tf.greater(self.max_img_feature_val, self.max_bev_feature_val),
        #     lambda: self.scale_bev(bev_rois, img_rois),
        #     lambda: bev_rois)
        # img_rois_moe = tf.cond(
        #     tf.greater(self.max_bev_feature_val, self.max_img_feature_val),
        #     lambda: self.scale_img(bev_rois, img_rois),
        #     lambda: img_rois)
        ########################################################################

        ########################################################################
        # TODO PROJECT: insert code here to add mixture of experts
        # self._moe_model = MoeModel(img_rois, bev_rois,
        #                            img_proposal_boxes, bev_proposal_boxes)
        # self._moe_model = MoeModel(img_feature_maps, bev_feature_maps,
        #                            img_proposal_boxes, bev_proposal_boxes)
        # self._moe_model._set_up_input_pls()
        # self.moe_prediction = self._moe_model.build()
        ########################################################################

        ########################################################################
        # TODO PROJECT: weight the feature before average img and bev
        # img_weights = tf.reshape(self.moe_prediction['img_weight'],
        #                          [-1, 1, 1, 1])
        # bev_weights = tf.reshape(self.moe_prediction['bev_weight'],
        #                          [-1, 1, 1, 1])
        # img_weights = 0.5 * tf.ones([1024, 1, 1, 1], tf.float32)
        # bev_weights = 0.5 * tf.ones([1024, 1, 1, 1], tf.float32)
        # weighted_img_rois = tf.multiply(img_weights, img_rois)
        # weighted_bev_rois = tf.multiply(bev_weights, bev_rois)
        ########################################################################

        ########################################################################
        # TODO PROJECT: create fused bev
        _, bev_mar_boxes_norm = cf.add_margin_to_regions(
            bev_proposal_boxes,
            self.dataset.kitti_utils.bev_extents)

        bev_pixels_loc = cf.bev_pixel_eq_1_loc(
            self._rpn_model._bev_preprocessed)

        max_height = \
            self.dataset.config.kitti_utils_config.bev_generator.slices.height_hi
        min_height = \
            self.dataset.config.kitti_utils_config.bev_generator.slices.height_lo
        num_slices = \
            self.dataset.config.kitti_utils_config.bev_generator.slices.num_slices
        height_list = [
            min_height +
            (2 * x + 1) * (max_height - min_height) / (2.0 * num_slices)
            for x in range(num_slices)]

        print("bev_preprocess shape: ",
              (self._rpn_model._bev_preprocessed).shape)
        velo_pc = cf.bev_pixel_loc_to_3d_velo(
            bev_pixels_loc,
            tf.shape(self._rpn_model._bev_preprocessed)[1:3],
            height_list,
            self.dataset.kitti_utils.bev_extents)

        print("PL_CALIB_P2 shape: ",
              self._rpn_model.placeholders[RpnModel.PL_CALIB_P2].shape)
        p_2d = anchor_projector.project_to_image_tensor(
            tf.transpose(tf.cast(velo_pc, tf.float32)),
            self._rpn_model.placeholders[RpnModel.PL_CALIB_P2])

        print("image feature maps [0] shape: ", img_feature_maps[0].shape)
        features_at_p_2d = tf.gather_nd(
            img_feature_maps[0],
            tf.cast(tf.round(tf.transpose(p_2d)), tf.int32))
        print("features_at_p_2d shape: ", features_at_p_2d.shape)

        new_bev = cf.create_fused_bev(
            tf.shape(self._rpn_model._bev_preprocessed),
            bev_pixels_loc,
            features_at_p_2d)
        # raise Exception("finish fused_bev generation!")

        self._new_bev_feature_extractor = \
            feature_extractor_builder.get_extractor(
                self.model_config.layers_config.bev_feature_extractor)
        self.new_bev_feature_maps, self.new_bev_end_points = \
            self._new_bev_feature_extractor.build(
                new_bev,
                self._bev_pixel_size,
                self._is_training,
                scope='new_bev_vgg')

        new_bev_rois = tf.image.crop_and_resize(
            self.new_bev_feature_maps,
            bev_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='new_bev_rois')
        ########################################################################

    # Fully connected layers (Box Predictor)
    avod_layers_config = self.model_config.layers_config.avod_config

    # fc_output_layers = \
    #     avod_fc_layers_builder.build(
    #         layers_config=avod_layers_config,
    #         input_rois=[bev_rois, img_rois],
    #         input_weights=[bev_mask, img_mask],
    #         num_final_classes=self._num_final_classes,
    #         box_rep=self._box_rep,
    #         top_anchors=top_anchors,
    #         ground_plane=ground_plane,
    #         is_training=self._is_training)

    ########################################################################
    # TODO PROJECT: average img and bev features first and then concat
    # with new bev
    rois_sum = tf.reduce_sum([bev_rois, img_rois], axis=0)
    rois_mean = tf.divide(rois_sum, tf.reduce_sum([bev_mask, img_mask]))

    fc_output_layers = \
        avod_fc_layers_builder.build(
            layers_config=avod_layers_config,
            input_rois=[rois_mean, new_bev_rois],
            input_weights=[1, img_mask],
            num_final_classes=self._num_final_classes,
            box_rep=self._box_rep,
            top_anchors=top_anchors,
            ground_plane=ground_plane,
            is_training=self._is_training)
    ########################################################################

    ########################################################################
    # TODO PROJECT: input weighted bev_rois and img_rois to output layer
    # fc_output_layers = \
    #     avod_fc_layers_builder.build(
    #         layers_config=avod_layers_config,
    #         input_rois=[weighted_bev_rois, weighted_img_rois],
    #         input_weights=[bev_mask * bev_weights,
    #                        img_mask * img_weights],
    #         num_final_classes=self._num_final_classes,
    #         box_rep=self._box_rep,
    #         top_anchors=top_anchors,
    #         ground_plane=ground_plane,
    #         is_training=self._is_training)
    ########################################################################

    all_cls_logits = \
        fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
    all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

    # This may be None
    all_angle_vectors = \
        fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

    with tf.variable_scope('softmax'):
        all_cls_softmax = tf.nn.softmax(all_cls_logits)

    ######################################################
    # Subsample mini_batch for the loss function
    ######################################################
    # Get the ground truth tensors
    anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
    if self._box_rep in ['box_3d', 'box_4ca']:
        boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        orientations_gt = boxes_3d_gt[:, 6]
    elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
        boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
    else:
        raise NotImplementedError('Ground truth tensors not implemented')

    # Project anchor_gts to 2D bev
    with tf.variable_scope('avod_gt_projection'):
        bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
            anchors_gt, self.dataset.kitti_utils.bev_extents)

        bev_anchor_boxes_gt_tf_order = \
            anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

    with tf.variable_scope('avod_box_list'):
        # Convert to box_list format
        anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
        anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

    mb_mask, mb_class_label_indices, mb_gt_indices = \
        self.sample_mini_batch(
            anchor_box_list_gt=anchor_box_list_gt,
            anchor_box_list=anchor_box_list,
            class_labels=class_labels)

    # Create classification one_hot vector
    with tf.variable_scope('avod_one_hot_classes'):
        mb_classification_gt = tf.one_hot(
            mb_class_label_indices,
            depth=self._num_final_classes,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=(self._config.label_smoothing_epsilon /
                       self.dataset.num_classes))

    # TODO: Don't create a mini batch in test mode
    # Mask predictions
    with tf.variable_scope('avod_apply_mb_mask'):
        # Classification
        mb_classifications_logits = tf.boolean_mask(
            all_cls_logits, mb_mask)
        mb_classifications_softmax = tf.boolean_mask(
            all_cls_softmax, mb_mask)

        # Offsets
        mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

        # Angle Vectors
        if all_angle_vectors is not None:
            mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
        else:
            mb_angle_vectors = None

    # Encode anchor offsets
    with tf.variable_scope('avod_encode_mb_anchors'):
        mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

        if self._box_rep == 'box_3d':
            # Gather corresponding ground truth anchors for each mb sample
            mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
            mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                mb_anchors, mb_anchors_gt)

            # Gather corresponding ground truth orientation for each
            # mb sample
            mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)

        elif self._box_rep in ['box_8c', 'box_8co']:

            # Get boxes_3d ground truth mini-batch and convert to box_8c
            mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
            if self._box_rep == 'box_8c':
                mb_boxes_8c_gt = \
                    box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
            elif self._box_rep == 'box_8co':
                mb_boxes_8c_gt = \
                    box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

            # Convert proposals: anchors -> box_3d -> box8c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
            proposal_boxes_8c = \
                box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

            # Get mini batch offsets
            mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
            mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                mb_boxes_8c, mb_boxes_8c_gt)

            # Flatten the offsets to a (N x 24) vector
            mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

        elif self._box_rep in ['box_4c', 'box_4ca']:

            # Get ground plane for box_4c conversion
            ground_plane = self._rpn_model.placeholders[
                self._rpn_model.PL_GROUND_PLANE]

            # Convert gt boxes_3d -> box_4c
            mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
            mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                mb_boxes_3d_gt, ground_plane)

            # Convert proposals: anchors -> box_3d -> box_4c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
            proposal_boxes_4c = \
                box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                   ground_plane)

            # Get mini batch
            mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
            mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                mb_boxes_4c, mb_boxes_4c_gt)

            if self._box_rep == 'box_4ca':
                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt,
                                               mb_gt_indices)

        else:
            raise NotImplementedError(
                'Anchor encoding not implemented for', self._box_rep)

    ######################################################
    # ROI summary images
    ######################################################
    avod_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
    with tf.variable_scope('bev_avod_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(
            bev_proposal_boxes_norm_tf_order, mb_mask)
        mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(
            self._rpn_model._bev_preprocessed,
            mb_bev_anchors_norm,
            mb_bev_box_indices,
            (32, 32))

        bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                self._bev_depth, axis=3)
        tf.summary.image('bev_avod_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=avod_mini_batch_size)

    with tf.variable_scope('img_avod_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(
            img_proposal_boxes_norm_tf_order, mb_mask)
        mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._rpn_model._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_avod_rois', img_input_rois,
                         max_outputs=avod_mini_batch_size)

    ######################################################
    # Final Predictions
    ######################################################
    # Get orientations from angle vectors
    if all_angle_vectors is not None:
        with tf.variable_scope('avod_orientation'):
            all_orientations = \
                orientation_encoder.tf_angle_vector_to_orientation(
                    all_angle_vectors)

    # Apply offsets to regress proposals
    with tf.variable_scope('avod_regression'):
        if self._box_rep == 'box_3d':
            prediction_anchors = \
                anchor_encoder.offset_to_anchor(top_anchors, all_offsets)

        elif self._box_rep in ['box_8c', 'box_8co']:
            # Reshape the 24-dim regressed offsets to (N x 3 x 8)
            reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
            # Given the offsets, get the boxes_8c
            prediction_boxes_8c = \
                box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                    reshaped_offsets)
            # Convert corners back to box3D
            prediction_boxes_3d = \
                box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)
            # Convert the box_3d to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        elif self._box_rep in ['box_4c', 'box_4ca']:
            # Convert predictions box_4c -> box_3d
            prediction_boxes_4c = \
                box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                    all_offsets)
            prediction_boxes_3d = \
                box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                   ground_plane)
            # Convert to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        else:
            raise NotImplementedError('Regression not implemented for',
                                      self._box_rep)

    # Apply Non-oriented NMS in BEV
    with tf.variable_scope('avod_nms'):
        bev_extents = self.dataset.kitti_utils.bev_extents

        with tf.variable_scope('bev_projection'):
            # Project predictions into BEV
            avod_bev_boxes, _ = anchor_projector.project_to_bev(
                prediction_anchors, bev_extents)
            avod_bev_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(avod_bev_boxes)

        # Get top score from second column onward
        all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

        # Apply NMS in BEV
        nms_indices = tf.image.non_max_suppression(
            avod_bev_boxes_tf_order,
            all_top_scores,
            max_output_size=self._nms_size,
            iou_threshold=self._nms_iou_threshold)

        # Gather predictions from NMS indices
        top_classification_logits = tf.gather(all_cls_logits, nms_indices)
        top_classification_softmax = tf.gather(all_cls_softmax,
                                               nms_indices)
        top_prediction_anchors = tf.gather(prediction_anchors,
                                           nms_indices)

        if self._box_rep == 'box_3d':
            top_orientations = tf.gather(all_orientations, nms_indices)

        elif self._box_rep in ['box_8c', 'box_8co']:
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                nms_indices)

        elif self._box_rep == 'box_4c':
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                nms_indices)

        elif self._box_rep == 'box_4ca':
            top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                nms_indices)
            top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                nms_indices)
            top_orientations = tf.gather(all_orientations, nms_indices)

        else:
            raise NotImplementedError('NMS gather not implemented for',
                                      self._box_rep)

    if self._train_val_test in ['train', 'val']:
        # Additional entries are added to the shared prediction_dict
        # Mini batch predictions
        prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
            mb_classifications_logits
        prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
            mb_classifications_softmax
        prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

        # Mini batch ground truth
        prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
            mb_classification_gt
        prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

        # Top NMS predictions
        prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
            top_classification_logits
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

        # Mini batch predictions (for debugging)
        prediction_dict[self.PRED_MB_MASK] = mb_mask
        # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
        prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
            mb_class_label_indices

        # All predictions (for debugging)
        prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = all_cls_logits
        prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

        # Path drop masks (for debugging)
        prediction_dict['bev_mask'] = bev_mask
        prediction_dict['img_mask'] = img_mask

    else:
        # self._train_val_test == 'test'
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

    if self._box_rep == 'box_3d':
        prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
        prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
        prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        # For debugging
        prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

    # 8c means 8 corners
    elif self._box_rep in ['box_8c', 'box_8co']:
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d

        # Store the corners before converting for visualization purposes
        prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

    # 4c means 4 corners
    elif self._box_rep == 'box_4c':
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

    elif self._box_rep == 'box_4ca':
        if self._train_val_test in ['train', 'val']:
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

    else:
        raise NotImplementedError('Prediction dict not implemented for',
                                  self._box_rep)

    # prediction_dict[self.PRED_MAX_IOUS] = max_ious
    # prediction_dict[self.PRED_ALL_IOUS] = all_ious

    return prediction_dict
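# A small numpy sketch of the masked mean fusion used above (assumed
# semantics): the feature maps were already multiplied by their path-drop
# masks upstream, so a dropped branch contributes zeros to the sum, and
# dividing by (bev_mask + img_mask) averages only the active branches.
# AVOD's path drop is assumed to guarantee at least one active branch,
# so the divisor is never zero.
import numpy as np

def masked_mean_fusion(bev_rois, img_rois, bev_mask, img_mask):
    rois_sum = bev_rois * bev_mask + img_rois * img_mask
    return rois_sum / (bev_mask + img_mask)

bev = np.full((2, 3, 3, 4), 2.0)
img = np.full((2, 3, 3, 4), 4.0)
assert masked_mean_fusion(bev, img, 1.0, 1.0).mean() == 3.0   # both paths
assert masked_mean_fusion(bev, img, 1.0, 0.0).mean() == 2.0   # bev only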
def _calculate_anchors_info(self, all_anchor_boxes_3d, empty_anchor_filter, gt_labels): """Calculates the list of anchor information in the format: N x 8 [max_gt_2d_iou, max_gt_3d_iou, (6 x offsets), class_index] max_gt_out - highest 3D iou with any ground truth box offsets - encoded offsets [dx, dy, dz, d_dimx, d_dimy, d_dimz] class_index - the anchor's class as an index (e.g. 0 or 1, for "Background" or "Car") Args: all_anchor_boxes_3d: list of anchors in box_3d format N x [x, y, z, l, w, h, ry] empty_anchor_filter: boolean mask of which anchors are non empty gt_labels: list of Object Label data format containing ground truth labels to generate positives/negatives from. Returns: list of anchor info """ # Check for ground truth objects if len(gt_labels) == 0: raise Warning("No valid ground truth label to generate anchors.") kitti_utils = self._dataset.kitti_utils # Filter empty anchors anchor_indices = np.where(empty_anchor_filter)[0] anchor_boxes_3d = all_anchor_boxes_3d[empty_anchor_filter] # Convert anchor_boxes_3d to anchor format anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d) # Convert gt to boxes_3d -> anchors -> iou format gt_boxes_3d = np.asarray( [box_3d_encoder.object_label_to_box_3d(gt_obj) for gt_obj in gt_labels]) gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d, ortho_rotate=True) rpn_iou_type = self.mini_batch_utils.rpn_iou_type if rpn_iou_type == '2d': # Convert anchors to 2d iou format anchors_for_2d_iou, _ = np.asarray(anchor_projector.project_to_bev( anchors, kitti_utils.bev_extents)) gt_boxes_for_2d_iou, _ = anchor_projector.project_to_bev( gt_anchors, kitti_utils.bev_extents) elif rpn_iou_type == '3d': # Convert anchors to 3d iou format for calculation anchors_for_3d_iou = box_3d_encoder.box_3d_to_3d_iou_format( anchor_boxes_3d) gt_boxes_for_3d_iou = \ box_3d_encoder.box_3d_to_3d_iou_format(gt_boxes_3d) else: raise ValueError('Invalid rpn_iou_type {}', rpn_iou_type) # Initialize sample and offset lists num_anchors = len(anchor_boxes_3d) all_info = np.zeros((num_anchors, self.mini_batch_utils.col_length)) # Update anchor indices all_info[:, self.mini_batch_utils.col_anchor_indices] = anchor_indices # For each of the labels, generate samples for gt_idx in range(len(gt_labels)): gt_obj = gt_labels[gt_idx] gt_box_3d = gt_boxes_3d[gt_idx] # Get 2D or 3D IoU for every anchor if self.mini_batch_utils.rpn_iou_type == '2d': gt_box_for_2d_iou = gt_boxes_for_2d_iou[gt_idx] ious = evaluation.two_d_iou(gt_box_for_2d_iou, anchors_for_2d_iou) elif self.mini_batch_utils.rpn_iou_type == '3d': gt_box_for_3d_iou = gt_boxes_for_3d_iou[gt_idx] ious = evaluation.three_d_iou(gt_box_for_3d_iou, anchors_for_3d_iou) # Only update indices with a higher iou than before update_indices = np.greater( ious, all_info[:, self.mini_batch_utils.col_ious]) # Get ious to update ious_to_update = ious[update_indices] # Calculate offsets, use 3D iou to get highest iou anchors_to_update = anchors[update_indices] gt_anchor = box_3d_encoder.box_3d_to_anchor(gt_box_3d, ortho_rotate=True) offsets = anchor_encoder.anchor_to_offset(anchors_to_update, gt_anchor) # Convert gt type to index class_idx = kitti_utils.class_str_to_index(gt_obj.type) # Update anchors info (indices already updated) # [index, iou, (offsets), class_index] all_info[update_indices, self.mini_batch_utils.col_ious] = ious_to_update all_info[update_indices, self.mini_batch_utils.col_offsets_lo: self.mini_batch_utils.col_offsets_hi] = offsets all_info[update_indices, self.mini_batch_utils.col_class_idx] = class_idx return 
    return all_info
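# --- Illustrative sketch (not part of the repo) ---
# Toy numpy example of the update rule used above: each ground truth label
# overwrites an anchor's row only where it achieves a higher IoU than any
# previously processed label, so every anchor ends up matched to its
# best-overlapping ground truth. The IoU values below are made up.
import numpy as np

all_ious = np.zeros(4)
all_classes = np.zeros(4)

ious_per_gt = np.array([[0.2, 0.0, 0.6, 0.1],   # gt 0 vs each anchor
                        [0.5, 0.3, 0.4, 0.0]])  # gt 1 vs each anchor
gt_class_indices = [1, 2]

for gt_idx, ious in enumerate(ious_per_gt):
    update_indices = np.greater(ious, all_ious)
    all_ious[update_indices] = ious[update_indices]
    all_classes[update_indices] = gt_class_indices[gt_idx]

print(all_ious)     # [0.5 0.3 0.6 0.1]
print(all_classes)  # [2. 2. 1. 1.]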
def main():
    """Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors
    """

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])
    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        rect = patches.Rectangle((box[0], box[1]),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')
        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]
        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')
        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    image_shape = np.array(Image.open(image_path)).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Read the stereo calibration matrix for visualization
    stereo_calib = calib_utils.read_calibration(dataset.calib_dir, 0)
    p = stereo_calib.p2

    # Overlay boxes on images
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, p)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')
        rgb_2d_axes.add_patch(rect)

        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
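# --- Illustrative sketch (not part of the repo) ---
# Minimal numpy reimplementation of the BEV projection arithmetic used by
# the visualization above, assuming the anchor format
# [x, y, z, dim_x, dim_y, dim_z]: x maps left-to-right from the x extents,
# and z is flipped so the far edge of the extents lands at row 0. Treat
# this as an illustration of the convention, not the canonical
# anchor_projector code.
import numpy as np

def project_to_bev_sketch(anchors, bev_extents):
    anchors = np.asarray(anchors, dtype=np.float64)
    (x_min, x_max), (z_min, z_max) = bev_extents

    x1 = anchors[:, 0] - anchors[:, 3] / 2.0 - x_min
    x2 = anchors[:, 0] + anchors[:, 3] / 2.0 - x_min
    y1 = z_max - (anchors[:, 2] + anchors[:, 5] / 2.0)
    y2 = z_max - (anchors[:, 2] - anchors[:, 5] / 2.0)

    boxes = np.stack([x1, y1, x2, y2], axis=1)
    extents_range = np.array([x_max - x_min, z_max - z_min] * 2)
    return boxes, boxes / extents_range

boxes, boxes_norm = project_to_bev_sketch(
    [[0, 0, 35, 4, 2, 3]], [[-40, 40], [0, 70]])
print(boxes)       # [[38.  33.5 42.  36.5]]
print(boxes_norm)  # [[0.475 0.478... 0.525 0.521...]]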
def build(self):

    # Setup input placeholders
    self._set_up_input_pls()

    # Setup feature extractors
    self._set_up_feature_extractors()

    bev_proposal_input = self.bev_feature_maps
    img_proposal_input = self.img_feature_maps

    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do
    # path drop.
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1],
                random_values)

            img_proposal_input = tf.multiply(img_proposal_input,
                                             img_mask)
            bev_proposal_input = tf.multiply(bev_proposal_input,
                                             bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor
            fusion_mean_div_factor = img_mask + bev_mask

    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):
            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input,
            self._bev_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input,
            self._img_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)

    # Fully connected layers (Box Predictor)
    avod_layers_config = self.model_config.layers_config.avod_config

    with tf.variable_scope('proposal_roi_fusion'):
        feat_fusion_out = None

        fc_layers_type = avod_layers_config.WhichOneof('fc_layers')
        if fc_layers_type == 'basic_fc_layers':
            fusion_method = \
                avod_layers_config.basic_fc_layers.fusion_method
        elif fc_layers_type == 'fusion_fc_layers':
            fusion_method = \
                avod_layers_config.fusion_fc_layers.fusion_method

        if fusion_method == 'mean':
            tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
            feat_fusion_out = tf.divide(tf_features_sum,
                                        fusion_mean_div_factor)
        elif fusion_method == 'concat':
            feat_fusion_out = tf.concat(
                [bev_proposal_rois, img_proposal_rois], axis=3)
        else:
            raise ValueError('Invalid fusion method', self._fusion_method)

    all_anchors = self.placeholders[self.PL_ANCHORS]
    ground_plane = self.placeholders[self.PL_GROUND_PLANE]

    fc_output_layers = \
        avod_fc_layers_builder.build(
            layers_config=avod_layers_config,
            input_rois=[feat_fusion_out],
            input_weights=[1.0],
            num_final_classes=self._num_final_classes,
            box_rep=self._box_rep,
            top_anchors=all_anchors,
            ground_plane=ground_plane,
            is_training=self._is_training)

    all_cls_logits = \
        fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
    all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

    # This may be None
    all_angle_vectors = \
        fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

    with tf.variable_scope('softmax'):
        all_cls_softmax = tf.nn.softmax(all_cls_logits)

    ######################################################
    # Subsample mini_batch for the loss function
    ######################################################
    # Get the ground truth tensors
    anchors_gt = self.placeholders[self.PL_LABEL_ANCHORS]
    if self._box_rep in ['box_3d', 'box_4ca']:
        boxes_3d_gt = self.placeholders[self.PL_LABEL_BOXES_3D]
        orientations_gt = boxes_3d_gt[:, 6]
    elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
        boxes_3d_gt = self.placeholders[self.PL_LABEL_BOXES_3D]
    else:
        raise NotImplementedError('Ground truth tensors not implemented')

    if self._train_val_test in ['train', 'val']:
        with tf.variable_scope('bev'):
            # Project all anchors into bev and image spaces
            bev_proposal_boxes, bev_proposal_boxes_norm = \
                anchor_projector.project_to_bev(
                    all_anchors,
                    self.dataset.kitti_utils.bev_extents)

            # Reorder projected boxes into [y1, x1, y2, x2]
            bev_proposal_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_proposal_boxes)

        with tf.variable_scope('img'):
            image_shape = tf.cast(tf.shape(
                self.placeholders[self.PL_IMG_INPUT])[0:2], tf.float32)
            img_proposal_boxes, img_proposal_boxes_norm = \
                anchor_projector.tf_project_to_image_space(
                    all_anchors,
                    self.placeholders[self.PL_CALIB_P2],
                    image_shape)

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = \
                box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = \
                box_list.BoxList(bev_proposal_boxes_tf_order)

        class_labels = self.placeholders[self.PL_LABEL_CLASSES]

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(
                    all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(all_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb
                # sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt,
                                               mb_gt_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to
                # box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(
                            mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box_8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(all_anchors,
                                                     fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)
                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self.placeholders[self.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(all_anchors,
                                                     fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for
                    # each mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

    elif self._train_val_test in ['test']:
        # In test mode, skip mini-batch processing and just calculate
        # the box conversions.
        if self._box_rep in ['box_4c', 'box_4ca']:
            # Convert proposals: anchors -> box_3d -> box_4c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(all_anchors, fix_lw=True)
            proposal_boxes_4c = \
                box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                   ground_plane)
        elif self._box_rep in ['box_8c', 'box_8co']:
            # Convert proposals: anchors -> box_3d -> box_8c
            proposal_boxes_3d = \
                box_3d_encoder.anchors_to_box_3d(all_anchors, fix_lw=True)
            proposal_boxes_8c = \
                box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

    ######################################################
    # Final Predictions
    ######################################################
    # Get orientations from angle vectors
    if all_angle_vectors is not None:
        with tf.variable_scope('avod_orientation'):
            all_orientations = \
                orientation_encoder.tf_angle_vector_to_orientation(
                    all_angle_vectors)

    # Apply offsets to regress proposals
    with tf.variable_scope('avod_regression'):
        if self._box_rep == 'box_3d':
            prediction_anchors = \
                anchor_encoder.offset_to_anchor(all_anchors,
                                                all_offsets)

        elif self._box_rep in ['box_8c', 'box_8co']:
            # Reshape the 24-dim regressed offsets to (N x 3 x 8)
            reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
            # Given the offsets, get the boxes_8c
            prediction_boxes_8c = \
                box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                    reshaped_offsets)
            # Convert corners back to box_3d
            prediction_boxes_3d = \
                box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

            # Convert the box_3d to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        elif self._box_rep in ['box_4c', 'box_4ca']:
            # Convert predictions box_4c -> box_3d
            prediction_boxes_4c = \
                box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                    all_offsets)
            prediction_boxes_3d = \
                box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                   ground_plane)

            # Convert to anchor format for nms
            prediction_anchors = \
                box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

        else:
            raise NotImplementedError('Regression not implemented for',
                                      self._box_rep)

    # Apply non-oriented NMS in BEV
    with tf.variable_scope('avod_nms'):
        bev_extents = self.dataset.kitti_utils.bev_extents

        with tf.variable_scope('bev_projection'):
            # Project predictions into BEV
            avod_bev_boxes, _ = anchor_projector.project_to_bev(
                prediction_anchors, bev_extents)
            avod_bev_boxes_tf_order = \
                anchor_projector.reorder_projected_boxes(
                    avod_bev_boxes)

        # Get top score from second column onward
        all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

        # Apply NMS in BEV
        nms_indices = tf.image.non_max_suppression(
            avod_bev_boxes_tf_order,
            all_top_scores,
            max_output_size=self._nms_size,
            iou_threshold=self._nms_iou_threshold)

        # Gather predictions from NMS indices
        top_classification_logits = tf.gather(all_cls_logits,
                                              nms_indices)
        top_classification_softmax = tf.gather(all_cls_softmax,
                                               nms_indices)
        top_prediction_anchors = tf.gather(prediction_anchors,
                                           nms_indices)

        if self._box_rep == 'box_3d':
            top_orientations = tf.gather(
                all_orientations, nms_indices)

        elif self._box_rep in ['box_8c', 'box_8co']:
            top_prediction_boxes_3d = tf.gather(
                prediction_boxes_3d, nms_indices)
            top_prediction_boxes_8c = tf.gather(
                prediction_boxes_8c, nms_indices)

        elif self._box_rep == 'box_4c':
            top_prediction_boxes_3d = tf.gather(
                prediction_boxes_3d, nms_indices)
            top_prediction_boxes_4c = tf.gather(
                prediction_boxes_4c, nms_indices)

        elif self._box_rep == 'box_4ca':
            top_prediction_boxes_3d = tf.gather(
                prediction_boxes_3d, nms_indices)
            top_prediction_boxes_4c = tf.gather(
                prediction_boxes_4c, nms_indices)
            top_orientations = tf.gather(
                all_orientations, nms_indices)

        else:
            raise NotImplementedError('NMS gather not implemented for',
                                      self._box_rep)

    prediction_dict = dict()
    if self._train_val_test in ['train', 'val']:
        # Additional entries are added to the shared prediction_dict

        # Mini batch predictions
        prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
            mb_classifications_logits
        prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
            mb_classifications_softmax
        prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

        # Mini batch ground truth
        prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
            mb_classification_gt
        prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

        # Top NMS predictions
        prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
            top_classification_logits
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

    else:  # self._train_val_test == 'test'
        prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
            top_classification_softmax
        prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
            top_prediction_anchors

    if self._box_rep == 'box_3d':
        if self._train_val_test in ['train', 'val']:
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        # For debugging
        prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

    elif self._box_rep in ['box_8c', 'box_8co']:
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d

        # Store the corners before converting for visualization purposes
        prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

    elif self._box_rep == 'box_4c':
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

    elif self._box_rep == 'box_4ca':
        if self._train_val_test in ['train', 'val']:
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors
        prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
            top_prediction_boxes_3d
        prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
        prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

    else:
        raise NotImplementedError('Prediction dict not implemented for',
                                  self._box_rep)

    return prediction_dict
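# --- Illustrative sketch (not part of the repo) ---
# Toy example of why fusion_mean_div_factor is overwritten with
# img_mask + bev_mask when path drop is enabled: dividing by the mask sum
# averages only over the paths that are actually active, so a dropped
# path (mask = 0) does not drag the fused features toward zero.
import tensorflow as tf

bev_rois = tf.constant([[4.0]])
img_rois = tf.constant([[2.0]])

with tf.Session() as sess:
    for img_mask, bev_mask in [(1.0, 1.0), (0.0, 1.0)]:
        fused = tf.divide(
            tf.add(bev_rois * bev_mask, img_rois * img_mask),
            img_mask + bev_mask)
        # 3.0 when both paths are active, 4.0 when only BEV is kept
        print(sess.run(fused))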
def build(self):

    # Setup input placeholders
    self._set_up_input_pls()

    # Setup feature extractors
    self._set_up_feature_extractors()

    # BEV and image feature maps after the 1x1 bottleneck convolutions
    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do path
    # drop (e.g. train: 0.9, val/test: 1.0)
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            # Draw 3 random values from a uniform distribution in [0, 1)
            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            # Masks are either 0 or 1
            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1],
                random_values)

            # A path is kept when its mask is 1, and zeroed out otherwise
            img_proposal_input = tf.multiply(img_proposal_input,
                                             img_mask)
            bev_proposal_input = tf.multiply(bev_proposal_input,
                                             bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor used during training
            fusion_mean_div_factor = img_mask + bev_mask

    # Crop the BEV and image feature maps
    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):
            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV, resizing both inputs to the same crop
        # size so they can be fused later
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input,
            self._bev_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input,
            self._img_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)

    # Fuse the BEV and image ROIs
    with tf.variable_scope('proposal_roi_fusion'):
        rpn_fusion_out = None
        if self._fusion_method == 'mean':
            # Element-wise mean of the two inputs
            tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
            rpn_fusion_out = tf.divide(tf_features_sum,
                                       fusion_mean_div_factor)
        elif self._fusion_method == 'concat':
            rpn_fusion_out = tf.concat(
                [bev_proposal_rois, img_proposal_rois], axis=3)
        else:
            raise ValueError('Invalid fusion method', self._fusion_method)

    # TODO: move this section into a separate AnchorPredictor class
    with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
        # Fused features are the input to the predictor
        tensor_in = rpn_fusion_out

        # Parse rpn layers config
        layers_config = self._config.layers_config.rpn_config
        l2_weight_decay = layers_config.l2_weight_decay

        if l2_weight_decay > 0:
            weights_regularizer = slim.l2_regularizer(l2_weight_decay)
        else:
            weights_regularizer = None

        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer):
            # Use conv2d instead of fully_connected layers.
            # The previous output already matches the ROI crop size
            # (e.g. 6x6), so the fully connected layer is implemented as
            # a convolution with a kernel of that size
            cls_fc6 = slim.conv2d(tensor_in,
                                  layers_config.cls_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='cls_fc6')

            cls_fc6_drop = slim.dropout(cls_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc6_drop')

            cls_fc7 = slim.conv2d(cls_fc6_drop,
                                  layers_config.cls_fc7,
                                  [1, 1],
                                  scope='cls_fc7')

            cls_fc7_drop = slim.dropout(cls_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc7_drop')

            # 2 outputs for classification (object / background)
            cls_fc8 = slim.conv2d(cls_fc7_drop,
                                  2,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='cls_fc8')

            # Squeeze out the dimensions of size 1
            objectness = tf.squeeze(
                cls_fc8, [1, 2], name='cls_fc8/squeezed')

            # Use conv2d instead of fully_connected layers.
            reg_fc6 = slim.conv2d(tensor_in,
                                  layers_config.reg_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='reg_fc6')

            reg_fc6_drop = slim.dropout(reg_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc6_drop')

            reg_fc7 = slim.conv2d(reg_fc6_drop,
                                  layers_config.reg_fc7,
                                  [1, 1],
                                  scope='reg_fc7')

            reg_fc7_drop = slim.dropout(reg_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc7_drop')

            # 6 regressed offsets: the centroid deltas (dtx, dty, dtz)
            # and the dimension deltas (ddx, ddy, ddz)
            reg_fc8 = slim.conv2d(reg_fc7_drop,
                                  6,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='reg_fc8')

            offsets = tf.squeeze(
                reg_fc8, [1, 2], name='reg_fc8/squeezed')

    # Histogram summaries (optional visualization)
    with tf.variable_scope('histograms_feature_extractor'):
        with tf.variable_scope('bev_vgg'):
            for end_point in self.bev_end_points:
                tf.summary.histogram(
                    end_point, self.bev_end_points[end_point])

        with tf.variable_scope('img_vgg'):
            for end_point in self.img_end_points:
                tf.summary.histogram(
                    end_point, self.img_end_points[end_point])

    with tf.variable_scope('histograms_rpn'):
        with tf.variable_scope('anchor_predictor'):
            fc_layers = [cls_fc6, cls_fc7, cls_fc8, objectness,
                         reg_fc6, reg_fc7, reg_fc8, offsets]
            for fc_layer in fc_layers:
                # fix the name to avoid tf warnings
                tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                     fc_layer)

    # Return the proposals
    with tf.variable_scope('proposals'):
        anchors = self.placeholders[self.PL_ANCHORS]

        # Decode anchor regression offsets to recover the regressed
        # (x, y, z, dim_x, dim_y, dim_z) anchors
        with tf.variable_scope('decoding'):
            regressed_anchors = anchor_encoder.offset_to_anchor(
                anchors, offsets)

        # bev_extents e.g. [[-40, 40], [0, 70]]; returns box corners
        # and normalized box corners
        with tf.variable_scope('bev_projection'):
            _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                regressed_anchors, self._bev_extents)

        with tf.variable_scope('softmax'):
            objectness_softmax = tf.nn.softmax(objectness)

        with tf.variable_scope('nms'):
            objectness_scores = objectness_softmax[:, 1]

            # Do NMS on the regressed anchors: keep at most _nms_size
            # boxes (e.g. 1024), suppressing boxes whose IoU with a
            # higher-scoring box exceeds the threshold (e.g. 0.8)
            top_indices = tf.image.non_max_suppression(
                bev_proposal_boxes_norm,
                objectness_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_thresh)

            # Gather the anchors and objectness kept by NMS
            top_anchors = tf.gather(regressed_anchors, top_indices)
            top_objectness_softmax = tf.gather(objectness_scores,
                                               top_indices)
            # top_offsets = tf.gather(offsets, top_indices)
            # top_objectness = tf.gather(objectness, top_indices)

    # Get mini batch
    all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
    all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
    all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

    with tf.variable_scope('mini_batch'):
        mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
        mini_batch_mask, _ = \
            mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

    # ROI summary images
    rpn_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
    with tf.variable_scope('bev_rpn_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                              mini_batch_mask)
        mb_bev_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(
            self._bev_preprocessed,
            mb_bev_anchors_norm,
            mb_bev_box_indices,
            (32, 32))

        bev_input_roi_summary_images = tf.split(
            bev_input_rois, self._bev_depth, axis=3)
        tf.summary.image('bev_rpn_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=rpn_mini_batch_size)

    with tf.variable_scope('img_rpn_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                              mini_batch_mask)
        mb_img_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_rpn_rois',
                         img_input_rois,
                         max_outputs=rpn_mini_batch_size)

    # Ground Truth Tensors
    with tf.variable_scope('one_hot_classes'):

        # Anchor classification ground truth
        # Object / Not Object
        min_pos_iou = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

        objectness_classes_gt = tf.cast(
            tf.greater_equal(all_ious_gt, min_pos_iou),
            dtype=tf.int32)
        objectness_gt = tf.one_hot(
            objectness_classes_gt, depth=2,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=self._config.label_smoothing_epsilon)

    # Mask predictions for mini batch
    with tf.variable_scope('prediction_mini_batch'):
        objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
        offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

    with tf.variable_scope('ground_truth_mini_batch'):
        objectness_gt_masked = tf.boolean_mask(
            objectness_gt, mini_batch_mask)
        offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                            mini_batch_mask)

    # Specify the tensors to evaluate
    predictions = dict()

    # Temporary predictions for debugging
    # predictions['anchor_ious'] = anchor_ious
    # predictions['anchor_offsets'] = all_offsets_gt

    if self._train_val_test in ['train', 'val']:
        # All anchors
        predictions[self.PRED_ANCHORS] = anchors

        # Mini-batch masks
        predictions[self.PRED_MB_MASK] = mini_batch_mask
        # Mini-batch predictions
        predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
        predictions[self.PRED_MB_OFFSETS] = offsets_masked

        # Mini batch ground truth
        predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
        predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

        # Proposals after nms
        predictions[self.PRED_TOP_INDICES] = top_indices
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    else:  # self._train_val_test == 'test'
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    return predictions
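# --- Illustrative sketch (not part of the repo) ---
# Plain numpy version of the greedy NMS that tf.image.non_max_suppression
# applies to the proposals above: visit boxes in descending score order,
# keeping a box only if its IoU with every already-kept box stays at or
# below the threshold. The box and score values below are made up.
import numpy as np

def nms_sketch(boxes, scores, max_output_size, iou_threshold):
    def iou(box_a, box_b):
        y1 = max(box_a[0], box_b[0])
        x1 = max(box_a[1], box_b[1])
        y2 = min(box_a[2], box_b[2])
        x2 = min(box_a[3], box_b[3])
        intersection = max(0.0, y2 - y1) * max(0.0, x2 - x1)
        area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
        area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
        return intersection / (area_a + area_b - intersection)

    keep = []
    for idx in np.argsort(scores)[::-1]:
        if all(iou(boxes[idx], boxes[k]) <= iou_threshold for k in keep):
            keep.append(idx)
        if len(keep) == max_output_size:
            break
    return np.asarray(keep)

boxes = np.array([[0, 0, 10, 10], [1, 1, 10, 10], [20, 20, 30, 30]],
                 dtype=np.float64)
scores = np.array([0.9, 0.8, 0.7])
print(nms_sketch(boxes, scores, max_output_size=2, iou_threshold=0.5))
# -> [0 2]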