def fast_rcnn_net(self):
    with tf.variable_scope('fast_rcnn_net'):
        with slim.arg_scope([slim.fully_connected],
                            weights_regularizer=slim.l2_regularizer(self.weight_decay)):

            flatten_rois_features = slim.flatten(self.fast_rcnn_all_level_rois)

            net = slim.fully_connected(flatten_rois_features, 1024, scope='fc_1')
            if self.use_dropout:
                net = slim.dropout(net, keep_prob=0.5,
                                   is_training=self.is_training,
                                   scope='dropout')
            net = slim.fully_connected(net, 1024, scope='fc_2')

            # num_classes + 1: one extra logit for the background class.
            fast_rcnn_scores = slim.fully_connected(net, self.num_classes + 1,
                                                    activation_fn=None,
                                                    scope='classifier')

            # Class-specific box regression: 4 deltas per foreground class.
            fast_rcnn_encode_boxes = slim.fully_connected(net, self.num_classes * 4,
                                                          activation_fn=None,
                                                          scope='regressor')
            if DEBUG:
                print_tensors(fast_rcnn_encode_boxes, 'fast_rcnn_encode_boxes')

            return fast_rcnn_encode_boxes, fast_rcnn_scores
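# Hedged usage sketch: how the head's two outputs are typically consumed. The
# reshape to [N, num_classes, 4] assumes the per-class deltas are laid out
# contiguously, which matches the `num_classes * 4` regressor above; the
# function and variable names here are illustrative, not from this repository.
def _pick_class_specific_boxes(fast_rcnn_encode_boxes, fast_rcnn_scores, num_classes):
    """Returns the delta row for each ROI's argmax class, plus ids and scores."""
    probs = tf.nn.softmax(fast_rcnn_scores)                       # [N, num_classes + 1]
    categories = tf.cast(tf.argmax(probs, axis=1), tf.int32)      # 0 is background
    per_class = tf.reshape(fast_rcnn_encode_boxes, [-1, num_classes, 4])
    # Foreground class id k (1-based) indexes row k - 1 of the deltas.
    fg_ids = tf.maximum(categories - 1, 0)
    indices = tf.stack([tf.range(tf.shape(per_class)[0]), fg_ids], axis=1)
    picked_boxes = tf.gather_nd(per_class, indices)               # [N, 4]
    picked_scores = tf.reduce_max(probs, axis=1)
    return picked_boxes, categories, picked_scores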
def get_rois(self):
    '''
    1) Assign each proposal to a pyramid level and gather its features.
    2) Crop the ROI features. ROI Align (crop_and_resize followed by a
       max pool) is used here rather than ROI Pooling.
    :return: all_level_rois: [N, 7, 7, C]
             all_level_proposals: [N, 4], row-aligned with all_level_rois
    '''
    levels = self.assign_level()

    all_level_roi_list = []
    all_level_proposal_list = []
    if DEBUG:
        print_tensors(levels, 'levels')

    with tf.variable_scope('fast_rcnn_roi'):
        # P6 is not used by the Fast R-CNN detector.
        for i in range(self.min_level, self.max_level + 1):
            level_i_proposal_indices = tf.reshape(tf.where(tf.equal(levels, i)), [-1])
            level_i_proposals = tf.gather(self.rpn_proposals_boxes, level_i_proposal_indices)

            # Guard against an empty level: a zero-row tensor here breaks
            # gradient backpropagation, so substitute a single dummy box.
            level_i_proposals = tf.cond(
                tf.equal(tf.shape(level_i_proposals)[0], 0),
                lambda: tf.constant([[0, 0, 0, 0]], dtype=tf.float32),
                lambda: level_i_proposals)

            all_level_proposal_list.append(level_i_proposals)

            # crop_and_resize expects box coordinates normalized to [0, 1].
            ymin, xmin, ymax, xmax = tf.unstack(level_i_proposals, axis=1)
            img_h = tf.cast(self.img_shape[1], tf.float32)
            img_w = tf.cast(self.img_shape[2], tf.float32)
            normalize_ymin = ymin / img_h
            normalize_xmin = xmin / img_w
            normalize_ymax = ymax / img_h
            normalize_xmax = xmax / img_w

            level_i_cropped_rois = tf.image.crop_and_resize(
                self.feature_pyramid['P%d' % i],
                boxes=tf.transpose(tf.stack([normalize_ymin, normalize_xmin,
                                             normalize_ymax, normalize_xmax])),
                box_ind=tf.zeros(shape=[tf.shape(level_i_proposals)[0], ], dtype=tf.int32),
                crop_size=[self.roi_size, self.roi_size])

            level_i_rois = slim.max_pool2d(
                level_i_cropped_rois,
                [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                stride=self.roi_pool_kernel_size)
            all_level_roi_list.append(level_i_rois)

        all_level_rois = tf.concat(all_level_roi_list, axis=0)
        all_level_proposals = tf.concat(all_level_proposal_list, axis=0)
        return all_level_rois, all_level_proposals
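# assign_level() is called above but not shown in this section. A minimal
# sketch, assuming it implements the FPN paper's assignment heuristic
# k = floor(k0 + log2(sqrt(w*h) / 224)) with k0 = 4, clipped to
# [min_level, max_level]; the name and defaults below are illustrative
# assumptions, not this repository's implementation.
def assign_level_sketch(proposals, min_level=2, max_level=5, k0=4):
    """proposals: [N, 4] as (ymin, xmin, ymax, xmax) in image pixels."""
    ymin, xmin, ymax, xmax = tf.unstack(proposals, axis=1)
    w = tf.maximum(xmax - xmin, 0.)
    h = tf.maximum(ymax - ymin, 0.)
    # log2(x) = log(x) / log(2); the epsilon keeps empty boxes finite.
    levels = tf.floor(k0 + tf.log(tf.sqrt(w * h) / 224.0 + 1e-8) / tf.log(2.0))
    return tf.cast(tf.clip_by_value(levels, min_level, max_level), tf.int32)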
def batch_slice_rcnn_proposals(rpn_proposal_bbox,
                               encode_boxes,
                               categories,
                               scores,
                               image_window,
                               config):
    """
    Per-class (multiclass) NMS over the decoded Fast R-CNN boxes.
    :param rpn_proposal_bbox: (N, 4)
    :param encode_boxes: (N, 4)
    :param categories: (N, )
    :param scores: (N, )
    :param image_window: (y1, x1, y2, x2), the image boundary
    :return: detections: (-1, 6) as [y1, x1, y2, x2, class_id, score]
    """
    with tf.variable_scope('fast_rcnn_proposals'):
        # Trim zero-padded proposals and their companion tensors.
        rpn_proposal_bbox, non_zeros = boxes_utils.trim_zeros_graph(
            rpn_proposal_bbox, name="trim_proposals_detection")
        encode_boxes = tf.boolean_mask(encode_boxes, non_zeros)
        categories = tf.boolean_mask(categories, non_zeros)
        scores = tf.boolean_mask(scores, non_zeros)

        fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=encode_boxes,
            reference_boxes=rpn_proposal_bbox,
            dev_factors=config.BBOX_STD_DEV)
        fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            fast_rcnn_decode_boxes, image_window)

        # Remove background detections (class id 0).
        keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)
        if DEBUG:
            print_categories = tf.gather(categories, keep)
            print_scores = tf.gather(scores, keep)
            num_item = tf.minimum(tf.shape(print_scores)[0], 50)
            print_scores_vision, print_index = tf.nn.top_k(print_scores, k=num_item)
            print_categories_vision = tf.gather(print_categories, print_index)
            print_tensors(print_categories_vision, "categories")
            print_tensors(print_scores_vision, "scores")

        # Filter out low-confidence boxes.
        if config.FINAL_SCORE_THRESHOLD:
            conf_keep = tf.cast(
                tf.where(scores >= config.FINAL_SCORE_THRESHOLD)[:, 0], tf.int32)
            keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                            tf.expand_dims(conf_keep, 0))
            keep = tf.sparse_tensor_to_dense(keep)[0]

        pre_nms_class_ids = tf.gather(categories, keep)
        pre_nms_scores = tf.gather(scores, keep)
        pre_nms_rois = tf.gather(fast_rcnn_decode_boxes, keep)
        unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

        def nms_keep_map(class_id):
            """Apply Non-Maximum Suppression on ROIs of the given class."""
            # Indices of ROIs of the given class
            ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
            # Apply NMS
            class_keep = tf.image.non_max_suppression(
                tf.gather(pre_nms_rois, ixs),
                tf.gather(pre_nms_scores, ixs),
                max_output_size=config.DETECTION_MAX_INSTANCES,
                iou_threshold=config.FAST_RCNN_NMS_IOU_THRESHOLD)
            # Map indices back into the `keep` index space
            class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
            # Pad with -1 so the returned tensors all have the same shape
            gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
            class_keep = tf.pad(class_keep, [(0, gap)],
                                mode='CONSTANT', constant_values=-1)
            # Set shape so map_fn() can infer the result shape
            class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
            return class_keep

        # 2. Map over class IDs
        nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int32)
        # 3. Merge results into one list, and remove -1 padding
        nms_keep = tf.reshape(nms_keep, [-1])
        nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
        # 4. Compute the intersection between keep and nms_keep
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(nms_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

        # Keep the top detections
        roi_count = config.DETECTION_MAX_INSTANCES
        class_scores_keep = tf.gather(scores, keep)
        num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
        top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
        keep = tf.gather(keep, top_ids)

        # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
        # Coordinates are normalized.
        detections = tf.concat([
            tf.gather(fast_rcnn_decode_boxes, keep),
            tf.to_float(tf.gather(categories, keep))[..., tf.newaxis],
            tf.gather(scores, keep)[..., tf.newaxis]
        ], axis=1)

        # Pad with zeros if there are fewer detections than DETECTION_MAX_INSTANCES
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
        detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")

        return detections
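# Hedged usage sketch: despite its name, batch_slice_rcnn_proposals operates on
# the tensors of a single image, so a caller would map it over the batch
# dimension. The [B, ...]-shaped argument names below are illustrative
# assumptions about the surrounding pipeline, not this repository's API.
def apply_rcnn_proposals_per_image(batch_proposals, batch_encode_boxes,
                                   batch_categories, batch_scores,
                                   image_window, config):
    """batch_* tensors are [B, N, ...]; returns [B, DETECTION_MAX_INSTANCES, 6]."""
    def _single_image(args):
        proposals, encode_boxes, categories, scores = args
        return batch_slice_rcnn_proposals(proposals, encode_boxes, categories,
                                          scores, image_window, config)
    return tf.map_fn(_single_image,
                     [batch_proposals, batch_encode_boxes,
                      batch_categories, batch_scores],
                     dtype=tf.float32)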
def rpn_losses(self):
    with tf.variable_scope('rpn_losses'):
        minibatch_indices, minibatch_anchor_matched_gtboxes, object_mask, minibatch_labels_one_hot = \
            self.make_minibatch(self.anchors)

        minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
        minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes, minibatch_indices)
        minibatch_boxes_scores = tf.gather(self.rpn_scores, minibatch_indices)

        # Encode the matched ground-truth boxes against the minibatch anchors.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_anchor_matched_gtboxes,
            reference_boxes=minibatch_anchors,
            scale_factors=self.scale_factors)

        # Visualize positive and negative anchors for TensorBoard.
        positive_anchors_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_anchors * tf.expand_dims(object_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

        negative_mask = tf.cast(tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
        negative_anchors_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_anchors * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

        minibatch_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=minibatch_encode_boxes,
            reference_boxes=minibatch_anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_anchors_in_img)
        tf.summary.image('/negative_anchors', negative_anchors_in_img)

        # Visualize the top-k (up to 30) scoring minibatch boxes.
        num_boxes = tf.shape(minibatch_boxes_scores)[0]
        k_shown = tf.minimum(30, num_boxes)
        top_k_scores, top_k_indices = tf.nn.top_k(minibatch_boxes_scores[:, 1], k=k_shown)
        print_tensors(minibatch_boxes_scores, "minibatch_boxes_scores")

        top_detections_in_img = draw_box_with_color(
            self.img_batch,
            tf.gather(minibatch_decode_boxes, top_k_indices),
            text=tf.shape(top_k_scores)[0])
        tf.summary.image('/top_k', top_detections_in_img)

        # Losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=object_mask)
            # Add the smooth L1 loss to the losses collection.
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=minibatch_boxes_scores,
                onehot_labels=minibatch_labels_one_hot)

        return location_loss, classification_loss
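# losses.l1_smooth_losses is called above but not shown in this section. A
# minimal sketch, assuming the standard smooth L1 definition from Fast R-CNN
# (0.5*x^2 if |x| < 1, else |x| - 0.5), masked by object_weights and averaged
# over the positive anchors; the name and exact reduction are illustrative
# assumptions, not this repository's implementation.
def l1_smooth_losses_sketch(predict_boxes, gtboxes, object_weights):
    """predict_boxes, gtboxes: [M, 4]; object_weights: [M] with 1.0 for positives."""
    diff = tf.abs(predict_boxes - gtboxes)
    smooth_l1 = tf.where(tf.less(diff, 1.0),
                         0.5 * tf.square(diff),
                         diff - 0.5)
    per_box = tf.reduce_sum(smooth_l1, axis=1) * object_weights
    # Normalize by the number of positive anchors, avoiding division by zero.
    num_pos = tf.maximum(tf.reduce_sum(object_weights), 1.0)
    return tf.reduce_sum(per_box) / num_pos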