def rpn_losses(self):
    """Build the RPN location and classification losses for one minibatch.

    A minibatch of anchors is sampled with ``self.make_minibatch``; the
    anchors, predicted box encodings and predicted scores are gathered with
    the returned indices. Image summaries are written for the positive
    anchors, the negative anchors and the five boxes with the highest value
    in column 1 of the gathered scores.

    :return: tuple ``(location_loss, classification_loss)``. The location
        loss is also registered through ``slim.losses.add_loss``.
    """
    with tf.variable_scope('rpn_losses'):
        sampled_idx, matched_gtboxes, fg_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        anchors = tf.gather(self.anchors, sampled_idx)
        predicted_encodings = tf.gather(self.rpn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.rpn_scores, sampled_idx)

        # Regression targets: matched ground-truth boxes encoded against
        # the sampled anchors.
        target_encodings = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        # Rows whose mask entry is 0 are zeroed before drawing; the text
        # argument is the number of entries equal to 1.0.
        positive_boxes = anchors * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = anchors * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(fg_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=predicted_encodings,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Visualise the five boxes ranked highest by score column 1.
        best_scores, best_idx = tf.nn.top_k(predicted_scores[:, 1], k=5)
        best_boxes = tf.gather(decoded_boxes, best_idx)
        best_img = draw_box_with_color(
            self.img_batch, best_boxes, text=tf.shape(best_scores)[0])
        tf.summary.image('/top_5', best_img)

        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask)
            # Add the smooth-L1 loss to the losses collection.
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_one_hot)

        return location_loss, classification_loss
def compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    :param ex_rois: reference boxes the targets are expressed against.
    :param gt_rois: ground-truth boxes to be encoded.
    :return: the result of ``encode_and_decode.encode_boxes`` using
        ``cfgs.ANCHOR_SCALE_FACTORS``.
    """
    return encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
def rpn_losses(self):
    """Build the RPN losses and a thresholded set of RPN predictions.

    A minibatch of anchors is sampled with ``self.make_minibatch``. Besides
    the two losses, the function draws summaries of the positive and
    negative anchors and of the 20 boxes with the highest softmax score in
    column 1, and keeps those of the 20 whose score exceeds
    ``cfgs.FINAL_SCORE_THRESHOLD``.

    :return: tuple ``(location_loss, classification_loss,
        rpn_predict_boxes, rpn_predict_scores)``.
    """
    with tf.variable_scope('rpn_losses'):
        sampled_idx, matched_gtboxes, fg_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        anchors = tf.gather(self.anchors, sampled_idx)
        predicted_encodings = tf.gather(self.rpn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.rpn_scores, sampled_idx)

        # Regression targets: matched ground-truth boxes encoded against
        # the sampled anchors.
        target_encodings = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_boxes = anchors * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = anchors * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(fg_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=predicted_encodings,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Softmax is applied to all RPN scores first, then the sampled rows
        # are gathered.
        softmax_scores = tf.gather(
            slim.softmax(self.rpn_scores), sampled_idx)

        best_scores, best_idx = tf.nn.top_k(softmax_scores[:, 1], k=20)
        best_boxes = tf.gather(decoded_boxes, best_idx)
        best_img = draw_boxes_with_scores(
            self.img_batch, boxes=best_boxes, scores=best_scores)
        tf.summary.image('/top_20', best_img)

        # Keep only the top-k entries above the configured score threshold.
        above_threshold = tf.greater(best_scores, cfgs.FINAL_SCORE_THRESHOLD)
        kept_idx = tf.reshape(tf.where(above_threshold), [-1])
        rpn_predict_boxes = tf.gather(best_boxes, kept_idx)
        rpn_predict_scores = tf.gather(best_scores, kept_idx)

        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask)
            # Add the smooth-L1 loss to the losses collection.
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_one_hot)

        return location_loss, classification_loss, rpn_predict_boxes, rpn_predict_scores
def fast_rcnn_loss(self):
    """Build the Fast R-CNN location and classification losses.

    A minibatch of proposals is sampled with ``self.fast_rcnn_minibatch``.
    The matched ground-truth boxes are encoded against the sampled proposals
    and tiled ``self.num_classes`` times along axis 1 so they line up with
    the per-class box predictions. Per-class weights are built from columns
    ``1..self.num_classes`` of the one-hot labels.

    The deprecated ``slim.losses`` calls are replaced by their ``tf.losses``
    equivalents, which register into the same losses collection.

    :return: tuple ``(fast_rcnn_location_loss,
        fast_rcnn_classification_loss)``.
    """
    with tf.variable_scope('fast_rcnn_loss'):
        minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
            minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        minibatch_reference_boxes = tf.gather(
            self.fast_rcnn_all_level_proposals, minibatch_indices)
        # [minibatch_size, num_classes*4]
        minibatch_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes, minibatch_indices)
        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)

        # Encode the matched ground-truth boxes against the proposals.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        # [minibatch_size, num_classes*4]
        minibatch_encode_gtboxes = tf.tile(
            minibatch_encode_gtboxes, [1, self.num_classes])

        # One label column per class; index 0 is skipped by the loop below.
        category_list = tf.unstack(minibatch_label_one_hot, axis=1)

        # The all-ones template does not depend on the class, so build it once.
        ones_per_box = tf.ones(
            shape=[tf.shape(minibatch_encode_boxes)[0], 4], dtype=tf.float32)
        class_weights_list = [
            ones_per_box * tf.expand_dims(category_list[i], axis=1)
            for i in range(1, self.num_classes + 1)
        ]
        # [minibatch_size, num_classes*4]
        class_weights = tf.concat(class_weights_list, axis=1)

        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = tf.losses.softmax_cross_entropy(
                logits=minibatch_scores,
                onehot_labels=minibatch_label_one_hot)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=class_weights)
            # Register the location loss in the losses collection.
            tf.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    :param ex_rois: reference boxes; must have 4 columns and as many rows
        as ``gt_rois``.
    :param gt_rois: ground-truth boxes; must have 4 columns.
    :return: the result of ``encode_and_decode.encode_boxes`` using
        ``cfgs.ANCHOR_SCALE_FACTORS``.
    """
    # Both inputs must be row-aligned sets of 4-coordinate boxes.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    return encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
def _compute_targets_h(ex_rois, gt_rois_h, labels):
    """Compute labelled bounding-box regression targets for an image.

    Each output row is ``[label, tx, ty, tw, th]``.

    :param ex_rois: reference boxes; must have 4 columns and as many rows
        as ``gt_rois_h``.
    :param gt_rois_h: ground-truth boxes; must have 4 columns.
    :param labels: one label per box, prepended as the first column.
    :return: float32 array with the label column stacked in front of the
        targets encoded with ``cfgs.ROI_SCALE_FACTORS``.
    """
    # Both box sets must be row-aligned and 4-coordinate.
    assert ex_rois.shape[0] == gt_rois_h.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois_h.shape[1] == 4

    encoded = encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois_h,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ROI_SCALE_FACTORS)

    label_column = labels[:, np.newaxis]
    stacked = np.hstack((label_column, encoded))
    return stacked.astype(np.float32, copy=False)
def fast_rcnn_loss(self):
    """Build the Fast R-CNN location and classification losses.

    A minibatch of proposals is sampled with ``self.fast_rcnn_minibatch``.
    Image summaries are written for the positive and negative proposals,
    and, when ``cfgs.CLASS_NUM == 1``, for the five decoded boxes with the
    highest softmax score in column 1.

    :return: tuple ``(fast_rcnn_location_loss,
        fast_rcnn_classification_loss)``.
    """
    with tf.variable_scope('fast_rcnn_loss'):
        sampled_idx, matched_gtboxes, fg_mask, labels_one_hot = \
            self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        proposals = tf.gather(self.fast_rcnn_all_level_proposals, sampled_idx)
        # [minibatch_size, num_classes*4]
        predicted_encodings = tf.gather(self.fast_rcnn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.fast_rcnn_scores, sampled_idx)

        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_boxes = proposals * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = proposals * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(fg_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        tf.summary.image('/positive_proposals', positive_img)
        tf.summary.image('/negative_proposals', negative_img)

        # The top-5 visualisation is only produced in the single-class setup.
        if cfgs.CLASS_NUM == 1:
            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_encodings,
                reference_boxes=proposals,
                scale_factors=self.scale_factors)

            softmax_scores = tf.gather(
                slim.softmax(self.fast_rcnn_scores), sampled_idx)
            best_scores, best_idx = tf.nn.top_k(softmax_scores[:, 1], k=5)
            best_img = draw_boxes_with_scores(
                self.img_batch,
                boxes=tf.gather(decoded_boxes, best_idx),
                scores=best_scores)
            tf.summary.image('/top_5', best_img)

        # Encode the matched ground-truth boxes against the proposals.
        target_encodings = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=proposals,
            scale_factors=self.scale_factors)

        # [minibatch_size, num_classes*4]
        target_encodings = tf.tile(target_encodings, [1, self.num_classes])

        # One label column per class; index 0 is skipped below.
        label_columns = tf.unstack(labels_one_hot, axis=1)
        per_class_weights = [
            tf.ones(shape=[tf.shape(predicted_encodings)[0], 4],
                    dtype=tf.float32)
            * tf.expand_dims(label_columns[i], axis=1)
            for i in range(1, self.num_classes + 1)
        ]
        # [minibatch_size, num_classes*4]
        class_weights = tf.concat(per_class_weights, axis=1)

        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = tf.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_one_hot)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask,
                classes_weights=class_weights)
            # Register the location loss in the losses collection.
            tf.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def build_rpn_target(gt_boxes, anchors, config):
    """Assign anchors to object / background and sample an RPN minibatch.

    :param gt_boxes: (M, 4) ground-truth boxes.
    :param anchors: (N, 4) anchors as [y1, x1, y2, x2], N being the number
        of all anchors.
    :param config: configuration object; the function reads
        ``RPN_IOU_POSITIVE_THRESHOLD``, ``RPN_IOU_NEGATIVE_THRESHOLD``,
        ``RPN_MINIBATCH_SIZE``, ``RPN_POSITIVE_RATE`` and
        ``RPN_BBOX_STD_DEV`` from it.
    :return: tuple ``(minibatch_indices, minibatch_encode_gtboxes,
        rpn_labels_one_hot)``.
    """
    with tf.variable_scope('rpn_find_positive_negative_samples'):
        gt_boxes = tf.cast(gt_boxes, tf.float32)
        overlaps = iou_calculate(anchors, gt_boxes)  # (N, M)

        best_iou_per_anchor = tf.reduce_max(overlaps, axis=1)

        # Every anchor starts as "ignored" (-1).  [N, ]
        num_anchors = tf.shape(anchors)[0]
        labels = tf.ones(shape=[num_anchors, ], dtype=tf.float32) * (-1)

        best_gt_per_anchor = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)

        # Rule 1: IoU with some ground-truth box reaches the positive threshold.
        above_threshold = tf.greater_equal(
            best_iou_per_anchor, config.RPN_IOU_POSITIVE_THRESHOLD)

        # Rule 2: the anchor(s) holding the highest IoU for a ground-truth box.
        # NOTE(review): if a ground-truth column has max IoU 0, every anchor
        # with IoU 0 for it matches this rule - confirm this is acceptable.
        best_iou_per_gt = tf.reduce_max(overlaps, axis=0)  # (M, )
        is_column_best = tf.cast(
            tf.equal(overlaps, best_iou_per_gt), tf.float32)
        column_best_hits = tf.reduce_sum(is_column_best, axis=1)

        is_positive = tf.logical_or(
            above_threshold, tf.cast(column_best_hits, tf.bool))

        labels = labels + 2 * tf.cast(is_positive, tf.float32)

        # Background anchors also receive a matched box here; the comment in
        # the original code says it does not matter because it is masked out.
        anchors_matched_gtboxes = tf.gather(gt_boxes, best_gt_per_anchor)  # [N, 4]

        is_negative = tf.less(
            best_iou_per_anchor, config.RPN_IOU_NEGATIVE_THRESHOLD)

        # [N, ] positive is >= 1.0, negative is 0, ignored is -1.0
        labels = labels + tf.cast(is_negative, tf.float32)

        # A positive label may be 1.0 or 2.0 at this point: an anchor that is
        # positive only through rule 2 can still have IoU below the negative
        # threshold, in which case it gets +2 and then +1 on top of the
        # initial -1. Hence positives are detected with ">= 1.0".
        positive_part = tf.cast(tf.greater_equal(labels, 1.0), tf.float32)
        ignored_part = tf.cast(tf.equal(labels, -1.0), tf.float32) * -1

        # Normalised labels: 1 positive, 0 negative, -1 ignored.
        rpn_labels = positive_part + ignored_part

    with tf.variable_scope('rpn_minibatch'):
        # Randomly pick the positive anchors.
        positive_indices = tf.reshape(
            tf.where(tf.equal(rpn_labels, 1.0)), [-1])
        max_positives = tf.cast(
            config.RPN_MINIBATCH_SIZE * config.RPN_POSITIVE_RATE, tf.int32)
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0], max_positives)

        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(
            positive_indices, begin=[0], size=[num_of_positives])

        # Randomly pick the negative anchors to fill the remaining slots.
        negatives_indices = tf.reshape(
            tf.where(tf.equal(rpn_labels, 0.0)), [-1])
        num_of_negatives = tf.minimum(
            config.RPN_MINIBATCH_SIZE - num_of_positives,
            tf.shape(negatives_indices)[0])

        negatives_indices = tf.random_shuffle(negatives_indices)
        negatives_indices = tf.slice(
            negatives_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat(
            [positive_indices, negatives_indices], axis=0)

        # Pad with extra negatives if the minibatch is still short.
        # NOTE(review): the slice takes `gap` entries from the already
        # truncated negatives - confirm `gap` can never exceed their count.
        gap = config.RPN_MINIBATCH_SIZE - tf.shape(minibatch_indices)[0]
        padding_indices = tf.random_shuffle(negatives_indices)
        padding_indices = tf.slice(padding_indices, begin=[0], size=[gap])
        minibatch_indices = tf.concat(
            [minibatch_indices, padding_indices], axis=0)

        minibatch_indices = tf.random_shuffle(minibatch_indices)

        # (config.RPN_MINI_BATCH_SIZE, 4)
        minibatch_anchor_matched_gtboxes = tf.gather(
            anchors_matched_gtboxes, minibatch_indices)
        rpn_labels = tf.cast(
            tf.gather(rpn_labels, minibatch_indices), tf.int32)

        # Encode the matched ground-truth boxes against the sampled anchors.
        minibatch_anchors = tf.gather(anchors, minibatch_indices)
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_anchor_matched_gtboxes,
            reference_boxes=minibatch_anchors,
            dev_factors=config.RPN_BBOX_STD_DEV)

        rpn_labels_one_hot = tf.one_hot(rpn_labels, 2, axis=-1)

    return minibatch_indices, minibatch_encode_gtboxes, rpn_labels_one_hot
def fast_rcnn_loss(self):
    """Build the Fast R-CNN losses on the rotated proposals.

    A minibatch is sampled with ``self.fast_rcnn_minibatch`` from
    ``self.fast_rcnn_all_level_rotate_proposals``. Boxes use 5 values each
    (the class weights and the tiling are built with width 5). Summaries are
    written for positive proposals, negative proposals and the five decoded
    boxes with the highest softmax score in column 1.

    :return: tuple ``(fast_rcnn_location_loss,
        fast_rcnn_classification_loss)``.
    """
    with tf.variable_scope('fast_rcnn_loss'):
        sampled_idx, matched_gtboxes, fg_mask, labels_one_hot = \
            self.fast_rcnn_minibatch(self.fast_rcnn_all_level_rotate_proposals)

        proposals = tf.gather(
            self.fast_rcnn_all_level_rotate_proposals, sampled_idx)
        # [minibatch_size, num_classes*5]
        predicted_encodings = tf.gather(self.fast_rcnn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.fast_rcnn_scores, sampled_idx)

        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_boxes = proposals * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = proposals * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(fg_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        tf.summary.image('/positive_proposals', positive_img)
        tf.summary.image('/negative_proposals', negative_img)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=predicted_encodings,
            reference_boxes=proposals,
            scale_factors=self.scale_factors)

        softmax_scores = tf.gather(
            slim.softmax(self.fast_rcnn_scores), sampled_idx)
        best_scores, best_idx = tf.nn.top_k(softmax_scores[:, 1], k=5)
        best_img = draw_boxes_with_scores(
            self.img_batch,
            boxes=tf.gather(decoded_boxes, best_idx),
            scores=best_scores)
        tf.summary.image('/top_5', best_img)

        # Encode the matched ground-truth boxes against the proposals.
        target_encodings = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=proposals,
            scale_factors=self.scale_factors)

        # [minibatch_size, num_classes*5]
        target_encodings = tf.tile(target_encodings, [1, self.num_classes])

        # One label column per class; index 0 is skipped below.
        label_columns = tf.unstack(labels_one_hot, axis=1)
        per_class_weights = [
            tf.ones(shape=[tf.shape(predicted_encodings)[0], 5],
                    dtype=tf.float32)
            * tf.expand_dims(label_columns[i], axis=1)
            for i in range(1, self.num_classes + 1)
        ]
        # [minibatch_size, num_classes*5]
        class_weights = tf.concat(per_class_weights, axis=1)

        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_one_hot)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask,
                classes_weights=class_weights)
            # Register the location loss in the losses collection.
            slim.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def rpn_loss(self):
    '''
    Build the RPN location and classification losses for one minibatch.

    :param: self.gtboxes_and_label: [n, 5]->[ymin, xmin, ymax, xmax, cls]
    :param: self.anchors: [m, 4]-> [ymin, xmin, ymax, xmax]
    :param:self.rpn_encode_boxes: [m, 4]->[ycenter, xcenter, h, w]
    :return: tuple (location_loss, classify_loss)
    '''
    with tf.variable_scope('rpn_loss'):
        sampled_idx, matched_gtboxes, fg_mask, labels_onehot = \
            self.make_minibatch()

        anchors = tf.gather(self.anchors, sampled_idx)
        predicted_encodings = tf.gather(self.rpn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.rpn_scores, sampled_idx)

        # NOTE(review): arguments are passed positionally as
        # (anchors, matched gtboxes, scale factors) - confirm this matches
        # the parameter order of encode_and_decode.encode_boxes.
        target_encodings = encode_and_decode.encode_boxes(
            anchors, matched_gtboxes, self.scale_factors)

        # summary
        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_boxes = anchors * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1)))[0]
        positive_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=positive_boxes,
            text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = anchors * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(bg_mask, 1)))[0]
        negative_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=negative_boxes,
            text=negative_count)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=predicted_encodings,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        # clip boxes into image shape
        decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decoded_boxes, tf.shape(self.img_batch))

        decoded_positive_boxes = decoded_boxes * tf.expand_dims(fg_mask, 1)
        decoded_positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1)))[0]
        decoded_positive_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=decoded_positive_boxes,
            text=decoded_positive_count)

        tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                         positive_img)
        tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                         negative_img)
        tf.summary.image('images/rpn/losses/decode_anchor_positive',
                         decoded_positive_img)

        # losses (both are created inside the same variable scope)
        with tf.variable_scope('rpn_localization_losses'):
            classify_loss = slim.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_onehot)

            location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask)
            # Add the location loss to the losses collection.
            slim.losses.add_loss(location_loss)

        return location_loss, classify_loss
def batch_slice_build_sample(gtboxes_and_label, rpn_proposals_boxes):
    """Select positive / negative proposals and build a fixed-size minibatch.

    :param gtboxes_and_label: ground-truth rows; all columns but the last
        are reshaped to 4-value boxes, the last column is the class id.
    :param rpn_proposals_boxes: proposal boxes produced by the RPN.
    :return: tuple ``(minibatch_reference_proboxes,
        minibatch_encode_gtboxes, object_mask, gt_class_ids)``; each one is
        padded with zeros by ``cfgs.FAST_RCNN_MINIBATCH_SIZE`` minus the
        number of sampled proposals.
    """
    with tf.name_scope('select_pos_neg_samples'):
        raw_boxes = tf.reshape(gtboxes_and_label[:, :-1], [-1, 4])
        gtboxes = tf.cast(raw_boxes, tf.float32)
        raw_class_ids = tf.reshape(gtboxes_and_label[:, -1], [-1, ])
        gt_class_ids = tf.cast(raw_class_ids, tf.int32)

        # Trim the ground-truth boxes and keep the class ids aligned.
        gtboxes, non_zeros = boxes_utils.trim_zeros_graph(
            gtboxes, name="trim_gt_box")  # [M, 4]
        gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros)

        rpn_proposals_boxes, _ = boxes_utils.trim_zeros_graph(
            rpn_proposals_boxes, name="trim_rpn_proposal_train")

        overlaps = iou.iou_calculate(rpn_proposals_boxes, gtboxes)  # [N, M]
        best_gt_per_proposal = tf.cast(
            tf.argmax(overlaps, axis=1), tf.int32)  # [N, ]
        best_iou_per_proposal = tf.reduce_max(overlaps, axis=1)

        is_positive = tf.cast(
            tf.greater_equal(best_iou_per_proposal,
                             cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD),
            tf.int32)

        reference_boxes_mattached_gtboxes = tf.gather(
            gtboxes, best_gt_per_proposal)  # [N, 4]
        gt_class_ids = tf.gather(gt_class_ids, best_gt_per_proposal)  # [N, ]
        object_mask = tf.cast(is_positive, tf.float32)  # [N, ]

        # Non-positive proposals get class id 0 through this multiplication.
        gt_class_ids = gt_class_ids * is_positive

    with tf.name_scope('head_train_minibatch'):
        # Choose the positive indices.
        positive_indices = tf.reshape(
            tf.where(tf.equal(object_mask, 1.)), [-1])
        max_positives = tf.cast(
            cfgs.FAST_RCNN_MINIBATCH_SIZE * cfgs.FAST_RCNN_POSITIVE_RATE,
            tf.int32)
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0], max_positives)

        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(
            positive_indices, begin=[0], size=[num_of_positives])

        # Choose the negative indices. Their number is derived from the
        # number of positives and the configured positive rate.
        negative_indices = tf.reshape(
            tf.where(tf.equal(object_mask, 0.)), [-1])
        total_wanted = tf.cast(
            int(1. / cfgs.FAST_RCNN_POSITIVE_RATE) * num_of_positives,
            tf.int32)
        num_of_negatives = total_wanted - num_of_positives
        num_of_negatives = tf.minimum(
            tf.shape(negative_indices)[0], num_of_negatives)

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(
            negative_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat(
            [positive_indices, negative_indices], axis=0)
        minibatch_reference_gtboxes = tf.gather(
            reference_boxes_mattached_gtboxes, minibatch_indices)
        minibatch_reference_proboxes = tf.gather(
            rpn_proposals_boxes, minibatch_indices)

        # Encode the matched ground-truth boxes against the proposals.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_gtboxes,
            reference_boxes=minibatch_reference_proboxes,
            scale_factors=cfgs.BBOX_STD_DEV)

        object_mask = tf.gather(object_mask, minibatch_indices)
        gt_class_ids = tf.gather(gt_class_ids, minibatch_indices)

        # Pad every output with zeros up to the configured minibatch size.
        num_sampled = num_of_positives + num_of_negatives
        gap = tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE - num_sampled,
                      dtype=tf.int32)
        bbox_padding = tf.zeros((gap, 4))
        minibatch_reference_proboxes = tf.concat(
            [minibatch_reference_proboxes, bbox_padding], axis=0)
        minibatch_encode_gtboxes = tf.concat(
            [minibatch_encode_gtboxes, bbox_padding], axis=0)
        object_mask = tf.pad(object_mask, [(0, gap)])
        gt_class_ids = tf.pad(gt_class_ids, [(0, gap)])

        return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids
def rpn_losses(self):
    """Build the RPN location and classification losses for one minibatch.

    A minibatch of anchors is sampled with ``self.make_minibatch``. Image
    summaries are written for the positive anchors, the negative anchors
    and the single box with the highest value in column 1 of the gathered
    scores. The classification loss is a plain softmax cross-entropy.

    :return: tuple ``(location_loss, classification_loss)``. The location
        loss is also registered through ``slim.losses.add_loss``.
    """
    with tf.variable_scope('rpn_losses'):
        sampled_idx, matched_gtboxes, fg_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        anchors = tf.gather(self.anchors, sampled_idx)
        predicted_encodings = tf.gather(self.rpn_encode_boxes, sampled_idx)
        predicted_scores = tf.gather(self.rpn_scores, sampled_idx)

        # Regression targets: matched ground-truth boxes encoded against
        # the sampled anchors.
        target_encodings = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_boxes = anchors * tf.expand_dims(fg_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(fg_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        bg_mask = tf.cast(
            tf.logical_not(tf.cast(fg_mask, tf.bool)), tf.float32)
        negative_boxes = anchors * tf.expand_dims(bg_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(fg_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=predicted_encodings,
            reference_boxes=anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Visualise the single box ranked highest by score column 1.
        best_scores, best_idx = tf.nn.top_k(predicted_scores[:, 1], k=1)
        best_boxes = tf.gather(decoded_boxes, best_idx)
        best_img = draw_box_with_color(
            self.img_batch, best_boxes, text=tf.shape(best_scores)[0])
        tf.summary.image('/top_1', best_img)

        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=predicted_encodings,
                gtboxes=target_encodings,
                object_weights=fg_mask)
            # Add the smooth-L1 loss to the losses collection.
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=predicted_scores,
                onehot_labels=labels_one_hot)

        return location_loss, classification_loss
def fast_rcnn_loss(self):
    """Build the Fast R-CNN losses with a focal-style classification term.

    A minibatch of proposals is sampled with ``self.fast_rcnn_minibatch``.
    Image summaries are written for the positive and negative proposals,
    and, when ``cfgs.CLASS_NUM == 1``, for the five decoded boxes with the
    highest softmax score in column 1.

    The classification term multiplies the softmax cross-entropy by a
    per-entry ``alpha_t`` (0.25 where the one-hot label is 1, 0.75
    elsewhere) and by ``(1 - p_t) ** gamma``, where ``gamma`` is -2 where
    ``p_t < 0.5`` and +2 otherwise (marked "PFL" in the original code). The
    weighted entries are summed over all class columns; the previous
    version summed only columns 0 and 1, which is the same value when there
    are exactly two columns.

    :return: tuple ``(fast_rcnn_location_loss,
        fast_rcnn_classification_loss)``.
    """
    with tf.variable_scope('fast_rcnn_loss'):
        minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
            minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        minibatch_reference_boxes = tf.gather(
            self.fast_rcnn_all_level_proposals, minibatch_indices)
        # [minibatch_size, num_classes*4]
        minibatch_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes, minibatch_indices)
        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)

        # Rows whose mask entry is 0 are zeroed before drawing.
        positive_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes * tf.expand_dims(minibatch_object_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1.0)))[0])

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)), tf.float32)
        negative_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 0.0)))[0])

        tf.summary.image('/positive_proposals', positive_proposals_in_img)
        tf.summary.image('/negative_proposals', negative_proposals_in_img)

        # The top-5 visualisation is only produced in the single-class setup.
        if cfgs.CLASS_NUM == 1:
            minibatch_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_encode_boxes,
                reference_boxes=minibatch_reference_boxes,
                scale_factors=self.scale_factors)

            minibatch_softmax_scores = tf.gather(
                slim.softmax(self.fast_rcnn_scores), minibatch_indices)
            top_k_scores, top_k_indices = tf.nn.top_k(
                minibatch_softmax_scores[:, 1], k=5)

            top_detections_in_img = draw_boxes_with_scores(
                self.img_batch,
                boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                scores=top_k_scores)
            tf.summary.image('/top_5', top_detections_in_img)

        # Encode the matched ground-truth boxes against the proposals.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        # [minibatch_size, num_classes*4]
        minibatch_encode_gtboxes = tf.tile(
            minibatch_encode_gtboxes, [1, self.num_classes])

        # One label column per class; index 0 is skipped by the loop below.
        class_weights_list = []
        category_list = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights = tf.ones(
                shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                dtype=tf.float32)
            tmp_class_weights = tmp_class_weights * tf.expand_dims(
                category_list[i], axis=1)
            class_weights_list.append(tmp_class_weights)
        # [minibatch_size, num_classes*4]
        class_weights = tf.concat(class_weights_list, axis=1)

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            logits = tf.cast(minibatch_scores, tf.float32)
            onehot_labels = tf.cast(minibatch_label_one_hot, tf.float32)
            one = tf.ones(shape=tf.shape(onehot_labels), dtype=tf.float32)

            # NOTE(review): p_t is taken from the raw scores, not from
            # softmax probabilities - confirm this is intended.
            predictions_pt = tf.where(
                tf.equal(onehot_labels, 1), logits, 1 - logits)

            # alpha_t: 0.25 where the label is 1, 0.75 elsewhere.
            alpha_t = tf.scalar_mul(0.25, one)
            alpha_t = tf.where(tf.equal(onehot_labels, 1), alpha_t, 1 - alpha_t)

            # Exponent is -2 where p_t < 0.5 and +2 otherwise.
            gamma = tf.scalar_mul(2, one)
            new_gamma = tf.where(tf.less(predictions_pt, 0.5), -gamma, gamma)

            # PFL variant.
            # NOTE(review): slim.losses.softmax_cross_entropy yields a single
            # reduced value that is broadcast over alpha_t here, and it may
            # also register itself in the losses collection - verify that the
            # classification term is not counted twice downstream.
            cross_entropy = slim.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=onehot_labels)
            weighted_loss = tf.multiply(
                tf.multiply(alpha_t, cross_entropy),
                tf.pow(1 - predictions_pt, new_gamma))

            # For FL and PFL keep this reduction; for plain CE remove it.
            # Summing over every column (not just 0 and 1) keeps the result
            # unchanged for two columns and stops dropping further classes.
            fast_rcnn_classification_loss = tf.reduce_sum(weighted_loss)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=class_weights)
            # Register the location loss in the losses collection.
            slim.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def fast_rcnn_loss(self):
    '''
    Build the Fast R-CNN losses for the sampled minibatch of proposals.

    Five losses are built: horizontal-box classification and regression,
    rotated-box classification and regression, and head-quadrant regression.
    Each of them is registered through ``slim.losses.add_loss``; the
    head-quadrant loss is registered and returned multiplied by 10.

    All three regressions are class-agnostic: the encoded targets are not
    tiled per class and ``classes_weights=None`` is passed to
    ``losses.l1_smooth_losses``.

    :return: tuple ``(location_loss, classification_loss,
        location_rotate_loss, classification_rotate_loss,
        weighted_head_quadrant_loss)``
    '''
    # The head-quadrant term is deliberately given more importance.
    head_quadrant_loss_weight = 10

    with tf.variable_scope('fast_rcnn_loss'):
        (minibatch_indices,
         minibatch_reference_boxes_mattached_gtboxes,
         minibatch_reference_boxes_mattached_gtboxes_rotate,
         minibatch_reference_boxes_mattached_head_quadrant,
         minibatch_object_mask,
         minibatch_label_one_hot) = self.fast_rcnn_minibatch(
             self.fast_rcnn_all_level_proposals)

        # Pick the sampled proposals and the predictions made for them.
        minibatch_reference_boxes = tf.gather(
            self.fast_rcnn_all_level_proposals, minibatch_indices)
        minibatch_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes, minibatch_indices)
        minibatch_encode_boxes_rotate = tf.gather(
            self.fast_rcnn_encode_boxes_rotate, minibatch_indices)
        minibatch_head_quadrant = tf.gather(
            self.fast_rcnn_head_quadrant, minibatch_indices)
        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)
        minibatch_scores_rotate = tf.gather(
            self.fast_rcnn_scores_rotate, minibatch_indices)

        # Encode the matched ground-truth boxes relative to the proposals.
        # Class-agnostic regression: the targets are used as-is, without
        # tiling them num_classes times.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)
        minibatch_encode_gtboxes_rotate = encode_and_decode.encode_boxes_rotate(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes_rotate,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        # NOTE: no per-class weight masks are built here. Every regression
        # loss below passes classes_weights=None, so such masks would only
        # add unused ops to the graph.

        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = losses.focal_loss(
                prediction_tensor=minibatch_scores,
                target_tensor=minibatch_label_one_hot)
            slim.losses.add_loss(fast_rcnn_classification_loss)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=None)
            slim.losses.add_loss(fast_rcnn_location_loss)

        with tf.variable_scope('fast_rcnn_classification_rotate_loss'):
            fast_rcnn_classification_rotate_loss = losses.focal_loss(
                prediction_tensor=minibatch_scores_rotate,
                target_tensor=minibatch_label_one_hot)
            slim.losses.add_loss(fast_rcnn_classification_rotate_loss)

        with tf.variable_scope('fast_rcnn_location_rotate_loss'):
            fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes_rotate,
                gtboxes=minibatch_encode_gtboxes_rotate,
                object_weights=minibatch_object_mask,
                classes_weights=None)
            slim.losses.add_loss(fast_rcnn_location_rotate_loss)

        with tf.variable_scope('fast_rcnn_head_quadrant_loss'):
            fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_head_quadrant,
                gtboxes=minibatch_reference_boxes_mattached_head_quadrant,
                object_weights=minibatch_object_mask,
                classes_weights=None)
            # Scale once so the tensor registered in the loss collection is
            # the very same tensor that is handed back to the caller.
            weighted_head_quadrant_loss = (
                fast_rcnn_head_quadrant_loss * head_quadrant_loss_weight)
            slim.losses.add_loss(weighted_head_quadrant_loss)

        return (fast_rcnn_location_loss,
                fast_rcnn_classification_loss,
                fast_rcnn_location_rotate_loss,
                fast_rcnn_classification_rotate_loss,
                weighted_head_quadrant_loss)
def fast_rcnn_loss(self):
    '''
    Build the Fast R-CNN classification and box-regression losses for the
    sampled minibatch of proposals, and emit debugging summaries.

    The box-regression loss is registered explicitly through
    ``slim.losses.add_loss``. Image summaries of the positive and negative
    proposals and tensor summaries of the intermediate tensors are written
    as a side effect.

    :return: tuple ``(fast_rcnn_boxes_loss, fast_rcnn_cls_loss)``
    '''
    with tf.variable_scope('fast_rcnn_loss'):
        (minibatch_indices,
         minibatch_gtboxes,
         minibatch_onehot_label,
         minibatch_object_mask) = self.make_minibatch()

        minibatch_proposal_boxes = tf.gather(self.rois_boxes,
                                             minibatch_indices)
        minibatch_predict_scores = tf.gather(self.fast_rcnn_cls_scores,
                                             minibatch_indices)
        minibatch_predict_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes, minibatch_indices)

        # encode minibatch_gtboxes
        minibatch_encode_gtboxes = encode_boxes(
            anchors=minibatch_proposal_boxes,
            gtboxes=minibatch_gtboxes,
            scale_factors=self.scale_factors)
        # [minibatch_size, 4] -> [minibatch_size, num_cls*4]
        minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                           [1, self.num_cls])

        # class_weight_mask [minibatch_size, num_cls*4]: each label column
        # 1..num_cls is repeated over that class' 4 box coordinates
        # (column 0 is skipped).
        # The row count is taken from the label tensor itself instead of the
        # configured self.fast_rcnn_minibatch_size, so the multiply below
        # still broadcasts when the sampled minibatch has a different size.
        category_list = tf.unstack(minibatch_onehot_label, axis=1)
        minibatch_rows = tf.shape(minibatch_onehot_label)[0]
        class_weight_mask_list = [
            tf.ones([minibatch_rows, 4], dtype=tf.float32)
            * tf.expand_dims(category_list[i], axis=1)
            for i in range(1, self.num_cls + 1)
        ]
        class_weight_mask = tf.concat(class_weight_mask_list, axis=1)

        # cls loss
        with tf.variable_scope('fast_rcnn_cls_losses'):
            fast_rcnn_cls_loss = slim.losses.softmax_cross_entropy(
                logits=minibatch_predict_scores,
                onehot_labels=minibatch_onehot_label)

        # boxes loss
        with tf.variable_scope('fast_rcnn_boxes_losses'):
            fast_rcnn_boxes_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_predict_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=class_weight_mask)
            slim.losses.add_loss(fast_rcnn_boxes_loss)

        # summary positive proposals and negative proposals
        minibatch_proposal_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            minibatch_proposal_boxes, self.img_shape)

        # Multiplying by the mask zeroes the boxes that do not belong to the
        # group being drawn; the text is the number of boxes in that group.
        minibatch_positive_mask = tf.cast(minibatch_object_mask, tf.float32)
        minibatch_positive_proposals = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=minibatch_proposal_boxes
            * tf.expand_dims(minibatch_positive_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1)))[0])

        minibatch_negative_mask = tf.cast(
            tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
            tf.float32)
        minibatch_negative_proposals = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=minibatch_proposal_boxes
            * tf.expand_dims(minibatch_negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_negative_mask, 1)))[0])

        tf.summary.image('minibatch_positive_proposals',
                         minibatch_positive_proposals)
        tf.summary.image('minibatch_negative_proposal',
                         minibatch_negative_proposals)

        # check the cls tensor part
        tf.summary.tensor_summary('minibatch_object_mask',
                                  minibatch_object_mask)
        tf.summary.tensor_summary('class_weight_mask', class_weight_mask)
        tf.summary.tensor_summary('minibatch_predict_encode_boxes',
                                  minibatch_predict_encode_boxes)
        tf.summary.tensor_summary('minibatch_encode_gtboxes',
                                  minibatch_encode_gtboxes)
        tf.summary.tensor_summary('location_loss', fast_rcnn_boxes_loss)
        tf.summary.tensor_summary('logits', minibatch_predict_scores)
        tf.summary.tensor_summary('one_hot', minibatch_onehot_label)

    return fast_rcnn_boxes_loss, fast_rcnn_cls_loss