def fast_rcnn_find_positive_negative_samples(self, reference_boxes):
    '''
    Pair every second-stage reference box with its best-overlapping ground truth.

    IoU is computed between reference_boxes and the leading columns of
    self.gtboxes_and_label_minAreaRectangle; the row-wise argmax selects the
    matched ground truth. A box is an object when its best IoU is
    >= self.fast_rcnn_positives_iou_threshold, otherwise it is background.

    :param reference_boxes: [num_of_input_boxes, 4]
    :return:
    reference_boxes_mattached_gtboxes: matched row of the 4-column ground truth
        for each reference box, shape: [num_of_input_boxes, 4]
    reference_boxes_mattached_gtboxes_rotate: matched row of
        self.gtboxes_and_label (label column dropped, 5 columns)
    reference_boxes_mattached_head_quadrant: matched row of
        self.head_quadrant (4 columns)
    object_mask: float32, 1.0 where the box is an object, 0.0 for background
    label: int32 class id read from the last column of
        self.gtboxes_and_label_minAreaRectangle, forced to 0 for background
        (an integer id per box, not a one-hot vector)
    '''

    def _float_rows(tensor, width):
        # Flatten to rows of `width` columns, then cast to float32.
        return tf.cast(tf.reshape(tensor, [-1, width]), tf.float32)

    with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
        gt_with_label = self.gtboxes_and_label_minAreaRectangle

        gt_horizontal = _float_rows(gt_with_label[:, :-1], 4)  # [M, 4]
        gt_rotated = _float_rows(self.gtboxes_and_label[:, :-1], 5)  # [M, 5]
        gt_head_quadrant = _float_rows(self.head_quadrant, 4)  # [M, 4]

        overlap = iou.iou_calculate(reference_boxes, gt_horizontal)  # [N, M]

        # Index of the best ground truth per reference box, and that IoU.
        best_gt_index = tf.cast(tf.argmax(overlap, axis=1), tf.int32)  # [N, ]
        best_overlap = tf.reduce_max(overlap, axis=1)  # [N, ]

        is_object = tf.cast(
            tf.greater_equal(best_overlap,
                             self.fast_rcnn_positives_iou_threshold),
            tf.int32)

        # Background boxes are still gathered a ground truth (their argmax);
        # object_mask / the zeroed label is what distinguishes them.
        matched_horizontal = tf.gather(gt_horizontal, best_gt_index)  # [N, 4]
        matched_rotated = tf.gather(gt_rotated, best_gt_index)
        matched_head_quadrant = tf.gather(gt_head_quadrant, best_gt_index)

        object_mask = tf.cast(is_object, tf.float32)  # [N, ]

        matched_label = tf.gather(gt_with_label[:, -1], best_gt_index)  # [N, ]
        # Multiplying by the 0/1 indicator sends background to class 0.
        matched_label = tf.cast(matched_label, tf.int32) * is_object

        return (matched_horizontal, matched_rotated, matched_head_quadrant,
                object_mask, matched_label)
def fast_rcnn_find_positive_negative_samples(self, reference_boxes):
    '''
    Pair every second-stage reference box with its best-overlapping ground truth.

    IoU is computed between reference_boxes and the leading columns of
    self.gtboxes_and_label_minAreaRectangle; the row-wise argmax selects the
    matched ground truth. A box is an object when its best IoU is
    >= self.fast_rcnn_positives_iou_threshold, otherwise it is background.

    :param reference_boxes: [num_of_input_boxes, 4]
    :return:
    reference_boxes_mattached_gtboxes: matched ground-truth row for each
        reference box, shape: [num_of_input_boxes, 4]
    object_mask: float32, 1.0 where the box is an object, 0.0 for background
    label: int32 class id read from the last ground-truth column, forced to 0
        for background (an integer id per box, not a one-hot vector)
    '''
    with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
        gt_with_label = self.gtboxes_and_label_minAreaRectangle

        gt_coords = tf.reshape(gt_with_label[:, :-1], [-1, 4])
        gt_coords = tf.cast(gt_coords, tf.float32)  # [M, 4]

        overlap = iou.iou_calculate(reference_boxes, gt_coords)  # [N, M]

        # Index of the best ground truth per reference box, and that IoU.
        best_gt_index = tf.cast(tf.argmax(overlap, axis=1), tf.int32)  # [N, ]
        best_overlap = tf.reduce_max(overlap, axis=1)  # [N, ]

        is_object = tf.cast(
            tf.greater_equal(best_overlap,
                             self.fast_rcnn_positives_iou_threshold),
            tf.int32)

        # Background boxes are still gathered a ground truth (their argmax);
        # object_mask / the zeroed label is what distinguishes them.
        matched_gt = tf.gather(gt_coords, best_gt_index)  # [N, 4]

        # Float 0/1 indicator; usable as a per-box weight so background
        # boxes contribute nothing downstream.
        object_mask = tf.cast(is_object, tf.float32)  # [N, ]

        matched_label = tf.gather(gt_with_label[:, -1], best_gt_index)  # [N, ]
        # Multiplying by the 0/1 indicator sends background to class 0.
        matched_label = tf.cast(matched_label, tf.int32) * is_object

        return matched_gt, object_mask, matched_label
def iou_smooth_l1_loss_1(preds, anchor_state, target_boxes, anchors,
                         sigma=3.0, alpha=1.0, beta=1.0, is_refine=False):
    '''
    IoU-weighted loss over the anchors whose anchor_state equals 1.

    Steps:
      1. (only when cfgs.METHOD == 'H' and not is_refine) rewrite the 4-column
         anchors as 5-column [x_c, y_c, w, h, theta] with theta = -90.
      2. Keep the rows where anchor_state == 1 and decode preds against the
         anchors with bbox_transform.rbbox_transform_inv.
      3. Smooth-L1 on the angle difference, minus iou_calculate on the
         (x, y, w, h) parts, gives the differentiable term.
      4. That term is rescaled by a gradient-stopped factor
         (exp(alpha * (1 - overlaps) ** beta) - 1) / (term + cfgs.EPSILON),
         where overlaps comes from iou_rotate_calculate2 through tf.py_func.
      5. The sum is divided by max(1, number of anchors with state 1).

    :param preds: regression deltas, one row per anchor
    :param anchor_state: per-anchor state; only entries equal to 1 are used
    :param target_boxes: per-anchor targets, reshaped to 6 columns; the last
        column is dropped before the overlap computation
    :param anchors: anchor boxes (4 columns in the 'H' branch, otherwise
        passed to the decoder unchanged)
    :param sigma: smooth-L1 transition parameter (switch point is 1 / sigma**2)
    :param alpha: multiplier inside the exponential of the IoU factor
    :param beta: exponent applied to (1 - overlaps) in the IoU factor
    :param is_refine: when True the 'H' anchor conversion is skipped
    :return: scalar loss tensor
    '''
    if cfgs.METHOD == 'H' and not is_refine:
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        # Note the pairing: "h" spans columns 0/2 and "w" spans columns 1/3,
        # combined with a fixed theta of -90.
        side_h = anchors[:, 2] - anchors[:, 0] + 1
        side_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, side_w, side_h, angle]))

    smooth_l1_cut = 1.0 / (sigma ** 2)
    sigma_sq = sigma ** 2

    # Restrict everything to anchors flagged with state 1.
    selected = tf.reshape(tf.where(tf.equal(anchor_state, 1)), [-1, ])
    preds = tf.gather(preds, selected)
    target_boxes = tf.gather(target_boxes, selected)
    anchors = tf.gather(anchors, selected)

    decoded = bbox_transform.rbbox_transform_inv(
        boxes=anchors,
        deltas=preds,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
    decoded = tf.reshape(decoded, [-1, 5])
    target_boxes = tf.reshape(target_boxes, [-1, 6])

    pred_x, pred_y, pred_w, pred_h, pred_theta = tf.unstack(decoded, axis=-1)
    gt_x, gt_y, gt_w, gt_h, gt_theta, _ = tf.unstack(target_boxes, axis=-1)

    # Smooth L1 on the angle difference:
    #   f(x) = 0.5 * (sigma * x)^2      if |x| < 1 / sigma^2
    #          |x| - 0.5 / sigma^2      otherwise
    angle_gap = tf.abs(pred_theta - gt_theta)
    angle_term = tf.where(
        tf.less(angle_gap, smooth_l1_cut),
        0.5 * sigma_sq * tf.pow(angle_gap, 2),
        angle_gap - 0.5 / sigma_sq)

    pred_xywh = tf.transpose(tf.stack([pred_x, pred_y, pred_w, pred_h]))
    gt_xywh = tf.transpose(tf.stack([gt_x, gt_y, gt_w, gt_h]))
    # NOTE(review): the subtraction below presumes iou_calculate yields a
    # value broadcastable against the per-box angle term - confirm.
    plain_iou = iou_calculate(pred_xywh, gt_xywh)

    approx_loss = angle_term - plain_iou

    # Rotated overlap is evaluated outside the graph via py_func. Tout is a
    # list, so the result is reshaped into a single column afterwards.
    rotated_overlap = tf.py_func(
        iou_rotate_calculate2,
        inp=[tf.reshape(decoded, [-1, 5]),
             tf.reshape(target_boxes[:, :-1], [-1, 5])],
        Tout=[tf.float32])
    rotated_overlap = tf.reshape(rotated_overlap, [-1, 1])
    approx_loss = tf.reshape(approx_loss, [-1, 1])

    # Gradient-stopped rescaling: magnitude follows
    # exp(alpha * (1 - overlap)^beta) - 1, direction follows approx_loss.
    target_magnitude = tf.stop_gradient(
        tf.exp(alpha * (1 - rotated_overlap) ** beta) - 1)
    current_magnitude = tf.stop_gradient(approx_loss) + cfgs.EPSILON
    iou_factor = target_magnitude / current_magnitude

    # Normalise by the number of state-1 anchors, never less than 1.
    state_one_rows = tf.stop_gradient(tf.where(tf.equal(anchor_state, 1)))
    count = tf.cast(tf.shape(state_one_rows)[0], tf.float32)
    count = tf.maximum(1.0, count)

    return tf.reduce_sum(approx_loss * iou_factor) / count
def rpn_find_positive_negative_samples(self, anchors):
    '''
    Label every anchor as object, background or ignored for RPN training.

    Rules applied below:
      * positive  - best IoU >= self.rpn_iou_positive_threshold, or the anchor
                    ties the maximum IoU of some ground-truth column
      * negative  - not positive, and 0.1 <= best IoU <
                    self.rpn_iou_negative_threshold
      * ignored   - everything else

    :param anchors: [valid_num_of_anchors, 4]. N denotes valid_num_of_anchors
    :return: labels, anchors_matched_gtboxes, object_mask
    labels: float32 [N, ]. positive is 1, negative is 0, ignored is -1
    anchors_matched_gtboxes: [N, 4], the argmax-IoU gtbox of every anchor
        (only meaningful for positive anchors)
    object_mask: float32 [N, ]. 1.0 for positive anchors, 0.0 for the rest
    '''
    with tf.variable_scope('rpn_find_positive_negative_samples'):
        gt_coords = tf.reshape(self.gtboxes_and_label[:, :-1], [-1, 4])
        gt_coords = tf.cast(gt_coords, tf.float32)

        overlap = iou.iou_calculate(anchors, gt_coords)  # [N, M]
        best_overlap = tf.reduce_max(overlap, axis=1)

        # Every anchor starts out as ignored (-1).
        anchor_count = tf.shape(anchors)[0]
        labels = tf.ones(shape=[anchor_count, ], dtype=tf.float32) * (-1)

        best_gt_index = tf.cast(tf.argmax(overlap, axis=1), tf.int32)

        # Rule 1: IoU with some ground truth reaches the positive threshold.
        above_threshold = tf.greater_equal(
            best_overlap, self.rpn_iou_positive_threshold)

        # Rule 2: the anchor attains the best IoU of at least one ground
        # truth, so a ground truth is not left without a positive anchor
        # when nothing clears the threshold.
        # NOTE(review): if a column's maximum is 0 every anchor ties it and
        # would be marked positive - confirm this cannot occur upstream.
        best_per_gt = tf.reduce_max(overlap, 0)
        tie_count = tf.reduce_sum(
            tf.cast(tf.equal(overlap, best_per_gt), tf.float32), axis=1)

        is_positive = tf.logical_or(above_threshold,
                                    tf.cast(tie_count, tf.bool))

        # After this step: positive is 1, everything else is still -1.
        labels = labels + 2 * tf.cast(is_positive, tf.float32)

        # Non-positive anchors also receive their argmax gtbox; it is a
        # placeholder that object_mask lets callers disregard.
        anchors_matched_gtboxes = tf.gather(gt_coords, best_gt_index)  # [N, 4]

        below_negative = tf.less(best_overlap,
                                 self.rpn_iou_negative_threshold)
        is_negative = tf.logical_and(
            below_negative, tf.greater_equal(best_overlap, 0.1))

        # After this step: positive is >= 1.0, negative is 0, ignored is -1.
        labels = labels + tf.cast(is_negative, tf.float32)

        # Why ">= 1.0" rather than "== 1.0": an anchor promoted by rule 2 may
        # itself have a low IoU. It went -1 -> +1 as a positive and, because
        # it also satisfies the negative test, got another +1, ending at 2.0.
        # Positive anchors therefore hold 1.0 or 2.0 at this point.
        positive_flag = tf.cast(tf.greater_equal(labels, 1.0), tf.float32)
        ignored_flag = tf.cast(tf.equal(labels, -1.0), tf.float32) * -1

        # Collapse to the final encoding: 1 positive, 0 negative, -1 ignored.
        labels = positive_flag + ignored_flag

        object_mask = tf.cast(positive_flag, tf.float32)  # 1.0 object, else 0.0

        return labels, anchors_matched_gtboxes, object_mask
def batch_slice_build_sample(gtboxes_and_label, rpn_proposals_boxes):
    '''
    Build one fixed-size head-training minibatch from RPN proposals.

    Zero rows are trimmed from both inputs, each proposal is matched to its
    highest-IoU ground truth, then positives and negatives are randomly
    sampled and the result is zero-padded to cfgs.FAST_RCNN_MINIBATCH_SIZE.

    :param gtboxes_and_label: rows of 4 box coordinates followed by a class id
    :param rpn_proposals_boxes: proposal boxes, 4 columns
    :return:
    minibatch_reference_proboxes: sampled proposals, zero-padded
    minibatch_encode_gtboxes: matched gtboxes encoded against the sampled
        proposals with cfgs.BBOX_STD_DEV, zero-padded
    object_mask: float32, 1.0 for sampled positives, 0.0 otherwise/padding
    gt_class_ids: int32 class id of each sample, 0 for negatives/padding
    '''

    def _random_subset(candidates, count):
        # Shuffle the candidate indices and keep the first `count` of them.
        shuffled = tf.random_shuffle(candidates)
        return tf.slice(shuffled, begin=[0], size=[count])

    with tf.name_scope('select_pos_neg_samples'):
        gt_coords = tf.cast(
            tf.reshape(gtboxes_and_label[:, :-1], [-1, 4]), tf.float32)
        class_ids = tf.cast(
            tf.reshape(gtboxes_and_label[:, -1], [-1, ]), tf.int32)

        # Drop zero rows from the ground truth and keep ids aligned with it.
        gt_coords, gt_keep = boxes_utils.trim_zeros_graph(
            gt_coords, name="trim_gt_box")  # [M, 4]
        class_ids = tf.boolean_mask(class_ids, gt_keep)

        proposals, _ = boxes_utils.trim_zeros_graph(
            rpn_proposals_boxes, name="trim_rpn_proposal_train")

        overlap = iou.iou_calculate(proposals, gt_coords)  # [N, M]
        best_gt_index = tf.cast(tf.argmax(overlap, axis=1), tf.int32)  # [N, ]
        best_overlap = tf.reduce_max(overlap, axis=1)

        is_object = tf.cast(
            tf.greater_equal(best_overlap,
                             cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD),
            tf.int32)

        matched_gt = tf.gather(gt_coords, best_gt_index)  # [N, 4]
        class_ids = tf.gather(class_ids, best_gt_index)  # [N, ]
        object_mask = tf.cast(is_object, tf.float32)  # [N, ]

        # Background proposals get class id 0.
        class_ids = class_ids * is_object

    with tf.name_scope('head_train_minibatch'):
        # Positives: at most MINIBATCH_SIZE * POSITIVE_RATE of them.
        positive_pool = tf.reshape(
            tf.where(tf.equal(object_mask, 1.)), [-1])
        positive_cap = tf.cast(
            cfgs.FAST_RCNN_MINIBATCH_SIZE * cfgs.FAST_RCNN_POSITIVE_RATE,
            tf.int32)
        num_of_positives = tf.minimum(tf.shape(positive_pool)[0],
                                      positive_cap)
        positive_indices = _random_subset(positive_pool, num_of_positives)

        # Negatives: tied to the number of positives actually drawn,
        # (int(1 / POSITIVE_RATE) - 1) negatives per positive, capped by
        # how many negatives exist.
        negative_pool = tf.reshape(
            tf.where(tf.equal(object_mask, 0.)), [-1])
        total_per_positive = int(1. / cfgs.FAST_RCNN_POSITIVE_RATE)
        num_of_negatives = tf.cast(total_per_positive * num_of_positives,
                                   tf.int32) - num_of_positives
        num_of_negatives = tf.minimum(tf.shape(negative_pool)[0],
                                      num_of_negatives)
        negative_indices = _random_subset(negative_pool, num_of_negatives)

        minibatch_indices = tf.concat(
            [positive_indices, negative_indices], axis=0)

        sampled_gt = tf.gather(matched_gt, minibatch_indices)
        sampled_proposals = tf.gather(proposals, minibatch_indices)

        # Encode the matched gtboxes relative to their proposals.
        encoded_gt = encode_and_decode.encode_boxes(
            unencode_boxes=sampled_gt,
            reference_boxes=sampled_proposals,
            scale_factors=cfgs.BBOX_STD_DEV)

        object_mask = tf.gather(object_mask, minibatch_indices)
        class_ids = tf.gather(class_ids, minibatch_indices)

        # Zero-pad every output up to the configured minibatch size.
        gap = tf.cast(
            cfgs.FAST_RCNN_MINIBATCH_SIZE
            - (num_of_positives + num_of_negatives),
            dtype=tf.int32)
        zero_boxes = tf.zeros((gap, 4))
        sampled_proposals = tf.concat([sampled_proposals, zero_boxes], axis=0)
        encoded_gt = tf.concat([encoded_gt, zero_boxes], axis=0)
        object_mask = tf.pad(object_mask, [(0, gap)])
        class_ids = tf.pad(class_ids, [(0, gap)])

    return sampled_proposals, encoded_gt, object_mask, class_ids