def detection_targets_graph_mod(proposals, gt_class_ids, gt_boxes, config):
    """Generate box-only detection targets for one image.

    Subsamples proposals and generates target class IDs, bounding box deltas
    and the matched ground-truth boxes for each. This variant takes no
    ``gt_masks`` input and produces no mask targets.

    Inputs:
    -------
    proposals:      [N, (y1, x1, y2, x2)] in normalized coordinates.
                    Proposals passed from the proposal layer. Might be zero
                    padded if there are not enough proposals.
    gt_class_ids:   [MAX_GT_INSTANCES] ground truth object class ids.
                    Negative ids mark crowd boxes.
    gt_boxes:       [MAX_GT_INSTANCES, (y1, x1, y2, x2)] ground truth boxes
                    in normalized coordinates. Might be zero padded.
    config:         configuration object; TRAIN_ROIS_PER_IMAGE,
                    ROI_POSITIVE_RATIO and BBOX_STD_DEV are read from it.

    Returns:
    --------
    rois:              [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
                       coordinates. Positive examples first, then negative
                       examples, then zero padding.
    roi_gt_class_ids:  [TRAIN_ROIS_PER_IMAGE] integer class IDs of the GT box
                       matched to each positive ROI. Zero for negative and
                       padding rows.
    roi_gt_deltas:     [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
                       refinements from each positive ROI to its matched GT
                       box, divided by config.BBOX_STD_DEV. Zero for negative
                       and padding rows.
    roi_gt_boxes:      [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] GT box matched
                       to each positive ROI. Zero for negative and padding
                       rows.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # Fail at run time if no proposals were supplied at all.
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion")
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    ##----------------------------------------------------------------------
    ## Remove zero padding
    ## 'non_zeros' flags the valid GT boxes and is reused to trim
    ## gt_class_ids so both stay aligned.
    ##----------------------------------------------------------------------
    proposals, _ = utils.trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = utils.trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")

    ##----------------------------------------------------------------------
    ## Handle COCO crowds
    ## A crowd box in COCO is a bounding box around several instances and
    ## is given a negative class ID. Crowd boxes are excluded from the
    ## training targets. Only boxes with class ID > 0 are kept as targets.
    ##----------------------------------------------------------------------
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)

    ##----------------------------------------------------------------------
    ## Overlaps with crowd boxes [proposals, crowds]. A proposal that
    ## touches a crowd box (max IoU >= 0.001) is not used as a negative.
    ##----------------------------------------------------------------------
    crowd_overlaps = overlaps_graph_mod(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    ##----------------------------------------------------------------------
    ## Overlaps matrix [proposals, gt_boxes] against the non-crowd GT boxes.
    ## roi_iou_max is the best overlap each ROI achieves with any GT box.
    ##----------------------------------------------------------------------
    overlaps = overlaps_graph_mod(proposals, gt_boxes)
    roi_iou_max = tf.reduce_max(overlaps, axis=1)

    ##----------------------------------------------------------------------
    ## 1. Positive ROIs: IoU >= 0.5 with some GT box.
    ##----------------------------------------------------------------------
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]

    ##----------------------------------------------------------------------
    ## 2. Negative ROIs: IoU < 0.5 with every GT box and not on a crowd.
    ##    tf.where returns shape [num_true, 1], hence the [:, 0].
    ##----------------------------------------------------------------------
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    ##----------------------------------------------------------------------
    ## 3. Subsample positive ROIs: at most
    ##    TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO of them.
    ##----------------------------------------------------------------------
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    # From here on positive_count is the number actually selected (a tensor).
    positive_count = tf.shape(positive_indices)[0]

    ##----------------------------------------------------------------------
    ## 4. Subsample negative ROIs: enough to keep the positive:negative ratio.
    ##----------------------------------------------------------------------
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]

    ##----------------------------------------------------------------------
    ## 5. Gather selected positive and negative ROIs
    ##----------------------------------------------------------------------
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    ##----------------------------------------------------------------------
    ## 6. Assign each positive ROI to the GT box it overlaps most.
    ##    With no GT boxes the overlaps matrix has zero columns and argmax
    ##    over that empty axis fails, so fall back to an empty assignment.
    ##----------------------------------------------------------------------
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    ##----------------------------------------------------------------------
    ## 7. Box refinement between the positive ROIs and their matched GT
    ##    boxes, scaled by BBOX_STD_DEV.
    ##----------------------------------------------------------------------
    roi_gt_deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    roi_gt_deltas /= config.BBOX_STD_DEV

    ##----------------------------------------------------------------------
    ## 8. Prepare final outputs
    ##    Append negative ROIs, then zero pad. The per-positive tensors get
    ##    N + P trailing zero rows so they line up with `rois`.
    ##----------------------------------------------------------------------
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    roi_gt_deltas = tf.pad(roi_gt_deltas, [(0, N + P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])

    return rois, roi_gt_class_ids, roi_gt_deltas, roi_gt_boxes
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Build the per-image training targets for the detection heads.

    A subset of the proposals is sampled and, for each sampled ROI, a target
    class ID, box refinement and mask are produced.

    Inputs:
    proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized
               coordinates. Might be zero padded if there are not enough
               proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
    gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.

    Returns: Target ROIs and corresponding class IDs, bounding box shifts,
    and masks.
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
    masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
           boundaries and resized to neural network output size.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # NOTE(review): another `detection_targets_graph` appears later in this
    # file; if both live at module level the later one wins -- confirm which
    # definition is intended.

    # Guard: at least one proposal row must be present.
    num_proposals = tf.shape(proposals)[0]
    roi_checks = [
        tf.Assert(tf.greater(num_proposals, 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(roi_checks):
        proposals = tf.identity(proposals)

    # Strip the zero padding; the GT validity flags also trim ids and masks.
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, valid_gt = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, valid_gt,
                                   name="trim_gt_class_ids")
    valid_gt_rows = tf.where(valid_gt)[:, 0]
    gt_masks = tf.gather(gt_masks, valid_gt_rows, axis=2,
                         name="trim_gt_masks")

    # COCO crowds: a crowd box surrounds several instances and carries a
    # negative class ID. Split them off so they are excluded from training.
    is_crowd = gt_class_ids < 0
    crowd_rows = tf.where(is_crowd)[:, 0]
    is_instance = gt_class_ids > 0
    instance_rows = tf.where(is_instance)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_rows)
    gt_class_ids = tf.gather(gt_class_ids, instance_rows)
    gt_boxes = tf.gather(gt_boxes, instance_rows)
    gt_masks = tf.gather(gt_masks, instance_rows, axis=2)

    # IoU of every proposal against the instance boxes: [proposals, gt_boxes]
    iou = overlaps_graph(proposals, gt_boxes)

    # IoU of every proposal against the crowd boxes: [proposals, crowd_boxes]
    crowd_iou = overlaps_graph(proposals, crowd_boxes)
    best_crowd_iou = tf.reduce_max(crowd_iou, axis=1)
    clear_of_crowds = best_crowd_iou < 0.001

    # Best IoU each proposal reaches with any instance box.
    best_iou = tf.reduce_max(iou, axis=1)

    # Positives: IoU >= 0.5 with some GT box.
    is_positive = best_iou >= 0.5
    pos_ix = tf.where(is_positive)[:, 0]

    # Negatives: IoU < 0.5 with every GT box, and not sitting on a crowd.
    is_negative = tf.logical_and(best_iou < 0.5, clear_of_crowds)
    neg_ix = tf.where(is_negative)[:, 0]

    # Sample positives, capped at the configured share of the ROI budget.
    max_positives = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    pos_ix = tf.random.shuffle(pos_ix)[:max_positives]
    num_pos = tf.shape(pos_ix)[0]

    # Sample negatives so the positive:negative ratio is maintained.
    inv_ratio = 1.0 / config.ROI_POSITIVE_RATIO
    total_wanted = tf.cast(inv_ratio * tf.cast(num_pos, tf.float32), tf.int32)
    max_negatives = total_wanted - num_pos
    neg_ix = tf.random.shuffle(neg_ix)[:max_negatives]

    # Pull out the chosen ROIs.
    positive_rois = tf.gather(proposals, pos_ix)
    negative_rois = tf.gather(proposals, neg_ix)

    # Match every positive ROI with its highest-IoU GT box. When there are
    # no GT columns at all, use an empty assignment instead of argmax.
    pos_iou = tf.gather(iou, pos_ix)
    has_gt = tf.greater(tf.shape(pos_iou)[1], 0)
    matched_gt = tf.cond(
        has_gt,
        true_fn=lambda: tf.argmax(pos_iou, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64),
    )
    roi_gt_boxes = tf.gather(gt_boxes, matched_gt)
    roi_gt_class_ids = tf.gather(gt_class_ids, matched_gt)

    # Box refinement targets for the positives, scaled by BBOX_STD_DEV.
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas = deltas / config.BBOX_STD_DEV

    # Masks: reorder to [N, height, width, 1] and take each ROI's GT mask.
    masks_nhw1 = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    roi_masks = tf.gather(masks_nhw1, matched_gt)

    # Crop window for each mask target.
    crop_boxes = positive_rois
    if config.USE_MINI_MASK:
        # Re-express the ROI corners from normalized image space in
        # normalized mini-mask space (relative to the matched GT box).
        roi_y1, roi_x1, roi_y2, roi_x2 = tf.split(positive_rois, 4, axis=1)
        box_y1, box_x1, box_y2, box_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        box_h = box_y2 - box_y1
        box_w = box_x2 - box_x1
        roi_y1 = (roi_y1 - box_y1) / box_h
        roi_x1 = (roi_x1 - box_x1) / box_w
        roi_y2 = (roi_y2 - box_y1) / box_h
        roi_x2 = (roi_x2 - box_x1) / box_w
        crop_boxes = tf.concat([roi_y1, roi_x1, roi_y2, roi_x2], 1)
    mask_rows = tf.range(0, tf.shape(roi_masks)[0])
    masks = tf.image.crop_and_resize(
        tf.cast(roi_masks, tf.float32),
        crop_boxes,
        mask_rows,
        config.MASK_SHAPE,
    )
    # Drop the trailing singleton dimension again.
    masks = tf.squeeze(masks, axis=3)

    # Round at 0.5 so the GT masks are 0 or 1 for binary cross entropy loss.
    masks = tf.round(masks)

    # Stack positives then negatives, and zero pad. Tensors that only
    # describe positives receive N + P trailing zero rows.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    # Padded like the others, but not part of the returned tuple.
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

    return rois, roi_gt_class_ids, deltas, masks
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Generate detection targets for one image.

    Subsamples proposals and generates target class IDs, bounding box deltas
    and masks for each. All ops are created inside the
    "detection_targets_graph" variable scope (reuse=tf.AUTO_REUSE).

    Inputs:
    proposals: rank-2 tensor of boxes, one (y1, x1, y2, x2) row per proposal.
               Zero rows are treated as padding and removed.
    gt_class_ids: rank-1 tensor of integer class IDs. Negative IDs are
                  handled as crowd boxes; only IDs > 0 become targets.
    gt_boxes: rank-2 tensor of ground-truth boxes, one row per instance.
              Zero rows are treated as padding and removed.
    gt_masks: rank-3 tensor with the instance index on the last axis.
    config: configuration object; TRAIN_ROIS_PER_IMAGE, ROI_POSITIVE_RATIO,
            BBOX_STD_DEV, USE_MINI_MASK and MASK_SHAPE are read from it.

    Returns:
    rois: selected ROIs, positives first, then negatives, then zero rows
          padding up to TRAIN_ROIS_PER_IMAGE.
    roi_gt_class_ids: class ID of the GT box matched to each positive ROI,
                      zero for negative and padding rows.
    deltas: box refinement for each positive ROI divided by BBOX_STD_DEV,
            zero for negative and padding rows.
    masks: per-ROI mask targets resized to MASK_SHAPE and rounded to 0/1,
           zero for negative and padding rows.
    """
    # NOTE(review): an earlier `detection_targets_graph` also appears in this
    # file; if both live at module level this later definition replaces it --
    # confirm that is intended.
    with tf.variable_scope("detection_targets_graph", reuse=tf.AUTO_REUSE):
        # Fail at run time if no proposals were supplied at all.
        asserts = [
            tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                      name="roi_assertion"),
        ]
        with tf.control_dependencies(asserts):
            proposals = tf.identity(proposals)

        # Remove zero padding; `non_zeros` keeps ids and masks aligned with
        # the trimmed GT boxes.
        proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
        gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
        gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                       name="trim_gt_class_ids")
        gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,
                             name="trim_gt_masks")

        # Split GT into crowd boxes (negative class ID) and regular
        # instances (class ID > 0). Crowd masks are not needed downstream,
        # so only the crowd boxes are gathered.
        crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
        non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
        crowd_boxes = tf.gather(gt_boxes, crowd_ix)
        gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
        gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
        gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

        # Overlaps matrix [proposals, gt_boxes]
        overlaps = overlaps_graph(proposals, gt_boxes)

        # Overlaps with crowd boxes [proposals, crowd_boxes]; proposals that
        # touch a crowd are not used as negatives.
        crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
        crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)

        # Positive ROIs: IoU >= 0.5 with some GT box.
        # Negative ROIs: IoU < 0.5 with every GT box and clear of crowds.
        roi_iou_max = tf.reduce_max(overlaps, axis=1)
        positive_roi_bool = (roi_iou_max >= 0.5)
        positive_indices = tf.where(positive_roi_bool)[:, 0]
        negative_indices = tf.where(
            tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

        # Subsample positives up to the configured share of the ROI budget.
        positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                             config.ROI_POSITIVE_RATIO)
        positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
        # From here on positive_count is the number actually selected.
        positive_count = tf.shape(positive_indices)[0]

        # Subsample negatives to maintain the positive:negative ratio.
        r = 1.0 / config.ROI_POSITIVE_RATIO
        negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                                 tf.int32) - positive_count
        negative_indices = tf.random_shuffle(negative_indices)[:negative_count]

        # Gather selected ROIs
        positive_rois = tf.gather(proposals, positive_indices)
        negative_rois = tf.gather(proposals, negative_indices)

        # Assign positive ROIs to GT boxes. With no GT boxes the overlaps
        # matrix has zero columns, so use an empty assignment instead of
        # argmax over an empty axis.
        positive_overlaps = tf.gather(overlaps, positive_indices)
        roi_gt_box_assignment = tf.cond(
            tf.greater(tf.shape(positive_overlaps)[1], 0),
            true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
            false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
        roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

        # Box refinement for positive ROIs, scaled by BBOX_STD_DEV.
        deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
        deltas /= config.BBOX_STD_DEV

        # Permute masks to [N, height, width, 1] and pick the matched mask
        # for each positive ROI.
        transposed_masks = tf.expand_dims(
            tf.transpose(gt_masks, [2, 0, 1]), -1)
        roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

        # Compute mask targets
        boxes = positive_rois
        if config.USE_MINI_MASK:
            # Transform ROI coordinates from normalized image space to
            # normalized mini-mask space (relative to the matched GT box).
            y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
            gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            y1 = (y1 - gt_y1) / gt_h
            x1 = (x1 - gt_x1) / gt_w
            y2 = (y2 - gt_y1) / gt_h
            x2 = (x2 - gt_x1) / gt_w
            boxes = tf.concat([y1, x1, y2, x2], 1)
        box_ids = tf.range(0, tf.shape(roi_masks)[0])
        masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
                                         boxes, box_ids, config.MASK_SHAPE)
        # Remove the extra dimension, then threshold at 0.5 so the masks are
        # 0 or 1.
        masks = tf.squeeze(masks, axis=3)
        masks = tf.round(masks)

        # Append negative ROIs and zero pad. Tensors that only describe
        # positives get N + P trailing zero rows so they line up with `rois`.
        rois = tf.concat([positive_rois, negative_rois], axis=0)
        N = tf.shape(negative_rois)[0]
        P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
        rois = tf.pad(rois, [(0, P), (0, 0)])
        roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
        deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
        masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

        return rois, roi_gt_class_ids, deltas, masks