Example #1
def detection_targets_graph_mod(proposals, gt_class_ids, gt_boxes,  config):
    '''
    Generates detection targets for one image. Subsamples proposals and
    generates target class IDs, bounding box deltas, and masks for each.

    Inputs:
    -------
    proposals:              [POST_NMS_ROIS_TRAINING (e.g. 2000), (y1, x1, y2, x2)] in normalized coordinates.
                            Proposals passed from the proposal layer.
                            Might be zero padded if there are not enough proposals.

    gt_class_ids:           [MAX_GT_INSTANCES]
                            Ground truth object class IDs.

    gt_boxes:               [MAX_GT_INSTANCES, (y1, x1, y2, x2)]
                            Ground truth bboxes in normalized coordinates.

    gt_masks:               [height, width, MAX_GT_INSTANCES] of boolean type.
                            Ground truth mask information. Not a parameter of this
                            modified version; all mask handling below is commented out.

    Returns:            Target ROIs and corresponding class IDs, bounding box shifts, and GT boxes.
    --------
    output_rois:            [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
                            Contains BOTH POSITIVE and NEGATIVE examples
                            
    target_class_ids:       [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
                            Only POSITIVE examples carry non-zero class IDs.
                            
    target_bbox_deltas:     [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
                            Bbox refinements between the POSITIVE examples (in output_rois)
                            and their ground truth counterparts (in target_gt_bboxes)
    
    target_gt_bboxes        [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]                   
    (roi_gt_bboxes)         Ground Truth bbox coordinates - Only for POSITIVE examples
    
    target_gt_masks:        [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
                            boundaries and resized to neural network output size.
                            Only for POSITIVE examples. Not returned by this modified
                            version, since mask handling is commented out below.
   
    Note: Returned arrays might be zero padded if not enough target ROIs.

    ''' 
    # Assertions
    asserts = [ tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion") ]
    
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)
    
    # print('>>> detection_targets_graph ')
    # print('     proposals.shape       :',  proposals.shape, proposals.get_shape(), KB.int_shape(proposals) )
    # print('     gt_boxes.shape        :',  gt_boxes.shape ,    KB.int_shape(gt_boxes)   )
    # print('     gt_class_ids.shape    :',  gt_class_ids.shape, KB.int_shape(gt_class_ids))
    # print('     gt_masks.shape        :',  gt_masks.shape ,    KB.int_shape(gt_masks)   )

    ##------------------------------------------------------------------------------------------
    ## Remove zero padding   
    ##   'non_zeros' returns indices of valid bboxes, which are used to index gt_class_ids and gt_masks
    ##------------------------------------------------------------------------------------------
    proposals, _        = utils.trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = utils.trim_zeros_graph(gt_boxes , name="trim_gt_boxes")
    gt_class_ids        = tf.boolean_mask(gt_class_ids, non_zeros, name="trim_gt_class_ids")
    # gt_masks            = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,name="trim_gt_masks")
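    # Illustration (not executed): assuming utils.trim_zeros_graph behaves like the
    # Matterport helper, it drops rows that are all zeros and also returns the boolean
    # keep-mask, e.g.:
    #
    #   gt_boxes = [[.1, .1, .5, .5],          # kept
    #               [ 0,  0,  0,  0],          # padding row, dropped
    #               [.2, .3, .6, .9]]          # kept
    #   trimmed, non_zeros = utils.trim_zeros_graph(gt_boxes)
    #   # trimmed   -> [[.1, .1, .5, .5], [.2, .3, .6, .9]]
    #   # non_zeros -> [True, False, True]   (used to mask gt_class_ids / gt_masks)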

    ##------------------------------------------------------------------------------------------
    ## Handle COCO crowds
    ##   A crowd box in COCO is a bounding box around several instances. Exclude these from 
    ##   training. A crowd box is given a negative class ID.
    ##------------------------------------------------------------------------------------------
    # tf.where : returns the coordinates of the true elements of the specified condition.
    #            The coordinates are returned in a 2-D tensor where the first dimension (rows) 
    #            represents the number of true elements, and the second dimension (columns) 
    #            represents the coordinates of the true elements. 
    #            Keep in mind, the shape of the output tensor can vary depending on how many 
    #            true values there are in the input. Indices are output in row-major order.
    #
    # tf.gather: Gather slices from params axis (default = 0) according to indices.
    #            indices must be an integer tensor of any dimension (usually 0-D or 1-D). 
    #            Produces an output tensor with shape:
    #                   params.shape[:axis] + indices.shape + params.shape[axis + 1:] 
    #
    # tf.squeeze: Removes dimensions of size 1 from the shape of a tensor.
    #            Given a tensor input, this operation returns a tensor of the same type with 
    #            all dimensions of size 1 removed. If you don't want to remove all size 1 
    #            dimensions, you can remove specific size 1 dimensions by specifying axis.
    #------------------------------------------------------------------------------------------
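    # Toy example (not executed) of the tf.where / tf.gather pattern used below:
    #
    #   class_ids = tf.constant([ 3, -1,  7,  2])        # -1 marks a crowd box
    #   crowd_ix  = tf.where(class_ids < 0)[:, 0]        # -> [1]   (tf.where gives shape [num_true, 1])
    #   keep_ix   = tf.where(class_ids > 0)[:, 0]        # -> [0, 2, 3]
    #   kept_ids  = tf.gather(class_ids, keep_ix)        # -> [3, 7, 2]
    #
    # The trailing [:, 0] is what turns the [num_true, 1] index tensor into a flat index vector.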
    
    ##------------------------------------------------------------------------------------------
    ##  Separate GT boxes and masks by 'crowd' and 'non-crowd' classifications
    ##------------------------------------------------------------------------------------------
    crowd_ix        = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix    = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes     = tf.gather(gt_boxes, crowd_ix)
    # crowd_masks     = tf.gather(gt_masks, crowd_ix, axis=2)
    
    gt_class_ids    = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes        = tf.gather(gt_boxes, non_crowd_ix)
    # gt_masks        = tf.gather(gt_masks, non_crowd_ix, axis=2)


    ##------------------------------------------------------------------------------------------
    ## Compute overlaps with crowd boxes [anchors, crowds]
    ##------------------------------------------------------------------------------------------
    crowd_overlaps  = overlaps_graph_mod(proposals, crowd_boxes)
    crowd_iou_max   = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool   = (crowd_iou_max < 0.001)
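    # no_crowd_bool marks proposals whose best IoU with any crowd box is (near) zero;
    # only these proposals are allowed to become negative examples in step 2 below.
    # If there are no crowd boxes at all, tf.reduce_max over the empty axis yields a very
    # large negative value, which still satisfies the check, so all proposals pass.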


    ##------------------------------------------------------------------------------------------
    ## Compute 
    ## * overlaps matrix [proposals, gt_boxes] : 
    ##    The IoU between proposals and gt_boxes (non-crowd gt boxes; crowd boxes carry a negative class ID in COCO)
    ## * roi_iou_max holds the maximum overlap between each RoI and all gt_boxes.
    ##    The value represents the best overlap an RoI can obtain.  
    ##------------------------------------------------------------------------------------------
    overlaps        = overlaps_graph_mod(proposals, gt_boxes)
    roi_iou_max     = tf.reduce_max(overlaps, axis=1)
    # print('     overlaps.shape        :',  overlaps.shape, KB.int_shape(overlaps)   )
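    # Illustration (not executed): with 3 proposals and 2 gt_boxes the IoU matrix
    # might look like
    #
    #                  gt_box0  gt_box1
    #   proposal0      [ 0.10 ,  0.62 ]
    #   proposal1      [ 0.55 ,  0.05 ]
    #   proposal2      [ 0.02 ,  0.01 ]
    #
    #   roi_iou_max = tf.reduce_max(overlaps, axis=1)    # -> [0.62, 0.55, 0.02]
    #
    # so proposals 0 and 1 qualify as positive (IoU >= 0.5) and proposal 2 as negative.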

    ##------------------------------------------------------------------------------------------
    ## 1. Determine indices of positive ROI proposal boxes
    ##    Identify ROI proposal boxes that have an IoU >= 0.5 overlap with some gt_box, and store 
    ##    indices into positive_indices
    ##------------------------------------------------------------------------------------------
    positive_roi_bool     = (roi_iou_max >= 0.5)
    positive_indices      = tf.where(positive_roi_bool)[:, 0]

    ##------------------------------------------------------------------------------------------
    ## 2. Determine indices of negative ROI proposal boxes:
    ##    those with IoU < 0.5 with every GT box and that do not overlap crowd boxes.
    ##    tf.where creates an array with shape [# of matches, 1], hence the [:, 0] afterwards
    ##------------------------------------------------------------------------------------------
    ## current method
    negative_indices      = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    ## new method
    # this modification determines negative ROI proposal boxes as well, but in addition
    # excludes the all-zero (padding) RoIs from the indices
    # note that   ( negative_bool         = ~positive_roi_bool)
    # negative_nonzero_bool = tf.logical_and(~positive_roi_bool, (roi_iou_max > 0))
    # negative_nonzero_bool = tf.logical_and(negative_nonzero_bool, no_crowd_bool)
    # negative_indices2     = tf.where(negative_nonzero_bool) [:, 0]

    ##------------------------------------------------------------------------------------------
    ## 3. Subsample positive ROIs based on ROI_POSITIVE_RATIO
    ##    Aim for ROI_POSITIVE_RATIO positive ROIs (config.ROI_POSITIVE_RATIO = 0.33)
    ##    Positive ROIs: 33% of config.TRAIN_ROIS_PER_IMAGE (~11 with this configuration)
    ##------------------------------------------------------------------------------------------
    positive_count        = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    positive_indices      = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count        = tf.shape(positive_indices)[0]
    
    ##------------------------------------------------------------------------------------------
    ## 4. Add Negative ROIs. Add enough to maintain positive:negative ratio
    ##------------------------------------------------------------------------------------------
    #     negative_count = int((positive_count / config.ROI_POSITIVE_RATIO) - positive_count)
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count        = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
    negative_indices      = tf.random_shuffle(negative_indices)[:negative_count]
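    # Worked example (assuming config.ROI_POSITIVE_RATIO = 0.33 and that 11 positives
    # survive the shuffle-and-slice above):
    #   r              = 1.0 / 0.33                      ~ 3.03
    #   negative_count = int(3.03 * 11) - 11 = 33 - 11   = 22
    # giving roughly a 1:2 positive:negative split, i.e. ~33% positive ROIs overall.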
    
    ##------------------------------------------------------------------------------------------
    ## 5.   Gather selected positive and negative ROIs
    ##------------------------------------------------------------------------------------------
    positive_rois         = tf.gather(proposals, positive_indices)
    negative_rois         = tf.gather(proposals, negative_indices)

    ##------------------------------------------------------------------------------------------
    ## 6.   Assign positive ROIs to GT boxes.
    ##      roi_gt_box_assignment gives, for each positive ROI, the index of the gt_box with the maximum overlap
    ##------------------------------------------------------------------------------------------
    positive_overlaps     = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
    roi_gt_boxes          = tf.gather(gt_boxes    , roi_gt_box_assignment)
    roi_gt_class_ids      = tf.gather(gt_class_ids, roi_gt_box_assignment)
    # print('     shape of positive overlaps is :', positive_overlaps.get_shape())
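    # Illustration (not executed): continuing the toy IoU matrix above, if proposals 0 and 1
    # were selected as positives,
    #   positive_overlaps     = [[0.10, 0.62],
    #                            [0.55, 0.05]]
    #   roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)   # -> [1, 0]
    # i.e. positive ROI 0 is matched to gt_box 1 and positive ROI 1 to gt_box 0, and their
    # class IDs / boxes are gathered with those indices.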

    ##------------------------------------------------------------------------------------------
    ## 7.   Compute bbox deltas between the selected positive RPN proposals and their corresponding gt bboxes
    ##      i.e. calculate the refinement (difference b/w positive rois and gt_boxes) for positive ROIs
    ##------------------------------------------------------------------------------------------
    roi_gt_deltas  = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    roi_gt_deltas /= config.BBOX_STD_DEV
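    # For reference, a sketch of the standard Faster R-CNN style refinement that
    # utils.box_refinement_graph is assumed to implement for each (roi, gt) pair:
    #
    #   h,   w   = y2 - y1,          x2 - x1              # roi height / width
    #   cy,  cx  = y1 + 0.5 * h,     x1 + 0.5 * w         # roi center
    #   gh,  gw  = gt_y2 - gt_y1,    gt_x2 - gt_x1
    #   gcy, gcx = gt_y1 + 0.5 * gh, gt_x1 + 0.5 * gw
    #
    #   dy = (gcy - cy) / h
    #   dx = (gcx - cx) / w
    #   dh = log(gh / h)
    #   dw = log(gw / w)
    #
    # The deltas are then divided by config.BBOX_STD_DEV so each component has roughly
    # unit variance for the bbox regression loss.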

    #------------------------------------------------------------------------------------------
    # 8.  prepare gt_masks 
    #-------------------------------------------------------------------------------------------
    #  transpose gt_masks from [h, w, N] to [N, height, width] and add 4th dim at end [N, height, width, 1]
    #  Pick the right mask for each ROI
    # transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    # roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

    # Compute mask targets
    # boxes = positive_rois
    
    # if config.USE_MINI_MASK:
        # Transform ROI coordinates from normalized image space
        # to normalized mini-mask space.
        # y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
        # gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        # gt_h = gt_y2 - gt_y1
        # gt_w = gt_x2 - gt_x1
        # y1 = (y1 - gt_y1) / gt_h
        # x1 = (x1 - gt_x1) / gt_w
        # y2 = (y2 - gt_y1) / gt_h
        # x2 = (x2 - gt_x1) / gt_w
        # boxes = tf.concat([y1, x1, y2, x2], 1)
    
    # box_ids = tf.range(0, tf.shape(roi_masks)[0])
    # masks   = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), 
                                       # boxes,
                                       # box_ids,
                                       # config.MASK_SHAPE)
                                     
    # Remove the extra dimension from masks.
    # masks = tf.squeeze(masks, axis=3)

    # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
    # binary cross entropy loss.
    # masks = tf.round(masks)

    ##------------------------------------------------------------------------------------------
    ## 9. Prepare final outputs
    ##    Append negative ROIs, and zero-pad roi_gt_class_ids, roi_gt_deltas and roi_gt_boxes
    ##    (and masks, if used), which are only populated for positive ROIs.
    ##------------------------------------------------------------------------------------------
    rois             = tf.concat([positive_rois, negative_rois], axis=0)
    N                = tf.shape(negative_rois)[0]
    P                = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois             = tf.pad(rois            , [(0, P), (0, 0)])
    
    
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    roi_gt_deltas    = tf.pad(roi_gt_deltas   , [(0, N + P), (0, 0)])
    roi_gt_boxes     = tf.pad(roi_gt_boxes    , [(0, N + P), (0, 0)])       
    # masks            = tf.pad(masks           , [[0, N + P], (0, 0), (0, 0)])
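    # Illustration (numbers purely for illustration): with config.TRAIN_ROIS_PER_IMAGE = 36,
    # 11 positives and 22 negatives,
    #   rois has 33 rows, N = 22, P = 36 - 33 = 3,
    # so rois is padded with 3 zero rows to reach 36, while roi_gt_class_ids, roi_gt_deltas
    # and roi_gt_boxes (defined only for the 11 positives) are padded with N + P = 25 zero
    # rows, so every output ends up with TRAIN_ROIS_PER_IMAGE rows.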
    
    # print(' roi_gt_boxes :  ' , tf.shape(roi_gt_boxes) )
    # print(' P:  ' , P,  ' N :    ', N)   
    # print('     roi.shape             :',  rois.shape            , tf.shape(rois))
    # print('     roi_gt_class_ids.shape:',  roi_gt_class_ids.shape, tf.shape(roi_gt_class_ids))
    # print('     roi_gt_deltas.shape   :',  roi_gt_deltas.shape   , tf.shape(roi_gt_deltas))
    # print('     masks.shape           :',  masks.shape           , tf.shape(masks))
    # print('     roi_gt_boxes.shape    :',  roi_gt_boxes.shape    , tf.shape(roi_gt_boxes))
    
    return rois, roi_gt_class_ids,  roi_gt_deltas, roi_gt_boxes
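# A minimal usage sketch (illustrative only, not part of the example above): in a
# Matterport-style codebase this per-image graph function is typically applied to a
# batch inside a custom Keras layer via utils.batch_slice, roughly:
#
#   class DetectionTargetLayer(KE.Layer):
#       def call(self, inputs):
#           proposals, gt_class_ids, gt_boxes = inputs
#           names = ["rois", "target_class_ids", "target_deltas", "target_gt_boxes"]
#           return utils.batch_slice(
#               [proposals, gt_class_ids, gt_boxes],
#               lambda p, c, b: detection_targets_graph_mod(p, c, b, self.config),
#               self.config.IMAGES_PER_GPU, names=names)
#
# The layer and wrapper names here are assumptions; only detection_targets_graph_mod
# and its four return values come from the code above.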
Example #2
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Generates detection targets for one image. Subsamples proposals and
    generates target class IDs, bounding box deltas, and masks for each.

    Inputs:
    proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
    gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.

    Returns: Target ROIs and corresponding class IDs, bounding box shifts,
    and masks.
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
    masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
           boundaries and resized to neural network output size.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")
    gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,
                         name="trim_gt_masks")

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
    gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [proposals, crowd_boxes]
    crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
    negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn = lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn = lambda: tf.cast(tf.constant([]),tf.int64)
    )
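    # The tf.cond above guards the degenerate case where no GT boxes remain after
    # trimming / crowd filtering: positive_overlaps then has shape [N, 0], and calling
    # tf.argmax on that empty axis would fail, so an empty int64 assignment is returned instead.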
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Assign positive ROIs to GT masks
    # Permute masks to [N, height, width, 1]
    transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    # Pick the right mask for each ROI
    roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

    # Compute mask targets
    boxes = positive_rois
    if config.USE_MINI_MASK:
        # Transform ROI coordinates from normalized image space
        # to normalized mini-mask space.
        y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
        gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        gt_h = gt_y2 - gt_y1
        gt_w = gt_x2 - gt_x1
        y1 = (y1 - gt_y1) / gt_h
        x1 = (x1 - gt_x1) / gt_w
        y2 = (y2 - gt_y1) / gt_h
        x2 = (x2 - gt_x1) / gt_w
        boxes = tf.concat([y1, x1, y2, x2], 1)
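    # Illustration (not executed): if a GT box spans (gt_y1, gt_x1, gt_y2, gt_x2) =
    # (0.2, 0.2, 0.6, 0.6) and a positive ROI spans (0.3, 0.3, 0.5, 0.5) in normalized
    # image coordinates, then gt_h = gt_w = 0.4 and the ROI becomes
    #   (y1, x1, y2, x2) = ((0.3-0.2)/0.4, (0.3-0.2)/0.4, (0.5-0.2)/0.4, (0.5-0.2)/0.4)
    #                    = (0.25, 0.25, 0.75, 0.75)
    # i.e. the ROI expressed in the mini-mask's own coordinate frame, which is what
    # tf.image.crop_and_resize expects below.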
    box_ids = tf.range(0, tf.shape(roi_masks)[0])
    masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes,
                                     box_ids,
                                     config.MASK_SHAPE)
    # Remove the extra dimension from masks.
    masks = tf.squeeze(masks, axis=3)

    # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
    # binary cross entropy loss.
    masks = tf.round(masks)

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

    return rois, roi_gt_class_ids, deltas, masks
Example #3
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks,
                            config):
    with tf.variable_scope("detection_targets_graph", reuse=tf.AUTO_REUSE):
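        # Same detection-target logic as the previous example, here wrapped in a
        # tf.variable_scope with reuse=tf.AUTO_REUSE (TF 1.x style) so the graph
        # can be built repeatedly under a single shared scope.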

        # Assertions
        asserts = [
            tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                      name="roi_assertion"),
        ]
        with tf.control_dependencies(asserts):
            proposals = tf.identity(proposals)

        proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")

        gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
        gt_class_ids = tf.boolean_mask(gt_class_ids,
                                       non_zeros,
                                       name="trim_gt_class_ids")
        gt_masks = tf.gather(gt_masks,
                             tf.where(non_zeros)[:, 0],
                             axis=2,
                             name="trim_gt_masks")

        crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
        non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
        crowd_boxes = tf.gather(gt_boxes, crowd_ix)
        crowd_masks = tf.gather(gt_masks, crowd_ix, axis=2)
        gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
        gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
        gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

        overlaps = overlaps_graph(proposals, gt_boxes)

        crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
        crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)

        roi_iou_max = tf.reduce_max(overlaps, axis=1)
        positive_roi_bool = (roi_iou_max >= 0.5)
        positive_indices = tf.where(positive_roi_bool)[:, 0]
        negative_indices = tf.where(
            tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

        positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                             config.ROI_POSITIVE_RATIO)
        positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
        positive_count = tf.shape(positive_indices)[0]
        r = 1.0 / config.ROI_POSITIVE_RATIO
        negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                                 tf.int32) - positive_count
        negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
        positive_rois = tf.gather(proposals, positive_indices)
        negative_rois = tf.gather(proposals, negative_indices)

        # Assign positive ROIs to GT boxes.
        positive_overlaps = tf.gather(overlaps, positive_indices)
        roi_gt_box_assignment = tf.cond(
            tf.greater(tf.shape(positive_overlaps)[1], 0),
            true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
            false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
        roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

        deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
        deltas /= config.BBOX_STD_DEV

        transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]),
                                          -1)
        roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

        boxes = positive_rois
        if config.USE_MINI_MASK:
            y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
            gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            y1 = (y1 - gt_y1) / gt_h
            x1 = (x1 - gt_x1) / gt_w
            y2 = (y2 - gt_y1) / gt_h
            x2 = (x2 - gt_x1) / gt_w
            boxes = tf.concat([y1, x1, y2, x2], 1)
        box_ids = tf.range(0, tf.shape(roi_masks)[0])
        masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes,
                                         box_ids, config.MASK_SHAPE)
        masks = tf.squeeze(masks, axis=3)
        masks = tf.round(masks)

        rois = tf.concat([positive_rois, negative_rois], axis=0)
        N = tf.shape(negative_rois)[0]
        P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
        rois = tf.pad(rois, [(0, P), (0, 0)])
        roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
        roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
        deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
        masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

        return rois, roi_gt_class_ids, deltas, masks