Beispiel #1
0
    def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info,
                       init_state0, num_classes=21):
        """Label every RoI as fg/bg and build its box and mask targets.

        Nothing is subsampled: every row of ``all_rois`` is kept, reordered so
        that foreground rows (max overlap >= ``cfg.TRAIN.BBOX_THRESH``) come
        before background rows.

        Args:
            all_rois: 2-D array; columns 1:5 are used as box coordinates.
            gt_boxes: 2-D array; columns 0:4 are box coordinates, column 4 is
                the class label.
            im_scale: Scalar the RoIs and gt boxes are divided by before the
                masks are intersected.
            gt_masks: Indexable by ground-truth index, yielding a 2-D mask.
            mask_info: Indexable by ground-truth index; entries 0 and 1 bound
                the rows and columns of the matching mask that are used.
            init_state0: Returned unchanged as ``init_states``.
            num_classes: Class count passed to ``get_bbox_regression_label``.
                Defaults to 21, the value that used to be hard-coded.

        Returns:
            Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks,
            top_mask_info, bbox_targets, bbox_inside_weights, init_states)``.
            ``pos_masks`` and ``top_mask_info`` have one row per kept RoI;
            background rows of ``top_mask_info`` are filled with -1.
        """
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

        # Split RoIs at the overlap threshold; foreground goes first.
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
        bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
        keep_inds = np.append(fg_inds, bg_inds).astype(int)
        num_fg = len(fg_inds)

        labels = labels[keep_inds]
        # Clamp labels for the background RoIs to 0
        labels[num_fg:] = 0
        rois = all_rois[keep_inds]
        # NOTE: the per-RoI reindexing ``init_state0[keep_inds]`` was disabled
        # in the original code; the states are passed through as-is.
        init_states = init_state0

        bbox_target_data = bbox_compute_targets(
            rois[:, 1:5],
            gt_boxes[gt_assignment[keep_inds], :4],
            normalize=True)
        bbox_target_data = np.hstack(
            (labels[:, np.newaxis], bbox_target_data)
        ).astype(np.float32, copy=False)
        bbox_targets, bbox_inside_weights = get_bbox_regression_label(
            bbox_target_data, num_classes)

        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[num_fg:, :] = -1

        # Row i of ``rois`` is fg_inds[i] because foreground was placed first.
        for i, val in enumerate(fg_inds):
            gt_idx = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_idx]
            # int() keeps the slice valid if the size entries are floats,
            # which NumPy rejects as slice bounds.
            mask_h = int(gt_mask_info[0])
            mask_w = int(gt_mask_info[1])
            gt_mask = gt_masks[gt_idx][0:mask_h, 0:mask_w]
            # regression targets is the intersection of bounding box and gt mask
            ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
            pos_masks[i, ...] = ex_mask
            top_mask_info[i, 0] = gt_idx
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
                bbox_targets, bbox_inside_weights, init_states)
Beispiel #2
0
    def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info,
                       num_classes=21):
        """Assign labels, box targets and mask targets to all RoIs.

        Every RoI is kept. RoIs whose best overlap with a ground-truth box is
        at least ``cfg.TRAIN.BBOX_THRESH`` are foreground and are moved to the
        front; the remaining ones are background and get label 0.

        Args:
            all_rois: 2-D array; columns 1:5 are used as box coordinates.
            gt_boxes: 2-D array; columns 0:4 are box coordinates, column 4 is
                the class label.
            im_scale: Scalar the RoIs and gt boxes are divided by before the
                masks are intersected.
            gt_masks: Indexable by ground-truth index, yielding a 2-D mask.
            mask_info: Indexable by ground-truth index; entries 0 and 1 bound
                the rows and columns of the matching mask that are used.
            num_classes: Class count passed to ``get_bbox_regression_label``.
                Defaults to 21, the value that used to be hard-coded.

        Returns:
            Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks,
            top_mask_info, bbox_targets, bbox_inside_weights)``. Background
            rows of ``top_mask_info`` are filled with -1 and background rows
            of ``pos_masks`` stay zero.
        """
        # np.float no longer exists in NumPy >= 1.24; np.float64 is equivalent.
        roi_coords = np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64)
        gt_coords = np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64)
        overlaps = bbox_overlaps(roi_coords, gt_coords)
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

        # Foreground first, then background
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
        bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
        keep_inds = np.append(fg_inds, bg_inds).astype(int)
        fg_count = len(fg_inds)

        # Select values in the new order and zero the background labels
        labels = labels[keep_inds]
        labels[fg_count:] = 0
        rois = all_rois[keep_inds]

        bbox_target_data = bbox_compute_targets(
            rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
        bbox_target_data = np.hstack((labels[:, np.newaxis], bbox_target_data))\
            .astype(np.float32, copy=False)
        bbox_targets, bbox_inside_weights = get_bbox_regression_label(
            bbox_target_data, num_classes)

        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        total = len(keep_inds)
        pos_masks = np.zeros((total, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((total, 12))
        top_mask_info[fg_count:, :] = -1

        # Position i in the reordered arrays corresponds to fg_inds[i].
        for i, val in enumerate(fg_inds):
            matched_gt = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[matched_gt]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[matched_gt]
            # Slice bounds must be integers; int() tolerates float-typed sizes.
            rows = int(gt_mask_info[0])
            cols = int(gt_mask_info[1])
            gt_mask = gt_masks[matched_gt][0:rows, 0:cols]
            # regression targets is the intersection of bounding box and gt mask
            pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)
            top_mask_info[i, 0] = matched_gt
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
                bbox_targets, bbox_inside_weights)
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks, im_scale, mask_info):
    """
    Generate a random sample of RoIs comprising
    foreground and background examples.

    Foreground RoIs are drawn per overlap band
    (``cfg.TRAIN.FG_THRESH_LO/HI`` with quota ``FG_FRACTION``); background
    RoIs are drawn the same way from the ``BG_*`` bands, with quotas taken
    from the budget left after the foreground draw.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are box coordinates, column 4 is
            the class label.
        rois_per_image: Total RoI budget the fractions are applied to.
        num_classes: Class count passed to ``get_bbox_regression_label``.
        gt_masks: Indexable by ground-truth index, yielding a 2-D mask
            (only read when ``cfg.MNC_MODE`` is set).
        im_scale: Scalar the boxes are divided by to map them to original
            image space (only read when ``cfg.MNC_MODE`` is set).
        mask_info: Indexable by ground-truth index; entries 0 and 1 bound
            the rows and columns of the matching mask that are used.

    Returns:
        ``(blobs, fg_inds, bg_inds, keep_inds)``. ``blobs`` always holds
        'rois', 'labels', 'bbox_targets', 'bbox_inside_weights' and
        'bbox_outside_weights'; with ``cfg.MNC_MODE`` it also holds
        'mask_targets', 'mask_weight' and 'gt_masks_info'. ``fg_inds`` and
        ``bg_inds`` are sorted integer index arrays (they used to come back
        as float arrays because they were seeded from an empty list).
    """
    # overlaps: (rois x gt_boxes)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Sample foreground indexes.
    # Seed with an *integer* empty array: hstack-ing onto a plain [] promotes
    # everything to float64, and float indices are rejected by NumPy >= 1.12.
    fg_inds = np.empty(0, dtype=int)
    for i in range(len(cfg.TRAIN.FG_FRACTION)):
        cur_inds = np.where((max_overlaps >= cfg.TRAIN.FG_THRESH_LO[i]) &
                            (max_overlaps <= cfg.TRAIN.FG_THRESH_HI[i]))[0]
        # np.round returns a float; npr.choice needs an integer size.
        cur_rois_this_image = int(min(
            cur_inds.size, np.round(rois_per_image * cfg.TRAIN.FG_FRACTION[i])))
        if cur_inds.size > 0:
            cur_inds = npr.choice(cur_inds, size=cur_rois_this_image, replace=False)
        # unique() also sorts and drops RoIs picked by overlapping bands
        fg_inds = np.unique(np.hstack((fg_inds, cur_inds)))
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = np.empty(0, dtype=int)
    for i in range(len(cfg.TRAIN.BG_FRACTION)):
        cur_inds = np.where((max_overlaps >= cfg.TRAIN.BG_THRESH_LO[i]) &
                            (max_overlaps <= cfg.TRAIN.BG_THRESH_HI[i]))[0]
        cur_rois_this_image = int(min(
            cur_inds.size,
            np.round(bg_rois_per_this_image * cfg.TRAIN.BG_FRACTION[i])))
        if cur_inds.size > 0:
            cur_inds = npr.choice(cur_inds, size=cur_rois_this_image, replace=False)
        bg_inds = np.unique(np.hstack((bg_inds, cur_inds)))

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack((labels[:, np.newaxis], bbox_target_data))\
        .astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        scaled_rois = rois[:, 1:5] / float(im_scale)

        # map to original image space
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)
        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[fg_rois_per_image:, :] = -1

        # Row i of ``rois`` is fg_inds[i] because foreground was placed first.
        for i, val in enumerate(fg_inds):
            gt_idx = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_idx]
            # int() keeps the slice valid if the size entries are floats,
            # which NumPy rejects as slice bounds.
            mask_h = int(gt_mask_info[0])
            mask_w = int(gt_mask_info[1])
            gt_mask = gt_masks[gt_idx][0:mask_h, 0:mask_w]
            # calculate mask regression targets
            # (intersection of bounding box and gt mask)
            ex_mask = intersect_mask(ex_box, gt_box, gt_mask)

            pos_masks[i, ...] = ex_mask
            top_mask_info[i, 0] = gt_idx
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]

            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros((rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # only assign box-level foreground as positive mask regression
        mask_weight[0:fg_rois_per_image, :, :, :] = 1
        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds
Beispiel #4
0
    def _sample_blobs(self, roidbs, img_names):
        """Build one minibatch of blobs by sampling RoIs from each roidb entry.

        For every entry the RoIs are sampled per overlap band
        (``cfg.TRAIN.THRESH_LO_SAMPLE`` / ``THRESH_HI_SAMPLE`` with quota
        ``FRACTION_SAMPLE``), reordered so box-level foreground
        (``det_overlap >= cfg.TRAIN.FG_DET_THRESH``) comes first, and the
        resulting labels, resized masks, box targets and weights are appended
        to the batch-wide arrays.

        Args:
            roidbs: Iterable of dicts. Keys read here: 'det_overlap',
                'gt_classes', 'output_label', 'seg_overlap', 'masks',
                'boxes', 'mask_targets', 'gt_assignment'.
            img_names: Forwarded unchanged to ``self._get_image_blob``.

        Returns:
            dict with keys 'data', 'rois', 'masks', 'box_label', 'mask_label',
            'bbox_targets', 'mask_targets', 'bbox_inside_weights',
            'bbox_outside_weights' and 'mask_weight'. 'rois' has 5 columns
            (image index followed by the scaled box); mask blobs are 4-D with
            a singleton second axis.
        """
        random_scale_inds = np.random.randint(0,
                                              high=len(cfg.TRAIN.SCALES),
                                              size=cfg.TRAIN.IMS_PER_BATCH)
        im_blob, im_scales = self._get_image_blob(roidbs, random_scale_inds,
                                                  img_names)

        # NOTE: `/` only floors under Python 2 with integer config values;
        # the per-band quota below is rounded and cast to int either way.
        rois_per_img = cfg.TRAIN.BATCH_SIZE / cfg.TRAIN.IMS_PER_BATCH

        rois_blob = np.zeros((0, 5), dtype=np.float32)
        masks_blob = np.zeros((0, 1, self.input_mz, self.input_mz))
        box_labels_blob = np.zeros((0, 1))
        mask_labels_blob = np.zeros((0, 1))
        bbox_targets_blob = np.zeros((0, self._num_classes * 4))
        mask_targets_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        bbox_inside_weights_blob = np.zeros((0, self._num_classes * 4))
        bbox_outside_weights_blob = np.zeros((0, self._num_classes * 4))
        mask_weights_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))

        for im_i, roidb in enumerate(roidbs):
            # Sample positive/negative using box-level overlap
            det_overlap = roidb['det_overlap']
            num_gt = len(roidb['gt_classes'])
            # Take the row indices only. Passing the whole np.where tuple to
            # np.in1d flattens it, which mixes column indices into the
            # foreground set when det_overlap is 2-D.
            fg_det_inds = np.where(det_overlap >= cfg.TRAIN.FG_DET_THRESH)[0]
            # Integer empty array so the fg/bg split below still works when
            # no sampling band is configured.
            keep_inds = np.empty(0, dtype=int)
            for i in range(len(cfg.TRAIN.FRACTION_SAMPLE)):
                cur_keep_inds = np.where(
                    (det_overlap >= cfg.TRAIN.THRESH_LO_SAMPLE[i])
                    & (det_overlap <= cfg.TRAIN.THRESH_HI_SAMPLE[i]))[0]
                # np.round returns a float; npr.choice needs an integer size.
                cur_rois_this_image = int(min(
                    np.round(rois_per_img * cfg.TRAIN.FRACTION_SAMPLE[i]),
                    len(cur_keep_inds)))
                if cur_keep_inds.size > 0:
                    cur_keep_inds = npr.choice(cur_keep_inds,
                                               size=cur_rois_this_image,
                                               replace=False)

                if i == 0:
                    keep_inds = cur_keep_inds
                else:
                    keep_inds = np.unique(np.hstack(
                        (keep_inds, cur_keep_inds)))

            # Reorder: box-level foreground first, then background
            fg_inds_det = keep_inds[np.in1d(keep_inds, fg_det_inds)]
            bg_inds_det = keep_inds[np.in1d(keep_inds,
                                            fg_det_inds,
                                            invert=True)]
            keep_inds = np.append(fg_inds_det, bg_inds_det).astype(int)
            num_fg = len(fg_inds_det)

            # Assign box-level label and mask-level label
            input_box_labels = roidb['output_label'][keep_inds]
            input_box_labels[num_fg:] = 0
            seg_overlap = roidb['seg_overlap'][keep_inds]
            bg_inds_seg = np.where(seg_overlap < cfg.TRAIN.FG_SEG_THRESH)[0]
            # Mask labels additionally zero out RoIs with low mask overlap
            input_mask_labels = input_box_labels.copy()
            input_mask_labels[bg_inds_seg] = 0

            gt_classes = roidb['gt_classes']
            input_masks = roidb['masks'][keep_inds, :, :]
            input_boxes = roidb['boxes'][keep_inds, :] * im_scales[im_i]

            mask_target = roidb['mask_targets']
            mask_target = mask_target[keep_inds, :, :]
            # Resize each input mask to the network input size and binarize
            mask_resize = np.zeros(
                (input_masks.shape[0], self.input_mz, self.input_mz))
            for i in range(mask_target.shape[0]):
                # np.float was removed in NumPy 1.24; np.float64 is the same.
                mask_resize[i, :, :] = cv2.resize(
                    input_masks[i, :, :].astype(np.float64),
                    (self.input_mz, self.input_mz))
            mask_resize = mask_resize >= cfg.BINARIZE_THRESH

            # Only box-level foreground contributes to the mask loss
            mask_target_weights = np.zeros(mask_target.shape)
            mask_target_weights[0:num_fg, :, :] = 1

            gt_boxes = roidb['boxes'][0:num_gt, :] * im_scales[im_i]
            gt_assignment = roidb['gt_assignment'][:, 0]
            bbox_target_data = bbox_compute_targets(
                input_boxes, gt_boxes[gt_assignment[keep_inds], :4], False)
            bbox_target_data = np.hstack((input_box_labels, bbox_target_data))\
                .astype(np.float32, copy=False)
            bbox_targets, bbox_inside_weights = get_bbox_regression_label(
                bbox_target_data, self._num_classes)

            # normalize targets of the foreground rows with per-class stats
            for i, det_ind in enumerate(fg_inds_det):
                cls = gt_classes[gt_assignment[det_ind]][0]
                if cls == 0:
                    continue
                bbox_targets[i, cls * 4:cls * 4 + 4] -= self._mean[cls, :]
                bbox_targets[i, cls * 4:cls * 4 + 4] /= self._std[cls, :]

            bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(
                np.float32)
            # Prepend the image index within the batch as column 0
            input_boxes = np.hstack((im_i * np.ones(
                (input_boxes.shape[0], 1)), input_boxes))
            bz = input_boxes.shape[0]
            rois_blob = np.vstack((rois_blob, input_boxes))
            masks_blob = np.concatenate(
                (masks_blob,
                 mask_resize.reshape(bz, 1, self.input_mz, self.input_mz)),
                axis=0)
            box_labels_blob = np.concatenate(
                (box_labels_blob, input_box_labels), axis=0)
            mask_labels_blob = np.concatenate(
                (mask_labels_blob, input_mask_labels), axis=0)
            bbox_targets_blob = np.concatenate(
                (bbox_targets_blob, bbox_targets), axis=0)
            mask_targets_blob = np.concatenate(
                (mask_targets_blob,
                 mask_target.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)),
                axis=0)
            bbox_inside_weights_blob = np.concatenate(
                (bbox_inside_weights_blob, bbox_inside_weights), axis=0)
            bbox_outside_weights_blob = np.concatenate(
                (bbox_outside_weights_blob, bbox_outside_weights), axis=0)
            mask_weights_blob = np.concatenate(
                (mask_weights_blob,
                 mask_target_weights.reshape(bz, 1, cfg.MASK_SIZE,
                                             cfg.MASK_SIZE)),
                axis=0)

        return {
            'data': im_blob,
            'rois': rois_blob,
            'masks': masks_blob,
            'box_label': box_labels_blob,
            'mask_label': mask_labels_blob,
            'bbox_targets': bbox_targets_blob,
            'mask_targets': mask_targets_blob,
            'bbox_inside_weights': bbox_inside_weights_blob,
            'bbox_outside_weights': bbox_outside_weights_blob,
            'mask_weight': mask_weights_blob
        }
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks,
                 im_scale, mask_info):
    """
    Generate a random sample of RoIs comprising
    foreground and background examples.

    Each RoI is matched to the ground-truth box it overlaps most. Foreground
    indices are drawn from the ``cfg.TRAIN.FG_THRESH_LO/HI`` overlap bands
    (quota ``FG_FRACTION`` of ``rois_per_image`` per band); background
    indices are drawn from the ``BG_*`` bands, with quotas computed on the
    budget remaining after the foreground draw.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are box coordinates, column 4 is
            the class label.
        rois_per_image: Total RoI budget the fractions are applied to.
        num_classes: Class count passed to ``get_bbox_regression_label``.
        gt_masks: Indexable by ground-truth index, yielding a 2-D mask
            (only read when ``cfg.MNC_MODE`` is set).
        im_scale: Scalar the boxes are divided by to map them to original
            image space (only read when ``cfg.MNC_MODE`` is set).
        mask_info: Indexable by ground-truth index; entries 0 and 1 bound
            the rows and columns of the matching mask that are used.

    Returns:
        ``(blobs, fg_inds, bg_inds, keep_inds)``. ``blobs`` maps 'rois',
        'labels', 'bbox_targets', 'bbox_inside_weights' and
        'bbox_outside_weights' to arrays, plus 'mask_targets', 'mask_weight'
        and 'gt_masks_info' when ``cfg.MNC_MODE`` is set. ``fg_inds`` and
        ``bg_inds`` are sorted integer index arrays (previously float arrays,
        because they were grown from an empty list).
    """
    # overlaps: (rois x gt_boxes)
    # np.float no longer exists in NumPy >= 1.24; np.float64 is equivalent.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Sample foreground indexes.
    # Start from an integer array: growing a plain [] with np.hstack turns
    # the indices into float64, which NumPy >= 1.12 refuses as indices.
    fg_inds = np.empty(0, dtype=int)
    for i in range(len(cfg.TRAIN.FG_FRACTION)):
        in_band = ((max_overlaps >= cfg.TRAIN.FG_THRESH_LO[i])
                   & (max_overlaps <= cfg.TRAIN.FG_THRESH_HI[i]))
        cur_inds = np.where(in_band)[0]
        quota = np.round(rois_per_image * cfg.TRAIN.FG_FRACTION[i])
        # npr.choice requires an integer size; np.round yields a float.
        cur_rois_this_image = int(min(cur_inds.size, quota))
        if cur_inds.size > 0:
            cur_inds = npr.choice(cur_inds,
                                  size=cur_rois_this_image,
                                  replace=False)
        # unique() sorts and removes RoIs selected by more than one band
        fg_inds = np.unique(np.hstack((fg_inds, cur_inds)))
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = np.empty(0, dtype=int)
    for i in range(len(cfg.TRAIN.BG_FRACTION)):
        in_band = ((max_overlaps >= cfg.TRAIN.BG_THRESH_LO[i])
                   & (max_overlaps <= cfg.TRAIN.BG_THRESH_HI[i]))
        cur_inds = np.where(in_band)[0]
        quota = np.round(bg_rois_per_this_image * cfg.TRAIN.BG_FRACTION[i])
        cur_rois_this_image = int(min(cur_inds.size, quota))
        if cur_inds.size > 0:
            cur_inds = npr.choice(cur_inds,
                                  size=cur_rois_this_image,
                                  replace=False)
        bg_inds = np.unique(np.hstack((bg_inds, cur_inds)))

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack((labels[:, np.newaxis], bbox_target_data))\
        .astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        scaled_rois = rois[:, 1:5] / float(im_scale)

        # map to original image space
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)
        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[fg_rois_per_image:, :] = -1

        # Position i in the sampled arrays corresponds to fg_inds[i].
        for i, val in enumerate(fg_inds):
            matched_gt = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[matched_gt]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[matched_gt]
            # Slice bounds must be integers; int() tolerates float-typed sizes.
            rows = int(gt_mask_info[0])
            cols = int(gt_mask_info[1])
            gt_mask = gt_masks[matched_gt][0:rows, 0:cols]
            # calculate mask regression targets
            # (intersection of bounding box and gt mask)
            pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)

            top_mask_info[i, 0] = matched_gt
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]

            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros(
            (rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # only assign box-level foreground as positive mask regression
        mask_weight[0:fg_rois_per_image, :, :, :] = 1
        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds
Beispiel #6
0
    def _sample_blobs(self, roidbs, img_names):
        """Sample RoIs from every roidb entry and stack them into batch blobs.

        Per entry: RoIs are drawn from the overlap bands given by
        ``cfg.TRAIN.THRESH_LO_SAMPLE`` / ``THRESH_HI_SAMPLE`` (quota
        ``FRACTION_SAMPLE`` of the per-image budget), box-level foreground
        (``det_overlap >= cfg.TRAIN.FG_DET_THRESH``) is moved to the front,
        and labels, resized masks, box targets and weights are appended to
        the batch-wide arrays.

        Args:
            roidbs: Iterable of dicts. Keys read here: 'det_overlap',
                'gt_classes', 'output_label', 'seg_overlap', 'masks',
                'boxes', 'mask_targets', 'gt_assignment'.
            img_names: Forwarded unchanged to ``self._get_image_blob``.

        Returns:
            dict with keys 'data', 'rois', 'masks', 'box_label', 'mask_label',
            'bbox_targets', 'mask_targets', 'bbox_inside_weights',
            'bbox_outside_weights' and 'mask_weight'. 'rois' has 5 columns
            (image index followed by the scaled box); mask blobs are 4-D with
            a singleton second axis.
        """
        random_scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=cfg.TRAIN.IMS_PER_BATCH)
        im_blob, im_scales = self._get_image_blob(roidbs, random_scale_inds, img_names)

        # NOTE: `/` only floors under Python 2 with integer config values;
        # the per-band quota below is rounded and cast to int either way.
        rois_per_img = cfg.TRAIN.BATCH_SIZE / cfg.TRAIN.IMS_PER_BATCH

        mask_shape = (0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)
        target_shape = (0, self._num_classes * 4)
        rois_blob = np.zeros((0, 5), dtype=np.float32)
        masks_blob = np.zeros((0, 1, self.input_mz, self.input_mz))
        box_labels_blob = np.zeros((0, 1))
        mask_labels_blob = np.zeros((0, 1))
        bbox_targets_blob = np.zeros(target_shape)
        mask_targets_blob = np.zeros(mask_shape)
        bbox_inside_weights_blob = np.zeros(target_shape)
        bbox_outside_weights_blob = np.zeros(target_shape)
        mask_weights_blob = np.zeros(mask_shape)

        for im_i, roidb in enumerate(roidbs):
            # Sample positive/negative using box-level overlap
            det_overlap = roidb['det_overlap']
            num_gt = len(roidb['gt_classes'])
            # Keep only the row indices: handing the full np.where tuple to
            # np.in1d flattens it, so for a 2-D det_overlap the column
            # indices would leak into the foreground set.
            fg_det_inds = np.where(det_overlap >= cfg.TRAIN.FG_DET_THRESH)[0]
            # Integer empty array (not a list) so the boolean-mask split
            # below still works when no sampling band is configured.
            keep_inds = np.empty(0, dtype=int)
            for i in range(len(cfg.TRAIN.FRACTION_SAMPLE)):
                cur_keep_inds = np.where((det_overlap >= cfg.TRAIN.THRESH_LO_SAMPLE[i]) &
                                         (det_overlap <= cfg.TRAIN.THRESH_HI_SAMPLE[i]))[0]
                quota = np.round(rois_per_img * cfg.TRAIN.FRACTION_SAMPLE[i])
                # npr.choice requires an integer size; np.round yields a float.
                cur_rois_this_image = int(min(quota, len(cur_keep_inds)))
                if cur_keep_inds.size > 0:
                    cur_keep_inds = npr.choice(cur_keep_inds, size=cur_rois_this_image, replace=False)

                if i == 0:
                    keep_inds = cur_keep_inds
                else:
                    keep_inds = np.unique(np.hstack((keep_inds, cur_keep_inds)))

            # Reorder: box-level foreground first, then background
            is_fg = np.in1d(keep_inds, fg_det_inds)
            fg_inds_det = keep_inds[is_fg]
            bg_inds_det = keep_inds[np.in1d(keep_inds, fg_det_inds, invert=True)]
            keep_inds = np.append(fg_inds_det, bg_inds_det).astype(int)
            fg_count = len(fg_inds_det)

            # Assign box-level label and mask-level label
            input_box_labels = roidb['output_label'][keep_inds]
            input_box_labels[fg_count:] = 0
            seg_overlap = roidb['seg_overlap'][keep_inds]
            bg_inds_seg = np.where(seg_overlap < cfg.TRAIN.FG_SEG_THRESH)[0]
            # Mask labels additionally zero out RoIs with low mask overlap
            input_mask_labels = input_box_labels.copy()
            input_mask_labels[bg_inds_seg] = 0

            gt_classes = roidb['gt_classes']
            input_masks = roidb['masks'][keep_inds, :, :]
            input_boxes = roidb['boxes'][keep_inds, :] * im_scales[im_i]

            mask_target = roidb['mask_targets']
            mask_target = mask_target[keep_inds, :, :]
            # Resize each input mask to the network input size and binarize
            mask_resize = np.zeros((input_masks.shape[0], self.input_mz, self.input_mz))
            for i in range(mask_target.shape[0]):
                # np.float no longer exists in NumPy >= 1.24; use np.float64.
                mask_resize[i, :, :] = cv2.resize(input_masks[i, :, :].astype(np.float64),
                                                  (self.input_mz, self.input_mz))
            mask_resize = mask_resize >= cfg.BINARIZE_THRESH

            # Only box-level foreground contributes to the mask loss
            mask_target_weights = np.zeros(mask_target.shape)
            mask_target_weights[0:fg_count, :, :] = 1

            gt_boxes = roidb['boxes'][0:num_gt, :] * im_scales[im_i]
            gt_assignment = roidb['gt_assignment'][:, 0]
            bbox_target_data = bbox_compute_targets(input_boxes, gt_boxes[gt_assignment[keep_inds], :4], False)
            bbox_target_data = np.hstack((input_box_labels, bbox_target_data))\
                .astype(np.float32, copy=False)
            bbox_targets, bbox_inside_weights = get_bbox_regression_label(
                bbox_target_data, self._num_classes)

            # normalize targets of the foreground rows with per-class stats
            for i, det_ind in enumerate(fg_inds_det):
                cls = gt_classes[gt_assignment[det_ind]][0]
                if cls == 0:
                    continue
                bbox_targets[i, cls*4:cls*4+4] -= self._mean[cls, :]
                bbox_targets[i, cls*4:cls*4+4] /= self._std[cls, :]

            bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
            # Prepend the image index within the batch as column 0
            input_boxes = np.hstack((im_i * np.ones((input_boxes.shape[0], 1)), input_boxes))
            bz = input_boxes.shape[0]
            rois_blob = np.vstack((rois_blob, input_boxes))
            masks_blob = np.concatenate((masks_blob,
                                         mask_resize.reshape(bz, 1, self.input_mz, self.input_mz)), axis=0)
            box_labels_blob = np.concatenate((box_labels_blob, input_box_labels), axis=0)
            mask_labels_blob = np.concatenate((mask_labels_blob, input_mask_labels), axis=0)
            bbox_targets_blob = np.concatenate((bbox_targets_blob, bbox_targets), axis=0)
            mask_targets_blob = np.concatenate((mask_targets_blob,
                                                mask_target.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)), axis=0)
            bbox_inside_weights_blob = np.concatenate((bbox_inside_weights_blob, bbox_inside_weights), axis=0)
            bbox_outside_weights_blob = np.concatenate((bbox_outside_weights_blob, bbox_outside_weights), axis=0)
            mask_weights_blob = np.concatenate((mask_weights_blob,
                                                mask_target_weights.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)), axis=0)

        return {
            'data': im_blob,
            'rois': rois_blob,
            'masks': masks_blob,
            'box_label': box_labels_blob,
            'mask_label': mask_labels_blob,
            'bbox_targets': bbox_targets_blob,
            'mask_targets': mask_targets_blob,
            'bbox_inside_weights': bbox_inside_weights_blob,
            'bbox_outside_weights': bbox_outside_weights_blob,
            'mask_weight': mask_weights_blob
        }