Example #1
    def _add_proposals_from_file(
        self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
    ):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'rb') as f:  # binary mode for pickle
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(
                boxes, entry['height'], entry['width']
            )
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)
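Every example on this page funnels proposal or roi boxes through box_utils.clip_boxes_to_image. A minimal NumPy sketch of that helper, inferred from how the examples call it (x coordinates clamped to [0, width - 1], y coordinates to [0, height - 1]; boxes are (x1, y1, x2, y2) rows):

    import numpy as np

    def clip_boxes_to_image(boxes, height, width):
        """Clip an array of (x1, y1, x2, y2) boxes to image boundaries."""
        boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]]))
        boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]]))
        return boxes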
Example #2
    def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                                 top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'rb') as f:  # binary mode for pickle
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)
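Examples #1 and #2 also deduplicate and size-filter the proposals. Hedged sketches of box_utils.unique_boxes and box_utils.filter_small_boxes, assuming the same (x1, y1, x2, y2) layout (the coordinate-hashing trick mirrors the classic py-faster-rcnn helpers):

    import numpy as np

    def unique_boxes(boxes, scale=1.0):
        """Return sorted indices of unique boxes (dedup by coordinate hash)."""
        v = np.array([1, 1e3, 1e6, 1e9])
        hashes = np.round(boxes * scale).dot(v)
        _, index = np.unique(hashes, return_index=True)
        return np.sort(index)

    def filter_small_boxes(boxes, min_size):
        """Return indices of boxes whose width and height exceed min_size."""
        w = boxes[:, 2] - boxes[:, 0]
        h = boxes[:, 3] - boxes[:, 1]
        return np.where((w > min_size) & (h > min_size))[0]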
Example #3
    def forward(self, inputs, outputs):

        up_scale = self.up_scale
        M = self.resolution
        for lvl in range(self.num_fpn_lvls):
            feat = inputs[2 * lvl].data
            rois = inputs[2 * lvl + 1].data

            num_rois = rois.shape[0]
            spatial_scale = self.spatial_scales[lvl]

            # convert from NCHW to NHWC
            feat = feat.transpose((0, 2, 3, 1))
            feat_h, feat_w = feat.shape[1], feat.shape[2]

            # expand the rois, rescale them to the feature map, and clip
            # to the feature map boundary
            pad_rois = box_utils.expand_boxes(rois[:, 1:5], up_scale)
            pad_rois = pad_rois * spatial_scale
            pad_rois = box_utils.clip_boxes_to_image(pad_rois, feat_h, feat_w)

            # extract the feature inside each padded roi and resize to M x M
            pad_roi_feats = np.zeros((num_rois, M, M, feat.shape[3]))
            batch_idx = rois[:, 0]
            for i in range(num_rois):
                batch_id = int(batch_idx[i])
                # round to int: float coordinates cannot be used for slicing
                pad_roi = pad_rois[i].astype(int)
                pad_roi_feat = feat[batch_id, pad_roi[1]:pad_roi[3] + 1,
                                    pad_roi[0]:pad_roi[2] + 1, :]
                pad_roi_feat_resize = cv2.resize(pad_roi_feat, (M, M))
                pad_roi_feats[i] = pad_roi_feat_resize

            pad_roi_feats = pad_roi_feats.transpose((0, 3, 1, 2))

            outputs[lvl].reshape(pad_roi_feats.shape)
            outputs[lvl].data[...] = pad_roi_feats
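A standalone toy version of the crop-and-resize step in Example #3 (illustrative shapes only; note that cv2.resize takes a (width, height) tuple and operates on the two leading axes of an HWC array):

    import numpy as np
    import cv2

    feat = np.random.rand(32, 32, 8).astype(np.float32)      # one HWC map
    pad_roi = np.array([3.7, 4.2, 19.9, 25.1]).astype(int)   # x1, y1, x2, y2
    crop = feat[pad_roi[1]:pad_roi[3] + 1, pad_roi[0]:pad_roi[2] + 1, :]
    resized = cv2.resize(crop, (14, 14))                     # (14, 14, 8)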
Example #4
    def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                                 top_k):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'rb') as f:  # binary mode for pickle
            proposals = pickle.load(f)
        # proposals are indexed as proposals['boxes'][seq_idx][idx]
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            seq_idx = entry['seq_idx']
            idx = entry['idx']
            boxes = proposals['boxes'][seq_idx][idx]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
Example #5
    def get_context_rois(self, rois, im_info, zoom_ratio=1.1):
        """Return the rois with more context. 

          - 'im_info': 2D tensor of shape (N, 3) where the three columns encode
            the input image's [height, width, scale]. Height and width are
            for the input to the network, not the original image; scale is the
            scale factor used to scale the original image to the network input
            size.

          - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the
            five columns encode [batch ind, x1, y1, x2, y2]. The boxes are
            w.r.t. the network input, which is a *scaled* version of the
            original image; these proposals must be scaled by 1 / scale (where
            scale comes from im_info; see above) to transform it back to the
            original input image coordinate system.
        """
        if zoom_ratio > 0:
            roi_boxes = rois[:, 1:]
            batch_indices = rois[:, [0]]  # keep 2D for concatenation below
            roi_boxes = boxes_utils.expand_boxes(roi_boxes, zoom_ratio)
            # im_info rows are [height, width, scale] (see docstring)
            roi_boxes = boxes_utils.clip_boxes_to_image(
                roi_boxes, im_info[0][0], im_info[0][1])
            rois = np.concatenate((batch_indices, roi_boxes), axis=1)
        else:
            # Add global context, i.e. roi-pooling on the whole feature map
            # (the input size is hard-coded to 800 x 800 here).
            rois[:, 1] = 0
            rois[:, 2] = 0
            rois[:, 3] = 800 - 1
            rois[:, 4] = 800 - 1
            # rois[:, 3] = im_info[0][1] - 1
            # rois[:, 4] = im_info[0][0] - 1
        return rois
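Most of the forward/blob examples here (#3, #5 through #8, #10 through #12) expand boxes before clipping them. A hedged sketch of expand_boxes, assuming it scales each box about its center by the given factor:

    import numpy as np

    def expand_boxes(boxes, scale):
        """Expand (x1, y1, x2, y2) boxes about their centers by `scale`."""
        w_half = (boxes[:, 2] - boxes[:, 0]) * .5 * scale
        h_half = (boxes[:, 3] - boxes[:, 1]) * .5 * scale
        x_c = (boxes[:, 2] + boxes[:, 0]) * .5
        y_c = (boxes[:, 3] + boxes[:, 1]) * .5
        boxes_exp = np.zeros(boxes.shape)
        boxes_exp[:, 0] = x_c - w_half
        boxes_exp[:, 1] = y_c - h_half
        boxes_exp[:, 2] = x_c + w_half
        boxes_exp[:, 3] = y_c + h_half
        return boxes_exp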
Example #6
    def forward(self, inputs, outputs):
        data = inputs[0].data
        k_min = self.k_min
        k_max = self.k_max
        up_scale = self.up_scale
        height, width = data.shape[2], data.shape[3]

        for lvl in range(k_min, k_max + 1):
            rois = inputs[1 + lvl - k_min].data  # skip the 'data' blob
            bboxes = rois[:, 1:5]
            batch_ids = rois[:, [0]]
            # up-scale the bboxes and clip them to the image boundary
            pad_bboxes = box_utils.expand_boxes(bboxes, up_scale)
            pad_bboxes = box_utils.clip_boxes_to_image(pad_bboxes, height,
                                                       width)
            # add the batch_ids to the rois
            pad_rois = np.hstack((batch_ids, pad_bboxes))

            outputs[lvl - k_min].reshape(pad_rois.shape)
            outputs[lvl - k_min].data[...] = pad_rois

        # Copy rois_idx_restore_int32 to scale_rois_idx_restore_int32;
        # a little surgery is needed because the output blob must be int32.
        rois_idx_restore_int32 = inputs[-1].data
        outputs[-1].init(list(rois_idx_restore_int32.shape),
                         caffe2_pb2.TensorProto.INT32)
        outputs[-1].reshape(rois_idx_restore_int32.shape)
        outputs[-1].data[...] = rois_idx_restore_int32.astype(np.int32)
Example #7
def add_refine_keypoints_blobs_gaussian(blobs, roidb, fg_rois_per_image,
                                        fg_inds, im_scale, batch_idx, data):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py, so that keypoint_rois
    # and refined_keypoint_rois stay matched; a mismatch between them causes
    # serious problems for training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Let's expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(pad_fg_rois, pad_img_h,
                                                    pad_img_w)

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_gaussian_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE)

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # sampled_fg_rois is actually one random roi, but that's ok because ...
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it an all-zero heatmap blob
        M = cfg.REFINENET.KRCNN.HEATMAP_SIZE
        heats = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, M, M))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))

    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_heatmaps'] = heats
    blobs['refined_keypoint_weights'] = weights
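A worked example of the scale bookkeeping in Example #7, with made-up numbers (box_utils is the same helper module used above): the sampled rois live in original-image coordinates, so the clip bounds are the network input size divided by im_scale, and the padded rois are scaled back up at the end.

    im_scale = 2.0
    inp_h, inp_w = 1024, 1344                     # network input size
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale  # 512.0, 672.0
    roi = np.array([[100., 80., 300., 240.]])     # original-image coords
    pad_roi = box_utils.expand_boxes(roi, 2.0)    # -> [[0., 0., 400., 320.]]
    pad_roi = box_utils.clip_boxes_to_image(pad_roi, pad_img_h, pad_img_w)
    pad_roi *= im_scale                           # -> network-input coords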
Example #8
    def forward(self, inputs, outputs):

        data = inputs[0].data
        mask_probs = inputs[1].data
        mask_rois = inputs[2].data

        # whether to binarize the indicator with a threshold
        if cfg.REFINENET.USE_PERCENTTHRES_INDICATOR:
            # keep only the top percentage of locations per mask
            mask_probs_reshape = mask_probs.reshape(
                mask_probs.shape[0], mask_probs.shape[1], -1)
            thres = int(cfg.REFINENET.PERCENTINDICATOR_THRES *
                        mask_probs_reshape.shape[2])
            mask_probs_sort = np.argsort(
                -mask_probs_reshape, axis=2)[:, :, :thres]
            mask_binary = np.zeros(mask_probs_reshape.shape, dtype=np.float32)
            for i in range(mask_probs_sort.shape[0]):
                for j in range(mask_probs_sort.shape[1]):
                    mask_binary[i, j, mask_probs_sort[i, j]] = 1.
            mask_binary = mask_binary.reshape(mask_probs.shape)
            mask_probs *= mask_binary
        elif cfg.REFINENET.USE_THRES_INDICATOR:
            mask_binary = np.array(
                mask_probs > cfg.REFINENET.INDICATOR_THRES, dtype=np.float32
            )
            mask_probs *= mask_binary
        # output indicator resolution
        M = self.resolution
        up_scale = self.up_scale
        num_cls = mask_probs.shape[1]
        num_rois = mask_rois.shape[0]
        mask_indicators = np.zeros((num_rois, M, M, num_cls), dtype=np.float32)

        # preparing data
        height, width = data.shape[2], data.shape[3]
        mask_probs_NHWC = mask_probs.transpose((0, 2, 3, 1))
        rois = mask_rois[:, 1:5]  # ignore batch_id
        pad_rois = box_utils.expand_boxes(rois, up_scale)
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

        # calculate converted coordinates
        converted_coords = box_utils.convert_coordinate(rois, pad_rois, M)
        for i in range(num_rois):
            mask_prob = mask_probs_NHWC[i]
            coords = converted_coords[i]
            shape = (coords[2] - coords[0] + 1, coords[3] - coords[1] + 1)  # w, h
            if shape[0] < 1 or shape[1] < 1:
                continue
            mask_prob_resize = cv2.resize(mask_prob, shape)
            if mask_prob_resize.ndim == 2:
                # cv2.resize drops a trailing singleton channel; restore it
                mask_prob_resize = mask_prob_resize[:, :, np.newaxis]
            mask_indicators[i, coords[1]:coords[3] + 1, coords[0]:coords[2] + 1] = \
                mask_prob_resize

        swap_order = (0, 3, 1, 2)
        mask_indicators = mask_indicators.transpose(swap_order)

        outputs[0].reshape(mask_indicators.shape)
        outputs[0].data[...] = mask_indicators
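The double loop that scatters ones into mask_binary in Example #8 can also be written without Python loops; a small self-contained sketch of the same top-percent binarization (illustrative shapes):

    import numpy as np

    probs = np.random.rand(2, 3, 28 * 28).astype(np.float32)  # (N, C, M*M)
    thres = int(0.3 * probs.shape[2])            # keep the top 30% per mask
    top_idx = np.argsort(-probs, axis=2)[:, :, :thres]
    binary = np.zeros_like(probs)
    np.put_along_axis(binary, top_idx, 1.0, axis=2)
    probs *= binary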
Example #9
    def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                                 top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'rb') as f:
            proposals1 = pickle.load(f, encoding='bytes')

        # Files pickled under Python 2 carry byte-string keys:
        # [b'cfg', b'boxes', b'scores', b'ids', b'names']; re-key with str.
        proposals = {}
        proposals['cfg'] = proposals1[b'cfg']
        proposals['boxes'] = proposals1[b'boxes']
        proposals['scores'] = proposals1[b'scores']
        proposals['ids'] = proposals1[b'ids']
        proposals['names'] = proposals1[b'names']

        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            print("entry id in entry['id'] ", entry['id'])
            #print(proposals[id_field][i])
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Clip boxes to the image; this variant keeps all proposals
            # (the dedup / small-box / top-k filtering below is disabled)
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            # keep = box_utils.unique_boxes(boxes)
            # boxes = boxes[keep, :]
            # keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            # boxes = boxes[keep, :]
            # if top_k > 0:
            #     boxes = boxes[:top_k, :]
            box_list.append(boxes)
        print("shape box_list!!!!!!!!!!!!!!!!!!!!!!!!!   ", np.array(box_list))
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)
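The manual re-keying in Example #9 is one way to cope with Python 2 pickles under Python 3. A more generic sketch (given a path proposal_file) that decodes whatever byte keys appear:

    import pickle

    with open(proposal_file, 'rb') as f:
        raw = pickle.load(f, encoding='bytes')
    proposals = {k.decode() if isinstance(k, bytes) else k: v
                 for k, v in raw.items()}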
Example #10
    def forward(self, inputs, outputs):
        data = inputs[0].data
        rois = inputs[1].data
        up_scale = self.up_scale
        height, width = data.shape[2], data.shape[3]

        bboxes = rois[:, 1:5]
        batch_ids = rois[:, [0]]
        # up-scale the bboxes and clip them to the image boundary
        pad_bboxes = box_utils.expand_boxes(bboxes, up_scale)
        pad_bboxes = box_utils.clip_boxes_to_image(pad_bboxes, height, width)

        # add the batch_ids to the rois
        pad_rois = np.hstack((batch_ids, pad_bboxes))

        outputs[0].reshape(pad_rois.shape)
        outputs[0].data[...] = pad_rois
Example #11
    def forward(self, inputs, outputs):
        data = inputs[0].data
        keypoint_probs = inputs[1].data
        keypoint_rois = inputs[2].data

        # output indicator resolution
        M = self.resolution
        up_scale = self.up_scale
        num_rois = keypoint_rois.shape[0]
        num_keypoints = keypoint_probs.shape[1]

        # first expand the keypoint rois
        height, width = data.shape[2], data.shape[3]
        pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

        # get keypoint predictions and their probs
        # output shape is (#rois, 3, #keypoints) and 3 means (x, y, prob)
        pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs, keypoint_rois)
        
        # map keypoint positions onto the pad_rois
        # output shape is (#rois, #keypoints); locations are flattened
        locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
            pred_rois, pad_rois, M
        )
        locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

        # and now generate keypoint indicators
        keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
        for i in range(num_rois):
            locations = locations_on_pad_rois[i] # shape (#keypoints, )
            for k in range(num_keypoints):
                keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]

        # and reshape to 4 dimensions
        keypoint_indicators = keypoint_indicators.reshape(
            (num_rois, num_keypoints, M, M)
        )

        outputs[0].reshape(keypoint_indicators.shape)
        outputs[0].data[...] = keypoint_indicators
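The flattened labels in Example #11 index an M x M heatmap row-major, so a keypoint at heatmap position (x, y) lands at index y * M + x (assuming the row-major convention of keypoints_to_heatmap_labels). A quick illustrative round trip:

    M = 56
    x, y = 13, 41
    loc = y * M + x                       # flattened heatmap index
    assert (loc % M, loc // M) == (x, y)  # recover (x, y)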
Example #12
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h,
                                                    pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # if has multiple instances overlapped, use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the given fg roi
                    # to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M)
                    mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                    masks[i, :] = np.reshape(mask, M**2)

                else:  # Only one instance, then set label to be -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]

                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

                # Now determine the weight for each roi: if any other
                # instance of the same class falls inside this RoI, we
                # expect it to be a hard sample and assign it a larger weight
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:  # only same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # and check whether any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop

        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
    # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because ...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks

    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
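Example #12 relies on box_utils.bbox_overlaps to match fg rois to gt polygons. Detectron implements it in Cython; an illustrative pure-NumPy equivalent with the same (R, G) IoU-matrix output and the +1 pixel convention used throughout these examples:

    import numpy as np

    def bbox_overlaps(boxes, gt_boxes):
        """IoU between R boxes and G gt boxes; returns an (R, G) matrix."""
        area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                  (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
        iw = np.minimum(boxes[:, None, 2], gt_boxes[None, :, 2]) - \
             np.maximum(boxes[:, None, 0], gt_boxes[None, :, 0]) + 1
        ih = np.minimum(boxes[:, None, 3], gt_boxes[None, :, 3]) - \
             np.maximum(boxes[:, None, 1], gt_boxes[None, :, 1]) + 1
        inter = np.maximum(iw, 0) * np.maximum(ih, 0)
        return inter / (area[:, None] + gt_area[None, :] - inter)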
Example #13
	def __init__(self,
				 root,
				 year='2007',
				 image_set='trainval',
				 download=False,
				 transforms=None):

		self.root = root

		self.transforms = transforms
		self.year = year
		if year == "2007" and image_set == "test":
			year = "2007-test"
		self.url = DATASET_YEAR_DICT[year]['url']
		self.filename = DATASET_YEAR_DICT[year]['filename']
		self.md5 = DATASET_YEAR_DICT[year]['md5']
		valid_sets = ["train", "trainval", "val"]
		if year == "2007-test":
			valid_sets.append("test")
		self.image_set = verify_str_arg(image_set, "image_set", valid_sets)

		base_dir = DATASET_YEAR_DICT[year]['base_dir']
		voc_root = os.path.join(self.root, base_dir)
		image_dir = os.path.join(voc_root, 'JPEGImages')
		annotation_dir = os.path.join(voc_root, 'Annotations')

		if download:
			download_extract(self.url, self.root, self.filename, self.md5)

		# pause()
		if not os.path.isdir(voc_root):
			raise RuntimeError('Dataset not found or corrupted.' +
							   ' You can use download=True to download it')

		splits_dir = os.path.join(voc_root, 'ImageSets/Main')

		split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')

		with open(split_f, "r") as f:
			file_names = [x.strip() for x in f.readlines()]

		self.class_labels = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

		self.num_classes = len(self.class_labels)

		self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
		annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]
		assert (len(self.images) == len(annotations))

		# load annotations:
		self.annotations = []
		for i, ann_path in enumerate(annotations):
			raw_annot = self.parse_voc_xml(ET.parse(ann_path).getroot())['annotation']
			self.annotations.append({
				'img_key': int(raw_annot['filename'].replace('.jpg','')),
				'width'  : int(raw_annot['size']['width']),
				'height' : int(raw_annot['size']['height']),
				'object'  : raw_annot['object']
				})

		## TODO: adjust this to be prettier =p
		ss_data = self.root + 'selective_search_data/voc_' + self.year + '_' + self.image_set + '.pkl'
		with open(ss_data, 'rb') as f:
			proposals = pickle.load(f)

		sort_proposals(proposals, 'indexes')

		self.proposals = []
		for i, boxes in enumerate(proposals['boxes']):
			if i % 2500 == 0:
				logger.info(' {:d}/{:d}'.format(i + 1, len(proposals['boxes'])))
			
			annotation = self.annotations[i]
			assert annotation['img_key'] == proposals['indexes'][i]
			# Remove duplicate boxes and very small boxes and then take top k
			boxes = box_utils.clip_boxes_to_image(boxes, annotation['height'], annotation['width'])
			keep = box_utils.unique_boxes(boxes)
			boxes = boxes[keep, :]
			keep = box_utils.filter_small_boxes(boxes, cfg.FAST_RCNN.MIN_PROPOSAL_SIZE)
			boxes = boxes[keep, :]
			if cfg.FAST_RCNN.TOP_K > 0:
				boxes = boxes[:cfg.FAST_RCNN.TOP_K, :]
			self.proposals.append(boxes.astype(np.float64))  # np.float was removed from NumPy
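A hypothetical usage of the dataset in Example #13 (the class name VOCDetection is assumed here, since the snippet omits it; only the constructor arguments shown above are used):

    dataset = VOCDetection(root='/data/VOCdevkit/', year='2007',
                           image_set='trainval', download=False)
    print(dataset.num_classes, len(dataset.images), len(dataset.proposals))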