Example #1
  def testSingleImageGroundtruthExport(self):
    masks = np.array(
        [[[1, 1,], [1, 1]],
         [[0, 0], [0, 1]],
         [[0, 0], [0, 0]]], dtype=np.uint8)
    boxes = np.array([[0, 0, 1, 1],
                      [0, 0, .5, .5],
                      [.5, .5, 1, 1]], dtype=np.float32)
    coco_boxes = np.array([[0, 0, 1, 1],
                           [0, 0, .5, .5],
                           [.5, .5, .5, .5]], dtype=np.float32)
    classes = np.array([1, 2, 3], dtype=np.int32)
    is_crowd = np.array([0, 1, 0], dtype=np.int32)
    next_annotation_id = 1
    expected_counts = ['04', '31', '4']

    # Tests exporting without passing in is_crowd (for backward compatibility).
    coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
        image_id='first_image',
        category_id_set=set([1, 2, 3]),
        next_annotation_id=next_annotation_id,
        groundtruth_boxes=boxes,
        groundtruth_classes=classes,
        groundtruth_masks=masks)
    for i, annotation in enumerate(coco_annotations):
      self.assertEqual(annotation['segmentation']['counts'],
                       expected_counts[i])
      self.assertTrue(np.all(np.equal(mask.decode(
          annotation['segmentation']), masks[i])))
      self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i])))
      self.assertEqual(annotation['image_id'], 'first_image')
      self.assertEqual(annotation['category_id'], classes[i])
      self.assertEqual(annotation['id'], i + next_annotation_id)

    # Tests exporting with is_crowd.
    coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
        image_id='first_image',
        category_id_set=set([1, 2, 3]),
        next_annotation_id=next_annotation_id,
        groundtruth_boxes=boxes,
        groundtruth_classes=classes,
        groundtruth_masks=masks,
        groundtruth_is_crowd=is_crowd)
    for i, annotation in enumerate(coco_annotations):
      self.assertEqual(annotation['segmentation']['counts'],
                       expected_counts[i])
      self.assertTrue(np.all(np.equal(mask.decode(
          annotation['segmentation']), masks[i])))
      self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i])))
      self.assertEqual(annotation['image_id'], 'first_image')
      self.assertEqual(annotation['category_id'], classes[i])
      self.assertEqual(annotation['iscrowd'], is_crowd[i])
      self.assertEqual(annotation['id'], i + next_annotation_id)
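
For reference, the expected_counts values above are the compressed COCO RLE strings of the three 2x2 masks; each encodes run lengths of zeros and ones in column-major order. A minimal sketch reproducing the first one, assuming pycocotools is installed:

import numpy as np
from pycocotools import mask

# encode() expects a Fortran-ordered uint8 array; decode() round-trips it.
m = np.asfortranarray(np.array([[1, 1], [1, 1]], dtype=np.uint8))
rle = mask.encode(m)
print(rle['counts'])   # b'04': a run of 0 zeros followed by 4 ones
assert np.array_equal(mask.decode(rle), m)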
Example #2
    def load_dataset(self):
        dataset  = self.cfg.dataset
        dataset_phase = self.cfg.dataset_phase
        dataset_ann = self.cfg.dataset_ann

        # initialize COCO api
        annFile = '%s/annotations/%s_%s.json'%(dataset,dataset_ann,dataset_phase)
        self.coco = COCO(annFile)

        imgIds = self.coco.getImgIds()

        data = []

        # loop through each image
        for imgId in imgIds:
            item = DataItem()

            img = self.coco.loadImgs(imgId)[0]
            item.im_path = "%s/images/%s/%s"%(dataset, dataset_phase, img["file_name"])
            item.im_size = [3, img["height"], img["width"]]
            item.coco_id = imgId
            annIds = self.coco.getAnnIds(imgIds=img['id'], iscrowd=False)
            anns = self.coco.loadAnns(annIds)

            all_person_keypoints = []
            masked_persons_RLE = []
            visible_persons_RLE = []
            all_visibilities = []

            # Consider only images with people
            has_people = len(anns) > 0
            if not has_people and self.cfg.coco_only_images_with_people:
                continue

            for ann in anns: # loop through each person
                person_keypoints = []
                visibilities = []
                if ann["num_keypoints"] != 0:
                    for i in range(self.cfg.num_joints):
                        x_coord = ann["keypoints"][3 * i]
                        y_coord = ann["keypoints"][3 * i + 1]
                        visibility = ann["keypoints"][3 * i + 2]
                        visibilities.append(visibility)
                        if visibility != 0: # i.e. if labeled
                            person_keypoints.append([i, x_coord, y_coord])
                    all_person_keypoints.append(np.array(person_keypoints))
                    visible_persons_RLE.append(maskUtils.decode(self.coco.annToRLE(ann)))
                    all_visibilities.append(visibilities)
                if ann["num_keypoints"] == 0:
                    masked_persons_RLE.append(self.coco.annToRLE(ann))

            item.joints = all_person_keypoints
            item.im_neg_mask = maskUtils.merge(masked_persons_RLE)
            if self.cfg.use_gt_segm:
                item.gt_segm = np.moveaxis(np.array(visible_persons_RLE), 0, -1)
                item.visibilities = all_visibilities
            data.append(item)

        self.has_gt = self.cfg.dataset != "image_info"
        return data
Example #3
  def testExportSegmentsToCOCO(self):
    image_ids = ['first', 'second']
    detection_masks = [np.array(
        [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]],
        dtype=np.uint8), np.array(
            [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]],
            dtype=np.uint8)]

    for i, detection_mask in enumerate(detection_masks):
      detection_masks[i] = detection_mask[:, :, :, None]

    detection_scores = [np.array([.8], np.float32), np.array([.7], np.float32)]
    detection_classes = [np.array([1], np.int32), np.array([1], np.int32)]

    categories = [{'id': 0, 'name': 'person'},
                  {'id': 1, 'name': 'cat'},
                  {'id': 2, 'name': 'dog'}]
    output_path = os.path.join(tf.test.get_temp_dir(), 'segments.json')
    result = coco_tools.ExportSegmentsToCOCO(
        image_ids,
        detection_masks,
        detection_scores,
        detection_classes,
        categories,
        output_path=output_path)
    with tf.gfile.GFile(output_path, 'r') as f:
      written_result = f.read()
      written_result = json.loads(written_result)
      mask_load = mask.decode([written_result[0]['segmentation']])
      self.assertTrue(np.allclose(mask_load, detection_masks[0]))
      self.assertAlmostEqual(result, written_result)
Example #4
def rle_masks_to_boxes(masks):
    """Computes the bounding box of each mask in a list of RLE encoded masks.
    Returns the (N, 4) boxes and the indices of the non-empty masks to keep.
    """
    if len(masks) == 0:
        return np.zeros((0, 4)), np.zeros((0,), dtype=np.int32)

    decoded_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
    ]

    def get_bounds(flat_mask):
        inds = np.where(flat_mask > 0)[0]
        return inds.min(), inds.max()

    boxes = np.zeros((len(decoded_masks), 4))
    keep = [True] * len(decoded_masks)
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            keep[i] = False
            continue
        flat_mask = mask.sum(axis=0)
        x0, x1 = get_bounds(flat_mask)
        flat_mask = mask.sum(axis=1)
        y0, y1 = get_bounds(flat_mask)
        boxes[i, :] = (x0, y0, x1, y1)

    return boxes, np.where(keep)[0]
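
A small usage sketch, assuming pycocotools is installed and rle_masks_to_boxes above is in scope; empty masks are reported via the returned keep indices:

import numpy as np
import pycocotools.mask as mask_util

h, w = 20, 20
m1 = np.zeros((h, w), dtype=np.uint8)
m1[5:10, 3:8] = 1                      # occupies x in [3, 7], y in [5, 9]
m2 = np.zeros((h, w), dtype=np.uint8)  # empty mask
rles = [mask_util.encode(np.asfortranarray(m)) for m in (m1, m2)]

boxes, keep = rle_masks_to_boxes(rles)
print(boxes[0])   # [3. 5. 7. 9.]  (x0, y0, x1, y1, inclusive)
print(keep)       # [0] -- the empty mask is dropped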
Example #5
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
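
A quick sketch of a typical call, assuming pycocotools is installed: the polygon is given in absolute image coordinates and rasterized into a fixed M x M patch relative to its box (the box and polygon below are made up for illustration):

import numpy as np

box = np.array([10.0, 20.0, 50.0, 60.0])         # x0, y0, x1, y1
polygons = [[10, 20, 50, 20, 50, 60, 10, 60]]    # flattened x, y pairs
M = 28

m = polys_to_mask_wrt_box(polygons, box, M)
print(m.shape, m.dtype)   # (28, 28) float32; this polygon fills its box
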
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
Example #7
def crop_mask(boxes,segmentations,flipped, imsize):
    assert (boxes.shape[0]==len(segmentations))
    psegmentations=[]
    for i in xrange(len(segmentations)):
        gts=segmentations[i]
        box=boxes[i,:]
        if type(gts) == list and gts:
            assert (type(gts[0]) != dict)
            prle= mask.frPyObjects(gts,imsize[1],imsize[0])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle= mask.frPyObjects([gts],imsize[1],imsize[0])
        elif type(gts) == dict and \
                     (type(gts['counts']) == unicode or type(gts['counts']) == str):
            prle = [gts]
        else:
            print '{} box has no segmentation'.format(i)
            psegmentations.append([])
            continue
        if len(prle)==1:
            prle=prle[0]
        else:
            prle= mask.merge(prle)
        pmask=mask.decode([prle])
        if flipped:
            pmask=pmask[:,::-1,:]
        pmask=np.copy(pmask[box[1]:box[3],box[0]:box[2],:],order='F')
        psegmentations.append(mask.encode(pmask))
    return psegmentations
Example #8
 def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
     size = scmap_shape[0:2]
     scmask = np.ones(size)
     m = maskUtils.decode(data_item.im_neg_mask)
     if m.size:
         scmask = 1.0 - imresize(m, size)
     scmask = np.stack([scmask] * self.cfg.num_joints, axis=-1)
     return scmask
 def annToMask(self, ann, height, width):
     """
     Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
     :return: binary mask (numpy 2D array)
     """
     rle = self.annToRLE(ann, height, width)
     m = maskUtils.decode(rle)
     return m
Example #10
 def _flip_rle(rle, height, width):
     if 'counts' in rle and type(rle['counts']) == list:
         # Magic RLE format handling painfully discovered by looking at the
         # COCO API showAnns function.
         rle = mask_util.frPyObjects([rle], height, width)
     mask = mask_util.decode(rle)
     mask = mask[:, ::-1, :]
     rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
     return rle
Example #11
    def draw_objdb_masks(self, output_dir, objdb=None):
        if objdb is None:
            objdb = self.objdb

        mask_dir = osp.join(output_dir, '{}_objdb_masks'.format(self._image_set))
        img_dir  = osp.join(output_dir, '{}_objdb_imgs'.format(self._image_set))

        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(mask_dir)
        ds_utils.maybe_create(img_dir)


        for i in xrange(len(objdb)):
            obj     = objdb[i]

            im_path = obj['image']
            ann_id  = obj['obj_id']
            poly    = obj['poly']
            bb      = obj['box'].astype(np.int16)
            cls     = obj['cls']
            width   = obj['width']
            height  = obj['height']

            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            msk = np.amax(COCOmask.decode(poly), axis=2)

            # binarize the mask
            msk = msk * 255
            retVal, msk = cv2.threshold(msk, 127, 255, cv2.THRESH_BINARY)
            msk = msk.astype(np.uint8)
            # msk = ds_utils.dilate_mask(msk, 9)

            # img = (1 - 0.5/255 * msk.reshape((height, width, 1))) * img + \
            #       0.5/255 * msk.reshape((height, width, 1)) * \
            #       np.random.random((1, 3)) * 255

            # cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), \
            #             (0, 255, 0), 2)
            #
            # fontScale = 0.0009 * math.sqrt(float(width*width + height*height))
            #
            #
            # cv2.putText(img, '{:}'.format(self.classes[cls]), \
            #             (bb[0], bb[1] - 2), \
            #             cv2.FONT_HERSHEY_SIMPLEX, \
            #             fontScale, (0, 0, 255), 1)

            im_name, im_ext = osp.splitext(osp.basename(im_path))

            output_path = osp.join(mask_dir, im_name+'_'+str(ann_id).zfill(12)+im_ext)
            # output_path = osp.join(mask_dir, im_name+im_ext)
            cv2.imwrite(output_path, msk)

            output_path = osp.join(img_dir,  im_name+'_'+str(ann_id).zfill(12)+im_ext)
            # output_path = osp.join(img_dir,  im_name+im_ext)
            cv2.imwrite(output_path, img)
            print i
Example #12
def get_mask(idx):
    ann_ids = coco.getAnnIds(imgIds=img_ids[idx])
    anns = coco.loadAnns(ann_ids)
    img = coco.loadImgs(img_ids[idx])[0]
    m = np.zeros((img['height'], img['width']))
    for j in anns:
        if j['iscrowd']:
            rle = mask.frPyObjects(j['segmentation'], img['height'], img['width'])
            m += mask.decode(rle)
    return m < 0.5
 def convert(self, mode):
     width, height = self.size
     if mode == "mask":
         rles = mask_utils.frPyObjects(
             [p.numpy() for p in self.polygons], height, width
         )
         rle = mask_utils.merge(rles)
         mask = mask_utils.decode(rle)
         mask = torch.from_numpy(mask)
         # TODO add squeeze?
         return mask
Example #14
    def draw_roidb_masks(self, output_dir, roidb=None):

        mask_dir = osp.join(output_dir, '{}_roidb_masks'.format(self._image_set))
        img_dir  = osp.join(output_dir, '{}_roidb_imgs'.format(self._image_set))

        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(mask_dir)
        ds_utils.maybe_create(img_dir)

        if roidb is None:
            roidb = self.roidb

        for i in xrange(len(roidb)):
            rois    = roidb[i]
            im_path = rois['image']
            clses   = rois['clses']
            boxes   = rois['boxes']
            rles    = rois['polys']
            width   = rois['width']
            height  = rois['height']

            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            msk = np.zeros((height, width), dtype=np.uint8)

            for j in xrange(len(rles)):
                rle = rles[j]
                bb  = boxes[j,:].astype(np.int)
                cls = clses[j]

                tmp = np.amax(COCOmask.decode(rle), axis=2) * 255
                retVal, tmp = cv2.threshold(tmp, 127, 255, cv2.THRESH_BINARY)
                tmp = tmp.astype(np.uint8)
                tmp = ds_utils.dilate_mask(tmp, 9)
                msk = np.maximum(msk, tmp)

                # fontScale = 0.0009 * math.sqrt(float(width*width + height*height))
                # cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), \
                #             (0, 255, 0), 2)
                # cv2.putText(img, '{:}'.format(self.classes[cls]), \
                #             (bb[0], bb[1] - 2), \
                #             cv2.FONT_HERSHEY_SIMPLEX, \
                #             fontScale, (0, 0, 255), 1)

            # img = (1 - 0.5/255 * msk.reshape((height, width, 1))) * img + \
            #       0.5/255 * msk.reshape((height, width, 1)) * \
            #       np.random.random((1, 3)) * 255


            output_path = osp.join(mask_dir, osp.basename(im_path))
            cv2.imwrite(output_path, msk)
            output_path = osp.join(img_dir,  osp.basename(im_path))
            cv2.imwrite(output_path, img)

            print i
Example #15
def polys_to_mask(polygons, height, width):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed inside a height x width image. The resulting
    mask is therefore of shape (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
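
A usage sketch, pycocotools assumed installed: one square polygon in a 100 x 100 image rasterizes to a full-image float32 mask:

polygons = [[10, 10, 40, 10, 40, 40, 10, 40]]   # one square, flattened x, y pairs
m = polys_to_mask(polygons, height=100, width=100)
print(m.shape, m.dtype)   # (100, 100) float32
print(int(m.sum()))       # roughly 30 * 30 foreground pixels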
Example #16
def segmentation_to_mask(polys, height, width):
    """
    Convert polygons to binary masks.

    Args:
        polys: a list of nx2 float array

    Returns:
        a binary matrix of (height, width)
    """
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
Example #17
def vis_one_image_opencv(
        im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2,
        show_box=False, dataset=None, show_class=False):
    """Constructs a numpy array with the detections visualized."""

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None:
        masks = mask_util.decode(segms)
        color_list = colormap()
        mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))

        # show class (off by default)
        if show_class:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2), class_str)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im, masks[..., i], color_mask)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im
Example #18
        def _getIgnoreRegion(iid, coco):
            img = coco.imgs[iid]

            if not 'ignore_regions_x' in img.keys():
                return None

            if len(img['ignore_regions_x']) == 0:
                return None

            rgns_merged = []
            for region_x, region_y in zip(img['ignore_regions_x'], img['ignore_regions_y']):
                rgns = [iter(region_x), iter(region_y)]
                rgns_merged.append(list(it.next() for it in itertools.cycle(rgns)))
            rles = maskUtils.frPyObjects(rgns_merged, img['height'], img['width'])
            rle = maskUtils.merge(rles)
            return maskUtils.decode(rle)
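
The itertools.cycle line above interleaves the x and y lists into the flat [x1, y1, x2, y2, ...] layout that frPyObjects expects (it.next() is the Python 2 spelling). A small equivalent sketch, assuming equal-length coordinate lists and pycocotools installed:

import itertools
import pycocotools.mask as maskUtils

region_x = [10, 60, 60, 10]
region_y = [10, 10, 50, 50]

# Python 3 equivalent of the cycle/next trick above.
flat = list(itertools.chain.from_iterable(zip(region_x, region_y)))
# -> [10, 10, 60, 10, 60, 50, 10, 50]

rles = maskUtils.frPyObjects([flat], 100, 100)   # height and width assumed 100
ignore = maskUtils.decode(maskUtils.merge(rles))
print(ignore.shape)   # (100, 100)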
Example #19
def _get_mask_targets(polygons):
    mask_targets_blob = np.zeros((len(polygons), cfg.MWIDTH * cfg.MHEIGHT), dtype=np.float32)
    mask_targets_weights=np.zeros((len(polygons),1),dtype=np.float32)
    img=np.ones( (cfg.MHEIGHT,cfg.MWIDTH, 1), dtype=np.float32)
    for i, polygon in enumerate(polygons):
        if not polygon:
            continue
        else:
            #rle=COCOmask.frPyObjects(polygon,cfg.MHEIGHT,cfg.MWIDTH)
            m = COCOmask.decode(polygon)
            m = np.sum(m,axis=2)
            assert max(m.ravel())==1
            assert min(m.ravel())==0
            m=simage.interpolation.zoom(input=m, zoom=(float(cfg.MHEIGHT)/m.shape[0],float(cfg.MWIDTH)/m.shape[1]), order = 2)
            # debug
            mask_targets_blob[i,:]=m.ravel()
            mask_targets_weights[i]=1.
    return mask_targets_blob,mask_targets_weights
Example #20
def polys_to_mask_wrt_box(polygons, box, M):
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
Example #21
 def testSingleImageDetectionMaskExport(self):
   masks = np.array(
       [[[1, 1,], [1, 1]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 0]]], dtype=np.uint8)
   classes = np.array([1, 2, 3], dtype=np.int32)
   scores = np.array([0.8, 0.2, 0.7], dtype=np.float32)
   coco_annotations = coco_tools.ExportSingleImageDetectionMasksToCoco(
       image_id='first_image',
       category_id_set=set([1, 2, 3]),
       detection_classes=classes,
       detection_scores=scores,
       detection_masks=masks)
   expected_counts = ['04', '31', '4']
   for i, mask_annotation in enumerate(coco_annotations):
     self.assertEqual(mask_annotation['segmentation']['counts'],
                      expected_counts[i])
     self.assertTrue(np.all(np.equal(mask.decode(
         mask_annotation['segmentation']), masks[i])))
     self.assertEqual(mask_annotation['image_id'], 'first_image')
     self.assertEqual(mask_annotation['category_id'], classes[i])
     self.assertAlmostEqual(mask_annotation['score'], scores[i])
Example #22
 def getMask(self, ref):
     '''
     :return: mask, mask-area, mask-center
     '''
     ann = self.refToAnn[ref['ref_id']]
     image = self.imgs[ref['image_id']]
     if type(ann['segmentation'][0]) == list: # polygon
         rle = mask.frPyObjects(ann['segmentation'], image['height'], image['width'])
     else: # mask
         rle = ann['segmentation']
     m = mask.decode(rle)
     m = np.sum(m, axis=2)   # sometimes there are multiple binary map (corresponding to multiple segs)
     m = m.astype(np.uint8)  # convert to np.uint8
     # area
     area = sum(mask.area(rle))              # very close to ann['area']
     # position
     position_x = np.mean(np.where(m==1)[1]) # [1] means columns (matlab style) -> x (c++ style)
     position_y = np.mean(np.where(m==1)[0]) # [0] means rows (matlab style)    -> y (c++ style)
     # mass position (If there were multiple regions, we use the largest one.)
     label_m = label(m, connectivity=m.ndim)
     regions = regionprops(label_m)
     if len(regions) > 0:
         largest_id = np.argmax(np.array([props.filled_area for props in regions]))
         largest_props = regions[largest_id]
         mass_y, mass_x = largest_props.centroid
     else:
         mass_x, mass_y = position_x, position_y
     # if centroid is not in mask, we find the closest point to it from mask
      if m[int(mass_y), int(mass_x)] != 1:
         print 'Finding closest mask point...'
         kernel = np.ones((10, 10),np.uint8)
         me = cv2.erode(m, kernel, iterations = 1)
         points = zip(np.where(me == 1)[0].tolist(), np.where(me == 1)[1].tolist())  # row, col style
         points = np.array(points)
         dist   = np.sum((points - (mass_y, mass_x))**2, axis=1)
         id     = np.argsort(dist)[0]
         mass_y, mass_x = points[id]
     # return
     return {'mask': m, 'area': area, 'position_x': position_x, 'position_y': position_y, 'mass_x': mass_x, 'mass_y': mass_y}
Example #23
 def showRef(self, ref):
     # show image
     image = self.imgs[ref['image_id']]
     I = io.imread(osp.join(self.IMAGE_DIR, image['file_name']))
     plt.figure()
     plt.imshow(I)
     # show refer expression
     for sid, sent in enumerate(ref['sentences']):
         print '%s. %s' % (sid+1, sent['sent'])
     # show annotation
     ann_id = ref['ann_id']
     ann    = self.anns[ann_id]
     ax = plt.gca()
     polygons = []
     color = []
     # c = np.random.random((1, 3)).tolist()[0]
     c = 'none'
     if type(ann['segmentation'][0]) == list:
         # polygon
         for seg in ann['segmentation']:
             poly = np.array(seg).reshape((len(seg)/2, 2))
             polygons.append(Polygon(poly, True, alpha=0.4))
             color.append(c)
         p = PatchCollection(polygons, facecolors=color, edgecolors=(1,1,0,0), linewidths=3, alpha=1)
         ax.add_collection(p)  # yellow polygon
         p = PatchCollection(polygons, facecolors=color, edgecolors=(1,0,0,0), linewidths=1, alpha=1)
         ax.add_collection(p)  # red polygon
     else:
         # mask
         rle = ann['segmentation']
         m = mask.decode(rle)
         img = np.ones( (m.shape[0], m.shape[1], 3) )
         color_mask = np.array([2.0,166.0,101.0])/255
         for i in range(3):
             img[:,:,i] = color_mask[i]
         ax.imshow(np.dstack( (img, m*0.5) ))
         # p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4)
         # ax.add_collection(p)
     plt.show()
Example #24
def to_mask(polys, size):
    """Convert list of polygons to full size binary mask

    Parameters
    ----------
    polys : list of numpy.ndarray
        Numpy.ndarray with shape (N, 2) where N is the number of bounding boxes.
        The second axis represents points of the polygons.
        Specifically, these are :math:`(x, y)`.
    size : tuple
        Tuple of length 2: (width, height).

    Returns
    -------
    numpy.ndarray
        Full size binary mask of shape (height, width)
    """
    try_import_pycocotools()
    import pycocotools.mask as cocomask
    width, height = size
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
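
A short usage sketch matching the docstring above (pycocotools assumed installed): polygons are (N, 2) point arrays and size is (width, height), so the returned mask is height x width:

import numpy as np

poly = np.array([[10, 10], [80, 10], [10, 50]], dtype=np.float64)  # one triangle
m = to_mask([poly], size=(100, 60))   # (width, height)
print(m.shape)   # (60, 100)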
Example #25
    def evaluate_masks(
        self,
        all_boxes,
        all_segms,
        output_dir,
    ):
        res_file = os.path.join(
            output_dir, 'segmentations_' + self.dataset.name + '_results')
        res_file += '.json'

        os.environ['CITYSCAPES_DATASET'] = os.path.join(
            os.path.dirname(__file__), '../../data/cityscapes')
        os.environ['CITYSCAPES_RESULTS'] = os.path.join(output_dir, 'inst_seg')
        sys.path.insert(
            0,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                         '..', 'lib', 'dataset_devkit', 'cityscapesScripts'))
        sys.path.insert(
            0,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                         '..', 'lib', 'dataset_devkit', 'cityscapesScripts',
                         'cityscapesscripts', 'evaluation'))

        # Load the Cityscapes eval script *after* setting the required env vars,
        # since the script reads their values into global variables (at load time).
        import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
            as cityscapes_eval
        sys.argv = []

        roidb = self.dataset.get_roidb()
        for i, entry in enumerate(roidb):
            im_name = entry['image']

            basename = os.path.splitext(os.path.basename(im_name))[0]
            txtname = os.path.join(output_dir, 'inst_seg',
                                   basename + 'pred.txt')
            os.makedirs(os.path.join(output_dir, 'inst_seg'), exist_ok=True)
            with open(txtname, 'w') as fid_txt:
                for j in range(1, len(all_segms)):
                    clss = self.dataset.classes[j]
                    clss_id = cityscapes_eval.name2label[clss].id
                    segms = all_segms[j][i]
                    boxes = all_boxes[j][i]
                    if segms == []:
                        continue
                    masks = mask_util.decode(segms)

                    for k in range(boxes.shape[0]):
                        score = boxes[k, -1]
                        mask = masks[:, :, k]
                        pngname = os.path.join(
                            'seg_results', basename,
                            basename + '_' + clss + '_{}.png'.format(k))
                        # write txt
                        fid_txt.write('{} {} {}\n'.format(
                            pngname, clss_id, score))
                        # save mask
                        os.makedirs(os.path.join(output_dir, 'inst_seg',
                                                 'seg_results', basename),
                                    exist_ok=True)
                        cv2.imwrite(
                            os.path.join(output_dir, 'inst_seg', pngname),
                            mask * 255)
        cityscapes_eval.main()
        return None
Example #26
 def showAnns(self, anns):
     """
     Display the specified annotations.
     :param anns (array of object): annotations to display
     :return: None
     """
     if len(anns) == 0:
         return 0
     if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
         datasetType = 'instances'
     elif 'caption' in anns[0]:
         datasetType = 'captions'
     else:
         raise Exception('datasetType not supported')
     if datasetType == 'instances':
         ax = plt.gca()
         ax.set_autoscale_on(False)
         polygons = []
         color = []
         for ann in anns:
             c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]
             if 'segmentation' in ann:
                 if type(ann['segmentation']) == list:
                     # polygon
                     for seg in ann['segmentation']:
                         poly = np.array(seg).reshape(
                             (int(len(seg) / 2), 2))
                         polygons.append(Polygon(poly))
                         color.append(c)
                 else:
                     # mask
                     t = self.imgs[ann['image_id']]
                     if type(ann['segmentation']['counts']) == list:
                         rle = maskUtils.frPyObjects([ann['segmentation']],
                                                     t['height'],
                                                     t['width'])
                     else:
                         rle = [ann['segmentation']]
                     m = maskUtils.decode(rle)
                     img = np.ones((m.shape[0], m.shape[1], 3))
                     if ann['iscrowd'] == 1:
                         color_mask = np.array([2.0, 166.0, 101.0]) / 255
                     if ann['iscrowd'] == 0:
                         color_mask = np.random.random((1, 3)).tolist()[0]
                     for i in range(3):
                         img[:, :, i] = color_mask[i]
                     ax.imshow(np.dstack((img, m * 0.5)))
             if 'keypoints' in ann and type(ann['keypoints']) == list:
                 # turn skeleton into zero-based index
                 sks = np.array(
                     self.loadCats(ann['category_id'])[0]['skeleton']) - 1
                 kp = np.array(ann['keypoints'])
                 x = kp[0::3]
                 y = kp[1::3]
                 v = kp[2::3]
                 for sk in sks:
                     if np.all(v[sk] > 0):
                         plt.plot(x[sk], y[sk], linewidth=3, color=c)
                 plt.plot(x[v > 0],
                          y[v > 0],
                          'o',
                          markersize=8,
                          markerfacecolor=c,
                          markeredgecolor='k',
                          markeredgewidth=2)
                 plt.plot(x[v > 1],
                          y[v > 1],
                          'o',
                          markersize=8,
                          markerfacecolor=c,
                          markeredgecolor=c,
                          markeredgewidth=2)
         p = PatchCollection(polygons,
                             facecolor=color,
                             linewidths=0,
                             alpha=0.4)
         ax.add_collection(p)
         p = PatchCollection(polygons,
                             facecolor='none',
                             edgecolors=color,
                             linewidths=2)
         ax.add_collection(p)
     elif datasetType == 'captions':
         for ann in anns:
             print(ann['caption'])
def main():
    with open(__file__, 'r') as f:
        _file_source = f.read()

    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--fbms-annotation-json',
                        help='FBMS JSON annotations',
                        required=True)
    parser.add_argument(
        '--motion-masks-root',
        required=True,
        help='Directory containing estimated PNG motion masks for each frame.')
    parser.add_argument(
        '--detections-root',
        help='Directory containing outputs of detectron on FBMS.',
        required=True)
    parser.add_argument('--save-pickle', action='store_true')
    parser.add_argument('--moving-threshold', default=0.5, type=float)
    parser.add_argument('--output-dir', required=True)
    parser.add_argument(
        '--filename-format',
        choices=['frame', 'sequence_frame', 'fbms'],
        default='fbms',
        help=(
            'Specifies how to get frame number from the filename. '
            '"frame": the filename is the frame number, '
            '"sequence_frame": the frame number is separated by an '
            'underscore, '  # noqa: E127
            '"fbms": assume fbms style frame numbers'))
    args = parser.parse_args()

    detectron_root = Path(args.detections_root)
    motion_root = Path(args.motion_masks_root)

    dataset = COCO(args.fbms_annotation_json)

    output_root = Path(args.output_dir)
    output_root.mkdir(parents=True)

    logging_path = str(output_root / (Path(__file__).stem + '.log'))
    setup_logging(logging_path)

    file_logger = logging.getLogger(logging_path)
    file_logger.info('Source:\n%s' % _file_source)

    logging.info('Args:\n %s', pformat(vars(args)))

    # Map (sequence, frame_name) to frame_id.
    frame_key_to_id = {}
    for annotation in dataset.imgs.values():
        # Path ends in 'sequence/frame_name'
        path = Path(annotation['file_name'])
        frame_key_to_id[(path.parent.stem, path.stem)] = annotation['id']

    logging.info('Loading motion paths')
    # Map sequence to dict mapping frame index to motion mask path
    motion_mask_paths = load_motion_masks(motion_root)

    logging.info('Loading detectron paths')
    predictions = load_detectron_predictions(detectron_root)

    logging.info('Outputting moving detections')
    detection_results = []
    segmentation_results = []

    if args.filename_format == 'fbms':
        from utils.fbms.utils import get_framenumber
    elif args.filename_format == 'sequence_frame':

        def get_framenumber(x):
            return int(x.split('_')[-1])
    elif args.filename_format == 'frame':
        get_framenumber = int
    else:
        raise ValueError('Unknown --filename-format: %s' %
                         args.filename_format)

    # The last frame won't have a motion mask, so we use the second to last
    # frame's mask as the last frame's mask.
    for sequence in predictions.keys():
        frame_index_names = sorted(predictions[sequence].keys(),
                                   key=lambda x: get_framenumber(x))
        second_last_frame, last_frame = frame_index_names[-2:]
        if last_frame not in motion_mask_paths:
            motion_mask_paths[sequence][last_frame] = (
                motion_mask_paths[sequence][second_last_frame])

    tasks = [(sequence, frame_name) for sequence in predictions.keys()
             for frame_name in predictions[sequence]]
    for sequence, frame_name in tasks:
        frame_key = (sequence, frame_name)
        # If --save-pickle is true, process every frame. Otherwise, only
        # process frames that are in --fbms-annotations-json.
        if not args.save_pickle and frame_key not in frame_key_to_id:
            continue

        boxes = predictions[sequence][frame_name]['boxes']
        segmentations = predictions[sequence][frame_name]['segmentations']

        motion_mask = np.array(
            Image.open(motion_mask_paths[sequence][frame_name])) != 0

        if args.save_pickle:
            updated_boxes = []
            updated_segmentations = []
        for i, (box, segmentation) in enumerate(zip(boxes, segmentations)):
            mask = mask_util.decode(segmentation)
            x1, y1, x2, y2, score = box.tolist()
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            if mask.sum() < 1e-10:
                moving_portion = 0
            else:
                moving_portion = (mask & motion_mask).sum() / mask.sum()

            if moving_portion < args.moving_threshold:
                score = translate_range(score, (0, 1), (0, 0.5))
            else:
                score = translate_range(score, (0, 1), (0.5, 1))

            if frame_key in frame_key_to_id:
                frame_id = frame_key_to_id[frame_key]
                detection_results.append({
                    'image_id': frame_id,
                    'category_id': 1,
                    'bbox': [x1, y1, w, h],
                    'score': score
                })
                segmentation_results.append({
                    'image_id': frame_id,
                    'category_id': 1,
                    'segmentation': segmentation,
                    'score': score
                })

            if args.save_pickle:
                updated_boxes.append([x1, y1, x2, y2, score])
                updated_segmentations.append(segmentation)

        if args.save_pickle:
            output_path = (output_root / 'pickle' / sequence /
                           frame_name).with_suffix('.pickle')
            output_path.parent.mkdir(exist_ok=True, parents=True)
            with open(output_path, 'wb') as f:
                # TODO(achald): Make this work for multiple classes.
                updated_boxes = [[], updated_boxes]
                if len(updated_segmentations):
                    updated_segmentations = [[], updated_segmentations]
                else:
                    updated_segmentations = None
                pickle.dump(
                    {
                        'boxes': updated_boxes,
                        'segmentations': updated_segmentations,
                        'keypoints': [[], []]
                    }, f)

    box_output = output_root / 'bbox_fbms_results.json'
    logging.info('Writing box results to %s' % box_output)
    with open(box_output, 'w') as f:
        json.dump(detection_results, f)

    segmentation_output = output_root / 'segmentation_fbms_results.json'
    logging.info('Writing segmentation results to %s' % segmentation_output)
    with open(segmentation_output, 'w') as f:
        json.dump(segmentation_results, f)

    for eval_type, results in (('bbox', detection_results),
                               ('segm', segmentation_results)):
        predictions_dataset = dataset.loadRes(results)
        coco_eval = COCOeval(dataset, predictions_dataset, eval_type)
        coco_eval.evaluate()
        coco_eval.accumulate()
        summary_f = io.StringIO()
        with redirect_stdout(summary_f):
            coco_eval.summarize()
        summary = summary_f.getvalue()
        logging.info('COCO evaluation:')
        logging.info('\n%s', summary)
Example #28
    def results2txt(self, results, outfile_prefix):
        """Dump the detection results to a txt file.

        Args:
            results (list[list | tuple | ndarray]): Testing results of the
                dataset.
            outfile_prefix (str): The filename prefix of the json files.
                If the prefix is "somepath/xxx",
                the txt files will be named "somepath/xxx.txt".

        Returns:
            list[str]: Result txt files which contain the corresponding
            instance segmentation images.
        """
        try:
            import cityscapesscripts.helpers.labels as CSLabels
        except ImportError:
            raise ImportError('Please run "pip install cityscapesscripts" to '
                              'install cityscapesscripts first.')
        result_files = []
        os.makedirs(outfile_prefix, exist_ok=True)
        prog_bar = mmcv.ProgressBar(len(self))
        for idx in range(len(self)):
            result = results[idx]
            filename = self.data_infos[idx]['filename']
            basename = osp.splitext(osp.basename(filename))[0]
            pred_txt = osp.join(outfile_prefix, basename + '_pred.txt')

            bbox_result, segm_result = result
            bboxes = np.vstack(bbox_result)
            # segm results
            if isinstance(segm_result, tuple):
                # Some detectors use different scores for bbox and mask,
                # like Mask Scoring R-CNN. Score of segm will be used instead
                # of bbox score.
                segms = mmcv.concat_list(segm_result[0])
                mask_score = segm_result[1]
            else:
                # use bbox score for mask score
                segms = mmcv.concat_list(segm_result)
                mask_score = [bbox[-1] for bbox in bboxes]
            labels = [
                np.full(bbox.shape[0], i, dtype=np.int32)
                for i, bbox in enumerate(bbox_result)
            ]
            labels = np.concatenate(labels)

            assert len(bboxes) == len(segms) == len(labels)
            num_instances = len(bboxes)
            prog_bar.update()
            with open(pred_txt, 'w') as fout:
                for i in range(num_instances):
                    pred_class = labels[i]
                    classes = self.CLASSES[pred_class]
                    class_id = CSLabels.name2label[classes].id
                    score = mask_score[i]
                    mask = maskUtils.decode(segms[i]).astype(np.uint8)
                    png_filename = osp.join(outfile_prefix,
                                            basename + f'_{i}_{classes}.png')
                    mmcv.imwrite(mask, png_filename)
                    fout.write(f'{osp.basename(png_filename)} {class_id} '
                               f'{score}\n')
            result_files.append(pred_txt)

        return result_files
Example #29
def rle_mask_voting(
    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'
):
    """Returns new masks (in correspondence with `top_masks`) by combining
    multiple overlapping masks coming from the pool of `all_masks`. Two methods
    for combining masks are supported: 'AVG' uses a weighted average of
    overlapping mask pixels; 'UNION' takes the union of all mask pixels.
    """
    if len(top_masks) == 0:
        return

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
    ]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out
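
A self-contained sketch of the 'AVG' path, assuming pycocotools is installed and rle_mask_voting above is in scope; two overlapping detections vote on the top mask (boxes and scores below are made up):

import numpy as np
import pycocotools.mask as mask_util

h, w = 30, 30
m1 = np.zeros((h, w), dtype=np.uint8)
m1[5:20, 5:20] = 1
m2 = np.zeros((h, w), dtype=np.uint8)
m2[8:22, 8:22] = 1
rles = [mask_util.encode(np.asfortranarray(m)) for m in (m1, m2)]

# Detections as [x0, y0, x1, y1, score]; boxes roughly enclose the masks.
dets = np.array([[5, 5, 19, 19, 0.9],
                 [8, 8, 21, 21, 0.6]], dtype=np.float32)

voted = rle_mask_voting(rles[:1], rles, dets,
                        iou_thresh=0.3, binarize_thresh=0.5, method='AVG')
print(mask_util.decode(voted[0]).sum())   # foreground pixels after weighted voting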
Example #30
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            # TODO check type and provide better error
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
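
A minimal sketch of the expected annotation format, assuming detectron2 is installed and annotations_to_instances above is in scope; bbox_mode tells the function how to interpret each box (values below are made up):

from detectron2.structures import BoxMode

annos = [{
    "bbox": [10.0, 10.0, 40.0, 30.0],     # x, y, w, h
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 0,
    "segmentation": [[10, 10, 50, 10, 50, 40, 10, 40]],  # one polygon
}]
instances = annotations_to_instances(annos, image_size=(100, 100))
print(instances.gt_boxes)    # Boxes(tensor([[10., 10., 50., 40.]]))
print(instances.gt_classes)  # tensor([0])
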
def polys_to_mask(polygons, height, width):
    rles = cocomask.frPyObjects(polygons, height, width)
    rle = cocomask.merge(rles)
    mask = cocomask.decode(rle)
    return mask
def show_result(img,
                result,
                class_names,
                score_thr=0.3,
                wait_time=0,
                show=True,
                out_file=None):
    """Visualize the detection results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The detection result, can be either
            (bbox, segm) or just bbox.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the bboxes and masks.
        wait_time (int): Value of waitKey param.
        show (bool, optional): Whether to show the image with opencv or not.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If neither `show` nor `out_file` is specified, the
            visualized image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img = img.copy()
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)
    # draw segmentation masks
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        np.random.seed(42)
        color_masks = [
            np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            for _ in range(max(labels) + 1)
        ]
        for i in inds:
            i = int(i)
            color_mask = color_masks[labels[i]]
            mask = maskUtils.decode(segms[i]).astype(bool)
            img[mask] = img[mask] * 0.5 + color_mask * 0.5
    # if out_file specified, do not show image in window
    if out_file is not None:
        show = False
    # draw bounding boxes
    mmcv.imshow_det_bboxes(img,
                           bboxes,
                           labels,
                           class_names=class_names,
                           score_thr=score_thr,
                           show=show,
                           wait_time=wait_time,
                           out_file=out_file)
    if not (show or out_file):
        return img
Example #33
def annToMask(ann, i_w, i_h):
    rle = annToRLE(ann, i_w, i_h)
    return maskUtils.decode(rle)
Example #34
    def vis_all_mask(self, all_boxes, all_masks, save_path=None):
        """
        visualize all detections in one image
        :param im_array: [b=1 c h w] in rgb
        :param detections: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        :param class_names: list of names in imdb
        :param scale: visualize the scaled image
        :return:
        """
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        from matplotlib.patches import Polygon
        import random
        import cv2
        palette = {
            'person': (220, 20, 60),
            'rider': (255, 0, 0),
            'car': (0, 0, 142),
            'truck': (0, 0, 70),
            'bus': (0, 60, 100),
            'train': (0, 80, 100),
            'motorcycle': (0, 0, 230),
            'bicycle': (119, 11, 32),
            #
            'road': (128, 64, 128),
            'sidewalk': (244, 35, 232),
            'building': (70, 70, 70),
            'wall': (102, 102, 156),
            'fence': (190, 153, 153),
            'pole': (153, 153, 153),
            'sky': (70, 130, 180),
            'traffic light': (250, 170, 30),
            'traffic sign': (220, 220, 0),
            'vegetation': (107, 142, 35),
            'terrain': (152, 251, 152)
        }
        name2id = {
            'road': 0,
            'sidewalk': 1,
            'building': 2,
            'wall': 3,
            'fence': 4,
            'pole': 5,
            'traffic light': 6,
            'traffic sign': 7,
            'vegetation': 8,
            'terrain': 9,
            'sky': 10
        }

        self.classes = [
            '__background__',
            'person',
            'rider',
            'car',
            'truck',
            'bus',
            'train',
            'motorcycle',
            'bicycle',
        ]

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)

        for i in range(len(self.roidb)):

            im = np.array(Image.open(self.roidb[i]['image']))
            fig = plt.figure(frameon=False)

            fig.set_size_inches(im.shape[1] / 200, im.shape[0] / 200)
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.axis('off')
            fig.add_axes(ax)
            ax.imshow(im)
            for j, name in enumerate(self.classes):
                if name == '__background__':
                    continue
                boxes = all_boxes[j][i]
                segms = all_masks[j][i]
                if segms == []:
                    continue
                masks = mask_util.decode(segms)
                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    if score < 0.5:
                        continue
                    bbox = boxes[k, :]
                    ax.add_patch(
                        plt.Rectangle((bbox[0], bbox[1]),
                                      bbox[2] - bbox[0],
                                      bbox[3] - bbox[1],
                                      fill=False,
                                      edgecolor='g',
                                      linewidth=1,
                                      alpha=0.5))
                    ax.text(bbox[0],
                            bbox[1] - 2,
                            name + '{:0.2f}'.format(score).lstrip('0'),
                            fontsize=5,
                            family='serif',
                            bbox=dict(facecolor='g',
                                      alpha=0.4,
                                      pad=0,
                                      edgecolor='none'),
                            color='white')
                    _, contour, hier = cv2.findContours(
                        mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
                    color = (palette[name][0] / 255, palette[name][1] / 255,
                             palette[name][2] / 255)
                    for c in contour:
                        ax.add_patch(
                            Polygon(c.reshape((-1, 2)),
                                    fill=True,
                                    facecolor=color,
                                    edgecolor='w',
                                    linewidth=0.8,
                                    alpha=0.5))
            if save_path is None:
                plt.show()
            else:
                fig.savefig(os.path.join(
                    save_path, '{}.png'.format(
                        self.roidb[i]['image'].split('/')[-1][:-16])),
                            dpi=200)
            plt.close('all')
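Note: the contour extraction above (`_, contour, hier = cv2.findContours(...)`) relies on the three-value return signature of OpenCV 3.x; OpenCV 4.x returns only (contours, hierarchy). A minimal compatibility wrapper, assuming nothing beyond cv2 itself, might look like this:

import cv2

def find_contours_compat(binary_mask):
    # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 4.x returns (contours, hierarchy).
    results = cv2.findContours(binary_mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    if len(results) == 3:
        _, contours, hierarchy = results
    else:
        contours, hierarchy = results
    return contours, hierarchy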
Example #35
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id', u'not_exhaustive_category_ids',
      u'neg_category_ids']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'image_id', u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official LVIS dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    category_index: a dict containing LVIS category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_mask: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
  Returns:
    success: whether the conversion is successful
    filename: image filename
    example: The converted tf.Example

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['coco_url']
    filename = osp.join(*filename.split('/')[-2:])

    image_id = image['id']
    image_not_exhaustive_category_ids = image['not_exhaustive_category_ids']
    image_neg_category_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} does not exist, skipping')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
        dataset_util.int64_list_feature(image_not_exhaustive_category_ids),
        'image/image_neg_category_ids':
        dataset_util.int64_list_feature(image_neg_category_ids),
    }

    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])

            xmin_single = max(float(x) / image_width, 0.0)
            xmax_single = min(float(x + width) / image_width, 1.0)
            ymin_single = max(float(y) / image_height, 0.0)
            ymax_single = min(float(y + height) / image_height, 1.0)
            if xmax_single <= xmin_single or ymax_single <= ymin_single:
                continue
            xmin.append(xmin_single)
            xmax.append(xmax_single)
            ymin.append(ymin_single)
            ymax.append(ymax_single)

            is_crowd.append(0)
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_mask:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
            'image/object/area':
            dataset_util.float_list_feature(area),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return True, filename, example
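The box handling above clamps the LVIS [x, y, width, height] boxes to the image and drops degenerate ones. A small, self-contained sketch of that conversion (the helper name is illustrative, not part of the original code):

def normalize_lvis_bbox(bbox, image_width, image_height):
    # Absolute [x, y, w, h] -> clamped, normalized (xmin, xmax, ymin, ymax); None if degenerate.
    x, y, w, h = bbox
    xmin = max(float(x) / image_width, 0.0)
    xmax = min(float(x + w) / image_width, 1.0)
    ymin = max(float(y) / image_height, 0.0)
    ymax = min(float(y + h) / image_height, 1.0)
    if xmax <= xmin or ymax <= ymin:
        return None
    return xmin, xmax, ymin, ymax

# normalize_lvis_bbox([10, 20, 30, 40], 100, 200) -> (0.1, 0.4, 0.1, 0.3)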
Example #36
0
    else:
        raise Exception("Category {} is not defined in {}".format(_id, os.path.join(base_path, conf)))


font = ImageFont.load_default()
# Add bounding boxes and masks
for idx, annotation in enumerate(annotations):
    if annotation["image_id"] == image_idx:
        draw = ImageDraw.Draw(im)
        bb = annotation['bbox']
        draw.rectangle(((bb[0], bb[1]), (bb[0] + bb[2], bb[1] + bb[3])), fill=None, outline="red")
        draw.text((bb[0] + 2, bb[1] + 2), get_category(annotation["category_id"]), font=font)
        if annotation["iscrowd"]:
            im.putalpha(255)
            an_sg = annotation["segmentation"]
            item = mask.decode(mask.frPyObjects(an_sg, im.size[1], im.size[0])).astype(np.uint8) * 255
            item = Image.fromarray(item, mode='L')
            overlay = Image.new('RGBA', im.size)
            draw_ov = ImageDraw.Draw(overlay)
            draw_ov.bitmap((0, 0), item, fill=(255, 0, 0, 128))
            im = Image.alpha_composite(im, overlay)
        else:
            item = annotation["segmentation"][0]
            poly = Image.new('RGBA', im.size)
            pdraw = ImageDraw.Draw(poly)
            pdraw.polygon(item, fill=(255, 255, 255, 127), outline=(255, 255, 255, 255))
            im.paste(poly, mask=poly)
if save:
    im.save(os.path.join(base_path, 'coco_annotated_{}.png'.format(image_idx)), "PNG")
im.show()
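The crowd branch above rasterizes an RLE 'segmentation' entry via pycocotools; polygons can be rasterized the same way with mask.frPyObjects. A minimal sketch with a synthetic polygon (pycocotools assumed installed; frPyObjects takes height before width):

import numpy as np
from pycocotools import mask as cocomask

poly = [[1.0, 1.0, 8.0, 1.0, 1.0, 8.0]]      # one triangle, [x0, y0, x1, y1, ...]
rles = cocomask.frPyObjects(poly, 10, 10)    # (polygons, height, width)
m = cocomask.decode(rles)                    # binary mask, shape (10, 10, 1)
print(m.shape, int(m.sum()))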
Example #37
0
def transform_instance_annotations(annotation,
                                   transforms,
                                   image_size,
                                   *,
                                   keypoint_hflip_indices=None):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm)))

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(annotation["keypoints"],
                                                   transforms, image_size,
                                                   keypoint_hflip_indices)
        annotation["keypoints"] = keypoints

    return annotation
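The polygon branch reshapes each flat [x0, y0, x1, y1, ...] list to an (N, 2) array, transforms the points, and flattens again. A numpy-only sketch of that pattern, with a simple horizontal flip standing in for transforms.apply_polygons (illustrative only, not an actual transform object):

import numpy as np

def hflip_polygon(flat_coords, image_width):
    pts = np.asarray(flat_coords, dtype=np.float64).reshape(-1, 2)
    pts[:, 0] = image_width - pts[:, 0]   # flip the x coordinates
    return pts.reshape(-1)

print(hflip_polygon([10, 5, 20, 5, 20, 15], image_width=100))
# [90.  5. 80.  5. 80. 15.]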
Example #38
0
def rle_mask_voting(top_masks,
                    all_masks,
                    all_dets,
                    iou_thresh,
                    binarize_thresh,
                    method='AVG'):
    """Returns new masks (in correspondence with `top_masks`) by combining
    multiple overlapping masks coming from the pool of `all_masks`. Two methods
    for combining masks are supported: 'AVG' uses a weighted average of
    overlapping mask pixels; 'UNION' takes the union of all mask pixels.
    """
    if len(top_masks) == 0:
        return

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
    ]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out
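Stripped of the RLE bookkeeping, the 'AVG' branch is a score-weighted average of binary masks followed by thresholding against binarize_thresh. A small numpy-only sketch of just that step:

import numpy as np

def average_vote(masks, weights, binarize_thresh=0.5):
    masks = np.asarray(masks, dtype=np.float32)      # (n, H, W)
    weights = np.asarray(weights, dtype=np.float32)  # same shape as masks
    soft_mask = np.average(masks, axis=0, weights=weights)
    return (soft_mask > binarize_thresh).astype(np.uint8)

a = np.array([[1, 1], [0, 0]])
b = np.array([[1, 0], [0, 0]])
w = [np.full((2, 2), 0.9), np.full((2, 2), 0.2)]
print(average_vote([a, b], w))   # [[1 1] [0 0]]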
Example #39
0
 def annToMask(self, ann, height, width):
     rle = self.annToRLE(ann, height, width)
     m = maskUtils.decode(rle)
     return m
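annToMask delegates to annToRLE, which in the standard COCO API turns polygon or uncompressed-RLE segmentations into a single compressed RLE before decoding. A sketch of that conversion following the usual pycocotools pattern (this may differ from the exact annToRLE used by this class):

from pycocotools import mask as maskUtils

def segmentation_to_rle(segm, height, width):
    if isinstance(segm, list):
        # polygons: an object may have several parts, merge them into one RLE
        rles = maskUtils.frPyObjects(segm, height, width)
        return maskUtils.merge(rles)
    if isinstance(segm['counts'], list):
        # uncompressed RLE
        return maskUtils.frPyObjects(segm, height, width)
    return segm  # already a compressed RLE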
Example #40
0
def vis_one_image_opencv(im,
                         boxes,
                         segms=None,
                         keypoints=None,
                         thresh=0.9,
                         kp_thresh=2,
                         show_box=False,
                         dataset=None,
                         show_class=False,
                         alpha=0.4,
                         show_border=True,
                         border_thick=1,
                         bbox_thick=1,
                         font_scale=0.35):
    """Constructs a numpy array with the detections visualized."""

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return im

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
        color_list = colormap()
        mask_color_id = 0

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        if show_box:
            im = vis_bbox(
                im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]),
                thick=bbox_thick)

        # show class (off by default)
        if show_class:
            class_str = get_class_string(classes[i], score, dataset)
            im = vis_class(im, (bbox[0], bbox[1] - 2),
                           class_str,
                           font_scale=font_scale)

        # show mask
        if segms is not None and len(segms) > i:
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            im = vis_mask(im,
                          masks[..., i],
                          color_mask,
                          alpha=alpha,
                          show_border=show_border,
                          border_thick=border_thick)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            im = vis_keypoints(im, keypoints[i], kp_thresh)

    return im
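vis_mask is assumed here to alpha-blend a color into the image wherever the decoded mask is set (judging from the signature, the real helper also handles borders via show_border and border_thick). A minimal numpy sketch of such a blend:

import numpy as np

def blend_mask(im, binary_mask, color, alpha=0.4):
    out = im.astype(np.float32).copy()
    idx = binary_mask.astype(bool)
    out[idx] = out[idx] * (1.0 - alpha) + np.asarray(color, dtype=np.float32) * alpha
    return out.astype(np.uint8)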
Example #41
0
    def _compute_f(gt_data, tracker_data, tracker_data_id, gt_id, bound_th):
        """
        Perform F computation for a given gt and a given tracker ID. Adapted from
        https://github.com/davisvideochallenge/davis2017-evaluation
        :param gt_data: the encoded gt masks
        :param tracker_data: the encoded tracker masks
        :param tracker_data_id: the tracker ID
        :param gt_id: the ground truth ID
        :param bound_th: boundary threshold parameter
        :return: the F value for the given tracker and gt ID
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils
        from skimage.morphology import disk
        import cv2

        f = np.zeros(len(gt_data))

        for t, (gt_masks,
                tracker_masks) in enumerate(zip(gt_data, tracker_data)):
            curr_tracker_mask = mask_utils.decode(
                tracker_masks[tracker_data_id])
            curr_gt_mask = mask_utils.decode(gt_masks[gt_id])

            bound_pix = bound_th if bound_th >= 1 - np.finfo('float').eps else \
                np.ceil(bound_th * np.linalg.norm(curr_tracker_mask.shape))

            # Get the pixel boundaries of both masks
            fg_boundary = JAndF._seg2bmap(curr_tracker_mask)
            gt_boundary = JAndF._seg2bmap(curr_gt_mask)

            # fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
            fg_dil = cv2.dilate(fg_boundary.astype(np.uint8),
                                disk(bound_pix).astype(np.uint8))
            # gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
            gt_dil = cv2.dilate(gt_boundary.astype(np.uint8),
                                disk(bound_pix).astype(np.uint8))

            # Get the intersection
            gt_match = gt_boundary * fg_dil
            fg_match = fg_boundary * gt_dil

            # Area of the intersection
            n_fg = np.sum(fg_boundary)
            n_gt = np.sum(gt_boundary)

            # % Compute precision and recall
            if n_fg == 0 and n_gt > 0:
                precision = 1
                recall = 0
            elif n_fg > 0 and n_gt == 0:
                precision = 0
                recall = 1
            elif n_fg == 0 and n_gt == 0:
                precision = 1
                recall = 1
            else:
                precision = np.sum(fg_match) / float(n_fg)
                recall = np.sum(gt_match) / float(n_gt)

            # Compute F measure
            if precision + recall == 0:
                f_val = 0
            else:
                f_val = 2 * precision * recall / (precision + recall)

            f[t] = f_val

        return f
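The last step is the standard boundary F-measure: the harmonic mean of precision and recall, with the 0/0 case mapped to 0.

def f_measure(precision, recall):
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

# f_measure(0.8, 0.5) ~= 0.615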
Example #42
0
def show_result(img,
                result,
                class_names,
                score_thr=0.3,
                wait_time=0,
                show=True,
                out_file=None):
    """Visualize the detection results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The detection result, can be either
            (bbox, segm) or just bbox.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the bboxes and masks.
        wait_time (int): Value of waitKey param.
        show (bool, optional): Whether to show the image with opencv or not.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If neither `show` nor `out_file` is specified, the
            visualized image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img = img.copy()
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)

    # for 1715.jpg
    """
    bboxes = np.delete(bboxes, [9, 10, 11, 12, 13, 14, 15, 16], axis=0)
    labels = np.delete(labels, [9, 10, 11, 12, 13, 14, 15, 16], axis=0)
    bboxes[1][1] = 325
    bboxes[2][1] = 308
    bboxes[4][1] = 294
    """

    # for 2894.jpg
    """
    bboxes = np.delete(bboxes, [7, 8, 9, 10, 11, 12, 20, 22], axis=0)
    labels = np.delete(labels, [7, 8, 9, 10, 11, 12, 20, 22], axis=0)
    bboxes[7][0] = 400
    bboxes[7][1] = 160
    bboxes[7][2] = 550
    bboxes[7][3] = 900
    bboxes[1][0] = 425
    bboxes[1][1] = 380
    bboxes[1][2] = 469
    bboxes[1][3] = 642
    bboxes[2][0] = 472
    bboxes[2][1] = 379
    bboxes[2][3] = 643
    bboxes[6][2] = 465
    bboxes[3][0] = 472
    x = bboxes[7][0]
    y = bboxes[7][1]
    for index in range(bboxes.shape[0]):
        bboxes[index][0] -= x
        bboxes[index][1] -= y
        bboxes[index][2] -= x
        bboxes[index][3] -= y
    w = bboxes[7][2]
    h = bboxes[7][3]
    for index in range(bboxes.shape[0]):
        x1 = bboxes[index][0]
        y1 = bboxes[index][1]
        x2 = bboxes[index][2]
        y2 = bboxes[index][3]
        bboxes[index][0] = y1
        bboxes[index][1] = w - x2
        bboxes[index][2] = y2
        bboxes[index][3] = w - x1
    bboxes[2][1] = 308
    bboxes[4][1] = 294
    bboxes = np.append(bboxes, [[71, 565, 79, 571, 0.24], [72, 573, 78,578, 0.34]], axis=0)
    labels = np.append(labels, [8, 8], axis=0)
    """

    #for 3641.jpg
    """
    bboxes = np.delete(bboxes, [6], axis=0)
    labels = np.delete(labels, [6], axis=0)
    bboxes[4][0] = 60
    bboxes[4][1] = 565
    bboxes[4][2] = 1220
    bboxes = np.append(bboxes, [[110, 688, 185, 700, 0.8]], axis=0)
    labels = np.append(labels, [7], axis=0)
    """

    # for 6411.jpg
    """
    bboxes = np.delete(bboxes, [11, 14, 16], axis=0)
    labels = np.delete(labels, [11, 14, 16], axis=0)
    bboxes = np.append(bboxes, [[65, 260, 170, 280, 0.8]], axis=0)
    labels = np.append(labels, [7], axis=0)
    """

    # draw segmentation masks
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        np.random.seed(42)
        color_masks = [
            np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            for _ in range(max(labels) + 1)
        ]
        for i in inds:
            i = int(i)
            color_mask = color_masks[labels[i]]
            mask = maskUtils.decode(segms[i]).astype(bool)
            img[mask] = img[mask] * 0.5 + color_mask * 0.5
    # if out_file specified, do not show image in window
    if out_file is not None:
        show = False
    # draw bounding boxes
    mmcv.imshow_det_bboxes(img,
                           bboxes,
                           labels,
                           class_names=class_names,
                           score_thr=score_thr,
                           show=show,
                           wait_time=wait_time,
                           out_file=out_file)
    if not (show or out_file):
        return img
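The per-detection labels above are built by giving every box of class i the label i and concatenating. A tiny sketch with a synthetic bbox_result list (three classes, the second one empty):

import numpy as np

bbox_result = [np.zeros((2, 5)), np.zeros((0, 5)), np.zeros((3, 5))]
labels = np.concatenate([
    np.full(bbox.shape[0], i, dtype=np.int32)
    for i, bbox in enumerate(bbox_result)
])
print(labels)   # [0 0 2 2 2]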
Example #43
0
    def create_batches(self, batch_size, shuffle=True):
        # 1 batch = [(image, [([x, y, w, h], id), ([x, y, w, h], id), ...]), ...]
        batch = []
        self.counter_samples = 0

        while True:
            indices = range(len(self.img_ids))

            if shuffle:
                indices = np.random.permutation(indices)
            for index in indices:
                index += 1
                try:
                    img = self.coco.loadImgs(self.img_ids[index])[0]
                except:
                    print(index)
                    continue
                path = os.path.join(self.image_dir, self.get_image_path(id=img['id'], name=img['file_name']))
                I = cv2.imread(path).astype(np.uint8)[:, :, ::-1]
                I = np.ascontiguousarray(I)


                try:
                    if len(I.shape) != 3:
                        continue
                except:
                    print("no image exist")
                    continue

                ann_ids = self.coco.getAnnIds(imgIds=img['id'], catIds=self.cat_ids, iscrowd=None)
                anns = self.coco.loadAnns(ann_ids)
                ann_list = []

                rles = []
                for ann in anns:
                    bb = [f for f in ann["bbox"]]
                    try:
                        rle = frPyObjects(ann['segmentation'], I.shape[0], I.shape[1])[0]
                        bbb = toBbox(rle)
                    except:
                        continue
                    # make sure we don't include unknown classes
                    if self.id2i[ann["category_id"]] < 0 or self.id2i[ann["category_id"]] > config.TOTAL_CLASSES:
                        print("This class cannot be processed: %d ..." % self.id2i[ann["category_id"]])
                        continue

                    rles.append(rle)
                    ann_list.append((decode(rle).astype(np.float64), bb, self.id2i[ann["category_id"]]))

                # print("------- RLEs-SHAPE")
                # print(len(rles))

                if len(rles) == 0:
                    print("NO RLE was extracted continue with next picture ...")
                    continue

                mask = decode(rles).astype(np.float64)
                batch.append((I, mask, ann_list))

                # print("------- MASKS-SHAPE")
                # print(len(mask))
                # print(np.max(mask), np.min(mask))

                if len(batch) >= batch_size:
                    self.counter_samples += len(batch)
                    print("Getting new batch with %d elements ..." % (len(batch)))
                    yield batch
                    del mask
                    del ann_list
                    del rles
                    del batch
                    batch = []
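decode(rles) above is given a list of RLE dicts at once; pycocotools then returns a single (H, W, N) array with one channel per instance. A short sketch with two synthetic masks (pycocotools assumed installed; encode expects Fortran-ordered uint8 input):

import numpy as np
from pycocotools.mask import encode, decode

m1 = np.eye(4, dtype=np.uint8)
m2 = np.ones((4, 4), dtype=np.uint8)
stacked = np.asfortranarray(np.stack([m1, m2], axis=-1))  # (4, 4, 2)
rles = encode(stacked)        # list of 2 RLE dicts
print(decode(rles).shape)     # (4, 4, 2)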
Example #44
0
def visualize_sequences(seq_id,
                        tracks,
                        max_frames_seq,
                        img_folder,
                        gt_folder,
                        output_folder,
                        draw_boxes=False,
                        create_video=True):
    colors = generate_colors()
    dpi = 100.0
    frames_with_annotations = [
        frame for frame in tracks.keys() if len(tracks[frame]) > 0
    ]
    img_sizes = next(iter(tracks[frames_with_annotations[0]])).mask["size"]
    for t in range(max_frames_seq + 1):
        print("Processing frame", t)
        filename_t = img_folder + "/" + seq_id + "/%06d" % t

        if os.path.exists(filename_t + ".png"):
            filename_t = filename_t + ".png"
        elif os.path.exists(filename_t + ".jpg"):
            filename_t = filename_t + ".jpg"
        else:
            print("Image file not found for " + filename_t +
                  ".png/.jpg, continuing...")
            continue

        img = np.array(Image.open(filename_t), dtype="float32") / 255

        # If gt_folder is provided, combine the predicted frame with gt frame
        if gt_folder:
            fname_gt = gt_folder + "/" + seq_id + "/%06d" % t
            if os.path.exists(fname_gt + ".png"):
                fname_gt = fname_gt + ".png"
            elif os.path.exists(fname_gt + ".jpg"):
                fname_gt = fname_gt + ".jpg"
            else:
                print("GT Image file not found for " + fname_gt +
                      ".png/.jpg, continuing...")
                continue
            gt_img = np.array(Image.open(fname_gt), dtype="float32") / 255
            img_sizes[0] = img_sizes[0] * 2

        fig = plt.figure()
        fig.set_size_inches(img_sizes[1] / dpi,
                            img_sizes[0] / dpi,
                            forward=True)
        fig.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            wspace=None,
                            hspace=None)
        ax = fig.subplots()
        ax.set_axis_off()

        if t in tracks:
            for obj in tracks[t]:
                color = colors[obj.track_id % len(colors)]
                if obj.class_id == 1:
                    category_name = "Car"
                elif obj.class_id == 2:
                    category_name = "Pedestrian"
                else:
                    category_name = "Ignore"
                    color = (0.7, 0.7, 0.7)
                if obj.class_id == 1 or obj.class_id == 2:  # Don't show boxes or ids for ignore regions
                    x, y, w, h = rletools.toBbox(obj.mask)
                    if draw_boxes:
                        import matplotlib.patches as patches
                        rect = patches.Rectangle((x, y),
                                                 w,
                                                 h,
                                                 linewidth=1,
                                                 edgecolor=color,
                                                 facecolor='none',
                                                 alpha=1.0)
                        ax.add_patch(rect)
                    category_name += ":" + str(obj.track_id)
                    ax.annotate(category_name, (x + 0.5 * w, y + 0.5 * h),
                                color=color,
                                weight='bold',
                                fontsize=7,
                                ha='center',
                                va='center',
                                alpha=1.0)
                binary_mask = rletools.decode(obj.mask)
                apply_mask(img, binary_mask, color)

        if gt_folder:
            # combine predicted images with gt images
            img = np.vstack((img, gt_img))

        ax.imshow(img)
        fig.savefig(output_folder + "/" + seq_id + "/%06d" % t + ".jpg")
        plt.close(fig)
    if create_video:
        os.chdir(output_folder + "/" + seq_id)
        call([
            "ffmpeg", "-framerate", "10", "-y", "-i", "%06d.jpg", "-c:v",
            "libx264", "-profile:v", "high", "-crf", "20", "-pix_fmt",
            "yuv420p", "-vf", "pad=\'width=ceil(iw/2)*2:height=ceil(ih/2)*2\'",
            "output.mp4"
        ])
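generate_colors and apply_mask are assumed to produce per-track colors and to tint the image where the decoded mask is set. A sketch of one common way to build such a palette (colorsys only; the original implementation may differ):

import colorsys

def generate_colors(n=30):
    # evenly spaced hues, returned as (r, g, b) floats in [0, 1]
    return [colorsys.hsv_to_rgb(i / float(n), 1.0, 1.0) for i in range(n)]

print(generate_colors(3))   # [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)]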
Example #45
0
def binary_from_rle(rle):
    return cocomask.decode(rle)
Example #46
0
def vis_one_image(im,
                  im_name,
                  output_dir,
                  boxes,
                  segms=None,
                  keypoints=None,
                  thresh=0.9,
                  kp_thresh=2,
                  dpi=200,
                  box_alpha=0.0,
                  dataset=None,
                  show_class=False,
                  ext='pdf',
                  is_show_boxes=True):
    """Visual debugging of detections."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return

    if segms is not None:
        masks = mask_util.decode(segms)

    color_list = colormap(rgb=True) / 255

    dataset_keypoints, _ = keypoint_utils.get_keypoints()
    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]

    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        print(dataset.classes[classes[i]], score)
        # show box (off by default, box_alpha=0.0)
        if is_show_boxes:
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor='g',
                              linewidth=0.5,
                              alpha=box_alpha))

            if show_class:
                ax.text(bbox[0],
                        bbox[1] - 2,
                        get_class_string(classes[i], score, dataset),
                        fontsize=3,
                        family='serif',
                        bbox=dict(facecolor='g',
                                  alpha=0.4,
                                  pad=0,
                                  edgecolor='none'),
                        color='white')

        # show mask
        if segms is not None and len(segms) > i:
            img = np.ones(im.shape)
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1

            w_ratio = .4
            for c in range(3):
                color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
            for c in range(3):
                img[:, :, c] = color_mask[c]
            e = masks[:, :, i]

            _, contour, hier = cv2.findContours(e.copy(), cv2.RETR_CCOMP,
                                                cv2.CHAIN_APPROX_NONE)

            for c in contour:
                polygon = Polygon(c.reshape((-1, 2)),
                                  fill=True,
                                  facecolor=color_mask,
                                  edgecolor='w',
                                  linewidth=1.2,
                                  alpha=0.5)
                ax.add_patch(polygon)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            kps = keypoints[i]
            plt.autoscale(False)
            for l in range(len(kp_lines)):
                i1 = kp_lines[l][0]
                i2 = kp_lines[l][1]
                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                    x = [kps[0, i1], kps[0, i2]]
                    y = [kps[1, i1], kps[1, i2]]
                    line = ax.plot(x, y)
                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
                if kps[2, i1] > kp_thresh:
                    ax.plot(kps[0, i1],
                            kps[1, i1],
                            '.',
                            color=colors[l],
                            markersize=3.0,
                            alpha=0.7)
                if kps[2, i2] > kp_thresh:
                    ax.plot(kps[0, i2],
                            kps[1, i2],
                            '.',
                            color=colors[l],
                            markersize=3.0,
                            alpha=0.7)

            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (kps[:2, dataset_keypoints.index('right_hip')] +
                       kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh
                    and kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = ax.plot(x, y)
                plt.setp(line,
                         color=colors[len(kp_lines)],
                         linewidth=1.0,
                         alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = ax.plot(x, y)
                plt.setp(line,
                         color=colors[len(kp_lines) + 1],
                         linewidth=1.0,
                         alpha=0.7)

        output_name = os.path.basename(im_name) + '.' + ext
        fig.savefig(os.path.join(output_dir, '{}'.format(output_name)),
                    dpi=dpi)
        plt.close('all')
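As in the OpenCV variant above, detections are drawn largest-first so smaller instances end up on top. The ordering itself is just an argsort on box areas:

import numpy as np

boxes = np.array([[0, 0, 10, 10, 0.90],
                  [0, 0, 50, 40, 0.80],
                  [5, 5, 20, 30, 0.95]], dtype=np.float32)
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
print(np.argsort(-areas))   # [1 2 0], largest area first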
Example #47
0
    def __init__(self, masks, size):
        """
            Arguments:
                masks: Either torch.tensor of [num_instances, H, W]
                    or list of torch.tensors of [H, W] with num_instances elems,
                    or RLE (Run Length Encoding) - interpreted as list of dicts,
                    or BinaryMaskList.
                size: absolute image size, width first

            After initialization, a hard copy will be made, to leave the
            initializing source data intact.
        """

        assert isinstance(size, (list, tuple))
        assert len(size) == 2

        if isinstance(masks, torch.Tensor):
            # The raw data representation is passed as argument
            masks = masks.clone()
        elif isinstance(masks, (list, tuple)):
            if len(masks) == 0:
                masks = torch.empty([0, size[1],
                                     size[0]])  # num_instances = 0!
            elif isinstance(masks[0], torch.Tensor):
                masks = torch.stack(masks, dim=2).clone()
            elif isinstance(masks[0], dict) and "counts" in masks[0]:
                # RLE interpretation
                rle_sizes = [tuple(inst["size"]) for inst in masks]

                masks = mask_utils.decode(masks)  # [h, w, n]
                masks = torch.tensor(masks).permute(2, 0, 1)  # [n, h, w]

                assert rle_sizes.count(rle_sizes[0]) == len(rle_sizes), (
                    "All the sizes must be the same size: %s" % rle_sizes)

                # in RLE, height come first in "size"
                rle_height, rle_width = rle_sizes[0]
                assert masks.shape[1] == rle_height
                assert masks.shape[2] == rle_width

                width, height = size
                if width != rle_width or height != rle_height:
                    masks = interpolate(
                        input=masks[None].float(),
                        size=(height, width),
                        mode="bilinear",
                        align_corners=False,
                    )[0].type_as(masks)
            else:
                raise RuntimeError(
                    "Type of `masks[0]` could not be interpreted: %s" %
                    type(masks))
        elif isinstance(masks, BinaryMaskList):
            # just hard copy the BinaryMaskList instance's underlying data
            masks = masks.masks.clone()
        else:
            raise RuntimeError(
                "Type of `masks` argument could not be interpreted:%s" %
                type(masks))

        if len(masks.shape) == 2:
            # if only a single instance mask is passed
            masks = masks[None]

        assert len(masks.shape) == 3
        assert masks.shape[1] == size[1], "%s != %s" % (masks.shape[1],
                                                        size[1])
        assert masks.shape[2] == size[0], "%s != %s" % (masks.shape[2],
                                                        size[0])

        self.masks = masks
        self.size = tuple(size)
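The RLE branch relies on the fact that an RLE dict stores its 'size' as [height, width], while the class's size argument is (width, height). A quick check with pycocotools (assumed installed):

import numpy as np
from pycocotools import mask as mask_utils

m = np.asfortranarray(np.ones((6, 4), dtype=np.uint8))   # height=6, width=4
rle = mask_utils.encode(m)
print(rle['size'])   # [6, 4] -> height first, width second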
Example #48
0
def bbox_merge(dets, segs, iou_thr, scr_thr, mask_thr):
    # dets: [[x1, y1, x2, y2, score], ... ]
    if dets.shape[0] <= 1:
        return dets, segs
    order = dets[:, -1].ravel().argsort()[::-1]
    dets = dets[order, :]
    scr_keep_inds = (np.where(dets[:, -1] > scr_thr))[0]
    dets = dets[scr_keep_inds, :]
    segs = [segs[ind] for ind in scr_keep_inds]

    dets_res = np.zeros([0, 5])
    segs_res = []
    imgHeight, imgWidth = 1024, 2048

    while dets.shape[0] > 0:
        num = dets.shape[0]
        # IoU
        area = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        xx1 = np.maximum(dets[0, 0], dets[:, 0])
        yy1 = np.maximum(dets[0, 1], dets[:, 1])
        xx2 = np.minimum(dets[0, 2], dets[:, 2])
        yy2 = np.minimum(dets[0, 3], dets[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # get needed merge det and delete these det
        merge_inds = np.where(o >= iou_thr)[0]
        dets_to_merge = dets[merge_inds, :]
        segs_to_merge = [segs[ind] for ind in merge_inds]
        dets = np.delete(dets, merge_inds,
                         0)  # remained dets and segs after remerge.
        segs = [segs[i] for i in range(num) if i not in merge_inds]

        if merge_inds.shape[0] <= 1:
            dets_res = np.row_stack((dets_res, dets_to_merge))
            segs_res += segs_to_merge

        else:
            scores = dets_to_merge[:, -1:]
            dets_to_merge[:, :-1] = dets_to_merge[:, :-1] * np.tile(
                scores, (1, 4))
            max_score = np.max(scores)
            det_merged = np.zeros((1, 5))
            det_merged[:, :-1] = np.sum(dets_to_merge[:, :-1],
                                        axis=0) / np.sum(scores)
            det_merged[:, -1] = max_score
            dets_res = np.row_stack((dets_res, det_merged))

            img = np.zeros((imgHeight, imgWidth))
            for i in range(merge_inds.shape[0]):
                mask = maskUtils.decode(segs_to_merge[i]).astype(bool)
                img[mask] += scores[i, -1]
            img = img / np.max(img)
            img[img >= mask_thr] = 1
            img[img < mask_thr] = 0
            img = img.astype(np.uint8)
            # print(img.shape)
            seg_merged = maskUtils.encode(
                np.array(img[:, :, np.newaxis], order='F'))[0]
            segs_res.append(seg_merged)

    return dets_res, segs_res
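When several detections are merged, the resulting box is the score-weighted average of their coordinates and keeps the maximum score. That step in isolation:

import numpy as np

dets_to_merge = np.array([[10., 10., 50., 50., 0.9],
                          [12.,  8., 48., 52., 0.3]])
scores = dets_to_merge[:, -1:]
merged_box = np.sum(dets_to_merge[:, :-1] * scores, axis=0) / np.sum(scores)
merged_det = np.append(merged_box, scores.max())
print(merged_det)   # coordinates averaged with weights 0.9 and 0.3, score 0.9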
Example #49
0
def generate_simulated_scenes(config, split, year):
    db = coco(config, split, year)
    data_dir = osp.join(config.data_dir, 'coco')
    if (split == 'test') or (split == 'aux'):
        images_dir = osp.join(data_dir, 'crn_images', 'train' + year)
        noices_dir = osp.join(data_dir, 'crn_noices', 'train' + year)
        labels_dir = osp.join(data_dir, 'crn_labels', 'train' + year)
        masks_dir = osp.join(data_dir, 'crn_masks', 'train' + year)
    else:
        images_dir = osp.join(data_dir, 'crn_images', split + year)
        noices_dir = osp.join(data_dir, 'crn_noices', split + year)
        labels_dir = osp.join(data_dir, 'crn_labels', split + year)
        masks_dir = osp.join(data_dir, 'crn_masks', split + year)
    maybe_create(images_dir)
    maybe_create(noices_dir)
    maybe_create(labels_dir)
    maybe_create(masks_dir)

    traindb = coco(config, 'train', '2017')
    nn_tables = AllCategoriesTables(traindb)
    nn_tables.build_nntables_for_all_categories(True)

    # start_ind = 0
    # end_ind = len(db.scenedb)
    start_ind = 25000 + 14000 * config.seed
    end_ind = 25000 + 14000 * (config.seed + 1)
    patches_per_class = traindb.patches_per_class
    color_transfer_threshold = 0.8

    for i in range(start_ind, end_ind):
        entry = db.scenedb[i]
        width = entry['width']
        height = entry['height']
        xywhs = entry['boxes']
        masks = entry['masks']
        clses = entry['clses']
        image_index = entry['image_index']
        instance_inds = entry['instance_inds']

        full_mask = np.zeros((height, width), dtype=np.float32)
        full_label = np.zeros((height, width), dtype=np.float32)
        full_image = np.zeros((height, width, 3), dtype=np.float32)
        full_noice = np.zeros((height, width, 3), dtype=np.float32)

        original_image = cv2.imread(db.color_path_from_index(image_index),
                                    cv2.IMREAD_COLOR)

        for j in range(len(masks)):
            src_img = original_image.astype(np.float32).copy()
            xywh = xywhs[j]
            mask = masks[j]
            cls_idx = clses[j]
            instance_ind = instance_inds[j]
            embed_path = db.patch_path_from_indices(
                image_index, instance_ind, 'patch_feature', 'pkl',
                config.use_patch_background)
            with open(embed_path, 'rb') as fid:
                query_vector = pickle.load(fid)
            n_samples = min(
                100, len(patches_per_class[cls_idx])
            )  #min(config.n_nntable_trees, len(patches_per_class[cls_idx]))
            candidate_patches = nn_tables.retrieve(cls_idx, query_vector,
                                                   n_samples)
            candidate_patches = [
                x for x in candidate_patches
                if x['instance_ind'] != instance_ind
            ]
            assert (len(candidate_patches) > 1)

            # candidate_instance_ind = instance_ind
            # candidate_patch = None
            # while (candidate_instance_ind == instance_ind):
            # 	cid = np.random.randint(0, len(candidate_patches))
            # 	candidate_patch = candidate_patches[cid]
            # 	candidate_instance_ind = candidate_patch['instance_ind']
            candidate_patch = find_closest_patch(db, traindb, image_index,
                                                 instance_ind,
                                                 candidate_patches)

            # stenciling
            src_mask = COCOmask.decode(mask)
            dst_mask = COCOmask.decode(candidate_patch['mask'])
            src_xyxy = xywh_to_xyxy(xywh, width, height)
            dst_xyxy = xywh_to_xyxy(candidate_patch['box'],
                                    candidate_patch['width'],
                                    candidate_patch['height'])
            dst_mask = dst_mask[dst_xyxy[1]:(dst_xyxy[3] + 1),
                                dst_xyxy[0]:(dst_xyxy[2] + 1)]
            dst_mask = cv2.resize(
                dst_mask,
                (src_xyxy[2] - src_xyxy[0] + 1, src_xyxy[3] - src_xyxy[1] + 1),
                interpolation=cv2.INTER_NEAREST)
            src_mask[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1)] = \
             np.minimum(dst_mask, src_mask[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1)])
            # color transfer
            if random.random() > color_transfer_threshold:
                candidate_index = candidate_patch['image_index']
                candidate_image = cv2.imread(
                    traindb.color_path_from_index(candidate_index),
                    cv2.IMREAD_COLOR).astype(np.float32)
                candidate_cropped = candidate_image[dst_xyxy[1]:(dst_xyxy[3] +
                                                                 1),
                                                    dst_xyxy[0]:(dst_xyxy[2] +
                                                                 1)]
                candidate_cropped = cv2.resize(candidate_cropped,
                                               (src_xyxy[2] - src_xyxy[0] + 1,
                                                src_xyxy[3] - src_xyxy[1] + 1),
                                               interpolation=cv2.INTER_CUBIC)
                original_cropped = src_img[src_xyxy[1]:(src_xyxy[3] + 1),
                                           src_xyxy[0]:(src_xyxy[2] + 1)]
                transfer_cropped = Monge_Kantorovitch_color_transfer(
                    original_cropped, candidate_cropped)
                src_img[src_xyxy[1]:(src_xyxy[3] + 1),
                        src_xyxy[0]:(src_xyxy[2] + 1)] = transfer_cropped

            # im1 = cv2.resize(full_image, (128, 128))
            # im2 = cv2.resize(src_img[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1), :], (128, 128))
            # # im2 = cv2.resize(np.repeat(255*src_mask[...,None], 3, -1), (128, 128))
            # im3 = cv2.resize(candidate_image, (128, 128))
            # im4 = cv2.resize(candidate_cropped, (128, 128))
            # im = np.concatenate((im1, im2, im3, im4), 1)
            # cv2.imwrite("%03d_%03d.png"%(i, j), im)

            full_image = compose(full_image, src_img, src_mask)

            # boundary elision
            radius = int(0.05 * min(width, height))
            if np.amin(src_mask) > 0:
                src_mask[0, :] = 0
                src_mask[-1, :] = 0
                src_mask[:, 0] = 0
                src_mask[:, -1] = 0
            sobelx = cv2.Sobel(src_mask, cv2.CV_64F, 1, 0, ksize=3)
            sobely = cv2.Sobel(src_mask, cv2.CV_64F, 0, 1, ksize=3)
            sobel = np.abs(sobelx) + np.abs(sobely)
            edge = np.zeros_like(sobel)
            edge[sobel > 0.9] = 1.0
            morp_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                                    (radius, radius))
            edge = cv2.dilate(edge, morp_kernel, iterations=1)
            row, col = np.where(edge > 0)
            n_edge_pixels = len(row)
            pixel_indices = np.random.permutation(range(n_edge_pixels))
            pixel_indices = pixel_indices[:(n_edge_pixels // 2)]
            row = row[pixel_indices]
            col = col[pixel_indices]
            src_img[row, col, :] = 255

            full_mask = np.maximum(full_mask, src_mask)
            full_label[src_mask > 0] = cls_idx
            full_noice = compose(full_noice, src_img, src_mask)

            # im1 = cv2.resize(full_image, (128, 128))
            # im2 = cv2.resize(src_img[src_xyxy[1]:(src_xyxy[3]+1), src_xyxy[0]:(src_xyxy[2]+1), :], (128, 128))
            # im3 = cv2.resize(candidate_image, (128, 128))
            # im4 = cv2.resize(candidate_cropped, (128, 128))
            # im = np.concatenate((im1, im2, im3, im4), 1)
            # cv2.imwrite("%03d_%03d.png"%(i, j), im)

        output_name = str(image_index).zfill(12)
        output_path = osp.join(images_dir, output_name + '.jpg')
        cv2.imwrite(output_path,
                    clamp_array(full_image, 0, 255).astype(np.uint8))
        output_path = osp.join(noices_dir, output_name + '.jpg')
        cv2.imwrite(output_path,
                    clamp_array(full_noice, 0, 255).astype(np.uint8))
        output_path = osp.join(masks_dir, output_name + '.png')
        cv2.imwrite(output_path,
                    clamp_array(255 * full_mask, 0, 255).astype(np.uint8))
        output_path = osp.join(labels_dir, output_name + '.png')
        cv2.imwrite(output_path, full_label.astype(np.uint8))
        print(i, image_index)
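The stenciling step crops the donor mask to its box and resizes it with nearest-neighbour interpolation so the result stays binary. A minimal sketch of that resize (cv2.resize takes the target size as (width, height)):

import cv2
import numpy as np

def resize_binary_mask(mask, out_w, out_h):
    return cv2.resize(mask.astype(np.uint8), (out_w, out_h),
                      interpolation=cv2.INTER_NEAREST)

m = np.zeros((4, 4), dtype=np.uint8)
m[1:3, 1:3] = 1
print(resize_binary_mask(m, 8, 8).shape)   # (8, 8)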
Example #50
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
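The dataset_util helpers used above wrap plain values into tf.train.Feature protos. A minimal version of what such helpers typically look like (illustrative sketch; the real tensorflow/models dataset_util module should be preferred):

import tensorflow as tf

def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def float_list_feature(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': int64_feature(480),
    'image/width': int64_feature(640),
    'image/filename': bytes_feature(b'000001.jpg'),
    'image/object/bbox/xmin': float_list_feature([0.1, 0.4]),
}))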
Example #51
0
def vis_one_image(
        im, im_name, output_dir, boxes, segms=None, keypoints=None, body_uv=None, thresh=0.9,
        kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False,
        ext='pdf'):
    """Visual debugging of detections."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return

    dataset_keypoints, _ = keypoint_utils.get_keypoints()

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)

    color_list = colormap(rgb=True) / 255

    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]

    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False, edgecolor='g',
                          linewidth=0.5, alpha=box_alpha))

        if show_class:
            ax.text(
                bbox[0], bbox[1] - 2,
                get_class_string(classes[i], score, dataset),
                fontsize=3,
                family='serif',
                bbox=dict(
                    facecolor='g', alpha=0.4, pad=0, edgecolor='none'),
                color='white')

        # show mask
        if segms is not None and len(segms) > i:
            img = np.ones(im.shape)
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1

            w_ratio = .4
            for c in range(3):
                color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
            for c in range(3):
                img[:, :, c] = color_mask[c]
            e = masks[:, :, i]

            _, contour, hier = cv2.findContours(
                e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

            for c in contour:
                polygon = Polygon(
                    c.reshape((-1, 2)),
                    fill=True, facecolor=color_mask,
                    edgecolor='w', linewidth=1.2,
                    alpha=0.5)
                ax.add_patch(polygon)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            kps = keypoints[i]
            plt.autoscale(False)
            for l in range(len(kp_lines)):
                i1 = kp_lines[l][0]
                i2 = kp_lines[l][1]
                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                    x = [kps[0, i1], kps[0, i2]]
                    y = [kps[1, i1], kps[1, i2]]
                    line = plt.plot(x, y)
                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
                if kps[2, i1] > kp_thresh:
                    plt.plot(
                        kps[0, i1], kps[1, i1], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

                if kps[2, i2] > kp_thresh:
                    plt.plot(
                        kps[0, i2], kps[1, i2], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (
                kps[:2, dataset_keypoints.index('right_hip')] +
                kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh and
                    kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines) + 1], linewidth=1.0,
                    alpha=0.7)
                
    # DensePose visualization: compose the per-box IUV outputs into full-image maps.
    IUV_fields = body_uv[1]
    All_Coords = np.zeros(im.shape)
    All_inds = np.zeros([im.shape[0], im.shape[1]])
    K = 26
    inds = np.argsort(boxes[:, 4])
    for i, ind in enumerate(inds):
        entry = boxes[ind, :]
        if entry[4] > 0.65:
            entry = entry[0:4].astype(int)
            output = IUV_fields[ind]
            # Paste the IUV patch into the canvas, keeping pixels already written.
            All_Coords_Old = All_Coords[entry[1]:entry[1] + output.shape[1],
                                        entry[0]:entry[0] + output.shape[2], :]
            All_Coords_Old[All_Coords_Old == 0] = \
                output.transpose([1, 2, 0])[All_Coords_Old == 0]
            All_Coords[entry[1]:entry[1] + output.shape[1],
                       entry[0]:entry[0] + output.shape[2], :] = All_Coords_Old
            # Paste the instance index wherever the patch is foreground.
            CurrentMask = (output[0, :, :] > 0).astype(np.float32)
            All_inds_old = All_inds[entry[1]:entry[1] + output.shape[1],
                                    entry[0]:entry[0] + output.shape[2]]
            All_inds_old[All_inds_old == 0] = CurrentMask[All_inds_old == 0] * i
            All_inds[entry[1]:entry[1] + output.shape[1],
                     entry[0]:entry[0] + output.shape[2]] = All_inds_old
    # Scale the U/V channels to [0, 255] and save the IUV and index images.
    All_Coords[:, :, 1:3] = 255. * All_Coords[:, :, 1:3]
    All_Coords[All_Coords > 255] = 255.
    All_Coords = All_Coords.astype(np.uint8)
    All_inds = All_inds.astype(np.uint8)
    IUV_SaveName = os.path.basename(im_name).split('.')[0] + '_IUV.png'
    INDS_SaveName = os.path.basename(im_name).split('.')[0] + '_INDS.png'
    cv2.imwrite(os.path.join(output_dir, '{}'.format(IUV_SaveName)), All_Coords)
    cv2.imwrite(os.path.join(output_dir, '{}'.format(INDS_SaveName)), All_inds)
    print('IUV written to: ', os.path.join(output_dir, '{}'.format(IUV_SaveName)))
    # DensePose visualization done.
    output_name = os.path.basename(im_name) + '.' + ext
    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)
    plt.close('all')
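Note that the contour-extraction call above unpacks three return values from cv2.findContours, which matches OpenCV 3.x; OpenCV 4.x returns only (contours, hierarchy). A small version-agnostic sketch of the same call:

import cv2

def find_contours_compat(binary_mask):
    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) in OpenCV 4.x; taking the last two items works in both.
    results = cv2.findContours(binary_mask.copy(), cv2.RETR_CCOMP,
                               cv2.CHAIN_APPROX_NONE)
    contours, hierarchy = results[-2:]
    return contours, hierarchy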
Example #52
0
def vis_one_image(
        im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9,
        kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False,
        ext='pdf'):
    """Visual debugging of detections."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return

    dataset_keypoints, _ = keypoint_utils.get_keypoints()

    if segms is not None:
        masks = mask_util.decode(segms)

    color_list = colormap(rgb=True) / 255

    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]

    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)

    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)

    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # show box (off by default)
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False, edgecolor='g',
                          linewidth=0.5, alpha=box_alpha))

        if show_class:
            ax.text(
                bbox[0], bbox[1] - 2,
                get_class_string(classes[i], score, dataset),
                fontsize=3,
                family='serif',
                bbox=dict(
                    facecolor='g', alpha=0.4, pad=0, edgecolor='none'),
                color='white')

        # show mask
        if segms is not None and len(segms) > i:
            img = np.ones(im.shape)
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1

            w_ratio = .4
            for c in range(3):
                color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
            for c in range(3):
                img[:, :, c] = color_mask[c]
            e = masks[:, :, i]

            _, contour, hier = cv2.findContours(
                e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

            for c in contour:
                polygon = Polygon(
                    c.reshape((-1, 2)),
                    fill=True, facecolor=color_mask,
                    edgecolor='w', linewidth=1.2,
                    alpha=0.5)
                ax.add_patch(polygon)

        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            kps = keypoints[i]
            plt.autoscale(False)
            for l in range(len(kp_lines)):
                i1 = kp_lines[l][0]
                i2 = kp_lines[l][1]
                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                    x = [kps[0, i1], kps[0, i2]]
                    y = [kps[1, i1], kps[1, i2]]
                    line = plt.plot(x, y)
                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
                if kps[2, i1] > kp_thresh:
                    plt.plot(
                        kps[0, i1], kps[1, i1], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

                if kps[2, i2] > kp_thresh:
                    plt.plot(
                        kps[0, i2], kps[1, i2], '.', color=colors[l],
                        markersize=3.0, alpha=0.7)

            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (
                kps[:2, dataset_keypoints.index('right_hip')] +
                kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh and
                    kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = plt.plot(x, y)
                plt.setp(
                    line, color=colors[len(kp_lines) + 1], linewidth=1.0,
                    alpha=0.7)

    output_name = os.path.basename(im_name) + '.' + ext
    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)
    plt.close('all')
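A hedged usage sketch for the visualizer above; cls_boxes, cls_segms, cls_keyps, and dummy_coco_dataset stand in for the outputs of a Detectron-style inference call and are assumptions, not part of this snippet.

import cv2

im = cv2.imread('/data/demo/example.jpg')
# cls_boxes / cls_segms / cls_keyps would come from a Detectron-style
# detector; they are per-class lists that vis_one_image flattens via
# convert_from_cls_format.
vis_one_image(
    im[:, :, ::-1],          # the visualizer draws with matplotlib, so pass RGB
    'example.jpg',
    output_dir='/tmp/vis',
    boxes=cls_boxes,
    segms=cls_segms,
    keypoints=cls_keyps,
    thresh=0.7,
    show_class=True,
    dataset=dummy_coco_dataset,
    ext='png')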
Example #53
0
def vis_extract_func(im,
                     im_name,
                     output_dir,
                     boxes,
                     segms=None,
                     keypoints=None,
                     cls_feats=None,
                     thresh=0.9,
                     kp_thresh=2,
                     dpi=200,
                     box_alpha=0.0,
                     dataset=None,
                     show_class=False,
                     ext='pdf',
                     out_when_no_box=False):
    """Visual debugging of detections."""
    one_human_assigned = 0
    human_feats = None
    extracted_kps = None  # stays None if no person with keypoints is found
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    # ADDED: similar to convert_from_cls_format, but for feats_list
    feats_list = [b for b in cls_feats if len(b) > 0]
    if len(feats_list) > 0:
        feats = np.concatenate(feats_list)
    else:
        feats = None

    if (boxes is None or boxes.shape[0] == 0
            or max(boxes[:, 4]) < thresh) and not out_when_no_box:
        return None, None, 0

    dataset_keypoints, _ = keypoint_utils.get_keypoints()

    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)

    color_list = colormap(rgb=True) / 255

    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]

    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)

    if boxes is None:
        sorted_inds = []  # avoid crash when 'boxes' is None
    else:
        # Display in largest to smallest order to reduce occlusion
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)

    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue

        # ADDED: human features are extracted
        if classes[i] == 1 and not one_human_assigned:
            human_feats = feats[i]

            # show box (off by default)
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor='g',
                              linewidth=0.5,
                              alpha=box_alpha))

            if show_class:
                ax.text(bbox[0],
                        bbox[1] - 2,
                        get_class_string(classes[i], score, dataset),
                        fontsize=3,
                        family='serif',
                        bbox=dict(facecolor='g',
                                  alpha=0.4,
                                  pad=0,
                                  edgecolor='none'),
                        color='white')

            # show mask
            if segms is not None and len(segms) > i:
                img = np.ones(im.shape)
                color_mask = color_list[mask_color_id % len(color_list), 0:3]
                mask_color_id += 1

                w_ratio = .4
                for c in range(3):
                    color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
                for c in range(3):
                    img[:, :, c] = color_mask[c]
                e = masks[:, :, i]

                _, contour, hier = cv2.findContours(e.copy(), cv2.RETR_CCOMP,
                                                    cv2.CHAIN_APPROX_NONE)

                for c in contour:
                    polygon = Polygon(c.reshape((-1, 2)),
                                      fill=True,
                                      facecolor=color_mask,
                                      edgecolor='w',
                                      linewidth=1.2,
                                      alpha=0.5)
                    ax.add_patch(polygon)

            # show keypoints
            if keypoints is not None and len(keypoints) > i:
                kps = keypoints[i]
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                x_coor = (kps[0] - bbox[0]) / 256
                y_coor = (kps[1] - bbox[1]) / 256

                #extracted_kps = np.concatenate((np.asarray([bbox_width/255, bbox_height/255]), x_coor, y_coor, kps[3]), axis=0)
                extracted_kps = np.concatenate(
                    (np.asarray([bbox_width / 255, bbox_height / 255
                                 ]), x_coor[:11], y_coor[:11], kps[3][:11]),
                    axis=0)
                #print('extracted_kps', extracted_kps)
                #extracted_kps = [item for sublist in extracted_kps for item in sublist]
                one_human_assigned = 1

                plt.autoscale(False)
                for l in range(len(kp_lines)):
                    i1 = kp_lines[l][0]
                    i2 = kp_lines[l][1]
                    if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                        x = [kps[0, i1], kps[0, i2]]
                        y = [kps[1, i1], kps[1, i2]]
                        line = plt.plot(x, y)
                        plt.setp(line,
                                 color=colors[l],
                                 linewidth=1.0,
                                 alpha=0.7)
                    if kps[2, i1] > kp_thresh:
                        plt.plot(kps[0, i1],
                                 kps[1, i1],
                                 '.',
                                 color=colors[l],
                                 markersize=3.0,
                                 alpha=0.7)

                    if kps[2, i2] > kp_thresh:
                        plt.plot(kps[0, i2],
                                 kps[1, i2],
                                 '.',
                                 color=colors[l],
                                 markersize=3.0,
                                 alpha=0.7)

                # add mid shoulder / mid hip for better visualization
                mid_shoulder = (
                    kps[:2, dataset_keypoints.index('right_shoulder')] +
                    kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
                sc_mid_shoulder = np.minimum(
                    kps[2, dataset_keypoints.index('right_shoulder')],
                    kps[2, dataset_keypoints.index('left_shoulder')])
                mid_hip = (kps[:2, dataset_keypoints.index('right_hip')] +
                           kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
                sc_mid_hip = np.minimum(
                    kps[2, dataset_keypoints.index('right_hip')],
                    kps[2, dataset_keypoints.index('left_hip')])
                if (sc_mid_shoulder > kp_thresh and
                        kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                    x = [
                        mid_shoulder[0], kps[0,
                                             dataset_keypoints.index('nose')]
                    ]
                    y = [
                        mid_shoulder[1], kps[1,
                                             dataset_keypoints.index('nose')]
                    ]
                    line = plt.plot(x, y)
                    plt.setp(line,
                             color=colors[len(kp_lines)],
                             linewidth=1.0,
                             alpha=0.7)
                if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                    x = [mid_shoulder[0], mid_hip[0]]
                    y = [mid_shoulder[1], mid_hip[1]]
                    line = plt.plot(x, y)
                    plt.setp(line,
                             color=colors[len(kp_lines) + 1],
                             linewidth=1.0,
                             alpha=0.7)

    output_name = os.path.basename(im_name) + '.' + ext
    fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi)
    plt.close('all')

    return extracted_kps, human_feats, one_human_assigned
Example #54
0
 def _read_segmentation(self, ann, H, W):
     s = ann['segmentation']
     s = s if type(s) == list else [s]
     return mask.decode(mask.frPyObjects(s, H, W)).max(axis=2)
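The helper above relies on pycocotools to turn polygon (or RLE) segmentations into a single binary mask. A minimal standalone sketch of the same idea, assuming a polygon annotation in COCO's flat [x1, y1, x2, y2, ...] format (the values below are made up):

import numpy as np
from pycocotools import mask as mask_util

# A hypothetical 100x100 image with one triangular polygon.
H, W = 100, 100
polygons = [[10.0, 10.0, 80.0, 10.0, 10.0, 80.0]]

# frPyObjects converts each polygon to an RLE; decode returns an HxWxN array,
# and max(axis=2) merges the per-polygon masks into one binary mask.
rles = mask_util.frPyObjects(polygons, H, W)
binary_mask = mask_util.decode(rles).max(axis=2)
print(binary_mask.shape, binary_mask.dtype, binary_mask.sum())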
Example #55
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin_ratio = float(x) / image_width
    xmax_ratio = float(x + width) / image_width
    ymin_ratio = float(y) / image_height
    ymax_ratio = float(y + height) / image_height
    if xmin_ratio < 0.0 or ymin_ratio < 0.0:
      num_annotations_skipped += 1
      print('NOTICE: skip illegal bounding box ratio: {}, {}'.format(xmin_ratio, ymin_ratio))
      continue
    if xmax_ratio > 1.0 or ymax_ratio > 1.0:
      num_annotations_skipped += 1
      print('NOTICE: skip illegal bounding box ratio: {}, {}'.format(xmax_ratio, ymax_ratio))
      continue
    xmin.append(xmin_ratio)
    xmax.append(xmax_ratio)
    ymin.append(ymin_ratio)
    ymax.append(ymax_ratio)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/text':
          dataset_util.bytes_list_feature(category_names),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
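A hedged usage sketch for create_tf_example: loop over COCO images, look up their annotations, and write the resulting examples to a TFRecord. The paths and the annotations_by_image / category_index variables below are assumptions for illustration only.

import tensorflow as tf

# TF 1.x API, matching the tf.gfile usage in the snippet above.
writer = tf.python_io.TFRecordWriter('/tmp/coco_train.record')
total_skipped = 0
for image in images:  # e.g. the 'images' list from a COCO instances JSON
    annotations_list = annotations_by_image.get(image['id'], [])
    key, example, num_skipped = create_tf_example(
        image, annotations_list, image_dir='/data/coco/train2017',
        category_index=category_index, include_masks=True)
    total_skipped += num_skipped
    writer.write(example.SerializeToString())
writer.close()
print('Skipped %d invalid annotations' % total_skipped)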
Example #56
0
def segmToMask(segm, img_size):
    rle = segmToRLE(segm, img_size)
    m = maskUtils.decode(rle)
    return m
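segmToRLE is not shown in this example; in cocoapi-derived code it usually normalizes polygon, uncompressed-RLE, and compressed-RLE segmentations to a single RLE. A hedged sketch of that common pattern (the actual helper may differ):

from pycocotools import mask as maskUtils

def segmToRLE(segm, img_size):
    # img_size is assumed to be (height, width); the original helper may differ.
    h, w = img_size
    if isinstance(segm, list):
        # Polygon: convert each polygon to an RLE and merge them.
        rles = maskUtils.frPyObjects(segm, h, w)
        rle = maskUtils.merge(rles)
    elif isinstance(segm['counts'], list):
        # Uncompressed RLE: compress it.
        rle = maskUtils.frPyObjects(segm, h, w)
    else:
        # Already a compressed RLE.
        rle = segm
    return rle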
Example #57
0
def fix_segments_intersections(polygons, height, width, img_name, use_background_label,
                               threshold=0.0, ratio_tolerance=0.001, area_threshold=1):
    """Find all intersected regions and crop contour for back object by objects which
        are in front of the first one. It is related to a specialty of segmentation
        in CVAT annotation. Intersection is calculated via function 'iou' from cocoapi
    Args:
        polygons: all objects on image represented as 2D array of objects' contours
        height: height of image
        width: width of image
        img_name: name of image file
        threshold: threshold of intersection over union of two objects.
            By default is set to 0 and processes any two intersected objects
        ratio_tolerance: used for situation when one object is fully or almost fully
            inside another one and we don't want make "hole" in one of objects
    """
    converted_polygons = []
    empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    # Convert points of polygons from string to coco's array.
    # All polygons must be sorted in order from bottom to top
    for polygon in polygons:
        label = polygon['label']
        points = polygon['points'].split(';')
        new_polygon = []
        for xy in points:
            x = float(xy.split(',')[0])
            y = float(xy.split(',')[1])
            new_polygon.append(x)
            new_polygon.append(y)
        converted_polygons.append({'label': label, 'points': new_polygon})

    for i in range(0, len(converted_polygons)):
        rle_bottom = mask_util.frPyObjects([converted_polygons[i]['points']], height, width)
        segment_overlapped = False
        for j in range(i + 1, len(converted_polygons)):
            rle_top = mask_util.frPyObjects([converted_polygons[j]['points']], height, width)
            iou = mask_util.iou(rle_bottom, rle_top, [0, 0])
            area_top = sum(mask_util.area(rle_top))
            area_bottom = sum(mask_util.area(rle_bottom))
            if area_bottom == 0:
                continue
            area_ratio = area_top / area_bottom
            sum_iou = sum(iou)

            # If segment is fully inside another one, save this segment as is
            if area_ratio - ratio_tolerance < sum_iou[0] < area_ratio + ratio_tolerance:
                continue
            # Check the situation when the bottom segment is fully inside the top one.
            # This indicates an annotation mistake; save this segment as is
            if 1 / area_ratio - ratio_tolerance < sum_iou[0] < 1 / area_ratio + ratio_tolerance:
                continue

            if sum_iou[0] > threshold:
                segment_overlapped = True
                bottom_mask = np.array(mask_util.decode(rle_bottom), dtype=np.uint8)
                top_mask = np.array(mask_util.decode(rle_top), dtype=np.uint8)

                bottom_mask = np.subtract(bottom_mask, top_mask)
                bottom_mask[bottom_mask > 1] = 0

                bottom_mask = np.sum(bottom_mask, axis=2)
                bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8)
                converted_polygons[i]['points'] = mask_to_polygon(bottom_mask, area_threshold=area_threshold)
                # If a segment becomes empty, apply a small fix to avoid an error in the cocoapi call
                if len(converted_polygons[i]['points']) == 0:
                    converted_polygons[i]['points'] = [empty_polygon]
                rle_bottom = mask_util.frPyObjects(converted_polygons[i]['points'], height, width)
        if not segment_overlapped:
            converted_polygons[i]['points'] = [converted_polygons[i]['points']]

    output_polygons = []
    for i in range(0, len(converted_polygons)):
        if not use_background_label and converted_polygons[i]['label'] == 'background':
            continue
        poly_len = len(converted_polygons[i]['points'])
        if poly_len == 0 or converted_polygons[i]['points'] == [empty_polygon]:
            log.warning('Image <{}> has an empty polygon with label <{}>. '
                        'Perhaps there is a mistake in the annotation.'.format(
                            img_name, converted_polygons[i]['label']))
        else:
            output_polygons.append(converted_polygons[i])

    return output_polygons
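As the docstring notes, overlaps are computed with cocoapi's iou. A small self-contained sketch of that call on two made-up overlapping polygons:

import numpy as np
from pycocotools import mask as mask_util

height, width = 100, 100
# Two overlapping axis-aligned squares, given as flat [x, y, ...] polygons.
poly_a = [[10.0, 10.0, 60.0, 10.0, 60.0, 60.0, 10.0, 60.0]]
poly_b = [[40.0, 40.0, 90.0, 40.0, 90.0, 90.0, 40.0, 90.0]]

rle_a = mask_util.frPyObjects(poly_a, height, width)
rle_b = mask_util.frPyObjects(poly_b, height, width)

# iou(dt, gt, iscrowd) returns a len(dt) x len(gt) matrix of overlaps;
# area() returns the pixel area of each RLE.
overlap = mask_util.iou(rle_a, rle_b, [0])
print(overlap, mask_util.area(rle_a), mask_util.area(rle_b))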
Example #58
0
 def polygons_to_mask(self, polygons):
     rle = mask_util.frPyObjects(polygons, self.height, self.width)
     rle = mask_util.merge(rle)
     return mask_util.decode(rle)[:, :]
Example #59
0
def vis_one_proposal(img_fpath: str, bbox: List, mask: Dict, draw_boxes=True):
    """
    Visualized bbox and mask of one proposal.
    Args:
        img_fpath: the image file path.
        bbox: [x, y, w, h]
        mask: RLE format
    """
    img_name = img_fpath.split('/')[-1].replace(".jpg", "")
    colors = generate_colors()
    dpi = 100.0
    img = np.array(Image.open(img_fpath), dtype="float32") / 255
    img_sizes = mask["size"]

    fig = plt.figure()
    fig.set_size_inches(img_sizes[1] / dpi, img_sizes[0] / dpi, forward=True)
    fig.subplots_adjust(left=0,
                        bottom=0,
                        right=1,
                        top=1,
                        wspace=None,
                        hspace=None)
    ax = fig.subplots()
    ax.set_axis_off()

    color = colors[0]
    x, y, w, h = toBbox(mask)
    rect = patches.Rectangle((x, y),
                             w,
                             h,
                             linewidth=1,
                             linestyle='-.',
                             edgecolor=color,
                             facecolor='none',
                             alpha=1.0)
    ax.add_patch(rect)

    if draw_boxes:
        xb, yb, wb, hb = bbox
        rect = patches.Rectangle((xb, yb),
                                 wb,
                                 hb,
                                 linewidth=1,
                                 edgecolor=colors[-1],
                                 facecolor='none',
                                 alpha=1.0)
        ax.add_patch(rect)

    category_name = "object"
    ax.annotate(category_name, (x + 0.5 * w, y + 0.5 * h),
                color=color,
                weight='bold',
                fontsize=7,
                ha='center',
                va='center',
                alpha=1.0)
    binary_mask = decode(mask)
    apply_mask(img, binary_mask, color)

    ax.imshow(img)
    fig.savefig("plots/" + img_name + ".jpg")
    plt.close(fig)
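generate_colors and apply_mask are helpers assumed by this snippet; a minimal sketch of an apply_mask-style blend (in the spirit of the Mask R-CNN demo utilities, not necessarily the exact implementation used here) is:

import numpy as np

def apply_mask(image, mask, color, alpha=0.5):
    # Blend `color` into `image` wherever `mask` is 1; image is float in [0, 1],
    # mask is an HxW array of {0, 1}, color is an (r, g, b) triple in [0, 1].
    for c in range(3):
        image[:, :, c] = np.where(mask == 1,
                                  image[:, :, c] * (1 - alpha) + alpha * color[c],
                                  image[:, :, c])
    return image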
Example #60
0
                print(i)
                all_mask = {}
                query_roi = gen_bbox(label_0, i)
                query_img = img_list_from_begin_label[0]

                if not os.path.exists(sys.argv[3] + 'object_json/' +
                                      video_dir + '_%d.json' % i):
                    print('this is not a person')
                    search_instance.append(i)
                    continue
                sort_all_mask = json.loads(
                    open(sys.argv[3] + 'object_json/' + video_dir +
                         '_%d.json' % i).read())
                for gallery_img, (x1, y1, x2, y2, mask) in sort_all_mask:
                    x1, y1, x2, y2 = enlarge_bbox([x1, y1, x2, y2])
                    mask = np.array(maskUtils.decode(mask))

                    #prob_name = prob_dir + video_dir + '/%05d.png' % (int(gallery_img[-9:-4]) - head_num)
                    if int(gallery_img[-9:-4]) - small_num < start_index:
                        continue
                    prob = pic_list[int(gallery_img[-9:-4]) - head_num]
                    if i in np.unique(prob):
                        continue
                    print(gallery_img, x1, y1, x2, y2)
                    prob[(prob == 0) & (mask == 1)] = i
                    #prob = prob + mask * i
                    pic_list[int(gallery_img[-9:-4]) - head_num] = prob

                    propgate_forward(
                        int(gallery_img[-9:-4]) - head_num, i, gallery_img)
                    propgate_backward(