Example #1
def crop_mask(boxes,segmentations,flipped, imsize):
    assert (boxes.shape[0]==len(segmentations))
    psegmentations=[]
    for i in xrange(len(segmentations)):
        gts=segmentations[i]
        box=boxes[i,:]
        if type(gts) == list and gts:
            assert (type(gts[0]) != dict)
            prle= mask.frPyObjects(gts,imsize[1],imsize[0])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle= mask.frPyObjects([gts],imsize[1],imsize[0])
        elif type(gts) == dict and \
                     (type(gts['counts']) == unicode or type(gts['counts']) == str):
            prle = [gts]
        else:
            print '{} box has no segmentation'.format(i)
            psegmentations.append([])
            continue
        if len(prle)==1:
            prle=prle[0]
        else:
            prle= mask.merge(prle)
        pmask=mask.decode([prle])
        if flipped:
            pmask=pmask[:,::-1,:]
        pmask=np.copy(pmask[box[1]:box[3],box[0]:box[2],:],order='F')
        psegmentations.append(mask.encode(pmask))
    return psegmentations
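
A minimal usage sketch for crop_mask above, assuming pycocotools is installed and the function is importable (it is Python 2 code as written: xrange, unicode). The box and polygon below are made up; imsize is passed as (width, height), matching how the function calls frPyObjects.

import numpy as np
from pycocotools import mask

boxes = np.array([[10, 20, 110, 120]], dtype=np.int32)      # one [x1, y1, x2, y2] box
segms = [[[20., 30., 100., 30., 100., 110., 20., 110.]]]    # one polygon list per box
cropped = crop_mask(boxes, segms, flipped=False, imsize=(640, 480))
print(mask.decode(cropped[0]).shape)                        # (100, 100, 1), the box crop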
Example #2
    def get_segmentation(self,gt_segmentation,box,max_ind,imsize):
        segmentation=[];
        gts=gt_segmentation[max_ind]
        if type(gts) == list:
            assert (type(gts[0]) != dict)
            prle= mask.frPyObjects(gts,imsize[0],imsize[1])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle= mask.frPyObjects([gts],imsize[0],imsize[1])
        elif type(gts) == dict and \
                     (type(gts['counts']) == unicode or type(gts['counts']) == str):
            prle = [gts]
        else:
            return segmentation
        if len(prle)==1:
            prle=prle[0]
        else:
            prle= mask.merge(prle)
        grle=mask.frPyObjects([[box[0],box[1],box[2],box[1],box[2],box[3],box[0],box[3],box[0],box[1]]],imsize[0],imsize[1])
        #print grle,'----'
        pmask=mask.merge([prle,grle[0]],intersect=True)
        segmentation=pmask
#            for sm in gts:
#                poly=Polygon(zip(sm[::2],sm[1::2]))
#                bpoly=Polygon([(box[0],box[1]),(box[0],box[3]),(box[2],box[3]),(box[2],box[1]),(box[0],box[1])])
#                bpoly=bpoly.intersection(poly)
#                coords=array(bpoly.exterior.coords)
#                coords=coords-[box[0],box[1]]
#                segmentation.append(coords.ravel().tolist())
        return segmentation
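
The crop-to-box trick above converts the box itself into a polygon RLE and intersects it with the object's RLE via merge(intersect=True). A hedged, standalone sketch of that step with made-up sizes and coordinates:

from pycocotools import mask

h, w = 20, 20
obj = mask.merge(mask.frPyObjects([[2., 2., 15., 2., 15., 15., 2., 15.]], h, w))
box = [5, 5, 12, 12]   # x1, y1, x2, y2
grle = mask.frPyObjects([[box[0], box[1], box[2], box[1], box[2], box[3],
                          box[0], box[3], box[0], box[1]]], h, w)
clipped = mask.merge([obj, grle[0]], intersect=True)
print(mask.area([clipped]))   # area of the object restricted to the box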
Example #3
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
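
A hedged usage sketch for polys_to_mask_wrt_box above, assuming the function and its numpy / pycocotools imports are in scope; the square polygon and M=28 below are made up.

box = [10., 10., 50., 50.]                            # [x1, y1, x2, y2]
polygons = [[10., 10., 50., 10., 50., 50., 10., 50.]]
m = polys_to_mask_wrt_box(polygons, box, 28)
print(m.shape, m.dtype)                               # (28, 28) float32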
Example #4
 def _flip_rle(rle, height, width):
     if 'counts' in rle and type(rle['counts']) == list:
         # Magic RLE format handling painfully discovered by looking at the
         # COCO API showAnns function.
         rle = mask_util.frPyObjects([rle], height, width)
     mask = mask_util.decode(rle)
     mask = mask[:, ::-1, :]
     rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
     return rle
 def annToRLE(self, ann, height, width):
     """
     Convert an annotation, which can be polygons or uncompressed RLE, to compressed RLE.
     :return: RLE (run-length encoding dict)
     """
     segm = ann['segmentation']
     if isinstance(segm, list):
         # polygon -- a single object might consist of multiple parts
         # we merge all parts into one mask rle code
         rles = maskUtils.frPyObjects(segm, height, width)
         rle = maskUtils.merge(rles)
     elif isinstance(segm['counts'], list):
         # uncompressed RLE
         rle = maskUtils.frPyObjects(segm, height, width)
     else:
         # rle
         rle = ann['segmentation']
     return rle
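
For reference, a small sketch (with made-up 4x4 data) of the three segmentation formats the annToRLE function above distinguishes: a polygon list, an uncompressed RLE whose 'counts' is a list of ints, and an already compressed RLE.

import numpy as np
from pycocotools import mask as maskUtils

h, w = 4, 4
poly_segm = [[0., 0., 3., 0., 3., 3., 0., 3.]]              # polygon list
uncompressed = {'size': [h, w], 'counts': [4, 8, 4]}        # uncompressed RLE
compressed = maskUtils.encode(np.ones((h, w), dtype=np.uint8, order='F'))

rle_poly = maskUtils.merge(maskUtils.frPyObjects(poly_segm, h, w))
rle_unc = maskUtils.frPyObjects(uncompressed, h, w)
rle_cmp = compressed                                        # already in final form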
Example #6
def get_mask(idx):
    ann_ids = coco.getAnnIds(imgIds=img_ids[idx])
    anns = coco.loadAnns(ann_ids)
    img = coco.loadImgs(img_ids[idx])[0]
    m = np.zeros((img['height'], img['width']))
    for j in anns:
        if j['iscrowd']:
            rle = mask.frPyObjects(j['segmentation'], img['height'], img['width'])
            m += mask.decode(rle)
    return m < 0.5
 def convert(self, mode):
     width, height = self.size
     if mode == "mask":
         rles = mask_utils.frPyObjects(
             [p.numpy() for p in self.polygons], height, width
         )
         rle = mask_utils.merge(rles)
         mask = mask_utils.decode(rle)
         mask = torch.from_numpy(mask)
         # TODO add squeeze?
         return mask
Example #8
 def annToRLE(self, ann):
     """
     Convert an annotation, which can be polygons or uncompressed RLE, to compressed RLE.
     :return: RLE (run-length encoding dict)
     """
     t = self.imgs[ann['image_id']]
     h, w = t['height'], t['width']
     segm = ann['segmentation']
     if type(segm) == list:
         # polygon -- a single object might consist of multiple parts
         # we merge all parts into one mask rle code
         rles = maskUtils.frPyObjects(segm, h, w)
         rle = maskUtils.merge(rles)
     elif type(segm['counts']) == list:
         # uncompressed RLE
         rle = maskUtils.frPyObjects(segm, h, w)
     else:
         # rle
         rle = ann['segmentation']
     return rle
Example #9
def polys_to_mask(polygons, height, width):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed inside a height x width image. The resulting
    mask is therefore of shape (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
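
A hedged usage sketch for polys_to_mask above, assuming the function and its numpy / pycocotools imports are in scope; the two triangles are made up.

polygons = [[0., 0., 5., 0., 0., 7.],
            [1., 1., 5., 5., 1., 5.]]
m = polys_to_mask(polygons, height=8, width=6)
print(m.shape, m.dtype)   # (8, 6) float32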
Example #10
File: common.py Project: wu-yy/tensorpack
def segmentation_to_mask(polys, height, width):
    """
    Convert polygons to binary masks.

    Args:
        polys: a list of nx2 float array

    Returns:
        a binary matrix of (height, width)
    """
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
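
A hedged usage sketch for the tensorpack helper above; the single polygon is a made-up (n, 2) float array of (x, y) vertices.

import numpy as np

tri = np.array([[1., 1.], [6., 1.], [6., 6.]], dtype=np.float32)
m = segmentation_to_mask([tri], height=8, width=8)
print(m.shape, m.dtype)   # (8, 8) uint8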
Example #11
        def _getIgnoreRegion(iid, coco):
            img = coco.imgs[iid]

            if not 'ignore_regions_x' in img.keys():
                return None

            if len(img['ignore_regions_x']) == 0:
                return None

            rgns_merged = []
            for region_x, region_y in zip(img['ignore_regions_x'], img['ignore_regions_y']):
                rgns = [iter(region_x), iter(region_y)]
                rgns_merged.append(list(it.next() for it in itertools.cycle(rgns)))
            rles = maskUtils.frPyObjects(rgns_merged, img['height'], img['width'])
            rle = maskUtils.merge(rles)
            return maskUtils.decode(rle)
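
The itertools.cycle expression above interleaves each ignore region's x and y lists into the flat [x1, y1, x2, y2, ...] layout that frPyObjects expects. Relying on StopIteration to stop a generator expression breaks on Python 3.7+ (PEP 479), so a hedged, portable equivalent is:

import itertools

region_x, region_y = [10, 50, 50], [20, 20, 60]
flat = list(itertools.chain.from_iterable(zip(region_x, region_y)))
print(flat)   # [10, 20, 50, 20, 50, 60]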
Example #12
def polys_to_mask_wrt_box(polygons, box, M):
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
Example #13
 def getMask(self, ref):
     '''
     :return: mask, mask-area, mask-center
     '''
     ann = self.refToAnn[ref['ref_id']]
     image = self.imgs[ref['image_id']]
     if type(ann['segmentation'][0]) == list: # polygon
         rle = mask.frPyObjects(ann['segmentation'], image['height'], image['width'])
     else: # mask
         rle = ann['segmentation']
     m = mask.decode(rle)
     m = np.sum(m, axis=2)   # sometimes there are multiple binary maps (corresponding to multiple segs)
     m = m.astype(np.uint8)  # convert to np.uint8
     # area
     area = sum(mask.area(rle))              # very close to ann['area']
     # position
     position_x = np.mean(np.where(m==1)[1]) # [1] means columns (matlab style) -> x (c++ style)
     position_y = np.mean(np.where(m==1)[0]) # [0] means rows (matlab style)    -> y (c++ style)
     # mass position (If there were multiple regions, we use the largest one.)
     label_m = label(m, connectivity=m.ndim)
     regions = regionprops(label_m)
     if len(regions) > 0:
         largest_id = np.argmax(np.array([props.filled_area for props in regions]))
         largest_props = regions[largest_id]
         mass_y, mass_x = largest_props.centroid
     else:
         mass_x, mass_y = position_x, position_y
     # if centroid is not in mask, we find the closest point to it from mask
     if m[int(mass_y), int(mass_x)] != 1:
         print 'Finding closest mask point...'
         kernel = np.ones((10, 10),np.uint8)
         me = cv2.erode(m, kernel, iterations = 1)
         points = zip(np.where(me == 1)[0].tolist(), np.where(me == 1)[1].tolist())  # row, col style
         points = np.array(points)
         dist   = np.sum((points - (mass_y, mass_x))**2, axis=1)
         id     = np.argsort(dist)[0]
         mass_y, mass_x = points[id]
     # return
     return {'mask': m, 'area': area, 'position_x': position_x, 'position_y': position_y, 'mass_x': mass_x, 'mass_y': mass_y}
Example #14
def to_mask(polys, size):
    """Convert list of polygons to full size binary mask

    Parameters
    ----------
    polys : list of numpy.ndarray
        Numpy.ndarray with shape (N, 2) where N is the number of polygon points.
        The second axis represents points of the polygons.
        Specifically, these are :math:`(x, y)`.
    size : tuple
        Tuple of length 2: (width, height).

    Returns
    -------
    numpy.ndarray
        Full size binary mask of shape (height, width)
    """
    try_import_pycocotools()
    import pycocotools.mask as cocomask
    width, height = size
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)
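
A hedged usage sketch for to_mask above (assuming gluoncv and pycocotools are installed): size is given as (width, height) while the returned mask is (height, width).

import numpy as np

square = np.array([[2., 2.], [10., 2.], [10., 10.], [2., 10.]])
m = to_mask([square], size=(16, 12))   # width=16, height=12
print(m.shape)                         # (12, 16)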
def create_tf_example(image_path,
                      image,
                      annotations_list,
                      category_index,
                      include_masks=False):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for idx, object_annotations in enumerate(annotations_list):
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])
        if include_masks:
            segm = object_annotations['segmentation']
            if isinstance(segm, list):
                rles = mask.frPyObjects(segm, image_height, image_width)
                rle = mask.merge(rles)
                m = mask.decode(rle)
            else:
                m = mask.decode(segm)
            pil_image = PIL.Image.fromarray(m)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
            if DUMP_MASK_IMAGES:
                m[m > 0] = 255
                pil_image = PIL.Image.fromarray(m)
                save_path = filename.split('.')[0] + "_" + str(idx) + ".png"
                save_path = FLAGS.output_dir + '/' + filename.split(
                    '.')[0] + '_mask_' + str(idx) + '.png'
                pil_image.save(save_path)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Example #16
    def load_sequence(self, sequence):
        """Load a sequence of images/frames

        Auxiliary function that loads a sequence of frames with
        the corresponding ground truth and their filenames.
        Returns a dict with the images in [0, 1], their corresponding
        labels, their subset (i.e. category, clip, prefix) and their
        filenames.
        """
        from pycocotools import mask as cocomask
        from matplotlib.path import Path
        X = []
        Y = []
        F = []

        for prefix, img in sequence:
            if not os.path.exists('%s/%s' % (self.image_path,
                                             img['file_name'])):
                raise RuntimeError('Image %s is missing' % img['file_name'])

            im = Image.open('%s/%s' % (self.image_path,
                                       img['file_name'])).copy()
            if im.mode == 'L':
                if self.warn_grayscale:
                    warnings.warn('image %s is grayscale..' % img['file_name'],
                                  RuntimeWarning)
                im = im.convert('RGB')

            # load the annotations and build the mask
            anns = self.coco.loadAnns(self.coco.getAnnIds(
                    imgIds=img['id'], catIds=prefix, iscrowd=None))

            mask = np.zeros(im.size).transpose(1, 0)
            for ann in anns:
                catId = ann['category_id']
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        # xy vertex of the polygon
                        poly = np.array(seg).reshape((len(seg)/2, 2))
                        closed_path = Path(poly)
                        nx, ny = img['width'], img['height']
                        x, y = np.meshgrid(np.arange(nx), np.arange(ny))
                        x, y = x.flatten(), y.flatten()
                        points = np.vstack((x, y)).T
                        grid = closed_path.contains_points(points)
                        if np.count_nonzero(grid) == 0:
                            warnings.warn(
                                'One of the annotations that compose the mask '
                                'of %s was empty' % img['file_name'],
                                RuntimeWarning)
                        grid = grid.reshape((ny, nx))
                        mask[grid] = catId
                else:
                    # mask
                    if type(ann['segmentation']['counts']) == list:
                        rle = cocomask.frPyObjects(
                            [ann['segmentation']],
                            img['height'], img['width'])
                    else:
                        rle = [ann['segmentation']]
                    grid = cocomask.decode(rle)[:, :, 0]
                    grid = grid.astype('bool')
                    mask[grid] = catId

            mask = np.array(mask.astype('int32'))
            im = np.array(im).astype(floatX) / 255.
            X.append(im)
            Y.append(mask)
            F.append(img['file_name'])

        ret = {}
        ret['data'] = np.array(X)
        ret['labels'] = np.array(Y)
        ret['subset'] = prefix
        ret['filenames'] = np.array(F)
        return ret
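
The polygon branch above rasterizes with matplotlib's Path.contains_points rather than pycocotools. A hedged, standalone sketch of that rasterization with a made-up polygon on an 8x8 grid:

import numpy as np
from matplotlib.path import Path

seg = [1., 1., 6., 1., 6., 6., 1., 6.]                # flat x, y polygon
poly = np.array(seg).reshape((len(seg) // 2, 2))
nx, ny = 8, 8
x, y = np.meshgrid(np.arange(nx), np.arange(ny))
points = np.vstack((x.flatten(), y.flatten())).T
grid = Path(poly).contains_points(points).reshape((ny, nx))
print(grid.sum())                                     # pixels inside the polygon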
Example #17
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    global src_file_index
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    if len(category_ids) == 0:
        return None, None, None
    category_ids = trans_category_ids(category_ids)
    feature_dict = {
        'image/height':
        int64_feature(image_height),
        'image/width':
        int64_feature(image_width),
        'image/channels':
        int64_feature(3),
        'image/shape':
        int64_feature([image_height, image_width, 3]),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymax),
        'image/object/bboxlabel':
        dataset_util.int64_list_feature(category_ids),
        'image/object/bbox/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/bbox/label_text':
        bytes_feature(category_to_text(category_ids)),
        'image/object/difficult':
        dataset_util.int64_list_feature(is_crowd),
        'image/object/truncated':
        dataset_util.int64_list_feature(np.ones_like(is_crowd)),
        'image/object/area':
        dataset_util.float_list_feature(area),
        'image/file_index':
        dataset_util.int64_feature(src_file_index),
    }

    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    src_file_index += 1
    return key, example, num_annotations_skipped
    for im_path in paths:
        im = cv.imread('restricted/' + im_path)
        print(im.shape)
        print(im_path)
        print(int(im_path[:-4]))
        seg_list, label_list = get_index(int(im_path[:-4]), load_dict)
        #print(seg_list)
        #print(label_list)
        #masks = np.zeros((im.shape[0],im.shape[1], 1), np.uint8)
        seg = []
        masks = []
        cnt = 0
        for seg_idx in seg_list:
            seg = load_dict['annotations'][seg_idx]['segmentation'][
                0]  #load first seg in seg list
            compactedRLE = maskutil.frPyObjects(
                [seg], im.shape[0], im.shape[1])  #compress through RLE
            mask = maskutil.decode(compactedRLE)  #decode to mask
            print(mask.shape)
            mask = np.reshape(mask, (im.shape[0], im.shape[1]))  #for display
            mask = mask * get_color(
                label_list[cnt])  #change color for different class
            masks.append(mask)  #add sub mask for a full mask
            print(mask.shape)
            cnt += 1
        final_mask = np.zeros((im.shape[0], im.shape[1]),
                              np.uint8)  #final mask for each img
        for mask in masks:  #merge all mask into final mask
            final_mask = final_mask + mask
        plt.imshow(final_mask)  #show final mask
        plt.show()
Example #19
    def draw_binary_mask(self,
                         binary_mask,
                         color=None,
                         *,
                         edge_color=None,
                         text=None,
                         alpha=0.5,
                         area_threshold=4096):
        """
        Args:
            binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
                W is the image width. Each value in the array is either a 0 or 1 value of uint8
                type.
            color: color of the mask. Refer to `matplotlib.colors` for a full list of
                formats that are accepted. If None, will pick a random color.
            edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
                full list of formats that are accepted.
            text (str): if not None, will be drawn at the object's center of mass.
            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
            area_threshold (float): a connected component smaller than this area will not be shown.

        Returns:
            output (VisImage): image object with mask drawn.
        """
        if color is None:
            color = random_color(rgb=True, maximum=1)
        if area_threshold is None:
            area_threshold = 4096

        has_valid_segment = False
        binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
        mask = GenericMask(binary_mask, self.output.height, self.output.width)
        shape2d = (binary_mask.shape[0], binary_mask.shape[1])

        if not mask.has_holes:
            # draw polygons for regular masks
            for segment in mask.polygons:
                area = mask_util.area(
                    mask_util.frPyObjects([segment], shape2d[0], shape2d[1]))
                if area < area_threshold:
                    continue
                has_valid_segment = True
                segment = segment.reshape(-1, 2)
                self.draw_polygon(segment,
                                  color=color,
                                  edge_color=edge_color,
                                  alpha=alpha)
        else:
            rgba = np.zeros(shape2d + (4, ), dtype="float32")
            rgba[:, :, :3] = color
            rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
            has_valid_segment = True
            self.output.ax.imshow(rgba)

        if text is not None and has_valid_segment:
            # TODO sometimes drawn on wrong objects. the heuristics here can improve.
            lighter_color = self._change_color_brightness(
                color, brightness_factor=0.7)
            _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(
                binary_mask, 8)
            largest_component_id = np.argmax(stats[1:, -1]) + 1

            # draw text on the largest component, as well as other very large components.
            for cid in range(1, _num_cc):
                if cid == largest_component_id or stats[
                        cid, -1] > _LARGE_MASK_AREA_THRESH:
                    # median is more stable than centroid
                    # center = centroids[largest_component_id]
                    center = np.median((cc_labels == cid).nonzero(),
                                       axis=1)[::-1]
                    self.draw_text(text, center, color=lighter_color)
        return self.output
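
A hedged sketch of the polygon-area filter used above: each polygon segment is converted to RLE with frPyObjects and measured with mask_util.area (the square and image size below are made up).

from pycocotools import mask as mask_util

segment = [0., 0., 100., 0., 100., 100., 0., 100.]    # flat polygon, a 100x100 square
rles = mask_util.frPyObjects([segment], 200, 200)
print(mask_util.area(rles))                           # one entry, roughly 10000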
Example #20
    def __getitem__(self, index):
        # index = (index + 2000) % len(self.roidb)
        blob = defaultdict(list)
        im_blob, im_scales = self.get_image_blob([self.roidb[index]])
        if config.network.has_rpn:
            if self.phase != 'test':
                add_rpn_blobs(blob, im_scales, [self.roidb[index]])
                data = {'data': im_blob,
                        'im_info': blob['im_info']}
                label = {'roidb': blob['roidb'][0]}
                for stride in config.network.rpn_feat_stride:
                    label.update({
                        'rpn_labels_fpn{}'.format(stride): blob['rpn_labels_int32_wide_fpn{}'.format(stride)].astype(
                            np.int64),
                        'rpn_bbox_targets_fpn{}'.format(stride): blob['rpn_bbox_targets_wide_fpn{}'.format(stride)],
                        'rpn_bbox_inside_weights_fpn{}'.format(stride): blob[
                            'rpn_bbox_inside_weights_wide_fpn{}'.format(stride)],
                        'rpn_bbox_outside_weights_fpn{}'.format(stride): blob[
                            'rpn_bbox_outside_weights_wide_fpn{}'.format(stride)]
                    })
            else:
                data = {'data': im_blob,
                        'im_info': np.array([[im_blob.shape[-2],
                                              im_blob.shape[-1],
                                             im_scales[0]]], np.float32)}
                label = {'roidb': self.roidb[index]}
        else:
            raise NotImplementedError
        if config.network.has_fcn_head:
            if self.phase != 'test':
                seg_gt = np.array(Image.open(self.roidb[index]['image'].replace('images', 'labels').replace('.jpg', '.png')))
                if self.roidb[index]['flipped']:
                    seg_gt = np.fliplr(seg_gt)
                seg_gt = cv2.resize(seg_gt, None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST)
                label.update({'seg_gt': seg_gt})
                # label.update({'seg_gt_4x': cv2.resize(seg_gt, (im_blob.shape[-1] // 4, im_blob.shape[-2] // 4), interpolation=cv2.INTER_NEAREST)})
                label.update({'gt_classes': label['roidb']['gt_classes']})
                label.update({'mask_gt': np.zeros((len(label['gt_classes']), im_blob.shape[-2], im_blob.shape[-1]))})
                for i in range(len(label['gt_classes'])):
                    if type(label['roidb']['segms'][i]) is list and type(label['roidb']['segms'][i][0]) is list:
                        img = Image.new('L', (int(np.round(im_blob.shape[-1] / im_scales[0])), int(np.round(im_blob.shape[-2] / im_scales[0]))), 0)
                        for j in range(len(label['roidb']['segms'][i])):
                            ImageDraw.Draw(img).polygon(tuple(label['roidb']['segms'][i][j]), outline=1, fill=1)
                            # try:
                            #     ImageDraw.Draw(img).polygon(tuple(label['roidb']['segms'][i][j]), outline=1, fill=1)
                            # except:
                            #     print(label['roidb']['segms'][i], j)
                            #     import pdb; pdb.set_trace()
                            #     sys.exit()
                        label['mask_gt'][i] = cv2.resize(np.array(img), None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST)
                    else:
                        assert type(label['roidb']['segms'][i]) is dict or type(label['roidb']['segms'][i][0]) is dict
                        if type(label['roidb']['segms'][i]) is dict:
                            label['mask_gt'][i] = cv2.resize(mask_util.decode(mask_util.frPyObjects([label['roidb']['segms'][i]], label['roidb']['segms'][i]['size'][0], label['roidb']['segms'][i]['size'][1]))[:, :, 0], None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST)
                        else:
                            assert len(label['roidb']['segms'][i]) == 1
                            output = mask_util.decode(label['roidb']['segms'][i])
                            label['mask_gt'][i] = cv2.resize(output[:, :, 0], None, None, fx=im_scales[0], fy=im_scales[0], interpolation=cv2.INTER_NEAREST)
                if config.train.fcn_with_roi_loss:
                    gt_boxes = label['roidb']['boxes'][np.where(label['roidb']['gt_classes'] > 0)[0]]
                    gt_boxes = np.around(gt_boxes * im_scales[0]).astype(np.int32)
                    label.update({'seg_roi_gt': np.zeros((len(gt_boxes), config.network.mask_size, config.network.mask_size), dtype=np.int64)})
                    for i in range(len(gt_boxes)):
                        if gt_boxes[i][3] == gt_boxes[i][1]:
                            gt_boxes[i][3] += 1
                        if gt_boxes[i][2] == gt_boxes[i][0]:
                            gt_boxes[i][2] += 1
                        label['seg_roi_gt'][i] = cv2.resize(seg_gt[gt_boxes[i][1]:gt_boxes[i][3], gt_boxes[i][0]:gt_boxes[i][2]], (config.network.mask_size, config.network.mask_size), interpolation=cv2.INTER_NEAREST)
            else:
                pass

        return data, label, index
Example #21
def determine_max_batch_size(cfg, distributed, dataset_len_per_gpu):
    def get_fake_input(cfg, orig_img_shape=(128, 128, 3), device='cuda'):
        test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
        test_pipeline = Compose(test_pipeline)
        data = dict(img=np.zeros(orig_img_shape, dtype=np.uint8))
        data = test_pipeline(data)
        data = scatter(collate([data], samples_per_gpu=1), [device])[0]
        return data

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg).cuda()

    if 'pipeline' in cfg.data.train:
        img_shape = [
            t for t in cfg.data.train.pipeline if t['type'] == 'Resize'
        ][0]['img_scale']
    else:
        img_shape = [
            t for t in cfg.data.train.dataset.pipeline if t['type'] == 'Resize'
        ][0]['img_scale']

    channels = 3

    fake_input = get_fake_input(cfg,
                                orig_img_shape=list(img_shape) + [channels])
    img_shape = fake_input['img_metas'][0][0]['pad_shape']

    width, height = img_shape[0], img_shape[1]

    percentage = 0.9

    min_bs = 2
    max_bs = min(512, int(dataset_len_per_gpu / percentage) + 1)
    step = 1

    batch_size = min_bs
    for bs in range(min_bs, max_bs, step):
        try:
            gt_boxes = [
                torch.tensor([[0., 0., width, height]]).cuda()
                for _ in range(bs)
            ]
            gt_labels = [
                torch.tensor([0], dtype=torch.long).cuda() for _ in range(bs)
            ]
            img_metas = [fake_input['img_metas'][0][0] for _ in range(bs)]

            gt_masks = None

            if isinstance(model,
                          TwoStageDetector) and model.roi_head.with_mask:
                rles = maskUtils.frPyObjects(
                    [[0.0, 0.0, width, 0.0, width, height, 0.0, height]],
                    height, width)
                rle = maskUtils.merge(rles)
                mask = maskUtils.decode(rle)
                gt_masks = [
                    BitmapMasks([mask], height, width) for _ in range(bs)
                ]

            if gt_masks is None:
                model(torch.rand(bs, channels, height, width).cuda(),
                      img_metas=img_metas,
                      gt_bboxes=gt_boxes,
                      gt_labels=gt_labels)
            else:
                model(torch.rand(bs, channels, height, width).cuda(),
                      img_metas=img_metas,
                      gt_bboxes=gt_boxes,
                      gt_labels=gt_labels,
                      gt_masks=gt_masks)

            batch_size = bs
        except RuntimeError as e:
            if str(e).startswith('CUDA out of memory'):
                break

    resulting_batch_size = int(batch_size * percentage)

    del model
    torch.cuda.empty_cache()

    if distributed:
        rank, world_size = get_dist_info()

        resulting_batch_size = torch.tensor(resulting_batch_size).cuda()
        dist.all_reduce(resulting_batch_size, torch.distributed.ReduceOp.MIN)
        print('rank', rank, 'resulting_batch_size', resulting_batch_size)

        resulting_batch_size = int(resulting_batch_size.cpu())
    else:
        print('resulting_batch_size', resulting_batch_size)

    return resulting_batch_size
Example #22
    def load_sequence(self, sequence):
        """Load a sequence of images/frames

        Auxiliary function that loads a sequence of frames with
        the corresponding ground truth and their filenames.
        Returns a dict with the images in [0, 1], their corresponding
        labels, their subset (i.e. category, clip, prefix) and their
        filenames.
        """
        from pycocotools import mask as cocomask
        from matplotlib.path import Path
        X = []
        Y = []
        F = []

        for prefix, img in sequence:
            if not os.path.exists('%s/%s' %
                                  (self.image_path, img['file_name'])):
                raise RuntimeError('Image %s is missing' % img['file_name'])

            im = Image.open('%s/%s' %
                            (self.image_path, img['file_name'])).copy()
            if im.mode == 'L':
                if self.warn_grayscale:
                    warnings.warn('image %s is grayscale..' % img['file_name'],
                                  RuntimeWarning)
                im = im.convert('RGB')

            # load the annotations and build the mask
            anns = self.coco.loadAnns(
                self.coco.getAnnIds(imgIds=img['id'],
                                    catIds=prefix,
                                    iscrowd=None))

            mask = np.zeros(im.size).transpose(1, 0)
            for ann in anns:
                catId = ann['category_id']
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        # xy vertex of the polygon
                        poly = np.array(seg).reshape((len(seg) / 2, 2))
                        closed_path = Path(poly)
                        nx, ny = img['width'], img['height']
                        x, y = np.meshgrid(np.arange(nx), np.arange(ny))
                        x, y = x.flatten(), y.flatten()
                        points = np.vstack((x, y)).T
                        grid = closed_path.contains_points(points)
                        if np.count_nonzero(grid) == 0:
                            warnings.warn(
                                'One of the annotations that compose the mask '
                                'of %s was empty' % img['file_name'],
                                RuntimeWarning)
                        grid = grid.reshape((ny, nx))
                        mask[grid] = catId
                else:
                    # mask
                    if type(ann['segmentation']['counts']) == list:
                        rle = cocomask.frPyObjects([ann['segmentation']],
                                                   img['height'], img['width'])
                    else:
                        rle = [ann['segmentation']]
                    grid = cocomask.decode(rle)[:, :, 0]
                    grid = grid.astype('bool')
                    mask[grid] = catId

            mask = np.array(mask.astype('int32'))
            im = np.array(im).astype(floatX) / 255.
            X.append(im)
            Y.append(mask)
            F.append(img['file_name'])

        ret = {}
        ret['data'] = np.array(X)
        ret['labels'] = np.array(Y)
        ret['subset'] = prefix
        ret['filenames'] = np.array(F)
        return ret
    num_clear = 0
    num_valid = 0
    bbox_merged_list = []
    if_clear_list = []
    ratio_list = []
    for idx in range(len(anns)):
        ## Get kps
        ann_kps = anns_kps[idx]
        if_clear = check_clear(
            ann_kps)  # check for head up and foot down person

        ## Get bbox
        ann = anns[idx]
        mask1 = ann['segmentation']
        # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L265
        rle = maskUtils.frPyObjects(mask1, img['height'], img['width'])
        area = maskUtils.area(rle)
        bboxes = maskUtils.toBbox(rle)  # [x, y, w, h]
        if len(bboxes.shape) != 2:
            #             print('Warning!! len(bboxes.shape)!=2')
            continue
        bbox_merged = bboxes[0] if len(bboxes) == 1 else merge_bboxes(bboxes)

        if bbox_merged[2] == 0. or bbox_merged[3] == 0.:
            continue
        ratio = float(bbox_merged[3]) / float(bbox_merged[2])
        #         if ratio <= 2.:
        #             continue

        if if_clear:
            any_clear = True
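
A hedged sketch of the RLE-to-box path used in the snippet above: frPyObjects on a polygon list yields one RLE per polygon, and area / toBbox operate on each (the rectangle below is made up).

from pycocotools import mask as maskUtils

segm = [[10., 10., 40., 10., 40., 30., 10., 30.]]
rle = maskUtils.frPyObjects(segm, 100, 100)
print(maskUtils.area(rle))     # one area per polygon
print(maskUtils.toBbox(rle))   # one [x, y, width, height] row per polygon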
Example #24
def fetch_from_COCO(filenames, img_list,
                    coco_info,
                    resize_images=False, resize_size=-1,
                    load_categories=['person']):
    images = []
    masks = []
    assert len(filenames) == len(img_list)
    for n, img_el in enumerate(img_list):
        # load image
        if not os.path.exists(filenames[n]):
            print('Image %s is missing' % filenames[n])
            continue

        pth = filenames[n]
        im = Image.open(pth)

        coco, catIds, imgIds = coco_info

        # load the annotations and build the mask
        anns = coco.loadAnns(coco.getAnnIds(
            imgIds=img_el['id'], catIds=catIds, iscrowd=None))

        mask = np.zeros(im.size).transpose(1, 0)
        for ann in anns:
            catId = ann['category_id']
            if type(ann['segmentation']) == list:
                # polygon
                for seg in ann['segmentation']:
                    # xy vertex of the polygon
                    poly = np.array(seg).reshape((len(seg)/2, 2))
                    closed_path = Path(poly)
                    nx, ny = img_el['width'], img_el['height']
                    x, y = np.meshgrid(np.arange(nx),
                                       np.arange(ny))
                    x, y = x.flatten(), y.flatten()
                    points = np.vstack((x, y)).T
                    grid = closed_path.contains_points(points)
                    if np.count_nonzero(grid) == 0:
                        warnings.warn(
                            'One of the annotations that compose the mask '
                            'of %s was empty' % img_el['file_name'],
                            RuntimeWarning)
                    grid = grid.reshape((ny, nx))
                    mask[grid] = catId
            else:
                # mask
                if type(ann['segmentation']['counts']) == list:
                    rle = cocomask.frPyObjects(
                        [ann['segmentation']],
                        img_el['height'], img_el['width'])
                else:
                    rle = [ann['segmentation']]
                grid = cocomask.decode(rle)[:, :, 0]
                grid = grid.astype('bool')
                mask[grid] = catId

        # zero_pad
        if resize_images:
            rx, ry = resize_size
            # resize (keeping proportions)
            [x, y] = im.size
            dx = float(rx)/x
            dy = float(ry)/y
            ratio = min(dx, dy)
            x = int(x * ratio)
            y = int(y * ratio)

            # workaround for PIL problems..
            @retry(stop_max_attempt_number=7, wait_fixed=2000)
            def res(im, x, y):
                return im.resize((x, y), Image.ANTIALIAS)
            im = res(im, x, y)
            # mask = mask / numpy.max(mask) * 255.0 --> only visualization
            mask = Image.fromarray(mask.astype('uint8'))
            mask = mask.resize((x, y), Image.NEAREST)

            tmp = im
            im = Image.new("RGB", (rx, ry))
            im.paste(tmp, ((rx-x)/2, (ry-y)/2))
            tmp = mask
            # 80 obj categories
            mask = Image.new("L", (rx, ry))
            mask.paste(tmp, ((rx-x)/2, (ry-y)/2))

            images.append(np.asarray(im))
            masks.append(np.asarray(mask))
    return images, masks, filenames
Example #25
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None,
                      densepose_annotations_dict=None,
                      remove_non_person_annotations=False,
                      remove_non_person_images=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    keypoint_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
      keypoint information for this person object annotation. If None, then
      no keypoint annotations will be populated.
    densepose_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
      representing part surface coordinates. For more information see
      http://densepose.org/.
    remove_non_person_annotations: Whether to remove any annotations that are
      not the "person" class.
    remove_non_person_images: Whether to remove any images that do not contain
      at least one "person" annotation.

  Returns:
    key: SHA256 hash of the image.
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.
    num_keypoint_annotation_skipped: Number of keypoint annotations that were
      skipped.
    num_densepose_annotation_skipped: Number of DensePose annotations that were
      skipped.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    keypoints_x = []
    keypoints_y = []
    keypoints_visibility = []
    keypoints_name = []
    num_keypoints = []
    include_keypoint = keypoint_annotations_dict is not None
    num_annotations_skipped = 0
    num_keypoint_annotation_used = 0
    num_keypoint_annotation_skipped = 0
    dp_part_index = []
    dp_x = []
    dp_y = []
    dp_u = []
    dp_v = []
    dp_num_points = []
    densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
    include_densepose = densepose_annotations_dict is not None
    num_densepose_annotation_used = 0
    num_densepose_annotation_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        category_name = category_index[category_id]['name'].encode('utf8')
        if remove_non_person_annotations and category_name != b'person':
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_name)
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

        if include_keypoint:
            annotation_id = object_annotations['id']
            if annotation_id in keypoint_annotations_dict:
                num_keypoint_annotation_used += 1
                keypoint_annotations = keypoint_annotations_dict[annotation_id]
                keypoints = keypoint_annotations['keypoints']
                num_kpts = keypoint_annotations['num_keypoints']
                keypoints_x_abs = keypoints[::3]
                keypoints_x.extend(
                    [float(x_abs) / image_width for x_abs in keypoints_x_abs])
                keypoints_y_abs = keypoints[1::3]
                keypoints_y.extend(
                    [float(y_abs) / image_height for y_abs in keypoints_y_abs])
                keypoints_visibility.extend(keypoints[2::3])
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(num_kpts)
            else:
                keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(0)

        if include_densepose:
            annotation_id = object_annotations['id']
            if (annotation_id in densepose_annotations_dict
                    and all(key in densepose_annotations_dict[annotation_id]
                            for key in densepose_keys)):
                dp_annotations = densepose_annotations_dict[annotation_id]
                num_densepose_annotation_used += 1
                dp_num_points.append(len(dp_annotations['dp_I']))
                dp_part_index.extend([
                    int(i - _DP_PART_ID_OFFSET) for i in dp_annotations['dp_I']
                ])
                # DensePose surface coordinates are defined on a [256, 256] grid
                # relative to each instance box (i.e. absolute coordinates in range
                # [0., 256.]). The following converts the coordinates
                # so that they are expressed in normalized image coordinates.
                dp_x_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_x']
                ]
                dp_x_norm = [(float(x) + x_box_rel * width) / image_width
                             for x_box_rel in dp_x_box_rel]
                dp_y_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_y']
                ]
                dp_y_norm = [(float(y) + y_box_rel * height) / image_height
                             for y_box_rel in dp_y_box_rel]
                dp_x.extend(dp_x_norm)
                dp_y.extend(dp_y_norm)
                dp_u.extend(dp_annotations['dp_U'])
                dp_v.extend(dp_annotations['dp_V'])
            else:
                dp_num_points.append(0)

    if (remove_non_person_images
            and not any(name == b'person' for name in category_names)):
        return (key, None, num_annotations_skipped,
                num_keypoint_annotation_skipped,
                num_densepose_annotation_skipped)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    if include_keypoint:
        feature_dict['image/object/keypoint/x'] = (
            dataset_util.float_list_feature(keypoints_x))
        feature_dict['image/object/keypoint/y'] = (
            dataset_util.float_list_feature(keypoints_y))
        feature_dict['image/object/keypoint/num'] = (
            dataset_util.int64_list_feature(num_keypoints))
        feature_dict['image/object/keypoint/visibility'] = (
            dataset_util.int64_list_feature(keypoints_visibility))
        feature_dict['image/object/keypoint/text'] = (
            dataset_util.bytes_list_feature(keypoints_name))
        num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) -
                                           num_keypoint_annotation_used)
    if include_densepose:
        feature_dict['image/object/densepose/num'] = (
            dataset_util.int64_list_feature(dp_num_points))
        feature_dict['image/object/densepose/part_index'] = (
            dataset_util.int64_list_feature(dp_part_index))
        feature_dict['image/object/densepose/x'] = (
            dataset_util.float_list_feature(dp_x))
        feature_dict['image/object/densepose/y'] = (
            dataset_util.float_list_feature(dp_y))
        feature_dict['image/object/densepose/u'] = (
            dataset_util.float_list_feature(dp_u))
        feature_dict['image/object/densepose/v'] = (
            dataset_util.float_list_feature(dp_v))
        num_densepose_annotation_skipped = (len(densepose_annotations_dict) -
                                            num_densepose_annotation_used)

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return (key, example, num_annotations_skipped,
            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
    return rmin - padSize, rmax + padSize + 1, cmin - padSize, cmax + padSize + 1


for i in np.arange(_count, len(anns)):
    print 'transforming instance %d' % i
    #transform_and_save_image(i)
    uint_image = io.imread('%s/images/%s/%s' %
                           (dataDir, dataType, imgs[i]['file_name']))
    if len(uint_image.shape) == 2:
        tmp_image = np.zeros(uint_image.shape + (3, ), dtype=np.uint8)
        tmp_image[:, :, 0] = tmp_image[:, :, 1] = tmp_image[:, :, 2] = uint_image
        uint_image = tmp_image
    float_image = np.array(uint_image, dtype=np.float32) / 255.0
    rle = mask.frPyObjects(anns[i]['segmentation'], imgs[i]['height'],
                           imgs[i]['width'])
    m_uint = mask.decode(rle)
    m = np.array(m_uint[:, :, 0], dtype=np.float32)
    base_tran = video_transformer.sample()
    frame1_tran = base_tran  # + frame_transformer.sample()
    frame2_tran = base_tran + frame_transformer.sample()
    image1 = frame1_tran.transform_img(float_image.copy(),
                                       float_image.shape[:2], m)
    #print 'image1 size: %s' % str(image1.shape)
    image1_padded = np.pad(image1,
                           ((padSize, padSize), (padSize, padSize), (0, 0)),
                           mode='constant')
    #print 'image1_padded size: %s' % str(image1_padded.shape)
    mask1 = frame1_tran.transform_mask(m.copy(), m.shape)

    #fills padded area with -1
예제 #27
0
 def _read_segmentation(self, ann, H, W):
     s = ann['segmentation']
     s = s if type(s) == list else [s]
     return mask.decode(mask.frPyObjects(s, H, W)).max(axis=2)
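
# A hedged usage sketch of the decoding pattern above, assuming pycocotools is
# installed; the polygon annotation and the 4x4 image size are hypothetical.
from pycocotools import mask

ann = {'segmentation': [[0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 3.0]]}
s = ann['segmentation']
s = s if type(s) == list else [s]
binary = mask.decode(mask.frPyObjects(s, 4, 4)).max(axis=2)
print(binary.shape)  # (4, 4) uint8 array with 1s inside the polygon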
예제 #28
0
def showAnns(ori_img, anns, draw_bbox=False):
    h, w, c = ori_img.shape
    if len(anns) == 0:
        return ori_img
    if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
        datasetType = 'instances'
    elif 'caption' in anns[0]:
        datasetType = 'captions'
    else:
        raise Exception('datasetType not supported')
    if datasetType == 'instances':
        mask = np.zeros_like(ori_img).astype(np.uint8)

        for ann in anns:
            c = np.array((np.random.random(
                (1, 3)) * 0.6 + 0.4)[0] * 255).astype(int).tolist()
            if 'segmentation' in ann:
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                        pts = poly.reshape((-1, 1, 2))
                        cv2.polylines(ori_img, [pts],
                                      True,
                                      c,
                                      thickness=1,
                                      lineType=cv2.LINE_AA)
                        cv2.drawContours(mask, [pts], -1, c, -1)

                        if cv2.contourArea(pts) > 1:
                            M = cv2.moments(pts)
                            cX = int(M["m10"] / M["m00"])
                            cY = int(M["m01"] / M["m00"])
                            cv2.putText(ori_img,
                                        'CAT:{}'.format(ann['category_id']),
                                        (cX, cY), cv2.FONT_HERSHEY_PLAIN, 0.8,
                                        (255, 255, 255), 1, cv2.LINE_AA)
                else:
                    # mask
                    if type(ann['segmentation']['counts']) == list:
                        rle = maskUtils.frPyObjects([ann['segmentation']], h,
                                                    w)
                    else:
                        rle = [ann['segmentation']]
                    m = maskUtils.decode(rle)
                    m = np.amax(m, axis=2)  # collapse to a single (h, w) binary mask
                    if ann['iscrowd'] == 1:
                        color_mask = np.array([2.0, 166.0, 101.0])
                    if ann['iscrowd'] == 0:
                        color_mask = np.random.random((1, 3)).tolist()[0]
                        color_mask = [v * 255 for v in color_mask]
                    colored = (m[:, :, None] * np.array(color_mask)).astype(np.uint8)
                    ori_img = cv2.addWeighted(ori_img, 0.7, colored, 0.6, 0)
            if draw_bbox:
                if 'bbox' in ann.keys():
                    [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox']
                    pt1 = (int(bbox_x), int(bbox_y))
                    pt2 = (int(bbox_x + bbox_w), int(bbox_y + bbox_h))
                    cv2.rectangle(ori_img,
                                  pt1,
                                  pt2,
                                  color=c,
                                  thickness=1,
                                  lineType=cv2.LINE_AA)

            if 'keypoints' in ann and type(ann['keypoints']) == list:
                # turn skeleton into zero-based index
                # sks = np.array(
                #     self.loadCats(ann['category_id'])[0]['skeleton']) - 1
                kp = np.array(ann['keypoints'])
                x = kp[0::3]
                y = kp[1::3]
                v = kp[2::3]
                # for sk in sks:
                #     if np.all(v[sk] > 0):
                #         cv2.line(ori_img, x[sk], y[sk], color=c)
                print(kp)
                print('keypoint vis not supported')

        if type(ann['segmentation']) == list:
            ori_img = cv2.addWeighted(ori_img, 0.7, mask, 0.6, 0.7)
    elif datasetType == 'captions':
        for ann in anns:
            print(ann['caption'])
    return ori_img
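
# A hedged usage sketch for showAnns above; the annotation file and image
# directory paths are hypothetical, while the pycocotools/cv2 calls are standard.
import cv2
from pycocotools.coco import COCO

coco = COCO('annotations/instances_val2017.json')      # hypothetical path
img_info = coco.loadImgs(coco.getImgIds()[0])[0]
img = cv2.imread('val2017/' + img_info['file_name'])   # hypothetical image dir
anns = coco.loadAnns(coco.getAnnIds(imgIds=img_info['id']))
vis = showAnns(img, anns, draw_bbox=True)
cv2.imwrite('vis.png', vis)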
예제 #29
0
    def _load_annotations(self, ann, image_info=None):
        parsed_annotations = []

        ann_id = ann.get('id')

        attributes = {}
        if 'attributes' in ann:
            try:
                attributes.update(ann['attributes'])
            except Exception as e:
                log.debug("item #%s: failed to read annotation attributes: %s",
                          image_info['id'], e)
        if 'score' in ann:
            attributes['score'] = ann['score']

        group = ann_id  # make sure all tasks' annotations are merged

        if self._task in [CocoTask.instances, CocoTask.person_keypoints]:
            x, y, w, h = ann['bbox']
            label_id = self._get_label_id(ann)

            is_crowd = bool(ann['iscrowd'])
            attributes['is_crowd'] = is_crowd

            if self._task is CocoTask.person_keypoints:
                keypoints = ann['keypoints']
                points = [p for i, p in enumerate(keypoints) if i % 3 != 2]
                visibility = keypoints[2::3]
                parsed_annotations.append(
                    Points(points,
                           visibility,
                           label=label_id,
                           id=ann_id,
                           attributes=attributes,
                           group=group))

            segmentation = ann.get('segmentation')
            if segmentation and segmentation != [[]]:
                rle = None

                if isinstance(segmentation, list):
                    if not self._merge_instance_polygons:
                        # polygon - a single object can consist of multiple parts
                        for polygon_points in segmentation:
                            parsed_annotations.append(
                                Polygon(points=polygon_points,
                                        label=label_id,
                                        id=ann_id,
                                        attributes=attributes,
                                        group=group))
                    else:
                        # merge all parts into a single mask RLE
                        img_h = image_info['height']
                        img_w = image_info['width']
                        rles = mask_utils.frPyObjects(segmentation, img_h,
                                                      img_w)
                        rle = mask_utils.merge(rles)
                elif isinstance(segmentation['counts'], list):
                    # uncompressed RLE
                    img_h = image_info['height']
                    img_w = image_info['width']
                    mask_h, mask_w = segmentation['size']
                    if img_h == mask_h and img_w == mask_w:
                        rle = mask_utils.frPyObjects([segmentation], mask_h,
                                                     mask_w)[0]
                    else:
                        log.warning(
                            "item #%s: mask #%s "
                            "does not match image size: %s vs. %s. "
                            "Skipping this annotation.", image_info['id'],
                            ann_id, (mask_h, mask_w), (img_h, img_w))
                else:
                    # compressed RLE
                    rle = segmentation

                if rle is not None:
                    parsed_annotations.append(
                        RleMask(rle=rle,
                                label=label_id,
                                id=ann_id,
                                attributes=attributes,
                                group=group))
            else:
                parsed_annotations.append(
                    Bbox(x,
                         y,
                         w,
                         h,
                         label=label_id,
                         id=ann_id,
                         attributes=attributes,
                         group=group))
        elif self._task is CocoTask.labels:
            label_id = self._get_label_id(ann)
            parsed_annotations.append(
                Label(label=label_id,
                      id=ann_id,
                      attributes=attributes,
                      group=group))
        elif self._task is CocoTask.captions:
            caption = ann['caption']
            parsed_annotations.append(
                Caption(caption, id=ann_id, attributes=attributes,
                        group=group))
        else:
            raise NotImplementedError()

        return parsed_annotations
예제 #30
0
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exists.
        dataset_name (str or None): the name of the dataset (e.g., coco_2017_train).
            When provided, this function will also do the following:

            * Put "thing_classes" into the metadata associated with this dataset.
            * Map the category ids into a contiguous range (needed by standard dataset format),
              and add "thing_dataset_id_to_contiguous_id" to the metadata associated
              with this dataset.

            This option should usually be provided, unless users need to load
            the original json content and apply more processing manually.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See
        `Using Custom Datasets </tutorials/datasets.html>`_ ) when `dataset_name` is not None.
        If `dataset_name` is None, the returned `category_ids` may be
        incontiguous and may not conform to the Detectron2 standard format.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    """
                    Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
                    """
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    total_num_valid_anns = sum([len(x) for x in anns])
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file."
        )

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
        assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
            json_file
        )

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if isinstance(segm, dict):
                    if isinstance(segm["counts"], list):
                        # convert to compressed RLE
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # filter out invalid polygons (< 3 points)
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                annotation_category_id = obj["category_id"]
                try:
                    obj["category_id"] = id_map[annotation_category_id]
                except KeyError as e:
                    raise KeyError(
                        f"Encountered category_id={annotation_category_id} "
                        "but this id does not exist in 'categories' of the json file."
                    ) from e
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation
            )
            + "There might be issues in your dataset generation process. "
            "A valid polygon should be a list[float] with even length >= 6."
        )
    return dataset_dicts
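
# A hedged usage sketch for load_coco_json above; the paths and dataset name
# are hypothetical. The function returns Detectron2-style dataset dicts; when
# dataset_name is given, metadata and a contiguous category-id mapping are
# also populated as described in the docstring.
dataset_dicts = load_coco_json(
    'datasets/coco/annotations/instances_val2017.json',  # hypothetical json
    'datasets/coco/val2017',                              # hypothetical image root
    dataset_name='my_coco_val2017')
print(len(dataset_dicts))
print(dataset_dicts[0]['file_name'], len(dataset_dicts[0]['annotations']))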
예제 #31
0
def annotation_data(folders, vid_id, ann_id, exists=False):
    hand_data = dict(info=hands_info,
                     licenses=hands_licenses,
                     categories=hands_categories,
                     videos=[],
                     annotations=[])

    for directory in folders:

        print(sorted(directory.glob('*.jpg'))[0])
        im_path = str(sorted(directory.glob('*.jpg'))[0])
        img = cv2.imread(im_path)

        video_polygons = get_path_polygons(directory)

        video = dict(width=img.shape[1],
                     length=len(sorted(directory.glob('*.jpg'))),
                     date_captured='',
                     license='',
                     flickr_url='',
                     file_names=[],
                     id=vid_id,
                     coco_url='',
                     height=img.shape[0])

        annotations = {}
        for i in range(4):
            annotations[ann_id] = dict(height=img.shape[0],
                                       width=img.shape[1],
                                       length=1,
                                       category_id=1,
                                       segmentations=[],
                                       bboxes=[],
                                       video_id=vid_id,
                                       iscrowd=False,
                                       id=ann_id,
                                       areas=[])

            ann_id += 1

        if not exists:
            for polygons, frame_path in zip(video_polygons,
                                            sorted(directory.glob('*.jpg'))):
                file_name = str(frame_path).split(os.sep)

                file_name = os.path.join(*file_name[-2:])

                video['file_names'].append(file_name)

                for inst_id, polygon in zip(annotations, list(polygons)):

                    if polygon.shape[0] > 1:
                        #polygon = polygon.astype(int).astype(float)

                        #polygon[:, 0], polygon[:, 1] = polygon[:, 1], polygon[:, 0].copy()

                        polygon = polygon.transpose()

                        contour = [
                            j for i in zip(polygon[0], polygon[1]) for j in i
                        ]

                        rles = mask.frPyObjects([contour], img.shape[0],
                                                img.shape[1])

                        rle = mask.merge(rles)
                        area = mask.area(rle)
                        bounding_box = mask.toBbox(rle)
                        annotations[inst_id]['bboxes'].append(
                            bounding_box.tolist())
                        annotations[inst_id]['areas'].append(int(area))

                        rle['counts'] = rle['counts'].decode('ascii')
                        annotations[inst_id]['segmentations'].append(rle)

                    else:
                        annotations[inst_id]['segmentations'].append(None)
                        annotations[inst_id]['bboxes'].append(None)
                        annotations[inst_id]['areas'].append(None)

        for _, ann in annotations.items():
            hand_data['annotations'].append(ann)

        hand_data['videos'].append(video)
        vid_id += 1

    return hand_data, vid_id, ann_id
예제 #32
0
def coco_poly_to_mask(poly, h, w):
    rles = mask_utils.frPyObjects(poly, h, w)
    rle = mask_utils.merge(rles)
    mask = mask_utils.decode(rle)
    return mask
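
# A hedged usage sketch for coco_poly_to_mask above; the polygon is a
# hypothetical 3x3 square inside a 5x5 image.
square = [[1.0, 1.0, 4.0, 1.0, 4.0, 4.0, 1.0, 4.0]]
m = coco_poly_to_mask(square, 5, 5)
print(m.shape, int(m.sum()))  # (5, 5) and the number of foreground pixels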
예제 #33
0
 def segmentation_to_mask(self, polys, height, width):
     import pycocotools.mask as cocomask
     polys = [p.flatten().tolist() for p in polys]
     rles = cocomask.frPyObjects(polys, height, width)
     rle = cocomask.merge(rles)
     return cocomask.decode(rle)
예제 #34
0
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed in the given box and rasterized to an M x M
    mask. The resulting mask is therefore of shape (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask_ = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask_ = np.sum(mask_, axis=2)
    mask_ = np.array(mask_ > 0, dtype=np.float32)
    mask_ = np.array(mask_, dtype=np.uint8)
    ret, thr = cv2.threshold(mask_, 0, 1, cv2.THRESH_BINARY)

    _, contours, _ = cv2.findContours(thr, cv2.RETR_TREE,
                                      cv2.CHAIN_APPROX_SIMPLE)
    height = M
    width = M
    mask_bshape = np.zeros((height, width), np.float32)
    inner_bshape = np.zeros((height, width), np.float32)
    temp1 = np.zeros((height, width), np.float32)
    temp2 = np.zeros((height, width), np.float32)
    temp3 = np.zeros((height, width), np.float32)
    temp4 = np.zeros((height, width), np.float32)
    temp5 = np.zeros((height, width), np.float32)
    temp6 = np.zeros((height, width), np.float32)
    temp7 = np.zeros((height, width), np.float32)
    temp8 = np.zeros((height, width), np.float32)
    temp9 = np.zeros((height, width), np.float32)
    temp10 = np.zeros((height, width), np.float32)
    # 3px mask
    polygon = contours
    pixels = cfg.BSHAPE.PIXELS
    mask = None
    if pixels == 3:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 3)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3
        mask = np.where(
            mask == 7, 1,
            np.where(mask == 6, 0.95,
                     np.where(mask == 5, 0.85, np.where(mask == 4, 0.70, 0))))
    elif pixels == 5:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5
        mask = np.where(
            mask == 11, 1,
            np.where(
                mask == 10, 0.95,
                np.where(
                    mask == 9, 0.85,
                    np.where(
                        mask == 8, 0.70,
                        np.where(mask == 7, 0.65, np.where(mask == 6, 0.60,
                                                           0))))))
    elif pixels == 7:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 7)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
        temp6 = cv2.polylines(temp6, polygon, True, 1, 7)
        temp7 = cv2.polylines(temp7, polygon, True, 1, 8)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7
        mask = np.where(
            mask == 15, 1,
            np.where(
                mask == 14, 0.95,
                np.where(
                    mask == 13, 0.90,
                    np.where(
                        mask == 12, 0.85,
                        np.where(
                            mask == 11, 0.80,
                            np.where(
                                mask == 10, 0.75,
                                np.where(mask == 9, 0.70,
                                         np.where(mask == 8, 0.65, 0))))))))
    elif pixels == 11:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 10)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
        temp6 = cv2.polylines(temp6, polygon, True, 1, 7)
        temp7 = cv2.polylines(temp7, polygon, True, 1, 8)
        temp8 = cv2.polylines(temp8, polygon, True, 1, 9)
        temp9 = cv2.polylines(temp9, polygon, True, 1, 10)
        temp10 = cv2.polylines(temp10, polygon, True, 1, 11)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 + temp8 + temp9 + temp10
        mask = np.where(
            mask == 21, 1,
            np.where(
                mask == 20, 0.95,
                np.where(
                    mask == 19, 0.90,
                    np.where(
                        mask == 18, 0.85,
                        np.where(
                            mask == 17, 0.80,
                            np.where(
                                mask == 16, 0.75,
                                np.where(
                                    mask == 15, 0.70,
                                    np.where(
                                        mask == 14, 0.65,
                                        np.where(
                                            mask == 13, 0.60,
                                            np.where(
                                                mask == 12, 0.55,
                                                np.where(mask == 11, 0.50,
                                                         0)))))))))))
    # inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5)
    # mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
    # temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
    # temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
    # temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
    # temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
    # temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
    #
    # mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 +temp5
    # mask = np.where(mask == 11, 1, np.where(mask == 10, 0.95, np.where(mask == 9, 0.85, np.where(mask == 8, 0.70,
    #                                                                                            np.where(mask == 7, 0.65,
    #                                                                                                     np.where(
    #                                                                                                         mask == 6,
    #                                                                                                         0.60,
    #                                                                                                         0))))))

    mask = np.array(mask, dtype=np.float32)
    # print('!!!!!!!!!!!!!!!!!!!mask number check!!!!!!!!!!!!!!!')
    # print(np.unique(mask))

    # np.savetxt('/home/bkang/data.csv', mask, delimiter=',')

    return mask
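
# A hedged, stand-alone sketch of the box-relative rasterization described in
# the docstring above (polygon coordinates shifted and scaled into an M x M
# grid, then rasterized with pycocotools); the square polygon and its box are
# hypothetical.
import numpy as np
import pycocotools.mask as mask_util

poly = [10.0, 10.0, 20.0, 10.0, 20.0, 20.0, 10.0, 20.0]  # hypothetical square
box = [10.0, 10.0, 20.0, 20.0]                            # its enclosing box
M = 28

w = max(box[2] - box[0], 1)
h = max(box[3] - box[1], 1)
p = np.array(poly, dtype=np.float32)
p[0::2] = (p[0::2] - box[0]) * M / w   # map x into [0, M]
p[1::2] = (p[1::2] - box[1]) * M / h   # map y into [0, M]
rle = mask_util.frPyObjects([p], M, M)
m = (np.array(mask_util.decode(rle), dtype=np.float32).sum(axis=2) > 0)
print(m.shape)  # (28, 28) boolean mask defined relative to the box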
예제 #35
0
    coco = CocoDetection(
        'data/MSCOCO/images/{}2017'.format(split),
        'data/MSCOCO/annotations/instances_{}2017.json'.format(split))

    dest = 'data/MSCOCO/imageclassification/{}/'.format(split)

    if not os.path.exists(dest):
        os.mkdir(dest)

    ii = 0
    for x, y in tqdm(coco):
        w, h = x.size
        x = np.array(x)
        for _y in y:
            cat = _y['category_id']
            rle = mask.frPyObjects(_y['segmentation'], h, w)
            mm = mask.toBbox(rle)
            for m in mm:
                if m.shape == (4, ):
                    m = [int(u) for u in m]
                    x1, x2, y1, y2 = m[0], m[0] + m[2], m[1], m[1] + m[3]
                    if m[2] > 32 and m[3] > 32:
                        im = Image.fromarray(x[y1:y2, x1:x2])
                        la = '{:03d}_{}'.format(_y['category_id'],
                                                cats[_y['category_id']])
                        la_path = os.path.join(dest, la)
                        if not os.path.exists(la_path):
                            os.mkdir(la_path)
                        im_path = os.path.join(la_path,
                                               '{:08d}.jpg'.format(ii))
                        im.save(im_path)
예제 #36
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
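
# A hedged usage sketch for create_tf_example above; the output path, image
# directory, category_index value and the images_and_annotations iterable are
# hypothetical, and the TF 1.x TFRecord writer is assumed (the snippet itself
# uses tf.gfile, which suggests TF 1.x).
writer = tf.python_io.TFRecordWriter('coco_train.record')
for image, annotations_list in images_and_annotations:
    _, example, skipped = create_tf_example(
        image, annotations_list, 'train2017/', category_index,
        include_masks=True)
    writer.write(example.SerializeToString())
writer.close()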
예제 #37
0
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size,
                           maskiou_on):
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks to be fed to the loss computation
    as targets. If the maskiou head is used, the maskiou target is also computed here.

    Arguments:
        segmentation_masks: an instance of SegmentationMask
        proposals: an instance of BoxList
    """
    masks = []
    mask_ratios = []
    M = discretization_size
    device = proposals.bbox.device
    proposals = proposals.convert("xyxy")
    assert segmentation_masks.size == proposals.size, "{}, {}".format(
        segmentation_masks, proposals)
    # TODO put the proposals on the CPU, as the representation for the
    # masks is not efficient GPU-wise (possibly several small tensors for
    # representing a single instance mask)
    proposals = proposals.bbox.to(torch.device("cpu"))
    for segmentation_mask, proposal in zip(segmentation_masks, proposals):
        # crop the masks, resize them to the desired resolution and
        # then convert them to the tensor representation,
        # instead of the list representation that was used
        cropped_mask = segmentation_mask.crop(proposal)
        scaled_mask = cropped_mask.resize((M, M))
        mask = scaled_mask.get_mask_tensor()
        masks.append(mask)

        if maskiou_on:
            x1 = int(proposal[0])
            y1 = int(proposal[1])
            x2 = int(proposal[2]) + 1
            y2 = int(proposal[3]) + 1
            # for poly_ in segmentation_mask.polygons:
            for polygonInstance_ in segmentation_mask.instances.polygons:
                poly = np.array(polygonInstance_.polygons[0], dtype=np.float32)
                x1 = np.minimum(x1, poly[0::2].min())
                x2 = np.maximum(x2, poly[0::2].max())
                y1 = np.minimum(y1, poly[1::2].min())
                y2 = np.maximum(y2, poly[1::2].max())
            img_h = segmentation_mask.size[1]
            img_w = segmentation_mask.size[0]
            x1 = np.maximum(x1, 0)
            x2 = np.minimum(x2, img_w - 1)
            y1 = np.maximum(y1, 0)
            y2 = np.minimum(y2, img_h - 1)
            segmentation_mask_for_maskratio = segmentation_mask.crop(
                [x1, y1, x2, y2])
            ''' 
            #type 1
            gt_img_mask = segmentation_mask_for_maskratio.convert(mode='mask')    
            gt_img_mask_area = gt_img_mask.sum().float()
            gt_box_mask = gt_img_mask[int(proposal[1]-y1):int(proposal[3]-y1)+1, int(proposal[0]-x1):int(proposal[2]-x1)+1]
            gt_box_mask_area = gt_box_mask.sum().float()
            mask_ratio = gt_box_mask_area / gt_img_mask_area
            '''
            #type 2
            rle_for_fullarea = mask_util.frPyObjects([
                p.polygons[0].numpy()
                for p in segmentation_mask_for_maskratio.instances.polygons
            ], y2 - y1, x2 - x1)
            full_area = torch.tensor(
                mask_util.area(rle_for_fullarea).sum().astype(float))
            rle_for_box_area = mask_util.frPyObjects([
                p.polygons[0].numpy() for p in cropped_mask.instances.polygons
            ], proposal[3] - proposal[1], proposal[2] - proposal[0])
            box_area = torch.tensor(
                mask_util.area(rle_for_box_area).sum().astype(float))
            mask_ratio = box_area / full_area

            mask_ratios.append(mask_ratio)

    if len(masks) == 0:
        return torch.empty(0, dtype=torch.float32,
                           device=device), torch.empty(0,
                                                       dtype=torch.float32,
                                                       device=device)
    if maskiou_on:
        mask_ratios = torch.stack(mask_ratios, dim=0).to(device,
                                                         dtype=torch.float32)
    else:
        mask_ratios = None
    # if len(masks) == 0:
    #     return torch.empty(0, dtype=torch.float32, device=device), torch.empty(0, dtype=torch.float32, device=device)

    return torch.stack(masks, dim=0).to(device,
                                        dtype=torch.float32), mask_ratios
예제 #38
0
파일: coco.py 프로젝트: myris3/cvat_tdt4265
    def fix_segments_intersections(polygons,
                                   height,
                                   width,
                                   img_name,
                                   threshold=0.0,
                                   ratio_tolerance=0.001,
                                   area_threshold=1):
        """Find all intersected regions and crop contour for back object by objects which
            are in front of the first one. It is related to a specialty of segmentation
            in CVAT annotation. Intersection is calculated via function 'iou' from cocoapi
        Args:
            polygons: all objects on image represented as 2D array of objects' contours
            height: height of image
            width: width of image
            img_name: name of image file
            threshold: threshold of intersection over union of two objects.
                By default it is set to 0, so any two intersecting objects are processed
            ratio_tolerance: used when one object is fully or almost fully inside
                another one and we don't want to make a "hole" in one of the objects
        """
        empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

        for i, _ in enumerate(polygons):
            rle_bottom = mask_util.frPyObjects([polygons[i]['points']], height,
                                               width)
            segment_overlapped = False
            for j in range(i + 1, len(polygons)):
                rle_top = mask_util.frPyObjects([polygons[j]['points']],
                                                height, width)
                iou = mask_util.iou(rle_bottom, rle_top, [0, 0])
                area_top = sum(mask_util.area(rle_top))
                area_bottom = sum(mask_util.area(rle_bottom))
                if area_bottom == 0:
                    continue
                area_ratio = area_top / area_bottom
                sum_iou = sum(iou)

                # If segment is fully inside another one, save this segment as is
                if area_ratio - ratio_tolerance < sum_iou[
                        0] < area_ratio + ratio_tolerance:
                    continue
                # Check situation when bottom segment is fully inside top.
                # It means that in annotation is mistake. Save this segment as is
                if 1 / area_ratio - ratio_tolerance < sum_iou[
                        0] < 1 / area_ratio + ratio_tolerance:
                    continue

                if sum_iou[0] > threshold:
                    segment_overlapped = True
                    bottom_mask = np.array(mask_util.decode(rle_bottom),
                                           dtype=np.uint8)
                    top_mask = np.array(mask_util.decode(rle_top),
                                        dtype=np.uint8)

                    bottom_mask = np.subtract(bottom_mask, top_mask)
                    bottom_mask[bottom_mask > 1] = 0

                    bottom_mask = np.sum(bottom_mask, axis=2)
                    bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8)
                    polygons[i]['points'] = mask_to_polygon(
                        bottom_mask, area_threshold=area_threshold)
                    # If some segment is empty, do small fix to avoid error in cocoapi function
                    if len(polygons[i]['points']) == 0:
                        polygons[i]['points'] = [empty_polygon]
                    rle_bottom = mask_util.frPyObjects(polygons[i]['points'],
                                                       height, width)
            if not segment_overlapped:
                polygons[i]['points'] = [polygons[i]['points']]

        output_polygons = []
        for polygon in polygons:
            poly_len = len(polygon['points'])
            if poly_len != 0 and polygon['points'] != [empty_polygon]:
                output_polygons.append(polygon)

        return output_polygons
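
# A hedged sketch of the cocoapi 'iou' call that the docstring above refers
# to, using two hypothetical overlapping squares on a 100x100 image.
import pycocotools.mask as mask_util

rle_a = mask_util.frPyObjects([[10, 10, 50, 10, 50, 50, 10, 50]], 100, 100)
rle_b = mask_util.frPyObjects([[30, 30, 70, 30, 70, 70, 30, 70]], 100, 100)
iou = mask_util.iou(rle_a, rle_b, [0])  # [0]: the single gt object is not iscrowd
print(iou)  # 1x1 array with the mask IoU of the two squares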
예제 #39
0
 def polygons_to_mask(self, polygons):
     rle = mask_util.frPyObjects(polygons, self.height, self.width)
     rle = mask_util.merge(rle)
     return mask_util.decode(rle)[:, :]
예제 #40
0
파일: getMask.py 프로젝트: GoYchen/programs
coco.showAnns(anns)
plt.savefig('b.png')
for id in imgIds:
    img = coco.loadImgs(ids = id)[0]
    height = img['height']
    width = img['width']
    name = img['file_name']
    gtName = name[0:len(name)-3] + 'png'
    annIds = coco.getAnnIds(imgIds=id, iscrowd=None)
    anns = coco.loadAnns(annIds)
    gt = np.zeros((height, width))
    for ann in anns:
        catId = ann['category_id']
        if type(ann['segmentation']) == list:
#            print 'polygon'
            rle = mask.frPyObjects(ann['segmentation'], height, width)
        else: 
            if type(ann['segmentation']['counts']) == list:
#                print 'mask'
                rle = mask.frPyObjects([ann['segmentation']], height, width)
            else:
#                print 'third'
                rle = [ann['segmentation']]
        m = mask.decode(rle)
        m = m[:,:,0]
        gt *= 1 - m
        gt += m * catId
        im = Image.fromarray(np.uint8(gt))
        im.save(savePath + gtName)
        #plt.imsave(savePath+gtName, gt)
    counter = counter + 1
예제 #41
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    keypoint_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
      keypoint information for this person object annotation. If None, then
      no keypoint annotations will be populated.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    keypoints_x = []
    keypoints_y = []
    keypoints_visibility = []
    keypoints_name = []
    num_keypoints = []
    include_keypoint = keypoint_annotations_dict is not None
    num_annotations_skipped = 0
    num_keypoint_annotation_used = 0
    num_keypoint_annotation_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

        if include_keypoint:
            annotation_id = object_annotations['id']
            if annotation_id in keypoint_annotations_dict:
                num_keypoint_annotation_used += 1
                keypoint_annotations = keypoint_annotations_dict[annotation_id]
                keypoints = keypoint_annotations['keypoints']
                num_kpts = keypoint_annotations['num_keypoints']
                keypoints_x_abs = keypoints[::3]
                keypoints_x.extend(
                    [float(x_abs) / image_width for x_abs in keypoints_x_abs])
                keypoints_y_abs = keypoints[1::3]
                keypoints_y.extend(
                    [float(y_abs) / image_height for y_abs in keypoints_y_abs])
                keypoints_visibility.extend(keypoints[2::3])
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(num_kpts)
            else:
                keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(0)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    if include_keypoint:
        feature_dict['image/object/keypoint/x'] = (
            dataset_util.float_list_feature(keypoints_x))
        feature_dict['image/object/keypoint/y'] = (
            dataset_util.float_list_feature(keypoints_y))
        feature_dict['image/object/keypoint/num'] = (
            dataset_util.int64_list_feature(num_keypoints))
        feature_dict['image/object/keypoint/visibility'] = (
            dataset_util.int64_list_feature(keypoints_visibility))
        feature_dict['image/object/keypoint/text'] = (
            dataset_util.bytes_list_feature(keypoints_name))
        num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) -
                                           num_keypoint_annotation_used)

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
예제 #42
0
def evalPointingGame(cocoAnn, cat, caffeNet, imgDir):
    imgIds  = cocoAnn.getImgIds(catIds=cat['id'])
    imgList = cocoAnn.loadImgs(ids=imgIds)
    hit  = 0
    miss = 0
    t0 = time.time()
    for I in imgList:
        # run EB on img, get max location on attMap
        imgName = imgDir + I['file_name']
        img     = caffe.io.load_image(imgName)
        attMap  = doExcitationBackprop(caffeNet, img, cat['name'])
        if 1:
            # naively take argmax
            maxSub = np.unravel_index(np.argmax(attMap), attMap.shape)
        else:
            # take center of max locations
            maxAtt = np.max(attMap)
            maxInd = np.where(attMap == maxAtt)
            maxSub = (np.mean(maxInd[0]), np.mean(maxInd[1]))

        # load annotations
        annList = cocoAnn.loadAnns(cocoAnn.getAnnIds(imgIds=I['id'], catIds=cat['id']))

        # hit/miss?
        isHit = 0
        for ann in annList:
            # create a radius-15 circle around max location and see if it 
            # intersects with segmentation mask
            if type(ann['segmentation']) == list:
                # polygon
                for seg in ann['segmentation']:
                    polyPts = np.array(seg).reshape((len(seg)/2, 2))
                    poly    = shapely.geometry.Polygon(polyPts)
                    circ    = shapely.geometry.Point(maxSub[::-1]).buffer(15)
                    isHit  += poly.intersects(circ)
            else:
                # RLE
                if type(ann['segmentation']['counts']) == list:
                    rle = mask.frPyObjects([ann['segmentation']], I['height'], I['width'])
                else:
                    rle = [ann['segmentation']]
                m = mask.decode(rle)
                m = m[:, :, 0]
                ind  = np.where(m>0)
                mp   = shapely.geometry.MultiPoint(zip(ind[0], ind[1]))
                circ = shapely.geometry.Point(maxSub).buffer(15)
                isHit += circ.intersects(mp)

            if isHit:
                break

        if isHit: 
            hit += 1
        else:
            miss += 1
        accuracy = (hit+0.0)/(hit+miss)

        if time.time() - t0 > 10: 
            print cat['name'], ': Hit =', hit, 'Miss =', miss, ' Acc =', accuracy
            t0 = time.time()

    return accuracy
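
# A hedged, stand-alone sketch of the pointing-game hit test used above: a
# radius-15 disc around the predicted maximum location is intersected with the
# object polygon. The polygon and the (row, col) argmax location are hypothetical.
import numpy as np
import shapely.geometry

seg = [10.0, 10.0, 60.0, 10.0, 60.0, 60.0, 10.0, 60.0]   # flat x,y polygon
poly = shapely.geometry.Polygon(np.array(seg).reshape(-1, 2))
max_sub = (35, 40)                                        # (row, col) of the argmax
circ = shapely.geometry.Point(max_sub[::-1]).buffer(15)   # convert to (x, y)
print(poly.intersects(circ))                              # True counts as a hit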
예제 #43
0
파일: coco.py 프로젝트: myris3/cvat_tdt4265
def load(file_object, annotations):
    from pycocotools import coco as coco_loader
    from pycocotools import mask as mask_utils
    import numpy as np

    def get_filename(path):
        import os
        return os.path.splitext(os.path.basename(path))[0]

    def match_frame(frame_info, filename):
        import re
        # try to match by filename
        yolo_filename = get_filename(filename)
        for frame_number, info in frame_info.items():
            cvat_filename = get_filename(info["path"])
            if cvat_filename == yolo_filename:
                return frame_number

        # try to extract frame number from filename
        numbers = re.findall(r"\d+", filename)
        if numbers and len(numbers) == 1:
            return int(numbers[0])

        raise Exception(
            "Cannot match filename or determinate framenumber for {} filename".
            format(filename))

    coco = coco_loader.COCO(file_object.name)
    labels = {
        cat['id']: cat['name']
        for cat in coco.loadCats(coco.getCatIds())
    }

    group_idx = 0
    for img_id in coco.getImgIds():
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        img = coco.loadImgs(ids=img_id)[0]
        frame_number = match_frame(annotations.frame_info, img['file_name'])
        for ann in anns:
            group = 0
            label_name = labels[ann['category_id']]
            if 'segmentation' in ann:
                polygons = []
                # polygon
                if ann['iscrowd'] == 0:
                    polygons = ann['segmentation']
                # mask
                else:
                    if isinstance(ann['segmentation']['counts'], list):
                        rle = mask_utils.frPyObjects([ann['segmentation']],
                                                     img['height'],
                                                     img['width'])
                    else:
                        rle = [ann['segmentation']]

                    mask = np.array(mask_utils.decode(rle), dtype=np.uint8)
                    mask = np.sum(mask, axis=2)
                    mask = np.array(mask > 0, dtype=np.uint8)
                    polygons = mask_to_polygon(mask)

                if len(polygons) > 1:
                    group_idx += 1
                    group = group_idx

                for polygon in polygons:
                    annotations.add_shape(
                        annotations.LabeledShape(
                            type='polygon',
                            frame=frame_number,
                            label=label_name,
                            points=polygon,
                            occluded=False,
                            attributes=[],
                            group=group,
                        ))
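mask_to_polygon, used above to turn a crowd mask back into polygons, is not shown in this snippet. A hedged sketch of such a helper, assuming OpenCV 4.x (findContours returning two values) and a simple contour-tracing approach, might look like:

import cv2
import numpy as np

def mask_to_polygon(mask, min_points=3):
    # Hypothetical sketch: trace the outer contours of a binary mask and return
    # them as flat COCO-style [x1, y1, x2, y2, ...] point lists.
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    polygons = []
    for contour in contours:
        contour = contour.reshape(-1, 2)
        if len(contour) >= min_points:
            polygons.append(contour.ravel().astype(float).tolist())
    return polygons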
예제 #44
0
    def __getitem__(self, index):
        img = self.coco_kps.loadImgs(self.imgIds[index])[0]
        img_ori = cv2.imread(
            os.path.join(self.trainimagepath, img['file_name']))
        img_human_seg = np.zeros(shape=img_ori.shape[0:2], dtype=np.float32)
        loss_mask = np.ones_like(img_human_seg)
        annIds = self.coco_kps.getAnnIds(imgIds=img['id'],
                                         catIds=self.catIds,
                                         iscrowd=None)
        anns = self.coco_kps.loadAnns(annIds)
        #         plt.imshow(img_ori)
        #         self.coco_kps.showAnns(anns)
        #         plt.show()
        assert len(anns) > 0
        assert 'segmentation' in anns[0] or 'keypoints' in anns[0]
        polygons = []
        color = []
        keypoints = []  #(part_id,x,y)
        parts = []  #((partid0,x0,y0),(partid1,x1,y1))
        for ann in anns:
            c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]
            if 'segmentation' in ann:
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                        cv2.drawContours(
                            img_human_seg,
                            [poly[np.newaxis, :].astype(np.int32)], 0,
                            (1, 1, 1), -1)
                        polygons.append(Polygon(poly))
                        color.append(c)
                    if 'keypoints' in ann and (ann['num_keypoints'] < 5
                                               or ann['area'] < 32 * 32):
                        for seg in ann['segmentation']:
                            poly = np.array(seg).reshape(
                                (int(len(seg) / 2), 2))
                            cv2.drawContours(
                                loss_mask,
                                [poly[np.newaxis, :].astype(np.int32)], 0,
                                (0, 0, 0), -1)

                else:
                    # mask
                    t = self.coco_kps.imgs[ann['image_id']]
                    if type(ann['segmentation']['counts']) == list:
                        rle = maskUtils.frPyObjects([ann['segmentation']],
                                                    t['height'], t['width'])
                    else:
                        rle = [ann['segmentation']]
                    m = maskUtils.decode(rle)

                    loss_mask *= (1.0 - m[:, :, 0]).astype(np.float32)

            COCO_to_ours_1 = [
                1, 6, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4
            ]
            COCO_to_ours_2 = [
                1, 7, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4
            ]
            mid_1 = [
                2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16
            ]
            mid_2 = [
                9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17,
                18
            ]
            assert len(COCO_to_ours_1) == len(COCO_to_ours_2) == self.NUM_PARTS
            if 'keypoints' in ann and type(ann['keypoints']) == list:
                # turn skeleton into zero-based index
                #                 sks = np.array(self.coco_kps.loadCats(ann['category_id'])[0]['skeleton'])-1
                kp = np.array(ann['keypoints'])

                x_coco = kp[0::3]
                y_coco = kp[1::3]
                v_coco = kp[2::3]
                x = []
                y = []
                v = []
                for index1, index2 in zip(COCO_to_ours_1, COCO_to_ours_2):
                    index1 -= 1
                    index2 -= 1
                    x.append(0.5 * (x_coco[index1] + x_coco[index2]))
                    y.append(0.5 * (y_coco[index1] + y_coco[index2]))
                    v.append(min(v_coco[index1], v_coco[index2]))
                for i in range(self.NUM_PARTS):
                    if v[i] > 0:
                        # cv2.circle(heatmaps[i],(int(round(x[i])),int(round(y[i]))),self.HEAT_RADIUS,(1,1,1),-1)
                        keypoints.append([i, x[i], y[i]])
                for i in range(self.NUM_LINKS):
                    kp0, kp1 = mid_1[i] - 1, mid_2[i] - 1
                    if v[kp0] > 0 and v[kp1] > 0:
                        parts.append([i, x[kp0], y[kp0], x[kp1], y[kp1]])
        if len(img_ori.shape) == 2:
            # grayscale image: replicate the single channel into 3 channels
            temp = np.empty(shape=(img_ori.shape[0], img_ori.shape[1], 3),
                            dtype=np.uint8)
            for i in range(3):
                temp[:, :, i] = img_ori
            img_ori = temp
            print('gray img')
        '''
        Image augmentation.
        '''
        from img_aug import im_aug
        [img_ori, loss_mask], keypoints, parts = im_aug([img_ori, loss_mask],
                                                        keypoints, parts)
        img_ori = np.transpose(img_ori, (2, 0, 1))
        loss_mask = loss_mask[np.newaxis, :, :]
        img_ori, loss_mask = self.im_transpose([img_ori, loss_mask],
                                               axes=(1, 2, 0))
        img_ori, keypoints, parts, loss_mask = self.im_resize(
            img_ori, keypoints, parts, loss_mask)
        '''
        Generate PAF maps for stride 4; PAF maps for other strides can be sampled from these.
        '''
        pafmaps = [
            np.zeros_like(np.squeeze(loss_mask))
            for _ in range(self.NUM_LINKS * 2)
        ]
        pafmaps_count = [
            np.zeros_like(np.squeeze(loss_mask))
            for _ in range(self.NUM_LINKS * 2)
        ]

        for limb_id, x0, y0, x1, y1 in parts:
            p0 = np.array([x0, y0])
            p1 = np.array([x1, y1])
            mask_ = np.zeros_like(np.squeeze(loss_mask), dtype=np.uint8)
            cv2.line(mask_, (int(round(x0)), int(round(y0))),
                     (int(round(x1)), int(round(y1))), (1, 1, 1),
                     self.PART_LINE_WIDTH)
            vec = p1 - p0
            vec = vec / (np.linalg.norm(vec) + 0.001)
            vec_index = np.where(np.squeeze(mask_))
            pafmaps[2 * limb_id][vec_index] += vec[0]
            pafmaps[2 * limb_id + 1][vec_index] += vec[1]
            pafmaps_count[2 * limb_id][vec_index] += 1
            pafmaps_count[2 * limb_id + 1][vec_index] += 1

        pafmaps_count = np.array(pafmaps_count)
        pafmaps = np.array(pafmaps)
        pafmaps[np.where(pafmaps_count != 0)] /= pafmaps_count[np.where(
            pafmaps_count != 0)]
        '''
        Generate heatmaps for stride 1 (the minimum stride);
        heatmaps for other strides can be sampled from these.
        '''
        # for stride in [64,32,16,8,4]:
        import time
        t0 = time.time()
        heatmaps_strides = []
        for stride in self.STRIDES:
            dest_size = (int(self.INPUT_SIZE // stride),
                         int(self.INPUT_SIZE // stride))
            heatmaps = [
                np.zeros(shape=dest_size, dtype=np.float32)
                for _ in range(self.NUM_PARTS)
            ]
            from cheatmap.heatmaps import genGaussionHeatmap
            for part_id, x, y in keypoints:
                heat_tmp = genGaussionHeatmap(int(self.INPUT_SIZE // stride),
                                              int(self.INPUT_SIZE // stride),
                                              x,
                                              y,
                                              stride=stride)
                heatmaps[part_id] = np.max([heat_tmp, heatmaps[part_id]],
                                           axis=0)
            heatmaps = np.array(heatmaps)
            heatmaps = np.concatenate(
                [heatmaps, np.min(heatmaps, axis=0)[np.newaxis]])
            heatmaps_strides.append(heatmaps.reshape((-1)))
        heatmap_strides = np.concatenate(heatmaps_strides, axis=0)
        # for m in range(int(self.INPUT_SIZE//stride)):
        #     for n in range(int(self.INPUT_SIZE//stride)):
        #         ori_x = n *stride + stride / 2 - 0.5
        #         ori_y = m * stride + stride / 2 - 0.5
        #         for  pard_id,x,y in keypoints:
        #             d2 = (ori_x-x)**2+(ori_y-y)**2
        #             sigma  = 7.0
        #             exponent = d2 / 2.0 / (sigma**2)
        #             heatmaps[pard_id][m, n] = max(np.exp(-exponent), heatmaps[pard_id][m,n])
        # print(heatmaps.shape)
        # print(heatmaps.shape)

        pafmaps = np.array(pafmaps)
        t1 = time.time()
        # print(t1-t0)
        # print(heatmaps.shape,pafmaps.shape,loss_mask.shape)
        return np.transpose(
            img_ori, (2, 0, 1)), [heatmap_strides] + self.make_fpn_label(
                pafmaps,
                loss_mask,
            )
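genGaussionHeatmap is imported from the project's own cheatmap module and is not shown here. Based on the commented-out reference code above (stride-centred sampling, sigma = 7.0), a hedged stand-in could be:

import numpy as np

def gen_gaussian_heatmap(h, w, x, y, stride=1, sigma=7.0):
    # Hypothetical stand-in for genGaussionHeatmap: a Gaussian bump centred on the
    # keypoint (x, y), evaluated at the centres of the stride-sized output cells.
    xs = np.arange(w) * stride + stride / 2.0 - 0.5
    ys = np.arange(h) * stride + stride / 2.0 - 0.5
    d2 = (xs[np.newaxis, :] - x) ** 2 + (ys[:, np.newaxis] - y) ** 2
    return np.exp(-d2 / (2.0 * sigma ** 2)).astype(np.float32)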
예제 #45
0
 def _read_segmentation(self, ann, H, W):
     s = ann['segmentation']
     s = s if type(s) == list else [s]
     return mask.decode(mask.frPyObjects(s, H, W)).max(axis=2)
    # note: this snippet is truncated; its enclosing `def` is missing, and `rows`
    # is presumably computed like `cols`, along the other axis
    rows = np.any(img, axis=1)
    cols = np.any(img, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]

    return rmin-padSize, rmax+padSize+1, cmin-padSize, cmax+padSize+1

for i in np.arange(_count, len(anns)):
    print 'transforming instance %d' % i
    #transform_and_save_image(i)
    uint_image = io.imread('%s/images/%s/%s' % (dataDir,dataType,imgs[i]['file_name']))
    if len(uint_image.shape) == 2:
        tmp_image = np.zeros(uint_image.shape + (3,), dtype=np.uint8)
        tmp_image[:,:,0] = tmp_image[:,:,1] = tmp_image[:,:,2] = uint_image
        uint_image = tmp_image
    float_image = np.array(uint_image, dtype=np.float32)/255.0
    rle = mask.frPyObjects(anns[i]['segmentation'], imgs[i]['height'], imgs[i]['width'])
    m_uint = mask.decode(rle)
    m = np.array(m_uint[:,:,0], dtype=np.float32)
    base_tran = video_transformer.sample()
    frame1_tran = base_tran # + frame_transformer.sample()
    frame2_tran = base_tran + frame_transformer.sample()
    image1 = frame1_tran.transform_img(float_image.copy(), float_image.shape[:2], m)
    #print 'image1 size: %s' % str(image1.shape)
    image1_padded = np.pad(image1,((padSize,padSize),(padSize,padSize),(0,0)), mode='constant')
    #print 'image1_padded size: %s' % str(image1_padded.shape)
    mask1 = frame1_tran.transform_mask(m.copy(), m.shape)
	
    # transform_mask fills the padded area with -1; reset those pixels to 0
    mask1 = mask1[0]
    mask1[mask1 == -1] = 0
    #print 'mask1 size: %s' % str(mask1.shape)
예제 #47
0
파일: test.py 프로젝트: GoYchen/programs
plt.imshow(I)
plt.show()

# load and display instance annotations
plt.imshow(I)
plt.imsave('a.png', I)
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
plt.savefig('b.png')
#img = coco.loadImgs(ids=ids2[2])[0]
#ann = coco.loadAnns(ids = coco.getAnnIds(imgIds=ids2[2]))[0]
ann = anns[0]
seg = ann['segmentation'][0]

rle = mask.frPyObjects([seg], img['height'], img['width'])
m = mask.decode(rle)

image = np.ones( (m.shape[0], m.shape[1], 3))
color_mask = np.random.random((1,3)).tolist()[0]
for i in range(3):
    image[:,:,i] = color_mask[i]
m2=np.dstack((image, m * 0.5))
plt.imsave('m.png', m2)


vocId = [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 64, 67, 72]
#vocId = [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72]
ids=coco.getImgIds()

print 'length of ids %d\n' % len(ids)
예제 #48
0
 def showAnns(self, anns):
     """
     Display the specified annotations.
     :param anns (array of object): annotations to display
     :return: None
     """
     if len(anns) == 0:
         return 0
     if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
         datasetType = 'instances'
     elif 'caption' in anns[0]:
         datasetType = 'captions'
     else:
         raise Exception('datasetType not supported')
     if datasetType == 'instances':
         ax = plt.gca()
         ax.set_autoscale_on(False)
         polygons = []
         color = []
         for ann in anns:
             c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
             if 'segmentation' in ann:
                 if type(ann['segmentation']) == list:
                     # polygon
                     for seg in ann['segmentation']:
                         poly = np.array(seg).reshape((int(len(seg)/2), 2))
                         polygons.append(Polygon(poly))
                         color.append(c)
                 else:
                     # mask
                     t = self.imgs[ann['image_id']]
                     if type(ann['segmentation']['counts']) == list:
                         rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                     else:
                         rle = [ann['segmentation']]
                     m = maskUtils.decode(rle)
                     img = np.ones( (m.shape[0], m.shape[1], 3) )
                     if ann['iscrowd'] == 1:
                         color_mask = np.array([2.0,166.0,101.0])/255
                     if ann['iscrowd'] == 0:
                         color_mask = np.random.random((1, 3)).tolist()[0]
                     for i in range(3):
                         img[:,:,i] = color_mask[i]
                     ax.imshow(np.dstack( (img, m*0.5) ))
             if 'keypoints' in ann and type(ann['keypoints']) == list:
                 # turn skeleton into zero-based index
                 sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
                 kp = np.array(ann['keypoints'])
                 x = kp[0::3]
                 y = kp[1::3]
                 v = kp[2::3]
                 for sk in sks:
                     if np.all(v[sk]>0):
                         plt.plot(x[sk],y[sk], linewidth=3, color=c)
                 plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2)
                 plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
         p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
         ax.add_collection(p)
         p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
         ax.add_collection(p)
     elif datasetType == 'captions':
         for ann in anns:
             print(ann['caption'])
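A hedged usage sketch for showAnns through the public COCO API (the annotation path is hypothetical; 'coco_url' is present in the COCO 2017 image records):

from pycocotools.coco import COCO
import matplotlib.pyplot as plt
import skimage.io as io

coco = COCO('annotations/instances_val2017.json')  # hypothetical path
img_info = coco.loadImgs(coco.getImgIds()[0])[0]
plt.imshow(io.imread(img_info['coco_url']))
coco.showAnns(coco.loadAnns(coco.getAnnIds(imgIds=img_info['id'])))
plt.savefig('annotated.png')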
def coco2binary_mask(segmentation: list, height: int, width: int) -> np.array:
    rles = mutils.frPyObjects(segmentation, height, width)
    return mutils.decode(rles)[:, :, 0]
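A hedged usage sketch (the square polygon below is made up; mutils refers to pycocotools.mask as imported in the snippet above):

square = [[10.0, 10.0, 60.0, 10.0, 60.0, 60.0, 10.0, 60.0]]
m = coco2binary_mask(square, height=100, width=100)
# m is a (100, 100) uint8 array with 1 inside the square and 0 elsewhere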
예제 #50
0
def crop_covered_segments(segments,
                          width,
                          height,
                          iou_threshold=0.0,
                          ratio_tolerance=0.001,
                          area_threshold=1,
                          return_masks=False):
    """
    Find all segments occluded by others and crop them to the visible part only.
    Input segments are expected to be sorted from background to foreground.

    Args:
        segments: 1d list of segment RLEs (in COCO format)
        width: width of the image
        height: height of the image
        iou_threshold: IoU threshold for objects to be counted as intersected
            By default is set to 0 to process any intersected objects
        ratio_tolerance: an IoU "handicap" value for the situation
            when an object is (almost) fully covered by another one and we
            don't want to make a "hole" in the background object
        area_threshold: minimal area of included segments
        return_masks: if True, always return the visible parts as binary
            masks, even for polygon inputs

    Returns:
        A list of input segments' parts (in the same order as input):
            [
                [[x1,y1, x2,y2 ...], ...], # input segment #0 parts
                mask1, # input segment #1 mask (if source segment is mask)
                [], # when source segment is too small
                ...
            ]
    """
    from pycocotools import mask as mask_utils

    segments = [[s] for s in segments]
    input_rles = [mask_utils.frPyObjects(s, height, width) for s in segments]

    for i, rle_bottom in enumerate(input_rles):
        area_bottom = sum(mask_utils.area(rle_bottom))
        if area_bottom < area_threshold:
            segments[i] = [] if not return_masks else None
            continue

        rles_top = []
        for j in range(i + 1, len(input_rles)):
            rle_top = input_rles[j]
            iou = sum(mask_utils.iou(rle_bottom, rle_top, [0, 0]))[0]

            if iou <= iou_threshold:
                continue

            area_top = sum(mask_utils.area(rle_top))
            area_ratio = area_top / area_bottom

            # If a segment is fully inside another one, skip this segment
            if abs(area_ratio - iou) < ratio_tolerance:
                continue

            # Check if the bottom segment is fully covered by the top one.
            # There is a mistake in the annotation, keep the background one
            if abs(1 / area_ratio - iou) < ratio_tolerance:
                rles_top = []
                break

            rles_top += rle_top

        if not rles_top and not isinstance(segments[i][0], dict) \
                and not return_masks:
            continue

        rle_bottom = rle_bottom[0]
        bottom_mask = mask_utils.decode(rle_bottom).astype(np.uint8)

        if rles_top:
            rle_top = mask_utils.merge(rles_top)
            top_mask = mask_utils.decode(rle_top).astype(np.uint8)

            bottom_mask -= top_mask
            bottom_mask[bottom_mask != 1] = 0

        if not return_masks and not isinstance(segments[i][0], dict):
            segments[i] = mask_to_polygons(bottom_mask,
                                           area_threshold=area_threshold)
        else:
            segments[i] = bottom_mask

    return segments
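A hedged usage sketch (the polygon coordinates are made up; mask_to_polygons is assumed to be defined elsewhere in the same module, e.g. a contour-tracing helper along the lines of the mask_to_polygon sketch earlier):

# The background square is partially covered by the foreground one, so its entry
# in the result should contain only the visible (uncovered) part.
background = [0.0, 0.0, 50.0, 0.0, 50.0, 50.0, 0.0, 50.0]
foreground = [25.0, 25.0, 75.0, 25.0, 75.0, 75.0, 25.0, 75.0]
visible = crop_covered_segments([background, foreground], width=100, height=100)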
예제 #51
0
def polys_to_mask(polygons, height, width):
    """Convert from the COCO polygon segmentation format to a binary mask
    encoded as a 2D array of data type numpy.float32. The polygon segmentation
    is understood to be enclosed inside a height x width image. The resulting
    mask is therefore of shape (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask_ = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask_ = np.sum(mask_, axis=2)
    mask_ = np.array(mask_ > 0, dtype=np.float32)
    mask_ = np.array(mask_, dtype=np.uint8)
    ret, thr = cv2.threshold(mask_, 0, 1, cv2.THRESH_BINARY)

    _, contours, _ = cv2.findContours(thr, cv2.RETR_TREE,
                                      cv2.CHAIN_APPROX_SIMPLE)

    mask_bshape = np.zeros((height, width), np.float32)
    inner_bshape = np.zeros((height, width), np.float32)
    temp1 = np.zeros((height, width), np.float32)
    temp2 = np.zeros((height, width), np.float32)
    temp3 = np.zeros((height, width), np.float32)
    temp4 = np.zeros((height, width), np.float32)
    temp5 = np.zeros((height, width), np.float32)
    temp6 = np.zeros((height, width), np.float32)
    temp7 = np.zeros((height, width), np.float32)
    temp8 = np.zeros((height, width), np.float32)
    temp9 = np.zeros((height, width), np.float32)
    temp10 = np.zeros((height, width), np.float32)
    # 3px mask

    polygon = contours
    pixels = cfg.BSHAPE.PIXELS
    mask = None
    if pixels == 3:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 3)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3
        mask = np.where(
            mask == 7, 1,
            np.where(mask == 6, 0.95,
                     np.where(mask == 5, 0.85, np.where(mask == 4, 0.70, 0))))
    elif pixels == 5:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 5)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5
        mask = np.where(
            mask == 11, 1,
            np.where(
                mask == 10, 0.95,
                np.where(
                    mask == 9, 0.85,
                    np.where(
                        mask == 8, 0.70,
                        np.where(mask == 7, 0.65, np.where(mask == 6, 0.60,
                                                           0))))))
    elif pixels == 7:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 7)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
        temp6 = cv2.polylines(temp6, polygon, True, 1, 7)
        temp7 = cv2.polylines(temp7, polygon, True, 1, 8)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7
        mask = np.where(
            mask == 15, 1,
            np.where(
                mask == 14, 0.95,
                np.where(
                    mask == 13, 0.90,
                    np.where(
                        mask == 12, 0.85,
                        np.where(
                            mask == 11, 0.80,
                            np.where(
                                mask == 10, 0.75,
                                np.where(mask == 9, 0.70,
                                         np.where(mask == 8, 0.65, 0))))))))
    elif pixels == 11:
        inner_bshape = cv2.fillPoly(inner_bshape, polygon, 10)
        mask_bshape = cv2.polylines(mask_bshape, polygon, True, 1, 1)
        temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
        temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
        temp3 = cv2.polylines(temp3, polygon, True, 1, 4)
        temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
        temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
        temp6 = cv2.polylines(temp6, polygon, True, 1, 7)
        temp7 = cv2.polylines(temp7, polygon, True, 1, 8)
        temp8 = cv2.polylines(temp8, polygon, True, 1, 9)
        temp9 = cv2.polylines(temp9, polygon, True, 1, 10)
        temp10 = cv2.polylines(temp10, polygon, True, 1, 11)

        mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5 + temp6 + temp7 + temp8 + temp9 + temp10
        mask = np.where(
            mask == 21, 1,
            np.where(
                mask == 20, 0.95,
                np.where(
                    mask == 19, 0.90,
                    np.where(
                        mask == 18, 0.85,
                        np.where(
                            mask == 17, 0.80,
                            np.where(
                                mask == 16, 0.75,
                                np.where(
                                    mask == 15, 0.70,
                                    np.where(
                                        mask == 14, 0.65,
                                        np.where(
                                            mask == 13, 0.60,
                                            np.where(
                                                mask == 12, 0.55,
                                                np.where(mask == 11, 0.50,
                                                         0)))))))))))

    # inner_bshape = cv2.fillPoly(inner_bshape, polygon, 4)
    # mask_bshape = cv2.polylines(mask_bshape,polygon, True, 1, 1)
    # temp1 = cv2.polylines(temp1, polygon, True, 1, 2)
    # temp2 = cv2.polylines(temp2, polygon, True, 1, 3)
    # temp3 = cv2.polylines(temp3,polygon, True, 1, 4)
    # temp4 = cv2.polylines(temp4, polygon, True, 1, 5)
    # temp5 = cv2.polylines(temp5, polygon, True, 1, 6)
    #
    # mask = inner_bshape + mask_bshape + temp1 + temp2 + temp3 + temp4 + temp5
    # mask = np.where(mask == 11, 1, np.where(mask == 10, 0.95, np.where(mask == 9, 0.85, np.where(mask == 8, 0.70,
    #                                                                                              np.where(mask == 7,
    #                                                                                                       0.65,
    #                                                                                                       np.where(
    #                                                                                                           mask == 6,
    #                                                                                                           0.60,
    #                                                                                                           0))))))

    mask = np.array(mask, dtype=np.float32)
    return mask
예제 #52
0
def polys_to_mask(polygons, height, width):
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask
def coco_rle_decode(rle, h, w):
    return mutils.decode(mutils.frPyObjects(rle, h, w))
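A hedged usage sketch for these two one-liners (the polygon is made up; mask_util and mutils refer to pycocotools.mask as imported in their respective snippets):

poly = [[20.0, 20.0, 80.0, 20.0, 80.0, 80.0, 20.0, 80.0]]
m1 = polys_to_mask(poly, height=100, width=100)  # float32 (100, 100) binary mask
m2 = coco_rle_decode(poly, 100, 100)             # uint8 (100, 100, 1) from decode()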
예제 #54
0
파일: coco.py 프로젝트: liuguoyou/who_where
    def _load_coco_annotation(self, index):
        """
        Loads COCO bounding-box & segmentation instance annotations.
        Crowd instances are removed.
        """
        im_ann = self._COCO.loadImgs(index)[0]
        width  = im_ann['width']; height = im_ann['height']

        #######################################################################
        # get bboxes that are outside crowd regions
        annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=False)
        #######################################################################

        objs = self._COCO.loadAnns(annIds)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_IDs = []
        for i in xrange(len(objs)):
            obj = objs[i]
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1 and (not obj['iscrowd']):
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_IDs.append(annIds[i])

        ########################################################################
        boxes      = []
        gt_classes = []
        seg_areas  = []
        # RLE representation of binary mask
        rles       = []
        #######################################################################

        #######################################################################
        # Lookup table to map from COCO category ids to our internal class
        # indices
        coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls],
                                          self._class_to_ind[cls])
                                          for cls in self._classes[1:]])

        for i in xrange(len(valid_objs)):
            obj = valid_objs[i]
            cls = coco_cat_id_to_class_ind[obj['category_id']]

            #######################################################################
            if type(obj['segmentation']) == list:
                # polygon
                rle = COCOmask.frPyObjects(obj['segmentation'], height, width)
            elif type(obj['segmentation']['counts']) == list:
                rle = COCOmask.frPyObjects([obj['segmentation']], height, width)
            else:
                rle = [obj['segmentation']]
            #######################################################################

            boxes.append(obj['clean_bbox'])
            gt_classes.append(cls)
            seg_areas.append(obj['area'])
            rles.append(rle)

        ###############################################################
        ## calculate the areas of objects
        area = float(width * height)
        mask = np.zeros((height, width), dtype=np.float32)
        for j in xrange(len(rles)):
            rle  = rles[j]
            cur_mask = np.amax(COCOmask.decode(rle), axis=2)
            mask = np.maximum(mask, cur_mask)
        seg_area = np.sum(mask)
        seg_ratio = seg_area/area
        # print seg_ratio
        ###############################################################

        return {'image'  : self.image_path_from_index(index),
                'width'  : width,
                'height' : height,
                'boxes'  : np.array(boxes).reshape((-1,4)),
                'clses'  : np.array(gt_classes),
                'polys'     : rles,
                'ann_ids'   : np.array(valid_IDs),
                'flipped'   : False,
                'seg_areas' : np.array(seg_areas),
                'image_index': index,
                'seg_ratio': seg_ratio}
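A hedged sketch of how the returned dictionary might be consumed (the dataset instance and image index are hypothetical):

# entry = dataset._load_coco_annotation(image_index)
# entry['boxes']      -> (num_objects, 4) array of [x1, y1, x2, y2] boxes
# entry['polys']      -> per-object RLE lists, decodable with COCOmask.decode
# entry['seg_ratio']  -> fraction of the image covered by the union of all masks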