class DataWorker:
    """Using for loading ms coco dataset (only train and val parts),
    filter data for person category (because of solving task),
    create generator for batch learning
    """

    train_folder = 'train2017'
    test_folder = 'val2017'
    annotation_folder = 'annotations'

    # person
    category_id = 1

    def __init__(self, data_path, seed=7):
        """Read images description and annotations about it

        :data_path (str): path to dataset's folder
        :seed (int): random seed
        """
        self.seed = seed
        if os.path.islink(data_path):
            data_path = os.readlink(data_path)
        self.data_path = data_path
        self.train_folder = os.path.join(data_path, self.train_folder)
        self.test_folder = os.path.join(data_path, self.test_folder)
        self.annotation_folder = os.path.join(data_path,
                                              self.annotation_folder)

        self.coco_train = COCO(
            os.path.join(self.annotation_folder, 'instances_train2017.json'))
        self.coco_test = COCO(
            os.path.join(self.annotation_folder, 'instances_val2017.json'))

        # load information about images with class label
        self.train_images = self.coco_train.loadImgs(
            ids=self.coco_train.getImgIds(catIds=self.category_id))
        self.test_images = self.coco_test.loadImgs(
            ids=self.coco_test.getImgIds(catIds=self.category_id))

        # load annotations for loaded images
        self.train_annotations = {
            img_desc['id']: self.coco_train.loadAnns(
                self.coco_train.getAnnIds(imgIds=img_desc['id'],
                                          iscrowd=False,
                                          catIds=self.category_id))
            for img_desc in self.train_images
        }
        self.test_annotations = {
            img_desc['id']: self.coco_test.loadAnns(
                self.coco_test.getAnnIds(imgIds=img_desc['id'],
                                         iscrowd=False,
                                         catIds=self.category_id))
            for img_desc in self.test_images
        }

        self.train_images, self.val_images = train_test_split(
            self.train_images, test_size=0.2, random_state=self.seed)

    @property
    def train_shape(self):
        """Return train shape

        :return (int): return train shape
        """
        return len(self.train_images)

    @property
    def val_shape(self):
        """Return test shape

        :return (int): return test shape
        """
        return len(self.val_images)

    @property
    def test_shape(self):
        """Return test shape

        :return (int): return test shape
        """
        return len(self.test_images)

    def load_image_mask(self, image_desc):
        """Load image and corresponding mask

        :image_desc (dict): description of image in COCO format

        :return (tuple([N, M, 3], [N, M])): tuple of image and mask
        """
        # pick the split (train or test) the image belongs to
        if image_desc['id'] in self.train_annotations:
            image = cv2.imread(
                os.path.join(self.train_folder, image_desc['file_name']))
            masks = [
                self.coco_train.annToMask(i_img_ann)
                for i_img_ann in self.train_annotations[image_desc['id']]
            ]
        else:
            image = cv2.imread(
                os.path.join(self.test_folder, image_desc['file_name']))
            masks = [
                self.coco_test.annToMask(i_img_ann)
                for i_img_ann in self.test_annotations[image_desc['id']]
            ]
        image = image[:, :, [2, 1, 0]]  # cv2 loads BGR; reorder to RGB
        total_mask = np.bitwise_or.reduce(masks)
        return (image, total_mask)

    def batch_loader(self, images_descriptions, batch_size, height, width):
        """Load batches of images and resize with padding to given shape

        :images_descriptions (list): descriptions of images in COCO format
        :batch_size (int): size of batch
        :height (int): height of processed images
        :width (int): width of processed images

        :return (generator): generator of batches with images and masks
        tuple of [batch_size, height, width, 3] and [batch_size, height, width]
        """
        for start_ind in range(0, len(images_descriptions), batch_size):
            images = np.empty([0, height, width, 3], dtype=np.uint8)
            masks = np.empty([0, height, width], dtype=np.float32)
            for image_desc in images_descriptions[start_ind:start_ind +
                                                  batch_size]:
                image, mask = self.load_image_mask(image_desc)
                shaped_image = resize_pad(image, height, width)
                shaped_mask = resize_pad(mask, height, width)

                images = np.append(images, [shaped_image], axis=0)
                masks = np.append(masks, [shaped_mask], axis=0)
            yield (images, masks)

    def batch_augmentation(self, image_generator, augment_args):
        """Augmentate batch of images

        :image_generator (generator): generator with batches of images
        tuple of [batch_size, height, width, 3] and [batch_size, height, width]
        :augment_args (dict): params for augmentation

        :return (generator): generator with batches of augmented images
        tuple of [batch_size, height, width, 3] and [batch_size, height, width]
        """
        augment = ImageDataGenerator(**augment_args)
        for images, masks in image_generator:
            stacked = np.concatenate([images, masks[:, :, :, np.newaxis]],
                                     axis=-1)
            aug_batch = augment.flow(stacked,
                                     seed=self.seed,
                                     batch_size=stacked.shape[0],
                                     shuffle=False)
            # flow() yields forever, so take exactly one augmented copy of
            # this batch; looping over it would never advance to the next
            # loaded batch
            aug_stacked = next(aug_batch)
            aug_images = aug_stacked[:, :, :, :3].astype(np.uint8)
            aug_masks = aug_stacked[:, :, :, 3]
            yield (aug_images, aug_masks)

    def batch_generator(self,
                        images_descriptions,
                        batch_size=100,
                        height=512,
                        width=512,
                        augment_args=None):
        """Pipeline, which take image description and proccessing information,
        Load it by batches, augmented and normalize

        :images_descriptions (dict): descriptions of images in coco format
        :batch_size (int): size of batch
        :height (int): height of proccessed images
        :width (int): width of proccesses images
        :augment_args (dict): params for augmentation

        :return (generator): generator with batches of images
        tuple of [batch_size, height, width, 3] and [batch_size, height, width]
        """
        if augment_args is None:
            augment_args = {
                'rotation_range': 15,
                'width_shift_range': 0.1,
                'height_shift_range': 0.1,
                'zoom_range': 0.25,
                'horizontal_flip': True,
                'brightness_range': [0.75, 1.25],
                'fill_mode': 'constant'
            }
        batch_gen = self.batch_augmentation(
            self.batch_loader(images_descriptions, batch_size, height, width),
            augment_args)
        yield from batch_gen
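

# Usage sketch for DataWorker (illustrative, not part of the original code):
# it assumes the standard COCO 2017 layout under ./coco and that the imports
# the class relies on are in scope (os, cv2, numpy as np,
# pycocotools.coco.COCO, sklearn.model_selection.train_test_split,
# keras.preprocessing.image.ImageDataGenerator, and a resize_pad helper).
worker = DataWorker('./coco')
print(worker.train_shape, worker.val_shape, worker.test_shape)

train_gen = worker.batch_generator(worker.train_images,
                                   batch_size=8,
                                   height=256,
                                   width=256)
images, masks = next(train_gen)
print(images.shape)  # (8, 256, 256, 3)
print(masks.shape)   # (8, 256, 256)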
class InpaintingDatasetTest:
    def initialize(self, opt, model):
        self.opt = opt
        self.root = opt.dataroot
        self.annFile = opt.ann_path
        self.coco = COCO(self.annFile)
        self.dataset_size = len(IMG_PAIRS)
        self.model = model

    def load_coco_image(self, image_id, object_id=None, use_seg=False):
        image_info = self.coco.loadImgs(ids=image_id)[0]
        image_url = image_info['coco_url']
        image_url_split = image_url.split('/')
        image_path = '{}/{}'.format(self.root, image_url_split[-1])
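        # note: scipy.misc.imread/imresize were removed in SciPy >= 1.2;
        # imageio.imread and PIL/skimage resizing are common replacements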
        image = scipy.misc.imread(image_path, mode='RGB')

        mask = None
        if object_id is not None:
            annIds = self.coco.getAnnIds(imgIds=image_info['id'], iscrowd=None)
            anns = self.coco.loadAnns(annIds)
            mask = self.coco.annToMask(anns[object_id])

        if use_seg:
            mask = get_segmentation(image, mask, self.model)
        return image, mask, image_path

    def compute_bounding_box(self, object_mask):
        object_mask[object_mask > 0] = 1  # fineSize x fineSize
        object_pixels = np.where(object_mask != 0)
        bbox = np.min(object_pixels[0]), np.max(object_pixels[0]), \
               np.min(object_pixels[1]), np.max(object_pixels[1])

        object_x = bbox[2]
        object_y = bbox[0]
        object_height = bbox[1] - object_y + 1
        object_width = bbox[3] - object_x + 1

        return object_x, object_y, object_height, object_width

    def get_item(self, index):
        # load object image and mask

        object_image, object_mask, image_path = \
            self.load_coco_image(image_id=IMG_PAIRS[index]['object_img_id'],
                                 object_id=IMG_PAIRS[index]['object_id'],
                                 use_seg=self.opt.use_segmentation)

        background_image, _, _ = \
            self.load_coco_image(image_id=IMG_PAIRS[index]['background_img_id'])

        object_image_height, object_image_width, _ = \
            object_image.shape

        # find bounding box
        _, _, object_ori_height, object_ori_width = \
            self.compute_bounding_box(object_mask)

        # Compute the new size of the image based on the size of inpainted
        # object.
        object_image_resize_height = int(
            object_image_height * IMG_PAIRS[index]['object_composite_height'] /
            object_ori_height)
        object_image_resize_width = int(
            object_image_width * IMG_PAIRS[index]['object_composite_width'] /
            object_ori_width)

        # Inpainting
        object_image_resized = scipy.misc.imresize(
            object_image,
            [object_image_resize_height, object_image_resize_width])
        object_image_resized_chw = np.rollaxis(object_image_resized, 2,
                                               0)  # 3 x fineSize x fineSize

        # resize object mask
        mask_resized = scipy.misc.imresize(
            object_mask,
            [object_image_resize_height, object_image_resize_width])
        mask_resized[mask_resized > 0] = 1  # fineSize x fineSize
        # find bounding box
        mask_resized_hw = mask_resized.copy()
        object_x, object_y, object_height, object_width = \
            self.compute_bounding_box(mask_resized)
        mask_resized = np.tile(mask_resized, (3, 1, 1))

        # normalize object image
        object_image_resized_chw = object_image_resized_chw / 122.5 - 1

        # get the image patch that contains the object.
        object_image_patch_with_bg = object_image_resized_chw[
            :, object_y:object_y + object_height,
            object_x:object_x + object_width]
        object_mask_patch = mask_resized[:, object_y:object_y + object_height,
                                         object_x:object_x + object_width]
        object_image_patch_no_bg = np.copy(object_image_patch_with_bg)
        object_image_patch_no_bg[object_mask_patch == 0] = 0

        # resize and normalize the background image.
        background_image_resized = scipy.misc.imresize(
            background_image, [self.opt.fineSize, self.opt.fineSize
                               ])  # fineSize x fineSize x 3  # noqa 501
        background_image_resized_chw = np.rollaxis(background_image_resized, 2,
                                                   0)  # noqa 501
        background_image_resized_chw = background_image_resized_chw / 122.5 - 1

        # image composition. We remove the background of the image patch.
        new_object_x = IMG_PAIRS[index]['object_composite_x']
        new_object_y = IMG_PAIRS[index]['object_composite_y']
        image_composite_no_bg = np.copy(background_image_resized_chw)
        image_composite_no_bg[:, new_object_y:new_object_y + object_height,
                              new_object_x:new_object_x +
                              object_width] = object_image_patch_no_bg  # noqa 501

        # Image composition. We keep the background of the image patch.
        image_composite_with_bg = np.copy(background_image_resized_chw)
        image_composite_with_bg[:, new_object_y:new_object_y + object_height,
                                new_object_x:new_object_x +
                                object_width] = object_image_patch_with_bg  # noqa 501

        mask_composite = np.zeros(image_composite_no_bg.shape)
        mask_composite[:, new_object_y:new_object_y + object_height,
                       new_object_x:new_object_x +
                       object_width] = 1 - object_mask_patch  # noqa 501

        mask_composite_object = np.zeros(image_composite_no_bg.shape)
        mask_composite_object[:, new_object_y:new_object_y + object_height,
                              new_object_x:new_object_x + object_width] = \
            object_mask_patch

        mask_composite = torch.from_numpy(mask_composite).float()
        image_composite_no_bg = torch.from_numpy(image_composite_no_bg).float()
        background_image_resized_chw = torch.from_numpy(
            background_image_resized_chw).float()
        mask_composite_object = torch.from_numpy(mask_composite_object).float()
        image_composite_with_bg = torch.from_numpy(
            image_composite_with_bg).float()

        feat_tensor = 0

        image_composite_no_bg = image_composite_no_bg.unsqueeze(0)
        mask_composite = mask_composite.unsqueeze(0)
        background_image_resized_chw = background_image_resized_chw.unsqueeze(
            0)
        image_composite_with_bg = image_composite_with_bg.unsqueeze(0)
        mask_composite_object = mask_composite_object.unsqueeze(0)

        input_dict = {
            'input': image_composite_no_bg,
            'mask': mask_composite,
            'image': background_image_resized_chw,
            'feat': feat_tensor,
            'path': [image_path],
            'image_composite_with_bg': image_composite_with_bg,
            'mask_composite_object': mask_composite_object
        }

        return input_dict

    def __len__(self):
        return len(IMG_PAIRS)

    def name(self):
        return 'InpaintingDatasetGuided'
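

# A self-contained check of compute_bounding_box on a toy mask (illustrative;
# only numpy is needed, and the method does not touch instance state).
import numpy as np

toy_mask = np.zeros((8, 8), dtype=np.uint8)
toy_mask[2:5, 3:7] = 1  # a 3x4 block of object pixels

dataset = InpaintingDatasetTest()
x, y, h, w = dataset.compute_bounding_box(toy_mask)
print(x, y, h, w)  # -> 3 2 3 4, i.e. (x, y, height, width)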
Example #3
def generate_json_mask(ann_path, json_path, mask_dir, filelist_path,
                       masklist_path):
    COCO_Order = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    COCO_TO_OURS = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10]

    coco = COCO(ann_path)
    ids = list(coco.imgs.keys())
    lists = []

    filelist_fp = open(filelist_path, 'w')
    masklist_fp = open(masklist_path, 'w')
    for i, img_id in enumerate(ids):
        ann_ids = coco.getAnnIds(imgIds=img_id)
        img_anns = coco.loadAnns(ann_ids)

        numPeople = len(img_anns)
        name = coco.imgs[img_id]['file_name']
        height = coco.imgs[img_id]['height']
        width = coco.imgs[img_id]['width']

        persons = []
        person_centers = []

        for p in range(numPeople):

            if img_anns[p]['num_keypoints'] < 5 or img_anns[p][
                    'area'] < 32 * 32:
                continue
            kpt = img_anns[p]['keypoints']
            dic = dict()

            # person center
            person_center = [
                img_anns[p]['bbox'][0] + img_anns[p]['bbox'][2] / 2.0,
                img_anns[p]['bbox'][1] + img_anns[p]['bbox'][3] / 2.0
            ]
            scale = img_anns[p]['bbox'][3] / 368.0

            # skip this person if the distance to an existing person is too small
            flag = 0
            for pc in person_centers:
                dis = math.sqrt((person_center[0] - pc[0]) *
                                (person_center[0] - pc[0]) +
                                (person_center[1] - pc[1]) *
                                (person_center[1] - pc[1]))
                if dis < pc[2] * 0.3:
                    flag = 1
                    break
            if flag == 1:
                continue
            dic['objpos'] = person_center
            dic['keypoints'] = np.zeros((17, 3)).tolist()
            dic['scale'] = scale
            for part in range(17):
                dic['keypoints'][part][0] = kpt[part * 3]
                dic['keypoints'][part][1] = kpt[part * 3 + 1]
                # visible is 1, invisible is 0, and not labeled is 2
                if kpt[part * 3 + 2] == 2:
                    dic['keypoints'][part][2] = 1
                elif kpt[part * 3 + 2] == 1:
                    dic['keypoints'][part][2] = 0
                else:
                    dic['keypoints'][part][2] = 2

            persons.append(dic)
            person_centers.append(
                np.append(person_center,
                          max(img_anns[p]['bbox'][2], img_anns[p]['bbox'][3])))

        if len(persons) > 0:
            filelist_fp.write(name + '\n')
            info = dict()
            info['filename'] = name
            info['info'] = []
            cnt = 1
            for person in persons:
                dic = dict()
                dic['pos'] = person['objpos']
                dic['keypoints'] = np.zeros((18, 3)).tolist()
                dic['scale'] = person['scale']
                # use a fresh index here: reusing `i` would clobber the
                # image counter driving the progress message below
                for j in range(17):
                    dic['keypoints'][
                        COCO_TO_OURS[j]][0] = person['keypoints'][j][0]
                    dic['keypoints'][
                        COCO_TO_OURS[j]][1] = person['keypoints'][j][1]
                    dic['keypoints'][
                        COCO_TO_OURS[j]][2] = person['keypoints'][j][2]
                dic['keypoints'][1][0] = (person['keypoints'][5][0] +
                                          person['keypoints'][6][0]) * 0.5
                dic['keypoints'][1][1] = (person['keypoints'][5][1] +
                                          person['keypoints'][6][1]) * 0.5
                if person['keypoints'][5][2] == person['keypoints'][6][2]:
                    dic['keypoints'][1][2] = person['keypoints'][5][2]
                elif person['keypoints'][5][2] == 2 or person['keypoints'][6][
                        2] == 2:
                    dic['keypoints'][1][2] = 2
                else:
                    dic['keypoints'][1][2] = 0
                info['info'].append(dic)
            lists.append(info)

            mask_all = np.zeros((height, width), dtype=np.uint8)
            mask_miss = np.zeros((height, width), dtype=np.uint8)
            flag = 0
            for p in img_anns:
                if p['iscrowd'] == 1:
                    mask_crowd = coco.annToMask(p)
                    temp = np.bitwise_and(mask_all, mask_crowd)
                    mask_crowd = mask_crowd - temp
                    flag += 1
                    continue
                else:
                    mask = coco.annToMask(p)

                mask_all = np.bitwise_or(mask, mask_all)

                if p['num_keypoints'] <= 0:
                    mask_miss = np.bitwise_or(mask, mask_miss)

            if flag < 1:
                mask_miss = np.logical_not(mask_miss)
            elif flag == 1:
                mask_miss = np.logical_not(np.bitwise_or(
                    mask_miss, mask_crowd))
                mask_all = np.bitwise_or(mask_all, mask_crowd)
            else:
                raise Exception('crowd segments > 1')
            np.save(os.path.join(mask_dir,
                                 name.split('.')[0] + '.npy'), mask_miss)
            masklist_fp.write(
                os.path.join(mask_dir,
                             name.split('.')[0] + '.npy') + '\n')
        if i % 1000 == 0:
            print "Processed {} of {}".format(i, len(ids))

    masklist_fp.close()
    filelist_fp.close()
    print('write json file')

    with open(json_path, 'w') as fp:
        fp.write(json.dumps(lists))

    print('done!')
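

# Hedged usage sketch for generate_json_mask; every path below is a
# placeholder, not one taken from the original project.
generate_json_mask(
    ann_path='annotations/person_keypoints_train2017.json',
    json_path='train_keypoints.json',
    mask_dir='train_masks',
    filelist_path='train_filelist.txt',
    masklist_path='train_masklist.txt')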
class Datahandler_COCO:
    def __init__(self, image_dir, annotation_file):
        self.dataset, self.anns, self.cats, self.imgs = {}, {}, {}, {}

        self.annotation_file = annotation_file
        self.image_dir = image_dir
        print("loading dataset")

        dataset = json.load(open(self.annotation_file, 'r'))

        self.coco = COCO(self.annotation_file)
        # Load all classes (Only Building in this version)
        self.classIds = self.coco.getCatIds()
        #print(self.classIds)
        # Load all images
        self.image_ids = list(self.coco.imgs.keys())
        #print(self.image_ids)
        for image_id in self.image_ids:
            self.anns[image_id] = []

        self.categories = self.coco.loadCats([100])
        #print(self.categories)
        for object in dataset["annotations"]:
            self.anns[object["image_id"]].append(object)
            #print (object)
        #print(self.anns)
    def get_mask(self, image_id):
        anns = self.anns[image_id]
        mask = self.coco.annToMask(anns[0])
        for ann in anns[1:]:
            mask = mask | self.coco.annToMask(ann)
        return mask

    def make_batches(self, batchsize=4, Train=True):
        batch_images = []
        batch_masks = []
        image_ids = self.image_ids  # avoid shadowing the builtin `list`

        while True:
            for image_id in image_ids:
                filename = self.coco.imgs[image_id]["file_name"]
                path = os.path.join(self.image_dir, filename)
                img = cv2.imread(path)
                mask = self.get_mask(image_id)
                img = cv2.resize(img, (320, 320))
                mask = cv2.resize(mask, (320, 320))

                batch_images.append(img)
                batch_masks.append(mask)

                #cv2.imwrite(filename+"gt.jpg",img)
                #cv2.imwrite(filename+"b.jpg",mask*255)
                if len(batch_images) == batchsize:

                    yield (np.array(batch_images),
                           np.expand_dims(np.array(batch_masks), axis=-1))
                    batch_images = []
                    batch_masks = []

    def get_batch(self, batch_size=1, train=True):
        # each call builds a fresh generator, so this always returns the
        # first (images, masks) batch of the dataset
        return next(self.make_batches(batch_size, train))
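

# Usage sketch for Datahandler_COCO (paths are placeholders): make_batches
# is an infinite generator, so next() pulls one (images, masks) pair.
handler = Datahandler_COCO('images', 'annotations/instances.json')
batch_images, batch_masks = next(handler.make_batches(batchsize=4))
print(batch_images.shape)  # (4, 320, 320, 3)
print(batch_masks.shape)   # (4, 320, 320, 1)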
    'min_crop_size': 0.3
}
auth = ExtraAugmentation_new(distort, randrot, expand, randcrop)
for i in range(10):
    imn = imgp + res.loadImgs(imgids[i])[0]['file_name']
    im = np.array(Image.open(imn).convert('RGB'))
    im = im[..., ::-1]  # convert RGB to BGR
    annids = res.getAnnIds(imgids[i])
    anns = res.loadAnns(annids)
    boxes = []
    masks = []
    labels = []
    for ann in anns:
        labels.append(ann['category_id'])
        boxes.append(ann['bbox'])
        masks.append(res.annToMask(ann))
    boxes = np.array(boxes)
    # convert [x, y, w, h] to [x1, y1, x2, y2]
    boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
    boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
    # segs = np.array(segs)  # segs stays a list; polygons differ in length
    labels = np.array(labels)
    imout = im.copy()
    for box, ma in zip(boxes, masks):
        cv2.rectangle(imout, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), (255, 0, 0), 1)
        imout[ma > 0] = (imout[ma > 0] * 0.5 +
                         ma[ma > 0, None] * np.array([255, 0, 0]) * 0.5)
    cv2.imwrite('a{}_0.jpg'.format(i), imout.astype(np.uint8))
    segs = [sum(masks)]
    cv2.imwrite('a{}_0_s.jpg'.format(i), segs[0] * 255)
    im, boxes, masks, segs, labels = auth(im, boxes, masks, segs, labels)
    im_o = im.copy()
    print(im_o.shape)
class AugmentationDataset(CustomDataset):

    CLASSES = ('person', )  # trailing comma keeps this a tuple, not a str

    def load_annotations(self, ann_file):
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
            if with_mask:
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(
            bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)

        if with_mask:
            ann['masks'] = gt_masks
            # poly format is not used in the current implementation
            ann['mask_polys'] = gt_mask_polys
            ann['poly_lens'] = gt_poly_lens
        return ann
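

# The bbox convention used by _parse_ann_info, shown in isolation
# (illustrative): COCO stores [x, y, w, h]; the parser converts it to an
# inclusive [x1, y1, x2, y2] box.
x1, y1, w, h = 10.0, 20.0, 5.0, 8.0
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
print(bbox)  # [10.0, 20.0, 14.0, 27.0]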
class CrowAiBuildingDataset(torch.utils.data.Dataset):
    def __init__(self,
                 images_dir,
                 annotation_file,
                 use_mask=False,
                 transforms=None):
        self.images_dir = images_dir
        self.annotation_file = annotation_file
        self.transform = transforms
        self.use_mask = use_mask
        self.coco = COCO(self.annotation_file)
        self.class_Ids = self.coco.getCatIds()
        self.image_ids = self.coco.getImgIds()

    def __getitem__(self, i):

        annos = self.coco.getAnnIds(imgIds=[self.image_ids[i]],
                                    catIds=self.class_Ids,
                                    iscrowd=None)
        anns = self.coco.loadAnns(annos)

        img_name = self.coco.imgs[self.image_ids[i]]['file_name']
        image = cv2.imread(os.path.join(self.images_dir, img_name))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        boxes = []
        labels = []
        masks = []

        for ann in anns:

            seg = ann["segmentation"]

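            # derive a bounding box from the first polygon's x/y coordinates
            # (assumes polygon-format segmentation rather than RLE)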
            x_points = seg[0][::2]
            y_points = seg[0][1::2]
            x11 = min(x_points)
            x22 = max(x_points)
            y11 = min(y_points)
            y22 = max(y_points)

            category_id = 1

            if x11 != x22 and y11 != y22:
                bbox = [x11, y11, x22, y22]
                boxes.append(bbox)
                labels.append(category_id)
                if self.use_mask:
                    mask = self.coco.annToMask(ann)
                    masks.append(mask)

        image_id = torch.tensor([i])
        iscrowd = torch.zeros((len(annos), ), dtype=torch.int64)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        labels = torch.as_tensor(labels, dtype=torch.int64)

        if self.use_mask:
            masks = torch.as_tensor(masks, dtype=torch.uint8)

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        if self.use_mask:
            target["masks"] = masks
        target["image_id"] = image_id
        target["iscrowd"] = iscrowd
        target["area"] = area

        if self.transform is not None:
            image, target = self.transform(image, target)

        return image, target

    def __len__(self):
        return len(self.coco.getImgIds())
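

# Usage sketch with a torch DataLoader (placeholder paths). The lambda
# collate keeps variable-length detection targets as tuples, the usual
# pattern for torchvision detection models.
from torch.utils.data import DataLoader

dataset = CrowAiBuildingDataset('images', 'annotation.json', use_mask=True)
loader = DataLoader(dataset,
                    batch_size=2,
                    shuffle=True,
                    collate_fn=lambda batch: tuple(zip(*batch)))
images, targets = next(iter(loader))
print(targets[0]['boxes'].shape)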
Example #8
def coco_image_segmentation_stats(seg_mask_output_paths, annotation_paths, seg_mask_image_paths, verbose):
    for (seg_mask_path, annFile, image_path) in zip(seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        stats_json = os.path.join(seg_mask_path,
                                  'image_segmentation_class_stats.json')
        print('Image stats will be saved to:', stats_json)
        cat_csv = os.path.join(seg_mask_path,
                               'class_counts_over_sum_category_counts.csv')
        print('Category weights will be saved to:', cat_csv)
        coco = COCO(annFile)
        print('Annotation file info:')
        coco.info()
        print('category ids, not including 0 for background:')
        print(coco.getCatIds())
        # display COCO categories and supercategories
        cats = coco.loadCats(coco.getCatIds())
        nms = [cat['name'] for cat in cats]
        print('categories: \n\n', ' '.join(nms))

        nms = set([cat['supercategory'] for cat in cats])
        print('supercategories: \n', ' '.join(nms))
        img_ids = coco.getImgIds()
        # the original snippet called an external helper ids(); the category
        # ids from the annotation file provide the same information
        max_ids = max(coco.getCatIds()) + 1  # add background category
        # 0 indicates no category (not even background) for counting bins
        max_bin_count = max_ids + 1
        bin_count = np.zeros(max_bin_count)
        total_pixels = 0

        print('Calculating image segmentation stats...')
        progbar = Progbar(len(img_ids), verbose=verbose)
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            progbar.update(idx + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            target_shape = (img['height'], img['width'], max_ids)
            # print('\ntarget_shape:', target_shape)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)

            # Note to only count background pixels once, we define a temporary
            # null class of 0, and shift all class category ids up by 1
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background

            for ann in anns:
                mask_partial = coco.annToMask(ann)
                mask_one_hot[mask_partial > 0, ann['category_id']] = ann['category_id'] + 1
                mask_one_hot[mask_partial > 0, 0] = 0

            # print( mask_one_hot)
            # print('initial bin_count shape:', np.shape(bin_count))
            # flat_mask_one_hot = mask_one_hot.flatten()
            bincount_result = np.bincount(mask_one_hot.flatten())
            # print('bincount_result TYPE:', type(bincount_result))
            # np.array(np.ndarray.flatten(np.bincount(np.ndarray.flatten(np.array(mask_one_hot)).astype(int))).resize(max_bin_count))
            # print('bincount_result:', bincount_result)
            # print('bincount_result_shape', np.shape(bincount_result))
            length = int(np.shape(bincount_result)[0])
            zeros_to_add = max_bin_count - length
            z = np.zeros(zeros_to_add)
            # print('zeros_to_add TYPE:', type(zeros_to_add))
            # this is a workaround because for some strange reason the
            # output type of bincount couldn't interact with other numpy arrays
            bincount_result_long = bincount_result.tolist() + z.tolist()
            # bincount_result = bincount_result.resize(max_bin_count)
            # print('bincount_result2:', bincount_result_long)
            # print('bincount_result2_shape',bincount_result_long)
            bin_count = bin_count + np.array(bincount_result_long)
            total_pixels += (img['height'] * img['width'])

        print('Final Tally:')
        # shift categories back down by 1
        bin_count = bin_count[1:]
        category_ids = range(bin_count.size)
        sum_category_counts = np.sum(bin_count)

        # normalized so the weights sum to 1
        category_counts_over_sum_category_counts = \
            np.true_divide(bin_count.astype(np.float64), sum_category_counts)
        np.savetxt(cat_csv, category_counts_over_sum_category_counts)

        # sum will be >1 as a pixel can be in multiple categories
        category_counts_over_total_pixels = \
            np.true_divide(bin_count.astype(np.float64), total_pixels)

        # less common categories have more weight, sum = 1
        category_counts_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_sum_category_counts]

        # less common categories have more weight, sum > 1
        total_pixels_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_total_pixels]

        print(bin_count)
        # cast numpy scalars to plain Python types so json.dump can
        # serialize them
        stat_dict = {
            'total_pixels': int(total_pixels),
            'category_counts': dict(zip(category_ids, bin_count.tolist())),
            'sum_category_counts': float(sum_category_counts),
            'category_counts_over_sum_category_counts':
                dict(zip(category_ids,
                         category_counts_over_sum_category_counts.tolist())),
            'category_counts_over_total_pixels':
                dict(zip(category_ids,
                         category_counts_over_total_pixels.tolist())),
            'category_counts_p_complement':
                dict(zip(category_ids,
                         map(float, category_counts_p_complement))),
            'total_pixels_p_complement':
                dict(zip(category_ids, map(float, total_pixels_p_complement))),
            # the original called external helpers ids()/categories(); the
            # COCO API exposes the same information
            'ids': coco.getCatIds(),
            'categories': [cat['name'] for cat in cats]
        }
        print(stat_dict)
        with open(stats_json, 'w') as fjson:
            json.dump(stat_dict, fjson, ensure_ascii=False)
Example #9
def preproc(mode):

    dataset_dir = 'dataset'  #os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'dataset'))

    val_anno_path = os.path.join(
        dataset_dir, "annotations/person_keypoints_%s2017.json" % mode)
    val_images_dir = os.path.join(dataset_dir, "%s2017" % mode)
    val_masks_dir = os.path.join(dataset_dir, "%smask2017" % mode)

    if not os.path.exists(val_masks_dir):
        os.makedirs(val_masks_dir)

    coco = COCO(val_anno_path)
    ids = list(coco.imgs.keys())
    for i, img_id in enumerate(ids):
        ann_ids = coco.getAnnIds(imgIds=img_id)
        img_anns = coco.loadAnns(ann_ids)

        img_path = os.path.join(val_images_dir, "%012d.jpg" % (img_id))
        mask_miss_path = os.path.join(val_masks_dir,
                                      "mask_miss_%012d.png" % img_id)
        mask_all_path = os.path.join(val_masks_dir,
                                     "mask_all_%012d.png" % img_id)

        img = cv2.imread(img_path)
        h, w, c = img.shape

        mask_all = np.zeros((h, w), dtype=np.uint8)
        mask_miss = np.zeros((h, w), dtype=np.uint8)
        flag = 0
        for p in img_anns:
            seg = p["segmentation"]

            if p["iscrowd"] == 1:
                mask_crowd = coco.annToMask(p)
                temp = np.bitwise_and(mask_all, mask_crowd)
                mask_crowd = mask_crowd - temp
                flag += 1
                continue
            else:
                mask = coco.annToMask(p)

            mask_all = np.bitwise_or(mask, mask_all)

            if p["num_keypoints"] <= 0:
                mask_miss = np.bitwise_or(mask, mask_miss)

        if flag < 1:
            mask_miss = np.logical_not(mask_miss)
        elif flag == 1:
            mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
            mask_all = np.bitwise_or(mask_all, mask_crowd)
        else:
            raise Exception("crowd segments > 1")

        cv2.imwrite(mask_miss_path, mask_miss * 255)
        cv2.imwrite(mask_all_path, mask_all * 255)

        if i % 1000 == 0:
            print("Processed %d of %d" % (i, len(ids)))

    print("Done !!!")
Example #10
class coco_background_Dataset(object):
    def __init__(self, coco_root, datasettype, transforms=None, num_classes=2):
        """
        :param coco_root: dataset root directory
        :param datasettype: 'train' or 'val'
        :param transforms: optional transform applied to (img, target)
        """
        # a torchvision-style paired transform can be passed in here
        self.transforms = transforms

        self.annpath = os.path.join(coco_root, "annotations",
                                    'instances_' + datasettype + '2017.json')
        self.image_path = os.path.join(coco_root, datasettype + "2017")

        self.coco = COCO(self.annpath)
        self.num_classes = num_classes

        self.image_ids = self.coco.getImgIds()

    def __getitem__(self, idx):  # fetch one sample by tumor_slices_id

        tumor_slices_id = self.image_ids[idx]

        imgInfo = self.coco.loadImgs(tumor_slices_id)[0]  # [0] unwraps the list

        imPath = os.path.join(self.image_path, imgInfo['file_name'])

        # load image
        img = Image.open(imPath).convert("RGB")

        # get the ids of all annotations attached to this image
        annIds = self.coco.getAnnIds(imgIds=imgInfo['id'])
        anns = self.coco.loadAnns(annIds)

        num_objs = len(anns)
        masks = []
        boxes = []
        labels = []

        for ann in anns:
            # COCO encodes masks as polygons; annToMask decodes each ann
            # (one object per ann id) into a binary 0/1 mask
            mask = np.asarray(self.coco.annToMask(ann))

            pos = np.where(mask)
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])  # not the COCO convention!
            # COCO bbox = [xmin, ymin, width, height]
            #             (top-left x, top-left y, width, height)
            masks.append(mask)
            label = int(self.coco.loadCats(ann['category_id'])[0]['id'])
            labels.append(label)

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # there is only one class (tumor), so set all labels to 1
        if self.num_classes == 2:
            labels = torch.ones((num_objs, ), dtype=torch.int64)
        else:
            labels = torch.as_tensor(labels, dtype=torch.int64)

        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([tumor_slices_id])

        # note: computed from corner coordinates, unlike the COCO area field
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd, instances with iscrowd=True will be ignored during evaluation.
        iscrowd = torch.zeros((num_objs, ), dtype=torch.int64)

        # create return anno
        target = {}

        target["boxes"] = boxes

        target["labels"] = labels
        target["masks"] = masks

        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        # one image maps to one target dict; field lengths vary per image
        # but agree with each other within a target

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):  # total number of images
        return len(self.image_ids)
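

# Usage sketch (placeholder root): torchvision-style paired transforms that
# accept (img, target) are assumed, as in the torchvision detection
# reference scripts.
dataset = coco_background_Dataset('/data/coco', 'train', num_classes=2)
img, target = dataset[0]
print(target['boxes'].shape, target['masks'].shape)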
Example #11
def coco_json_to_segmentation(seg_mask_output_paths, annotation_paths, seg_mask_image_paths, verbose):
    for (seg_mask_path, annFile, image_path) in zip(seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        print('\n'
              'WARNING: Each pixel can have multiple classes! That means '
              'class data overlaps. Also, single objects can be outlined '
              'multiple times because they were labeled by different people! '
              'In other words, even a single object may be segmented twice. '
              'This means the .png files are missing entire objects.\n\n'
              'Use of categorical one-hot encoded .npy files is recommended, '
              'but .npy files also have limitations, because the .npy files '
              'only have one label per pixel for each class, '
              'and currently take the union of multiple human class labels. '
              'Improving how your data is handled will improve your results, '
              'so remember to consider that limitation. There is still '
              'an opportunity to improve how this training data is handled & '
              'integrated with your training scripts and utilities...')
        coco = COCO(annFile)

        print('Converting Annotations to Segmentation Masks...')
        mkdir_p(seg_mask_path)
        total_imgs = len(coco.imgToAnns.keys())
        progbar = Progbar(total_imgs + len(coco.getImgIds()), verbose=verbose)
        # 'annotations' was previously 'instances' in an old version
        # dict keys are not indexable in Python 3, so materialize the list
        img_ids_with_anns = list(coco.imgToAnns.keys())
        for img_num in range(total_imgs):
            img_id = img_ids_with_anns[img_num]
            # the [0]'s extract the element from a single-item list
            img = coco.loadImgs(coco.imgToAnns[img_id][0]['image_id'])[0]
            h = img['height']
            w = img['width']
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".png")
            file_exists = os.path.exists(filename)
            if file_exists:
                progbar.update(img_num, [('file_fraction_already_exists', 1)])
                continue
            else:
                progbar.update(img_num, [('file_fraction_already_exists', 0)])
                print(filename)

            MASK = np.zeros((h, w), dtype=np.uint8)
            for ann in coco.imgToAnns[img_id]:
                mask = coco.annToMask(ann)
                idxs = np.where(mask > 0)
                MASK[idxs] = ann['category_id']

            im = Image.fromarray(MASK)
            im.save(filename)

        print('\nConverting Annotations to one hot encoded'
              'categorical .npy Segmentation Masks...')
        img_ids = coco.getImgIds()
        use_original_dims = True  # not target_shape
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".npy")
            file_exists = os.path.exists(filename)
            if file_exists:
                progbar.add(1, [('file_fraction_already_exists', 1)])
                continue
            else:
                progbar.add(1, [('file_fraction_already_exists', 0)])

            if use_original_dims:
                # the original used an external ids() helper; the category
                # ids from the annotation file give the same dimension
                target_shape = (img['height'], img['width'],
                                max(coco.getCatIds()) + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background
            # mask_one_hot = cv2.resize(mask_one_hot,
            #                           target_shape[:2],
            #                           interpolation=cv2.INTER_NEAREST)

            for ann in anns:
                mask_partial = coco.annToMask(ann)
                # mask_partial = cv2.resize(mask_partial,
                #                           (target_shape[1], target_shape[0]),
                #                           interpolation=cv2.INTER_NEAREST)
                # # width and height match
                # assert mask_one_hot.shape[:2] == mask_partial.shape[:2]
                #    print('another shape:',
                #          mask_one_hot[mask_partial > 0].shape)
                mask_one_hot[mask_partial > 0, ann['category_id']] = 1
                mask_one_hot[mask_partial > 0, 0] = 0

            np.save(filename, mask_one_hot)
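

# Hedged usage sketch; the three lists are parallel (one entry per
# annotation file) and every path is a placeholder.
coco_json_to_segmentation(
    seg_mask_output_paths=['masks/train2017'],
    annotation_paths=['annotations/instances_train2017.json'],
    seg_mask_image_paths=['train2017'],
    verbose=1)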
Example #12
class CocoDataset(CustomDataset):

    CLASSES = (
        "person",
        "bicycle",
        "car",
        "motorcycle",
        "airplane",
        "bus",
        "train",
        "truck",
        "boat",
        "traffic_light",
        "fire_hydrant",
        "stop_sign",
        "parking_meter",
        "bench",
        "bird",
        "cat",
        "dog",
        "horse",
        "sheep",
        "cow",
        "elephant",
        "bear",
        "zebra",
        "giraffe",
        "backpack",
        "umbrella",
        "handbag",
        "tie",
        "suitcase",
        "frisbee",
        "skis",
        "snowboard",
        "sports_ball",
        "kite",
        "baseball_bat",
        "baseball_glove",
        "skateboard",
        "surfboard",
        "tennis_racket",
        "bottle",
        "wine_glass",
        "cup",
        "fork",
        "knife",
        "spoon",
        "bowl",
        "banana",
        "apple",
        "sandwich",
        "orange",
        "broccoli",
        "carrot",
        "hot_dog",
        "pizza",
        "donut",
        "cake",
        "chair",
        "couch",
        "potted_plant",
        "bed",
        "dining_table",
        "toilet",
        "tv",
        "laptop",
        "mouse",
        "remote",
        "keyboard",
        "cell_phone",
        "microwave",
        "oven",
        "toaster",
        "sink",
        "refrigerator",
        "book",
        "clock",
        "vase",
        "scissors",
        "teddy_bear",
        "hair_drier",
        "toothbrush",
    )

    def load_annotations(self, ann_file):
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info["filename"] = info["file_name"]
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]["id"]
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_["image_id"] for _ in self.coco.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info["width"], img_info["height"]) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for i, ann in enumerate(ann_info):
            if ann.get("ignore", False):
                continue
            x1, y1, w, h = ann["bbox"]
            if ann["area"] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann["iscrowd"]:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann["category_id"]])
            if with_mask:
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann["segmentation"] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(bboxes=gt_bboxes,
                   labels=gt_labels,
                   bboxes_ignore=gt_bboxes_ignore)

        if with_mask:
            ann["masks"] = gt_masks
            # poly format is not used in the current implementation
            ann["mask_polys"] = gt_mask_polys
            ann["poly_lens"] = gt_poly_lens

        return ann
class TinyDataset(object):
    def __init__(self, root, transforms, mode):
        self.root = root
        self.transforms = transforms
        self.mode = mode
        if mode == "train":
            self.annojson = os.path.join(root, "pascal_train.json")
            self.annococo = COCO(self.annojson)
        elif mode == "test":
            self.annojson = os.path.join(root, "test.json")
            self.annococo = COCO(self.annojson)

    def __getitem__(self, idx):
        imgid = list(self.annococo.imgs.keys())[idx]
        img_info = self.annococo.loadImgs(ids=imgid)
        if self.mode == "train":
            # load images
            img_path = os.path.join(self.root, "train",
                                    img_info[0]['file_name'])
            img = Image.open(img_path).convert("RGB")

            # get mask
            annids = self.annococo.getAnnIds(imgIds=imgid)
            anns = self.annococo.loadAnns(annids)
            num_objs = len(annids)
            boxes = []
            labels = []
            iscrowd = []
            masks = []
            for i in range(len(annids)):
                bbox = anns[i]['bbox']
                xmin = bbox[0]
                ymin = bbox[1]
                width = bbox[2]
                height = bbox[3]
                xmax = xmin + width
                ymax = ymin + height
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(anns[i]['category_id'])
                iscrowd.append(anns[i]['iscrowd'])
                masks.append(self.annococo.annToMask(anns[i]))

            # convert everything into a torch.Tensor
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            masks = torch.as_tensor(masks, dtype=torch.uint8)
            iscrowd = torch.as_tensor(iscrowd, dtype=torch.uint8)

            image_id = torch.tensor([imgid])
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

            target = {}
            target["boxes"] = boxes
            target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
            target["image_id"] = image_id
            target["area"] = area
            target["iscrowd"] = iscrowd
            target["masks"] = masks

            if self.transforms is not None:
                img, target = self.transforms(img, target)
        else:
            img_path = os.path.join(self.root, "test",
                                    img_info[0]['file_name'])
            img = Image.open(img_path).convert("RGB")
            target = imgid
            if self.transforms is not None:
                img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.annococo.imgs.keys())
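

# Usage sketch (placeholder root): the class expects pascal_train.json and
# a train/ image folder under root when mode == "train".
dataset = TinyDataset('data', transforms=None, mode='train')
img, target = dataset[0]
print(target['boxes'].shape)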
Example #14
total_count = 0

all_testing = []

for i in range(np.shape(img_data)[0]):

    print(i)

    img_line = img_data[i]

    image_id = int(img_line[0])
    ann_ids = int(img_line[6])
    annotations = coco.loadAnns(ann_ids)[0]
    gt_vector = coco.getCatIds().index(int(img_line[5]))
    object_mask = coco.annToMask(annotations)

    try:
        obj_im = fit_in_square(
            apply_mask(coco,
                       raw_image=load_image(image_id),
                       mask=object_mask,
                       keep='object',
                       crop=0.25), object_dim)
    except Exception:
        # skip samples that fail to crop; the original `continue` after
        # `raise` was unreachable
        continue

    object_input = prepare_input(obj_im)
    # all_testing.append(prepare_input(apply_mask(coco, raw_image = load_image(image_id), mask = object_mask, keep = 'object', crop = 0)))
Example #16
class MSCOCOSeq(BaseDataset):
    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
            - images
                - train2014

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
    """
    def __init__(self, root=None, image_loader=jpeg4py_loader):
        root = env_settings().coco_dir if root is None else root
        super().__init__(root, image_loader)

        self.img_pth = os.path.join(root, 'train2014/')
        self.anno_path = os.path.join(root,
                                      'annotations/instances_train2014.json')

        # Load the COCO set.
        self.coco_set = COCO(self.anno_path)

        self.cats = self.coco_set.cats

        self.class_list = self.get_class_list()  # the parent class thing would happen in the sampler

        self.sequence_list = self._get_sequence_list()

        self.seq_per_class = self._build_seq_per_class()

    def _get_sequence_list(self):
        ann_list = list(self.coco_set.anns.keys())
        seq_list = [
            a for a in ann_list if self.coco_set.anns[a]['iscrowd'] == 0
        ]

        return seq_list

    def is_video_sequence(self):
        return False

    def get_num_classes(self):
        return len(self.class_list)

    def get_name(self):
        return 'coco'

    def has_class_info(self):
        return True

    def get_class_list(self):
        class_list = []
        for cat_id in self.cats.keys():
            class_list.append(self.cats[cat_id]['name'])
        return class_list

    def has_segmentation_info(self):
        return True

    def get_num_sequences(self):
        return len(self.sequence_list)

    def _build_seq_per_class(self):
        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = self.cats[self.coco_set.anns[seq]
                                   ['category_id']]['name']
            if class_name not in seq_per_class:
                seq_per_class[class_name] = [i]
            else:
                seq_per_class[class_name].append(i)

        return seq_per_class

    def get_sequences_in_class(self, class_name):
        return self.seq_per_class[class_name]

    def get_sequence_info(self, seq_id):
        anno = self._get_anno(seq_id)

        bbox = torch.Tensor(anno['bbox']).view(1, 4)

        mask = self.coco_set.annToMask(anno)
        mask = np.array(mask)
        mask = mask.reshape(1, mask.shape[0], mask.shape[1], 1)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, seq_id):
        anno = self.coco_set.anns[self.sequence_list[seq_id]]

        return anno

    def _get_frames(self, seq_id):
        path = self.coco_set.loadImgs([
            self.coco_set.anns[self.sequence_list[seq_id]]['image_id']
        ])[0]['file_name']
        img = self.image_loader(os.path.join(self.img_pth, path))
        return img

    def get_meta_info(self, seq_id):
        try:
            cat_dict_current = self.cats[self.coco_set.anns[
                self.sequence_list[seq_id]]['category_id']]
            object_meta = OrderedDict({
                'object_class':
                cat_dict_current['name'],
                'motion_class':
                None,
                'major_class':
                cat_dict_current['supercategory'],
                'root_class':
                None,
                'motion_adverb':
                None
            })
        except:
            object_meta = OrderedDict({
                'object_class': None,
                'motion_class': None,
                'major_class': None,
                'root_class': None,
                'motion_adverb': None
            })
        return object_meta

    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
        # COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and return a
        # list containing these replicated images.
        frame = self._get_frames(seq_id)

        frame_list = [frame.copy() for _ in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[0, ...] for _ in frame_ids]

        object_meta = self.get_meta_info(seq_id)

        return frame_list, anno_frames, object_meta
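A hedged usage sketch for the class above; the root path is a placeholder, jpeg4py must be installed for the default loader, and the surrounding framework (env_settings, BaseDataset) is assumed to be importable:

# Hypothetical usage; 'path/to/coco_root' is a placeholder path.
dataset = MSCOCOSeq(root='path/to/coco_root')
print(dataset.get_name(), dataset.get_num_sequences())

info = dataset.get_sequence_info(0)  # {'bbox', 'mask', 'valid', 'visible'}
frames, annos, meta = dataset.get_frames(seq_id=0, frame_ids=[0, 0, 0])
print(len(frames), meta['object_class'])  # the single image, replicated 3 times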
Example #17
class CocoDataset(BaseDataset):
    def get_data_info(self, ann_path):
        """
        Load basic information of dataset such as image path, label and so on.
        :param ann_path: coco json file path
        :return: image info:
        [{'license': 2,
          'file_name': '000000000139.jpg',
          'coco_url': 'http://images.cocodataset.org/val2017/000000000139.jpg',
          'height': 426,
          'width': 640,
          'date_captured': '2013-11-21 01:34:01',
          'flickr_url': 'http://farm9.staticflickr.com/8035/8024364858_9c41dc1666_z.jpg',
          'id': 139},
         ...
        ]
        """
        self.coco_api = COCO(ann_path)
        self.cat_ids = sorted(self.coco_api.getCatIds())
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.cats = self.coco_api.loadCats(self.cat_ids)
        self.img_ids = sorted(self.coco_api.imgs.keys())
        img_info = self.coco_api.loadImgs(self.img_ids)
        return img_info

    def get_img_annotation(self, idx):
        """
        load per image annotation
        :param idx: index in dataloader
        :return: annotation dict
        """
        img_id = self.img_ids[idx]
        ann_ids = self.coco_api.getAnnIds([img_id])
        anns = self.coco_api.loadAnns(ann_ids)
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        if self.use_instance_mask:
            gt_masks = []
        if self.use_keypoint:
            gt_keypoints = []
        for ann in anns:
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            if ann['category_id'] not in self.cat_ids:
                continue
            bbox = [x1, y1, x1 + w, y1 + h]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                if self.use_instance_mask:
                    gt_masks.append(self.coco_api.annToMask(ann))
                if self.use_keypoint:
                    gt_keypoints.append(ann['keypoints'])
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)
        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
        annotation = dict(bboxes=gt_bboxes,
                          labels=gt_labels,
                          bboxes_ignore=gt_bboxes_ignore)
        if self.use_instance_mask:
            annotation['masks'] = gt_masks
        if self.use_keypoint:
            if gt_keypoints:
                annotation['keypoints'] = np.array(gt_keypoints,
                                                   dtype=np.float32)
            else:
                annotation['keypoints'] = np.zeros((0, 51), dtype=np.float32)
        return annotation

    def get_train_data(self, idx):
        """
        Load image and annotation
        :param idx:
        :return: meta-data (a dict containing image, annotation and other information)
        """
        img_info = self.data_info[idx]
        file_name = img_info['file_name']
        image_path = os.path.join(self.img_path, file_name)
        img = cv2.imread(image_path)
        ann = self.get_img_annotation(idx)
        meta = dict(img=img,
                    img_info=img_info,
                    gt_bboxes=ann['bboxes'],
                    gt_labels=ann['labels'])
        if self.use_instance_mask:
            meta['gt_masks'] = ann['masks']
        if self.use_keypoint:
            meta['gt_keypoints'] = ann['keypoints']

        meta = self.pipeline(meta, self.input_size)
        meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1))
        return meta

    def get_val_data(self, idx):
        """
        Currently identical to get_train_data.
        Test-time augmentation (TTA) is not supported yet.
        :param idx:
        :return:
        """
        # TODO: support TTA
        return self.get_train_data(idx)
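get_img_annotation above reduces to a xywh-to-xyxy conversion plus a contiguous label mapping; here is a standalone sketch of that core logic (the helper name anns_to_arrays is ours, and anns/cat_ids are assumed to come from coco.loadAnns and coco.getCatIds):

import numpy as np

def anns_to_arrays(anns, cat_ids):
    # Map raw COCO category ids to contiguous labels, convert xywh -> xyxy.
    cat2label = {cat_id: i for i, cat_id in enumerate(sorted(cat_ids))}
    bboxes, labels = [], []
    for ann in anns:
        x1, y1, w, h = ann['bbox']
        if ann['area'] <= 0 or w < 1 or h < 1 or ann.get('iscrowd'):
            continue
        bboxes.append([x1, y1, x1 + w, y1 + h])
        labels.append(cat2label[ann['category_id']])
    if not bboxes:
        return np.zeros((0, 4), np.float32), np.array([], np.int64)
    return np.array(bboxes, np.float32), np.array(labels, np.int64)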
Example #18
class Obj365Dataset(CustomDataset):

    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
               'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
               'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')

    def load_annotations(self, ann_file):
        self.coco = COCO(ann_file)
        self.cat_ids = _cat_ids  # module-level id list, used instead of self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        print("######obj365 $$$$$$$$%%%%%%%% id len is ", len(self.cat_ids))
        self.img_ids = self.coco.getImgIds()[:]
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                #gt_labels.append(self.cat2label[ann['category_id']])
                gt_labels.append(ann['category_id'])
            if with_mask and not ann['iscrowd']:
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(bboxes=gt_bboxes,
                   labels=gt_labels,
                   bboxes_ignore=gt_bboxes_ignore)

        if with_mask:
            ann['masks'] = gt_masks
            # poly format is not used in the current implementation
            ann['mask_polys'] = gt_mask_polys
            ann['poly_lens'] = gt_poly_lens
        return ann
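The polygon filter in _parse_ann_info keeps only segments with at least six coordinates, i.e. three points; a quick standalone check of that rule:

# A valid triangle (6 coordinates) passes; a degenerate 2-point segment is dropped.
segmentation = [[0, 0, 10, 0, 10, 10],  # triangle: kept
                [0, 0, 5, 5]]           # two points: dropped
mask_polys = [p for p in segmentation if len(p) >= 6]
print(len(mask_polys))  # 1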
Example #19
0
File: demo.py Project: garnav/RGB-N
    for cat in cats[args.begin:args.end]:
        print("Creating data for category: " + cat['name'])
        for num in range(2000):
            try:
                catIds = coco.getCatIds(catNms=[cat['name']])
                imgIds = coco.getImgIds(catIds=catIds)
                img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]
                I = io.imread(os.path.join(
                    dataDir, dataType,
                    'COCO_train2014_{:012d}.jpg'.format(img['id'])))
                annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds,
                                        iscrowd=None)
                anns = coco.loadAnns(annIds)
                bbx = anns[0]['bbox']
                mask = np.array(coco.annToMask(anns[0]))
                # I1 aliases I, so the per-channel products below zero out
                # everything outside the instance mask in place
                I1 = I
                I1[:, :, 0] = np.array(I[:, :, 0] * mask)
                I1[:, :, 1] = np.array(I[:, :, 1] * mask)
                I1[:, :, 2] = np.array(I[:, :, 2] * mask)
Example #20
# show the image
plt.axis("off")
plt.imshow(I)
plt.show()

# instance segmentation annotations
plt.axis("off")
plt.imshow(I)
ann_ids = coco.getAnnIds(imgIds=img["id"], catIds=cat_ids, iscrowd=None)
anns = coco.loadAnns(ann_ids)
coco.showAnns(anns)
plt.show()

# mask of a single object
mask = coco.annToMask(anns[0])
plt.axis("off")
plt.imshow(mask)
plt.show()

# masks of multiple objects
mask_all = coco.annToMask(anns[0])
for ann_one in anns[1:]:
    mask_all += coco.annToMask(ann_one)
plt.axis("off")
plt.imshow(mask_all)
plt.show()

# skeleton (person keypoints)
ann_file = os.path.join(data_dir, ann_dir,
                        "person_keypoints_{}.json".format(data_type))
Example #21
class COCODataset(torch.utils.data.Dataset):
    """
    COCODataset

    """
    def __init__(self, img_dir, json_path, aff_r):
        """__init__

            Args:
                img_dir: path to the image directory
                json_path: path to the COCO annotation file

        """
        self.img_dir = img_dir
        self.jsp = json_path
        self.img_size = 512
        self.coco = COCO(json_path)

        self.img_file = []
        for json_img in self.coco.dataset["images"]:
            self.img_file.append(json_img["file_name"])

        self.aff_r = aff_r  # affinity kernel size
        self.aff_resolution = 5
        mean = [0.477, 0.451, 0.411]
        std = [0.284, 0.280, 0.292]
        self.transform = transforms.ToTensor()
        self.transform_img = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std)])

        with open('./data/t_color.txt', "rb") as f:
            self.t_color = pickle.load(f)
        with open('./data/t_class_name.txt', "rb") as f:
            self.t_color_name = pickle.load(f)
        self.labels = self.t_color

    def __len__(self):
        return len(self.img_file)

    def __getitem__(self, idx):
        """__getitem__

            Args:
                idx (int): sample index

            Returns:
                Tensor: img (n_batch, ch, height, width)
                Tensor: sem_seg (n_batch, num_classes, height, width)
                Tensor: aff_map (n_batch, aff_r, aff_r**2, height, width)

        """
        #read img_file
        img_name = self.img_file[idx]
        img = np.array(Image.open(self.img_dir + img_name))
        width, height = img.shape[0], img.shape[1]  # note: shape[0] is rows; the names are used consistently below
        if len(img.shape) < 3:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        img_seg = np.zeros((width, height, 3), dtype=int)
        img_ins = np.zeros((width, height, 3), dtype=int)

        #read ins
        id_ = int(img_name.split("/")[-1][-10:-4])
        pre_color = []
        for i in range(len(self.t_color_name) - 1):
            catIds = self.coco.getCatIds(catNms=[self.t_color_name[i]])
            annIds = self.coco.getAnnIds(imgIds=id_,
                                         catIds=catIds,
                                         iscrowd=False)
            if not annIds:
                continue
            anns = self.coco.loadAnns(annIds)
            for ann in anns:
                while True:
                    color = np.random.randint(1, 255, 3)
                    if not [
                            j for j in range(len(pre_color))
                            if np.sum(pre_color[j] == color) == 3
                    ]:
                        pre_color.append(color)
                        break
                mask = self.coco.annToMask(ann)
                mask = np.array(mask, dtype=int)
                mask_seg = mask[..., None] * self.t_color[i]
                mask_ins = mask[..., None] * color

                img_seg = np.where(img_seg == 0, mask_seg, img_seg)
                img_ins = np.where(img_ins == 0, mask_ins, img_ins)

        img = Image.fromarray(img)
        img_seg = Image.fromarray(np.uint8(img_seg))
        img_ins = Image.fromarray(np.uint8(img_ins))

        #resize
        w, h = self.get_size((width, height))

        img = img.resize((w, h))
        img_seg = img_seg.resize((w, h))
        img_ins = img_ins.resize((w, h))

        #crop
        crop_size = self.img_size
        x = np.random.randint((w - crop_size) + 1)
        y = np.random.randint((h - crop_size) + 1)

        img = img.crop((x, y, x + crop_size, y + crop_size))
        img_seg = img_seg.crop((x, y, x + crop_size, y + crop_size))
        img_ins = img_ins.crop((x, y, x + crop_size, y + crop_size))

        # img, seg and ins are ready; now build the affinity ground truth
        sem_seg = np.array(img_seg)
        img_ins = np.array(img_ins)
        img_t_cls = np.zeros((img.size[0], img.size[1], len(self.labels)))
        # semantic labels
        for i in range(len(self.labels)):
            img_t_cls[:, :, i] = np.where(
                (sem_seg[:, :, 0] == self.labels[i][0])
                & (sem_seg[:, :, 1] == self.labels[i][1])
                & (sem_seg[:, :, 2] == self.labels[i][2]), 1, 0)

        out_data = torch.zeros((3, self.img_size, self.img_size))
        out_t = torch.zeros((len(self.labels), self.img_size, self.img_size))

        aff_map = self.Affinity_generator_new(img_ins)
        # convert to torch tensor
        img = self.transform_img(img)
        sem_seg = self.transform(img_t_cls)
        # aff_map = self.transform(aff_map)

        return img, sem_seg, aff_map

    def get_size(self, img_wh):
        width = img_wh[0]
        height = img_wh[1]
        if width < height:
            w = self.img_size
            h = int(self.img_size * height / width)
        else:
            h = self.img_size
            w = int(self.img_size * width / height)
        return w, h

    def Affinity_generator(self, img_ins):
        """
        SSAP resolution 1/2, 1/4, 1/16, 1/32, 1/64 
        """
        # initialize an (aff_r, aff_r**2, img_size, img_size) affinity map
        aff_map = torch.zeros(
            (self.aff_r, self.aff_r**2, self.img_size, self.img_size))
        ins_width, ins_height = img_ins.shape[0], img_ins.shape[1]

        for mul in range(self.aff_resolution):
            # instance map strided down by 2**mul, and the resulting size
            img_t_aff_mul = img_ins[0:self.img_size:2**mul,
                                    0:self.img_size:2**mul]
            img_size = self.img_size // (2**mul)

            # pad by aff_r//2 pixels on each side
            img_t_aff_mul_2_pix = np.zeros(
                (img_size + (self.aff_r // 2) * 2,
                 img_size + (self.aff_r // 2) * 2, 3))
            img_t_aff_mul_2_pix[self.aff_r//2:img_size+self.aff_r//2,
                                self.aff_r//2:img_size+self.aff_r//2] \
                = img_t_aff_mul

            img_t_aff_compare = np.zeros(
                (self.aff_r**2, img_size, img_size, 3))
            # fill the aff_r**2 shifted affinity maps with the instance map
            for i in range(self.aff_r):
                for j in range(self.aff_r):
                    img_t_aff_compare[i * self.aff_r +
                                      j] = img_t_aff_mul_2_pix[i:i + img_size,
                                                               j:j + img_size]

            # affinity = 1 for the same instance, 0 otherwise
            aff_data = np.where(
                (img_t_aff_compare[:, :, :, 0] == img_t_aff_mul[:, :, 0])
                & (img_t_aff_compare[:, :, :, 1] == img_t_aff_mul[:, :, 1])
                & (img_t_aff_compare[:, :, :, 2] == img_t_aff_mul[:, :, 2]), 1,
                0)
            aff_data = self.transform(aff_data.transpose(1, 2, 0))
            aff_map[mul, :, 0:img_size, 0:img_size] = aff_data
        return aff_map

    def Affinity_generator_new(self, img_ins):
        """
        SSAP resolution 1/2, 1/4, 1/16, 1/32, 1/64 
        """
        # img_ins = Image.fromarray(img_ins)
        # 初始化一个aff_r * aff_r^2 * size * size
        aff_map = np.zeros(
            (self.aff_r, self.aff_r**2, self.img_size, self.img_size))
        ins_width, ins_height = img_ins.shape[0], img_ins.shape[1]

        for mul in range(self.aff_resolution):
            # downsample the instance map with nearest-neighbour interpolation
            img_size = self.img_size // (2**mul)
            ins_downsampe = cv2.resize(img_ins, (img_size, img_size),
                                       interpolation=cv2.INTER_NEAREST)

            # pad by the affinity kernel radius (aff_r//2) so the shifted
            # windows below stay centred on each pixel
            pad = self.aff_r // 2
            ins_pad = cv2.copyMakeBorder(ins_downsampe,
                                         pad, pad, pad, pad,
                                         cv2.BORDER_CONSTANT,
                                         value=(0, 0, 0))
            aff_compare = np.zeros((self.aff_r**2, img_size, img_size, 3))
            # fill the aff_r**2 shifted windows with the instance map
            for i in range(self.aff_r):
                for j in range(self.aff_r):
                    aff_compare[i * self.aff_r + j] = ins_pad[i:i + img_size,
                                                              j:j + img_size]

            # affinity = 1 for the same instance, 0 otherwise
            aff_data = np.where(
                (aff_compare[:, :, :, 0] == ins_downsampe[:, :, 0])
                & (aff_compare[:, :, :, 1] == ins_downsampe[:, :, 1])
                & (aff_compare[:, :, :, 2] == ins_downsampe[:, :, 2]), 1, 0)

            # aff_data = self.transform(aff_data.transpose(1, 2, 0))
            aff_map[mul, :, 0:img_size, 0:img_size] = aff_data
        return aff_map
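The affinity construction above can be sanity-checked on a toy instance map: each of the aff_r**2 channels compares a shifted copy of the map against the original, yielding 1 where the neighbour belongs to the same instance. A minimal single-scale sketch with scalar instance ids instead of RGB colours (a simplification of the code above):

import numpy as np

aff_r = 3
ids = np.array([[1, 1, 2, 2],
                [1, 1, 2, 2],
                [3, 3, 2, 2],
                [3, 3, 2, 2]])
size = ids.shape[0]
pad = aff_r // 2
padded = np.pad(ids, pad, constant_values=0)
aff = np.zeros((aff_r ** 2, size, size), dtype=np.uint8)
for i in range(aff_r):
    for j in range(aff_r):
        shifted = padded[i:i + size, j:j + size]
        aff[i * aff_r + j] = (shifted == ids).astype(np.uint8)
print(aff[aff_r ** 2 // 2])  # centre offset compares each pixel with itself: all ones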
Example #22
for i, img_id in enumerate(tqdm(img_ids)):
    filepath = os.path.join(IMG_DIR, coco.imgs[img_id]['file_name'])
    img = cv2.imread(filepath, cv2.IMREAD_COLOR)
    h, w, c = img.shape

    crowd_mask = np.zeros((h, w), dtype='bool')
    unannotated_mask = np.zeros((h, w), dtype='bool')
    instance_masks = []
    keypoints = []

    img_anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
    if len(img_anns) == 0:
        continue
    for anno in img_anns:
        mask = coco.annToMask(anno)

        # if crowd, don't compute loss
        if anno['iscrowd'] == 1:
            crowd_mask = np.logical_or(crowd_mask, mask)
        # if the person has no annotated keypoints, don't compute loss there
        elif anno['num_keypoints'] == 0:
            unannotated_mask = np.logical_or(unannotated_mask, mask)
            instance_masks.append(mask)
            keypoints.append(anno['keypoints'])
        else:
            instance_masks.append(mask)
            keypoints.append(anno['keypoints'])

    # Construct encoding:
Example #23
def processing(ann_path, filelist_path, masklist_path, json_path,  mask_dir):
    coco = COCO(ann_path)
    ids = list(coco.imgs.keys())
    lists = []
    
    filelist_fp = open(filelist_path, 'w')
    masklist_fp = open(masklist_path, 'w')
    
    for i, img_id in enumerate(ids):
        ann_ids = coco.getAnnIds(imgIds=img_id)
        img_anns = coco.loadAnns(ann_ids)
    
        numPeople = len(img_anns)
        name = coco.imgs[img_id]['file_name']
        height = coco.imgs[img_id]['height']
        width = coco.imgs[img_id]['width']
    
        person_centers = []
        info = dict()
        info['filename'] = name
        info['info'] = []
    
        for p in range(numPeople):
            if img_anns[p]['num_keypoints'] < 5 or img_anns[p]['area'] < 32 * 32:
                continue
            kpt = img_anns[p]['keypoints']
            dic = dict()
    
            # person center
            person_center = [img_anns[p]['bbox'][0] + img_anns[p]['bbox'][2] / 2.0, img_anns[p]['bbox'][1] + img_anns[p]['bbox'][3] / 2.0]
            scale = img_anns[p]['bbox'][3] / float(cfg.INPUT_SIZE)
    
            # skip this person if the distance to an existing person is too small
            flag = 0
            for pc in person_centers:
                dis = math.sqrt((person_center[0] - pc[0]) * (person_center[0] - pc[0]) + (person_center[1] - pc[1]) * (person_center[1] - pc[1]))
                if dis < pc[2] * 0.3:
                    flag = 1
                    break
            if flag == 1:
                continue

            dic['pos'] = person_center
            dic['keypoints'] = np.zeros((18, 3)).tolist()
            dic['scale'] = scale

            for part in range(17):
                dic['keypoints'][COCO_TO_OURS[part]][0] = kpt[part * 3]
                dic['keypoints'][COCO_TO_OURS[part]][1] = kpt[part * 3 + 1]
                # visible is 2, invisible is 1, not labeled is 0
                dic['keypoints'][COCO_TO_OURS[part]][2] = kpt[part * 3 + 2]
            
            # generate neck point based on LShoulder and RShoulder
            dic['keypoints'][1][0] = (kpt[5 * 3] + kpt[6 * 3]) * 0.5
            dic['keypoints'][1][1] = (kpt[5 * 3 + 1] + kpt[6 * 3 + 1]) * 0.5

            if kpt[5 * 3 + 2] == 0 or kpt[6 * 3 + 2] == 0:
                dic['keypoints'][1][2] = 0
            else:
                dic['keypoints'][1][2] = 1

            info['info'].append(dic)
            person_centers.append(np.append(person_center, max(img_anns[p]['bbox'][2], img_anns[p]['bbox'][3])))

        if len(info['info']) > 0:
            lists.append(info)
            filelist_fp.write(name + '\n')
            mask_all = np.zeros((height, width), dtype=np.uint8)
            mask_miss = np.zeros((height, width), dtype=np.uint8)
            flag = 0
            for p in img_anns:
                if p['iscrowd'] == 1:
                    mask_crowd = coco.annToMask(p)
                    temp = np.bitwise_and(mask_all, mask_crowd)
                    mask_crowd = mask_crowd - temp
                    flag += 1
                    continue
                else:
                    mask = coco.annToMask(p)
        
                mask_all = np.bitwise_or(mask, mask_all)
            
                if p['num_keypoints'] <= 0:
                    mask_miss = np.bitwise_or(mask, mask_miss)
        
            if flag < 1:
                mask_miss = np.logical_not(mask_miss)
            elif flag == 1:
                mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
                mask_all = np.bitwise_or(mask_all, mask_crowd)
            else:
                raise Exception('crowd segments > 1')
            
            pickle.dump(mask_miss, open(os.path.join(mask_dir, name.split('.')[0] + '.npy'), 'wb'))
            masklist_fp.write(os.path.join(mask_dir, name.split('.')[0] + '.npy') + '\n')

        if i % 1000 == 0:
            print "Processed {} of {}".format(i, len(ids))
    
    masklist_fp.close()
    filelist_fp.close()
    
    fp = open(json_path, 'w')
    fp.write(json.dumps(lists))
    fp.close()
    
    print('done!')
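The neck point generated above is just the midpoint of the two shoulders (COCO keypoint indices 5 and 6), marked visible only when both shoulders are labelled. A standalone sketch with a dummy keypoint vector:

# Dummy COCO keypoint triplets (x, y, v) for 17 joints; only the shoulders are set.
kpt = [0] * 51
kpt[5 * 3:5 * 3 + 3] = [100, 50, 2]  # left shoulder
kpt[6 * 3:6 * 3 + 3] = [140, 54, 2]  # right shoulder

neck_x = (kpt[5 * 3] + kpt[6 * 3]) * 0.5
neck_y = (kpt[5 * 3 + 1] + kpt[6 * 3 + 1]) * 0.5
neck_v = 0 if kpt[5 * 3 + 2] == 0 or kpt[6 * 3 + 2] == 0 else 1
print(neck_x, neck_y, neck_v)  # 120.0 52.0 1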
Example #24
class CocoDataset(Dataset):
    def __init__(self, anns_file, images_dir, transform=None):
        self._anns_file = anns_file
        self._images_dir = images_dir
        self._transform = transform

        self._coco = COCO(anns_file)
        self._img_ids = self._coco.getImgIds()

    def __len__(self):
        """
        Get the number of samples in the dataset.
        :return: the number of samples in the dataset.
        """
        return len(self._img_ids)

    def __getitem__(self, item):
        """
        Get the sample that correspond to the given item.
        :param item: index of sample to return
        :return: sample of type dictionary with keys: 'image' and 'labels'
        """
        img_id = self._img_ids[item]
        coco_img = self._coco.imgs[img_id]
        coco_anns = self._coco.loadAnns(self._coco.getAnnIds(imgIds=img_id))

        image_path = os.path.join(self._images_dir, coco_img['file_name'])
        image = self.load_image(image_path)

        labels = self._anns_to_tensor(coco_img, coco_anns)

        sample = {'image': image, 'labels': labels}
        if self._transform:
            sample = self._transform(sample)

        return sample

    @staticmethod
    def load_image(file_path):
        """
        Load image from file to Tensor of type uint8.
        :param file_path: path to image file
        :return: Tensor that contains the image.
        """
        original_img = Image.open(file_path)
        original_np_img = np.array(original_img)

        # Handle grayscale (2D) images: replicate the single channel to RGB
        if len(original_np_img.shape) == 2:
            original_np_img = np.stack([original_np_img] * 3, axis=-1)

        # Handle RGBA images
        if original_np_img.shape[2] == 4:
            original_np_img = original_np_img[:, :, 0:3]

        return torch.tensor(original_np_img, dtype=torch.float)

    def _anns_to_tensor(self, img, anns):
        """
        Convert coco annotations to numpy array that represent labels.
        :param img: Image dictionary in coco format.
        :param anns: Annotations of the given image.
        :return: Tensor.
        """
        image_size = (img['height'], img['width'])
        labels = np.zeros(image_size)
        for i in range(len(anns)):
            ann = anns[i]
            label_mask = self._coco.annToMask(ann) == 1
            new_label = i + 1
            labels[label_mask] = new_label
        return torch.tensor(labels.astype('uint8'), dtype=torch.uint8)
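_anns_to_tensor above paints instance i with the integer i + 1, so later annotations overwrite earlier ones where they overlap. A toy check of that labelling scheme with stand-in boolean masks instead of real COCO annotations:

import numpy as np
import torch

masks = [np.array([[1, 1], [0, 0]]) == 1,   # instance 1
         np.array([[0, 1], [1, 1]]) == 1]   # instance 2, overlaps at (0, 1)
labels = np.zeros((2, 2))
for i, m in enumerate(masks):
    labels[m] = i + 1                       # later instances win on overlap
print(torch.tensor(labels.astype('uint8'), dtype=torch.uint8))
# tensor([[1, 2],
#         [2, 2]], dtype=torch.uint8)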
Example #25
class Trainer:
    def __init__(self, args):
        #save args
        self.args = args
        #init coco utils
        self.coco_train = COCO("../annotations/instances_train2014.json")
        self.coco_val = COCO("../annotations/instances_val2014.json")
        #init tensorflow session
        tf.reset_default_graph()
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        #init model
        self.input_img = tf.placeholder(tf.float32,
                                        shape=(None, None, None, 3))
        self.label = tf.placeholder(tf.float32,
                                    shape=(None, None, None, args.nb_classes))
        self.model = Unet(input_img=self.input_img, nb_classes=args.nb_classes)
        #define loss : Cross Entropy and Dice
        with tf.variable_scope('optimization'):
            with tf.variable_scope('loss'):
                if args.loss == 'crossentropy':
                    """logits = tf.reshape(self.model.output_log, [-1, args.nb_classes])
                    labels = tf.reshape(self.label, [-1, args.nb_classes])"""
                    self.loss = -tf.reduce_mean(
                        tf.multiply(self.label, tf.log(
                            self.model.output_proba)))
                elif args.loss == "dice":
                    labels = self.label
                    proba = self.model.output_proba
                    intersection = tf.reduce_sum(proba * labels)
                    union = tf.reduce_sum(proba + labels)
                    # soft Dice coefficient: 2*|A.B| / (|A| + |B|)
                    self.loss = -2 * intersection / union
            #Optimizer
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=args.learning_rate, momentum=0.99)
            self.train_op = self.optimizer.minimize(self.loss)
        #summary file for tensorboard
        self.tf_train_loss = tf.Variable(0.0,
                                         trainable=False,
                                         name='Train_Loss')
        self.tf_train_loss_summary = tf.summary.scalar("Loss",
                                                       self.tf_train_loss)
        self.tf_train_accuracy = tf.Variable(0.0,
                                             trainable=False,
                                             name='Train_Accuracy')
        self.tf_train_accuracy_summary = tf.summary.scalar(
            "Train Accuracy", self.tf_train_accuracy)
        self.tf_train_dice = tf.Variable(0.0,
                                         trainable=False,
                                         name="Train_Dice_Coef")
        self.tf_train_dice_summary = tf.summary.scalar("Train Dice Coef",
                                                       self.tf_train_dice)
        self.tf_eval_accuracy = tf.Variable(0.0,
                                            trainable=False,
                                            name='Eval_accuracy')
        self.tf_eval_accuracy_summary = tf.summary.scalar(
            'Evaluation Accuracy', self.tf_eval_accuracy)
        self.tf_eval_dice = tf.Variable(0.0,
                                        trainable=False,
                                        name="Eval_Dice_Coef")
        self.tf_eval_dice_summary = tf.summary.scalar("Evaluation Dice Coef",
                                                      self.tf_eval_dice)
        self.writer = tf.summary.FileWriter('./graphs', self.sess.graph)
        #saver
        self.saver = tf.train.Saver()
        self.sess.run(tf.global_variables_initializer())

    def save_model(self, filename):
        with tf.Graph().as_default():
            self.saver.save(self.sess, filename)

    def train(self):
        with tf.Graph().as_default():
            for i_epoch in range(1, self.args.epochs + 1):
                #init paramters for summary
                loss_train = []
                accuracy_train = []
                accuracy_val = []
                dice_train = []
                dice_val = []
                #streaming image
                #images_train = img_generator('images_train.json')
                #images_val = img_generator('images_val.json')
                #checkpoint
                self.save_model(
                    filename='./checkpoints/checkpoint_epoch-{}.ckpt'.format(
                        i_epoch))
                #train
                catIDs = list(range(1, self.args.nb_classes + 1))
                print("Epoch {} \n".format(i_epoch))
                print("Train \n")
                #minibatch
                minibatch_image = []
                minibatch_label = []
                count = 0
                #Find images with categories
                imgIds = self.coco_train.getImgIds(catIds=catIDs)
                catIDs = [x - 1 for x in catIDs]
                for imgId in tqdm(imgIds):
                    count += 1
                    #get image
                    image = self.coco_train.loadImgs([imgId])
                    #create ground truth map
                    y = np.zeros((512, 512, self.args.nb_classes))
                    for cat in catIDs:
                        annIds = self.coco_train.getAnnIds(
                            imgIds=image[0]['id'], catIds=[cat + 1])
                        anns = self.coco_train.loadAnns(annIds)
                        if len(anns) > 0:
                            for ann in anns:
                                mask = self.coco_train.annToMask(ann)
                                mask = resize(mask, (512, 512),
                                              interpolation=cv2.INTER_NEAREST)
                                y[:, :,
                                  cat] = np.logical_or(y[:, :, cat],
                                                       mask).astype(np.float32)
                    #import image
                    img = io.imread("../train2014/{}".format(
                        image[0]["file_name"]))
                    img = resize(img, (512, 512))
                    if img.shape == (512, 512):
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                    minibatch_image.append(img)
                    minibatch_label.append(y)
                    if len(minibatch_image
                           ) == self.args.batch_size or count == len(imgIds):
                        # get loss training
                        loss_train.append(
                            self.sess.run(self.loss,
                                          feed_dict={
                                              self.input_img:
                                              np.asarray(minibatch_image),
                                              self.label:
                                              np.asarray(minibatch_label)
                                          }))
                        #feed forward + back propagation
                        self.sess.run(self.train_op,
                                      feed_dict={
                                          self.input_img:
                                          np.asarray(minibatch_image),
                                          self.label:
                                          np.asarray(minibatch_label)
                                      })
                        #get accuracy training
                        softmax = self.sess.run(self.model.output_proba,
                                                feed_dict={
                                                    self.input_img:
                                                    np.asarray(minibatch_image)
                                                })
                        nb_total_bit = 512 * 512 * self.args.nb_classes
                        for i_batch in range(softmax.shape[0]):
                            predicted_mask = probaToBinaryMask(
                                softmax[i_batch])
                            nb_TP_bit = np.sum(
                                np.logical_and(predicted_mask,
                                               minibatch_label[i_batch]))
                            accuracy_train.append(nb_TP_bit / nb_total_bit)
                            #get dice coef training
                            intersection = nb_TP_bit
                            union = np.sum(predicted_mask) + np.sum(
                                minibatch_label[i_batch])
                            dice_train.append(2 * intersection / union)
                        #reset minibatch
                        minibatch_label.clear()
                        minibatch_image.clear()
                #evaluation
                #Find image with categories
                catIDs = list(range(1, self.args.nb_classes + 1))
                imgIds = self.coco_val.getImgIds(catIds=catIDs)
                catIDs = [x - 1 for x in catIDs]
                print("Evaluation \n")
                for imgId in tqdm(imgIds):
                    #get image
                    image = self.coco_val.loadImgs([imgId])
                    #create ground truth map
                    y = np.zeros((512, 512, self.args.nb_classes))
                    for cat in catIDs:
                        annIds = self.coco_val.getAnnIds(imgIds=image[0]['id'],
                                                         catIds=[cat + 1])
                        anns = self.coco_val.loadAnns(annIds)
                        if len(anns) > 0:
                            for ann in anns:
                                mask = self.coco_val.annToMask(ann)
                                mask = resize(mask, (512, 512),
                                              interpolation=cv2.INTER_NEAREST)
                                y[:, :,
                                  cat] = np.logical_or(y[:, :, cat],
                                                       mask).astype(np.float32)
                    #import image
                    img = io.imread("../val2014/{}".format(
                        image[0]["file_name"]))
                    img = resize(img, (512, 512))
                    if img.shape == (512, 512):
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                    #predict
                    softmax = self.sess.run(self.model.output_proba,
                                            feed_dict={
                                                self.input_img:
                                                np.expand_dims(img, axis=0)
                                            })
                    #get accuracy val
                    predicted_mask = probaToBinaryMask(softmax)
                    nb_TP_bit = np.sum(np.logical_and(predicted_mask, y))
                    nb_total_bit = 512 * 512 * self.args.nb_classes
                    accuracy_val.append(nb_TP_bit / nb_total_bit)
                    #get dice val
                    intersection = nb_TP_bit
                    union = np.sum(predicted_mask) + np.sum(y)
                    dice_val.append(2 * intersection / union)
                #write event for tensorboard
                summary = self.sess.run(self.tf_train_accuracy_summary,
                                        feed_dict={
                                            self.tf_train_accuracy:
                                            np.mean(np.asarray(accuracy_train))
                                        })
                self.writer.add_summary(summary, i_epoch)
                summary = self.sess.run(self.tf_train_loss_summary,
                                        feed_dict={
                                            self.tf_train_loss:
                                            np.mean(np.asarray(loss_train))
                                        })
                self.writer.add_summary(summary, i_epoch)
                summary = self.sess.run(self.tf_train_dice_summary,
                                        feed_dict={
                                            self.tf_train_dice:
                                            np.mean(np.asarray(dice_train))
                                        })
                self.writer.add_summary(summary, i_epoch)
                summary = self.sess.run(self.tf_eval_accuracy_summary,
                                        feed_dict={
                                            self.tf_eval_accuracy:
                                            np.mean(np.asarray(accuracy_val))
                                        })
                self.writer.add_summary(summary, i_epoch)
                summary = self.sess.run(self.tf_eval_dice_summary,
                                        feed_dict={
                                            self.tf_eval_dice:
                                            np.mean(np.asarray(dice_val))
                                        })
        self.save_model(filename='./model-{}.ckpt'.format(i_epoch))
Example #26
# while counter < nb_images:

for counter in tqdm(range(0, nb_images), desc='image'):
    imId = imgIds.pop(np.random.randint(0, len(imgIds)))
    img = coco.loadImgs(imId)[0]
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    counteri = 0
    if len(anns) < 3:
        continue
    name = str(anns[0]['image_id'])
    namei = []
    maski = []
    for i in range(len(anns)):
        mask = coco.annToMask(anns[i])
        if i == 0:
            insseg = np.round(mask) * anns[i][
                'category_id']  #segDict[anns[i]['category_id']]
        else:
            insseg += np.round(mask) * anns[i][
                'category_id']  #segDict[anns[i]['category_id']]
        ratio = mask.sum() / np.size(mask)
        if (ratio > 0.4):
            continue
        if (ratio < 0.03):
            continue

        minim = cv2.resize(mask, bsize, interpolation=cv2.INTER_NEAREST)
        im = Image.open(os.path.join(dataDir, dataType, img['file_name']))
        if im.mode != 'RGB':
Example #27
class COCODataset(GeneralizedDataset):
    def __init__(self, data_dir, split, train=False):
        super().__init__()
        from pycocotools.coco import COCO
        
        self.data_dir = data_dir
        self.split = split
        self.train = train
        
        if train:
            ann_file = '/data/zihaosh/data_hw3/pascal_train.json'
        else:
            ann_file = '/data/zihaosh/hw3/test.json'
        print(ann_file)
        self.coco = COCO(ann_file)
        self.ids = [str(k) for k in self.coco.imgs]
        
        self._classes = {k: v["name"] for k, v in self.coco.cats.items()}
        self.classes = tuple(self.coco.cats[k]["name"] for k in sorted(self.coco.cats))
        # map result labels back to annotation category ids
        self.ann_labels = {self.classes.index(v): k for k, v in self._classes.items()}
        
        checked_id_file = os.path.join(data_dir, "checked_{}.txt".format(split))
        if train:
            if not os.path.exists(checked_id_file):
                self._aspect_ratios = [v["width"] / v["height"] for v in self.coco.imgs.values()]
            self.check_dataset(checked_id_file)
        
    def get_image(self, img_id):
        img_id = int(img_id)
        img_info = self.coco.imgs[img_id]
        image = Image.open(os.path.join(self.data_dir, "{}".format(self.split), img_info["file_name"]))
        return image.convert("RGB")
    
    @staticmethod
    def convert_to_xyxy(box): # box format: (xmin, ymin, w, h)
        new_box = torch.zeros_like(box)
        new_box[:, 0] = box[:, 0]
        new_box[:, 1] = box[:, 1]
        new_box[:, 2] = box[:, 0] + box[:, 2]
        new_box[:, 3] = box[:, 1] + box[:, 3]
        return new_box # new_box format: (xmin, ymin, xmax, ymax)
        
    def get_target(self, img_id):
        img_id = int(img_id)
        ann_ids = self.coco.getAnnIds(img_id)
        anns = self.coco.loadAnns(ann_ids)
        boxes = []
        labels = []
        masks = []

        if len(anns) > 0:
            for ann in anns:
                boxes.append(ann['bbox'])
                name = self._classes[ann["category_id"]]
                labels.append(self.classes.index(name))
                mask = self.coco.annToMask(ann)
                mask = torch.tensor(mask, dtype=torch.uint8)
                masks.append(mask)

            boxes = torch.tensor(boxes, dtype=torch.float32)
            boxes = self.convert_to_xyxy(boxes)
            labels = torch.tensor(labels)
            masks = torch.stack(masks)

        target = dict(image_id=torch.tensor([img_id]), boxes=boxes, labels=labels, masks=masks)
        return target
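A quick check of the convert_to_xyxy static method above: a 30x40 box anchored at (10, 20) in (xmin, ymin, w, h) form becomes (10, 20, 40, 60) in corner form:

import torch

box = torch.tensor([[10.0, 20.0, 30.0, 40.0]])  # (xmin, ymin, w, h)
print(COCODataset.convert_to_xyxy(box))         # tensor([[10., 20., 40., 60.]])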
Example #28
0
class COCO2014:
    def __init__(self, root_path='./COCO/', mode='train'):
        print('Init COCO2014 Object......')
        # set paths
        self.train_image_dir = root_path + 'images/train2014'
        self.val_image_dir = root_path + 'images/val2014'
        train_ann_path = root_path + 'annotations/instances_train2014.json'
        val_ann_path = root_path + 'annotations/instances_val2014.json'
        # Initialize COCO api for instance annotations.
        if mode == 'train':
            self.coco_train = COCO(train_ann_path)
            self.train_image_ids = self.coco_train.getImgIds()
        else:
            self.coco_val = COCO(val_ann_path)
            self.val_image_ids = self.coco_val.getImgIds()
            # get image ids
            voc_cat_ids = [5,2,16,9,44,6,3,17,62,21,67,18,19,4,1,64,20,7,72]
            unvoc_cat_ids = list(set(np.arange(0, 80, 1).tolist()) - set(voc_cat_ids))
            self.val_image_ids = set()
            for i, cat_id in enumerate(unvoc_cat_ids):
                one_cat_img_ids = set(self.coco_val.getImgIds(catIds=[cat_id]))
                while len(self.val_image_ids) != (i + 1) * 10:
                    if len(one_cat_img_ids) > 0:
                        self.val_image_ids.add(one_cat_img_ids.pop())
                    else:
                        break

            self.val_image_ids = list(self.val_image_ids)

    def random_pos_points(self, mask, num_points):
        index_xs, index_ys = np.where(mask == 1)
        index = np.stack([index_xs, index_ys], axis=1)
        real_num_points = min(len(index), num_points)
        temp = np.arange(len(index))
        if len(temp) != 0:
            random_points_indice = np.random.choice(temp, real_num_points)
        else:
            return []
        random_points = []
        for i in random_points_indice:
            random_points.append(index[i])
        return random_points

    def random_neg_points(self, mask, num_points):
        index_xs, index_ys = np.where(mask == 0)
        index = np.stack([index_xs, index_ys], axis=1)
        real_num_points = min(len(index), num_points)
        temp = np.arange(len(index))
        if len(temp) != 0:
            random_points_indice = np.random.choice(temp, real_num_points)
        else:
            return []
        random_points = []
        for i in random_points_indice:
            random_points.append(index[i])
        return random_points

    def read_one_train_image(self):  # read one image and get its masks
        if not hasattr(self, 'train_location'):
            self.train_location = 0
        # read image
        while True:
            image_id = int(self.train_image_ids[self.train_location])
            self.train_location = (self.train_location + 1) % len(self.train_image_ids)
            image = self.coco_train.loadImgs(image_id)[0]
            filename = image['file_name']
            image = io.imread('{}/{}'.format(self.train_image_dir, filename))
            if len(np.shape(image)) != 2:
                break
        # read anns
        annIds = self.coco_train.getAnnIds(imgIds=image_id)
        anns = self.coco_train.loadAnns(annIds)

        # ann to masks
        masks = []
        for ann in anns:
            single_mask = self.coco_train.annToMask(ann)  # change ann to single mask
            single_mask = single_mask.astype(np.uint8)
            masks.append(single_mask)
        return image, masks
    def read_one_val_image(self):  # read one image and get its masks
        if not hasattr(self, 'val_location'):
            self.val_location = 0
        # read image
        while True:
            image_id = int(self.val_image_ids[self.val_location])
            self.val_location = (self.val_location + 1) % len(self.val_image_ids)
            image = self.coco_val.loadImgs(image_id)[0]
            filename = image['file_name']
            image = io.imread('{}/{}'.format(self.val_image_dir, filename))

            # read anns
            annIds = self.coco_val.getAnnIds(imgIds=image_id)
            anns = self.coco_val.loadAnns(annIds)

            # ann to masks
            masks = []
            for ann in anns:
                single_mask = self.coco_val.annToMask(ann)  # change ann to single mask
                single_mask = single_mask.astype(np.uint8)
                masks.append(single_mask)
            if len(np.shape(image)) == 3 and len(np.shape(masks)) == 3:
                break
        return image, masks
    def simulate(self, image, masks):  # for every mask, get positive and negative inputs
        images = []
        for mask in masks:# every mask produces two planes
            # get points
            pos_points = deepcopy(self.random_pos_points(mask, 15))
            neg_points = deepcopy(self.random_neg_points(mask, 15))
            # get planes
            pos_plane = np.zeros_like(mask)[:, :, np.newaxis]
            neg_plane = np.zeros_like(mask)[:, :, np.newaxis]
            pos_plane = pos_plane.copy()
            neg_plane = neg_plane.copy()

            for i in range(len(pos_points)):
                cv2.circle(pos_plane, (pos_points[i][1], pos_points[i][0]), 5, 1, thickness=-1)
            for i in range(len(neg_points)):
                cv2.circle(neg_plane, (neg_points[i][1], neg_points[i][0]), 5, 1, thickness=-1)
            images.append(np.concatenate([image, pos_plane, neg_plane], axis=2)) # get 5 channels
        return images, masks # images:[x, height, width, 5], masks:[x, height, width]

    def add_queue(self, max_queue_size=10):
        if not hasattr(self, 'queue'):
            self.queue = queue.Queue(maxsize=max_queue_size)
        while True:
            image, masks = self.read_one_train_image()
            images, masks = self.simulate(image, masks)
            for i in range(len(images)):
                self.queue.put([images[i], masks[i]])
    def start_queue(self, max_queue_size=10):
        if not hasattr(self, 'queue'):
            queue_thread = threading.Thread(target=self.add_queue, args=(max_queue_size, ))
            queue_thread.start()
    def get_batch_train(self, batch_size=4, image_size=(513, 513)):
        while not hasattr(self, 'queue'):
            self.start_queue()
        batch_x = []
        batch_y = []
        for i in range(batch_size):
            image, mask = self.queue.get()
            image = cv2.resize(image, image_size)
            mask = cv2.resize(mask, image_size, interpolation=cv2.INTER_NEAREST)

            batch_x.append(image)
            batch_y.append(mask)
        return batch_x, batch_y

    def get_one_val(self):
        image, masks = self.read_one_val_image()
        images, masks = self.simulate(image, masks)
        return [images[0]], [masks[0]]
    def add_queue_val(self, max_queue_size=10):
        if not hasattr(self, 'val_queue'):
            self.val_queue = queue.Queue(maxsize=max_queue_size)
        while True:
            image, masks = self.read_one_val_image()
            images, masks = self.simulate(image, masks)
            for i in range(len(images)):
                self.val_queue.put([images[i], masks[i]])
    def start_queue_val(self, max_queue_size=10):
        if not hasattr(self, 'val_queue'):
            queue_thread = threading.Thread(target=self.add_queue_val, args=(max_queue_size, ))
            queue_thread.start()
    def get_batch_val(self, batch_size=4):
        if not hasattr(self, 'val_queue'):
            self.start_queue_val()
        batch_x = []
        batch_y = []
        for i in range(batch_size):
            image, mask = self.val_queue.get()
            # only keep samples whose spatial size matches the first one, so
            # the returned batch can contain fewer than batch_size items
            if not batch_x:
                batch_x.append(image)
                batch_y.append(mask)
            elif np.shape(batch_x)[1] == np.shape(image)[0] and np.shape(batch_x)[2] == np.shape(image)[1]:
                batch_x.append(image)
                batch_y.append(mask)
        return batch_x, batch_y
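
A hedged usage sketch for the queue-backed generator above. The class name CocoClickDataset is a placeholder (the constructor is not shown in this excerpt); only start_queue and get_batch_train come from the code.

import numpy as np

dataset = CocoClickDataset()            # placeholder name for the class above
dataset.start_queue(max_queue_size=10)  # spawn the background producer thread

for step in range(1000):
    batch_x, batch_y = dataset.get_batch_train(batch_size=4, image_size=(513, 513))
    batch_x = np.asarray(batch_x, np.float32)  # [4, 513, 513, 5]: image + pos/neg click planes
    batch_y = np.asarray(batch_y, np.float32)  # [4, 513, 513] binary masks
    # feed batch_x / batch_y into the model's train step here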
Example #29
class COCODetection(data.Dataset):
    """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
    Args:
        root (string): Root directory where images are downloaded to.
        set_name (string): Name of the specific set of COCO images.
        transform (callable, optional): A function/transform that augments the
                                        raw images`
        target_transform (callable, optional): A function/transform that takes
        in the target (bbox) and transforms it.
        prep_crowds (bool): Whether or not to prepare crowds for the evaluation step.
    """
    def __init__(self,
                 image_path,
                 info_file,
                 transform=None,
                 target_transform=None,
                 dataset_name='MS COCO',
                 has_gt=True):
        # Do this here because we have too many things named COCO
        from pycocotools.coco import COCO

        if target_transform is None:
            target_transform = COCOAnnotationTransform()

        self.root = image_path
        self.coco = COCO(info_file)  # load the annotation file into the COCO API

        # self.coco.imgToAnns holds the bbox, category_id, image_id and
        # segmentation info for every annotation in the file; collect the ids
        # of all images that actually have annotations
        self.ids = list(self.coco.imgToAnns.keys())
        print("self.ids:")
        print(len(self.ids))
        if len(self.ids) == 0 or not has_gt:
            self.ids = list(self.coco.imgs.keys())

        # transform is an SSDAugmentation instance.
        # COCOAnnotationTransform converts COCO annotations into tensors of
        # bbox coords and label indices.
        self.transform = transform
        # keep the transform resolved above instead of discarding the argument
        self.target_transform = target_transform

        self.name = dataset_name
        self.has_gt = has_gt

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: Tuple (image, (target, masks, num_crowds)).
                   target is the object returned by ``coco.loadAnns``.
        """
        im, gt, masks, h, w, num_crowds = self.pull_item(index)
        return im, (gt, masks, num_crowds)

    def __len__(self):
        return len(self.ids)

    def pull_item(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: Tuple (image, target, masks, height, width, crowd).
                   target is the object returned by ``coco.loadAnns``.
            Note that if no crowd annotations exist, crowd will be None
        """
        img_id = self.ids[index]

        if self.has_gt:
            ann_ids = self.coco.getAnnIds(imgIds=img_id)

            # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
            target = [
                x for x in self.coco.loadAnns(ann_ids)
                if x['image_id'] == img_id
            ]
        else:
            target = []

        # Separate out crowd annotations. These are annotations that signify a large crowd of
        # objects of said class, where there is no annotation for each individual object. Both
        # during testing and training, consider these crowds as neutral.
        crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
        num_crowds = len(crowd)

        for x in crowd:
            x['category_id'] = -1

        # This is so we ensure that all crowd annotations are at the end of the array
        target += crowd

        # The split here is to have compatibility with both COCO2014 and 2017 annotations.
        # In 2014, images have the pattern COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg.
        # Our script downloads the images as %012d.jpg so convert accordingly.
        file_name = self.coco.loadImgs(img_id)[0]['file_name']

        if file_name.startswith('COCO'):
            file_name = file_name.split('_')[-1]

        path = osp.join(self.root, file_name)
        assert osp.exists(path), 'Image path does not exist: {}'.format(path)

        img = cv2.imread(path)
        height, width, _ = img.shape

        masks = None  # default when the image has no (non-crowd) annotations
        if len(target) > 0:
            # Pool all the masks for this image into one [num_objects,height,width] matrix
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)

        if self.target_transform is not None and len(target) > 0:
            target = self.target_transform(target, width, height)

        if self.transform is not None:
            if len(target) > 0:
                target = np.array(target)
                img, masks, boxes, labels = self.transform(
                    img, masks, target[:, :4], {
                        'num_crowds': num_crowds,
                        'labels': target[:, 4]
                    })

                # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations
                num_crowds = labels['num_crowds']
                labels = labels['labels']

                target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
            else:
                img, _, _, _ = self.transform(
                    img, np.zeros((1, height, width), dtype=np.float32),
                    np.array([[0, 0, 1, 1]]), {
                        'num_crowds': 0,
                        'labels': np.array([0])
                    })
                masks = None
                target = None

        if target is not None and target.shape[0] == 0:
            print(
                'Warning: Augmentation output an example with no ground truth. Resampling...'
            )
            return self.pull_item(random.randint(0, len(self.ids) - 1))

        return torch.from_numpy(img).permute(
            2, 0, 1), target, masks, height, width, num_crowds

    def pull_image(self, index):
        '''Returns the original image at index as a cv2 BGR ndarray

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to show
        Return:
            cv2 img
        '''
        img_id = self.ids[index]
        path = self.coco.loadImgs(img_id)[0]['file_name']
        return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR)

    def pull_anno(self, index):
        '''Returns the original annotation of image at index

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to get annotation of
        Return:
            list: the raw COCO annotation dicts for this image,
                  as returned by ``coco.loadAnns``
        '''
        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        return self.coco.loadAnns(ann_ids)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(
            tmp,
            self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(
            tmp,
            self.target_transform.__repr__().replace('\n',
                                                     '\n' + ' ' * len(tmp)))
        return fmt_str
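
A short, hedged usage sketch for this dataset. The paths are placeholders, and SSDAugmentation is assumed here to be default-constructible (the code above only shows it being passed in as transform):

# placeholder paths; substitute the project's own layout
dataset = COCODetection(image_path='data/coco/train2017/',
                        info_file='data/coco/annotations/instances_train2017.json',
                        transform=SSDAugmentation())  # assumed default-constructible

im, (gt, masks, num_crowds) = dataset[0]
print(im.shape)     # [3, H, W] CHW image tensor
print(gt.shape)     # [num_objs, 5]: x1, y1, x2, y2, label (crowds last)
print(masks.shape)  # [num_objs, H, W] binary masks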
Example #30
class COCODetection(data.Dataset):
    def __init__(self, image_path, info_file, augmentation=None):
        self.image_path = image_path
        self.coco = COCO(info_file)
        self.ids = list(self.coco.imgToAnns.keys())
        self.augmentation = augmentation
        self.label_map = cfg.label_map

    def __getitem__(self, index):
        im, gt, masks, h, w, num_crowds = self.pull_item(index)
        return im, gt, masks, num_crowds

    def __len__(self):
        return len(self.ids)

    def pull_item(self, index):
        img_ids = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_ids)

        # 'target' includes {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
        target = self.coco.loadAnns(ann_ids)

        # Separate out crowd annotations. These are annotations that signify a large crowd of objects, where there is
        # no annotation for each individual object. When testing and training, treat these crowds as neutral.
        crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
        num_crowds = len(crowd)

        # Ensure that all crowd annotations are at the end of the array.
        target += crowd
        file_name = self.coco.loadImgs(img_ids)[0]['file_name']

        img_path = osp.join(self.image_path, file_name)
        assert osp.exists(img_path), f'Image path does not exist: {img_path}'

        img = cv2.imread(img_path)
        height, width, _ = img.shape

        masks, box_list = None, []  # defaults for images with no usable annotations
        if len(target) > 0:
            masks = [self.coco.annToMask(aa).reshape(-1) for aa in target]
            masks = np.vstack(masks)
            masks = masks.reshape(
                (-1, height, width))  # between 0~1, (num_objs, height, width)
            # Uncomment this to visualize the masks.
            # cv2.imshow('aa', masks[0]*255)
            # cv2.waitKey()

            scale = np.array([width, height, width, height])
            box_list = []
            for obj in target:
                if 'bbox' in obj:
                    bbox = obj['bbox']
                    label_idx = self.label_map[obj['category_id']] - 1
                    final_box = list(
                        np.array([
                            bbox[0], bbox[1], bbox[0] + bbox[2],
                            bbox[1] + bbox[3]
                        ]) / scale)
                    final_box.append(label_idx)
                    box_list += [
                        final_box
                    ]  # (xmin, ymin, xmax, ymax, label_idx), between 0~1
                else:
                    print("No bbox found for object ", obj)

        boxes = np.array(box_list)  # fallback when no augmentation is applied
        if self.augmentation is not None:
            if len(box_list) > 0:
                box_array = np.array(box_list)
                img, masks, boxes, labels = self.augmentation(
                    img, masks, box_array[:, :4], {
                        'num_crowds': num_crowds,
                        'labels': box_array[:, 4]
                    })

                # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations.
                num_crowds = labels['num_crowds']
                labels = labels['labels']
                boxes = np.hstack((boxes, np.expand_dims(labels, axis=1)))

        return torch.from_numpy(img).permute(
            2, 0, 1), boxes, masks, height, width, num_crowds
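
The box handling above converts COCO's [x, y, w, h] pixel boxes into corner form normalized to 0..1. A tiny standalone check of that arithmetic:

import numpy as np

# a 100x50 box at (20, 30) inside a 200x100 (width x height) image
bbox, width, height = [20, 30, 100, 50], 200, 100
scale = np.array([width, height, width, height])
corners = np.array([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]) / scale
print(corners)  # [0.1 0.3 0.6 0.8] -> (xmin, ymin, xmax, ymax), between 0 and 1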
Example #31
def coco_to_TFrecords(keypoint_annotations_file, transformed_annotations_file,
                      config):
    """This script transforms the COCO 2017 keypoint train,val files
    into a format with all keypoints and joints for an image, in a more convenient format,
    where the first axes is the body part or joint, the second is the object, and the third are the
    components (x,y,a) for keypoint and (x1,y1,x2,y2,a) for joint.
    The script saves it into matching pickle files.
    Meant to run once.
    normalizes size the pixel coords to be normalized by size to 0..1 range
    """

    print("\nReading " + keypoint_annotations_file)

    coco = COCO(keypoint_annotations_file)

    category = 1  # person
    imgIds = coco.getImgIds(catIds=[category])
    imgIds.sort()
    print("Found %d images" % len(imgIds))

    files_path = transformed_annotations_file + "-{:03}.tfrecords"
    with FileSharder(tf.io.TFRecordWriter, files_path,
                     config.IMAGES_PER_TFRECORD) as writer:
        for img_id in imgIds:
            img_info = coco.loadImgs(img_id)[0]

            size = [img_info['height'], img_info['width']]

            annIds = coco.getAnnIds(imgIds=[img_id])
            anns = coco.loadAnns(annIds)

            persons_kpts = []
            for annotation in anns:
                if annotation['num_keypoints'] > 0:
                    kpts = annotation['keypoints']

                    # map to new kpts
                    kpts = reshape_kpts(kpts, config)
                    kpts = map_new_kpts(kpts, config)

                    persons_kpts.append(kpts)

            if not persons_kpts:
                continue  # this means that the image has no people with keypoints annotations

            persons_kpts = np.array(
                persons_kpts, dtype=np.float32)  # convert from list to array

            keypoints = transform_keypts(persons_kpts,
                                         np.array(size, dtype=np.int32))
            tr_joint = create_all_joints(keypoints, config)
            tr_keypoints = keypoints.transpose(
                (1, 0, 2))  # transpose keypoints for later stages

            total_mask = np.zeros(size, dtype=np.float32)
            for annotation in anns:
                if annotation[
                        'num_keypoints'] == 0:  # only mask those without keypoints
                    single_mask = coco.annToMask(annotation)
                    total_mask = np.max([total_mask, single_mask], axis=0)

            total_mask = cv2.resize(total_mask,
                                    (config.LABEL_HEIGHT, config.LABEL_WIDTH))
            total_mask = (total_mask > 0.01).astype(np.int16)

            kernel = np.ones((5, 5), np.uint8)
            total_mask = cv2.dilate(total_mask,
                                    kernel)  # get more area after downsample
            total_mask = total_mask.astype(bool)
            total_mask = np.invert(
                total_mask)  # invert for loss multiplication later
            total_mask = total_mask.astype(np.float32)

            img_path = config.IMAGES_PATH + "/" + img_info['file_name']
            try:
                image_raw = tf.io.read_file(img_path)
            except tf.errors.OpError:  # avoid a bare except; img_path is now always defined
                print("Couldn't read file %s" % img_path)
                continue

            example = encode_example(img_id, image_raw, size, tr_keypoints,
                                     tr_joint, total_mask)
            writer.write(example)
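
The masking steps above (merge the masks of people without keypoints, downsample, re-binarize, dilate, invert) build a multiplicative loss mask. A minimal standalone sketch of the same recipe; the 5x5 kernel and 0.01 threshold are taken from the code, while the label size is a stand-in:

import cv2
import numpy as np

def build_loss_mask(person_masks, label_size=(46, 46)):
    # union of the masks of the people the loss should ignore
    total_mask = np.zeros_like(person_masks[0], dtype=np.float32)
    for single_mask in person_masks:
        total_mask = np.maximum(total_mask, single_mask.astype(np.float32))

    total_mask = cv2.resize(total_mask, label_size)                 # to label resolution
    total_mask = (total_mask > 0.01).astype(np.uint8)               # re-binarize
    total_mask = cv2.dilate(total_mask, np.ones((5, 5), np.uint8))  # grow after downsample
    # invert: 1 where the loss counts, 0 over the ignored people
    return np.invert(total_mask.astype(bool)).astype(np.float32)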
Example #32
class CocoPoseGenerator(object):

    def __init__(self, args, json_dir=JOSN_DIR, mask_dir=MASK_DIR, image_dir=IMAGE_DIR):
        self.args = args
        self.json_dir = os.path.join(self.args.root_dir, json_dir)
        if not os.path.exists(self.json_dir):
            os.makedirs(self.json_dir)

        self.image_dir = os.path.join(self.args.root_dir, image_dir)
        if not os.path.exists(self.image_dir):
            os.makedirs(self.image_dir)

        self.mask_dir = os.path.join(self.args.root_dir, mask_dir)
        if not os.path.exists(self.mask_dir):
            os.makedirs(self.mask_dir)

        self.coco = COCO(self.args.anno_file)
        self.img_ids = list(self.coco.imgs.keys())

    def generate_label(self):
        for i, img_id in enumerate(self.img_ids):
            json_dict = dict()
            ann_ids = self.coco.getAnnIds(imgIds=img_id)
            img_anns = self.coco.loadAnns(ann_ids)
            num_persons = len(img_anns)
            filename = self.coco.imgs[img_id]['file_name']
            width = self.coco.imgs[img_id]['width']
            height = self.coco.imgs[img_id]['height']
            json_dict['height'] = height
            json_dict['width'] = width

            mask_list = list()

            persons = list()
            person_centers = list()

            for p in range(num_persons):

                if img_anns[p]['num_keypoints'] < 5 or img_anns[p]['area'] < 32 * 32:
                    mask_list.append(p)
                    continue
                kpt = img_anns[p]['keypoints']
                dic = dict()

                # person center
                person_center = [img_anns[p]['bbox'][0] + img_anns[p]['bbox'][2] / 2.0,
                                 img_anns[p]['bbox'][1] + img_anns[p]['bbox'][3] / 2.0]
                scale = img_anns[p]['bbox'][3] / self.args.input_size

                # skip this person if the distance to an existing person is too small
                flag = 0
                for pc in person_centers:
                    dis = math.sqrt((person_center[0] - pc[0]) * (person_center[0] - pc[0])
                                    + (person_center[1] - pc[1]) * (person_center[1] - pc[1]))
                    if dis < pc[2] * 0.3:
                        flag = 1
                        break

                if flag == 1:
                    mask_list.append(p)
                    continue

                dic['bbox'] = img_anns[p]['bbox']
                dic['objpos'] = person_center
                dic['keypoints'] = np.zeros((17, 3)).tolist()
                dic['scale'] = scale
                for part in range(17):
                    dic['keypoints'][part][0] = kpt[part * 3]
                    dic['keypoints'][part][1] = kpt[part * 3 + 1]
                    # remap COCO visibility (2=visible, 1=labeled but not
                    # visible, 0=not labeled) to: visible=1, not visible=0,
                    # not labeled=2
                    if kpt[part * 3 + 2] == 2:
                        dic['keypoints'][part][2] = 1
                    elif kpt[part * 3 + 2] == 1:
                        dic['keypoints'][part][2] = 0
                    else:
                        dic['keypoints'][part][2] = 2

                persons.append(dic)
                person_centers.append(np.append(person_center, max(img_anns[p]['bbox'][2], img_anns[p]['bbox'][3])))

            if len(persons) > 0:
                persons = self.__coco_to_ours(persons)
                json_dict['persons'] = persons
                json_path = os.path.join(self.json_dir, '{}.json'.format(filename.split('.')[0]))
                with open(json_path, 'w') as fw:
                    fw.write(json.dumps(json_dict))

                mask_all = np.zeros((height, width), dtype=np.uint8)
                mask_miss = np.zeros((height, width), dtype=np.uint8)
                flag = 0
                for p in range(num_persons):
                    if img_anns[p]['iscrowd'] == 1:
                        mask_crowd = self.coco.annToMask(img_anns[p])
                        temp = np.bitwise_and(mask_all, mask_crowd)
                        mask_crowd = mask_crowd - temp
                        flag += 1
                        continue
                    else:
                        mask = self.coco.annToMask(img_anns[p])

                    mask_all = np.bitwise_or(mask, mask_all)

                    if p in mask_list:
                        mask_miss = np.bitwise_or(mask, mask_miss)

                if flag < 1:
                    mask_miss = np.logical_not(mask_miss)
                elif flag == 1:
                    mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
                else:
                    raise Exception('crowd segments > 1')
                # PIL's 'P' mode expects 8-bit data, so cast the boolean mask
                mask_miss = mask_miss.astype(np.uint8)

                mask_miss_vis = np.zeros((height, width), dtype=np.uint8)
                mask_miss_vis[:, :] = mask_miss * 255
                mask_image = Image.fromarray(mask_miss, mode='P')
                mask_image.save(os.path.join(self.mask_dir, '{}.png'.format(filename.split('.')[0])))
                mask_image_vis = Image.fromarray(mask_miss_vis, mode='P')
                mask_image_vis.save(os.path.join(self.mask_dir, '{}_vis.png'.format(filename.split('.')[0])))
                shutil.copyfile(os.path.join(self.args.img_dir, filename),
                                os.path.join(self.image_dir, filename))

            if i % 1000 == 0:
                print("Processed {} of {}".format(i, len(self.img_ids)))

    def __coco_to_ours(self, persons):
        our_persons = list()
        for person in persons:
            dic = dict()
            dic['bbox'] = person['bbox']
            dic['pos_center'] = person['objpos']
            dic['keypoints'] = np.zeros((18,3)).tolist()
            dic['scale'] = person['scale']
            for i in range(17):
                dic['keypoints'][COCO_TO_OURS[i]][0] = person['keypoints'][i][0]
                dic['keypoints'][COCO_TO_OURS[i]][1] = person['keypoints'][i][1]
                dic['keypoints'][COCO_TO_OURS[i]][2] = person['keypoints'][i][2]
            dic['keypoints'][1][0] = (person['keypoints'][5][0] + person['keypoints'][6][0]) * 0.5
            dic['keypoints'][1][1] = (person['keypoints'][5][1] + person['keypoints'][6][1]) * 0.5
            if person['keypoints'][5][2] == person['keypoints'][6][2]:
                dic['keypoints'][1][2] = person['keypoints'][5][2]
            elif person['keypoints'][5][2] == 2 or person['keypoints'][6][2] == 2:
                dic['keypoints'][1][2] = 2
            else:
                dic['keypoints'][1][2] = 0

            our_persons.append(dic)

        return our_persons
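
A hedged driver for the generator above; the argparse fields are read off the constructor and generate_label (COCO_TO_OURS, JOSN_DIR, MASK_DIR and IMAGE_DIR come from elsewhere in the source), and the default values are placeholders:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--root_dir', default='data/coco_pose/')
parser.add_argument('--anno_file', default='annotations/person_keypoints_train2017.json')
parser.add_argument('--img_dir', default='train2017/')
parser.add_argument('--input_size', type=int, default=368)
args = parser.parse_args()

generator = CocoPoseGenerator(args)
generator.generate_label()  # writes per-image JSON labels plus miss/vis mask PNGs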