Example #1
class cocoReward(Reward):
    def __init__(self, ann_path):
        self.ann_path = ann_path
        self.coco = COCO(ann_path)
        self.cur_ins_index = -1
        self.cur_score = 0

    def next_episode(self, *args):
        img_id = args[0]
        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        tmp = self.coco.loadImgs(img_id)[0]
        self.shape = (tmp['height'], tmp['width'])
        self.anns = self.coco.loadAnns(ann_ids)
        print('%d instances in the mask GT of image %d' % (len(self.anns), img_id))
        self.rles = [self.coco.annToRLE(ann) for ann in self.anns]
        # self.rle_states = np.zeros(len(self.anns))
        # denote current detected mask
        self.cur_ins_index = -1
        self.cur_score = 0

    def get_reward(self, *args):
        '''
        :param args: args[0] is the current predicted mask
        :return: the score (IoU) increment and a flag indicating whether the
            current instance is finished (IoU above the stop threshold)
        '''
        # just return the iou change
        mask = args[0]
        mask = resize(mask, self.shape).astype(np.uint8)
        rle = encode(np.asfortranarray(mask))
        if self.cur_ins_index == -1:
            # choose the instance with maximum iou
            scores = iou(self.rles, [rle], np.zeros(1))
            self.cur_ins_index = np.argmax(scores)
            self.cur_score = scores[self.cur_ins_index][0]
            return self.cur_score, False
        scores = iou([self.rles[self.cur_ins_index]], [rle], np.zeros(1))
        incre = scores[0][0] - self.cur_score
        self.cur_score = scores[0][0]
        print('score %f' % self.cur_score)

        instance_stop = self.cur_score >= 0.8  # sufficiently good
        if instance_stop:
            # do not detect this instance again
            del self.rles[self.cur_ins_index]
            self.cur_ins_index = -1

        return incre, instance_stop

    def get_reward2(self, *args):
        mask = args[0]
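A minimal driving-loop sketch for the reward class above; the annotation path, img_id and predict_mask are placeholders, and the helpers the class relies on (COCO, iou, encode, resize, numpy as np) are assumed to be imported by the original module:

reward_fn = cocoReward('annotations/instances_val2017.json')  # assumed path
reward_fn.next_episode(img_id)       # img_id: any COCO image id (placeholder)
done = False
while not done:
    mask = predict_mask()            # hypothetical agent output, an HxW array
    increment, done = reward_fn.get_reward(mask)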
Example #2
def _cacheMaskImgs(dataDir,
                   classGrps,
                   training,
                   cocoObj=None):
    """ Caches mask images for each class group given in ``classGrps``.
        Generally speeds up CocoDataGen.sample().
        Mask images are stored in folders created in dataDir.

        # Params
            dataDir: Root directory of coco dataset.
            classGrps: List of lists of strings specifying object classes.
                Each list of classes will be grouped together into one mask image.
            training: If ``True`` then uses training set, otherwise uses validation set.
            cocoObj: Provides option to pass a pre-existing coco dataset object.
    """
    if cocoObj:
        coco = cocoObj
    else:
        annPath = '{}/annotations/instances_{}.json'.format(dataDir, 'train2017' if training else 'val2017')
        coco = COCO(annPath)
    # Get image ids by class group
    catIdsByGrp = [coco.getCatIds(catNms=catNms) for catNms in classGrps]
    imgIdsByGrp = []
    for grpIdx, catIds in enumerate(catIdsByGrp):
        imgIds = []
        for catId in catIds:
            imgIds.extend(coco.getImgIds(catIds=[catId]))
        imgIdsByGrp.append(list(set(imgIds)))
    for grp_idx in range(len(classGrps)):
        folder_name = '_'.join(classGrps[grp_idx]) + ('_train' if training else '_val')
        cache_path = os.path.join(dataDir, folder_name)
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        for img_id in imgIdsByGrp[grp_idx]:
            img_info = coco.loadImgs([img_id])[0]
            fname_mask = str(img_id).zfill(12)+'.jpg'
            mask_path = os.path.join(cache_path, fname_mask)
            if not os.path.exists(mask_path): 
                ann_ids = coco.getAnnIds(imgIds=img_id, catIds=catIdsByGrp[grp_idx], iscrowd=False) # Not sure about the iscrowd param TODO
                anns = coco.loadAnns(ann_ids)
                # create an empty mask image
                mask_composite = np.zeros(shape=(img_info['height'], img_info['width']), dtype=np.uint8)
                for ann in anns: # render mask of each ann and add it to the composite image
                    ann_mask = maskUtils.decode(coco.annToRLE(ann))  # binary mask of this instance
                    mask_composite = np.maximum(mask_composite, ann_mask)
                mask_composite *= 255
                cv2.imwrite(mask_path, mask_composite) # save the composite image
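A hedged invocation sketch for the cache helper; the dataset root and class groups below are made-up values:

_cacheMaskImgs(dataDir='/data/coco',
               classGrps=[['person'], ['car', 'truck', 'bus']],
               training=True)
# writes masks to /data/coco/person_train/ and /data/coco/car_truck_bus_train/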
Example #3
class COCOLoader(Loader):
    def __init__(self, is_train=True, shuffle=True):
        super(COCOLoader, self).__init__()
        coco_dir = join(dirname(__file__), '..', '..', 'COCO')
        if is_train:
            self.image_dir = join(coco_dir, 'images', 'train2014')
            self.coco = COCO(
                join(coco_dir, 'annotations', 'instances_train2014.json'))
        else:
            self.image_dir = join(coco_dir, 'images', 'val2014')
            self.coco = COCO(
                join(coco_dir, 'annotations', 'instances_val2014.json'))
        self.imgIds = self.coco.getImgIds()
        self.catIds = self.coco.getCatIds()
        self.catId2label = dict(
            zip(self.catIds, range(1, len(self.catIds) + 1)))
        self.shuffleIds = list(range(len(self.imgIds)))
        if shuffle:
            random.shuffle(self.shuffleIds)

    def __len__(self):
        return len(self.imgIds)

    def load_img(self, image_index):
        imgInd = self.imgIds[self.shuffleIds[image_index]]
        img = self.coco.imgs[imgInd]
        height, width = img['height'], img['width']
        image_path = join(self.image_dir, img['file_name'])
        return image_path, height, width

    def load_ann(self, image_index):
        imgInd = self.imgIds[self.shuffleIds[image_index]]
        annIds = self.coco.getAnnIds(imgIds=imgInd)
        anns = self.coco.loadAnns(annIds)
        anns = [ann for ann in anns if not ann['iscrowd']]
        for i, ann in enumerate(anns):
            ann['gt_id'] = self.catId2label[ann['category_id']]
            if not cfg.rpn_only:
                rle = self.coco.annToRLE(ann)
                anns[i]['mask'] = np.array(maskUtils.decode(rle))
        return anns
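A usage sketch, assuming the COCO directory layout expected by __init__ is in place:

loader = COCOLoader(is_train=False, shuffle=False)
for i in range(len(loader)):
    image_path, height, width = loader.load_img(i)
    anns = loader.load_ann(i)  # each ann gains 'gt_id' and, unless cfg.rpn_only, a decoded 'mask'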
Example #4
class DidiDataset(Dataset):
    idx_in_coco_str = [
        'left_eye', 'right_eye', 'nose', 'neck', 'left_chest', 'right_chest',
        'left_shoulder', 'left_upperarm', 'left_elbow', 'left_forearm',
        'left_wrist', 'left_hand', 'right_shoulder', 'right_upperarm',
        'right_elbow', 'right_forearm', 'right_wrist', 'right_hand'
    ]

    num_joints = len(idx_in_coco_str)  # 18

    num_joints_and_bkg = num_joints + 1  # 19

    idx_in_coco = list(range(num_joints))  # [0, 1, ..., 17]

    joint_pairs = [[3, 2], [2, 0], [2, 1], [3, 4], [3, 5], [3, 6], [6, 7],
                   [7, 8], [8, 9], [9, 10], [10, 11], [3, 12], [12, 13],
                   [13, 14], [14, 15], [15, 16], [16, 17]]

    num_connections = len(joint_pairs)  # 17

    def __init__(self, img_dir, anno_path, target_size=(368, 368), stride=8):
        self.coco_anno = COCO(anno_path)
        self.img_dir = img_dir
        self.stride = stride
        self.all_meta = []  # metadata of the main person per image
        self.ids = list(self.coco_anno.imgs.keys())

        for i, idx in enumerate(self.ids):
            img_meta = self.coco_anno.imgs[idx]

            # load annotations

            img_id = img_meta['id']
            img_file = img_meta['file_name']
            h, w = img_meta['height'], img_meta['width']
            img_path = os.path.join(self.img_dir, img_file)
            ann_ids = self.coco_anno.getAnnIds(imgIds=img_id)
            anns = self.coco_anno.loadAnns(ann_ids)

            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in anns])
            if total_keypoints == 0:
                continue

            persons = []
            prev_center = []
            masks = []
            keypoints = []

            # sort from the biggest person to the smallest one
            persons_ids = np.argsort([-a['area'] for a in anns],
                                     kind='mergesort')

            for p_id in list(persons_ids):
                person_meta = anns[p_id]

                if person_meta["iscrowd"]:
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                # skip this person if parts number is too low or if segmentation area is too small
                if person_meta["num_keypoints"] < 5 or person_meta[
                        "area"] < 32 * 32:
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                # skip this person if the distance to existing person is too small
                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]
                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break
                if too_close:
                    # add mask of this person. we don't want to show the network unlabeled people
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                keypoints.append(person_meta["keypoints"])
                pers = PersonMeta(img_path=img_path,
                                  height=h,
                                  width=w,
                                  center=np.expand_dims(person_center, axis=0),
                                  bbox=person_meta["bbox"],
                                  area=person_meta["area"],
                                  scale=person_meta["bbox"][3] /
                                  target_size[0],
                                  num_keypoints=person_meta["num_keypoints"])
                persons.append(pers)
                prev_center.append(
                    np.append(
                        person_center,
                        max(person_meta["bbox"][2], person_meta["bbox"][3])))

            if len(persons) > 0:
                main_person = persons[0]
                main_person.masks_segments = masks
                main_person.all_joints = DidiDataset.from_coco_keypoints(
                    keypoints, w, h)
                self.all_meta.append(main_person)

            if i % 1000 == 0:
                print("Loading image annot {}/{}".format(i, len(self.ids)))

    def get_ground_truth(self, all_joints, height, width, sigma, threshold):
        # build the heatmap and paf training targets for one image
        heatmap = DidiDataset.create_heatmap(
            DidiDataset.num_joints_and_bkg, height, width, all_joints, sigma, self.stride)
        paf = DidiDataset.create_paf(
            DidiDataset.num_connections, height, width, all_joints, threshold, self.stride)
        return heatmap, paf

    @staticmethod
    def from_coco_keypoints(all_keypoints, w, h):
        """
        Creates list of joints based on the list of coco keypoints vectors.
        :param all_keypoints: list of coco keypoints vector [[x1,y1,v1,x2,y2,v2,....], []]
        :param w: image width
        :param h: image height
        :return: list of joints [[(x1,y1), (x1,y1), ...], [], []]
        """
        all_joints = []
        for keypoints in all_keypoints:
            kp = np.array(keypoints)
            xs = kp[0::3]
            ys = kp[1::3]
            vs = kp[2::3]

            # filter and loads keypoints to the list
            keypoints_list = []
            for idx, (x, y, v) in enumerate(zip(xs, ys, vs)):
                # only visible and occluded keypoints are used
                if v >= 1 and x >= 0 and y >= 0 and x < w and y < h:
                    keypoints_list.append((x, y))
                else:
                    keypoints_list.append(None)

            # build the list of joints. It contains the same coordinates
            # of body parts like in the orginal coco keypoints plus
            # additional body parts interpolated from coco
            # keypoints (ex. a neck)
            joints = []
            for part_idx in range(len(DidiDataset.idx_in_coco)):
                coco_kp_idx = DidiDataset.idx_in_coco[part_idx]
                if callable(coco_kp_idx):
                    p = coco_kp_idx(keypoints_list)
                else:
                    p = keypoints_list[coco_kp_idx]
                joints.append(p)
            all_joints.append(joints)
        return all_joints

    @staticmethod
    def create_heatmap(num_maps, height, width, all_joints, sigma, stride):
        def _put_heatmap_on_plane(heatmap, plane_idx, joint, sigma, height,
                                  width, stride):
            start = stride / 2.0 - 0.5
            center_x, center_y = joint
            for g_y in range(height):
                for g_x in range(width):
                    x = start + g_x * stride
                    y = start + g_y * stride
                    d2 = (x - center_x) * (x - center_x) + (y - center_y) * (
                        y - center_y)
                    exponent = d2 / 2.0 / sigma / sigma
                    if exponent > 4.6052:
                        continue
                    heatmap[g_y, g_x, plane_idx] += math.exp(-exponent)
                    if heatmap[g_y, g_x, plane_idx] > 1.0:
                        heatmap[g_y, g_x, plane_idx] = 1.0

        heatmap = np.zeros((height, width, num_maps), dtype=np.float64)
        for joints in all_joints:
            for plane_idx, joint in enumerate(joints):
                if joint:
                    _put_heatmap_on_plane(heatmap, plane_idx, joint, sigma,
                                          height, width, stride)
        # background
        heatmap[:, :, -1] = np.clip(1.0 - np.amax(heatmap, axis=2), 0.0, 1.0)
        return heatmap

    @staticmethod
    def create_paf(num_maps, height, width, all_joints, threshold, stride):
        def _put_paf_on_plane(vectormap, countmap, plane_idx, x1, y1, x2, y2,
                              threshold, height, width):
            min_x = max(0, int(round(min(x1, x2) - threshold)))
            max_x = min(width, int(round(max(x1, x2) + threshold)))
            min_y = max(0, int(round(min(y1, y2) - threshold)))
            max_y = min(height, int(round(max(y1, y2) + threshold)))

            vec_x = x2 - x1
            vec_y = y2 - y1
            norm = math.sqrt(vec_x**2 + vec_y**2)
            if norm < 1e-8:
                return

            vec_x /= norm
            vec_y /= norm

            for y in range(min_y, max_y):
                for x in range(min_x, max_x):
                    bec_x = x - x1
                    bec_y = y - y1
                    dist = abs(bec_x * vec_y - bec_y * vec_x)
                    if dist > threshold:
                        continue
                    cnt = countmap[y][x][plane_idx]
                    if cnt == 0:
                        vectormap[y][x][plane_idx * 2 + 0] = vec_x
                        vectormap[y][x][plane_idx * 2 + 1] = vec_y
                    else:
                        vectormap[y][x][
                            plane_idx * 2 +
                            0] = (vectormap[y][x][plane_idx * 2 + 0] * cnt +
                                  vec_x) / (cnt + 1)
                        vectormap[y][x][
                            plane_idx * 2 +
                            1] = (vectormap[y][x][plane_idx * 2 + 1] * cnt +
                                  vec_y) / (cnt + 1)
                    countmap[y][x][plane_idx] += 1

        paf = np.zeros((height, width, num_maps * 2), dtype=np.float64)
        countmap = np.zeros((height, width, num_maps), dtype=np.uint8)
        for joints in all_joints:
            for plane_idx, (j_idx1,
                            j_idx2) in enumerate(DidiDataset.joint_pairs):
                center_from = joints[j_idx1]
                center_to = joints[j_idx2]
                # skip if no valid pair of keypoints
                if center_from is None or center_to is None:
                    continue
                x1, y1 = (center_from[0] / stride, center_from[1] / stride)
                x2, y2 = (center_to[0] / stride, center_to[1] / stride)
                _put_paf_on_plane(paf, countmap, plane_idx, x1, y1, x2, y2,
                                  threshold, height, width)
        return paf
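A toy sketch of the two static target builders; the joint position, sigma, threshold and the 46x46 output grid are illustrative values, not the repo's configuration:

# one person whose 18 joints all sit at pixel (100, 100)
joints = [[(100.0, 100.0)] * DidiDataset.num_joints]
heatmap = DidiDataset.create_heatmap(DidiDataset.num_joints_and_bkg,
                                     46, 46, joints, sigma=7.0, stride=8)
paf = DidiDataset.create_paf(DidiDataset.num_connections,
                             46, 46, joints, threshold=1.0, stride=8)
print(heatmap.shape, paf.shape)  # (46, 46, 19) (46, 46, 34)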
Example #5
class Test(object):
    def __init__(self, opt=None):
        assert opt is not None
        self.opt = opt
        self.device = torch.device(cfg.device)

        self.val_dataset = NewDataset(train_set=False)
        self.val_dataloader = DataLoader(
            self.val_dataset,
            batch_size=1,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.val_dataset.collate_fn)

        self.len_train_dataset = len(self.val_dataset)

        # self.model = yolov3().to(self.device)
        self.model = build_model(opt.model)
        weights_path = self.opt.weights_path
        checkpoint = torch.load(weights_path)
        self.model.load_state_dict(checkpoint)

        self.cocoGt = COCO(cfg.test_json)

    def plot_one_box(
            self,
            x,
            img,
            color=None,
            label=None,
            line_thickness=None):  # Plots one bounding box on image img
        tl = line_thickness or round(
            0.001 * max(img.shape[0:2])) + 1  # line thickness
        color = color or [random.randint(0, 255) for _ in range(3)]

        cv2.line(img, (int(x[0]), int(x[1])), (int(x[2]), int(x[3])), color,
                 tl)
        cv2.line(img, (int(x[2]), int(x[3])), (int(x[4]), int(x[5])), color,
                 tl)
        cv2.line(img, (int(x[4]), int(x[5])), (int(x[6]), int(x[7])), color,
                 tl)
        cv2.line(img, (int(x[6]), int(x[7])), (int(x[0]), int(x[1])), color,
                 tl)
        cv2.putText(img, label, (int(x[0]), int(x[1])), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 255), 1)

    def draw_box(self, anns):
        image_id = [i['image_id'] for i in anns]
        assert all(x == image_id[0] for x in image_id)
        img_ann = self.cocoGt.loadImgs(ids=image_id[0])[0]
        img_name = img_ann['file_name']
        print('images:{}'.format(img_name))
        img_path = os.path.join(self.opt.image_folder, img_name)
        txt_path = os.path.join(self.opt.output_folder,
                                img_name.replace('.png', '.txt'))
        img = cv2.imread(img_path)
        for ann in anns:
            cat = self.cocoGt.loadCats(ids=ann['category_id'])[0]
            score = ann['score']
            label = '%s %.2f' % (cat['name'], score)
            color = (0, 0, 255)
            coord = ann['segmentation'][0]
            with open(txt_path, 'a') as f:
                f.write('%s %.2f %g %g %g %g %g %g %g %g  \n' %
                        (cat['name'], score, coord[0], coord[1], coord[2],
                         coord[3], coord[4], coord[5], coord[6], coord[7]))
            self.plot_one_box(coord, img, color, label)
        cv2.imwrite(os.path.join(self.opt.output_folder, img_name), img)

    @torch.no_grad()
    def eval(self):
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(n_threads)
        cpu_device = torch.device("cpu")
        self.model.eval()

        for ann_idx in self.cocoGt.anns:
            ann = self.cocoGt.anns[ann_idx]
            ann['area'] = maskUtils.area(self.cocoGt.annToRLE(ann))

        iou_types = 'segm'
        anns = []
        mAP_list = []

        for val_data in self.val_dataloader:
            image, target, logit = val_data

            image = image.to(self.device)
            image_size = image.shape[3]  # image.shape[2] == image.shape[3]
            # i.e. the square image size after resizing

            _, pred = self.model(image)
            # TODO: currently only batch_size=1 is supported
            pred = pred.unsqueeze(0)
            pred = pred[pred[:, :, 8] > cfg.conf_thresh]
            detections = non_max_suppression(pred.unsqueeze(0),
                                             cls_thres=cfg.cls_thresh,
                                             nms_thres=cfg.conf_thresh)

            new_ann = reorginalize_target(detections, logit, image_size,
                                          self.cocoGt)
            self.draw_box(new_ann)
            anns.extend(new_ann)

        for ann in anns:
            ann['segmentation'] = self.cocoGt.annToRLE(
                ann)  # convert the polygon segmentation to RLE

        cocoDt = self.cocoGt.loadRes(anns)

        cocoEval = COCOeval(self.cocoGt, cocoDt, iou_types)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        ap_per_category(self.cocoGt, cocoEval, cfg.max_epoch)
        draw_pr(self.cocoGt, cocoEval)
        print_txt = cocoEval.stats
        coco_mAP = print_txt[0]
        voc_mAP = print_txt[1]
        if isinstance(mAP_list, list):
            mAP_list.append(voc_mAP)
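The eval method above follows the standard pycocotools evaluation flow; as a standalone sketch (file names assumed):

cocoGt = COCO('annotations/instances_val.json')  # ground truth
cocoDt = cocoGt.loadRes('detections.json')       # predictions in COCO results format
cocoEval = COCOeval(cocoGt, cocoDt, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()  # cocoEval.stats[0] is AP@[.5:.95], stats[1] is AP@.5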
Example #6
class CocoDataFlow(RNGDataFlow):
    """
    Tensorpack dataflow serving coco data points.
    """
    def __init__(self, target_size, annot_path, img_dir, select_ids=None):
        """
        Initializes dataflow.

        :param target_size:
        :param annot_path: path to the coco annotation file
        :param img_dir: directory containing images
        :param select_ids: (optional) identifiers of images to serve (for debugging)
        """
        self.img_dir = img_dir
        self.coco = COCO(annot_path)
        self.all_meta = []
        self.select_ids = select_ids
        self.target_size = target_size

    def prepare(self):
        """
        Loads coco metadata. Partially populates meta objects (image path,
        scale of main person, bounding box, area, joints) Remaining fields
        are populated in next steps - MapData tensorpack tranformer.
        """
        if self.select_ids:
            ids = self.select_ids
        else:
            ids = list(self.coco.imgs.keys())

        for i, img_id in enumerate(ids):
            img_meta = self.coco.imgs[img_id]

            # load annotations

            img_id = img_meta['id']
            img_file = img_meta['file_name']
            h, w = img_meta['height'], img_meta['width']
            img_path = os.path.join(self.img_dir, img_file)
            ann_ids = self.coco.getAnnIds(imgIds=img_id)
            anns = self.coco.loadAnns(ann_ids)

            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in anns])
            if total_keypoints == 0:
                continue

            persons = []
            prev_center = []
            masks = []
            keypoints = []

            # sort from the biggest person to the smallest one

            persons_ids = np.argsort([-a['area'] for a in anns],
                                     kind='mergesort')

            for p_id in list(persons_ids):
                person_meta = anns[p_id]

                if person_meta["iscrowd"]:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                # skip this person if parts number is too low or if
                # segmentation area is too small

                if person_meta["num_keypoints"] < 5 or person_meta[
                        "area"] < 32 * 32:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]

                # skip this person if the distance to existing person is too small

                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break

                if too_close:
                    # add mask of this person. we don't want to show the network
                    # unlabeled people
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                pers = Meta(img_path=img_path,
                            height=h,
                            width=w,
                            center=np.expand_dims(person_center, axis=0),
                            bbox=person_meta["bbox"],
                            area=person_meta["area"],
                            scale=person_meta["bbox"][3] / self.target_size[0],
                            num_keypoints=person_meta["num_keypoints"])

                keypoints.append(person_meta["keypoints"])
                persons.append(pers)
                prev_center.append(
                    np.append(
                        person_center,
                        max(person_meta["bbox"][2], person_meta["bbox"][3])))

            for person in persons:
                person.masks_segments = masks
                person.all_joints = JointsLoader.from_coco_keypoints(
                    keypoints, w, h)
                self.all_meta.append(person)

            if i % 1000 == 0:
                print("Loading image annot {}/{}".format(i, len(ids)))

    def save(self, path):
        raise NotImplementedError

    def load(self, path):
        raise NotImplementedError

    def size(self):
        """
        :return: number of items
        """
        return len(self.all_meta)

    def get_data(self):
        """
        Generator of data points

        :return: instance of Meta
        """
        idxs = np.arange(self.size())
        self.rng.shuffle(idxs)
        for idx in idxs:
            yield [self.all_meta[idx]]
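A usage sketch with assumed paths; in tensorpack, reset_state() is what initializes the rng used by get_data():

df = CocoDataFlow(target_size=(368, 368),
                  annot_path='annotations/person_keypoints_train2017.json',
                  img_dir='train2017')
df.prepare()
df.reset_state()
for (meta,) in df.get_data():
    print(meta.img_path)
    break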
Example #8
class MSCOCO(PoseDataset):
    def __init__(self, cfg):
        cfg.all_joints = [[0], [2, 1], [4, 3], [6, 5], [8, 7], [10, 9], [12, 11], [14, 13], [16, 15]]
        cfg.all_joints_names = ['nose', 'eye', 'ear', 'shoulder', 'elbow', 'hand', 'hip', 'knee', 'foot']
        cfg.num_joints = 17
        super().__init__(cfg)

    def load_dataset(self):
        dataset = self.cfg.dataset
        dataset_phase = self.cfg.dataset_phase
        dataset_ann = self.cfg.dataset_ann

        # initialize COCO api
        annFile = '%s/annotations/%s_%s.json' % (dataset, dataset_ann, dataset_phase)
        self.coco = COCO(annFile)

        imgIds = self.coco.getImgIds()

        data = []

        # loop through each image
        for imgId in imgIds:
            item = DataItem()

            img = self.coco.loadImgs(imgId)[0]
            item.im_path = "%s/images/%s/%s" % (dataset, dataset_phase, img["file_name"])
            item.im_size = [3, img["height"], img["width"]]
            item.coco_id = imgId
            annIds = self.coco.getAnnIds(imgIds=img['id'], iscrowd=False)
            anns = self.coco.loadAnns(annIds)

            all_person_keypoints = []
            masked_persons_RLE = []
            visible_persons_RLE = []
            all_visibilities = []

            # Consider only images with people
            has_people = len(anns) > 0
            if not has_people and self.cfg.coco_only_images_with_people:
                continue

            for ann in anns: # loop through each person
                person_keypoints = []
                visibilities = []
                if ann["num_keypoints"] != 0:
                    for i in range(self.cfg.num_joints):
                        x_coord = ann["keypoints"][3 * i]
                        y_coord = ann["keypoints"][3 * i + 1]
                        visibility = ann["keypoints"][3 * i + 2]
                        visibilities.append(visibility)
                        if visibility != 0: # i.e. if labeled
                            person_keypoints.append([i, x_coord, y_coord])
                    all_person_keypoints.append(np.array(person_keypoints))
                    visible_persons_RLE.append(maskUtils.decode(self.coco.annToRLE(ann)))
                    all_visibilities.append(visibilities)
                if ann["num_keypoints"] == 0:
                    masked_persons_RLE.append(self.coco.annToRLE(ann))

            item.joints = all_person_keypoints
            item.im_neg_mask = maskUtils.merge(masked_persons_RLE)
            if self.cfg.use_gt_segm:
                item.gt_segm = np.moveaxis(np.array(visible_persons_RLE), 0, -1)
                item.visibilities = all_visibilities
            data.append(item)

        self.has_gt = self.cfg.dataset != "image_info"
        return data


    def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
        size = scmap_shape[0:2]
        scmask = np.ones(size)
        m = maskUtils.decode(data_item.im_neg_mask)
        if m.size:
            scmask = 1.0 - imresize(m, size)
        scmask = np.stack([scmask] * self.cfg.num_joints, axis=-1)
        return scmask

    def get_pose_segments(self):
        return [[0, 1], [0, 2], [1, 3], [2, 4], [5, 7], [6, 8], [7, 9], [8, 10], [11, 13], [12, 14], [13, 15], [14, 16]]

    def visualize_coco(self, coco_img_results, visibilities):
        inFile = "tmp.json"
        with open(inFile, 'w') as outfile:
            json.dump(coco_img_results, outfile)
        get_gt_visibilities(inFile, visibilities)

        # initialize cocoPred api
        cocoPred = self.coco.loadRes(inFile)
        os.remove(inFile)

        imgIds = [coco_img_results[0]["image_id"]]

        for imgId in imgIds:
            img = cocoPred.loadImgs(imgId)[0]
            im_path = "%s/images/%s/%s" % (self.cfg.dataset, self.cfg.dataset_phase, img["file_name"])
            I = io.imread(im_path)

            fig = plt.figure()
            a = fig.add_subplot(2, 2, 1)
            plt.imshow(I)
            a.set_title('Initial Image')

            a = fig.add_subplot(2, 2, 2)
            plt.imshow(I)
            a.set_title('Predicted Keypoints')
            annIds = cocoPred.getAnnIds(imgIds=img['id'])
            anns = cocoPred.loadAnns(annIds)
            cocoPred.showAnns(anns)

            a = fig.add_subplot(2, 2, 3)
            plt.imshow(I)
            a.set_title('GT Keypoints')
            annIds = self.coco.getAnnIds(imgIds=img['id'])
            anns = self.coco.loadAnns(annIds)
            self.coco.showAnns(anns)

            plt.show()
Example #9
for i in imgIds:
    img_id_str = str(i)
    # splice the image id into the template file name (tname and imgDir are defined elsewhere)
    name = tname[:-len(img_id_str)] + img_id_str
    file = imgDir + name + '.jpg'
    # im = tf.gfile.FastGFile(file, 'rb').read()
    img = cv2.imread(file)

    h, w = img.shape[:2]

    AnnIds = cocoGt.getAnnIds(imgIds=[i])
    Anns = cocoGt.loadAnns(AnnIds)
    bboxes = np.zeros((0, 6))
    Counts = []
    for Ann in Anns:
        tt = cocoGt.annToRLE(Ann)

        hh, ww = tt['size']
        counts = tt['counts']
        if hh != h or ww != w:
            continue
        bbox = Ann['bbox']
        bbox = np.array(bbox)
        x1, y1 = bbox[:2]
        x2, y2 = bbox[:2] + bbox[2:]
        catId = Ann['category_id']
        cls = catId2cls[catId]
        iscrowd = Ann['iscrowd']
        if iscrowd == 1:
            continue
        t = np.array([[y1, x1, y2, x2, cls, iscrowd]])
        # assumed continuation: collect the box row and this annotation's RLE counts
        bboxes = np.concatenate((bboxes, t), axis=0)
        Counts.append(counts)
Example #10
class _Trainer(object):
    def __init__(self):
        self.device = torch.device(cfg.device)
        self.max_epoch = cfg.max_epoch

        self.train_dataset = NewDataset(train_set=True)
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.batch_size,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.train_dataset.collate_fn)

        self.val_dataset = NewDataset(train_set=False)
        self.val_dataloader = DataLoader(
            self.val_dataset,
            batch_size=1,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.val_dataset.collate_fn)

        self.len_train_dataset = len(self.train_dataset)

        self.model = build_model(cfg.model)

        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=cfg.lr_start,
                                         momentum=cfg.momentum,
                                         weight_decay=cfg.weight_decay)

        if cfg.linear_lr:
            # linear decay from 1.0 to 0.2 over training
            lf = lambda x: (1 - x / (cfg.max_epoch - 1)) * (1.0 - 0.2) + 0.2
        else:  # hyp['lrf'] == 0.2
            lf = one_cycle(1, 0.2, cfg.max_epoch)  # cosine 1 -> hyp['lrf']
        self.scheduler = LambdaLR(self.optimizer, lr_lambda=lf)
        # self.scheduler = adjust_lr_by_wave(self.optimizer, self.max_epoch * self.len_train_dataset, cfg.lr_start,
        #                                    cfg.lr_end, cfg.warmup)
        # self.scheduler = adjust_lr_by_loss(self.optimizer,cfg.lr_start,cfg.warmup,self.train_dataloader.num_batches)
        self.writer = SummaryWriter(cfg.tensorboard_path)
        self.iter = 0
        self.cocoGt = COCO(cfg.test_json)

    def put_log(self, epoch_index, mean_loss, time_per_iter):
        print(
            "[epoch:{}|{}] [iter:{}|{}] time:{}s loss:{} giou_loss:{} conf_loss:{} cls_loss:{} lr:{}"
            .format(epoch_index + 1, self.max_epoch, self.iter + 1,
                    math.ceil(self.len_train_dataset / cfg.batch_size),
                    round(time_per_iter, 2), round(mean_loss[0], 4),
                    round(mean_loss[1], 4), round(mean_loss[2], 4),
                    round(mean_loss[3], 4),
                    self.optimizer.param_groups[0]['lr']))

        step = epoch_index * math.ceil(
            self.len_train_dataset / cfg.batch_size) + self.iter
        self.writer.add_scalar("loss", mean_loss[0], global_step=step)
        self.writer.add_scalar("giou loss", mean_loss[1], global_step=step)
        self.writer.add_scalar("conf loss", mean_loss[2], global_step=step)
        self.writer.add_scalar("cls loss", mean_loss[3], global_step=step)
        self.writer.add_scalar("learning rate",
                               self.optimizer.param_groups[0]['lr'],
                               global_step=step)

    def train_one_epoch(self, epoch_index, train_loss=None, train_lr=None):
        mean_loss = [0, 0, 0, 0]
        self.model.train()
        for self.iter, train_data in enumerate(self.train_dataloader):
            start_time = time.time()
            # self.scheduler.step(epoch_index,
            #                     self.len_train_dataset * epoch_index + self.iter / cfg.batch_size)  # adjust learning rate
            # self.scheduler.step(self.len_train_dataset * epoch_index + self.iter + 1, mean_loss[0])
            image, target, _ = train_data
            image = image.to(self.device)

            output, pred = self.model(image)

            # compute the losses
            loss, loss_giou, loss_conf, loss_cls = build_loss(output, target)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()

            end_time = time.time()
            time_per_iter = end_time - start_time  # time spent per iteration

            loss_items = [
                loss.item(),
                loss_giou.item(),
                loss_conf.item(),
                loss_cls.item()
            ]
            mean_loss = [
                (mean_loss[i] * self.iter + loss_items[i]) / (self.iter + 1)
                for i in range(4)
            ]
            self.put_log(epoch_index, mean_loss, time_per_iter)

            # record the training loss
            loss_value = round(mean_loss[0], 4)
            if isinstance(train_loss, list):
                train_loss.append(loss_value)

            now_lr = self.optimizer.param_groups[0]["lr"]
            if isinstance(train_lr, list):
                train_lr.append(now_lr)

        if (epoch_index + 1) % cfg.save_step == 0:
            checkpoint = {
                'epoch': epoch_index,
                'model': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict()
            }
            torch.save(
                checkpoint, cfg.checkpoint_save_path + cfg.model + '_' +
                str(epoch_index + 1) + '.pth')

    @torch.no_grad()
    def eval(self, epoch_index, mAP_list=None):
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(n_threads)
        cpu_device = torch.device("cpu")
        self.model.eval()

        for ann_idx in self.cocoGt.anns:
            ann = self.cocoGt.anns[ann_idx]
            ann['area'] = maskUtils.area(self.cocoGt.annToRLE(ann))
        iou_types = 'segm'
        anns = []

        for val_data in self.val_dataloader:
            image, target, logit = val_data
            image = image.to(self.device)
            image_size = image.shape[3]  # image.shape[2] == image.shape[3]
            # i.e. the square image size after resizing

            _, pred = self.model(image)
            # TODO: currently only batch_size=1 is supported
            pred = pred.unsqueeze(0)
            pred = pred[pred[:, :, 8] > cfg.conf_thresh]
            if pred.shape[0] == 0:
                continue
            detections = non_max_suppression(pred.unsqueeze(0),
                                             cls_thres=cfg.cls_thresh,
                                             nms_thres=cfg.conf_thresh)
            anns.extend(
                reorginalize_target(detections, logit, image_size,
                                    self.cocoGt))

        for ann in anns:
            ann['segmentation'] = self.cocoGt.annToRLE(
                ann)  # convert the polygon segmentation to RLE

        cocoDt = self.cocoGt.loadRes(anns)

        cocoEval = COCOeval(self.cocoGt, cocoDt, iou_types)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        ap_per_category(self.cocoGt, cocoEval, epoch_index)
        print_txt = cocoEval.stats
        coco_mAP = print_txt[0]
        voc_mAP = print_txt[1]
        if isinstance(mAP_list, list):
            mAP_list.append(voc_mAP)
Example #11
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--lvis', type=Path, required=True)
    parser.add_argument('--coco', type=Path, required=True)
    parser.add_argument('--mapping',
                        type=Path,
                        default=ROOT / 'data/lvis_coco_to_synset.json')
    parser.add_argument('--output-json',
                        type=Path,
                        required=True)
    parser.add_argument(
        '--iou-thresh',
        default=0.7,
        type=float,
        help=('If a COCO annotation overlaps with an LVIS annotation with '
              'IoU over this threshold, we use only the LVIS annotation.'))

    args = parser.parse_args()
    args.output_json.parent.mkdir(exist_ok=True, parents=True)
    common_setup(args.output_json.name + '.log', args.output_json.parent, args)

    coco = COCO(args.coco)
    lvis = COCO(args.lvis)

    synset_to_lvis_id = {x['synset']: x['id'] for x in lvis.cats.values()}
    coco_to_lvis_category = {}
    with open(args.mapping, 'r') as f:
        name_mapping = json.load(f)
    for category in coco.cats.values():
        mapped = name_mapping[category['name']]
        assert mapped['coco_cat_id'] == category['id']
        synset = mapped['synset']
        if synset not in synset_to_lvis_id:
            logging.debug(
                f'Found no LVIS category for "{category["name"]}" from COCO')
            continue
        coco_to_lvis_category[category['id']] = synset_to_lvis_id[synset]

    for image_id, image in coco.imgs.items():
        if image_id in lvis.imgs:
            coco_name = coco.imgs[image_id]['file_name']
            lvis_name = lvis.imgs[image_id]['file_name']
            assert coco_name in lvis_name
        else:
            logging.info(
                f'Image {image_id} in COCO, but not annotated in LVIS')

    lvis_highest_id = max(x['id'] for x in lvis.anns.values())
    ann_id_generator = itertools.count(lvis_highest_id + 1)
    new_annotations = []
    for image_id, lvis_anns in tqdm(lvis.imgToAnns.items()):
        if image_id not in coco.imgToAnns:
            logging.info(
                f'Image {image_id} in LVIS, but not annotated in COCO')
            continue

        coco_anns = coco.imgToAnns[image_id]
        # Compute IoU between coco_anns and lvis_anns
        # Shape (num_coco_anns, num_lvis_anns)
        mask_iou = mask_util.iou([coco.annToRLE(x) for x in coco_anns],
                                 [lvis.annToRLE(x) for x in lvis_anns],
                                 pyiscrowd=np.zeros(len(lvis_anns)))
        does_overlap = mask_iou.max(axis=1) > args.iou_thresh
        to_add = []
        for i, ann in enumerate(coco_anns):
            if does_overlap[i]:
                continue
            if ann['category_id'] not in coco_to_lvis_category:
                continue
            ann['category_id'] = coco_to_lvis_category[ann['category_id']]
            ann['id'] = next(ann_id_generator)
            to_add.append(ann)
        new_annotations.extend(to_add)

    with open(args.lvis, 'r') as f:
        merged = json.load(f)
    merged['annotations'].extend(new_annotations)
    with open(args.output_json, 'w') as f:
        json.dump(merged, f)
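For reference, mask_util.iou(dt, gt, pyiscrowd) returns a matrix of shape (len(dt), len(gt)), which is why the script takes the max over axis 1; a self-contained sketch:

import numpy as np
import pycocotools.mask as mask_util

a = mask_util.encode(np.asfortranarray(np.eye(4, dtype=np.uint8)))        # 4 px
b = mask_util.encode(np.asfortranarray(np.ones((4, 4), dtype=np.uint8)))  # 16 px
print(mask_util.iou([a], [b], np.zeros(1)))  # [[0.25]] = 4 / 16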
Example #12
def get_mask_from_anns(anns: List[Any], coco: COCO) -> Image.Image:
    # merge the binary masks of all annotations into a single 0/1 mask;
    # np.maximum avoids the uint8 overflow that summing many masks could cause
    mask = decode(coco.annToRLE(anns[0])).astype(np.uint8)
    for ann in anns[1:]:
        mask = np.maximum(mask, decode(coco.annToRLE(ann)))
    return Image.fromarray(mask, mode='L')
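A short usage sketch (the annotation file is assumed); note the helper indexes anns[0], so the list must be non-empty:

coco = COCO('annotations/instances_val2017.json')
img_id = coco.getImgIds()[0]
anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
if anns:
    union_mask = get_mask_from_anns(anns, coco)  # PIL 'L' image with 0/1 pixels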
Example #13
class PoseInfo:
    def __init__(self, data_dir, data_type, anno_path):
        # instance attributes (class-level lists would be shared between instances)
        self.metas = []
        self.metas_test = []
        self.data_dir = data_dir
        self.data_type = data_type
        self.image_base_dir = '{}/images/{}2014/'.format(data_dir, data_type)
        # fall back to the conventional path when anno_path is not given
        self.anno_path = anno_path or '{}/annotations/person_keypoints_{}2014.json'.format(
            data_dir, data_type)
        self.coco = COCO(self.anno_path)
        self.get_image_annos()
        self.image_list = os.listdir(self.image_base_dir)

    @staticmethod
    def get_keypoints(annos_info):
        annolist = []
        for anno in annos_info:
            adjust_anno = {
                'keypoints': anno['keypoints'],
                'num_keypoints': anno['num_keypoints']
            }
            annolist.append(adjust_anno)
        return annolist

    def get_image_annos(self):

        images_ids = self.coco.getImgIds()

        for idx in range(len(images_ids)):

            images_info = self.coco.loadImgs(images_ids[idx])
            image_path = self.image_base_dir + images_info[0]['file_name']
            # skip images that are missing from the image directory
            if not os.path.exists(image_path):
                continue
            img_id = images_info[0]['id']

            annos_ids = self.coco.getAnnIds(imgIds=images_ids[idx])
            annos_info = self.coco.loadAnns(annos_ids)
            keypoints = self.get_keypoints(annos_info)

            #############################################################################
            anns = annos_info
            prev_center = []
            masks = []

            # sort from the biggest person to the smallest one
            persons_ids = np.argsort([-a['area'] for a in anns],
                                     kind='mergesort')

            for p_id in list(persons_ids):
                person_meta = anns[p_id]

                if person_meta["iscrowd"]:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                # skip this person if parts number is too low or if
                # segmentation area is too small

                if person_meta["num_keypoints"] < 5 or person_meta[
                        "area"] < 32 * 32:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]

                # skip this person if the distance to existing person is too small

                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break

                if too_close:
                    # add mask of this person. we don't want to show the network
                    # unlabeled people
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

            ############################################################################
            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in annos_info])
            if total_keypoints > 0:
                meta = CocoMeta(images_ids[idx], image_path, images_info[0],
                                keypoints, masks)
                self.metas.append(meta)

        print("Overall get {}".format(len(self.metas)))

    def load_images(self):
        pass

    def get_image_list(self):
        img_list = []
        for meta in self.metas:
            img_list.append(meta.img_url)
        return img_list

    def get_joint_list(self):
        joint_list = []
        for meta in self.metas:
            joint_list.append(meta.joint_list)
        return joint_list

    def get_mask(self):
        mask_list = []
        for meta in self.metas:
            mask_list.append(meta.masks)
        return mask_list
Example #14
class PoseInfo:
    """ Use COCO for pose estimation, returns images with people only. """
    def __init__(self, image_base_dir, anno_path, with_mask):
        self.metas = []
        # self.data_dir = data_dir
        # self.data_type = data_type
        self.image_base_dir = image_base_dir
        self.anno_path = anno_path
        self.with_mask = with_mask
        self.coco = COCO(self.anno_path)
        self.get_image_annos()
        self.image_list = os.listdir(self.image_base_dir)

    @staticmethod
    def get_keypoints(annos_info):
        annolist = []
        for anno in annos_info:
            adjust_anno = {
                'keypoints': anno['keypoints'],
                'num_keypoints': anno['num_keypoints']
            }
            annolist.append(adjust_anno)
        return annolist

    def get_image_annos(self):
        """Read JSON file, and get and check the image list.
        Skip missing images.
        """
        images_ids = self.coco.getImgIds()
        len_imgs = len(images_ids)
        for idx in range(len_imgs):

            images_info = self.coco.loadImgs(images_ids[idx])
            image_path = os.path.join(self.image_base_dir,
                                      images_info[0]['file_name'])
            # skip images that are missing from the image directory
            if not os.path.exists(image_path):
                print(
                    "[skip] json annotation found, but cannot find image: {}".
                    format(image_path))
                continue

            annos_ids = self.coco.getAnnIds(imgIds=images_ids[idx])
            annos_info = self.coco.loadAnns(annos_ids)
            keypoints = self.get_keypoints(annos_info)

            #############################################################################
            anns = annos_info
            prev_center = []
            masks = []

            # sort from the biggest person to the smallest one
            if self.with_mask:
                persons_ids = np.argsort([-a['area'] for a in anns],
                                         kind='mergesort')

                for p_id in list(persons_ids):
                    person_meta = anns[p_id]

                    if person_meta["iscrowd"]:
                        masks.append(self.coco.annToRLE(person_meta))
                        continue

                    # skip this person if parts number is too low or if
                    # segmentation area is too small
                    if person_meta["num_keypoints"] < 5 or person_meta[
                            "area"] < 32 * 32:
                        masks.append(self.coco.annToRLE(person_meta))
                        continue

                    person_center = [
                        person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                        person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                    ]

                    # skip this person if the distance to existing person is too small
                    too_close = False
                    for pc in prev_center:
                        a = np.expand_dims(pc[:2], axis=0)
                        b = np.expand_dims(person_center, axis=0)
                        dist = cdist(a, b)[0]
                        if dist < pc[2] * 0.3:
                            too_close = True
                            break

                    if too_close:
                        # add mask of this person. we don't want to show the network
                        # unlabeled people
                        masks.append(self.coco.annToRLE(person_meta))
                        continue

            ############################################################################
            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in annos_info])
            if total_keypoints > 0:
                meta = CocoMeta(images_ids[idx], image_path, images_info[0],
                                keypoints, masks)
                self.metas.append(meta)

        print("Overall get {} valid pose images from {} and {}".format(
            len(self.metas), self.image_base_dir, self.anno_path))

    def load_images(self):
        pass

    def get_image_list(self):
        img_list = []
        for meta in self.metas:
            img_list.append(meta.img_url)
        return img_list

    def get_joint_list(self):
        joint_list = []
        for meta in self.metas:
            joint_list.append(meta.joint_list)
        return joint_list

    def get_mask(self):
        mask_list = []
        for meta in self.metas:
            mask_list.append(meta.masks)
        return mask_list
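A usage sketch with assumed COCO paths:

info = PoseInfo(image_base_dir='train2017',
                anno_path='annotations/person_keypoints_train2017.json',
                with_mask=True)
print(len(info.get_image_list()), len(info.get_joint_list()), len(info.get_mask()))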
Example #15
class TrafficDataset(Dataset):
    def __init__(self, ann_file, root, to_contiguous_class_mapping,
                 to_json_class_mapping, transforms):
        self.coco = COCO(ann_file)
        self.class_mapping = to_contiguous_class_mapping
        MASK = Image.open(os.path.join(root, 'mask.png'))

        # Mask used to keep only visible roadbed
        tmp = np.array(MASK)
        data_points = np.argwhere(tmp)
        self.min_y, self.min_x = data_points.min(axis=0)
        self.max_y, self.max_x = data_points.max(axis=0) + 1
        tmp = tmp[self.min_y:self.max_y, self.min_x:self.max_x]
        assert tmp.shape == (769, 1920), tmp.shape
        MASK = Image.fromarray(tmp, MASK.mode)

        # filter images without detection annotations
        self.ids, self.images = [], []
        for img_id in sorted(self.coco.imgs.keys()):
            ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None)
            anno = self.coco.loadAnns(ann_ids)
            if has_valid_annotation(anno):
                path = os.path.join(root, self.coco.imgs[img_id]['file_name'])

                try:
                    # open and crop
                    image = Image.open(path)
                    image = Image.fromarray(
                        np.array(image)[self.min_y:self.max_y,
                                        self.min_x:self.max_x], image.mode)

                    zeros = Image.fromarray(np.zeros_like(image), image.mode)
                    image = Image.composite(image, zeros, mask=MASK)
                except Exception as e:
                    print("Failed to load image ", path, e)
                    continue

                self.images.append(image)
                self.ids.append(img_id)

        self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}  # inner id to json id

        self.contiguous_category_id_to_json_id = copy.deepcopy(
            to_json_class_mapping)  # inner class -> json class name
        for class_id, class_name in self.contiguous_category_id_to_json_id.items():
            # inner contiguous class id -> json category id
            self.contiguous_category_id_to_json_id[class_id] = \
                [x for x in self.coco.cats.values() if x['name'] == class_name][0]['id']

        self.transforms = transforms

        # Recompute bboxes from the masks to work around coco-annotator bugs
        # in bounding-box coordinate conversion
        from tqdm import tqdm
        for k, v in tqdm(list(self.coco.anns.items())):
            rle = self.coco.annToRLE(v)
            before = v['bbox']
            v['bbox'] = maskUtils.toBbox(rle)
            if sum(abs(before - v['bbox'])) > 1:
                print(f"Changed {before}->{v['bbox']}")

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        img = self.images[idx]

        coco_idx = self.ids[idx]
        anno = self.coco.loadAnns(self.coco.getAnnIds([coco_idx]))

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [
            self.class_mapping[self.coco.cats[c]['name']] for c in classes
        ]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        target = target.crop([self.min_x, self.min_y, self.max_x,
                              self.max_y]).clip_to_image(remove_empty=True)

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target, idx

    def get_img_info(self, index):
        img_id = self.ids[index]
        img_data = self.coco.imgs[img_id]
        img_data["crop_x"], img_data["crop_y"] = self.min_x, self.min_y
        img_data["crop_w"], img_data[
            "crop_h"] = self.max_x - self.min_x, self.max_y - self.min_y
        return img_data
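For reference, a minimal sketch (not part of the original) of the crop-window computation TrafficDataset performs on mask.png: the bounding box of the mask's nonzero pixels, with +1 on the maxima so they work as exclusive slice bounds. The file names are illustrative.

import numpy as np
from PIL import Image

def crop_window_from_mask(mask_path):
    # Bounding box of the nonzero mask pixels, as exclusive slice bounds.
    arr = np.array(Image.open(mask_path).convert('L'))
    ys, xs = np.nonzero(arr)
    return ys.min(), xs.min(), ys.max() + 1, xs.max() + 1

# min_y, min_x, max_y, max_x = crop_window_from_mask('mask.png')
# cropped = np.array(Image.open('frame.jpg'))[min_y:max_y, min_x:max_x]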
Example #16
0
class COCOSemantic(SegmentationDataset):
    """COCO Semantic Segmentation Dataset for the Panoptic Segmentation task.

    Parameters
    ----------
    root : string
        Path to COCO dataset folder. Default is '~/.mxnet/datasets/coco'
    split: string
        'train', 'val' or 'test'
    transform : callable, optional
        A function that transforms the image

    Examples
    --------
    >>> from mxnet.gluon.data.vision import transforms
    >>> # Transforms for Normalization
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> # Create Dataset
    >>> trainset = COCOSemantic(split='train', transform=input_transform)
    >>> # Create Training Loader
    >>> train_data = gluon.data.DataLoader(
    >>>     trainset, 4, shuffle=True, last_batch='rollover',
    >>>     num_workers=4)
    """
    # Earlier 53/54-entry CAT_LIST variants (without the leading 0 for the
    # merged 'thing' class) are omitted; the list below prepends 0 so that
    # index 0 maps to 'thing'.
    CAT_LIST = [0, 92, 93, 95, 100, 107, 109, 112, 118, 119, 122, 125, 128, 130, 133, 138, 141, 144, 145, 147, 148,
                149, 151, 154, 155, 156, 159, 161, 166, 168, 171, 175, 176, 177, 178, 180, 181, 184, 185, 186,
                187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200]

    NUM_CLASS = 54

    def __init__(self, root=os.path.expanduser('~/.mxnet/datasets/coco'),
                 split='train', mode=None, transform=None, **kwargs):
        super(COCOSemantic, self).__init__(root, split, mode, transform, **kwargs)
        # lazy import pycocotools
        from pycocotools.coco import COCO
        from pycocotools import mask
        if split == 'train':
            print('train set')
            ann_file = os.path.join(root, 'annotations/stuff_train2017.json')
            ids_file = os.path.join(root, 'annotations/sem_train_ids_54_0.mx')
            self.root = os.path.join(root, 'train2017')
        else:
            print('val set')
            ann_file = os.path.join(root, 'annotations/stuff_val2017.json')
            ids_file = os.path.join(root, 'annotations/sem_val_ids_54_0.mx')
            self.root = os.path.join(root, 'val2017')
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        if os.path.exists(ids_file):
            with open(ids_file, 'rb') as f:
                self.ids = pickle.load(f)
        else:
            ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(ids, ids_file)
        self.transform = transform

    def __getitem__(self, index):
        coco = self.coco
        img_id = self.ids[index]
        img_metadata = coco.loadImgs(img_id)[0]
        path = img_metadata['file_name']
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(self._gen_seg_mask(
            cocotarget, img_metadata['height'], img_metadata['width']))
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        
        """
        # only used for generating mask ground truth
        outdir = 'gt_outdir'
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        outname = 'gt_mask_' + str(index) + '_' + str(img_id) + '.png'
        #print(mask)
        mask_out = get_color_pallete(mask.asnumpy(), 'coco')
        mask_out.save(os.path.join(outdir, outname))
        print(str(img_id) + ' is saved.')
        """

        return img, mask

    def __len__(self):
        return len(self.ids)

    def _gen_seg_mask(self, target, h, w):
        mask = np.zeros((h, w), dtype=np.uint8)
        coco_mask = self.coco_mask
        for instance in target:
            #print(instance)
            #print(instance['segmentation'][0])
            #--------------------------------------------------------------------
            # correct decode path for the latest pycocotools version
            rle = self.coco.annToRLE(instance)
            m = coco_mask.decode(rle)
            #--------------------------------------------------------------------
            # for the original github version:
            #m = coco_mask.decode(instance['segmentation'])
            #--------------------------------------------------------------------
            # another option, recommended in gluoncv/data/mscoco/segmentation.py:
            #rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            #m = coco_mask.decode(rle)
            #--------------------------------------------------------------------

            cat = instance['category_id']
            if cat in self.CAT_LIST:
                c = self.CAT_LIST.index(cat)
            else:
                continue
            if len(m.shape) < 3:
                mask[:, :] += (mask == 0) * (m * c)
            else:
                mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
        return mask

    def _preprocess(self, ids, ids_file):
        print("Preprocessing mask, this will take a while." + \
              "But don't worry, it only run once for each split.")
        tbar = trange(len(ids))
        new_ids = []
        for i in tbar:
            img_id = ids[i]
            cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            #print(len(cocotarget))
            img_metadata = self.coco.loadImgs(img_id)[0]
            #print(len(img_metadata))
            #print(img_metadata['height'])
            #print(img_metadata['width'])
            mask = self._gen_seg_mask(cocotarget, img_metadata['height'],
                                      img_metadata['width'])
            # more than 1k pixels
            if (mask > 0).sum() > 1000:
                new_ids.append(img_id)
            tbar.set_description('Doing: {}/{}, got {} qualified images'. \
                                 format(i, len(ids), len(new_ids)))
        print('Found {} qualified images'.format(len(new_ids)))
        with open(ids_file, 'wb') as f:
            pickle.dump(new_ids, f)
        return new_ids

    @property
    def classes(self):
        """Category names."""
        # Earlier 52/53-name variants (without the leading 'thing' entry,
        # matching the older CAT_LIST choices) are omitted; the tuple below
        # matches the 54-entry CAT_LIST above.
        return ('thing', 'banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain', 'door-stuff', 'floor-wood',
                'flower', 'fruit', 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
                'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf', 'snow', 'stairs',
                'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other',
                'window-blind', 'window-other', 'tree-merged', 'fence-merged', 'ceiling-merged',
                'sky-other-merged', 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
                'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged', 'food-other-merged',
                'building-other-merged', 'rock-merged', 'wall-other-merged', 'rug-merged')
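A hedged usage sketch for COCOSemantic, assuming the COCO-Stuff 2017 annotations are already unpacked under ~/.mxnet/datasets/coco as the constructor expects:

from mxnet.gluon.data.vision import transforms

input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])
trainset = COCOSemantic(split='train', mode='train', transform=input_transform)
img, mask = trainset[0]  # transformed image and its HxW category-index mask
print(len(trainset), img.shape, mask.shape)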
Example #17
0
# print('COCO categories: \n{}\n'.format(' '.join(nms)))
# nms = set([cat['supercategory'] for cat in cats])
# print('COCO supercategories: \n{}'.format(' '.join(nms)))

# get all images containing given categories, select one at random
# catIds = coco.getCatIds(catNms=['person','dog','skateboard']);
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)
# imgIds = coco.getImgIds(imgIds = [324158])
img = coco.loadImgs(imgIds[np.random.randint(0,len(imgIds))])[0]

# load and display image
imgpath = '%s/images/%s/%s' % (dataDir, dataType, img['file_name'])
labelspath = imgpath.replace('.jpg', '.txt')
print(os.path.abspath(imgpath))
I = io.imread(imgpath)

h, w, c = I.shape  # height, width, channels
print(I.shape)
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
for ann in anns:
    rle = coco.annToRLE(ann)
    bboxes = maskUtils.toBbox(rle)
    print(bboxes)

## load and display instance annotations
# plt.imshow(I); plt.axis('off')
# coco.showAnns(anns)
# plt.show()
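The snippet defines labelspath but never writes it; a plausible continuation (an assumption, not part of the original) would dump the mask-derived boxes in normalized YOLO format, one line per annotation:

with open(labelspath, 'w') as f:
    for ann in anns:
        bx, by, bw, bh = maskUtils.toBbox(coco.annToRLE(ann))
        # YOLO lines are "class cx cy w h", normalized to image size; the
        # class index 0 here stands for 'person' and is an assumption.
        f.write('0 %.6f %.6f %.6f %.6f\n' %
                ((bx + bw / 2) / w, (by + bh / 2) / h, bw / w, bh / h))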
Example #18
0
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--lvis', type=Path, required=True, help='lvis json path')
    parser.add_argument(
        '--coco', type=Path, required=True, help='coco json path')
    parser.add_argument(
        '--mapping',
        type=Path,
        required=True,
        help='synset mapping from coco to lvis')
    parser.add_argument('--output', type=Path, required=True)
    parser.add_argument(
        '--iou-thresh',
        default=0.7,
        type=float,
        help=('If a COCO annotation overlaps an LVIS annotation with IoU '
              'over this threshold, only the LVIS annotation is kept.'))

    args = parser.parse_args()
    args.output.parent.mkdir(exist_ok=True, parents=True)

    coco = COCO(args.coco)
    lvis = COCO(args.lvis)

    # Map COCO categories to LVIS categories via their shared synset.
    # Mapping entry format:
    # "bench": {
    #     "coco_cat_id": 15,
    #     "meaning": "a long seat for more than one person",
    #     "synset": "bench.n.01"}
    synset2lvis = {cat['synset']: cat['id'] for cat in lvis.cats.values()}
    coco2lvis = {}
    with open(args.mapping, 'r') as f:
        mapping = json.load(f)
    for cat in coco.cats.values():
        mapped = mapping[cat['name']]
        assert mapped['coco_cat_id'] == cat['id']
        synset = mapped['synset']
        if synset not in synset2lvis:
            print(f'Found no LVIS category for "{cat["name"]}" from COCO')
            continue
        coco2lvis[cat['id']] = synset2lvis[synset]

    for img_id, _ in coco.imgs.items():
        if img_id in lvis.imgs:
            coco_name = coco.imgs[img_id]['file_name']
            lvis_name = lvis.imgs[img_id]['file_name']
            assert coco_name in lvis_name
        else:
            print(f'Image {img_id} in COCO, but not annotated in LVIS')

    # append the surviving COCO annotations after the existing LVIS ones
    lvis_highest_id = max(x['id'] for x in lvis.anns.values())
    ann_id_generator = itertools.count(lvis_highest_id + 1)
    new_annotations = []
    for img_id, lvis_anns in tqdm(lvis.imgToAnns.items()):
        if img_id not in coco.imgToAnns:
            print(f'Image {img_id} in LVIS, but not annotated in COCO')
            continue

        coco_anns = coco.imgToAnns[img_id]
        # Compute IoU between coco_anns and lvis_anns
        # Shape (num_coco_anns, num_lvis_anns)
        mask_iou = mask_util.iou([coco.annToRLE(x) for x in coco_anns],
                                 [lvis.annToRLE(x) for x in lvis_anns],
                                 pyiscrowd=np.zeros(len(lvis_anns)))
        does_overlap = mask_iou.max(axis=1) > args.iou_thresh
        to_add = []
        for i, ann in enumerate(coco_anns):
            if does_overlap[i]:
                continue
            if ann['category_id'] not in coco2lvis:
                continue
            ann['category_id'] = coco2lvis[ann['category_id']]
            ann['id'] = next(ann_id_generator)
            to_add.append(ann)
        new_annotations.extend(to_add)

    with open(args.lvis, 'r') as f:
        merged = json.load(f)
    merged['annotations'].extend(new_annotations)
    with open(args.output, 'w') as f:
        json.dump(merged, f)
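For reference, a tiny self-contained illustration of the mask_util.iou call used above: it takes two lists of RLEs plus a per-ground-truth iscrowd flag and returns a (len(dt), len(gt)) matrix.

import numpy as np
from pycocotools import mask as mask_util

full = mask_util.encode(np.asfortranarray(np.ones((10, 10), dtype=np.uint8)))
half = mask_util.encode(np.asfortranarray(
    np.pad(np.ones((5, 10), dtype=np.uint8), ((0, 5), (0, 0)))))
# full covers 100 px, half covers the top 50 px: intersection 50, union 100
print(mask_util.iou([full], [half], pyiscrowd=np.zeros(1)))  # [[0.5]]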
Example #19
0
class Cowbird_Dataset(torch.utils.data.Dataset):
    """
    Dataset class for instance level task, including detection, instance segmentation, 
    and single view reconstruction. Since data are in COCO format, this class utilize
    COCO API to do most of the dataloading.
    """
    def __init__(self, root, annfile, scale_factor=0.25, output_size=256, transform=None):
        self.root = root
        self.coco = COCO(annfile)
        self.imgIds = self.coco.getImgIds(catIds=1)
        self.imgIds.sort()
        
        self.scale_factor = scale_factor
        self.output_size = output_size
        self.transform = transform
        self.data = self.get_data()
        
    def __getitem__(self, index):
        data = self.data[index]
        x, y, w, h = data['bbox']

        # input image
        img = cv2.imread(data['imgpath'])
        img = img[y:y+h, x:x+w]
        img = cv2.resize(img, (self.output_size, self.output_size))
        
        if self.transform is not None:
            img = self.transform(img)
        else:
            img = torch.tensor(img).permute(2,0,1).float()/255

        # keypoints
        kpts = data['keypoints'].clone()
        valid = kpts[:,-1] > 0
        kpts[valid,:2] -= torch.tensor([x, y])
        kpts[valid,:2] *= self.output_size / w.float()
        
        # mask
        mask = decode(data['rle'])
        mask = mask[y:y+h, x:x+w]
        mask = cv2.resize(mask, (self.output_size, self.output_size))
        mask = torch.tensor(mask).long()
        
        # meta
        size = data['size'] * self.output_size / w.float()
        meta = {
            'imgpath': data['imgpath'],
            'size': size
            }
                
        return img, kpts, mask, meta
    
    def __len__(self):
        return len(self.data)
    
    def get_data(self):
        data = []
        for imgId in self.imgIds:
            data.extend(self.load_data(imgId))
        return data
    
    def load_data(self, imgId):
        img_dict = self.coco.loadImgs(imgId)[0]
        width = img_dict['width']
        height = img_dict['height']
        
        annIds = self.coco.getAnnIds(imgIds=imgId)
        anns = self.coco.loadAnns(annIds)
        data = []
        for ann in anns:
            path = self.path_from_Id(imgId)
            kpts = torch.tensor(ann['keypoints']).float().reshape(-1, 3)
            bbox = dialate_boxes([ann['bbox']], s=self.scale_factor)[0]
            rle  = self.coco.annToRLE(ann)
            size = max(ann['bbox'][2:])
            
            data.append({
                'imgpath': path,
                'bbox': bbox,
                'keypoints': kpts,
                'rle': rle,  # to save memory, we store rle and convert to mask on the fly 
                'size': size
            })
            
        return data
            
    def path_from_Id(self, imgId):
        img_dict = self.coco.loadImgs(imgId)[0]
        filename = img_dict['file_name']
        path = os.path.join(self.root, filename)
        return path
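A hedged usage sketch for Cowbird_Dataset; the root and annotation paths below are placeholders, and dialate_boxes is assumed to be importable from the surrounding project.

from torch.utils.data import DataLoader

dataset = Cowbird_Dataset(root='data/cowbird/images',
                          annfile='data/cowbird/annotation.json')
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
img, kpts, mask, meta = next(iter(loader))
print(img.shape, kpts.shape, mask.shape)  # e.g. [8, 3, 256, 256], [8, K, 3], [8, 256, 256]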