def __getitem__(self, index):
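    """Load one COCO image and build CenterNet-style training targets.

    Returns the normalized CHW image plus per-object targets: the class
    heatmap ('hmap'), box width/height ('w_h_'), sub-pixel center offsets
    ('regs'), flattened center indices ('inds'), and a validity mask
    ('ind_masks').
    """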
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = np.array([self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations], dtype=np.float32)

    if len(bboxes) == 0:
      bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
      labels = np.array([0])  # 1-D, matching the non-empty case

    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    # print("===============", img_path)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0  # scale used by the affine transform

    flipped = False
    if self.split == 'train':
      scale = scale * np.random.choice(self.rand_scales)
      w_border = get_border(128, width)
      h_border = get_border(128, height)
      center[0] = np.random.randint(low=w_border, high=width - w_border)
      center[1] = np.random.randint(low=h_border, high=height - h_border)

      if np.random.random() < 0.5:
        flipped = True
        img = img[:, ::-1, :]
        center[0] = width - center[0] - 1

    # apply the affine transform to the input image
    trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
      color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    # the three main regression targets
    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression

    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)
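    # All per-object targets are padded to max_objs slots; ind_masks marks
    # which slots hold a real object so the loss can ignore the rest.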

    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_fmap)
      bbox[2:] = affine_transform(bbox[2:], trans_fmap)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

      if h > 0 and w > 0:
        obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        obj_c_int = obj_c.astype(np.int32)
        # Gaussian radius derived from the object size and the IoU threshold
        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
        # splat a 2-D Gaussian peak onto this class's heatmap channel
        draw_umich_gaussian(hmap[label], obj_c_int, radius)

        w_h_[k] = 1. * w, 1. * h
        # sub-pixel offset lost when snapping the center to integer coordinates
        regs[k] = obj_c - obj_c_int  # discretization error
        # flattened feature-map index of the k-th object: fmap_w * cy + cx
        inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
        # mark this object slot as valid
        ind_masks[k] = 1

    return {'image': img,
            'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
            'c': center, 's': scale, 'img_id': img_id}
    def __getitem__(self, index):
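        """Load one KINS image and build amodal-segmentation targets.

        On top of the usual CenterNet heatmap/box targets, each amodal
        contour is encoded as sparse coefficients over a learned dictionary
        (fast_ista), together with an occlusion-count map and low-resolution
        per-object mask "votes".
        """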
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        a_bboxes = []
        shapes = []
        a_shapes = []

        for anno in annotations:
            if anno['category_id'] not in KINS_IDS:
                continue  # exclude categories outside KINS_IDS (e.g. class 3, person-sitting) at evaluation

            a_polygons = anno['segmentation'][0]  # only one amodal mask per instance
            polygons = anno['i_segm'][0]  # inmodal (visible) segmentation

            # gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox']  # this is used to clip resampled polygons
            a_contour = np.array(a_polygons).reshape((-1, 2))
            contour = np.array(polygons).reshape((-1, 2))

            # Remove tiny objects before resampling
            if cv2.contourArea(contour.astype(np.int32)) < 5:
                continue
            # Resample both contours to a fixed number of vertices
            fixed_contour = uniformsample(a_contour, self.n_vertices)
            i_contour = uniformsample(contour, self.n_vertices)

            # fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            # fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            # contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # if contour_std < 1e-6 or contour_std == np.inf or contour_std == np.nan:  # invalid shapes
            #     continue

            shapes.append(np.ndarray.flatten(i_contour).tolist())
            a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])
            a_bboxes.append(anno['a_bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        a_bboxes = np.array(a_bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)
        a_shapes = np.array(a_shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([0])  # 1-D, matching the non-empty case
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
            a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
        a_bboxes[:, 2:] += a_bboxes[:, :2]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(360, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = img.copy()

        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = cv2.warpAffine(image_show, trans_fmap, (self.fmap_size['w'], self.fmap_size['h']))

        hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
                        dtype=np.float32)  # heatmap of centers
        occ_map = np.zeros((1, self.fmap_size['h'], self.fmap_size['w']),
                           dtype=np.float32)  # grayscale map of occlusion levels
        w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of inmodal bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros((self.max_objs, 2),
                                  dtype=np.float32)  # amodal mass center minus inmodal box center
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt amodal sparse coefficients
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for quantization error
        inds = np.zeros((self.max_objs,), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)
        votes_ = np.zeros((self.max_objs, self.vote_length),
                          dtype=np.float32)  # mask votes for heatmaps

        for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
                zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    a_shape[2 * m] = width - a_shape[2 * m] - 1
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # inmodal box height and width

            a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
            a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
            a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0,
                                     self.fmap_size['w'] - 1)
            a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0,
                                     self.fmap_size['h'] - 1)

            # apply the scale-and-crop transform to both contours
            for m in range(self.n_vertices):
                a_shape[2 * m:2 * m + 2] = affine_transform(
                    a_shape[2 * m:2 * m + 2], trans_fmap)
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                            self.fmap_size['w'] - 1)
            i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                            self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            centered_shape = indexed_shape - mass_center  # these are amodal mask shapes

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = centered_shape.reshape((1, -1))

                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)
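                # fast_ista (iterative shrinkage-thresholding) finds sparse
                # coefficients such that self.dictionary approximately
                # reconstructs the centered contour, with sparsity weighted
                # by self.sparse_alpha.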

                # crop the amodal shape to the amodal bbox origin
                a_shifted_poly = indexed_shape - np.array([a_bbox[0], a_bbox[1]])
                amodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                # crop the inmodal shape to the same amodal bbox origin
                i_shifted_poly = i_shape_clipped - np.array([a_bbox[0], a_bbox[1]])
                inmodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                obj_mask = (
                    amodal_obj_mask + inmodal_obj_mask
                ) * 255. / 2  # convert to float type in image scale
                obj_mask = cv2.resize(
                    obj_mask.astype(np.uint8),
                    dsize=(self.vote_vec_dim, self.vote_vec_dim),
                    interpolation=cv2.INTER_LINEAR) * 1.
                votes_[k] = obj_mask.reshape((1, -1)) / 255.
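                # votes_[k] is a soft mask target in [0, 1]: ~1.0 where the
                # amodal and inmodal masks agree (visible region), ~0.5 where
                # only the amodal mask covers (occluded region).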

                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

                # occlusion level map gt
                occ_map[0] += self.polys_to_mask(
                    [np.ndarray.flatten(indexed_shape).tolist()],
                    self.fmap_size['h'], self.fmap_size['w']) * 1.

        occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ
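        # occ_map now counts how many instance masks cover each pixel,
        # clipped to max_occ and normalized to [0, 1].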

        # -----------------------------------debug---------------------------------
        # for bbox, label, shape in zip(bboxes, labels, shapes_):
        #     # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)
        #     cv2.putText(image_show, str(self.reverse_labels[label]), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        #     # print(shape, shape.shape)
        #     cv2.polylines(image_show, [shape.reshape(self.n_vertices, 2).astype(np.int32)], True, (0, 0, 255),
        #                   thickness=1)
        # # cv2.imshow('img', image_show)
        # # cv2.imshow('occ', occ_map.astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]) * 255)
        # m_img = cv2.cvtColor((occ_map * 255).astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]),
        #                      code=cv2.COLOR_GRAY2BGR)
        # cat_img = np.concatenate([m_img, image_show], axis=0)
        # cv2.imshow('segm', cat_img)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'occ_map': occ_map,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'votes': votes_,
            'c': center,
            's': scale,
            'img_id': img_id
        }
    def __getitem__(self, index):
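        """Load one COCO image and build contour-regression targets.

        Each instance contour is resampled to n_vertices points, normalized
        by its standard deviation, and encoded with fast_ista; the per-object
        std ('std') is returned so shapes can be de-normalized at decode time.
        """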
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = get_connected_polygon_using_mask(
                anno['segmentation'], (h_img, w_img),
                n_vertices=self.n_vertices,
                closing_max_kernel=50)

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to a fixed number of vertices
            if len(contour) > self.n_vertices:
                fixed_contour = resample(contour, num=self.n_vertices)
            else:
                fixed_contour = turning_angle_resample(contour,
                                                       self.n_vertices)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid (degenerate or non-finite) shapes
                continue

            updated_bbox = [
                np.min(fixed_contour[:, 0]),
                np.min(fixed_contour[:, 1]),
                np.max(fixed_contour[:, 0]),
                np.max(fixed_contour[:, 1])
            ]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([0])  # 1-D, matching the non-empty case
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        # bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(160, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        # -----------------------------------debug---------------------------------
        # image_show = img.copy()
        # for bbox, label in zip(bboxes, labels):
        #   if flipped:
        #     bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        #   bbox[:2] = affine_transform(bbox[:2], trans_img)
        #   bbox[2:] = affine_transform(bbox[2:], trans_img)
        #   bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1)
        #   bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1)
        #   cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        #   cv2.putText(image_show, self.class_name[label + 1], (int(bbox[0]), int(bbox[1])),
        #               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        # cv2.imshow('img', image_show)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # gt mass centers to bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        contour_std_ = np.zeros(
            (self.max_objs, 1),
            dtype=np.float32)  # per-object contour scale (std), used to de-normalize shapes
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # apply the scale-and-crop transform to the contour
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))

            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            contour_std = np.std(indexed_shape, axis=0) + 1e-4
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            # centered_shape = indexed_shape - mass_center
            norm_shape = (indexed_shape - mass_center) / np.sqrt(
                np.sum(contour_std**2))

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                # obj_c = mass_center
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = norm_shape.reshape((1, -1))
                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)
                contour_std_[k] = np.sqrt(np.sum(contour_std**2))

                w_h_[k] = 1. * w, 1. * h
                # w_h_[k] = mass_center[1] - bbox[1], bbox[3] - mass_center[1], \
                #           mass_center[0] - bbox[0], bbox[2] - mass_center[0]  # [top, bottom, left, right] distance
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'std': contour_std_,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
    def __getitem__(self, index):
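        """Load one COCO image and build plain CenterNet detection targets.

        At test time the input is padded up to a multiple of opt.pad + 1
        (opt.pad is typically 2**k - 1) so the backbone stride divides the
        resolution; ground-truth boxes are attached under 'meta' for
        evaluation and debugging.
        """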
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), cfg.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        if self.split == 'train':
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
        else:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)

        flipped = False
        if self.split == 'train':
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = get_border(128, img.shape[1])
            h_border = get_border(128, img.shape[0])

            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_matrix = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_matrix, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = inp.astype(np.float32) / 255.

        # TODO: color_aug can push some values of inp below 0 before normalization
        if self.split == 'train':
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - cfg.mean) / cfg.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // cfg.down_ratio
        output_w = input_w // cfg.down_ratio
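        # Targets are built at feature-map resolution: the backbone
        # downsamples the input by cfg.down_ratio, so a second affine
        # transform maps annotations into that coordinate space.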
        trans_matrix = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        ind = np.zeros(cfg.max_objs, dtype=np.int64)
        reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)

        gt_box = []
        for i in range(num_objs):
            ann = anns[i]
            bbox = coco2x1y1x2y2(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_matrix)
            bbox[2:] = affine_transform(bbox[2:], trans_matrix)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if h > 0 and w > 0:
                # get an object size-adaptive radius
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                draw_umich_gaussian(hm[cls_id], ct_int, radius)

                wh[i] = 1. * w, 1. * h
                ind[i] = ct_int[1] * output_w + ct_int[0]
                reg[i] = ct - ct_int
                reg_mask[i] = 1

                gt_box.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg': reg,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        if self.opt.debug > 0 or self.split != 'train':
            gt_box = np.array(
                gt_box, dtype=np.float32) if len(gt_box) > 0 else np.zeros(
                    (1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_box, 'img_id': img_id}
            ret['meta'] = meta
        return ret
    def __getitem__(self, index):
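        """Load one image with rotated-box XML annotations.

        The robndbox fields (cx, cy, w, h, angle) match roLabelImg-style
        labels; the angle is supervised separately via 'theta', and no
        affine warp is applied to the image here, so centers are taken
        directly from the annotations scaled by down_ratio.
        """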
        img_id = self.ids[index]
        img_path = self.data_dir + "/images/" + img_id + ".jpeg"
        annot_path = self.data_dir + "/annotations/" + img_id + ".xml"

        tree = elemTree.parse(annot_path)
        annotations = [
            [
                float(obj.find('robndbox').find('cx').text),     # center x
                float(obj.find('robndbox').find('cy').text),     # center y
                float(obj.find('robndbox').find('w').text),      # width
                float(obj.find('robndbox').find('h').text),      # height
                float(obj.find('robndbox').find('angle').text),  # angle
            ]
            for obj in tree.findall('./object')
        ]

        labels = np.array([1. for _ in annotations])  # single foreground class
        bboxes = np.array(annotations, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0., 0.]], dtype=np.float32)  # match the 5-value rbox format
            labels = np.array([0])

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2.imread returns BGR; swap to RGB

        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(self.img_size['w'], width)
            h_border = get_border(self.img_size['h'], height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

        img = img.astype(np.float32) / 255.

        #if self.split == 'train':
        #color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        #img -= self.mean
        #img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height
        thetas = np.zeros((self.max_objs, 1), dtype=np.float32)
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        objCnt = np.zeros((self.max_objs, 2), dtype=np.float32)

        # detections = []
        for k, (rbox, label) in enumerate(zip(bboxes, labels)):
            w, h, angle = rbox[2], rbox[3], rbox[-1]
            if h > 0 and w > 0:
                obj_c = np.array([rbox[0], rbox[1]], dtype=np.float32) / float(
                    self.down_ratio)
                objCnt[k] = obj_c
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[int(label) - 1], obj_c_int, radius)
                w_h_[k] = w / self.img_size['w'], h / self.img_size['h']
                thetas[k] = angle
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1
                # groundtruth bounding box coordinate with class
                # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2,
                #                    obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label])

        # detections = np.array(detections, dtype=np.float32) \
        #   if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32)

        return {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id,
            'theta': thetas,
            'center': objCnt
        }
    def __getitem__(self, index):
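        """Load one COCO image and build standard CenterNet targets
        (class heatmap, box width/height, sub-pixel offsets, flattened
        center indices, and a slot-validity mask)."""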
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        labels = np.array(
            [self.cat_ids[anno['category_id']] for anno in annotations])
        bboxes = np.array([anno['bbox'] for anno in annotations],
                          dtype=np.float32)
        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([0])  # 1-D, matching the non-empty case
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        # -----------------------------------debug---------------------------------
        # for bbox, label in zip(bboxes, labels):
        #   if flipped:
        #     bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        #   bbox[:2] = affine_transform(bbox[:2], trans_img)
        #   bbox[2:] = affine_transform(bbox[2:], trans_img)
        #   bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1)
        #   bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1)
        #   cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        #   cv2.putText(img, self.class_name[label + 1], (int(bbox[0]), int(bbox[1])),
        #               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        # cv2.imshow('img', img)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # detections = []
        for k, (bbox, label) in enumerate(zip(bboxes, labels)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1
                # groundtruth bounding box coordinate with class
                # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2,
                #                    obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label])

        # detections = np.array(detections, dtype=np.float32) \
        #   if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32)

        return {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
    def __getitem__(self, index):
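        """Load one COCO image and build contour plus sparse-code targets.

        Unlike the std-normalized variant above, contours here are normalized
        by the box half-extents (w/2, h/2), and per-object binary mask
        "votes" are rasterized from the contour cropped to its box.
        """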
        img_id = self.images[index]
        img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        # img = self.coco.loadImgs(ids=[img_id])[0]
        # w_img = int(img['width'])
        # h_img = int(img['height'])
        # if w_img < 2 or h_img < 2:
        #     continue

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1 or not isinstance(anno['segmentation'], list):  # exclude crowd / RLE objects
                continue

            if len(anno['segmentation']) > 1:
                obj_contours = [np.array(s).reshape((-1, 2)).astype(np.int32) for s in anno['segmentation']]
                obj_contours = sorted(obj_contours, key=cv2.contourArea)
                polygons = obj_contours[-1]
            else:
                polygons = anno['segmentation'][0]

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            if gt_w < 5 or gt_h < 5:
                continue
            contour = np.array(polygons).reshape((-1, 2))

            # Remove tiny objects, then resample the contour to a fixed number of vertices
            if cv2.contourArea(contour.astype(np.int32)) < 35:
                continue

            fixed_contour = uniformsample(contour, self.n_vertices)

            # fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            # fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid (degenerate or non-finite) shapes
                continue

            updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
                            np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([0])  # 1-D, matching the non-empty case
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        # bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(150, width)
            h_border = get_border(150, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
        votes_ = np.zeros((self.max_objs, self.vote_length), dtype=np.float32)  # votes for hmap and code
        w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2), dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros((self.max_objs, 2), dtype=np.float32)  # gt mass centers to bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs,), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # apply the scale-and-crop transform to the contour
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))

            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            # contour_std = np.std(indexed_shape, axis=0) + 1e-4
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            # centered_shape = indexed_shape - mass_center
            norm_shape = (indexed_shape - mass_center) / np.array([w / 2., h / 2.])

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = norm_shape.reshape((1, -1))
                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary,
                                         lmbda=self.sparse_alpha, max_iter=80)
                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

                # getting the gt votes
                shifted_poly = indexed_shape - np.array([bbox[0], bbox[1]]) + 1  # crop to the bbox, add padding 1
                # obj_mask = polys_to_mask([np.ndarray.flatten(shifted_poly, order='C').tolist()], h + 2, w + 2) * 255
                obj_mask = np.zeros((int(h) + 3, int(w) + 3), dtype=np.uint8)
                cv2.drawContours(obj_mask, shifted_poly[None, :, :].astype(np.int32), color=255, contourIdx=-1,
                                 thickness=-1)

                # instance = obj_mask.copy()
                # obj_mask = cv2.resize(obj_mask.astype(np.uint8), dsize=(self.vote_vec_dim, self.vote_vec_dim),
                #                       interpolation=cv2.INTER_LINEAR) * 1.
                # votes_[k] = obj_mask.reshape((1, -1)) / 255.
                # votes_[k] = (obj_mask.reshape((1, -1)) > 255 * 0.4) * 1.0

                # resize the rasterized mask down to the vote grid
                obj_mask = cv2.resize(obj_mask.astype(np.uint8), dsize=(self.vote_vec_dim, self.vote_vec_dim),
                                      interpolation=cv2.INTER_LINEAR)  # INTER_AREA
                # obj_mask = cv2.resize(obj_mask.astype(np.uint8), dsize=(self.vote_vec_dim, self.vote_vec_dim),
                #                       interpolation=cv2.INTER_AREA)
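                # binarize: any pixel above 20% of full intensity counts as foreground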
                votes_[k] = (obj_mask.reshape((1, -1)) > 0.2 * 255) * 1.0
                # cv2.imshow('obj_mask', instance.astype(np.uint8))
                # cv2.waitKey()
                # cv2.imshow('votes', obj_mask.astype(np.uint8))
                # cv2.waitKey()

        return {'image': img, 'shapes': shapes_, 'codes': codes_, 'offsets': center_offsets, 'votes': votes_,
                'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
                'c': center, 's': scale, 'img_id': img_id}
    def __getitem__(self, index):
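        """Load one COCO image and build sparse shape-code targets only.

        Multi-polygon annotations are merged by rasterizing them and growing
        a morphological closing kernel until a single contour remains; codes
        are stored as exp(coefficients), presumably so the network regresses
        them in log space.
        """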
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            # polygons = anno['segmentation'][0]
            polygons = anno['segmentation']
            if len(polygons) > 1:
                bg = np.zeros((h_img, w_img, 1), dtype=np.uint8)
                for poly in polygons:
                    len_poly = len(poly)
                    vertices = np.zeros((1, len_poly // 2, 2), dtype=np.int32)
                    for i in range(len_poly // 2):
                        vertices[0, i, 0] = int(poly[2 * i])
                        vertices[0, i, 1] = int(poly[2 * i + 1])
                    # cv2.fillPoly(bg, vertices, color=(255))
                    cv2.drawContours(bg,
                                     vertices,
                                     color=(255),
                                     contourIdx=-1,
                                     thickness=-1)

                pads = 5
                while True:
                    kernel = np.ones((pads, pads), np.uint8)
                    bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel)
                    obj_contours, _ = cv2.findContours(bg_closed,
                                                       cv2.RETR_TREE,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    if len(obj_contours) > 1:
                        pads += 5
                    else:
                        polygons = obj_contours[0]
                        break
            else:
                # continue
                polygons = anno['segmentation'][0]

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to a fixed number of vertices
            fixed_contour = resample(contour, num=self.n_vertices)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)
            # contour_mean = np.mean(fixed_contour, axis=0)
            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid (degenerate or non-finite) shapes
                continue

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        # if img_id in self.all_annotations.keys():
        #     annotations = self.all_annotations[img_id]
        #     shape_annots = self.all_shapes[img_id]
        #     labels = annotations['cat_id']
        #     bboxes = annotations['bbox']  # xyxy format
        #     shapes = shape_annots['shape']  # polygonal vertices format xyxyxyxyxy...
        #     codes = annotations['codes']
        #     labels = np.array(labels)
        #     bboxes = np.array(bboxes, dtype=np.float32)
        #     codes = np.array(codes, dtype=np.float32)
        #     shapes = np.array(shapes, dtype=np.float32)
        # else:
        #     bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        #     labels = np.array([[0]])
        #     codes = np.zeros(shape=(1, self.n_codes), dtype=np.float32)
        #     shapes = np.zeros(shape=(1, self.n_vertices * 2), dtype=np.float32)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        # -----------------------------------debug---------------------------------
        # image_show = img.copy()
        # for bbox, label, shape in zip(bboxes, labels, shapes):
        #     if flipped:
        #         bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        #         # Flip the contour
        #         for m in range(self.n_vertices):
        #             shape[2 * m] = width - shape[2 * m] - 1
        #     bbox[:2] = affine_transform(bbox[:2], trans_img)
        #     bbox[2:] = affine_transform(bbox[2:], trans_img)
        #     bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1)
        #     bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1)
        #
        #     # generate gt shape mean and std from contours
        #     for m in range(self.n_vertices):  # apply scale and crop transform to shapes
        #         shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_img)
        #
        #     contour = np.reshape(shape, (self.n_vertices, 2))
        #     # Indexing from the left-most vertex, argmin x-axis
        #     idx = np.argmin(contour[:, 0])
        #     indexed_shape = np.concatenate((contour[idx:, :], contour[:idx, :]), axis=0)
        #
        #     clockwise_flag = check_clockwise_polygon(indexed_shape)
        #     if not clockwise_flag:
        #         fixed_contour = np.flip(indexed_shape, axis=0)
        #     else:
        #         fixed_contour = indexed_shape
        #
        #     contour[:, 0] = np.clip(fixed_contour[:, 0], 0, self.img_size['w'] - 1)
        #     contour[:, 1] = np.clip(fixed_contour[:, 1], 0, self.img_size['h'] - 1)
        #
        #     # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        #     # cv2.polylines(image_show, [contour.astype(np.int32)], True, (0, 0, 255), thickness=2)
        #     cv2.drawContours(image_show, [contour.astype(np.int32)],
        #                      color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
        #                      contourIdx=-1, thickness=-1)
        #
        # cv2.imshow('img', image_show)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        # w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of the shape
        w_h_std = np.zeros((self.max_objs, 2),
                           dtype=np.float32)  # per-axis std of the contour (its scale)
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt coefficients/codes for shapes
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # detections = []
        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # apply the scale-and-crop transform to the contour
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            contour = np.reshape(shape, (self.n_vertices, 2))
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(contour[:, 0])
            indexed_shape = np.concatenate(
                (contour[idx:, :], contour[:idx, :]), axis=0)

            clockwise_flag = check_clockwise_polygon(indexed_shape)
            if not clockwise_flag:
                fixed_contour = np.flip(indexed_shape, axis=0)
            else:
                fixed_contour = indexed_shape.copy()

            contour[:, 0] = np.clip(fixed_contour[:, 0], 0,
                                    self.fmap_size['w'] - 1)
            contour[:, 1] = np.clip(fixed_contour[:, 1], 0,
                                    self.fmap_size['h'] - 1)

            contour_mean = np.mean(contour, axis=0)
            contour_std = np.std(contour, axis=0)
            shape_scale = np.sqrt(np.sum(contour_std**2))
            if shape_scale <= 1e-6:  # degenerate contour
                continue
            norm_shape = (contour - contour_mean) / shape_scale

            if h > 0 and w > 0:
                obj_c = contour_mean
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_std[k] = contour_std
                temp_codes, _ = fast_ista(norm_shape.reshape((1, -1)),
                                          self.dictionary,
                                          lmbda=self.sparse_alpha,
                                          max_iter=80)
                codes_[k] = np.exp(temp_codes)
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1
                # groundtruth bounding box coordinate with class
                # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2,
                #                    obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label])

        # detections = np.array(detections, dtype=np.float32) \
        #   if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32)

        # -----------------------------------debug---------------------------------
        # canvas = np.zeros((self.fmap_size['h'] * 2, self.fmap_size['w'] * 2, 3), dtype=np.float32)
        # canvas[0:self.fmap_size['h'], 0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[0], 2), (1, 1, 3))
        # canvas[0:self.fmap_size['h'], self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[1], 2), (1, 1, 3))
        # canvas[self.fmap_size['h']:, 0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[2], 2), (1, 1, 3))
        # canvas[self.fmap_size['h']:, self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[3], 2), (1, 1, 3))
        # print(w_h_[0], regs[0])
        # cv2.imshow('hmap', canvas)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------
        # -----------------------------------debug---------------------------------
        # image_show = img.copy()
        # for bbox, label, shape in zip(bboxes, labels, shapes):
        #     cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        #     cv2.polylines(image_show, [contour.astype(np.int32)], True, (0, 0, 255), thickness=2)
        # cv2.imshow('img', image_show)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        return {
            'image': img,
            'codes': codes_,
            'hmap': hmap,
            'w_h_std': w_h_std,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }