Example #1
0
    def _add_gt_annotations(self, entry):
        """Flag the object classes present in an image on an roidb entry.

        Loads every COCO annotation for ``entry['id']`` and discards the ones
        whose area is below ``cfg.TRAIN.GT_MIN_AREA``, that carry
        ``ignore == 1``, or whose box is degenerate after being clipped to
        the image. For each remaining annotation the contiguous class id is
        looked up and ``entry['gt_classes'][0, cls]`` is set to 1.

        Side effect: every remaining annotation dict gets a ``'clean_bbox'``
        key holding the clipped ``[x1, y1, x2, y2]`` box.
        """
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        width = entry['width']
        height = entry['height']
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if obj.get('ignore') == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)

        # Resolve every class id before touching the entry, so a failed
        # lookup leaves entry['gt_classes'] unmodified.
        present_classes = [
            self.json_category_id_to_contiguous_id[obj['category_id']]
            for obj in valid_objs
        ]
        for cls in present_classes:
            entry['gt_classes'][0, cls] = 1
Example #2
0
    def _add_gt_boxes(self, entry):
        """Append ground-truth boxes and class ids to an roidb entry.

        Each COCO annotation of ``entry['id']`` is converted from
        (x1, y1, w, h) to corner form, clipped to the image, and then passed
        through ``box_utils.xyxy_to_xywh_yolo`` together with the image
        height and width. Annotations with zero area or a degenerate clipped
        box are dropped. The converted boxes are appended to
        ``entry['boxes']`` and the contiguous class ids to
        ``entry['gt_classes']``.

        Side effect: every kept annotation dict gets a ``'clean_bbox'`` key
        holding the converted box.
        """
        annotations = self.COCO.loadAnns(
            self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        )
        img_w = entry['width']
        img_h = entry['height']

        kept = []
        for ann in annotations:
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w)
            cx, cy, bw, bh = box_utils.xyxy_to_xywh_yolo(
                [left, top, right, bottom], img_h, img_w)

            # Skip annotations without area or with a collapsed box.
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [cx, cy, bw, bh]
            kept.append(ann)

        n_kept = len(kept)
        new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
        new_classes = np.zeros((n_kept), dtype=entry['gt_classes'].dtype)
        for row, ann in enumerate(kept):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            new_boxes[row, :] = ann['clean_bbox']
            new_classes[row] = class_id

        entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
Example #3
0
    def _add_gt_annotations_wad(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Reads the ``<entry_id without its last 4 chars>_instanceIds.png``
        label image from ``self.WAD_CVPR2018.train_label_dir``. Every
        distinct pixel value ``label`` whose class (``label // 1000``) is in
        ``self.WAD_CVPR2018.eval_class`` and that covers at least
        ``cfg.TRAIN.GT_MIN_AREA`` pixels becomes one ground-truth instance
        with an RLE mask, a box spanning the min/max mask pixel coordinates
        (clipped to the image), its pixel count as area, and a one-hot
        overlap row.

        ``entry['height']`` and ``entry['width']`` are overwritten from
        ``self.WAD_CVPR2018.image_shape``; the new instances are appended to
        ``boxes``, ``segms``, ``gt_classes``, ``seg_areas``, ``gt_overlaps``
        (kept as a CSR matrix), ``is_crowd`` and ``box_to_gt_ind_map``.

        Raises:
            AssertionError: if the label image does not exist.
        """
        label_image_name = entry['entry_id'][:-4] + '_instanceIds.png'
        label_image = os.path.join(self.WAD_CVPR2018.train_label_dir, label_image_name)
        assert os.path.exists(label_image), 'Label \'{}\' not found'.format(label_image)
        # Materialize the pixels while the file is open, then release it.
        with Image.open(label_image) as label_file:
            l_img = np.asarray(label_file)

        entry['height'] = self.WAD_CVPR2018.image_shape[0]
        entry['width'] = self.WAD_CVPR2018.image_shape[1]

        valid_objs = []
        valid_segms = []
        for label in np.unique(l_img):
            class_id = label // 1000
            if class_id not in self.WAD_CVPR2018.eval_class:
                continue
            # Compute the instance mask once; its pixel count is the area.
            mask = l_img == label
            area = np.sum(mask)
            if area < cfg.TRAIN.GT_MIN_AREA:
                continue

            mask_f = np.array(mask, order='F', dtype=np.uint8)
            valid_segms.append(COCOmask.encode(mask_f))

            # np.where yields (row, column) indices: rows are y, columns x.
            rows, cols = np.where(mask)
            x1, y1, x2, y2 = cols.min(), rows.min(), cols.max(), rows.max()
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, entry['height'], entry['width'])
            valid_objs.append(
                {'area': area, 'clean_bbox': [x1, y1, x2, y2], 'category_id': class_id})

        num_valid_objs = len(valid_objs)
        boxes = np.zeros((num_valid_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_valid_objs), dtype=np.int32)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes), dtype=np.float32)
        seg_areas = np.zeros((num_valid_objs), dtype=np.float32)
        # The builtin bool replaces the np.bool alias removed from NumPy.
        # The array stays all-False: no instance is marked as a crowd here.
        is_crowd = np.zeros((num_valid_objs), dtype=bool)
        box_to_gt_ind_map = np.zeros((num_valid_objs), dtype=np.int32)

        for ix, obj in enumerate(valid_objs):
            cls = self.WAD_CVPR2018.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            box_to_gt_ind_map[ix] = ix
            gt_overlaps[ix, cls] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(), gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'], box_to_gt_ind_map)
Example #4
0
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Takes the objects of ``self.scenes[entry['id']]``, turns each
        ``x``/``y``/``w``/``h`` record into a corner-form box clipped to the
        image, and keeps the ones that still have positive width and height.
        The kept objects are appended to ``entry['boxes']``,
        ``entry['gt_classes']`` (via ``self.category_to_id_map[obj['name']]``),
        ``entry['gt_overlaps']`` (one-hot rows, kept as a CSR matrix),
        ``entry['is_crowd']`` (always 0) and ``entry['box_to_gt_ind_map']``.

        Side effect: every kept object dict gets a ``'clean_bbox'`` key.
        """
        objs = self.scenes[entry['id']]['objects']
        width = entry['width']
        height = entry['height']

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs.values():
            x, y, w, h = obj['x'], obj['y'], obj['w'], obj['h']
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x, y, x + w, y + h, height, width
            )
            if x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)

        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        # No object is ever a crowd region here, so is_crowd stays all zero
        # and every object receives a one-hot overlap row.
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )

        for ix, obj in enumerate(valid_objs):
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = self.category_to_id_map[obj['name']]
            box_to_gt_ind_map[ix] = ix
            gt_overlaps[ix, gt_classes[ix]] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
Example #5
0
    def _add_gt_annotations(self, entry):
        """Add ground truth object and relationship annotations to an entry.

        Object part: loads the COCO annotations of ``entry['id']``, drops
        those below ``cfg.TRAIN.GT_MIN_AREA``, flagged ``ignore == 1``, or
        with a degenerate clipped box, and appends the rest to ``boxes``,
        ``gt_classes``, ``seg_areas``, ``gt_overlaps`` (CSR; crowd rows are
        all -1), ``is_crowd`` and ``box_to_gt_ind_map``. When
        ``self.keypoints`` is not None, ``gt_keypoints`` and
        ``has_visible_keypoints`` are filled as well. No segmentations are
        collected, so ``entry['segms']`` is left as it is.

        Relationship part: for every relation in
        ``self.rel_anns[entry['file_name']]`` appends the subject/object
        boxes (converted with ``box_utils_rel.y1y2x1x2_to_x1y1x2y2``), the
        subject/object/predicate class ids, one-hot overlap matrices (CSR)
        and ``pair_to_gt_ind_map``.

        Also sets ``entry['dataset_name']`` and finally removes
        ``entry['file_name']``.

        Side effect: every kept annotation dict gets a ``'clean_bbox'`` key.
        """
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        width = entry['width']
        height = entry['height']

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if obj.get('ignore') == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )
        has_keypoints = self.keypoints is not None
        if has_keypoints:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype
            )

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if has_keypoints:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if has_keypoints:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = im_has_visible_keypoints

        entry['dataset_name'] = self.name

        # add relationship annotations
        im_rels = self.rel_anns[entry['file_name']]
        num_rels = len(im_rels)
        sbj_gt_boxes = np.zeros((num_rels, 4), dtype=entry['sbj_gt_boxes'].dtype)
        obj_gt_boxes = np.zeros((num_rels, 4), dtype=entry['obj_gt_boxes'].dtype)
        sbj_gt_classes = np.zeros(num_rels, dtype=entry['sbj_gt_classes'].dtype)
        obj_gt_classes = np.zeros(num_rels, dtype=entry['obj_gt_classes'].dtype)
        prd_gt_classes = np.zeros(num_rels, dtype=entry['prd_gt_classes'].dtype)
        for ix, rel in enumerate(im_rels):
            sbj_gt_boxes[ix] = box_utils_rel.y1y2x1x2_to_x1y1x2y2(
                rel['subject']['bbox'])
            sbj_gt_classes[ix] = rel['subject']['category']  # excludes background
            obj_gt_boxes[ix] = box_utils_rel.y1y2x1x2_to_x1y1x2y2(
                rel['object']['bbox'])
            obj_gt_classes[ix] = rel['object']['category']  # excludes background
            prd_gt_classes[ix] = rel['predicate']  # excludes background
        entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'], sbj_gt_boxes, axis=0)
        entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'], obj_gt_boxes, axis=0)
        entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'], sbj_gt_classes)
        entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'], obj_gt_classes)
        entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'], prd_gt_classes)

        # One-hot overlap rows: row i gets a 1.0 in the column of relation
        # i's class. Indexed assignment replaces the per-row Python loops.
        rel_rows = np.arange(num_rels)

        sbj_gt_overlaps = np.zeros(
            (num_rels, self.num_obj_classes), dtype=entry['sbj_gt_overlaps'].dtype)
        sbj_gt_overlaps[rel_rows, sbj_gt_classes] = 1.0
        entry['sbj_gt_overlaps'] = np.append(
            entry['sbj_gt_overlaps'].toarray(), sbj_gt_overlaps, axis=0)
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['sbj_gt_overlaps'])

        obj_gt_overlaps = np.zeros(
            (num_rels, self.num_obj_classes), dtype=entry['obj_gt_overlaps'].dtype)
        obj_gt_overlaps[rel_rows, obj_gt_classes] = 1.0
        entry['obj_gt_overlaps'] = np.append(
            entry['obj_gt_overlaps'].toarray(), obj_gt_overlaps, axis=0)
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['obj_gt_overlaps'])

        prd_gt_overlaps = np.zeros(
            (num_rels, self.num_prd_classes), dtype=entry['prd_gt_overlaps'].dtype)
        prd_gt_overlaps[rel_rows, prd_gt_classes] = 1.0
        pair_to_gt_ind_map = np.zeros(
            (num_rels), dtype=entry['pair_to_gt_ind_map'].dtype)
        pair_to_gt_ind_map[:] = rel_rows
        entry['prd_gt_overlaps'] = np.append(
            entry['prd_gt_overlaps'].toarray(), prd_gt_overlaps, axis=0)
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(entry['prd_gt_overlaps'])
        entry['pair_to_gt_ind_map'] = np.append(
            entry['pair_to_gt_ind_map'], pair_to_gt_ind_map)

        # The file name was only needed to look up the relations above.
        entry.pop('file_name', None)
Example #6
0
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Switches to sequence ``entry['seq_idx']``, loads the ground-truth
        label image for frame ``entry['idx']`` and builds one object per
        distinct non-zero label value (0 is background): an RLE mask, the
        mask's pixel count as area, the expanded bounding rectangle of the
        mask, a category id and a per-sequence / global instance id.

        The category id comes from ``self.cls_mapper`` when it is set;
        otherwise it is the global instance id, or the raw label value when
        ``self.use_local_id`` is true. Objects whose category id is None are
        assigned class ``self.num_classes - 1``.

        Objects with positive area and a non-degenerate clipped box are
        appended to ``boxes``, ``segms``, ``gt_classes``, ``instance_id``,
        ``global_instance_id``, ``seg_areas``, ``gt_overlaps`` and
        ``gt_overlaps_id`` (both CSR), ``is_crowd`` and
        ``box_to_gt_ind_map``.

        Raises:
            AssertionError: if the global instance id start of the sequence
                is 0, or if the two overlap matrices end up with different
                row counts.
        """
        seq_idx = entry['seq_idx']
        idx = entry['idx']
        self.set_to_sequence(seq_idx)
        # Ground-truth label image of this frame.
        gt = self.get_gt(idx)

        objs = []
        for val in np.unique(gt):
            # val == 0 is background
            if val == 0:
                continue
            # A boolean comparison cast to uint8 can only hold 0 and 1.
            mask = np.array(gt == val, dtype=np.uint8)
            x, y, w, h = cv2.boundingRect(mask)
            x, y, w, h = self._expand_box(x, y, w, h, rate=0.05)

            assert (self.global_instance_id_start_of_seq[seq_idx] != 0)
            # val - 1 to remove background.
            global_id = self.global_instance_id_start_of_seq[seq_idx] + val - 1

            if self.cls_mapper is not None:
                # set category id by cls_mapper.
                category_id = self.cls_mapper[val]
            elif not self.use_local_id:
                category_id = global_id
            else:
                category_id = val

            objs.append({
                'segmentation': mask_util.encode(
                    np.array(mask, order='F', dtype=np.uint8)),
                'area': np.sum(mask),
                'iscrowd': 0,
                'bbox': (x, y, w, h),
                'category_id': category_id,
                'instance_id': val,
                'global_instance_id': global_id,
            })

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # Masks are RLE encoded and stored as dicts
            assert (isinstance(obj['segmentation'], dict))

            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        instance_id = np.zeros((num_valid_objs),
                               dtype=entry['instance_id'].dtype)
        global_instance_id = np.zeros((num_valid_objs),
                                      dtype=entry['global_instance_id'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        gt_overlaps_id = np.zeros(
            (num_valid_objs, self.number_of_instance_ids),
            dtype=entry['gt_overlaps_id'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)

        for ix, obj in enumerate(valid_objs):
            if obj['category_id'] is not None:
                cls = obj['category_id']
            else:
                # No category id specified: use index self.num_classes - 1.
                cls = self.num_classes - 1
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            instance_id[ix] = obj['instance_id']
            global_instance_id[ix] = obj['global_instance_id']
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
                gt_overlaps_id[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
                gt_overlaps_id[ix, global_instance_id[ix]] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['instance_id'] = np.append(entry['instance_id'], instance_id)
        entry['global_instance_id'] = np.append(entry['global_instance_id'],
                                                global_instance_id)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])

        entry['gt_overlaps_id'] = np.append(entry['gt_overlaps_id'].toarray(),
                                            gt_overlaps_id,
                                            axis=0)
        entry['gt_overlaps_id'] = scipy.sparse.csr_matrix(
            entry['gt_overlaps_id'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        assert (
            entry['gt_overlaps_id'].shape[0] == entry['gt_overlaps'].shape[0])
    def _add_gt_annotations_Car3d(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        Loads ``car_poses/<entry_id>.json`` from ``self.Car3D.data_dir``. For
        every car pose in it, the vertices of the matching car model are
        projected into the image with the pose and the camera intrinsics,
        and the min/max of the projected points (clipped to the image)
        becomes the box.

        ``entry['height']`` and ``entry['width']`` are overwritten from
        ``self.Car3D.image_shape``; the new instances are appended to
        ``boxes``, ``seg_areas``, ``gt_overlaps`` (CSR), ``is_crowd``,
        ``box_to_gt_ind_map``, ``visible_rate``, ``poses``,
        ``car_cat_classes`` and ``quaternions``.

        Raises:
            AssertionError: if the car pose file does not exist.
        """
        entry_id = entry['entry_id']
        car_pose_file = os.path.join(self.Car3D.data_dir, 'car_poses',
                                     entry_id + '.json')
        assert os.path.exists(car_pose_file), 'Label \'{}\' not found'.format(
            car_pose_file)
        with open(car_pose_file) as f:
            car_poses = json.load(f)
        entry['height'] = self.Car3D.image_shape[0]
        entry['width'] = self.Car3D.image_shape[1]

        intrinsic_mat = self.Car3D.get_intrinsic_mat()
        valid_objs = []
        for car_pose in car_poses:
            car_name = self.Car3D.car_id2name[car_pose['car_id']].name
            car = self.car_models[car_name]
            pose = np.array(car_pose['pose'])

            # project 3D points to 2d image plane; pose[:3] are the Euler
            # angles and pose[3:] the translation passed to projectPoints
            rot_mat = euler_angles_to_rotation_matrix(pose[:3])
            rvect, _ = cv2.Rodrigues(rot_mat)
            imgpts, _ = cv2.projectPoints(np.float32(car['vertices']),
                                          rvect,
                                          pose[3:],
                                          intrinsic_mat,
                                          distCoeffs=None)
            imgpts = np.int32(imgpts).reshape(-1, 2)

            x1, y1 = imgpts[:, 0].min(), imgpts[:, 1].min()
            x2, y2 = imgpts[:, 0].max(), imgpts[:, 1].max()
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, entry['height'], entry['width'])
            valid_objs.append({
                'area': car_pose['area'],
                'clean_bbox': [x1, y1, x2, y2],
                'car_id': car_pose['car_id'],
                'visible_rate': car_pose['visible_rate'],
                'pose': car_pose['pose'],
            })

        num_valid_objs = len(valid_objs)
        boxes = np.zeros((num_valid_objs, 4), dtype=np.float32)
        # this is a legacy network from WAD Mask-RCNN: every car is marked
        # in column 4 of an 8-column overlap matrix
        car_class = 4
        gt_overlaps = np.zeros((num_valid_objs, 8), dtype=np.float32)
        seg_areas = np.zeros((num_valid_objs), dtype=np.float32)
        # The builtin bool replaces the np.bool alias removed from NumPy.
        # The array stays all-False: no car is marked as a crowd here.
        is_crowd = np.zeros((num_valid_objs), dtype=bool)
        box_to_gt_ind_map = np.zeros((num_valid_objs), dtype=np.int32)

        # newly added for 3d car
        visible_rate = np.zeros((num_valid_objs), dtype=np.float32)
        poses = np.zeros((num_valid_objs, 6), dtype=np.float32)
        quaternions = np.zeros((num_valid_objs, 4), dtype=np.float32)
        car_cat_classes = np.zeros((num_valid_objs), dtype=np.int32)

        for ix, obj in enumerate(valid_objs):
            # Index of this car id within the list of unique car models.
            cls = np.where(self.Car3D.unique_car_models == obj['car_id'])[0][0]
            boxes[ix, :] = obj['clean_bbox']
            car_cat_classes[ix] = cls
            seg_areas[ix] = obj['area']
            box_to_gt_ind_map[ix] = ix
            gt_overlaps[ix, car_class] = 1.0
            visible_rate[ix] = obj['visible_rate']
            poses[ix] = obj['pose']
            quaternions[ix] = euler_angles_to_quaternions(
                np.array([obj['pose'][:3]]))
            # ensure the quaternion is in the upper hemisphere
            quaternions[ix] = quaternion_upper_hemispher(quaternions[ix])

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        # newly added for 3d car
        entry['visible_rate'] = np.append(entry['visible_rate'], visible_rate)
        entry['poses'] = np.append(entry['poses'], poses, axis=0)
        entry['car_cat_classes'] = np.append(entry['car_cat_classes'],
                                             car_cat_classes)
        entry['quaternions'] = np.append(entry['quaternions'],
                                         quaternions,
                                         axis=0)
Example #8
0
    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry.

        For every object of ``self.scenes[entry['id']]`` the mask is
        preprocessed into an RLE, decoded to obtain its pixel count (stored
        as ``obj['area']``), and converted to a box that is clipped to the
        image. Objects with positive area and a non-degenerate box are
        appended to ``boxes``, ``segms``, ``gt_classes``, ``seg_areas``,
        ``gt_overlaps`` (one-hot rows, kept as a CSR matrix), ``is_crowd``
        (always 0) and ``box_to_gt_ind_map``.

        The class is looked up in ``self.category_to_id_map`` by
        ``"<color> <material> <shape>"`` when ``cfg.CLEVR.COMP_CAT`` is
        truthy, otherwise by ``shape`` alone.

        Side effect: every object dict gets an ``'area'`` key, and kept
        ones additionally a ``'clean_bbox'`` key.
        """
        objs = self.scenes[entry['id']]['objects']
        width = entry['width']
        height = entry['height']

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        for obj in objs:
            rle = self._preprocess_rle(obj['mask'])
            mask = mask_utils.decode(rle)

            bbox, _ = segm_utils.rle_masks_to_boxes([rle])
            x1, y1, x2, y2 = bbox[0][0], bbox[0][1], bbox[0][2], bbox[0][3]
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width
            )

            obj['area'] = mask.sum()
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(rle)

        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype
        )
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        # No object is ever a crowd region here, so is_crowd stays all zero
        # and every object receives a one-hot overlap row.
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
        )

        for ix, obj in enumerate(valid_objs):
            if cfg.CLEVR.COMP_CAT:
                category = ' '.join([obj['color'], obj['material'], obj['shape']])
            else:
                category = obj['shape']
            cls = self.category_to_id_map[category]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            box_to_gt_ind_map[ix] = ix
            gt_overlaps[ix, cls] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
Example #9
0
    def _add_gt_annotations(self, entry, entry_id, annots):
        if len(annots) > 0:
            objs = self.convert_raw_predictions_to_objs(
                annots[entry_id], entry['id'])
        else:
            ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
            objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form x1, y1, w, h to x1, y1, x2, y2
            x1 = obj['bbox'][0]
            y1 = obj['bbox'][1]
            x2 = x1 + np.maximum(0., obj['bbox'][2] - 1.)
            y2 = y1 + np.maximum(0., obj['bbox'][3] - 1.)
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        tracks = -np.ones((num_valid_objs, 1), dtype=entry['tracks'].dtype)
        head_boxes = -np.ones((num_valid_objs, 4),
                              dtype=entry['head_boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros(
            (num_valid_objs, self.num_classes),
            dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros(
            (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros(
                (num_valid_objs, 3, self.num_keypoints),
                dtype=entry['gt_keypoints'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            if 'track_id' in obj:
                tracks[ix, 0] = obj['track_id']
            if 'head_box' in obj:
                # NOTE: This box has NOT BEEN CLEANED, and NOT BEEN converted
                # to (xmin, ymin, xmax, ymax). This is only here to be used
                # in MPII evaluations
                head_boxes[ix, :] = obj['head_box']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['tracks'] = np.append(entry['tracks'], tracks, axis=0)
        entry['head_boxes'] = np.append(entry['head_boxes'], head_boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints
    def _load_annotation_to_roidb(self, anno_file):
        """Load a JSON annotation file and convert it to ground-truth roidb entries.

        The file must hold a list of per-image entries. Each entry pairs a list
        of symptom-name lists (``syms``) index-wise with a list of polygons
        (``polygon``). For every sub-symptom that, after alias folding, is a
        key of ``self._class_to_ind``, one class index, one box and one
        flattened polygon are recorded. Entries whose ``fold`` equals ``'309'``
        are skipped.

        Args:
            anno_file: Path of the JSON annotation file.

        Returns:
            A list of dicts, one per kept entry, with the keys ``file_name``,
            ``cls_list``, ``height``, ``width``, ``polygons``, ``gt_boxes``,
            ``eva_id``, ``doc_name``, ``image_dir``, ``offset_x``,
            ``offset_y`` and ``fold``.
        """
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(anno_file) as anno_fp:
            entries = json.load(anno_fp)

        # Symptom names that are folded into another class name before the
        # class lookup.
        sym_aliases = {
            '膈面膨隆': '膈面异常',
            '膈面抬高': '膈面异常',
            '盘状肺不张': '纤维化表现',
        }

        gt_roidb = []
        small_cnt = 0  # counts every box side that had to be widened to 20
        for entry in entries:  # one entry per sample
            if entry['fold'] == '309':
                continue
            syms = entry['syms']
            polygons = entry['polygon']
            # NOTE(review): when an entry has neither 'rows' nor 'bottom_row',
            # rows/cols silently keep the previous entry's values (or are
            # unbound on the first entry) -- confirm the data always has one.
            if 'rows' in entry:
                rows = entry['rows']
                cols = entry['cols']
            elif 'bottom_row' in entry:
                rows = entry['bottom_row']
                cols = entry['right_col']
            file_name = entry['filename'] + '.jpg'
            evaId = entry['evaId']
            image_dir = self.image_directory
            docName = entry['docName']
            fold = entry['fold']
            if 'offset_x' in entry and 'offset_y' in entry:
                offset_x, offset_y = entry['offset_x'], entry['offset_y']
            else:
                offset_x, offset_y = 0, 0

            # Validate every (sym, polygon) pair first, then process the
            # pairs that passed.
            is_valid_polygon = [
                self._valid_polygon(polygon)
                for _, polygon in zip(syms, polygons)
            ]

            cls_list, gt_boxes = [], []
            filtered_polygons = []

            for is_valid, sym, polygon in zip(is_valid_polygon, syms, polygons):
                if not is_valid:  # exclude non-valid polygons
                    continue
                for s in sym:  # for each sub-sym of this region
                    s = sym_aliases.get(s, s)
                    if s not in self._class_to_ind:  # not one of the given classes
                        continue

                    polygon = [tuple(point) for point in polygon]
                    polygon_np = np.array(polygon)
                    x1, y1 = polygon_np[:, 0].min(), polygon_np[:, 1].min()
                    x2, y2 = polygon_np[:, 0].max(), polygon_np[:, 1].max()
                    x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                        x1, y1, x2, y2, rows, cols
                    )

                    # if nodule is too large, then assign it to diffusive nodules
                    if s == '结节' and (x2 - x1 > 300 or y2 - y1 > 300):
                        s = '弥漫性结节'

                    cls_list.append(self._class_to_ind[s])

                    # expand too-small boxes to 20 around their centre; this
                    # happens after clipping, so the result is not re-clipped
                    if x2 - x1 < 20:
                        cx = (x1 + x2) * 0.5
                        x1 = cx - 10
                        x2 = cx + 10
                        small_cnt += 1
                    if y2 - y1 < 20:
                        cy = (y1 + y2) * 0.5
                        y1 = cy - 10
                        y2 = cy + 10
                        small_cnt += 1

                    gt_boxes.append([x1, y1, x2, y2])
                    # [(x1, y1), (x2, y2)] -> [[x1, y1, x2, y2]]
                    filtered_polygons.append(
                        [list(chain.from_iterable(polygon))])

            assert len(cls_list) == len(gt_boxes) == len(filtered_polygons)
            gt_roidb.append({
                'file_name': file_name,
                'cls_list': cls_list,
                'height': rows,
                'width': cols,
                'polygons': filtered_polygons,
                'gt_boxes': gt_boxes,
                'eva_id': evaId,
                'doc_name': docName,
                'image_dir': image_dir,
                'offset_x': offset_x,
                'offset_y': offset_y,
                'fold': fold,
            })

        print('small boxes: ', small_cnt)
        return gt_roidb
Example #11
0
    def _add_gt_annotations(self, entry):
        """Append class-agnostic ground truth for one image to its roidb entry.

        Annotations for ``entry['id']`` are loaded from ``self.COCO``,
        filtered, and appended to the arrays already stored in ``entry``.
        Every kept object gets ``gt_classes == 1`` and an overlap of 1.0 in
        column 1, while its raw ``category_id`` goes to ``gt_cats``. A record
        for each kept object is also appended to
        ``self.cat_data[category_id]``.

        Args:
            entry: roidb dict of a single image; modified in place.
        """
        annotations = self.COCO.loadAnns(
            self.COCO.getAnnIds(imgIds=entry['id']))
        img_w = entry['width']
        img_h = entry['height']

        # Sanitize bboxes -- some are invalid
        kept_objs = []
        kept_segms = []
        for ann in annotations:
            image_path = self.image_path_from_index(ann['image_id'])
            raw_segm = ann['segmentation']
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(raw_segm, list):
                # A valid polygon has >= 3 points, i.e. >= 6 coordinates
                ann['segmentation'] = [
                    poly for poly in raw_segm if len(poly) >= 6
                ]
            if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if ann.get('ignore', 0) == 1:
                continue
            # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w
            )
            # Require non-zero seg area and more than 1x1 box size
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [left, top, right, bottom]

            segm = ann['segmentation']
            if 'counts' in segm and type(segm['counts']) is list:
                # RLE whose counts are a plain list: convert it the way the
                # COCO API showAnns function does.
                kept_segms.append(mask_util.frPyObjects(segm, img_h, img_w))
            else:
                mask = segm_utils.polys_to_mask(segm, img_h, img_w)
                if len(np.unique(mask)) == 1:
                    # Mask holds a single value only; drop the object.
                    continue
                kept_segms.append(segm)
            kept_objs.append(ann)

            # NOTE(review): 'segms' references the shared, still-growing list
            # of all segmentations of this image, not just this object's.
            record = {
                'boxes': ann['clean_bbox'],
                'segms': kept_segms,
                'image_path': image_path,
                'area': ann['area'],
            }
            self.cat_data[ann['category_id']].append(record)

        n_kept = len(kept_objs)

        def _blank(key, *trailing_dims):
            # Zero array with one row per kept object, typed like entry[key].
            return np.zeros((n_kept,) + trailing_dims, dtype=entry[key].dtype)

        boxes = _blank('boxes', 4)
        gt_classes = _blank('gt_classes')
        gt_cats = _blank('gt_cats')
        gt_overlaps = _blank('gt_overlaps', cfg.MODEL.NUM_CLASSES)
        seg_areas = _blank('seg_areas')
        is_crowd = _blank('is_crowd')
        box_to_gt_ind_map = _blank('box_to_gt_ind_map')
        if self.keypoints is not None:
            gt_keypoints = _blank('gt_keypoints', 3, self.num_keypoints)

        any_visible_kp = False
        for row, ann in enumerate(kept_objs):
            boxes[row, :] = ann['clean_bbox']
            # Class-agnostic labels: every object is class 1; the original
            # category id is kept separately in gt_cats.
            gt_classes[row] = 1
            gt_cats[row] = ann['category_id']
            seg_areas[row] = ann['area']
            is_crowd[row] = ann['iscrowd']
            box_to_gt_ind_map[row] = row
            if self.keypoints is not None:
                gt_keypoints[row, :, :] = self._get_gt_keypoints(ann)
                if np.sum(gt_keypoints[row, 2, :]) > 0:
                    any_visible_kp = True
            gt_overlaps[row, 1] = 1.0

        box_utils.validate_boxes(boxes, width=img_w, height=img_h)
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(kept_segms)
        for key, fresh in (
                ('gt_classes', gt_classes),
                ('gt_cats', gt_cats),
                ('seg_areas', seg_areas),
        ):
            entry[key] = np.append(entry[key], fresh)
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = any_visible_kp
    def _load_annotation_to_roidb(self, anno_file):
        """Load a JSON annotation file and convert it to ground-truth roidb entries.

        The file must hold a list of per-image entries. Each entry pairs a list
        of symptom-name lists (``syms``) index-wise with a list of polygons
        (``polygon``). When an entry has ``boxes`` but no polygons, each box is
        turned into a four-corner polygon and ``has_mask`` is False. For every
        sub-symptom that, after alias folding, is a key of
        ``self._class_to_ind``, one class index, one box and one flattened
        polygon are recorded. Part names in ``entry['parts']`` are mapped
        through ``self._class_to_parts``.

        Args:
            anno_file: Path of the JSON annotation file.

        Returns:
            A list of dicts, one per entry, with the keys ``file_name``,
            ``cls_list``, ``height``, ``width``, ``polygons``, ``gt_boxes``,
            ``eva_id``, ``doc_name``, ``image_dir``, ``offset_x``,
            ``offset_y``, ``fold``, ``has_mask``, ``parts_list`` (int32 array)
            and ``part_boxes`` (float32 array).
        """
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(anno_file) as anno_fp:
            entries = json.load(anno_fp)

        # Symptom names that are folded into another class name before the
        # class lookup.
        sym_aliases = {
            '膈面膨隆': '膈面异常',
            '膈面抬高': '膈面异常',
            '肺门影浓': '肺门增浓',
            '肺门影大': '肺门增浓',
            '主动脉异常': '主动脉结增宽',
            # the following are signs of pulmonary tuberculosis
            '三均匀粟粒样结节': '弥漫性结节',
            '非三均匀粟粒样结节': '弥漫性结节',
            '结核球': '结节',
            '结核结节': '结节',
            '索条影': '纤维化表现',
            # cvpr code
            '肋骨缺失': '骨折',
            '多发结节': '弥漫性结节',
        }
        # A specific nodule label is removed from a region that also carries
        # one of the group labels.
        specific_nodule_syms = ('结节', '结核结节', '结核球')
        nodule_group_syms = ('弥漫性结节', '多发结节')

        gt_roidb = []
        for entry in entries:  # one entry per sample
            syms = entry['syms']
            polygons = entry['polygon']
            boxes = entry.get('boxes', [])

            # NOTE(review): when an entry has neither 'rows' nor 'bottom_row',
            # rows/cols silently keep the previous entry's values (or are
            # unbound on the first entry) -- confirm the data always has one.
            if 'rows' in entry:
                rows = entry['rows']
                cols = entry['cols']
            elif 'bottom_row' in entry:
                rows = entry['bottom_row'] - entry['top_row']
                cols = entry['right_col'] - entry['left_col']

            # Newer JSON schema: the keys are file_name / eva_id / doc_name.
            file_name = entry['file_name']
            image_dir = self.image_directory
            evaId = entry['eva_id']
            docName = entry['doc_name']
            fold = entry['fold']

            if 'offset_x' in entry and 'offset_y' in entry:
                offset_x, offset_y = entry['offset_x'], entry['offset_y']
            else:
                offset_x, offset_y = 0, 0

            # Only polygon-only entries count as having masks.
            has_mask = len(polygons) > 0 and len(boxes) == 0
            if len(boxes) > 0 and len(polygons) == 0:
                # Box-only entry: synthesize a rectangle polygon per box.
                for x1, y1, x2, y2 in boxes:
                    polygons.append([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])

            # Validate every (sym, polygon) pair first, then process the
            # pairs that passed.
            is_valid_polygon = [
                self._valid_polygon(polygon)
                for _, polygon in zip(syms, polygons)
            ]

            cls_list, gt_boxes = [], []
            filtered_polygons = []

            for is_valid, sym, polygon in zip(is_valid_polygon, syms, polygons):
                if not is_valid:  # exclude non-valid polygons
                    continue
                if '膈面异常' in sym and docName == 'fj6311':
                    continue

                if '主动脉异常' in sym and '钙化' in sym:
                    sym = ['主动脉钙化', '主动脉异常']
                # Director Fei labelled many regions with both of these
                # names; such regions are treated as the second one only.
                if '结节' in sym and '乳头影' in sym:
                    sym = ['乳头影']

                for specific in specific_nodule_syms:
                    for group in nodule_group_syms:
                        if specific in sym and group in sym:
                            sym.remove(specific)

                for s in sym:  # for each sub-sym of this region
                    s = sym_aliases.get(s, s)
                    if s not in self._class_to_ind:  # not one of the given classes
                        continue

                    polygon = [tuple(point) for point in polygon]
                    polygon_np = np.array(polygon)
                    x1, y1 = polygon_np[:, 0].min(), polygon_np[:, 1].min()
                    x2, y2 = polygon_np[:, 0].max(), polygon_np[:, 1].max()
                    x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                        x1, y1, x2, y2, rows, cols)

                    # if nodule is too large, then assign it to diffusive nodules
                    if s == '结节' and (x2 - x1 > 300 or y2 - y1 > 300):
                        s = '弥漫性结节'

                    cls_list.append(self._class_to_ind[s])

                    # expand too-small boxes (width or height < 20) to 20
                    # around their centre; the result is not re-clipped
                    if x2 - x1 < 20:
                        cx = (x1 + x2) * 0.5
                        x1 = cx - 10
                        x2 = cx + 10
                    if y2 - y1 < 20:
                        cy = (y1 + y2) * 0.5
                        y1 = cy - 10
                        y2 = cy + 10

                    gt_boxes.append([x1, y1, x2, y2])
                    # [(x1, y1), (x2, y2)] -> [[x1, y1, x2, y2]]
                    filtered_polygons.append(
                        [list(chain.from_iterable(polygon))])

            assert len(cls_list) == len(gt_boxes) == len(filtered_polygons)

            # Part annotations: names are mapped to ids via _class_to_parts.
            parts = entry['parts']
            part_boxes = entry['part_boxes']
            parts_list = [self._class_to_parts[part_name] for part_name in parts]
            part_boxes = np.array(part_boxes, dtype=np.float32)
            parts_list = np.array(parts_list, dtype=np.int32)

            gt_roidb.append({
                'file_name': file_name,
                'cls_list': cls_list,
                'height': rows,
                'width': cols,
                'polygons': filtered_polygons,
                'gt_boxes': gt_boxes,
                'eva_id': evaId,
                'doc_name': docName,
                'image_dir': image_dir,
                'offset_x': offset_x,
                'offset_y': offset_y,
                'fold': fold,
                'has_mask': has_mask,
                'parts_list': parts_list,
                'part_boxes': part_boxes
            })

        return gt_roidb
    def _add_gt_annotations(self, entry):
        """Append ground truth (plus score/source/dataset tags) to an roidb entry.

        Annotations for ``entry['id']`` are loaded from ``self.COCO``,
        filtered, and appended to the arrays already stored in ``entry``.
        Whether the optional ``score``, ``source`` and ``dataset`` fields are
        used is decided from the first annotation only. The keys
        ``gt_scores``, ``dataset_id`` and ``gt_source`` are created as empty
        arrays when ``entry`` does not have them yet.

        Args:
            entry: roidb dict of a single image; modified in place.
        """
        annotations = self.COCO.loadAnns(
            self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None))

        # Optional per-annotation fields are detected on the first annotation.
        if len(annotations) > 0:
            first_ann = annotations[0]
            has_scores = 'score' in first_ann
            has_source = 'source' in first_ann
            has_dataset = 'dataset' in first_ann
        else:
            has_scores = False
            has_source = False
            # TODO: Check. This assumes that the "dataset" field is given
            has_dataset = True

        # EDIT: detection "soft"-scores, dataset name and annotation source
        entry.setdefault('gt_scores', np.empty((0), dtype=np.float32))
        entry.setdefault('dataset_id', np.empty((0), dtype=np.int32))
        entry.setdefault('gt_source', np.empty((0), dtype=np.int32))

        img_w = entry['width']
        img_h = entry['height']

        # Sanitize bboxes -- some are invalid
        kept_objs = []
        kept_segms = []
        for ann in annotations:
            raw_segm = ann['segmentation']
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(raw_segm, list):
                # A valid polygon has >= 3 points, i.e. >= 6 coordinates
                ann['segmentation'] = [
                    poly for poly in raw_segm if len(poly) >= 6
                ]
            if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if ann.get('ignore', 0) == 1:
                continue
            # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w)
            # Require non-zero seg area and more than 1x1 box size
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [left, top, right, bottom]
            kept_objs.append(ann)
            kept_segms.append(ann['segmentation'])

        n_kept = len(kept_objs)

        def _blank(key, *trailing_dims):
            # Zero array with one row per kept object, typed like entry[key].
            return np.zeros((n_kept,) + trailing_dims, dtype=entry[key].dtype)

        boxes = _blank('boxes', 4)
        gt_classes = _blank('gt_classes')
        gt_overlaps = _blank('gt_overlaps', self.num_classes)
        gt_scores = _blank('gt_scores')  # EDIT: scores
        gt_source = _blank('gt_source')
        dataset_annot = _blank('dataset_id')  # EDIT: dataset
        seg_areas = _blank('seg_areas')
        is_crowd = _blank('is_crowd')
        box_to_gt_ind_map = _blank('box_to_gt_ind_map')
        if self.keypoints is not None:
            gt_keypoints = _blank('gt_keypoints', 3, self.num_keypoints)

        any_visible_kp = False
        for row, ann in enumerate(kept_objs):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            boxes[row, :] = ann['clean_bbox']
            gt_classes[row] = class_id
            if has_scores:
                gt_scores[row] = ann['score']  # EDIT: scores
            if has_source:
                # EDIT: annot source (det or track)
                gt_source[row] = ann['source']
            if has_dataset:
                # EDIT: dataset id = position in the configured training sets
                dataset_annot[row] = cfg.TRAIN.DATASETS.index(ann['dataset'])
            seg_areas[row] = ann['area']
            is_crowd[row] = ann['iscrowd']
            box_to_gt_ind_map[row] = row
            if self.keypoints is not None:
                gt_keypoints[row, :, :] = self._get_gt_keypoints(ann)
                if np.sum(gt_keypoints[row, 2, :]) > 0:
                    any_visible_kp = True
            if ann['iscrowd']:
                # Overlap -1 for all classes marks crowd objects so they
                # will be excluded during training
                gt_overlaps[row, :] = -1.0
            else:
                gt_overlaps[row, class_id] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(kept_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)

        # Optional fields are appended only when the first annotation had them.
        for enabled, key, fresh in (
                (has_scores, 'gt_scores', gt_scores),
                (has_source, 'gt_source', gt_source),
                (has_dataset, 'dataset_id', dataset_annot),
        ):
            if enabled:
                entry[key] = np.append(entry[key], fresh)

        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0)
            entry['has_visible_keypoints'] = any_visible_kp
Example #14
0
    def _add_gt_annotations(self, entry):
        """Append this image's ground-truth annotations to its roidb entry.

        Annotations for ``entry['id']`` are loaded from ``self.COCO``,
        filtered (minimum area, ``ignore`` flag, non-degenerate clipped box),
        and appended to the arrays already stored in ``entry``.

        Args:
            entry: roidb dict of a single image; modified in place.
        """
        annotations = self.COCO.loadAnns(
            self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None))
        img_w = entry['width']
        img_h = entry['height']

        # Sanitize bboxes -- some are invalid
        kept_objs = []
        kept_segms = []
        for ann in annotations:
            raw_segm = ann['segmentation']
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(raw_segm, list):
                # A valid polygon has >= 3 points, i.e. >= 6 coordinates
                ann['segmentation'] = [
                    poly for poly in raw_segm if len(poly) >= 6
                ]
            if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if ann.get('ignore', 0) == 1:
                continue
            # (x1, y1, w, h) -> (x1, y1, x2, y2), then clip to the image
            left, top, right, bottom = box_utils.xywh_to_xyxy(ann['bbox'])
            left, top, right, bottom = box_utils.clip_xyxy_to_image(
                left, top, right, bottom, img_h, img_w
            )
            # Require non-zero seg area and more than 1x1 box size
            if not (ann['area'] > 0 and right > left and bottom > top):
                continue
            ann['clean_bbox'] = [left, top, right, bottom]
            kept_objs.append(ann)
            kept_segms.append(ann['segmentation'])

        n_kept = len(kept_objs)

        def _blank(key, *trailing_dims):
            # Zero array with one row per kept object, typed like entry[key].
            return np.zeros((n_kept,) + trailing_dims, dtype=entry[key].dtype)

        boxes = _blank('boxes', 4)
        gt_classes = _blank('gt_classes')
        gt_overlaps = _blank('gt_overlaps', self.num_classes)
        seg_areas = _blank('seg_areas')
        is_crowd = _blank('is_crowd')
        box_to_gt_ind_map = _blank('box_to_gt_ind_map')
        if self.keypoints is not None:
            gt_keypoints = _blank('gt_keypoints', 3, self.num_keypoints)

        any_visible_kp = False
        for row, ann in enumerate(kept_objs):
            class_id = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            boxes[row, :] = ann['clean_bbox']
            gt_classes[row] = class_id
            seg_areas[row] = ann['area']
            is_crowd[row] = ann['iscrowd']
            box_to_gt_ind_map[row] = row
            if self.keypoints is not None:
                gt_keypoints[row, :, :] = self._get_gt_keypoints(ann)
                if np.sum(gt_keypoints[row, 2, :]) > 0:
                    any_visible_kp = True
            if ann['iscrowd']:
                # Overlap -1 for all classes marks crowd objects so they
                # will be excluded during training
                gt_overlaps[row, :] = -1.0
            else:
                gt_overlaps[row, class_id] = 1.0

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(kept_segms)
        for key, fresh in (
                ('gt_classes', gt_classes),
                ('seg_areas', seg_areas),
        ):
            entry[key] = np.append(entry[key], fresh)
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'], box_to_gt_ind_map
        )
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(
                entry['gt_keypoints'], gt_keypoints, axis=0
            )
            entry['has_visible_keypoints'] = any_visible_kp
Example #15
0
    def _load_annotation_to_roidb(self, anno_file):
        """Parse a JSON annotation file into a list of ground-truth roidb dicts.

        Every element of the JSON list must provide ``syms``, ``polygon``,
        ``filename``, ``evaId``, ``docName`` and ``fold``, plus either
        ``rows``/``cols`` or the bounds ``top_row``/``bottom_row``/
        ``left_col``/``right_col``.  ``boxes``, ``manufacturer``,
        ``manufacturer_model`` and ``offset_x``/``offset_y`` are optional.

        Args:
            anno_file: path of the JSON annotation file.

        Returns:
            A list with one dict per annotation entry, holding the keys
            ``file_name``, ``cls_list``, ``height``, ``width``, ``polygons``,
            ``gt_boxes``, ``eva_id``, ``doc_name``, ``image_dir``,
            ``offset_x``, ``offset_y``, ``fold``, ``manufacturer``,
            ``manufacturer_model`` and ``has_mask``.

        Raises:
            KeyError: if a required key is missing, including when an entry
                carries neither ``rows``/``cols`` nor ``bottom_row``.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(anno_file) as anno_fp:
            entries = json.load(anno_fp)

        # Sub-label renames applied before the class lookup.  No target is
        # itself a key, so a single lookup equals the sequential renames.
        label_aliases = {
            '膈面膨隆': '膈面异常',
            '膈面抬高': '膈面异常',
            '肺门影浓': '肺门增浓',
            '肺门影大': '肺门增浓',
            '主动脉异常': '主动脉结增宽',
            # the following are signs of pulmonary tuberculosis
            '三均匀粟粒样结节': '弥漫性结节',
            '非三均匀粟粒样结节': '弥漫性结节',
            '结核球': '结节',
            '结核结节': '结节',
            '索条影': '纤维化表现',
        }
        # A label from `absorbed` is dropped from a region whenever one of the
        # `absorbing` labels is present on the same region.
        absorbed = ('结节', '结核结节', '结核球')
        absorbing = ('弥漫性结节', '多发结节')

        gt_roidb = []
        small_cnt = 0  # number of box dimensions that had to be enlarged
        for entry in entries:  # one entry per sample
            syms = entry['syms']
            polygons = entry['polygon']
            boxes = entry['boxes'] if 'boxes' in entry else []

            if 'rows' in entry:
                rows = entry['rows']
                cols = entry['cols']
            elif 'bottom_row' in entry:
                rows = entry['bottom_row'] - entry['top_row']
                cols = entry['right_col'] - entry['left_col']
            else:
                # Previously the size of the preceding entry was silently
                # reused here (or an UnboundLocalError surfaced later on).
                raise KeyError(
                    "annotation entry (evaId={!r}) has neither 'rows'/'cols' "
                    "nor 'bottom_row'".format(entry.get('evaId')))

            file_name = entry['filename']
            if not file_name.endswith('.jpg'):
                file_name = file_name + '.jpg'
            evaId = entry['evaId']
            image_dir = self.image_directory
            docName = entry['docName']
            fold = entry['fold']

            # Missing or null values stay None; otherwise strip the
            # surrounding double quotes.
            manufacturer = entry.get('manufacturer')
            if manufacturer is not None:
                manufacturer = manufacturer.strip('"')
            manufacturer_model = entry.get('manufacturer_model')
            if manufacturer_model is not None:
                manufacturer_model = manufacturer_model.strip('"')

            if 'offset_x' in entry and 'offset_y' in entry:
                offset_x, offset_y = entry['offset_x'], entry['offset_y']
            else:
                offset_x, offset_y = 0, 0

            # Only polygon-only entries count as having masks.  Must be
            # evaluated before boxes are converted into polygons below.
            has_mask = len(polygons) > 0 and len(boxes) == 0
            if len(boxes) > 0 and len(polygons) == 0:
                # Box-only entry: represent every box as a 4-corner polygon.
                for box in boxes:
                    x1, y1, x2, y2 = box
                    polygons.append([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])

            # Validity flag for every (sym, polygon) pair that gets visited.
            is_valid_polygon = [
                bool(self._valid_polygon(polygon))
                for _, polygon in zip(syms, polygons)
            ]

            cls_list, gt_boxes = [], []
            filtered_polygons = []

            for idx, (sym, polygon) in enumerate(zip(syms, polygons)):
                if not is_valid_polygon[idx]:
                    continue
                if '膈面异常' in sym and entry['docName'] == 'fj6311':
                    continue

                if '主动脉异常' in sym and '钙化' in sym:
                    sym = ['主动脉钙化', '主动脉异常']
                # Director Fei labelled many regions with both labels; such
                # regions are treated as '乳头影' only.
                if '结节' in sym and '乳头影' in sym:
                    sym = ['乳头影']

                for specific in absorbed:
                    for general in absorbing:
                        if specific in sym and general in sym:
                            sym.remove(specific)

                for s in sym:  # for each sub-sym
                    s = label_aliases.get(s, s)
                    if s not in self._class_to_ind:
                        continue  # not one of the requested classes

                    polygon = [tuple(point) for point in polygon]
                    polygon_np = np.array(polygon)
                    x1, y1 = polygon_np[:, 0].min(), polygon_np[:, 1].min()
                    x2, y2 = polygon_np[:, 0].max(), polygon_np[:, 1].max()
                    x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                        x1, y1, x2, y2, rows, cols)

                    # A '结节' region wider or taller than 300 is relabelled
                    # as '多发结节'.
                    if s == '结节' and (x2 - x1 > 300 or y2 - y1 > 300):
                        s = '多发结节'
                        print('fold ', fold, 'evaId ', evaId, idx + 1,
                              '多发结节')

                    cls_list.append(self._class_to_ind[s])

                    # Expand too-small boxes (width or height < 20) to 20
                    # around their centre; each dimension is counted once.
                    if x2 - x1 < 20:
                        cx = (x1 + x2) * 0.5
                        x1 = cx - 10
                        x2 = cx + 10
                        small_cnt += 1
                    if y2 - y1 < 20:
                        cy = (y1 + y2) * 0.5
                        y1 = cy - 10
                        y2 = cy + 10
                        small_cnt += 1

                    gt_boxes.append([x1, y1, x2, y2])
                    # [(x1, y1), (x2, y2)] -> [[x1, y1, x2, y2]]
                    filtered_polygons.append(
                        [list(chain.from_iterable(polygon))])

            assert len(cls_list) == len(gt_boxes) == len(filtered_polygons)
            gt_roidb.append({
                'file_name': file_name,
                'cls_list': cls_list,
                'height': rows,
                'width': cols,
                'polygons': filtered_polygons,
                'gt_boxes': gt_boxes,
                'eva_id': evaId,
                'doc_name': docName,
                'image_dir': image_dir,
                'offset_x': offset_x,
                'offset_y': offset_y,
                'fold': fold,
                'manufacturer': manufacturer,
                'manufacturer_model': manufacturer_model,
                'has_mask': has_mask,
            })

        print('small boxes: ', small_cnt)
        return gt_roidb
Example #16
0
    def _add_gt_annotations(self, entry):
        """Append the ground-truth annotations of one image to its roidb entry.

        The annotations are looked up in ``self.COCO`` via ``entry['id']``.
        An annotation is kept only if its area reaches
        ``cfg.TRAIN.GT_MIN_AREA``, it is not flagged ``ignore == 1``, and its
        clipped box has positive width, height and area.  The kept
        annotations are appended to the arrays already stored in ``entry``
        (``boxes``, ``segms``, ``gt_classes``, ``seg_areas``, ``gt_overlaps``,
        ``is_crowd``, ``box_to_gt_ind_map`` and, when ``self.keypoints`` is
        set, ``gt_keypoints`` / ``has_visible_keypoints``).
        """
        annotation_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        annotations = self.COCO.loadAnns(annotation_ids)
        im_w = entry['width']
        im_h = entry['height']

        # Sanitize bboxes -- some are invalid
        kept_anns = []
        kept_segms = []
        for ann in annotations:
            # crowd regions are RLE encoded and stored as dicts, so only
            # list-style (polygon) segmentations are filtered here
            if isinstance(ann['segmentation'], list):
                # A valid polygon has >= 3 points, i.e. >= 6 coordinates
                ann['segmentation'] = [
                    poly for poly in ann['segmentation'] if len(poly) >= 6
                ]
            if ann['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if ann.get('ignore') == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2), then clip
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(ann['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, im_h, im_w)
            # Require non-zero seg area and more than 1x1 box size
            if not (ann['area'] > 0 and x2 > x1 and y2 > y1):
                continue
            ann['clean_bbox'] = [x1, y1, x2, y2]
            kept_anns.append(ann)
            kept_segms.append(ann['segmentation'])

        n_kept = len(kept_anns)
        with_keypoints = self.keypoints is not None

        # Fresh per-image buffers, typed like the arrays they are appended to.
        new_boxes = np.zeros((n_kept, 4), dtype=entry['boxes'].dtype)
        new_classes = np.zeros(n_kept, dtype=entry['gt_classes'].dtype)
        new_overlaps = np.zeros((n_kept, self.num_classes),
                                dtype=entry['gt_overlaps'].dtype)
        new_areas = np.zeros(n_kept, dtype=entry['seg_areas'].dtype)
        new_crowd = np.zeros(n_kept, dtype=entry['is_crowd'].dtype)
        new_box_to_gt = np.zeros(n_kept,
                                 dtype=entry['box_to_gt_ind_map'].dtype)
        if with_keypoints:
            new_keypoints = np.zeros((n_kept, 3, self.num_keypoints),
                                     dtype=entry['gt_keypoints'].dtype)

        any_visible_keypoint = False
        for row, ann in enumerate(kept_anns):
            class_ind = self.json_category_id_to_contiguous_id[
                ann['category_id']]
            new_boxes[row, :] = ann['clean_bbox']
            new_classes[row] = class_ind
            new_areas[row] = ann['area']
            new_crowd[row] = ann['iscrowd']
            new_box_to_gt[row] = row
            if with_keypoints:
                new_keypoints[row, :, :] = self._get_gt_keypoints(ann)
                if new_keypoints[row, 2, :].sum() > 0:
                    any_visible_keypoint = True
            if ann['iscrowd']:
                # Crowd objects get overlap -1 for every class so that they
                # are excluded during training
                new_overlaps[row, :] = -1.0
            else:
                new_overlaps[row, class_ind] = 1.0

        entry['boxes'] = np.append(entry['boxes'], new_boxes, axis=0)
        entry['segms'].extend(kept_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], new_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], new_areas)
        # gt_overlaps is stored sparse: densify, append, then re-sparsify.
        dense_overlaps = np.append(entry['gt_overlaps'].toarray(),
                                   new_overlaps,
                                   axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(entry['is_crowd'], new_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               new_box_to_gt)
        if with_keypoints:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              new_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = any_visible_keypoint
Example #17
0
    def _load_annotation_to_roidb(self, anno_file):
        """Parse a JSON annotation file into a list of ground-truth roidb dicts.

        Every element of the JSON list must provide ``syms``, ``polygon``,
        ``boxes``, ``filename``, ``evaId``, ``docName`` and ``fold``, plus
        either ``rows``/``cols`` or the bounds ``top_row``/``bottom_row``/
        ``left_col``/``right_col``.  ``manufacturer``, ``manufacturer_model``
        and ``offset_x``/``offset_y`` are optional.

        Args:
            anno_file: path of the JSON annotation file.

        Returns:
            A list with one dict per annotation entry, holding the keys
            ``file_name``, ``cls_list``, ``height``, ``width``, ``polygons``,
            ``gt_boxes``, ``eva_id``, ``doc_name``, ``image_dir``,
            ``offset_x``, ``offset_y``, ``fold``, ``manufacturer``,
            ``manufacturer_model`` and ``has_mask``.

        Raises:
            KeyError: if a required key is missing, including when an entry
                carries neither ``rows``/``cols`` nor ``bottom_row``.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(anno_file) as anno_fp:
            entries = json.load(anno_fp)

        gt_roidb = []
        small_cnt = 0  # number of box dimensions that had to be enlarged
        for entry in entries:  # one entry per sample
            syms = entry['syms']
            polygons = entry['polygon']
            boxes = entry['boxes']

            if 'rows' in entry:
                rows = entry['rows']
                cols = entry['cols']
            elif 'bottom_row' in entry:
                rows = entry['bottom_row'] - entry['top_row']
                cols = entry['right_col'] - entry['left_col']
            else:
                # Previously the size of the preceding entry was silently
                # reused here (or an UnboundLocalError surfaced later on).
                raise KeyError(
                    "annotation entry (evaId={!r}) has neither 'rows'/'cols' "
                    "nor 'bottom_row'".format(entry.get('evaId')))

            file_name = entry['filename'] + '.jpg'
            evaId = entry['evaId']
            image_dir = self.image_directory
            docName = entry['docName']
            fold = entry['fold']

            # Missing or null values stay None; otherwise strip the
            # surrounding double quotes.
            manufacturer = entry.get('manufacturer')
            if manufacturer is not None:
                manufacturer = manufacturer.strip('"')
            manufacturer_model = entry.get('manufacturer_model')
            if manufacturer_model is not None:
                manufacturer_model = manufacturer_model.strip('"')

            if 'offset_x' in entry and 'offset_y' in entry:
                offset_x, offset_y = entry['offset_x'], entry['offset_y']
            else:
                offset_x, offset_y = 0, 0

            # Only polygon-only entries count as having masks.  Must be
            # evaluated before boxes are converted into polygons below.
            has_mask = len(polygons) > 0 and len(boxes) == 0
            if len(boxes) > 0 and len(polygons) == 0:
                # Box-only entry: represent every box as a 4-corner polygon.
                for box in boxes:
                    x1, y1, x2, y2 = box
                    polygons.append([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])

            # Validity flag for every (sym, polygon) pair that gets visited.
            is_valid_polygon = [
                bool(self._valid_polygon(polygon))
                for _, polygon in zip(syms, polygons)
            ]

            cls_list, gt_boxes = [], []
            filtered_polygons = []

            for idx, (sym, polygon) in enumerate(zip(syms, polygons)):
                if not is_valid_polygon[idx]:
                    continue

                for s in sym:  # for each sub-sym
                    if s not in self._class_to_ind:
                        continue  # not one of the requested classes

                    polygon = [tuple(point) for point in polygon]
                    polygon_np = np.array(polygon)
                    x1, y1 = polygon_np[:, 0].min(), polygon_np[:, 1].min()
                    x2, y2 = polygon_np[:, 0].max(), polygon_np[:, 1].max()
                    x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                        x1, y1, x2, y2, rows, cols)

                    cls_list.append(self._class_to_ind[s])

                    # Expand too-small boxes (width or height < 20) to 20
                    # around their centre; each dimension is counted once.
                    if x2 - x1 < 20:
                        cx = (x1 + x2) * 0.5
                        x1 = cx - 10
                        x2 = cx + 10
                        small_cnt += 1
                    if y2 - y1 < 20:
                        cy = (y1 + y2) * 0.5
                        y1 = cy - 10
                        y2 = cy + 10
                        small_cnt += 1

                    gt_boxes.append([x1, y1, x2, y2])
                    # [(x1, y1), (x2, y2)] -> [[x1, y1, x2, y2]]
                    filtered_polygons.append(
                        [list(chain.from_iterable(polygon))])

            assert len(cls_list) == len(gt_boxes) == len(filtered_polygons)
            gt_roidb.append({
                'file_name': file_name,
                'cls_list': cls_list,
                'height': rows,
                'width': cols,
                'polygons': filtered_polygons,
                'gt_boxes': gt_boxes,
                'eva_id': evaId,
                'doc_name': docName,
                'image_dir': image_dir,
                'offset_x': offset_x,
                'offset_y': offset_y,
                'fold': fold,
                'manufacturer': manufacturer,
                'manufacturer_model': manufacturer_model,
                'has_mask': has_mask,
            })

        print('small boxes: ', small_cnt)
        return gt_roidb