Esempio n. 1
0
    def __getitem__(self, index) -> dict:
        res = super().__getitem__(index)
        # 图像Resize至网络输入大小
        na_img = res[HK.NATIVE_IMAGE]

        img_resize, np_kps_resize, np_boxes_resize = self.rkr.resize(
            na_img, res[HK.KEYPOINTS], res[HK.BOXES])

        res[HK.RE_IMAGE] = img_resize
        res[HK.RE_KEYPOINTS] = np_kps_resize
        res[HK.RE_BOXES] = np_boxes_resize

        if 'visual_debug' in self.kwargs and self.kwargs.get('visual_debug'):
            img_draw = KeypointsOnImage.from_xy_array(res[HK.KEYPOINTS].reshape(-1, 2), shape=na_img.shape) \
                .draw_on_image(na_img, size=5)
            img_draw = BoundingBoxesOnImage.from_xyxy_array(res[HK.BOXES].reshape(-1, 4), shape=na_img.shape) \
                .draw_on_image(img_draw, size=2)
            res[HK.DEBUG_NATIVE_IMAGE] = img_draw

            img_draw = KeypointsOnImage.from_xy_array(res[HK.RE_KEYPOINTS].reshape(-1, 2), shape=img_resize.shape) \
                .draw_on_image(img_resize, size=5)
            img_draw = BoundingBoxesOnImage.from_xyxy_array(res[HK.RE_BOXES].reshape(-1, 4), shape=img_resize.shape) \
                .draw_on_image(img_draw, size=2)
            res[HK.DEBUG_RE_IMAGE] = img_draw

        return res
Esempio n. 2
0
    def aug_image(self, train_instance, jitter):
        image_name = train_instance['filename']
        if self._config['IMAGE_C'] == 1:
            image = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
        elif self._config['IMAGE_C'] == 3:
            image = cv2.imread(image_name)
        else:
            raise ValueError("Invalid number of image channels.")

        if image is None:
            print('Cannot find ', image_name)
        if self._callback is not None:
            image, train_instance = self._callback(image, train_instance)

        h = image.shape[0]
        w = image.shape[1]
        all_objs = copy.deepcopy(train_instance['object'])

        if jitter:
            bbs = []
            for i, obj in enumerate(all_objs):
                xmin = obj['xmin']
                ymin = obj['ymin']
                xmax = obj['xmax']
                ymax = obj['ymax']
                # use label field to later match it with final boxes
                bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax, label=i))
            bbs = BoundingBoxesOnImage(bbs, shape=image.shape)
            image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
            bbs = bbs.remove_out_of_image().clip_out_of_image()

            if len(bbs) < len(all_objs):
                print("Some boxes were removed during augmentations.")

            filtered_objs = []
            for bb in bbs.bounding_boxes:
                obj = all_objs[bb.label]
                obj['xmin'] = bb.x1
                obj['xmax'] = bb.x2
                obj['ymin'] = bb.y1
                obj['ymax'] = bb.y2
                filtered_objs.append(obj)
            all_objs = filtered_objs

        # resize the image to standard size
        image = cv2.resize(image, (self._config['IMAGE_W'], self._config['IMAGE_H']))
        if self._config['IMAGE_C'] == 1:
            image = image[..., np.newaxis]
        image = image[..., ::-1]  # make it RGB (it is important for normalization of some backends)

        # fix object's position and size
        for obj in all_objs:
            for attr in ['xmin', 'xmax']:
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w)
                obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)

            for attr in ['ymin', 'ymax']:
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h)
                obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)
        return image, all_objs
    def aug_image(self, train_instance, jitter):
        image_name = train_instance['filename']
        if self._config['IMAGE_C'] == 1:
            image = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
        elif self._config['IMAGE_C'] == 3:
            image = cv2.imread(image_name)
        else:
            raise ValueError("Invalid number of image channels.")

        if image is None:
            print('Cannot find ', image_name)
        if self._callback is not None:
            image, train_instance = self._callback(image, train_instance)

        h = image.shape[0]
        w = image.shape[1]
        all_objs = copy.deepcopy(train_instance['object'])

        if jitter:
            bbs = []
            for obj in all_objs:
                xmin = obj['xmin']
                ymin = obj['ymin']
                xmax = obj['xmax']
                ymax = obj['ymax']
                bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax))
            bbs = BoundingBoxesOnImage(bbs, shape=image.shape)
            image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
            bbs = bbs.remove_out_of_image().clip_out_of_image()

            if len(all_objs) != 0:
                for i in range(len(bbs.bounding_boxes)):
                    all_objs[i]['xmin'] = bbs.bounding_boxes[i].x1
                    all_objs[i]['xmax'] = bbs.bounding_boxes[i].x2
                    all_objs[i]['ymin'] = bbs.bounding_boxes[i].y1
                    all_objs[i]['ymax'] = bbs.bounding_boxes[i].y2

        # resize the image to standard size
        image = cv2.resize(image,
                           (self._config['IMAGE_W'], self._config['IMAGE_H']))
        if self._config['IMAGE_C'] == 1:
            image = image[..., np.newaxis]
        image = image[..., ::-1]

        # fix object's position and size
        for obj in all_objs:
            for attr in ['xmin', 'xmax']:
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w)
                obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)

            for attr in ['ymin', 'ymax']:
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h)
                obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)
        return image, all_objs
Esempio n. 4
0
    def aug(self, img, pts: np.ndarray, boxes: np.ndarray):
        pts_shape = pts.shape
        pts = pts.reshape((-1, 2))
        boxes_shape = boxes.shape
        boxes = boxes.reshape((-1, 4))

        kps_on_image = KeypointsOnImage.from_xy_array(pts, shape=img.shape)
        boxes_on_img = BoundingBoxesOnImage.from_xyxy_array(boxes,
                                                            shape=img.shape)

        seq = iaa.Sequential([
            iaa.Multiply((0.8, 1.2)),  # change brightness
            iaa.Affine(
                rotate=(-5, 5),
                scale=(0.9, 1.05),
                translate_percent={
                    "x": (-0.1, 0.1),
                    "y": (-0.1, 0.1)
                },
            ),
            iaa.GaussianBlur(sigma=(0, 0.7)),
            iaa.Sometimes(0.3, iaa.MotionBlur(k=(3, 7)))
        ])
        det = seq.to_deterministic()
        img_aug = det.augment_image(img)
        kps_aug = det.augment_keypoints(kps_on_image)
        boxes_aug = det.augment_bounding_boxes(boxes_on_img)

        np_kps_aug = kps_aug.to_xy_array()
        np_kps_aug = np_kps_aug.reshape(pts_shape)
        np_boxes_aug = boxes_aug.to_xy_array()
        np_boxes_aug = np_boxes_aug.reshape(boxes_shape)

        return img_aug, np_kps_aug, np_boxes_aug
Esempio n. 5
0
    def aug_image(self, image, objs, jitter=True):
        h = image.shape[0]
        w = image.shape[1]
        all_objs = copy.deepcopy(objs)
        
        if jitter:
            bbs = []
            for obj in all_objs:
                # see parse image for the correspondence between numeric index and values
                xmin = obj[0]
                xmax = obj[1] 
                ymin = obj[2]
                ymax = obj[3]            
                bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax))           
            
            bbs = BoundingBoxesOnImage(bbs, shape=image.shape)
            image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
            bbs = bbs.remove_out_of_image().clip_out_of_image()

            if len(all_objs) != 0:
                for i in range(len(bbs.bounding_boxes)):
                    all_objs[i][0] = bbs.bounding_boxes[i].x1 # xmin
                    all_objs[i][1] = bbs.bounding_boxes[i].x2 # xmax
                    all_objs[i][2] = bbs.bounding_boxes[i].y1 # ymin
                    all_objs[i][3] = bbs.bounding_boxes[i].y2 # ymax
    
        # resize the image to standard size
        image = cv2.resize(image, (self._config['IMAGE_W'], self._config['IMAGE_H'])) 
        '''
        if self._config['IMAGE_C'] == 1: #self._config['IMAGE_C']
            image = image[..., np.newaxis]
        '''    
        #image = image[..., ::-1]  
        
        # fix object's position and size
        for obj in all_objs:
            for attr in [0, 1]: #xmin, xmax
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w) 
                obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)
    
            for attr in [2, 3]: #ymin, ymax
                obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h) 
                obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)
        return image, all_objs
Esempio n. 6
0
    def to_imgaug_format(self, image, label, face):
        image = np.array(image)
        bbox = [int(x) for x in face.split(' ')]
        segmap = (np.array(label) / 255).astype(bool)

        segmaps = SegmentationMapsOnImage(segmap, image.shape)
        bboxes = BoundingBoxesOnImage([
            BoundingBox(x1=bbox[3], y1=bbox[0], x2=bbox[1], y2=bbox[2]),
        ],
                                      shape=image.shape)
        return image, segmaps, bboxes
Esempio n. 7
0
    def resize(self, img, pts: np.ndarray, boxes: np.ndarray):
        pts_shape = pts.shape
        pts = pts.reshape((-1, 2))
        boxes_shape = boxes.shape
        boxes = boxes.reshape((-1, 4))

        tw, th = self.target_size
        ih, iw, ic = img.shape
        kps_on_image = KeypointsOnImage.from_xy_array(pts, shape=img.shape)
        boxes_on_img = BoundingBoxesOnImage.from_xyxy_array(boxes,
                                                            shape=img.shape)

        seq = self.__aug_sequence((iw, ih), (tw, th))
        det = seq.to_deterministic()
        img_aug = det.augment_image(img)
        kps_aug = det.augment_keypoints(kps_on_image)
        boxes_aug = det.augment_bounding_boxes(boxes_on_img)

        np_kps_aug = kps_aug.to_xy_array()
        np_kps_aug = np_kps_aug.reshape(pts_shape)
        np_boxes_aug = boxes_aug.to_xy_array()
        np_boxes_aug = np_boxes_aug.reshape(boxes_shape)
        return img_aug, np_kps_aug, np_boxes_aug
Esempio n. 8
0
    def __getitem__(self, index) -> dict:
        res = super().__getitem__(index)
        # 图像Resize至网络输入大小
        img = res[HK.RE_IMAGE]
        if self.is_train:
            img_aug, np_kps_aug, np_boxes_aug = self.aug.aug(
                img, res[HK.RE_KEYPOINTS], res[HK.RE_BOXES])
        else:
            img_aug, np_kps_aug, np_boxes_aug = img, res[HK.RE_KEYPOINTS], res[
                HK.RE_BOXES]

        res[HK.AUG_IMAGE] = img_aug
        res[HK.AUG_KEYPOINTS] = np_kps_aug
        res[HK.AUG_BOXES] = np_boxes_aug

        if 'visual_debug' in self.kwargs and self.kwargs.get('visual_debug'):

            img_draw = KeypointsOnImage.from_xy_array(res[HK.AUG_KEYPOINTS].reshape(-1, 2), shape=img_aug.shape)\
                .draw_on_image(img_aug, size=5)
            img_draw = BoundingBoxesOnImage.from_xyxy_array(res[HK.AUG_BOXES].reshape(-1, 4), shape=img_aug.shape)\
                .draw_on_image(img_draw, size=2)
            res[HK.DEBUG_AUG_IMAGE] = img_draw

        return res
Esempio n. 9
0
    def __get_default_coco(self, img, anns, num_objs):
        boxes = []
        if self.num_keypoints > 0:
            kpts = []

        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            boxes.append(BoundingBox(*bbox))

            if self.num_keypoints > 0:
                if 'keypoints' not in ann:
                    ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))

                kpt = [
                    Keypoint(*x)
                    for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
                ]
                kpts.extend(kpt)

        bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

        if self.num_keypoints > 0:
            kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            if self.num_keypoints > 0:
                img_aug, bbs_aug, kpts_aug = self.augmentation(
                    image=img, bounding_boxes=bbs, keypoints=kpts)
            else:
                img_aug, bbs_aug = self.augmentation(image=img,
                                                     bounding_boxes=bbs)
        else:
            if self.num_keypoints > 0:
                kpts_aug = kpts.copy()

            img_aug, bbs_aug = np.copy(img), bbs.copy()

        if self.num_keypoints > 0:
            img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                     bounding_boxes=bbs_aug,
                                                     keypoints=kpts_aug)
        else:
            img_aug, bbs_aug = self.resize(image=img_aug,
                                           bounding_boxes=bbs_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 2), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        if self.num_keypoints > 0:
            kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                          dtype=np.float32)
            gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                             dtype=np.float32)
            kp_reg_mask = np.zeros(
                (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)

            bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                                keypoints=kpts_aug)
        else:
            bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

        for k in range(num_objs):
            ann = anns[k]
            bbox_aug = bbs_aug[k].clip_out_of_image((output_w, output_h))
            bbox = np.array(
                [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])

            cls_id = int(self.cat_mapping[ann['category_id']])

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

                if self.num_keypoints > 0:
                    valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                    for i, p in enumerate(
                            kpts_aug[k * self.num_keypoints:k *
                                     self.num_keypoints + self.num_keypoints]):
                        kp[k][i * 2] = p.x - ct_int[0]
                        kp[k][i * 2 + 1] = p.y - ct_int[1]

                        is_valid = valid[i] == 2 and not p.is_out_of_image(
                            (output_w, output_w))
                        kp_reg_mask[k, i * 2] = int(is_valid)
                        kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                        gt_kp[k][i] = p.x, p.y

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del bbs
        del bbs_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 6), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        if self.num_keypoints > 0:
            ret['kps'] = kp
            ret['gt_kps'] = gt_kp
            ret['kp_reg_mask'] = kp_reg_mask
            del kpts_aug

        return ret
Esempio n. 10
0
    def __get_default_coco(self, img, anns, num_objs):
        boxes = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            boxes.append(BoundingBox(*bbox))

        bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

        if self.augmentation is not None:
            img_aug, bbs_aug = self.augmentation(image=img, bounding_boxes=bbs)
        else:
            img_aug, bbs_aug = np.copy(img), bbs.copy()

        img_aug, bbs_aug = self.resize(image=img_aug, bounding_boxes=bbs_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros((self.max_detections, num_classes), dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

        for k in range(num_objs):
            ann = anns[k]
            bbox_aug = bbs_aug[k].clip_out_of_image((output_w, output_h))
            bbox = np.array(
                [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])

            cls_id = int(self.cat_mapping[ann['category_id']])

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del bbs
        del bbs_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 6), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        return ret