def __getitem__(self, index) -> dict:
    res = super().__getitem__(index)
    # resize the image to the network input size
    na_img = res[HK.NATIVE_IMAGE]
    img_resize, np_kps_resize, np_boxes_resize = self.rkr.resize(
        na_img, res[HK.KEYPOINTS], res[HK.BOXES])
    res[HK.RE_IMAGE] = img_resize
    res[HK.RE_KEYPOINTS] = np_kps_resize
    res[HK.RE_BOXES] = np_boxes_resize
    if self.kwargs.get('visual_debug'):
        img_draw = KeypointsOnImage.from_xy_array(
            res[HK.KEYPOINTS].reshape(-1, 2), shape=na_img.shape) \
            .draw_on_image(na_img, size=5)
        img_draw = BoundingBoxesOnImage.from_xyxy_array(
            res[HK.BOXES].reshape(-1, 4), shape=na_img.shape) \
            .draw_on_image(img_draw, size=2)
        res[HK.DEBUG_NATIVE_IMAGE] = img_draw
        img_draw = KeypointsOnImage.from_xy_array(
            res[HK.RE_KEYPOINTS].reshape(-1, 2), shape=img_resize.shape) \
            .draw_on_image(img_resize, size=5)
        img_draw = BoundingBoxesOnImage.from_xyxy_array(
            res[HK.RE_BOXES].reshape(-1, 4), shape=img_resize.shape) \
            .draw_on_image(img_draw, size=2)
        res[HK.DEBUG_RE_IMAGE] = img_draw
    return res
def aug_image(self, train_instance, jitter):
    image_name = train_instance['filename']
    if self._config['IMAGE_C'] == 1:
        image = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
    elif self._config['IMAGE_C'] == 3:
        image = cv2.imread(image_name)
    else:
        raise ValueError("Invalid number of image channels.")
    if image is None:
        print('Cannot find', image_name)
    if self._callback is not None:
        image, train_instance = self._callback(image, train_instance)

    h = image.shape[0]
    w = image.shape[1]
    all_objs = copy.deepcopy(train_instance['object'])

    if jitter:
        bbs = []
        for i, obj in enumerate(all_objs):
            xmin = obj['xmin']
            ymin = obj['ymin']
            xmax = obj['xmax']
            ymax = obj['ymax']
            # use the label field to later match augmented boxes back to their objects
            bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax, label=i))
        bbs = BoundingBoxesOnImage(bbs, shape=image.shape)

        image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
        bbs = bbs.remove_out_of_image().clip_out_of_image()
        if len(bbs) < len(all_objs):
            print("Some boxes were removed during augmentation.")

        filtered_objs = []
        for bb in bbs.bounding_boxes:
            obj = all_objs[bb.label]
            obj['xmin'] = bb.x1
            obj['xmax'] = bb.x2
            obj['ymin'] = bb.y1
            obj['ymax'] = bb.y2
            filtered_objs.append(obj)
        all_objs = filtered_objs

    # resize the image to the standard size
    image = cv2.resize(image, (self._config['IMAGE_W'], self._config['IMAGE_H']))
    if self._config['IMAGE_C'] == 1:
        image = image[..., np.newaxis]
    image = image[..., ::-1]  # make it RGB (important for normalization in some backends)

    # rescale each object's position and size to the resized image
    for obj in all_objs:
        for attr in ['xmin', 'xmax']:
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)
        for attr in ['ymin', 'ymax']:
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)

    return image, all_objs
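# Why the label field matters in aug_image above: remove_out_of_image() can
# drop boxes entirely, so positional indices stop lining up with all_objs.
# A minimal, self-contained illustration:
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

bbs = BoundingBoxesOnImage(
    [BoundingBox(-50, -50, -10, -10, label=0),  # fully outside -> removed
     BoundingBox(10, 10, 50, 50, label=1)],     # stays
    shape=(100, 100, 3))
kept = bbs.remove_out_of_image().clip_out_of_image()
assert [bb.label for bb in kept.bounding_boxes] == [1]  # index 0 is gone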
def aug_image(self, train_instance, jitter):
    image_name = train_instance['filename']
    if self._config['IMAGE_C'] == 1:
        image = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
    elif self._config['IMAGE_C'] == 3:
        image = cv2.imread(image_name)
    else:
        raise ValueError("Invalid number of image channels.")
    if image is None:
        print('Cannot find', image_name)
    if self._callback is not None:
        image, train_instance = self._callback(image, train_instance)

    h = image.shape[0]
    w = image.shape[1]
    all_objs = copy.deepcopy(train_instance['object'])

    if jitter:
        bbs = []
        for i, obj in enumerate(all_objs):
            xmin = obj['xmin']
            ymin = obj['ymin']
            xmax = obj['xmax']
            ymax = obj['ymax']
            # store the object index in the label field so boxes can be
            # matched back after remove_out_of_image() drops some of them;
            # a plain positional loop would misalign boxes and objects
            bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax, label=i))
        bbs = BoundingBoxesOnImage(bbs, shape=image.shape)

        image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
        bbs = bbs.remove_out_of_image().clip_out_of_image()

        kept_objs = []
        for bb in bbs.bounding_boxes:
            obj = all_objs[bb.label]
            obj['xmin'] = bb.x1
            obj['xmax'] = bb.x2
            obj['ymin'] = bb.y1
            obj['ymax'] = bb.y2
            kept_objs.append(obj)
        all_objs = kept_objs

    # resize the image to the standard size
    image = cv2.resize(image, (self._config['IMAGE_W'], self._config['IMAGE_H']))
    if self._config['IMAGE_C'] == 1:
        image = image[..., np.newaxis]
    image = image[..., ::-1]

    # rescale each object's position and size to the resized image
    for obj in all_objs:
        for attr in ['xmin', 'xmax']:
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)
        for attr in ['ymin', 'ymax']:
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)

    return image, all_objs
def aug(self, img, pts: np.ndarray, boxes: np.ndarray):
    pts_shape = pts.shape
    pts = pts.reshape((-1, 2))
    boxes_shape = boxes.shape
    boxes = boxes.reshape((-1, 4))

    kps_on_image = KeypointsOnImage.from_xy_array(pts, shape=img.shape)
    boxes_on_img = BoundingBoxesOnImage.from_xyxy_array(boxes, shape=img.shape)

    seq = iaa.Sequential([
        iaa.Multiply((0.8, 1.2)),  # change brightness
        iaa.Affine(
            rotate=(-5, 5),
            scale=(0.9, 1.05),
            translate_percent={
                "x": (-0.1, 0.1),
                "y": (-0.1, 0.1)
            },
        ),
        iaa.GaussianBlur(sigma=(0, 0.7)),
        iaa.Sometimes(0.3, iaa.MotionBlur(k=(3, 7)))
    ])
    # freeze the sampled parameters so the image, keypoints and boxes
    # all receive exactly the same transform
    det = seq.to_deterministic()
    img_aug = det.augment_image(img)
    kps_aug = det.augment_keypoints(kps_on_image)
    boxes_aug = det.augment_bounding_boxes(boxes_on_img)

    np_kps_aug = kps_aug.to_xy_array().reshape(pts_shape)
    # to_xy_array() yields two corner points per box; the reshape restores
    # the original (..., 4) layout
    np_boxes_aug = boxes_aug.to_xy_array().reshape(boxes_shape)
    return img_aug, np_kps_aug, np_boxes_aug
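# The core trick in aug above: to_deterministic() freezes the randomly
# sampled parameters, so every augmentable passed through the frozen
# pipeline gets the exact same geometric transform. A self-contained
# sketch of that pattern:
import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.kps import KeypointsOnImage
from imgaug.augmentables.bbs import BoundingBoxesOnImage

img = np.zeros((480, 640, 3), dtype=np.uint8)
kps = KeypointsOnImage.from_xy_array(np.array([[100., 50.]]), shape=img.shape)
bbs = BoundingBoxesOnImage.from_xyxy_array(np.array([[10., 20., 100., 200.]]),
                                           shape=img.shape)
det = iaa.Affine(rotate=(-5, 5)).to_deterministic()
img_a = det.augment_image(img)           # same rotation applied to all three
kps_a = det.augment_keypoints(kps)
bbs_a = det.augment_bounding_boxes(bbs)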
def aug_image(self, image, objs, jitter=True):
    h = image.shape[0]
    w = image.shape[1]
    all_objs = copy.deepcopy(objs)

    if jitter:
        bbs = []
        for i, obj in enumerate(all_objs):
            # see parse_image for the correspondence between numeric index and values
            xmin = obj[0]
            xmax = obj[1]
            ymin = obj[2]
            ymax = obj[3]
            # label stores the object index so boxes dropped by
            # remove_out_of_image() do not shift the matching below
            bbs.append(BoundingBox(x1=xmin, x2=xmax, y1=ymin, y2=ymax, label=i))
        bbs = BoundingBoxesOnImage(bbs, shape=image.shape)

        image, bbs = self._aug_pipe(image=image, bounding_boxes=bbs)
        bbs = bbs.remove_out_of_image().clip_out_of_image()

        kept_objs = []
        for bb in bbs.bounding_boxes:
            obj = all_objs[bb.label]
            obj[0] = bb.x1  # xmin
            obj[1] = bb.x2  # xmax
            obj[2] = bb.y1  # ymin
            obj[3] = bb.y2  # ymax
            kept_objs.append(obj)
        all_objs = kept_objs

    # resize the image to the standard size
    image = cv2.resize(image, (self._config['IMAGE_W'], self._config['IMAGE_H']))
    # channel expansion and BGR->RGB conversion are intentionally disabled here:
    # if self._config['IMAGE_C'] == 1:
    #     image = image[..., np.newaxis]
    # image = image[..., ::-1]

    # rescale each object's position and size to the resized image
    for obj in all_objs:
        for attr in [0, 1]:  # xmin, xmax
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_W']) / w)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_W']), 0)
        for attr in [2, 3]:  # ymin, ymax
            obj[attr] = int(obj[attr] * float(self._config['IMAGE_H']) / h)
            obj[attr] = max(min(obj[attr], self._config['IMAGE_H']), 0)

    return image, all_objs
def to_imgaug_format(self, image, label, face):
    image = np.array(image)
    # the face string appears to be in (top, right, bottom, left) order,
    # given the index mapping used for the BoundingBox below
    bbox = [int(x) for x in face.split(' ')]
    segmap = (np.array(label) / 255).astype(bool)
    segmaps = SegmentationMapsOnImage(segmap, shape=image.shape)
    bboxes = BoundingBoxesOnImage([
        BoundingBox(x1=bbox[3], y1=bbox[0], x2=bbox[1], y2=bbox[2]),
    ], shape=image.shape)
    return image, segmaps, bboxes
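# The index mapping in to_imgaug_format, pulled out as a standalone helper.
# The "top right bottom left" string order is an assumption inferred from
# the x1=bbox[3], y1=bbox[0], x2=bbox[1], y2=bbox[2] mapping above, and
# face_string_to_box is a hypothetical name:
from imgaug.augmentables.bbs import BoundingBox

def face_string_to_box(face: str) -> BoundingBox:
    top, right, bottom, left = (int(x) for x in face.split(' '))
    return BoundingBox(x1=left, y1=top, x2=right, y2=bottom)

box = face_string_to_box('120 400 300 200')
assert (box.x1, box.y1, box.x2, box.y2) == (200, 120, 400, 300)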
def resize(self, img, pts: np.ndarray, boxes: np.ndarray):
    pts_shape = pts.shape
    pts = pts.reshape((-1, 2))
    boxes_shape = boxes.shape
    boxes = boxes.reshape((-1, 4))

    tw, th = self.target_size
    ih, iw, ic = img.shape
    kps_on_image = KeypointsOnImage.from_xy_array(pts, shape=img.shape)
    boxes_on_img = BoundingBoxesOnImage.from_xyxy_array(boxes, shape=img.shape)

    seq = self.__aug_sequence((iw, ih), (tw, th))
    det = seq.to_deterministic()
    img_aug = det.augment_image(img)
    kps_aug = det.augment_keypoints(kps_on_image)
    boxes_aug = det.augment_bounding_boxes(boxes_on_img)

    np_kps_aug = kps_aug.to_xy_array().reshape(pts_shape)
    np_boxes_aug = boxes_aug.to_xy_array().reshape(boxes_shape)
    return img_aug, np_kps_aug, np_boxes_aug
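# __aug_sequence is private to the class and not shown here; a minimal
# stand-in, assuming it only rescales from the native size to the target
# size, might look like this (a real implementation could also letterbox
# with padding before resizing):
import imgaug.augmenters as iaa

def aug_sequence(native_size, target_size):
    tw, th = target_size  # native_size would matter e.g. for letterboxing
    return iaa.Sequential([iaa.Resize({"height": th, "width": tw})])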
def __getitem__(self, index) -> dict:
    res = super().__getitem__(index)
    # augment the already-resized image (training only)
    img = res[HK.RE_IMAGE]
    if self.is_train:
        img_aug, np_kps_aug, np_boxes_aug = self.aug.aug(
            img, res[HK.RE_KEYPOINTS], res[HK.RE_BOXES])
    else:
        img_aug, np_kps_aug, np_boxes_aug = (
            img, res[HK.RE_KEYPOINTS], res[HK.RE_BOXES])
    res[HK.AUG_IMAGE] = img_aug
    res[HK.AUG_KEYPOINTS] = np_kps_aug
    res[HK.AUG_BOXES] = np_boxes_aug
    if self.kwargs.get('visual_debug'):
        img_draw = KeypointsOnImage.from_xy_array(
            res[HK.AUG_KEYPOINTS].reshape(-1, 2), shape=img_aug.shape) \
            .draw_on_image(img_aug, size=5)
        img_draw = BoundingBoxesOnImage.from_xyxy_array(
            res[HK.AUG_BOXES].reshape(-1, 4), shape=img_aug.shape) \
            .draw_on_image(img_draw, size=2)
        res[HK.DEBUG_AUG_IMAGE] = img_draw
    return res
def __get_default_coco(self, img, anns, num_objs):
    boxes = []
    if self.num_keypoints > 0:
        kpts = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        boxes.append(BoundingBox(*bbox))
        if self.num_keypoints > 0:
            if 'keypoints' not in ann:
                ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))
            kpt = [
                Keypoint(*x)
                for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
            ]
            kpts.extend(kpt)
    bbs = BoundingBoxesOnImage(boxes, shape=img.shape)
    if self.num_keypoints > 0:
        kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        if self.num_keypoints > 0:
            img_aug, bbs_aug, kpts_aug = self.augmentation(
                image=img, bounding_boxes=bbs, keypoints=kpts)
        else:
            img_aug, bbs_aug = self.augmentation(image=img,
                                                 bounding_boxes=bbs)
    else:
        if self.num_keypoints > 0:
            kpts_aug = kpts.copy()
        img_aug, bbs_aug = np.copy(img), bbs.copy()

    if self.num_keypoints > 0:
        img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                 bounding_boxes=bbs_aug,
                                                 keypoints=kpts_aug)
    else:
        img_aug, bbs_aug = self.resize(image=img_aug,
                                       bounding_boxes=bbs_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 2), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    if self.num_keypoints > 0:
        kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                      dtype=np.float32)
        gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                         dtype=np.float32)
        kp_reg_mask = np.zeros((self.max_detections, self.num_keypoints * 2),
                               dtype=np.uint8)
        bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                            keypoints=kpts_aug)
    else:
        bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

    for k in range(num_objs):
        ann = anns[k]
        # imgaug expects the image shape as (height, width)
        bbox_aug = bbs_aug[k].clip_out_of_image((output_h, output_w))
        bbox = np.array([bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])
        cls_id = int(self.cat_mapping[ann['category_id']])
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((np.ceil(h), np.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_det[k] = ([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])
            if self.num_keypoints > 0:
                valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                for i, p in enumerate(
                        kpts_aug[k * self.num_keypoints:(k + 1) *
                                 self.num_keypoints]):
                    kp[k][i * 2] = p.x - ct_int[0]
                    kp[k][i * 2 + 1] = p.y - ct_int[1]
                    # a keypoint counts only if annotated as visible
                    # (flag == 2) and inside the output map
                    is_valid = valid[i] == 2 and not p.is_out_of_image(
                        (output_h, output_w))
                    kp_reg_mask[k, i * 2] = int(is_valid)
                    kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                    gt_kp[k][i] = p.x, p.y
            if "area" not in ann:
                gt_areas[k] = w * h
            else:
                gt_areas[k] = ann["area"]

    del bbs
    del bbs_aug
    del img_aug

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 6), dtype=np.float32)
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    if self.num_keypoints > 0:
        ret['kps'] = kp
        ret['gt_kps'] = gt_kp
        ret['kp_reg_mask'] = kp_reg_mask
        del kpts_aug
    return ret
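# The ind target above flattens the output feature map row-major: a center
# at (x, y) on an output_w-wide grid lands at y * output_w + x.
output_w = 128
x, y = 37, 52
ind = y * output_w + x                              # 52 * 128 + 37 = 6693
assert (ind % output_w, ind // output_w) == (x, y)  # decodes back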
def __get_default_coco(self, img, anns, num_objs):
    boxes = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        boxes.append(BoundingBox(*bbox))
    bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

    if self.augmentation is not None:
        img_aug, bbs_aug = self.augmentation(image=img, bounding_boxes=bbs)
    else:
        img_aug, bbs_aug = np.copy(img), bbs.copy()
    img_aug, bbs_aug = self.resize(image=img_aug, bounding_boxes=bbs_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    # each ground-truth detection row is (x1, y1, x2, y2, score, class)
    gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    bbs_aug = self.resize_out(bounding_boxes=bbs_aug)
    for k in range(num_objs):
        ann = anns[k]
        # imgaug expects the image shape as (height, width)
        bbox_aug = bbs_aug[k].clip_out_of_image((output_h, output_w))
        bbox = np.array([bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])
        cls_id = int(self.cat_mapping[ann['category_id']])
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((np.ceil(h), np.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_det[k] = ([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])
            if "area" not in ann:
                gt_areas[k] = w * h
            else:
                gt_areas[k] = ann["area"]

    del bbs
    del bbs_aug
    del img_aug

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 6), dtype=np.float32)
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    return ret
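# _coco_box_to_bbox is defined elsewhere in the class; a common
# implementation (an assumption here, not taken from this code) converts
# COCO's [x, y, w, h] annotation format to the [x1, y1, x2, y2] corners
# that BoundingBox(*bbox) expects:
import numpy as np

def _coco_box_to_bbox(box):
    x, y, w, h = box
    return np.array([x, y, x + w, y + h], dtype=np.float32)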