def __getitem__(self, index):
    im_path = self.image_list[index]
    im = cv2.cvtColor(cv2.imread(im_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    roidb = self._load_annotation(self.image_list[index])
    gt_inds = np.where(roidb['gt_classes'] != 0)[0]
    bboxes = roidb['boxes'][gt_inds, :]
    classes = roidb['gt_classes'][gt_inds]
    gt_boxes = np.zeros((len(gt_inds), 6), dtype=np.float32)
    if self.augment:
        transform = Augment(
            [
                HorizontalFlip(0.5),
                VerticalFlip(0.5),
                Affine(degree=0, translate=0.1, scale=0., p=0.5),
            ],
            box_mode='xyxyxyxy',
        )
        im, bboxes = transform(im, bboxes)
        mask = mask_valid_boxes(quad_2_rbox(bboxes, 'xywha'), return_mask=True)
        bboxes = bboxes[mask]
        gt_boxes = gt_boxes[mask]
        classes = classes[mask]
    for i, bbox in enumerate(bboxes):
        gt_boxes[i, :5] = quad_2_rbox(np.array(bbox), mode='xyxya')  # 4-point quad -> xyxya (angle in degrees)
        gt_boxes[i, 5] = classes[i]
    ## test augmentation
    # print(im.shape)
    # plot_gt(im, gt_boxes[:, :5], im_path, mode='xyxya')
    return {'image': im, 'boxes': gt_boxes, 'path': im_path}
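# A minimal sketch of what quad_2_rbox is assumed to do, built on cv2.minAreaRect.
# The _sketch name and the exact angle convention are assumptions; the repo's own
# helper may differ.
import cv2
import numpy as np

def quad_2_rbox_sketch(quad, mode='xyxya'):
    """Convert one quadrilateral [x1, y1, ..., x4, y4] to a rotated box.

    Returns [cx, cy, w, h, angle] for mode='xywha', or
    [x1, y1, x2, y2, angle] for mode='xyxya'; the angle is in degrees.
    """
    pts = np.asarray(quad, dtype=np.float32).reshape(4, 2)
    (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
    if mode == 'xywha':
        return np.array([cx, cy, w, h, angle], dtype=np.float32)
    # mode == 'xyxya': axis-aligned extent of the unrotated box plus the angle
    return np.array([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2, angle],
                    dtype=np.float32)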
def _load_annotation(self, index):
    root_dir, img_name = os.path.split(index)
    filename = os.path.join(root_dir, img_name[:-5] + '.xml')
    boxes, gt_classes = [], []
    with open(filename, 'r', encoding='utf-8-sig') as f:
        content = f.read()
        assert '<objects>' in content, 'Background image encountered in %s' % filename
        objects = content.split('<object>')
        info = objects.pop(0)  # header text before the first <object> tag
        for obj in objects:
            assert len(obj) != 0, 'No object found in %s' % filename
            points = obj[obj.find('<points>') + 8:obj.find('</points>')].split('<point>')[1:]
            coors = [x.split('<')[0].split(',') for x in points]
            # any trailing repeated point is discarded via *_
            (x1, y1), (x2, y2), (x3, y3), (x4, y4), *_ = coors
            x1, y1, x2, y2, x3, y3, x4, y4 = map(float, [x1, y1, x2, y2, x3, y3, x4, y4])
            quad_box = np.array([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])
            box = quad_2_rbox(quad_box)
            boxes.append(box)
            # cls_id was undefined in the original snippet; the <name> tag used
            # here is an assumption about the XML schema and may need adjusting
            cls_id = obj[obj.find('<name>') + 6:obj.find('</name>')]
            label = self.class_mapping(cls_id, self.level)
            gt_classes.append(label)
    return {'boxes': np.array(boxes), 'gt_classes': np.array(gt_classes)}
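# The string-based parser above assumes a <points>/<point>x,y</point> layout.
# A tiny self-contained demo of the same slicing logic on an illustrative snippet
# (the XML content here is made up for the example):
sample = ('<points><point>10.0,20.0</point><point>110.0,25.0</point>'
          '<point>105.0,80.0</point><point>8.0,76.0</point></points>')
pts = sample[sample.find('<points>') + 8:sample.find('</points>')].split('<point>')[1:]
coords = [p.split('<')[0].split(',') for p in pts]
print(coords)  # [['10.0', '20.0'], ['110.0', '25.0'], ['105.0', '80.0'], ['8.0', '76.0']]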
def __getitem__(self, index):
    # index: sample index; several DataLoader workers (num_workers) call this in
    # parallel. self.image_list[index] is the current image name from trainval.txt
    im_path = self.image_list[index]  # absolute path of the current image
    im = cv2.cvtColor(cv2.imread(im_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    # parse the XML for this file: a dict with two entries,
    # 'boxes' of shape (nt, 8) and 'gt_classes' of shape (1, nt)
    roidb = self._load_annotation(self.image_list[index])
    gt_inds = np.where(roidb['gt_classes'] != 0)[0]
    nt = len(roidb['boxes'])
    # gt_boxes layout: xyxyac (x1, y1, x2, y2, angle, class)
    gt_boxes = np.zeros((len(gt_inds), 6), dtype=np.float32)
    if nt:
        bboxes = roidb['boxes'][gt_inds, :]
        classes = roidb['gt_classes'][gt_inds]
        if self.augment:
            transform = Augment(
                [
                    HorizontalFlip(0.5),
                    VerticalFlip(0.5),
                    Affine(degree=0, translate=0.1, scale=0.1, p=0.5),
                    # Grayscale(0.3, p=0.5),
                    # Contrast(0.15, p=0.3),
                    # Sharpen(0.15, p=0.2),
                    Noise(0.1, p=0.5),
                    # Gamma(0.2, p=0.4),
                    # Blur(1.3, p=0.5),
                ],
                box_mode='xywha',
            )
            im, bboxes = transform(im, bboxes)
        gt_boxes[:, :-1] = bboxes
        mask = mask_valid_boxes(quad_2_rbox(bboxes, 'xywha'), return_mask=True)
        bboxes = bboxes[mask]
        gt_boxes = gt_boxes[mask]
        classes = classes[mask]
        for i, bbox in enumerate(bboxes):
            gt_boxes[i, 5] = classes[i]
        gt_boxes = constraint_theta(gt_boxes)
        # convert the first four columns from (cx, cy, w, h) to (x1, y1, x2, y2)
        cx, cy, w, h = [gt_boxes[:, x] for x in range(4)]
        x1 = cx - 0.5 * w
        x2 = cx + 0.5 * w
        y1 = cy - 0.5 * h
        y2 = cy + 0.5 * h
        gt_boxes[:, 0] = x1
        gt_boxes[:, 1] = y1
        gt_boxes[:, 2] = x2
        gt_boxes[:, 3] = y2
    ## test augmentation
    # print(im.shape)
    # plot_gt(im, gt_boxes[:, :5], im_path, mode='xyxya')
    return {'image': im, 'boxes': gt_boxes, 'path': im_path}
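# constraint_theta above is assumed to normalize box angles into a fixed range.
# A minimal sketch of one common convention (angle folded into [-90, 0) degrees
# with a w/h swap); the repo's actual rule may differ.
import numpy as np

def constraint_theta_sketch(gt_boxes):
    """Fold angles of (N, 6) boxes [cx, cy, w, h, angle, class] into [-90, 0) degrees."""
    boxes = gt_boxes.copy()
    theta = boxes[:, 4] % 180.0   # bring every angle into [0, 180)
    swap = theta >= 90.0          # these boxes need an extra 90-degree fold
    theta[swap] -= 90.0
    # swapping w and h compensates for the 90-degree fold, so geometry is unchanged
    boxes[swap, 2], boxes[swap, 3] = gt_boxes[swap, 3], gt_boxes[swap, 2]
    boxes[:, 4] = theta - 90.0    # final range: [-90, 0)
    return boxes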
def __call__(self, img, labels, mode=None):
    if random.random() < self.p:
        if mode == 'xywha':
            # rotated boxes are converted to quads, warped, then converted back
            labels = rbox_2_quad(labels, mode='xywha')
            # NOTE: the random_affine variant shown below returns (img, targets, mask);
            # with that version, unpack three values here
            img, labels = random_affine(img, labels,
                                        degree=self.degree, translate=self.translate,
                                        scale=self.scale, shear=self.shear)
            labels = quad_2_rbox(labels, mode='xywha')
        else:
            img, labels = random_affine(img, labels,
                                        degree=self.degree, translate=self.translate,
                                        scale=self.scale, shear=self.shear)
    return img, labels
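# rbox_2_quad is the inverse conversion the Affine wrapper relies on. A minimal
# sketch assuming the same degree-based angle convention; cv2.boxPoints does the
# corner math, and the _sketch name marks this as an illustration, not the repo helper.
import cv2
import numpy as np

def rbox_2_quad_sketch(rboxes, mode='xywha'):
    """Convert (N, 5) rotated boxes [cx, cy, w, h, angle] (degrees)
    to (N, 8) quads [x1, y1, ..., x4, y4]."""
    # mode kept for signature parity; this sketch only handles 'xywha'
    rboxes = np.asarray(rboxes, dtype=np.float32).reshape(-1, 5)
    quads = np.zeros((len(rboxes), 8), dtype=np.float32)
    for i, (cx, cy, w, h, a) in enumerate(rboxes):
        corners = cv2.boxPoints(((cx, cy), (w, h), a))  # (4, 2) corner array
        quads[i] = corners.reshape(-1)
    return quads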
def __getitem__(self, index):
    im_path = self._image_path_from_index(self.image_list[index])
    im = cv2.cvtColor(cv2.imread(im_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    roidb = self._load_pascal_annotation(self.image_list[index])
    gt_inds = np.where(roidb['gt_classes'] != 0)[0]
    bboxes = roidb['boxes'][gt_inds, :]
    classes = roidb['gt_classes'][gt_inds]
    if self.random_flip and np.random.rand() >= 0.5:
        # horizontal flip: mirror every x coordinate of the quads
        im = cv2.flip(im, 1, None)
        oldxs = bboxes[:, 0::2].copy()
        bboxes[:, 0::2] = im.shape[1] - oldxs - 1
    gt_boxes = np.empty((len(gt_inds), 6), dtype=np.float32)
    for i, bbox in enumerate(bboxes):
        gt_boxes[i, :5] = quad_2_rbox(np.array(bbox))
        gt_boxes[i, 5] = classes[i]
    return {'image': im, 'boxes': gt_boxes}
def __getitem__(self, index):
    im_path = self._image_path_from_index(self.image_list[index])
    # check the read result before cvtColor, which would raise on a None image
    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    if im is not None:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        h, w = im.shape[:2]
        gt_path = self._annotation_path_from_index(self.annotation_list[index])
        if self.dformat == 'txt':
            roidb = self._load_annotation_txt(gt_path, h, w)
        elif self.dformat == 'json':
            roidb = self._load_annotation_json(gt_path)
        else:
            raise Exception('Data format not supported!')
        gt_inds = np.where(roidb['gt_classes'] != 0)[0]
        bboxes = roidb['boxes'][gt_inds, :]
        classes = roidb['gt_classes'][gt_inds]
        gt_boxes = np.empty((len(gt_inds), 6), dtype=np.float32)
        for i, bbox in enumerate(bboxes):
            gt_boxes[i, :5] = quad_2_rbox(np.array(bbox))
            gt_boxes[i, 5] = classes[i]
        return {'image': im, 'boxes': gt_boxes}
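# These datasets return dicts whose images and box arrays vary in shape, so the
# default PyTorch collate cannot stack them. A usage sketch with a pass-through
# collate; the dataset class name and constructor arguments below are hypothetical.
from torch.utils.data import DataLoader

def list_collate(batch):
    """Keep each field as a list; shapes differ across samples."""
    return {k: [sample[k] for sample in batch] for k in batch[0]}

# dataset = RotatedDataset(image_list='trainval.txt', augment=True)  # hypothetical ctor
# loader = DataLoader(dataset, batch_size=8, shuffle=True,
#                     num_workers=8, collate_fn=list_collate)
# for batch in loader:
#     images, boxes = batch['image'], batch['boxes']  # lists of per-image arrays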
def random_affine(img, targets=(), degree=10, translate=.1, scale=.1, shear=10):
    # torchvision.transforms.RandomAffine(degree=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degree, degree)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # y translation (pixels)

    M = T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_AREA,
                         borderValue=(128, 128, 128))  # BGR order borderValue

    # Return warped points also; both rows of the transform must read the
    # *original* coordinates, so copy them before overwriting in place
    xs = targets[:, [0, 2, 4, 6]].copy()
    ys = targets[:, [1, 3, 5, 7]].copy()
    targets[:, [0, 2, 4, 6]] = xs * M[0, 0] + ys * M[0, 1] + M[0, 2]
    targets[:, [1, 3, 5, 7]] = xs * M[1, 0] + ys * M[1, 1] + M[1, 2]

    for x in range(0, 8, 2):
        targets[:, x] = targets[:, x].clip(0, width)
    for y in range(1, 8, 2):
        targets[:, y] = targets[:, y].clip(0, height)

    # drop degenerate boxes: too small, extreme aspect ratio, or touching the border
    rboxes = quad_2_rbox(targets)
    w = rboxes[:, 2]
    h = rboxes[:, 3]
    ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
    mask = (w > 4) & \
           (h > 4) & \
           (ar < 15) & \
           np.array([(x > 0).all() for x in targets]) & \
           np.array([(x[0::2] < width).all() for x in targets]) & \
           np.array([(x[1::2] < height).all() for x in targets])
    targets = targets[mask]
    return imw, targets, mask
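# A quick sanity check of random_affine on a dummy image and a single quad,
# assuming quad_2_rbox is importable from the repo's utilities; all values here
# are illustrative.
import random

import numpy as np

random.seed(0)
img = np.full((256, 256, 3), 128, dtype=np.uint8)                  # flat gray test image
quads = np.array([[60., 60., 160., 70., 150., 150., 50., 140.]])   # one quad, shape (1, 8)

warped_img, warped_quads, keep = random_affine(img, quads.copy(),
                                               degree=10, translate=0.1, scale=0.1)
print(warped_img.shape, warped_quads.shape, keep)
# any per-box labels must be filtered with the same mask to stay aligned:
# classes = classes[keep]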