def fliter_small_box(boxes, min_size):
    """Return the indices of boxes whose area is strictly greater than ``min_size``.

    Args:
        boxes: Boxes accepted by ``xyxy2xywh``; columns 2 and 3 of the
            converted result are used as width and height.
        min_size: Area threshold; a box is kept only when ``w * h > min_size``.

    Returns:
        The first element of ``np.where`` on the area mask, i.e. the row
        indices of the boxes that pass the filter.
    """
    converted = xyxy2xywh(boxes)
    areas = converted[:, 2] * converted[:, 3]
    large_enough = areas > min_size
    return np.where(large_enough)[0]
def test(epochs_tested):
    """Evaluate stored FCOS checkpoints on the COCO evaluation dataset.

    For every epoch in ``epochs_tested`` the checkpoint is loaded into the
    model, the whole dataloader is run under ``torch.no_grad()``, the
    detections are gathered as COCO-style result dicts and handed to
    ``utils.evaluate_coco`` together with an output path of the form
    ``<cfg.output_path>/fcos_<epoch>.json``.

    Args:
        epochs_tested: list or set of checkpoint identifiers understood by
            ``utils.load_model``.
    """
    is_train = False
    coco_dataset = dataset.COCODataset(
        is_train=is_train,
        transforms=transform.build_transforms(is_train=is_train),
    )
    dataloader = build_dataloader(coco_dataset, sampler=None, is_train=is_train)

    assert isinstance(epochs_tested, (list, set)), "during test, archive_name must be a list or set!"

    model = FCOS(is_train=is_train)

    for epoch in epochs_tested:
        utils.load_model(model, epoch)
        model.cuda()
        model.eval()

        final_results = []

        with torch.no_grad():
            for data in tqdm(dataloader):
                images = data["images"]
                ori_shapes = data["ori_img_shape"]
                fin_shapes = data["fin_img_shape"]
                batch_indices = data["indexs"]

                cls_pred, reg_pred, label_pred = model(
                    [images.cuda(), ori_shapes.cuda(), fin_shapes.cuda()]
                )

                # Only the first element of every output is consumed.
                scores = cls_pred[0].cpu()
                boxes = reg_pred[0].cpu()
                classes = label_pred[0].cpu()

                eval_set = dataloader.dataset
                image_id = eval_set.img_infos[batch_indices[0]]["id"]

                boxes = utils.xyxy2xywh(boxes)
                classes = classes.tolist()
                scores = scores.tolist()

                for k, box in enumerate(boxes):
                    final_results.append(
                        {
                            "image_id": image_id,
                            "category_id": eval_set.label2catid[classes[k]],
                            "bbox": box.tolist(),
                            "score": scores[k],
                        }
                    )

        output_path = os.path.join(cfg.output_path, "fcos_" + str(epoch) + ".json")
        utils.evaluate_coco(dataloader.dataset.coco, final_results, output_path, "bbox")
def convert_labelme_to_coco(input_dir, output_path):
    """Convert a directory of labelme JSON files into one COCO annotation file.

    Every JSON file found by ``get_files`` becomes one COCO image entry, and
    every entry of its ``shapes`` list becomes one annotation of the single
    hard-coded category ``face`` (id 1).  Image ids and annotation ids are
    assigned sequentially starting at 1.

    Args:
        input_dir: Directory searched for ``.json`` files.
        output_path: Path of the COCO JSON file to write.

    Note:
        Each shape is expected to carry exactly two points (a rectangle).
        The two points are treated as opposite corners in any order.
    """
    # The extension is passed as a plain string, exactly as before
    # (``('.json')`` is not a tuple).
    json_file_paths = get_files(input_dir, '.json')
    print('Load {} files from {}'.format(len(json_file_paths), input_dir))

    json_dict = {
        'images': [],
        'annotations': [],
        'categories': [{
            'supercategory': 'face',
            'id': 1,
            'name': 'face',
        }],
    }

    image_id = 1
    bbox_id = 1
    ignore = 0
    category_id = 1

    for json_file_path in json_file_paths:
        with open(json_file_path, 'r') as fp:
            anno = json.load(fp)

        json_dict['images'].append({
            'file_name': anno['imagePath'],
            'height': anno['imageHeight'],
            'width': anno['imageWidth'],
            'id': image_id,
        })

        for shape in anno['shapes']:
            p0, p1 = shape['points']
            # A rectangle may have been drawn starting from any corner; order
            # the coordinates so width and height never come out negative.
            x_min, x_max = sorted((p0[0], p1[0]))
            y_min, y_max = sorted((p0[1], p1[1]))
            bbox = xyxy2xywh([x_min, y_min, x_max, y_max])
            _, _, w, h = bbox
            json_dict['annotations'].append({
                'area': w * h,
                'iscrowd': ignore,
                'image_id': image_id,
                'bbox': bbox,
                'category_id': category_id,
                'id': bbox_id,
                'ignore': ignore,
                'segmentation': [],
            })
            bbox_id += 1

        image_id += 1

    with open(output_path, 'w') as json_fp:
        json.dump(json_dict, json_fp)
    print('Done, save to {}'.format(output_path))
def make_target(self, gt_boxes, gt_labels, img_size):
    """
    Build the grid-shaped training target for one image.

    :param gt_boxes: [M, 4] / [xmin, ymin, xmax, ymax] / ndarray
    :param gt_labels: [M,] / ndarray
    :param img_size: 416
    :return: [[x, y, w, h, label], ...] / shape: [13, 13, 5, 25]
             x, y, w and h are all expressed on the 416 scale
    """
    assert len(gt_boxes) == len(gt_labels)
    assert isinstance(gt_boxes, np.ndarray)
    assert isinstance(gt_labels, np.ndarray)

    opt = self.opt
    cell_size = img_size / opt.S
    target = np.zeros((opt.S, opt.S, opt.B, 4 + 1 + opt.voc_class_num), dtype=np.float32)

    # [xmin, ymin, xmax, ymax] -> [center_x, center_y, w, h]
    boxes_xywh = xyxy2xywh(gt_boxes)

    # [M, 2]: grid cell (column, row) that contains each box centre
    cells = np.floor(boxes_xywh[:, :2] / cell_size).astype(np.int32)
    keep = self._fliter_duplicate(cells)
    kept_cells = cells[keep]
    kept_labels = gt_labels[keep]
    kept_boxes = boxes_xywh[keep]

    # Among the anchor slots of a cell that contains an object, only the slot
    # whose anchor has the highest IoU with the gt box receives ground truth.
    anchor_ious = self._iou(kept_boxes, self.anchor_base)
    best_anchor = np.argmax(anchor_ious, axis=-1)

    for (col, row), anchor, box, label in zip(kept_cells, best_anchor, kept_boxes, kept_labels):
        slot = target[row, col, anchor]  # view into ``target``
        slot[:2] = box[:2]  # x, y
        slot[2:4] = box[2:]  # w, h
        slot[4] = 1.  # confidence
        slot[5 + label] = 1.  # one-hot class label
    return target
def __call__(self, **kwargs):
    """Re-express regression targets relative to the cells of a grid.

    Reads ``kwargs["reg_targets"]`` (one xyxy array per image) and
    ``kwargs["images"]``.  For every image the cell size is the reversed
    leading image shape (``shape[-2::-1]``) divided by ``self.grid_size``.
    Box centres are divided by the cell size, their integer part becomes the
    cell index, and the remainder minus 0.5 becomes the stored centre offset;
    box sizes are divided by the cell size.

    Returns:
        ``kwargs`` with ``"reg_targets"`` replaced by the per-image
        ``[offset, size]`` arrays and ``"obj_indices"`` set to the per-image
        integer cell indices.
    """
    targets_xyxy = kwargs["reg_targets"]

    # size of each grid cell, per image
    cell_sizes = [np.array(image.shape[-2::-1]) / self.grid_size for image in kwargs["images"]]
    targets_xywh = [xyxy2xywh(xyxy) for xyxy in targets_xyxy]

    encoded_boxes = []
    cell_indices = []
    for xywh, cell_size in zip(targets_xywh, cell_sizes):
        # centre position measured in cells
        centre_in_cells = np.array(xywh[:, :2]) / cell_size
        cell_index = np.floor(centre_in_cells).astype(int)

        # offset inside the owning cell, shifted by half a cell
        centre_offset = centre_in_cells - cell_index - 0.5
        size_in_cells = xywh[:, 2:] / cell_size

        encoded_boxes.append(np.concatenate((centre_offset, size_in_cells), axis=1))
        cell_indices.append(cell_index)

    kwargs["reg_targets"] = encoded_boxes
    kwargs["obj_indices"] = cell_indices
    return kwargs
def __getitem__(self, index):
    """Load one sample and return ``(img_path, tensor_img, targets)``.

    The image is resized so that its longer side equals ``self.img_size``.
    In training mode it is colour-jittered and combined with three more
    fragments from ``self.get_img_for_mosaic`` into a 2x2 mosaic; otherwise
    it is zero-padded to a square (letterbox) and the boxes are remapped.

    Args:
        index: Sample index; wrapped modulo ``len(self.img_files)``.

    Returns:
        img_path: Path of the primary image.
        tensor_img: ``(3, img_size, img_size)`` tensor.
        targets: ``(num_boxes, 6)`` tensor; column 0 is left at zero and
            columns 1-5 hold the label rows.
    """
    sample = index % len(self.img_files)
    img_path = self.img_files[sample].rstrip()
    label_path = self.label_files[sample].rstrip()

    # Getting image
    img = Image.open(img_path).convert('RGB')
    width, height = img.size

    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
    else:
        # No label file: use an empty box set instead of leaving ``boxes``
        # unbound (float64 matches what np.loadtxt produces).
        boxes = torch.zeros((0, 5), dtype=torch.float64)

    # RESIZING: longer side becomes img_size, shorter side keeps aspect ratio
    if width > height:
        ratio = height / width
        t_width = self.img_size
        t_height = int(ratio * self.img_size)
    else:
        ratio = width / height
        t_width = int(ratio * self.img_size)
        t_height = self.img_size

    img = transforms.functional.resize(img, (t_height, t_width))

    # IF TRAIN APPLY BRIGHTNESS CONTRAST HUE SATURATION
    if self.train:
        brightness_rnd = random.uniform(1 - self.brightness_range, 1 + self.brightness_range)
        contrast_rnd = random.uniform(1 - self.contrast_range, 1 + self.contrast_range)
        hue_rnd = random.uniform(-self.hue_range, self.hue_range)
        saturation_rnd = random.uniform(1 - self.saturation_range, 1 + self.saturation_range)

        img = transforms.functional.adjust_brightness(img, brightness_rnd)
        img = transforms.functional.adjust_contrast(img, contrast_rnd)
        img = transforms.functional.adjust_hue(img, hue_rnd)
        img = transforms.functional.adjust_saturation(img, saturation_rnd)

    # CONVERTING TO TENSOR.  The image was converted to RGB on load, so the
    # tensor always has three channels and needs no grayscale expansion.
    tensor_img = transforms.functional.to_tensor(img)

    # !!!WARNING IN PIL IT'S WIDTH HEIGHT, WHEN IN PYTORCH IT IS HEIGHT WIDTH
    if self.train:
        # Training augmentation: 2x2 mosaic around a random cross point
        size = self.img_size
        mossaic_img = torch.zeros(3, size, size)

        # FINDING CROSS POINT
        cross_x = int(random.uniform(size * self.cross_offset, size * (1 - self.cross_offset)))
        cross_y = int(random.uniform(size * self.cross_offset, size * (1 - self.cross_offset)))

        # Quadrant 0 (top-left) is cut from the primary image
        fragment_img, fragment_bbox = self.get_mosaic(0, cross_x, cross_y, tensor_img, boxes)
        mossaic_img[:, 0:cross_y, 0:cross_x] = fragment_img
        boxes = fragment_bbox

        # (rows, cols) region of the mosaic filled by quadrants 1..3
        regions = {
            1: (slice(0, cross_y), slice(cross_x, size)),
            2: (slice(cross_y, size), slice(0, cross_x)),
            3: (slice(cross_y, size), slice(cross_x, size)),
        }
        for n in range(1, 4):
            raw_fragment_img, raw_fragment_bbox = self.get_img_for_mosaic(
                brightness_rnd, contrast_rnd, hue_rnd, saturation_rnd)
            fragment_img, fragment_bbox = self.get_mosaic(
                n, cross_x, cross_y, raw_fragment_img, raw_fragment_bbox)
            boxes = torch.cat([boxes, fragment_bbox])

            rows, cols = regions[n]
            mossaic_img[:, rows, cols] = fragment_img

        # Set mosaic to return tensor
        tensor_img = mossaic_img
    else:
        # Validation: letterbox (centre the image on a zero square)
        xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

        padding = abs((t_width - t_height)) // 2
        relative_padding = padding / self.img_size
        padded_img = torch.zeros(3, self.img_size, self.img_size)

        if t_width > t_height:
            padded_img[:, padding:padding + t_height] = tensor_img
            moved_columns = (1, 3)  # y coordinates shrink and shift
        else:
            padded_img[:, :, padding:padding + t_width] = tensor_img
            moved_columns = (0, 2)  # x coordinates shrink and shift
        tensor_img = padded_img

        for column in moved_columns:
            xyxy_bboxes[:, column] *= ratio
            xyxy_bboxes[:, column] += relative_padding

        boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)

    targets = torch.zeros((len(boxes), 6))
    targets[:, 1:] = boxes

    return img_path, tensor_img, targets
def get_mosaic(self, n, cross_x, cross_y, tensor_img, boxes):
    """Cut a random crop from an image and fit it into mosaic quadrant ``n``.

    Quadrants are numbered 0 = top-left, 1 = top-right, 2 = bottom-left,
    3 = bottom-right, with ``(cross_x, cross_y)`` the pixel where the four
    quadrants meet inside an ``img_size`` x ``img_size`` mosaic.

    Args:
        n: Quadrant number, 0..3.
        cross_x, cross_y: Cross point of the mosaic in pixels.
        tensor_img: Image tensor indexed as ``[channel, y, x]``.
        boxes: Label rows; columns ``1:`` are passed to ``utils.xywh2xyxy``
            and are handled here as fractions of the source image.

    Returns:
        ``(fragment, boxes)``: the crop resized to the quadrant size, and the
        label rows that survive the ``self.bbox_minsize`` filter with their
        box columns rewritten relative to the whole mosaic.

    Raises:
        ValueError: if ``n`` is not one of 0, 1, 2, 3.
    """
    if n not in (0, 1, 2, 3):
        raise ValueError("mosaic quadrant n must be 0, 1, 2 or 3, got %r" % (n,))

    t_height = tensor_img.shape[1]
    t_width = tensor_img.shape[2]
    xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

    relative_cross_x = cross_x / self.img_size
    relative_cross_y = cross_y / self.img_size

    # Quadrants 1 and 3 sit right of the cross point, 2 and 3 below it.
    on_right = n in (1, 3)
    on_bottom = n in (2, 3)

    # TARGET WIDTH AND HEIGHT OF THE FRAGMENT, in pixels and as fractions
    width_of_nth_pic = self.img_size - cross_x if on_right else cross_x
    height_of_nth_pic = self.img_size - cross_y if on_bottom else cross_y
    scale_x = (1 - relative_cross_x) if on_right else relative_cross_x
    scale_y = (1 - relative_cross_y) if on_bottom else relative_cross_y

    # CHOOSING TOP LEFT CORNER of the crop inside the first third of the
    # image, so that more than a few pixels of every box can remain.
    cut_x1 = random.randint(0, int(t_width * 0.33))
    cut_y1 = random.randint(0, int(t_height * 0.33))

    # Pick randomly along the axis that is the limiting one for the quadrant's
    # aspect ratio and derive the other axis from it.
    if (t_width - cut_x1) / width_of_nth_pic < (t_height - cut_y1) / height_of_nth_pic:
        cut_x2 = random.randint(cut_x1 + int(t_width * 0.67), t_width)
        cut_y2 = int(cut_y1 + (cut_x2 - cut_x1) / width_of_nth_pic * height_of_nth_pic)
    else:
        cut_y2 = random.randint(cut_y1 + int(t_height * 0.67), t_height)
        cut_x2 = int(cut_x1 + (cut_y2 - cut_y1) / height_of_nth_pic * width_of_nth_pic)

    # RESIZING: interpolate wants a batch dimension, so add one and drop
    # exactly that one again (a bare squeeze() would also drop a size-1
    # height or width).
    crop = tensor_img[:, cut_y1:cut_y2, cut_x1:cut_x2]
    tensor_img = F.interpolate(crop[None], (height_of_nth_pic, width_of_nth_pic)).squeeze(0)

    # BBOX: crop window expressed as fractions of the source image
    relative_cut_x1 = cut_x1 / t_width
    relative_cut_y1 = cut_y1 / t_height
    relative_cropped_width = (cut_x2 - cut_x1) / t_width
    relative_cropped_height = (cut_y2 - cut_y1) / t_height

    x_cols = slice(0, 4, 2)  # columns x1, x2
    y_cols = slice(1, 4, 2)  # columns y1, y2

    # Shift so the crop origin becomes 0, then rescale so the crop spans 0..1
    xyxy_bboxes[:, x_cols] = xyxy_bboxes[:, x_cols] - relative_cut_x1
    xyxy_bboxes[:, y_cols] = xyxy_bboxes[:, y_cols] - relative_cut_y1
    xyxy_bboxes[:, x_cols] /= relative_cropped_width
    xyxy_bboxes[:, y_cols] /= relative_cropped_height

    # CLAMPING so boxes do not extend outside the crop
    xyxy_bboxes[:, 0:4].clamp_(0, 1)

    # FILTER out boxes that became too small (measured relative to the crop)
    filter_minbbox = (
        xyxy_bboxes[:, 2] - xyxy_bboxes[:, 0] > self.bbox_minsize) & (
        xyxy_bboxes[:, 3] - xyxy_bboxes[:, 1] > self.bbox_minsize)

    # RESIZING TO MOSAIC: scale to the quadrant's share of the mosaic ...
    xyxy_bboxes[:, x_cols] *= scale_x
    xyxy_bboxes[:, y_cols] *= scale_y
    # ... and move right/bottom quadrants past the cross point
    if on_right:
        xyxy_bboxes[:, x_cols] = xyxy_bboxes[:, x_cols] + relative_cross_x
    if on_bottom:
        xyxy_bboxes[:, y_cols] = xyxy_bboxes[:, y_cols] + relative_cross_y

    boxes = boxes[filter_minbbox]
    boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)[filter_minbbox]

    return tensor_img, boxes
torch.FloatTensor(0, 6).to(device))) out, loss = net(inputs, labels) # if _trainCount%5==0: print(_trainCount, loss.item()) loss.backward() optimizer.step() _trainCount += 1 temp += 1 if _trainCount % 10 == 0 and _trainCount > 100: torch.save(net.state_dict(), "yoloParam%d.dict" % _trainCount) print("temp", temp) elif MODE is "predict": fileName = './data/images/BloodImage_00000.jpg' net.eval() img = Variable(trainDataSet.imgRead(fileName).unsqueeze(0).to(device)) with torch.no_grad(): out, _ = net(img) pred = torch.cat(out, dim=1).cpu() print(pred.shape) detections = utils.non_max_suppression(pred, 0.4, 0.2)[0] if detections is None: print("can not find the red cell") exit() a, label = torch.split(detections, [6, 1], dim=1) label = torch.cat([torch.zeros(label.shape[0], 1), label, a], dim=1) label[:, 2:6] = utils.xyxy2xywh(label[:, 2:6]) / options.imgSquareSize imgUtils.showImgNLab(img[0], label)
def __getitem__(self, index):
    """Return ``(image, labels_out, image_path, shapes)`` for one sample.

    With ``self.mosaic`` the image and labels come from ``load_mosaic`` and
    ``shapes`` is ``None``.  Otherwise the image is loaded and letterboxed,
    and the stored normalised xywh labels are turned into pixel xyxy on the
    letterboxed image.  After optional augmentation the labels go back to
    xywh normalised by the final image size.

    ``labels_out`` has 6 columns; column 0 is left at zero and columns 1-5
    receive the label rows.  The image is returned channel-reversed and in
    channel-first layout.
    """
    if self.image_weights:
        index = self.indices[index]

    hyp = self.hyp
    use_mosaic = self.mosaic
    if use_mosaic:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        # Load image
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox to the final shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Load labels
        labels = []
        x = self.labels[index]
        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            scale_x = ratio[0] * w
            scale_y = ratio[1] * h
            half_w = x[:, 3] / 2
            half_h = x[:, 4] / 2
            labels = x.copy()
            labels[:, 1] = scale_x * (x[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = scale_y * (x[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = scale_x * (x[:, 1] + half_w) + pad[0]
            labels[:, 4] = scale_y * (x[:, 2] + half_h) + pad[1]

    if self.augment:
        # Augment imagespace (not applied to mosaic samples)
        if not use_mosaic:
            img, labels = random_affine(img, labels,
                                        degrees=hyp['degrees'],
                                        translate=hyp['translate'],
                                        scale=hyp['scale'],
                                        shear=hyp['shear'])

        # Augment colorspace
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh, then normalize coordinates to 0 - 1
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h  # height
        labels[:, [1, 3]] /= img_w  # width

    if self.augment:
        # Random left-right flip with probability 0.5.  The up-down flip of
        # the original is permanently switched off and never draws a random
        # number, so nothing is done for it here.
        if random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert: BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def __getitem__(self, index):
    """Return ``(image, labels_out, img_path, (h, w))`` for one sample.

    Each entry of ``self.img_files`` is a space-separated line: the image
    path followed by label values, five per object.  Columns 1-4 of each
    label row are mapped through the letterbox transform and then converted
    with ``xyxy2xywh`` and divided by ``self.img_size``.

    Returns:
        image: float32 tensor, channel-reversed, channel-first, scaled to
            0.0 - 1.0.
        labels_out: ``(num_labels, 6)`` tensor; column 0 is left at zero.
        img_path: Path of the image file.
        (h, w): Image height and width before letterboxing.
    """
    info = self.img_files[index].rstrip().split(' ')
    img_path = info[0]
    img = cv2.imread(img_path)  # BGR
    assert img is not None, 'File Not Found ' + img_path

    augment_hsv = True
    if self.augment and augment_hsv:
        # SV augmentation by 50%
        fraction = 0.50  # must be < 1.0
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # Saturation (channel 1) first, then value (channel 2); each channel
        # gets its own random gain in [1 - fraction, 1 + fraction].
        for channel in (1, 2):
            plane = img_hsv[:, :, channel].astype(np.float32)
            gain = (random.random() * 2 - 1) * fraction + 1
            plane *= gain
            if gain > 1:
                np.clip(plane, None, 255, out=plane)
            img_hsv[:, :, channel] = plane
        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

    h, w, _ = img.shape
    img, ratio, padw, padh = letterbox(img, height=self.img_size)

    # Load labels
    labels0 = np.array(info[1:]).astype(np.float32).reshape(-1, 5)
    if len(labels0) > 0:
        # Work on a copy and move the corner coordinates onto the
        # letterboxed image.
        labels = labels0.copy()
        labels[:, 1] = ratio * labels[:, 1] + padw
        labels[:, 2] = ratio * labels[:, 2] + padh
        labels[:, 3] = ratio * labels[:, 3] + padw
        labels[:, 4] = ratio * labels[:, 4] + padh
    else:
        labels = np.array([])

    # Augment image and labels
    if self.augment:
        img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh and normalise by the letterbox size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

    if self.augment:
        # random left-right flip
        lr_flip = True
        if lr_flip and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        ud_flip = False
        if ud_flip and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Normalize
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)
def __getitem__(self, index):
    """Return ``(image, labels_out, image_path, shapes)`` for one sample.

    A mosaic sample is used when ``self.mosaic`` is set and a random draw
    falls below ``hyp['mosaic']``; it may additionally be blended (MixUp)
    with a second mosaic.  Otherwise a single image is loaded and
    letterboxed, and its normalised xywh labels are converted to pixel xyxy.
    Labels leave this method as xywh normalised by the final image size, in
    columns 1-5 of a 6-column tensor whose column 0 stays zero.
    """
    index = self.indices[index]  # linear, shuffled, or image_weights

    hyp = self.hyp
    mosaic = self.mosaic and random.random() < hyp['mosaic']
    if not mosaic:
        # Load image
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox
        shape = self.img_size  # final letterboxed shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Load labels
        labels = []
        x = self.labels[index]
        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            x_centre, y_centre = x[:, 1], x[:, 2]
            box_w, box_h = x[:, 3], x[:, 4]
            gain_w = ratio[0] * w
            gain_h = ratio[1] * h
            labels = x.copy()
            labels[:, 1] = gain_w * (x_centre - box_w / 2) + pad[0]  # pad width
            labels[:, 2] = gain_h * (y_centre - box_h / 2) + pad[1]  # pad height
            labels[:, 3] = gain_w * (x_centre + box_w / 2) + pad[0]
            labels[:, 4] = gain_h * (y_centre + box_h / 2) + pad[1]
    else:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        shapes = None

        # MixUp https://arxiv.org/pdf/1710.09412.pdf
        if random.random() < hyp['mixup']:
            img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
            r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
            img = (img * r + img2 * (1 - r)).astype(np.uint8)
            labels = np.concatenate((labels, labels2), 0)

    if self.augment:
        # Augment imagespace
        if not mosaic:
            img, labels = random_perspective(img, labels,
                                             degrees=hyp['degrees'],
                                             translate=hyp['translate'],
                                             scale=hyp['scale'],
                                             shear=hyp['shear'],
                                             perspective=hyp['perspective'])

        # Augment colorspace
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    has_labels = bool(nL)
    if has_labels:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
        labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

    if self.augment:
        # flip up-down
        if random.random() < hyp['flipud']:
            img = np.flipud(img)
            if has_labels:
                labels[:, 2] = 1 - labels[:, 2]

        # flip left-right
        if random.random() < hyp['fliplr']:
            img = np.fliplr(img)
            if has_labels:
                labels[:, 1] = 1 - labels[:, 1]

    labels_out = torch.zeros((nL, 6))
    if has_labels:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert: BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def __getitem__(self, index):
    """Return ``(image, labels_out, img_path, (h, w))`` for one sample.

    The image comes from the in-memory cache ``self.imgs`` when present;
    otherwise it is read from disk, expanded to three identical channels if
    it is single-channel, optionally downsized, and cached when the dataset
    has fewer than 3000 images.  It is then letterboxed to a square of
    ``self.img_size``.  Labels (normalised xywh, taken from ``self.labels``
    or read from the label file) are converted to pixel xyxy on the
    letterboxed image, optionally augmented, and returned as normalised xywh.

    Returns:
        image: float32 tensor, channel-reversed, channel-first, scaled to
            0.0 - 1.0.
        labels_out: ``(num_labels, 6)`` tensor; column 0 is left at zero.
        img_path: Path of the image file.
        (h, w): Image height and width just before letterboxing.
    """
    img_path = self.img_files[index]  # image path
    label_path = self.label_files[index]  # label path
    hyp = self.hyp

    # Load image
    img = self.imgs[index]
    if img is None:
        img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_UNCHANGED)
        # Check before touching the array so a missing or unreadable file
        # reports its path instead of failing inside np.stack.
        assert img is not None, 'File Not Found ' + img_path
        if img.ndim == 2:
            # replicate the single channel three times -> (H, W, 3)
            img = np.stack((img, ) * 3, axis=2)

        r = self.img_size / max(img.shape)
        if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
            h, w, _ = img.shape
            img = cv2.resize(img, (int(w * r), int(h * r)),
                             interpolation=cv2.INTER_AREA)

        if self.n < 3000:  # cache into memory if image count < 3000
            self.imgs[index] = img

    # Letterbox
    h, w, _ = img.shape  # image shape before padding
    shape = self.img_size
    img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='square')

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        x = self.labels[index]
        if x is None:  # labels not preloaded
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f.read().splitlines()]
            x = np.array(rows, dtype=np.float32)
            self.labels[index] = x  # save for next time

        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            labels = x.copy()
            labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

    # Augment image and labels
    if self.augment:
        img, labels = random_affine(img, labels,
                                    degrees=hyp['degrees'],
                                    translate=hyp['translate'],
                                    scale=hyp['scale'],
                                    shear=hyp['shear'])

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates 0 - 1
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width

    if self.augment:
        # random left-right flip
        lr_flip = True
        if lr_flip and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        ud_flip = False
        if ud_flip and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Normalize
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)
outputs = detr((images, masks), post_process=True) for img_id, scores, labels, boxes in zip(img_ids, outputs['scores'], outputs['labels'], outputs['boxes']): img_id = img_id.numpy() img_info = coco_data.coco.loadImgs([img_id])[0] img_height = img_info['height'] img_width = img_info['width'] for score, label, box in zip(scores, labels, boxes): score = score.numpy() label = label.numpy() box = absolute2relative(box, (img_width, img_height)) box = xyxy2xywh(box).numpy() results.append({ "image_id": int(img_id), "category_id": int(label), "bbox": box.tolist(), "score": float(score) }) pbar.update(int(len(images))) json_object = json.dumps(results, indent=2) with open(args.results_file, 'w') as f: f.write(json_object) evaluate(args.results_file)
def __getitem__(self, index):
    """Return ``(image, labels_out, image_path, shapes)`` for one sample.

    When ``self.mosaic`` is set the sample comes from ``load_mosaic`` (and
    may be MixUp-blended with a second mosaic); ``shapes`` is then ``None``.
    Otherwise one image is loaded and letterboxed and its normalised xywh
    labels are converted to pixel xyxy.  After optional augmentation the
    labels are converted back to xywh and divided by the final image height
    and width.  ``labels_out`` has 6 columns with column 0 left at zero.
    """
    index = self.indices[index]
    hyp = self.hyp
    mosaic = self.mosaic

    if mosaic:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        shapes = None

        # MixUp https://arxiv.org/pdf/1710.09412.pdf
        if random.random() < hyp['mixup']:
            other_img, other_labels = load_mosaic(self, random.randint(0, self.num_img - 1))
            mixup_ratio = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
            img = (img * mixup_ratio + other_img * (1 - mixup_ratio)).astype(np.uint8)
            labels = np.concatenate((labels, other_labels), 0)
    else:
        # Load image
        img, (orig_h, orig_w), (height, width) = load_image(self, index)

        # Letterbox
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size
        img, ratio, pad = _letterbox(img, shape, auto=False, scaleup=self.augment)
        shapes = (orig_h, orig_w), ((height / orig_h, width / orig_w), pad)  # for COCO mAP rescaling

        # Load labels
        labels = []
        label = self.labels[index]
        if label.size > 0:
            # Normalized xywh to pixel xyxy format
            pad_w, pad_h = pad[0], pad[1]
            x_gain = ratio[0] * width
            y_gain = ratio[1] * height
            half_w = label[:, 3] / 2
            half_h = label[:, 4] / 2
            labels = label.copy()
            labels[:, 1] = x_gain * (label[:, 1] - half_w) + pad_w
            labels[:, 2] = y_gain * (label[:, 2] - half_h) + pad_h
            labels[:, 3] = x_gain * (label[:, 1] + half_w) + pad_w
            labels[:, 4] = y_gain * (label[:, 2] + half_h) + pad_h

    if self.augment:
        # Augment imagespace (mosaic samples skip this step)
        if not mosaic:
            img, labels = random_perspective(
                img,
                labels,
                degrees=hyp['degrees'],
                translate=hyp['translate'],
                scale=hyp['scale'],
                shear=hyp['shear'],
                perspective=hyp['perspective'],
            )

        # Augment colorspace
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

    num_labels = len(labels)
    if num_labels:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
        labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

    if self.augment:
        # flip up-down
        if random.random() < hyp['flipud']:
            img = np.flipud(img)
            if num_labels:
                labels[:, 2] = 1 - labels[:, 2]

        # flip left-right
        if random.random() < hyp['fliplr']:
            img = np.fliplr(img)
            if num_labels:
                labels[:, 1] = 1 - labels[:, 1]

    labels_out = torch.zeros((num_labels, 6))
    if num_labels:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert: BGR to RGB, to 3x416x416
    chw_img = img[:, :, ::-1].transpose(2, 0, 1)
    chw_img = np.ascontiguousarray(chw_img)

    return torch.from_numpy(chw_img), labels_out, self.img_files[index], shapes