def evaluate(cfg,
             data,
             weights=None,
             batch_size=16,
             workers=4,
             image_size=416,
             confidence_threshold=0.001,
             iou_threshold=0.6,  # for NMS
             save_json=True,
             single_cls=False,
             augment=False,
             model=None,
             dataloader=None):
    # Initialize/load model and set device
    if model is None:
        device = select_device(args.device, batch_size=batch_size)  # `args` is the global CLI namespace
        verbose = args.task == "eval"

        # Initialize model
        model = Darknet(cfg, image_size).to(device)

        # Load weights
        if weights.endswith(".pth"):
            model.load_state_dict(torch.load(weights, map_location=device)["state_dict"])
        else:
            load_darknet_weights(model, weights)

        if device.type != "cpu" and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_config(data)
    classes_num = 1 if single_cls else int(data["classes"])
    path = data["valid"]  # path to valid images
    names = load_classes(data["names"])  # class names
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU vector for [email protected]:0.95
    iouv = iouv[0].view(1)  # comment out this line for [email protected]:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(path, image_size, batch_size, rect=True)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=workers,
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%10s" * 6) % ("Class", "Images", "Targets", "P", "R", "[email protected]", "F1")
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, ap, ap_class = [], [], [], []
    for batch_i, (images, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        images = images.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
        targets = targets.to(device)
        batch_size, _, height, width = images.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Test the effect of image augmentation
            if augment:
                fs_image = scale_image(images.flip(3), 0.9)  # flip-lr and scale
                s_image = scale_image(images, 0.7)  # scale
                images = torch.cat((images, fs_image, s_image), 0)

            # Run model
            start_time = time_synchronized()
            inference_outputs, training_outputs = model(images)
            t0 += time_synchronized() - start_time

            if augment:
                x = torch.split(inference_outputs, batch_size, dim=0)
                x[1][..., :4] /= 0.9  # de-scale
                x[1][..., 0] = width - x[1][..., 0]  # un-flip lr
                x[2][..., :4] /= 0.7  # de-scale
                inference_outputs = torch.cat(x, 1)

            # Compute loss
            if hasattr(model, "hyp"):  # if model has loss hyperparameters
                # GIoU, obj, cls
                loss += compute_loss(training_outputs, targets, model)[1][:3].cpu()

            # Run NMS
            start_time = time_synchronized()
            output = non_max_suppression(inference_outputs,
                                         confidence_threshold=confidence_threshold,
                                         iou_threshold=iou_threshold)
            t1 += time_synchronized() - start_time

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            label_num = len(labels)
            target_class = labels[:, 0].tolist() if label_num else []
            seen += 1

            if pred is None:
                if label_num:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(),
                                  torch.Tensor(),
                                  target_class))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split("_")[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(images[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    json_dict.append({"image_id": image_id,
                                      "category_id": coco91class[int(p[5])],
                                      "bbox": [round(x, 3) for x in b],
                                      "score": round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(len(pred), niou, dtype=torch.bool, device=device)
            if label_num:
                detected = []  # detected target indices
                tcls_tensor = labels[:, 0]

                # Target boxes
                target_boxes = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction-to-target IoUs: best ious, indices
                        ious, i = box_iou(pred[pi, :4], target_boxes[ti]).max(1)

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                # All targets already located in image
                                if len(detected) == label_num:
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            # [P, R, [email protected]:0.95, [email protected]]
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        # Number of targets per class
        nt = np.bincount(stats[3].astype(np.int64), minlength=classes_num)
    else:
        nt = torch.zeros(1)

    # Print results
    context = "%20s" + "%10.3g" * 6  # print format
    print(context % ("all", seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and classes_num > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(context % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        memory = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
        print(f"Inference memory: {memory:.1f} GB.")
        print(f"Speed:\n"
              f"Image size: ({image_size}x{image_size}) at batch_size: {batch_size}\n"
              f"\t- Inference {t0 / seen * 1E3:.1f}ms.\n"
              f"\t- NMS {t1 / seen * 1E3:.1f}ms.\n"
              f"\t- Total {(t0 + t1) / seen * 1E3:.1f}ms.\n")

    # Save JSON
    if save_json and map and len(json_dict):
        print("\nCOCO mAP with pycocotools...")
        imgIds = [int(Path(x).stem.split("_")[-1]) for x in dataloader.dataset.image_files]
        with open("results.json", "w") as file:
            json.dump(json_dict, file)

        cocoGt = COCO(glob.glob("data/coco2014/annotations/instances_val*.json")[0])  # initialize COCO ground truth api
        cocoDt = cocoGt.loadRes("results.json")  # initialize COCO pred api

        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mf1, map = cocoEval.stats[:2]  # update to pycocotools results ([email protected]:0.95, [email protected])

    # Return results
    maps = np.zeros(classes_num) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
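# Example driver (a minimal sketch, not part of the original file): how evaluate()
# might be invoked standalone. The .cfg/.data/.pth paths below are hypothetical
# placeholders; the global `args` namespace must already be populated (e.g. by
# argparse) because evaluate() reads args.device and args.task when it builds
# its own model. With iouv reduced to a single 0.5 entry above, `map` is [email protected].
if __name__ == "__main__":
    (mp, mr, map50, mf1, giou_loss, obj_loss, cls_loss), maps = evaluate(
        cfg="cfg/yolov3.cfg",          # hypothetical model definition
        data="data/coco2014.data",     # hypothetical dataset config
        weights="weights/yolov3.pth",  # hypothetical checkpoint
        batch_size=16,
        image_size=416,
        confidence_threshold=0.001,    # keep low: AP integrates the full PR curve
        iou_threshold=0.6,             # NMS IoU threshold
        save_json=False)
    print(f"P {mp:.3f}, R {mr:.3f}, [email protected] {map50:.3f}, F1 {mf1:.3f}")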
def __getitem__(self, index):
    if self.image_weights:
        index = self.indices[index]

    hyp = self.hyp
    if self.mosaic:
        # Load mosaic
        images, labels = load_mosaic(self, index)
        shapes = None
    else:
        # Load image
        images, (raw_height, raw_width), (height, width) = load_image(self, index)

        # Letterbox
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.image_size  # final letterboxed shape
        images, ratio, pad = letterbox(images, shape, auto=False, scaleup=self.augment)
        shapes = (raw_height, raw_width), ((height / raw_height, width / raw_width), pad)  # for COCO mAP rescaling

        # Load labels
        labels = []
        x = self.labels[index]
        if x is not None and x.size > 0:
            # Normalized xywh to pixel xyxy format
            labels = x.copy()
            labels[:, 1] = ratio[0] * width * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
            labels[:, 2] = ratio[1] * height * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
            labels[:, 3] = ratio[0] * width * (x[:, 1] + x[:, 3] / 2) + pad[0]
            labels[:, 4] = ratio[1] * height * (x[:, 2] + x[:, 4] / 2) + pad[1]

    if self.augment:
        # Augment image space
        if not self.mosaic:
            images, labels = random_affine(images, labels,
                                           degrees=hyp["degrees"],
                                           translate=hyp["translate"],
                                           scale=hyp["scale"],
                                           shear=hyp["shear"])

        # Augment color space
        augment_hsv(images, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])

    labels_num = len(labels)  # number of labels
    if labels_num:
        # Convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates to 0-1
        labels[:, [2, 4]] /= images.shape[0]  # height
        labels[:, [1, 3]] /= images.shape[1]  # width

    if self.augment:
        # Random left-right flip
        fliplr = True
        if fliplr and random.random() < 0.5:
            images = np.fliplr(images)
            if labels_num:
                labels[:, 1] = 1 - labels[:, 1]

        # Random up-down flip
        flipud = False
        if flipud and random.random() < 0.5:
            images = np.flipud(images)
            if labels_num:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((labels_num, 6))
    if labels_num:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert
    images = images[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    images = np.ascontiguousarray(images)

    return torch.from_numpy(images), labels_out, self.image_files[index], shapes
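# Sketch of the collate_fn that evaluate() passes to the DataLoader (the original
# method is not shown in this excerpt; this assumes the standard pattern implied
# by the code above). __getitem__ leaves column 0 of labels_out zeroed, and
# evaluate() recovers per-image targets via `targets[:, 0] == si`, so collating
# must write each sample's batch index into that column.
@staticmethod
def collate_fn(batch):
    images, labels, paths, shapes = zip(*batch)  # transpose list of samples
    for i, label in enumerate(labels):
        label[:, 0] = i  # record which image in the batch each target belongs to
    # Images share one shape per batch (rect=True uses per-batch batch_shapes),
    # so they can be stacked; variable-length label tensors are concatenated.
    return torch.stack(images, 0), torch.cat(labels, 0), paths, shapes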