Example #1
    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        if self.mosaic:
            # Load mosaic
            images, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load image
            images, (raw_height, raw_width), (height, width) = load_image(self, index)

            # Letterbox to the final padded shape
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.image_size
            images, ratio, pad = letterbox(images, shape, auto=False, scaleup=self.augment)
            shapes = (raw_height, raw_width), ((height / raw_height, width / raw_width), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x is not None and x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * width * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * height * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * width * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * height * (x[:, 2] + x[:, 4] / 2) + pad[1]

        labels_num = len(labels)  # number of labels
        if labels_num:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= images.shape[0]  # height
            labels[:, [1, 3]] /= images.shape[1]  # width

        labels_out = torch.zeros((labels_num, 6))
        if labels_num:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert BGR HWC to RGB CHW (3x416x416)
        images = images[:, :, ::-1].transpose(2, 0, 1)
        images = np.ascontiguousarray(images)

        return torch.from_numpy(images), labels_out, self.image_files[index], shapes
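The first column of labels_out is deliberately left at zero: a batch collate function fills it with each image's index inside the batch, which is what lets the evaluation loop in Example #2 filter targets with targets[:, 0] == si. A minimal sketch of such a collate_fn follows, assuming the exact tuple layout returned above; the actual helper in this codebase may differ.

import torch

def collate_fn(batch):
    # batch is a list of (image, labels_out, path, shapes) tuples from __getitem__
    images, labels, paths, shapes = zip(*batch)
    for i, label in enumerate(labels):
        label[:, 0] = i  # write the batch-local image index into the reserved column
    return torch.stack(images, 0), torch.cat(labels, 0), paths, shapes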
Example #2
def evaluate(cfg,
             data,
             weights=None,
             batch_size=16,
             workers=4,
             image_size=416,
             confidence_threshold=0.001,
             iou_threshold=0.6,  # for nms
             save_json=True,
             single_cls=False,
             augment=False,
             model=None,
             dataloader=None):
    # Initialize/load model and set device
    if model is None:
        device = select_device(args.device, batch_size=batch_size)  # args is a module-level argparse namespace
        verbose = args.task == "eval"

        # Initialize model
        model = Darknet(cfg, image_size).to(device)

        # Load weights
        if weights.endswith(".pth"):
            model.load_state_dict(torch.load(weights, map_location=device)["state_dict"])
        else:
            load_darknet_weights(model, weights)

        if device.type != "cpu" and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_config(data)
    classes_num = 1 if single_cls else int(data["classes"])
    path = data["valid"]  # path to valid images
    names = load_classes(data["names"])  # class names
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)  # comment out this line to evaluate mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(path, image_size, batch_size, rect=True)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=workers,
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%10s" * 6) % ("Class", "Images", "Targets", "P", "R", "[email protected]", "F1")
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, ap, ap_class = [], [], [], []
    for batch_i, (images, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        images = images.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        batch_size, _, height, width = images.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Test-time augmentation: add flipped and scaled copies
            if augment:
                fs_image = scale_image(images.flip(3), 0.9)  # flip-lr and scale
                s_image = scale_image(images, 0.7)  # scale
                images = torch.cat((images, fs_image, s_image), 0)

            # Run model
            start_time = time_synchronized()
            inference_outputs, training_outputs = model(images)
            t0 += time_synchronized() - start_time

            if augment:
                # Undo the test-time augmentation before NMS
                x = torch.split(inference_outputs, batch_size, dim=0)
                x[1][..., :4] /= 0.9  # undo scale
                x[1][..., 0] = width - x[1][..., 0]  # undo flip-lr
                x[2][..., :4] /= 0.7  # undo scale
                inference_outputs = torch.cat(x, 1)

            # Compute loss
            if hasattr(model, "hyp"):  # if model has loss hyperparameters
                # GIoU, obj, cls
                loss += compute_loss(training_outputs, targets, model)[1][:3].cpu()

            # Run NMS
            start_time = time_synchronized()
            output = non_max_suppression(inference_outputs,
                                         confidence_threshold=confidence_threshold,
                                         iou_threshold=iou_threshold)
            t1 += time_synchronized() - start_time

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            label_num = len(labels)
            target_class = labels[:, 0].tolist() if label_num else []
            seen += 1

            if pred is None:
                if label_num:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(),
                                  torch.Tensor(),
                                  target_class))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split("_")[-1])
                box = pred[:, :4].clone()  # xyxy
                # to original shape
                scale_coords(images[si].shape[1:], box, shapes[si][0], shapes[si][1])
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det, b in zip(pred.tolist(), box.tolist()):  # det renamed to avoid shadowing p
                    json_dict.append({"image_id": image_id,
                                      "category_id": coco91class[int(det[5])],
                                      "bbox": [round(x, 3) for x in b],
                                      "score": round(det[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(len(pred), niou, dtype=torch.bool, device=device)
            if label_num:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                target_boxes = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        # best ious, indices
                        ious, i = box_iou(pred[pi, :4], target_boxes[ti]).max(1)

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                # all targets already located in image
                                if len(detected) == label_num:
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        # number of targets per class
        nt = np.bincount(stats[3].astype(np.int64), minlength=classes_num)
    else:
        nt = torch.zeros(1)

    # Print results
    context = "%20s" + "%10.3g" * 6  # print format
    print(context % ("all", seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and classes_num > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(context % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        memory = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
        print(f"Inference memory: {memory:.1f} GB.")
        print(f"Speed:\n"
              f"Image size: ({image_size}x{image_size}) at batch_size: {batch_size}\n"
              f"\t- Inference {t0 / seen * 1E3:.1f}ms.\n"
              f"\t- NMS       {t1 / seen * 1E3:.1f}ms.\n"
              f"\t- Total     {(t0 + t1) / seen * 1E3:.1f}ms.\n")

    # Save JSON
    if save_json and map and len(json_dict):
        print("\nCOCO mAP with pycocotools...")
        imgIds = [int(Path(x).stem.split("_")[-1]) for x in dataloader.dataset.image_files]
        with open("results.json", "w") as file:
            json.dump(json_dict, file)

        # initialize COCO ground truth api
        cocoGt = COCO(glob.glob("data/coco2014/annotations/instances_val*.json")[0])
        cocoDt = cocoGt.loadRes("results.json")  # initialize COCO pred api

        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mf1, map = cocoEval.stats[:2]  # update to pycocotools results (mAP@0.5:0.95, mAP@0.5)

    # Return results
    maps = np.zeros(classes_num) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
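A minimal sketch of calling evaluate standalone follows. Two caveats: when model is None the function reads a module-level args namespace (device, task) that argparse must populate first, and the cfg/data/weights paths below are hypothetical placeholders, not files this codebase is known to ship.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--device", default="", help="cuda device id(s), or 'cpu'")
parser.add_argument("--task", default="eval")
args = parser.parse_args()

if __name__ == "__main__":
    # Hypothetical paths; substitute your own cfg/data/weights files.
    (mp, mr, map50, mf1, *losses), maps = evaluate(cfg="cfgs/yolov3.cfg",
                                                   data="data/coco2014.data",
                                                   weights="weights/yolov3.pth",
                                                   batch_size=16,
                                                   image_size=416)
    print(f"P {mp:.3f}, R {mr:.3f}, mAP@0.5 {map50:.3f}, F1 {mf1:.3f}")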
Example #3
    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            images, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load image
            images, (raw_height, raw_width), (height, width) = load_image(self, index)

            # Letterbox to the final padded shape
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.image_size
            images, ratio, pad = letterbox(images, shape, auto=False, scaleup=self.augment)
            shapes = (raw_height, raw_width), ((height / raw_height, width / raw_width), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x is not None and x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * width * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * height * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * width * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * height * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not self.mosaic:
                images, labels = random_affine(images,
                                               labels,
                                               degrees=hyp["degrees"],
                                               translate=hyp["translate"],
                                               scale=hyp["scale"],
                                               shear=hyp["shear"])

            # Augment colorspace
            augment_hsv(images,
                        hgain=hyp["hsv_h"],
                        sgain=hyp["hsv_s"],
                        vgain=hyp["hsv_v"])

        labels_num = len(labels)  # number of labels
        if labels_num:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= images.shape[0]  # height
            labels[:, [1, 3]] /= images.shape[1]  # width

        if self.augment:
            # random left-right flip
            fliplr = True
            if fliplr and random.random() < 0.5:
                images = np.fliplr(images)
                if labels_num:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            flipud = False
            if flipud and random.random() < 0.5:
                images = np.flipud(images)
                if labels_num:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((labels_num, 6))
        if labels_num:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert BGR HWC to RGB CHW (3x416x416)
        images = images[:, :, ::-1].transpose(2, 0, 1)
        images = np.ascontiguousarray(images)

        return torch.from_numpy(images), labels_out, self.image_files[index], shapes
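All three examples convert between pixel xyxy corners and xywh centers via xyxy2xywh, and evaluate uses the inverse, xywh2xyxy. Below is a sketch of the conversion pair as commonly written in YOLOv3-style code; it is assumed, not confirmed, that the helpers referenced above match these semantics.

import numpy as np
import torch

def xyxy2xywh(x):
    # [x1, y1, x2, y2] corners -> [center x, center y, width, height]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # center x
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # center y
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y

def xywh2xyxy(x):
    # [center x, center y, width, height] -> [x1, y1, x2, y2] corners
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top-left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top-left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom-right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom-right y
    return y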