Example #1
def evaluate(cfg,
             data,
             weights=None,
             batch_size=16,
             workers=4,
             image_size=416,
             confidence_threshold=0.001,
             iou_threshold=0.6,  # for nms
             save_json=True,
             single_cls=False,
             augment=False,
             model=None,
             dataloader=None):
    # Initialize/load model and set device
    if model is None:
        device = select_device(args.device, batch_size=batch_size)
        verbose = args.task == "eval"

        # Initialize model
        model = Darknet(cfg, image_size).to(device)

        # Load weights
        if weights.endswith(".pth"):
            model.load_state_dict(torch.load(weights, map_location=device)["state_dict"])
        else:
            load_darknet_weights(model, weights)

        if device.type != "cpu" and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_config(data)
    classes_num = 1 if single_cls else int(data["classes"])
    path = data["valid"]  # path to valid images
    names = load_classes(data["names"])  # class names
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)  # comment out this line to evaluate mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(path, image_size, batch_size, rect=True)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=workers,
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%10s" * 6) % ("Class", "Images", "Targets", "P", "R", "[email protected]", "F1")
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, ap, ap_class = [], [], [], []
    for batch_i, (images, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        images = images.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        batch_size, _, height, width = images.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Augmented inference: add flipped and rescaled copies of the batch
            if augment:
                fs_image = scale_image(images.flip(3), 0.9)  # flip-lr and scale
                s_image = scale_image(images, 0.7)  # scale
                images = torch.cat((images, fs_image, s_image), 0)

            # Run model
            start_time = time_synchronized()
            inference_outputs, training_outputs = model(images)
            t0 += time_synchronized() - start_time

            if augment:
                x = torch.split(inference_outputs, batch_size, dim=0)
                x[1][..., :4] /= 0.9  # scale
                x[1][..., 0] = width - x[1][..., 0]  # flip lr
                x[2][..., :4] /= 0.7  # scale
                inference_outputs = torch.cat(x, 1)

            # Compute loss
            if hasattr(model, "hyp"):  # if model has loss hyperparameters
                # GIoU, obj, cls
                loss += compute_loss(training_outputs, targets, model)[1][:3].cpu()

            # Run NMS
            start_time = time_synchronized()
            output = non_max_suppression(inference_outputs,
                                         confidence_threshold=confidence_threshold,
                                         iou_threshold=iou_threshold)
            t1 += time_synchronized() - start_time

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            label_num = len(labels)
            target_class = labels[:, 0].tolist() if label_num else []
            seen += 1

            if pred is None:
                if label_num:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(),
                                  torch.Tensor(),
                                  target_class))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split("_")[-1])
                box = pred[:, :4].clone()  # xyxy
                # to original shape
                scale_coords(images[si].shape[1:], box, shapes[si][0], shapes[si][1])
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    json_dict.append({"image_id": image_id,
                                      "category_id": coco91class[int(p[5])],
                                      "bbox": [round(x, 3) for x in b],
                                      "score": round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(len(pred), niou, dtype=torch.bool, device=device)
            if label_num:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                target_boxes = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        # best ious, indices
                        ious, i = box_iou(pred[pi, :4], target_boxes[ti]).max(1)

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                # all targets already located in image
                                if len(detected) == label_num:
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        # number of targets per class
        nt = np.bincount(stats[3].astype(np.int64), minlength=classes_num)
    else:
        nt = torch.zeros(1)

    # Print results
    context = "%20s" + "%10.3g" * 6  # print format
    print(context % ("all", seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and classes_num > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(context % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        memory = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
        print(f"Inference memory: {memory:.1f} GB.")
        print(f"Speed:\n"
              f"Image size: ({image_size}x{image_size}) at batch_size: {batch_size}\n"
              f"\t- Inference {t0 / seen * 1E3:.1f}ms.\n"
              f"\t- NMS       {t1 / seen * 1E3:.1f}ms.\n"
              f"\t- Total     {(t0 + t1) / seen * 1E3:.1f}ms.\n")

    # Save JSON
    if save_json and map and len(json_dict):
        print("\nCOCO mAP with pycocotools...")
        imgIds = [int(Path(x).stem.split("_")[-1]) for x in dataloader.dataset.image_files]
        with open("results.json", "w") as file:
            json.dump(json_dict, file)

        # initialize COCO ground truth api
        cocoGt = COCO(glob.glob("data/coco2014/annotations/instances_val*.json")[0])
        cocoDt = cocoGt.loadRes("results.json")  # initialize COCO pred api

        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mf1, map = cocoEval.stats[:2]  # update to pycocotools results (mAP@0.5:0.95, mAP@0.5)

    # Return results
    maps = np.zeros(classes_num) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
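
As a hedged usage sketch (not part of the source): evaluate() can be called standalone, provided the module-level args namespace exists, since the function reads args.device and args.task when no model is passed in. All paths below are hypothetical placeholders.

# Minimal standalone invocation sketch; cfg/data/weights paths are
# hypothetical, and the global `args` (device, task) is assumed to be set.
if __name__ == "__main__":
    (mp, mr, map50, mf1, *val_losses), per_class_maps = evaluate(
        cfg="cfgs/yolov3.cfg",          # placeholder model definition
        data="data/coco2014.data",      # placeholder dataset config
        weights="weights/yolov3.pth",   # placeholder checkpoint
        batch_size=16,
        image_size=416,
        confidence_threshold=0.001,
        iou_threshold=0.6,
        save_json=False)
    print(f"mAP@0.5 {map50:.4f} | mean F1 {mf1:.4f}")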
Example #2
def train():
    cfg = args.cfg
    data = args.data
    if len(args.image_size) == 2:
        image_size, image_size_val = args.image_size[0], args.image_size[1]
    else:
        image_size, image_size_val = args.image_size[0], args.image_size[0]

    epochs = args.epochs
    batch_size = args.batch_size
    accumulate = args.accumulate
    weights = args.weights

    # Initialize
    gs = 32  # (pixels) grid size
    assert math.fmod(image_size, gs) == 0, f"--image-size must be a {gs}-multiple"

    init_seeds()
    image_size_min = 6.6  # 320 / 32 / 1.5
    image_size_max = 28.5  # 608 / 32 * 1.5
    if args.multi_scale:
        image_size_min = round(image_size / gs / 1.5) + 1
        image_size_max = round(image_size / gs * 1.5)
        image_size = image_size_max * gs  # initiate with maximum multi_scale size
        print(f"Using multi-scale {image_size_min * gs} - {image_size}")

    # Configure run
    dataset_dict = parse_data_config(data)
    train_path = dataset_dict["train"]
    valid_path = dataset_dict["valid"]
    num_classes = 1 if args.single_cls else int(dataset_dict["classes"])

    # Remove previous results
    for files in glob.glob("results.txt"):
        os.remove(files)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for model_key, model_value in dict(model.named_parameters()).items():
        if ".bias" in model_key:
            pg2 += [model_value]  # biases
        elif "Conv2d.weight" in model_key:
            pg1 += [model_value]  # apply weight_decay
        else:
            pg0 += [model_value]  # all else

    optimizer = torch.optim.SGD(pg0,
                                lr=parameters["lr0"],
                                momentum=parameters["momentum"],
                                nesterov=True)
    optimizer.add_param_group({
        "params": pg1,
        # add pg1 with weight_decay
        "weight_decay": parameters["weight_decay"]
    })
    optimizer.add_param_group({"params": pg2})  # add pg2 with biases
    del pg0, pg1, pg2

    epoch = 0
    start_epoch = 0
    best_fitness = 0.0
    context = None
    if weights.endswith(".pth"):
        state = torch.load(weights, map_location=device)
        # load model
        try:
            state["state_dict"] = {
                k: v
                for k, v in state["state_dict"].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(state["state_dict"], strict=False)
        except KeyError as e:
            error_msg = f"{args.weights} is not compatible with {args.cfg}. "
            error_msg += f"Specify --weights `` or specify a --cfg "
            error_msg += f"compatible with {args.weights}. "
            raise KeyError(error_msg) from e

        # load optimizer
        if state["optimizer"] is not None:
            optimizer.load_state_dict(state["optimizer"])
            best_fitness = state["best_fitness"]

        # load results
        if state.get("training_results") is not None:
            with open("results.txt", "w") as file:
                file.write(state["training_results"])  # write results.txt

        start_epoch = state["epoch"] + 1
        del state

    elif len(weights) > 0:
        # possible weights are "*.weights", "yolov3-tiny.conv.15",  "darknet53.conv.74" etc.
        load_darknet_weights(model, weights)
    else:
        print("Pre training model weight not loaded.")

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        # suppress amp info output (verbosity=0)
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
    # Cosine LR decay, source: https://arxiv.org/pdf/1812.01187.pdf
    lr_lambda = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * 0.95 + 0.05
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=lr_lambda,
                                                  last_epoch=start_epoch - 1)

    # Initialize distributed training
    if device.type != "cpu" and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend="nccl",  # "distributed backend"
            # distributed training init method
            init_method="tcp://127.0.0.1:8888",
            # number of nodes for distributed training
            world_size=1,
            # distributed training node rank
            rank=0)
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers

    # Dataset
    # Apply augmentation hyperparameters (option: rectangular training)
    train_dataset = LoadImagesAndLabels(train_path,
                                        image_size,
                                        batch_size,
                                        augment=True,
                                        hyp=parameters,
                                        rect=args.rect,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    # No augmentation hyperparameters; rectangular inference for validation
    valid_dataset = LoadImagesAndLabels(valid_path,
                                        image_size_val,
                                        batch_size,
                                        augment=False,
                                        hyp=parameters,
                                        rect=True,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    collate_fn = train_dataset.collate_fn
    # Dataloader
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=not args.rect,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=False,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)

    # Model parameters
    model.nc = num_classes  # attach number of classes to model
    model.hyp = parameters  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    # attach class weights
    model.class_weights = labels_to_class_weights(train_dataset.labels,
                                                  num_classes).to(device)

    # Model EMA
    ema = ModelEMA(model, decay=0.9998)

    # Start training
    batches_num = len(train_dataloader)  # number of batches
    burns = max(3 * batches_num, 500)  # burn-in iterations: max(3 epochs, 500 iterations)
    maps = np.zeros(num_classes)  # mAP per class
    # "P", "R", "mAP", "F1", "val GIoU", "val Objectness", "val Classification"
    results = (0, 0, 0, 0, 0, 0, 0)
    print(f"Using {args.workers} dataloader workers.")
    print(f"Starting training for {args.epochs} epochs...")

    start_time = time.time()
    for epoch in range(start_epoch, args.epochs):
        model.train()

        # Update image weights (optional)
        if train_dataset.image_weights:
            # class weights
            class_weights = model.class_weights.cpu().numpy() * (1 - maps)**2
            image_weights = labels_to_image_weights(
                train_dataset.labels,
                num_classes=num_classes,
                class_weights=class_weights)
            # rand weighted index
            train_dataset.indices = random.choices(
                range(train_dataset.image_files_num),
                weights=image_weights,
                k=train_dataset.image_files_num)

        mean_losses = torch.zeros(4).to(device)
        print("\n")
        print(("%10s" * 8) % ("Epoch", "memory", "GIoU", "obj", "cls", "total",
                              "targets", " image_size"))
        progress_bar = tqdm(enumerate(train_dataloader), total=batches_num)
        for index, (images, targets, paths, _) in progress_bar:
            # number integrated batches (since train start)
            ni = index + batches_num * epoch
            # uint8 to float32, 0 - 255 to 0.0 - 1.0
            images = images.to(device).float() / 255.0
            targets = targets.to(device)

            # Hyperparameter Burn-in
            if ni <= burns * 2:
                # giou loss ratio (obj_loss = 1.0 or giou)
                model.gr = np.interp(ni, [0, burns * 2], [0.0, 1.0])

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(ni, [0, burns], [
                        0.1 if j == 2 else 0.0,
                        x["initial_lr"] * lr_lambda(epoch)
                    ])
                    if "momentum" in x:
                        x["momentum"] = np.interp(
                            ni, [0, burns], [0.9, parameters["momentum"]])

            # Multi-Scale training
            if args.multi_scale:
                # adjust image_size (67% - 150%) once every `accumulate` batches
                if ni / accumulate % 1 == 0:
                    image_size = random.randrange(image_size_min,
                                                  image_size_max + 1) * gs
                scale_ratio = image_size / max(images.shape[2:])
                if scale_ratio != 1:
                    # new shape (stretched to 32-multiple)
                    new_size = [
                        math.ceil(size * scale_ratio / gs) * gs
                        for size in images.shape[2:]
                    ]
                    images = F.interpolate(images,
                                           size=new_size,
                                           mode="bilinear",
                                           align_corners=False)

            # Run model
            output = model(images)

            # Compute loss
            loss, loss_items = compute_loss(output, targets, model)
            if not torch.isfinite(loss):
                warnings.warn(
                    f"WARNING: Non-finite loss, ending training {loss_items}")
                return results

            # Scale loss by 1 / accumulate so the accumulated gradient matches
            # a nominal batch size of batch_size * accumulate (e.g. 16 * 4 = 64)
            loss *= batch_size / (batch_size * accumulate)

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print batch results
            # update mean losses
            mean_losses = (mean_losses * index + loss_items) / (index + 1)
            memory = f"{torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0:.2f}G"
            context = ("%10s" * 2 + "%10.3g" * 6) % ("%g/%g" %
                                                     (epoch, args.epochs - 1),
                                                     memory, *mean_losses,
                                                     len(targets), image_size)
            progress_bar.set_description(context)

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not args.notest or final_epoch:  # Calculate mAP
            coco = any([
                coco_name in data for coco_name in
                ["coco.data", "coco2014.data", "coco2017.data"]
            ]) and model.nc == 80
            results, maps = evaluate(cfg,
                                     data,
                                     batch_size=batch_size,
                                     image_size=image_size_val,
                                     model=ema.ema,
                                     save_json=final_epoch and coco,
                                     single_cls=args.single_cls,
                                     dataloader=valid_dataloader)

        # Write epoch results
        with open("results.txt", "a") as f:
            # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            f.write(context + "%10.3g" * 7 % results)
            f.write("\n")

        # Write Tensorboard results
        if tb_writer:
            tags = [
                "train/giou_loss", "train/obj_loss", "train/cls_loss",
                "metrics/precision", "metrics/recall", "metrics/mAP_0.5",
                "metrics/F1", "val/giou_loss", "val/obj_loss", "val/cls_loss"
            ]
            for x, tag in zip(list(mean_losses[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        # fitness_i = weighted combination of [P, R, mAP, F1]
        fitness_i = fitness(np.array(results).reshape(1, -1))
        if fitness_i > best_fitness:
            best_fitness = fitness_i

        # Save training results
        save = (not args.nosave) or (final_epoch and not args.evolve)
        if save:
            with open("results.txt", "r") as f:
                # Create checkpoint
                state = {
                    "epoch": epoch,
                    "best_fitness": best_fitness,
                    "training_results": f.read(),
                    "state_dict": ema.ema.module.state_dict()
                    if hasattr(model, "module") else ema.ema.state_dict(),
                    "optimizer": None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            torch.save(state, "weights/checkpoint.pth")

            # Save best checkpoint
            if (best_fitness == fitness_i) and not final_epoch:
                state = {
                    "epoch": -1,
                    "best_fitness": None,
                    "training_results": None,
                    "state_dict": model.state_dict(),
                    "optimizer": None
                }
                torch.save(state, "weights/model_best.pth")

            # Delete checkpoint
            del state

    if not args.evolve:
        plot_results()  # save as results.png
    print(f"{epoch - start_epoch} epochs completed "
          f"in "
          f"{(time.time() - start_time) / 3600:.3f} hours.\n")
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()

    return results
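
train() relies on module-level state the excerpt does not show: args, device, parameters (the augmentation/loss hyperparameter dict), mixed_precision, and tb_writer. Below is a minimal setup sketch, assuming placeholder paths and hyperparameter values rather than the source's tuned settings.

# Hedged setup sketch for the globals train() expects. Flag names follow
# the attributes the function reads; all default values are placeholders.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="cfgs/yolov3.cfg")
parser.add_argument("--data", type=str, default="data/coco2014.data")
parser.add_argument("--weights", type=str, default="")
parser.add_argument("--image-size", nargs="+", type=int, default=[416])
parser.add_argument("--epochs", type=int, default=300)
parser.add_argument("--batch-size", type=int, default=16)
parser.add_argument("--accumulate", type=int, default=4)
parser.add_argument("--workers", type=int, default=4)
parser.add_argument("--device", type=str, default="")
parser.add_argument("--multi-scale", action="store_true")
parser.add_argument("--rect", action="store_true")
parser.add_argument("--single-cls", action="store_true")
parser.add_argument("--cache-images", action="store_true")
parser.add_argument("--notest", action="store_true")
parser.add_argument("--nosave", action="store_true")
parser.add_argument("--evolve", action="store_true")
args = parser.parse_args()

device = select_device(args.device, batch_size=args.batch_size)
mixed_precision = False  # set True only when NVIDIA apex is installed
tb_writer = None  # or a torch.utils.tensorboard.SummaryWriter()
# placeholder hyperparameters; compute_loss and the dataset read more keys
parameters = {"lr0": 0.01, "momentum": 0.937, "weight_decay": 0.0005}

results = train()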
Example #3
def detect(save_image=False):
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    image_size = (608, 352) if ONNX_EXPORT else args.image_size
    output = args.output
    source = args.source
    weights = args.weights
    view_image = args.view_image
    save_txt = args.save_txt

    camera = False
    if source == "0" or source.startswith("http") or source.endswith(".txt"):
        camera = True

    # Initialize
    device = select_device(device="cpu" if ONNX_EXPORT else args.device)
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)  # make new output folder

    # Initialize model
    model = Darknet(args.cfg, image_size)

    # Load weight
    if weights.endswith(".pth"):
        model.load_state_dict(
            torch.load(weights, map_location=device)["state_dict"])
    else:
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        # init model
        model_classifier = load_classifier(name="resnet101", classes=2)
        # load model
        model_classifier.load_state_dict(
            torch.load("weights/resnet101.pth", map_location=device)["model"])
        model_classifier.to(device)
        model_classifier.eval()
    else:
        model_classifier = None

    # Migrate the model to the specified device
    model.to(device)
    # set eval model mode
    model.eval()

    # Export mode
    if ONNX_EXPORT:
        model.fuse()
        image = torch.zeros((1, 3) + image_size)  # (1, 3, 608, 352)
        # *.onnx filename
        filename = args.weights.replace(args.weights.split(".")[-1], "onnx")
        torch.onnx.export(model,
                          image,
                          filename,
                          verbose=False,
                          opset_version=11)

        # Validate exported model
        import onnx
        model = onnx.load(filename)  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))
        return

    # Set Dataloader
    video_path, video_writer = None, None
    if camera:
        view_image = True
        cudnn.benchmark = True
        dataset = LoadStreams(source, image_size=image_size)
    else:
        save_image = True
        dataset = LoadImages(source, image_size=image_size)

    # Get names and colors
    names = load_classes(args.names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    start_time = time.time()
    # warm-up pass to initialize the model on GPU
    if device.type != "cpu":
        _ = model(torch.zeros((1, 3, image_size, image_size), device=device))
    for image_path, image, im0s, video_capture in dataset:
        image = torch.from_numpy(image).to(device)
        image = image.float()  # uint8 to fp16/32
        image /= 255.0  # 0 - 255 to 0.0 - 1.0
        if image.ndimension() == 3:
            image = image.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        predict = model(image, augment=args.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        predict = non_max_suppression(predict,
                                      args.confidence_threshold,
                                      args.iou_threshold,
                                      multi_label=False,
                                      classes=args.classes,
                                      agnostic=args.agnostic_nms)

        # Apply Classifier
        if classify:
            predict = apply_classifier(predict, model_classifier, image, im0s)

        # Process detections
        for i, det in enumerate(predict):  # detections per image
            if camera:  # batch_size >= 1
                p, context, im0 = image_path[i], f"{i:g}: ", im0s[i]
            else:
                p, context, im0 = image_path, "", im0s

            save_path = str(Path(output) / Path(p).name)
            context += f"{image.shape[2]}x{image.shape[3]} "  # image size
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(image.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for classes in det[:, -1].unique():
                    # detections per class
                    number = (det[:, -1] == classes).sum()
                    context += f"{number} {names[int(classes)]}s, "

                # Write results
                for *xyxy, confidence, classes in det:
                    if save_txt:  # Write to file
                        with open(save_path + ".txt", "a") as file:
                            file.write(("%e " * 6 + "\n") %
                                       (*xyxy, classes, confidence))

                    if save_image or view_image:  # Add bbox to image
                        label = f"{names[int(classes)]} {confidence * 100:.2f}%"
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(classes)])

            # Stream results
            if view_image:
                cv2.imshow("camera", im0)
                if cv2.waitKey(1) == ord("q"):  # q to quit
                    raise StopIteration

            # Print time (inference + NMS)
            print(f"{context}Done. {t2 - t1:.3f}s")

            # Save results (image with detections)
            if save_image:
                if dataset.mode == "images":
                    cv2.imwrite(save_path, im0)
                else:
                    if video_path != save_path:  # new video
                        video_path = save_path
                        if isinstance(video_writer, cv2.VideoWriter):
                            # release previous video writer
                            video_writer.release()

                        fps = video_capture.get(cv2.CAP_PROP_FPS)
                        w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        video_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*args.fourcc),
                            fps, (w, h))
                    video_writer.write(im0)

    print(f"Done. ({time.time() - start_time:.3f}s)")