def get_model(config_path, img_size, weights_path, device):
    model = Darknet(config_path, img_size=img_size)
    model.load_darknet_weights(weights_path)

    model = model.to(device)
    model.eval()  # Set in evaluation mode
    return model
Beispiel #2
0
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=False,
         hyp=None,
         model=None):
    # Initialize/load model and set device
    if model is None:
        device = torch_utils.select_device(opt.device)
        verbose = True

        # Initialize model
        model = Darknet(cfg, hyp).to(device)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    test_path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    # Dataloader
    dataset = LoadImagesAndLabels(test_path,
                                  img_size,
                                  batch_size,
                                  augment=False,
                                  hyp=hyp)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=min([os.cpu_count(), batch_size, 16]),
                            pin_memory=True,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP',
                                 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        targets = targets.to(device)  # [img_id, cls_id, x, y, w, h, a]
        imgs = imgs.to(device)
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # # Compute loss
        # if hasattr(model, 'hyp'):  # if model has loss hyperparameters
        #     loss += compute_loss(train_out, targets, model,hyp)[1][:3].cpu()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si,
                             1:]  # 当前图像的gt  [cls_id, x, y, w, h, a]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[si].shape[1:], box,
                             shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for di, d in enumerate(pred):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [floatn(x, 3) for x in box[di]],
                        'score': floatn(d[4], 5)
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = labels[:, 1:6]
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions遍历每个检测出的box
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = skew_bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 5].cpu(), pred[:, 7].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Save JSON
    if save_json and map and len(jdict):
        try:
            imgIds = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(jdict, file)

            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO('../coco/annotations/instances_val2014.json'
                          )  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map = cocoEval.stats[1]  # update mAP to pycocotools mAP
        except:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
Beispiel #3
0
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0、打印配置文件信息,写log等
    print('clw: config file:', cfg)
    print('clw: pretrained weights:', weights)

    # 1、加载模型
    model = Darknet(cfg).to(device)
    #model.apply(weights_init_normal)  # clw note: without this can also get high mAP;   TODO

    if weights.endswith('.pt'):

        ### model.load_state_dict(torch.load(weights)['model']) # 错误原因:没有考虑类别对不上的那一层,也就是yolo_layer前一层
                                                                #          会报错size mismatch for module_list.81.Conv2d.weight: copying a param with shape torch.Size([255, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([75, 1024, 1, 1]).
                                                               #           TODO:map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e
Beispiel #4
0
def train():
    cfg = opt.cfg
    data = opt.data
    img_size = opt.img_size
    epochs = 1 if opt.prebias else int(
        hyp['epochs'])  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = int(hyp['batch_size'])
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights

    if 'pw' not in opt.arc:  # remove BCELoss positive weights
        hyp['cls_pw'] = 1.
        hyp['obj_pw'] = 1.

    # Initialize
    init_seeds()
    if opt.multi_scale:
        img_sz_min = round(img_size / 32 / 1.5) + 1
        img_sz_max = round(img_size / 32 * 1.3) - 1
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    test_path = data_dict['valid']
    nc = int(data_dict['classes'])  # number of classes

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg, hyp, arc=opt.arc).to(device)

    # Optimizer
    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if 'Conv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    del pg0, pg1

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_fitness = float('inf')

    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
        chkpt = torch.load(weights, map_location=device)

        # load model
        # if opt.transfer:
        chkpt['model'] = {
            k: v
            for k, v in chkpt['model'].items()
            if model.state_dict()[k].numel() == v.numel()
        }
        model.load_state_dict(chkpt['model'], strict=False)
        # else:
        #    model.load_state_dict(chkpt['model'])

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        if opt.resume:
            start_epoch = chkpt['epoch'] + 1

        del chkpt

    # elif len(weights) > 0:  # darknet format
    #     # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
    #     cutoff = load_darknet_weights(model, weights)
    if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
        nf = [
            int(model.module_defs[x - 1]['filters']) for x in model.yolo_layers
        ]  # yolo layer size (i.e. 255)

        if opt.prebias:
            for p in optimizer.param_groups:
                # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum
                p['lr'] = 0.1  # learning rate
                if p.get('momentum') is not None:  # for SGD but not Adam
                    p['momentum'] = 0.9

        for p in model.parameters():
            if opt.prebias and p.numel() == nf:  # train (yolo biases)
                p.requires_grad = True
            elif opt.transfer and p.shape[
                    0] == nf:  # train (yolo biases+weights)
                p.requires_grad = True
            else:  # freeze layer
                p.requires_grad = False

    # Scheduler https://github.com/ultralytics/yolov3/issues/238
    # lf = lambda x: 1 - x / epochs  # linear ramp to zero
    # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs)  # exp ramp
    # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs))  # inverse exp ramp
    # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=range(59, 70, 1), gamma=0.8)  # gradual fall to 0.1*lr0
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(epochs * x) for x in [0.8, 0.9]],
        gamma=0.1)
    # 带重启的余弦退火
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max = 0.1*epochs, eta_min=0, last_epoch=-1)
    # 余弦退火
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    # warmup加载器,支持各种scheduler
    scheduler = GradualWarmupScheduler(optimizer,
                                       multiplier=hyp['multiplier'],
                                       total_epoch=hyp['warm_epoch'],
                                       after_scheduler=scheduler)
    scheduler.last_epoch = start_epoch - 1

    # # # Plot lr schedule(注意别一直开着!否则lr调整失效)
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, label='LR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    if mixed_precision:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(
            backend='nccl',  # 'distributed backend'
            init_method=
            'tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=False,
        #   augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        image_weights=opt.img_weights,
        cache_labels=epochs > 10,
        cache_images=opt.cache_images and not opt.prebias,
    )

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.
        rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)
    # Test Dataloader
    if not opt.prebias:
        testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(
            test_path,
            opt.img_size,
            batch_size * 2,
            hyp=hyp,
            rect=opt.rect,
            cache_labels=True,
            cache_images=opt.cache_images),
                                                 batch_size=batch_size * 2,
                                                 num_workers=nw,
                                                 pin_memory=True,
                                                 collate_fn=dataset.collate_fn)

    # Start training
    model.nc = nc  # attach number of classes to model
    model.arc = opt.arc  # attach yolo architecture
    model.hyp = hyp  # attach hyperparameters to model
    # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model_info(model, report='summary')  # 'full' or 'summary'
    nb = len(dataloader)
    maps = np.zeros(nc)  # mAP per class
    results = (
        0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification', 'val Regression'
    t0 = time.time()
    print('Using %g dataloader workers' % nw)
    print('Starting %s for %g epochs...' %
          ('prebias' if opt.prebias else 'training', epochs))

    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()
        model.epoch = epoch
        # print(('\n' + '%10s' * 9) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'reg', 'total', 'targets', 'img_size'))
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'obj', 'cls', 'reg',
                                     'total', 'targets', 'img_size'))

        # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional)
        freeze_backbone = False
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:  # if layer < 75
                    p.requires_grad = False if epoch == 0 else True

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 -
                                                     maps)**2  # class weights
            image_weights = labels_to_image_weights(dataset.labels,
                                                    nc=nc,
                                                    class_weights=w)
            dataset.indices = random.choices(range(dataset.n),
                                             weights=image_weights,
                                             k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        # 着重注意这个targets,已经经过resize到416,augment等变化了,不能直接映射到原图
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device)  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Multi-Scale training
            if opt.multi_scale:
                if ni / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                    img_size = random.randrange(img_sz_min,
                                                img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [
                        math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]
                    ]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Plot images with bounding boxes
            if ni == 0:
                fname = 'train_batch%g.jpg' % i
                plot_images(imgs=imgs,
                            targets=targets,
                            paths=paths,
                            fname=fname)
                if tb_writer:
                    tb_writer.add_image(fname,
                                        cv2.imread(fname)[:, :, ::-1],
                                        dataformats='HWC')

            # Hyperparameter burn-in
            # n_burn = nb - 1  # min(nb // 5 + 1, 1000)  # number of burn-in batches
            # if ni <= n_burn:
            #     for m in model.named_modules():
            #         if m[0].endswith('BatchNorm2d'):
            #             m[1].momentum = 1 - i / n_burn * 0.99  # BatchNorm2d momentum falls from 1 - 0.01
            #     g = (i / n_burn) ** 4  # gain rises from 0 - 1
            #     for x in optimizer.param_groups:
            #         x['lr'] = hyp['lr0'] * g
            #         x['weight_decay'] = hyp['weight_decay'] * g

            # Run model
            pred = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(pred, targets, model, hyp)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            # loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0  # (GB)
            # s = ('%10s' * 2 + '%10.3g' * 7) % (
            #     '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss, len(targets), img_size)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1),
                                               '%.3gG' % mem, *mloss,
                                               len(targets), img_size)
            pbar.set_description(s)

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        final_epoch = epoch + 1 == epochs
        if opt.prebias:
            print_model_biases(model)
        else:
            # Calculate mAP (always test final epoch, skip first 10 if opt.nosave)
            if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch:
                if not epoch < hyp['test_from']:  # 前部分epoch proposal太多,不计算
                    if epoch % hyp['test_interval'] == 0 and epoch != 0:
                        results, maps = test.test(
                            cfg,
                            data,
                            batch_size=1,
                            img_size=opt.img_size,
                            model=model,
                            hyp=hyp,
                            conf_thres=0.001
                            if final_epoch else 0.1,  # 0.1 for speed
                            save_json=final_epoch and epoch > 0
                            and 'coco.data' in data,
                            dataloader=testloader)
        # Write epoch results
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * 7 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = [
                'GIoU', 'Objectness', 'Classification', 'Train loss',
                'Precision', 'Recall', 'mAP', 'F1', 'val GIoU',
                'val Objectness', 'val Classification'
            ]
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fitness = sum(results[4:])  # total loss
        if fitness < best_fitness:
            best_fitness = fitness

        # Save training results
        save = (not opt.nosave) or (final_epoch
                                    and not opt.evolve) or opt.prebias
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint
                chkpt = {
                    'epoch':
                    epoch,
                    'best_fitness':
                    best_fitness,
                    'training_results':
                    f.read(),
                    'model':
                    model.module.state_dict()
                    if type(model) is nn.parallel.DistributedDataParallel else
                    model.state_dict(),
                    'optimizer':
                    None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            torch.save(chkpt, last)

            # Save best checkpoint
            if best_fitness == fitness:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % hyp['save_interval'] == 0:
                torch.save(chkpt, wdir + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    if len(opt.name):
        os.rename('results.txt', 'results_%s.txt' % opt.name)
    plot_results()  # save as results.png
    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1,
                                                    (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()

    return results
Beispiel #5
0
def detect():
    # 0、初始化一些参数
    cfg = opt.cfg
    weights = opt.weights
    src_txt_path = opt.src_txt_path
    img_size = opt.img_size
    batch_size = opt.batch_size
    dst_path = opt.dst_path
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    device = select_device(opt.device)
    classes = load_classes(parse_data_cfg(opt.data)['names'])

    # 1、加载网络
    model = Darknet(cfg)
    if weights.endswith('.pt'):  # TODO: .weights权重格式
        model.load_state_dict(
            torch.load(weights)['model'])  # TODO:map_location=device ?
    model.to(device).eval()

    # 2、加载数据集
    test_dataset = VocDataset(src_txt_path, img_size, with_label=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn)  # TODO

    # 3、预测,前向传播
    start = time.time()
    pbar = tqdm(dataloader)
    for i, (img_tensor, img0, img_name) in enumerate(pbar):
        pbar.set_description("Already Processed %d image: " % (i + 1))
        # print('clw: Already Processed %d image' % (i+1))
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        output = model(img_tensor)[
            0]  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

        # NMS
        nms_output = non_max_suppression(output, opt.conf_thres, opt.nms_thres)

        # 可视化
        for batch_idx, det in enumerate(nms_output):  # detections per image
            if det is not None:  # and len(det):  # clw note: important !
                #or box in det:
                for *box, conf, _, cls in det:  # det: tensor.Size (bs, 7)    box: list
                    orig_h, orig_w = img0[batch_idx].shape[:2]  # 坐标变换
                    new_h = new_w = img_tensor.size()[
                        2]  # 绘图,resize后的图的框 -> 原图的框,new -> orig
                    ratio_h = orig_h / new_h
                    ratio_w = orig_w / new_w
                    x1 = int(ratio_w * box[0])
                    y1 = int(ratio_h * box[1])
                    x2 = int(ratio_w * (box[2]))
                    y2 = int(ratio_h * (box[3]))
                    label = '%s %.2f' % (classes[int(cls)], conf)

                    # 预测结果可视化
                    plot_one_box([x1, y1, x2, y2],
                                 img0[batch_idx],
                                 label=label,
                                 color=(255, 0, 0))
                    #cv2.rectangle(img0[batch_idx], (x1, y1), (x2, y2), (0, 0, 255), 1)  # 如果报错 TypeError: an integer is required (got type tuple),检查是不是传入了img_tensor

            if SAVE:
                # 保存结果
                cv2.imwrite(os.path.join(dst_path, img_name[batch_idx]),
                            img0[batch_idx])
            if SHOW:
                cv2.imshow('aaa', img0[batch_idx])
                cv2.waitKey(0)

    print('time use: %.3fs' % (time.time() - start))
Beispiel #6
0
def multi_detect(save_txt=True, save_img=True, hyp=None, multi_scale = False):
    img_size =  opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    
    # Setting txt result folder
    save_type = 'ICDAR'
    if save_txt and save_type == 'ICDAR':
        out_txt = 'icdar_result'
        if os.path.exists(out_txt):
            shutil.rmtree(out_txt)  # delete output folder
        os.makedirs(out_txt)  # make new output folder

    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])                           # .data文件解析成dict并索引类别名的name文件地址
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # 配置颜色

    #  build model
    model = Darknet(opt.cfg, hyp)  # 搭建模型(不连接计算图),只调用构造函数
        ## Load weights
    attempt_download(weights)  
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
        ## Eval mode
    model.to(device).eval()
    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()    # pytorch原生支持fp16训练

    # multi-scale
    if multi_scale:
        size_num = 2
        img_sz_min = round(img_size / 32 / 1.5) + 1
        img_sz_max = round(img_size / 32 * 1.3) - 1
        img_sizes = np.random.choice(range(img_sz_min * 32, (img_sz_max+1) * 32, 32), size_num, False)
        img_sizes = [int(x) for x in img_sizes]
        # img_sizes = [608]
        print('use scale: {}'.format(img_sizes))
    else:
        img_sizes = [img_size]



    # load images
    t0 = time.time()
    img_paths = sorted(glob.glob(os.path.join(source, '*.jpg')))
    for id, img_path in enumerate(img_paths):
        img0 = cv2.imread(img_path)
        p, s,  = img_path, ''
        save_path = str(Path(out) / Path(p).name)
        assert img0 is not None, 'Image Not Found ' + path
        print('image %g/%g %s: ' % (id, len(img_paths), img_path), end='')
        # multi-scale inference
        all_pre = []
        t = time.time()
        for img_size in img_sizes:
            img, *_ = letterbox(img0, new_shape=img_size)
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
            img = np.ascontiguousarray(img, dtype=np.float16 if opt.half else np.float32)  # uint8 to fp16/fp32
            img /= 255.0  
            
            #  Get detections
            img = torch.from_numpy(img).to(device)
            if img.ndimension() == 3:   # 查看数据维度是否为三维,等价于len(img.shape)
                img = img.unsqueeze(0)  # 加个第0维bs,但是detect实际没用
            pred, _ = model(img)        # forward 
            # nms
            det = non_max_suppression(pred, opt.conf_thres, 0.95)[0]   # bs只允许1,不支持视频
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size  将预测的bbox坐标(前四维)从缩放图放大回原图尺度
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            all_pre.append(det)
        all_pre = list(filter(is_None,all_pre))
        if all_pre is not None and len(all_pre):
            merged_dets = torch.cat(all_pre, 0)
            final_dets = non_max_suppression(merged_dets.unsqueeze(0), opt.conf_thres, opt.nms_thres)[0]
            # Print results: 统计各类物体出现的次数
            if final_dets is not None and len(final_dets):
                for c in final_dets[:, -1].unique():       # 取出最后一维类别并去重排序
                    n = (final_dets[:, -1] == c).sum()     # detections per class
                    s += '%g %ss, ' % (n, classes[int(c)])  # s添加检测物体统计

                # Write results
                for *box, conf, _, cls in final_dets:
                    if save_txt and save_type == 'ICDAR':  # Write to file
                        save_icdar_path = str(Path(out_txt)/ ('res_'+os.path.splitext(Path(p).name)[0]+'.txt'))
                        with open(save_icdar_path , 'a') as file:
                            file.write(xywha2icdar(box))
                    elif save_txt:
                        with open(save_path + '.txt', 'a') as file:
                            file.write(('%g ' * 7 + '\n') % (*box, cls, conf))

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (classes[int(cls)], conf)
                        plot_one_box(box, img0, label=label, color=colors[int(cls)])
        print('%sDone. (%.3fs)' % (s, time.time() - t))

        # Stream results
        if view_img:
            cv2.imshow(p, img0)

        # Save results (image with detections)
        if save_img:
            cv2.imwrite(save_path, img0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

    zip_dir(out_txt, 'icdar_result.zip')
    shutil.rmtree(out_txt)
Beispiel #7
0
def detect(save_txt = True, save_img = False, hyp = None):
    img_size =  opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    
    # Setting txt result folder
    save_type = 'ICDAR'
    if save_txt and save_type == 'ICDAR':
        out_txt = 'detections'
        if os.path.exists(out_txt):
            shutil.rmtree(out_txt)  # delete output folder
        os.makedirs(out_txt)  # make new output folder

    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, hyp )  # 搭建模型(不连接计算图),只调用构造函数

    # Load weights
    attempt_download(weights)  
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()


    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()    # pytorch原生支持fp16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)  # source是测试的文件夹路径,返回的dataset是一个迭代器

    
    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])                           # .data文件解析成dict并索引类别名的name文件地址
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # 配置颜色

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:    # im0s为原图(hwc),im0s为缩放+padding之后的图(chw)
        t = time.time()
        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:   # 查看数据维度是否为三维,等价于len(img.shape)
            img = img.unsqueeze(0)  # 加个第0维bs,但是detect实际没用

        # 只用到io的结果,不用p;io有三个维度:bs(1),num_proposal(每个yolo层预测其特征图的w*h*3个proposal),num_params(5+classes)
        pred, _ = model(img)        # forward 
        # NMS后返回的张量维度:[(num_detections,7),...] (7=(x1, y1, x2, y2, object_conf, class_conf, class))  (len=bs)
        # 遍历时det是每张图片的bbox属性: (num_detections,7)
        # 实际上在图像中遍历只会执行一次,这里的i=0就跳出了
        for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            # s 是最后检测打印的字符串,会通过字符串拼接逐渐添加项
            s += '%gx%g ' % img.shape[2:]  # s添加缩放后的图像尺度,如  320x416 
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size  将预测的bbox坐标(前四维)从缩放图放大回原图尺度
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: 统计各类物体出现的次数
                for c in det[:, -1].unique():       # 取出最后一维类别并去重排序
                    n = (det[:, -1] == c).sum()     # detections per class
                    s += '%g %ss, ' % (n, classes[int(c)])  # s添加检测物体统计

                # Write results
                for *box, conf, _, cls in det:
                    if save_txt and save_type == 'ICDAR':  # Write to file
                        save_icdar_path = str(Path(out_txt)/ ('res_'+os.path.splitext(Path(p).name)[0]+'.txt'))
                        with open(save_icdar_path , 'a') as file:
                            file.write(xywha2icdar(box))
                    elif save_txt:
                        with open(save_path + '.txt', 'a') as file:
                            file.write(('%g ' * 7 + '\n') % (*box, cls, conf))

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (classes[int(cls)], conf)
                        plot_one_box(box, im0, label=label, color=colors[int(cls)])
                        # plot_one_box(box, im0, label=label, color=[0,0,255], line_thickness=1)
                        

            print('%sDone. (%.3fs)' % (s, time.time() - t))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    zip_dir(out_txt, 'icdar_result.zip')
    shutil.rmtree(out_txt)
Beispiel #8
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):

    # 0、初始化一些参数
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1、加载网络
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: .weights权重格式
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # 20200704_50epoch_modify_noobj   # TODO:map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: 多卡
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2、加载数据集
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3、预测,前向传播
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', '[email protected]', 'F1')

    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):

        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor
            )  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # len of label
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # 考虑一个预测 box 都没有的情况,比如 conf 太高
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds   TODO:有必要,因为 label 都是经过clip的,所以如果去掉clip,mAP应该会有所降低
            clip_coords(pred, (height, width))  #  mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            '''
                        pred flag                (  [1,       0,       1,       0,       0,       1,       0,       0,       1], 
                        pred conf            tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069, 0.06449, 0.06222, 0.05580, 0.05452]), 
                        pred cls             tensor([2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.]), 
                        lb_cls                 [2.0,     2.0,  2.0, 2.0, 2.0])
            stats is a []
            '''
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(),
                 tcls))  # Append statistics (correct, conf, pcls, tcls)

    # after get stats for all images , ...
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: 防止前面 tqdm 还没输出,但是这里已经打印了
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
Beispiel #9
0
    device = select_device(opt.device)

    # 打印配置信息,写log等
    print(opt)
    print('config file:', cfg)
    print('pretrained weights:', weights)
    print('initial lr:', lr0)

    os.makedirs(log_folder, exist_ok=True)
    write_to_file(repr(opt), log_file_path)
    write_to_file('config file:' + cfg, log_file_path)
    write_to_file('pretrained weights:' + repr(weights), log_file_path)
    write_to_file('initial lr:' + repr(lr0), log_file_path)

    # 1、加载模型
    model = Darknet(cfg).to(device)
    model.apply(weights_init_normal
                )  # clw note: without this can also get high mAP;   TODO

    print('anchors:\n' +
          repr(model.module_defs[model.yolo_layers[0]]['anchors']))
    write_to_file(
        'anchors:\n' +
        repr(model.module_defs[model.yolo_layers[0]]['anchors']),
        log_file_path)

    if weights.endswith('.pt'):
        ### model.load_state_dict(torch.load(weights)['model']) # 错误原因:没有考虑类别对不上的那一层,也就是yolo_layer前一层
        # 会报错size mismatch for module_list.81.Conv2d.weight: copying a param with shape torch.size([255, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([75, 1024, 1, 1]).
        chkpt = torch.load(weights)
        try:
Beispiel #10
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path='./valid.txt',
         dst_path='./output',
         weights=None,
         model=None,
         log_file_path='log.txt'):

    # 0、初始化一些参数
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    class_names = load_classes(data['names'])

    # 1、加载网络
    if model is None:
        device = select_device(opt.device)
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: .weights权重格式
            model.load_state_dict(
                torch.load(weights)['model'])  # TODO:map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: 多卡
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2、加载数据集
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3、预测,前向传播
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')

    pbar = tqdm(dataloader)
    for i, (img_tensor, _, img_path, shapes) in enumerate(pbar):
        start = time.time()
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)

        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)  # [0]
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres,
                                             nms_thres)  # list (64,)
            s = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(
                nms_output
        ):  # pred: (bs, 7) -> xyxy, obj_conf*class_conf, class_conf, cls_idx
            ################################################
            if pred is None:
                continue
            bboxes_prd = torch.cat((pred[:, 0:5], pred[:, 6].unsqueeze(1)),
                                   dim=1).cpu().numpy()

            ###### clw note: coord transform to origin size(because of resize and so on....) is really important !!!
            scale_coords(img_tensor[batch_idx].shape[1:], bboxes_prd,
                         shapes[batch_idx][0],
                         shapes[batch_idx][1])  # to original shape
            ######

            for bbox in bboxes_prd:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = bbox[4]
                class_ind = int(bbox[5])

                class_name = class_names[class_ind]
                classes_pred.add(class_name)
                score = '%.4f' % score
                xmin, ymin, xmax, ymax = map(str, coor)
                s = ' '.join([
                    str(img_path[batch_idx]),
                    str(score), xmin, ymin, xmax, ymax
                ]) + '\n'

                with open(
                        os.path.join(result_path,
                                     'comp4_det_test_' + class_name + '.txt'),
                        'a') as f:
                    f.write(s)
            ################################################
    return calc_APs()
Beispiel #11
0
def train():

    # 0、Initialize parameters( set random seed, get cfg info, )
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0、打印配置文件信息,写log等
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1、加载模型
    model = Darknet(cfg).to(device)

    if weights.endswith('.pt'):

        ### model.load_state_dict(torch.load(weights)['model']) # 错误原因:没有考虑类别对不上的那一层,也就是yolo_layer前一层
                                                                #          会报错size mismatch for module_list.81.Conv2d.weight: copying a param with shape torch.size([255, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([75, 1024, 1, 1]).
                                                               #           TODO:map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif weights.endswith('.pth'):    # for 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        #try:
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
                if i % 5 == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]


        #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)

        # model.load_state_dict(chkpt['model'])

        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' +  repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2、设置优化器 和 学习率
    start_epoch = 0
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=momentum, weight_decay=weight_decay, nesterov=True)  # TODO:nesterov ?  weight_decay=0.0005 ?

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0

    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2


    ###### apex need ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)  # clw note: 多卡,在 amp.initialize()之后调用分布式代码 DistributedDataParallel否则报错
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level


    ######
    model.nc = nc

    #### 阶梯学习率
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1)
    ### 余弦学习率
    #lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    #scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3、加载数据集
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8, # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)


    # 4、训练
    print('')   # 换行
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)

    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()    # tensorboard --logdir=runs, view at http://localhost:6006/

    prebias = start_epoch == 0

    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        model.train()  # 写在这里,是因为在一个epoch结束后,调用test.test()时,会调用 model.eval()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)
        #pbar = tqdm(dataloader, ncols=20)  # 行数参数ncols=10,这个值可以自己调:尽量大到不能引起上下滚动,同时满足美观的需求。
        #for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75  # 多卡是[2],单卡[1]
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):

            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr

            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)
            ### 训练过程主要包括以下几个步骤:
            # (1) 前传
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) 计算损失
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
               raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) 损失:反向传播,求出梯度
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) 优化器:更新参数、梯度清零
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            #s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)
            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  optimizer.state_dict()['param_groups'][0]['lr'], time.time()-batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size,  scheduler.get_lr()[0], time.time()-batch_start)

            if i % 10 == 0:
                print(s)
                
            # Plot
            if epoch == start_epoch  and i == 0:
                fname = 'train_batch.jpg' # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))

        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path = log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # save model 保存模型
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),  # clw note: 多卡
                 'optimizer': optimizer.state_dict()}

        torch.save(chkpt, last_model_path)

    print('end')