Ejemplo n.º 1
0
    def __init__(self,ops,device):
        """Build a YOLOv3 or YOLOv3-tiny detector and load its weights.

        Args:
            ops: Options object. This method reads ``img_size``, ``data_cfg``,
                ``detect_network`` and ``detect_model`` from it.
            device: Device used as ``map_location`` when loading the weights
                and as the target of ``model.to``.

        Raises:
            FileNotFoundError: If the weight file ``ops.detect_model`` does
                not exist. (The previous code executed ``return False`` here,
                which Python rejects in ``__init__`` with a ``TypeError``.)
        """
        self.ops = ops
        self.img_size = ops.img_size
        self.classes = load_classes(parse_data_cfg(ops.data_cfg)['names'])
        self.num_classes = len(self.classes)

        # Every anchor is divided by 416 / img_size, i.e. rescaled by
        # img_size / 416 (presumably the tables below are specified for a
        # 416-pixel input -- TODO confirm).
        anchor_scale = 416. / ops.img_size
        is_tiny = "tiny" in ops.detect_network
        if is_tiny:
            anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
        else:
            anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                       (116, 90), (156, 198), (373, 326)]
        anchors_new = [(int(w / anchor_scale), int(h / anchor_scale)) for w, h in anchors]

        if is_tiny:
            model = Yolov3Tiny(self.num_classes, anchors=anchors_new)
            print('network : yolov3 - tiny')
        else:
            model = Yolov3(self.num_classes, anchors=anchors_new)
            print('network : yolov3')
        weights = ops.detect_model

        self.model = model
        yolo_model_param(self.model)  # display the model parameters

        self.device = device
        self.use_cuda = torch.cuda.is_available()

        # Load weights; fail loudly when the checkpoint file is missing.
        if not os.access(weights, os.F_OK):
            print('------- >>> error model not exists')
            raise FileNotFoundError(weights)
        self.model.load_state_dict(torch.load(weights, map_location=self.device)['model'])
        self.model.to(self.device).eval()  # switch the model to eval mode
Ejemplo n.º 2
0
def main():
    """Time ten consecutive ``prune_on_cpu`` calls on one training batch.

    Builds the Darknet model together with its mask and backup, fetches the
    first batch from the training dataloader, then prints the start time, the
    end time and the elapsed time of the ten calls.
    """
    config = create_config(create_prune_argparser())

    # Initialize
    init_seeds(seed=0)

    model = Darknet(cfg=config['cfg'], arc=config['arc'])
    mask = create_mask(model)
    bckp = create_backup(model)
    device = select_device(config['device'])
    model = model.to(device)

    train_path = parse_data_cfg(config['data'])['train']
    dataset = LoadImagesAndLabels(path=train_path,
                                  img_size=config['img_size'][0],
                                  batch_size=config['batch_size'],
                                  augment=True,
                                  hyp=config['hyp'],
                                  cache_images=config['cache_images'])

    # Dataloader: the loader batch size is fixed at 18 in this script.
    loader_batch_size = 18
    worker_cap = loader_batch_size if loader_batch_size > 1 else 0
    num_workers = min([os.cpu_count(), worker_cap, 8])
    loader_options = {
        'batch_size': loader_batch_size,
        'num_workers': num_workers,
        'pin_memory': True,
        'collate_fn': dataset.collate_fn,
    }
    dataloader = torch.utils.data.DataLoader(dataset, **loader_options)

    # Only the first batch is used; pixel values are divided by 255.
    first_batch = next(iter(dataloader))
    imgs, _, _, _ = first_batch
    imgs = (imgs.float() / 255.0).to(device)

    start = datetime.datetime.now()
    print(f'Starting to compute the time at {start}')
    for _ in range(10):
        prune_on_cpu(model, mask, bckp, imgs, config, device)
    end = datetime.datetime.now()
    print(f'Ending at {end}')
    result = end - start
    print(f'Time of {result}')
Ejemplo n.º 3
0
def run_compare(cfg, data, prune_cfg, batch_size, origin_weights):
    """Run ``random_greedy_channel_select`` for every pruning layer but the first.

    Args:
        cfg: Model configuration handed to ``Darknet``.
        data: Data configuration handed to ``parse_data_cfg``; its ``'valid'``
            entry is used as the dataset path.
        prune_cfg: Forwarded unchanged to ``random_greedy_channel_select``.
        batch_size: Requested batch size; capped at the dataset length.
        origin_weights: Checkpoint path; its ``'model'`` entry is loaded.
    """
    device = select_device('', apex=None, batch_size=batch_size)

    use_distributed = device.type != 'cpu' and torch.cuda.device_count() > 1
    if use_distributed:
        dist.init_process_group(backend='nccl',
                                init_method='tcp://127.0.0.1:9999',
                                world_size=1,
                                rank=0)

    init_seeds()

    valid_path = parse_data_cfg(data)['valid']
    dataset = LoadImagesAndLabels(valid_path,
                                  416,
                                  batch_size,
                                  augment=True,
                                  hyp=hyp,
                                  rect=False,
                                  cache_labels=True,
                                  cache_images=False)

    # Never request a batch larger than the dataset itself.
    batch_size = min(batch_size, len(dataset))
    worker_cap = batch_size if batch_size > 1 else 0
    num_workers = min([os.cpu_count(), worker_cap, 8])
    loader_options = {
        'batch_size': batch_size,
        'num_workers': num_workers,
        'shuffle': True,
        'pin_memory': True,
        'collate_fn': dataset.collate_fn,
    }
    train_loader = torch.utils.data.DataLoader(dataset, **loader_options)

    origin_model = Darknet(cfg).to(device)
    checkpoint = torch.load(origin_weights, map_location=device)
    origin_model.load_state_dict(checkpoint['model'], strict=True)
    aux_util = AuxNetUtils(origin_model, hyp)
    del checkpoint

    # The first entry of ``pruning_layer`` is skipped.
    layers_to_prune = aux_util.pruning_layer[1:]
    for layer in layers_to_prune:
        random_greedy_channel_select(origin_model, prune_cfg, origin_weights,
                                     layer, device, aux_util, train_loader,
                                     0.75)
Ejemplo n.º 4
0
def run_single_detect(model, images, img_size, conf_thres=0.3, nms_thres=0.45):
    """Run ``model`` on each image and print the number of detections per class.

    Args:
        model: Callable detector; its output is indexed as ``[:, :, 4]`` to
            read the confidence used for thresholding.
        images: Image source handed to ``LoadImages``.
        img_size: Image size handed to ``LoadImages``.
        conf_thres: Predictions whose confidence is not above this value are
            dropped before NMS; also forwarded to ``non_max_suppression``.
        nms_thres: Forwarded to ``non_max_suppression``.
    """
    device = torch_utils.select_device()
    dataloader = LoadImages(images, img_size=img_size)
    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])
    for path, img, im0 in dataloader:
        img = torch.from_numpy(img).unsqueeze(0).to(device)
        # Inference only: do not build an autograd graph.
        with torch.no_grad():
            pred = model(img)
        pred = pred[pred[:, :, 4] > conf_thres]  # remove boxes < threshold
        if len(pred) == 0:
            continue

        # Run NMS on predictions
        detections = non_max_suppression(pred.unsqueeze(0), conf_thres,
                                         nms_thres)[0]
        # NMS may leave nothing for this image; other callers in this code
        # base check its per-image result for None, so do the same here.
        if detections is None or len(detections) == 0:
            continue

        # Print results to screen
        detected_classes = detections[:, -1].cpu()
        for c in detected_classes.unique():
            n = (detected_classes == c).sum()
            print('%g %ss' % (n, classes[int(c)]), end=', ')
Ejemplo n.º 5
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):
    """Evaluate a detector on the dataset listed in ``src_txt_path``.

    Args:
        cfg: Model configuration handed to ``Darknet`` when ``model`` is None.
        data: Data configuration handed to ``parse_data_cfg``; its ``'classes'``
            and ``'names'`` entries are read.
        batch_size: Batch size of the evaluation dataloader.
        img_size: Image size handed to ``VocDataset``.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: A prediction counts as correct when its best IoU with a
            not-yet-matched target of the same class exceeds this value.
        nms_thres: NMS threshold forwarded to ``non_max_suppression``.
        src_txt_path: Dataset source handed to ``VocDataset``.
        weights: Checkpoint path; only loaded when ``model`` is None and the
            path ends with ``'.pt'``.
        log_file_path: Passed to ``write_to_file`` when this module is not run
            as ``__main__``.
        model: Optional ready-made model; when given, its device is reused.

    Returns:
        ``((mp, mr, map, mf1), maps)``: the mean precision, recall, AP and F1,
        plus an array of length ``nc`` filled with the mean AP and overwritten
        with the per-class AP for every class in ``ap_class``.
    """

    # 0. Initialise parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # TODO: confirm that map_location=device is wanted here
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Predict (forward pass)
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', 'mAP@0.5', 'F1')

    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):

        img_tensor = img_tensor.to(device)  # presumably (bs, 3, H, W) -- TODO confirm
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor
            )  # presumably rows of (x1, y1, x2, y2, obj_conf, class_conf, class_pred) -- TODO confirm

            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            # NOTE(review): this overwrites the table header stored in `s`
            # above, so the `s` later passed to write_to_file is the timing
            # line of the last batch -- confirm this is intended.
            s = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # one entry per image in the batch
            # Column 0 of the targets selects the image; the remaining columns are kept.
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # number of labels
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # Handle the case with no predicted box at all, e.g. conf_thres too high
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds. TODO: the original author notes the
            # labels are already clipped, so clipping predictions should matter.
            clip_coords(pred, (height, width))  #  mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled by the image width / height
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                # NOTE(review): this `i` shadows the batch index `i` of the outer loop.
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets of the same class
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and the target is not yet matched, mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            # The bare string below is the original author's example of one `stats` entry.
            '''
                        pred flag                (  [1,       0,       1,       0,       0,       1,       0,       0,       1], 
                        pred conf            tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069, 0.06449, 0.06222, 0.05580, 0.05452]), 
                        pred cls             tensor([2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.]), 
                        lb_cls                 [2.0,     2.0,  2.0, 2.0, 2.0])
            stats is a []
            '''
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(),
                 tcls))  # Append statistics (correct, conf, pcls, tcls)

    # After the stats of all images have been collected:
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # original note: avoid printing before tqdm has flushed its output
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    # Results are only written to the log file when this module is imported.
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    # NOTE(review): `results` is filled below but is not part of the return value.
    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results: classes without their own AP keep the mean AP
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
Ejemplo n.º 6
0
Archivo: train.py Proyecto: axiat/IRCRA
def train():
    """Train a Darknet model and return the most recent evaluation results.

    No arguments are taken: the function reads names defined outside this
    block (``opt``, ``hyp``, ``device``, ``mixed_precision``, ``results_file``,
    ``tb_writer``, ``wdir``, ``last``, ``best``). It also mutates ``hyp['cls']``
    and ``opt.multi_scale``.

    Training runs for ``opt.epochs`` epochs starting at the epoch stored in the
    loaded checkpoint (0 when none is loaded). After every epoch the results
    line is appended to ``results_file`` and, unless disabled, a checkpoint is
    saved.

    Returns:
        The latest ``results`` value returned by ``test.test`` (seven zeros
        until the first evaluation). The function returns early with the
        current value when the loss becomes non-finite.
    """
    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = max(round(64 / batch_size),
                     1)  # accumulate n times before optimizer update (bs 64)
    weights = opt.weights  # initial training weights
    imgsz_min, imgsz_max, imgsz_test = opt.img_size  # img sizes (min, max, test)

    # Image Sizes
    gs = 64  # (pixels) grid size
    assert math.fmod(
        imgsz_min,
        gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
    opt.multi_scale |= imgsz_min != imgsz_max  # multi if different (min, max)
    if opt.multi_scale:
        if imgsz_min == imgsz_max:
            imgsz_min //= 1.5
            imgsz_max //= 0.667
        # The floor divisions above yield floats; random.randrange (used for
        # multi-scale below) requires real integers, so cast the grid bounds.
        grid_min, grid_max = int(imgsz_min // gs), int(imgsz_max // gs)
        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
    img_size = imgsz_max  # initialize with max size

    # Configure run
    init_seeds()
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    test_path = data_dict['valid']
    nc = 1 if opt.single_cls else int(
        data_dict['classes'])  # number of classes
    hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # all else

    if opt.adam:
        # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    start_epoch = 0
    best_fitness = 0.0
    if weights.endswith('.pt'):  # pytorch format
        chkpt = torch.load(weights, map_location=device)

        # load model: keep only tensors whose element count matches the model
        try:
            chkpt['model'] = {
                k: v
                for k, v in chkpt['model'].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(chkpt['model'], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        start_epoch = chkpt['epoch'] + 1
        del chkpt

    elif len(weights) > 0:  # darknet format
        load_darknet_weights(model, weights)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((
        (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.95 + 0.05  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch - 1  # see link below
    # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend='nccl',  # 'distributed backend'
            init_method=
            'tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        cache_images=opt.cache_images,
        single_cls=opt.single_cls)

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Testloader
    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(
        test_path,
        imgsz_test,
        batch_size,
        hyp=hyp,
        rect=True,
        cache_images=opt.cache_images,
        single_cls=opt.single_cls),
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(
        device)  # attach class weights

    # Model EMA
    ema = torch_utils.ModelEMA(model)

    # Start training
    nb = len(dataloader)  # number of batches
    n_burn = max(3 * nb,
                 500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(nc)  # mAP per class
    results = (
        0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    print('Image sizes %g - %g train, %g test' %
          (imgsz_min, imgsz_max, imgsz_test))
    print('Using %g dataloader workers' % nw)
    print('Starting training for %g epochs...' % epochs)
    print("------------------------------------------------", start_epoch,
          start_epoch + epochs)
    last_epoch = start_epoch + epochs - 1  # index of the final loop iteration
    for epoch in range(
            start_epoch, start_epoch + epochs
    ):  # epoch ------------------------------------------------------------------

        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 -
                                                     maps)**2  # class weights
            image_weights = labels_to_image_weights(dataset.labels,
                                                    nc=nc,
                                                    class_weights=w)
            dataset.indices = random.choices(range(dataset.n),
                                             weights=image_weights,
                                             k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        for i, (imgs, targets, paths, _) in enumerate(
                dataloader
        ):  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float(
            ) / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Burn-in
            if ni <= n_burn * 2:
                model.gr = np.interp(
                    ni, [0, n_burn * 2],
                    [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                if ni == n_burn:  # burnin complete
                    print_model_biases(model)

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(
                        ni, [0, n_burn],
                        [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, [0, n_burn],
                                                  [0.9, hyp['momentum']])

            # Multi-Scale
            if opt.multi_scale:
                if ni / accumulate % 1 == 0:  #  adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(grid_min, grid_max + 1) * gs
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to a gs-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Forward
            pred = model(imgs)

            # Loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Backward
            loss *= batch_size / 64  # scale loss
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() /
                             1E9 if torch.cuda.is_available() else 0)  # (GB)
            # `s` is the progress line of the latest batch; it is written to
            # the results file after the epoch.
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' %
                                               (epoch, epochs - 1), mem,
                                               *mloss, len(targets), img_size)

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        # The loop runs up to start_epoch + epochs - 1, so compare against
        # that index; comparing with `epochs` alone is wrong when resuming.
        final_epoch = epoch == last_epoch
        if not opt.notest or final_epoch:  # Calculate mAP
            is_coco = any([
                x in data
                for x in ['coco.data', 'coco2014.data', 'coco2017.data']
            ]) and model.nc == 80
            results, maps = test.test(cfg,
                                      data,
                                      batch_size=batch_size,
                                      img_size=imgsz_test,
                                      model=ema.ema,
                                      save_json=final_epoch and is_coco,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader)

        # Write
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * 7 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' %
                      (opt.bucket, opt.name))

        # Tensorboard
        if tb_writer:
            tags = [
                'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/F1', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss'
            ]
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(
            1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi

        # Save model
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            with open(results_file, 'r') as f:  # create checkpoint
                chkpt = {
                    'epoch':
                    epoch,
                    'best_fitness':
                    best_fitness,
                    'training_results':
                    f.read(),
                    'model':
                    ema.ema.module.state_dict()
                    if hasattr(model, 'module') else ema.ema.state_dict(),
                    'optimizer':
                    None if final_epoch else optimizer.state_dict()
                }

            # Save last, best and delete
            torch.save(chkpt, last)
            if (best_fitness == fi) and not final_epoch:
                torch.save(chkpt, best)
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    n = opt.name
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'],
                          [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                ispt = f2.endswith('.pt')  # is *.pt
                if ispt:
                    strip_optimizer(f2)  # strip optimizer
                if opt.bucket and ispt:
                    os.system('gsutil cp %s gs://%s/weights' %
                              (f2, opt.bucket))  # upload

    if not opt.evolve:
        plot_results()  # save as results.png
    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1,
                                                    (time.time() - t0) / 3600))
    # Only tear down a process group that was actually created above; the
    # GPU count alone does not imply that init_process_group ran.
    if torch.distributed.is_available() and dist.is_initialized():
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results
Ejemplo n.º 7
0
def test(cfg,
         data_cfg,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.3,
         nms_thres=0.5,
         model=None):
    """Evaluate a YOLOv3 / YOLOv3-tiny model on the validation set.

    Args:
        cfg: Network selector; ``Yolov3Tiny`` is built when it contains
            ``"-tiny"``, otherwise ``Yolov3``. Only used when ``model`` is None.
        data_cfg: Data configuration handed to ``parse_data_cfg``; its
            ``'classes'``, ``'valid'`` and ``'names'`` entries are read.
        batch_size: Batch size of the evaluation dataloader.
        img_size: Image size handed to the dataset and used to scale the
            target boxes.
        iou_thres: A prediction counts as correct when its best IoU with the
            targets exceeds this value and that target is not yet matched.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        nms_thres: NMS threshold forwarded to ``non_max_suppression``.
        model: Optional ready-made model; when None a model is built and a
            hard-coded weight file is loaded.

    Returns:
        ``(mp, mr, map, mf1, loss)``: mean precision, recall, AP and F1, and
        the loss summed over all evaluated batches. The metrics stay ``0.``
        when no statistics could be collected.
    """
    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    nc = int(data_cfg['classes'])  # number of classes
    test_path = data_cfg['valid']  # path to test images
    names = load_classes(data_cfg['names'])  # class names

    if model is None:
        device = select_device()
        num_classes = nc
        # Initialize model
        if "-tiny" in cfg:
            model = Yolov3Tiny(num_classes).to(device)
            weights = "./yolov3-tiny_coco.pt"
        else:
            model = Yolov3(num_classes).to(device)
            weights = "./finetune-weight/yolov3_coco.pt"

        # Load weights
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
    print("using device: {}".format(device))

    # Dataloader
    dataset = LoadImagesAndLabels(test_path,
                                  batch_size,
                                  img_size=img_size,
                                  augment=False)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=0,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    print(('%20s' + '%10s' * 6) %
          ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, p, r, f1, mp, mr, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0., 0.
    stats, ap, ap_class = [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc='Computing mAP')):
        targets = targets.to(device)
        if len(targets) == 0:  # if no targets continue
            continue
        imgs = imgs.to(device)

        # Evaluation only: no autograd graph is needed for the forward pass,
        # the loss value or NMS.
        with torch.no_grad():
            # Run model
            inf_out, train_out = model(imgs)  # inference and training outputs

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss_i, _ = compute_loss(train_out, target_list)
            loss += loss_i.item()

            # Run NMS
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            # Column 0 of the targets selects the image within the batch.
            labels = targets[targets[:, 0] == si, 1:]
            correct, detected = [], []
            tcls = torch.Tensor()
            seen += 1

            if pred is None:
                # No detections: the targets still count towards recall.
                if len(labels):
                    tcls = labels[:, 0].cpu()  # target classes
                    stats.append(
                        (correct, torch.Tensor(), torch.Tensor(), tcls))
                continue

            if len(labels):
                # Extract target boxes as (x1, y1, x2, y2)
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size  # target boxes
                tcls = labels[:, 0]  # target classes

                for *pbox, pconf, pcls_conf, pcls in pred:
                    if pcls not in tcls:
                        correct.append(0)
                        continue

                    # Best iou, index between pred and targets
                    iou, bi = bbox_iou(pbox, tbox).max(0)

                    # If iou > threshold and the target is unmatched mark as correct
                    if iou > iou_thres and bi not in detected:
                        correct.append(1)
                        detected.append(bi)
                    else:
                        correct.append(0)
            else:
                # If no labels add number of detections as incorrect
                correct.extend([0] * len(pred))

            # Append Statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls.cpu()))

    # Compute statistics
    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats_np):
        nt = np.bincount(stats_np[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
        p, r, ap, f1, ap_class = ap_per_class(*stats_np)
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
    else:
        # Nothing was collected (e.g. every batch was skipped); indexing
        # stats_np[3] would raise IndexError, so report zero targets instead.
        nt = np.zeros(nc, dtype=np.int64)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mean_ap, mf1), end='\n\n')

    # Print results per class
    if nc > 1 and len(stats_np):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Return results
    return mp, mr, mean_ap, mf1, loss
Ejemplo n.º 8
0
    #parser.add_argument('--batch-size', type=int, default=16)
    opt = parser.parse_args()
    print(opt)

    device = select_device(opt.device)
    if device == 'cpu':
        mixed_precision = False

    # 0、Initialize parameters( set random seed, get cfg info, )
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0、打印配置文件信息,写log等
    print('clw: config file:', cfg)
    print('clw: pretrained weights:', weights)

    # 1、加载模型
    model = Darknet(cfg).to(device)
    #model.apply(weights_init_normal)  # clw note: without this can also get high mAP;   TODO

    if weights.endswith('.pt'):

        ### model.load_state_dict(torch.load(weights)['model']) # 错误原因:没有考虑类别对不上的那一层,也就是yolo_layer前一层
Ejemplo n.º 9
0
def get_thin_model(cfg,
                   backbone,
                   neck,
                   data,
                   origin_weights,
                   img_size,
                   batch_size,
                   prune_rate,
                   aux_epochs=50,
                   ft_epochs=15,
                   resume=False,
                   cache_images=False,
                   start_layer='75'):
    """Drive the layer-by-layer pruning loop and return the pruned config.

    Steps, in order: build the train/test loaders, make sure the auxiliary
    network checkpoint has been trained for ``aux_epochs`` epochs, load the
    original model, create the masked cfg, optionally write the initial
    progress checkpoint, then alternate ``fine_tune`` / ``channels_select``
    until the last pruning layer is reached.

    Relies on names that are not defined in this function and are expected
    at module level: ``hyp``, ``device``, ``aux_trained``, ``aux_weight``,
    ``progress_chkpt`` and ``progress_result``.

    Args:
        cfg: model cfg passed to ``Darknet`` and ``mask_cfg_and_converted``.
        backbone, neck: forwarded to ``train_aux_for_DCP`` and ``AuxNetUtils``.
        data: data cfg path parsed with ``parse_data_cfg``.
        origin_weights: checkpoint whose ``'model'`` entry is loaded strictly.
        img_size: image size forwarded to both datasets.
        batch_size: requested batch size (capped at the dataset length).
        prune_rate: forwarded to ``channels_select``.
        aux_epochs: number of epochs the auxiliary net must have been trained.
        ft_epochs: forwarded to ``fine_tune``.
        resume: when false, a fresh progress checkpoint is written first.
        cache_images: forwarded to both datasets.
        start_layer: layer identifier at which pruning starts.

    Returns:
        Tuple ``(mask_cfg, aux_util)``.
    """
    init_seeds()

    # ----------------- dataset -----------------
    paths = parse_data_cfg(data)
    train_path = paths['train']
    test_path = paths['valid']

    dataset = LoadImagesAndLabels(train_path,
                                  img_size,
                                  batch_size,
                                  augment=True,
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=False,  # rectangular training
                                  cache_labels=True,
                                  cache_images=cache_images)

    batch_size = min(batch_size, len(dataset))
    # number of workers: bounded by CPU count, batch size and 8
    nw = min(os.cpu_count(), batch_size if batch_size > 1 else 0, 8)
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=True,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    eval_batch = batch_size * 2
    eval_dataset = LoadImagesAndLabels(test_path,
                                       img_size,
                                       eval_batch,
                                       hyp=hyp,
                                       rect=True,
                                       cache_labels=True,
                                       cache_images=cache_images)
    # The collate function is taken from the training dataset object.
    test_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=eval_batch,
                                              num_workers=nw,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    # ----------- get trained aux net -----------
    # Train from scratch when no aux checkpoint exists; resume when one exists
    # but stopped at a different epoch; do nothing when it is already complete.
    aux_needs_training = True
    aux_resume = False
    if aux_trained:
        aux_resume = True
        saved_aux = torch.load(aux_weight)
        aux_needs_training = saved_aux["epoch"] + 1 != aux_epochs
        del saved_aux
    if aux_needs_training:
        train_aux_for_DCP(cfg,
                          backbone,
                          neck,
                          train_loader,
                          origin_weights,
                          aux_weight,
                          hyp,
                          device,
                          resume=aux_resume,
                          epochs=aux_epochs)

    # ---------- init model and aux util ----------
    origin_model = Darknet(cfg).to(device)
    origin_state = torch.load(origin_weights, map_location=device)
    origin_model.load_state_dict(origin_state['model'], strict=True)
    aux_util = AuxNetUtils(origin_model, hyp, backbone, neck, strategy="DCP")
    del origin_state

    mask_cfg, init_state_dict = mask_cfg_and_converted(
        aux_util.mask_replace_layer, cfg, origin_weights, target=None)

    # ---------- start from first layer ----------
    if not resume:
        first_progress = {
            'current_layer': start_layer,
            'epoch': -1,
            'model': init_state_dict,
            'optimizer': None,
        }
        saved_aux = torch.load(aux_weight)
        # Carry every aux-related entry over into the progress checkpoint.
        first_progress.update(
            {key: value for key, value in saved_aux.items() if 'aux' in key})
        del saved_aux
        torch.save(first_progress, progress_chkpt)

        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        with open(progress_result, 'a') as log:
            log.write('\n' + stamp + '\n')

    # Alternate fine-tuning and channel selection until the last pruning
    # layer; skipped entirely when we already start at that layer.
    layer = start_layer
    if start_layer != aux_util.pruning_layer[-1]:
        while int(layer) < int(aux_util.pruning_layer[-1]):
            layer = fine_tune(mask_cfg, data, aux_util, device, train_loader,
                              test_loader, ft_epochs)
            channels_select(mask_cfg, data, origin_model, aux_util, device,
                            train_loader, layer, prune_rate)

    return mask_cfg, aux_util
Ejemplo n.º 10
0
def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=273,  # 500200 batches at bs 64, dataset length 117263
        batch_size=16,
        accumulate=1,
        weights_path='weights',
        init_weights='yolov3-player_stage2_start.81'):
    """Train a Darknet model, evaluating and checkpointing after each epoch.

    Relies on module-level ``hyp`` (hyperparameter dict), ``opt`` (parsed
    command-line options) and ``test`` (evaluation module), none of which are
    defined in this function.

    Args:
        cfg: model cfg path; also parsed for width, height and burn_in.
        data_cfg: data cfg path providing ``train_path`` and ``train_set``.
        resume: load ``latest.pt`` from ``weights_path`` instead of
            ``init_weights``.
        epochs: total number of epochs (exclusive upper bound of the loop).
        batch_size: batch size for the dataset, the loader and evaluation.
        accumulate: number of batches whose gradients are accumulated before
            an optimizer step.
        weights_path: directory holding initial weights and checkpoints.
        init_weights: file name (inside ``weights_path``) loaded when not
            resuming.

    Returns:
        The most recent evaluation results ``(P, R, mAP, F1, test_loss)``;
        the initial zeros if no evaluation ran. Returns early with the
        current results when a NaN loss is detected.
    """
    #init_seeds()
    weights = weights_path + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    device, n_gpu = torch_utils.select_device()

    #Image size
    cfg_model = parse_cfg(cfg)
    img_size = (int(cfg_model[0]['width']), int(cfg_model[0]['height']))

    # Configure run (parse the data cfg once and reuse the result)
    data_dict = parse_data_cfg(data_cfg)
    train_path = data_dict['train_path']
    train_set = data_dict['train_set']

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          weight_decay=hyp['weight_decay'])

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_loss = float('inf')

    if resume:  # Load previously saved model(resume from latest.pt)
        chkpt = torch.load(latest, map_location=device)  # load checkpoint
        model.load_state_dict(chkpt['model'])

        start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']
        del chkpt

    else:  # Initialize model with backbone (optional)
        model.load_weights(weights + init_weights)

    # Scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[100, 440, 1097],
                                               gamma=0.1,
                                               last_epoch=start_epoch - 1)

    # Dataset
    dataset = YoloDataSets(data_path=train_path,
                           input_size=img_size,
                           batch_size=batch_size,
                           image_set=train_set,
                           augment=True,
                           jitter_x=0.3,
                           jitter_y=0.3)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)
        # sampler = torch.utils.data.distributed.DistributedSampler(dataset)

    # Dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=opt.num_workers,
        shuffle=False,  # disable rectangular training if True
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Mixed precision training https://github.com/NVIDIA/apex
    # install help: https://github.com/NVIDIA/apex/issues/259
    mixed_precision = False
    if mixed_precision:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Start training
    t = time.time()
    model.hyp = hyp  # attach hyperparameters to model
    #model_info(model)

    nb = len(dataloader)
    results = (0, 0, 0, 0, 0)  # P, R, mAP, F1, test_loss

    n_burnin = int(cfg_model[0]["burn_in"])  # burn-in batches

    for epoch in range(start_epoch, epochs):
        model.train()
        print(
            ('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf',
                                          'cls', 'total', 'nTargets', 'time'))

        # Update scheduler
        scheduler.step(epoch)

        mloss = torch.zeros(5).to(device)  # mean losses
        for i, (imgs, targets) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            #plot_images(imgs=imgs, targets=targets, fname='train_batch%d.jpg' % i)

            # SGD burn-in: ramp the learning rate during the first batches of
            # epoch 0. Skipped when burn_in is 0, which would otherwise divide
            # by zero on the very first batch.
            if n_burnin > 0 and epoch == 0 and i <= n_burnin:
                lr = hyp['lr0'] * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            if i == 0:
                print('learning rate: %g' % optimizer.param_groups[0]['lr'])
            # Run model
            pred, loss, loss_items = model(imgs, targets)
            loss = torch.mean(loss)
            # loss_items is flattened to rows of 5 values and averaged per column
            n_ = int(loss_items.size()[0] / 5)
            loss_items = torch.mean(loss_items.view((n_, 5)), 0)

            if torch.isnan(loss):
                print('WARNING: nan loss detected, ending training')
                return results

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nb:
                optimizer.step()
                optimizer.zero_grad()

            # Update running mean of tracked metrics
            mloss = (mloss * i + loss_items) / (i + 1)
            # Print batch results
            s = ('%8s%12s' +
                 '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), '%g/%g' %
                                  (i, nb - 1), *mloss, nt, time.time() - t)

            t = time.time()
            print(s)

        # Calculate mAP (always test final epoch, skip first 5 if opt.nosave)
        if not (opt.notest or
                (opt.nosave and epoch < 5)) or epoch == epochs - 1:
            with torch.no_grad():
                results, maps = test.test(cfg,
                                          data_cfg,
                                          batch_size=batch_size,
                                          img_size=img_size,
                                          model=model,
                                          conf_thres=0.1,
                                          iou_thres=0.4)

        # Write epoch results (last batch line followed by the five metrics)
        with open('results.txt', 'a') as file:
            file.write(s + '%11.3g' * 5 % results +
                       '\n')  # P, R, mAP, F1, test_loss

        # Update best loss
        test_loss = results[4]
        if test_loss < best_loss:
            best_loss = test_loss

        # Save training results
        if not opt.nosave:
            # Unwrap the distributed wrapper so the checkpoint loads into a
            # plain model.
            if isinstance(model, nn.parallel.DistributedDataParallel):
                model_state = model.module.state_dict()
            else:
                model_state = model.state_dict()

            # Create checkpoint
            chkpt = {
                'epoch': epoch,
                'best_loss': best_loss,
                'model': model_state,
                'optimizer': optimizer.state_dict(),
            }

            # Save latest checkpoint
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == test_loss:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

    return results
Ejemplo n.º 11
0
def detect(
    model_path,
    classify_model_path,
    label_path,
    root_path,
    cfg,
    data_cfg,
    img_size=416,
    conf_thres=0.5,
    nms_thres=0.5,
):
    """Run YOLOv3 detection plus a second-stage classifier on a folder.

    Every file in ``root_path`` is read with OpenCV, passed through the
    detector, and each detected box is cropped, resized to 224x224 and fed
    to the classifier. Boxes and labels are drawn on the image, which is
    shown in a window; pressing ESC (key code 27) stops the loop.

    Args:
        model_path: detector checkpoint; its ``'model'`` entry is loaded.
        classify_model_path: forwarded to ``Create_Classify_Model``.
        label_path: forwarded to ``Create_Classify_Model``.
        root_path: directory containing the input images.
        cfg: detector cfg name; ``"-tiny"`` in it selects ``Yolov3Tiny``.
        data_cfg: data cfg whose ``'names'`` entry lists the class names.
        img_size: network input size passed to ``process_data``.
        conf_thres: confidence threshold for non-max suppression.
        nms_thres: NMS threshold.

    Returns:
        ``False`` if the detector checkpoint does not exist, otherwise
        ``None``.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)
    # Initialize model
    weights = model_path
    if "-tiny" in cfg:
        model = Yolov3Tiny(num_classes)
    else:
        model = Yolov3(num_classes)

    show_model_param(model)  # show model parameters

    device = select_device(False)  # select the runtime device

    classify_model, labels_dogs_list = Create_Classify_Model(
        device, classify_model_path, label_path)

    # Load weights
    if os.access(weights, os.F_OK):  # check that the model file exists
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:
        print('error model not exists')
        return False
    model.to(device).eval()  # switch the model to eval mode

    colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
              for v in range(1, num_classes + 1)][::-1]
    use_cuda = torch.cuda.is_available()
    for img_name in os.listdir(root_path):
        # os.path.join works whether or not root_path ends with a separator.
        img_path = os.path.join(root_path, img_name)
        im0 = cv2.imread(img_path)
        if im0 is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip unreadable image:', img_path)
            continue
        im_c = im0.copy()  # clean copy used for cropping (im0 gets drawn on)
        print("---------------------")

        t = time.time()
        img = process_data(im0, img_size)
        if use_cuda:
            torch.cuda.synchronize()
        t1 = time.time()
        print("process time:", t1 - t)
        img = torch.from_numpy(img).unsqueeze(0).to(device)

        with torch.no_grad():  # inference only, no autograd graph needed
            pred, _ = model(img)
        if use_cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        print("inference time:", t2 - t1)
        detections = non_max_suppression(pred, conf_thres, nms_thres)[0]
        if use_cuda:
            torch.cuda.synchronize()
        t3 = time.time()
        print("get res time:", t3 - t2)
        if detections is None or len(detections) == 0:
            continue
        # Rescale boxes from 416 to true image size
        detections[:, :4] = scale_coords(img_size, detections[:, :4],
                                         im0.shape).round()
        result = [(classes[int(res[-1])], float(res[4]),
                   [int(res[0]),
                    int(res[1]),
                    int(res[2]),
                    int(res[3])]) for res in detections]
        if use_cuda:
            torch.cuda.synchronize()
        s2 = time.time()
        print("detect time:", s2 - t)
        print(result)

        img_h, img_w = im_c.shape[:2]
        # Draw bounding boxes and labels of detections
        for *xyxy, conf, cls_conf, cls in detections:
            label = '%s %.2f' % (classes[int(cls)], conf)

            #-------------------------------------------------------------------
            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

            # Clamp the box to the image so slicing never wraps around via
            # negative indices or produces an empty crop.
            x_1 = min(max(int(xyxy[0]), 0), img_w)
            y_1 = min(max(int(xyxy[1]), 0), img_h)
            x_2 = min(max(int(xyxy[2]), 0), img_w)
            y_2 = min(max(int(xyxy[3]), 0), img_h)
            if x_2 <= x_1 or y_2 <= y_1:
                # Degenerate box: cv2.resize would fail on an empty crop.
                continue
            #--------------------
            crop = im_c[y_1:y_2, x_1:x_2, :]
            img_crop_ = cv2.resize(crop, (224, 224),
                                   interpolation=cv2.INTER_CUBIC)
            img_crop_ = img_crop_.astype(np.float32)
            img_crop_ = prewhiten(img_crop_)

            img_crop_ = torch.from_numpy(img_crop_)
            img_crop_ = img_crop_.unsqueeze_(0)
            img_crop_ = img_crop_.permute(0, 3, 1, 2)  # (bs, 3, h, w)

            # Use the same device that was passed to Create_Classify_Model
            # instead of unconditionally moving to CUDA.
            img_crop_ = img_crop_.to(device)

            with torch.no_grad():
                outputs = F.softmax(classify_model(img_crop_.float()), dim=1)

            outputs = outputs[0]
            outputx = outputs.cpu().detach().numpy()
            # print('output: ',output)
            max_index = np.argmax(outputx)

            scorex_ = outputx[max_index]
            label_dog_ = labels_dogs_list[max_index]

            print('label_dog_ : ', label_dog_)

            plot_one_box((x_1, y_1 + 20, x_2, y_2),
                         im0,
                         label=label_dog_ + '_' + '%.2f' % (scorex_),
                         color=colors[int(cls)])
            #-----------------------
            cv2.namedWindow('crop', 0)
            cv2.imshow('crop', crop)

        cv2.namedWindow('result', 0)
        cv2.imshow("result", im0)
        key = cv2.waitKey(0)
        if key == 27:  # ESC
            break
Ejemplo n.º 12
0
def train(hyper):
    """Set up model, optimizer, data loaders and trainer, then run training.

    All run options are read from the module-level ``opt`` object. The
    ``hyper`` dict is modified in place: its ``"cls"`` and ``"obj"`` entries
    are rescaled for the current class count and image size.

    Args:
        hyper: hyperparameter dict; the keys read here are ``"cls"``,
            ``"obj"``, ``"lr0"``, ``"momentum"``, ``"weight_decay"`` and
            ``"lrf"``. It is also forwarded to the data loaders and the loss.
    """
    device = torch.device(opt.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    results_file = "results.txt"
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    multi_scale = opt.multi_scale
    img_size_test = opt.img_size  # test image sizes
    img_size_train = opt.img_size

    # Image sizes must be a multiple of the 32-pixel grid.
    grid_size = 32  # (pixels) grid size
    assert math.fmod(img_size_test,
                     grid_size) == 0, "--img-size %g must be a %g-multiple" % (
                         img_size_test, grid_size)
    grid_min = img_size_test // grid_size
    grid_max = img_size_test // grid_size
    if multi_scale:
        # Round the requested min/max input sizes down to a grid multiple.
        grid_min = (opt.img_size // 1.5) // grid_size
        grid_max = (opt.img_size // 0.667) // grid_size
        img_size_min = int(grid_min * grid_size)
        img_size_max = int(grid_max * grid_size)
        img_size_train = img_size_max  # initialize with max size
        print("Using multi_scale training, image range[{}, {}]".format(
            img_size_min, img_size_max))

    # configure run
    # init_seeds()  # seed the RNGs so results are reproducible
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    if opt.single_cls:
        num_cls = 1
    else:
        num_cls = int(data_dict["classes"])  # number of classes
    # update coco-tuned hyp['cls'] to current dataset
    hyper["cls"] *= num_cls / 80
    hyper["obj"] *= img_size_test / 320

    # Remove previous results
    for stale in glob.glob(results_file):
        os.remove(stale)

    # Initialize model
    # model = YOLOV3_SPP(cfg).to(device)
    model = YOLO_SPP(num_cls).to(device)

    # Pick the pretrained weights that match the model class.
    is_v3_spp = isinstance(model, YOLOV3_SPP)
    weights = ('./weights/yolov3-spp-ultralytics-512.pt'
               if is_v3_spp else './weights/yolov3spp.pt')

    # Whether to freeze weights. The module_list-based branch is switched off
    # by the trailing `and False`, so only the else branch ever runs.
    if isinstance(model, YOLOV3_SPP) and False:
        if opt.freeze_layers:
            # index - 1 is the predictor; the YOLOBlk itself is not the predictor
            output_layer_indices = [
                pos - 1 for pos, layer in enumerate(model.module_list)
                if isinstance(layer, YOLOBlk)
            ]
            # freeze everything except the predictors and the YOLO layers
            freeze_layer_indices = [
                pos for pos in range(len(model.module_list))
                if (pos not in output_layer_indices) and (
                    pos - 1 not in output_layer_indices)
            ]
            # Freeze non-output layers
            for pos in freeze_layer_indices:
                for param in model.module_list[pos].parameters():
                    param.requires_grad_(False)
        else:
            # without freeze_layers, train only what follows darknet53
            darknet_end_layer = 74  # only yolov3spp cfg
            # Freeze darknet53 layers [0, 74]
            for pos in range(darknet_end_layer + 1):
                for param in model.module_list[pos].parameters():
                    param.requires_grad_(False)
    elif opt.freeze_layers:
        model.freeze_layers(model.index_anchors)

    # optimizer over the parameters that are still trainable
    params_grad = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = optim.SGD(params_grad,
                          lr=hyper["lr0"],
                          momentum=hyper["momentum"],
                          weight_decay=hyper["weight_decay"],
                          nesterov=True)

    start_epoch = 0
    if weights.endswith((".pt", ".pth")):
        epochs, start_epoch = loadCKPT(model, optimizer, epochs, weights,
                                       results_file, device, True)

    bool_trainer = True
    train_loader = None
    # number of workers: bounded by CPU count, batch size and 8
    num_workers = min(os.cpu_count(), batch_size if batch_size > 1 else 0, 8)

    # dataset
    if bool_trainer:
        # the training loader is built with the training image size
        train_loader = dataLoader(train_path,
                                  img_size_train,
                                  batch_size,
                                  True,
                                  hyper,
                                  opt.rect,
                                  cache_images=opt.cache_images,
                                  single_cls=opt.single_cls,
                                  num_workers=num_workers,
                                  pin_memory=True)

    # the validation loader uses the test image size and batch size 1
    test_loader = dataLoader(test_path,
                             img_size_test,
                             1,
                             True,
                             hyper,
                             cache_images=opt.cache_images,
                             single_cls=opt.single_cls,
                             num_workers=num_workers,
                             pin_memory=True)

    # Model parameters
    loss_cfg = dict(
        num_cls=num_cls,  # attach number of classes to model
        hyp=hyper,  # attach hyper parameters to model
        ratio=1.0,  # giou loss ratio (obj_loss = 1.0 or giou)
        anchors=model.anchor_vec,  # anchors
    )

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lr_lambda(x):
        # cosine decay from 1 down to hyper["lrf"] over `epochs`
        cosine = (1 + math.cos(x * math.pi / epochs)) / 2
        return cosine * (1 - hyper["lrf"]) + hyper["lrf"]

    multi_gpu = type(model) in (nn.parallel.DataParallel,
                                nn.parallel.DistributedDataParallel)
    criterion = YoloLoss(multi_gpu=multi_gpu, cfg=loss_cfg)
    trainer = Trainer(model,
                      optimizer,
                      loss=criterion,
                      lr_lambda=lr_lambda,
                      last_epoch=start_epoch)
    if not bool_trainer:
        trainer.evaluate(test_loader, device=device)
    else:
        print("starting training for %g epochs..." % epochs)
        print('Using %g data loader workers' % num_workers)
        trainer.fit_generate(train_loader,
                             epochs=epochs,
                             test_loader=test_loader,
                             print_freq=50,
                             save_best=True,
                             multi_scale=multi_scale,
                             img_size=img_size_train,
                             grid_min=grid_min,
                             grid_max=grid_max,
                             grid_size=grid_size,
                             device=device,
                             warmup=True)
Ejemplo n.º 13
0
def train(data_cfg='cfg/face.data', accumulate=1):
    """Train a YOLOv3 model using settings read from a data cfg file.

    Every run setting (GPUs, workers, paths, class count, batch size, image
    size, multi-scale flag, epochs, lr schedule) comes from the dict returned
    by ``parse_data_cfg``. A checkpoint is written after every epoch into the
    ``./weights`` directory.

    Args:
        data_cfg: path of the data cfg file to parse.
        accumulate: number of batches whose gradients are accumulated before
            an optimizer step.
    """
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # training settings as a dict

    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    # The cfg value is text; only the exact string 'True' enables multi-scale.
    multi_scale = get_data_cfg['multi_scale'] == 'True'
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])
    lr0 = float(get_data_cfg['lr0'])

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    device = select_device()

    print('data_cfg            : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('GPUs             : ', gpus)
    print('num_workers      : ', num_workers)
    print('model            : ', cfg_model)
    print('Finetune_model   : ', finetune_model)
    print('train_path       : ', train_path)
    print('num_classes      : ', num_classes)
    print('batch_size       : ', batch_size)
    print('img_size         : ', img_size)
    print('multi_scale      : ', multi_scale)
    print('lr_step          : ', lr_step)
    print('lr0              : ', lr0)

    # Anchors are given for a 416 input and rescaled to img_size.
    a_scalse = 416. / img_size
    anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
    anchors_new = [(int(w / a_scalse), int(h / a_scalse)) for w, h in anchors]
    model = Yolov3(num_classes, anchors=anchors_new)

    # Directory for saved checkpoints
    weights = './weights'
    os.makedirs(weights, exist_ok=True)

    model = model.to(device)
    # Join with the directory so the files land inside ./weights; plain string
    # concatenation produced './weightslatest_<size>.pt' next to it instead.
    latest = os.path.join(weights, 'latest_{}.pt'.format(img_size))
    best = os.path.join(weights, 'best_{}.pt'.format(img_size))
    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)

    start_epoch = 0

    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # Drop scalar entries and tensors whose first dimension equals
        # not_load_filters; strict=False tolerates the missing keys.
        model.load_state_dict(
            {k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
        # model.load_state_dict(chkpt['model'])
        if 'coco' not in finetune_model:
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                # NOTE(review): this restored value is overwritten by the
                # float('inf') reset further down; kept as in the original.
                best_loss = chkpt['best_loss']

    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch - 1)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size, augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num ---------->>>: ', dataset.__len__())
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    t = time.time()
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    # NOTE(review): test_loss is never updated in this function, so both stay
    # at inf and the "best" checkpoint is written on every 5th epoch.
    best_loss = float('inf')
    test_loss = float('inf')

    flag_start = False

    # NOTE(review): the loop always starts at 0; start_epoch only offsets the
    # scheduler. Confirm whether resuming should skip finished epochs.
    for epoch in range(0, epochs):
        model.train()
        if flag_start:  # step the scheduler from the second epoch onwards
            scheduler.step()
        flag_start = True
        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue
            # Burn-in: ramp the learning rate over the first batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            # Report the rate the optimizer is actually using; the scheduler
            # does not know about the burn-in override above.
            current_lr = optimizer.param_groups[0]['lr']
            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, '
                'wh {:.3f}, '
                'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(epoch, epochs - 1, i, nB - 1,
                                                                             multi_size[2], multi_size[3]
                                                                             , nt, current_lr, mloss['xy'],
                                                                             mloss['wh'], mloss['conf'], mloss['cls'],
                                                                             mloss['total'], time.time() - t),
                end='\r')

            t = time.time()
        print()
        # Create checkpoint (unwrap the distributed wrapper if present)
        if isinstance(model, nn.parallel.DistributedDataParallel):
            model_state = model.module.state_dict()
        else:
            model_state = model.state_dict()
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model_state,
                 'optimizer': optimizer.state_dict()}

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint
        if best_loss == test_loss and epoch % 5 == 0:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, os.path.join(weights, 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch)))

        # Delete checkpoint
        del chkpt
Ejemplo n.º 14
0
def detect(save_txt=False, save_img=False):
    """Run Darknet inference on ``opt.source`` and show/save annotated results.

    Apart from the two flags below, all settings are read from the
    module-level ``opt`` namespace (``cfg``, ``img_size``, ``output``,
    ``source``, ``weights``, ``half``, ``view_img``, ``device``, ``data``,
    ``conf_thres``, ``nms_thres``, ``fourcc``).  The output folder
    ``opt.output`` is deleted and recreated on every call.

    When the module-level ``ONNX_EXPORT`` flag is set, the model is exported
    to ``weights/export.onnx``, checked with ``onnx``, printed, and the
    function returns without running inference.

    Args:
        save_txt: If true, append one line per detection
            (``x1 y1 x2 y2 cls conf``) to ``<save_path>.txt``.
        save_img: If true, write annotated images/videos into the output
            folder.  Forced to ``True`` when the source is not a stream.
    """
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    img_size = (320, 192) if ONNX_EXPORT else opt.img_size
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    # Sources treated as streams: camera index '0', rtsp/http URLs, or a .txt list
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Second-stage classifier (disabled; flip `classify` to enable)
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model,
                          img,
                          'weights/export.onnx',
                          verbose=False,
                          opset_version=11)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(
            model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors (one random color per class)
    classes = load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]

    # Run inference
    t0 = time.time()
    forward_time_total = 0
    # Defined up-front so the summary below does not hit an unbound name when
    # the dataset yields no items.
    save_path = ''

    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add a leading batch dimension
        forward_time = time.time()
        pred = model(img)[0]

        if opt.half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres)

        # Apply second-stage classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '{}: '.format(i), im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '{}x{} '.format(*img.shape[2:])  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '{} {}s, '.format(n, classes[int(c)])  # add to string

                # Write results; each row unpacks as (x1, y1, x2, y2, conf, _, cls)
                for *xyxy, conf, _, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path + '.txt', 'a') as file:
                            file.write(
                                ('{} ' * 6 + '\n').format(*xyxy, cls, conf))

                    if save_img or view_img:  # Add bbox to image
                        label = '{} {:.2f}'.format(classes[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

            end_time = time.time()

            forward_time_total += end_time - forward_time
            print('{}Done. (net: {:.3f}s, total: {:.3f}s)'.format(
                s,
                end_time - forward_time,
                end_time - t,
            ))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    # Close the last video explicitly instead of relying on garbage collection.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to {}'.format(os.getcwd()) + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    # Average over the dataset length; guard against a dataset reporting 0 items.
    n_items = len(dataset)
    net_avg_ms = forward_time_total / n_items * 1000 if n_items else 0.0
    print('Done. (total: {:.3f}s, net avg: {:.3f}ms)'.format(
        time.time() - t0,
        net_avg_ms,
    ))
Ejemplo n.º 15
0
def detect(
        model_path,
        root_path,
        cfg,
        data_cfg,
        img_size=416,
        conf_thres=0.5,
        nms_thres=0.5,
):
    """Detect objects in every image of a folder and display each result.

    For each file in ``root_path`` the image is preprocessed, passed through
    the network, filtered with NMS, printed as a list of
    ``(class_name, confidence, [x1, y1, x2, y2])`` tuples and shown in a
    window named ``result``.  The window blocks until a key is pressed;
    ``Esc`` (key code 27) stops the loop.  Images with no detections are not
    displayed.

    Args:
        model_path: Path to a checkpoint whose ``'model'`` entry is a state dict.
        root_path: Folder containing the images to process.
        cfg: Network config name; ``"-tiny"`` in it selects ``Yolov3Tiny``.
        data_cfg: Data config file providing the ``'names'`` class list path.
        img_size: Size passed to ``process_data`` and ``scale_coords``.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        nms_thres: NMS threshold passed to ``non_max_suppression``.

    Returns:
        ``False`` when ``model_path`` does not exist, otherwise ``None``.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)
    # Initialize model
    if "-tiny" in cfg:
        model = Yolov3Tiny(num_classes)
    else:
        model = Yolov3(num_classes)
    weights = model_path

    show_model_param(model)  # print model parameters

    device = select_device()  # choose the compute device
    use_cuda = torch.cuda.is_available()
    # Load weights
    if os.access(weights, os.F_OK):  # check that the model file exists
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:
        print('error model not exists')
        return False
    model.to(device).eval()  # switch the model to eval mode

    # One deterministic color per class index
    colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, num_classes + 1)][::-1]

    for img_name in os.listdir(root_path):
        # join() works whether or not root_path ends with a path separator
        img_path = os.path.join(root_path, img_name)
        im0 = cv2.imread(img_path)
        if im0 is None:
            # cv2.imread returns None for files it cannot decode (non-images,
            # sub-directories); skip them instead of crashing in preprocessing.
            print("skip unreadable image:", img_path)
            continue
        print("---------------------")

        t = time.time()
        img = process_data(im0, img_size)
        if use_cuda:
            torch.cuda.synchronize()  # make the wall-clock timings meaningful on GPU
        t1 = time.time()
        print("process time:", t1-t)
        img = torch.from_numpy(img).unsqueeze(0).to(device)

        pred, _ = model(img)  # run detection on the image
        if use_cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        print("inference time:", t2-t1)
        detections = non_max_suppression(pred, conf_thres, nms_thres)[0]  # nms
        if use_cuda:
            torch.cuda.synchronize()
        t3 = time.time()
        print("get res time:", t3-t2)
        if detections is None or len(detections) == 0:
            continue
        # Rescale boxes from network input size to true image size
        detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape).round()
        # Each row is read as (x1, y1, x2, y2, conf, cls_conf, cls)
        result = [
            (classes[int(res[-1])], float(res[4]),
             [int(res[0]), int(res[1]), int(res[2]), int(res[3])])
            for res in detections
        ]
        if use_cuda:
            torch.cuda.synchronize()
        s2 = time.time()
        print("detect time:", s2 - t)
        print(result)

        # Draw bounding boxes and labels of detections
        for *xyxy, conf, cls_conf, cls in detections:
            label = '%s %.2f' % (classes[int(cls)], conf)
            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

        cv2.namedWindow('result', 0)
        cv2.imshow("result", im0)
        key = cv2.waitKey(0)
        if key == 27:  # Esc
            break
Ejemplo n.º 16
0
def detect(ModelPath,
           cfg,
           data_cfg,
           ImgSize=416,
           ConfThres=0.5,
           NMSThres=0.5,
           VideoPath=0):
    """Run YOLOv3 on a video source, display the result and record it to disk.

    Frames are read from ``cv2.VideoCapture(VideoPath)`` until the source is
    exhausted or ``Esc`` (key code 27) is pressed.  Frames that contain
    detections are annotated with boxes and an FPS overlay, shown in a window
    named ``image`` and appended to ``./demo/demo_<timestamp>.mp4``.

    NOTE(review): frames without detections are shown but neither annotated
    with FPS nor written to the output video; this mirrors the original
    behavior -- confirm it is intended.

    Args:
        ModelPath: Path to a checkpoint whose ``'model'`` entry is a state dict.
        cfg: Unused by this function; kept for interface compatibility.
        data_cfg: Data config file providing the ``'names'`` class list path.
        ImgSize: Size passed to ``process_data``/``scale_coords``; the base
            anchors are rescaled by ``ImgSize / 416``.
        ConfThres: Confidence threshold passed to ``non_max_suppression``.
        NMSThres: NMS threshold passed to ``non_max_suppression``.
        VideoPath: Argument for ``cv2.VideoCapture`` (default ``0``).

    Returns:
        ``False`` when ``ModelPath`` does not exist, otherwise ``None``.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)
    # Initialize the model with anchors rescaled from the 416 reference size
    weights = ModelPath
    A_Scalse = 416. / ImgSize
    anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
               (116, 90), (156, 198), (373, 326)]
    anchors_new = [(int(w / A_Scalse), int(h / A_Scalse)) for w, h in anchors]
    model = Yolov3(num_classes, anchors=anchors_new)
    device = select_device()  # choose the compute device
    use_cuda = torch.cuda.is_available()
    # Load weights
    if os.access(weights, os.F_OK):  # check that the model file exists
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:
        print('error model not exists')
        return False
    model.to(device).eval()  # switch the model to eval mode

    video_capture = cv2.VideoCapture(VideoPath)
    video_writer = None  # created lazily once the frame size is known
    loc_time = time.localtime()
    str_time = time.strftime("%Y-%m-%d_%H-%M-%S", loc_time)
    save_video_path = "./demo/demo_{}.mp4".format(str_time)
    # -------------------------------------------------
    try:
        while True:
            ret, im0 = video_capture.read()
            if not ret:  # end of stream or read failure
                break

            t = time.time()
            img = process_data(im0, ImgSize)
            if use_cuda:
                torch.cuda.synchronize()
            img = torch.from_numpy(img).unsqueeze(0).to(device)

            pred, _ = model(img)  # run detection on the frame
            if use_cuda:
                torch.cuda.synchronize()
            detections = non_max_suppression(pred, ConfThres,
                                             NMSThres)[0]  # nms
            if use_cuda:
                torch.cuda.synchronize()
            if detections is None or len(detections) == 0:
                # Nothing detected: just show the raw frame
                cv2.namedWindow('image', 0)
                cv2.imshow("image", im0)
                key = cv2.waitKey(1)
                if key == 27:
                    break
                continue
            # Rescale boxes from network input size to true image size
            detections[:, :4] = scale_coords(ImgSize, detections[:, :4],
                                             im0.shape).round()
            # Each row is read as (x1, y1, x2, y2, conf, cls_conf, cls)
            result = [
                (classes[int(res[-1])], float(res[4]),
                 [int(res[0]), int(res[1]), int(res[2]), int(res[3])])
                for res in detections
            ]
            if use_cuda:
                torch.cuda.synchronize()
            for r in result:
                print(r)
            for *xyxy, conf, cls_conf, cls in detections:
                label = '%s %.2f' % (classes[int(cls)], conf)
                # The top edge is shifted down by 6 px before drawing
                xyxy = int(xyxy[0]), int(xyxy[1]) + 6, int(xyxy[2]), int(
                    xyxy[3])
                # Class 0 gets its own color; every other class shares one
                if int(cls) == 0:
                    box_color = (255, 255, 95)
                else:
                    box_color = (15, 155, 255)
                plot_one_box(xyxy,
                             im0,
                             label=label,
                             color=box_color,
                             line_thickness=3)
            s2 = time.time()
            # Small epsilon avoids division by zero on very fast iterations
            str_fps = ("{:.2f} FPS".format(1. / (s2 - t + 0.00001)))
            # Draw the FPS text twice (thick white, thin black) for an outline
            cv2.putText(im0, str_fps, (5, im0.shape[0] - 3),
                        cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 255, 255), 4)
            cv2.putText(im0, str_fps, (5, im0.shape[0] - 3),
                        cv2.FONT_HERSHEY_DUPLEX, 0.9, (0, 0, 0), 1)
            cv2.namedWindow('image', 0)
            cv2.imshow("image", im0)
            key = cv2.waitKey(1)
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                video_writer = cv2.VideoWriter(save_video_path,
                                               fourcc,
                                               fps=25,
                                               frameSize=(im0.shape[1],
                                                          im0.shape[0]))
            video_writer.write(im0)
            if key == 27:
                break
    finally:
        # Always free the capture device and windows, even on error.
        video_capture.release()
        cv2.destroyAllWindows()
        # The writer only exists if at least one annotated frame was produced.
        if video_writer is not None:
            video_writer.release()
Ejemplo n.º 17
0
def detect():
    """Run batched Darknet inference over a dataset and save/show the results.

    All settings come from the module-level ``opt`` namespace (``cfg``,
    ``weights``, ``src_txt_path``, ``img_size``, ``batch_size``,
    ``dst_path``, ``device``, ``data``, ``conf_thres``, ``nms_thres``).
    Detections are drawn onto the original images; the images are written to
    ``opt.dst_path`` when the module-level ``SAVE`` flag is set and displayed
    (blocking on a key press) when ``SHOW`` is set.
    """
    # 0. Read run parameters
    cfg = opt.cfg
    weights = opt.weights
    src_txt_path = opt.src_txt_path
    img_size = opt.img_size
    batch_size = opt.batch_size
    dst_path = opt.dst_path
    # makedirs also creates missing parent folders and tolerates an existing one
    os.makedirs(dst_path, exist_ok=True)
    device = select_device(opt.device)
    classes = load_classes(parse_data_cfg(opt.data)['names'])

    # 1. Build the network and load weights
    model = Darknet(cfg)
    if weights.endswith('.pt'):  # TODO: support the .weights format
        # map_location lets a checkpoint saved on GPU load on the selected device
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    model.to(device).eval()

    # 2. Build the dataset / dataloader
    test_dataset = VocDataset(src_txt_path, img_size, with_label=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn)  # TODO

    # 3. Predict (forward pass)
    start = time.time()
    pbar = tqdm(dataloader)
    for i, (img_tensor, img0, img_name) in enumerate(pbar):
        pbar.set_description("Already Processed %d image: " % (i + 1))
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        # rows: (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        output = model(img_tensor)[0]

        # NMS
        nms_output = non_max_suppression(output, opt.conf_thres, opt.nms_thres)

        # Network input side length; height and width are taken to be equal
        new_h = new_w = img_tensor.size()[2]

        # Visualization
        for batch_idx, det in enumerate(nms_output):  # detections per image
            if det is not None:  # important: NMS may return None for an image
                # Map boxes from the resized input back to the original image;
                # the ratios are per-image, so compute them once, not per box.
                orig_h, orig_w = img0[batch_idx].shape[:2]
                ratio_h = orig_h / new_h
                ratio_w = orig_w / new_w
                for *box, conf, _, cls in det:  # box: [x1, y1, x2, y2]
                    x1 = int(ratio_w * box[0])
                    y1 = int(ratio_h * box[1])
                    x2 = int(ratio_w * (box[2]))
                    y2 = int(ratio_h * (box[3]))
                    label = '%s %.2f' % (classes[int(cls)], conf)

                    # Draw the prediction on the original image
                    # (plot_one_box must receive the image, not img_tensor)
                    plot_one_box([x1, y1, x2, y2],
                                 img0[batch_idx],
                                 label=label,
                                 color=(255, 0, 0))

            if SAVE:
                # Save the annotated image
                cv2.imwrite(os.path.join(dst_path, img_name[batch_idx]),
                            img0[batch_idx])
            if SHOW:
                cv2.imshow('aaa', img0[batch_idx])
                cv2.waitKey(0)

    print('time use: %.3fs' % (time.time() - start))
Ejemplo n.º 18
0
def test(cfg,
         data_cfg,
         weights=None,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=False,
         model=None):
    """Evaluate a detector on the validation set and print P/R/mAP/F1.

    Args:
        cfg: Darknet config path, used only when ``model`` is ``None``.
        data_cfg: Data config file; its ``classes``, ``valid_path``,
            ``valid_set`` and ``names`` entries are read.
        weights: Weights file (``.pt`` or darknet format), used only when
            ``model`` is ``None``.
        batch_size: Batch size for the dataset and dataloader.
        img_size: Input size passed to the dataset.
        iou_thres: IoU above which a prediction counts as correct.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        nms_thres: NMS threshold passed to ``non_max_suppression``.
        save_json: Accepted for interface compatibility; not used here.
        model: Optional already-built model; its device is reused.

    Returns:
        ``((mp, mr, map, mf1, mean_loss), maps)`` where ``maps`` is a
        length-``nc`` array holding the AP of every evaluated class.
    """
    if model is None:
        device = torch_utils.select_device()

        # Initialize model
        model = Darknet(cfg).to(device)

        # Load weights
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device

    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    nc = int(data_cfg['classes'])  # number of classes
    test_path = data_cfg['valid_path']  # path to test images
    test_set = data_cfg['valid_set']
    names = load_classes(data_cfg['names'])  # class names

    # Dataset
    dataset = YoloDataSets(data_path=test_path,
                           input_size=img_size,
                           batch_size=batch_size,
                           image_set=test_set,
                           augment=False)

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            pin_memory=True,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    print(('%20s' + '%10s' * 6) %
          ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs,
                  targets) in enumerate(tqdm(dataloader,
                                             desc='Computing mAP')):
        imgs = imgs.to(device)
        targets = targets.to(device)
        _, _, height, width = imgs.shape

        # Run model
        inf_out, loss_i, _ = model(
            imgs, targets)  # inference and training outputs
        loss += torch.mean(loss_i)

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        # Drop all-zero rows, which only pad the batch to a fixed target count.
        true_targets = targets[torch.sum(targets[:, 1:6], 1) != 0]
        for si, pred in enumerate(output):
            # Column 0 is the image index in the batch; the remaining columns
            # are read below as (x, y, w, h, class).
            labels = true_targets[true_targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 4].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue
            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 4]

                # target boxes, scaled to pixels by the batch width/height
                tbox = xywh2xyxy(labels[:, 0:4])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and the target is still unmatched,
                    # mark the prediction as correct
                    if iou > iou_thres and m[bi] not in detected:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        # Only index stats once it is known to be non-empty.
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
    else:
        nt = np.zeros(1)  # nothing collected: report zero targets

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1), end='\n\n')

    # Print results per class
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Return results
    maps = np.zeros(nc)
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    # max(..., 1) avoids a ZeroDivisionError when the dataloader is empty
    return (mp, mr, map, mf1, loss / max(len(dataloader), 1)), maps
Ejemplo n.º 19
0
def test(
    cfg,
    data,
    weights=None,
    batch_size=16,
    img_size=416,
    iou_thres=0.5,
    conf_thres=0.001,
    nms_thres=0.5,
    save_json=False,
    model=None,
):
    """Evaluate a detector on the validation set; optionally score with COCO.

    Args:
        cfg: Darknet config path, used only when ``model`` is ``None``.
        data: Data config file; its ``classes``, ``valid`` and ``names``
            entries are read.
        weights: Weights file (``.pt`` or darknet format), used only when
            ``model`` is ``None``.
        batch_size: Batch size for the dataset and dataloader.
        img_size: Input size passed to the model and dataset.
        iou_thres: IoU above which a prediction counts as correct.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        nms_thres: NMS threshold passed to ``non_max_suppression``.
        save_json: If true, dump detections to ``results.json`` and, when
            possible, replace mAP with the pycocotools value.
        model: Optional already-built model; its device is reused and
            per-class printing is disabled.

    Returns:
        ``((mp, mr, map, mf1, *loss_components), maps)`` where the three
        loss components are the accumulated loss divided by the number of
        batches and ``maps`` is a length-``nc`` per-class AP array (classes
        without an AP entry hold the mean ``map``).
    """
    # Initialize/load model and set device
    if model is None:
        device = torch_utils.select_device(opt.device)
        verbose = True

        # Initialize model
        model = Darknet(cfg, img_size).to(device)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    test_path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 16]),
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    # Column header shown as the progress-bar description
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        targets = targets.to(device)
        imgs = imgs.to(device)
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes (first batch only, once per run dir)
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        if hasattr(model, 'param'):  # if model has loss hyperparameters
            loss += compute_loss(train_out, targets,
                                 model)[1][:3].cpu()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            # Column 0 is the image index in the batch; the remaining columns
            # are read below as (class, x, y, w, h).
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[si].shape[1:], box,
                             shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for di, d in enumerate(pred):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [floatn(x, 3) for x in box[di]],
                        'score': floatn(d[4], 5)
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled to pixels by the batch width/height
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and the target is still unmatched,
                    # mark the prediction as correct
                    if iou > iou_thres and m[bi] not in detected:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Save JSON
    if save_json and map and len(jdict):
        # COCO scoring is best-effort: a failure is reported, never raised,
        # so the metrics computed above are still returned.
        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(jdict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(
                './data-bin/coco2014/annotations/instances_val2014.json'
            )  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map = cocoEval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )
        except Exception as e:
            # Report the real cause (missing annotations, bad file names, ...)
            # instead of blaming a missing dependency.
            print(
                'WARNING: COCO evaluation failed ({}). Can not compute official COCO mAP.'
                .format(e))

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
Ejemplo n.º 20
0
def train():
    """Run the training loop configured by the module-level ``opt`` namespace.

    Builds the model, optimizer, LR scheduler, dataset and dataloader, then
    trains from ``start_epoch`` up to ``epochs``.  After every epoch one line
    is appended to ``results_file``; depending on ``opt`` the model is also
    evaluated through ``test.test`` and checkpoints are written to ``last``,
    ``best`` and ``wdir``.

    Relies on names defined outside this function, among them ``opt``,
    ``param``, ``device``, ``results_file``, ``mixed_precision``,
    ``tb_writer``, ``wdir``, ``last`` and ``best``.

    Returns:
        The most recent ``results`` tuple, ordered as
        (P, R, mAP, F1, val GIoU, val Objectness, val Classification).
        The same tuple is returned early if a non-finite loss is encountered.
    """
    cfg = opt.cfg
    data = opt.data
    img_size = opt.img_size
    epochs = 1 if opt.prebias else opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights

    if 'pw' not in opt.arch:  # remove BCELoss positive weights
        param['cls_pw'] = 1.
        param['obj_pw'] = 1.

    # Initialize
    init_seeds()
    multi_scale = opt.multi_scale

    if multi_scale:
        img_sz_min = round(img_size / 32 / 1.5) + 1
        img_sz_max = round(img_size / 32 * 1.5) - 1
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale {} - {}'.format(img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    nc = int(data_dict['classes'])  # number of classes

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg, arch=opt.arch).to(device)

    # Optimizer: conv weights get weight decay (pg1), everything else does not (pg0)
    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in model.named_parameters():
        if 'Conv2d.weight' in k:
            pg1.append(v)  # parameter group 1 (apply weight_decay)
        else:
            pg0.append(v)  # parameter group 0

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=param['lr0'])
    else:
        optimizer = optim.SGD(pg0, lr=param['lr0'], momentum=param['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': param['weight_decay']})  # add pg1 with weight_decay
    del pg0, pg1

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_fitness = float('inf')
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
        chkpt = torch.load(weights, map_location=device)

        # load model: keep only tensors whose element count matches the current model,
        # then load non-strictly so the dropped entries keep their fresh initialisation
        model_state = model.state_dict()  # built once instead of once per checkpoint entry
        chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model_state[k].numel() == v.numel()}
        model.load_state_dict(chkpt['model'], strict=False)

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        start_epoch = chkpt['epoch'] + 1
        del chkpt

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        cutoff = load_darknet_weights(model, weights)

    if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
        nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)

        if opt.prebias:
            for group in optimizer.param_groups:
                # lower param count allows more aggressive training
                # settings: i.e. SGD ~0.1 lr0, ~0.9 momentum
                group['lr'] *= 100  # lr gain
                if group.get('momentum') is not None:  # for SGD but not Adam
                    group['momentum'] *= 0.9

        for p in model.parameters():
            if opt.prebias and p.numel() == nf:  # train (yolo biases)
                p.requires_grad = True
            elif opt.transfer and p.shape[0] == nf:  # train (yolo biases+weights)
                p.requires_grad = True
            else:  # freeze layer
                p.requires_grad = False

    # Scheduler https://github.com/ultralytics/yolov3/issues/238
    # LR is multiplied by 0.1 at 80% and again at 90% of opt.epochs
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(opt.epochs * x) for x in [0.8, 0.9]],
        gamma=0.1,
    )
    scheduler.last_epoch = start_epoch - 1

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        param=param,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        image_weights=opt.img_weights,
        cache_labels=epochs > 10,
        cache_images=False if opt.prebias else opt.cache_images,
    )

    # Dataloader
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 16]),
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )

    # Start training
    model.nc = nc  # attach number of classes to model
    model.arch = opt.arch  # attach yolo architecture
    model.param = param  # attach hyperparameters to model
    torch_utils.model_info(model, report='summary')  # 'full' or 'summary'
    nb = len(dataloader)
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    print('Starting {} for {} epochs...'.format('prebias' if opt.prebias else 'training', epochs))
    epoch = start_epoch - 1  # keeps the final summary valid when the loop body never runs
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()
        print(('{:>10s}' * 8).format(
            'Epoch', 'gpu_mem', 'GIoU', 'obj',
            'cls', 'total', 'targets', 'img_size',
        ))

        # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional; disabled by the flag below)
        freeze_backbone = False
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:  # if layer < 75
                    p.requires_grad = epoch != 0

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
            dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device)
            targets = targets.to(device)

            # Multi-Scale training
            if multi_scale:
                if ni / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Plot images with bounding boxes (first batch of the whole run only)
            if ni == 0:
                fname = 'train_batch{}.jpg'.format(i)
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=fname)
                if tb_writer:
                    tb_writer.add_image(fname, cv2.imread(fname)[:, :, ::-1], dataformats='HWC')

            # Run model
            preds = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(preds, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            s = ('{:>10s}' * 2 + '{:10.3g}' * 6).format(
                '{:g}/{:g}'.format(epoch, epochs - 1),
                '{:.3g}G'.format(mem), *mloss, len(targets), img_size)
            pbar.set_description(s)

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        final_epoch = epoch + 1 == epochs
        if opt.prebias:
            print_model_biases(model)
        else:
            # Calculate mAP (always test final epoch, skip first 10 if opt.nosave)
            if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch:
                with torch.no_grad():
                    results, maps = test.test(
                        cfg,
                        data,
                        batch_size=batch_size,
                        img_size=opt.img_size,
                        model=model,
                        conf_thres=0.001 if final_epoch and epoch > 0 else 0.1,  # 0.1 for speed
                        save_json=final_epoch and epoch > 0 and 'coco.data' in data,
                    )

        # Write epoch results
        with open(results_file, 'a') as f:
            # '%10.3g' is a printf-style spec, so it has to be applied with the % operator;
            # str.format() would leave it in the file as literal text.
            f.write(s + ('%10.3g' * 7) % tuple(results) + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = [
                'GIoU', 'Objectness', 'Classification', 'Train loss',
                'Precision', 'Recall', 'mAP', 'F1', 'val GIoU',
                'val Objectness', 'val Classification',
            ]
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best fitness (lower is better: it is the sum of the validation losses)
        fitness = sum(results[4:])  # total loss
        if fitness < best_fitness:
            best_fitness = fitness

        # Save training results
        save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint; the optimizer state is dropped on the final epoch
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': model.module.state_dict() if isinstance(
                             model, nn.parallel.DistributedDataParallel) else model.state_dict(),
                         'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, last)

            # Save best checkpoint
            if best_fitness == fitness:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, wdir + 'backup{}.pt'.format(epoch))

            # Delete checkpoint
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    if len(opt.name) and not opt.prebias:
        fresults = 'results{}.txt'.format(opt.name)
        flast = 'last{}.pt'.format(opt.name)
        fbest = 'best{}.pt'.format(opt.name)
        os.rename('results.txt', fresults)
        if os.path.exists(wdir + 'last.pt'):
            os.rename(wdir + 'last.pt', wdir + flast)
        if os.path.exists(wdir + 'best.pt'):
            os.rename(wdir + 'best.pt', wdir + fbest)

        # save to cloud
        if opt.bucket:
            os.system('gsutil cp {} {} gs://{}'.format(fresults, wdir + flast, opt.bucket))

    plot_results()  # save as results.png
    print('{} epochs completed in {:.3f} hours.\n'.format(
        epoch - start_epoch + 1,
        (time.time() - t0) / 3600),
    )
    if torch.cuda.device_count() > 1:
        dist.destroy_process_group()
    torch.cuda.empty_cache()

    return results
Ejemplo n.º 21
0
def train(data_cfg='cfg/voc_coco.data', accumulate=1):
    """Train a YOLOv3 / YOLOv3-tiny model described by a data config file.

    All run settings (paths, batch size, image size, epochs, LR milestones,
    optional fine-tune checkpoint) are read from the dictionary returned by
    ``parse_data_cfg(data_cfg)``.  A checkpoint is written to
    ``<weights>/latest.pt`` after every epoch; the model is evaluated with
    ``test.test`` every 10th epoch and ``<weights>/best.pt`` is written when
    that evaluation gives the lowest loss seen so far.

    Args:
        data_cfg: Path of the data/config file handed to ``parse_data_cfg``.
        accumulate: Number of batches whose gradients are accumulated before
            each ``optimizer.step()``.

    Returns:
        None.
    """
    device = select_device()
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration, as a dict

    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_path = get_data_cfg['valid']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale'] == 'True'  # config value is the string 'True'/'False'
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])

    print('data_cfg            : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('gpus             : ', gpus)
    print('num_workers      : ', num_workers)
    print('model            : ', cfg_model)
    print('finetune_model   : ', finetune_model)
    print('train_path       : ', train_path)
    print('valid_ptah       : ', valid_path)
    print('num_classes      : ', num_classes)
    print('batch_size       : ', batch_size)
    print('img_size         : ', img_size)
    print('multi_scale      : ', multi_scale)
    print('lr_step          : ', lr_step)
    # load model
    if "-tiny" in cfg_model:
        model = Yolov3Tiny(num_classes)
        weights = './weights-yolov3-tiny/'
    else:
        model = Yolov3(num_classes)
        weights = './weights-yolov3/'
    # create the directory that receives the checkpoints
    if not os.path.exists(weights):
        os.mkdir(weights)

    model = model.to(device)
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0
    # Initialised before the checkpoint is read so that a restored best_loss
    # is not thrown away later.
    best_loss = float('inf')
    test_loss = float('inf')

    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # Skip scalar tensors and tensors whose first dimension equals
        # not_load_filters; strict=False leaves those entries at their
        # freshly initialised values.
        model.load_state_dict(
            {
                k: v
                for k, v in chkpt['model'].items()
                if v.numel() > 1 and v.shape[0] != not_load_filters
            },
            strict=False)
        start_epoch = chkpt['epoch']
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']

    # Set scheduler: lr is multiplied by gamma at each milestone epoch
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=milestones,
        gamma=0.1,
        last_epoch=start_epoch - 1)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    for epoch in range(start_epoch, epochs):
        print()
        model.train()
        # Update scheduler (stepped at the start of the epoch, before any optimizer.step())
        scheduler.step()

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)

            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: ramp lr from 0 to lr0 over the first n_burnin batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.2f}, wh {:.2f}, '
                'conf {:.2f}, cls {:.2f}, total {:.2f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3],
                    nt,
                    scheduler.get_lr()[0], mloss['xy'], mloss['wh'],
                    mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t))
            t = time.time()  # restart the per-batch timer

        evaluated = False  # True only when test_loss was refreshed in this epoch
        if epoch % 10 == 0:
            # Calculate mAP
            print('\n')
            with torch.no_grad():
                print("-------" * 5 + "testing" + "-------" * 5)
                results = test.test(cfg_model,
                                    data_cfg,
                                    batch_size=batch_size,
                                    img_size=img_size,
                                    model=model)
            # Update best loss
            test_loss = results[4]
            evaluated = True
            if test_loss < best_loss:
                best_loss = test_loss

        # Create checkpoint
        chkpt = {
            'epoch': epoch,
            'best_loss': best_loss,
            'model': (model.module.state_dict()
                      if isinstance(model, nn.parallel.DistributedDataParallel)
                      else model.state_dict()),
            'optimizer': optimizer.state_dict(),
        }

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint. Requiring a fresh evaluation prevents a stale
        # test_loss from an earlier epoch from labelling unevaluated weights
        # as the best ones.
        if evaluated and best_loss == test_loss:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, weights + 'backup%g.pt' % epoch)

        # Delete checkpoint
        del chkpt
Ejemplo n.º 22
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path='./valid.txt',
         dst_path='./output',
         weights=None,
         model=None,
         log_file_path='log.txt'):
    """Run detection over a dataset and append the boxes to per-class files.

    Every detection that survives NMS is rescaled to the original image size
    and appended as ``"<image path> <score> <xmin> <ymin> <xmax> <ymax>"`` to
    ``comp4_det_test_<class name>.txt`` under ``result_path``.  The function
    then returns whatever ``calc_APs()`` returns.

    ``result_path``, ``classes_pred``, ``calc_APs`` and ``opt`` are not
    defined here and must be provided by the enclosing module.

    Args:
        cfg: Model config passed to ``Darknet`` when ``model`` is None.
        data: Data config path; parsed with ``parse_data_cfg``.
        batch_size: Batch size of the evaluation dataloader.
        img_size: Image size handed to ``VocDataset``.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        iou_thres: Accepted for interface compatibility; not used by the code
            in this function.
        nms_thres: NMS threshold passed to ``non_max_suppression``.
        src_txt_path: Image list file handed to ``VocDataset``.
        dst_path: Directory that is created if it does not exist.
        weights: ``.pt`` checkpoint path, read only when ``model`` is None.
        model: Already constructed model; its device is reused.
        log_file_path: Accepted for interface compatibility; not used by the
            code in this function.

    Returns:
        The result of ``calc_APs()``.
    """
    # 0. Initialise parameters
    os.makedirs(dst_path, exist_ok=True)  # also handles nested output paths
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    class_names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device(opt.device)
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format
            # map_location lets a checkpoint saved on another device (e.g. a
            # GPU) be loaded on the device selected above.
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Forward pass and prediction export
    pbar = tqdm(dataloader)
    for img_tensor, _, img_path, shapes in pbar:
        start = time.time()
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)

        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres,
                                             nms_thres)  # list (64,)
            timing = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(timing)

        # pred: (n, 7) -> xyxy, obj_conf*class_conf, class_conf, cls_idx
        for batch_idx, pred in enumerate(nms_output):
            if pred is None:  # no detection for this image
                continue
            # keep columns 0-4 (box + combined score) and column 6 (class index)
            bboxes_prd = torch.cat((pred[:, 0:5], pred[:, 6].unsqueeze(1)),
                                   dim=1).cpu().numpy()

            # Map the coordinates back to the original image size; the return
            # value is ignored, so scale_coords is relied on to update
            # bboxes_prd in place.
            scale_coords(img_tensor[batch_idx].shape[1:], bboxes_prd,
                         shapes[batch_idx][0],
                         shapes[batch_idx][1])  # to original shape

            for bbox in bboxes_prd:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = '%.4f' % bbox[4]
                class_ind = int(bbox[5])

                class_name = class_names[class_ind]
                classes_pred.add(class_name)
                xmin, ymin, xmax, ymax = map(str, coor)
                line = ' '.join([
                    str(img_path[batch_idx]),
                    str(score), xmin, ymin, xmax, ymax
                ]) + '\n'

                with open(
                        os.path.join(result_path,
                                     'comp4_det_test_' + class_name + '.txt'),
                        'a') as f:
                    f.write(line)
    return calc_APs()
Ejemplo n.º 23
0
def train(data_cfg='cfg/voc.data', accumulate=1):
    """Train a YOLOv3 / YOLOv3-tiny model described by a data config file.

    All run settings are read from the dictionary returned by
    ``parse_data_cfg(data_cfg)``.  The reference 416-pixel anchors are
    rescaled to the configured ``img_size`` before the model is built.  A
    checkpoint is written to ``<weights>/latest.pt`` after every epoch; the
    model is evaluated with ``test.test`` on every 5th epoch after epoch 0 and
    ``<weights>/best.pt`` is written when that evaluation gives the lowest
    loss seen so far.

    Args:
        data_cfg: Path of the data/config file handed to ``parse_data_cfg``.
        accumulate: Number of batches whose gradients are accumulated before
            each ``optimizer.step()``.

    Returns:
        None.
    """
    device = select_device()
    # Config
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration, as a dict

    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_path = get_data_cfg['valid']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale'] == 'True'  # config value is the string 'True'/'False'
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])

    print('data_cfg            : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('gpus             : ', gpus)
    print('num_workers      : ', num_workers)
    print('model            : ', cfg_model)
    print('finetune_model   : ', finetune_model)
    print('train_path       : ', train_path)
    print('valid_ptah       : ', valid_path)
    print('num_classes      : ', num_classes)
    print('batch_size       : ', batch_size)
    print('img_size         : ', img_size)
    print('multi_scale      : ', multi_scale)
    print('lr_step          : ', lr_step)
    # load model; anchors are given for a 416-pixel input and rescaled to img_size
    a_scalse = 416. / img_size
    if "tiny" in cfg_model:
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                   (344, 319)]
        anchors_new = [(int(w / a_scalse), int(h / a_scalse))
                       for w, h in anchors]
        print('old anchors : ', anchors)
        model = Yolov3Tiny(num_classes, anchors=anchors_new)
        weights = './weights-yolov3-tiny/'
    else:
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        anchors_new = [(int(w / a_scalse), int(h / a_scalse))
                       for w, h in anchors]
        model = Yolov3(num_classes, anchors=anchors_new)
        weights = './weights-yolov3/'
    # create the directory that receives the checkpoints
    if not os.path.exists(weights):
        os.mkdir(weights)

    latest = weights + 'latest.pt'
    best = weights + 'best.pt'

    # Move the model to its device before the optimizer is built from its parameters
    model = model.to(device)

    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0

    print(finetune_model)

    if os.access(finetune_model, os.F_OK):
        print(
            '\n/************************** load_model *************************/'
        )
        print(finetune_model)
        # map_location lets a checkpoint saved on another device be loaded here
        load_model(model, torch.load(finetune_model, map_location=device))
    else:
        print('finetune_model not exist !')

    # Scheduler: lr is multiplied by gamma at each milestone epoch
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=milestones,
        gamma=0.1,
        last_epoch=start_epoch - 1)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num : ', len(dataset))
    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    best_loss = float('inf')
    test_loss = float('inf')

    for epoch in range(start_epoch, epochs):

        print('')
        model.train()

        # Update scheduler (stepped at the start of the epoch, before any optimizer.step())
        scheduler.step()

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)

            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: ramp lr from 0 to lr0 over the first n_burnin batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.2f}, wh {:.2f}, '
                'conf {:.2f}, cls {:.2f}, total {:.2f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3],
                    nt,
                    scheduler.get_lr()[0], mloss['xy'], mloss['wh'],
                    mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t),
                end='\r')
            t = time.time()  # restart the per-batch timer

        evaluated = False  # True only when test_loss was refreshed in this epoch
        if epoch % 5 == 0 and epoch > 0:
            # Calculate mAP
            print('\n')
            with torch.no_grad():
                print("-------" * 5 + "testing" + "-------" * 5)
                results = test.test(cfg_model,
                                    data_cfg,
                                    batch_size=batch_size,
                                    img_size=img_size,
                                    model=model)
            # Update best loss
            test_loss = results[4]
            evaluated = True
            if test_loss < best_loss:
                best_loss = test_loss

        # Create checkpoint
        chkpt = {
            'epoch': epoch,
            'best_loss': best_loss,
            'model': (model.module.state_dict()
                      if isinstance(model, nn.parallel.DistributedDataParallel)
                      else model.state_dict()),
            'optimizer': optimizer.state_dict(),
        }

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint. Requiring a fresh evaluation stops epoch 0
        # (where both losses are still inf, hence equal) from writing
        # unevaluated weights as the best ones.
        if evaluated and best_loss == test_loss:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, weights + 'Detect%g.pt' % epoch)

        # Delete checkpoint
        del chkpt
Ejemplo n.º 24
0
def train():
    """Train a Darknet detection model and save a checkpoint after every epoch.

    All configuration is read from module-level state rather than arguments:
    ``opt`` (cfg, weights, img_size, batch_size, epochs, data), ``device``,
    ``lr0``, ``momentum``, ``weight_decay``, ``mixed_precision``,
    ``log_file_path`` and ``last_model_path``.

    Steps:
        1. Build the model and, if ``opt.weights`` is non-empty, load
           pretrained weights (``.pt`` checkpoint, ``.pth`` state dict, or
           darknet format).
        2. Create an SGD optimizer with three parameter groups and a
           MultiStepLR schedule (lr x0.1 at 80% and 90% of the epochs).
        3. Train, printing the running mean losses every 10 batches.
        4. After each epoch: step the scheduler, run ``test.test``, write
           TensorBoard scalars and save the latest checkpoint.

    Raises:
        KeyError: if a ``.pt`` checkpoint is not compatible with the cfg.
        Exception: if the training loss becomes non-finite.
    """
    # 0. Initialize parameters (random seed, cfg info)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print configuration info
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Build the model and load pretrained weights (if any)
    model = Darknet(cfg).to(device)

    if weights.endswith('.pt'):
        # A plain model.load_state_dict(torch.load(weights)['model']) fails
        # with a size mismatch when the class count differs (the layer in
        # front of each yolo layer, e.g. checkpoint [255, 1024, 1, 1] vs
        # model [75, 1024, 1, 1]). Tensors whose element count differs are
        # therefore dropped and the rest is loaded non-strictly.
        chkpt = torch.load(weights, map_location=device)
        try:
            # Snapshot once: state_dict() is rebuilt on every call.
            model_state = model.state_dict()
            chkpt['model'] = {k: v for k, v in chkpt['model'].items()
                              if model_state[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
        except KeyError as e:
            msg = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(msg) from e

    elif weights.endswith('.pth'):
        # e.g. 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
        chkpt = torch.load(weights, map_location=device)
        model_state = model.state_dict()
        model_keys = list(model_state.keys())
        model_values = list(model_state.values())
        chkpt_keys = list(chkpt.keys())

        # Checkpoint entries are consumed in groups of five. Within a group,
        # entries 1-2 and 3-4 swap places relative to the model's key order,
        # and after each group one extra model entry is kept from the model's
        # own state; block_cnt counts those extra entries.
        # NOTE(review): presumably the extra entry is BatchNorm's
        # num_batches_tracked and the swap reconciles weight/bias vs running
        # statistics ordering -- confirm against the model definition.
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2  # trailing checkpoint entries that are not mapped
        for i in range(len(chkpt_keys) - fc_item_num):
            pos = i % 5
            if pos == 0:
                state_dict[model_keys[i + block_cnt]] = chkpt[chkpt_keys[i]]
            elif pos in (1, 2):
                state_dict[model_keys[i + block_cnt + 2]] = chkpt[chkpt_keys[i]]
            else:  # pos is 3 or 4
                state_dict[model_keys[i + block_cnt - 2]] = chkpt[chkpt_keys[i]]
                if pos == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]

        model.load_state_dict(state_dict, strict=False)

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)

    # The run configuration is logged only when pretrained weights were given
    # (every branch above implies a non-empty ``weights`` string).
    if weights:
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    # 2. Optimizer and learning-rate schedule
    start_epoch = 0

    # Optimizer parameter groups: only Conv2d weights get weight decay.
    pg0, pg1, pg2 = [], [], []
    for name, param in model.named_parameters():
        if '.bias' in name:
            pg2.append(param)  # biases
        elif 'Conv2d.weight' in name:
            pg1.append(param)  # apply weight_decay
        else:
            pg0.append(param)  # everything else

    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    # apex mixed precision
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training. For multi-GPU, DistributedDataParallel
    # must be applied after amp.initialize(), otherwise it raises an error.
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    model.nc = nc

    # Step learning-rate schedule (a cosine LambdaLR schedule is a possible
    # alternative that was left disabled).
    milestones = [round(total_epochs * x) for x in [0.8, 0.9]]
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8,  # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)

    # 4. Train
    print('')   # blank line
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)

    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()    # tensorboard --logdir=runs, view at http://localhost:6006/

    try:
        for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
            # Set here because test.test() at the end of every epoch switches
            # the model to eval mode.
            model.train()

            # NOTE: bias pre-training, backbone freezing and SGD burn-in are
            # not applied in this training loop.

            start = time.time()
            title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
            print(title)

            for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):
                batch_start = time.time()
                img_tensor = img_tensor.to(device)
                target_tensor = target_tensor.to(device)

                # (1) Forward pass
                pred = model(img_tensor)

                # (2) Compute the loss
                loss, loss_items = compute_loss(pred, target_tensor, model)
                if not torch.isfinite(loss):
                    raise Exception('WARNING: non-finite loss, ending training ', loss_items)

                # (3) Backward pass
                if mixed_precision:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                # (4) Update parameters, then clear gradients (no accumulation)
                optimizer.step()
                optimizer.zero_grad()

                # Running mean of the loss items; at i == 0 this equals
                # loss_items, so the mean restarts every epoch.
                mloss = (mloss * i + loss_items) / (i + 1)
                # NOTE: memory_cached() is deprecated in newer PyTorch in
                # favour of memory_reserved(); kept for older versions.
                mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                # Read the lr from the optimizer: scheduler.get_lr() is not
                # meant to be called outside step() on recent PyTorch and can
                # report a wrong value at milestone epochs.
                current_lr = optimizer.param_groups[0]['lr']
                s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
                    '%g/%g' % (epoch, total_epochs - 1),
                    '%g/%g' % (i, nb - 1),
                    '%.3gG' % mem,
                    *mloss,
                    len(target_tensor),
                    img_size,
                    current_lr,
                    time.time() - batch_start)

                if i % 10 == 0:
                    print(s)

                # Plot the very first training batch
                if epoch == start_epoch and i == 0:
                    fname = 'train_batch.jpg'  # filename
                    cur_path = os.getcwd()
                    res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                    writer.add_image(fname, res, dataformats='HWC', global_step=epoch)

                # end batch ----------------------------------------------------------------------------------------

            print('time use per epoch: %.3fs' % (time.time() - start))

            # Log the header and the last batch line of this epoch
            write_to_file(title, log_file_path)
            write_to_file(s, log_file_path)

            # Update scheduler
            scheduler.step()

            # Compute mAP
            # NOTE(review): the data cfg path is hard-coded although opt.data
            # was parsed above -- confirm this is intended.
            results, maps = test.test(cfg,
                                      'cfg/voc.data',
                                      batch_size=batch_size,
                                      img_size=img_size,
                                      conf_thres=0.05,
                                      iou_thres=0.5,
                                      nms_thres=0.5,
                                      src_txt_path=valid_txt_path,
                                      dst_path='./output',
                                      weights=None,
                                      model=model,
                                      log_file_path=log_file_path)

            # Tensorboard
            tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                writer.add_scalar(tag, x, epoch)

            # Save the model; unwrap the DistributedDataParallel wrapper (multi-GPU)
            is_ddp = isinstance(model, nn.parallel.DistributedDataParallel)
            chkpt = {'epoch': epoch,
                     'model': model.module.state_dict() if is_ddp else model.state_dict(),
                     'optimizer': optimizer.state_dict()}

            torch.save(chkpt, last_model_path)
    finally:
        # Flush and release the TensorBoard event file even if training aborts.
        writer.close()

    print('end')