Beispiel #1
0
def kmean_anchors(path='./data/coco64.txt',
                  n=9,
                  img_size=(640, 640),
                  thr=0.20,
                  gen=1000):
    """ Creates kmeans anchors for use in *.cfg files, then evolves them by random mutation

        Arguments:
            path: dataset source handed to LoadImagesAndLabels
            n: number of anchors
            img_size: (min, max) image size used for multi-scale training (can be same values)
            thr: IoU threshold hyperparameter used for training (0.0 - 1.0)
            gen: generations to evolve anchors using genetic algorithm

        Return:
            k: evolved anchors (one wh row per anchor), sorted by area from small to large

        Usage:
            from utils.utils import *; _ = kmean_anchors()
    """
    from utils.datasets import LoadImagesAndLabels

    def show_anchors(k):
        """Print IoU statistics for anchors k and return k sorted by area."""
        by_area = np.argsort(k.prod(1))
        k = k[by_area]  # small to large
        iou = wh_iou(wh, torch.Tensor(k))
        max_iou = iou.max(1)[0]  # best anchor IoU per label
        bpr = (max_iou > thr).float().mean()  # best possible recall
        aat = (iou > thr).float().mean() * n  # anchors above threshold
        print('%.2f iou_thr: %.3f best possible recall, %.2f anchors > thr' %
              (thr, bpr, aat))
        stats = (n, img_size, iou.mean(), max_iou.mean(), iou[iou > thr].mean())
        print(
            'n=%g, img_size=%s, IoU_all=%.3f/%.3f-mean/best, IoU>thr=%.3f-mean: '
            % stats,
            end='')
        final = len(k) - 1
        for idx, (w, h) in enumerate(k):
            terminator = '\n' if idx >= final else ',  '
            print('%i,%i' % (round(w), round(h)), end=terminator)  # use in *.cfg
        return k

    def anchor_score(k):
        """Mean best-anchor IoU over all labels, counting labels at or below thr as zero."""
        best = wh_iou(wh, torch.Tensor(k)).max(1)[0]
        passed = (best > thr).float()
        return (best * passed).mean()

    # Collect label wh, rescaled from image-normalized to letterbox-normalized
    dataset = LoadImagesAndLabels(path, augment=True, rect=True)
    same_size = img_size[0] == img_size[1]
    nr = 1 if same_size else 10  # number of augmentation repetitions
    wh = [
        l[:, 3:5] * (s / s.max())
        for s, l in zip(dataset.shapes, dataset.labels)
    ]
    wh = np.concatenate(wh, 0).repeat(nr, axis=0)  # each label repeated nr times
    pixel_scale = np.random.uniform(img_size[0],
                                    img_size[1],
                                    size=(wh.shape[0], 1))
    wh *= pixel_scale  # normalized to pixels (multi-scale)
    keep = (wh > 2.0).all(1)  # both sides must exceed 2 pixels
    wh = wh[keep]

    # Kmeans on whitened wh
    from scipy.cluster.vq import kmeans
    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
    sigma = wh.std(0)  # per-dimension std used for whitening
    k = kmeans(wh / sigma, n, iter=30)[0]  # centroids only, mean distance dropped
    k *= sigma  # undo whitening
    wh = torch.Tensor(wh)  # the nested helpers read wh as a tensor from here on
    k = show_anchors(k)

    # Evolve: keep a mutated copy of the anchors only when it scores higher
    npr = np.random
    best_score = anchor_score(k)
    shape = k.shape
    mutation_prob = 0.9
    mutation_sigma = 0.1
    for _ in tqdm(range(gen), desc='Evolving anchors'):
        # Redraw until at least one multiplier differs from 1 (prevents duplicates)
        while True:
            selected = npr.random(shape) < mutation_prob
            strength = npr.random()
            noise = npr.randn(*shape)
            v = (selected * strength * noise * mutation_sigma + 1).clip(
                0.3, 3.0)
            if (v != 1).any():
                break
        candidate = (k * v).clip(min=2.0)
        candidate_score = anchor_score(candidate)
        if candidate_score > best_score:
            best_score = candidate_score
            k = candidate.copy()
            show_anchors(k)

    return show_anchors(k)
Beispiel #2
0
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: path to data.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors, sorted by area from small to large

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    from scipy.cluster.vq import kmeans

    npr = np.random
    thr = 1 / thr  # all comparisons below use the inverted threshold

    def metric(k, wh):
        """Return the label-vs-anchor ratio metric and its best value per label."""
        ratio = wh[:, None] / k[None]
        x = torch.min(ratio, 1 / ratio).min(2)[0]  # worse of the two side ratios
        return x, x.max(1)[0]

    def anchor_fitness(k):
        """Mean best metric over the filtered labels, zeroing labels not past thr."""
        best = metric(torch.tensor(k, dtype=torch.float32), wh)[1]
        passed = (best > thr).float()
        return (best * passed).mean()

    def print_results(k, verbose=True):
        """Sort k by area, build the summary on the unfiltered labels, log it if verbose."""
        k = k[np.argsort(k.prod(1))]
        ratios, best = metric(k, wh0)
        recall = (best > thr).float().mean()  # best possible recall
        above = (ratios > thr).float().mean() * n  # anchors past thr
        summary = (f'{PREFIX}thr={thr:.2f}: {recall:.4f} best possible recall, {above:.2f} anchors past thr\n'
                   f'{PREFIX}n={n}, img_size={img_size}, '
                   f'metric_all={ratios.mean():.3f}/{best.mean():.3f}-mean/best, '
                   f'past_thr={ratios[ratios > thr].mean():.3f}-mean: ')
        summary += ''.join('%i,%i, ' % (round(row[0]), round(row[1])) for row in k)
        if verbose:
            LOGGER.info(summary[:-2])  # drop the trailing ', '
        return k

    # A string is read as a *.yaml file whose 'train' entry is loaded as the dataset
    if isinstance(dataset, str):
        with open(dataset, errors='ignore') as stream:
            data_dict = yaml.safe_load(stream)
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Label wh scaled by the per-image shapes
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])

    # Filter: warn about labels with a side under 3, keep labels with a side of at least 2
    n_small = (wh0 < 3.0).any(1).sum()
    if n_small:
        LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found. {n_small} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]

    # Kmeans on whitened wh
    LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
    sigma = wh.std(0)  # per-dimension std used for whitening
    k = kmeans(wh / sigma, n, iter=30)[0] * sigma
    if len(k) != n:
        # kmeans may return fewer points than requested if wh is insufficient or too similar
        LOGGER.warning(f'{PREFIX}WARNING: scipy.cluster.vq.kmeans returned only {len(k)} of {n} requested points')
        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered, read by anchor_fitness
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered, read by print_results
    k = print_results(k, verbose=False)

    # Evolve: keep a mutated copy of the anchors only when its fitness is higher
    best_fitness = anchor_fitness(k)
    shape = k.shape
    mutation_prob, mutation_sigma = 0.9, 0.1
    pbar = tqdm(range(gen), desc=f'{PREFIX}Evolving anchors with Genetic Algorithm:')
    for _ in pbar:
        # Redraw until at least one multiplier differs from 1 (prevents duplicates)
        while True:
            selected = npr.random(shape) < mutation_prob
            strength = random.random()
            noise = npr.randn(*shape)
            v = (selected * strength * noise * mutation_sigma + 1).clip(0.3, 3.0)
            if (v != 1).any():
                break
        candidate = (k * v).clip(min=2.0)
        candidate_fitness = anchor_fitness(candidate)
        if candidate_fitness > best_fitness:
            best_fitness = candidate_fitness
            k = candidate.copy()
            pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {best_fitness:.4f}'
            if verbose:
                print_results(k, verbose)

    return print_results(k)
Beispiel #3
0
def kmean_anchors(path='./data/coco128.yaml',
                  n=9,
                  img_size=640,
                  thr=4.0,
                  gen=1000,
                  verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            path: path to dataset *.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print results every time the evolution finds a fitter anchor set

        Return:
            k: kmeans evolved anchors, sorted by area from small to large

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    npr = np.random
    thr = 1. / thr  # all comparisons below use the inverted threshold

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)  # statistics use the unfiltered labels
        bpr = (best > thr).float().mean()  # best possible recall
        aat = (x > thr).float().mean() * n  # anchors past thr
        print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' %
              (thr, bpr, aat))
        print(
            'n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: '
            % (n, img_size, x.mean(), best.mean(), x[x > thr].mean()),
            end='')
        last = len(k) - 1
        for i, anchor in enumerate(k):
            print('%i,%i' % (round(anchor[0]), round(anchor[1])),
                  end=',  ' if i < last else '\n')  # use in *.cfg
        return k

    if isinstance(path, str):  # *.yaml file
        with open(path) as f:
            # NOTE(review): yaml.load with FullLoader is kept as in the original; prefer
            # yaml.safe_load if this file can come from an untrusted source.
            data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'],
                                      augment=True,
                                      rect=True)
    else:
        dataset = path  # dataset

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate(
        [l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter: warn about labels with a side under 3, keep labels with a side of at least 2
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print('WARNING: Extremely small objects found. '
              '%g of %g labels are < 3 pixels in width or height.' %
              (i, len(wh0)))
    wh = wh0[(wh0 >= 2.0).any(1)]

    # Kmeans calculation
    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
    s = wh.std(0)  # sigmas for whitening
    k = kmeans(wh / s, n, iter=30)[0] * s  # points (mean distance is not needed)
    if len(k) != n:
        # kmeans may return fewer points than requested if wh is insufficient or too
        # similar; without n rows the "anchors past thr" figure and the returned anchor
        # count would both be wrong, so start the evolution from a random init instead.
        print('WARNING: scipy.cluster.vq.kmeans returned only %g of %g requested points' %
              (len(k), n))
        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve: keep a mutated copy of the anchors only when its fitness is higher
    f, sh, mp, s = anchor_fitness(
        k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen),
                desc='Evolving anchors with Genetic Algorithm')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1
               ).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s +
                 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
            if verbose:
                print_results(k)

    return print_results(k)
Beispiel #4
0
def _scale_anchors(anchors, img_size):
    """Rescale (w, h) anchor pairs given for a 416-pixel input to an img_size-pixel input."""
    scale = 416. / img_size
    return [(int(w / scale), int(h / scale)) for w, h in anchors]


def _build_checkpoint(model, optimizer, epoch, best_loss):
    """Collect the training state into the dict layout that train() writes with torch.save."""
    is_ddp = type(model) is nn.parallel.DistributedDataParallel
    return {
        'epoch': epoch,
        'best_loss': best_loss,
        # a DistributedDataParallel wrapper keeps the real network under .module
        'model': model.module.state_dict() if is_ddp else model.state_dict(),
        'optimizer': optimizer.state_dict()
    }


def train(data_cfg='cfg/voc.data', accumulate=1):
    """Train a Yolov3 / Yolov3Tiny model from the settings found in a data config file.

    Arguments:
        data_cfg: path handed to parse_data_cfg; the parsed dict must provide the keys
            num_workers, cfg_model, train, valid, classes, finetune_model, batch_size,
            img_size, multi_scale, epochs and lr_step
        accumulate: number of batches whose gradients are accumulated before each
            optimizer step

    Side effects:
        Creates the weights directory if needed and writes 'latest.pt', 'best.pt' and
        periodic 'Detect<epoch>.pt' checkpoints into it. Returns nothing.
    """
    device = select_device()

    # Config
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration parameters, as a dict

    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_path = get_data_cfg['valid']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale'] == 'True'  # config value is the string 'True'
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])

    print('data_cfg            : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('num_workers      : ', num_workers)
    print('model            : ', cfg_model)
    print('finetune_model   : ', finetune_model)
    print('train_path       : ', train_path)
    print('valid_ptah       : ', valid_path)
    print('num_classes      : ', num_classes)
    print('batch_size       : ', batch_size)
    print('img_size         : ', img_size)
    print('multi_scale      : ', multi_scale)
    print('lr_step          : ', lr_step)

    # Load model; the anchor tables are defined for a 416 input and rescaled to img_size
    if "tiny" in cfg_model:
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                   (344, 319)]
        print('old anchors : ', anchors)
        model = Yolov3Tiny(num_classes,
                           anchors=_scale_anchors(anchors, img_size))
        weights = './weights-yolov3-tiny/'
    else:
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        model = Yolov3(num_classes, anchors=_scale_anchors(anchors, img_size))
        weights = './weights-yolov3/'

    # Directory that receives the saved checkpoints
    os.makedirs(weights, exist_ok=True)

    latest = weights + 'latest.pt'
    best = weights + 'best.pt'

    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0
    model = model.to(device)

    print(finetune_model)

    if os.access(finetune_model, os.F_OK):
        print(
            '\n/************************** load_model *************************/'
        )
        print(finetune_model)
        # map_location lets a checkpoint saved on a GPU be read on a CPU-only host
        load_model(model, torch.load(finetune_model, map_location=device))
    else:
        print('finetune_model not exist !')

    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=milestones,
        gamma=0.1,
        last_epoch=start_epoch - 1)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num : ', len(dataset))

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    best_loss = float('inf')
    test_loss = float('inf')
    for epoch in range(start_epoch, epochs):

        print('')
        model.train()

        # NOTE(review): the scheduler is stepped at the start of the epoch, before any
        # optimizer.step(); kept as in the original so the decay epochs do not move.
        scheduler.step()

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)

            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: ramp the learning rate from 0 to lr0 during the first epoch
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            # The rate is read from the optimizer so the burn-in value set above is shown
            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.2f}, wh {:.2f}, '
                'conf {:.2f}, cls {:.2f}, total {:.2f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3],
                    nt,
                    optimizer.param_groups[0]['lr'], mloss['xy'], mloss['wh'],
                    mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t),
                end='\r')

            t = time.time()
            if i % 300 == 0 and i > 0:
                # Save latest checkpoint every 300 batches
                torch.save(
                    _build_checkpoint(model, optimizer, epoch, best_loss),
                    latest)

        if epoch % 20 == 0 and epoch > 0:
            # Calculate mAP
            print('\n')
            with torch.no_grad():
                print("-------" * 5 + "testing" + "-------" * 5)
                results = test.test(cfg_model,
                                    data_cfg,
                                    batch_size=batch_size,
                                    img_size=img_size,
                                    model=model)
            # Update best loss
            test_loss = results[4]
            if test_loss < best_loss:
                best_loss = test_loss

        # Create checkpoint
        chkpt = _build_checkpoint(model, optimizer, epoch, best_loss)

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint
        # NOTE(review): test_loss is only refreshed every 20 epochs, so this comparison
        # also holds on epochs that were not tested (both values start at inf).
        if best_loss == test_loss and epoch % 5 == 0:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, weights + 'Detect%g.pt' % epoch)

        # Delete checkpoint
        del chkpt
Beispiel #5
0
    def log_dataset_artifact(self,
                             data_file,
                             single_cls,
                             project,
                             overwrite_config=False):
        """
        Log the dataset as W&B artifact and return the new data file with W&B links

        arguments:
        data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
        single_cls (boolean)  -- train multi-class data as single-class
        project (str) -- project name. Used to construct the artifact path
        overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
        file with _wandb postfix. Eg -> data_wandb.yaml

        returns:
        the new .yaml file with artifact links. it can be used to start training directly from artifacts.
        When only the validation split is uploaded (upload_dataset == 'val') no new file is written and
        Path(data_file) is returned instead.
        """
        upload_dataset = self.wandb_run.config.upload_dataset
        log_val_only = isinstance(upload_dataset,
                                  str) and upload_dataset == 'val'
        self.data_dict = check_dataset(data_file)  # parse and check
        data = dict(self.data_dict)  # shallow copy, so self.data_dict keeps the original paths
        # nc is not used below; for multi-class data int(data['nc']) still has to succeed
        nc, names = (1, ['item']) if single_cls else (int(data['nc']),
                                                      data['names'])
        names = dict(enumerate(names))  # to index dictionary

        # log train set
        if not log_val_only:
            has_train = data.get('train')
            self.train_artifact = self.create_dataset_table(
                LoadImagesAndLabels(data['train'], rect=True, batch_size=1),
                names,
                name='train') if has_train else None
            if has_train:
                data['train'] = WANDB_ARTIFACT_PREFIX + str(
                    Path(project) / 'train')

        has_val = data.get('val')
        self.val_artifact = self.create_dataset_table(
            LoadImagesAndLabels(data['val'], rect=True, batch_size=1),
            names,
            name='val') if has_val else None
        if has_val:
            data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')

        path = Path(data_file)
        # create a _wandb.yaml file with artifacts links if both train and test set are logged
        if not log_val_only:
            path = (path.stem if overwrite_config else path.stem +
                    '_wandb') + '.yaml'  # updated data.yaml path
            path = ROOT / 'data' / path
            data.pop('download', None)
            data.pop('path', None)
            with open(path, 'w') as f:
                yaml.safe_dump(data, f)
                LOGGER.info(f"Created dataset config file {path}")

        # A train artifact exists only if this call built one: it is skipped entirely in
        # val-only mode and is None when the config has no 'train' entry.
        log_train = not log_val_only and self.train_artifact is not None

        if self.job_type == 'Training':  # builds correct artifact pipeline graph
            if log_train:
                self.wandb_run.log_artifact(
                    self.train_artifact
                )  # calling use_artifact downloads the dataset. NOT NEEDED!
            # this branch requires a val artifact: the val table is read back from it
            self.wandb_run.use_artifact(self.val_artifact)
            self.val_artifact.wait()
            self.val_table = self.val_artifact.get('val')
            self.map_val_table_path()
        else:
            # Same guard as the Training branch: never pass a missing artifact to W&B
            if log_train:
                self.wandb_run.log_artifact(self.train_artifact)
            if self.val_artifact is not None:
                self.wandb_run.log_artifact(self.val_artifact)
        return path
Beispiel #6
0
def train():
    cfg = opt.cfg
    data = opt.data
    img_size = opt.img_size
    epochs = 1 if opt.prebias else int(
        hyp['epochs'])  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = int(hyp['batch_size'])
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights

    if 'pw' not in opt.arc:  # remove BCELoss positive weights
        hyp['cls_pw'] = 1.
        hyp['obj_pw'] = 1.

    # Initialize
    init_seeds()
    multi_scale = opt.multi_scale

    if multi_scale:
        img_sz_min = round(img_size / 32 / 1.5) + 1
        img_sz_max = round(img_size / 32 * 1.3) - 1
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    nc = int(data_dict['classes'])  # number of classes

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg, hyp, arc=opt.arc).to(device)

    # Optimizer
    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if 'Conv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    del pg0, pg1

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_fitness = 0.
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
        if opt.bucket:
            os.system('gsutil cp gs://%s/last.pt %s' %
                      (opt.bucket, last))  # download from bucket
        chkpt = torch.load(weights, map_location=device)

        # load model
        # if opt.transfer:
        chkpt['model'] = {
            k: v
            for k, v in chkpt['model'].items()
            if model.state_dict()[k].numel() == v.numel()
        }
        model.load_state_dict(chkpt['model'], strict=False)
        # else:
        #    model.load_state_dict(chkpt['model'])

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        if opt.resume:
            start_epoch = chkpt['epoch'] + 1

        del chkpt

    # elif len(weights) > 0:  # darknet format
    #     # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
    #     cutoff = load_darknet_weights(model, weights)
    if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
        nf = int(model.module_defs[model.yolo_layers[0] -
                                   1]['filters'])  # yolo layer size (i.e. 255)

        for p in optimizer.param_groups:
            # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum
            p['lr'] *= 100
            if p.get('momentum') is not None:  # for SGD but not Adam
                p['momentum'] *= 0.9

        for p in model.parameters():
            if opt.prebias and p.numel() == nf:  # train (yolo biases)
                p.requires_grad = True
            elif opt.transfer and p.shape[
                    0] == nf:  # train (yolo biases+weights)
                p.requires_grad = True
            else:  # freeze layer
                p.requires_grad = False

    # Scheduler https://github.com/ultralytics/yolov3/issues/238
    # lf = lambda x: 1 - x / epochs  # linear ramp to zero
    # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs)  # exp ramp
    # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs))  # inverse exp ramp
    # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=range(59, 70, 1), gamma=0.8)  # gradual fall to 0.1*lr0
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(epochs * x) for x in [0.8, 0.9]],
        gamma=0.1)
    # 带重启的余弦退火
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max = 0.1*epochs, eta_min=0, last_epoch=-1)
    # 余弦退火
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    scheduler = GradualWarmupScheduler(optimizer,
                                       multiplier=hyp['multiplier'],
                                       total_epoch=hyp['warm_epoch'],
                                       after_scheduler=scheduler)
    scheduler.last_epoch = start_epoch - 1

    # # # Plot lr schedule(注意别一直开着!否则lr调整失效)
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, label='LR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    if mixed_precision:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(
            backend='nccl',  # 'distributed backend'
            init_method=
            'tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        image_weights=opt.img_weights,
        cache_labels=True if epochs > 10 else False,
        cache_images=False if opt.prebias else opt.cache_images,
    )

    # Dataloader
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 16]),
        shuffle=not opt.
        rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Start training
    model.nc = nc  # attach number of classes to model
    model.arc = opt.arc  # attach yolo architecture
    model.hyp = hyp  # attach hyperparameters to model
    # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model_info(model, report='summary')  # 'full' or 'summary'
    nb = len(dataloader)
    maps = np.zeros(nc)  # mAP per class
    # results = (0, 0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification', 'val Regression'
    results = (
        0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification', 'val Regression'
    t0 = time.time()
    print('Starting %s for %g epochs...' %
          ('prebias' if opt.prebias else 'training', epochs))
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()
        model.epoch = epoch
        # print(('\n' + '%10s' * 9) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'reg', 'total', 'targets', 'img_size'))
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'obj', 'cls', 'reg',
                                     'total', 'targets', 'img_size'))

        # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional)
        freeze_backbone = False
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:  # if layer < 75
                    p.requires_grad = False if epoch == 0 else True

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 -
                                                     maps)**2  # class weights
            image_weights = labels_to_image_weights(dataset.labels,
                                                    nc=nc,
                                                    class_weights=w)
            dataset.indices = random.choices(range(dataset.n),
                                             weights=image_weights,
                                             k=dataset.n)  # rand weighted idx

        # mloss = torch.zeros(5).to(device)  # mean losses
        mloss = torch.zeros(4).to(device)  # mean losses

        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        # 着重注意这个targets,已经经过resize到416,augment等变化了,不能直接映射到原图
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device)
            targets = targets.to(device)

            # Multi-Scale training
            if multi_scale:
                if ni / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                    img_size = random.randrange(img_sz_min,
                                                img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [
                        math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]
                    ]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Plot images with bounding boxes
            if ni == 0:
                fname = 'train_batch%g.jpg' % i
                plot_images(imgs=imgs,
                            targets=targets,
                            paths=paths,
                            fname=fname)
                if tb_writer:
                    tb_writer.add_image(fname,
                                        cv2.imread(fname)[:, :, ::-1],
                                        dataformats='HWC')

            # Hyperparameter burn-in
            # n_burn = nb - 1  # min(nb // 5 + 1, 1000)  # number of burn-in batches
            # if ni <= n_burn:
            #     for m in model.named_modules():
            #         if m[0].endswith('BatchNorm2d'):
            #             m[1].momentum = 1 - i / n_burn * 0.99  # BatchNorm2d momentum falls from 1 - 0.01
            #     g = (i / n_burn) ** 4  # gain rises from 0 - 1
            #     for x in optimizer.param_groups:
            #         x['lr'] = hyp['lr0'] * g
            #         x['weight_decay'] = hyp['weight_decay'] * g

            # Run model
            pred = model(imgs)
            # Compute loss
            loss, loss_items = compute_loss(pred, targets, model, hyp)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            # loss *= batch_size / 64
            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0  # (GB)
            # s = ('%10s' * 2 + '%10.3g' * 7) % (
            #     '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss, len(targets), img_size)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1),
                                               '%.3gG' % mem, *mloss,
                                               len(targets), img_size)
            pbar.set_description(s)

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        final_epoch = epoch + 1 == epochs
        if opt.prebias:
            print_model_biases(model)
        else:
            # Calculate mAP (always test final epoch, skip first 10 if opt.nosave)
            if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch:
                if not epoch < 10:  # 前部分epoch proposal太多,不计算
                    with torch.no_grad():
                        if epoch % hyp['test_interval'] == 0 and epoch != 0:
                            results, maps = test.test(
                                cfg,
                                data,
                                batch_size=1,
                                img_size=opt.img_size,
                                model=model,
                                hyp=hyp,
                                conf_thres=0.001 if final_epoch and epoch > 0
                                else 0.1,  # 0.1 for speed
                                save_json=final_epoch and epoch > 0
                                and 'coco.data' in data)
            # Write epoch results
        with open(results_file, 'a') as f:
            # f.write(s + '%10.3g' * 8 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            f.write(s + '%10.3g' * 7 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = [
                'GIoU', 'Objectness', 'Classification', 'Train loss',
                'Precision', 'Recall', 'mAP', 'F1', 'val GIoU',
                'val Objectness', 'val Classification'
            ]
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fitness = results[2]  # mAP
        if fitness > best_fitness:
            best_fitness = fitness

        # Save training results
        save = (not opt.nosave) or (final_epoch
                                    and not opt.evolve) or opt.prebias
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint
                chkpt = {
                    'epoch':
                    epoch,
                    'best_fitness':
                    best_fitness,
                    'training_results':
                    f.read(),
                    'model':
                    model.module.state_dict()
                    if type(model) is nn.parallel.DistributedDataParallel else
                    model.state_dict(),
                    'optimizer':
                    None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            # torch.save(chkpt, last)
            # if opt.bucket and not opt.prebias:
            #     os.system('gsutil cp %s gs://%s' % (last, opt.bucket))  # upload to bucket

            # Save best checkpoint
            if best_fitness == fitness:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % hyp['save_interval'] == 0:
                torch.save(chkpt, wdir + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    if len(opt.name):
        os.rename('results.txt', 'results_%s.txt' % opt.name)
    plot_results()  # save as results.png
    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1,
                                                    (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
Beispiel #7
0
def kmean_anchors(path='data/DsiacPlusF2.txt',
                  n=9,
                  img_size=(416, 416),
                  thr=0.20,
                  gen=1000):
    """Estimate ``n`` anchor sizes with k-means, then refine them by mutation.

    Usage: from utils.utils import *; _ = kmean_anchors()
    The printed anchor list is formatted for use in *.cfg files.

    Args:
        path: Dataset path handed to ``LoadImagesAndLabels``.
        n: Number of anchors to produce.
        img_size: ``(min, max)`` image size in pixels. When the two values
            differ, every label is repeated 10 times and each copy is scaled
            by a size drawn uniformly from this range.
        thr: IoU threshold used for the recall statistics and the mutation
            fitness (previously hard-coded to 0.20).
        gen: Number of mutation generations (previously hard-coded to 1000).

    Returns:
        The evolved anchors as an ``(n, 2)`` numpy array of (width, height),
        sorted by area from small to large.
    """
    from utils.datasets import LoadImagesAndLabels

    def print_results(thr, wh, k):
        # Report recall/IoU statistics for anchors k; returns k sorted by area.
        k = k[np.argsort(k.prod(1))]  # sort small to large
        iou = wh_iou(torch.Tensor(wh), torch.Tensor(k))
        max_iou, min_iou = iou.max(1)[0], iou.min(1)[0]
        bpr, aat = (max_iou > thr).float().mean(), (
            iou > thr).float().mean() * n  # best possible recall, anch > thr
        print('%.2f iou_thr: %.3f best possible recall, %.2f anchors > thr' %
              (thr, bpr, aat))
        print(
            'kmeans anchors (n=%g, img_size=%s, IoU=%.3f/%.3f/%.3f-min/mean/best): '
            % (n, img_size, min_iou.mean(), iou.mean(), max_iou.mean()),
            end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])),
                  end=',  ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    def fitness(thr, wh, k):  # mutation fitness
        # Mean best-anchor IoU weighted by the fraction of labels above thr.
        iou = wh_iou(wh, torch.Tensor(k)).max(1)[0]  # max iou
        bpr = (iou > thr).float().mean()  # best possible recall
        return iou.mean() * bpr  # product

    # Get label wh
    wh = []
    dataset = LoadImagesAndLabels(path,
                                  augment=True,
                                  rect=True,
                                  cache_labels=True)
    nr = 1 if img_size[0] == img_size[
        1] else 10  # number augmentation repetitions
    for s, l in zip(dataset.shapes, dataset.labels):
        wh.append(l[:, 3:5] *
                  (s / s.max()))  # image normalized to letterbox normalized wh
    wh = np.concatenate(wh, 0).repeat(nr, axis=0)  # augment 10x
    wh *= np.random.uniform(img_size[0], img_size[1],
                            size=(wh.shape[0],
                                  1))  # normalized to pixels (multi-scale)

    # Darknet yolov3.cfg anchors
    use_darknet = False
    if use_darknet:
        k = np.array([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                      [59, 119], [116, 90], [156, 198], [373, 326]])
    else:
        # Kmeans calculation
        from scipy.cluster.vq import kmeans
        print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
        s = wh.std(0)  # sigmas for whitening
        # The mean distance returned by kmeans is not needed here.
        k, _ = kmeans(wh / s, n, iter=30)
        k *= s  # undo the whitening
    k = print_results(thr, wh, k)

    # Evolve: keep a random multiplicative mutation whenever it raises fitness
    npr = np.random
    wh = torch.Tensor(wh)
    f, sh, mp, s = fitness(
        thr, wh, k), k.shape, 0.9, 0.1  # fitness, shape, mutation prob, sigma
    for _ in tqdm(range(gen), desc='Evolving anchors'):
        v = np.ones(sh)
        while (v == 1
               ).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s +
                 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)  # anchors never shrink below 2 px
        fg = fitness(thr, wh, kg)
        if fg > f:
            f, k = fg, kg.copy()
            print_results(thr, wh, k)
    k = print_results(thr, wh, k)

    return k
Beispiel #8
0
        with open(json_file_path, 'w') as outfile:
            json.dump(json_file_data, outfile, sort_keys=True, indent=4)


def complement_bgr(color):
    """Pick a contrasting black or white for a (B, G, R) color.

    The color's luminance is the weighted sum 0.299*R + 0.587*G + 0.114*B,
    scaled by 1/255. White ``(255, 255, 255)`` is returned when that value
    is at most 0.5, black ``(0, 0, 0)`` otherwise.
    """
    blue, green, red = color
    brightness = (0.299 * red + 0.587 * green + 0.114 * blue) / 255
    if brightness <= 0.5:
        return (255, 255, 255)
    return (0, 0, 0)


# Change to the directory of this script. This runs at module level (outside
# the __main__ guard), so it also takes effect when the module is imported.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

if __name__ == '__main__':
    # Load the images and videos found under INPUT_DIR.
    dataset = LoadImagesAndLabels(INPUT_DIR)

    # Keep module-level handles on what the loader discovered.
    IMAGE_PATH_LIST = dataset.IMAGE_PATH_LIST
    VIDEO_NAME_DICT = dataset.VIDEO_NAME_DICT

    # Require at least one image or video. NOTE: an assert is stripped when
    # Python runs with -O, so this check disappears in optimized mode.
    assert len(IMAGE_PATH_LIST) > 0 or len(
        VIDEO_NAME_DICT) > 0, "no images found"
    last_img_index = len(IMAGE_PATH_LIST) - 1  # index of the final image

    # Create the tracker output directory, only when videos are present.
    if len(VIDEO_NAME_DICT) > 0:
        if not os.path.exists(TRACKER_DIR):
            os.makedirs(TRACKER_DIR)

    # Create an empty annotation file for each image if it doesn't exist already.
    for img_path in tqdm(IMAGE_PATH_LIST, desc='loading label file'):
Beispiel #9
0
# Run the tracker over every sequence directory listed under
# tracktor['dataset'], accumulating frame count and wall-clock time.
tracker = Tracker(obj_detect, tracktor['tracker'])

# Only conversion to tensor is applied to each frame.
transforms = T.Compose([T.ToTensor()])

time_total = 0  # seconds spent across all sequences
num_frames = 0  # frames processed across all sequences
mot_accums = []  # not filled anywhere in the statements below

for seq_path in os.listdir(tracktor['dataset']):
    # Start each sequence from a clean tracker state.
    tracker.reset()

    start = time.time()

    print(f"Tracking: {seq_path}")
    sequence = LoadImagesAndLabels(root, osp.join(tracktor['dataset'], seq_path), img_size, augment=False, transforms=transforms)
    # One frame per batch, in dataset order.
    data_loader = DataLoader(sequence, batch_size=1, shuffle=False)
    seq = []  # per-frame ground truth, one {'gt': ...} dict per frame
    for i, (frame, labels, imgs_path, _) in enumerate(tqdm(data_loader)):
        gt = {}
        # presumably label[1] is the track id and label[2:6] the box — TODO confirm
        # NOTE(review): if label[1] is a torch tensor it hashes by identity, so
        # equal ids from different frames would be distinct keys — verify.
        for label in labels[0]:
            gt[label[1]] = label[2:6]
        seq.append({'gt':gt})
        # The frame is moved to the GPU before being handed to the tracker.
        blob = {'img':frame.cuda()}
        # CPU alternative: blob = {'img':frame}
        with torch.no_grad():
            tracker.step(blob)
        num_frames += 1
    # Collected per sequence; `results` and `seq` are not consumed in the
    # statements shown here.
    results = tracker.get_results()
    time_total += time.time() - start
Beispiel #10
0
def get_thin_model(cfg,
                   backbone,
                   neck,
                   data,
                   origin_weights,
                   img_size,
                   batch_size,
                   prune_rate,
                   aux_epochs=50,
                   ft_epochs=15,
                   resume=False,
                   cache_images=False,
                   start_layer='75'):
    """Drive layer-by-layer pruning: fine-tune, then select channels.

    Builds the train/test loaders, makes sure the auxiliary network has been
    trained for ``aux_epochs`` epochs, loads the original model, writes an
    initial progress checkpoint (unless ``resume``), and then alternates
    ``fine_tune`` and ``channels_select`` until the last pruning layer is
    reached.

    The names ``hyp``, ``device``, ``aux_trained``, ``aux_weight``,
    ``progress_chkpt`` and ``progress_result`` are not parameters; they are
    resolved from outside this function.

    Returns:
        The tuple ``(mask_cfg, aux_util)``.
    """
    init_seeds()

    # -----------------dataset-----------------
    paths = parse_data_cfg(data)
    train_path, test_path = paths['train'], paths['valid']

    dataset = LoadImagesAndLabels(train_path,
                                  img_size,
                                  batch_size,
                                  augment=True,
                                  hyp=hyp,
                                  rect=False,
                                  cache_labels=True,
                                  cache_images=cache_images)

    # Never ask for a batch larger than the dataset itself.
    batch_size = min(batch_size, len(dataset))
    worker_cap = batch_size if batch_size > 1 else 0
    nw = min([os.cpu_count(), worker_cap, 8])  # number of workers
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               num_workers=nw,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=dataset.collate_fn)

    # Evaluation uses twice the training batch size and rectangular batches.
    eval_batch_size = batch_size * 2
    eval_dataset = LoadImagesAndLabels(test_path,
                                       img_size,
                                       eval_batch_size,
                                       hyp=hyp,
                                       rect=True,
                                       cache_labels=True,
                                       cache_images=cache_images)
    test_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=eval_batch_size,
                                              num_workers=nw,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    # -----------get trained aux net-----------
    if aux_trained:
        # An existing checkpoint only needs more training when it stopped
        # short of aux_epochs; in that case training resumes from it.
        aux_chkpt = torch.load(aux_weight)
        aux_pending = aux_chkpt["epoch"] + 1 != aux_epochs
        del aux_chkpt
        aux_resume = True
    else:
        aux_pending, aux_resume = True, False

    if aux_pending:
        train_aux_for_LCP(cfg,
                          backbone,
                          neck,
                          train_loader,
                          origin_weights,
                          aux_weight,
                          hyp,
                          device,
                          resume=aux_resume,
                          epochs=aux_epochs)

    # ----------init model and aux util----------
    origin_model = Darknet(cfg).to(device)
    origin_state = torch.load(origin_weights, map_location=device)
    origin_model.load_state_dict(origin_state['model'], strict=True)
    aux_util = AuxNetUtils(origin_model, hyp, backbone, neck, strategy="LCP")
    del origin_state

    mask_cfg, init_state_dict = mask_cfg_and_converted(
        aux_util.mask_replace_layer, cfg, origin_weights, target=None)

    # ----------start from first layer----------
    if not resume:
        first_progress = {
            'current_layer': start_layer,
            'epoch': -1,
            'model': init_state_dict,
            'optimizer': None
        }
        # Carry over every aux-related entry of the aux checkpoint.
        aux_state = torch.load(aux_weight)
        first_progress.update(
            {key: val for key, val in aux_state.items() if 'aux' in key})
        del aux_state
        torch.save(first_progress, progress_chkpt)

        # Stamp the progress log with the start time of this run.
        with open(progress_result, 'a') as log:
            stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            log.write('\n' + stamp + '\n')

    # ----------prune layer by layer----------
    layer = start_layer
    if start_layer != aux_util.pruning_layer[-1]:
        while int(layer) < int(aux_util.pruning_layer[-1]):
            layer = fine_tune(mask_cfg, data, aux_util, device, train_loader,
                              test_loader, ft_epochs)
            channels_select(mask_cfg, data, origin_model, aux_util, device,
                            train_loader, layer, prune_rate)

    return mask_cfg, aux_util
def train(
        cfg,
        data_cfg,
        img_size=416,
        resume=False,
        epochs=270,
        batch_size=16,
        accumulate=1,
        multi_scale=False,
        freeze_backbone=False,
        num_workers=4,
        transfer=False,  # Transfer learning (train only YOLO layers)
        use_cpu=True,
        backend='nccl',
        world_size=1,
        rank=0,
        dist_url='tcp://127.0.0.1:9999'):
    """Train a Darknet model with SGD, checkpointing and testing every epoch.

    Args:
        cfg: Model configuration passed to ``Darknet`` and ``test.test``.
        data_cfg: Data configuration; its ``'train'`` entry is the train path.
        img_size: Training image size (forced to 608 when ``multi_scale``).
        resume: Load ``weights/latest.pt`` (or, with ``transfer``,
            ``weights/yolov3-spp.pt``) instead of the darknet backbone.
        epochs: Total number of epochs.
        batch_size: Batch size for the dataloader and for testing.
        accumulate: Number of batches to accumulate gradients over.
        multi_scale: Pick a new ``dataset.img_size`` every 10 batches.
        freeze_backbone: Freeze layers below the backbone cutoff for epoch 0.
        num_workers: Dataloader workers (forced to 0 when ``multi_scale``).
        transfer: With ``resume``, train only parameters whose first
            dimension equals the YOLO layer size.
        use_cpu: Force CPU even when CUDA is available.
        backend, world_size, rank, dist_url: Forwarded to
            ``dist.init_process_group`` when more than one GPU is visible.

    Side effects:
        Writes ``latest.pt``, ``best.pt`` and periodic ``backup*.pt`` under
        ``weights/`` and appends one line per epoch to ``results.txt``.

    Returns:
        None.
    """
    weights = 'weights' + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'

    # use_cpu overrides CUDA availability.
    use_gpu = torch.cuda.is_available() and not use_cpu
    device = torch.device('cuda' if use_gpu else 'cpu')

    if multi_scale:
        img_size = 608  # initiate with maximum multi_scale size
        num_workers = 0  # bug https://github.com/ultralytics/yolov3/issues/174
    else:
        torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run
    train_path = parse_data_config(data_cfg)['train']

    # Initialize model
    model = Darknet(cfg, img_size, device).to(device)

    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_loss = float('inf')
    yl = get_yolo_layers(model)  # yolo layers
    nf = int(model.module_defs[yl[0] -
                               1]['filters'])  # yolo layer size (i.e. 255)

    if resume:  # Load previously saved model
        if transfer:  # Transfer learning
            chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device)
            # Skip scalar entries and tensors whose first dimension is 255,
            # then load the rest non-strictly.
            model.load_state_dict(
                {
                    k: v
                    for k, v in chkpt['model'].items()
                    if v.numel() > 1 and v.shape[0] != 255
                },
                strict=False)
            # Only parameters shaped like the YOLO output layer stay trainable.
            for p in model.parameters():
                p.requires_grad = p.shape[0] == nf

        else:  # resume from latest.pt
            chkpt = torch.load(latest, map_location=device)  # load checkpoint
            model.load_state_dict(chkpt['model'])

        start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']
        del chkpt

    else:  # Initialize model with backbone (optional)
        if '-tiny.cfg' in cfg:
            cutoff = model.load_darknet_weights(weights +
                                                'yolov3-tiny.conv.15')
        else:
            cutoff = model.load_darknet_weights(weights + 'darknet53.conv.74')

    # Set scheduler (reduce lr at epoch 250)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[250],
                                                     gamma=0.1,
                                                     last_epoch=start_epoch -
                                                     1)

    # Dataset
    dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=backend,
                                init_method=dist_url,
                                world_size=world_size,
                                rank=rank)
        model = torch.nn.parallel.DistributedDataParallel(model)
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        sampler = None

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=False,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn,
                            sampler=sampler)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
    for epoch in range(start_epoch, epochs):
        model.train()
        print(
            ('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf',
                                          'cls', 'total', 'nTargets', 'time'))

        # Update scheduler
        # NOTE(review): PyTorch >= 1.1 documents calling scheduler.step() after
        # the epoch's optimizer steps; kept at epoch start to preserve the
        # existing schedule — confirm against the installed version.
        scheduler.step()

        # DistributedSampler derives its shuffle from the epoch number, so it
        # must be told the epoch or every epoch replays the same ordering.
        if sampler is not None:
            sampler.set_epoch(epoch)

        # Freeze backbone at epoch 0, unfreeze at epoch 1
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:  # if layer < 75
                    p.requires_grad = epoch != 0

        mloss = defaultdict(float)  # mean loss
        n_seen = 0  # batches that actually contributed to mloss
        # Fallback row label so the results write below never sees an unbound
        # or stale `s` when no batch in this epoch has targets.
        s = '%8s' % ('%g/%g' % (epoch, epochs - 1))
        for i, (imgs, targets, _, _) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)

            nT = len(targets)
            if nT == 0:  # if no targets continue
                # NOTE(review): with accumulate > 1, skipping the last batch
                # also skips the end-of-epoch optimizer step below.
                continue

            # Plot images with bounding boxes
            plot_images = False
            if plot_images:
                fig = plt.figure(figsize=(10, 10))
                for ip in range(len(imgs)):
                    boxes = xywh2xyxy(targets[targets[:, 0] == ip,
                                              2:6]).numpy().T * img_size
                    plt.subplot(4, 4,
                                ip + 1).imshow(imgs[ip].numpy().transpose(
                                    1, 2, 0))
                    plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]],
                             '.-')
                    plt.axis('off')
                fig.tight_layout()
                fig.savefig('batch_%g.jpg' % i, dpi=fig.dpi)

            # SGD burn-in: ramp lr from 0 to lr0 with a 4th-power curve
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics. Weight by the number of
            # processed batches, not the loader index: skipped (no-target)
            # batches would otherwise dilute the mean.
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * n_seen + val) / (n_seen + 1)
            n_seen += 1

            s = ('%8s%12s' + '%10.3g' * 7) % (
                '%g/%g' % (epoch, epochs - 1), '%g/%g' %
                (i, nB - 1), mloss['xy'], mloss['wh'], mloss['conf'],
                mloss['cls'], mloss['total'], nT, time.time() - t)
            t = time.time()
            print(s)

            # Multi-Scale training (320 - 608 pixels) every 10 batches
            if multi_scale and (i + 1) % 10 == 0:
                dataset.img_size = random.choice(range(10, 20)) * 32
                print('multi_scale img_size = %g' % dataset.img_size)

        # Update best loss. An epoch with no processed batches has no loss;
        # the defaultdict's 0.0 must not be recorded as the best value.
        if n_seen and mloss['total'] < best_loss:
            best_loss = mloss['total']

        # Save training results
        save = True
        if save:
            # Save latest checkpoint (unwrap the DDP container if present)
            chkpt = {
                'epoch':
                epoch,
                'best_loss':
                best_loss,
                'model':
                model.module.state_dict() if isinstance(
                    model, nn.parallel.DistributedDataParallel) else
                model.state_dict(),
                'optimizer':
                optimizer.state_dict()
            }
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == mloss['total']:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            del chkpt

        # Calculate mAP
        with torch.no_grad():
            results = test.test(cfg,
                                data_cfg,
                                batch_size=batch_size,
                                img_size=img_size,
                                model=model)

        # Write epoch results
        with open('results.txt', 'a') as file:
            file.write(s + '%11.3g' * 3 % results + '\n')  # append P, R, mAP
Beispiel #12
0
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=416,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=False,
         hyp=None,
         model=None,
         dataloader=None):
    """Evaluate a detector on the validation set named in a data-config file.

    Args:
        cfg: model configuration passed to ``Darknet`` when ``model`` is None.
        data: data-config path; its 'classes', 'valid' and 'names' entries
            are read.
        weights: weights file loaded when ``model`` is None. A '.pt' file is
            read with ``torch.load`` (its 'model' entry is used); any other
            file goes through ``load_darknet_weights``.
        batch_size: batch size of the dataloader built when none is given.
        img_size: image size passed to ``LoadImagesAndLabels``.
        conf_thres: confidence threshold passed to ``non_max_suppression``.
        nms_thres: threshold passed to ``non_max_suppression``; it is also
            the IoU a prediction must exceed to be counted as correct.
        save_json: collect predictions, write them to 'results.json' and
            score them with pycocotools (best effort).
        hyp: hyperparameters forwarded to ``Darknet`` and the dataset.
        model: already-built model to evaluate; its own device is used.
        dataloader: already-built dataloader to iterate instead of creating
            one from ``data['valid']``.

    Returns:
        ``((mp, mr, mAP, mf1, l0, l1, l2), maps)``. The three loss values are
        always 0 here because the loss computation is commented out. ``maps``
        is an array of length ``nc`` filled with the mean AP and overwritten
        with the per-class AP for every class that was evaluated.
    """
    # Initialize/load model and set device
    if model is None:
        device = torch_utils.select_device(opt.device, batch_size=batch_size)
        verbose = opt.task == 'test'

        # Initialize model
        model = Darknet(cfg, hyp).to(device)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    test_path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)  # keep only the first entry, i.e. mAP@0.5
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(test_path, img_size, batch_size, augment=False, hyp=hyp)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]),
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        targets = targets.to(device)  # [img_id, cls_id, x, y, w, h, a]
        imgs = imgs.to(device)
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes (first batch only, never overwritten)
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg')

        with torch.no_grad():
            # Run model
            inf_out, train_out = model(imgs)  # inference and training outputs

            # Run NMS
            output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]  # ground truth of this image: [cls_id, x, y, w, h, a]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No detections: record the missed targets so recall accounts for them
                if nl:
                    stats.append((torch.zeros(0, 1), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                # NOTE(review): score/class are read from columns 4 and 6 here, while the
                # statistics below read them from columns 5 and 7 - confirm which layout
                # `non_max_suppression` really returns.
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[si].shape[1:], box, shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for di, d in enumerate(pred):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(d[6])],
                                  'bbox': [floatn(x, 3) for x in box[di]],
                                  'score': floatn(d[4], 5)})

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = torch.zeros(len(pred), niou, dtype=torch.bool)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # Target boxes, scaled from normalized to pixel units
                tbox = labels[:, 1:6]
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions: iterate over each detected box
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets of the same class
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = skew_bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and the target is not yet matched, mark as correct
                    if iou > nms_thres and m[bi] not in detected:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 5].cpu(), pred[:, 7].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mean_ap, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Save JSON (best effort: a failure here must not discard the metrics above)
    if save_json and mean_ap and len(jdict):
        try:
            # Read the file list from the dataloader so a caller-supplied
            # dataloader works too (no local `dataset` exists in that case).
            imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
            with open('results.json', 'w') as file:
                json.dump(jdict, file)

            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO('../coco/annotations/instances_val2014.json')  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # only evaluate these images
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            mean_ap = cocoEval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.')
        except Exception as e:  # report the real cause instead of blaming the dependency
            print('WARNING: COCO mAP evaluation failed: %s' % e)

    # Return results
    maps = np.zeros(nc) + mean_ap
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, mean_ap, mf1, *(loss / len(dataloader)).tolist()), maps
def test(cfg,
         data_cfg,
         weights=None,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.1,
         nms_thres=0.5,
         save_json=False,
         use_cpu=True,
         model=None):
    """Compute precision, recall and mAP on the validation set of a data config.

    Args:
        cfg: model configuration passed to ``Darknet`` when ``model`` is None.
        data_cfg: data-config path; its 'valid' and 'names' entries are read.
        weights: weights file loaded when ``model`` is None ('.pt' files are
            read with ``torch.load``, anything else as darknet weights).
        batch_size: dataloader batch size.
        img_size: image size used by the dataset and to scale target boxes.
        iou_thres: IoU a prediction must exceed to count as correct.
        conf_thres: confidence threshold passed to ``non_max_suppression``.
        nms_thres: NMS threshold passed to ``non_max_suppression``.
        save_json: write predictions to 'results.json' and replace the mAP
            with the value computed by pycocotools.
        use_cpu: force CPU even when CUDA is available (only when the model
            is built here).
        model: already-built model to evaluate; its own device is used.

    Returns:
        Tuple ``(mP, mR, mAP)``.
    """
    if model is None:
        use_gpu = torch.cuda.is_available() and not use_cpu
        device = torch.device('cuda' if use_gpu else 'cpu')

        # Initialize model
        model = Darknet(cfg, img_size, device)

        # Load weights
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            model.load_darknet_weights(weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device

    # Configure run
    data_cfg = parse_data_config(data_cfg)
    test_path = data_cfg['valid']

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size=img_size)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    model.eval()
    seen = 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    mP, mR, mAP = 0.0, 0.0, 0.0
    jdict, stats, AP, AP_class = [], [], [], []

    coco91class = coco80_to_coco91_class()
    for imgs, targets, paths, shapes in tqdm(dataloader,
                                             desc='Calculating mAP'):
        targets = targets.to(device)
        imgs = imgs.to(device)

        # Evaluation needs no gradients; skip building the autograd graph
        with torch.no_grad():
            output = model(imgs)
            output = non_max_suppression(output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            correct, detected = [], []
            tcls = torch.Tensor()
            seen += 1

            if pred is None:
                # No detections: record the missed targets, if any
                if len(labels):
                    tcls = labels[:, 0].cpu()  # target classes
                    stats.append(
                        (correct, torch.Tensor(), torch.Tensor(), tcls))
                continue

            if save_json:  # add to json pred dictionary
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img_size, box, shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for di, d in enumerate(pred):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [float3(x) for x in box[di]],
                        'score': float(d[4])
                    })

            if len(labels):
                # Extract target boxes as (x1, y1, x2, y2)
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size  # target boxes
                tcls = labels[:, 0]  # target classes

                for *pbox, pconf, pcls_conf, pcls in pred:
                    if pcls not in tcls:
                        correct.append(0)
                        continue

                    # Best iou, index between pred and targets
                    iou, bi = bbox_iou(pbox, tbox).max(0)

                    # If iou > threshold and the target is not yet matched, mark as correct
                    if iou > iou_thres and bi not in detected:
                        correct.append(1)
                        detected.append(bi)
                    else:
                        correct.append(0)
            else:
                # If no labels add number of detections as incorrect
                correct.extend([0] * len(pred))

            # Append Statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls.cpu()))

    # Compute means
    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats_np):
        AP, AP_class, R, P = ap_per_class(*stats_np)
        mP, mR, mAP = P.mean(), R.mean(), AP.mean()

    # Print P, R, mAP
    print(('%11s%11s' + '%11.3g' * 3) % (seen, len(dataset), mP, mR, mAP))

    # Print mAP per class
    if len(stats_np):
        print('\nmAP Per Class:')
        names = load_classes(data_cfg['names'])
        for c, a in zip(AP_class, AP):
            print('%15s: %-.4f' % (names[c], a))

    # Save JSON
    if save_json and mAP and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
        with open('results.json', 'w') as file:
            json.dump(jdict, file)

        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

        # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
        cocoGt = COCO('../coco/annotations/instances_val2014.json'
                      )  # initialize COCO ground truth api
        cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mAP = cocoEval.stats[1]  # update mAP to pycocotools mAP

    # Return mAP
    return mP, mR, mAP
Beispiel #14
0
def train(data_cfg='cfg/face.data', accumulate=1):
    """Train a YOLOv3 model with settings read from a data-config file.

    Checkpoints are written into the './weights' directory: the latest state
    after every epoch, the best state whenever the epoch mean total loss
    improves, and a numbered backup every 5 epochs.

    Args:
        data_cfg: path handed to ``parse_data_cfg``. The entries 'gpus',
            'num_workers', 'cfg_model', 'train', 'classes', 'finetune_model',
            'batch_size', 'img_size', 'multi_scale', 'epochs', 'lr_step' and
            'lr0' are read from the result.
        accumulate: number of batches whose gradients are accumulated before
            each optimizer step.
    """
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration (a dict)

    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale'] == 'True'  # config stores the flag as text
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])
    lr0 = float(get_data_cfg['lr0'])

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    device = select_device()

    print('data_cfg            : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('GPUs             : ', gpus)
    print('num_workers      : ', num_workers)
    print('model            : ', cfg_model)
    print('Finetune_model   : ', finetune_model)
    print('train_path       : ', train_path)
    print('num_classes      : ', num_classes)
    print('batch_size       : ', batch_size)
    print('img_size         : ', img_size)
    print('multi_scale      : ', multi_scale)
    print('lr_step          : ', lr_step)
    print('lr0              : ', lr0)

    # Rescale the reference anchors (given for 416 px input) to img_size
    anchor_scale = 416. / img_size
    anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
    anchors_new = [(int(w / anchor_scale), int(h / anchor_scale)) for w, h in anchors]
    model = Yolov3(num_classes, anchors=anchors_new)

    # Directory that receives every checkpoint
    weights = './weights'
    os.makedirs(weights, exist_ok=True)

    model = model.to(device)
    # Join with the directory; plain string concatenation would write
    # './weightslatest_*.pt' next to the directory instead of inside it.
    latest = os.path.join(weights, 'latest_{}.pt'.format(img_size))
    best = os.path.join(weights, 'best_{}.pt'.format(img_size))

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)

    start_epoch = 0
    # Initialised before the checkpoint is read so a restored value survives
    best_loss = float('inf')

    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # Skip single-element tensors and tensors whose first dimension
        # equals not_load_filters; strict=False tolerates the missing keys.
        model.load_state_dict(
            {k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
        if 'coco' not in finetune_model:
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                best_loss = chkpt['best_loss']

    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch - 1)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size, augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num ---------->>>: ', dataset.__len__())
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    t = time.time()
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    flag_start = False

    # NOTE(review): the loop always starts at epoch 0 even when start_epoch was
    # restored from a checkpoint - confirm whether that is intended.
    for epoch in range(0, epochs):
        model.train()
        # Step the scheduler from the second epoch of this run onwards
        if flag_start:
            scheduler.step()
        flag_start = True
        mloss = defaultdict(float)  # mean loss
        saw_batch = False
        for i, (imgs, targets, _, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # Burn-in: ramp the learning rate up during the first batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)
            saw_batch = True

            # Read the lr from the optimizer so the burn-in value is shown too
            current_lr = optimizer.param_groups[0]['lr']
            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, '
                'wh {:.3f}, '
                'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(epoch, epochs - 1, i, nB - 1,
                                                                             multi_size[2], multi_size[3],
                                                                             nt, current_lr, mloss['xy'],
                                                                             mloss['wh'], mloss['conf'], mloss['cls'],
                                                                             mloss['total'], time.time() - t),
                end='\r')
            t = time.time()
        print()

        # Update best loss from this epoch's mean total loss
        improved = False
        if saw_batch:
            epoch_loss = float(mloss['total'])
            if epoch_loss < best_loss:
                best_loss = epoch_loss
                improved = True

        # Create checkpoint
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model.module.state_dict() if type(
                     model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                 'optimizer': optimizer.state_dict()}

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint
        if improved:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, os.path.join(weights, 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch)))

        # Delete checkpoint
        del chkpt
Beispiel #15
0
                torch.load(args['weights'], map_location=device)['model'])
        except:
            model.load_state_dict(
                torch.load(args['weights'], map_location=device))
    else:  # darknet format
        load_darknet_weights(model, args['weights'])

    ########
    # Data #
    ########
    data = parse_data_cfg(args['data'])
    path = data['test'] if 'test' in data else data[
        'valid']  # path to test images
    dataset = LoadImagesAndLabels(path,
                                  args['img_size'],
                                  args['batch_size'],
                                  rect=args['rect'],
                                  single_cls=int(data['classes']) == True,
                                  cache_labels=True)

    dataloader = DataLoader(dataset,
                            batch_size=args['batch_size'],
                            shuffle=False,
                            num_workers=min(
                                [os.cpu_count(), args['batch_size'], 8]),
                            pin_memory=True,
                            collate_fn=dataset.collate_fn)

    if not os.path.exists(args['output']):
        os.makedirs(args['output'])

    YOLO_Gradcam(model, dataloader, device, args)
Beispiel #16
0
def train():
    """Fine-tune a ``MobileFaceNet`` from the checkpoint at ``args.model_path``.

    Reads ``batch_size``, ``epochs``, ``train_path`` and ``model_path`` from
    the module-level ``args`` and optimizer/augmentation settings from the
    module-level ``hyp``. After every epoch the model state dict is saved to
    ``<model_path without extension><epoch>.pth``.
    """
    batch_size = args.batch_size
    epochs = args.epochs
    train_path = args.train_path
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(train_path)
    print(device)
    print('epochs', epochs)
    print('batch size', batch_size)
    start_epoch = 0

    # Student model
    model = MobileFaceNet(714).to(device)
    resume = True
    if resume:
        # map_location lets a checkpoint saved on GPU load on a CPU-only host
        chkpt = torch.load(args.model_path, map_location=device)
        model.load_state_dict(chkpt)
    transfer = False
    if transfer:
        # Freeze the first 24 parameter tensors
        for i, param in enumerate(model.parameters()):
            if i < 24:
                param.requires_grad = False

    # Optimizer and learning-rate schedule (decay at 80% and 90% of the run)
    optimizer = optim.Adam(model.parameters(),
                           lr=hyp['lr0'],
                           weight_decay=hyp['weight_decay'])
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(args.epochs * x) for x in [0.8, 0.9]],
        gamma=0.1)
    scheduler.last_epoch = start_epoch - 1

    # Dataset
    # NOTE(review): batch_size is fixed at 16 here while the dataloader uses
    # args.batch_size - confirm whether the dataset should receive the same value.
    dataset = LoadImagesAndLabels(
        train_path,
        img_size=112,
        batch_size=16,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=False,  # rectangular training
        image_weights=False,
        cache_labels=False,
        cache_images=False)
    # https://discuss.pytorch.org/t/eoferror-ran-out-of-input-when-enumerating-the-train-loader/22692/7
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 4]),
        shuffle=True,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)
    model.hyp = hyp

    # Cumulative loss; one entry is appended per processed batch
    running_loss = [0.0]
    print('Starting %s for %g epochs...' % ('training', epochs))
    for epoch in range(start_epoch, epochs):
        model.train()
        for i, (imgs, labels) in enumerate(dataloader):
            if imgs is None or labels is None:
                continue

            imgs = imgs.to(device)
            labels = labels.to(device)
            preds = model(imgs)
            optimizer.zero_grad()
            loss = compute_loss(preds, labels)
            loss.backward()
            optimizer.step()

            running_loss.append(running_loss[-1] + loss.item() * imgs.size(0))
            if i % 600 == 0:
                print(loss.item())

        scheduler.step()

        if tb_writer:
            # Log the latest cumulative loss; the first list entry is always 0.0
            tb_writer.add_scalar('running_loss', running_loss[-1], epoch)
        # save model
        if True:  # checkpoint after every epoch
            print(os.path.splitext(args.model_path)[0] + str(epoch) + '.pth')
            torch.save(
                model.state_dict(),
                os.path.splitext(args.model_path)[0] + str(epoch) + '.pth')