Esempio n. 1
0
    def test_mb2_ssd_coco_80(self):
        # Evaluate the pretrained mb2_ssd / coco_80 checkpoint on the COCO test
        # split and require its mAP to be within 0.001 of the 0.138 reference.
        model = get_model_by_name(
            model_name="mb2_ssd",
            dataset_name="coco_80",
            pretrained=True,
            progress=False,
        )
        from deeplite_torch_zoo.src.objectdetection.datasets.coco_config import (
            DATA,
            MISSING_IDS,
        )

        test_loader = get_data_splits_by_name(
            data_root="/neutrino/datasets/coco2017/",
            dataset_name="coco",
            model_name="mb2_ssd",
            batch_size=32,
            missing_ids=MISSING_IDS,
            classes=DATA["CLASSES"],
        )["test"]
        cocoGt = COCO(
            "/neutrino/datasets/coco2017/annotations/instances_val2017.json")

        eval_fn = get_eval_function("mb2_ssd", "coco_80")
        APs = eval_fn(
            model,
            test_loader,
            gt=cocoGt,
            _set="coco",
        )

        print(APs)
        # assertLess keeps the strict "< 0.001" condition but reports the
        # measured deviation on failure instead of a bare "False != True".
        self.assertLess(abs(APs["mAP"] - 0.138), 0.001)
Esempio n. 2
0
def test_create_detection_model_output_shape(model_name, dataset_name,
                                             datasplit_kwargs, output_shapes):
    """Create a detection model on CPU and check the shapes of its outputs.

    The model is built with ``CUSTOM_NUM_CLASSES`` classes and fed data from
    the "train" split of the mock VOC dataset. Models whose (possibly
    remapped) name contains ``'yolo'`` are run on a single dataset sample and
    must produce three outputs; all other models are switched to eval mode,
    run on one loader batch and must produce two outputs.
    """
    model = create_model(
        model_name=model_name,
        pretraining_dataset=dataset_name,
        num_classes=CUSTOM_NUM_CLASSES,
        progress=False,
        device="cpu",
    )

    # The data-split factory may know this model under a different name.
    model_name = (MODEL_NAME_DATASPLIT_FN_ARG_MAP[model_name]
                  if model_name in MODEL_NAME_DATASPLIT_FN_ARG_MAP
                  else model_name)

    splits = get_data_splits_by_name(
        data_root=MOCK_VOC_PATH,
        dataset_name=dataset_name,
        model_name=model_name,
        batch_size=TEST_BATCH_SIZE,
        num_workers=0,
        device="cpu",
        **datasplit_kwargs,
    )
    train_loader = splits["train"]

    if 'yolo' not in model_name:
        # Two-output models: evaluate one full batch in eval mode.
        images, _, _ = next(iter(train_loader))
        model.eval()
        first_out, second_out = model(images)
        assert first_out.shape == (TEST_BATCH_SIZE, *output_shapes[0])
        assert second_out.shape == (TEST_BATCH_SIZE, *output_shapes[1])
        return

    # YOLO models: a single sample, batched by hand, yields three outputs.
    sample_img, _, _, _ = train_loader.dataset[0]
    outputs = model(torch.unsqueeze(sample_img, dim=0))
    for idx in range(3):
        assert outputs[idx].shape == (1, *output_shapes[idx])
Esempio n. 3
0
def test_create_segmentation_model_output_shape(model_name, dataset_name,
                                                datasplit_kwargs,
                                                output_shape):
    """Create a segmentation model on CPU and check its output shape.

    The model is built with ``CUSTOM_NUM_CLASSES`` classes, put in eval mode
    and run on the first sample of the "test" split. The output shape must
    equal ``output_shape`` followed by the shape of that sample's mask.
    """
    model = create_model(
        model_name=model_name,
        pretraining_dataset=dataset_name,
        num_classes=CUSTOM_NUM_CLASSES,
        progress=False,
        device="cpu",
    )

    # The data-split factory may know this model under a different name.
    model_name = (MODEL_NAME_DATASPLIT_FN_ARG_MAP[model_name]
                  if model_name in MODEL_NAME_DATASPLIT_FN_ARG_MAP
                  else model_name)

    # VOC-based datasets live under the generic mock root, others under Carvana.
    mock_root = MOCK_DATASETS_PATH if 'voc' in dataset_name else MOCK_CARVANA_PATH
    loaders = get_data_splits_by_name(
        data_root=mock_root,
        dataset_name=dataset_name,
        model_name=model_name,
        num_workers=0,
        device="cpu",
        **datasplit_kwargs,
    )
    first_sample = loaders["test"].dataset[0]

    # UNet datasets return a third item alongside image and mask.
    if 'unet' in model_name:
        image, mask, _ = first_sample
    else:
        image, mask = first_sample

    model.eval()
    prediction = model(torch.unsqueeze(image, dim=0))
    assert prediction.shape == (*output_shape, *mask.shape)
Esempio n. 4
0
    def test_mb2_ssd_coco_6(self):
        # Evaluate the pretrained mb2_ssd / coco_gm_6 checkpoint on the custom
        # six-class COCO-format test split and require its mAP to be within
        # 0.001 of the 0.227 reference.
        # NOTE(review): the data paths below point at a user home directory, so
        # this test presumably only runs on the original author's machine.
        model = get_model_by_name(
            model_name="mb2_ssd",
            dataset_name="coco_gm_6",
            pretrained=True,
            progress=False,
        )
        test_loader = get_data_splits_by_name(
            data_root="/home/ehsan/data/",
            dataset_name="coco_gm",
            model_name="mb2_ssd",
            batch_size=32,
            train_ann_file="train_data_COCO.json",
            train_dir="images/train",
            val_ann_file="test_data_COCO.json",
            val_dir="images/test",
            classes=[
                "class1", "class2", "class3", "class4", "class5", "class6"
            ],
        )["test"]
        cocoGt = COCO("/home/ehsan/data/test_data_COCO.json")
        eval_fn = get_eval_function("mb2_ssd", "coco_gm")
        APs = eval_fn(
            model,
            test_loader,
            gt=cocoGt,
            _set="coco",
        )

        # assertLess keeps the strict "< 0.001" condition but reports the
        # measured deviation on failure instead of a bare "False != True".
        self.assertLess(abs(APs["mAP"] - 0.227), 0.001)
Esempio n. 5
0
 def test_cifar100_dataset(self):
     # With a batch size of 128 the cifar100 "train" and "test" splits must
     # have lengths 391 and 79 respectively.
     loaders = get_data_splits_by_name(dataset_name="cifar100",
                                       batch_size=128)
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 391)
     self.assertEqual(n_test, 79)
Esempio n. 6
0
 def test_mnist_dataset(self):
     # With a batch size of 128 the mnist "train" and "test" splits must
     # have lengths 469 and 79 respectively.
     loaders = get_data_splits_by_name(dataset_name="mnist",
                                       batch_size=128)
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 469)
     self.assertEqual(n_test, 79)
Esempio n. 7
0
 def test_vww_dataset(self):
     # With a batch size of 128 the vww "train" and "test" splits loaded from
     # DATASETS_ROOT/vww must have lengths 901 and 63 respectively.
     vww_root = str(DATASETS_ROOT / "vww")
     loaders = get_data_splits_by_name(
         dataset_name="vww",
         data_root=vww_root,
         batch_size=128,
     )
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 901)
     self.assertEqual(n_test, 63)
Esempio n. 8
0
 def test_imagenet1000_dataset(self):
     # With a batch size of 128 the imagenet "train" and "test" splits loaded
     # from DATASETS_ROOT/imagenet must have lengths 10010 and 391.
     imagenet_root = str(DATASETS_ROOT / "imagenet")
     loaders = get_data_splits_by_name(
         data_root=imagenet_root,
         dataset_name="imagenet",
         batch_size=128,
     )
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 10010)
     self.assertEqual(n_test, 391)
Esempio n. 9
0
 def test_coco_yolo_dataset(self):
     # With a batch size of 10 the coco "train" and "test" splits built for
     # the "yolo" model must have lengths 11829 and 500 respectively.
     coco_root = str(DATASETS_ROOT / "coco")
     loaders = get_data_splits_by_name(
         data_root=coco_root,
         dataset_name="coco",
         model_name="yolo",
         batch_size=10,
     )
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 11829)
     self.assertEqual(n_test, 500)
Esempio n. 10
0
 def test_voc0712_dataset(self):
     # With a batch size of 128 the voc "train" and "test" splits built for
     # the "vgg16_ssd" model must have lengths 130 and 39 respectively.
     voc_root = str(DATASETS_ROOT / "VOCdevkit")
     loaders = get_data_splits_by_name(
         data_root=voc_root,
         dataset_name="voc",
         model_name="vgg16_ssd",
         batch_size=128,
     )
     n_train, n_test = len(loaders["train"]), len(loaders["test"])
     self.assertEqual(n_train, 130)
     self.assertEqual(n_test, 39)
Esempio n. 11
0
 def test_mb3_small_vww(self):
     # Evaluate the pretrained mobilenetv3_small / vww checkpoint on the vww
     # test split and require its accuracy to be within 0.001 of 0.892.
     model = get_model_by_name(
         model_name="mobilenetv3_small",
         dataset_name="vww",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/vww",
         dataset_name="vww",
         batch_size=128,
     )["test"]
     eval_fn = get_eval_function("mobilenetv3_small", "vww")
     ACC = eval_fn(model, test_loader)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(ACC["acc"] - 0.892), 0.001)
Esempio n. 12
0
 def test_mb2_ssd_voc_20(self):
     # Evaluate the pretrained mb2_ssd / voc_20 checkpoint on the voc test
     # split and require its mAP to be within 0.001 of 0.443.
     model = get_model_by_name(
         model_name="mb2_ssd",
         dataset_name="voc_20",
         pretrained=True,
         progress=False,
     )
     # NOTE(review): the data split is requested for "mb2_ssd_lite" while the
     # model under test is "mb2_ssd" - presumably they share a data pipeline;
     # confirm this is intentional.
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/VOCdevkit",
         dataset_name="voc",
         model_name="mb2_ssd_lite",
         batch_size=32,
     )["test"]
     eval_fn = get_eval_function("mb2_ssd", "voc_20")
     APs = eval_fn(model, test_loader)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(APs["mAP"] - 0.443), 0.001)
Esempio n. 13
0
 def test_resnet50_tinyimagenet(self):
     # Evaluate the pretrained resnet50 / tinyimagenet checkpoint on the
     # "val" split and require its accuracy to be within 0.001 of 0.730.
     model = get_model_by_name(
         model_name="resnet50",
         dataset_name="tinyimagenet",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/TinyImageNet/",
         dataset_name="tinyimagenet",
         batch_size=128,
         num_workers=0,
     )["val"]
     eval_fn = get_eval_function("resnet50", "tinyimagenet")
     ACC = eval_fn(model, test_loader)
     print(ACC)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(ACC["acc"] - 0.730), 0.001)
Esempio n. 14
0
 def test_vgg16_ssd_wider_face(self):
     # Evaluate the pretrained vgg16_ssd / wider_face checkpoint on the
     # wider_face test split and require its mAP to be within 0.001 of 0.7071.
     model = get_model_by_name(
         model_name="vgg16_ssd",
         dataset_name="wider_face",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/wider_face",
         dataset_name="wider_face",
         model_name="vgg16_ssd",
         batch_size=8,
     )["test"]
     eval_fn = get_eval_function("vgg16_ssd", "wider_face")
     APs = eval_fn(model, test_loader)
     print(APs)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(APs["mAP"] - 0.7071), 0.001)
Esempio n. 15
0
 def test_unet_scse_resnet18_carvana(self):
     # Evaluate the pretrained unet_scse_resnet18 / carvana checkpoint on the
     # carvana test split and require its mIoU to be within 0.001 of 0.989.
     model = get_model_by_name(
         model_name="unet_scse_resnet18",
         dataset_name="carvana",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/carvana",
         dataset_name="carvana",
         model_name="unet",
         num_workers=1,
     )["test"]
     eval_fn = get_eval_function("unet_scse_resnet18", "carvana")
     acc = eval_fn(model, test_loader, net="unet_scse_resnet18")
     miou = acc["miou"]
     print(miou)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(miou - 0.989), 0.001)
Esempio n. 16
0
 def test_unet_carvana(self):
     # Evaluate the pretrained unet / carvana checkpoint on the carvana test
     # split and require its dice coefficient to be within 0.001 of 0.983.
     model = get_model_by_name(
         model_name="unet",
         dataset_name="carvana",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/carvana",
         dataset_name="carvana",
         model_name="unet",
         num_workers=1,
     )["test"]
     eval_fn = get_eval_function("unet", "carvana")
     acc = eval_fn(model, test_loader, net="unet")
     dc = acc["dice_coeff"]
     print(dc)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(dc - 0.983), 0.001)
Esempio n. 17
0
 def test_fasterrcnn_resnet50_fpn_coco(self):
     # Evaluate the pretrained fasterrcnn_resnet50_fpn / coco_80 checkpoint on
     # the coco test split and require its mAP to be within 0.001 of 0.369.
     model = get_model_by_name(
         model_name="fasterrcnn_resnet50_fpn",
         dataset_name="coco_80",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets/coco2017/",
         dataset_name="coco",
         model_name="fasterrcnn_resnet50_fpn",
         batch_size=32,
     )["test"]
     cocoGt = COCO(
         "/neutrino/datasets/coco2017/annotations/instances_val2017.json")
     eval_fn = get_eval_function("fasterrcnn_resnet50_fpn", "coco_80")
     APs = eval_fn(model, test_loader, gt=cocoGt)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(APs["mAP"] - 0.369), 0.001)
Esempio n. 18
0
 def test_deeplab_mobilenet_voc_20(self):
     # Evaluate the pretrained deeplab_mobilenet / voc_20 checkpoint on the
     # voc test split and require its mIoU to be within 0.001 of 0.571.
     model = get_model_by_name(
         model_name="deeplab_mobilenet",
         dataset_name="voc_20",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets",
         sbd_root=None,
         dataset_name="voc",
         model_name="deeplab_mobilenet",
         num_workers=2,
         backbone="vgg",
     )["test"]
     eval_fn = get_eval_function("deeplab_mobilenet", "voc_20")
     acc = eval_fn(model, test_loader, net="deeplab")
     miou = acc["miou"]
     print(miou)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(miou - 0.571), 0.001)
Esempio n. 19
0
 def test_fcn32_voc_20(self):
     # Evaluate the pretrained fcn32 / voc_20 checkpoint on the voc test split
     # and require its mIoU to be within 0.001 of 0.713.
     model = get_model_by_name(
         model_name="fcn32",
         dataset_name="voc_20",
         pretrained=True,
         progress=False,
     )
     test_loader = get_data_splits_by_name(
         data_root="/neutrino/datasets",
         dataset_name="voc",
         model_name="fcn32",
         num_workers=1,
         batch_size=1,
         backbone="vgg",
     )["test"]
     eval_fn = get_eval_function("fcn32", "voc_20")
     acc = eval_fn(model, test_loader, net="fcn32")
     miou = acc["miou"]
     print(miou)
     # assertLess keeps the strict "< 0.001" condition but reports the
     # measured deviation on failure instead of a bare "False != True".
     self.assertLess(abs(miou - 0.713), 0.001)
def main():
    """Command-line entry point: train a pretrained zoo model and save it.

    Parses the command-line options, loads the data splits and a pretrained
    model for ``--arch`` / ``--dataset``, then for ``--epochs`` epochs calls
    ``train`` on the "train" split and ``test`` on the "test" split, stepping
    a ``StepLR`` scheduler (``step_size=1``, ``gamma=--gamma``) after each
    epoch. Finally writes the model ``state_dict`` to
    ``<arch>_checkpoint.pt`` in the current working directory.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch training Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--dataset",
        metavar="DATASET",
        default="cifar100",
        help="dataset to use",
    )
    parser.add_argument(
        "-j",
        "--workers",
        type=int,
        metavar="N",
        default=4,
        help="number of data loading workers",
    )
    parser.add_argument(
        "-r",
        "--data_root",
        metavar="PATH",
        default="",
        help="dataset data root path",
    )
    # NOTE(review): --test-batch-size is parsed but never read in this
    # function; kept so existing command lines keep working.
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=14,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.1,
        metavar="LR",
        # The help text previously claimed a default of 1.0.
        help="learning rate (default: 0.1)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "-a",
        "--arch",
        metavar="ARCH",
        default="vgg19",
        help="model architecture",
    )
    args = parser.parse_args()

    # Use the GPU when one is available; fall back to CPU instead of crashing
    # in model.to(device) on hosts without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_splits = get_data_splits_by_name(
        dataset_name=args.dataset,
        data_root=args.data_root,
        batch_size=args.batch_size,
        num_torch_workers=args.workers,
    )
    model = get_model_by_name(
        model_name=args.arch,
        dataset_name=args.dataset,
        pretrained=True,
        progress=True,
        device=device,
    )

    model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    criterion = CrossEntropyLoss()
    # Multiply the learning rate by gamma after every epoch.
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, data_splits["train"], optimizer, criterion,
              epoch)
        test(model, device, data_splits["test"])
        scheduler.step()

    torch.save(model.state_dict(), f"{args.arch}_checkpoint.pt")
Esempio n. 21
0
def train(opt, device):
    """Train a detection model with the YoloV5 loss and checkpoint the run.

    Args:
        opt: Run options object. This function reads ``epochs``,
            ``batch_size``, ``noval``, ``nosave``, ``workers``, ``freeze``,
            ``save_dir``, ``dataset_name``, ``hp_config``, ``img_dir``,
            ``train_img_res``, ``test_img_res``, ``model_name``,
            ``pretraining_source_dataset``, ``pretrained``, ``adam``,
            ``linear_lr``, ``sync_bn``, ``label_smoothing``,
            ``eval_before_train``, ``patience``, ``multi_scale``,
            ``eval_freq`` and ``save_period``. ``opt.img_dir`` is replaced in
            place by a ``Path``.
        device: ``torch.device`` to train on; any device whose type is not
            ``'cpu'`` is treated as CUDA.

    Side effects:
        Creates ``<opt.save_dir>/<timestamp>/`` containing ``hyp.yaml``,
        ``opt.yaml``, TensorBoard event files and ``weights/last.pt``,
        ``weights/best.pt`` and optional ``weights/epoch<N>.pt`` checkpoints.
        After training, the saved checkpoints are passed through
        ``strip_optimizer`` and the best one is re-evaluated.
    """
    epochs, batch_size, noval, nosave, workers, freeze, = \
        opt.epochs, opt.batch_size, opt.noval, opt.nosave, opt.workers, opt.freeze

    # Each run gets its own timestamped sub-directory of opt.save_dir.
    d = datetime.datetime.now()
    run_id = '{:%Y-%m-%d__%H-%M-%S}'.format(d)
    save_dir = Path(opt.save_dir) / run_id

    # Directories
    w = save_dir / 'weights'  # weights dir
    w.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Get hyperparameter dict
    hyp, hyp_loss = get_hyperparameter_dict(opt.dataset_name, opt.hp_config)

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)
    tb_writer = SummaryWriter(save_dir)
    # Converted only after opt has been dumped to YAML above.
    opt.img_dir = Path(opt.img_dir)

    # Config
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)

    # Dataloaders
    dataset_kwargs = {}
    if opt.train_img_res:
        dataset_kwargs = {'img_size': opt.train_img_res}
    dataset_splits = get_data_splits_by_name(
        data_root=opt.img_dir,
        dataset_name=opt.dataset_name,
        model_name=opt.model_name,
        batch_size=batch_size,
        num_workers=workers,
        distributed=(cuda and RANK != -1),
        **dataset_kwargs
    )
    test_img_size = dataset_splits["test"].dataset._img_size
    train_img_size = dataset_splits["train"].dataset._img_size
    if opt.test_img_res:
        test_img_size = opt.test_img_res

    train_loader = dataset_splits["train"]
    dataset = train_loader.dataset
    nc = dataset.num_classes

    nb = len(train_loader)  # number of batches

    # Model
    model = create_model(
        model_name=opt.model_name,
        pretraining_dataset=opt.pretraining_source_dataset,
        pretrained=opt.pretrained,
        num_classes=nc,
        progress=True,
        device=device,
    )

    # Freeze
    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    # g0 holds the BatchNorm weights (no decay) and g1 the decayed weights;
    # the labels in this message were previously swapped.
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        # max(..., 1) avoids a ZeroDivisionError when training for one epoch.
        lf = lambda x: (1 - x / max(epochs - 1, 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    start_epoch, best_fitness = 0, 0.0
    # Most recently measured mAP. Stays 0.0 until the first in-training
    # evaluation so that epochs without evaluation never read an undefined
    # `ap_dict`.
    fi = 0.0

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Process 0
    if RANK in [-1, 0]:
        # Anchors
        model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model parameters
    hyp['giou'] *= 3. / nl  # scale to layers
    hyp['box'] = hyp['giou']
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (train_img_size / 640) ** 2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model

    eval_function = get_eval_function(dataset_name=opt.dataset_name,
        model_name=opt.model_name)
    criterion = YoloV5Loss(
        model=model,
        num_classes=nc,
        device=device,
        hyp_cfg=hyp_loss,
    )

    if opt.eval_before_train:
        ap_dict = evaluate(model, eval_function, opt.dataset_name, opt.img_dir,
            nc, test_img_size, device)
        LOGGER.info(f'Eval metrics: {ap_dict}')

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    last_opt_step = -1
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    stopper = EarlyStopping(patience=opt.patience)

    loss_giou_mean = AverageMeter()
    loss_conf_mean = AverageMeter()
    loss_cls_mean = AverageMeter()
    loss_mean = AverageMeter()

    LOGGER.info(f'Image sizes {train_img_size} train, {test_img_size} val\n'
                f'Using {train_loader.num_workers} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch
        model.train()

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, labels_length, _) in pbar:  # batch
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float()

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange() requires integer bounds: float arguments raise
                # TypeError on Python 3.12+ and non-integral ones always failed.
                sz = random.randrange(int(train_img_size * 0.5), int(train_img_size * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_giou, loss_conf, loss_cls = criterion(
                       pred, targets, labels_length, imgs.shape[-1]
                )
                # Update running mean of tracked metrics
                loss_items = torch.tensor([loss_giou, loss_conf, loss_cls]).to(device)

                if RANK in (-1, 0):
                    loss_giou_mean.update(loss_giou, imgs.size(0))
                    loss_conf_mean.update(loss_conf, imgs.size(0))
                    loss_cls_mean.update(loss_cls, imgs.size(0))
                    loss_mean.update(loss, imgs.size(0))

                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
            # end batch

        # Scheduler
        scheduler.step()

        if RANK in [-1, 0]:
            for idx, param_group in enumerate(optimizer.param_groups):
                tb_writer.add_scalar(f'learning_rate/gr{idx}', param_group['lr'], epoch)
            tb_writer.add_scalar('train/giou_loss', loss_giou_mean.avg, epoch)
            tb_writer.add_scalar('train/conf_loss', loss_conf_mean.avg, epoch)
            tb_writer.add_scalar('train/cls_loss', loss_cls_mean.avg, epoch)
            tb_writer.add_scalar('train/loss', loss_mean.avg, epoch)

            # mAP
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            evaluated = False  # whether the EMA model was evaluated this epoch
            if (not noval or final_epoch) and epoch % opt.eval_freq == 0:  # Calculate mAP
                ap_dict = evaluate(ema.ema, eval_function, opt.dataset_name, opt.img_dir,
                    nc, test_img_size, device)
                evaluated = True
                LOGGER.info(f'Eval metrics: {ap_dict}')
                tb_writer.add_scalar('eval/mAP', ap_dict['mAP'], epoch)
                for eval_key, eval_value in ap_dict.items():
                    if eval_key != 'mAP':
                        tb_writer.add_scalar(f'ap_per_class/{eval_key}', eval_value, epoch)
                fi = ap_dict['mAP']

            # Update best mAP
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            if (not nosave) or final_epoch:  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict()}

                # Save last, best and delete
                torch.save(ckpt, last)
                # Only weights that were actually evaluated this epoch may
                # replace best.pt; a carried-over mAP says nothing about them.
                if evaluated and best_fitness == fi:
                    torch.save(ckpt, best)
                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt

            # Stop Single-GPU
            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
                break

        # end epoch
    # end training
    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    ckpt = torch.load(f, map_location=device)
                    # Prefer the EMA weights when the checkpoint still has them.
                    model = ckpt['ema' if ckpt.get('ema') else 'model']
                    model.float().eval()

                    ap_dict = evaluate(model, eval_function, opt.dataset_name, opt.img_dir,
                        nc, test_img_size, device)
                    LOGGER.info(f'Eval metrics: {ap_dict}')

        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    torch.cuda.empty_cache()
Esempio n. 22
0
def main():
    """Run the full training script: setup, train/validate loop, checkpointing.

    Steps, in the order they are performed (the order matters for the
    AMP / EMA / DDP wrapping):

    1. Parse arguments and optionally start a wandb run.
    2. Configure single-process or distributed (NCCL) CUDA execution.
    3. Pick the mixed-precision backend (APEX, native, or none).
    4. Build data loaders, the model and the optimizer.
    5. Optionally resume from a checkpoint and create an EMA copy of the model.
    6. Wrap the model for distributed training, build the LR scheduler and
       the loss functions.
    7. On global rank 0 only, create the output directory and checkpoint saver.
    8. For each epoch: train, validate (EMA metrics take precedence when
       available), step the scheduler, append to ``summary.csv`` and save a
       checkpoint.

    A ``KeyboardInterrupt`` during the epoch loop ends training silently; the
    best metric recorded so far is still logged afterwards.
    """
    setup_default_logging()
    args, args_text = _parse_args()

    if args.log_wandb:
        if has_wandb:
            wandb.init(project=args.experiment, config=args)
        else:
            _logger.warning(
                "You've requested to log metrics to wandb but package not found. "
                "Metrics not being logged to wandb, try `pip install wandb`")

    # Defaults describe a single-process run on the first CUDA device; they are
    # overwritten below when WORLD_SIZE in the environment indicates more than
    # one process.
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        # One process per GPU: bind this process to the device given by
        # args.local_rank (presumably supplied by the launcher - confirm in
        # _parse_args), then join the process group using env:// rendezvous.
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
        _logger.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        _logger.info('Training with a single process on 1 GPUs.')
    assert args.rank >= 0

    # resolve AMP arguments based on PyTorch / Apex availability
    # use_amp ends up as 'apex', 'native', or None (plain float32).
    use_amp = None
    if args.amp:
        # `--amp` chooses native amp before apex (APEX ver not actively maintained)
        if has_native_amp:
            args.native_amp = True
        elif has_apex:
            args.apex_amp = True
    if args.apex_amp and has_apex:
        use_amp = 'apex'
    elif args.native_amp and has_native_amp:
        use_amp = 'native'
    elif args.apex_amp or args.native_amp:
        # A backend was requested but is not importable: warn and fall back
        # to float32 (use_amp stays None).
        _logger.warning(
            "Neither APEX or native Torch AMP is available, using float32. "
            "Install NVIDA apex or upgrade to PyTorch 1.6")

    # The global rank is passed along with the seed; presumably random_seed
    # uses it to vary the seed per process - confirm in random_seed.
    random_seed(args.seed, args.rank)

    if args.fuser:
        set_jit_fuser(args.fuser)

    # The returned mapping is indexed with 'train' and 'test'; the 'test'
    # split serves as the per-epoch evaluation loader.
    data_splits = get_data_splits_by_name(
        dataset_name=args.dataset_name,
        data_root=args.data_dir,
        batch_size=args.batch_size,
    )
    loader_train, loader_eval = data_splits['train'], data_splits['test']

    # Look up the model factory by (lower-cased model name, pretraining
    # dataset); the number of classes is taken from the training dataset's
    # `classes` attribute.
    model_wrapper_fn = MODEL_WRAPPER_REGISTRY.get(
        model_name=args.model.lower(),
        dataset_name=args.pretraining_original_dataset)
    model = model_wrapper_fn(pretrained=args.pretrained,
                             progress=True,
                             num_classes=len(loader_train.dataset.classes))

    # Informational logging throughout this function is gated on local rank 0.
    if args.local_rank == 0:
        _logger.info(
            f'Model {safe_model_name(args.model)} created, param count:{sum([m.numel() for m in model.parameters()])}'
        )

    # data_config is read later for 'input_size' when naming the experiment
    # output directory.
    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)

    # setup augmentation batch splits for contrastive loss or split bn
    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    # enable split bn (separate bn stats per batch-portion)
    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        # At least 2 splits are used even when only args.resplit is set.
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    # move model to GPU, enable channels last layout if set
    model.cuda()
    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    # setup synchronized BatchNorm for distributed training
    if args.distributed and args.sync_bn:
        # Split BN and sync BN are mutually exclusive here.
        assert not args.split_bn
        if has_apex and use_amp == 'apex':
            # Apex SyncBN preferred unless native amp is activated
            model = convert_syncbn_model(model)
        else:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if args.local_rank == 0:
            _logger.info(
                'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.'
            )

    # The optimizer is built after the model has been moved to the GPU and
    # its BN layers converted, and before AMP initialisation below.
    optimizer = create_optimizer_v2(model, **optimizer_kwargs(cfg=args))

    # setup automatic mixed-precision (AMP) loss scaling and op casting
    # `suppress` is used as a no-op context manager when autocast is not in
    # use (presumably contextlib.suppress - confirm against the file imports).
    amp_autocast = suppress  # do nothing
    loss_scaler = None
    if use_amp == 'apex':
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        loss_scaler = ApexScaler()
        if args.local_rank == 0:
            _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.')
    elif use_amp == 'native':
        amp_autocast = torch.cuda.amp.autocast
        loss_scaler = NativeScaler()
        if args.local_rank == 0:
            _logger.info(
                'Using native Torch AMP. Training in mixed precision.')
    else:
        if args.local_rank == 0:
            _logger.info('AMP not enabled. Training in float32.')

    # optionally resume from a checkpoint
    # With args.no_resume_opt set, the optimizer and loss scaler are not
    # passed to resume_checkpoint (only the model is).
    resume_epoch = None
    if args.resume:
        resume_epoch = resume_checkpoint(
            model,
            args.resume,
            optimizer=None if args.no_resume_opt else optimizer,
            loss_scaler=None if args.no_resume_opt else loss_scaler,
            log_info=args.local_rank == 0)

    # setup exponential moving average of model weights, SWA could be used here too
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEmaV2(
            model,
            decay=args.model_ema_decay,
            device='cpu' if args.model_ema_force_cpu else None)
        if args.resume:
            # Load the EMA weights from the same checkpoint into the EMA copy.
            load_checkpoint(model_ema.module, args.resume, use_ema=True)

    # setup distributed training
    if args.distributed:
        if has_apex and use_amp == 'apex':
            # Apex DDP preferred unless native amp is activated
            if args.local_rank == 0:
                _logger.info("Using NVIDIA APEX DistributedDataParallel.")
            model = ApexDDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                _logger.info("Using native Torch DistributedDataParallel.")
            model = NativeDDP(model,
                              device_ids=[args.local_rank],
                              broadcast_buffers=not args.no_ddp_bb)
        # NOTE: EMA model does not need to be wrapped by DDP

    # setup learning rate schedule and starting epoch
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        # Step the scheduler to the starting epoch when not starting from 0.
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        _logger.info('Scheduled epochs: {}'.format(num_epochs))

    # setup loss function
    # Training loss precedence: JSD > (BCE or label-smoothing CE when
    # smoothing is non-zero) > plain cross entropy. Validation always uses
    # plain cross entropy.
    if args.jsd_loss:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits,
                                        smoothing=args.smoothing)
    elif args.smoothing:
        if args.bce_loss:
            train_loss_fn = BinaryCrossEntropy(
                smoothing=args.smoothing,
                target_threshold=args.bce_target_thresh)
        else:
            train_loss_fn = LabelSmoothingCrossEntropy(
                smoothing=args.smoothing)
    else:
        train_loss_fn = nn.CrossEntropyLoss()
    train_loss_fn = train_loss_fn.cuda()
    validate_loss_fn = nn.CrossEntropyLoss().cuda()

    # setup checkpoint saver and eval metric tracking
    # saver and output_dir stay None on every process except global rank 0,
    # so only that process writes checkpoints, args.yaml and summary.csv.
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = None
    if args.rank == 0:
        if args.experiment:
            exp_name = args.experiment
        else:
            # Default experiment name: <timestamp>-<model>-<last input dim>.
            exp_name = '-'.join([
                datetime.now().strftime("%Y%m%d-%H%M%S"),
                safe_model_name(args.model),
                str(data_config['input_size'][-1])
            ])
        output_dir = get_outdir(
            args.output if args.output else './output/train', exp_name)
        # `decreasing` is True only for the 'loss' metric; presumably it tells
        # the saver that lower values are better - confirm in CheckpointSaver.
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(model=model,
                                optimizer=optimizer,
                                args=args,
                                model_ema=model_ema,
                                amp_scaler=loss_scaler,
                                checkpoint_dir=output_dir,
                                recovery_dir=output_dir,
                                decreasing=decreasing,
                                max_history=args.checkpoint_hist)
        # Persist the argument text next to the checkpoints.
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            # Pass the current epoch to the sampler when it supports it
            # (distributed runs only).
            if args.distributed and hasattr(loader_train.sampler, 'set_epoch'):
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_one_epoch(epoch,
                                            model,
                                            loader_train,
                                            optimizer,
                                            train_loss_fn,
                                            args,
                                            lr_scheduler=lr_scheduler,
                                            saver=saver,
                                            output_dir=output_dir,
                                            amp_autocast=amp_autocast,
                                            loss_scaler=loss_scaler,
                                            model_ema=model_ema)

            # Share BatchNorm running stats across processes before
            # evaluating; the boolean selects 'reduce' over 'broadcast'.
            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    _logger.info(
                        "Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model,
                                    loader_eval,
                                    validate_loss_fn,
                                    args,
                                    amp_autocast=amp_autocast)

            # The EMA model is evaluated only when it is not forced onto the
            # CPU.
            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast',
                                                         'reduce'):
                    distribute_bn(model_ema, args.world_size,
                                  args.dist_bn == 'reduce')
                ema_eval_metrics = validate(model_ema.module,
                                            loader_eval,
                                            validate_loss_fn,
                                            args,
                                            amp_autocast=amp_autocast,
                                            log_suffix=' (EMA)')
                # From here on the EMA metrics replace the raw model's
                # metrics for LR scheduling, the summary file and checkpoint
                # selection.
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if output_dir is not None:
                # The CSV header is written while best_metric is still None,
                # i.e. before the saver has reported a best metric
                # (presumably only on the first epoch of this run).
                update_summary(epoch,
                               train_metrics,
                               eval_metrics,
                               os.path.join(output_dir, 'summary.csv'),
                               write_header=best_metric is None,
                               log_wandb=args.log_wandb and has_wandb)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    epoch, metric=save_metric)

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training without a traceback so the best
        # result so far can still be reported below.
        pass
    if best_metric is not None:
        _logger.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))