Esempio n. 1
0
def _infer(model, root_path, test_loader=None):
    if test_loader is None:
        test_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            root_path,
            'test',
            transform=transforms.Compose([
                transforms.Resize(opts.imResize, interpolation=3),
                transforms.CenterCrop(opts.imsize),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])),
                                                  batch_size=opts.batchsize,
                                                  shuffle=False,
                                                  num_workers=4,
                                                  pin_memory=True)
        print('loaded {} test images'.format(len(test_loader.dataset)))

    outputs = []
    s_t = time.time()
    for idx, image in enumerate(test_loader):
        if torch.cuda.is_available():
            image = image.cuda()
        _, probs = model(image)
        output = torch.argmax(probs, dim=1)
        output = output.detach().cpu().numpy()
        outputs.append(output)

    outputs = np.concatenate(outputs)
    return outputs
Esempio n. 2
0
def main():
    """CLI entry point: self-supervised pre-training on the unlabeled split.

    Parses arguments into the global ``opts``, seeds all RNGs, builds the
    selected backbone with a 2-layer projection head, then runs a fixed
    200-epoch pretext-task loop over unlabeled images with ``nt_cross_entropy``
    as the loss, checkpointing every 2 epochs.
    """
    global opts
    opts = parser.parse_args()
    opts.cuda = 0

    # Set GPU
    # Seed python / numpy / torch RNGs for reproducibility.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Number of device: {}".format(torch.cuda.device_count()))
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # Backbone selection; -1 means "auto" batch size per architecture
    # (doubled in half precision, where activations use half the memory).
    # NOTE(review): if opts.model matches none of these branches, `model`
    # is never bound and `model.fc` below raises NameError.
    if opts.model == 'resnet18':
        model = resnet18(pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 1024 if opts.half else 512
    elif opts.model == 'resnet50':
        model = resnet50(pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 512 if opts.half else 256
    elif opts.model == 'resnet101':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet101', pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 360 if opts.half else 180
    elif opts.model == 'resnet152':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet152', pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 256 if opts.half else 128
    elif opts.model == 'ran56':
         model = ResidualAttentionModel_56()
         if opts.batchsize == -1:
            opts.batchsize = 140
    elif opts.model == 'ran92':
         model = ResidualAttentionModel_92()
         if opts.batchsize == -1:
            opts.batchsize = 80      
    

    # Swap the classifier for a 2-layer MLP projection head (ch -> ch);
    # presumably the embedding head for the contrastive pretext task --
    # TODO confirm against nt_cross_entropy's expectations.
    ch = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(ch, ch), nn.ReLU(), nn.Linear(ch, ch))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # model = torch.nn.DataParallel(model)
    model.eval()
    if opts.half:
        model.half()
    # Keep BatchNorm in fp32 even in half mode for numerical stability.
    for layer in model.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.float()

    # NOTE(review): `parameters` is computed but never used; the printed
    # count below includes non-trainable parameters as well.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print('  + Number of params: {}'.format(n_parameters))


    ### GPU Setup ###
    if use_gpu:
        model.cuda()
    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################
    
    # NOTE(review): bind_nsml is called again here (and a third time after
    # the DataParallel wrap below) -- presumably to rebind save/load hooks
    # to the current model object; verify this is intentional.
    bind_nsml(model)
    
    if opts.mode == 'train':
        model.train()
        # Set dataloader
        # 20% of the labeled data is held out for validation.
        train_ids, val_ids, unl_ids = split_ids(os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(len(train_ids), len(val_ids), len(unl_ids)))
        label_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'train', train_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  # transforms.Resize((opts.imsize, opts.imsize)),
                                  transforms.RandomHorizontalFlip(),
                                  # transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                                batch_size=opts.batchsize * 2, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
        print('train_loader done')

        # Unlabeled loader drives the pretext loop below; strong color jitter
        # provides the augmentations.
        unlabel_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'unlabel', unl_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                                batch_size=opts.batchsize, shuffle=True, num_workers=0, pin_memory=True, drop_last=True)
        print('unlabel_loader done')

        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'val', val_ids,
                               transform=transforms.Compose([
                                   transforms.Resize(opts.imResize),
                                   transforms.CenterCrop(opts.imsize),
                                   transforms.ToTensor(),
                                   transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                               batch_size=opts.batchsize, shuffle=False, num_workers=0, pin_memory=True, drop_last=False)
        print('validation_loader done')

        model = torch.nn.DataParallel(model) 
        model.to(device)
        bind_nsml(model)
        #Set optimizer
        # adaptive_lr applies lr = 0.3 * batchsize / 256 -- presumably the
        # linear-scaling rule for large batches; TODO confirm.
        # NOTE(review): if opts.optimizer is neither 'SGD' nor 'Adam',
        # `base_optimizer` is never bound -> NameError at the LARS check.
        if opts.optimizer == 'SGD':
            if opts.adaptive_lr:
                base_optimizer = optim.SGD(model.parameters(), lr=0.3*opts.batchsize/256)
            else:
                if opts.lr == -1:
                    base_optimizer = optim.SGD(model.parameters(), lr=0.001)
                else:
                    base_optimizer = optim.SGD(model.parameters(), lr=opts.lr)

        elif opts.optimizer == 'Adam':
            if opts.adaptive_lr:
                base_optimizer = optim.Adam(model.parameters(), lr=0.3*opts.batchsize/256)
            else:
                if opts.lr == -1:
                    base_optimizer = optim.Adam(model.parameters(), lr=0.001)
                else:
                    base_optimizer = optim.Adam(model.parameters(), lr=opts.lr)
        # Optionally wrap with LARS (layer-wise adaptive rate scaling) for
        # stability at very large batch sizes.
        if opts.LARS:
            optimizer = torchlars.LARS(optimizer=base_optimizer, eps=1e-8, trust_coef=0.001)
        else: 
            optimizer = base_optimizer

        # INSTANTIATE LOSS CLASS
        unlabel_criterion = nt_cross_entropy

        # INSTANTIATE STEP LEARNING SCHEDULER CLASS
        # NOTE(review): any other opts.scheduler value leaves `scheduler`
        # unbound -> NameError at scheduler.step() below.
        if opts.scheduler == 'linear':
            scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,  milestones=[50, 150], gamma=0.1)
        elif opts.scheduler == 'exp':
            scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1e-6)

        model.train()
        print('==================================')
        print(opts)
        print('==================================')
        print('starting pretask')
        total_iter = 0
        # Fixed 200-epoch pretext training loop over the unlabeled data.
        for epoch in range(1, 201):
            for it, data in enumerate(unlabel_loader):
                total_iter += 1
                # `data` is reshaped from (B, 2, C, H, W) to (2B, C, H, W);
                # presumably two augmented views per sample stacked into one
                # batch for the contrastive loss -- TODO confirm loader output.
                d = data.size()
                if opts.half:
                    x = data.view(d[0]*2, d[2], d[3], d[4]).half().to(device)
                else:
                    x = data.view(d[0]*2, d[2], d[3], d[4]).to(device)

                optimizer.zero_grad()
                p = model(x)
                if opts.half:
                    loss = unlabel_criterion(p.float())
                else: 
                    loss = unlabel_criterion(p)

                loss.backward()
                # Half-precision workaround: run the optimizer step in fp32,
                # then cast back to half, keeping BatchNorm layers in fp32.
                if opts.half:
                    model.float()
                optimizer.step()
                if opts.half:
                    model.half()
                    for layer in model.modules():
                        if isinstance(layer, nn.BatchNorm2d):
                            layer.float()
                print("epoch: ", epoch,  "loss: ", loss.item())
                nsml.report(summary=True, loss=loss.item(), step=total_iter)
            scheduler.step()
            print("epoch: ", epoch,  "loss: ", loss.item())
            # Checkpoint every 2 epochs (NSML session or local 'runs' dir).
            if (epoch) % 2 == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_pre{}'.format(epoch))
                else:
                    torch.save(model.state_dict(), os.path.join('runs', opts.name + '_pre{}'.format(epoch))) 
def main():
    """CLI entry point: semi-supervised training of Res18_basic with SemiLoss.

    Parses arguments into the global ``opts``, seeds RNGs, builds labeled /
    unlabeled / validation loaders, then trains with Adam + MultiStepLR,
    saving the best top-1 checkpoint and periodic epoch checkpoints.
    """
    global opts
    opts = parser.parse_args()
    opts.cuda = 0

    # Set GPU
    # Seed python / numpy / torch RNGs for reproducibility.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")


    # Set model
    model = Res18_basic(NUM_CLASSES)
    model.eval()

    # NOTE(review): `parameters` is computed but never used; the printed
    # count below includes non-trainable parameters as well.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print('  + Number of params: {}'.format(n_parameters))

    if use_gpu:
        model.cuda()

    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################

    if opts.mode == 'train':
        model.train()
        # Set dataloader
        # 20% of the labeled data is held out for validation.
        train_ids, val_ids, unl_ids = split_ids(os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(len(train_ids), len(val_ids), len(unl_ids)))
        # Train and unlabeled loaders share the same augmentation recipe:
        # random resized crop + horizontal/vertical flips + ImageNet norm.
        train_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'train', train_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomVerticalFlip(),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                                batch_size=opts.batchsize, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
        print('train_loader done')

        unlabel_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'unlabel', unl_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomVerticalFlip(),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                                batch_size=opts.batchsize, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
        print('unlabel_loader done')    

        # Validation uses a deterministic resize + center-crop pipeline.
        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'val', val_ids,
                               transform=transforms.Compose([
                                   transforms.Resize(opts.imResize),
                                   transforms.CenterCrop(opts.imsize),
                                   transforms.ToTensor(),
                                   transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])),
                               batch_size=opts.batchsize, shuffle=False, num_workers=4, pin_memory=True, drop_last=False)
        print('validation_loader done')

        # Set optimizer
        optimizer = optim.Adam(model.parameters(), lr=opts.lr)

        # INSTANTIATE LOSS CLASS
        train_criterion = SemiLoss()

        # INSTANTIATE STEP LEARNING SCHEDULER CLASS
        # LR is cut 10x at epochs 50 and 150.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,  milestones=[50, 150], gamma=0.1)

        # Train and Validation 
        best_acc = -1
        for epoch in range(opts.start_epoch, opts.epochs + 1):
            print('start training')
            loss, _, _ = train(opts, train_loader, unlabel_loader, model, train_criterion, optimizer, epoch, use_gpu)
            scheduler.step()

            print('start validation')
            # Track the best top-1 accuracy and checkpoint when it improves.
            acc_top1, acc_top5 = validation(opts, validation_loader, model, epoch, use_gpu)
            is_best = acc_top1 > best_acc
            best_acc = max(acc_top1, best_acc)
            if is_best:
                print('saving best checkpoint...')
                if IS_ON_NSML:
                    nsml.save(opts.name + '_best')
                else:
                    torch.save(model.state_dict(), os.path.join('runs', opts.name + '_best'))
            # Periodic checkpoint every opts.save_epoch epochs.
            if (epoch + 1) % opts.save_epoch == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(model.state_dict(), os.path.join('runs', opts.name + '_e{}'.format(epoch)))
Esempio n. 4
0
def main():
    """CLI entry point: three-phase semi-supervised training of MixSim_Model.

    Phases per epoch ranges: (1) pre-training on unlabeled data with NCELoss,
    (2) fine-tuning on labeled data with NCELoss, (3) distillation on both
    with SemiLoss. An EMA copy of the model is maintained via WeightEMA and
    used for validation and checkpointing.
    """
    global opts, global_step
    opts = parser.parse_args()
    opts.cuda = 0

    global_step = 0

    print(opts)

    # Set GPU
    # Seed python / numpy / torch RNGs for reproducibility.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)

    else:
        print("Currently using CPU (GPU is highly recommended)")

    ######################################################################
    # Set this value to True if we use "MixSim_Model".
    # "MixSim_Model" sends its parameters to gpu devices on its own for model parallel.
    # Doesn't apply for "MixSim_Model_Single", which uses single gpu. So set is_mixsim = False.
    ######################################################################
    is_mixsim = True
    need_pretraining = True

    # Set model
    model = MixSim_Model(NUM_CLASSES, opts.gpu_ids.split(','))
    model.eval()

    # set EMA model
    # EMA parameters are detached: they are updated by WeightEMA below,
    # never by backprop.
    ema_model = MixSim_Model(NUM_CLASSES, opts.gpu_ids.split(','))
    for param in ema_model.parameters():
        param.detach_()
    ema_model.eval()

    # NOTE(review): `parameters` is computed but never used; the printed
    # count below includes non-trainable parameters as well.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print('  + Number of params: {}'.format(n_parameters))

    # "MixSim_Model" sends its parameters to gpu devices on its own.
    if use_gpu and (not is_mixsim):
        model.cuda()
        ema_model.cuda()

    model_for_test = ema_model  # change this to model if ema_model is not used.

    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model_for_test)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################

    if opts.mode == 'train':
        # set multi-gpu (We won't use this with MixSim_Model)
        if len(opts.gpu_ids.split(',')) > 1 and (not is_mixsim):
            model = nn.DataParallel(model)
            ema_model = nn.DataParallel(ema_model)
        model.train()
        ema_model.train()

        ######################################################################
        # Data Augmentation for train data and unlabeled data
        ######################################################################
        data_transforms = transforms.Compose([
            transforms.RandomResizedCrop(opts.imsize, interpolation=3),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply(
                [transforms.ColorJitter(0.7, 0.7, 0.7, 0.2)], p=0.5),
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

        # Set dataloader
        # 20% of the labeled data is held out for validation.
        train_ids, val_ids, unl_ids = split_ids(
            os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(
            len(train_ids), len(val_ids), len(unl_ids)))
        train_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            DATASET_PATH, 'train', train_ids, transform=data_transforms),
                                                   batch_size=opts.batchsize,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True,
                                                   drop_last=True)
        print('train_loader done')

        # Unlabeled batches are larger by opts.unlabelratio.
        unlabel_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'unlabel',
                              unl_ids,
                              transform=data_transforms),
            batch_size=opts.batchsize * opts.unlabelratio,
            shuffle=True,
            num_workers=0,
            pin_memory=True,
            drop_last=True)
        print('unlabel_loader done')

        # Validation uses a deterministic resize + center-crop pipeline.
        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'val',
                              val_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize,
                                                    interpolation=3),
                                  transforms.CenterCrop(opts.imsize),
                                  transforms.ToTensor(),
                                  transforms.Normalize(
                                      mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),
                              ])),
            batch_size=opts.batchsize,
            shuffle=False,
            num_workers=0,
            pin_memory=True,
            drop_last=False)
        print('validation_loader done')

        # Negative value means "one full pass per epoch".
        if opts.steps_per_epoch < 0:
            opts.steps_per_epoch = len(train_loader)

        ######################################################################
        # Set Optimizer
        # Adamax and Yogi are optimization alogorithms based on Adam with more effective learning rate control.
        # LARS is layer-wise adaptive rate scaling
        # LARSWrapper, which is optimizer wraaper that uses LARS algorithms, helps stability with huge batch size.
        ######################################################################
        # optimizer = optim.Adam(model.parameters(), lr=opts.lr, weight_decay=5e-4)
        # optimizer = optim.Adamax(model.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
        # optimizer = LARSWrapper(t_optim.Yogi(model.parameters(), lr=0.01, eps=opts.optimizer_eps))
        optimizer = t_optim.Yogi(model.parameters(),
                                 lr=opts.optimizer_lr,
                                 eps=opts.optimizer_eps)
        ema_optimizer = WeightEMA(model,
                                  ema_model,
                                  lr=opts.ema_optimizer_lr,
                                  alpha=opts.ema_decay)

        # INSTANTIATE LOSS CLASS
        train_criterion_pre = NCELoss()
        train_criterion_fine = NCELoss()
        train_criterion_distill = SemiLoss()

        ######################################################################
        # INSTANTIATE STEP LEARNING SCHEDULER CLASS
        ######################################################################
        # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,  milestones=[50, 150], gamma=0.1)
        # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, eps=1e-3)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=opts.steps_per_epoch * opts.epochs // 10)

        # Train and Validation
        best_acc = -1
        best_weight_acc = [-1] * 5
        is_weighted_best = [False] * 5
        for epoch in range(opts.start_epoch, opts.epochs + 1):
            # print('start training')
            # Phase selection by epoch: pre-train, then fine-tune, then
            # distillation; the first two phases skip validation (continue).
            if (need_pretraining and epoch <= opts.pre_train_epoch):
                pre_loss = train_pre(opts, unlabel_loader, model,
                                     train_criterion_pre, optimizer,
                                     ema_optimizer, epoch, use_gpu, scheduler,
                                     is_mixsim)
                print(
                    'epoch {:03d}/{:03d} finished, pre_loss: {:.3f}:pre-training'
                    .format(epoch, opts.epochs, pre_loss))
                continue
            elif (need_pretraining
                  and epoch <= opts.pre_train_epoch + opts.fine_tune_epoch):
                loss, avg_top1, avg_top5 = train_fine(
                    opts, train_loader, model, train_criterion_fine, optimizer,
                    ema_optimizer, epoch, use_gpu, scheduler, is_mixsim)
                print(
                    'epoch {:03d}/{:03d} finished, loss: {:.3f}, avg_top1: {:.3f}%, avg_top5: {:.3f}%: fine-tuning'
                    .format(epoch, opts.epochs, loss, avg_top1, avg_top5))
                continue
            else:
                loss, loss_x, loss_u, avg_top1, avg_top5 = train_distill(
                    opts, train_loader, unlabel_loader, model,
                    train_criterion_distill, optimizer, ema_optimizer, epoch,
                    use_gpu, scheduler, is_mixsim)
                print(
                    'epoch {:03d}/{:03d} finished, loss: {:.3f}, loss_x: {:.3f}, loss_un: {:.3f}, avg_top1: {:.3f}%, avg_top5: {:.3f}%: distillation'
                    .format(epoch, opts.epochs, loss, loss_x, loss_u, avg_top1,
                            avg_top5))

            # scheduler.step()

            ######################################################################
            # For each weights=[0,0.5,1.0,1.5,2.0], save the best model with
            # best accuracy of (acc_top1 + weights * acc_top5).
            ######################################################################
            # print('start validation')
            # Validation runs on the EMA model, not the raw model.
            acc_top1, acc_top5 = validation(opts, validation_loader, ema_model,
                                            epoch, use_gpu)
            is_best = acc_top1 > best_acc
            best_acc = max(acc_top1, best_acc)
            # NOTE(review): only indices 0-3 are updated here, but the save
            # loop below iterates range(5); is_weighted_best[4] stays False
            # forever, so the fifth weighted checkpoint is never written.
            for w in range(4):
                is_weighted_best[w] = acc_top1 + (
                    (w + 1) * 0.5 * acc_top5) > best_weight_acc[w]
                best_weight_acc[w] = max(acc_top1 + ((w + 1) * 0.5 * acc_top5),
                                         best_weight_acc[w])
            if is_best:
                print(
                    'model achieved the best accuracy ({:.3f}%) - saving best checkpoint...'
                    .format(best_acc))
                if IS_ON_NSML:
                    nsml.save(opts.name + '_best')
                else:
                    torch.save(ema_model.state_dict(),
                               os.path.join('runs', opts.name + '_best'))
            for w in range(5):
                if (is_weighted_best[w]):
                    if IS_ON_NSML:
                        nsml.save(opts.name + '_{}w_best'.format(5 * (w + 1)))
                    else:
                        torch.save(
                            ema_model.state_dict(),
                            os.path.join(
                                'runs',
                                opts.name + '_{}w_best'.format(5 * (w + 1))))
            # Periodic checkpoint every opts.save_epoch epochs.
            if (epoch + 1) % opts.save_epoch == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        ema_model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))
Esempio n. 5
0
def main():
    """CLI entry point: semi-supervised training of Res18_basic with an EMA
    teacher.

    Parses arguments into the globals ``opts`` / ``global_step``, seeds RNGs,
    builds jitter-augmented labeled/unlabeled loaders, trains with Adam +
    SemiLoss while WeightEMA maintains an averaged copy of the weights, and
    checkpoints the EMA model (best top-1 and periodic epochs).
    """
    global opts, global_step
    opts = parser.parse_args()
    opts.cuda = 0

    global_step = 0

    print(opts)

    # Set GPU
    # Seed python / numpy / torch RNGs for reproducibility.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # Set model
    model = Res18_basic(NUM_CLASSES)
    model.eval()

    # set EMA model
    # EMA parameters are detached: they are updated by WeightEMA below,
    # never by backprop.
    ema_model = Res18_basic(NUM_CLASSES)
    for param in ema_model.parameters():
        param.detach_()
    ema_model.eval()

    # NOTE(review): `parameters` is computed but never used; the printed
    # count below includes non-trainable parameters as well.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print('  + Number of params: {}'.format(n_parameters))

    if use_gpu:
        model.cuda()
        ema_model.cuda()

    model_for_test = ema_model  # change this to model if ema_model is not used.

    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model_for_test)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################

    if opts.mode == 'train':
        # set multi-gpu
        if len(opts.gpu_ids.split(',')) > 1:
            model = nn.DataParallel(model)
            ema_model = nn.DataParallel(ema_model)
        model.train()
        ema_model.train()

        # Set dataloader
        # 20% of the labeled data is held out for validation.
        train_ids, val_ids, unl_ids = split_ids(
            os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(
            len(train_ids), len(val_ids), len(unl_ids)))
        # Shared strong color jitter applied with p=0.8 to both train and
        # unlabeled pipelines -- presumably SimCLR-style; TODO confirm.
        color_jitter = transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
        train_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            DATASET_PATH,
            'train',
            train_ids,
            transform=transforms.Compose([
                transforms.Resize(opts.imResize),
                transforms.RandomResizedCrop(opts.imsize),
                transforms.RandomApply([color_jitter], p=0.8),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])),
                                                   batch_size=opts.batchsize,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True,
                                                   drop_last=True)
        print('train_loader done')

        unlabel_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            DATASET_PATH,
            'unlabel',
            unl_ids,
            transform=transforms.Compose([
                transforms.Resize(opts.imResize),
                transforms.RandomResizedCrop(opts.imsize),
                transforms.RandomApply([color_jitter], p=0.8),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])),
                                                     batch_size=opts.batchsize,
                                                     shuffle=True,
                                                     num_workers=0,
                                                     pin_memory=True,
                                                     drop_last=True)
        print('unlabel_loader done')

        # Validation uses a deterministic resize + center-crop pipeline.
        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'val',
                              val_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.CenterCrop(opts.imsize),
                                  transforms.ToTensor(),
                                  transforms.Normalize(
                                      mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),
                              ])),
            batch_size=opts.batchsize,
            shuffle=False,
            num_workers=0,
            pin_memory=True,
            drop_last=False)
        print('validation_loader done')

        # Negative value means "one full pass per epoch".
        if opts.steps_per_epoch < 0:
            opts.steps_per_epoch = len(train_loader)

        # Set optimizer
        optimizer = optim.Adam(model.parameters(),
                               lr=opts.lr,
                               weight_decay=5e-4)
        ema_optimizer = WeightEMA(model,
                                  ema_model,
                                  lr=opts.lr,
                                  alpha=opts.ema_decay)

        # INSTANTIATE LOSS CLASS
        train_criterion = SemiLoss()

        # INSTANTIATE STEP LEARNING SCHEDULER CLASS
        # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,  milestones=[50, 150], gamma=0.1)

        # Train and Validation
        best_acc = -1
        for epoch in range(opts.start_epoch, opts.epochs + 1):
            # print('start training')
            loss, loss_x, loss_u, avg_top1, avg_top5 = train(
                opts, train_loader, unlabel_loader, model, train_criterion,
                optimizer, ema_optimizer, epoch, use_gpu)
            print(
                'epoch {:03d}/{:03d} finished, loss: {:.3f}, loss_x: {:.3f}, loss_un: {:.3f}, avg_top1: {:.3f}%, avg_top5: {:.3f}%'
                .format(epoch, opts.epochs, loss, loss_x, loss_u, avg_top1,
                        avg_top5))
            # scheduler.step()

            # print('start validation')
            # Validation runs on the EMA model, not the raw model.
            acc_top1, acc_top5 = validation(opts, validation_loader, ema_model,
                                            epoch, use_gpu)
            print(acc_top1)
            is_best = acc_top1 > best_acc
            best_acc = max(acc_top1, best_acc)
            if is_best:
                print(
                    'model achieved the best accuracy ({:.3f}%) - saving best checkpoint...'
                    .format(best_acc))
                if IS_ON_NSML:
                    nsml.save(opts.name + '_best')
                else:
                    torch.save(ema_model.state_dict(),
                               os.path.join('runs', opts.name + '_best'))
            # Periodic checkpoint every opts.save_epoch epochs.
            if (epoch + 1) % opts.save_epoch == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        ema_model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))
Esempio n. 6
0
def main():
    """Semi-supervised training entry point for ResidualAttentionModel_92_2.

    Stage 1 finetunes a classification head on labeled data only.
    Stage 2 then either applies UDA-style consistency regularization
    (when ``opts.isLP`` is False) or label propagation over the unlabeled
    set followed by a reduced-learning-rate stage 3 (when ``opts.isLP``
    is True). Checkpoints go to NSML when available, otherwise to ./runs.
    """
    global opts
    opts = parser.parse_args()
    opts.cuda = 0
    print(opts)
    # Seed every RNG source so runs are reproducible.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Number of device: {}".format(torch.cuda.device_count()))
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # Set models: attention backbone plus a 2-layer MLP classification head.
    model = ResidualAttentionModel_92_2()

    ch = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(ch, ch), nn.ReLU(),
                             nn.Linear(ch, NUM_CLASSES))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model.eval()

    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print('  + Number of params: {}'.format(n_parameters))

    ### GPU Setup ###
    if use_gpu:
        model.cuda()
    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################
    if opts.mode == 'train':
        # Weak augmentation: resize / random crop / horizontal flip only.
        weakTransform = transforms.Compose([
            transforms.Resize(opts.imResize),
            transforms.RandomResizedCrop(opts.imsize),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        # Strong augmentation: adds aggressive color jitter on top.
        StrongTransform = transforms.Compose([
            transforms.Resize(opts.imResize),
            transforms.RandomResizedCrop(opts.imsize),
            transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        # Deterministic center-crop pipeline for validation.
        validTransform = transforms.Compose([
            transforms.Resize(opts.imResize),
            transforms.CenterCrop(opts.imsize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

        # Set dataloader (0.2 of the labeled ids are held out for validation).
        train_ids, val_ids, unl_ids = split_ids(
            os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(
            len(train_ids), len(val_ids), len(unl_ids)))

        label_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            DATASET_PATH, 'train', train_ids, transform=weakTransform),
                                                   batch_size=opts.batchsize,
                                                   shuffle=True,
                                                   num_workers=4,
                                                   pin_memory=True,
                                                   drop_last=True)
        print('train_loader done')

        # BUGFIX: the original referenced undefined `strongTransform`
        # (lowercase s); the transform defined above is `StrongTransform`.
        unlabel_loader = torch.utils.data.DataLoader(SimpleImageLoader(
            DATASET_PATH, 'unlabel', unl_ids, transform=StrongTransform),
                                                     batch_size=opts.batchsize,
                                                     shuffle=True,
                                                     num_workers=4,
                                                     pin_memory=True,
                                                     drop_last=True)
        print('unlabel_loader done')

        unlabel_unsym_loader = torch.utils.data.DataLoader(  # Apply different augmentations to original images
            SimpleImageLoader(DATASET_PATH,
                              'unlabel',
                              unl_ids,
                              unsym=True,
                              transform=StrongTransform,
                              transform_base=weakTransform),
            batch_size=opts.batchsize,
            shuffle=True,
            num_workers=4,
            pin_memory=True,
            drop_last=True)
        print('unlabel_unsym_loader done')

        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'val',
                              val_ids,
                              transform=validTransform),
            batch_size=opts.batchsize,
            shuffle=False,
            num_workers=4,
            pin_memory=True,
            drop_last=False)
        print('validation_loader done')

        # Give the head a (ch -> ch) output so a pretrained checkpoint with
        # that shape can be loaded through NSML before finetuning.
        model.fc = nn.Sequential(nn.Linear(ch, ch), nn.ReLU(),
                                 nn.Linear(ch, ch))
        bind_nsml(model)
        # model.load(session="kaist007/fashion_dataset/", checkpoint=) # load pretrained model

        # Change layers for finetuning: freeze the backbone...
        for param in model.parameters():
            param.requires_grad = False

        # ...then replace the final layer with a trainable classifier head.
        model.fc = nn.Sequential(model.fc[0], nn.ReLU(),
                                 nn.Linear(ch, NUM_CLASSES))
        model = torch.nn.DataParallel(model)
        bind_nsml(model)
        model.to(device)

        # Set hyperparameters for finetuning
        args = loadFineTuningArgs(opts)

        ##### Finetuning-Stage 1: Classification using only labeled data #####

        optimizer = optim.Adam(model.parameters(), lr=args.lr_stg1)
        train_criterion = nn.CrossEntropyLoss()

        bestAcc = -1
        model.train()
        print('start training')
        for epoch in range(1, 1 + args.epoch_stg1):
            # BUGFIX: capture the epoch loss; the original referenced an
            # undefined `total_loss` below. Assumes fineTuning_base returns
            # the accumulated loss tensor -- TODO confirm its definition.
            total_loss = fineTuning_base(label_loader, model, train_criterion,
                                         optimizer)

            acc_top1, acc_top5 = validation(opts, validation_loader, model,
                                            epoch, use_gpu)
            is_best = acc_top1 > bestAcc
            bestAcc = max(acc_top1, bestAcc)
            print("epoch {} loss: {}".format(epoch, total_loss))
            # BUGFIX: guard NSML reporting like the later stages do, so the
            # script also runs outside NSML.
            if IS_ON_NSML:
                nsml.report(summary=True,
                            step=epoch,
                            loss=total_loss.item(),
                            accuracy_1=acc_top1,
                            accuracy_5=acc_top5)
            if is_best:
                print('saving best checkpoint...')
                if IS_ON_NSML:
                    nsml.save(opts.name + '_best')
                else:
                    torch.save(model.state_dict(),
                               os.path.join('runs', opts.name + '_best'))
            if (epoch) % 1 == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))

        if not opts.isLP:
            ##### Finetuning-Stage 2: consistency regularization with UDA #####

            assert (opts.isUDA)

            optimizer = optim.Adam(model.parameters(), lr=args.lr_stg2)
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=[6, 12, 18, 24], gamma=0.1)
            train_criterion = nn.CrossEntropyLoss()

            bestAcc = -1
            acc_top1, acc_top5 = validation(opts, validation_loader, model, 0,
                                            use_gpu)
            # BUGFIX: the original printed undefined `prec1`/`prec5` here;
            # the values just computed are `acc_top1`/`acc_top5`.
            print("Starting Accuracy| top-1: {}, top-5: {}".format(
                acc_top1, acc_top5))
            model.train()
            print('start training')
            for epoch in range(1, 1 + args.epoch_stg2 + args.epoch_stg3):
                fineTuning_UDA(label_loader, unlabel_unsym_loader, model,
                               optimizer, scheduler)

                acc_top1, acc_top5 = validation(opts, validation_loader, model,
                                                epoch, use_gpu)
                is_best = acc_top1 > bestAcc
                bestAcc = max(acc_top1, bestAcc)
                # BUGFIX: report only when NSML is available (consistent with
                # the label-propagation branch below).
                if IS_ON_NSML:
                    nsml.report(summary=True,
                                step=epoch,
                                accuracy_1=acc_top1,
                                accuracy_5=acc_top5)
                if is_best:
                    print('saving best checkpoint...')
                    if IS_ON_NSML:
                        nsml.save(opts.name + '_best')
                    else:
                        torch.save(model.state_dict(),
                                   os.path.join('runs', opts.name + '_best'))
                if (epoch) % 1 == 0:
                    if IS_ON_NSML:
                        nsml.save(opts.name + '_e{}'.format(epoch))
                    else:
                        torch.save(
                            model.state_dict(),
                            os.path.join('runs',
                                         opts.name + '_e{}'.format(epoch)))

        else:
            ##### Finetuning Stage2: Label propagation occurs and unlabeled data is included for task #####

            # Unfreeze the whole network for label-propagation training.
            for param in model.parameters():
                param.requires_grad = True

            # Prepare dataloader for label propagation
            LPLoader, LPLoaderNoshuff, LPData = createTrainLoader(
                weakTransform,
                validTransform,
                DATASET_PATH, (train_ids, unl_ids),
                args,
                uda=False,
                uda_transformation=StrongTransform)

            # Starting Accuracy
            prec1, prec5 = validation(opts, validation_loader, model, -1,
                                      use_gpu)
            print("Starting Accuracy| top-1: {}, top-5: {}".format(
                prec1, prec5))

            model.train()
            optimizer = torch.optim.SGD(model.parameters(),
                                        args.lr_stg2,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
            bestAcc = -1

            # Extracts features and conduct label propagation
            print('Start training')
            print('Extracting features...')
            feats = extractFeatures(LPLoaderNoshuff, model)
            LPData.updatePLabels(feats, k=args.dfs_k, max_iter=20)

            # BUGFIX: iterate for epoch_stg2 epochs; the original used the
            # learning rate `args.lr_stg2` as the range bound.
            for epoch in range(1, 1 + args.epoch_stg2):
                fineTuning_LP(LPLoader, model, optimizer, epoch, uda=True)
                acc_top1, acc_top5 = validation(opts, validation_loader, model,
                                                epoch, use_gpu)
                is_best = acc_top1 > bestAcc
                bestAcc = max(acc_top1, bestAcc)
                if IS_ON_NSML:
                    nsml.report(summary=True,
                                step=epoch,
                                accuracy_1=acc_top1,
                                accuracy_5=acc_top5)
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))
                if is_best:
                    print('saving best checkpoint...')
                    if IS_ON_NSML:
                        nsml.save(opts.name + '_best')
                    else:
                        torch.save(model.state_dict(),
                                   os.path.join('runs', opts.name + '_best'))

            ##### Finetuning Stg3: reduced learning rate, addition of consistency loss #####

            # NOTE(review): stage 3 calls createLPTrainLoader while stage 2
            # calls createTrainLoader -- confirm both helpers exist upstream.
            LPLoader, LPLoaderNoshuff, LPData = createLPTrainLoader(
                weakTransform,
                validTransform,
                DATASET_PATH, (train_ids, unl_ids),
                args,
                uda=opts.isUDA,
                uda_transformation=StrongTransform)

            # Starting Accuracy
            prec1, prec5 = validation(opts, validation_loader, model, -1,
                                      use_gpu)
            print("Starting Accuracy| top-1: {}, top-5: {}".format(
                prec1, prec5))
            model.train()
            optimizer = torch.optim.SGD(model.parameters(),
                                        args.lr_stg3,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
            bestAcc = -1

            # Extracts features and conduct label propagation
            print('Start training')
            print('Extracting features...')
            feats = extractFeatures(LPLoaderNoshuff, model)
            LPData.updatePLabels(feats, k=args.dfs_k, max_iter=20)

            # BUGFIX: iterate for epoch_stg3 epochs; the original used the
            # learning rate `args.lr_stg3` as the range bound.
            for epoch in range(1, 1 + args.epoch_stg3):
                fineTuning_LP(LPLoader, model, optimizer, epoch, uda=True)
                acc_top1, acc_top5 = validation(opts, validation_loader, model,
                                                epoch, use_gpu)
                is_best = acc_top1 > bestAcc
                bestAcc = max(acc_top1, bestAcc)
                if IS_ON_NSML:
                    nsml.report(summary=True,
                                step=epoch,
                                accuracy_1=acc_top1,
                                accuracy_5=acc_top5)
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))
                if is_best:
                    print('saving best checkpoint...')
                    if IS_ON_NSML:
                        nsml.save(opts.name + '_best')
                    else:
                        torch.save(model.state_dict(),
                                   os.path.join('runs', opts.name + '_best'))
Esempio n. 7
0
def main():
    """Entry point: semi-supervised contrastive training.

    Parses CLI options, seeds all RNG sources, builds labeled/unlabeled/
    validation loaders, then runs the train/validate loop with LR warmup
    followed by cosine annealing, checkpointing the best top-1 model.
    Optionally resumes from a pretrained NSML session (opts.use_pretrained).
    """
    global opts, global_step
    opts = parser.parse_args()
    opts.cuda = 0
    global_step = 0
    print(opts)

    # Seed every RNG source for reproducibility.
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    # NOTE(review): `device` is assigned but not referenced later in this
    # function; the model is moved with .cuda() below instead.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # Set model: backbone with a projection head of opts.out_dim dimensions.
    model = contrastive_model(NUM_CLASSES, opts.out_dim)

    if use_gpu:
        model.cuda()

    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################

    if opts.mode == 'train':
        # set multi-gpu
        if len(opts.gpu_ids.split(',')) > 1:
            model = nn.DataParallel(model)
        model.train()

        # Set dataloader (0.2 of labeled ids are held out for validation).
        train_ids, val_ids, unl_ids = split_ids(
            os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(
            len(train_ids), len(val_ids), len(unl_ids)))

        # Set transforms for train: random crop/flip/rotation augmentation.
        train_transforms = transforms.Compose([
            transforms.Resize(opts.imResize),
            transforms.RandomResizedCrop(opts.imsize, scale=(0.8, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # Deterministic center-crop pipeline for evaluation.
        eval_transforms = transforms.Compose([
            transforms.Resize(opts.imResize),
            transforms.CenterCrop(opts.imsize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        train_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'train',
                              train_ids,
                              transform=train_transforms),
            batch_size=opts.batchsize_label,
            shuffle=True,
            num_workers=opts.num_worker,
            pin_memory=True,
            drop_last=True)
        print('train_loader done')

        unlabel_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'unlabel',
                              unl_ids,
                              transform=train_transforms),
            batch_size=opts.batchsize_unlabel,
            shuffle=True,
            num_workers=opts.num_worker,
            pin_memory=True,
            drop_last=True)
        print('unlabel_loader done')

        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH,
                              'val',
                              val_ids,
                              transform=eval_transforms),
            batch_size=opts.batchsize_label,
            shuffle=False,
            num_workers=opts.num_worker,
            pin_memory=True,
            drop_last=False)
        print('validation_loader done')

        # Set optimizer: a separate (lower) LR is used when continuing from
        # a pretrained session.
        if opts.use_pretrained:
            optimizer = optim.SGD(model.parameters(),
                                  lr=opts.combined_lr,
                                  momentum=opts.momentum,
                                  weight_decay=1e-6)
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=opts.lr,
                                  momentum=opts.momentum,
                                  weight_decay=1e-6)

        # Cosine annealing over one epoch's worth of unlabeled batches.
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=len(unlabel_loader), eta_min=0, last_epoch=-1)
        criterion = ContrastiveLoss(opts.temperature, opts.consistency_epoch,
                                    opts.batchsize_label,
                                    opts.batchsize_unlabel, NUM_CLASSES,
                                    opts.smoothing)
        opts.steps_per_epoch = len(unlabel_loader)

        # Train and Validation
        best_acc = 0
        # Load model from other session
        if opts.use_pretrained:
            nsml.load(checkpoint=opts.ckpt, session=opts.session)
            nsml.save('saved')

        for epoch in range(opts.start_epoch, opts.epochs + 1):
            if opts.use_pretrained:
                # Skip epochs already covered by the pretrained session.
                if epoch <= opts.consistency_epoch:
                    continue

                # Linear warmup for warmup_epoch epochs after the
                # consistency phase, then cosine annealing.
                if epoch > opts.consistency_epoch and epoch <= opts.consistency_epoch + opts.warmup_epoch:
                    for g in optimizer.param_groups:
                        g['lr'] = opts.combined_lr * (
                            epoch - opts.consistency_epoch) / opts.warmup_epoch
                else:
                    scheduler.step()
            else:
                if epoch <= opts.warmup_epoch:  # warm up
                    for g in optimizer.param_groups:
                        g['lr'] = opts.lr * epoch / opts.warmup_epoch
                else:
                    scheduler.step()

            loss, avg_top1, avg_top5 = train(opts, train_loader,
                                             unlabel_loader, model, criterion,
                                             optimizer, epoch, use_gpu)
            print(
                'epoch {:03d}/{:03d} finished, loss: {:.3f}, avg_top1: {:.3f}%, avg_top5: {:.3f}%'
                .format(epoch, opts.epochs, loss, avg_top1, avg_top5))

            acc_top1, acc_top5 = validation(opts, validation_loader, model,
                                            epoch, use_gpu)
            # Checkpoint whenever validation top-1 improves.
            is_best = acc_top1 > best_acc
            if is_best:
                best_acc = acc_top1
                print(
                    'model achieved the best accuracy ({:.3f}%) - saving best checkpoint...'
                    .format(best_acc))
                if IS_ON_NSML:
                    nsml.save(opts.name + '_best')
                else:
                    torch.save(model.state_dict(),
                               os.path.join('runs', opts.name + '_best'))
            # Periodic checkpoint every save_epoch epochs.
            if (epoch + 1) % opts.save_epoch == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_e{}'.format(epoch))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join('runs', opts.name + '_e{}'.format(epoch)))
Esempio n. 8
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process worker for (optionally distributed) linear evaluation.

    Freezes the backbone, trains only the final fc layer, optionally
    loading MoCo-pretrained encoder_q weights, and runs the standard
    train/validate/checkpoint loop. `gpu` is this process's device index;
    `ngpus_per_node` is used to derive the global rank and split the batch.
    """
    global best_acc1
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")

            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('module.encoder_q'
                                ) and not k.startswith('module.encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("module.encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]

            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            # Only the freshly-initialized fc layer may be missing.
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # traindir = os.path.join(args.data, 'train')
    # valdir = os.path.join(args.data, 'val')
    # NOTE(review): `normalize` is unused below; the Compose pipelines embed
    # their own Normalize.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     transforms.Compose([
    #         transforms.RandomResizedCrop(224),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         normalize,
    #     ]))

    # Shared augmentation pipeline; note it is also applied to the
    # validation/test splits below (random crops/flips at eval time).
    default_augmentations = [
        transforms.Resize(args.imResize),
        transforms.RandomResizedCrop(args.imsize),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]

    train_dataset = SimpleImageLoader(
        args, 'train', transform=transforms.Compose(default_augmentations))
    valid_dataset = SimpleImageLoader(
        args,
        'validation',
        transform=transforms.Compose(default_augmentations))
    # unlabel_dataset = SimpleImageLoader(args, 'unlabel',
    #                       transform=transforms.Compose(default_augmentations))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
    #     num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    # val_loader = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Resize(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    val_loader = torch.utils.data.DataLoader(valid_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             drop_last=False)

    # unlabel_loader = torch.utils.data.DataLoader(
    #     unlabel_dataset, batch_size=opts.batchsize, shuffle=False,
    #     num_workers=4, pin_memory=True, drop_last=False)

    if args.evaluate:
        # NOTE(review): test_loader is built but never used -- the branch
        # validates on val_loader; confirm whether test evaluation was meant.
        test_dataset = SimpleImageLoader(
            args, 'test', transform=transforms.Compose(default_augmentations))
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=True,
                                                  drop_last=False)

        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle the distributed sampler each epoch.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Only the master process (rank 0 per node) writes checkpoints.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
            if epoch == args.start_epoch:
                # Verify the frozen backbone weights were not modified.
                sanity_check(model.state_dict(), args.pretrained)
Esempio n. 9
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point for (distributed) MoCo pre-training.

    Initializes the process group, builds a MoCo model wrapped in
    DistributedDataParallel, optionally resumes from a checkpoint, builds
    the data loaders, and runs the train / checkpoint loop.

    Args:
        gpu: local GPU index assigned to this process (may be ``None``).
        ngpus_per_node: number of GPUs on this node; used to compute the
            global rank and to split ``batch_size`` / ``workers`` per process.
        args: parsed command-line namespace. NOTE: mutated in place —
            ``gpu``, ``rank``, ``batch_size``, ``workers`` and
            ``start_epoch`` may all be rewritten below.

    Side effects:
        Replaces ``builtins.print`` with a no-op on non-master processes,
        sets the current CUDA device, and enables ``cudnn.benchmark``.
    """
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        # No-op replacement; installed process-wide so library code is
        # silenced too, not just this module.
        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        # Rank -1 with env:// means "read the rank from the launcher's env".
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(models.__dict__[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t, args.mlp)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            # Ceiling division so every process gets at least one worker.
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            # Best-effort: a missing checkpoint is reported but not fatal.
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # traindir = os.path.join(args.data, 'train')
    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     moco.loader.TwoCropsTransform(transforms.Compose(augmentation)))
    # NOTE(review): default_augmentations is defined but never used in this
    # function — every dataset below is built with `augmentation` instead.
    # Confirm whether the validation/unlabel datasets were meant to use it.
    default_augmentations = [
        transforms.Resize(args.imResize),
        transforms.RandomResizedCrop(args.imsize, scale=(0.2, 1.)),
        transforms.RandomApply(
            [
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
            ],
            p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]

    # NOTE(review): the validation and unlabel splits reuse the *random
    # training* augmentation pipeline; evaluation code elsewhere in this file
    # uses a deterministic Resize + CenterCrop transform — verify intent.
    train_dataset = SimpleImageLoader(
        args, 'train', transform=transforms.Compose(augmentation))
    valid_dataset = SimpleImageLoader(
        args, 'validation', transform=transforms.Compose(augmentation))
    unlabel_dataset = SimpleImageLoader(
        args, 'unlabel', transform=transforms.Compose(augmentation))

    if args.distributed:
        # DistributedSampler partitions the dataset across processes;
        # shuffle must then be off on the DataLoader itself.
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # drop_last=True keeps every batch full, which MoCo's queue update needs.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    # NOTE(review): valid_loader and unlabel_loader are constructed but never
    # consumed in the training loop below — dead code unless used elsewhere.
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True,
                                               drop_last=False)

    unlabel_loader = torch.utils.data.DataLoader(unlabel_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=4,
                                                 pin_memory=True,
                                                 drop_last=False)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Re-seed the sampler so each epoch sees a different shuffle.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # Only the (per-node) master process writes checkpoints; every epoch
        # is saved unconditionally (is_best=False) since pre-training has no
        # per-epoch validation metric here.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename='checkpoint_{:04d}.pth.tar'.format(epoch))