Beispiel #1
0
def main():
    """Train a model against the ``lemniscate`` module and evaluate it.

    Steps, in order: parse the command line into the module-level ``args``;
    optionally initialise the distributed process group; build the model and
    wrap it for (distributed) data-parallel execution on CUDA; build the
    train/val loaders; create the ``lemniscate`` module and the criterion;
    optionally resume from ``args.resume``; then either run a single kNN
    evaluation (``args.evaluate``) or train for the remaining epochs,
    saving a checkpoint after each one and finishing with a kNN evaluation.

    Reads and rebinds the module-level ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # More than one participating process means distributed training.
    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # --- model -------------------------------------------------------------
    # Pre-trained models are built with ``pretrained=True``; fresh ones get
    # the requested embedding size instead.
    if args.pretrained:
        banner, ctor_kwargs = "=> using pre-trained model '{}'", {'pretrained': True}
    else:
        banner, ctor_kwargs = "=> creating model '{}'", {'low_dim': args.low_dim}
    print(banner.format(args.arch))
    model = models.__dict__[args.arch](**ctor_kwargs)

    if args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.arch.startswith(('alexnet', 'vgg')):
        # For these architectures only the ``features`` sub-module is
        # wrapped in DataParallel.
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # --- data --------------------------------------------------------------
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomGrayscale(p=0.2),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = datasets.ImageFolderInstance(
        os.path.join(args.data, 'train'), train_transform)

    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(train_dataset)
        if args.distributed else None)

    # DataLoader shuffles only when no sampler is supplied, i.e. when not
    # running distributed.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=not args.distributed,
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    val_dataset = datasets.ImageFolderInstance(
        os.path.join(args.data, 'val'), val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # --- lemniscate and loss function (criterion) ---------------------------
    ndata = len(train_dataset)
    if args.nce_k > 0:
        lemniscate = NCEAverage(
            args.low_dim, ndata, args.nce_k, args.nce_t, args.nce_m).cuda()
        criterion = NCECriterion(ndata).cuda()
    else:
        lemniscate = LinearAverage(
            args.low_dim, ndata, args.nce_t, args.nce_m).cuda()
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # --- optionally resume from a checkpoint --------------------------------
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # The lemniscate object itself (not a state dict) is stored in
            # the checkpoint and replaces the freshly built one.
            lemniscate = checkpoint['lemniscate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

    cudnn.benchmark = True

    # Evaluation-only mode: one kNN pass, no training.
    if args.evaluate:
        kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # Train for one epoch, then score on the validation set.
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)
        prec1 = NN(epoch, model, lemniscate, train_loader, val_loader)

        # Track the best prec@1 seen so far and save a checkpoint.
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        state = {
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'lemniscate': lemniscate,
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(state, is_best)

    # Evaluate kNN after the last epoch.
    kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
Beispiel #2
0
def main():
    """Fine-tune a new classifier head on top of a frozen pre-trained model.

    Reads the options from ``get_user_args()``, builds ImageFolder datasets
    and loaders for the ``train`` / ``valid`` / ``test`` sub-folders of
    ``args.data``, freezes every pre-trained parameter, replaces
    ``model.classifier`` with a small fully-connected head, trains that head
    and saves a checkpoint.

    Publishes ``args``, ``dataloaders``, ``data_sizes`` and ``image_sets`` as
    module-level globals so other functions can use them.

    Requires that the chosen architecture exposes an indexable
    ``model.classifier`` whose first element has ``in_features``.
    """
    # so other functions can access these variables
    global args, dataloaders, data_sizes, image_sets
    args = get_user_args()

    # Use the first CUDA device only when it is available AND requested.
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and args.gpu) else "cpu")

    print('=> beginning training using {}'.format(str(device).upper()))

    # lets the user know which model is being trained
    print('=> creating model: {}'.format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)

    print('* ' * 20)

    # Image location with child folders train / valid / test.
    data_dir = Path(args.data)
    states = ['train', 'valid', 'test']
    dirs_dict = {state: data_dir / state for state in states}

    # Per-channel normalization parameters, predefined.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # Validation and test data share the same deterministic pipeline.
    valid_test_transforms = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize]

    data_transforms = {
        # Random augmentation is applied to the training split only.
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomRotation(30),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            normalize
        ]),
        'valid': transforms.Compose(valid_test_transforms),
        'test': transforms.Compose(valid_test_transforms),
    }

    image_sets = {
        i_set: datasets.ImageFolder(
            dirs_dict[i_set], transform=data_transforms[i_set])
        for i_set in states
    }

    # Only the training loader shuffles.
    dataloaders = {
        'train': torch.utils.data.DataLoader(
            image_sets['train'], batch_size=args.batch_size, shuffle=True),
        'valid': torch.utils.data.DataLoader(
            image_sets['valid'], batch_size=args.batch_size),
        'test': torch.utils.data.DataLoader(
            image_sets['test'], batch_size=args.batch_size),
    }
    classes = image_sets['train'].classes
    data_sizes = {x: len(image_sets[x]) for x in states}

    # Freeze the pre-trained parameters; the replacement head created below
    # gets fresh parameters that do require gradients.
    for p in model.parameters():
        p.requires_grad = False

    classifier = nn.Sequential(
        OrderedDict([
            ('fc1', nn.Linear(
                model.classifier[0].in_features, args.hidden_units)),
            # Name fixed: it was 'relu1,' with a stray comma. ReLU holds no
            # parameters, so state_dict keys are unaffected.
            ('relu1', nn.ReLU()),
            ('dropout', nn.Dropout(args.dropout)),
            ('fc2', nn.Linear(args.hidden_units, len(classes))),
            ('output', nn.LogSoftmax(dim=1)),
        ]))

    model.classifier = classifier

    # Move the model to the device only AFTER the head has been swapped in.
    # Moving it earlier left the new classifier on the CPU while the rest of
    # the network was on the GPU. Doing it before the optimizer is built
    # follows the usual PyTorch recommendation.
    model.to(device)

    # NLLLoss pairs with the LogSoftmax output layer above.
    criterion = nn.NLLLoss()

    # Only the new head is optimized.
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=args.learning_rate)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.0125)

    model_trained = train(model, optimizer, criterion,
                          scheduler, args.epochs, device)

    save_checkpoint(model_trained, args.epochs, args.save_dir,
                    args.arch, args.learning_rate, optimizer, args.hidden_units)
Beispiel #3
0
# import matplotlib.pyplot as plt
from tqdm import tqdm # Displays a progress bar

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, Subset, DataLoader, random_split


# Load the dataset and its train / test splits from image folders.
print("Loading datasets...")

# Training pipeline: random crop for augmentation, then tensor conversion
# and per-channel normalization.
DATA_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),  # Transform from [0,255] uint8 to [0,1] float
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Test pipeline: deterministic resize + centre crop so that evaluation
# results are reproducible. The test split previously reused the random
# training crop, which made every evaluation pass see different inputs.
TEST_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

training_data_dir = './data/Training'
testing_data_dir = './data/Testing'
TUMOR_train = datasets.ImageFolder(training_data_dir, DATA_transform)
TUMOR_test = datasets.ImageFolder(testing_data_dir, TEST_transform)
print(TUMOR_test.classes)

print("Done!")
Beispiel #4
0
def main_worker(gpu, ngpus_per_node, args):
    """Build, optionally resume, and train/evaluate a model in one worker.

    Args:
        gpu: GPU index assigned to this worker, or ``None`` to let the
            (Distributed)DataParallel wrappers use every visible device.
        ngpus_per_node: Number of GPUs per node. Used to derive the global
            rank and to split the batch size / loader workers per process.
        args: Parsed options namespace. It is mutated in place: ``gpu``,
            ``rank``, ``qw``, ``qa``, ``batch_size``, ``workers`` and
            ``log_name`` may be overwritten.

    Returns:
        None. Returns early after writing the key map when ``args.gen_map``
        is set, and after one validation pass when ``args.evaluate`` is set.

    Raises:
        KeyError: If ``args.arch`` (or ``args.original_model``) is found in
            neither ``models`` nor ``imagenet_extra_models``.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    if args.gen_map:
        args.qw = -1
        args.qa = -1

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
    else:
        print("=> creating model '{}'".format(args.arch))
    # Look the architecture up explicitly instead of wrapping the constructor
    # call in ``try/except KeyError``. That way a KeyError raised *inside* a
    # model constructor is no longer mistaken for "architecture not in
    # ``models``".
    if args.arch in models.__dict__:
        model = models.__dict__[args.arch](pretrained=args.pretrained)
        # qw/qa are reset to -1 for models taken from ``models``
        # (presumably meaning "no quantization" -- TODO confirm).
        args.qw = -1
        args.qa = -1
    elif 'bp' in args.arch:
        model = imagenet_extra_models.__dict__[args.arch](
            pretrained=args.pretrained,
            nbits_w=args.qw,
            log=args.quan_log,
            increase_factor=args.increase_factor)
    else:
        model = imagenet_extra_models.__dict__[args.arch](
            pretrained=args.pretrained)
    print('model:\n=========\n{}\n=========='.format(model))

    if args.gen_map:
        # Key-map generation mode: pair this model's state-dict keys with
        # those of the original model, dump the mapping as JSON, and stop.
        if args.original_model in models.__dict__:
            ori_model = models.__dict__[args.original_model]()
        else:
            ori_model = imagenet_extra_models.__dict__[args.original_model]()
        print('original model:\n=========\n{}\n=========='.format(ori_model))

        key_map = gen_key_map(model.state_dict(), ori_model.state_dict())

        with open('models/imagenet/{}_map.json'.format(args.arch), 'w') as wf:
            json.dump(key_map, wf)
        print('Generate key map done')
        return

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # Weight decay is configured per parameter group by add_weight_decay,
    # which is why SGD itself receives no weight_decay argument.
    params = add_weight_decay(model,
                              weight_decay=args.weight_decay,
                              skip_keys=['expand_'])
    optimizer = torch.optim.SGD(params, args.lr, momentum=args.momentum)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            # Only the model weights are restored. The start epoch, best
            # accuracy and optimizer state stored in the checkpoint are
            # deliberately not reloaded here.
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # Shuffle in the loader only when no sampler is supplied.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    args.log_name = 'logger/{}_{}'.format(args.arch, args.log_name)
    # NOTE(review): the text dump below is written inside args.log_name, so
    # this relies on SummaryWriter having created that directory -- confirm.
    writer = SummaryWriter(args.log_name)
    with open('{}/{}.txt'.format(args.log_name, args.arch), 'w') as wf:
        wf.write(str(model))

    # Warm-up schedule that hands over to cosine annealing afterwards.
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs)
    scheduler_warmup = GradualWarmupScheduler(optimizer,
                                              multiplier=10,
                                              total_epoch=3,
                                              after_scheduler=scheduler_cosine)

    # With multiprocessing-distributed training only the first process of
    # each node writes checkpoint files, so that several processes never
    # write the same path at once. The short-circuit keeps ``args.rank``
    # untouched when multiprocessing is off.
    writes_checkpoints = (not args.multiprocessing_distributed
                          or args.rank % ngpus_per_node == 0)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        scheduler_warmup.step()

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, writer)

        # evaluate on validation set
        acc1, acc5 = validate(val_loader, model, criterion, args)
        writer.add_scalar('val/acc1', acc1, epoch)
        writer.add_scalar('val/acc5', acc5, epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if writes_checkpoints:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                prefix='{}/{}'.format(args.log_name, args.arch))
            # Extra weights-only backup every 10th epoch. It used to run on
            # every rank; it now shares the guard of the regular checkpoint.
            if epoch % 10 == 0:
                save_checkpoint_backup(model.state_dict(),
                                       prefix='{}/{}_{}'.format(
                                           args.log_name, args.arch, epoch))
    writer.close()
Beispiel #5
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--world_size',
                        type=int,
                        default=1,
                        help='number of GPUs to use')

    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--wd',
                        type=float,
                        default=1e-4,
                        help='weight decay (default: 5e-4)')
    parser.add_argument('--lr-decay-every',
                        type=int,
                        default=100,
                        help='learning rate decay by 10 every X epochs')
    parser.add_argument('--lr-decay-scalar',
                        type=float,
                        default=0.1,
                        help='--')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--run_test',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='run test only')

    parser.add_argument(
        '--limit_training_batches',
        type=int,
        default=-1,
        help='how many batches to do per training, -1 means as many as possible'
    )

    parser.add_argument('--no_grad_clip',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='turn off gradient clipping')

    parser.add_argument('--get_flops',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='add hooks to compute flops')

    parser.add_argument(
        '--get_inference_time',
        default=False,
        type=str2bool,
        nargs='?',
        help='runs valid multiple times and reports the result')

    parser.add_argument('--mgpu',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='use data paralization via multiple GPUs')

    parser.add_argument('--dataset',
                        default="MNIST",
                        type=str,
                        help='dataset for experiment, choice: MNIST, CIFAR10')

    parser.add_argument('--data',
                        metavar='DIR',
                        default='/imagenet',
                        help='path to imagenet dataset')

    parser.add_argument(
        '--model',
        default="lenet3",
        type=str,
        help='model selection, choices: lenet3, vgg, mobilenetv2, resnet18',
        choices=[
            "lenet3", "vgg", "mobilenetv2", "resnet18", "resnet152",
            "resnet50", "resnet50_noskip", "resnet20", "resnet34", "resnet101",
            "resnet101_noskip", "densenet201_imagenet", 'densenet121_imagenet',
            "multprun_gate5_gpu_0316_1", "mult_prun8_gpu", "multnas5_gpu"
        ])

    parser.add_argument('--tensorboard',
                        type=str2bool,
                        nargs='?',
                        help='Log progress to TensorBoard')

    parser.add_argument(
        '--save_models',
        default=True,
        type=str2bool,
        nargs='?',
        help='if True, models will be saved to the local folder')

    parser.add_argument('--fineturn_model',
                        type=str2bool,
                        nargs='?',
                        help='Log progress to TensorBoard')

    # ============================PRUNING added
    parser.add_argument(
        '--pruning_config',
        default=None,
        type=str,
        help=
        'path to pruning configuration file, will overwrite all pruning parameters in arguments'
    )

    parser.add_argument('--group_wd_coeff',
                        type=float,
                        default=0.0,
                        help='group weight decay')
    parser.add_argument('--name',
                        default='test',
                        type=str,
                        help='experiment name(folder) to store logs')

    parser.add_argument(
        '--augment',
        default=False,
        type=str2bool,
        nargs='?',
        help=
        'enable or not augmentation of training dataset, only for CIFAR, def False'
    )

    parser.add_argument('--load_model',
                        default='',
                        type=str,
                        help='path to model weights')

    parser.add_argument('--pruning',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='enable or not pruning, def False')

    parser.add_argument(
        '--pruning-threshold',
        '--pt',
        default=100.0,
        type=float,
        help=
        'Max error perc on validation set while pruning (default: 100.0 means always prune)'
    )

    parser.add_argument(
        '--pruning-momentum',
        default=0.0,
        type=float,
        help=
        'Use momentum on criteria between pruning iterations, def 0.0 means no momentum'
    )

    parser.add_argument('--pruning-step',
                        default=15,
                        type=int,
                        help='How often to check loss and do pruning step')

    parser.add_argument('--prune_per_iteration',
                        default=10,
                        type=int,
                        help='How many neurons to remove at each iteration')

    parser.add_argument(
        '--fixed_layer',
        default=-1,
        type=int,
        help='Prune only a given layer with index, use -1 to prune all')

    parser.add_argument('--start_pruning_after_n_iterations',
                        default=0,
                        type=int,
                        help='from which iteration to start pruning')

    parser.add_argument('--maximum_pruning_iterations',
                        default=1e8,
                        type=int,
                        help='maximum pruning iterations')

    parser.add_argument('--starting_neuron',
                        default=0,
                        type=int,
                        help='starting position for oracle pruning')

    parser.add_argument('--prune_neurons_max',
                        default=-1,
                        type=int,
                        help='prune_neurons_max')

    parser.add_argument('--pruning-method',
                        default=0,
                        type=int,
                        help='pruning method to be used, see readme.md')

    parser.add_argument('--pruning_fixed_criteria',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='enable or not criteria reevaluation, def False')

    parser.add_argument('--fixed_network',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='fix network for oracle or criteria computation')

    parser.add_argument(
        '--zero_lr_for_epochs',
        default=-1,
        type=int,
        help='Learning rate will be set to 0 for given number of updates')

    parser.add_argument(
        '--dynamic_network',
        default=False,
        type=str2bool,
        nargs='?',
        help=
        'Creates a new network graph from pruned model, works with ResNet-101 only'
    )

    parser.add_argument('--use_test_as_train',
                        default=False,
                        type=str2bool,
                        nargs='?',
                        help='use testing dataset instead of training')

    parser.add_argument('--pruning_mask_from',
                        default='',
                        type=str,
                        help='path to mask file precomputed')

    parser.add_argument(
        '--compute_flops',
        default=True,
        type=str2bool,
        nargs='?',
        help=
        'if True, will run dummy inference of batch 1 before training to get conv sizes'
    )

    # ============================END pruning added

    best_prec1 = 0
    global global_iteration
    global group_wd_optimizer
    global_iteration = 0

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=0)

    device = torch.device("cuda" if use_cuda else "cpu")

    # dataset loading section
    if args.dataset == "MNIST":
        kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            '../data',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data',
                           train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs)

    elif args.dataset == "CIFAR10":
        # Data loading code
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        if args.augment:
            transform_train = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])

        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        kwargs = {'num_workers': 8, 'pin_memory': True}
        train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            '../data', train=True, download=True, transform=transform_train),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   drop_last=True,
                                                   **kwargs)

        test_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('../data', train=False, transform=transform_test),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs)

    elif args.dataset == "Imagenet":
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')

        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
        else:
            train_sampler = None

        kwargs = {'num_workers': 16}

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            sampler=train_sampler,
            pin_memory=True,
            **kwargs)

        if args.use_test_as_train:
            train_loader = torch.utils.data.DataLoader(
                datasets.ImageFolder(
                    valdir,
                    transforms.Compose([
                        transforms.Resize(256),
                        transforms.CenterCrop(224),
                        transforms.ToTensor(),
                        normalize,
                    ])),
                batch_size=args.batch_size,
                shuffle=(train_sampler is None),
                **kwargs)

        test_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  **kwargs)
    #wm
    elif args.dataset == "mult_5T":
        args.data_root = ['/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/CX_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TK_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/ZR_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TX_20200616',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/WM_20200709']

        args.data_root_val = ['/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/CX_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TK_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/ZR_20200709',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TX_20200616',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/WM_20200709']

        args.train_data_list = ['/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/CX_20200709/txt/cx_train.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TK_20200709/txt/tk_train.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/ZR_20200709/txt/zr_train.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TX_20200616/txt/tx_train.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/WM_20200709/txt/wm_train.txt']

        args.val_data_list = ['/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/CX_20200709/txt/cx_val.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TK_20200709/txt/tk_val.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/ZR_20200709/txt/zr_val.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/TX_20200616/txt/tx_val.txt',\
        '/workspace/mnt/storage/yangdecheng/yangdecheng/data/TR-NMA-07/WM_20200709/txt/wm_val.txt']

        num_tasks = len(args.data_root)
        args.ngpu = 8
        args.workers = 8
        args.train_batch_size = [40, 40, 40, 40, 40]  #36
        args.val_batch_size = [100, 100, 100, 100, 100]
        args.loss_weight = [1.0, 1.0, 1.0, 1.0, 1.0]
        args.val_num_classes = [[0, 1, 2, 3, 4], [0, 1, 2], [0, 1], [0, 1],
                                [0, 1, 2, 3, 4, 5, 6]]
        args.mixup_alpha = None  #None

        for i in range(num_tasks):
            args.train_batch_size[i] *= args.ngpu
            args.val_batch_size[i] *= args.ngpu

        pixel_mean = [0.406, 0.456, 0.485]
        pixel_std = [0.225, 0.224, 0.229]

        #私人定制:
        train_dataset = []
        for i in range(num_tasks):
            if i == 1:
                train_dataset.append(
                    FileListLabeledDataset(
                        args.train_data_list[i],
                        args.data_root[i],
                        Compose([
                            RandomResizedCrop(
                                112,
                                scale=(0.94, 1.),
                                ratio=(1. / 4., 4. / 1.)
                            ),  #scale=(0.7, 1.2), ratio=(1. / 1., 4. / 1.)
                            RandomHorizontalFlip(),
                            ColorJitter(brightness=[0.5, 1.5],
                                        contrast=[0.5, 1.5],
                                        saturation=[0.5, 1.5],
                                        hue=0),
                            ToTensor(),
                            Lighting(1, [0.2175, 0.0188, 0.0045],
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]]),
                            Normalize(pixel_mean, pixel_std),
                        ])))
            else:
                train_dataset.append(
                    FileListLabeledDataset(
                        args.train_data_list[i], args.data_root[i],
                        Compose([
                            RandomResizedCrop(112,
                                              scale=(0.7, 1.2),
                                              ratio=(1. / 1., 4. / 1.)),
                            RandomHorizontalFlip(),
                            ColorJitter(brightness=[0.5, 1.5],
                                        contrast=[0.5, 1.5],
                                        saturation=[0.5, 1.5],
                                        hue=0),
                            ToTensor(),
                            Lighting(1, [0.2175, 0.0188, 0.0045],
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]]),
                            Normalize(pixel_mean, pixel_std),
                        ])))
        #原来的
        # train_dataset  = [FileListLabeledDataset(
        # args.train_data_list[i], args.data_root[i],
        # Compose([
        #     RandomResizedCrop(112,scale=(0.7, 1.2), ratio=(1. / 1., 4. / 1.)),
        #     RandomHorizontalFlip(),
        #     ColorJitter(brightness=[0.5,1.5], contrast=[0.5,1.5], saturation=[0.5,1.5], hue= 0),
        #     ToTensor(),
        #     Lighting(1, [0.2175, 0.0188, 0.0045], [[-0.5675,  0.7192,  0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948,  0.4203]]),
        #     Normalize(pixel_mean, pixel_std),]),
        # memcached=False,
        # memcached_client="") for i in range(num_tasks)]

        args.num_classes = [td.num_class for td in train_dataset]
        train_longest_size = max([
            int(np.ceil(len(td) / float(bs)))
            for td, bs in zip(train_dataset, args.train_batch_size)
        ])
        train_sampler = [
            GivenSizeSampler(td,
                             total_size=train_longest_size * bs,
                             rand_seed=0)
            for td, bs in zip(train_dataset, args.train_batch_size)
        ]
        train_loader = [
            DataLoader(train_dataset[k],
                       batch_size=args.train_batch_size[k],
                       shuffle=False,
                       num_workers=args.workers,
                       pin_memory=False,
                       sampler=train_sampler[k]) for k in range(num_tasks)
        ]

        val_dataset = [
            FileListLabeledDataset(
                args.val_data_list[i],
                args.data_root_val[i],
                Compose([
                    Resize((112, 112)),
                    # CenterCrop(112),
                    ToTensor(),
                    Normalize(pixel_mean, pixel_std),
                ]),
                memcached=False,
                memcached_client="") for i in range(num_tasks)
        ]
        val_longest_size = max([
            int(np.ceil(len(vd) / float(bs)))
            for vd, bs in zip(val_dataset, args.val_batch_size)
        ])
        test_loader = [
            DataLoader(val_dataset[k],
                       batch_size=args.val_batch_size[k],
                       shuffle=False,
                       num_workers=args.workers,
                       pin_memory=False) for k in range(num_tasks)
        ]

    if args.model == "lenet3":
        model = LeNet(dataset=args.dataset)
    elif args.model == "vgg":
        model = vgg11_bn(pretrained=True)
    elif args.model == "resnet18":
        model = PreActResNet18()
    elif (args.model == "resnet50") or (args.model == "resnet50_noskip"):
        if args.dataset == "CIFAR10":
            model = PreActResNet50(dataset=args.dataset)
        else:
            from models.resnet import resnet50
            skip_gate = True
            if "noskip" in args.model:
                skip_gate = False

            if args.pruning_method not in [22, 40]:
                skip_gate = False
            model = resnet50(skip_gate=skip_gate)
    elif args.model == "resnet34":
        if not (args.dataset == "CIFAR10"):
            from models.resnet import resnet34
            model = resnet34()
    elif args.model == "multprun_gate5_gpu_0316_1":
        from models.multitask import MultiTaskWithLoss
        model = MultiTaskWithLoss(backbone=args.model,
                                  num_classes=args.num_classes,
                                  feature_dim=2560,
                                  spatial_size=112,
                                  arc_fc=False,
                                  feat_bn=False)
        print(model)
    elif args.model == "mult_prun8_gpu":
        from models.multitask import MultiTaskWithLoss
        model = MultiTaskWithLoss(backbone=args.model,
                                  num_classes=args.num_classes,
                                  feature_dim=18,
                                  spatial_size=112,
                                  arc_fc=False,
                                  feat_bn=False)
        print(model)
    elif args.model == "multnas5_gpu":  #作为修改项
        from models.multitask import MultiTaskWithLoss
        model = MultiTaskWithLoss(backbone=args.model,
                                  num_classes=args.num_classes,
                                  feature_dim=512,
                                  spatial_size=112,
                                  arc_fc=False,
                                  feat_bn=False)
        print(model)
    elif "resnet101" in args.model:
        if not (args.dataset == "CIFAR10"):
            from models.resnet import resnet101
            if args.dataset == "Imagenet":
                classes = 1000

            if "noskip" in args.model:
                model = resnet101(num_classes=classes, skip_gate=False)
            else:
                model = resnet101(num_classes=classes)

    elif args.model == "resnet20":
        if args.dataset == "CIFAR10":
            NotImplementedError(
                "resnet20 is not implemented in the current project")
            # from models.resnet_cifar import resnet20
            # model = resnet20()
    elif args.model == "resnet152":
        model = PreActResNet152()
    elif args.model == "densenet201_imagenet":
        from models.densenet_imagenet import DenseNet201
        model = DenseNet201(gate_types=['output_bn'], pretrained=True)
    elif args.model == "densenet121_imagenet":
        from models.densenet_imagenet import DenseNet121
        model = DenseNet121(gate_types=['output_bn'], pretrained=True)
    else:
        print(args.model, "model is not supported")

    ####end dataset preparation

    if args.dynamic_network:
        # attempts to load pruned model and modify it be removing pruned channels
        # works for resnet101 only
        if (len(args.load_model) > 0) and (args.dynamic_network):
            if os.path.isfile(args.load_model):
                load_model_pytorch(model, args.load_model, args.model)

            else:
                print("=> no checkpoint found at '{}'".format(args.load_model))
                exit()

        dynamic_network_change_local(model)

        # save the model
        log_save_folder = "%s" % args.name
        if not os.path.exists(log_save_folder):
            os.makedirs(log_save_folder)

        if not os.path.exists("%s/models" % (log_save_folder)):
            os.makedirs("%s/models" % (log_save_folder))

        model_save_path = "%s/models/pruned.weights" % (log_save_folder)
        model_state_dict = model.state_dict()
        if args.save_models:
            save_checkpoint({'state_dict': model_state_dict},
                            False,
                            filename=model_save_path)

    print("model is defined")

    # aux function to get size of feature maps
    # First it adds hooks for each conv layer
    # Then runs inference with 1 image
    output_sizes = get_conv_sizes(args, model)

    if use_cuda and not args.mgpu:
        model = model.to(device)
    elif args.distributed:
        model.cuda()
        print(
            "\n\n WARNING: distributed pruning was not verified and might not work correctly"
        )
        model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.mgpu:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.to(device)

    print(
        "model is set to device: use_cuda {}, args.mgpu {}, agrs.distributed {}"
        .format(use_cuda, args.mgpu, args.distributed))

    weight_decay = args.wd
    if args.fixed_network:
        weight_decay = 0.0

    # remove updates from gate layers, because we want them to be 0 or 1 constantly
    if 1:
        parameters_for_update = []
        parameters_for_update_named = []
        for name, m in model.named_parameters():
            if "gate" not in name:
                parameters_for_update.append(m)
                parameters_for_update_named.append((name, m))
            else:
                print("skipping parameter", name, "shape:", m.shape)

    total_size_params = sum(
        [np.prod(par.shape) for par in parameters_for_update])
    print("Total number of parameters, w/o usage of bn consts: ",
          total_size_params)

    optimizer = optim.SGD(parameters_for_update,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=weight_decay)

    if 1:
        # helping optimizer to implement group lasso (with very small weight that doesn't affect training)
        # will be used to calculate number of remaining flops and parameters in the network
        group_wd_optimizer = group_lasso_decay(
            parameters_for_update,
            group_lasso_weight=args.group_wd_coeff,
            named_parameters=parameters_for_update_named,
            output_sizes=output_sizes)

    cudnn.benchmark = True

    # define objective
    criterion = nn.CrossEntropyLoss()

    ###=======================added for pruning
    # logging part
    log_save_folder = "%s" % args.name
    if not os.path.exists(log_save_folder):
        os.makedirs(log_save_folder)

    if not os.path.exists("%s/models" % (log_save_folder)):
        os.makedirs("%s/models" % (log_save_folder))

    train_writer = None
    if args.tensorboard:
        try:
            # tensorboardX v1.6
            train_writer = SummaryWriter(log_dir="%s" % (log_save_folder))
        except:
            # tensorboardX v1.7
            train_writer = SummaryWriter(logdir="%s" % (log_save_folder))

    time_point = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    textfile = "%s/log_%s.txt" % (log_save_folder, time_point)
    stdout = Logger(textfile)
    sys.stdout = stdout
    print(" ".join(sys.argv))

    # initializing parameters for pruning
    # we can add weights of different layers or we can add gates (multiplies output with 1, useful only for gradient computation)
    pruning_engine = None
    if args.pruning:
        pruning_settings = dict()
        if not (args.pruning_config is None):
            pruning_settings_reader = PruningConfigReader()
            pruning_settings_reader.read_config(args.pruning_config)
            pruning_settings = pruning_settings_reader.get_parameters()

        # overwrite parameters from config file with those from command line
        # needs manual entry here
        # user_specified = [key for key in vars(default_args).keys() if not (vars(default_args)[key]==vars(args)[key])]
        # argv_of_interest = ['pruning_threshold', 'pruning-momentum', 'pruning_step', 'prune_per_iteration',
        #                     'fixed_layer', 'start_pruning_after_n_iterations', 'maximum_pruning_iterations',
        #                     'starting_neuron', 'prune_neurons_max', 'pruning_method']

        has_attribute = lambda x: any([x in a for a in sys.argv])

        if has_attribute('pruning-momentum'):
            pruning_settings['pruning_momentum'] = vars(
                args)['pruning_momentum']
        if has_attribute('pruning-method'):
            pruning_settings['method'] = vars(args)['pruning_method']

        pruning_parameters_list = prepare_pruning_list(
            pruning_settings,
            model,
            model_name=args.model,
            pruning_mask_from=args.pruning_mask_from,
            name=args.name)
        print("Total pruning layers:", len(pruning_parameters_list))

        folder_to_write = "%s" % log_save_folder + "/"
        log_folder = folder_to_write

        pruning_engine = pytorch_pruning(pruning_parameters_list,
                                         pruning_settings=pruning_settings,
                                         log_folder=log_folder)

        pruning_engine.connect_tensorboard(train_writer)
        pruning_engine.dataset = args.dataset
        pruning_engine.model = args.model
        pruning_engine.pruning_mask_from = args.pruning_mask_from
        pruning_engine.load_mask()
        gates_to_params = connect_gates_with_parameters_for_flops(
            args.model, parameters_for_update_named)
        pruning_engine.gates_to_params = gates_to_params

    ###=======================end for pruning
    # loading model file
    if (len(args.load_model) > 0) and (not args.dynamic_network):
        if os.path.isfile(args.load_model):
            if args.fineturn_model:
                checkpoint = torch.load(args.load_model)
                state_dict = checkpoint['state_dict']
                model = load_module_state_dict_checkpoint(model, state_dict)
            else:
                load_model_pytorch(model, args.load_model, args.model)
        else:
            print("=> no checkpoint found at '{}'".format(args.load_model))
            exit()

    if args.tensorboard and 0:
        if args.dataset == "CIFAR10":
            dummy_input = torch.rand(1, 3, 32, 32).to(device)
        elif args.dataset == "Imagenet":
            dummy_input = torch.rand(1, 3, 224, 224).to(device)

        train_writer.add_graph(model, dummy_input)

    for epoch in range(1, args.epochs + 1):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(args, optimizer, epoch, args.zero_lr_for_epochs,
                             train_writer)

        if not args.run_test and not args.get_inference_time:
            train(args,
                  model,
                  device,
                  train_loader,
                  optimizer,
                  epoch,
                  criterion,
                  train_writer=train_writer,
                  pruning_engine=pruning_engine)

        if args.pruning:
            # skip validation error calculation and model saving
            if pruning_engine.method == 50: continue

        # evaluate on validation set
        prec1 = validate(args,
                         test_loader,
                         model,
                         device,
                         criterion,
                         epoch,
                         train_writer=train_writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        model_save_path = "%s/models/checkpoint.weights" % (log_save_folder)
        paths = "%s/models" % (log_save_folder)
        model_state_dict = model.state_dict()
        if args.save_models:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model_state_dict,
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=model_save_path)
            states = {
                'epoch': epoch + 1,
                'state_dict': model_state_dict,
            }
            torch.save(states, '{}/{}.pth.tar'.format(paths, epoch + 1))
Beispiel #6
0
import pandas as pd
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import os
import torch
import json
import time
import argparse
import pickle

image_transforms = {
    # Train uses data augmentation
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  # Image net standards
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  # Imagenet standards
    ]),
    # Validation does not use augmentation
    'val':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
Beispiel #7
0
    def __init__(self, setname, args, augment=False):
        """Load one dataset split and build its preprocessing pipeline.

        Args:
            setname: Name of the split; ``<setname>.csv`` under ``SPLIT_PATH``
                is parsed with ``self.parse_csv``.
            args: Options object. Reads ``args.orig_imsize`` (``-1`` disables
                the on-disk image cache) and ``args.backbone_class`` (selects
                the normalization statistics).
            augment: When true and ``setname == 'train'``, use the random
                crop / color jitter / flip pipeline instead of the
                deterministic resize + center crop.

        Raises:
            ValueError: If ``args.backbone_class`` is not one of ``'ConvNet'``,
                ``'Res12'``, ``'Res18'`` or ``'WRN'``.
        """
        im_size = args.orig_imsize
        csv_path = osp.join(SPLIT_PATH, setname + '.csv')
        cache_path = osp.join(
            CACHE_PATH,
            "{}.{}.{}.pt".format(self.__class__.__name__, setname, im_size))

        # An image size of -1 means "do not use the image cache".
        self.use_im_cache = (im_size != -1)
        if self.use_im_cache:
            if not osp.exists(cache_path):
                print('* Cache miss... Preprocessing {}...'.format(setname))
                resize_ = identity if im_size < 0 else transforms.Resize(im_size)
                data, label = self.parse_csv(csv_path, setname)
                # Open each image in a context manager so its file handle is
                # released as soon as the RGB copy has been made, rather than
                # keeping one open descriptor per image until garbage collection.
                images = []
                for path in data:
                    with Image.open(path) as img:
                        images.append(resize_(img.convert('RGB')))
                self.data = images
                self.label = label
                print('* Dump cache to {}'.format(cache_path))
                torch.save({'data': self.data, 'label': self.label}, cache_path)
            else:
                print('* Load cache from {}'.format(cache_path))
                # NOTE(review): torch.load unpickles the file; only point
                # CACHE_PATH at cache files produced by this code.
                cache = torch.load(cache_path)
                self.data = cache['data']
                self.label = cache['label']
        else:
            # No cache: keep whatever parse_csv returns for this split.
            self.data, self.label = self.parse_csv(csv_path, setname)

        self.num_class = len(set(self.label))

        image_size = 84
        if augment and setname == 'train':
            transforms_list = [
                transforms.RandomResizedCrop(image_size),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
            ]
        else:
            transforms_list = [
                transforms.Resize(92),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
            ]

        # Pick the per-backbone normalization and append it to the pipeline.
        backbone = args.backbone_class
        if backbone == 'ConvNet':
            normalize = transforms.Normalize(np.array([0.485, 0.456, 0.406]),
                                             np.array([0.229, 0.224, 0.225]))
        elif backbone == 'Res12':
            # These statistics are divided by 255 before use
            # (presumably given on a 0-255 pixel scale; verify the source).
            normalize = transforms.Normalize(
                np.array([x / 255.0 for x in [120.39586422, 115.59361427, 104.54012653]]),
                np.array([x / 255.0 for x in [70.68188272, 68.27635443, 72.54505529]]))
        elif backbone in ('Res18', 'WRN'):
            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        else:
            raise ValueError('Non-supported Network Types. Please Revise Data Pre-Processing Scripts.')

        self.transform = transforms.Compose(transforms_list + [normalize])
Beispiel #8
0
def _subsample_dataset(dataset, subset_len, sample_method):
    """Return ``dataset`` limited to ``subset_len`` items; unchanged if falsy.

    With ``sample_method == 'random'`` the indices are drawn without
    replacement via ``random.sample``; any other value takes the first
    ``subset_len`` items.
    """
    if not subset_len:
        return dataset
    assert subset_len <= len(dataset)
    if sample_method == 'random':
        indices = random.sample(range(0, len(dataset)), subset_len)
    else:
        indices = list(range(subset_len))
    return torch.utils.data.Subset(dataset, indices)


def load_data(train=True,
              data_dir='dataset/imagenet',
              batch_size=128,
              subset_len=None,
              sample_method='random',
              distributed=False,
              model_name='resnet18',
              **kwargs):
    """Build a DataLoader over the ``train`` or ``val`` folder of ``data_dir``.

    Args:
        train: If true, load ``<data_dir>/train`` with random-crop/flip
            augmentation; otherwise load ``<data_dir>/val`` with
            resize + center crop.
        data_dir: Root directory containing ``train`` and ``val`` subfolders.
        batch_size: Batch size passed to the DataLoader.
        subset_len: If truthy, restrict the dataset to this many samples
            (must not exceed the dataset length).
        sample_method: ``'random'`` draws the subset at random; any other
            value takes the first ``subset_len`` samples.
        distributed: If true (training only), use a ``DistributedSampler``.
        model_name: ``'inception_v3'`` selects 299x299 inputs; every other
            name uses a 224 crop (after a 256 resize for validation).
        **kwargs: Forwarded unchanged to ``torch.utils.data.DataLoader``.

    Returns:
        Tuple ``(data_loader, train_sampler)``; ``train_sampler`` is ``None``
        unless ``train`` and ``distributed`` are both true.
    """
    #prepare data
    # random.seed(12345)
    traindir = data_dir + '/train'
    valdir = data_dir + '/val'
    train_sampler = None
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if model_name == 'inception_v3':
        size = 299
        resize = 299
    else:
        size = 224
        resize = 256

    if train:
        dataset = torchvision.datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        dataset = _subsample_dataset(dataset, subset_len, sample_method)
        if distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                dataset)
        # shuffle and sampler are mutually exclusive in DataLoader, so only
        # shuffle when no distributed sampler is in use.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=(train_sampler is None),
            sampler=train_sampler,
            **kwargs)
    else:
        dataset = torchvision.datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(resize),
                transforms.CenterCrop(size),
                transforms.ToTensor(),
                normalize,
            ]))
        dataset = _subsample_dataset(dataset, subset_len, sample_method)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  **kwargs)
    return data_loader, train_sampler
Beispiel #9
0
def train(args):
    """Train the question-generation model, evaluating and checkpointing it.

    Creates ``args.model_path``, stores the run arguments there as
    ``args.json``, logs to ``train.log`` (and to a stream handler), builds
    the train/validation loaders and the model, then minimises
    ``lambda_gen * generation_loss + lambda_z * KL_loss`` with the generator
    optimizer. A checkpoint is written every ``args.save_step`` batches and
    at the end of every epoch, followed by an evaluation pass.

    Args:
        args: Parsed command-line namespace. This function reads
            ``model_path``, ``crop_size``, ``vocab_path``, ``dataset``,
            ``val_dataset``, ``batch_size``, ``num_workers``,
            ``max_examples``, ``load_model``, ``learning_rate``,
            ``patience``, ``num_epochs``, ``eval_every_n_steps``,
            ``lambda_gen``, ``lambda_z``, ``log_step`` and ``save_step``;
            the whole namespace is also forwarded to ``create_model``,
            ``run_eval`` and ``compare_outputs``.
    """
    # Create model directory. exist_ok avoids the check-then-create race
    # when several runs start with the same model_path.
    os.makedirs(args.model_path, exist_ok=True)

    # Save the arguments.
    with open(os.path.join(args.model_path, 'args.json'), 'w') as args_file:
        json.dump(args.__dict__, args_file)

    # Config logging.
    log_format = '%(levelname)-8s %(message)s'
    logfile = os.path.join(args.model_path, 'train.log')
    logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(json.dumps(args.__dict__))

    # Image preprocessing. The ToTensor -> ToPILImage pair round-trips the
    # input so that RandomResizedCrop receives a PIL image.
    # NOTE(review): the crop `scale` upper bound is above 1.0 -- confirm
    # that this is intended.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.ToPILImage(),
        transforms.RandomResizedCrop(args.crop_size,
                                     scale=(1.00, 1.2),
                                     ratio=(0.75, 1.3333333333333333)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])])

    # Load vocabulary wrapper.
    vocab = load_vocab(args.vocab_path)

    # Build data loader
    logging.info("Building data loader...")
    train_sampler = None
    val_sampler = None

    data_loader = get_loader(args.dataset, transform,
                             args.batch_size, shuffle=False,
                             num_workers=args.num_workers,
                             max_examples=args.max_examples,
                             sampler=train_sampler)
    val_data_loader = get_loader(args.val_dataset, transform,
                                 args.batch_size, shuffle=False,
                                 num_workers=args.num_workers,
                                 max_examples=args.max_examples,
                                 sampler=val_sampler)
    logging.info("Done")

    vqg = create_model(args, vocab)
    print(vqg)
    if args.load_model is not None:
        vqg.load_state_dict(torch.load(args.load_model))
    logging.info("Done")

    # Loss criterion.
    pad = vocab(vocab.SYM_PAD)  # Set loss weight for 'pad' symbol to 0
    criterion = nn.CrossEntropyLoss(ignore_index=pad)
    l2_criterion = nn.MSELoss()

    # Setup GPUs.
    if torch.cuda.is_available():
        logging.info("Using available GPU...")
        vqg.cuda()
        criterion.cuda()
        l2_criterion.cuda()

    # Parameters to train.
    gen_params = vqg.generator_parameters()
    info_params = vqg.info_parameters()
    learning_rate = args.learning_rate
    info_learning_rate = args.learning_rate
    gen_optimizer = torch.optim.Adam(gen_params, lr=learning_rate)
    info_optimizer = torch.optim.Adam(info_params, lr=info_learning_rate)
    scheduler = ReduceLROnPlateau(optimizer=gen_optimizer, mode='min',
                                  factor=0.1, patience=args.patience,
                                  verbose=True, min_lr=1e-7)
    info_scheduler = ReduceLROnPlateau(optimizer=info_optimizer, mode='min',
                                       factor=0.1, patience=args.patience,
                                       verbose=True, min_lr=1e-7)

    # Train the model.
    total_steps = len(data_loader)
    start_time = time.time()
    n_steps = 0  # Global step counter across epochs (drives periodic eval).

    for epoch in range(args.num_epochs):
        for i, (images, questions, qindices) in enumerate(data_loader):
            n_steps += 1

            # Set mini-batch dataset.
            if torch.cuda.is_available():
                images = images.cuda()
                questions = questions.cuda()
                qindices = qindices.cuda()

            # Eval now.
            if (args.eval_every_n_steps is not None and
                    n_steps >= args.eval_every_n_steps and
                    n_steps % args.eval_every_n_steps == 0):
                run_eval(vqg, val_data_loader, criterion, l2_criterion,
                         args, epoch, scheduler, info_scheduler)
                compare_outputs(images, questions, vqg, vocab, logging, args)

            # Forward.
            vqg.train()
            gen_optimizer.zero_grad()
            # The info optimizer's gradients are cleared here, but it is
            # never stepped in this function; only gen_optimizer updates
            # weights.
            info_optimizer.zero_grad()
            image_features = vqg.encode_images(images)

            # Question generation.
            mus, logvars = vqg.encode_into_z(image_features)
            zs = vqg.reparameterize(mus, logvars)
            (outputs, _, _) = vqg.decode_questions(
                image_features, zs, questions=questions,
                teacher_forcing_ratio=1.0)

            # Reorder the questions based on length.
            questions = torch.index_select(questions, 0, qindices)

            # Ignoring the start token.
            questions = questions[:, 1:]
            qlengths = process_lengths(questions)

            # Convert the output from MAX_LEN list of (BATCH x VOCAB) ->
            # (BATCH x MAX_LEN x VOCAB).
            outputs = [o.unsqueeze(1) for o in outputs]
            outputs = torch.cat(outputs, dim=1)
            outputs = torch.index_select(outputs, 0, qindices)

            # Calculate the generation loss.
            targets = pack_padded_sequence(questions, qlengths,
                                           batch_first=True)[0]
            outputs = pack_padded_sequence(outputs, qlengths,
                                           batch_first=True)[0]
            gen_loss = criterion(outputs, targets)
            total_loss = 0.0
            total_loss += args.lambda_gen * gen_loss
            # Keep only the Python float for logging from here on.
            gen_loss = gen_loss.item()

            # Variational loss.
            kl_loss = gaussian_KL_loss(mus, logvars)
            total_loss += args.lambda_z * kl_loss
            kl_loss = kl_loss.item()

            total_loss.backward()
            gen_optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                delta_time = time.time() - start_time
                start_time = time.time()
                # Arguments are passed lazily so the message is only
                # formatted when the record is actually emitted.
                logging.info('Time: %.4f, Epoch [%d/%d], Step [%d/%d], '
                             'LR: %f, gen: %.4f, KL: %.4f ',
                             delta_time, epoch, args.num_epochs, i,
                             total_steps, gen_optimizer.param_groups[0]['lr'],
                             gen_loss, kl_loss)

            # Save the models
            if args.save_step is not None and (i+1) % args.save_step == 0:
                torch.save(vqg.state_dict(),
                           os.path.join(args.model_path,
                                        'vqg-tf-%d-%d.pkl'
                                        % (epoch + 1, i + 1)))

        # End-of-epoch checkpoint.
        torch.save(vqg.state_dict(),
                   os.path.join(args.model_path,
                                'vqg-tf-%d.pkl' % (epoch+1)))

        # Evaluation and learning rate updates.
        run_eval(vqg, val_data_loader, criterion, l2_criterion,
                 args, epoch, scheduler, info_scheduler)
Beispiel #10
0
def main():
    """Train (or only evaluate) ``models.Net`` on the CUB or EPIC dataset.

    Parses the module-level ``parser``, builds the model with a 10x larger
    learning rate for the non-extractor parameters, optionally resumes from
    ``args.resume``, and then either runs a single validation pass
    (``args.evaluate``) or trains for ``args.epochs`` epochs, logging metrics
    and saving a checkpoint after every epoch.

    Reads and updates the module globals ``args`` and ``best_prec1``.
    NOTE(review): ``best_prec1`` is only assigned here when resuming, so it
    is presumably initialised at module level -- confirm.

    Raises:
        ValueError: If ``args.dataset`` is neither ``'CUB'`` nor ``'EPIC'``.
        AssertionError: If ``args.resume`` is set but is not an existing file.
    """
    global args, best_prec1
    args = parser.parse_args()

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    print('dataset:', args.dataset)
    if args.dataset == 'EPIC':
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # option files from a trusted source.
        with open(args.foptions, 'rb') as handle:
            options = pickle.load(handle)
        # startswith (rather than indexing [0]) also tolerates an empty root.
        if options['root'].startswith('.'):
            # update relative path
            options['root'] = 'basemodel' + options['root'][1:]
        options['num_crops'] = 1
    else:
        options = None

    model = models.Net(options=options).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Split parameters into the extractor and everything else (the
    # classifier); a set keeps the membership test O(1) per parameter.
    extractor_param_ids = {id(p) for p in model.extractor.parameters()}
    classifier_params = [p for p in model.parameters()
                         if id(p) not in extractor_param_ids]

    optimizer = torch.optim.SGD([{
        'params': model.extractor.parameters()
    }, {
        'params': classifier_params,
        'lr': args.lr * 10
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.1)
    # optionally resume from a checkpoint
    title = args.dataset

    if args.resume:
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # NOTE(review): the scheduler state is neither saved nor restored,
        # so its step counter restarts from zero after a resume.
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Data loading code
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    if args.dataset == 'CUB':
        print('Using dataset CUB')
        train_dataset = loader.ImageLoader(
            args.data,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            train=True)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)

        val_loader = torch.utils.data.DataLoader(loader.ImageLoader(
            args.data,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    elif args.dataset == 'EPIC':
        print('Using dataset EPIC')
        train_dataset, val_dataset, train_loader, val_loader = get_datasets_and_dataloaders(
            options, device=options['device'])

    else:
        # Without this branch the loaders stay unbound and the failure
        # surfaces later as an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported dataset '{}': expected 'CUB' or 'EPIC'".format(
                args.dataset))

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    print('# train_loader:', len(train_loader))
    print('# val_loader:', len(val_loader))

    print('Training epoch from {} to {}'.format(args.start_epoch, args.epochs))

    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): the scheduler is stepped before the epoch's
        # optimizer steps; the effect on the schedule depends on the
        # installed PyTorch version. Left unchanged to preserve the schedule.
        scheduler.step()
        # Report the LR of the second parameter group (the classifier).
        lr = optimizer.param_groups[1]['lr']
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, lr))
        # train for one epoch
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch)

        # evaluate on validation set
        test_loss, test_acc = validate(val_loader, model, criterion)

        # append logger file
        logger.append([lr, train_loss, test_loss, train_acc, test_acc])

        # remember best prec@1 and save checkpoint
        is_best = test_acc > best_prec1
        best_prec1 = max(test_acc, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_prec1)
Beispiel #11
0
def main_worker(gpu, ngpus_per_node, args):
    """Run RepVGG training (or evaluation) in one worker process.

    Sets up the optional process group, builds the model named by
    ``args.arch``, places it on the requested device(s), optionally resumes
    from ``args.resume``, builds the ImageNet-style train/val loaders under
    ``args.data`` and then either validates once (``args.evaluate``) or runs
    the epoch loop, saving a checkpoint after each epoch.

    Args:
        gpu: GPU index for this worker, or ``None``; stored in ``args.gpu``.
        ngpus_per_node: Number of GPUs on this node; used to derive the
            global rank and to split batch size and workers per process.
        args: Parsed command-line namespace; ``gpu``, ``rank``,
            ``batch_size``, ``workers`` and ``start_epoch`` may be
            overwritten here.

    Updates the module global ``best_acc1``.
    """
    global best_acc1

    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        rank_comes_from_env = args.dist_url == "env://" and args.rank == -1
        if rank_comes_from_env:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # With one process per GPU, the rank must be the global rank
            # among all processes, not the node rank.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    build_model = get_RepVGG_func_by_name(args.arch)
    model = build_model(deploy=False)

    # ---- Device placement ------------------------------------------------
    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        if args.gpu is None:
            # No device_ids given: DistributedDataParallel spreads the batch
            # over all available GPUs.
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
        else:
            # One GPU per process: pin the device and split the batch size
            # and the loader workers across the node's GPUs ourselves.
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
    elif args.gpu is None:
        # DataParallel splits each batch over all available GPUs.
        model = torch.nn.DataParallel(model).cuda()
    else:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)

    # ---- Loss, optimizer, LR schedule -------------------------------------
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = sgd_optimizer(model, args.lr, args.momentum, args.weight_decay)

    # NOTE(review): this looks like a total iteration count, so the
    # scheduler is presumably stepped once per batch inside train() --
    # confirm.
    schedule_length = (args.epochs * IMAGENET_TRAINSET_SIZE //
                       args.batch_size // ngpus_per_node)
    lr_scheduler = CosineAnnealingLR(optimizer=optimizer,
                                     T_max=schedule_length)

    # ---- Optional resume --------------------------------------------------
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the stored tensors onto this worker's GPU.
                checkpoint = torch.load(
                    args.resume, map_location='cuda:{}'.format(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may come from a checkpoint saved on another GPU.
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

    cudnn.benchmark = True

    # ---- Data -------------------------------------------------------------
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = datasets.ImageFolder(traindir, train_pipeline)

    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(train_dataset)
        if args.distributed else None)

    # Shuffle in the loader only when no sampler is supplied.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)

    val_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    val_dataset = datasets.ImageFolder(valdir, val_pipeline)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    # ---- Epoch loop -------------------------------------------------------
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args,
              lr_scheduler)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # With multiprocessing, only the first process of each node writes
        # the checkpoint; otherwise every caller does.
        should_save = (not args.multiprocessing_distributed
                       or args.rank % ngpus_per_node == 0)
        if should_save:
            snapshot = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
                'scheduler': lr_scheduler.state_dict(),
            }
            save_checkpoint(snapshot, is_best)
Beispiel #12
0
def build_imagenet(model_state_dict, optimizer_state_dict, **kwargs):
    """Build loaders, model, losses, optimizer and scheduler for ImageNet.

    The training set (zip archive or directory under ``args.data``, loaded
    lazily or into memory) is randomly split into a train part and a held-out
    part; both loaders draw from the same dataset object. Configuration is
    read from the module-level ``args``.

    Args:
        model_state_dict: State dict loaded into the model, or ``None``.
        optimizer_state_dict: State dict loaded into the optimizer, or
            ``None``.
        **kwargs: Must contain ``ratio``, the fraction of samples assigned
            to the training split.

    Returns:
        Tuple ``(train_queue, valid_queue, model, train_criterion,
        eval_criterion, optimizer, scheduler)``.
    """
    ratio = kwargs.pop('ratio')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4,
                               contrast=0.4,
                               saturation=0.4,
                               hue=0.2),
        transforms.ToTensor(),
        normalize,
    ])
    # Built but not referenced below: the validation queue reads from
    # train_data, which carries train_transform.
    valid_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    # Pick the data source (zip archive vs. directory) and the loading mode
    # (lazy vs. fully in memory).
    if not args.zip_file:
        logging.info('Loading data from directory')
        traindir = os.path.join(args.data, 'train')
        if not args.lazy_load:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryDataset(traindir,
                                               train_transform,
                                               num_workers=32)
        else:
            train_data = dset.ImageFolder(traindir, train_transform)
    else:
        logging.info('Loading data from zip file')
        traindir = os.path.join(args.data, 'train.zip')
        if not args.lazy_load:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryZipDataset(traindir,
                                                  train_transform,
                                                  num_workers=32)
        else:
            train_data = utils.ZipDataset(traindir, train_transform)

    # Random train / held-out split over sample indices.
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(ratio * num_train))
    train_indices = sorted(indices[:split])
    valid_indices = sorted(indices[split:])

    def _make_queue(batch_size, subset):
        # Loader over train_data restricted to `subset`, sampled randomly.
        picker = torch.utils.data.sampler.SubsetRandomSampler(subset)
        return torch.utils.data.DataLoader(train_data,
                                           batch_size=batch_size,
                                           sampler=picker,
                                           pin_memory=True,
                                           num_workers=16)

    train_queue = _make_queue(args.child_batch_size, train_indices)
    valid_queue = _make_queue(args.child_eval_batch_size, valid_indices)

    model = NASWSNetworkImageNet(args, 1000, args.child_layers,
                                 args.child_nodes, args.child_channels,
                                 args.child_keep_prob,
                                 args.child_drop_path_keep_prob,
                                 args.child_use_aux_head, args.steps)
    model = model.cuda()

    train_criterion = CrossEntropyLabelSmooth(1000, args.child_label_smooth)
    train_criterion = train_criterion.cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.child_lr,
                                momentum=0.9,
                                weight_decay=args.child_l2_reg)

    # Restore previous state when provided.
    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                args.child_decay_period,
                                                gamma=args.child_gamma)
    return (train_queue, valid_queue, model, train_criterion, eval_criterion,
            optimizer, scheduler)
Beispiel #13
0
def main():
    """Command-line entry point: train or test the warp/color-correction model.

    Parses the options below, builds the train/validation/test datasets under
    ``--dataroot``, creates ``networks.Model`` on the GPU, optionally loads
    weights from ``<outroot>/<load>_net.pth``, and then either runs only the
    test pass (``--test``) or trains for ``--epochs`` epochs, saving
    ``<outroot>/<exp-name>_net.pth`` and running the test pass after each
    epoch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot',
                        default='/mnt/Data1',
                        help='path to images')
    parser.add_argument('--workers',
                        default=4,
                        type=int,
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--batch-size',
                        default=2,
                        type=int,
                        help='mini-batch size')
    parser.add_argument('--outroot',
                        default='./results',
                        help='path to save the results')
    parser.add_argument('--exp-name',
                        default='test',
                        help='name of expirement')
    parser.add_argument('--load',
                        default='',
                        help='name of pth to load weights from')
    parser.add_argument('--freeze-cc-net',
                        dest='freeze_cc_net',
                        action='store_true',
                        help='dont train the color corrector net')
    parser.add_argument('--freeze-warp-net',
                        dest='freeze_warp_net',
                        action='store_true',
                        help='dont train the warp net')
    parser.add_argument('--test',
                        dest='test',
                        action='store_true',
                        help='only test the network')
    parser.add_argument(
        '--synth-data',
        dest='synth_data',
        action='store_true',
        help='use synthetic data instead of tank data for training')
    parser.add_argument('--epochs',
                        default=3,
                        type=int,
                        help='number of epochs to train for')
    parser.add_argument('--no-warp-net',
                        dest='warp_net',
                        action='store_false',
                        help='do not include warp net in the model')
    parser.add_argument('--warp-net-downsample',
                        default=3,
                        type=int,
                        help='number of downsampling layers in warp net')
    parser.add_argument('--no-color-net',
                        dest='color_net',
                        action='store_false',
                        help='do not include color net in the model')
    parser.add_argument('--color-net-downsample',
                        default=3,
                        type=int,
                        help='number of downsampling layers in color net')
    parser.add_argument(
        '--no-color-net-skip',
        dest='color_net_skip',
        action='store_false',
        help='dont use u-net skip connections in the color net')
    parser.add_argument(
        '--dim',
        default=32,
        type=int,
        help='initial feature dimension (doubled at each downsampling layer)')
    parser.add_argument('--n-res',
                        default=8,
                        type=int,
                        help='number of residual blocks')
    parser.add_argument('--norm',
                        default='gn',
                        type=str,
                        help='type of normalization layer')
    parser.add_argument(
        '--denormalize',
        dest='denormalize',
        action='store_true',
        help='denormalize output image by input image mean/var')
    parser.add_argument(
        '--weight-X-L1',
        default=1.,
        type=float,
        help='weight of L1 reconstruction loss after color corrector net')
    parser.add_argument('--weight-Y-L1',
                        default=1.,
                        type=float,
                        help='weight of L1 reconstruction loss after warp net')
    parser.add_argument('--weight-Y-VGG',
                        default=1.,
                        type=float,
                        help='weight of perceptual loss after warp net')
    parser.add_argument(
        '--weight-Z-L1',
        default=1.,
        type=float,
        help='weight of L1 reconstruction loss after color net')
    parser.add_argument('--weight-Z-VGG',
                        default=.5,
                        type=float,
                        help='weight of perceptual loss after color net')
    parser.add_argument('--weight-Z-Adv',
                        default=0.2,
                        type=float,
                        help='weight of adversarial loss after color net')
    args = parser.parse_args()

    # set random seed for consistent fixed batch
    torch.manual_seed(8)

    # set weights of losses of intermediate outputs to zero if not necessary
    if not args.warp_net:
        args.weight_Y_L1 = 0
        args.weight_Y_VGG = 0
    if not args.color_net:
        args.weight_Z_L1 = 0
        args.weight_Z_VGG = 0
        args.weight_Z_Adv = 0

    # datasets
    train_dir_1 = os.path.join(args.dataroot, 'Water', 'train')
    train_dir_2 = os.path.join(args.dataroot, 'ImageNet', 'train')
    val_dir_1 = os.path.join(args.dataroot, 'Water', 'test')
    val_dir_2 = os.path.join(args.dataroot, 'ImageNet', 'test')
    test_dir = os.path.join(args.dataroot, 'Water_Real')

    if args.synth_data:
        # Synthetic training data is generated from the ImageNet images by
        # the final SynthData transform.
        train_data = ImageFolder(train_dir_2,
                                 transform=transforms.Compose([
                                     transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                          std=[0.5, 0.5, 0.5]),
                                     synthdata.SynthData(224,
                                                         n=args.batch_size),
                                 ]))
    else:
        train_data = PairedImageFolder(
            train_dir_1,
            train_dir_2,
            transform=transforms.Compose([
                pairedtransforms.RandomResizedCrop(224),
                pairedtransforms.RandomHorizontalFlip(),
                pairedtransforms.ToTensor(),
                pairedtransforms.Normalize(mean=[0.5, 0.5, 0.5],
                                           std=[0.5, 0.5, 0.5]),
            ]))
    val_data = PairedImageFolder(val_dir_1,
                                 val_dir_2,
                                 transform=transforms.Compose([
                                     pairedtransforms.Resize(256),
                                     pairedtransforms.CenterCrop(256),
                                     pairedtransforms.ToTensor(),
                                     pairedtransforms.Normalize(
                                         mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5]),
                                 ]))
    test_data = ImageFolder(test_dir,
                            transform=transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(256),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                     std=[0.5, 0.5, 0.5]),
                            ]),
                            return_path=True)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              shuffle=False)

    # fixed test batch for visualization during training
    # Use the built-in next(): the iterator's Python-2-style .next() alias
    # is not part of the iterator protocol and is missing in current PyTorch.
    fixed_batch = next(iter(val_loader))[0]

    # model
    model = networks.Model(args)
    model.cuda()

    # load weights from checkpoint
    if args.test and not args.load:
        # Testing without --load falls back to this experiment's weights.
        args.load = args.exp_name
    if args.load:
        # Keys must match exactly only when testing; when training from
        # loaded weights, mismatched keys are tolerated.
        model.load_state_dict(torch.load(
            os.path.join(args.outroot, '%s_net.pth' % args.load)),
                              strict=args.test)

    # create outroot if necessary (exist_ok avoids a check-then-create race)
    os.makedirs(args.outroot, exist_ok=True)

    # if args.test only run test script
    if args.test:
        test(test_loader, model, args)
        return

    # main training loop
    for epoch in range(args.epochs):
        train(train_loader, model, fixed_batch, epoch, args)
        torch.save(model.state_dict(),
                   os.path.join(args.outroot, '%s_net.pth' % args.exp_name))
        test(test_loader, model, args)
def transforming(path):
    """Build the datasets and batch iterators for the train/valid/test splits.

    Parameters
    ----------
    path : str
        Root data directory. The three splits are read from the ``/train``,
        ``/valid`` and ``/test`` sub-folders appended to it.

    Returns
    -------
    dataloaders : dict
        ``torch.utils.data.DataLoader`` objects (batch size 100, shuffled)
        keyed by ``'training'``, ``'validation'`` and ``'testing'``.
    image_datasets : dict
        The underlying ``datasets.ImageFolder`` objects under the same keys.
    """
    # Getting the specific paths to each of the training, validation and testing folder
    data_dir = path
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # Preparing the transformations needed to convert images into tensors
    data_transforms = {
        'training':
        transforms.Compose([
            transforms.RandomRotation(30),
            # The argument of RandomHorizontalFlip is a probability, not an
            # angle: the previous value of 30 flipped every single image.
            # Use the default (p=0.5) so the flip is actually random.
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation':
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'testing':
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Loading the data from the directories
    image_datasets = {
        'training':
        datasets.ImageFolder(train_dir, transform=data_transforms['training']),
        'validation':
        datasets.ImageFolder(valid_dir,
                             transform=data_transforms['validation']),
        'testing':
        datasets.ImageFolder(test_dir, transform=data_transforms['testing'])
    }

    # Batch iterators to feed the network
    dataloaders = {
        'training':
        torch.utils.data.DataLoader(image_datasets['training'],
                                    batch_size=100,
                                    shuffle=True),
        'validation':
        torch.utils.data.DataLoader(image_datasets['validation'],
                                    batch_size=100,
                                    shuffle=True),
        'testing':
        torch.utils.data.DataLoader(image_datasets['testing'],
                                    batch_size=100,
                                    shuffle=True)
    }

    return dataloaders, image_datasets
Beispiel #15
0
        sample = np.array(sample)

        # blur the image with a 50% chance
        prob = np.random.random_sample()

        if prob < 0.5:
            sigma = (self.max -
                     self.min) * np.random.random_sample() + self.min
            sample = cv2.GaussianBlur(sample,
                                      (self.kernel_size, self.kernel_size),
                                      sigma)

        return sample


# Training pipeline: random crop and flip, colour perturbations, the
# project's GaussianBlur, then tensor conversion and per-channel
# normalisation.
# NOTE(review): the mean/std values look like CIFAR-10 channel statistics -
# confirm against the dataset actually used.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=32),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply(
        [
            transforms.ColorJitter(brightness=0.4,
                                   contrast=0.4,
                                   saturation=0.4,
                                   hue=0.1)
        ],
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    # int(0.1 * 32) evaluates to 3, i.e. a 3x3 blur kernel.
    GaussianBlur(kernel_size=int(0.1 * 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010]),
])

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation as used for training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010]),
])
    def __init__(self,
                 normalize=None,
                 size_crops: List[int] = [96, 36],
                 nmb_crops: List[int] = [2, 4],
                 min_scale_crops: List[float] = [0.33, 0.10],
                 max_scale_crops: List[float] = [1, 0.33],
                 gaussian_blur: bool = True,
                 jitter_strength: float = 1.):
        """Build the list of multi-crop augmentation pipelines.

        The four ``*_crops`` sequences are parallel: entry ``i`` of each one
        describes one group of crops and they must all have the same length.

        Args:
            normalize: optional transform applied after ``ToTensor``; when
                ``None`` the pipelines end with ``ToTensor`` only.
            size_crops: output size given to ``RandomResizedCrop`` for each
                crop group.
            nmb_crops: how many times the pipeline of each group is repeated
                in ``self.transform``.
            min_scale_crops: lower bound of the ``scale`` range per group.
            max_scale_crops: upper bound of the ``scale`` range per group.
            gaussian_blur: whether a ``GaussianBlur`` (p=0.5) is appended to
                the colour transform.
            jitter_strength: multiplier applied to the ``ColorJitter``
                arguments (0.8, 0.8, 0.8, 0.2).

        After construction ``self.transform`` holds ``sum(nmb_crops)`` crop
        pipelines followed by one extra "online" pipeline (random crop of
        size ``size_crops[0]``, horizontal flip, final transform).
        """
        self.jitter_strength = jitter_strength
        self.gaussian_blur = gaussian_blur

        assert len(size_crops) == len(nmb_crops)
        assert len(min_scale_crops) == len(nmb_crops)
        assert len(max_scale_crops) == len(nmb_crops)

        # Store copies rather than the caller's objects: the signature uses
        # mutable list defaults, so keeping a reference would let a mutation
        # of one instance's attribute silently change the defaults of every
        # instance created afterwards.
        self.size_crops = list(size_crops)
        self.nmb_crops = list(nmb_crops)
        self.min_scale_crops = list(min_scale_crops)
        self.max_scale_crops = list(max_scale_crops)

        self.color_jitter = transforms.ColorJitter(0.8 * self.jitter_strength,
                                                   0.8 * self.jitter_strength,
                                                   0.8 * self.jitter_strength,
                                                   0.2 * self.jitter_strength)

        transform = []
        color_transform = [
            transforms.RandomApply([self.color_jitter], p=0.8),
            transforms.RandomGrayscale(p=0.2)
        ]

        if self.gaussian_blur:
            # Kernel is 10% of the first crop size, bumped to the next odd
            # number when even.
            kernel_size = int(0.1 * self.size_crops[0])
            if kernel_size % 2 == 0:
                kernel_size += 1

            color_transform.append(GaussianBlur(kernel_size=kernel_size,
                                                p=0.5))

        self.color_transform = transforms.Compose(color_transform)

        if normalize is None:
            self.final_transform = transforms.ToTensor()
        else:
            self.final_transform = transforms.Compose(
                [transforms.ToTensor(), normalize])

        for i in range(len(self.size_crops)):
            random_resized_crop = transforms.RandomResizedCrop(
                self.size_crops[i],
                scale=(self.min_scale_crops[i], self.max_scale_crops[i]),
            )

            # The same Compose object is repeated nmb_crops[i] times; each
            # call still draws fresh random parameters.
            transform.extend([
                transforms.Compose([
                    random_resized_crop,
                    transforms.RandomHorizontalFlip(p=0.5),
                    self.color_transform, self.final_transform
                ])
            ] * self.nmb_crops[i])

        self.transform = transform

        # add online train transform of the size of global view
        online_train_transform = transforms.Compose([
            transforms.RandomResizedCrop(self.size_crops[0]),
            transforms.RandomHorizontalFlip(), self.final_transform
        ])

        self.transform.append(online_train_transform)
Beispiel #17
0
def main_worker(gpu, ngpus_per_node, args):
    """Run training (or evaluation only) in one worker process.

    Sets up the optional process group, builds the model with a 3-output
    final ``fc`` layer, places it on the requested device(s), optionally
    resumes from ``args.resume``, builds the ``Synthetic`` train/validation
    loaders and then either evaluates once (``args.evaluate``) or runs the
    epoch loop, saving a checkpoint after every epoch.

    Args:
        gpu: device index for this worker, or ``None`` to use all visible
            devices; stored into ``args.gpu``.
        ngpus_per_node: number of GPUs on this node; used to derive the
            global rank and to split batch size / workers per process.
        args: parsed command-line namespace. This function both reads it and
            writes ``gpu``, ``rank``, ``batch_size``, ``workers`` and
            ``start_epoch`` on it.

    The module-level ``best_acc1`` is updated as training progresses.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
    # Replace the final fully connected layer with a 3-output head.
    n_ftrs = model.fc.in_features
    model.fc = nn.Linear(n_ftrs, 3)
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the stored tensors onto this worker's GPU; without
                # map_location they are restored onto the device they were
                # saved from, which may be another process's GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None and torch.is_tensor(best_acc1):
                # best_acc1 may be from a checkpoint from a different GPU;
                # a plain Python number needs no device transfer.
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = Synthetic('../annotation.json',
                              subset='training',
                              transform=transforms.Compose([
                                  transforms.RandomResizedCrop(224),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.ToTensor(),
                                  normalize,
                              ]))

    val_dataset = Synthetic('../annotation.json',
                            subset='validation',
                            transform=transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                normalize,
                            ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # shuffle and sampler are mutually exclusive in DataLoader, so shuffling
    # is only requested when no sampler is in use.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reseed the sampler so each epoch sees a different ordering.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Only one process per node writes the checkpoint.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
Beispiel #18
0
def load_data(
    prop_noPV_training: float,
    min_rescale_images: float,
    batch_size: int,
    dir_data_training: str = "",
    dir_data_validation: str = "",
    dir_data_test: str = "",
):
    """
    Build the training, validation and test DataLoaders.

    Each split is only created when its directory argument is non-empty;
    otherwise ``None`` is returned in its place.

    Parameters
    ----------
    prop_noPV_training : float
        Value forwarded as ``prop_noPV`` to the training dataset
        (proportion of noPV images used for training).
    min_rescale_images : float
        Lower bound of the ``scale`` range of the training
        RandomResizedCrop transform.
    batch_size : int
        Number of samples per batch, used for all three DataLoaders.
    dir_data_training : str, optional
        Directory containing the "PV", "noPV" and "labels" sub-folders of
        the training set. The default is "".
    dir_data_validation : str, optional
        Same layout, for the validation set. The default is "".
    dir_data_test : str, optional
        Same layout, for the test set. The default is "".

    Returns
    -------
    train_dl : torch.utils.data.DataLoader or None
        Training DataLoader.
    validation_dl : torch.utils.data.DataLoader or None
        Validation DataLoader.
    test_dl : torch.utils.data.DataLoader or None
        Test DataLoader.
    """

    train_set = None
    if dir_data_training:
        # Augmentation used for the training split only: square random crop
        # resized to 250 plus a random horizontal flip.
        augmentation = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(250,
                                         scale=(min_rescale_images, 1.0),
                                         ratio=(1.0, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        train_set = AvailableRooftopDataset(
            dir_PV=os.path.join(dir_data_training, "PV"),
            dir_noPV=os.path.join(dir_data_training, "noPV"),
            dir_labels=os.path.join(dir_data_training, "labels"),
            transform=augmentation,
            prop_noPV=prop_noPV_training,
        )

    # Validation and test images get no augmentation, only the
    # PIL -> tensor round trip.
    plain_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ])

    # Validation first, then test; a missing directory yields None.
    evaluation_sets = []
    for split_dir in (dir_data_validation, dir_data_test):
        if split_dir:
            evaluation_sets.append(
                AvailableRooftopDataset(
                    dir_PV=os.path.join(split_dir, "PV"),
                    dir_noPV=os.path.join(split_dir, "noPV"),
                    dir_labels=os.path.join(split_dir, "labels"),
                    transform=plain_transform,
                    prop_noPV=-1,  # All of them
                ))
        else:
            evaluation_sets.append(None)
    validation_set, test_set = evaluation_sets

    # One DataLoader per available dataset. The truthiness test is kept
    # deliberately: a falsy dataset is treated like a missing one.
    loaders = []
    for dataset in (train_set, validation_set, test_set):
        if dataset:
            loaders.append(
                DataLoader(dataset,
                           batch_size=batch_size,
                           shuffle=True,
                           num_workers=0))
        else:
            loaders.append(None)
    train_dl, validation_dl, test_dl = loaders

    return train_dl, validation_dl, test_dl
Beispiel #19
0
def get_dataloader(args, add_erasing=False, aug_plus=False):
    """Create the train/test loaders for the dataset named in ``args.dataset``.

    Supported values of ``args.dataset`` are ``'cifar10'``, ``'cifar100'``,
    ``'stl10'``, ``'stl10-full'`` and ``'kitchen'``.

    Args:
        args: namespace providing ``dataset``, ``batch_size``,
            ``num_workers``, ``two_imgs`` and ``three_imgs``. As a side
            effect ``args.pool_len`` is set (4 for the cifar/kitchen
            datasets, 7 for the stl ones).
        add_erasing: append ``RandomErasing(p=1.0)`` to the training
            transform. Only applied on the cifar/kitchen transform path.
        aug_plus: use the stronger augmentation list (colour jitter applied
            randomly plus Gaussian blur). Only applied on the cifar/kitchen
            transform path.

    Returns:
        Tuple ``(trainloader, testloader, ndata)`` where ``ndata`` is the
        length of the training set (of the labelled training split for
        ``'stl10-full'``).

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
    """
    if 'cifar' in args.dataset or 'kitchen' in args.dataset:
        if aug_plus:
            # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
            transform_train_list = [
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
                transforms.RandomApply(
                    [
                        transforms.ColorJitter(0.4, 0.4, 0.4,
                                               0.1)  # not strengthened
                    ],
                    p=0.8),
                transforms.RandomGrayscale(p=0.2),
                transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010)),
            ]
        else:
            transform_train_list = [
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
                transforms.RandomGrayscale(p=0.2),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010)),
            ]
        if add_erasing:
            transform_train_list.append(transforms.RandomErasing(p=1.0))
        transform_train = transforms.Compose(transform_train_list)

        if 'kitchen' in args.dataset:
            # Kitchen test images are resized to 32x32 before conversion.
            transform_test = transforms.Compose([
                transforms.Resize((32, 32), interpolation=2),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010)),
            ])
        else:
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010)),
            ])

    elif 'stl' in args.dataset:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(size=96, scale=(0.2, 1.)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def _distributed_train_loader(dataset):
        # Training loader driven by a DistributedSampler; shuffle stays
        # False because DataLoader does not accept shuffle with a sampler.
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.num_workers,
                                           pin_memory=False,
                                           sampler=sampler)

    def _test_loader(dataset):
        # Fixed-configuration, unshuffled loader used for every test set.
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=100,
                                           shuffle=False,
                                           num_workers=2,
                                           pin_memory=False)

    if args.dataset == 'cifar10':
        trainset = datasets.CIFAR10Instance(root='./data/CIFAR-10',
                                            train=True,
                                            download=True,
                                            transform=transform_train,
                                            two_imgs=args.two_imgs,
                                            three_imgs=args.three_imgs)
        trainloader = _distributed_train_loader(trainset)

        testset = datasets.CIFAR10Instance(root='./data/CIFAR-10',
                                           train=False,
                                           download=True,
                                           transform=transform_test)
        testloader = _test_loader(testset)
        args.pool_len = 4
        ndata = len(trainset)

    elif args.dataset == 'cifar100':
        trainset = datasets.CIFAR100Instance(root='./data/CIFAR-100',
                                             train=True,
                                             download=True,
                                             transform=transform_train,
                                             two_imgs=args.two_imgs,
                                             three_imgs=args.three_imgs)
        trainloader = _distributed_train_loader(trainset)

        testset = datasets.CIFAR100Instance(root='./data/CIFAR-100',
                                            train=False,
                                            download=True,
                                            transform=transform_test)
        testloader = _test_loader(testset)
        args.pool_len = 4
        ndata = len(trainset)

    elif args.dataset == 'stl10':
        trainset = datasets.STL10(root='./data/STL10',
                                  split='train',
                                  download=True,
                                  transform=transform_train,
                                  two_imgs=args.two_imgs,
                                  three_imgs=args.three_imgs)
        trainloader = _distributed_train_loader(trainset)

        testset = datasets.STL10(root='./data/STL10',
                                 split='test',
                                 download=True,
                                 transform=transform_test)
        testloader = _test_loader(testset)
        args.pool_len = 7
        ndata = len(trainset)

    elif args.dataset == 'stl10-full':
        trainset = datasets.STL10(root='./data/STL10',
                                  split='train+unlabeled',
                                  download=True,
                                  transform=transform_train,
                                  two_imgs=args.two_imgs,
                                  three_imgs=args.three_imgs)
        trainloader = _distributed_train_loader(trainset)

        # The labelled split is only needed for its length (ndata). The
        # sampler/loader that used to be built for it were never used or
        # returned, so they are no longer created.
        labeledTrainset = datasets.STL10(root='./data/STL10',
                                         split='train',
                                         download=True,
                                         transform=transform_train,
                                         two_imgs=args.two_imgs)
        testset = datasets.STL10(root='./data/STL10',
                                 split='test',
                                 download=True,
                                 transform=transform_test)
        testloader = _test_loader(testset)
        args.pool_len = 7
        ndata = len(labeledTrainset)

    elif args.dataset == 'kitchen':
        trainset = datasets.CIFARImageFolder(root='./data/Kitchen-HC/train',
                                             train=True,
                                             transform=transform_train,
                                             two_imgs=args.two_imgs,
                                             three_imgs=args.three_imgs)
        # Unlike the other datasets, kitchen uses plain shuffling and no
        # distributed sampler.
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  num_workers=args.num_workers,
                                                  pin_memory=False)
        testset = datasets.CIFARImageFolder(root='./data/Kitchen-HC/test',
                                            train=False,
                                            transform=transform_test)
        testloader = _test_loader(testset)
        args.pool_len = 4
        ndata = len(trainset)

    else:
        # Previously an unknown name fell through every branch and failed
        # with an UnboundLocalError at the return statement.
        raise ValueError('unsupported dataset: {}'.format(args.dataset))

    return trainloader, testloader, ndata
Beispiel #20
0
def main():

    global best_acc1
    best_acc1 = 0

    args = parse_option()

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # set the data loader
    train_folder = os.path.join(args.data_folder, 'train')
    val_folder = os.path.join(args.data_folder, 'val')

    image_size = 224
    crop_padding = 32
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=mean, std=std)

    if args.aug == 'NULL':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    elif args.aug == 'CJ':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        raise NotImplemented('augmentation not supported: {}'.format(args.aug))

    train_dataset = datasets.ImageFolder(train_folder, train_transform)
    val_dataset = datasets.ImageFolder(
        val_folder,
        transforms.Compose([
            transforms.Resize(image_size + crop_padding),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ]))

    print(len(train_dataset))
    train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    # create model and optimizer
    if args.model == 'resnet50':
        model = InsResNet50()
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1)
    elif args.model == 'resnet50x2':
        model = InsResNet50(width=2)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 2)
    elif args.model == 'resnet50x4':
        model = InsResNet50(width=4)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 4)
    else:
        raise NotImplementedError('model not supported {}'.format(args.model))

    print('==> loading pre-trained model')
    ckpt = torch.load(args.model_path)
    model.load_state_dict(ckpt['model'])
    print("==> loaded checkpoint '{}' (epoch {})".format(
        args.model_path, ckpt['epoch']))
    print('==> done')

    model = model.cuda()
    classifier = classifier.cuda()

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    if not args.adam:
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(args.beta1, args.beta2),
                                     weight_decay=args.weight_decay,
                                     eps=1e-8)

    model.eval()
    cudnn.benchmark = True

    # set mixed precision training
    # if args.amp:
    #     model = amp.initialize(model, opt_level=args.opt_level)
    #     classifier, optimizer = amp.initialize(classifier, optimizer, opt_level=args.opt_level)

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            # checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            classifier.load_state_dict(checkpoint['classifier'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_acc1 = checkpoint['best_acc1']
            best_acc1 = best_acc1.cuda()
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if 'opt' in checkpoint.keys():
                # resume optimization hyper-parameters
                print('=> resume hyper parameters')
                if 'bn' in vars(checkpoint['opt']):
                    print('using bn: ', checkpoint['opt'].bn)
                if 'adam' in vars(checkpoint['opt']):
                    print('using adam: ', checkpoint['opt'].adam)
                if 'cosine' in vars(checkpoint['opt']):
                    print('using cosine: ', checkpoint['opt'].cosine)
                args.learning_rate = checkpoint['opt'].learning_rate
                # args.lr_decay_epochs = checkpoint['opt'].lr_decay_epochs
                args.lr_decay_rate = checkpoint['opt'].lr_decay_rate
                args.momentum = checkpoint['opt'].momentum
                args.weight_decay = checkpoint['opt'].weight_decay
                args.beta1 = checkpoint['opt'].beta1
                args.beta2 = checkpoint['opt'].beta2
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # set cosine annealing scheduler
    if args.cosine:

        # last_epoch = args.start_epoch - 2
        # eta_min = args.learning_rate * (args.lr_decay_rate ** 3) * 0.1
        # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min, last_epoch)

        eta_min = args.learning_rate * (args.lr_decay_rate**3) * 0.1
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epochs, eta_min, -1)
        # dummy loop to catch up with current epoch
        for i in range(1, args.start_epoch):
            scheduler.step()

    # tensorboard
    logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)

    # routine
    for epoch in range(args.start_epoch, args.epochs + 1):

        if args.cosine:
            scheduler.step()
        else:
            adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        train_acc, train_acc5, train_loss = train(epoch, train_loader, model,
                                                  classifier, criterion,
                                                  optimizer, args)
        time2 = time.time()
        print('train epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        logger.log_value('train_acc', train_acc, epoch)
        logger.log_value('train_acc5', train_acc5, epoch)
        logger.log_value('train_loss', train_loss, epoch)
        logger.log_value('learning_rate', optimizer.param_groups[0]['lr'],
                         epoch)

        print("==> testing...")
        test_acc, test_acc5, test_loss = validate(val_loader, model,
                                                  classifier, criterion, args)

        logger.log_value('test_acc', test_acc, epoch)
        logger.log_value('test_acc5', test_acc5, epoch)
        logger.log_value('test_loss', test_loss, epoch)

        # save the best model
        if test_acc > best_acc1:
            best_acc1 = test_acc
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }
            save_name = '{}_layer{}.pth'.format(args.model, args.layer)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving best model!')
            torch.save(state, save_name)

        # save model
        if epoch % args.save_freq == 0:
            print('==> Saving...')
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': test_acc,
                'optimizer': optimizer.state_dict(),
            }
            save_name = 'ckpt_epoch_{epoch}.pth'.format(epoch=epoch)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving regular model!')
            torch.save(state, save_name)

        # tensorboard logger
        pass
Beispiel #21
0
def main_worker(gpu, ngpus_per_node, args):
    """Train (or only evaluate) ``args.arch`` inside one worker process.

    Sets up the optional process group, the model and its device wrapper,
    the optimizer and learning-rate schedule, a TensorBoard writer and the
    train/val data loaders; optionally restores a checkpoint; then either
    runs a single validation pass (``args.evaluate``) or the epoch loop,
    writing a checkpoint every fifth epoch.

    Args:
        gpu: GPU index for this process, or ``None``. Stored in ``args.gpu``.
        ngpus_per_node: Number of GPUs per node; used to derive the global
            rank and to split batch size / loader workers per process.
        args: Options namespace. Mutated in place (``gpu``, ``rank``,
            ``batch_size``, ``workers``, ``start_epoch``).

    Side effects:
        Reads and updates the module-level ``best_acc1``.
    """
    global best_acc1
    args.gpu = gpu

    # Comma-separated epoch list, e.g. "30,60,90" -> [30, 60, 90].
    lr_decay_epoch = [int(i) for i in args.lr_decay_epoch.split(',')]

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    logname = args.prefix
    # NOTE(review): any other args.optimizer value leaves `optimizer` unbound;
    # the first later use of it will then raise NameError.
    if args.optimizer == 'sgd':
        logname += "SGD_"
        print("sgd")
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'nero':
        logname += "Nero_"
        print("Nero")
        optimizer = Nero(model.parameters(), lr=args.lr, constraints=True)

    # The cosine schedule is sized in scheduler steps, not epochs:
    # T_max = steps-per-epoch * epochs, using the (possibly per-process)
    # batch size computed above.
    # NOTE(review): 1281167 is presumably the training-set size - confirm.
    steps_per_epoch = math.ceil(1281167.0 / float(args.batch_size))
    cos_sch = False
    scheduler = None
    T_max = steps_per_epoch * (args.epochs)
    if args.sch == 'cos' or args.sch == 'cosine':
        print("cosine scheduler")
        cos_sch = True
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=T_max,
                                                               eta_min=0.0)

    logname += args.arch + "_sch_" + str(args.sch) + "_lr" + str(args.lr) + \
            '_epoch' + str(args.epochs) + \
            "_opt_" + args.optimizer + \
            "_b" + str(args.batch_size) + \
            '_momentum' + str(args.momentum) + "_beta" + str(args.beta) + \
            '_wd' + str(args.weight_decay)

    writer = SummaryWriter(args.logdir + '/' + logname)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                # NOTE(review): assumes best_acc1 was saved as a tensor.
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Restart from the configured base LR rather than the saved one.
            for group in optimizer.param_groups:
                group["lr"] = args.lr
            # Fast-forward the schedule to the resumed position. Use the same
            # flag that created the scheduler so that both accepted spellings
            # ('cos' and 'cosine') are handled.
            if cos_sch:
                for i in range(checkpoint['epoch'] * steps_per_epoch):
                    scheduler.step()

            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # `shuffle` and `sampler` are mutually exclusive, so shuffle only when no
    # distributed sampler is in use.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(), normalize
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Gives the sampler the epoch number (used for its shuffling).
            train_sampler.set_epoch(epoch)

        current_lr = optimizer.param_groups[0]['lr']
        print("current learning rate: {}".format(current_lr))
        writer.add_scalar('lr', current_lr, epoch)

        # train for one epoch; train() hands the scheduler back to the caller
        top1_train, top5_train, losses_train, batch_time_train, scheduler = train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            args,
            writer,
            scheduler=scheduler)

        # Step decay is applied here only when no cosine schedule is active.
        if not cos_sch:
            lr = adjust_learning_rate(optimizer, epoch, lr, lr_decay_epoch,
                                      args.lr_decay)

        writer.add_scalar('train/batch_time_mean', batch_time_train, epoch)
        writer.add_scalar('train/loss_mean', losses_train, epoch)
        writer.add_scalar('train/top1_mean', top1_train, epoch)
        writer.add_scalar('train/top5_mean', top5_train, epoch)

        # evaluate on validation set
        top1_val, top5_val, losses_val, batch_time_val = validate(
            val_loader, model, criterion, args, epoch, writer)

        writer.add_scalar('val/batch_time_mean', batch_time_val, epoch)
        writer.add_scalar('val/loss_mean', losses_val, epoch)
        writer.add_scalar('val/top1_mean', top1_val, epoch)
        writer.add_scalar('val/top5_mean', top5_val, epoch)
        # remember best acc@1 and save checkpoint
        is_best = top1_val > best_acc1
        best_acc1 = max(top1_val, best_acc1)

        # Only one process per node writes checkpoints.
        if (not args.multiprocessing_distributed
                or args.rank % ngpus_per_node == 0):

            if (epoch + 1) % 5 == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args.logdir + '/' + logname + '/epoch' +
                    str(epoch + 1) + '_checkpoint.pth.tar')
    writer.close()
Beispiel #22
0
import torch
import torchvision.transforms as transforms

from PIL import Image
from torch.autograd import Variable

# Inference script fragment: loads a serialized model and walks the rows of
# ./data/test.csv, printing a progress line every `print_batch_size` rows.

# Defines where the model is located.
model_path = 'models/classify_resnet_152_80.pth'
# Defines where the test data is located.
test_folder = './data/test'
# Defines how often to print progress.
print_batch_size = 50

# Defines how to pre-process the image data.
# NOTE(review): RandomResizedCrop and RandomHorizontalFlip are random, so the
# same test image can be pre-processed differently on each run - confirm this
# is intended for inference.
transform = transforms.Compose([
    transforms.RandomResizedCrop(200),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Loads the model and sets to eval mode.
# `.eval()` is called directly on the loaded object, so the file is expected
# to hold a whole pickled model object rather than a state_dict.
model = torch.load(model_path)
model.eval()

# Iterates over all images in test folder.
# NOTE(review): `pd` and `time` are used here but are not among the imports
# visible in this snippet - confirm they are imported.
# NOTE(review): the loop body only prints progress; nothing is appended to
# `output` here - the snippet looks truncated.
output = []
for index, row in pd.read_csv('./data/test.csv').iterrows():
    if index % print_batch_size == 0:
        print('Progress: #{:5d} | time: {}'.format(index, time.ctime()))
def get_dataloader(args):
    """Build the training and validation data loaders for edge detection.

    Args:
        args: Options namespace; reads ``cls_num``, ``batch_size`` and
            ``workers``.

    Returns:
        ``(train_loader, val_loader)``. The validation loader uses half the
        training batch size (integer division, never less than 1).
    """
    # Define data files path.
    root_img_folder = "/ais/gobi4/fashion/edge_detection/data_aug"
    root_label_folder = "/ais/gobi4/fashion/edge_detection/data_aug"
    train_anno_txt = "/ais/gobi4/fashion/edge_detection/data_aug/list_train_aug.txt"
    val_anno_txt = "/ais/gobi4/fashion/edge_detection/data_aug/list_test.txt"
    train_hdf5_file = "/ais/gobi6/jiaman/github/CASENet/utils/train_aug_label_binary_np.h5"
    val_hdf5_file = "/ais/gobi6/jiaman/github/CASENet/utils/test_label_binary_np.h5"

    input_size = 472
    # Mean subtraction only (std of 1). The tensors are built with div=False.
    # NOTE(review): presumably per-channel BGR means on the 0-255 scale,
    # matching RGB2BGR below - confirm.
    normalize = transforms.Normalize(mean=[104.008, 116.669, 122.675],
                                     std=[1, 1, 1])

    # Image and label share the same crop scale/ratio ranges; labels use
    # nearest-neighbour resampling.
    # NOTE(review): the two pipelines are separate random transforms, so
    # unless CityscapesData synchronises them the image and its label may
    # receive different crops/flips - verify against the dataset class.
    crop_scale = (0.75, 1.0)
    crop_ratio = (0.75, 1.0)
    train_augmentation = transforms.Compose([
        transforms.RandomResizedCrop(input_size,
                                     scale=crop_scale,
                                     ratio=crop_ratio),
        transforms.RandomHorizontalFlip()
    ])
    train_label_augmentation = transforms.Compose([
        transforms.RandomResizedCrop(input_size,
                                     scale=crop_scale,
                                     ratio=crop_ratio,
                                     interpolation=PIL.Image.NEAREST),
        transforms.RandomHorizontalFlip()
    ])

    train_dataset = CityscapesData(root_img_folder,
                                   root_label_folder,
                                   train_anno_txt,
                                   train_hdf5_file,
                                   input_size,
                                   cls_num=args.cls_num,
                                   img_transform=transforms.Compose([
                                       train_augmentation,
                                       RGB2BGR(roll=True),
                                       ToTorchFormatTensor(div=False),
                                       normalize,
                                   ]),
                                   label_transform=transforms.Compose([
                                       transforms.ToPILImage(),
                                       train_label_augmentation,
                                       transforms.ToTensor(),
                                   ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_dataset = CityscapesData(
        root_img_folder,
        root_label_folder,
        val_anno_txt,
        val_hdf5_file,
        input_size,
        cls_num=args.cls_num,
        img_transform=transforms.Compose([
            transforms.Resize([input_size, input_size]),
            RGB2BGR(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ]),
        label_transform=transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([input_size, input_size],
                              interpolation=PIL.Image.NEAREST),
            transforms.ToTensor(),
        ]))
    # DataLoader needs an integer batch size; true division would produce a
    # float under Python 3. Clamp so a training batch size of 1 still works.
    val_batch_size = max(1, args.batch_size // 2)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=val_batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    return train_loader, val_loader
Beispiel #24
0
# Build the network for this run; initialize_model also reports the input
# resolution that the chosen architecture expects.
model_ft, input_size = initialize_model(
    model_name, num_classes, feature_extract, use_pretrained=True)

# Show the freshly built architecture.
print(model_ft)

# -------------------------- LOADING THE DATA --------------------------
# Training: random crop + horizontal flip, then tensor conversion and
# normalization. Validation: resize + center crop, then the same
# tensor conversion and normalization.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

print("Initializing Datasets and Dataloaders...")
Beispiel #25
0
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torchvision import datasets, transforms, models
from collections import OrderedDict
import json

# Dataset layout: <data_dir>/{train,valid,test}/<class>/<image>.
data_dir = 'flowers'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'

# Prefer the GPU, but fall back to the CPU when CUDA is not available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Training pipeline: random crop / rotation / flip for augmentation.
transform_train = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomRotation(30),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Evaluation pipeline: deterministic resize + center crop, so that test and
# validation metrics are reproducible (no random cropping at eval time).
transform_test = transforms.Compose([transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Validation uses exactly the same pre-processing as test.
transform_valid = transform_test

trainset = datasets.ImageFolder(train_dir, transform = transform_train)
testset = datasets.ImageFolder(test_dir, transform = transform_test)
validset = datasets.ImageFolder(valid_dir, transform = transform_valid)
Beispiel #26
0
def main_worker(gpu, ngpus_per_node, args):
    """Run MoCo pre-training inside one worker process.

    Initialises the process group, builds the MoCo model wrapped in
    DistributedDataParallel, optionally resumes from a checkpoint, builds the
    two-crop training loader and runs the epoch loop, writing a rolling
    checkpoint every epoch and a numbered one every ``args.save_freq`` epochs.

    Args:
        gpu: GPU index for this process, or ``None``. Stored in ``args.gpu``.
        ngpus_per_node: Number of GPUs per node; used to derive the global
            rank and to split batch size / loader workers per process.
        args: Options namespace. Mutated in place (``gpu``, ``rank``,
            ``batch_size``, ``workers``, ``start_epoch``).

    Raises:
        NotImplementedError: If ``args.distributed`` is false; only
            DistributedDataParallel is supported.
    """
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        # Accept keyword arguments too, so calls such as
        # print(..., flush=True) do not raise on the silenced processes.
        def print_pass(*_args, **_kwargs):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    # All run artefacts (log output, checkpoints) live under this directory.
    output_dir = 'output/{}/'.format(args.output_dir)

    logger = setup_logger(output=output_dir, distributed_rank=dist.get_rank(), name="moco")
    # create model
    logger.info("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(
        models.__dict__[args.arch],
        args.moco_dim, args.moco_k, args.moco_m, args.moco_t, args.mlp)
    logger.info(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})"
                        .format(args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply([
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]

    train_dataset = datasets.ImageFolder(
        traindir,
        moco.loader.TwoCropsTransform(transforms.Compose(augmentation)))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # `shuffle` and `sampler` are mutually exclusive, so shuffle only when no
    # distributed sampler is in use.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Gives the sampler the epoch number (used for its shuffling).
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, logger, args)

        # Only one process per node writes checkpoints.
        if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
            # 'epoch' stores the next epoch to run, which is what the resume
            # path above assigns to args.start_epoch.
            state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            # Rolling checkpoint, overwritten every epoch.
            save_checkpoint(state, is_best=False,
                            filename='output/{}/checkpoint_current.pth.tar'.format(args.output_dir))
            # Numbered snapshot; the file name uses the epoch just finished.
            if epoch % args.save_freq == 0:
                save_checkpoint(state, is_best=False,
                                filename='output/{}/checkpoint_{:04d}.pth.tar'.format(args.output_dir, epoch))
Beispiel #27
0
def make_dataset():
    """Return a shuffling DataLoader for the dataset named by ``opt.dataset``.

    "cifar10" uses a padded random crop on the CIFAR10 training split; the
    "dog_and_cat_64", "dog_and_cat_128" and "imagenet" options read an image
    folder at ``opt.root`` with a square random-resized crop. Every pipeline
    ends with flip, tensor conversion, normalization and a small uniform noise.

    Raises:
        ValueError: If ``opt.dataset`` is not one of the names above.
    """
    # Small noise is added, following SN-GAN
    def noise(x):
        jitter = torch.FloatTensor(x.size()).uniform_(0, 1.0 / 128)
        return x + jitter

    folder_datasets = ("dog_and_cat_64", "dog_and_cat_128", "imagenet")
    use_cifar = opt.dataset == "cifar10"

    # Only the first (cropping) step differs between the dataset families.
    if use_cifar:
        crop = tfs.RandomCrop(opt.img_width, padding=4)
    elif opt.dataset in folder_datasets:
        crop = tfs.RandomResizedCrop(opt.img_width,
                                     scale=(0.8, 0.9),
                                     ratio=(1.0, 1.0))
    else:
        raise ValueError(f"Unknown dataset: {opt.dataset}")

    trans = tfs.Compose([
        crop,
        tfs.RandomHorizontalFlip(),
        tfs.ToTensor(),
        tfs.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
        tfs.Lambda(noise),
    ])

    if use_cifar:
        data = CIFAR10(root=opt.root,
                       train=True,
                       download=True,
                       transform=trans)
    else:
        data = ImageFolder(opt.root, transform=trans)

    return DataLoader(data,
                      batch_size=opt.batch_size,
                      shuffle=True,
                      num_workers=opt.workers)
Beispiel #28
0
# Side length used by the resize / crop steps below.
SIZE = 224

# Evaluation pipeline: resize, center crop, tensor conversion, then
# normalization with mean 0.5 and std 0.5 per channel.
transform_test = transforms.Compose([
    transforms.Resize(size=SIZE),
    transforms.CenterCrop(size=SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training pipeline: colour jitter, rotation and random crop first, then the
# whole evaluation pipeline is reused as the final step.
transform_train = transforms.Compose([
    transforms.ColorJitter(
        brightness=0.4, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomRotation(degrees=15),
    transforms.RandomResizedCrop(
        size=SIZE, scale=(0.8, 1.0), ratio=(0.90, 1.10)),
    transform_test,
])


def imshow(img):
    """Display ``img`` with matplotlib after computing ``img / 2 + 0.5``.

    The array is transposed with axes ``(1, 2, 0)`` before plotting.
    NOTE(review): presumably expects a channel-first image tensor normalized
    with mean 0.5 / std 0.5 - confirm against callers.
    """
    rescaled = img / 2 + 0.5
    pixels = rescaled.numpy()
    channels_last = pixels.transpose((1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


def imload(path):
    """Open the image at ``path`` and return it converted to RGB mode."""
    picture = Image.open(path)
    return picture.convert("RGB")

Beispiel #29
0
def main():
    """Build the food11re datasets, samplers and loaders, then inspect them.

    Steps, in order:

    1. Seed ``ia`` and build the augmentation pipeline used by the
       ``ImageFolder`` view of the training directory.
    2. Create ``Food11Dataset`` instances for the training, validation and
       evaluation directories plus an ``ImageFolder`` over the training
       directory.
    3. Build a ``WeightedRandomSampler`` whose per-sample weight is the
       inverse relative frequency of the sample's class, and a plain
       ``RandomSampler`` with replacement.
    4. Print ``show_details()`` for each ``Food11Dataset``.
    5. Wrap everything in ``DataLoader`` objects and pass each loader to
       ``data_loading``.

    Class weights are derived from the labels actually present in the
    ``ImageFolder`` rather than from a hard-coded class count, and the labels
    are read from ``.targets`` so no image has to be decoded to compute them.
    """
    # Function-scope import so this block does not depend on a file-level one.
    from collections import Counter

    ia.seed(1)

    train_datapath = "./food11re/skewed_training"
    valid_datapath = "./food11re/validation"
    test_datapath = "./food11re/evaluation"

    transform = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        # ImgAugTransform's output is turned back into a PIL image
        # before ToTensor runs.
        ImgAugTransform(), lambda x: PIL.Image.fromarray(x),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = Food11Dataset(train_datapath, is_train=True)
    # Same directory as ``train_dataset``; reuse the variable so the two
    # views cannot drift apart.
    train_dataset_folder = torchvision.datasets.ImageFolder(
        root=train_datapath, transform=transform)
    valid_dataset = Food11Dataset(valid_datapath, is_train=False)
    test_dataset = Food11Dataset(test_datapath, is_train=False)

    # Earlier, disabled approach kept for reference:
    #wts = [100, 781, 67, 169, 196, 75, 757, 1190, 194, 67, 2857]
    #train_dataset.augmentation(wts)

    # Inverse relative class frequency, computed only for classes that
    # actually occur (so no division by zero and no fixed class count).
    targets = train_dataset_folder.targets
    total = len(targets)
    class_weight = {
        label: 1. / (count / total)
        for label, count in Counter(targets).items()
    }

    # Index the label list directly: iterating the dataset itself would load
    # and transform every image just to read its label.
    samples_weight = np.array([class_weight[t] for t in targets])
    weighted_sampler = data.WeightedRandomSampler(samples_weight,
                                                  num_samples=15000,
                                                  replacement=True)

    random_sampler = data.RandomSampler(train_dataset,
                                        replacement=True,
                                        num_samples=9000,
                                        generator=None)

    separator = "----------------------------------------------------------------------------------"
    for path, dataset in ((train_datapath, train_dataset),
                          (valid_datapath, valid_dataset),
                          (test_datapath, test_dataset)):
        print(separator)
        print("Dataset bf. loading - ", path)
        print(dataset.show_details())

    train_folder_loader = DataLoader(dataset=train_dataset_folder,
                                     num_workers=0,
                                     batch_size=100,
                                     sampler=weighted_sampler)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=0,
                              batch_size=100,
                              sampler=random_sampler)
    valid_loader = DataLoader(dataset=valid_dataset,
                              num_workers=0,
                              batch_size=100,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=0,
                             batch_size=100,
                             shuffle=False)

    # NOTE(review): the folder-backed loader is paired with ``train_dataset``
    # rather than ``train_dataset_folder`` -- kept as in the original; confirm
    # this is what ``data_loading`` expects.
    data_loading(train_folder_loader, train_dataset)
    data_loading(train_loader, train_dataset)
    data_loading(valid_loader, valid_dataset)
    data_loading(test_loader, test_dataset)
import torch.optim as optim

import pickle

# In[73]:

# Run configuration.
num_classes = 120
batch_size = 13
epochs = 15
sample_submission = pd.read_csv('./data/sample_submission.csv')

# In[74]:

# One normalisation step (mean 0.5, std 0.5 per channel) shared by both
# pipelines below.
normalize = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)

# Training pipeline: random 299-pixel crop and horizontal flip, then tensor
# conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=299),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: resize to 360, centre-crop to 299, then tensor
# conversion and normalisation -- no random steps.
valid_transform = transforms.Compose([
    transforms.Resize(size=360),
    transforms.CenterCrop(size=299),
    transforms.ToTensor(),
    normalize,
])

# Training images are read from class sub-folders and served in shuffled
# batches by two worker processes.
trainset = torchvision.datasets.ImageFolder(
    root="./data/train/",
    transform=train_transform,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)