def main_worker(ngpus_per_node, args):
    """Train and validate a network on the BAPPS 2AFC dataset.

    Builds the network selected by ``args``, moves it to the requested
    device(s), optionally resumes from a checkpoint and then runs the
    train/validate loop. After every epoch the saving node writes a checkpoint
    and ``model_progress.csv`` into ``args.out_dir``.

    Args:
        ngpus_per_node: Number of GPUs on this node. Used to derive the global
            rank and to split ``batch_size``/``workers`` between processes.
        args: Options namespace. It is modified in place: ``rank``,
            ``batch_size``, ``workers`` and ``initial_epoch`` may be
            overwritten.

    Raises:
        ValueError: If ``args.custom_arch`` is set but ``args.network_name`` is
            not one of the supported custom architectures.
    """
    mean, std = model_utils.get_preprocessing_function(args.colour_space,
                                                       args.vision_type)

    # preparing the output folder
    create_dir(args.out_dir)

    if args.gpus is not None:
        print("Use GPU: {} for training".format(args.gpus))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + args.gpus
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    if args.transfer_weights is not None:
        print('Transferred model!')
        model = contrast_utils.AFCModel(args.network_name,
                                        args.transfer_weights)
    elif args.custom_arch:
        print('Custom model!')
        supported_customs = ['resnet_basic_custom', 'resnet_bottleneck_custom']
        if args.network_name not in supported_customs:
            # Fail here with a clear message instead of hitting an unbound
            # ``model`` further down.
            raise ValueError(
                "Unsupported custom architecture '{}'; expected one of "
                "{}".format(args.network_name, supported_customs))
        model = custom_models.__dict__[args.network_name](
            args.blocks,
            pooling_type=args.pooling_type,
            in_chns=len(mean),
            num_classes=args.num_classes,
            inplanes=args.num_kernels,
            kernel_size=args.kernel_size)
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name]()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpus is not None:
            torch.cuda.set_device(args.gpus)
            model.cuda(args.gpus)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpus])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpus is not None:
        torch.cuda.set_device(args.gpus)
        model = model.cuda(args.gpus)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.network_name.startswith(('alexnet', 'vgg')):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = soft_cross_entropy

    # optimiser
    if args.transfer_weights is None:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        # With transferred weights only the trainable parameters are optimised.
        params_to_optimize = [
            {
                'params': [p for p in model.parameters() if p.requires_grad]
            },
        ]
        optimizer = torch.optim.SGD(params_to_optimize,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # optionally resume from a checkpoint
    # TODO: it would be best if resume load the architecture from this file
    # TODO: merge with which_architecture
    best_acc1 = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE: torch.load unpickles the file; only resume from trusted
            # checkpoints.
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.initial_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.gpus is not None:
                # best_acc1 may be from a checkpoint from a different GPU;
                # it may also be a plain number, which has no ``.to``.
                if torch.is_tensor(best_acc1):
                    best_acc1 = best_acc1.to(args.gpus)
                model = model.cuda(args.gpus)
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if os.path.exists(model_progress_path):
                # ndmin=2 keeps a single-row file as a list of rows, so new
                # rows appended below still stack into a rectangular array.
                model_progress = np.loadtxt(model_progress_path,
                                            delimiter=',',
                                            ndmin=2)
                model_progress = model_progress.tolist()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # train_trans/valid_trans are split-specific; both_trans is shared.
    train_trans = []
    valid_trans = []
    both_trans = []
    if args.mosaic_pattern is not None:
        mosaic_trans = preprocessing.mosaic_transformation(args.mosaic_pattern)
        both_trans.append(mosaic_trans)

    if args.num_augmentations != 0:
        augmentations = preprocessing.random_augmentation(
            args.augmentation_settings, args.num_augmentations)
        train_trans.append(augmentations)

    target_size = default_configs.get_default_target_size(
        args.dataset, args.target_size)

    # Applied last for both splits: tensor conversion and normalisation.
    final_trans = [
        cv2_transforms.ToTensor(),
        cv2_transforms.Normalize(mean, std),
    ]

    train_trans.append(
        cv2_transforms.RandomResizedCrop(target_size, scale=(0.08, 1.0)))

    # loading the training set
    train_trans = torch_transforms.Compose(
        [*both_trans, *train_trans, *final_trans])
    train_dataset = image_quality.BAPPS2afc(root=args.data_dir,
                                            split='train',
                                            transform=train_trans,
                                            concat=0.5)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # shuffle and sampler are mutually exclusive in DataLoader.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    valid_trans.extend([
        cv2_transforms.Resize(target_size),
        cv2_transforms.CenterCrop(target_size),
    ])

    # loading validation set
    valid_trans = torch_transforms.Compose(
        [*both_trans, *valid_trans, *final_trans])
    validation_dataset = image_quality.BAPPS2afc(root=args.data_dir,
                                                 split='val',
                                                 transform=valid_trans,
                                                 concat=0)

    val_loader = torch.utils.data.DataLoader(validation_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # training on epoch
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        misc_utils.adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train_log = train_on_data(train_loader, model, criterion, optimizer,
                                  epoch, args)

        # evaluate on validation set
        validation_log = validate_on_data(val_loader, model, criterion, args)

        model_progress.append([*train_log, *validation_log])

        # remember best acc@1 and save checkpoint
        # (index 2 of the validation log is the v_top1 column of the header)
        acc1 = validation_log[2]
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if misc_utils.is_saving_node(args.multiprocessing_distributed,
                                     args.rank, ngpus_per_node):
            misc_utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.network_name,
                    'customs': {
                        'pooling_type': args.pooling_type,
                        'in_chns': len(mean),
                        'num_classes': args.num_classes,
                        'blocks': args.blocks,
                        'num_kernels': args.num_kernels,
                        'kernel_size': args.kernel_size
                    },
                    'transfer_weights': args.transfer_weights,
                    'preprocessing': {
                        'mean': mean,
                        'std': std
                    },
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'target_size': target_size,
                },
                is_best,
                out_folder=args.out_dir)
            # TODO: get this header directly as a dictionary keys
            header = 'epoch,t_time,t_loss,t_top5,v_time,v_loss,v_top1'
            np.savetxt(model_progress_path,
                       np.array(model_progress),
                       delimiter=',',
                       header=header)
Beispiel #2
0
def main_worker(ngpus_per_node, args):
    """Train and validate a classification network.

    Builds the network selected by ``args`` (transferred, custom, pre-trained
    or freshly created), prepares the output directory and dumps the options
    to ``args.json``, moves the model to the requested device(s), optionally
    resumes from a checkpoint and then runs the train/validate loop. After
    every epoch the saving node writes a checkpoint and ``model_progress.csv``
    into ``args.out_dir``.

    Args:
        ngpus_per_node: Number of GPUs on this node. Used to derive the global
            rank and to split ``batch_size``/``workers`` between processes.
        args: Options namespace. It is modified in place: ``rank``,
            ``network_name``, ``out_dir``, ``batch_size``, ``workers`` and
            ``initial_epoch`` may be overwritten.

    Raises:
        ValueError: If ``args.custom_arch`` is set but ``args.network_name`` is
            neither an existing checkpoint file nor a supported custom
            architecture.
    """
    mean, std = model_utils.get_preprocessing_function(args.colour_space,
                                                       args.vision_type)

    if args.gpus is not None:
        print("Use GPU: {} for training".format(args.gpus))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + args.gpus
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    if args.transfer_weights is not None:
        print('Transferred model!')
        (model, _) = model_utils.which_network(args.transfer_weights[0],
                                               args.task_type,
                                               num_classes=args.old_classes)
        # An optional second entry selects the layer; -1 is the default.
        which_layer = -1
        if len(args.transfer_weights) == 2:
            which_layer = args.transfer_weights[1]
        model = model_utils.NewClassificationModel(model, which_layer,
                                                   args.num_classes)
    elif args.custom_arch:
        print('Custom model!')
        supported_customs = ['resnet_basic_custom', 'resnet_bottleneck_custom']
        if os.path.isfile(args.network_name):
            # NOTE: torch.load unpickles the file; only load trusted
            # checkpoints.
            checkpoint = torch.load(args.network_name, map_location='cpu')
            customs = None
            if 'customs' in checkpoint:
                customs = checkpoint['customs']
                # TODO: num_classes is just for backward compatibility
                if 'num_classes' not in customs:
                    customs['num_classes'] = 1000
            model = which_architecture(checkpoint['arch'], customs,
                                       args.contrast_head)
            args.network_name = checkpoint['arch']

            model.load_state_dict(checkpoint['state_dict'], strict=False)
        elif args.network_name in supported_customs:
            model = custom_models.__dict__[args.network_name](
                args.blocks,
                contrast_head=args.contrast_head,
                pooling_type=args.pooling_type,
                in_chns=len(mean),
                num_classes=args.num_classes,
                inplanes=args.num_kernels,
                kernel_size=args.kernel_size)
        else:
            # Fail here with a clear message instead of hitting an unbound
            # ``model`` further down.
            raise ValueError(
                "Unsupported custom architecture '{}'; expected a checkpoint "
                "file or one of {}".format(args.network_name,
                                           supported_customs))
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name]()

    # TODO: why load weights is False?
    args.out_dir = prepare_training.prepare_output_directories(
        dataset_name='contrast',
        network_name=args.network_name,
        optimiser='sgd',
        load_weights=False,
        experiment_name=args.experiment_name,
        framework='pytorch')
    # preparing the output folder
    create_dir(args.out_dir)
    # Keep a record of the options used for this run.
    json_file_name = os.path.join(args.out_dir, 'args.json')
    with open(json_file_name, 'w') as fp:
        json.dump(dict(args._get_kwargs()), fp, sort_keys=True, indent=4)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpus is not None:
            torch.cuda.set_device(args.gpus)
            model.cuda(args.gpus)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpus])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpus is not None:
        torch.cuda.set_device(args.gpus)
        model = model.cuda(args.gpus)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.network_name.startswith(('alexnet', 'vgg')):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpus)

    # optimiser
    if args.transfer_weights is None:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        # The parallel wrappers expose the wrapped network as ``.module`` and
        # do not forward ``features``/``fc``, so unwrap before reading them.
        parallel_wrappers = (torch.nn.DataParallel,
                             torch.nn.parallel.DistributedDataParallel)
        base_model = model
        if isinstance(model, parallel_wrappers):
            base_model = model.module
        # The transferred features get a small fixed learning rate, while the
        # new classifier uses the optimiser default (args.lr).
        params_to_optimize = [
            {
                'params': list(base_model.features.parameters()),
                'lr': 1e-6
            },
            {
                'params': list(base_model.fc.parameters())
            },
        ]
        optimizer = torch.optim.SGD(params_to_optimize,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # optionally resume from a checkpoint
    # TODO: it would be best if resume load the architecture from this file
    # TODO: merge with which_architecture
    best_acc1 = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE: torch.load unpickles the file; only resume from trusted
            # checkpoints.
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.initial_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.gpus is not None:
                # best_acc1 may be from a checkpoint from a different GPU;
                # it may also be a plain number, which has no ``.to``.
                if torch.is_tensor(best_acc1):
                    best_acc1 = best_acc1.to(args.gpus)
                model = model.cuda(args.gpus)
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if os.path.exists(model_progress_path):
                # ndmin=2 keeps a single-row file as a list of rows, so new
                # rows appended below still stack into a rectangular array.
                model_progress = np.loadtxt(model_progress_path,
                                            delimiter=',',
                                            ndmin=2)
                model_progress = model_progress.tolist()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # train_trans/valid_trans are split-specific; both_trans is shared.
    train_trans = []
    valid_trans = []
    both_trans = []
    if args.mosaic_pattern is not None:
        mosaic_trans = preprocessing.mosaic_transformation(args.mosaic_pattern)
        both_trans.append(mosaic_trans)

    if args.num_augmentations != 0:
        augmentations = preprocessing.random_augmentation(
            args.augmentation_settings, args.num_augmentations)
        train_trans.append(augmentations)

    target_size = default_configs.get_default_target_size(
        args.dataset, args.target_size)

    # loading the training set
    train_trans = [*both_trans, *train_trans]
    db_params = {
        'colour_space': args.colour_space,
        'vision_type': args.vision_type,
        'mask_image': args.mask_image
    }
    # These datasets take a directory; all others take args.train_samples.
    if args.dataset in ['imagenet', 'celeba', 'natural']:
        path_or_sample = args.data_dir
    else:
        path_or_sample = args.train_samples
    train_dataset = dataloader.train_set(args.dataset,
                                         target_size,
                                         mean,
                                         std,
                                         extra_transformation=train_trans,
                                         data_dir=path_or_sample,
                                         **db_params)
    if args.dataset == 'natural':
        # The requested batch size is handed to the dataset as num_crops and
        # the loader then draws one item at a time.
        train_dataset.num_crops = args.batch_size
        args.batch_size = 1

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # shuffle and sampler are mutually exclusive in DataLoader.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    # loading validation set
    valid_trans = [*both_trans, *valid_trans]
    validation_dataset = dataloader.validation_set(
        args.dataset,
        target_size,
        mean,
        std,
        extra_transformation=valid_trans,
        data_dir=path_or_sample,
        **db_params)
    if args.dataset == 'natural':
        validation_dataset.num_crops = train_dataset.num_crops
        args.batch_size = 1

    val_loader = torch.utils.data.DataLoader(validation_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # training on epoch
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        misc_utils.adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train_log = train_on_data(train_loader, model, criterion, optimizer,
                                  epoch, args)

        # evaluate on validation set
        validation_log = validate_on_data(val_loader, model, criterion, args)

        model_progress.append([*train_log, *validation_log])

        # remember best acc@1 and save checkpoint
        # (index 2 of the validation log is the v_top1 column of the header)
        acc1 = validation_log[2]
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if misc_utils.is_saving_node(args.multiprocessing_distributed,
                                     args.rank, ngpus_per_node):
            misc_utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.network_name,
                    'customs': {
                        'pooling_type': args.pooling_type,
                        'in_chns': len(mean),
                        'num_classes': args.num_classes,
                        'blocks': args.blocks,
                        'num_kernels': args.num_kernels,
                        'kernel_size': args.kernel_size
                    },
                    'preprocessing': {
                        'mean': mean,
                        'std': std
                    },
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'target_size': target_size,
                },
                is_best,
                out_folder=args.out_dir)
            # TODO: get this header directly as a dictionary keys
            header = 'epoch,t_time,t_loss,t_top1,t_top5,v_time,v_loss,v_top1,v_top5'
            np.savetxt(model_progress_path,
                       np.array(model_progress),
                       delimiter=',',
                       header=header)
Beispiel #3
0
def generic_evaluation(args, fn, save_fn=None, **kwargs):
    """Run ``fn`` for every network and every value of the manipulation.

    For each entry of ``args.network_files`` the network is loaded, and for
    each value of the manipulated parameter a validation loader is built with
    the corresponding transformations. Combinations whose output file already
    exists are skipped. ``fn`` is then called in one of three ways:

    * ``args.random_images`` set: ``fn(loader, out_folder, normalize_inverse,
      manipulation_value, **kwargs)``; nothing is passed to ``save_fn``.
    * ``args.activation_map`` set: ``fn(loader, model, **kwargs)`` with the
      model wrapped in ``LayerActivation``; the result goes to ``save_fn``.
    * otherwise: ``fn(loader, model, **kwargs)`` must return a 3-tuple whose
      last item goes to ``save_fn``.

    Args:
        args: Options namespace. ``args.parameters['kwargs'][args.manipulation]``
            is overwritten with each scalar value in turn and is left holding
            the last one; ``args.collate_fn`` is set to ``None`` if missing.
        fn: Evaluation callable, invoked as described above.
        save_fn: Callable that stores the results; required unless
            ``args.random_images`` is set.
        **kwargs: Forwarded unchanged to ``fn``.

    Raises:
        ValueError: If results have to be saved but ``save_fn`` is ``None``.
    """
    manipulation_values = args.parameters['kwargs'][args.manipulation]
    manipulation_name = args.parameters['f_name']
    other_mans = args.parameters['others']

    # Segmentation-style inputs use a dedicated transformation factory; the
    # choice does not change inside the loops.
    if args.task_type == 'segmentation' or 'voc' in args.dataset:
        make_transformation = preprocessing.prediction_transformation_seg
    else:
        make_transformation = preprocessing.prediction_transformation

    for j, current_network in enumerate(args.network_files):
        # which architecture
        # (the size returned here is not used; it is re-read per dataset below)
        (model, _) = model_utils.which_network(
            current_network,
            args.task_type,
            num_classes=args.num_classes,
            kill_kernels=args.kill_kernels,
            kill_planes=args.kill_planes,
            kill_lines=args.kill_lines)
        model.to(args.device)
        mean, std = model_utils.get_preprocessing_function(
            args.colour_space, args.network_chromaticities[j])
        normalize = transforms.Normalize(mean=mean, std=std)

        # Built lazily and only once per network, so the model is not wrapped
        # again (wrapper around wrapper) for every manipulation value.
        activation_model = None

        for manipulation_value in manipulation_values:
            # The transformations read the current value from args.parameters.
            args.parameters['kwargs'][args.manipulation] = manipulation_value

            output_file = prepapre_testing._prepare_saving_file(
                args.experiment_name,
                args.network_names[j],
                args.dataset,
                manipulation_name,
                manipulation_value,
                extension='csv')
            # Already evaluated in a previous run.
            if os.path.exists(output_file):
                continue

            # Fail before the costly evaluation rather than after it.
            if args.random_images is None and save_fn is None:
                raise ValueError(
                    'save_fn must be provided unless args.random_images is '
                    'set')

            prediction_transformation = make_transformation(
                args.parameters, args.colour_space,
                tmp_c_space(manipulation_name))
            colour_vision = 'trichromat'
            if _requires_colour_transform(manipulation_name,
                                          args.network_chromaticities[j]):
                colour_vision = args.network_chromaticities[j]

            # The additional manipulations come first, the main one last.
            other_transformations = [
                make_transformation(oth_man, args.colour_space,
                                    tmp_c_space(oth_man['f_name']))
                for oth_man in other_mans
            ]
            if args.mosaic_pattern is not None:
                other_transformations.append(
                    preprocessing.mosaic_transformation(args.mosaic_pattern))
            if args.sf_filter is not None:
                other_transformations.append(
                    preprocessing.sf_transformation(args.sf_filter,
                                                    args.sf_filter_chn))
            other_transformations.append(prediction_transformation)

            print('Processing network %s and %s %f' %
                  (current_network, manipulation_name, manipulation_value))

            # which dataset
            # reading it after the model, because each might have their own
            # specific size
            # loading validation set
            # NOTE(review): the size returned by which_network is never used;
            # confirm that this is intended.
            target_size = get_default_target_size(args.dataset,
                                                  args.target_size)

            target_transform = utils_db.ImagenetCategoryTransform(
                args.categories, args.cat_dir)

            validation_dataset = utils_db.get_validation_dataset(
                args.dataset,
                args.validation_dir,
                colour_vision,
                args.colour_space,
                other_transformations,
                normalize,
                target_size,
                task=args.task_type,
                target_transform=target_transform)

            # TODO: nicer solution:
            if 'sampler' not in args:
                sampler = None
            else:
                sampler = args.sampler(validation_dataset)
            if 'collate_fn' not in args:
                args.collate_fn = None

            # FIXME: add segmentation datasests
            val_loader = torch.utils.data.DataLoader(
                validation_dataset,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True,
                sampler=sampler,
                collate_fn=args.collate_fn)

            if args.random_images is not None:
                out_folder = prepapre_testing.prepare_saving_dir(
                    args.experiment_name, args.network_names[j], args.dataset,
                    manipulation_name)
                normalize_inverse = NormalizeInverse(mean, std)
                fn(val_loader, out_folder, normalize_inverse,
                   manipulation_value, **kwargs)
            elif args.activation_map is not None:
                if activation_model is None:
                    activation_model = model_utils.LayerActivation(
                        model, args.activation_map)
                current_results = fn(val_loader, activation_model, **kwargs)
                save_fn(current_results, args.experiment_name,
                        args.network_names[j], args.dataset, manipulation_name,
                        manipulation_value)
            else:
                (_, _, current_results) = fn(val_loader, model, **kwargs)
                save_fn(current_results, args.experiment_name,
                        args.network_names[j], args.dataset, manipulation_name,
                        manipulation_value)