def main(father_handle, **extra_argv):
    args = parser.parse_args()
    for key, val in extra_argv.items():
        setattr(args, key, val)
    result_dir = create_result_dir(args)
    if father_handle is not None:
        father_handle.put(result_dir)
    main_worker(args.gpu, args, result_dir)
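
The parent process is expected to pass a queue-like object as father_handle and read the result directory back from it. A minimal sketch of such a caller, assuming main is importable from the training script (the launch helper below is hypothetical):

import multiprocessing as mp

def launch(entry_main):
    # Spawn the training entry point in a child process and wait for it
    # to report its result directory through the queue.
    ctx = mp.get_context('spawn')
    queue = ctx.SimpleQueue()
    proc = ctx.Process(target=entry_main, args=(queue,))
    proc.start()
    result_dir = queue.get()  # blocks until the child calls put()
    print('child writes results to', result_dir)
    proc.join()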
Example 2
def main(father_handle, **extra_argv):
    args = parser.parse_args()
    for key, val in extra_argv.items():
        setattr(args, key, val)
    result_dir = create_result_dir(args)
    if father_handle is not None:
        father_handle.put(result_dir)
    if args.gpu != -1:
        main_worker(args.gpu, False, args, result_dir)
    else:
        n_procs = torch.cuda.device_count()
        args.world_size *= n_procs
        args.rank *= n_procs
        torch.multiprocessing.spawn(main_worker,
                                    nprocs=n_procs,
                                    args=(True, args, result_dir))
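
The rank arithmetic above turns a per-node rank into per-process global ranks: world_size and rank are first scaled by the number of local GPUs, and each spawned worker then adds its local index (see args.rank * ngpus_per_node + gpu in the next example). A small worked check (hypothetical helper):

def global_rank(node_rank, local_gpu, n_procs_per_node):
    # Node 1 with 4 GPUs per node: local GPU 2 becomes global rank 6.
    return node_rank * n_procs_per_node + local_gpu

assert global_rank(1, 2, 4) == 6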
Example 3
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
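            # e.g. batch_size=256 with 4 GPUs gives 64 per process; the
            # ceiling division keeps at least one loader worker per process.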
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    print(args)
    from algorithm import Algorithm, SGD, SGDClip, MomClip, MixClip
    if args.algo == 'sgd':
        optimizer = Algorithm(model.parameters(), SGD, lr=args.lr, momentum=args.momentum, wd=args.wd)
    elif args.algo == 'sgd_clip':
        optimizer = Algorithm(model.parameters(), SGDClip, lr=args.lr, gamma=args.gamma, wd=args.wd, momentum=args.momentum)
    elif args.algo == 'mom_clip':
        optimizer = Algorithm(model.parameters(), MomClip, lr=args.lr, gamma=args.gamma, wd=args.wd, momentum=args.momentum)
    elif args.algo == 'mix_clip':
        optimizer = Algorithm(model.parameters(), MixClip, lr=args.lr, momentum=args.momentum, gamma=args.gamma, wd=args.wd)
    else:
        raise NotImplementedError
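    # The algorithm module is not shown in this example. Judging by the
    # names, an SGDClip-style update clips the effective step size, roughly:
    #   g = grad + wd * p
    #   p -= lr * min(1, gamma / ||g||) * g
    # Treat this as an assumed sketch, not the actual implementation.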

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
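    # These are the standard ImageNet channel statistics used by the
    # torchvision pretrained models.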

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)
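    # shuffle and sampler are mutually exclusive in DataLoader, so shuffling
    # is enabled only when no DistributedSampler is in use.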

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(0, val_loader, model, criterion, None, args)
        return

    from utils import create_result_dir, TableLogger
    result_dir = create_result_dir(args)
    train_logger = TableLogger(os.path.join(result_dir, 'train.log'), ['epoch', 'loss', 'acc', 'acc5'])
    test_logger = TableLogger(os.path.join(result_dir, 'test.log'), ['epoch', 'loss', 'acc', 'acc5'])

    for epoch in range(args.start_epoch, args.epochs[-1]):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, train_logger, args)

        # evaluate on validation set
        acc1 = validate(epoch, val_loader, model, criterion, test_logger, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
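
args.epochs is indexed with [-1] as the stopping point, which suggests a list of milestone epochs. adjust_learning_rate is not shown, but a step schedule of the following shape would be consistent with that usage (hypothetical sketch):

def adjust_learning_rate(optimizer, epoch, args):
    # Assumed milestone schedule: decay lr by 10x at each epoch listed in
    # args.epochs[:-1]; the last entry is the total number of epochs.
    lr = args.lr * (0.1 ** sum(epoch >= m for m in args.epochs[:-1]))
    for group in optimizer.param_groups:
        group['lr'] = lr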
Example 4
    trainer.extend(
        extensions.PrintReport([
            "epoch", "main/loss", "validation/main/loss", "main/accuracy",
            "validation/main/accuracy", "elapsed_time"
        ]))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))
    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_epoch-{.updater.epoch}'),
                   trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    chainer.serializers.save_npz(os.path.join(args.out_dir, 'weights.npz'),
                                 model)


if __name__ == '__main__':
    args = parse_args()

    # create result dir
    create_result_dir(args.out_dir, args, args.overwrite)

    main(args)
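
Weights saved with save_npz can later be restored with the matching deserializer, provided the same model class is rebuilt first (sketch under that assumption, reusing the example's args and model):

import os
import chainer

# Reconstruct the architecture first, then load the trained parameters.
chainer.serializers.load_npz(os.path.join(args.out_dir, 'weights.npz'), model)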
Example 5
# global variables

subject_id = 0  # other possible values: 1, 99, 546, ... (HCP subject indices)
hrf_dur = 20.0
n_voxels_to_retain = 100
n_jobs = 3  # number of CPUs to use
lbdas_cue = [1.0]  # lambda values for task 'cue'
lbdas_rh = [3.5]  # lambda values for task 'rh'
lbdas_lh = [3.0]  # lambda values for task 'lh'
all_lbdas = [lbdas_rh, lbdas_lh, lbdas_cue]
trials = ['rh', 'lh', 'cue']  # other possible values: 'lf', 'rf'

# start main

root_dir = create_result_dir(sufixe='fast_hcp_validation')
print("Saving results under '{0}'".format(root_dir))

print("archiving '{0}' under '{1}'".format(__file__, root_dir))
# basename keeps the copy inside root_dir even when __file__ is an absolute
# path (os.path.join would otherwise discard root_dir entirely)
shutil.copyfile(__file__, os.path.join(root_dir, os.path.basename(__file__)))

fmri_img, anat_img = get_hcp_fmri_fname(fetch_subject_list()[subject_id],
                                        anat_data=True)

canonical_hrf = inf_norm(
    spm_hrf(1.0, t_r=TR_HCP, dur=hrf_dur, normalized_hrf=False)[0])

ref_hrfs, b_est_hrfs, b_est_blocks = {}, {}, {}
for trial, lbdas in zip(trials, all_lbdas):

    print('*' * 80)
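
inf_norm is applied to the canonical HRF above; the name suggests peak normalization. A minimal sketch of that assumed behavior:

import numpy as np

def inf_norm(x):
    # Assumed: rescale so the largest absolute value is exactly 1.
    return np.asarray(x) / np.max(np.abs(x))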
Example 6
        'beta slowness regularization applied on RNN activation (beta = 0 means no regularization)'
    )
    parser.add_argument('--wd',
                        type=float,
                        default=1.2e-6,
                        help='weight decay applied to all weights')
    parser.add_argument('--algo',
                        type=str,
                        default='sgd',
                        help='optimizer to use (sgd, adam)')
    parser.add_argument('--nu', type=float, default=0.7)
    args = parser.parse_args()
    args.tied = True

    from utils import TableLogger, create_result_dir
    result_dir = create_result_dir(args)
    train_logger = TableLogger(os.path.join(result_dir, 'train.log'),
                               ['epoch', 'loss', 'ppl'])
    test_logger = TableLogger(os.path.join(result_dir, 'test.log'),
                              ['epoch', 'loss', 'ppl'])
    args.save = os.path.join(result_dir, args.save)

    # Set the random seed manually for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
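    # On multi-GPU machines, torch.cuda.manual_seed_all(args.seed) would also
    # seed every CUDA device for full reproducibility.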
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else: