def main(father_handle, **extra_argv):
    args = parser.parse_args()
    for key, val in extra_argv.items():
        setattr(args, key, val)
    result_dir = create_result_dir(args)
    if father_handle is not None:
        father_handle.put(result_dir)
    main_worker(args.gpu, args, result_dir)
def main(father_handle, **extra_argv):
    args = parser.parse_args()
    for key, val in extra_argv.items():
        setattr(args, key, val)
    result_dir = create_result_dir(args)
    if father_handle is not None:
        father_handle.put(result_dir)
    if args.gpu != -1:
        # Single-process run pinned to the requested GPU.
        main_worker(args.gpu, False, args, result_dir)
    else:
        # One worker per visible GPU: scale world_size and rank from node-level
        # to process-level counts, then spawn. torch.multiprocessing.spawn
        # prepends the local process index as the first argument of main_worker.
        n_procs = torch.cuda.device_count()
        args.world_size *= n_procs
        args.rank *= n_procs
        torch.multiprocessing.spawn(main_worker, nprocs=n_procs,
                                    args=(True, args, result_dir))
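# Hypothetical parent-side sketch (not part of the original sources): main()
# only uses father_handle to hand the freshly created result directory back to
# the process that launched it, so a plain multiprocessing queue suffices on
# the caller side. The helper name below is illustrative.
def _run_main_in_subprocess_example(**arg_overrides):
    """Run main() in a child process and collect its result directory."""
    import torch.multiprocessing as mp

    queue = mp.Queue()
    proc = mp.Process(target=main, args=(queue,), kwargs=arg_overrides)
    proc.start()
    result_dir = queue.get()  # value sent by main() via father_handle.put(...)
    proc.join()
    return result_dir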
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    print(args)
    from algorithm import Algorithm, SGD, SGDClip, MomClip, MixClip
    if args.algo == 'sgd':
        optimizer = Algorithm(model.parameters(), SGD, lr=args.lr,
                              momentum=args.momentum, wd=args.wd)
    elif args.algo == 'sgd_clip':
        optimizer = Algorithm(model.parameters(), SGDClip, lr=args.lr,
                              gamma=args.gamma, wd=args.wd, momentum=args.momentum)
    elif args.algo == 'mom_clip':
        optimizer = Algorithm(model.parameters(), MomClip, lr=args.lr,
                              gamma=args.gamma, wd=args.wd, momentum=args.momentum)
    elif args.algo == 'mix_clip':
        optimizer = Algorithm(model.parameters(), MixClip, lr=args.lr,
                              momentum=args.momentum, gamma=args.gamma, wd=args.wd)
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(0, val_loader, model, criterion, None, args)
        return

    from utils import create_result_dir, TableLogger
    result_dir = create_result_dir(args)
    train_logger = TableLogger(os.path.join(result_dir, 'train.log'),
                               ['epoch', 'loss', 'acc', 'acc5'])
    test_logger = TableLogger(os.path.join(result_dir, 'test.log'),
                              ['epoch', 'loss', 'acc', 'acc5'])

    for epoch in range(args.start_epoch, args.epochs[-1]):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, train_logger, args)

        # evaluate on validation set
        acc1 = validate(epoch, val_loader, model, criterion, test_logger, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
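# Illustrative sketch only: the custom algorithm.Algorithm / SGDClip classes
# selected above are not shown here, and their exact update rule may differ.
# The snippet below demonstrates the generic technique their names refer to,
# a gradient-clipped SGD step built from stock PyTorch; the function name and
# the clip_norm threshold are hypothetical.
import torch


def clipped_sgd_step(model, optimizer, loss, clip_norm=1.0):
    """One optimization step with global-norm gradient clipping."""
    optimizer.zero_grad()
    loss.backward()
    # Rescale all gradients in place so their global L2 norm is at most clip_norm.
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
    optimizer.step()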
    trainer.extend(
        extensions.PrintReport([
            "epoch", "main/loss", "validation/main/loss", "main/accuracy",
            "validation/main/accuracy", "elapsed_time"
        ]))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_epoch-{.updater.epoch}'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    chainer.serializers.save_npz(os.path.join(args.out_dir, 'weights.npz'), model)


if __name__ == '__main__':
    args = parse_args()
    # create result dir
    create_result_dir(args.out_dir, args, args.overwrite)
    main(args)
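# Reload sketch (hypothetical helper, not in the original script): parameters
# saved above with chainer.serializers.save_npz can be restored into a freshly
# constructed model of the same architecture.
import os

import chainer


def load_trained_model(out_dir, model):
    """Restore parameters from <out_dir>/weights.npz into `model` in place."""
    chainer.serializers.load_npz(os.path.join(out_dir, 'weights.npz'), model)
    return model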
# global variables
subject_id = 0  # other possible values: 546 99 1 0 (or more) (from HCP)
hrf_dur = 20.0
n_voxels_to_retain = 100
n_jobs = 3  # number of CPUs to use
lbdas_cue = [1.0]  # lambda values for task 'cue'
lbdas_rh = [3.5]  # lambda values for task 'rh'
lbdas_lh = [3.0]  # lambda values for task 'lh'
all_lbdas = [lbdas_rh, lbdas_lh, lbdas_cue]
trials = ['rh', 'lh', 'cue']  # other possible values: 'lf' 'rf'

# start main
root_dir = create_result_dir(sufixe='fast_hcp_validation')
print("Saving results under '{0}'".format(root_dir))
print("Archiving '{0}' under '{1}'".format(__file__, root_dir))
shutil.copyfile(__file__, os.path.join(root_dir, __file__))

fmri_img, anat_img = get_hcp_fmri_fname(fetch_subject_list()[subject_id],
                                         anat_data=True)
canonical_hrf = inf_norm(
    spm_hrf(1.0, t_r=TR_HCP, dur=hrf_dur, normalized_hrf=False)[0])

ref_hrfs, b_est_hrfs, b_est_blocks = {}, {}, {}
for trial, lbdas in zip(trials, all_lbdas):
    print('*' * 80)
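# Assumption for illustration only: inf_norm is taken here to rescale a signal
# to unit infinity norm (maximum absolute value of 1), which is a common way to
# make HRF estimates comparable in shape; the project's own helper may differ.
# The function name below is hypothetical.
import numpy as np


def inf_norm_sketch(x, axis=-1, eps=1e-12):
    """Rescale x so that max(|x|) along `axis` equals 1."""
    scale = np.max(np.abs(x), axis=axis, keepdims=True)
    return x / (scale + eps)  # eps guards against an all-zero input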
    'beta slowness regularization applied on RNN activation (beta = 0 means no regularization)')
parser.add_argument('--wd', type=float, default=1.2e-6,
                    help='weight decay applied to all weights')
parser.add_argument('--algo', type=str, default='sgd',
                    help='optimizer to use (sgd, adam)')
parser.add_argument('--nu', type=float, default=0.7)
args = parser.parse_args()
args.tied = True

from utils import TableLogger, create_result_dir
result_dir = create_result_dir(args)
train_logger = TableLogger(os.path.join(result_dir, 'train.log'),
                           ['epoch', 'loss', 'ppl'])
test_logger = TableLogger(os.path.join(result_dir, 'test.log'),
                          ['epoch', 'loss', 'ppl'])
args.save = os.path.join(result_dir, args.save)

# Set the random seed manually for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else: