def environ_setting():
    global args, train_loader, val_loader, model

    # argument settings
    args = parser.parse_args()
    args.method_str = util.get_method_str(args)
    args.log_file_path = os.path.join(args.save_dir, args.log_file_name)
    print(f'Method | {args.method_str}')
    args.best_top1_acc = 0.0
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    # other settings
    if args.use_cuda:
        print("Using CUDA")
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        torch.backends.cudnn.benchmark = True
        args.device = torch.device("cuda")
    else:
        print('Not using CUDA')
        args.device = torch.device("cpu")
    torch.manual_seed(args.seed)
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    check_folders_exist()  # make sure output folders exist

    train_loader, val_loader = data_loader.get_cifar100_data_loader(args)  # get data loaders
    model = models.get_model(args)  # get model
    print(model)
    util.print_model_parameters(model)
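# check_folders_exist() is called above but defined outside this excerpt. A
# minimal sketch of what such a helper could do (an assumption, not the repo's
# actual code): ensure the save directory exists before anything writes to it.
def check_folders_exist():
    # create the output/save directory tree if it is missing
    os.makedirs(args.save_dir, exist_ok=True)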
def initial_process(model):
    print(model)
    util.print_model_parameters(model)
    print("------------------------- Initial training -------------------------------")
    tok = "initial"
    criterion = nn.CrossEntropyLoss().cuda()
    util.initial_train(model, args, train_loader, test_loader, tok, use_cuda=True)
    accuracy = util.validate(args, test_loader, model, criterion)
    torch.save(model, f"{args.save_dir}/model_initial_end.ptmodel")
    util.log(f"{args.save_dir}/{args.log}",
             f"weight:{args.save_dir}/{args.out_oldweight_folder}")
    util.log(f"{args.save_dir}/{args.log}",
             f"model:{args.save_dir}/model_initial_end.ptmodel")
    util.log(f"{args.save_dir}/{args.log}", f"initial_accuracy {accuracy}")
    util.layer2torch(model, f"{args.save_dir}/{args.out_oldweight_folder}")
    weight_list = util.parameters2list(model.children())
    util.save_parameters(f"{args.save_dir}/{args.out_oldweight_folder}", weight_list)
    return model
def initial_process(model):
    print(model)
    util.print_model_parameters(model)
    print("------------------------- Initial training -------------------------------")
    util.initial_train(model, args, train_loader, val_loader, 'initial')
    accuracy = util.validate(val_loader, model, args)
    util.log(f"{args.save_dir}/{args.log}",
             f"weight\t{args.save_dir}/{args.out_oldweight_folder}")
    util.log(f"{args.save_dir}/{args.log}",
             f"model\t{args.save_dir}/model_initial_end.ptmodel")
    util.log(f"{args.save_dir}/{args.log}", f"initial_accuracy\t{accuracy}")
    util.layer2torch(f"{args.save_dir}/{args.out_oldweight_folder}", model)
    weight_list = util.parameters2list(model.children())
    util.save_parameters(f"{args.save_dir}/{args.out_oldweight_folder}", weight_list)
    return model
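# A minimal driver sketch (hypothetical; the original entry point is not shown
# in this excerpt). environ_setting() populates the globals that
# initial_process() relies on (args, train_loader, val_loader, model), so it
# must run first.
if __name__ == '__main__':
    environ_setting()
    model = initial_process(model)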
    **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

# Define which model to use
model = LeNet_5(mask=True).to(device)
print(model)
util.print_model_parameters(model)

# NOTE: the `weight_decay` argument adds the L2 regularization loss term
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
initial_optimizer_state_dict = optimizer.state_dict()

def train(epochs):
    model.train()
    for epoch in range(epochs):
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (data, target) in pbar:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
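# The train() loop above is truncated after the loss computation. A sketch of
# the step that typically follows in mask-based pruning code (an assumption,
# not necessarily this file's exact continuation): backprop, zero the gradients
# of already-pruned (zero-valued) weights so pruned connections stay dead, then
# take the optimizer step.
def train_step_sketch(model, optimizer, loss):
    loss.backward()
    for name, p in model.named_parameters():
        if 'mask' in name:
            continue  # leave the mask parameters untouched
        # zero gradients wherever the weight itself has been pruned to zero
        p.grad.data.mul_((p.data != 0).float())
    optimizer.step()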
if use_cuda:
    torch.cuda.manual_seed(opt.seed)

train_set = get_training_set(opt.upscale_factor)
test_set = get_test_set(opt.upscale_factor)
training_data_loader = DataLoader(dataset=train_set,
                                  num_workers=opt.threads,
                                  batch_size=opt.batch_size,
                                  shuffle=True)
testing_data_loader = DataLoader(dataset=test_set,
                                 num_workers=opt.threads,
                                 batch_size=opt.test_batch_size,
                                 shuffle=False)

srcnn = SRCNN(mask=True)
util.print_model_parameters(srcnn)
criterion = nn.MSELoss()

if opt.cuda:
    torch.cuda.set_device(opt.gpuids[0])
    with torch.cuda.device(opt.gpuids[0]):
        srcnn = srcnn.cuda()
        criterion = criterion.cuda()
    # srcnn = nn.DataParallel(srcnn, device_ids=opt.gpuids, output_device=opt.gpuids[0])

optimizer = optim.Adam(srcnn.parameters(), lr=opt.lr)
initial_optimizer_state_dict = optimizer.state_dict()

def train(epoch):
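# The SRCNN script optimizes MSE; super-resolution quality is conventionally
# reported as PSNR, which follows directly from that MSE. A minimal sketch,
# assuming pixel values normalized to [0, 1] (hypothetical helper, not from
# this file):
from math import log10

def psnr_from_mse(mse):
    # PSNR = 10 * log10(MAX^2 / MSE), with MAX = 1.0 for [0, 1] inputs
    return 10 * log10(1.0 / mse)

# e.g. inside a test loop:
#   mse = criterion(srcnn(input), target).item()
#   print(f"PSNR: {psnr_from_mse(mse):.4f} dB")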
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    print(model)
    util.print_model_parameters(model)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=256, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    #if args.pretrained:
    #    print('Pretrained model evaluation...')
    #    validate(val_loader, model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args.reg, args.decay)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        torch.save(model.state_dict(),
                   'saves/elt_s_' + str(args.decay) + '_' + str(args.reg) + '.pth')
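# adjust_learning_rate() is called above but defined outside this excerpt. In
# the stock PyTorch ImageNet example it decays the initial LR by 10x every 30
# epochs; a sketch under that assumption (this script's version may differ):
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr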
def main(is_large_model, file_source, epochs, per_epoch, verbose,
         output_directory, loss, gen_lr, disc_lr, gen_reg, disc_reg):
    if is_large_model:
        print("Initiating large model...")
        util.print_model_parameters(file_source, epochs, per_epoch, verbose,
                                    output_directory, loss, gen_lr, disc_lr,
                                    gen_reg, disc_reg)
        sys.stdout.flush()
        latent_dim = 600
        input_shape = (64, 64, 7)
        generator = em_generator_large(latent_dim, input_shape,
                                       reg=lambda: l1l2(gen_reg, gen_reg))
        discriminator = em_discriminator_large(
            input_shape, reg=lambda: l1l2(disc_reg, disc_reg))
        train_em_gan(AdversarialOptimizerSimultaneous(),
                     generator,
                     discriminator,
                     Adam(gen_lr),
                     Adam(disc_lr),
                     latent_dim,
                     file_source,
                     "/volumes/raw",
                     input_shape,
                     output_directory,
                     verbose=verbose,
                     epochs=epochs,
                     per_epoch=per_epoch,
                     loss=loss,
                     r_id=("large_" + str(gen_lr) + "_" + str(disc_lr) + "_" +
                           str(gen_reg) + "_" + str(disc_reg)),
                     is_large_model=True)
    else:
        print("Initiating small model...")
        print_model_parameters(file_source, epochs, per_epoch, verbose,
                               output_directory, loss, gen_lr, disc_lr,
                               gen_reg, disc_reg)
        sys.stdout.flush()
        latent_dim = 300
        input_shape = (24, 24, 12)
        generator = em_generator(latent_dim, input_shape,
                                 reg=lambda: l1l2(gen_reg, gen_reg))
        discriminator = em_discriminator(input_shape,
                                         reg=lambda: l1l2(disc_reg, disc_reg))
        train_em_gan(AdversarialOptimizerSimultaneous(),
                     generator,
                     discriminator,
                     Adam(gen_lr),
                     Adam(disc_lr),
                     latent_dim,
                     file_source,
                     "/volumes/raw",
                     input_shape,
                     output_directory,
                     verbose=verbose,
                     epochs=epochs,
                     per_epoch=per_epoch,
                     loss=loss,
                     r_id=(str(gen_lr) + "_" + str(disc_lr)))
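# A hypothetical CLI wrapper for main() above. The flag names and defaults here
# are assumptions for illustration, not the original script's actual interface.
import argparse

if __name__ == '__main__':
    p = argparse.ArgumentParser(description='Train the EM GAN')
    p.add_argument('--large', action='store_true')
    p.add_argument('--source', required=True)
    p.add_argument('--epochs', type=int, default=100)
    p.add_argument('--per-epoch', type=int, default=100)
    p.add_argument('--verbose', action='store_true')
    p.add_argument('--out', default='output')
    p.add_argument('--loss', default='binary_crossentropy')
    p.add_argument('--gen-lr', type=float, default=1e-4)
    p.add_argument('--disc-lr', type=float, default=1e-3)
    p.add_argument('--gen-reg', type=float, default=1e-5)
    p.add_argument('--disc-reg', type=float, default=1e-5)
    a = p.parse_args()
    main(a.large, a.source, a.epochs, a.per_epoch, a.verbose, a.out,
         a.loss, a.gen_lr, a.disc_lr, a.gen_reg, a.disc_reg)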