def __init__(self, config):
    """Set up a training run: output folders, CUDA check, dataloader, models.

    Parameters
    ----------
    config : Namespace
        Parsed experiment configuration; stored on ``self.config``.
    """
    self.config = config
    # Training starts from step 0 unless a pre-trained model overrides it.
    self.start = 0

    cfg = self.config  # local alias, same object

    # Make sure every output directory exists before anything is written.
    for folder in (cfg.save_path, cfg.model_weights_path,
                   cfg.sample_images_path):
        utils.make_folder(folder)

    # Snapshot the configuration and the scripts that produced this run.
    utils.write_config_to_file(cfg, cfg.save_path)
    utils.copy_scripts(cfg.save_path)

    # Detect CUDA (sets device-related attributes on self).
    utils.check_for_CUDA(self)

    # Dataloader over training images; also reports the number of classes.
    self.dataloader, self.num_of_classes = utils.make_dataloader(
        cfg.batch_size_in_gpu, cfg.dataset, cfg.data_path, cfg.shuffle,
        cfg.drop_last, cfg.dataloader_args, cfg.resize, cfg.imsize,
        cfg.centercrop, cfg.centercrop_size)
    self.data_iter = iter(self.dataloader)

    # Instantiate generator and discriminator.
    self.build_models()

    # DCGAN-style adversarial loss uses plain binary cross-entropy.
    if cfg.adv_loss == 'dcgan':
        self.criterion = nn.BCELoss()
def main():
    """Train (or evaluate) a classifier: build data, model, loop, and report."""
    args = get_args()
    torch.manual_seed(args.seed)

    # Fixed input geometry expected by the backbone.
    input_shape = (224, 224, 3)

    # Data pipelines.
    train_loader, valid_loader, test_loader = make_dataloader(args)

    # Model; GPU execution is mandatory.
    model = get_model(args, input_shape, args.num_classes)
    n_gpus = torch.cuda.device_count()
    if n_gpus < 1:
        raise ValueError('CPU training is not supported')
    print('Model pushed to {} GPU(s), type {}.'.format(
        n_gpus, torch.cuda.get_device_name(0)))
    model = model.cuda()

    # Loss, optimizer and learning-rate schedule.
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = make_optimizer(args, model)
    scheduler = make_scheduler(args, optimizer)

    # Accumulates per-epoch metrics for logging and plotting.
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': [],
    }
    trainer = Trainer(model, criterion, optimizer, scheduler)
    evaluator = Evaluator(model, criterion)

    if args.evaluate:
        # Evaluation-only mode: restore the checkpoint, score the test set.
        model.load()
        result_dict = evaluator.test(test_loader, args, result_dict)
    else:
        evaluator.save(result_dict)
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch
            result_dict = trainer.train(train_loader, epoch, args, result_dict)
            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)
            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)
        # Final test pass and checkpoint once training is done.
        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)
        model.save()
    print(result_dict)
def main(args):
    """Train a VDCNN text classifier on the CSVs under ``args.dataset_path``."""
    torch.manual_seed(0)  # fixed seed for reproducibility
    # train.csv / test.csv are expected directly under the dataset path
    # (path is concatenated, so dataset_path should end with a separator).
    dataloaders = make_dataloader(args.dataset_path + 'train.csv',
                                  args.dataset_path + 'test.csv')
    network = VDCNN(depth=args.depth, num_class=args.num_class)
    run_model(network, dataloaders, args.num_epochs)
def __init__(self, config):
    """Copy run settings from ``config``, create output dirs, data and models.

    Parameters
    ----------
    config : Namespace
        Parsed experiment configuration.
    """
    # Settings mirrored one-to-one from the config object.
    mirrored = (
        # Images data path
        'dataset', 'data_path',
        # Training settings
        'batch_size', 'total_step', 'd_steps_per_iter', 'g_steps_per_iter',
        'd_lr', 'g_lr', 'beta1', 'beta2',
        'inst_noise_sigma', 'inst_noise_sigma_iters',
        # Image transforms
        'shuffle', 'drop_last', 'resize', 'imsize',
        'centercrop', 'centercrop_size', 'tanh_scale', 'normalize',
        # Step sizes
        'log_step', 'sample_step', 'model_save_step',
        'save_n_images', 'max_frames_per_gif',
        # Pretrained model
        'pretrained_model',
        # Misc
        'manual_seed', 'disable_cuda', 'parallel', 'dataloader_args',
        # Model hyper-parameters
        'adv_loss', 'z_dim', 'g_conv_dim', 'd_conv_dim', 'lambda_gp',
        # Model name
        'name',
    )
    for attr in mirrored:
        setattr(self, attr, getattr(config, attr))

    # Training starts at step 0 unless a pre-trained model is loaded below.
    self.start = 0

    # Derived output paths; create them all.
    self.save_path = os.path.join(config.save_path, config.name)
    self.model_weights_path = os.path.join(self.save_path,
                                           config.model_weights_dir)
    self.sample_path = os.path.join(self.save_path, config.sample_dir)
    for folder in (self.save_path, self.model_weights_path, self.sample_path):
        utils.make_folder(folder)

    # Snapshot configuration and scripts for this run.
    utils.write_config_to_file(config, self.save_path)
    utils.copy_scripts(self.save_path)

    # Detect CUDA (sets device attributes on self).
    utils.check_for_CUDA(self)

    # Dataloader over training images; also reports the number of classes.
    self.dataloader, self.num_of_classes = utils.make_dataloader(
        self.batch_size, self.dataset, self.data_path, self.shuffle,
        self.drop_last, self.dataloader_args, self.resize, self.imsize,
        self.centercrop, self.centercrop_size)
    self.data_iter = iter(self.dataloader)

    # Generator and discriminator.
    self.build_models()

    # Resume from a pre-trained model when one is given.
    if self.pretrained_model != '':
        utils.load_pretrained_model(self)

    # DCGAN adversarial loss uses binary cross-entropy.
    if self.adv_loss == 'dcgan':
        self.criterion = nn.BCELoss()
def main():
    """Entry point: build save dir, data, model(s) and run the training loop.

    Two modes, selected by ``args.unigen``:
    - unigen: a plain generator + image discriminator (no encoder).
    - otherwise: an autoencoder-style model + joint discriminator, with
      separate encoder/decoder optimizers.
    Sets module-level globals (args, device, num_iter_per_epoch, log_file,
    fixed_noise) that the training function relies on.
    """
    global args
    args = get_config()
    # NOTE(review): 'commond' looks like a typo for 'command' — left unchanged
    # because other code may read this attribute name; confirm before renaming.
    args.commond = 'python ' + ' '.join(sys.argv)

    # Create saving directory (name encodes the main hyper-parameters)
    if args.unigen:
        save_dir = './results_unigen/{0}/G{1}_glr{2}_dlr{3}_dstep{4}_zdim{5}_{6}/'.format(
            args.dataset, args.dec_dist, str(args.lr), str(args.lr_d),
            str(args.d_steps_per_iter), str(args.latent_dim), args.div)
    else:
        save_dir = './results/{0}/E{1}_G{2}_glr{3}_dlr{4}_gstep{5}_dstep{6}_zdim{7}_{8}/'.format(
            args.dataset, args.enc_dist, args.dec_dist, str(args.lr),
            str(args.lr_d), str(args.g_steps_per_iter),
            str(args.d_steps_per_iter), str(args.latent_dim), args.div)
    utils.make_folder(save_dir)
    utils.write_config_to_file(args, save_dir)

    global device
    device = torch.device('cuda')

    # Seeding for reproducibility
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Load datasets
    train_loader, test_loader = utils.make_dataloader(args)
    num_samples = len(train_loader.dataset)
    global num_iter_per_epoch
    num_iter_per_epoch = num_samples // args.batch_size

    # Losses file (append when resuming, truncate otherwise)
    log_file_name = os.path.join(save_dir, 'log.txt')
    global log_file
    if args.resume:
        log_file = open(log_file_name, "at")
    else:
        log_file = open(log_file_name, "wt")

    # Build model
    if args.unigen:
        # Generator-only mode: no encoder, single decoder optimizer.
        if args.dataset == 'mnist_stack':
            model = DCDecoder(args.latent_dim, 64, args.image_size, 3,
                              args.dec_dist)
            discriminator = DCDiscriminator(args.d_conv_dim, args.image_size)
        else:
            model = Generator(args.latent_dim, args.g_conv_dim,
                              args.image_size)
            discriminator = Discriminator(args.d_conv_dim, args.image_size)
        encoder_optimizer = None
        decoder_optimizer = optim.Adam(model.parameters(), lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        D_optimizer = optim.Adam(discriminator.parameters(), lr=args.lr_d,
                                 betas=(args.beta1, args.beta2))
    else:
        # Encoder+decoder mode: architecture chosen per dataset.
        if args.dataset == 'mog':
            model = ToyAE(data_dim=2, latent_dim=args.latent_dim,
                          enc_hidden_dim=500, dec_hidden_dim=500,
                          enc_dist=args.enc_dist, dec_dist=args.dec_dist)
            discriminator = DiscriminatorMLP(data_dim=2,
                                             latent_dim=args.latent_dim,
                                             hidden_dim_x=400,
                                             hidden_dim_z=400,
                                             hidden_dim=400)
        elif args.dataset in ['mnist', 'mnist_stack']:
            image_channel = 3 if args.dataset == 'mnist_stack' else 1
            # tanh output only for a deterministic encoder with uniform prior
            tanh = args.prior == 'uniform' and args.enc_dist == 'deterministic'
            model = DCAE(args.latent_dim, 64, args.image_size, image_channel,
                         args.enc_dist, args.dec_dist, tanh)
            discriminator = DCJointDiscriminator(args.latent_dim, 64,
                                                 args.image_size,
                                                 image_channel,
                                                 args.dis_fc_size)
        else:
            model = BGM(args.latent_dim, args.g_conv_dim, args.image_size, 3,
                        args.enc_dist, args.enc_arch, args.enc_fc_size,
                        args.enc_noise_dim, args.dec_dist)
            discriminator = BigJointDiscriminator(args.latent_dim,
                                                  args.d_conv_dim,
                                                  args.image_size,
                                                  args.dis_fc_size)
        encoder_optimizer = optim.Adam(model.encoder.parameters(), lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        decoder_optimizer = optim.Adam(model.decoder.parameters(), lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        D_optimizer = optim.Adam(discriminator.parameters(), lr=args.lr_d,
                                 betas=(args.beta1, args.beta2))

    # Load model from checkpoint (before DataParallel wrapping)
    if args.resume:
        ckpt_dir = args.ckpt_dir if args.ckpt_dir != '' else save_dir + 'model' + str(
            args.start_epoch - 1) + '.sav'
        checkpoint = torch.load(ckpt_dir)
        model.load_state_dict(checkpoint['model'])
        discriminator.load_state_dict(checkpoint['discriminator'])
        del checkpoint  # free the checkpoint tensors immediately

    model = nn.DataParallel(model.to(device))
    discriminator = nn.DataParallel(discriminator.to(device))

    # Fixed noise from prior p_z for generating from G
    global fixed_noise
    if args.prior == 'gaussian':
        fixed_noise = torch.randn(args.save_n_samples, args.latent_dim,
                                  device=device)
    else:
        # uniform prior on [-1, 1)
        fixed_noise = torch.rand(
            args.save_n_samples, args.latent_dim, device=device) * 2 - 1

    # Train; periodically checkpoint the unwrapped (.module) state dicts
    for i in range(args.start_epoch, args.start_epoch + args.n_epochs):
        train_age(i, model, discriminator, encoder_optimizer,
                  decoder_optimizer, D_optimizer, train_loader,
                  args.print_every, save_dir, args.sample_every, test_loader)
        if i % args.save_model_every == 0:
            torch.save(
                {
                    'model': model.module.state_dict(),
                    'discriminator': discriminator.module.state_dict()
                }, save_dir + 'model' + str(i) + '.sav')
def main():
    """Active-learning experiment: pretrain on a balanced seed set, then
    repeatedly acquire the most informative pool points and retrain.

    Logs to TensorBoard and a per-seed log file; checkpoints the model after
    pretraining and after every acquisition round.
    """
    # experiment settings via command line
    args = parse_arguments()

    # setup logging
    experiment_name = 'random' if args.random_acq else args.acq_func_ID
    logdir = './logs/{}/seed{}'.format(experiment_name, args.seed)
    os.makedirs('./logs/{}/checkpts/'.format(experiment_name),
                exist_ok=True)  # make dir for saving models at checkpoints

    # TensorBoard logging (two writers so paired scalars can be overlaid)
    writer1 = SummaryWriter(log_dir=logdir + '-1')
    writer2 = SummaryWriter(log_dir=logdir + '-2')
    writers = [writer1, writer2]

    # python logging
    logging.basicConfig(filename='./logs/{}/seed{}.log'.format(
        experiment_name, args.seed), level=logging.INFO)
    logging.getLogger().addHandler(
        logging.StreamHandler())  # makes messages print to stderr, too
    logging.info('Running experiment with the following settings:')
    for arg in vars(args):
        logging.info('{}: {}'.format(arg, getattr(args, arg)))

    # for reproducibility
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    # load data
    train_data, test_data = load_data(args)

    # get idx of pretrain and validation data from train data:
    # 2 per class for pretraining, 10 per class for validation.
    pool_idx = set(range(len(train_data)))
    pretrain_idx, pool_idx = balanced_sample(train_data, n_classes=10, k=2,
                                             idx_possible=pool_idx)
    valid_idx, pool_idx = balanced_sample(
        train_data, n_classes=10, k=10, idx_possible=pool_idx
    )  # Gal doesn't mention if validation set is balanced
    acq_idx = pretrain_idx.copy()  # first 20 acquisitions are the pretraining data
    log_acquisitions(pretrain_idx, train_data, mean_info_gain=None, i_round=0,
                     writers=writers, cumulative_acqs=0)
    assert len(pretrain_idx) == 20
    assert len(valid_idx) == 100
    assert len(pool_idx) == len(train_data) - 120

    # make dataloaders
    train_loader = make_dataloader(train_data, args.train_batch_size,
                                   idx=acq_idx, random=True)
    valid_loader = make_dataloader(train_data, args.valid_batch_size,
                                   idx=valid_idx)
    test_loader = make_dataloader(
        test_data, args.test_batch_size
    )  # pytorch MNIST example shuffles test set, but seems unnecessary

    # pretraining
    # repeat for various choices of lambda:
    #   initial training on 20 points (random but *balanced*)
    #   compute validation error on 100 points
    #   select lamba/model with lowest validation error
    weight_decay = compute_weight_decay(train_loader, valid_loader, args,
                                        writers, i_round=0)
    # weight_decay = 1.0
    writer1.add_scalar('optimal_lambda', weight_decay, 0)
    model = BayesianCNN()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=weight_decay)
    fit(model, optimizer, train_loader, test_loader, args, writers,
        i_round=0)  # do pretraining on 20 examples (not quite clear if Gal does this here, but I think so)
    torch.save(model.state_dict(),
               './logs/{}/checkpts/model_0.pt'.format(experiment_name))

    # for short code tests: [also use --rounds 2 --epochs 1 --dropout_samples 10]
    # pool_idx.difference_update(set(range(100,60000)))

    for i_round in range(1, args.rounds + 1):
        logging.info('\nBEGIN ROUND {}\n'.format(i_round))

        # acquire args.acqs_per_round points from train_data according to acq_func
        new_idx, mean_info_gain = make_acquisitions(train_data, pool_idx,
                                                    model, args)
        acq_idx.update(new_idx)
        pool_idx.difference_update(new_idx)

        # log some data about those acquisitions
        n_acqs_previous = args.acqs_pretrain + (i_round - 1) * args.acqs_per_round
        log_acquisitions(new_idx, train_data, mean_info_gain, i_round,
                         writers, cumulative_acqs=n_acqs_previous)

        # build new train_loader using updated acq_idx
        train_loader = make_dataloader(train_data, args.train_batch_size,
                                       idx=acq_idx, random=True)

        # reoptimize weight decay given updated, larger training set.
        # Unclear if Gal does this, but seems natural
        weight_decay = compute_weight_decay(train_loader, valid_loader, args,
                                            writers, i_round)
        writer1.add_scalar('optimal_lambda', weight_decay, i_round)

        # reinitalise model
        model = BayesianCNN()
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=weight_decay)

        # train model to convergence on all points acquired so far,
        # computing test error as we go
        oldw1 = list(model.parameters())[0][0][0][0][0].item()
        fit(model, optimizer, train_loader, test_loader, args, writers,
            i_round)
        torch.save(model.state_dict(),
                   './logs/{}/checkpts/model_{}.pt'.format(
                       experiment_name, i_round))
        # sanity check: compare one weight before/after training
        neww1 = list(model.parameters())[0][0][0][0][0].item()
        assert oldw1 != neww1, "fit(.) didn't update model parameters"
# Evaluation entry point: restore a saved model + state dict and score it.
utils.check_for_CUDA(args)

# The architecture lives in model.pth next to the state-dict checkpoint.
checkpoint_dir = os.path.dirname(args.pth)
model_path = os.path.join(checkpoint_dir, 'model.pth')
print("Loading model", model_path)
model = torch.load(model_path)
print("Loading model state dict", args.pth)
model.load_state_dict(torch.load(args.pth))
model = model.to(args.device)

# Dataloader with evaluation-time settings: no split, crop, shuffle or drop.
print("Making dataloader")
args.valid_split = 0
args.centercrop = False
args.shuffle = False
args.drop_last = False
eval_loader = utils.make_dataloader(args)

# Score the model on the evaluation set.
eval(args, model, eval_loader)

# Example invocation:
# python eval.py --eval --pth '<experiment_dir>/model_epoch_0136_batch_00000_of_00141.pth' \
#     --data_path '<dataset_dir>/testset' --out_path '<experiment_dir>/' --no-cuda
'l1_channel': [0, 1e-4, 1e-3, 1e-2], 'l1_spatial': [0, 1e-4, 1e-3, 1e-2], 'l2': [0] } device = torch.device( 'cuda' if use_cuda and torch.cuda.is_available() else 'cpu') torch.backends.cudnn.deterministic = True train_set = load_dataset(data_file, 'data_train', 'labels_train') valid_set = load_dataset(data_file, 'data_valid', 'labels_valid') test_set = load_dataset(data_file, 'data_test', 'labels_test') var_noise = load_var_noise(data_file, 'var_noise') input_channel, input_size = train_set.tensors[0].size( 1), train_set.tensors[0].size(2) output_size = train_set.tensors[1].size(1) valid_loader = make_dataloader(valid_set, 'valid_set', batch_size=max(param_grid['batch_size'])) test_loader = make_dataloader(test_set, 'test_set', batch_size=max(param_grid['batch_size'])) criterion = nn.MSELoss() best_valid_RMSE = np.full(1, np.inf) for grid in ParameterGrid(param_grid): print(f"===> Hyper-parameters = {grid}:") # random.seed(seed) # numpy.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) importlib.reload(model) net = getattr(model, model_name.upper())(input_channel=input_channel,
from parameters import get_params

if __name__ == '__main__':
    # Parse all experiment parameters and record the exact command line.
    args = get_params()
    args.command = 'python ' + ' '.join(sys.argv)

    # CUDA: device selection (sets CUDA-related fields on args).
    utils.check_for_CUDA(args)

    # Seed for reproducibility.
    torch.manual_seed(args.seed)

    # IMAGES DATALOADER
    train_loader, valid_loader = utils.make_dataloader(args)
    print(args)

    # OUT PATH: create the output directory for this run if needed.
    if not os.path.exists(args.out_path):
        print("Making", args.out_path)
        os.makedirs(args.out_path)

    # Copy all scripts alongside the results for reproducibility.
    utils.copy_scripts(args.out_path)

    # Save all args.
    utils.write_config_to_file(args, args.out_path)

    # MODEL
def train_one(model, data, param, weight, first_layer_no_learn=False,
              show_every=1, return_model=False):
    """Train ``model`` on one dataset split and report test correlation.

    Parameters
    ----------
    model : nn.Module
        Network with ``.conv`` and ``.fc`` sub-modules.
    data : sequence
        Six tensors: (train_x, train_y, valid_x, valid_y, test_x, test_y)
        — presumably (N, C, H, W) images and target matrices; TODO confirm.
    param : dict
        Hyper-parameters: 'batch_size', 'lr', 'l1', 'l2', 'max_epoch',
        'seed' (seed == -1 means "do not reseed").
    weight : tensor
        First-layer weights applied via ``init_CNN`` when
        ``first_layer_no_learn`` is True.
    first_layer_no_learn : bool
        If True, initialise the conv layer from ``weight``.
    show_every : int
        Print progress every ``show_every`` epochs.
    return_model : bool
        If True, also return the best model (chosen by validation CC).

    Returns
    -------
    ([best_model,] test_corr, loss, val_corr, pred)
        Mean test correlation, per-epoch training losses, per-epoch
        validation correlations, and test predictions.
    """
    batch_size = param['batch_size']
    lr = param['lr']
    l1 = param['l1']
    l2 = param['l2']
    max_epoch = param['max_epoch']
    seed = param['seed']
    if seed != -1:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    train_loader = make_dataloader(data[0], data[1], batch_size=batch_size,
                                   is_train=True)
    valid_loader = make_dataloader(data[2], data[3], batch_size=batch_size,
                                   is_train=False)
    test_loader = make_dataloader(data[4], data[5], batch_size=batch_size,
                                  is_train=False)

    best_valCC = 0
    best_model = None
    if first_layer_no_learn:
        # Initialise the first (conv) layer from the supplied weights;
        # per the flag name, that layer is presumably not learned — confirm
        # against init_CNN.
        model = init_CNN(model, weight)
        optimizer = Adam([{
            'params': model.conv.parameters()
        }, {
            'params': model.fc.parameters()
        }], lr=lr, l1=l1, weight_decay=l2, amsgrad=True)
    else:
        optimizer = Adam(model.parameters(), lr=lr, l1=l1, weight_decay=l2,
                         amsgrad=True)

    loss = []
    val_corr = []
    for epoch in range(max_epoch):
        if (epoch + 1) % show_every == 0:
            print(f"Epoch {epoch + 1}:")
        loss.append(train(model, train_loader, optimizer))
        valid_CC = test(model, valid_loader, 'Validation')[1]
        valid_CC = sum(valid_CC) / len(valid_CC)  # mean CC across outputs
        val_corr.append(valid_CC)
        if (epoch + 1) % show_every == 0:
            print(valid_CC)
        if valid_CC > best_valCC:
            # recover the best model by validation set
            best_valCC = valid_CC
            del best_model
            best_model = copy.deepcopy(model)
    print("Done Training")

    # NOTE(review): if max_epoch == 0 (or no epoch improves on 0),
    # best_model stays None and test(...) below will fail — confirm callers
    # always pass max_epoch >= 1.
    res = test(best_model, test_loader, 'Test')
    test_corr = res[1]
    pred = res[-1]
    test_corr = sum(test_corr) / len(test_corr)
    print(test_corr)
    torch.cuda.empty_cache()
    print("Finished.")
    if return_model:
        return best_model, test_corr, loss, val_corr, pred
    else:
        # BUG FIX: originally returned the undefined name `test_coic`,
        # which raised NameError on this path.
        return test_corr, loss, val_corr, pred
def make_acquisitions(train_data, pool_idx, model, args):
    """
    Chooses pool points that maximise the acquisition function given in args.acq_func
    (Or acquires points uniformly at random if args.random_acq == True.)
    Returns indices of the top `args.acqs_per_round` points from the pool.
    The elements of train_data at pool_idx are the "pool points".

    Parameters
    ----------
    train_data: torch.utils.data.Dataset
        PyTorch dataset, superset of the pool data
    pool_idx: set
        indices specifying which points in train_data are in the pool
    model: torch.nn.Module
        find information gained about this PyTorch model
    args: Namespace object
        experiment arguments from argparse, including acq_func

    Returns
    -------
    new_idx: set
        indices of pool points which maximise acquisition function
        len(new_idx) = args.acqs_per_round
    mean_info: float (or None)
        the mean "informativenss" of these points, measuring using the given
        acqusition function (entropy, variation ratio or standard deviation)
    """
    if args.random_acq:
        new_idx = set(random.sample(
            pool_idx, k=args.acqs_per_round))  # random sample without replacement
        mean_info = None
    else:
        # Running top-k buffer: rows are (informativeness, original index).
        best_ent_idx = np.zeros(
            shape=(0, 2),
            dtype=np.float64)  # array for storing top 10 (entropy, idx) pairs
        start = time.time()
        pool_loader = make_dataloader(train_data, args.test_batch_size,
                                      idx=pool_idx)  # note 1
        with torch.no_grad():
            for data, _, idx in pool_loader:
                logging.info(
                    'Computing info gain for points with (original) indices {}-{} in pool'
                    .format(idx[0], idx[-1]))
                logprobs = model.forward_stochastic(
                    data, k=args.dropout_samples).double(
                    )  # do entropy calcs in double precision
                # model outputs logprobs (final layer is log_softmax(.))
                # this is for numerical stability in softmax computation
                # convert these back to probs to do entropy calculations
                probs = logprobs.exp()
                info = args.acq_func(probs)
                # add new (entropy, index) tuples to array of best so far
                new_ent_idx = np.column_stack((info, idx))
                all_ent_idx = np.concatenate((new_ent_idx, best_ent_idx),
                                             axis=0)
                # sort by entropy and take top 10 so far
                sorted_ind = all_ent_idx[:, 0].argsort()
                best_ent_idx = all_ent_idx[sorted_ind][-args.acqs_per_round:]
                assert best_ent_idx.shape == (args.acqs_per_round, 2)
        end = time.time()
        logging.info("Time taken for {} acquisitions: {:.1f}s".format(
            args.acqs_per_round, end - start))
        new_idx = set(best_ent_idx[:, 1].astype(int))
        mean_info = best_ent_idx[:, 0].mean()
    return new_idx, mean_info
def main():
    """Train a classifier with automatic mixed precision, tracking per-epoch
    train/validation wall-clock times and saving the best checkpoint by
    validation accuracy; timing lists are written to CSV at the end."""
    args = get_args()
    torch.manual_seed(args.seed)
    shape = (224, 224, 3)
    """ define dataloader """
    train_loader, valid_loader, test_loader = make_dataloader(args)
    """ define model architecture """
    model = get_model(args, shape, args.num_classes)
    if torch.cuda.device_count() >= 1:
        print('Model pushed to {} GPU(s), type {}.'.format(
            torch.cuda.device_count(), torch.cuda.get_device_name(0)))
        model = model.cuda()
    else:
        raise ValueError('CPU training is not supported')
    """ define loss criterion """
    criterion = nn.CrossEntropyLoss().cuda()
    """ define optimizer """
    optimizer = make_optimizer(args, model)
    """ define learning rate scheduler """
    scheduler = make_scheduler(args, optimizer)
    """ define loss scaler for automatic mixed precision """
    scaler = torch.cuda.amp.GradScaler()
    """ define trainer, evaluator, result_dictionary """
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': []
    }
    trainer = Trainer(model, criterion, optimizer, scheduler, scaler)
    evaluator = Evaluator(model, criterion)

    # Per-epoch wall-clock times, written to CSV at the end.
    train_time_list = []
    valid_time_list = []

    if args.evaluate:
        """ load model checkpoint """
        model.load()
        result_dict = evaluator.test(test_loader, args, result_dict)
    else:
        evaluator.save(result_dict)
        best_val_acc = 0.0
        """ define training loop """
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch

            # time the training pass (synchronize so queued GPU work is
            # included in the measurement)
            torch.cuda.synchronize()
            tic1 = time.time()
            result_dict = trainer.train(train_loader, epoch, args,
                                        result_dict)
            torch.cuda.synchronize()
            tic2 = time.time()
            train_time_list.append(tic2 - tic1)

            # time the validation pass the same way
            torch.cuda.synchronize()
            tic3 = time.time()
            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)
            torch.cuda.synchronize()
            tic4 = time.time()
            valid_time_list.append(tic4 - tic3)

            # checkpoint whenever validation accuracy improves
            if result_dict['val_acc'][-1] > best_val_acc:
                print("{} epoch, best epoch was updated! {}%".format(
                    epoch, result_dict['val_acc'][-1]))
                best_val_acc = result_dict['val_acc'][-1]
                model.save(checkpoint_name='best_model')
            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)

        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)
        """ calculate test accuracy using best model """
        model.load(checkpoint_name='best_model')
        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)

    print(result_dict)
    np.savetxt(os.path.join(model.checkpoint_dir, model.checkpoint_name,
                            'train_time_amp.csv'),
               train_time_list, delimiter=',', fmt='%s')
    np.savetxt(os.path.join(model.checkpoint_dir, model.checkpoint_name,
                            'valid_time_amp.csv'),
               valid_time_list, delimiter=',', fmt='%s')
def main():
    """Train a classifier with early stopping (patience 20 epochs), then
    evaluate both the best and the last checkpoints on the test set."""
    args = get_args()
    torch.manual_seed(args.seed)

    # Fixed input geometry expected by the backbone.
    input_shape = (224, 224, 3)
    train_loader, valid_loader, test_loader = make_dataloader(args)

    # Model; GPU execution is mandatory.
    model = get_model(args, input_shape, args.num_classes)
    n_gpus = torch.cuda.device_count()
    if n_gpus < 1:
        raise ValueError('CPU training is not supported')
    print('Model pushed to {} GPU(s), type {}.'.format(
        n_gpus, torch.cuda.get_device_name(0)))
    model = model.cuda()

    # Loss, optimizer and learning-rate schedule.
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = make_optimizer(args, model)
    scheduler = make_scheduler(args, optimizer)

    # Accumulates per-epoch metrics for logging and plotting.
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': [],
    }
    trainer = Trainer(model, criterion, optimizer, scheduler)
    evaluator = Evaluator(model, criterion)

    if args.evaluate:
        # Evaluation-only mode: score both saved checkpoints.
        model.load("best_model")
        result_dict = evaluator.test(test_loader, args, result_dict, True)
        model.load("last_model")
        result_dict = evaluator.test(test_loader, args, result_dict, False)
    else:
        evaluator.save(result_dict)
        best_val_acc = 0.0
        tolerance = 0  # epochs since the last validation improvement
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch
            result_dict = trainer.train(train_loader, epoch, args,
                                        result_dict)
            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)

            tolerance += 1
            print("tolerance: ", tolerance)
            latest_val_acc = result_dict['val_acc'][-1]
            if latest_val_acc > best_val_acc:
                # New best: reset patience and checkpoint.
                tolerance = 0
                print("{} epoch, best epoch was updated! {}%".format(
                    epoch, latest_val_acc))
                best_val_acc = latest_val_acc
                model.save(checkpoint_name='best_model')

            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)

            if tolerance > 20:  # early stopping after 20 stale epochs
                break

        # Score the final weights, save them, then re-score the best ones.
        result_dict = evaluator.test(test_loader, args, result_dict, False)
        evaluator.save(result_dict)
        model.save(checkpoint_name='last_model')
        model.load(checkpoint_name='best_model')
        result_dict = evaluator.test(test_loader, args, result_dict, True)
        evaluator.save(result_dict)
    print(result_dict)
self.eval = True self.imsize = 64 self.seed = 29 self.batch_size = 128 self.shuffle = False self.drop_last = False self.val_data_path = '' self.kwargs = {} # special_data_path = "/home/voletiv/Datasets/CatsAndDogs/special" special_data_path = "/home/voletivi/scratch/catsndogs/data/special" # Data args = MyArgs(special_data_path) dl = utils.make_dataloader(args) data, classes = next(iter(dl)) # Model # model_pth = '/home/voletiv/EXPERIMENTS/CnD_experiments/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model.pth' # model_state_dict = '/home/voletiv/EXPERIMENTS/CnD_experiments/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model_epoch_0082_batch_00076_of_00282.pth' model_pth = '/home/voletivi/scratch/catsndogs/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model.pth' model_state_dict = '/home/voletivi/scratch/catsndogs/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model_epoch_0082_batch_00076_of_00282.pth' model = torch.load(model_pth) model.load_state_dict(torch.load(model_state_dict)) # model.to('cpu') occ_size = 7 # Data occ sample data_occ = data.clone()
def train_one(model, data, param, weight, first_layer_no_learn=False,
              return_model=False):
    """Train ``model`` once with the given hyper-parameters and return the
    test correlation together with the elapsed wall-clock time.

    ``data`` holds six tensors: (train_x, train_y, valid_x, valid_y,
    test_x, test_y). ``param`` supplies 'batch_size', 'lr', 'l1', 'l2',
    'max_epoch' and 'seed' (seed == -1 means "do not reseed"). When
    ``first_layer_no_learn`` is True the conv layer is initialised from
    ``weight`` via ``init_CNN``. Returns ``(best_model,) test_corr,
    elapsed`` depending on ``return_model``.
    """
    started = time.time()

    # Unpack hyper-parameters.
    batch_size = param['batch_size']
    lr = param['lr']
    l1 = param['l1']
    l2 = param['l2']
    max_epoch = param['max_epoch']
    seed = param['seed']
    if seed != -1:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    input_channel, input_size = data[0].shape[1], data[0].shape[2]
    output_size = data[1].shape[0]

    train_loader = make_dataloader(data[0], data[1], batch_size=batch_size)
    valid_loader = make_dataloader(data[2], data[3], batch_size=batch_size)
    test_loader = make_dataloader(data[4], data[5], batch_size=batch_size)

    best_valCC = 0
    best_model = None

    if first_layer_no_learn:
        # Initialise the first layer from the supplied weights, then
        # optimise conv and fc parameter groups.
        model = init_CNN(model, weight)
        param_groups = [{
            'params': model.conv.parameters()
        }, {
            'params': model.fc.parameters()
        }]
        optimizer = Adam(param_groups, lr=lr, l1=l1, weight_decay=l2,
                         amsgrad=True)
    else:
        optimizer = Adam(model.parameters(), lr=lr, l1=l1, weight_decay=l2,
                         amsgrad=True)

    for epoch in range(max_epoch):
        print(f"===> Training Epoch {epoch + 1}:")
        train(model, train_loader, optimizer)
        valid_CC = test(model, valid_loader, 'Validation')[-1]
        print(valid_CC)
        if valid_CC > best_valCC:
            # Snapshot the best model by validation CC.
            best_valCC = valid_CC
            del best_model
            best_model = copy.deepcopy(model)

    print("===========>")
    test_corr = test(best_model, test_loader, 'Test')[-1][0]
    print(test_corr)
    torch.cuda.empty_cache()
    print("Finished.")

    elapsed = time.time() - started
    print("Elapsed time is {:.6f} seconds.".format(elapsed))
    if return_model:
        return best_model, test_corr, elapsed
    return test_corr, elapsed