def run(args):
    """Train a Flow++ model on ImageNet64 and evaluate the best checkpoint.

    Side effects: writes ``log.txt``, the serialized ``args`` config and the
    best model checkpoint (``a.model``) into ``args.out_dir``.
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================
    # SNAPSHOTS
    # ==================================================================
    # Timestamp-based signature; ':' replaced so it is filesystem-safe.
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    os.makedirs(args.out_dir, exist_ok=True)
    snap_dir = args.out_dir
    with open(os.path.join(snap_dir, 'log.txt'), 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING -- os.path.join keeps the file inside snap_dir even when
    # args.out_dir has no trailing separator (bare ``snap_dir + '.config'``
    # wrote a *sibling* of the output directory).
    torch.save(args, os.path.join(snap_dir, 'args.config'))

    # Load snapshot parameters to resume a previous run, if requested.
    parameters_dict = None
    if args.state_parameters is not None:
        assert os.path.isfile(args.state_parameters)
        # ``with`` closes the handle (was an unclosed ``json.load(open(...))``).
        with open(args.state_parameters) as fp:
            parameters_dict = json.load(fp)
        # Resume with the learning rate the scheduler had reached.
        args.learning_rate = parameters_dict['scheduler']['_last_lr'][0]

    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Device:', args.device)

    # ==================================================================
    # LOAD DATA
    # ==================================================================
    dataset = load_imagenet_data(os.path.expanduser(args.imagenet64_data_path))
    validation_dataset = load_imagenet_data(
        os.path.expanduser(args.imagenet64_valid_data_path))

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True, drop_last=False)
    val_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=args.batch_size, shuffle=True,
        drop_last=False)

    args.input_size = [3, 64, 64]

    # ==================================================================
    # SELECT MODEL
    # ==================================================================
    # flow parameters and architecture choice are passed on to model
    # through args
    print(args.input_size)
    from compression.models.load_flowpp_imagenet64 import Imagenet64Model

    # Either build a fresh model or load pre-trained weights.
    if args.imagenet64_model is None:
        model = Imagenet64Model(force_float32_cond=True).eval()
    else:
        model_ctor = compression.models.load_imagenet64_model
        model_filename = os.path.expanduser(args.imagenet64_model)
        model = model_ctor(model_filename, force_float32_cond=True,
                           from_torch=args.from_torch)
    model.to(device=args.device)
    model_sample = model

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size,
                                          gamma=args.gamma)

    # ==================================================================
    # TRAINING
    # ==================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping / checkpointing
    best_val_bpd = np.inf
    best_val_loss = np.inf

    if args.state_parameters is None:
        last_epoch = 1
        run_number = 1
    else:
        last_epoch = parameters_dict['epoch']
        run_number = parameters_dict['run_number'] + 1
        scheduler.load_state_dict(parameters_dict['scheduler'])

    train_times = []
    model.double()  # train in float64

    model_path = os.path.join(snap_dir, 'a.model')
    for epoch in range(last_epoch, args.epochs + 1):
        t_start = time.time()
        if parameters_dict is not None:
            # Resumed run: hand the saved batch index / run counter to train().
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer,
                                    args, scheduler, True,
                                    parameters_dict['batch_idx'], run_number)
        else:
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer,
                                    args, scheduler, False)
        train_bpd.append(tr_bpd)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch < 5 or epoch % args.evaluate_interval_epochs == 0:
            v_loss, v_bpd = evaluate(
                val_loader, model, model_sample, args, epoch=epoch,
                file=os.path.join(snap_dir, 'log.txt'))
            val_bpd.append(v_bpd)

            # Checkpoint on validation improvement so the final evaluation
            # below has a model to load (previously nothing was ever saved,
            # so the torch.load() at the end always failed).
            if v_bpd < best_val_bpd:
                torch.save(model, model_path)
                print('->model saved<-')

            best_val_bpd = min(v_bpd, best_val_bpd)
            best_val_loss = min(v_loss, best_val_loss)
            print('(BEST: val bpd {:.4f}, val loss {:.4f})\n'.format(
                best_val_bpd, best_val_loss))
            print(f'VALIDATION: loss: {v_loss}, bpd: {v_bpd}')

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

    train_bpd = np.hstack(train_bpd)
    val_bpd = np.array(val_bpd)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (
        mean_train_time, std_train_time))

    # ==================================================================
    # EVALUATION
    # ==================================================================
    # Evaluate the best checkpoint. No held-out test split is loaded in this
    # script, so the validation loader is used (the old code referenced an
    # undefined ``test_loader`` and called evaluate() with the wrong arity,
    # copy-pasted from a different script).
    final_model = torch.load(model_path)
    test_loss, test_bpd = evaluate(
        val_loader, final_model, final_model, args, epoch=epoch,
        file=os.path.join(snap_dir, 'test_log.txt'))
    print('Test loss / bpd: %.2f / %.2f' % (test_loss, test_bpd))
def run(args, kwargs):
    """Train a model for the residual networks (resflow/sylvester) comparison.

    Writes logs, tensorboard summaries and the best checkpoint (``a.model``)
    into ``args.snap_dir``, then evaluates the best checkpoint on the test set.
    """
    # Only for the residual networks (resflow/sylvester) comparison.
    args.grad_norm_enabled = True

    # ==================================================================
    # LOAD DATA
    # ==================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    model, model_sample, optimizer, scheduler = setup(args)

    writer = tensorboardX.SummaryWriter(logdir=args.snap_dir)
    snap_dir = args.snap_dir

    # ==================================================================
    # TRAINING
    # ==================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() immediately followed by train() looks intentional
    # (e.g. to trigger lazy/data-dependent initialization) -- confirm.
    model.eval()
    model.train()

    starting_epoch = 1
    if args.restart_from_epoch is not None:
        starting_epoch = args.restart_from_epoch

    for epoch in range(starting_epoch, args.epochs + 1):
        t_start = time.time()
        tr_bpd = train(epoch, train_loader, model, optimizer, args)
        scheduler.step()
        train_bpd.append(tr_bpd)
        writer.add_scalar('train bpd', train_bpd[-1], epoch)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch in [1, 5, 10] or epoch % args.evaluate_interval_epochs == 0:
            tr_bpd = evaluate(train_loader, model, args, iw_samples=1)
            v_bpd = evaluate(val_loader, model, args, iw_samples=1)

            # Logging message.
            with open(snap_dir + 'log.txt', 'a') as ff:
                msg = 'epoch {}\ttrain bpd {:.3f}\tval bpd {:.3f}\t'.format(
                    epoch, tr_bpd, v_bpd)
                print(msg, file=ff)

            plot_samples(model_sample, args, epoch, v_bpd)

            val_bpd.append(v_bpd)
            writer.add_scalar('val bpd', v_bpd, epoch)

            # Model save based on val performance
            if v_bpd < best_val_bpd:
                best_train_bpd = tr_bpd
                best_val_bpd = v_bpd
                try:
                    # Unwrap DataParallel so the checkpoint loads standalone.
                    if hasattr(model, 'module'):
                        torch.save(model.module, snap_dir + 'a.model')
                    else:
                        torch.save(model, snap_dir + 'a.model')
                    torch.save(optimizer, snap_dir + 'a.optimizer')
                    print('->model saved<-')
                except Exception:
                    # Best-effort checkpointing. The previous bare ``except:``
                    # also swallowed KeyboardInterrupt/SystemExit; only
                    # ordinary errors should be tolerated here.
                    print('Saving was unsuccessful.')

            print('(BEST: train bpd {:.4f}, val bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_bpd):
                raise ValueError('NaN encountered!')

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_train_time, std_train_time))

    # ========================================================================
    # EVALUATION
    # ========================================================================
    final_model = torch.load(snap_dir + 'a.model')
    test_bpd = evaluate(test_loader, final_model, args)
    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative elbo bpd {:.4f}'.format(epoch, test_bpd)
        print(msg, file=ff)

    if 'residual' in args.model_type:
        # Importance-weighted eval would need exact log-determinants, which
        # residual flows only estimate.
        print('Importance weighted eval needs exact determinants.')
    else:
        test_bpd = evaluate(test_loader, final_model, args, iw_samples=1000)
        with open(snap_dir + 'log.txt', 'a') as ff:
            msg = 'epoch {}\ttest negative log_px bpd {:.4f}'.format(
                epoch, test_bpd)
            print(msg, file=ff)
def run(args, kwargs):
    """Train a (Sylvester-)flow VAE chosen by ``args.flow`` with early stopping.

    Builds a timestamped snapshot directory under ``args.out_dir``, trains with
    Adamax, keeps the best model by validation loss, and finally evaluates the
    reloaded best model. Results are appended to ``experiment_log.txt``.
    ``kwargs`` is forwarded to ``load_dataset`` (DataLoader options).
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Timestamp signature; ':' replaced for filesystem safety.
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    # Encode dataset / flow / flow-specific hyperparameters in the dir name.
    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow + '_gpunum_' + str(args.gpu_num)

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)
    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'householder':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)

    snap_dir = snap_dir + '__' + args.model_signature + '/'
    args.snap_dir = snap_dir
    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    if args.flow == 'no_flow':
        model = VAE.VAE(args)
    elif args.flow == 'planar':
        model = VAE.PlanarVAE(args)
    elif args.flow == 'iaf':
        model = VAE.IAFVAE(args)
    elif args.flow == 'orthogonal':
        model = VAE.OrthogonalSylvesterVAE(args)
    elif args.flow == 'householder':
        model = VAE.HouseholderSylvesterVAE(args)
    elif args.flow == 'triangular':
        model = VAE.TriangularSylvesterVAE(args)
    else:
        raise ValueError('Invalid flow choice')

    if args.cuda:
        print("Model on GPU")
        model.cuda()

    print(model)

    optimizer = optim.Adamax(model.parameters(), lr=args.learning_rate, eps=1.e-7)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_loss = []
    val_loss = []

    # for early stopping: e counts consecutive epochs without improvement.
    best_loss = np.inf
    best_bpd = np.inf
    e = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):
        t_start = time.time()
        tr_loss = train(epoch, train_loader, model, optimizer, args)
        train_loss.append(tr_loss)
        train_times.append(time.time()-t_start)
        print('One training epoch took %.2f seconds' % (time.time()-t_start))

        v_loss, v_bpd = evaluate(val_loader, model, args, epoch=epoch)
        val_loss.append(v_loss)

        # early-stopping: save on improvement, otherwise count patience
        # (counting starts only after the warmup phase).
        if v_loss < best_loss:
            e = 0
            best_loss = v_loss
            # bpd only meaningful for non-binary inputs.
            if args.input_type != 'binary':
                best_bpd = v_bpd
            print('->model saved<-')
            torch.save(model, snap_dir + args.flow + '.model')
            # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')
        elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
            e += 1
            if e > args.early_stopping_epochs:
                break

        if args.input_type == 'binary':
            print('--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss))
        else:
            print('--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss, best_bpd))

        if math.isnan(v_loss):
            raise ValueError('NaN encountered!')

    train_loss = np.hstack(train_loss)
    val_loss = np.array(val_loss)

    plot_training_curve(train_loss, val_loss, fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================
    test_score_file = snap_dir + 'test_scores.txt'

    with open('experiment_log.txt', 'a') as ff:
        print(args, file=ff)
        print('Stopped after %d epochs' % epoch, file=ff)
        print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time), file=ff)

    # Reload the best (early-stopped) checkpoint for final evaluation.
    final_model = torch.load(snap_dir + args.flow + '.model')

    if args.testing:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        test_loss, test_bpd = evaluate(test_loader, final_model, args, testing=True)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n' 'ELBO (VAL): {:.4f}\n'.format(validation_loss), file=ff)
            print('FINAL EVALUATION ON TEST SET\n' 'NLL (TEST): {:.4f}\n'.format(test_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n' 'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
                print('FINAL EVALUATION ON TEST SET\n' 'NLL (TEST) BPD: {:.4f}\n'.format(test_bpd), file=ff)
    else:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        # save the test score in case you want to look it up later.
        _, _ = evaluate(test_loader, final_model, args, testing=True, file=test_score_file)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n' 'ELBO (VALIDATION): {:.4f}\n'.format(validation_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n' 'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
def run(args, kwargs):
    """Train a flow with (uniform or flow-based) dequantization and evaluate it.

    Builds a timestamped snapshot directory, trains with Adamax + warmup/decay
    LR schedule, checkpoints on best validation bpd, and finally reports ELBO
    and importance-weighted test bpd from the best checkpoint.
    ``kwargs`` is forwarded to ``load_dataset``.
    """
    # Would probably help, but experiments were done before.
    args.grad_norm_enabled = False

    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # Evaluation is expensive on imagenet; cap the interval.
    if 'imagenet' in args.dataset and args.evaluate_interval_epochs > 5:
        args.evaluate_interval_epochs = 5

    # ==================================================================
    # SNAPSHOTS
    # ==================================================================
    args.model_signature = str(datetime.datetime.now())[0:16].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = args.out_dir
    snap_dir = snapshots_path + '/'
    snap_dir += args.exp_name + args.dataset + '_' + 'flows_' + str(
        args.n_subflows)
    snap_dir = snap_dir + '_' + args.model_signature + '/'

    args.snap_dir = snap_dir
    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    with open(snap_dir + 'log.txt', 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    writer = tensorboardX.SummaryWriter(logdir=snap_dir)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # ==================================================================
    # LOAD DATA
    # ==================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================
    # SELECT MODEL
    # ==================================================================
    # flow parameters and architecture choice are passed on to model through
    # args

    # p(v): the main density model over (normalized) inputs.
    model_pv = Flow(args, args.input_size, n_levels=args.n_levels,
                    n_subflows=args.n_subflows,
                    use_splitprior=args.use_splitprior,
                    n_context=None,
                    normalize_translation=128.,
                    normalize_scale=256.)

    if args.dequantize_distribution == 'uniform':
        model_hx = None
        model_qu_x = Uniform(args.input_size)
    elif args.dequantize_distribution == 'flow':
        # h(x): context network feeding the conditional dequantization flow.
        model_hx = torch.nn.Sequential(
            Normalize_without_ldj(translation=128., scale=256.),
            DenseNet(args,
                     input_size=(3, args.input_size[1], args.input_size[2]),
                     n_inputs=3, n_outputs=args.n_context, depth=4, growth=32,
                     dropout_p=args.dropout_p),
            torch.nn.Conv2d(args.n_context, args.n_context, kernel_size=2,
                            stride=2, padding=0),
            # NOTE(review): input_size still lists 3 channels here while
            # n_inputs=args.n_context -- confirm this is intended.
            DenseNet(args, n_inputs=args.n_context,
                     input_size=(3, args.input_size[1], args.input_size[2]),
                     n_outputs=args.n_context, depth=4, growth=32,
                     dropout_p=args.dropout_p),
        )
        # q(u|x): conditional flow over dequantization noise, squashed via
        # an (inverse) sigmoid.
        model_qu_x = TemplateDistribution(
            transformations=[ReverseTransformation(Sigmoid())],
            distribution=Flow(args, args.input_size,
                              n_levels=args.dequantize_levels,
                              n_subflows=args.dequantize_subflows,
                              n_context=args.n_context,
                              use_splitprior=False,
                              normalize_translation=0.,
                              normalize_scale=1.,
                              parametrize_inverse=True))
    else:
        # Previously a bare ``raise ValueError`` with no message.
        raise ValueError(
            'Unknown dequantize_distribution: {}'.format(
                args.dequantize_distribution))

    model = DiscreteLowerboundModel(model_pv, model_qu_x, model_hx)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(args.device)
    model_sample = model

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model, dim=0)

    def lr_lambda(epoch):
        # Linear warmup followed by exponential decay.
        factor = min(1., (epoch + 1) / args.warmup) * np.power(
            args.lr_decay, epoch)
        print('Learning rate factor:', factor)
        return factor

    optimizer = optim.Adamax(model.parameters(), lr=args.learning_rate,
                             eps=1.e-7)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda,
                                                  last_epoch=-1)

    # Log the number of params.
    number_of_params = np.sum(
        [np.prod(tensor.size()) for tensor in model.parameters()])
    fn = snap_dir + 'log.txt'
    with open(fn, 'a') as ff:
        msg = 'Number of Parameters: {}'.format(number_of_params)
        print(msg, file=ff)
        print(msg)

    # ==================================================================
    # TRAINING
    # ==================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() then train() looks intentional (e.g. to trigger
    # lazy/data-dependent initialization) -- confirm.
    model.eval()
    model.train()

    for epoch in range(1, args.epochs + 1):
        t_start = time.time()
        tr_bpd = train(epoch, train_loader, model, optimizer, args)
        scheduler.step()
        train_bpd.append(tr_bpd)
        writer.add_scalar('train bpd', train_bpd[-1], epoch)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch < 25 or epoch % args.evaluate_interval_epochs == 0:
            tr_bpd = evaluate(train_loader, model, args, iw_samples=1)
            v_bpd = evaluate(val_loader, model, args, iw_samples=1)

            # Logging message.
            with open(snap_dir + 'log.txt', 'a') as ff:
                msg = 'epoch {}\ttrain bpd {:.3f}\tval bpd {:.3f}\t'.format(
                    epoch, tr_bpd, v_bpd)
                print(msg, file=ff)

            # Sample and time sampling. Guard synchronize(): calling it on a
            # CPU-only host raised, even though the model was placed on CPU.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            start_sample = time.time()
            plot_samples(model_sample, args, epoch, v_bpd)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            print('Sampling took {} seconds'.format(
                time.time() - start_sample))

            val_bpd.append(v_bpd)
            writer.add_scalar('val bpd', v_bpd, epoch)

            # Model save based on val performance
            if v_bpd < best_val_bpd:
                best_train_bpd = tr_bpd
                best_val_bpd = v_bpd
                try:
                    # Unwrap DataParallel so the checkpoint loads standalone.
                    if hasattr(model, 'module'):
                        torch.save(model.module, snap_dir + 'a.model')
                    else:
                        torch.save(model, snap_dir + 'a.model')
                    torch.save(optimizer, snap_dir + 'a.optimizer')
                    print('->model saved<-')
                except Exception:
                    # Best-effort checkpointing. The previous bare ``except:``
                    # also swallowed KeyboardInterrupt/SystemExit.
                    print('Saving was unsuccessful.')

            print('(BEST: train bpd {:.4f}, val bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_bpd):
                raise ValueError('NaN encountered!')

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_train_time, std_train_time))

    # ========================================================================
    # EVALUATION
    # ========================================================================
    final_model = torch.load(snap_dir + 'a.model')
    test_bpd = evaluate(test_loader, final_model, args)
    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative elbo bpd {:.4f}'.format(epoch, test_bpd)
        print(msg, file=ff)

    test_bpd = evaluate(test_loader, final_model, args, iw_samples=1000)
    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative log_px bpd {:.4f}'.format(
            epoch, test_bpd)
        print(msg, file=ff)
def run(args, kwargs):
    """Train a discrete flow model with data-dependent init and evaluate it.

    Builds a timestamped snapshot directory from the variable/distribution
    types, does a one-batch CPU forward pass to initialize the model, trains
    with Adamax + warmup/decay LR schedule, checkpoints on best *train* bpd,
    and finally evaluates the best checkpoint on the test set.
    ``kwargs`` is forwarded to ``load_dataset``.
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # Evaluation is expensive on imagenet; cap the interval.
    if 'imagenet' in args.dataset and args.evaluate_interval_epochs > 5:
        args.evaluate_interval_epochs = 5

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Timestamp signature; ':' replaced for filesystem safety.
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(
        args.out_dir,
        args.variable_type + '_' + args.distribution_type + args.dataset)
    snap_dir = snapshots_path

    snap_dir += '_' + 'flows_' + \
        str(args.n_flows) + '_levels_' + str(args.n_levels)

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    with open(snap_dir + 'log.txt', 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    print(args.input_size)

    import models.Model as Model

    model = Model.Model(args)
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.set_temperature(args.temperature)
    model.enable_hard_round(args.hard_round)

    model_sample = model

    # ====================================
    # INIT
    # ====================================
    # data dependend initialization on CPU: a single forward pass before
    # moving to GPU / wrapping in DataParallel.
    for batch_idx, data in enumerate(train_loader):
        model(data)
        break

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model, dim=0)

    model.to(args.device)

    def lr_lambda(epoch):
        # Linear warmup followed by exponential decay.
        return min(1., (epoch+1) / args.warmup) * np.power(args.lr_decay, epoch)

    optimizer = optim.Adamax(
        model.parameters(), lr=args.learning_rate, eps=1.e-7)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda, last_epoch=-1)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() then train() looks intentional (e.g. to finalize
    # the data-dependent init above) -- confirm.
    model.eval()
    model.train()

    for epoch in range(1, args.epochs + 1):
        t_start = time.time()
        # NOTE(review): scheduler.step() before the epoch's optimizer steps is
        # the pre-1.1 PyTorch ordering; reordering would shift the LR schedule
        # by one epoch, so it is left as-is -- confirm intended.
        scheduler.step()
        tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args)
        train_bpd.append(tr_bpd)
        train_times.append(time.time()-t_start)
        print('One training epoch took %.2f seconds' % (time.time()-t_start))

        if epoch < 25 or epoch % args.evaluate_interval_epochs == 0:
            v_loss, v_bpd = evaluate(
                train_loader, val_loader, model, model_sample, args,
                epoch=epoch, file=snap_dir + 'log.txt')
            val_bpd.append(v_bpd)

            # Model save based on TRAIN performance (is heavily correlated with validation performance.)
            if np.mean(tr_bpd) < best_train_bpd:
                best_train_bpd = np.mean(tr_bpd)
                best_val_bpd = v_bpd
                torch.save(model, snap_dir + 'a.model')
                torch.save(optimizer, snap_dir + 'a.optimizer')
                print('->model saved<-')

            print('(BEST: train bpd {:.4f}, test bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

    train_bpd = np.hstack(train_bpd)
    val_bpd = np.array(val_bpd)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================
    # Reload the best checkpoint for the final test-set evaluation.
    final_model = torch.load(snap_dir + 'a.model')
    test_loss, test_bpd = evaluate(
        train_loader, test_loader, final_model, final_model, args,
        epoch=epoch, file=snap_dir + 'test_log.txt')
    print('Test loss / bpd: %.2f / %.2f' % (test_loss, test_bpd))
def main(main_args=None):
    """Drive a full experiment: data, model, training, validation, testing.

    Pass ``main_args`` to run this script as a function from another script
    (``None`` falls back to the process's command line).
    """
    # --------------------------------------------------------------------
    # Parse experiment settings (also sets up snapshot dir and logging).
    # --------------------------------------------------------------------
    args, kwargs = parse_args(main_args)

    # --------------------------------------------------------------------
    # Data.
    # --------------------------------------------------------------------
    logger.info('LOADING DATA:')
    train_loader, val_loader, test_loader, args = load_image_dataset(args, **kwargs)

    # --------------------------------------------------------------------
    # Persist the experiment settings next to the run.
    # --------------------------------------------------------------------
    logger.info(f'EXPERIMENT SETTINGS:\n{args}\n')
    torch.save(args, os.path.join(args.snap_dir, 'config.pt'))

    # --------------------------------------------------------------------
    # Model and optimization.
    # --------------------------------------------------------------------
    model = init_model(args)
    optimizer, scheduler = init_optimizer(model, args)
    n_params = sum(p.nelement() for p in model.parameters())
    logger.info(f"MODEL:\nNumber of model parameters={n_params}\n{model}\n")

    if args.load:
        logger.info(f'LOADING CHECKPOINT FROM PRE-TRAINED MODEL: {args.load}')
        has_init_component = args.loaded_init_component is not None
        has_all_trained = args.loaded_all_trained is not None
        init_with_args = (args.flow == "boosted"
                          and has_init_component
                          and has_all_trained)
        load(model, optimizer, args.load, args, init_with_args)

    # --------------------------------------------------------------------
    # Training (skipped only when resuming a checkpoint with epochs <= 0).
    # --------------------------------------------------------------------
    training_required = args.epochs > 0 or args.load is None
    if training_required:
        logger.info('TRAINING:')
        if args.tensorboard:
            logger.info(f'Follow progress on tensorboard: tb {args.snap_dir}')
        train_loss, val_loss = train(train_loader, val_loader, model,
                                     optimizer, scheduler, args)

    # --------------------------------------------------------------------
    # Validation: reload the best checkpoint if we just trained.
    # --------------------------------------------------------------------
    logger.info('VALIDATION:')
    if training_required:
        load(model, optimizer, args.snap_dir + 'model.pt', args)
    val_loss, val_rec, val_kl = evaluate(val_loader, model, args,
                                         results_type='Validation')

    # --------------------------------------------------------------------
    # Testing (optional): ELBO decomposition plus estimated likelihood.
    # --------------------------------------------------------------------
    if args.testing:
        logger.info("TESTING:")
        test_loss, test_rec, test_kl = evaluate(test_loader, model, args,
                                                results_type='Test')
        test_nll = evaluate_likelihood(test_loader, model, args,
                                       S=args.nll_samples, MB=args.nll_mb,
                                       results_type='Test')
def run(args):
    """Train an MLP-encoder/decoder VAE variant and dump results to JSON.

    The flow type is chosen by ``args.flow``. Per-epoch train/val losses,
    final ELBO, log likelihood and running time are written to
    ``<out_dir>/<run name>/results.json``.
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    train_loader, val_loader, test_loader, args = load_dataset(args)

    encoder = MLP_encoder(args)
    decoder = MLP_decoder(args)

    if args.flow == "planar":
        model = VAE.PlanarVAE(encoder, decoder, args)
    elif args.flow == "NICE":
        # NICE-planar
        model = VAE.NICEVAE_amor(encoder, decoder, args)
    elif args.flow == "NICE_MLP":
        model = VAE.NICEVAE(encoder, decoder, args)
    elif args.flow == "syl_orthogonal":
        model = VAE.Sylvester_ortho_VAE(encoder, decoder, args)
    elif args.flow == "real":
        model = VAE.RealNVPVAE(encoder, decoder, args)
    else:
        # Previously an unknown flow fell through silently and crashed later
        # with a confusing NameError on ``model``.
        raise ValueError('Invalid flow choice: {}'.format(args.flow))

    if args.vampprior:
        # Draw one batch of real data as pseudo-input initialization.
        # (renamed from ``load``, which shadowed a common helper name)
        pseudo_loader = torch.utils.data.DataLoader(
            train_loader.dataset, batch_size=args.num_pseudos, shuffle=True)
        pseudo_inputs = (next(iter(pseudo_loader))[0]
                         if args.data_as_pseudo else None)
        model.init_pseudoinputs(pseudo_inputs)

    if args.cuda:
        print("Model on GPU")
        model.cuda()
    print(model)

    optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate,
                              momentum=0.9)

    # ---- Training ------------------------------------------------------
    train_loss = []
    val_loss = []

    epoch = 0
    t = time.time()
    for epoch in range(1, args.epochs + 1):
        tr_loss = train(epoch, train_loader, model, optimizer, args)
        train_loss.append(tr_loss.mean())
        v_loss = evaluate(val_loader, model, args)
        val_loss.append(v_loss)

    train_loss = np.hstack(train_loss)
    val_loss = np.hstack(val_loss)

    results = {
        "train_loss": train_loss.tolist(),
        "val_loss": val_loss.tolist()
    }

    # ---- Testing -------------------------------------------------------
    validation_loss = evaluate(val_loader, model, args)
    test_loss, log_likelihood = evaluate(test_loader, model, args,
                                         testing=True)
    results["ELBO"] = test_loss
    results["log_likelihood"] = log_likelihood
    # Previously computed but never reported; keep it in the results dump.
    results["validation_loss"] = validation_loss

    elapsed = time.time() - t
    results["Running time"] = elapsed

    # ---- Save the results ----------------------------------------------
    json_dir = args.out_dir + f"{args.flow}perm_k_{args.num_flows}_RMSProp_lr{args.learning_rate}_4"
    print("Saving data at: " + json_dir)
    output_folder = pathlib.Path(json_dir)
    output_folder.mkdir(parents=True, exist_ok=True)
    results_json = json.dumps(results, indent=4, sort_keys=True)
    (output_folder / "results.json").write_text(results_json)