def save_im(im, diffeq):
    """Write ``im`` and a min-max rescaled ``diffeq`` as one interleaved PNG.

    The output path is ``<current_dir>/<args.save>/flow_<im_indx>.png``; the
    module-level counter ``im_indx`` is incremented after every call so that
    successive calls produce consecutively numbered files.

    Args:
        im: tensor of images; must have exactly the same shape as ``diffeq``.
        diffeq: tensor rescaled to roughly [0, 1] before saving. It is cloned
            first, so the caller's tensor is not modified.

    Raises:
        AssertionError: if ``im`` and ``diffeq`` differ in shape.
    """
    global im_indx

    filename = os.path.join(current_dir, args.save, "flow_%05d.png" % im_indx)
    utils.makedirs(os.path.dirname(filename))

    # Work on a copy: the rescaling below uses in-place tensor ops.
    diffeq = diffeq.clone()
    de_min, de_max = float(diffeq.min()), float(diffeq.max())
    # Min-max rescale. The epsilon keeps the division finite when the tensor
    # is constant. (Clamping to the tensor's own min/max, as was done before,
    # cannot change any value, so that step is dropped.)
    diffeq.add_(-de_min).div_(de_max - de_min + 1e-5)

    assert im.shape == diffeq.shape, "im and diffeq must have the same shape"
    shape = im.shape
    # stack -> (2, N, ...); transpose -> (N, 2, ...); flattening the first two
    # axes yields im[0], diffeq[0], im[1], diffeq[1], ...
    interleaved = (
        torch.stack([im, diffeq])
        .transpose(0, 1)
        .contiguous()
        .view(2 * shape[0], *shape[1:])
    )
    # NOTE(review): assumes save_image is torchvision.utils.save_image. Its
    # `range` argument only matters together with normalize=True (not set
    # here) and is rejected by newer releases, so it is no longer passed.
    save_image(interleaved, filename, nrow=20, padding=0)
    im_indx += 1
default=None, help="int_t ||df_i/dx_i||_F")  # tail of an add_argument(...) call opened before this chunk
parser.add_argument('--JoffdiagFrobint', type=float, default=None,
                    help="int_t ||df/dx - df_i/dx_i||_F")
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--save', type=str, default='experiments/cnf')
parser.add_argument('--evaluate', action='store_true')
parser.add_argument('--val_freq', type=int, default=200)
parser.add_argument('--log_freq', type=int, default=10)
# Parsing happens at import time, so every name below is a module-level global.
args = parser.parse_args()

# logger: create the output directory, then log to <args.save>/logs
utils.makedirs(args.save)
logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                          filepath=os.path.abspath(__file__))

# "blend" layers override two parsed options before the arguments are logged.
if args.layer_type == "blend":
    logger.info(
        "!! Setting time_length from None to 1.0 due to use of Blend layers.")
    args.time_length = 1.0
    args.train_T = False

logger.info(args)

# Fall back to the training batch size when no (truthy) test batch size is given.
test_batch_size = args.test_batch_size if args.test_batch_size else args.batch_size


# The batch_size default is evaluated once, when this def statement runs,
# i.e. it captures args.batch_size as parsed above.
def batch_iter(X, batch_size=args.batch_size, shuffle=False):  # body lies beyond the visible chunk
# Optional float-valued options; each defaults to None, and the help text
# gives the integrand it refers to.
parser.add_argument('--l1int', type=float, default=None, help="int_t ||f||_1")
parser.add_argument('--l2int', type=float, default=None, help="int_t ||f||_2")
parser.add_argument('--dl2int', type=float, default=None,
                    help="int_t ||f^T df/dt||_2")
parser.add_argument('--JFrobint', type=float, default=None,
                    help="int_t ||df/dx||_F")
parser.add_argument('--JdiagFrobint', type=float, default=None,
                    help="int_t ||df_i/dx_i||_F")
parser.add_argument('--JoffdiagFrobint', type=float, default=None,
                    help="int_t ||df/dx - df_i/dx_i||_F")

parser.add_argument('--save', type=str, default='experiments/cnf')
parser.add_argument('--viz_freq', type=int, default=1000)
parser.add_argument('--val_freq', type=int, default=1000)
parser.add_argument('--log_freq', type=int, default=100)
parser.add_argument('--gpu', type=int, default=0)
# Parsing happens at import time, so every name below is a module-level global.
args = parser.parse_args()

# logger: create the output directory, then log to <args.save>/logs
utils.makedirs(args.save)
logger = utils.get_logger(logpath=os.path.join(args.save, 'logs'),
                          filepath=os.path.abspath(__file__))
logger.info(args)

# Use the CUDA device with index args.gpu when CUDA is available, else the CPU.
device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')


def construct_model():
    # With args.nf set, the model is args.depth PlanarFlow(2) layers wrapped
    # in a SequentialFlow.
    if args.nf:
        chain = []
        for i in range(args.depth):
            chain.append(layers.PlanarFlow(2))
        return layers.SequentialFlow(chain)
    else:  # this branch continues beyond the visible chunk
# Report the model and its parameter count, then render diagnostic figures
# into <args.save>/figs.
print(model)
print("Number of trainable parameters: {}".format(count_parameters(model)))
model.eval()
# 800**2 samples, matching the npts=800 passed to the plotting helpers below.
p_samples = toy_data.inf_train_gen(args.data, batch_size=800**2)
# NOTE(review): this chunk has lost its indentation; every statement after
# the `with` header is assumed to belong to its body -- confirm against the
# original layout.
with torch.no_grad():
    sample_fn, density_fn = get_transforms(model)

    # Figure 1: the samples drawn above from toy_data.
    plt.figure(figsize=(10, 10))
    ax = ax = plt.gca()
    viz_flow.plt_samples(p_samples, ax, npts=800)
    # Zero margins on all four sides of the subplot.
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    fig_filename = os.path.join(args.save, 'figs', 'true_samples.jpg')
    utils.makedirs(os.path.dirname(fig_filename))
    plt.savefig(fig_filename)
    plt.close()

    # Figure 2: plotted from standard_normal_logprob and density_fn.
    plt.figure(figsize=(10, 10))
    ax = ax = plt.gca()
    viz_flow.plt_flow_density(standard_normal_logprob, density_fn, ax,
                              npts=800, memory=200, device=device)
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    fig_filename = os.path.join(args.save, 'figs', 'model_density.jpg')
    utils.makedirs(os.path.dirname(fig_filename))
    plt.savefig(fig_filename)
    plt.close()

    # Figure 3: plotted from torch.randn and sample_fn; no save call for this
    # figure is visible in this chunk.
    plt.figure(figsize=(10, 10))
    ax = ax = plt.gca()
    viz_flow.plt_flow_samples(torch.randn, sample_fn, ax, npts=800,
                              memory=200, device=device)
def _format_metric(value):
    """Return ``value`` with four decimals, or unchanged if it is a string.

    ``run`` stores the placeholder string "N/A" for the validation metrics
    when it only evaluates; a float format spec would raise on a string.
    """
    if isinstance(value, str):
        return value
    return '{:.4f}'.format(value)


def run(args, kwargs):
    """Train (or only evaluate) a VAE variant and log the final metrics.

    Steps, in order:
      1. Derive a timestamped snapshot directory from ``args`` (stored back
         on ``args.snap_dir``), create it, attach a logger and save ``args``
         to ``<snap_dir><flow>.config``.
      2. Load the train/validation/test loaders via ``load_dataset``.
      3. Unless ``args.evaluate`` is set: build the model selected by
         ``args.flow``, train it with Adamax, save the model whenever the
         validation loss improves, and stop early once the loss has failed to
         improve more than ``args.early_stopping_epochs`` times (counted only
         from epoch ``args.warmup`` on). The best saved model is then
         reloaded and evaluated on the validation set.
         With ``args.evaluate`` set: load the model from ``args.model_path``.
      4. Evaluate the final model on the test set and log the results.

    Args:
        args: parsed options; attributes are both read and written
            (``model_signature`` and ``snap_dir`` are set here, and
            ``load_dataset`` returns a possibly updated ``args``).
        kwargs: dict forwarded to ``load_dataset`` as keyword arguments.

    Raises:
        ValueError: if ``args.flow`` names no known model, or if the
            validation loss becomes NaN during training.
    """
    # ========================================================================
    # SNAPSHOTS
    # ========================================================================
    # "YYYY-MM-DD HH:MM:SS" -> "YYYY-MM-DD_HH_MM_SS" (safe in a path).
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)

    # Flow-specific hyper-parameters become part of the directory name.
    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)
    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(args.num_householder)
    elif args.flow == 'cnf_rank':
        snap_dir = (snap_dir + '_rank_' + str(args.rank) + '_' + args.dims
                    + '_num_blocks_' + str(args.num_blocks))
    elif 'cnf' in args.flow:
        snap_dir = snap_dir + '_' + args.dims + '_num_blocks_' + str(args.num_blocks)

    if args.retrain_encoder:
        snap_dir = snap_dir + '_retrain-encoder_'
    elif args.evaluate:
        snap_dir = snap_dir + '_evaluate_'

    # The trailing '/' matters: file names are appended by plain string
    # concatenation below.
    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(snap_dir, exist_ok=True)

    # logger
    utils.makedirs(args.snap_dir)
    logger = utils.get_logger(logpath=os.path.join(args.snap_dir, 'logs'),
                              filepath=os.path.abspath(__file__))

    logger.info(args)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ========================================================================
    # LOAD DATA
    # ========================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    if not args.evaluate:

        # ====================================================================
        # SELECT MODEL
        # ====================================================================
        # flow parameters and architecture choice are passed on to model
        # through args
        if args.flow == 'no_flow':
            model = VAE.VAE(args)
        elif args.flow == 'planar':
            model = VAE.PlanarVAE(args)
        elif args.flow == 'iaf':
            model = VAE.IAFVAE(args)
        elif args.flow == 'orthogonal':
            model = VAE.OrthogonalSylvesterVAE(args)
        elif args.flow == 'householder':
            model = VAE.HouseholderSylvesterVAE(args)
        elif args.flow == 'triangular':
            model = VAE.TriangularSylvesterVAE(args)
        elif args.flow == 'cnf':
            model = CNFVAE.CNFVAE(args)
        elif args.flow == 'cnf_bias':
            model = CNFVAE.AmortizedBiasCNFVAE(args)
        elif args.flow == 'cnf_hyper':
            model = CNFVAE.HypernetCNFVAE(args)
        elif args.flow == 'cnf_lyper':
            model = CNFVAE.LypernetCNFVAE(args)
        elif args.flow == 'cnf_rank':
            model = CNFVAE.AmortizedLowRankCNFVAE(args)
        else:
            raise ValueError('Invalid flow choice')

        if args.retrain_encoder:
            logger.info(f"Initializing decoder from {args.model_path}")
            # NOTE: torch.load unpickles the file; only load trusted paths.
            dec_model = torch.load(args.model_path)
            # Copy over only the entries whose key contains 'p_x';
            # strict=False leaves every other parameter as initialised.
            dec_sd = {
                k: v for k, v in dec_model.state_dict().items() if 'p_x' in k
            }
            model.load_state_dict(dec_sd, strict=False)

        if args.cuda:
            logger.info("Model on GPU")
            model.cuda()

        logger.info(model)

        if args.retrain_encoder:
            # Optimise everything except the 'p_x' parameters loaded above.
            parameters = []
            logger.info('Optimizing over:')
            for name, param in model.named_parameters():
                if 'p_x' not in name:
                    logger.info(name)
                    parameters.append(param)
        else:
            parameters = model.parameters()

        optimizer = optim.Adamax(parameters, lr=args.learning_rate, eps=1.e-7)

        # ====================================================================
        # TRAINING
        # ====================================================================
        train_loss = []
        val_loss = []

        # for early stopping
        best_loss = np.inf
        best_bpd = np.inf
        e = 0  # epochs without improvement, counted from args.warmup on
        epoch = 0  # keeps the "Stopped after" log valid if no epoch runs

        train_times = []

        for epoch in range(1, args.epochs + 1):

            t_start = time.time()
            tr_loss = train(epoch, train_loader, model, optimizer, args, logger)
            train_loss.append(tr_loss)
            train_times.append(time.time() - t_start)
            logger.info('One training epoch took %.2f seconds' % (time.time() - t_start))

            v_loss, v_bpd = evaluate(val_loader, model, args, logger, epoch=epoch)

            val_loss.append(v_loss)

            # early-stopping
            if v_loss < best_loss:
                e = 0
                best_loss = v_loss
                if args.input_type != 'binary':
                    best_bpd = v_bpd
                logger.info('->model saved<-')
                torch.save(model, snap_dir + args.flow + '.model')

            elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
                e += 1
                if e > args.early_stopping_epochs:
                    break

            if args.input_type == 'binary':
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(
                        e, args.early_stopping_epochs, best_loss))
            else:
                logger.info(
                    '--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'
                    .format(e, args.early_stopping_epochs, best_loss, best_bpd))

            # A NaN loss never compares as smaller, so it is caught here
            # rather than by the improvement test above.
            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

        train_loss = np.hstack(train_loss)
        val_loss = np.array(val_loss)

        plot_training_curve(train_loss, val_loss,
                            fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

        # training time per epoch
        train_times = np.array(train_times)
        mean_train_time = np.mean(train_times)
        std_train_time = np.std(train_times, ddof=1)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        # ====================================================================
        # EVALUATION
        # ====================================================================
        logger.info(args)
        logger.info('Stopped after %d epochs' % epoch)
        logger.info('Average train time per epoch: %.2f +/- %.2f' %
                    (mean_train_time, std_train_time))

        # Reload the best snapshot rather than using the last-epoch weights.
        final_model = torch.load(snap_dir + args.flow + '.model')
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args, logger)

    else:
        # Evaluation only: there are no validation numbers to report.
        validation_loss = "N/A"
        validation_bpd = "N/A"
        logger.info(f"Loading model from {args.model_path}")
        # NOTE: torch.load unpickles the file; only load trusted paths.
        final_model = torch.load(args.model_path)

    test_loss, test_bpd = evaluate(test_loader, final_model, args, logger, testing=True)

    # The validation metrics may be the "N/A" placeholder, which a '{:.4f}'
    # spec cannot format; _format_metric handles both cases.
    logger.info(
        'FINAL EVALUATION ON VALIDATION SET. ELBO (VAL): {}'.format(
            _format_metric(validation_loss)))
    logger.info(
        'FINAL EVALUATION ON TEST SET. NLL (TEST): {:.4f}'.format(test_loss))
    if args.input_type != 'binary':
        logger.info(
            'FINAL EVALUATION ON VALIDATION SET. ELBO (VAL) BPD : {}'.format(
                _format_metric(validation_bpd)))
        logger.info(
            'FINAL EVALUATION ON TEST SET. NLL (TEST) BPD: {:.4f}'.format(
                test_bpd))
def train():
    """Run the training loop configured by the module-level ``args``.

    Builds the model and an Adam optimizer, then performs ``args.niters``
    optimisation steps. Each step logs running averages of wall time, loss,
    forward/backward function-evaluation counts and CNF time. Every
    ``args.val_freq`` iterations (and on the final one) a test loss is
    computed, and the model is checkpointed to ``<args.save>/checkpt.pth``
    whenever that loss is the lowest seen so far. Every ``args.viz_freq``
    iterations a figure with two curves over a grid on [-10, 10] is saved to
    ``<args.save>/figs/<itr>.jpg``.
    """
    train_template = (
        'Iter {:04d} | Time {:.4f}({:.4f}) | Loss {:.6f}({:.6f}) | NFE Forward {:.0f}({:.1f})'
        ' | NFE Backward {:.0f}({:.1f}) | CNF Time {:.4f}({:.4f})'
    )
    test_template = '[TEST] Iter {:04d} | Test Loss {:.6f} | NFE {:.0f}'

    model = build_model_tabular(args, 1).to(device)
    set_cnf_options(args, model)

    logger.info(model)
    logger.info("Number of trainable parameters: {}".format(
        count_parameters(model)))

    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    # Five independent running averages, each built with the argument 0.93.
    time_meter, loss_meter, nfef_meter, nfeb_meter, tt_meter = (
        utils.RunningAverageMeter(0.93) for _ in range(5)
    )

    end = time.time()
    lowest_test_loss = float('inf')
    model.train()

    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()

        loss = compute_loss(args, model)
        loss_meter.update(loss.item())

        # Read the counters before backward(): the backward count is derived
        # below as the difference to the count read after the step.
        cnf_time = count_total_time(model)
        forward_evals = count_nfe(model)

        loss.backward()
        optimizer.step()

        backward_evals = count_nfe(model) - forward_evals
        nfef_meter.update(forward_evals)
        nfeb_meter.update(backward_evals)

        time_meter.update(time.time() - end)
        tt_meter.update(cnf_time)

        logger.info(train_template.format(
            itr,
            time_meter.val, time_meter.avg,
            loss_meter.val, loss_meter.avg,
            nfef_meter.val, nfef_meter.avg,
            nfeb_meter.val, nfeb_meter.avg,
            tt_meter.val, tt_meter.avg,
        ))

        if itr % args.val_freq == 0 or itr == args.niters:
            with torch.no_grad():
                model.eval()
                test_loss = compute_loss(args, model,
                                         batch_size=args.test_batch_size)
                test_nfe = count_nfe(model)
                logger.info(test_template.format(itr, test_loss, test_nfe))

                # Checkpoint only when the test loss improves.
                if test_loss.item() < lowest_test_loss:
                    lowest_test_loss = test_loss.item()
                    utils.makedirs(args.save)
                    checkpoint = {
                        'args': args,
                        'state_dict': model.state_dict(),
                    }
                    torch.save(checkpoint,
                               os.path.join(args.save, 'checkpt.pth'))
                model.train()

        if itr % args.viz_freq == 0:
            with torch.no_grad():
                model.eval()

                grid = torch.linspace(-10, 10, 10000).view(-1, 1)

                # Both outputs are exponentiated before being plotted.
                data_out = data_density(grid)
                grid_np = grid.view(-1).cpu().numpy()
                plt.plot(grid_np, data_out.view(-1).exp().cpu().numpy(),
                         label='True')

                model_out = model_density(grid, model)
                plt.plot(grid_np, model_out.view(-1).exp().cpu().numpy(),
                         label='Model')

                utils.makedirs(os.path.join(args.save, 'figs'))
                figure_path = os.path.join(args.save, 'figs',
                                           '{:06d}.jpg'.format(itr))
                plt.savefig(figure_path)
                plt.close()
                model.train()

        # Restart the timer so the next "Time" value spans one iteration,
        # including any validation/visualisation work done above.
        end = time.time()

    logger.info('Training has finished.')