from time import time
import numpy as np
# c (experiment config), model, monitoring and train_epoch are assumed to be
# provided by the project's own modules at module level.


def main():
    monitoring.restart()
    t_start = time()
    try:
        monitoring.print_config()
        for i_epoch in range(-c.pre_low_lr, c.n_epochs):

            # Warm-up: run the epochs with negative index at a reduced learning rate
            if i_epoch < 0:
                for param_group in model.optim.param_groups:
                    param_group['lr'] = c.lr_init * 1e-1

            train_losses = train_epoch(i_epoch)
            test_losses = train_epoch(i_epoch, test=True)

            monitoring.show_loss(np.concatenate([train_losses, test_losses]))
            model.scheduler_step()

    except:
        # Keep a checkpoint of the aborted run, then re-raise
        model.save(c.filename_out + '_ABORT')
        raise

    finally:
        print("\n\nTraining took %f minutes\n\n" % ((time() - t_start) / 60.))
        model.save(c.filename_out)
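This first variant drives training through a model wrapper that owns its optimizer, learning-rate schedule, and checkpointing (model.optim, model.scheduler_step(), model.save()). The wrapper itself is not shown here; the following is a minimal sketch of the assumed interface. Everything inside it (the class name ModelWrapper, the Adam settings, the StepLR schedule, the checkpoint format) is an illustrative assumption; only the attribute and method names are taken from the calls above.

# Hypothetical sketch of the model wrapper assumed by the first main() above.
import torch

class ModelWrapper:
    def __init__(self, net, lr_init):
        self.net = net
        self.params_trainable = [p for p in net.parameters() if p.requires_grad]
        # Optimizer and schedule settings here are placeholders, not the originals
        self.optim = torch.optim.Adam(self.params_trainable, lr=lr_init)
        self.weight_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optim, step_size=1, gamma=0.99)

    def scheduler_step(self):
        # Advance the learning-rate schedule once per epoch
        self.weight_scheduler.step()

    def save(self, path):
        # Store network and optimizer state so training can be resumed
        torch.save({'net': self.net.state_dict(),
                    'opt': self.optim.state_dict()}, path)

    def load(self, path):
        state = torch.load(path)
        self.net.load_state_dict(state['net'])
        self.optim.load_state_dict(state['opt'])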
def main():
    monitoring.restart(c, loss_labels)
    monitoring.visualizer.print_config()
    t_start = time()
    try:
        for i_epoch in range(c.n_epochs):

            # Keep the learning rate reduced for the first c.pre_low_lr warm-up epochs
            if i_epoch < c.pre_low_lr:
                for param_group in optim.param_groups:
                    param_group['lr'] = c.lr_init * 3e-2

            train_losses = train_epoch(i_epoch)
            test_losses = train_epoch(i_epoch, test=True)

            monitoring.visualizer.update_losses(
                np.concatenate([train_losses, test_losses]),
                lr_scheduler.get_lr(), logscale=False)
            lr_scheduler.step()

            # Checkpoint after every epoch
            save(f'output/{c.suffix}.pt')

    except:
        save(f'output/{c.suffix}.pt' + '_ABORT')
        raise

    finally:
        print("\n\nTraining took %f minutes\n\n" % ((time() - t_start) / 60.))
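The second variant swaps model.save() for a free-standing save(path) helper, which is also not shown in this section. A minimal sketch, assuming the network and optimizer live at module level under the hypothetical names model and optim:

# Sketch of the assumed save() helper; 'model' and 'optim' are hypothetical
# module-level names for the network and its optimizer.
def save(path):
    torch.save({'net': model.state_dict(),
                'opt': optim.state_dict()}, path)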
def main(c):
    loss_labels = ['-log p(z)', '-log |det(J)|']

    # Init trainable model parameters
    if c.init_scale > 0:
        for p in c.model.params_trainable:
            p.data = c.init_scale * torch.randn_like(p.data)

    # Count total number of trainable parameters
    n_model_params = sum(p.numel() for p in c.model.params_trainable)
    print(f'\nModel {c.suffix} has {n_model_params:,} trainable parameters.\n')

    # Prepare optimizer and learning rate schedule
    optim = torch.optim.Adam(c.model.params_trainable, lr=c.lr_init,
                             betas=c.adam_betas, eps=1e-4,
                             weight_decay=c.l2_weight_reg)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optim, step_size=1, gamma=(c.final_decay)**(1. / c.n_epochs))

    # For visualization
    vis_batchsize = 300
    vis_latent = torch.randn(vis_batchsize, c.ndim_x).to(c.device)

    monitoring.restart(c, loss_labels)
    # monitoring.visualizer.print_config()

    t_start = time()
    try:
        for i_epoch in range(c.n_epochs):

            if i_epoch < c.pre_low_lr:
                for param_group in optim.param_groups:
                    param_group['lr'] = c.lr_init * 3e-2

            train_losses = train_epoch(c, optim, i_epoch, vis_latent)
            test_losses = train_epoch(c, optim, i_epoch, vis_latent, test=True)

            monitoring.visualizer.update_losses(
                np.concatenate([train_losses, test_losses]),
                lr_scheduler.get_lr(), logscale=False)
            lr_scheduler.step()

            # save(c, f'output/{c.suffix}.pt')

    except:
        # save(c, f'output/{c.suffix}.pt' + '_ABORT')
        raise

    finally:
        print("\n\nTraining took %f minutes\n\n" % ((time() - t_start) / 60.))

    return test_losses.sum()
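All three variants delegate the actual optimization to train_epoch(), which is not part of this section. The sketch below matches the call signature of the third variant and returns one averaged value per entry of loss_labels, so that np.concatenate([train_losses, test_losses]) lines up with the monitoring labels. The data loaders (c.train_loader, c.test_loader), the (z, log_jac) return convention of c.model, and the handling of vis_latent are assumptions for illustration, not the original implementation.

# Hypothetical sketch of train_epoch() as called by the third main() above.
# Assumed: the loaders yield batches x of shape (batch, c.ndim_x), and
# c.model(x) returns (z, log_jac), the latent output and per-sample log |det(J)|.
import numpy as np
import torch

def train_epoch(c, optim, i_epoch, vis_latent, test=False):
    c.model.train(not test)
    loader = c.test_loader if test else c.train_loader
    loss_history = []

    for x in loader:
        x = x.to(c.device)
        z, log_jac = c.model(x)

        # Maximum-likelihood loss of a flow under a standard-normal prior:
        # the two terms are ordered to match loss_labels above.
        nll = torch.mean(0.5 * torch.sum(z**2, dim=1))
        jac = -torch.mean(log_jac)
        loss = nll + jac

        if not test:
            optim.zero_grad()
            loss.backward()
            optim.step()

        loss_history.append([nll.item(), jac.item()])

    # vis_latent would be mapped back through the inverse model here to
    # visualize generated samples (omitted in this sketch).

    # Mean of each loss term over the epoch, in the order of loss_labels
    return np.mean(np.array(loss_history), axis=0)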