def compute_bpp_MVGauss_B(dataroot):
    """Fit the MVGauss baseline on domain B and print its test-set BPP.

    Loads the edges2shoes splits from ``dataroot``, fits mean/variance with
    ``train_MVGauss_B`` on the (unaligned) training iterator, and scores the
    (aligned) test iterator with ``eval_bpp_MVGauss_B``. The result is only
    printed; nothing is returned.

    Args:
        dataroot: dataset location forwarded to ``load_edges2shoes``.
    """
    splits = load_edges2shoes(dataroot)
    # The dev split is loaded but not needed for this baseline.
    train_a, train_b, _dev_a, _dev_b, test_a, test_b = splits

    train_iter = UnalignedIterator(train_a, train_b, batch_size=200)
    print('#training images = %d' % len(train_iter))

    test_iter = AlignedIterator(test_a, test_b, batch_size=200)
    print('#test images = %d' % len(test_iter))

    mean_b, var_b = train_MVGauss_B(train_iter)
    # Small epsilon keeps the log finite when a variance entry is zero.
    logvar_b = torch.log(var_b + 1e-5)

    score = eval_bpp_MVGauss_B(test_iter, mean_b, logvar_b)
    print("MVGauss BPP: %.4f" % score)
def _run_training(opt, out_f):
    """Build data and model from ``opt`` and run the train/evaluate loop.

    Private helper of :func:`train_model`; split out so that the entry point
    can own the lifetime of the log file.

    Args:
        opt: parsed training options (result of ``TrainOptions().parse``).
        out_f: open, writable log file handed to every ``print_log`` call.

    Raises:
        NotImplementedError: if ``opt.model`` is not one of
            ``'stoch_cycle_gan'``, ``'cycle_gan'`` or ``'aug_cycle_gan'``.
    """
    copy_scripts_to_folder(opt.expr_dir)

    use_gpu = len(opt.gpu_ids) > 0

    if opt.seed is not None:
        print("using random seed:", opt.seed)
        random.seed(opt.seed)
        np.random.seed(opt.seed)
        torch.manual_seed(opt.seed)
        if use_gpu:
            torch.cuda.manual_seed_all(opt.seed)

    # ------------------------------------------------------------------
    # datasets: either in-memory numpy arrays or DataLoader-backed
    # ------------------------------------------------------------------
    if opt.numpy_data:
        trainA, trainB, devA, devB, testA, testB = load_edges2shoes(opt.dataroot)
        train_dataset = UnalignedIterator(trainA, trainB, batch_size=opt.batchSize)
        print_log(out_f, '#training images = %d' % len(train_dataset))

        test_dataset = AlignedIterator(testA, testB, batch_size=100)
        print_log(out_f, '#test images = %d' % len(test_dataset))

        dev_dataset = AlignedIterator(devA, devB, batch_size=100)
        print_log(out_f, '#dev images = %d' % len(dev_dataset))

        # endless stream of small dev batches used only for visualization
        dev_cycle = itertools.cycle(AlignedIterator(devA, devB, batch_size=25))
    else:
        train_data_loader = DataLoader(opt, subset='train', unaligned=True, batchSize=opt.batchSize)
        test_data_loader = DataLoader(opt, subset='test', unaligned=False, batchSize=200)
        dev_data_loader = DataLoader(opt, subset='dev', unaligned=False, batchSize=200)
        dev_cycle_loader = DataLoader(opt, subset='dev', unaligned=False, batchSize=25)

        train_dataset = train_data_loader.load_data()
        dataset_size = len(train_data_loader)
        print_log(out_f, '#training images = %d' % dataset_size)

        test_dataset = test_data_loader.load_data()
        print_log(out_f, '#test images = %d' % len(test_data_loader))

        dev_dataset = dev_data_loader.load_data()
        print_log(out_f, '#dev images = %d' % len(dev_data_loader))

        # endless stream of small dev batches used only for visualization
        dev_cycle = itertools.cycle(dev_cycle_loader.load_data())

    if opt.supervised:
        if opt.numpy_data:
            # use the first ``sup_frac`` fraction of the training pairs
            sup_size = int(len(trainA) * opt.sup_frac)
            sup_trainA = trainA[:sup_size]
            sup_trainB = trainB[:sup_size]
            sup_train_dataset = AlignedIterator(sup_trainA, sup_trainB, batch_size=opt.batchSize)
        else:
            sup_train_data_loader = DataLoader(opt, subset='train', unaligned=False,
                                               batchSize=opt.batchSize, fraction=opt.sup_frac)
            sup_train_dataset = sup_train_data_loader.load_data()
            sup_size = len(sup_train_data_loader)
        sup_train_dataset = itertools.cycle(sup_train_dataset)
        print_log(out_f, '#supervised images = %d' % sup_size)

    # ------------------------------------------------------------------
    # create_model
    # ------------------------------------------------------------------
    vis_inf = False  # only the augmented model gets inference visualizations
    if opt.model == 'stoch_cycle_gan':
        model = StochCycleGAN(opt)
    elif opt.model == 'cycle_gan':
        model = StochCycleGAN(opt, ignore_noise=True)
    elif opt.model == 'aug_cycle_gan':
        model = AugmentedCycleGAN(opt)
        create_sub_dirs(opt, ['vis_inf'])
        vis_inf = True
    else:
        raise NotImplementedError('Specified model is not implemented.')

    print_log(out_f, "model [%s] was created" % (model.__class__.__name__))

    total_steps = 0
    print_start_time = time.time()
    results = {
        'best_dev_mse_A': sys.float_info.max,
        'best_test_mse_A': sys.float_info.max,
        'best_dev_bpp_B': sys.float_info.max,
        'best_test_bpp_B': sys.float_info.max,
    }
    save_results(opt.expr_dir, results)
    history_mse_A = []
    history_ubo_B = []

    create_sub_dirs(opt, ['vis_pred_B'])

    for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
        epoch_start_time = time.time()
        epoch_iter = 0

        for data in train_dataset:
            real_A, real_B = Variable(data['A']), Variable(data['B'])
            # skip batches where the two domains came back with different sizes
            if real_A.size(0) != real_B.size(0):
                continue

            prior_z_B = Variable(real_A.data.new(real_A.size(0), opt.nlatent, 1, 1).normal_(0, 1))

            total_steps += opt.batchSize
            epoch_iter += opt.batchSize

            if use_gpu:
                real_A = real_A.cuda()
                real_B = real_B.cuda()
                prior_z_B = prior_z_B.cuda()

            if opt.monitor_gnorm:
                losses, visuals, gnorms = model.train_instance(real_A, real_B, prior_z_B)
            else:
                losses, visuals = model.train_instance(real_A, real_B, prior_z_B)

            # supervised training
            if opt.supervised:
                # ``itertools.cycle`` objects have no ``.next()`` method on
                # Python 3; the ``next()`` builtin works on both 2.6+ and 3.
                sup_data = next(sup_train_dataset)
                sup_real_A, sup_real_B = Variable(sup_data['A']), Variable(sup_data['B'])
                if use_gpu:
                    sup_real_A, sup_real_B = sup_real_A.cuda(), sup_real_B.cuda()
                sup_losses = model.supervised_train_instance(sup_real_A, sup_real_B, prior_z_B)

            if total_steps % opt.display_freq == 0:
                # same step index is passed to every visualization call below
                vis_step = epoch_iter/opt.batchSize

                # visualize current training batch
                visualize_cycle(opt, real_A, visuals, epoch, vis_step, train=True)

                dev_data = next(dev_cycle)

                dev_real_A, dev_real_B = Variable(dev_data['A']), Variable(dev_data['B'])
                dev_prior_z_B = Variable(dev_real_A.data.new(dev_real_A.size(0),
                                                             opt.nlatent, 1, 1).normal_(0, 1))
                if use_gpu:
                    dev_real_A = dev_real_A.cuda()
                    dev_real_B = dev_real_B.cuda()
                    dev_prior_z_B = dev_prior_z_B.cuda()

                dev_visuals = model.generate_cycle(dev_real_A, dev_real_B, dev_prior_z_B)
                visualize_cycle(opt, dev_real_A, dev_visuals, epoch, vis_step, train=False)

                # visualize generated B with different z_B
                visualize_multi(opt, dev_real_A, model, epoch, vis_step)

                if vis_inf:
                    # visualize generated B with different z_B infered from real_B
                    visualize_inference(opt, dev_real_A, dev_real_B, model, epoch, vis_step)

            if total_steps % opt.print_freq == 0:
                # elapsed time since the previous log line, divided by batch size
                t = (time.time() - print_start_time) / opt.batchSize
                print_log(out_f, format_log(epoch, epoch_iter, losses, t))
                if opt.supervised:
                    print_log(out_f, format_log(epoch, epoch_iter, sup_losses, t, prefix=False))
                if opt.monitor_gnorm:
                    print_log(out_f, format_log(epoch, epoch_iter, gnorms, t, prefix=False)+"\n")
                print_start_time = time.time()

        if epoch % opt.save_epoch_freq == 0:
            print_log(out_f, 'saving the model at the end of epoch %d, iters %d' %
                      (epoch, total_steps))
            model.save('latest')

        #####################
        # evaluate mappings
        #####################
        if epoch % opt.eval_A_freq == 0:
            t = time.time()
            dev_mse_A = eval_mse_A(dev_dataset, model)
            test_mse_A = eval_mse_A(test_dataset, model)
            t = time.time() - t
            history_mse_A.append((dev_mse_A, test_mse_A))
            np.save("%s/history_mse_A" % opt.expr_dir, history_mse_A)

            res_str_list = ["[%d] DEV_MSE_A: %.4f, TEST_MSE_A: %.4f, TIME: %.4f" %
                            (epoch, dev_mse_A, test_mse_A, t)]
            # model selection is done on the dev split; test is recorded alongside
            if dev_mse_A < results['best_dev_mse_A']:
                with open("%s/best_mse_A.txt" % opt.expr_dir, 'w') as best_mse_A_f:
                    best_mse_A_f.write(res_str_list[0]+'\n')
                results['best_dev_mse_A'] = dev_mse_A
                results['best_test_mse_A'] = test_mse_A
                model.save('best_A')
                save_results(opt.expr_dir, results)
                res_str_list += ["*** BEST DEV A ***"]
            res_str = "\n".join(["-"*60] + res_str_list + ["-"*60])
            print_log(out_f, res_str)

        if epoch % opt.eval_B_freq == 0:
            t = time.time()
            # 'cycle_gan' is evaluated with a single step, the other models with 50
            if opt.model == 'cycle_gan':
                steps = 1
            else:
                steps = 50
            dev_ubo_B, dev_bpp_B, dev_kld_B = eval_ubo_B(dev_dataset, model, steps, True,
                                                         'pred_B_%d' % epoch,
                                                         opt.vis_pred_B)
            test_ubo_B, test_bpp_B, test_kld_B = eval_ubo_B(test_dataset, model, steps, False,
                                                            'pred_B',
                                                            opt.vis_pred_B)
            t = time.time() - t
            history_ubo_B.append((dev_ubo_B, dev_bpp_B, dev_kld_B,
                                  test_ubo_B, test_bpp_B, test_kld_B))
            np.save("%s/history_ubo_B" % opt.expr_dir, history_ubo_B)

            res_str_list = ["[%d] DEV_BPP_B: %.4f, TEST_BPP_B: %.4f, TIME: %.4f" %
                            (epoch, dev_bpp_B, test_bpp_B, t)]
            if dev_bpp_B < results['best_dev_bpp_B']:
                with open("%s/best_bpp_B.txt" % opt.expr_dir, 'w') as best_bpp_B_f:
                    best_bpp_B_f.write(res_str_list[0]+'\n')
                results['best_dev_bpp_B'] = dev_bpp_B
                results['best_test_bpp_B'] = test_bpp_B
                save_results(opt.expr_dir, results)
                model.save('best_B')
                res_str_list += ["*** BEST BPP B ***"]
            res_str = "\n".join(["-"*60] + res_str_list + ["-"*60])
            print_log(out_f, res_str)

        print_log(out_f, 'End of epoch %d / %d \t Time Taken: %d sec' %
                  (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))

        # learning-rate updates only start once the first ``niter`` epochs are done
        if epoch > opt.niter:
            model.update_learning_rate()


def train_model():
    """Parse training options and train the selected model.

    Progress is logged to ``<opt.expr_dir>/results.txt``. During training the
    function periodically writes visualizations, saves the model under the
    labels ``'latest'``, ``'best_A'`` and ``'best_B'``, and stores the
    evaluation histories (``history_mse_A``, ``history_ubo_B``) with
    ``np.save`` inside ``opt.expr_dir``.

    The log file is managed by a ``with`` block so it is closed even when
    training aborts with an exception.

    Raises:
        NotImplementedError: if ``opt.model`` names an unsupported model.
    """
    opt = TrainOptions().parse(sub_dirs=['vis_multi', 'vis_cycle', 'vis_latest', 'train_vis_cycle'])
    with open("%s/results.txt" % opt.expr_dir, 'w') as out_f:
        _run_training(opt, out_f)
def test_model():
    """Load a trained checkpoint and evaluate it according to ``opt.metric``.

    Options come from ``TestOptions().parse()`` and are then overlaid with the
    options stored in ``opt.pkl`` next to the checkpoint (``opt.chk_path``);
    ``dataroot`` from the command line is kept. Results/visualizations go to
    ``<expr_dir>/<opt.res_dir>``, which is created if missing.

    Supported metrics:
        * ``'bpp'``        - ``eval_ubo_B`` on the test split (optionally with
          a ``logvar_B`` fitted by ``train_logvar`` on 20% of the train split).
        * ``'mse'``        - ``eval_mse_A`` on dev and test splits.
        * ``'visual'``     - cycle visualization on dev batches.
        * ``'noise_sens'`` - ``sensitivity_to_edge_noise`` on one test batch.

    Raises:
        NotImplementedError: for an unknown ``opt.model`` or ``opt.metric``.
    """
    opt = TestOptions().parse()
    dataroot = opt.dataroot

    # extract expr_dir from chk_path
    expr_dir = os.path.dirname(opt.chk_path)
    opt_path = os.path.join(expr_dir, 'opt.pkl')

    # parse saved options...
    opt.__dict__.update(parse_opt_file(opt_path))
    # ...but keep the experiment dir / dataroot of *this* invocation
    opt.expr_dir = expr_dir
    opt.dataroot = dataroot

    # hack this for now
    opt.gpu_ids = [0]

    opt.seed = 12345
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)

    # create results directory (under expr_dir)
    res_path = os.path.join(opt.expr_dir, opt.res_dir)
    opt.res_dir = res_path
    if not os.path.exists(res_path):
        os.makedirs(res_path)

    use_gpu = len(opt.gpu_ids) > 0

    trainA, trainB, devA, devB, testA, testB = load_edges2shoes(opt.dataroot, opt.imgSize)
    # only the first 20% of the training data is kept for testing-time use
    sub_size = int(len(trainA) * 0.2)
    trainA = trainA[:sub_size]
    trainB = trainB[:sub_size]
    train_dataset = UnalignedIterator(trainA, trainB, batch_size=200)
    print('#training images = %d' % len(train_dataset))

    test_dataset = AlignedIterator(testA, testB, batch_size=200)
    print('#test images = %d' % len(test_dataset))

    dev_dataset = AlignedIterator(devA, devB, batch_size=200)
    print('#dev images = %d' % len(dev_dataset))

    vis_inf = False  # only the augmented model gets inference visualizations
    if opt.model == 'stoch_cycle_gan':
        model = StochCycleGAN(opt, testing=True)
    elif opt.model == 'cycle_gan':
        model = StochCycleGAN(opt, ignore_noise=True, testing=True)
    elif opt.model == 'aug_cycle_gan':
        model = AugmentedCycleGAN(opt, testing=True)
        vis_inf = True
    else:
        raise NotImplementedError('Specified model is not implemented.')

    model.load(opt.chk_path)

    if opt.metric == 'bpp':

        if opt.train_logvar:
            print("training logvar_B on training data...")
            logvar_B = train_logvar(train_dataset, model)
        else:
            logvar_B = None

        print("evaluating on test set...")
        t = time.time()
        test_ubo_B, test_bpp_B, test_kld_B = eval_ubo_B(test_dataset, model, 500,
                                                        visualize=True, vis_name='test_pred_B',
                                                        vis_path=opt.res_dir,
                                                        logvar_B=logvar_B,
                                                        verbose=True,
                                                        compute_l1=True)

        print("TEST_BPP_B: %.4f, TIME: %.4f" % (test_bpp_B, time.time()-t))

    elif opt.metric == 'mse':
        dev_mse_A = eval_mse_A(dev_dataset, model)
        test_mse_A = eval_mse_A(test_dataset, model)
        print("DEV_MSE_A: %.4f, TEST_MSE_A: %.4f" % (dev_mse_A, test_mse_A))

    elif opt.metric == 'visual':
        opt.num_multi = 5
        n_vis = 10
        dev_dataset = AlignedIterator(devA, devB, batch_size=n_vis)
        for i, vis_data in enumerate(dev_dataset):
            with torch.no_grad():
                real_A, real_B = Variable(vis_data['A']), Variable(vis_data['B'])
                # Size the latent by the actual batch (as train_model does),
                # not by n_vis, so a batch shorter than n_vis still lines up.
                prior_z_B = Variable(real_A.data.new(real_A.size(0), opt.nlatent, 1, 1).normal_(0, 1))
                if use_gpu:
                    real_A = real_A.cuda()
                    real_B = real_B.cuda()
                    prior_z_B = prior_z_B.cuda()

                visuals = model.generate_cycle(real_A, real_B, prior_z_B)
                visualize_cycle(opt, real_A, visuals, name='cycle_%d.png' % i)
                # NOTE(review): this exit() stops the program after the first
                # cycle image, so everything below in this loop is currently
                # unreachable. Looks like debugging residue - confirm before
                # removing, since doing so changes what the script outputs.
                exit()
                # visualize generated B with different z_B
                visualize_multi(opt, real_A, model, name='multi_%d.png' % i)
                visualize_cycle_B_multi(opt, real_B, model, name='cycle_B_multi_%d.png' % i)
                visualize_multi_cycle(opt, real_B, model, name='multi_cycle_%d.png' % i)

                if vis_inf:
                    # visualize generated B with different z_B infered from real_B
                    visualize_inference(opt, real_A, real_B, model, name='inf_%d.png' % i)

    elif opt.metric == 'noise_sens':
        # NOTE(review): relies on AlignedIterator exposing a ``.next()``
        # method - confirm this still holds under Python 3.
        sensitivity_to_edge_noise(opt, model, test_dataset.next()['B'])
    else:
        raise NotImplementedError('wrong metric!')