def train():
    """Build the classifier graph, train it on CIFAR10, and periodically
    validate and checkpoint.

    Relies on module-level config defined elsewhere in this file (`bs`,
    `image_shape`, `architecture`, `nets`, `l2_coeff`, `lr`, `n_epochs`,
    `valid_freq`, `print_freq`, `transforms`, and the train/test arrays).
    """
    # Symbolic inputs; actual minibatches are fed through the shared
    # placeholders below via `givens`, so the compiled functions take no args.
    X = T.tensor4('images')
    y = T.lvector('labels')
    X_ = nn.placeholder((bs, ) + image_shape, name='images_plhd')
    y_ = nn.placeholder((bs, ), dtype='int64', name='labels_plhd')
    net = nets[architecture]((None, ) + image_shape)

    nn.set_training_on()
    updates, losses, grad_norms = net.get_updates(X, y, l2_coeff, lr)
    train_net = nn.function([], list(losses.values()), updates=updates, givens={ X: X_, y: y_ }, name='train net')

    nn.set_training_off()
    # NOTE(review): `losses` is rebound here to the dict returned by
    # `get_accuracy`; the training loop below pairs `losses.keys()` with
    # values computed from the *training* losses dict — confirm the two
    # dicts share the same keys in the same order.
    err, losses = net.get_accuracy(X, y)
    valid_net = nn.function([], [err, losses['loss']], givens={ X: X_, y: y_ }, name='validate net')

    train_data = CIFAR10((X_train, y_train), (X_, y_), bs, n_epochs, training=True, shuffle=True, augmentation=transforms)
    valid_data = CIFAR10((X_test, y_test), (X_, y_), bs, 1, training=False, shuffle=False)
    mon = nn.Monitor(model_name=architecture, print_freq=print_freq)

    for it in train_data:
        with mon:
            losses_ = train_net()
            # Abort early on divergence instead of logging garbage.
            if np.any(np.isnan(losses_)) or np.any(np.isinf(losses_)):
                raise ValueError('NAN loss!')

            for j, k in enumerate(losses.keys()):
                mon.plot(k, losses_[j])

            if it % valid_freq == 0:
                # Mean of [error, loss] over the entire validation set.
                mean_res = np.mean([valid_net() for _ in valid_data], 0)
                mon.plot('validation error', mean_res[0])
                mon.plot('validation loss', mean_res[1])
                # Rolling checkpoint: keep the 5 most recent snapshots.
                mon.dump(nn.utils.shared2numpy(net.params), '%s.npy' % architecture, 5)
    print('Training finished!')
def style_transfer():
    """Run WCT style transfer over the test inputs.

    Depending on whether the content/style test paths point at single files
    or directories, stylizes one pair, one content against many styles, many
    contents against one style, or the full cross product. Results are
    written through the monitor.
    """
    enc = VGG19(input_shape)
    decs = [Decoder(enc, i, name='decoder %d' % i) for i in indices]
    sty_net = StyleTransfer(enc, decs)

    X = T.tensor4('input')
    Y = T.tensor4('style')

    # Restore the trained decoder weights from the given run folder.
    mon = nn.Monitor(current_folder=args.path_to_weight_files)
    for pos, layer in enumerate(indices):
        saved = mon.load('decoder-%d-final.npz' % layer)
        nn.utils.numpy2shared(saved, decs[pos].params)

    nn.set_training_off()
    transfer = nn.function([X, Y], sty_net(X, Y), name='transfer style')

    content_is_file = os.path.isfile(input_path_test)
    style_is_file = os.path.isfile(style_path_test)

    if content_is_file and style_is_file:
        # Single content image, single style image.
        content = prep_image_test(misc.imread(input_path_test))
        style_img = prep_image_test(misc.imread(style_path_test))
        result = transfer(content, style_img)
        mon.imwrite('test %s' % input_path_test[:-4], content)
        mon.imwrite('test %s' % style_path_test[:-4], style_img)
        mon.imwrite('test %s %s' % (input_path_test[:-4], style_path_test[:-4]), result)
    elif content_is_file and os.path.isdir(style_path_test):
        # One content image against every style in the directory.
        content = prep_image_test(misc.imread(input_path_test))
        for style_file in os.listdir(style_path_test):
            style_img = prep_image_test(misc.imread(os.path.join(style_path_test, style_file)))
            result = transfer(content, style_img)
            mon.imwrite('test %s' % style_file[:-4], style_img)
            mon.imwrite('test %s %s' % (input_path_test[:-4], style_file[:-4]), result)
        mon.imwrite('test %s' % input_path_test[:-4], content)
    elif os.path.isdir(input_path_test) and style_is_file:
        # Every content image in the directory against one style.
        style_img = prep_image_test(misc.imread(style_path_test))
        for input_file in os.listdir(input_path_test):
            content = prep_image_test(misc.imread(os.path.join(input_path_test, input_file)))
            result = transfer(content, style_img)
            mon.imwrite('test %s' % input_file[:-4], content)
            mon.imwrite('test %s %s' % (input_file[:-4], style_path_test[:-4]), result)
        mon.imwrite('test %s' % style_path_test[:-4], style_img)
    else:
        # Directory x directory: full cross product of styles and contents.
        style_files = os.listdir(style_path_test)
        input_files = os.listdir(input_path_test)
        for style_file in style_files:
            style_img = prep_image_test(misc.imread(os.path.join(style_path_test, style_file)))
            for input_file in input_files:
                content = prep_image_test(misc.imread(os.path.join(input_path_test, input_file)))
                result = transfer(content, style_img)
                mon.imwrite('test %s' % input_file[:-4], content)
                mon.imwrite('test %s %s' % (input_file[:-4], style_file[:-4]), result)
            mon.imwrite('test %s' % style_file[:-4], style_img)

    mon.flush()
    print('Testing finished!')
def train():
    """Train one WCT decoder per selected VGG19 level, periodically writing
    reconstructions/stylizations and checkpointing the decoders.

    Relies on module-level config defined elsewhere in this file
    (`input_shape`, `indices`, `bs`, `lr`, `weight`, `decay`, `n_epochs`,
    `val_freq`, `print_freq`, `num_val_imgs`, and the data paths).
    """
    enc = VGG19(input_shape)
    decs = [Decoder(enc, i, name='decoder %d' % i) for i in indices]
    sty_net = StyleTransfer(enc, decs)

    # Symbolic inputs; minibatches are fed through the shared placeholders
    # via `givens`, so the compiled functions below declare no inputs.
    X = T.tensor4('input')
    Y = T.tensor4('style')
    idx = T.scalar('iter', 'int32')
    X_ = nn.placeholder((bs,) + input_shape[1:], name='input_plhd')
    Y_ = nn.placeholder((bs,) + input_shape[1:], name='style_plhd')
    lr_ = nn.placeholder(value=lr, name='lr_plhd')

    nn.set_training_on()
    # Each decoder cost is a (pixel loss, feature loss) pair; `weight`
    # balances the two in the Adam objective.
    losses = [dec.cost(X) for dec in decs]
    updates = [nn.adam(loss[0] + weight * loss[1], dec.trainable, lr) for loss, dec in zip(losses, decs)]
    nn.anneal_learning_rate(lr_, idx, 'inverse', decay=decay)
    # NOTE(review): these functions are compiled with an empty input list yet
    # are invoked as `train(it)` in the loop below — confirm `nn.function`
    # accepts the extra argument or whether `idx` should be an input here.
    trains = [nn.function([], [loss[0], loss[1], dec(X, True)], givens={X: X_}, updates=update, name='train decoder') for loss, dec, update in zip(losses, decs, updates)]

    nn.set_training_off()
    X_styled = sty_net(X, Y)
    transfer = nn.function([], X_styled, givens={X: X_, Y: Y_}, name='transfer style')

    data_train = DataManager(X_, input_path_train, bs, n_epochs, True, num_val_imgs=num_val_imgs, input_shape=input_shape)
    data_test = DataManagerStyleTransfer((X_, Y_), (input_path_val, style_path_val), bs, 1, input_shape=input_shape)
    mon = nn.Monitor(model_name='WCT', valid_freq=print_freq)
    print('Training...')
    for it in data_train:
        # One update per decoder per iteration; each result is
        # [pixel loss, feature loss, reconstruction].
        results = [train(it) for train in trains]
        with mon:
            for layer, res in zip(indices, results):
                if np.isnan(res[0] + res[1]) or np.isinf(res[0] + res[1]):
                    raise ValueError('Training failed!')
                mon.plot('pixel loss at layer %d' % layer, res[0])
                mon.plot('feature loss at layer %d' % layer, res[1])
                if it % val_freq == 0:
                    mon.imwrite('recon img at layer %d' % layer, res[2])
                    # NOTE(review): the stylization pass and checkpoint dumps
                    # below sit inside the per-layer loop, so they repeat for
                    # every decoder at each `val_freq` step — confirm this
                    # repetition is intended and not a mis-indentation.
                    for i in data_test:
                        img = transfer()
                        mon.imwrite('stylized image %d' % i, img)
                        mon.imwrite('input %d' % i, X_.get_value())
                        mon.imwrite('style %d' % i, Y_.get_value())
                    # Rolling checkpoints: keep the 5 most recent versions.
                    for idx, dec in zip(indices, decs):
                        mon.dump(nn.utils.shared2numpy(dec.params), 'decoder-%d.npz' % idx, 5)
            mon.flush()

    # Final, unversioned weights for each decoder.
    for idx, dec in zip(indices, decs):
        mon.dump(nn.utils.shared2numpy(dec.params), 'decoder-%d-final.npz' % idx)
    print('Training finished!')
def test_monitor_plot():
    """Smoke-test Monitor.plot by logging a decaying scalar for 20 steps."""
    import os
    import shutil

    n_iters = 20
    valid_freq = 2

    # Start from a clean slate so the monitor creates a fresh run folder.
    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(print_freq=valid_freq)
    for step in range(n_iters):
        with mon:
            mon.plot('dummy plot', np.exp(-step))
    mon.flush()
def test_monitor_dump():
    """Exercise Monitor.dump/load round-tripping of versioned numpy arrays."""
    shape = (128, 256, 3, 3)
    n_iters = 10
    print_freq = 2
    a = np.random.rand(*shape).astype('float32')
    import os
    import shutil
    # Fresh results folder so version numbering starts from scratch.
    if os.path.exists('results'):
        shutil.rmtree('results')
    mon = nn.Monitor(print_freq=print_freq)
    res = []
    for i in range(n_iters):
        a += 1.
        # Snapshot the array state that each dump should have persisted.
        res.append(np.copy(a))
        with mon:
            # keep=3: only the three most recent versions survive on disk.
            mon.dump(a, 'foo.npy', 3)
    mon.flush()
    # Reopen the same run folder and read back specific dump versions.
    mon = nn.Monitor(current_folder='results/my_model/run1')
    loaded_a = [mon.load('foo.npy', version) for version in (6, 8, 10)]
    # NOTE(review): versions (6, 8, 10) are compared against res[[6, 8, 9]] —
    # presumably version 10 resolves to the final dump (index 9); confirm
    # against the Monitor's version-numbering convention.
    utt.assert_allclose(np.array(res)[[6, 8, 9]], loaded_a)
def test_monitor_hist():
    """Smoke-test Monitor.hist by logging a drifting noisy filter tensor."""
    import os
    import shutil

    n_iters = 20
    valid_freq = 2
    size = (64, 32, 3, 3)

    # Remove any previous run folder so the monitor starts fresh.
    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(valid_freq=valid_freq)
    base = np.random.uniform(-1, 1, size)
    for step in range(n_iters):
        with mon:
            noisy = base + step / 10. * np.random.normal(.5, size=size)
            mon.hist('filter%d' % step, noisy)
    mon.flush()
def test_monitor_hist():
    """Smoke-test Monitor.hist with `hist_last`/`last_only` options.

    NOTE(review): this redefines `test_monitor_hist` from the previous block,
    shadowing it at module level so only this one is collected by a test
    runner — consider giving one of them a distinct name.
    """
    import os
    import shutil

    n_iters = 20
    valid_freq = 2
    size = (64, 32, 3, 3)

    # Remove any previous run folder so the monitor starts fresh.
    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(print_freq=valid_freq, hist_last=True)
    base = np.random.normal(scale=.25, size=size)
    for step in range(n_iters):
        with mon:
            mon.plot('foo', step**2 / 4.)
            mon.hist('filter_last', base * (step + 1) * .25 + step / 10., last_only=True, n_bins=10)
            mon.hist('filter', base * (step + 1) * .25 + step / 15.)
    mon.flush()
def train():
    """Train an Augmented CycleGAN on Edges2Shoes with linear learning-rate
    decay, periodic visualization, and rolling checkpoints.

    Relies on module-level config defined elsewhere in this file (`bs`,
    `latent_dim`, `image_size`, filter counts, `lambda_*` loss weights,
    epoch counts, `valid_freq`, `print_freq`, `n_multi`, `n_imgs_to_save`,
    `use_*` flags, `pre_process`, `unnormalize`).
    """
    # Raw symbolic inputs; the networks consume the pre-processed versions.
    X_A_full = T.tensor4('A')
    X_B_full = T.tensor4('B')
    X_A = pre_process(X_A_full)
    X_B = pre_process(X_B_full)
    # Symbolic RNG: a fresh noise sample is drawn on every function call.
    z = nn.utils.srng.normal((bs, latent_dim))
    idx = T.scalar('iter')
    X_A_ = nn.placeholder((bs, 3, image_size*4, image_size*4), name='A_plhd')
    X_B_ = nn.placeholder((bs, 3, image_size*4, image_size*4), name='B_plhd')
    lr_ = nn.placeholder(value=lr, name='lr_plhd')
    net = AugmentedCycleGAN((None, 3, image_size, image_size), latent_dim, n_gen_filters, n_dis_filters, n_enc_filters, 3, use_dropout, use_sigmoid, use_latent_gan)

    nn.set_training_on()
    updates_dis, updates_gen, dis_losses, dis_preds, gen_losses, grad_norms = net.learn(X_A, X_B, z, lambda_A, lambda_B, lambda_z_B, lr=lr_, beta1=beta1, max_norm=max_norm)
    # Minibatches are fed through the shared placeholders via `givens`.
    train_dis = nn.function([], list(dis_losses.values()), updates=updates_dis, givens={X_A_full: X_A_, X_B_full: X_B_}, name='train discriminators')
    train_gen = nn.function([], list(gen_losses.values()), updates=updates_gen, givens={X_A_full: X_A_, X_B_full: X_B_}, name='train generators')
    discriminate = nn.function([], list(dis_preds.values()), givens={X_A_full: X_A_, X_B_full: X_B_}, name='discriminate')
    compute_grad_norms = nn.function([], list(grad_norms.values()), givens={X_A_full: X_A_, X_B_full: X_B_}, name='compute grad norms')
    # Same discriminator updates, but taking the iteration index so `lr_`
    # anneals linearly over `n_epochs_decay`.
    nn.anneal_learning_rate(lr_, idx, 'linear', num_iters=n_epochs_decay)
    train_dis_decay = nn.function([idx], list(dis_losses.values()), updates=updates_dis, givens={X_A_full: X_A_, X_B_full: X_B_}, name='train discriminators with decay')

    nn.set_training_off()
    # Fixed noise so visualizations stay comparable across iterations.
    fixed_z = T.constant(np.random.normal(size=(bs, latent_dim)), dtype='float32')
    fixed_multi_z = T.constant(np.repeat(np.random.normal(size=(n_multi, latent_dim)), bs, 0), dtype='float32')
    visuals = net.generate_cycle(X_A, X_B, fixed_z)
    multi_fake_B = net.generate_multi(X_A, fixed_multi_z)
    visualize_single = nn.function([], list(visuals.values()), givens={X_A_full: X_A_, X_B_full: X_B_}, name='visualize single')
    visualize_multi = nn.function([], multi_fake_B, givens={X_A_full: X_A_}, name='visualize multi')

    train_data = Edges2Shoes((X_A_, X_B_), bs, n_epochs + n_epochs_decay + 1, 'train', True)
    val_data = Edges2Shoes((X_A_, X_B_), bs, 1, 'val', False, num_data=bs)
    mon = nn.Monitor(model_name='Augmented_CycleGAN', print_freq=print_freq)
    print('Training...')
    for it in train_data:
        # 1-based epoch index derived from the minibatch counter `it`.
        epoch = 1 + it // (len(train_data) // bs)
        with mon:
            # Plain discriminator step during the first `n_epochs`;
            # decayed-lr step for the remaining epochs.
            res_dis = train_dis() if epoch <= n_epochs else train_dis_decay(epoch - n_epochs)
            res_gen = train_gen()
            preds = discriminate()
            grads_ = compute_grad_norms()
            mon.plot('lr', lr_.get_value())
            for j, k in enumerate(dis_losses.keys()):
                mon.plot(k, res_dis[j])
            for j, k in enumerate(gen_losses.keys()):
                mon.plot(k, res_gen[j])
            for j, k in enumerate(dis_preds.keys()):
                mon.hist(k, preds[j])
            for j, k in enumerate(grad_norms.keys()):
                mon.plot(k, grads_[j])
            if it % valid_freq == 0:
                # Iterating `val_data` refills the placeholders; the compiled
                # visualizers then read them via `givens`.
                for _ in val_data:
                    vis_single = visualize_single()
                    vis_multi = visualize_multi()
                    for j, k in enumerate(visuals.keys()):
                        mon.imwrite(k, vis_single[j][:n_imgs_to_save], callback=unnormalize)
                    for j, fake_B in enumerate(vis_multi):
                        mon.imwrite('fake_B_multi_%d.jpg' % j, fake_B, callback=unnormalize)
                # Rolling checkpoints: keep the 5 most recent versions.
                mon.dump(nn.utils.shared2numpy(net.netG_A_B.params), 'gen_A_B.npy', 5)
                mon.dump(nn.utils.shared2numpy(net.netG_B_A.params), 'gen_B_A.npy', 5)
                mon.dump(nn.utils.shared2numpy(net.netD_A.params), 'dis_A.npy', 5)
                mon.dump(nn.utils.shared2numpy(net.netD_B.params), 'dis_B.npy', 5)
                mon.dump(nn.utils.shared2numpy(net.netE_B.params), 'enc_B.npy', 5)
                if use_latent_gan:
                    mon.dump(nn.utils.shared2numpy(net.netD_z_B.params), 'dis_z_B.npy', 5)
            mon.flush()

    # Final, unversioned weights.
    mon.dump(nn.utils.shared2numpy(net.netG_A_B.params), 'gen_A_B.npy')
    mon.dump(nn.utils.shared2numpy(net.netG_B_A.params), 'gen_B_A.npy')
    mon.dump(nn.utils.shared2numpy(net.netD_A.params), 'dis_A.npy')
    mon.dump(nn.utils.shared2numpy(net.netD_B.params), 'dis_B.npy')
    mon.dump(nn.utils.shared2numpy(net.netE_B.params), 'enc_B.npy')
    if use_latent_gan:
        mon.dump(nn.utils.shared2numpy(net.netD_z_B.params), 'dis_z_B.npy')
    print('Training finished!')