예제 #1
0
def train():
    """Train the selected classifier on CIFAR10 and validate it periodically.

    Compiles one function for a training step and one for validation, both
    reading their inputs from the placeholders ``X_``/``y_`` that the
    ``CIFAR10`` iterators are constructed with. Every ``valid_freq``
    iterations the whole validation set is evaluated, the mean error/loss
    are plotted and the network parameters are dumped.

    All hyper-parameters (``bs``, ``image_shape``, ``architecture``,
    ``l2_coeff``, ``lr``, ``n_epochs``, ``print_freq``, ``valid_freq``, ...)
    are read from the enclosing module.

    Raises:
        ValueError: if any training loss is NaN or infinite.
    """
    X = T.tensor4('images')
    y = T.lvector('labels')

    X_ = nn.placeholder((bs, ) + image_shape, name='images_plhd')
    y_ = nn.placeholder((bs, ), dtype='int64', name='labels_plhd')

    net = nets[architecture]((None, ) + image_shape)
    nn.set_training_on()
    updates, train_losses, _ = net.get_updates(X, y, l2_coeff, lr)
    # Remember the names of the training losses in the same order as the
    # compiled outputs, so the plotting loop cannot be affected by the
    # validation losses computed further down.
    train_loss_names = list(train_losses.keys())
    train_net = nn.function([],
                            list(train_losses.values()),
                            updates=updates,
                            givens={
                                X: X_,
                                y: y_
                            },
                            name='train net')

    nn.set_training_off()
    # Kept under a distinct name: rebinding the training-loss variable here
    # would make the training plots use the validation keys.
    err, valid_losses = net.get_accuracy(X, y)
    valid_net = nn.function([], [err, valid_losses['loss']],
                            givens={
                                X: X_,
                                y: y_
                            },
                            name='validate net')

    train_data = CIFAR10((X_train, y_train), (X_, y_),
                         bs,
                         n_epochs,
                         training=True,
                         shuffle=True,
                         augmentation=transforms)
    valid_data = CIFAR10((X_test, y_test), (X_, y_),
                         bs,
                         1,
                         training=False,
                         shuffle=False)
    mon = nn.Monitor(model_name=architecture, print_freq=print_freq)
    for it in train_data:
        with mon:
            losses_ = train_net()
            if np.any(np.isnan(losses_)) or np.any(np.isinf(losses_)):
                raise ValueError('NAN loss!')

            for loss_name, loss_value in zip(train_loss_names, losses_):
                mon.plot(loss_name, loss_value)

            if it % valid_freq == 0:
                # Each valid_net() call returns [error, loss] for one batch;
                # averaging over axis 0 gives the mean of each over all batches.
                mean_res = np.mean([valid_net() for _ in valid_data], 0)
                mon.plot('validation error', mean_res[0])
                mon.plot('validation loss', mean_res[1])
                # NOTE(review): the trailing 5 is presumably the number of
                # checkpoints to keep - confirm against nn.Monitor.dump.
                mon.dump(nn.utils.shared2numpy(net.params),
                         '%s.npy' % architecture, 5)
    print('Training finished!')
예제 #2
0
def style_transfer():
    """Stylize the test image(s) with the trained decoders and save the results.

    Loads the final decoder weights (``decoder-<layer>-final.npz``) through a
    monitor rooted at ``args.path_to_weight_files``, compiles the transfer
    function and applies it to every (input, style) pair. Each of
    ``input_path_test`` and ``style_path_test`` may be a single file or a
    directory:

    * file / file      - one pair;
    * file / directory - the input against every style in the directory;
    * directory / file - every input in the directory against the style;
    * otherwise        - both are listed as directories and every
      combination is processed.

    Inputs, styles and outputs are all written through ``mon.imwrite``.
    """
    enc = VGG19(input_shape)
    decs = [Decoder(enc, i, name='decoder %d' % i) for i in indices]
    sty_net = StyleTransfer(enc, decs)

    X = T.tensor4('input')
    Y = T.tensor4('style')

    mon = nn.Monitor(current_folder=args.path_to_weight_files)
    for dec, layer in zip(decs, indices):
        weights = mon.load('decoder-%d-final.npz' % layer)
        nn.utils.numpy2shared(weights, dec.params)

    nn.set_training_off()
    X_styled = sty_net(X, Y)
    transfer = nn.function([X, Y], X_styled, name='transfer style')

    def _load(path):
        # Read an image from disk and run the test-time preprocessing on it.
        return prep_image_test(misc.imread(path))

    def _stem(path):
        # Strip the extension whatever its length ('.jpg', '.jpeg', none);
        # slicing off a fixed four characters mangles anything but '.xxx'.
        return os.path.splitext(path)[0]

    input_is_file = os.path.isfile(input_path_test)
    style_is_file = os.path.isfile(style_path_test)

    if input_is_file and style_is_file:
        content = _load(input_path_test)
        style = _load(style_path_test)
        output = transfer(content, style)
        mon.imwrite('test %s' % _stem(input_path_test), content)
        mon.imwrite('test %s' % _stem(style_path_test), style)
        mon.imwrite('test %s %s' % (_stem(input_path_test), _stem(style_path_test)), output)
    elif input_is_file and os.path.isdir(style_path_test):
        content = _load(input_path_test)
        for style_file in os.listdir(style_path_test):
            style = _load(os.path.join(style_path_test, style_file))
            output = transfer(content, style)
            mon.imwrite('test %s' % _stem(style_file), style)
            mon.imwrite('test %s %s' % (_stem(input_path_test), _stem(style_file)), output)
        mon.imwrite('test %s' % _stem(input_path_test), content)
    elif os.path.isdir(input_path_test) and style_is_file:
        style = _load(style_path_test)
        for input_file in os.listdir(input_path_test):
            content = _load(os.path.join(input_path_test, input_file))
            output = transfer(content, style)
            mon.imwrite('test %s' % _stem(input_file), content)
            mon.imwrite('test %s %s' % (_stem(input_file), _stem(style_path_test)), output)
        mon.imwrite('test %s' % _stem(style_path_test), style)
    else:
        # Both paths are treated as directories; os.listdir raises if either
        # one is not.
        style_files = os.listdir(style_path_test)
        input_files = os.listdir(input_path_test)
        for style_file in style_files:
            style = _load(os.path.join(style_path_test, style_file))
            for input_file in input_files:
                content = _load(os.path.join(input_path_test, input_file))
                output = transfer(content, style)
                mon.imwrite('test %s' % _stem(input_file), content)
                mon.imwrite('test %s %s' % (_stem(input_file), _stem(style_file)), output)
            mon.imwrite('test %s' % _stem(style_file), style)
    mon.flush()
    print('Testing finished!')
예제 #3
0
def train():
    """Train one decoder per selected encoder layer and validate style transfer.

    For every layer in ``indices`` a ``Decoder`` is built on top of the
    shared ``VGG19`` encoder and optimised with Adam on
    ``pixel_loss + weight * feature_loss``. Every ``val_freq`` iterations the
    last decoder's reconstruction is written, the validation input/style
    pairs are stylized and written, and all decoder weights are dumped.
    After the data iterator is exhausted the final weights are stored as
    ``decoder-<layer>-final.npz``.

    Hyper-parameters (``bs``, ``input_shape``, ``indices``, ``lr``,
    ``weight``, ``decay``, ``n_epochs``, ``val_freq``, ``print_freq``, the
    data paths, ...) are read from the enclosing module.

    Raises:
        ValueError: if the pixel or feature loss of any decoder becomes NaN
            or infinite.
    """
    enc = VGG19(input_shape)
    decs = [Decoder(enc, i, name='decoder %d' % i) for i in indices]
    sty_net = StyleTransfer(enc, decs)

    X = T.tensor4('input')
    Y = T.tensor4('style')
    idx = T.scalar('iter', 'int32')
    X_ = nn.placeholder((bs,) + input_shape[1:], name='input_plhd')
    Y_ = nn.placeholder((bs,) + input_shape[1:], name='style_plhd')
    lr_ = nn.placeholder(value=lr, name='lr_plhd')

    nn.set_training_on()
    losses = [dec.cost(X) for dec in decs]
    # Optimise with the placeholder ``lr_`` rather than the constant ``lr``;
    # otherwise the annealing registered below never reaches the optimiser.
    updates = [nn.adam(loss[0] + weight * loss[1], dec.trainable, lr_) for loss, dec in zip(losses, decs)]
    nn.anneal_learning_rate(lr_, idx, 'inverse', decay=decay)
    # The iteration counter is a real input: the training loop calls each
    # function with the current iteration, and the annealing is driven by it.
    trains = [nn.function([idx], [loss[0], loss[1], dec(X, True)], givens={X: X_}, updates=update,
                          name='train decoder')
              for loss, dec, update in zip(losses, decs, updates)]

    nn.set_training_off()
    X_styled = sty_net(X, Y)
    transfer = nn.function([], X_styled, givens={X: X_, Y: Y_}, name='transfer style')

    data_train = DataManager(X_, input_path_train, bs, n_epochs, True, num_val_imgs=num_val_imgs, input_shape=input_shape)
    data_test = DataManagerStyleTransfer((X_, Y_), (input_path_val, style_path_val), bs, 1, input_shape=input_shape)
    mon = nn.Monitor(model_name='WCT', valid_freq=print_freq)

    print('Training...')
    for it in data_train:
        # One optimisation step per decoder; each result is
        # [pixel loss, feature loss, reconstruction].
        results = [train_fn(it) for train_fn in trains]

        with mon:
            for layer, res in zip(indices, results):
                if np.isnan(res[0] + res[1]) or np.isinf(res[0] + res[1]):
                    raise ValueError('Training failed!')
                mon.plot('pixel loss at layer %d' % layer, res[0])
                mon.plot('feature loss at layer %d' % layer, res[1])

            if it % val_freq == 0:
                # Only the reconstruction of the last decoder is written,
                # stated explicitly instead of relying on the variables
                # left over from the loop above.
                mon.imwrite('recon img at layer %d' % indices[-1], results[-1][2])

                for i in data_test:
                    img = transfer()
                    mon.imwrite('stylized image %d' % i, img)
                    mon.imwrite('input %d' % i, X_.get_value())
                    mon.imwrite('style %d' % i, Y_.get_value())

                # NOTE(review): the trailing 5 is presumably the number of
                # checkpoints to keep - confirm against nn.Monitor.dump.
                for layer, dec in zip(indices, decs):
                    mon.dump(nn.utils.shared2numpy(dec.params), 'decoder-%d.npz' % layer, 5)
    mon.flush()
    for layer, dec in zip(indices, decs):
        mon.dump(nn.utils.shared2numpy(dec.params), 'decoder-%d-final.npz' % layer)
    print('Training finished!')
예제 #4
0
def test_monitor_plot():
    """Smoke-test ``nn.Monitor.plot`` by logging exp(-i) for 20 iterations."""
    import os
    import shutil

    log_every = 2
    total_steps = 20

    # Remove any leftover 'results' directory from a previous run
    # (presumably where the monitor writes its output - TODO confirm).
    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(print_freq=log_every)
    step = 0
    while step < total_steps:
        with mon:
            mon.plot('dummy plot', np.exp(-step))
        step += 1
    mon.flush()
예제 #5
0
def test_monitor_dump():
    """Check that versioned dumps written by ``nn.Monitor`` can be loaded back.

    An array is incremented and dumped as 'foo.npy' on each of 10 iterations
    while a copy of every intermediate value is kept. A second monitor
    opened on 'results/my_model/run1' then loads versions 6, 8 and 10 and
    compares them with the recorded values at indices 6, 8 and 9.
    """
    import os
    import shutil

    tensor_shape = (128, 256, 3, 3)
    total_steps = 10
    log_every = 2

    weights = np.random.rand(*tensor_shape).astype('float32')

    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(print_freq=log_every)
    snapshots = []
    for _ in range(total_steps):
        # Updated in place on purpose: the very same array object is handed
        # to mon.dump on every iteration.
        weights += 1.
        snapshots.append(weights.copy())
        with mon:
            mon.dump(weights, 'foo.npy', 3)
    mon.flush()

    # Re-open the run folder with a fresh monitor and read the dumps back.
    mon = nn.Monitor(current_folder='results/my_model/run1')
    loaded = []
    for version in (6, 8, 10):
        loaded.append(mon.load('foo.npy', version))
    expected = np.array(snapshots)[[6, 8, 9]]
    utt.assert_allclose(expected, loaded)
예제 #6
0
def test_monitor_hist():
    """Smoke-test ``nn.Monitor.hist`` with a differently named tensor per step.

    A fixed uniform random tensor is perturbed with Gaussian noise whose
    scale grows with the iteration, and logged under 'filter<i>'.
    """
    import os
    import shutil

    log_every = 2
    tensor_shape = (64, 32, 3, 3)
    total_steps = 20

    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(valid_freq=log_every)
    base = np.random.uniform(-1, 1, tensor_shape)
    for step in range(total_steps):
        with mon:
            noise = np.random.normal(.5, size=tensor_shape)
            perturbed = base + step / 10. * noise
            mon.hist('filter%d' % step, perturbed)
    mon.flush()
예제 #7
0
def test_monitor_hist():
    """Smoke-test ``nn.Monitor`` with a plot and two histograms per step.

    Each iteration logs a scalar under 'foo', a histogram 'filter_last'
    (passed ``last_only=True`` and ``n_bins=10``) and a histogram 'filter',
    both derived from the same Gaussian tensor scaled by the iteration.
    """
    import os
    import shutil

    log_every = 2
    tensor_shape = (64, 32, 3, 3)
    total_steps = 20

    if os.path.exists('results'):
        shutil.rmtree('results')

    mon = nn.Monitor(print_freq=log_every, hist_last=True)
    base = np.random.normal(scale=.25, size=tensor_shape)
    for step in range(total_steps):
        with mon:
            mon.plot('foo', step**2 / 4.)
            # Both histograms share the scaled tensor and differ only in
            # the additive offset.
            scaled = base * (step + 1) * .25
            mon.hist('filter_last', scaled + step / 10., last_only=True, n_bins=10)
            mon.hist('filter', scaled + step / 15.)
    mon.flush()
예제 #8
0
def train():
    """Train the Augmented CycleGAN on Edges2Shoes with monitoring.

    Compiles functions for the discriminator step, the generator step, the
    discriminator predictions and the gradient norms, plus a second
    discriminator step compiled after linear learning-rate annealing has
    been set up; the latter is used once ``epoch`` exceeds ``n_epochs``.
    Every ``valid_freq`` iterations the validation visuals are written and
    all sub-network parameters are dumped; a final dump follows training.

    Hyper-parameters (``bs``, ``latent_dim``, ``image_size``, ``lr``,
    ``beta1``, ``lambda_*``, ``n_epochs``, ``n_epochs_decay``, ...) are read
    from the enclosing module.
    """
    X_A_full = T.tensor4('A')
    X_B_full = T.tensor4('B')
    X_A = pre_process(X_A_full)
    X_B = pre_process(X_B_full)
    z = nn.utils.srng.normal((bs, latent_dim))
    idx = T.scalar('iter')

    plhd_shape = (bs, 3, image_size*4, image_size*4)
    X_A_ = nn.placeholder(plhd_shape, name='A_plhd')
    X_B_ = nn.placeholder(plhd_shape, name='B_plhd')
    lr_ = nn.placeholder(value=lr, name='lr_plhd')

    net = AugmentedCycleGAN((None, 3, image_size, image_size), latent_dim, n_gen_filters, n_dis_filters,
                            n_enc_filters, 3, use_dropout, use_sigmoid, use_latent_gan)

    nn.set_training_on()
    learn_outputs = net.learn(X_A, X_B, z, lambda_A, lambda_B, lambda_z_B,
                              lr=lr_, beta1=beta1, max_norm=max_norm)
    updates_dis, updates_gen, dis_losses, dis_preds, gen_losses, grad_norms = learn_outputs

    train_dis = nn.function(
        [], list(dis_losses.values()),
        updates=updates_dis,
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='train discriminators')
    train_gen = nn.function(
        [], list(gen_losses.values()),
        updates=updates_gen,
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='train generators')
    discriminate = nn.function(
        [], list(dis_preds.values()),
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='discriminate')
    compute_grad_norms = nn.function(
        [], list(grad_norms.values()),
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='compute grad norms')

    # The annealing is set up only now, so among the training functions only
    # the one compiled below takes the iteration input.
    nn.anneal_learning_rate(lr_, idx, 'linear', num_iters=n_epochs_decay)
    train_dis_decay = nn.function(
        [idx], list(dis_losses.values()),
        updates=updates_dis,
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='train discriminators with decay')

    nn.set_training_off()
    # Latent codes are drawn once so that visuals from different iterations
    # use the same codes.
    fixed_z = T.constant(np.random.normal(size=(bs, latent_dim)), dtype='float32')
    fixed_multi_z = T.constant(np.repeat(np.random.normal(size=(n_multi, latent_dim)), bs, 0), dtype='float32')
    visuals = net.generate_cycle(X_A, X_B, fixed_z)
    multi_fake_B = net.generate_multi(X_A, fixed_multi_z)
    visualize_single = nn.function(
        [], list(visuals.values()),
        givens={X_A_full: X_A_, X_B_full: X_B_},
        name='visualize single')
    visualize_multi = nn.function([], multi_fake_B, givens={X_A_full: X_A_}, name='visualize multi')

    train_data = Edges2Shoes((X_A_, X_B_), bs, n_epochs + n_epochs_decay + 1, 'train', True)
    val_data = Edges2Shoes((X_A_, X_B_), bs, 1, 'val', False, num_data=bs)
    mon = nn.Monitor(model_name='Augmented_CycleGAN', print_freq=print_freq)

    def _dump_all(*keep):
        # Dump every sub-network; ``keep`` is forwarded unchanged as the
        # optional third argument of mon.dump.
        targets = [
            (net.netG_A_B, 'gen_A_B.npy'),
            (net.netG_B_A, 'gen_B_A.npy'),
            (net.netD_A, 'dis_A.npy'),
            (net.netD_B, 'dis_B.npy'),
            (net.netE_B, 'enc_B.npy'),
        ]
        if use_latent_gan:
            targets.append((net.netD_z_B, 'dis_z_B.npy'))
        for module, file_name in targets:
            mon.dump(nn.utils.shared2numpy(module.params), file_name, *keep)

    print('Training...')
    for it in train_data:
        epoch = 1 + it // (len(train_data) // bs)

        with mon:
            if epoch <= n_epochs:
                res_dis = train_dis()
            else:
                res_dis = train_dis_decay(epoch - n_epochs)
            res_gen = train_gen()
            preds = discriminate()
            grads_ = compute_grad_norms()

            mon.plot('lr', lr_.get_value())

            for tag, value in zip(dis_losses, res_dis):
                mon.plot(tag, value)

            for tag, value in zip(gen_losses, res_gen):
                mon.plot(tag, value)

            for tag, value in zip(dis_preds, preds):
                mon.hist(tag, value)

            for tag, value in zip(grad_norms, grads_):
                mon.plot(tag, value)

            if it % valid_freq == 0:
                # Only the outputs of the last validation batch are kept.
                for _ in val_data:
                    vis_single = visualize_single()
                    vis_multi = visualize_multi()

                for tag, images in zip(visuals, vis_single):
                    mon.imwrite(tag, images[:n_imgs_to_save], callback=unnormalize)

                for j, fake_B in enumerate(vis_multi):
                    mon.imwrite('fake_B_multi_%d.jpg' % j, fake_B, callback=unnormalize)

                _dump_all(5)

    mon.flush()
    _dump_all()
    print('Training finished!')