def evaluate(fakes):
    real_images = T.matrix()
    fake_images = T.matrix()

    # Evaluator cost: binary crossentropy with real images labelled 1 and fakes labelled 0
    cost = T.nnet.binary_crossentropy(_evaluator(real_images), swft.floatX(1)).mean()
    cost += T.nnet.binary_crossentropy(_evaluator(fake_images), swft.floatX(0)).mean()

    # Classification accuracy at a 0.5 threshold (computed for reference, not used below)
    real_accuracy = T.ge(_evaluator(real_images), swft.floatX(0.5)).mean()
    fake_accuracy = T.lt(_evaluator(fake_images), swft.floatX(0.5)).mean()
    accuracy = (real_accuracy + fake_accuracy) / swft.floatX(2)

    real_train, real_dev, real_test = swft.mnist.load(BATCH_SIZE)

    assert len(fakes) == 60000
    fakes_train = fakes[:50000]
    fakes_dev = fakes[50000:]

    def train_epoch():
        # Yield (real batch, fake batch) pairs; reshuffle the fakes each epoch
        numpy.random.shuffle(fakes_train)
        batched = fakes_train.reshape(-1, BATCH_SIZE, 784)
        for i, (real_images, _) in enumerate(real_train()):
            yield [real_images, batched[i]]

    def dev_epoch():
        yield [real_dev().next()[0], fakes_dev]

    swft.train(
        [real_images, fake_images],
        [cost],
        train_epoch,
        dev_data=dev_epoch,
        epochs=EPOCHS,
        print_every=1000
    )

    # Report the trained evaluator's cost on the dev set, then free its parameters
    fn = theano.function([real_images, fake_images], cost)
    result = fn(real_dev().next()[0], fakes_dev)

    swft.delete_params('Evaluator')

    return result
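# Usage sketch (not part of the original source): evaluate() expects 60,000 generated
# MNIST samples as a (60000, 784) float array. Assuming a trained `generator` op like
# the one in the GAN example below, the fakes could be produced in batches like this;
# `sample_fakes` is an illustrative name, not swft API.
sample_fakes = theano.function([], generator(1000))
fakes = numpy.concatenate([sample_fakes() for _ in range(60)])  # shape (60000, 784)
score = evaluate(fakes)  # evaluator's dev cost; higher means the fakes are harder to tell from real MNIST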
def generate_images(epoch):
    def save_images(images, filename):
        # (earlier lines of this helper, which presumably arrange the batch into a
        # single 2-D grid as in the GAN example below, are missing from this excerpt)
        plt.cla()
        plt.imshow(images, cmap=plt.get_cmap('gray'), vmin=0, vmax=1)
        plt.savefig(filename + '_epoch' + str(epoch))

    images, targets = dev_data().next()
    samples, reconstructions, latents = sample_fn(images)
    save_images(samples, 'samples')
    save_images(reconstructions, 'reconstructions')

    # Save a scatterplot of the first two dims of the latent representation
    plt.clf()
    plt.cla()
    plt.scatter(*(latents[:, 0:2].T), c=targets)
    plt.xlim(-4 * LATENT_STDEV, 4 * LATENT_STDEV)
    plt.ylim(-4 * LATENT_STDEV, 4 * LATENT_STDEV)
    plt.savefig('latents_epoch' + str(epoch))

# Start training!
swft.train(
    [images, targets],
    [full_enc_cost, dec_cost, discrim_cost],
    param_sets=[enc_params, dec_params, discrim_params],
    optimizers=[
        lasagne.updates.adam,
        lasagne.updates.adam,
        lasagne.updates.adam
    ],
    print_vars=[reg_cost, reconst_cost, discrim_cost],
    train_data=train_data,
    dev_data=dev_data,
    epochs=EPOCHS,
    callback=generate_images,
    print_every=1000
)
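# For reference (a sketch, not part of the original excerpt): the parameter sets passed
# above can be collected with swft.search, mirroring the GAN example below. The
# 'Encoder'/'Decoder' name prefixes are assumptions about how the layers were named.
enc_params = swft.search(full_enc_cost, lambda x: hasattr(x, 'param') and 'Encoder' in x.name)
dec_params = swft.search(dec_cost, lambda x: hasattr(x, 'param') and 'Decoder' in x.name)
discrim_params = swft.search(discrim_cost, lambda x: hasattr(x, 'param') and 'Discriminator' in x.name)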
discrim_cost += T.nnet.binary_crossentropy(disc_inputs, swft.floatX(1)).mean()
discrim_cost /= swft.floatX(2.0)
discrim_cost.name = 'discrim_cost'

train_data, dev_data, test_data = swft.mnist.load(BATCH_SIZE)

gen_params = swft.search(gen_cost, lambda x: hasattr(x, 'param') and 'Generator' in x.name)
discrim_params = swft.search(discrim_cost, lambda x: hasattr(x, 'param') and 'Discriminator' in x.name)

_sample_fn = theano.function([], generator(100))

def generate_image(epoch):
    sample = _sample_fn()
    # the transpose is rowx, rowy, height, width -> rowy, height, rowx, width
    sample = sample.reshape((10, 10, 28, 28)).transpose(1, 2, 0, 3).reshape((10 * 28, 10 * 28))
    plt.imshow(sample, cmap=plt.get_cmap('gray'), vmin=0, vmax=1)
    plt.savefig('epoch' + str(epoch))

swft.train(
    symbolic_inputs,
    [gen_cost, discrim_cost],
    train_data,
    dev_data=dev_data,
    param_sets=[gen_params, discrim_params],
    optimizers=[
        functools.partial(lasagne.updates.momentum, learning_rate=0.1, momentum=0.5),
        functools.partial(lasagne.updates.momentum, learning_rate=0.1, momentum=0.5)
    ],
    epochs=EPOCHS,
    print_every=1000,
    callback=generate_image
)
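# For context (a sketch, not the original source): the costs driving the training call
# above are the standard GAN objectives. `discriminator` and `disc_fake` are assumed
# names; only the real-image term and the /2 averaging appear in the excerpt above.
disc_fake = discriminator(generator(BATCH_SIZE))
gen_cost = T.nnet.binary_crossentropy(disc_fake, swft.floatX(1)).mean()      # generator tries to make fakes look real
gen_cost.name = 'gen_cost'
discrim_cost = T.nnet.binary_crossentropy(disc_fake, swft.floatX(0)).mean()  # discriminator tries to flag fakes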