def example_train(n_epochs=100, batch_size=20, gradient_reg=1.0):
    import timeit

    # debug flags
    print_initial_parameters = False
    print_initial_gradient_cost = False
    print_initial_gradient_norms = False
    print_end_parameters = False
    plot_time = 10

    # synthetic 1-D data: "fake" samples centred at 0, "real" samples at 3
    fake_x_data = generate_data(10000, 0)
    real_x_data = generate_data(10000, 3)
    fake_x_valid = generate_data(1000, 0)
    real_x_valid = generate_data(1000, 3)

    index = T.lscalar()
    x_fake = T.matrix('x_f')
    x_real = T.matrix('x_r')

    n_train_batches = fake_x_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = fake_x_valid.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    rng = np.random.RandomState(1000)

    network = Simple_Discriminator(
        rng=rng,
        input_fake=x_fake,
        input_real=x_real,
        # info_layers=[(5, 1, 20), (5, 20, 20), (5, 20, 20), (5, 20, 1)]
        info_layers=[(5, 1, 20), (1, 20, 1)])

    # maximise the real/fake gap while keeping the gradient bound away from 1
    cost = -network.mean_difference + gradient_reg / (1.0 - network.gradient_cost)

    if print_initial_parameters:
        print('printing initial parameters')
        for param in network.params:
            print(param.get_value())

    get_max_gradient = theano.function(inputs=[], outputs=network.max_gradient)
    get_gradient_norms = theano.function(
        inputs=[],
        outputs=[layer.gradient_norms for layer in network.layers])
    get_gradient_cost = theano.function(inputs=[], outputs=network.gradient_cost)

    if print_initial_gradient_cost:
        print('initial gradient cost: %f' % get_gradient_cost())
    if print_initial_gradient_norms:
        print('printing gradient norms')
        for matrix in get_gradient_norms():
            print(matrix)

    validate_model = theano.function(
        inputs=[index],
        outputs=network.mean_difference,
        givens={
            x_fake: fake_x_valid[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_valid[index * batch_size:(index + 1) * batch_size]
        })

    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x_fake: fake_x_data[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_data[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')
    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0
    while epoch < n_epochs:
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute the mean real/fake difference on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_max_gradient()
                print(
                    'epoch %i, minibatch %i/%i, validation mean difference %f, max_gradient %f'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iter + 1) % plot_frequency == 0:
                with open('test_discriminator_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_graph()
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))

    if print_end_parameters:
        print('printing end parameters')
        for param in network.params:
            print(param.get_value())
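
# Every trainer in this file builds its update list from a module-level
# helper with the signature rmsprop(cost, params). The repository's own
# implementation is not shown here; the sketch below is one standard Theano
# formulation of RMSProp, with illustrative hyperparameters (lr, rho, eps
# are assumptions, not values taken from this code).
def rmsprop_sketch(cost, params, lr=0.001, rho=0.9, eps=1e-6):
    """Return a Theano update list implementing vanilla RMSProp."""
    grads = T.grad(cost, params)
    updates = []
    for param, grad in zip(params, grads):
        # running average of the squared gradient, one accumulator per param
        acc = theano.shared(param.get_value() * 0.0,
                            broadcastable=param.broadcastable)
        new_acc = rho * acc + (1.0 - rho) * grad ** 2
        updates.append((acc, new_acc))
        # divide the step by the root of the accumulated square
        updates.append((param, param - lr * grad / T.sqrt(new_acc + eps)))
    return updates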
def example_train(data, n_epochs=100, batch_size=20):
    import timeit

    # layer shapes for the discriminator, generator, and reversor
    d_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]

    validation_frequency = 1
    plot_time = 10
    initial_r_train = 20
    g_per_epoch = 5
    r_per_epoch = 9

    data_train = data[0]
    data_valid = data[1]
    n_train_batches = data_train.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = data_valid.get_value(borrow=True).shape[0] // batch_size

    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)

    print('... building the model')
    index = T.lscalar()
    x_rand = T.matrix('x_rand')
    x_real = T.matrix('x_real')

    generator = Simple_Generator(
        rng=rng,
        input_rand=x_rand,
        g_shape=g_shape,
        r_shape=r_shape)
    discriminator = Simple_Discriminator(
        rng=rng,
        input_fake=generator.output,
        input_real=x_real,
        info_layers=d_shape)

    g_cost = generator.mse(x_rand) + discriminator.output.mean()
    g_updates = rmsprop(g_cost, generator.g_params)
    # the 1.0 before the / in r_cost and d_cost is a hyperparameter
    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)
    d_cost = -discriminator.mean_difference + 1.0 / (1.0 - discriminator.gradient_cost)
    d_updates = rmsprop(d_cost, discriminator.params)

    train_generator = theano.function(
        inputs=[],
        outputs=g_cost,
        updates=g_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    train_reversor = theano.function(
        inputs=[],
        outputs=r_cost,
        updates=r_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    train_discriminator = theano.function(
        inputs=[index],
        outputs=d_cost,
        updates=d_updates,
        givens={
            x_real: data_train[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    validate_model = theano.function(
        inputs=[index],
        outputs=discriminator.mean_difference,
        givens={
            x_real: data_valid[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    get_max_gradient = theano.function(
        inputs=[],
        outputs=[generator.max_gradient, discriminator.max_gradient])
    get_reversor_error = theano.function(
        inputs=[],
        outputs=generator.mse(x_rand),
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })

    print('... training')
    start_time = timeit.default_timer()

    # warm up the reversor before adversarial training starts
    for _ in range(initial_r_train):
        train_reversor()

    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            train_discriminator(minibatch_index)
        for _ in range(g_per_epoch):
            train_generator()
        for _ in range(r_per_epoch):
            train_reversor()
        if epoch % validation_frequency == 0:
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
            max_grad = get_max_gradient()
            print('epoch %i, mean difference %f, r_error %f, r_grad %f, d_grad %f' %
                  (epoch, this_validation_loss, get_reversor_error(),
                   max_grad[0], max_grad[1]))
        if epoch % plot_time == 0:
            with open('test_GAN_g.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_GAN_r.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            with open('test_GAN_d.pkl', 'wb') as f:
                pickle.dump(discriminator, f)
            example_graph()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
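
# Hypothetical invocation of the GAN trainer above. It assumes `data` is a
# (train, valid) pair of Theano shared variables holding 2-column real
# samples, matching d_shape's input width of 2 and the slicing in the givens
# above. The unit-circle dataset is purely illustrative.
def _make_circle_data(n, seed=0):
    """Shared variable of n noisy points on the unit circle (illustration)."""
    rng = np.random.RandomState(seed)
    theta = rng.uniform(0.0, 2.0 * np.pi, size=(n, 1))
    pts = np.hstack([np.cos(theta), np.sin(theta)])
    pts += rng.normal(scale=0.05, size=pts.shape)
    return theano.shared(pts.astype(theano.config.floatX), borrow=True)

# example_train((_make_circle_data(10000), _make_circle_data(1000, seed=1)))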
def test_mnist(n_epoch=1000, batch_size=500):
    from load_mnist import load_data_mnist
    import timeit

    valid_time = 1
    scale_factor = 0.01
    # one entry per layer
    CNN_shape = [[batch_size, 28, 28, 5, 5, 1, 2, 20],
                 [batch_size, 24, 24, 5, 5, 20, 2, 50]]
    fc_info = [[2, 20 * 20 * 50, 500], [2, 500, 10]]

    datasets = load_data_mnist()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')  # one-hot labels
    rng = np.random.RandomState(1001)

    reshaped_input = x.reshape((batch_size, 1, 28, 28))
    convnet = LCNN(rng, input=reshaped_input, shape_layers=CNN_shape)
    fc_layer_input = convnet.output.flatten(2)
    fc_layer = LMLP(rng, input=fc_layer_input, info_layers=fc_info)
    print('number of parameters: ' + str(fc_layer.n_params + convnet.n_params))

    params = convnet.params + fc_layer.params
    scale_params = convnet.scale_params + fc_layer.scale_params
    max_gradient = convnet.max_gradient * fc_layer.max_gradient
    get_gradient_max = theano.function(inputs=[], outputs=max_gradient)

    cost = fc_layer.mse(y * scale_factor)
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    prediction = T.argmax(fc_layer.output, axis=1)
    ground_truth = T.argmax(y, axis=1)
    accuracy = T.mean(T.eq(prediction, ground_truth))
    validate_model_acc = theano.function(
        inputs=[index],
        outputs=accuracy,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    updates = rmsprop(cost, params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # swap each parameter for its rescaled version after every epoch
    rescale_model = theano.function(inputs=[],
                                    updates=list(zip(params, scale_params)))

    print('... training')
    start_time = timeit.default_timer()
    for epoch in range(n_epoch):
        if epoch % valid_time == 0:
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
            validation_acc = [validate_model_acc(i) for i in range(n_valid_batches)]
            this_validation_acc = np.mean(validation_acc)
            print('epoch %i, mse %f, g_max %f, acc %f' %
                  (epoch, this_validation_loss, get_gradient_max(),
                   this_validation_acc))
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
        rescale_model()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
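
# test_mnist relies on the scale_params exposed by LCNN/LMLP, which
# rescale_model swaps in for params once per epoch. This file does not define
# them; a plausible reading (an assumption, not confirmed by this code) is a
# per-layer renormalisation that shrinks any weight whose norm exceeds a
# bound, keeping the product of layer gradients (max_gradient) under control.
# A toy sketch of one such rule on a single weight matrix, using the
# Frobenius norm as a cheap upper bound on the operator norm:
def scale_param_sketch(W, max_norm=5.0):
    """Symbolic W rescaled so its Frobenius norm stays <= max_norm."""
    norm = T.sqrt((W ** 2).sum())
    return W * T.minimum(1.0, max_norm / (norm + 1e-8))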
def example_train(n_epochs=1000, batch_size=200):
    import timeit

    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_per_epoch = 20
    r_per_epoch = 10
    plot_time = 10
    print_validation_g_parameters = False
    print_validation_r_parameters = False
    initial_reversor_train = 1000

    print('... building the model')
    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)
    x_rand = T.matrix('x_rand')

    generator = Simple_Generator(rng=rng,
                                 input_rand=x_rand,
                                 g_shape=g_shape,
                                 r_shape=r_shape)

    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)

    # target cost: push generated points onto the unit circle x^2 + y^2 = 1
    f = lambda x: ((1.0 - T.dot(x ** 2, np.array([[1.0], [1.0]]))) ** 2).mean()
    cost = f(generator.output)
    g_cost = generator.mse(x_rand) + cost
    g_updates = rmsprop(g_cost, generator.g_params)

    train_reversor = theano.function(
        inputs=[],
        outputs=r_cost,
        updates=r_updates,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    train_generator = theano.function(
        inputs=[],
        outputs=g_cost,
        updates=g_updates,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    test_generator = theano.function(
        inputs=[],
        outputs=cost,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    test_reversor = theano.function(
        inputs=[],
        outputs=generator.mse(x_rand),
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    get_max_gradient = theano.function(inputs=[], outputs=generator.max_gradient)

    print('... training')
    start_time = timeit.default_timer()

    # warm up the reversor before alternating training starts
    for _ in range(initial_reversor_train):
        train_reversor()

    for epoch in range(n_epochs):
        if epoch % plot_time == 0:
            if print_validation_g_parameters:
                print('printing generator parameters')
                for param in generator.g_params:
                    print(param.get_value())
            if print_validation_r_parameters:
                print('printing reversor parameters')
                for param in generator.r_params:
                    print(param.get_value())
            with open('test_generator_model.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_reversor_model.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            example_graph()
        for _ in range(g_per_epoch):
            train_generator()
        for _ in range(r_per_epoch):
            train_reversor()
        gen_cost = test_generator()
        rev_cost = test_reversor()
        max_grad = get_max_gradient()
        print('epoch %i, generator cost %f, reversor cost %f, max gradient %f' %
              (epoch, gen_cost, rev_cost, max_grad))

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
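
# Numeric sanity check of the target cost f used above: (1 - (x^2 + y^2))^2
# vanishes exactly on the unit circle and grows with the distance from it, so
# minimising f pushes generated points onto the circle. Plain NumPy,
# illustration only.
def _check_circle_cost():
    on_circle = np.array([[1.0, 0.0], [0.0, -1.0]])
    off_circle = np.array([[0.0, 0.0], [2.0, 0.0]])
    for pts in (on_circle, off_circle):
        print(((1.0 - (pts ** 2).sum(axis=1)) ** 2).mean())
    # prints 0.0 for the points on the circle, 5.0 for the points off it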
def example_train(n_epochs=1000, batch_size=20, gradient_reg=1.0, data_num=2):
    import timeit

    plot_time = 100
    # gradient_reg is accepted for interface parity with the other trainers
    # but unused here: the cost below is plain MSE with no gradient term

    datasets = load_data_test(data_num)
    train_set_x, train_set_y = datasets[1]
    valid_set_x, valid_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')
    rng = np.random.RandomState(1001)

    # construct the MLP
    network = LMLP(rng=rng,
                   input=x,
                   info_layers=[(5, 1, 20), (5, 20, 100), (5, 100, 20),
                                (5, 20, 1)])
    cost = network.mse(y)

    validate_model = theano.function(
        inputs=[index],
        outputs=network.mse(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    get_gradient_max = theano.function(inputs=[], outputs=network.max_gradient)

    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # swap each parameter for its rescaled version after every minibatch
    rescale_model = theano.function(
        inputs=[],
        updates=list(zip(network.params, network.scale_params)))

    print('... training')
    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0
    while epoch < n_epochs:
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            rescale_model()
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute the MSE on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_gradient_max()
                print('epoch %i, minibatch %i/%i, mse %f, g_max %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iter + 1) % plot_frequency == 0:
                with open('test_mlp_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_predict(1000, data_num)
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
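
# Hypothetical entry point for the regression experiment above; the guard and
# argument values are illustrative, and load_data_test / example_predict are
# assumed to be this module's own helpers.
if __name__ == '__main__':
    example_train(n_epochs=1000, batch_size=20, data_num=2)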