def demo_compare_dtp_methods(predictor_constructors, n_epochs=10, minibatch_size=20, n_tests=20, onehot=True, accumulator=None):

    dataset = get_mnist_dataset(flat=True, binarize=False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={name: p(dataset.input_size, n_categories) for name, p in predictor_constructors.iteritems() if name in predictor_constructors},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator
        )

    plot_learning_curves(learning_curves)
def demo_dtp_varieties(
        hidden_sizes=[240],
        n_epochs=10,
        minibatch_size=20,
        n_tests=20,
        hidden_activation='tanh',
        output_activation='sigm',
        optimizer='adamax',
        learning_rate=0.01,
        noise=1,
        predictors=['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng=1234,
        use_bias=True,
        live_plot=False,
        plot=False
        ):
    """
    Compare the different DTP varieties against a baseline MLP on MNIST.

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(
        name,
        input_size=dataset.input_size,
        target_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        optimizer=optimizer,
        learning_rate=learning_rate,
        noise=noise,
        use_bias=use_bias,
        rng=rng
        )) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors=predictors,
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
def demo_perceptron_dtp(
        hidden_sizes=[240],
        n_epochs=20,
        n_tests=20,
        minibatch_size=100,
        lin_dtp=True,
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(
        layers=[PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp=lin_dtp)
                for n_in, n_out in zip([dataset.input_size]+hidden_sizes, hidden_sizes+[dataset.target_size])],
        output_cost_function=None
        ).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        )

    plot_learning_curves(result)
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot({
                'visible-pos-chain': lv['wake_visible'].reshape((-1, 28, 28)),
                'visible-neg-chain': lv['sleep_visible'].reshape((-1, 28, 28)),
                })

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function = MultiLayerPerceptron(
            layer_sizes=[hidden_size, dataset.n_categories],
            input_size = dataset.input_size,
            hidden_activation='sig',
            output_activation='lin',
            w_init = normal_w_init(mag = 0.01, seed = 5)
            ),
        cost_function = softmax_negative_log_likelihood,
        optimizer = optimizer,
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            w_init = 0.01,
            rng = 5
            ),
        cost_function = softmax_negative_log_likelihood,
        optimizer = optimizer,
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():
        # EnableOmniscence allows us to plot internal variables (by referencing the .locals() attribute of a symbolic function.. see plot_fcn below)

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot(lv['wake_visible'].reshape((-1, 28, 28)), 'visible-pos-chain')
            dbplot(lv['sleep_visible'].reshape((-1, 28, 28)), 'visible-neg-chain')

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor (multinomial, logistic, or linear) on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat = True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat = True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
        )

    plot_learning_curves(results)
def compare_example_predictors(
        n_epochs=5,
        n_tests=20,
        minibatch_size=10,
        ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)  # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron': Perceptron(
                w=np.zeros((dataset.input_size, dataset.n_categories)),
                alpha=0.001
                ).to_categorical(n_categories=dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
        )
    # Results is a LearningCurveData object
    return learning_curve_data
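# A minimal usage sketch for compare_example_predictors (hypothetical __main__ wiring, not part of the original
# module): run the comparison and hand the returned LearningCurveData to plot_learning_curves, which is how the
# other demos in this collection display their results.
if __name__ == '__main__':
    learning_curves = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
    plot_learning_curves(learning_curves)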
def demo_mnist_mlp(test_mode = False):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """
    if test_mode:
        test_period = 200
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        test_period = 1000
        minibatch_size = 20
        n_epochs = 10
        dataset = get_mnist_dataset()

    # Setup the training and test functions
    classifier = MultiLayerPerceptron(
        layer_sizes=[500, 10],
        input_size = 784,
        hidden_activation='sig',
        output_activation='softmax',
        w_init = normal_w_init(mag = 0.01)
        )
    training_cost_function = normalized_negative_log_likelihood
    optimizer = SimpleGradientDescent(eta = 0.1)
    training_function = SupervisedTrainingFunction(classifier, training_cost_function, optimizer).compile()
    test_cost_function = percent_correct
    test_function = SupervisedTestFunction(classifier, test_cost_function).compile()

    def report_test(i):
        training_cost = test_function(dataset.training_set.input, dataset.training_set.target)
        print 'Training score at iteration %s: %s' % (i, training_cost)
        test_cost = test_function(dataset.test_set.input, dataset.test_set.target)
        print 'Test score at iteration %s: %s' % (i, test_cost)

    # Train and periodically report the test score.
    print 'Running MLP on MNIST Dataset...'
    for i, (_, image_minibatch, label_minibatch) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True)):
        if i % test_period == 0:
            report_test(i)
        training_function(image_minibatch, label_minibatch)
    report_test('Final')
    print '...Done.'
def demo_rbm_tutorial(
        eta = 0.01,
        n_hidden = 500,
        n_samples = None,
        minibatch_size = 10,
        plot_interval = 10,
        w_init_mag = 0.01,
        n_epochs = 1,
        persistent = False,
        seed = None
        ):
    """
    This tutorial trains a standard binary-binary RBM on MNIST, and allows you to view the weights and negative
    sampling chain.

    Note:
    For simplicity, it uses hidden/visible samples to compute the gradient.  It's actually better to use the hidden
    probabilities.
    """
    if is_test_mode():
        n_samples = 50
        n_epochs = 1
        plot_interval = 50
        n_hidden = 10

    data = get_mnist_dataset(flat = True).training_set.input[:n_samples]
    n_visible = data.shape[1]
    rng = np.random.RandomState(seed)
    activation = lambda x: (1./(1+np.exp(-x)) > rng.rand(*x.shape)).astype(float)

    w = w_init_mag*np.random.randn(n_visible, n_hidden)
    b_hid = np.zeros(n_hidden)
    b_vis = np.zeros(n_visible)

    if persistent:
        hid_sleep_state = np.random.rand(minibatch_size, n_hidden)

    for i, vis_wake_state in enumerate(minibatch_iterate(data, n_epochs = n_epochs, minibatch_size=minibatch_size)):

        hid_wake_state = activation(vis_wake_state.dot(w)+b_hid)
        if not persistent:
            hid_sleep_state = hid_wake_state
        vis_sleep_state = activation(hid_sleep_state.dot(w.T)+b_vis)
        hid_sleep_state = activation(vis_sleep_state.dot(w)+b_hid)

        # Update Parameters
        w_grad = (vis_wake_state.T.dot(hid_wake_state) - vis_sleep_state.T.dot(hid_sleep_state))/float(minibatch_size)
        w += w_grad * eta
        b_vis_grad = np.mean(vis_wake_state, axis = 0) - np.mean(vis_sleep_state, axis = 0)
        b_vis += b_vis_grad * eta
        b_hid_grad = np.mean(hid_wake_state, axis = 0) - np.mean(hid_sleep_state, axis = 0)
        b_hid += b_hid_grad * eta

        if i % plot_interval == 0:
            dbplot(w.T[:100].reshape(-1, 28, 28), 'weights')
            dbplot(vis_sleep_state.reshape(-1, 28, 28), 'dreams')
            print 'Sample %s' % i
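# The tutorial above notes that it is better to use hidden probabilities, rather than binary samples, when computing
# the gradient.  A minimal NumPy sketch of that variant of the CD-1 update, reusing the variable naming of
# demo_rbm_tutorial (this helper is an illustrative assumption, not part of the original module):
def cd1_update_with_hidden_probabilities(vis_wake_state, w, b_hid, b_vis, eta, rng):
    sigmoid = lambda x: 1. / (1 + np.exp(-x))
    hid_wake_prob = sigmoid(vis_wake_state.dot(w) + b_hid)  # P(hid=1 | visible data)
    hid_wake_sample = (hid_wake_prob > rng.rand(*hid_wake_prob.shape)).astype(float)  # Sample only to drive the chain
    vis_sleep_prob = sigmoid(hid_wake_sample.dot(w.T) + b_vis)  # Reconstruction probabilities
    hid_sleep_prob = sigmoid(vis_sleep_prob.dot(w) + b_hid)
    n_samples = float(vis_wake_state.shape[0])
    # Use probabilities (not samples) in both the positive and negative statistics.
    w += eta * (vis_wake_state.T.dot(hid_wake_prob) - vis_sleep_prob.T.dot(hid_sleep_prob)) / n_samples
    b_vis += eta * (np.mean(vis_wake_state, axis=0) - np.mean(vis_sleep_prob, axis=0))
    b_hid += eta * (np.mean(hid_wake_prob, axis=0) - np.mean(hid_sleep_prob, axis=0))
    return w, b_hid, b_vis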
def profile_java_net():
    """
    Note: These times are super unreliable for some reason.. A given run can vary by 7s-14s for example.  God knows why.

    Version 'old', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 57.100
        Scores at Epoch 2.0: Test: 71.200
        Elapsed time is: 7.866s

    Version 'arr', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 261.1s

    Version 'new', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 8.825s

    :return:
    """
    mnist = get_mnist_dataset(flat=True).shorten(1000).to_onehot()

    with JPypeConnection():

        spiking_net = JavaSpikingNetWrapper.from_init(
            fractional = True,
            depth_first=False,
            smooth_grads = False,
            back_discretize = 'noreset-herding',
            w_init=0.01,
            hold_error=True,
            rng = 1234,
            n_steps = 10,
            eta=0.01,
            layer_sizes=[784]+[200]+[10],
            dtype = 'float'
            )

        with EZProfiler(print_result=True):
            result = assess_online_predictor(
                predictor = spiking_net,
                dataset=mnist,
                evaluation_function='percent_argmax_correct',
                test_epochs=[0, 1, 2],
                minibatch_size=1,
                test_on='test',
                )
def demo_variational_autoencoder(minibatch_size=100, n_epochs=2000, plot_interval=100, seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    """
    data = get_mnist_dataset(flat=True).training_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(
        pq_pair=EncoderDecoderNetworks(
            x_dim=data.shape[1],
            z_dim=20,
            encoder_hidden_sizes=[200],
            decoder_hidden_sizes=[200],
            w_init=lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out),
            x_distribution='bernoulli',
            z_distribution='gaussian',
            hidden_activation='softplus'
            ),
        optimizer=AdaMax(alpha=0.003),
        rng=rng
        )

    training_fcn = model.train.compile()
    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            print 'Epoch %s' % (i * minibatch_size / float(len(data)), )
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(-1, 28, 28), 'dec')
            dbplot(model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(-1, 28, 28), 'enc')
def demo_temporal_mnist(n_samples=None, smoothing_steps=200):
    _, _, original_data, original_labels = get_mnist_dataset(n_training_samples=n_samples, n_test_samples=n_samples).xyxy
    _, _, temporal_data, temporal_labels = get_temporal_mnist_dataset(n_training_samples=n_samples, n_test_samples=n_samples, smoothing_steps=smoothing_steps).xyxy
    for ox, oy, tx, ty in zip(original_data, original_labels, temporal_data, temporal_labels):
        with hold_dbplots():
            dbplot(ox, 'sample', title=str(oy))
            dbplot(tx, 'smooth', title=str(ty))
def demo_gan_mnist(n_epochs=20, minibatch_size=20, n_discriminator_steps=1, noise_dim=10, plot_period=100, rng=1234):
    """
    Train a Generative Adversarial network on MNIST data, showing generated samples as training progresses.

    :param n_epochs: Number of epochs to train
    :param minibatch_size: Size of minibatch to feed in each training iteration
    :param n_discriminator_steps: Number of steps training discriminator for every step of training generator
    :param noise_dim: Dimensionality of latent space (from which random samples are pulled)
    :param plot_period: Plot every N training iterations
    :param rng: Random number generator or seed
    """
    net = GenerativeAdversarialNetwork(
        discriminator=MultiLayerPerceptron.from_init(w_init=0.01, layer_sizes=[784, 100, 1], hidden_activation='relu', output_activation='sig', rng=rng),
        generator=MultiLayerPerceptron.from_init(w_init=0.1, layer_sizes=[noise_dim, 200, 784], hidden_activation='relu', output_activation='sig', rng=rng),
        noise_dim=noise_dim,
        optimizer=AdaMax(0.001),
        rng=rng
        )

    data = get_mnist_dataset(flat=True).training_set.input

    f_train_discriminator = net.train_discriminator.compile()
    f_train_generator = net.train_generator.compile()
    f_generate = net.generate.compile()

    for i, minibatch in enumerate(minibatch_iterate(data, n_epochs=n_epochs, minibatch_size=minibatch_size)):

        f_train_discriminator(minibatch)
        print 'Trained Discriminator'

        if i % n_discriminator_steps == n_discriminator_steps - 1:
            f_train_generator(n_samples=minibatch_size)
            print 'Trained Generator'

        if i % plot_period == 0:
            samples = f_generate(n_samples=minibatch_size)
            dbplot(minibatch.reshape(-1, 28, 28), "Real")
            dbplot(samples.reshape(-1, 28, 28), "Counterfeit")
            print 'Disp'
def demo_compare_dtp_optimizers(
        hidden_sizes=[240],
        n_epochs=10,
        minibatch_size=20,
        n_tests=20,
        hidden_activation='tanh',
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
            ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn='softmax'),
            'AdaMax-0.001-softmax': make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax': make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn='sigm'),
            'AdaMax-0.001-sigm': make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm': make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )

    plot_learning_curves(learning_curves)
def demo_run_dtp_on_mnist(
        hidden_sizes=[240],
        n_epochs=20,
        n_tests=20,
        minibatch_size=100,
        input_activation='sigm',
        hidden_activation='tanh',
        output_activation='softmax',
        optimizer_constructor=lambda: RMSProp(0.001),
        normalize_inputs=False,
        local_cost_function=mean_squared_error,
        output_cost_function=None,
        noise=1,
        lin_dtp=False,
        seed=1234
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(lambda x: x / np.sum(x, axis=1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
        input_size=dataset.input_size,
        output_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        optimizer_constructor=optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise=noise,
        cost_function=local_cost_function,
        layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp else PreActivationDifferenceTargetLayer.from_initializer,
        rng=seed
        ).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))
        )

    plot_learning_curves(result)
def demo_dtp_varieties(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        output_activation = 'sigm',
        optimizer = 'adamax',
        learning_rate = 0.01,
        noise = 1,
        predictors = ['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng = 1234,
        live_plot = False,
        plot = False
        ):
    """
    Compare the different DTP varieties against a baseline MLP on MNIST.

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(name, input_size = dataset.input_size, target_size=dataset.target_size,
        hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, output_activation = output_activation,
        optimizer=optimizer, learning_rate=learning_rate, noise = noise, rng = rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = predictors,
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
def mlp_normalization(hidden_size=300, n_epochs=30, n_tests=50, minibatch_size=20):
    """
    Compare mlp with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives a head start but incurs a small cost
    later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            normalize_minibatch=normalize,
            scale_param=scale,
            w_init=0.01,
            rng=5
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'regular': make_mlp(normalize=False, scale=False),
            'normalize': make_mlp(normalize=True, scale=False),
            'normalize and scale': make_mlp(normalize=True, scale=True),
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
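# The "normalize" and "normalize and scale" options compared above refer to per-minibatch standardization of each
# layer's activations, optionally followed by a trainable per-unit scale.  A rough NumPy sketch of the idea (the
# actual MultiLayerPerceptron implementation is symbolic and may differ in detail):
def normalize_minibatch_activations(activations, scale=None, eps=1e-7):
    normalized = (activations - activations.mean(axis=0)) / (activations.std(axis=0) + eps)  # Mean-subtract/normalize over the minibatch
    if scale is not None:
        normalized = normalized * scale  # Optional trainable (per-unit) scale parameter
    return normalized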
def demo_rbm_mnist(plot = True, test_mode = False):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    set_enable_omniscence(True)
    minibatch_size = 9
    n_epochs = 0.01 if test_mode else 10
    dataset = get_mnist_dataset().process_with(inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))

    rbm = simple_rbm(
        visible_layer = StochasticLayer('bernoulli'),
        bridge=FullyConnectedBridge(w = 0.001*np.random.randn(28*28, 500).astype(theano.config.floatX), b=0, b_rev = 0),
        hidden_layer = StochasticLayer('bernoulli')
        )

    train_function = rbm.get_training_fcn(n_gibbs = 4, persistent = True, optimizer = SimpleGradientDescent(eta = 0.01)).compile()
    sampling_function = rbm.get_free_sampling_fcn(init_visible_state = np.random.randn(9, 28*28), return_smooth_visible = True).compile()

    if plot:
        def debug_variable_setter():
            lv = train_function.symbolic.locals()
            return {
                'hidden-neg-chain': lv.sleep_hidden.reshape((-1, 25, 20)),
                'visible-neg-chain': lv.hidden_layer.smooth(lv.bridge.reverse(lv.sleep_hidden)).reshape((-1, 28, 28)),
                'w': lv.bridge.parameters[0].T[:25].reshape((-1, 28, 28)),
                'b': lv.bridge.parameters[1].reshape((25, 20)),
                'b_rev': lv.bridge.parameters[2].reshape((28, 28)),
                }
        train_function.set_debug_variables(debug_variable_setter)

        stream = LiveStream(lambda: dict(train_function.get_debug_values().items()+[('visible-sample', visible_samples.reshape((-1, 28, 28)))]), update_every=10)

    for _, visible_data, _ in dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True):
        visible_samples, _ = sampling_function()
        train_function(visible_data)
        if plot:
            stream.update()
def backprop_vs_difference_target_prop(hidden_sizes=[240], n_epochs=10, minibatch_size=20, n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes + [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
                ).compile()
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
        )

    if plot:
        plot_learning_curves(results)
def demo_simple_vae_on_mnist(
        minibatch_size = 100,
        n_epochs = 2000,
        plot_interval = 100,
        calculation_interval = 500,
        z_dim = 2,
        hidden_sizes = [400, 200],
        learning_rate = 0.003,
        hidden_activation = 'softplus',
        binary_x = True,
        w_init_mag = 0.01,
        gaussian_min_var = None,
        manifold_grid_size = 11,
        manifold_grid_span = 2,
        seed = None
        ):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    """
    dataset = get_mnist_dataset(flat = True)
    training_data = dataset.training_set.input
    test_data = dataset.test_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        training_data = training_data[:100]
        test_data = test_data[:100]

    model = GaussianVariationalAutoencoder(
        x_dim=training_data.shape[1],
        z_dim = z_dim,
        encoder_hidden_sizes = hidden_sizes,
        decoder_hidden_sizes = hidden_sizes[::-1],
        w_init_mag = w_init_mag,
        binary_data=binary_x,
        hidden_activation = hidden_activation,
        optimizer=AdaMax(alpha = learning_rate),
        gaussian_min_var = gaussian_min_var,
        rng = seed
        )

    training_fcn = model.train.compile()

    # For display, make functions to sample and represent the manifold.
    sampling_fcn = model.sample.compile()
    z_manifold_grid = np.array([x.flatten() for x in np.meshgrid(
        np.linspace(-manifold_grid_span, manifold_grid_span, manifold_grid_size),
        np.linspace(-manifold_grid_span, manifold_grid_span, manifold_grid_size)
        )]+[np.zeros(manifold_grid_size**2)]*(z_dim-2)).T
    decoder_mean_fcn = model.decode.compile(fixed_args = dict(z = z_manifold_grid))
    lower_bound_fcn = model.compute_lower_bound.compile()

    for i, minibatch in enumerate(minibatch_iterate(training_data, minibatch_size=minibatch_size, n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            if binary_x:
                manifold_means = decoder_mean_fcn()
            else:
                manifold_means, _ = decoder_mean_fcn()
            dbplot(manifold_means.reshape(manifold_grid_size, manifold_grid_size, 28, 28), 'First 2-dimensions of manifold.')

        if i % calculation_interval == 0:
            training_lower_bound = lower_bound_fcn(training_data)
            test_lower_bound = lower_bound_fcn(test_data)
            print 'Epoch: %s, Training Lower Bound: %s, Test Lower bound: %s' % \
                (i*minibatch_size/float(len(training_data)), training_lower_bound, test_lower_bound)
def demo_simple_dbn(
        minibatch_size=10,
        n_training_epochs_1=5,
        n_training_epochs_2=50,
        n_hidden_1=500,
        n_hidden_2=10,
        plot_period=100,
        eta1=0.01,
        eta2=0.0001,
        w_init_mag_1=0.01,
        w_init_mag_2=0.5,
        seed=None
        ):
    """
    Train a DBN, and create a function to project the test data into a latent space

    :param minibatch_size:
    :param n_training_epochs_1: Number of training epochs for the first-level RBM
    :param n_training_epochs_2: Number of training epochs for the second-level RBM
    :param n_hidden_1: Number of hidden units for first RBM
    :param n_hidden_2: Number of hidden units for second RBM
    :param plot_period: How often to plot
    :param seed:
    :return:
    """
    dataset = get_mnist_dataset(flat=True)
    rng = np.random.RandomState(seed)
    w_init_1 = lambda shape: w_init_mag_1 * rng.randn(*shape)
    w_init_2 = lambda shape: w_init_mag_2 * rng.randn(*shape)

    if is_test_mode():
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01

    # Train the first RBM
    dbn1 = StackedDeepBeliefNet(rbms=[BernoulliBernoulliRBM.from_initializer(n_visible=784, n_hidden=n_hidden_1, w_init_fcn=w_init_1)])
    train_first_layer = dbn1.get_training_fcn(optimizer=SimpleGradientDescent(eta=eta1), n_gibbs=1, persistent=True).compile()
    sample_first_layer = dbn1.get_sampling_fcn(initial_vis=dataset.training_set.input[:minibatch_size], n_steps=10).compile()
    for i, vis_data in enumerate(minibatch_iterate(dataset.training_set.input, minibatch_size=minibatch_size, n_epochs=n_training_epochs_1)):
        if i % plot_period == plot_period - 1:
            dbplot(dbn1.rbms[0].w.get_value().T[:100].reshape([-1, 28, 28]), 'weights1')
            dbplot(sample_first_layer()[0].reshape(-1, 28, 28), 'samples1')
        train_first_layer(vis_data)

    # Train the second RBM
    dbn2 = dbn1.stack_another(rbm=BernoulliGaussianRBM.from_initializer(n_visible=n_hidden_1, n_hidden=n_hidden_2, w_init_fcn=w_init_2))
    train_second_layer = dbn2.get_training_fcn(optimizer=SimpleGradientDescent(eta=eta2), n_gibbs=1, persistent=True).compile()
    sample_second_layer = dbn2.get_sampling_fcn(initial_vis=dataset.training_set.input[:minibatch_size], n_steps=10).compile()
    for i, vis_data in enumerate(minibatch_iterate(dataset.training_set.input, minibatch_size=minibatch_size, n_epochs=n_training_epochs_2)):
        if i % plot_period == 0:
            dbplot(dbn2.rbms[1].w.get_value(), 'weights2')
            dbplot(sample_second_layer()[0].reshape(-1, 28, 28), 'samples2')
        train_second_layer(vis_data)

    # Project data to latent space.
    project_to_latent = dbn2.propup.compile(fixed_args=dict(stochastic=False))
    latent_test_data = project_to_latent(dataset.test_set.input)
    print 'Projected the test data to a latent space. Shape: %s' % (latent_test_data.shape, )

    decode = dbn2.propdown.compile(fixed_args=dict(stochastic=False))
    recon_test_data = decode(latent_test_data)
    print 'Reconstructed the test data. Shape: %s' % (recon_test_data.shape, )
def get_temporal_mnist_dataset(smoothing_steps=1000, **mnist_kwargs):
    tr_x, tr_y, ts_x, ts_y = get_mnist_dataset(**mnist_kwargs).xyxy
    tr_ixs = temporalize(tr_x, smoothing_steps=smoothing_steps)
    ts_ixs = temporalize(ts_x, smoothing_steps=smoothing_steps)
    return DataSet.from_xyxy(tr_x[tr_ixs], tr_y[tr_ixs], ts_x[ts_ixs], ts_y[ts_ixs])
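# A small usage sketch (not part of the original module): since get_temporal_mnist_dataset passes **mnist_kwargs
# straight through to get_mnist_dataset, the smoothed dataset can be shortened and unpacked like any other DataSet
# in these demos.  The sample counts below are illustrative.
def demo_inspect_temporal_mnist(smoothing_steps=1000, n_samples=1000):
    tr_x, tr_y, ts_x, ts_y = get_temporal_mnist_dataset(smoothing_steps=smoothing_steps, n_training_samples=n_samples, n_test_samples=n_samples).xyxy
    print 'Temporal MNIST shapes: training %s, test %s' % (tr_x.shape, ts_x.shape)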
def compare_spiking_to_nonspiking(hidden_sizes = [300, 300], eta=0.01, w_init=0.01, fractional = False, n_epochs = 20, forward_discretize = 'rect-herding', back_discretize = 'noreset-herding', test_discretize='rect-herding', save_results = False): mnist = get_mnist_dataset(flat=True).to_onehot() test_epochs=[0.0, 0.05, 0.1, 0.2, 0.5]+range(1, n_epochs+1) if is_test_mode(): mnist = mnist.shorten(500) eta = 0.01 w_init=0.01 test_epochs = [0.0, 0.05, 0.1] spiking_net = JavaSpikingNetWrapper.from_init( fractional = fractional, depth_first=False, smooth_grads = False, forward_discretize = forward_discretize, back_discretize = back_discretize, test_discretize = test_discretize, w_init=w_init, hold_error=True, rng = 1234, n_steps = 10, eta=eta, layer_sizes=[784]+hidden_sizes+[10], ) relu_net = GradientBasedPredictor( MultiLayerPerceptron.from_init( hidden_activation = 'relu', output_activation = 'relu', layer_sizes=[784]+hidden_sizes+[10], use_bias=False, w_init=w_init, rng=1234, ), cost_function = 'mse', optimizer=GradientDescent(eta) ).compile() # Listen for spikes forward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')() backward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')() for lay in spiking_net.jnet.layers: lay.forward_herder.add_eavesdropper(forward_eavesdropper) for lay in spiking_net.jnet.layers[1:]: lay.backward_herder.add_eavesdropper(backward_eavesdropper) spiking_net.jnet.error_counter.add_eavesdropper(backward_eavesdropper) forward_counts = [] backward_counts = [] def register_counts(): forward_counts.append(forward_eavesdropper.get_count()) backward_counts.append(backward_eavesdropper.get_count()) results = compare_predictors( dataset=mnist, online_predictors={ 'Spiking-MLP': spiking_net, 'ReLU-MLP': relu_net, }, test_epochs=test_epochs, online_test_callbacks=lambda p: register_counts() if p is spiking_net else None, minibatch_size = 1, test_on = 'training+test', evaluation_function=percent_argmax_incorrect, ) spiking_params = [np.array(lay.forward_weights.w.asFloat()).copy() for lay in spiking_net.jnet.layers] relu_params = [param.get_value().astype(np.float64) for param in relu_net.parameters] # See what the score is when we apply the final spiking weights to the offline_trained_spiking_net = JavaSpikingNetWrapper( ws=relu_params, fractional = fractional, depth_first=False, smooth_grads = False, forward_discretize = forward_discretize, back_discretize = back_discretize, test_discretize = test_discretize, hold_error=True, n_steps = 10, eta=eta, ) # for spiking_layer, p in zip(spiking_net.jnet.layers, relu_params): # spiking_layer.w = p.astype(np.float64) error = [ ('Test', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.test_set.input), mnist.test_set.target)), ('Training', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.training_set.input), mnist.training_set.target)) ] results['Spiking-MLP with ReLU weights'] = LearningCurveData() results['Spiking-MLP with ReLU weights'].add(None, error) print 'Spiking-MLP with ReLU weights: %s' % error # -------------------------------------------------------------------------- # See what the score is when we plug the spiking weights into the ReLU net. 
    for param, sval in zip(relu_net.parameters, spiking_params):
        param.set_value(sval)
    error = [
        ('Test', percent_argmax_incorrect(relu_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(relu_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['ReLU-MLP with Spiking weights'] = LearningCurveData()
    results['ReLU-MLP with Spiking weights'].add(None, error)
    print 'ReLU-MLP with Spiking weights: %s' % error
    # --------------------------------------------------------------------------

    if save_results:
        with open("mnist_relu_vs_spiking_results-%s.pkl" % datetime.now(), 'w') as f:
            pickle.dump(results, f)

    # Problem: these spike counts currently include the spikes generated during testing as well as training.
    forward_rates = np.diff(forward_counts) / (np.diff(test_epochs)*60000)
    backward_rates = np.diff(backward_counts) / (np.diff(test_epochs)*60000)

    plt.figure('ReLU vs Spikes')
    plt.subplot(211)
    plot_learning_curves(results, title = "MNIST Learning Curves", hang=False, figure_name='ReLU vs Spikes',
        xscale='linear', yscale='log', y_title='Percent Error')
    plt.subplot(212)
    plt.plot(test_epochs[1:], forward_rates)
    plt.plot(test_epochs[1:], backward_rates)
    plt.xlabel('Epoch')
    plt.ylabel('n_spikes')
    plt.legend(['Mean Forward Spikes', 'Mean Backward Spikes'], loc='best')
    plt.interactive(is_test_mode())
    plt.show()
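# Usage sketch (not in the original source): one way to run the spiking-vs-ReLU comparison
# above. All keyword arguments come from the signature of compare_spiking_to_nonspiking;
# note that the spiking network requires the JPype/Java backend to be available.
if __name__ == '__main__':
    compare_spiking_to_nonspiking(
        hidden_sizes=[300, 300],
        eta=0.01,
        n_epochs=20,
        fractional=False,
        save_results=True,          # pickle the learning curves alongside the plots
        )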
    )

ExperimentLibrary.try_hyperparams = Experiment(
    description="Compare the various hyperparameters to the baseline.",
    function=with_jpype(lambda fractional = False, depth_first = False, smooth_grads = False, back_discretize = 'noreset-herding',
            n_steps = 10, hidden_sizes = [200, 200], hold_error = True:
        compare_predictors(
            dataset=(get_mnist_dataset(flat=True).shorten(100) if is_test_mode() else get_mnist_dataset(flat=True)).to_onehot(),
            online_predictors={'Spiking MLP': JavaSpikingNetWrapper.from_init(
                fractional = fractional,
                depth_first = depth_first,
                smooth_grads = smooth_grads,
                back_discretize = back_discretize,
                w_init=0.01,
                rng = 1234,
                eta=0.01,
                n_steps = n_steps,
                hold_error=hold_error,
                layer_sizes=[784]+hidden_sizes+[10],
                )},
            test_epochs=[0.0, 0.05] if is_test_mode() else [0.0, 0.05, 0.1, 0.2, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
            minibatch_size = 1,
            report_test_scores=True,
            )),
    )
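# Hypothetical sketch (not in the original source): sweeping one hyperparameter of the
# experiment above by hand. It reuses only constructors and keyword arguments that appear
# in this file; the particular back_discretize values tried, and the assumption that this
# is called from within a JPype context (as the with_jpype-wrapped Experiment above is), are mine.
def sweep_back_discretize(values=('noreset-herding', 'rect-herding'), hidden_sizes=[200, 200]):
    nets = {
        'Spiking MLP (%s)' % bd: JavaSpikingNetWrapper.from_init(
            fractional=False,
            depth_first=False,
            smooth_grads=False,
            back_discretize=bd,
            w_init=0.01,
            rng=1234,
            eta=0.01,
            n_steps=10,
            hold_error=True,
            layer_sizes=[784]+hidden_sizes+[10],
            )
        for bd in values}
    return compare_predictors(
        dataset=get_mnist_dataset(flat=True).to_onehot(),
        online_predictors=nets,
        test_epochs=[0.0, 0.5, 1, 2],
        minibatch_size=1,
        evaluation_function=percent_argmax_incorrect,
        )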
def demo_dbn_mnist(plot=True, test_mode=True):
    """
    In this demo we greedily train a Deep Belief Net on MNIST: first an RBM between the visible layer and a
    hidden layer, then an associative layer connecting that hidden layer and the label layer.  We plot the
    weights and the state of the persistent Markov chains that are simultaneously being sampled during training,
    and periodically print the free energy of the test data (stage 1) and the classification score (stage 2).
    """
    set_enable_omniscence(True)
    minibatch_size = 20
    dataset = get_mnist_dataset().process_with(inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))
    w_init = lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out)
    n_training_epochs_1 = 20
    n_training_epochs_2 = 20
    check_period = 300

    if test_mode:
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01
        check_period = 100

    dbn = DeepBeliefNet(
        layers={
            'vis': StochasticLayer('bernoulli'),
            'hid': StochasticLayer('bernoulli'),
            'ass': StochasticLayer('bernoulli'),
            'lab': StochasticLayer('bernoulli'),
            },
        bridges={
            ('vis', 'hid'): FullyConnectedBridge(w=w_init(784, 500), b_rev=0),
            ('hid', 'ass'): FullyConnectedBridge(w=w_init(500, 500), b_rev=0),
            ('lab', 'ass'): FullyConnectedBridge(w=w_init(10, 500), b_rev=0)
            }
        )

    # Compile the functions you're going to use.
    train_first_layer = dbn.get_constrastive_divergence_function(visible_layers='vis', hidden_layers='hid',
        optimizer=SimpleGradientDescent(eta=0.01), n_gibbs=1, persistent=True).compile()
    free_energy_of_first_layer = dbn.get_free_energy_function(visible_layers='vis', hidden_layers='hid').compile()
    train_second_layer = dbn.get_constrastive_divergence_function(visible_layers=('hid', 'lab'), hidden_layers='ass',
        input_layers=('vis', 'lab'), n_gibbs=1, persistent=True).compile()
    predict_label = dbn.get_inference_function(input_layers='vis', output_layers='lab',
        path=[('vis', 'hid'), ('hid', 'ass'), ('ass', 'lab')], smooth=True).compile()

    encode_label = OneHotEncoding(n_classes=10)

    # Step 1: Train the first layer, plotting the weights and persistent chain state.
    if plot:
        train_first_layer.set_debug_variables(lambda: {
            'weights': dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
            'smooth_vis_state': dbn.get_inference_function('hid', 'vis', smooth=True)
                .symbolic_stateless(*train_first_layer.locals()['initial_hidden']).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_first_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(dataset.training_set.minibatch_iterator(
            minibatch_size=minibatch_size, epochs=n_training_epochs_1, single_channel=True)):
        train_first_layer(visible_data)
        if i % check_period == 0:
            print 'Free Energy of Test Data: %s' % (free_energy_of_first_layer(dataset.test_set.input).mean())
            if plot:
                plotter.update()

    # Step 2: Train the second layer and simultaneously compute the classification error from forward passes.
    if plot:
        train_second_layer.set_debug_variables(lambda: {
            'w_vis_hid': dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
            'w_hid_ass': dbn._bridges['hid', 'ass']._w,
            'w_lab_ass': dbn._bridges['lab', 'ass']._w,
            'associative_state': train_second_layer.locals()['sleep_hidden'][0].reshape((-1, 20, 25)),
            'hidden_state': train_second_layer.locals()['sleep_visible'][0].reshape((-1, 20, 25)),
            'smooth_vis_state': dbn.get_inference_function('hid', 'vis', smooth=True)
                .symbolic_stateless(train_second_layer.locals()['sleep_visible'][0]).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_second_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(dataset.training_set.minibatch_iterator(
            minibatch_size=minibatch_size, epochs=n_training_epochs_2, single_channel=True)):
        train_second_layer(visible_data, encode_label(label_data))
        if i % check_period == 0:
            out, = predict_label(dataset.test_set.input)
            score = percent_argmax_correct(actual=out, target=dataset.test_set.target)
            print 'Classification Score: %s' % score
            if plot:
                plotter.update()
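# Usage sketch (not in the original source): a full run of the DBN demo above.
# Set test_mode=True first for a quick smoke test before committing to the 20-epoch training stages.
if __name__ == '__main__':
    demo_dbn_mnist(plot=True, test_mode=False)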
def demo_rbm_mnist(plot=True, test_mode=False):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is simultaneously being sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains used for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    set_enable_omniscence(True)
    minibatch_size = 9
    n_epochs = 0.01 if test_mode else 10

    dataset = get_mnist_dataset().process_with(inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))

    rbm = simple_rbm(
        visible_layer=StochasticLayer('bernoulli'),
        bridge=FullyConnectedBridge(w=0.001 * np.random.randn(28 * 28, 500).astype(theano.config.floatX), b=0, b_rev=0),
        hidden_layer=StochasticLayer('bernoulli')
        )

    train_function = rbm.get_training_fcn(n_gibbs=4, persistent=True, optimizer=SimpleGradientDescent(eta=0.01)).compile()
    sampling_function = rbm.get_free_sampling_fcn(init_visible_state=np.random.randn(9, 28 * 28), return_smooth_visible=True).compile()

    if plot:
        def debug_variable_setter():
            lv = train_function.symbolic.locals()
            return {
                'hidden-neg-chain': lv.sleep_hidden.reshape((-1, 25, 20)),
                'visible-neg-chain': lv.hidden_layer.smooth(lv.bridge.reverse(lv.sleep_hidden)).reshape((-1, 28, 28)),
                'w': lv.bridge.parameters[0].T[:25].reshape((-1, 28, 28)),
                'b': lv.bridge.parameters[1].reshape((25, 20)),
                'b_rev': lv.bridge.parameters[2].reshape((28, 28)),
                }
        train_function.set_debug_variables(debug_variable_setter)

        stream = LiveStream(lambda: dict(train_function.get_debug_values().items()
            + [('visible-sample', visible_samples.reshape((-1, 28, 28)))]), update_every=10)

    for _, visible_data, _ in dataset.training_set.minibatch_iterator(minibatch_size=minibatch_size, epochs=n_epochs, single_channel=True):
        visible_samples, _ = sampling_function()
        train_function(visible_data)
        if plot:
            stream.update()
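# Usage sketch (not in the original source): run the RBM demo above and watch the persistent
# chains, weights and free samples evolve in the live plot described in its docstring.
if __name__ == '__main__':
    demo_rbm_mnist(plot=True, test_mode=False)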