def demo_gan_mnist(n_epochs=20, minibatch_size=20, n_discriminator_steps=1, noise_dim=10, plot_period=100, rng=1234):
    """
    Train a Generative Adversarial Network on MNIST data, showing generated samples as training progresses.

    :param n_epochs: Number of epochs to train
    :param minibatch_size: Size of minibatch to feed in each training iteration
    :param n_discriminator_steps: Number of steps training discriminator for every step of training generator
    :param noise_dim: Dimensionality of latent space (from which random samples are pulled)
    :param plot_period: Plot every N training iterations
    :param rng: Random number generator or seed
    """
    net = GenerativeAdversarialNetwork(
        discriminator=MultiLayerPerceptron.from_init(w_init=0.01, layer_sizes=[784, 100, 1], hidden_activation='relu', output_activation='sig', rng=rng),
        generator=MultiLayerPerceptron.from_init(w_init=0.1, layer_sizes=[noise_dim, 200, 784], hidden_activation='relu', output_activation='sig', rng=rng),
        noise_dim=noise_dim,
        optimizer=AdaMax(0.001),
        rng=rng
        )

    data = get_mnist_dataset(flat=True).training_set.input

    f_train_discriminator = net.train_discriminator.compile()
    f_train_generator = net.train_generator.compile()
    f_generate = net.generate.compile()

    for i, minibatch in enumerate(minibatch_iterate(data, n_epochs=n_epochs, minibatch_size=minibatch_size)):
        f_train_discriminator(minibatch)
        print 'Trained Discriminator'
        if i % n_discriminator_steps == n_discriminator_steps - 1:
            f_train_generator(n_samples=minibatch_size)
            print 'Trained Generator'
        if i % plot_period == 0:
            samples = f_generate(n_samples=minibatch_size)
            dbplot(minibatch.reshape(-1, 28, 28), "Real")
            dbplot(samples.reshape(-1, 28, 28), "Counterfeit")
            print 'Disp'
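# Example usage (a sketch, not part of the original demo): run the GAN demo as a script, taking a few
# extra discriminator steps per generator step and plotting less often.  All keyword arguments below
# exist in the signature above; dbplot is assumed to have a working interactive matplotlib backend.
if __name__ == '__main__':
    demo_gan_mnist(n_epochs=20, n_discriminator_steps=3, plot_period=200)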
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            w_init=0.01,
            rng=5
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'sgd': make_mlp(SimpleGradientDescent(eta=0.1)),
            'adamax': make_mlp(AdaMax(alpha=1e-3)),
            },
        minibatch_size=20,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
def test_symbolic_predictors():
    """
    This test is meant to serve as both a test and a tutorial for how to use a symbolic predictor.
    It shows how to construct a symbolic predictor using a function, a cost function, and an optimizer.
    It then trains this predictor on a synthetic toy dataset and demonstrates that it has learned.
    """
    dataset = get_synthetic_clusters_dataset()

    symbolic_predictor = GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, 100, dataset.n_categories],
            output_activation='softmax',
            w_init=0.1,
            rng=3252
            ),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta=0.1),
        )
    predictor = symbolic_predictor.compile()  # .compile() turns the symbolic predictor into an IPredictor object, which can be called with numpy arrays.

    init_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)
    for x_m, y_m in zip_minibatch_iterate([dataset.training_set.input, dataset.training_set.target], minibatch_size=10, n_epochs=20):
        predictor.train(x_m, y_m)
    final_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)

    print 'Initial score: %s%%. Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
def test_predictor_pickling():

    dataset = get_synthetic_clusters_dataset()
    predictor_constructor = lambda: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_shape[0], 100, dataset.n_categories],
            output_activation='softmax',
            w_init=lambda n_in, n_out, rng=np.random.RandomState(3252): 0.1*rng.randn(n_in, n_out)
            ),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta=0.1),
        ).compile()

    evaluate = lambda pred: evaluate_predictor(pred, dataset.test_set, percent_argmax_correct)

    # Train up predictor and save params
    predictor = predictor_constructor()
    pre_training_score = evaluate(predictor)
    assert pre_training_score < 35
    train_online_predictor(predictor, dataset.training_set, minibatch_size=20, n_epochs=3)
    post_training_score = evaluate(predictor)
    assert post_training_score > 95

    with pytest.raises(PicklingError):  # TODO: Fix the PicklingError
        trained_predictor_string = pickle.dumps(predictor)

        # Instantiate new predictor and load params
        new_predictor = pickle.loads(trained_predictor_string)
        loaded_score = evaluate(new_predictor)
        assert loaded_score == post_training_score > 95
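# Until the PicklingError above is fixed, a workaround is to save the raw parameter values rather than
# the compiled predictor.  This is only a sketch: it assumes a compiled predictor exposes its Theano
# shared variables as .parameters, the same access pattern used by compare_spiking_to_nonspiking below
# (relu_net.parameters with get_value()/set_value()).
def copy_predictor_params(trained_predictor, fresh_predictor):
    # Copy parameter values from a trained predictor into a freshly constructed one.
    for fresh_p, trained_p in zip(fresh_predictor.parameters, trained_predictor.parameters):
        fresh_p.set_value(trained_p.get_value())
    return fresh_predictor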
def compare_example_predictors(
        n_epochs=5,
        n_tests=20,
        minibatch_size=10,
        ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.  (When is_test_mode() is set, the demo just runs quickly, but not
    to completion, to check that it doesn't break.)

    :param n_epochs: Number of epochs to train for
    :param n_tests: Number of times to evaluate the predictors during training
    :param minibatch_size: Size of the training minibatches
    """
    dataset = get_mnist_dataset(flat=True)  # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object.
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron': Perceptron(
                w=np.zeros((dataset.input_size, dataset.n_categories)),
                alpha=0.001
                ).to_categorical(n_categories=dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check that the output is normalized, but we know it is because it comes from a softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators=40)
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
        )
    # The result is a LearningCurveData object
    return learning_curve_data
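# Example usage (a sketch): run the comparison and plot the resulting learning curves with
# plot_learning_curves, the same helper used in demo_mnist_mlp and compare_spiking_to_nonspiking below.
if __name__ == '__main__':
    learning_curves = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
    plot_learning_curves(learning_curves)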
def mlp_normalization(hidden_size=300, n_epochs=30, n_tests=50, minibatch_size=20):
    """
    Compare mlp with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives a head start but incurs a small cost
    later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            normalize_minibatch=normalize,
            scale_param=scale,
            w_init=0.01,
            rng=5
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'regular': make_mlp(normalize=False, scale=False),
            'normalize': make_mlp(normalize=True, scale=False),
            'normalize and scale': make_mlp(normalize=True, scale=True),
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
def test_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor=lambda n_dim_in, n_dim_out: GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[n_dim_in, 100, n_dim_out],
                output_activation='softmax',
                w_init=0.1,
                rng=3252
                ),
            cost_function=negative_log_likelihood_dangerous,
            optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2
        )
def train_conventional_mlp_on_mnist(hidden_sizes, n_epochs=50, w_init='xavier-both', minibatch_size=20, rng=1234,
        optimizer='sgd', hidden_activations='relu', output_activation='softmax', learning_rate=0.01,
        cost_function='nll', use_bias=True, l1_loss=0, l2_loss=0, test_on='training+test'):

    dataset = get_mnist_dataset(flat=True)
    if output_activation != 'softmax':
        dataset = dataset.to_onehot()

    all_layer_sizes = [dataset.input_size] + hidden_sizes + [dataset.n_categories]
    weights = initialize_network_params(layer_sizes=all_layer_sizes, mag=w_init, base_dist='normal', include_biases=False, rng=rng)
    net = MultiLayerPerceptron(weights=weights, hidden_activation=hidden_activations, output_activation=output_activation, use_bias=use_bias)

    predictor = GradientBasedPredictor(
        function=net,
        cost_function=get_named_cost_function(cost_function),
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        regularization_cost=lambda params: sum(l1_loss*abs(p_).sum() + l2_loss*(p_**2).sum() if p_.ndim == 2 else 0 for p_ in params)
        ).compile()

    assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        evaluation_function='percent_argmax_correct',
        test_epochs=range(0, n_epochs, 1),
        test_on=test_on,
        minibatch_size=minibatch_size
        )

    ws = [p.get_value() for p in net.parameters]
    return ws
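# The parameter values returned above can be plugged back into a MultiLayerPerceptron to get a
# standalone forward pass.  A sketch, mirroring the weights=/.compile() usage in the function above;
# it assumes the network was trained with use_bias=False, so that the returned list contains only the
# weight matrices.
def rebuild_mlp_from_weights(ws, hidden_activations='relu', output_activation='softmax'):
    # Rebuild the network from the saved weight matrices and compile it into a function that takes numpy arrays.
    net = MultiLayerPerceptron(weights=ws, hidden_activation=hidden_activations,
        output_activation=output_activation, use_bias=False)
    return net.compile()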
def backprop_vs_difference_target_prop(hidden_sizes=[240], n_epochs=10, minibatch_size=20, n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size]+hidden_sizes+[dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
                ).compile()
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )
def test_bare_bones_mlp(seed=1234):
    """
    This verifies that the MLP works.  It's intentionally not using any wrappers on top of MLP to show its
    "bare bones" usage.  Wrapping in GradientBasedPredictor can simplify usage - see test_symbolic_predictors.
    """
    dataset = get_synthetic_clusters_dataset()

    mlp = MultiLayerPerceptron.from_init(
        layer_sizes=[dataset.input_size, 20, dataset.n_categories],
        hidden_activation='relu',
        output_activation='softmax',
        w_init=0.01,
        rng=seed
        )
    fwd_fcn = mlp.compile()

    optimizer = SimpleGradientDescent(eta=0.1)

    @symbolic_updater
    def train(x, y):
        output = mlp(x)
        cost = negative_log_likelihood_dangerous(output, y)
        optimizer(cost, mlp.parameters)

    train_fcn = train.compile()

    init_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input), dataset.test_set.target)
    for x_m, y_m in zip_minibatch_iterate([dataset.training_set.input, dataset.training_set.target], minibatch_size=10, n_epochs=20):
        train_fcn(x_m, y_m)
    final_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input), dataset.test_set.target)

    print 'Initial score: %s%%. Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
def get_predictor(predictor_type, input_size, target_size, hidden_sizes=[240], output_activation='sigm',
        hidden_activation='tanh', optimizer='adamax', learning_rate=0.01, noise=1, w_init=0.01, rng=None):
    """
    Construct a predictor, given parameters that specify its architecture and training setup.

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes: List of hidden layer sizes
    :param output_activation: Name of the output activation function
    :param hidden_activation: Name of the hidden activation function
    :param optimizer: Name of the optimizer
    :param learning_rate: Learning rate
    :param noise: Noise level used by the DTP variants
    :param w_init: Weight initialization magnitude
    :param rng: Random number generator or seed
    :return: A compiled IPredictor object
    """
    return {
        'MLP': lambda: GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                w_init=w_init,
                rng=rng
                ),
            cost_function=mean_squared_error,
            optimizer=get_named_optimizer(optimizer, learning_rate),
            ).compile(),
        'DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            ).compile(),
        'PreAct-DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            layer_constructor=PreActivationDifferenceTargetLayer.from_initializer,
            rng=rng,
            ).compile(),
        'Linear-DTP': lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            # layer_constructor=LinearDifferenceTargetLayer.from_initializer
            ).compile(),
        }[predictor_type]()
def compare_spiking_to_nonspiking(hidden_sizes=[300, 300], eta=0.01, w_init=0.01, fractional=False, n_epochs=20,
        forward_discretize='rect-herding', back_discretize='noreset-herding', test_discretize='rect-herding',
        save_results=False):

    mnist = get_mnist_dataset(flat=True).to_onehot()

    test_epochs = [0.0, 0.05, 0.1, 0.2, 0.5]+range(1, n_epochs+1)

    if is_test_mode():
        mnist = mnist.shorten(500)
        eta = 0.01
        w_init = 0.01
        test_epochs = [0.0, 0.05, 0.1]

    spiking_net = JavaSpikingNetWrapper.from_init(
        fractional=fractional,
        depth_first=False,
        smooth_grads=False,
        forward_discretize=forward_discretize,
        back_discretize=back_discretize,
        test_discretize=test_discretize,
        w_init=w_init,
        hold_error=True,
        rng=1234,
        n_steps=10,
        eta=eta,
        layer_sizes=[784]+hidden_sizes+[10],
        )

    relu_net = GradientBasedPredictor(
        MultiLayerPerceptron.from_init(
            hidden_activation='relu',
            output_activation='relu',
            layer_sizes=[784]+hidden_sizes+[10],
            use_bias=False,
            w_init=w_init,
            rng=1234,
            ),
        cost_function='mse',
        optimizer=GradientDescent(eta)
        ).compile()

    # Listen for spikes
    forward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    backward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    for lay in spiking_net.jnet.layers:
        lay.forward_herder.add_eavesdropper(forward_eavesdropper)
    for lay in spiking_net.jnet.layers[1:]:
        lay.backward_herder.add_eavesdropper(backward_eavesdropper)
    spiking_net.jnet.error_counter.add_eavesdropper(backward_eavesdropper)
    forward_counts = []
    backward_counts = []

    def register_counts():
        forward_counts.append(forward_eavesdropper.get_count())
        backward_counts.append(backward_eavesdropper.get_count())

    results = compare_predictors(
        dataset=mnist,
        online_predictors={
            'Spiking-MLP': spiking_net,
            'ReLU-MLP': relu_net,
            },
        test_epochs=test_epochs,
        online_test_callbacks=lambda p: register_counts() if p is spiking_net else None,
        minibatch_size=1,
        test_on='training+test',
        evaluation_function=percent_argmax_incorrect,
        )

    spiking_params = [np.array(lay.forward_weights.w.asFloat()).copy() for lay in spiking_net.jnet.layers]
    relu_params = [param.get_value().astype(np.float64) for param in relu_net.parameters]

    # See what the score is when we apply the final ReLU-trained weights to the spiking net.
    offline_trained_spiking_net = JavaSpikingNetWrapper(
        ws=relu_params,
        fractional=fractional,
        depth_first=False,
        smooth_grads=False,
        forward_discretize=forward_discretize,
        back_discretize=back_discretize,
        test_discretize=test_discretize,
        hold_error=True,
        n_steps=10,
        eta=eta,
        )

    # for spiking_layer, p in zip(spiking_net.jnet.layers, relu_params):
    #     spiking_layer.w = p.astype(np.float64)

    error = [
        ('Test', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['Spiking-MLP with ReLU weights'] = LearningCurveData()
    results['Spiking-MLP with ReLU weights'].add(None, error)
    print 'Spiking-MLP with ReLU weights: %s' % error

    # --------------------------------------------------------------------------
    # See what the score is when we plug the spiking weights into the ReLU net.
    for param, sval in zip(relu_net.parameters, spiking_params):
        param.set_value(sval)
    error = [
        ('Test', percent_argmax_incorrect(relu_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(relu_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['ReLU-MLP with Spiking weights'] = LearningCurveData()
    results['ReLU-MLP with Spiking weights'].add(None, error)
    print 'ReLU-MLP with Spiking weights: %s' % error
    # --------------------------------------------------------------------------

    if save_results:
        with open("mnist_relu_vs_spiking_results-%s.pkl" % datetime.now(), 'w') as f:
            pickle.dump(results, f)

    # Problem: this currently includes test-time spikes in the counts.
    forward_rates = np.diff(forward_counts) / (np.diff(test_epochs)*60000)
    backward_rates = np.diff(backward_counts) / (np.diff(test_epochs)*60000)

    plt.figure('ReLU vs Spikes')
    plt.subplot(211)
    plot_learning_curves(results, title="MNIST Learning Curves", hang=False, figure_name='ReLU vs Spikes',
        xscale='linear', yscale='log', y_title='Percent Error')
    plt.subplot(212)
    plt.plot(test_epochs[1:], forward_rates)
    plt.plot(test_epochs[1:], backward_rates)
    plt.xlabel('Epoch')
    plt.ylabel('n_spikes')
    plt.legend(['Mean Forward Spikes', 'Mean Backward Spikes'], loc='best')
    plt.interactive(is_test_mode())
    plt.show()
def demo_mnist_mlp(
        minibatch_size=10,
        learning_rate=0.1,
        optimizer='sgd',
        hidden_sizes=[300],
        w_init=0.01,
        hidden_activation='tanh',
        output_activation='softmax',
        cost='nll-d',
        visualize_params=False,
        n_test_points=30,
        n_epochs=10,
        max_training_samples=None,
        use_bias=True,
        onehot=False,
        rng=1234,
        plot=False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """
    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name=optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init=w_init,
            use_bias=use_bias,
            rng=rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
        )

    if plot:
        plot_learning_curves(results)
def demo_herding_network(
        kp=.1,
        kd=1.,
        kp_back=None,
        kd_back=None,
        hidden_sizes=[200],
        n_epochs=50,
        onehot=False,
        parallel=False,
        learning_rate=0.01,
        dataset='mnist',
        hidden_activation='relu',
        adaptive=True,
        adaptation_rate=0.001,
        output_activation='softmax',
        loss='nll',
        fwd_quantizer='herd',
        back_quantizer='same',
        minibatch_size=1,
        swap_mlp=False,
        plot=False,
        test_period=.5,
        grad_calc='true',
        rng=1234,
        ):

    dataset = (get_mnist_dataset(flat=True, join_train_and_val=True) if dataset == 'mnist'
        else get_temporal_mnist_dataset(flat=True, join_train_and_val=True))
    if onehot:
        dataset = dataset.to_onehot()

    ws = initialize_network_params(layer_sizes=[28*28]+hidden_sizes+[10], mag='xavier-both', include_biases=False, rng=rng)

    if is_test_mode():
        dataset = dataset.shorten(500)
        n_epochs = 0.1
        test_period = 0.03

    if kp_back is None:
        kp_back = kp
    if kd_back is None:
        kd_back = kd
    if back_quantizer == 'same':
        back_quantizer = fwd_quantizer

    if adaptive:
        encdec = lambda: PDAdaptiveEncoderDecoder(kp=kp, kd=kd, adaptation_rate=adaptation_rate, quantization=fwd_quantizer)
        encdec_back = lambda: PDAdaptiveEncoderDecoder(kp=kp_back, kd=kd_back, adaptation_rate=adaptation_rate, quantization=back_quantizer)
    else:
        encdec = PDEncoderDecoder(kp=kp, kd=kd, quantization=fwd_quantizer)
        encdec_back = PDEncoderDecoder(kp=kp_back, kd=kd_back, quantization=back_quantizer)

    if swap_mlp:
        if not parallel:
            assert minibatch_size == 1, "Unfair comparison otherwise, sorry buddy, can't let you do that."
        net = GradientBasedPredictor(
            function=MultiLayerPerceptron.from_weights(
                weights=ws,
                hidden_activations=hidden_activation,
                output_activation=output_activation,
                ),
            cost_function=loss,
            optimizer=GradientDescent(learning_rate),
            )
        prediction_funcs = net.predict.compile()
    else:
        net = PDHerdingNetwork(
            ws=ws,
            encdec=encdec,
            encdec_back=encdec_back,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            optimizer=GradientDescent(learning_rate),
            minibatch_size=minibatch_size if parallel else 1,
            grad_calc=grad_calc,
            loss=loss
            )
        noise_free_forward_pass = MultiLayerPerceptron.from_weights(
            weights=[layer.w for layer in net.layers],
            biases=[layer.b for layer in net.layers],
            hidden_activations=hidden_activation,
            output_activation=output_activation
            ).compile()
        prediction_funcs = [('noise_free', noise_free_forward_pass), ('herded', net.predict.compile())]

    op_count_info = []

    def test_callback(info, score):
        if plot:
            dbplot(net.layers[0].w.get_value().T.reshape(-1, 28, 28), 'w0', cornertext='Epoch {}'.format(info.epoch))
        if swap_mlp:
            all_layer_sizes = [dataset.input_size]+hidden_sizes+[dataset.target_size]
            fwd_ops = [info.sample*d1*d2 for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])]
            back_ops = [info.sample*d1*d2 for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])]
            update_ops = [info.sample*d1*d2 for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])]
        else:
            fwd_ops = [layer_.fwd_op_count.get_value() for layer_ in net.layers]
            back_ops = [layer_.back_op_count.get_value() for layer_ in net.layers]
            update_ops = [layer_.update_op_count.get_value() for layer_ in net.layers]
        if info.epoch != 0:
            with IndentPrint('Mean Ops by epoch {}'.format(info.epoch)):
                print 'Fwd: {}'.format([si_format(ops/info.epoch, format_str='{value} {prefix}Ops') for ops in fwd_ops])
                print 'Back: {}'.format([si_format(ops/info.epoch, format_str='{value} {prefix}Ops') for ops in back_ops])
                print 'Update: {}'.format([si_format(ops/info.epoch, format_str='{value} {prefix}Ops') for ops in update_ops])
        if info.epoch > max(0.5, 2*test_period) and not swap_mlp and score.get_score('train', 'noise_free') < 20:
            raise Exception("This horse ain't goin' nowhere.")
        op_count_info.append((info, (fwd_ops, back_ops, update_ops)))

    info_score_pairs = train_and_test_online_predictor(
        dataset=dataset,
        train_fcn=net.train.compile(),
        predict_fcn=prediction_funcs,
        minibatch_size=minibatch_size,
        n_epochs=n_epochs,
        test_epochs=('every', test_period),
        score_measure='percent_argmax_correct',
        test_on='training+test',
        test_callback=test_callback
        )

    return info_score_pairs, op_count_info
def get_predictor(predictor_type, input_size, target_size, hidden_sizes=[240], output_activation='sigm',
        hidden_activation='tanh', optimizer='adamax', learning_rate=0.01, noise=1, w_init=0.01, use_bias=True, rng=None):
    """
    Construct a predictor, given parameters that specify its architecture and training setup.

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes: List of hidden layer sizes
    :param output_activation: Name of the output activation function
    :param hidden_activation: Name of the hidden activation function
    :param optimizer: Name of the optimizer
    :param learning_rate: Learning rate
    :param noise: Noise level used by the DTP variants
    :param w_init: Weight initialization magnitude
    :param use_bias: Whether to include bias parameters
    :param rng: Random number generator or seed
    :return: A compiled IPredictor object
    """
    return {
        'MLP': lambda: GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                use_bias=use_bias,
                w_init=w_init,
                rng=rng
                ),
            cost_function=mean_squared_error,
            optimizer=get_named_optimizer(optimizer, learning_rate),
            ).compile(),
        'DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
            ).compile(),
        'PreAct-DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            layer_constructor=PreActivationDifferenceTargetLayer.from_initializer,
            rng=rng,
            use_bias=use_bias,
            ).compile(),
        'Linear-DTP': lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
            # layer_constructor=LinearDifferenceTargetLayer.from_initializer
            ).compile(),
        }[predictor_type]()
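# A usage sketch (not part of the original code): build an MLP predictor with get_predictor and run a
# few training steps on random data, using the same train/predict interface as the other demos here.
# Targets are drawn in [0, 1] to match the default sigmoidal output activation.
def demo_get_predictor_usage():
    data_rng = np.random.RandomState(1234)
    predictor = get_predictor('MLP', input_size=20, target_size=4, hidden_sizes=[50], rng=1234)
    x = data_rng.randn(10, 20)
    y = data_rng.rand(10, 4)
    for _ in xrange(5):
        predictor.train(x, y)
    return predictor.predict(x)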