def test_stretch_minibatches():
    """
    Check that minibatch_size='stretch' produces the same trained predictor as
    minibatch_size=1, while only calling train() twice (as asserted below).
    """

    class _SingleSampleRegressor(IPredictor):
        """Linear regressor that learns one sample at a time and counts calls to train()."""

        def __init__(self, n_in, n_out, eta=0.1):
            self.w = np.zeros((n_in, n_out))
            self.eta = eta
            self.n_train_calls = 0

        def _predict_one(self, x):
            return x.dot(self.w)

        def _train_one(self, x, y):
            self.w += self.eta * np.outer(x, y)

        def predict(self, x):
            return np.array([self._predict_one(sample) for sample in x])

        def train(self, x, y):
            # One call may carry many samples; they are still consumed one by one.
            for sample, target in zip(x, y):
                self._train_one(sample, target)
            self.n_train_calls += 1

    dataset = get_synthetic_clusters_dataset().to_onehot()

    def _train_and_predict(minibatch_size):
        # Build a fresh regressor, run the assessment, and return (regressor, record, test outputs).
        regressor = _SingleSampleRegressor(dataset.input_size, dataset.target_size)
        record = assess_online_predictor(
            regressor,
            dataset,
            evaluation_function='percent_argmax_correct',
            test_epochs=[0, 0.5, 1],
            minibatch_size=minibatch_size,
            test_on='test')
        return regressor, record, regressor.predict(dataset.test_set.input)

    # Reference run: one train() call per training sample.
    p1, p1_scores, p1_trained_out = _train_and_predict(1)
    assert p1.n_train_calls == dataset.training_set.n_samples
    scores = p1_scores.get_scores()
    assert len(scores) == 3
    assert scores[0] < 28
    assert scores[2] > 99

    # 'stretch' run: same result expected from just two train() calls.
    p2, _, p2_trained_out = _train_and_predict('stretch')
    assert p2.n_train_calls == 2
    assert np.array_equal(p1_trained_out, p2_trained_out)
def assert_online_predictor_not_broken(predictor_constructor, initial_score_under = 35, final_score_over = 95,
        n_epochs = 1, minibatch_size = 'full', categorical_target = False, accumulator = None, n_extra_tests = 0):
    """
    Sanity-check a predictor on the synthetic clusters dataset (loaded as float32).

    Passing this test does not mean the predictor is good - only that it is not
    completely broken.

    :param predictor_constructor: A callable returning an IPredictor object given (n_dims_in, n_dims_out).
    :param initial_score_under: Assert that the score before training is no greater than this.  On the
        4-cluster dataset chance is 25%, so this mainly guards against cheating.
    :param final_score_over: Assert that the final score is at least this.  It is not hard to reach a
        final score of 100 on this dataset.
    :param n_epochs: How many epochs to train for.
    :param minibatch_size: Minibatch size.  By default, do full-batch training.
    :param categorical_target: If True, the predictor expects integer labels as targets.  Otherwise it
        expects a one-hot encoded vector, with the unit corresponding to the label being True.
    :param accumulator: Passed through unchanged to assess_online_predictor.
    :param n_extra_tests: Number of additional test points between the first and last - set this to
        non-zero to watch the predictor's progress over training.
    """
    dataset = get_synthetic_clusters_dataset(dtype = 'float32')

    if categorical_target:
        out_shape = dataset.n_categories
    else:
        dataset = dataset.process_with(targets_processor=multichannel(OneHotEncoding()))
        out_shape = dataset.target_size

    predictor = predictor_constructor(dataset.input_size, out_shape)
    test_points = np.linspace(0, n_epochs, 2+n_extra_tests)
    record = assess_online_predictor(
        predictor,
        dataset,
        evaluation_function=percent_argmax_correct,
        test_epochs=test_points,
        minibatch_size=minibatch_size,
        accumulator = accumulator,
        test_on = 'test',
        )
    scores = record.get_scores()

    initial_score = scores[0]
    assert initial_score <= initial_score_under, \
        "Initial score was %.2f%%, which was greater than expected (<%.2f%%). That's odd." % (initial_score, initial_score_under)

    final_score = scores[-1]
    assert final_score >= final_score_over, \
        'Achieved a final score of %.2f%%, which was less than the threshold of %.2f%%' % (final_score, final_score_over)
def demo_perceptron_dtp(
        hidden_sizes=(240,),
        n_epochs=20,
        n_tests=20,
        minibatch_size=100,
        lin_dtp=True,
        ):
    """
    Train a DifferenceTargetMLP made of PerceptronLayers on flat, one-hot MNIST and
    plot the resulting learning curves.

    :param hidden_sizes: Sequence (list or tuple) with the size of each hidden layer.
    :param n_epochs: Number of epochs to train for.
    :param n_tests: Number of test points; they are placed with sqrtspace(0, n_epochs, n_tests).
    :param minibatch_size: Minibatch size handed to assess_online_predictor.
    :param lin_dtp: Forwarded to PerceptronLayer.from_initializer for every layer.
    """
    # Copy into a fresh list: the default is immutable, and tuples are accepted too.
    hidden_sizes = list(hidden_sizes)

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        # Keep test runs cheap.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    # Consecutive pairs of sizes give each layer's (n_in, n_out).
    layer_sizes = [dataset.input_size] + hidden_sizes + [dataset.target_size]
    layers = [
        PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp=lin_dtp)
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]
    predictor = DifferenceTargetMLP(layers=layers, output_cost_function=None).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
    )

    plot_learning_curves(result)
def demo_perceptron_dtp(
        hidden_sizes = (240, ),
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        lin_dtp = True,
        ):
    """
    Train a DifferenceTargetMLP made of PerceptronLayers on flat, one-hot MNIST and
    plot the resulting learning curves.

    :param hidden_sizes: Sequence (list or tuple) with the size of each hidden layer.
    :param n_epochs: Number of epochs to train for.
    :param n_tests: Number of test points; they are placed with sqrtspace(0, n_epochs, n_tests).
    :param minibatch_size: Minibatch size handed to assess_online_predictor.
    :param lin_dtp: Forwarded to PerceptronLayer.from_initializer for every layer.
    """
    # Copy into a fresh list: the default is immutable, and tuples are accepted too.
    hidden_sizes = list(hidden_sizes)

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        # Keep test runs cheap.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    # Input sizes and output sizes of successive layers, paired up below.
    sizes_in = [dataset.input_size]+hidden_sizes
    sizes_out = hidden_sizes+[dataset.target_size]
    predictor = DifferenceTargetMLP(
        layers=[
            PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp = lin_dtp)
            for n_in, n_out in zip(sizes_in, sizes_out)
            ],
        output_cost_function = None
        ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        )

    plot_learning_curves(result)
def profile_java_net():
    """
    Profile two epochs of single-sample training of the Java spiking network on a
    1000-sample, one-hot MNIST subset.

    Note: These times are super unreliable for some reason.. A given run can vary by 7s-14s
    for example.  God knows why.

    Version 'old', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 57.100
        Scores at Epoch 2.0: Test: 71.200
        Elapsed time is: 7.866s

    Version 'arr', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 261.1s

    Version 'new', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 8.825s

    :return: Nothing; the profiler prints its result.
    """
    mnist = get_mnist_dataset(flat=True).shorten(1000).to_onehot()

    # Settings of the network under test, gathered in one place.
    net_settings = dict(
        fractional = True,
        depth_first=False,
        smooth_grads = False,
        back_discretize = 'noreset-herding',
        w_init=0.01,
        hold_error=True,
        rng = 1234,
        n_steps = 10,
        eta=0.01,
        layer_sizes=[784]+[200]+[10],
        dtype = 'float'
        )

    with JPypeConnection():
        spiking_net = JavaSpikingNetWrapper.from_init(**net_settings)

        # Only the assessment is timed, not the construction of the network.
        with EZProfiler(print_result=True):
            result = assess_online_predictor(
                predictor = spiking_net,
                dataset=mnist,
                evaluation_function='percent_argmax_correct',
                test_epochs=[0, 1, 2],
                minibatch_size=1,
                test_on='test',
                )
def profile_java_net():
    """
    Profile two epochs of single-sample training of the Java spiking network on a
    1000-sample, one-hot MNIST subset.

    Note: These times are super unreliable for some reason.. A given run can vary by 7s-14s
    for example.  God knows why.

    Version 'old', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 57.100
        Scores at Epoch 2.0: Test: 71.200
        Elapsed time is: 7.866s

    Version 'arr', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 261.1s

    Version 'new', Best:
        Scores at Epoch 0.0: Test: 8.200
        Scores at Epoch 1.0: Test: 58.200
        Scores at Epoch 2.0: Test: 71.500
        Elapsed time is: 8.825s

    :return: Nothing; the profiler prints its result.
    """
    mnist = get_mnist_dataset(flat=True).shorten(1000).to_onehot()

    # Keyword arguments for the assessment, prepared before anything is timed.
    assessment_kwargs = dict(
        dataset=mnist,
        evaluation_function='percent_argmax_correct',
        test_epochs=[0, 1, 2],
        minibatch_size=1,
        test_on='test',
    )

    with JPypeConnection():
        spiking_net = JavaSpikingNetWrapper.from_init(
            fractional=True,
            depth_first=False,
            smooth_grads=False,
            back_discretize='noreset-herding',
            w_init=0.01,
            hold_error=True,
            rng=1234,
            n_steps=10,
            eta=0.01,
            layer_sizes=[784] + [200] + [10],
            dtype='float')

        # Only the assessment is timed, not the construction of the network.
        with EZProfiler(print_result=True):
            result = assess_online_predictor(predictor=spiking_net, **assessment_kwargs)
def test_stretch_minibatches():
    """
    Check that minibatch_size='stretch' produces the same trained predictor as
    minibatch_size=1, while only calling train() twice (as asserted below).
    """

    class _SingleSampleRegressor(IPredictor):
        """Linear regressor that learns one sample at a time and counts calls to train()."""

        def __init__(self, n_in, n_out, eta = 0.1):
            self.w = np.zeros((n_in, n_out))
            self.eta = eta
            self.n_train_calls = 0

        def _predict_one(self, x):
            return x.dot(self.w)

        def _train_one(self, x, y):
            self.w += self.eta * np.outer(x, y)

        def predict(self, x):
            outputs = [self._predict_one(row) for row in x]
            return np.array(outputs)

        def train(self, x, y):
            # One call may carry many samples; they are still consumed one by one.
            for row, target in zip(x, y):
                self._train_one(row, target)
            self.n_train_calls += 1

    dataset = get_synthetic_clusters_dataset().to_onehot()
    n_in, n_out = dataset.input_size, dataset.target_size

    # Reference run: a train() call for every single training sample.
    p1 = _SingleSampleRegressor(n_in, n_out)
    p1_scores = assess_online_predictor(
        p1,
        dataset,
        evaluation_function='percent_argmax_correct',
        test_epochs=[0, .5, 1],
        minibatch_size=1,
        test_on = 'test',
        )
    p1_trained_out = p1.predict(dataset.test_set.input)
    assert p1.n_train_calls == dataset.training_set.n_samples
    scores = p1_scores.get_scores()
    assert len(scores) == 3
    assert scores[0] < 28
    assert scores[2] > 99

    # 'stretch' run: same outputs expected from only two train() calls.
    p2 = _SingleSampleRegressor(n_in, n_out)
    assess_online_predictor(
        p2,
        dataset,
        evaluation_function='percent_argmax_correct',
        test_epochs=[0, 0.5, 1],
        minibatch_size='stretch',
        test_on = 'test',
        )
    p2_trained_out = p2.predict(dataset.test_set.input)
    assert p2.n_train_calls == 2
    assert np.array_equal(p1_trained_out, p2_trained_out)
def assert_online_predictor_not_broken(predictor_constructor,
                                       initial_score_under=35,
                                       final_score_over=95,
                                       n_epochs=1,
                                       minibatch_size='full',
                                       categorical_target=False,
                                       accumulator=None,
                                       n_extra_tests=0):
    """
    Sanity-check a predictor on the synthetic clusters dataset.

    Passing this test does not mean the predictor is good - only that it is not
    completely broken.

    :param predictor_constructor: A callable returning an IPredictor object given (n_dims_in, n_dims_out).
    :param initial_score_under: Assert that the score before training is no greater than this.  On the
        4-cluster dataset chance is 25%, so this mainly guards against cheating.
    :param final_score_over: Assert that the final score is at least this.  It is not hard to reach a
        final score of 100 on this dataset.
    :param n_epochs: How many epochs to train for.
    :param minibatch_size: Minibatch size.  By default, do full-batch training.
    :param categorical_target: If True, the predictor expects integer labels as targets.  Otherwise it
        expects a one-hot encoded vector, with the unit corresponding to the label being True.
    :param accumulator: Passed through unchanged to assess_online_predictor.
    :param n_extra_tests: Number of additional test points between the first and last - set this to
        non-zero to watch the predictor's progress over training.
    """
    dataset = get_synthetic_clusters_dataset()

    if categorical_target:
        out_shape = dataset.n_categories
    else:
        one_hot = multichannel(OneHotEncoding())
        dataset = dataset.process_with(targets_processor=one_hot)
        out_shape = dataset.target_size

    predictor = predictor_constructor(dataset.input_size, out_shape)
    test_points = np.linspace(0, n_epochs, 2 + n_extra_tests)
    record = assess_online_predictor(
        predictor,
        dataset,
        evaluation_function=percent_argmax_correct,
        test_epochs=test_points,
        minibatch_size=minibatch_size,
        accumulator=accumulator,
        test_on='test')
    scores = record.get_scores()

    initial_score = scores[0]
    assert initial_score <= initial_score_under, (
        "Initial score was %.2f%%, which was greater than expected (<%.2f%%). That's odd."
        % (initial_score, initial_score_under))

    final_score = scores[-1]
    assert final_score >= final_score_over, (
        'Achieved a final score of %.2f%%, which was less than the threshold of %.2f%%'
        % (final_score, final_score_over))
def demo_run_dtp_on_mnist(hidden_sizes=(240,),
                          n_epochs=20,
                          n_tests=20,
                          minibatch_size=100,
                          input_activation='sigm',
                          hidden_activation='tanh',
                          output_activation='softmax',
                          optimizer_constructor=lambda: RMSProp(0.001),
                          normalize_inputs=False,
                          local_cost_function=mean_squared_error,
                          output_cost_function=None,
                          noise=1,
                          lin_dtp=False,
                          seed=1234):
    """
    Train a DifferenceTargetMLP on flat, one-hot MNIST, plotting the first-layer
    weights at every test point and the learning curves at the end.

    :param hidden_sizes: Sequence (list or tuple) with the size of each hidden layer.
    :param n_epochs: Number of epochs to train for.
    :param n_tests: Number of test points; they are placed with sqrtspace(0, n_epochs, n_tests).
    :param minibatch_size: Minibatch size handed to assess_online_predictor.
    :param input_activation: Currently unused - it is accepted but not forwarded to the network.
    :param hidden_activation: Forwarded to DifferenceTargetMLP.from_initializer.
    :param output_activation: Forwarded to DifferenceTargetMLP.from_initializer.
    :param optimizer_constructor: Zero-argument callable, forwarded to DifferenceTargetMLP.from_initializer.
    :param normalize_inputs: If True, divide every input row by its sum before training.
    :param local_cost_function: Forwarded as cost_function.
    :param output_cost_function: Forwarded to DifferenceTargetMLP.from_initializer.
    :param noise: Forwarded to DifferenceTargetMLP.from_initializer.
    :param lin_dtp: If True build the network from PreActivationDifferenceTargetLayer, otherwise from
        DifferenceTargetLayer.
    :param seed: Forwarded as rng.
    """
    # Copy into a fresh list: the default is immutable, and tuples are accepted too.
    hidden_sizes = list(hidden_sizes)

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if normalize_inputs:
        # This must go through the *inputs* processor.  It used to be passed as the targets
        # processor, which only rescaled the one-hot targets and left the inputs untouched.
        dataset = dataset.process_with(inputs_processor=multichannel(
            lambda x: x / np.sum(x, axis=1, keepdims=True)))

    if is_test_mode():
        # Keep test runs cheap.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    layer_class = PreActivationDifferenceTargetLayer if lin_dtp else DifferenceTargetLayer

    predictor = DifferenceTargetMLP.from_initializer(
        input_size=dataset.input_size,
        output_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        optimizer_constructor=optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise=noise,
        cost_function=local_cost_function,
        layer_constructor=layer_class.from_initializer,
        rng=seed).compile()

    def plot_first_layer_weights(p):
        # Show each first-layer weight vector as a 28x28 image.
        return dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        test_callback=plot_first_layer_weights)

    plot_learning_curves(result)
def demo_run_dtp_on_mnist(
        hidden_sizes = (240, ),
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        input_activation = 'sigm',
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        optimizer_constructor = lambda: RMSProp(0.001),
        normalize_inputs = False,
        local_cost_function = mean_squared_error,
        output_cost_function = None,
        noise = 1,
        lin_dtp = False,
        seed = 1234
        ):
    """
    Train a DifferenceTargetMLP on flat, one-hot MNIST, plotting the first-layer
    weights at every test point and the learning curves at the end.

    :param hidden_sizes: Sequence (list or tuple) with the size of each hidden layer.
    :param n_epochs: Number of epochs to train for.
    :param n_tests: Number of test points; they are placed with sqrtspace(0, n_epochs, n_tests).
    :param minibatch_size: Minibatch size handed to assess_online_predictor.
    :param input_activation: Currently unused - it is accepted but not forwarded to the network.
    :param hidden_activation: Forwarded to DifferenceTargetMLP.from_initializer.
    :param output_activation: Forwarded to DifferenceTargetMLP.from_initializer.
    :param optimizer_constructor: Zero-argument callable, forwarded to DifferenceTargetMLP.from_initializer.
    :param normalize_inputs: If True, divide every input row by its sum before training.
    :param local_cost_function: Forwarded as cost_function.
    :param output_cost_function: Forwarded to DifferenceTargetMLP.from_initializer.
    :param noise: Forwarded to DifferenceTargetMLP.from_initializer.
    :param lin_dtp: If True build the network from PreActivationDifferenceTargetLayer, otherwise from
        DifferenceTargetLayer.
    :param seed: Forwarded as rng.
    """
    # Copy into a fresh list: the default is immutable, and tuples are accepted too.
    hidden_sizes = list(hidden_sizes)

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if normalize_inputs:
        # This must go through the *inputs* processor.  It used to be passed as the targets
        # processor, which only rescaled the one-hot targets and left the inputs untouched.
        row_normalizer = multichannel(lambda x: x/np.sum(x, axis = 1, keepdims=True))
        dataset = dataset.process_with(inputs_processor=row_normalizer)

    if is_test_mode():
        # Keep test runs cheap.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    if lin_dtp:
        layer_constructor = PreActivationDifferenceTargetLayer.from_initializer
    else:
        layer_constructor = DifferenceTargetLayer.from_initializer

    predictor = DifferenceTargetMLP.from_initializer(
        input_size = dataset.input_size,
        output_size = dataset.target_size,
        hidden_sizes = hidden_sizes,
        optimizer_constructor = optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise = noise,
        cost_function = local_cost_function,
        layer_constructor=layer_constructor,
        rng = seed
        ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        # Show each first-layer weight vector as a 28x28 image at every test point.
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))
        )

    plot_learning_curves(result)
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor on flat MNIST and plot the learning curves.

    :param minibatch_size: Minibatch size handed to assess_online_predictor.
    :param learning_rate: Learning rate given to the named optimizer.
    :param optimizer: Name of the optimizer, resolved through get_named_optimizer.
    :param regressor_type: One of 'multinomial', 'logistic', 'linear'.  For 'logistic' and 'linear'
        the dataset targets are converted to one-hot.
    :param n_epochs: Number of epochs to train for.
    :param n_test_points: Number of test points; they are placed with sqrtspace(0, n_epochs, n_test_points).
    :param max_training_samples: Forwarded to get_mnist_dataset as n_training_samples.
    :param include_biases: Forwarded to OnlineRegressor.
    """
    dataset_kwargs = dict(n_training_samples=max_training_samples, flat = True)
    if is_test_mode():
        # Shrink everything so that the demo finishes quickly under test.
        n_test_points, minibatch_size, n_epochs = 3, 5, 0.01
        dataset_kwargs = dict(n_training_samples=30, n_test_samples=30, flat = True)
    dataset = get_mnist_dataset(**dataset_kwargs)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    # Read the category count before the targets are (possibly) converted to one-hot.
    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    regressor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        )
    predictor = regressor.compile()

    # Train and periodically report the test score.
    test_points = sqrtspace(0, n_epochs, n_test_points)
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=test_points,
        minibatch_size=minibatch_size
        )

    plot_learning_curves(results)
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = (300, ),
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.

    :param minibatch_size: Minibatch size, or 'full' to use the whole training set as one batch.
    :param learning_rate: Learning rate given to the named optimizer.
    :param optimizer: Name of the optimizer, resolved through get_named_optimizer.
    :param hidden_sizes: Sequence (list or tuple) with the size of each hidden layer.
    :param w_init: Forwarded to MultiLayerPerceptron.from_init.
    :param hidden_activation: Forwarded to MultiLayerPerceptron.from_init.
    :param output_activation: Forwarded to MultiLayerPerceptron.from_init.
    :param cost: Forwarded to GradientBasedPredictor as cost_function.
    :param visualize_params: If True, plot the parameters of every layer at each test point.
    :param n_test_points: Number of test points; they are placed with sqrtspace(0, n_epochs, n_test_points).
    :param n_epochs: Number of epochs to train for.
    :param max_training_samples: Forwarded to get_mnist_dataset as n_training_samples.
    :param use_bias: Forwarded to MultiLayerPerceptron.from_init.
    :param onehot: If True, convert the dataset targets to one-hot.
    :param rng: Forwarded to MultiLayerPerceptron.from_init.
    :param plot: If True, plot the learning curves once training is done.
    """
    # Copy into a fresh list: the default is immutable, and tuples are accepted too.
    hidden_sizes = list(hidden_sizes)

    if is_test_mode():
        # Shrink everything so that the demo finishes quickly under test.
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],  # The output layer is fixed at 10 units.
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        # Gather the current parameters of every layer into one dict and plot it.
        p = predictor.symbolic_predictor._function
        # First-layer weights are reshaped so that each unit is shown as a 28x28 image.
        params = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        # Filled in directly, because dict views cannot be joined with "+" on Python 3.
        for i, l in enumerate(p.layers[1:], 1):
            params['Layer[%s].w' % i] = l.linear_transform._w.get_value()
            params['Layer[%s].b' % i] = l.linear_transform._b.get_value()
        dbplot(params)

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
        )

    if plot:
        plot_learning_curves(results)