def test_prelu_param_updates(self):
    # The alpha parameter of both PRelu layers has to differ, element by
    # element, from the value it had before training.
    x_train, _, y_train, _ = simple_classification()

    prelu_layer1 = layers.PRelu(20, alpha=0.25)
    prelu_layer2 = layers.PRelu(1, alpha=0.25)
    tracked_layers = (prelu_layer1, prelu_layer2)

    connection = [layers.Input(10), prelu_layer1, prelu_layer2]
    gdnet = algorithms.GradientDescent(connection)

    # Snapshot alpha values on both sides of the training call
    alphas_before = [layer.alpha.get_value() for layer in tracked_layers]
    gdnet.train(x_train, y_train, epochs=10)
    alphas_after = [layer.alpha.get_value() for layer in tracked_layers]

    for alpha_before, alpha_after in zip(alphas_before, alphas_after):
        self.assertTrue(all(np.not_equal(alpha_before, alpha_after)))
def test_prelu_param_updates(self):
    # Training for 10 epochs has to modify every element of the alpha
    # parameter in both PRelu layers.
    x_train, _, y_train, _ = simple_classification()

    prelu_layer1 = layers.PRelu(20, alpha=0.25)
    prelu_layer2 = layers.PRelu(1, alpha=0.25)

    gdnet = algorithms.GradientDescent(
        [layers.Input(10), prelu_layer1, prelu_layer2]
    )

    def current_alphas():
        # Current alpha values, first layer first
        return [
            prelu_layer1.alpha.get_value(),
            prelu_layer2.alpha.get_value(),
        ]

    before_training = current_alphas()
    gdnet.train(x_train, y_train, epochs=10)
    after_training = current_alphas()

    for old_alpha, new_alpha in zip(before_training, after_training):
        self.assertTrue(all(np.not_equal(old_alpha, new_alpha)))
def test_storage_pickle_save_and_load_during_the_training(self):
    # The connection is pickled from the epoch-end callback; at epoch 4
    # the snapshot stored for epoch 2 is loaded back and StopTraining is
    # raised, so the final error has to match the one recorded at epoch 2.
    import shutil

    tempdir = tempfile.mkdtemp()
    # ``mkdtemp`` leaves removal to the caller, so drop the directory
    # once the test is over, whether it passed or failed.
    self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)

    x_train, x_test, y_train, y_test = simple_classification()
    errors = {}

    def on_epoch_end(network):
        epoch = network.last_epoch
        errors[epoch] = network.prediction_error(x_test, y_test)

        if epoch == 4:
            storage.load_pickle(
                network.connection,
                os.path.join(tempdir, 'training-epoch-2'))
            raise StopTraining('Stop training process after 4th epoch')
        else:
            storage.save_pickle(
                network.connection,
                os.path.join(tempdir, 'training-epoch-{}'.format(epoch)))

    gdnet = algorithms.GradientDescent(
        connection=(10, 4, 1),
        epoch_end_signal=on_epoch_end,
        step=0.5
    )
    gdnet.train(x_train, y_train)

    validation_error = gdnet.prediction_error(x_test, y_test)

    self.assertGreater(errors[2], errors[4])
    self.assertAlmostEqual(validation_error, errors[2])
    self.assertNotAlmostEqual(validation_error, errors[4])
def test_simple_adam(self):
    # Adam with batch_size="full" has to end with a training error of
    # roughly 0.06 (2 decimal places) after 100 epochs.
    x_train, _, y_train, _ = simple_classification()

    adam_options = dict(
        step=15.0,
        batch_size="full",
        verbose=False,
        epsilon=1e-8,
        beta1=0.9,
        beta2=0.999,
    )
    mnet = algorithms.Adam((10, 20, 1), **adam_options)
    mnet.train(x_train, y_train, epochs=100)

    last_training_error = mnet.errors.last()
    self.assertAlmostEqual(0.06, last_training_error, places=2)
def test_batch_norm_storage(self):
    # Saving the network and loading it back must keep the prediction
    # error and the evaluated batch norm running statistics unchanged.
    x_train, x_test, y_train, y_test = simple_classification()

    batch_norm = layers.BatchNorm()
    connection = [
        layers.Input(10),
        layers.Relu(5),
        batch_norm,
        layers.Sigmoid(1),
    ]
    gdnet = algorithms.GradientDescent(
        connection,
        batch_size=10,
        verbose=True,  # keep it as `True`
    )
    gdnet.train(x_train, y_train, epochs=5)

    def snapshot():
        # Test error plus evaluated running mean and running inverse std
        return (
            gdnet.prediction_error(x_test, y_test),
            self.eval(batch_norm.running_mean),
            self.eval(batch_norm.running_inv_std),
        )

    error_before, mean_before, inv_std_before = snapshot()

    with tempfile.NamedTemporaryFile() as temp:
        storage.save(gdnet, temp.name)
        storage.load(gdnet, temp.name)

        error_after, mean_after, inv_std_after = snapshot()

    self.assertAlmostEqual(error_before, error_after)
    np.testing.assert_array_almost_equal(mean_before, mean_after)
    np.testing.assert_array_almost_equal(inv_std_before, inv_std_after)
def test_max_norm_regularizer(self):
    # With the MaxNormRegularization addon and max_norm=2, the norms
    # (rounded to 5 decimals) of the weight and bias of the layer at
    # index 1 are checked inside the epoch_end_signal callback.
    def on_epoch_end(network):
        checked_layer = network.layers[1]

        weight_norm = np.round(
            np.linalg.norm(checked_layer.weight.get_value()), 5)
        bias_norm = np.round(
            np.linalg.norm(checked_layer.bias.get_value()), 5)

        error_message = "Epoch #{}".format(network.last_epoch)
        for parameter_norm in (weight_norm, bias_norm):
            self.assertLessEqual(parameter_norm, 2, msg=error_message)

    connection = [
        layers.Input(10),
        layers.Relu(20),
        layers.Sigmoid(1),
    ]
    mnet = algorithms.Momentum(
        connection,
        step=0.1,
        momentum=0.95,
        verbose=False,
        epoch_end_signal=on_epoch_end,
        max_norm=2,
        addons=[algorithms.MaxNormRegularization],
    )

    x_train, _, y_train, _ = simple_classification()
    mnet.train(x_train, y_train, epochs=100)
def test_full_batch_training(self):
    # Every full-batch identifier has to produce (within 1e-3) the same
    # last training error for each network class, given identical
    # initial weights.
    fullbatch_identifiers = BatchSizeProperty.fullbatch_identifiers
    x_train, _, y_train, _ = simple_classification()

    # Sample the weights once so every network starts from the same point
    xavier_normal = init.XavierNormal()
    weight1 = xavier_normal.sample((10, 20), return_array=True)
    weight2 = xavier_normal.sample((20, 1), return_array=True)

    for network_class in self.network_classes:
        errors = []

        for fullbatch_value in fullbatch_identifiers:
            net = network_class(
                [
                    layers.Input(10),
                    layers.Sigmoid(20, weight=weight1),
                    layers.Sigmoid(1, weight=weight2),
                ],
                batch_size=fullbatch_value,
            )
            net.train(x_train, y_train, epochs=10)

            errors.append(net.errors.last())

        # ``errors`` is a plain list; convert it explicitly so that the
        # subtraction does not depend on the elements being NumPy scalars
        # (list - float raises TypeError).
        error_values = np.asarray(errors)
        self.assertTrue(
            np.all(np.abs(error_values - error_values[0]) < 1e-3),
            msg=errors,
        )
def test_batch_norm_storage(self):
    # A storage.save / storage.load round trip must leave the test error
    # and the batch norm running statistics untouched.
    x_train, x_test, y_train, y_test = simple_classification()

    batch_norm = layers.BatchNorm()
    connection = [
        layers.Input(10),
        layers.Relu(5),
        batch_norm,
        layers.Sigmoid(1),
    ]
    gdnet = algorithms.MinibatchGradientDescent(connection, batch_size=10)
    gdnet.train(x_train, y_train)

    def snapshot():
        # Test error plus current running mean and running inverse std
        return (
            gdnet.prediction_error(x_test, y_test),
            batch_norm.running_mean.get_value(),
            batch_norm.running_inv_std.get_value(),
        )

    error_before, mean_before, inv_std_before = snapshot()

    with tempfile.NamedTemporaryFile() as temp:
        storage.save(gdnet, temp.name)
        storage.load(gdnet, temp.name)

        error_after, mean_after, inv_std_after = snapshot()

    self.assertAlmostEqual(error_before, error_after)
    np.testing.assert_array_almost_equal(mean_before, mean_after)
    np.testing.assert_array_almost_equal(inv_std_before, inv_std_after)
def test_batch_norm_storage(self):
    # The values captured before saving have to match the values
    # captured after the same file is loaded back into the network.
    x_train, x_test, y_train, y_test = simple_classification()

    batch_norm = layers.BatchNorm()
    gdnet = algorithms.MinibatchGradientDescent(
        [layers.Input(10), layers.Relu(5), batch_norm, layers.Sigmoid(1)],
        batch_size=10,
    )
    gdnet.train(x_train, y_train)

    def capture_state():
        # Collected in the order: error, running mean, running inv std
        state = {}
        state['error'] = gdnet.prediction_error(x_test, y_test)
        state['mean'] = batch_norm.running_mean.get_value()
        state['inv_std'] = batch_norm.running_inv_std.get_value()
        return state

    before_save = capture_state()

    with tempfile.NamedTemporaryFile() as temp:
        storage.save(gdnet, temp.name)
        storage.load(gdnet, temp.name)

        after_load = capture_state()

    self.assertAlmostEqual(before_save['error'], after_load['error'])
    np.testing.assert_array_almost_equal(
        before_save['mean'], after_load['mean'])
    np.testing.assert_array_almost_equal(
        before_save['inv_std'], after_load['inv_std'])
def test_storage_save_and_load_during_the_training(self):
    # The connection is saved from the epoch-end callback; at epoch 4 the
    # file stored for epoch 2 is loaded back and StopTraining is raised,
    # so the final error has to match the one recorded at epoch 2.
    import shutil

    tempdir = tempfile.mkdtemp()
    # ``mkdtemp`` leaves removal to the caller, so drop the directory
    # once the test is over, whether it passed or failed.
    self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)

    x_train, x_test, y_train, y_test = simple_classification()
    errors = {}

    def on_epoch_end(network):
        epoch = network.last_epoch
        errors[epoch] = network.prediction_error(x_test, y_test)

        if epoch == 4:
            storage.load(
                network.connection,
                os.path.join(tempdir, 'training-epoch-2'))
            raise StopTraining('Stop training process after 4th epoch')
        else:
            storage.save(
                network.connection,
                os.path.join(tempdir, 'training-epoch-{}'.format(epoch)))

    gdnet = algorithms.GradientDescent(
        connection=(10, 4, 1),
        epoch_end_signal=on_epoch_end,
        step=0.5
    )
    gdnet.train(x_train, y_train)

    validation_error = gdnet.prediction_error(x_test, y_test)

    self.assertGreater(errors[2], errors[4])
    self.assertAlmostEqual(validation_error, errors[2])
    self.assertNotAlmostEqual(validation_error, errors[4])
def test_training_with_multiple_inputs(self):
    # Two input branches (2 and 3 features) get concatenated; the last
    # validation error after 100 epochs has to be roughly 0.14.
    input_branches = [
        layers.Input(2) > layers.Sigmoid(3),
        layers.Input(3) > layers.Sigmoid(5),
    ]
    network = algorithms.GradientDescent(
        [input_branches, layers.Concatenate(), layers.Sigmoid(1)],
        step=0.1,
        verbose=False,
        shuffle_data=True,
    )

    x_train, x_test, y_train, y_test = simple_classification(
        n_samples=100, n_features=5)

    def split_features(data):
        # First two columns feed the first input, the rest the second one
        return [data[:, :2], data[:, 2:]]

    network.train(
        split_features(x_train), y_train,
        split_features(x_test), y_test,
        epochs=100,
    )

    error = network.validation_errors[-1]
    self.assertAlmostEqual(error, 0.14, places=2)
def test_custom_error_functions(self):
    # Smoke test: training with a user-defined error function only has
    # to run through without raising.
    def custom_mse(expected, predicted):
        squared_difference = (predicted - expected) ** 2
        return (0.5 * squared_difference).mean()

    x_train, _, y_train, _ = simple_classification()

    network_structure = (10, 10, 1)
    gdnet = algorithms.GradientDescent(network_structure, error=custom_mse)
    gdnet.train(x_train, y_train)
def test_custom_error_functions(self):
    # No assertions on purpose: passing a custom callable as ``error``
    # and training with it must simply not fail.
    def custom_mse(expected, predicted):
        halved_squares = 0.5 * (predicted - expected) ** 2
        return halved_squares.mean()

    x_train, _, y_train, _ = simple_classification()

    gdnet = algorithms.GradientDescent(
        (10, 10, 1),
        error=custom_mse,
    )
    gdnet.train(x_train, y_train)
def test_gd(self):
    # GradientDescent on a Tanh network has to reach a training error of
    # roughly 0.014 (3 decimal places) after 500 epochs.
    x_train, _, y_train, _ = simple_classification()

    connection = layers.Input(10) > layers.Tanh(20) > layers.Tanh(1)
    network = algorithms.GradientDescent(
        connection,
        step=0.3,
        verbose=False,
    )
    network.train(x_train, y_train, epochs=500)

    final_error = network.errors.last()
    self.assertAlmostEqual(final_error, 0.014, places=3)
def test_gd(self):
    # BaseGradientDescent has to bring the training error below 0.05
    # within 100 epochs.
    environment.reproducible()
    x_train, _, y_train, _ = simple_classification()

    connection = layers.Input(10) > layers.Tanh(20) > layers.Tanh(1)
    network = algorithms.BaseGradientDescent(
        connection,
        step=0.1,
        verbose=False,
    )
    network.train(x_train, y_train, epochs=100)

    final_error = network.errors.last()
    self.assertLess(final_error, 0.05)
def test_gd(self):
    # After 500 epochs the last training error is expected to be about
    # 0.014 (compared to 3 decimal places).
    x_train, _, y_train, _ = simple_classification()

    training_options = dict(step=0.3, verbose=False)
    network = algorithms.GradientDescent(
        layers.Input(10) > layers.Tanh(20) > layers.Tanh(1),
        **training_options
    )
    network.train(x_train, y_train, epochs=500)

    last_error = network.errors.last()
    self.assertAlmostEqual(last_error, 0.014, places=3)
def test_nn_training(self):
    # With verbose=True the captured stdout has to contain the training
    # banner and a dashed separator.
    x_train, x_test, y_train, y_test = simple_classification()

    with catch_stdout() as out:
        gdnet = algorithms.GradientDescent((10, 20, 1), verbose=True)
        gdnet.train(x_train, y_train, x_test, y_test, epochs=4)

        terminal_output = out.getvalue()

    for expected_fragment in ("Start training", "-----"):
        self.assertIn(expected_fragment, terminal_output)
def test_plot_with_validation_dataset(self):
    # Error plot for a network trained with validation data, drawn on
    # the figure supplied by ``image_comparison`` for the reference image.
    original_image = os.path.join(
        IMGDIR, format_image_name("with_validation.png"))

    with image_comparison(original_image) as fig:
        ax = fig.add_subplot(1, 1, 1)

        x_train, x_test, y_train, y_test = simple_classification()

        gdnet = algorithms.GradientDescent((10, 12, 1), step=0.25)
        gdnet.train(x_train, y_train, x_test, y_test, epochs=100)

        gdnet.plot_errors(ax=ax, show=False)
def test_simple_adagrad(self):
    # Adagrad has to push the last validation error below 0.15 within
    # 100 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    adagrad_options = dict(
        step=0.1,
        batch_size='full',
        verbose=False,
        epsilon=1e-5,
    )
    mnet = algorithms.Adagrad((10, 20, 1), **adagrad_options)
    mnet.train(x_train, y_train, x_test, y_test, epochs=100)

    last_validation_error = mnet.validation_errors.last()
    self.assertGreater(0.15, last_validation_error)
def test_simple_adagrad(self):
    # Adagrad has to end with a training error of roughly 0.068
    # (3 decimal places) after 100 epochs.
    x_train, _, y_train, _ = simple_classification()

    adagrad_options = dict(
        step=2.0,
        batch_size='full',
        verbose=False,
        epsilon=1e-5,
    )
    mnet = algorithms.Adagrad((10, 20, 1), **adagrad_options)
    mnet.train(x_train, y_train, epochs=100)

    last_training_error = mnet.errors.last()
    self.assertAlmostEqual(0.068, last_training_error, places=3)
def test_nn_training(self):
    # Verbose training output is captured and checked for the start
    # message and a run of dashes.
    x_train, x_test, y_train, y_test = simple_classification()

    with catch_stdout() as out:
        gdnet = algorithms.GradientDescent((10, 20, 1), verbose=True)
        gdnet.train(x_train, y_train, x_test, y_test, epochs=4)

        terminal_output = out.getvalue()

    expected_fragments = ["Start training", "-----"]
    for fragment in expected_fragments:
        self.assertIn(fragment, terminal_output)
def test_plot_with_validation_dataset(self):
    # ``plots.error_plot`` for a network trained with validation data,
    # drawn on the figure supplied by ``image_comparison``.
    original_image = os.path.join(
        IMGDIR, format_image_name("with_validation.png"))

    with image_comparison(original_image) as fig:
        ax = fig.add_subplot(1, 1, 1)

        x_train, x_test, y_train, y_test = simple_classification()

        gdnet = algorithms.GradientDescent((10, 12, 1), step=0.25)
        gdnet.train(x_train, y_train, x_test, y_test, epochs=100)

        plots.error_plot(gdnet, ax=ax, show=False)
def test_simple_adam(self):
    # Adam has to push the last validation error below 0.2 within
    # 200 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    adam_options = dict(
        step=0.1,
        verbose=True,
        epsilon=1e-4,
        beta1=0.9,
        beta2=0.99,
    )
    mnet = algorithms.Adam((10, 20, 1), **adam_options)
    mnet.train(x_train, y_train, x_test, y_test, epochs=200)

    last_validation_error = mnet.validation_errors.last()
    self.assertGreater(0.2, last_validation_error)
def test_simple_rmsprop(self):
    # RMSProp has to end with a training error of roughly 0.01
    # (2 decimal places) after 100 epochs.
    x_train, _, y_train, _ = simple_classification()

    rmsprop_options = dict(
        step=0.1,
        batch_size='full',
        verbose=False,
        epsilon=1e-5,
        decay=0.9,
    )
    mnet = algorithms.RMSProp((10, 20, 1), **rmsprop_options)
    mnet.train(x_train, y_train, epochs=100)

    last_training_error = mnet.errors.last()
    self.assertAlmostEqual(0.01, last_training_error, places=2)
def test_simple_rmsprop(self):
    # RMSProp has to push the last validation error below 0.11 within
    # 100 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    rmsprop_options = dict(
        step=0.02,
        batch_size='full',
        verbose=False,
        epsilon=1e-5,
        decay=0.9,
    )
    mnet = algorithms.RMSProp((10, 20, 1), **rmsprop_options)
    mnet.train(x_train, y_train, x_test, y_test, epochs=100)

    last_validation_error = mnet.validation_errors.last()
    self.assertGreater(0.11, last_validation_error)
def test_simple_momentum(self):
    # Momentum with nesterov=True has to push the last validation error
    # below 0.15 within 30 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    momentum_options = dict(
        step=0.35,
        momentum=0.99,
        batch_size='full',
        verbose=False,
        nesterov=True,
    )
    mnet = algorithms.Momentum((10, 20, 1), **momentum_options)
    mnet.train(x_train, y_train, x_test, y_test, epochs=30)

    last_validation_error = mnet.validation_errors.last()
    self.assertGreater(0.15, last_validation_error)
def test_simple_adamax(self):
    # Adamax has to end with a training error of roughly 0.05
    # (2 decimal places) after 100 epochs.
    x_train, _, y_train, _ = simple_classification()

    adamax_options = dict(
        step=0.01,
        batch_size='full',
        verbose=False,
        epsilon=1e-8,
        beta1=0.9,
        beta2=0.999,
    )
    mnet = algorithms.Adamax((10, 20, 1), **adamax_options)
    mnet.train(x_train, y_train, epochs=100)

    last_training_error = mnet.errors.last()
    self.assertAlmostEqual(0.05, last_training_error, places=2)
def test_simple_momentum(self):
    # Momentum with nesterov=True has to end with a training error of
    # roughly 0.017 (3 decimal places) after 40 epochs.
    x_train, _, y_train, _ = simple_classification()

    momentum_options = dict(
        step=0.35,
        momentum=0.99,
        batch_size='full',
        verbose=False,
        nesterov=True,
    )
    mnet = algorithms.Momentum((10, 20, 1), **momentum_options)
    mnet.train(x_train, y_train, epochs=40)

    last_training_error = mnet.errors.last()
    self.assertAlmostEqual(0.017, last_training_error, places=3)
def test_simple_adamax(self):
    # Adamax has to push the last training error below 0.15 within
    # 50 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    adamax_options = dict(
        step=0.1,
        batch_size='full',
        verbose=False,
        epsilon=1e-7,
        beta1=0.9,
        beta2=0.999,
    )
    mnet = algorithms.Adamax((10, 20, 1), **adamax_options)
    mnet.train(x_train, y_train, x_test, y_test, epochs=50)

    # NOTE(review): validation data is passed to ``train`` yet the check
    # reads ``errors`` rather than ``validation_errors`` - confirm that
    # this is intended.
    last_training_error = mnet.errors.last()
    self.assertGreater(0.15, last_training_error)
def test_conjgrad(self):
    # ConjugateGradient with the 'fletcher_reeves' update has to give an
    # accuracy of roughly 0.9 (1 decimal place) on the test data.
    cg_options = dict(
        error='binary_crossentropy',
        shuffle_data=True,
        verbose=False,
        update_function='fletcher_reeves',
    )
    cgnet = algorithms.ConjugateGradient((10, 5, 1), **cg_options)

    x_train, x_test, y_train, y_test = simple_classification()
    cgnet.train(x_train, y_train, x_test, y_test, epochs=50)

    rounded_prediction = cgnet.predict(x_test).round()
    # Transpose and take the first row of the rounded output
    predicted_labels = rounded_prediction.T[0]

    accuracy = metrics.accuracy_score(predicted_labels, y_test)
    self.assertAlmostEqual(accuracy, 0.9, places=1)
def test_full_batch_training(self):
    # Every full-batch identifier has to lead to exactly the same last
    # training error for each of the network classes.
    fullbatch_identifiers = BatchSizeProperty.fullbatch_identifiers
    x_train, _, y_train, _ = simple_classification()

    def last_error_for(network_class, fullbatch_value):
        # setUp is re-run before each network is built (presumably to
        # reset shared state such as the random seed - confirm).
        self.setUp()

        net = network_class((10, 20, 1), batch_size=fullbatch_value)
        net.train(x_train, y_train, epochs=10)
        return net.errors.last()

    for network_class in self.network_classes:
        errors = [
            last_error_for(network_class, fullbatch_value)
            for fullbatch_value in fullbatch_identifiers
        ]
        self.assertTrue(all(e == errors[0] for e in errors))
def test_full_batch_training(self):
    # For each network class, all full-batch identifiers are expected to
    # give identical last training errors.
    fullbatch_identifiers = BatchSizeProperty.fullbatch_identifiers
    x_train, _, y_train, _ = simple_classification()

    def train_and_get_error(network_class, batch_size):
        # setUp runs before every network construction (presumably to
        # restore a fixed initial state - confirm).
        self.setUp()

        net = network_class((10, 20, 1), batch_size=batch_size)
        net.train(x_train, y_train, epochs=10)
        return net.errors.last()

    for network_class in self.network_classes:
        errors = []
        for fullbatch_value in fullbatch_identifiers:
            error = train_and_get_error(network_class, fullbatch_value)
            errors.append(error)

        self.assertTrue(all(e == errors[0] for e in errors))
def test_minibatch_gd(self):
    # Hands GradientDescent and MinibatchGradientDescent (batch_size=1)
    # to ``compare_networks`` with identical settings.
    x_train, _, y_train, _ = simple_classification()

    comparison_options = dict(
        # Network configurations
        connection=(layers.Input(10) > layers.Tanh(20) > layers.Tanh(1)),
        step=0.1,
        shuffle_data=True,
        verbose=False,
        # Test configurations
        epochs=40,
        show_comparison_plot=False,
    )

    # Test classes
    first_network = algorithms.GradientDescent
    second_network = partial(
        algorithms.MinibatchGradientDescent, batch_size=1)

    # Test data
    train_data = (x_train, y_train)

    compare_networks(
        first_network, second_network, train_data, **comparison_options)
def test_compare_bp_and_hessian(self):
    # Hands GradientDescent (batch_size='all') and Hessian
    # (penalty_const=1) to ``compare_networks`` with identical settings.
    x_train, x_test, y_train, y_test = simple_classification()

    comparison_options = dict(
        # Network configurations
        connection=(10, 15, 1),
        shuffle_data=True,
        verbose=False,
        show_epoch=1,
        # Test configurations
        epochs=5,
        show_comparison_plot=False,
    )

    # Test classes
    first_network = partial(algorithms.GradientDescent, batch_size='all')
    second_network = partial(algorithms.Hessian, penalty_const=1)

    # Test data
    data = (x_train, y_train, x_test, y_test)

    compare_networks(
        first_network, second_network, data, **comparison_options)
def test_minibatch_gd(self):
    # GradientDescent is compared with MinibatchGradientDescent that
    # uses batch_size=1; both get the same network and test settings.
    x_train, _, y_train, _ = simple_classification()

    # Test classes
    network_classes = (
        algorithms.GradientDescent,
        partial(algorithms.MinibatchGradientDescent, batch_size=1),
    )
    # Test data
    train_data = (x_train, y_train)

    options = dict(
        # Network configurations
        connection=(layers.Input(10) > layers.Tanh(20) > layers.Tanh(1)),
        step=0.1,
        shuffle_data=True,
        verbose=False,
        # Test configurations
        epochs=40,
        show_comparison_plot=False,
    )

    compare_networks(
        network_classes[0], network_classes[1], train_data, **options)
def test_compare_bp_and_hessian(self):
    # Hands GradientDescent and Hessian (penalty_const=1) to
    # ``compare_networks`` with identical settings.
    x_train, x_test, y_train, y_test = simple_classification()

    comparison_options = dict(
        # Network configurations
        connection=(10, 15, 1),
        shuffle_data=True,
        verbose=False,
        show_epoch=1,
        # Test configurations
        epochs=5,
        show_comparison_plot=False,
    )

    # Test classes
    first_network = algorithms.GradientDescent
    second_network = partial(algorithms.Hessian, penalty_const=1)

    # Test data
    data = (x_train, y_train, x_test, y_test)

    compare_networks(
        first_network, second_network, data, **comparison_options)
def test_hessdiag(self):
    # HessianDiagonal trained for 10 epochs on halved inputs has to end
    # with a training error of roughly 0.10 (2 decimal places).
    x_train, x_test, y_train, y_test = simple_classification()

    nw = algorithms.HessianDiagonal(
        connection=[
            layers.Sigmoid(10, init_method='bounded', bounds=(-1, 1)),
            layers.Sigmoid(20, init_method='bounded', bounds=(-1, 1)),
            layers.Output(1)
        ],
        step=0.1,
        shuffle_data=False,
        verbose=False,
        min_eigval=0.01,
    )
    nw.train(x_train / 2, y_train, epochs=10)

    # The prediction was bound to a local that nothing read; the call is
    # kept so that ``predict`` still gets exercised, the result is not
    # inspected.
    nw.predict(x_test)

    self.assertAlmostEqual(0.10, nw.errors.last(), places=2)
def test_storage_with_custom_theano_float_config(self):
    # A network trained and dumped while floatX is 'float32' has to be
    # restored with identical input layer weights after floatX has been
    # switched to 'float64'.
    original_floatx = theano.config.floatX
    # ``theano.config`` is module-level state that outlives this test,
    # so put the initial value back when the test ends (pass or fail).
    self.addCleanup(setattr, theano.config, 'floatX', original_floatx)

    theano.config.floatX = 'float32'

    x_train, x_test, y_train, y_test = simple_classification()
    bpnet = algorithms.GradientDescent((10, 20, 1), step=0.25)
    bpnet.train(x_train, y_train, x_test, y_test)

    with tempfile.NamedTemporaryFile() as temp:
        test_layer_weights = bpnet.input_layer.weight.get_value().copy()
        dill.dump(bpnet, temp)
        # Rewind so that the load below starts at the beginning
        temp.file.seek(0)

        theano.config.floatX = 'float64'
        restored_bpnet = dill.load(temp)
        np.testing.assert_array_equal(
            test_layer_weights,
            restored_bpnet.input_layer.weight.get_value())
def test_nn_training(self):
    # Verbose training with batch_size='all': stdout has to contain the
    # training banner and a dashed separator, and the prediction has to
    # hold as many values as the test target.
    x_train, x_test, y_train, y_test = simple_classification()

    with catch_stdout() as out:
        network_options = dict(verbose=True, batch_size='all')
        gdnet = algorithms.GradientDescent((10, 20, 1), **network_options)
        gdnet.train(x_train, y_train, x_test, y_test, epochs=4)

        y_predicted = gdnet.predict(x_test)
        terminal_output = out.getvalue()

    for expected_fragment in ("Start training", "------"):
        self.assertIn(expected_fragment, terminal_output)

    self.assertEqual(y_predicted.size, y_test.size)
def test_bfgs(self):
    # QuasiNewton without an explicit update function has to give a ROC
    # AUC score of roughly 0.92 (2 decimal places) after 20 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    connection = [
        layers.Sigmoid(10, init_method='ortho'),
        layers.Sigmoid(25, init_method='ortho'),
        layers.Output(1)
    ]
    qnnet = algorithms.QuasiNewton(
        connection=connection,
        shuffle_data=True,
        show_epoch='20 times',
        verbose=False,
    )
    qnnet.train(x_train, y_train, x_test, y_test, epochs=20)

    rounded_prediction = qnnet.predict(x_test).round()
    result = rounded_prediction.astype(int)

    # NOTE(review): if ``metrics`` is sklearn.metrics, roc_auc_score
    # expects (y_true, y_score); the prediction is passed first here -
    # confirm that this order is intended.
    roc_curve_score = metrics.roc_auc_score(result, y_test)
    self.assertAlmostEqual(0.92, roc_curve_score, places=2)
def test_with_minibatch(self):
    # Momentum with batch_size='full' is compared with Momentum that
    # uses batch_size=1; both get the same network and test settings.
    x_train, _, y_train, _ = simple_classification()

    comparison_options = dict(
        # Network configurations
        connection=(10, 20, 1),
        step=0.25,
        momentum=0.1,
        shuffle_data=True,
        verbose=False,
        # Test configurations
        epochs=40,
        show_comparison_plot=False,
    )

    # Test classes
    full_batch_momentum = partial(algorithms.Momentum, batch_size='full')
    single_sample_momentum = partial(algorithms.Momentum, batch_size=1)

    # Test data
    train_data = (x_train, y_train)

    compare_networks(
        full_batch_momentum,
        single_sample_momentum,
        train_data,
        **comparison_options
    )
def test_storage_with_custom_theano_float_config(self):
    # A network trained and dumped while floatX is 'float32' has to be
    # restored with identical input layer weights after floatX has been
    # switched to 'float64'.
    original_floatx = theano.config.floatX
    # ``theano.config`` is module-level state that outlives this test,
    # so put the initial value back when the test ends (pass or fail).
    self.addCleanup(setattr, theano.config, 'floatX', original_floatx)

    theano.config.floatX = 'float32'

    x_train, x_test, y_train, y_test = simple_classification()
    bpnet = algorithms.GradientDescent((10, 20, 1), step=0.25)
    bpnet.train(x_train, y_train, x_test, y_test)

    with tempfile.NamedTemporaryFile() as temp:
        test_layer_weights = bpnet.input_layer.weight.get_value().copy()
        dill.dump(bpnet, temp)
        # Rewind so that the load below starts at the beginning
        temp.file.seek(0)

        theano.config.floatX = 'float64'
        restored_bpnet = dill.load(temp)
        np.testing.assert_array_equal(
            test_layer_weights,
            restored_bpnet.input_layer.weight.get_value()
        )
def test_bfgs(self):
    # QuasiNewton without an explicit update function has to give a ROC
    # AUC score of roughly 0.92 (2 decimal places) after 20 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    connection = [
        layers.Input(10),
        layers.Sigmoid(30, init_method='ortho'),
        layers.Sigmoid(1, init_method='ortho'),
    ]
    qnnet = algorithms.QuasiNewton(
        connection=connection,
        shuffle_data=True,
        show_epoch='20 times',
        verbose=False,
    )
    qnnet.train(x_train, y_train, x_test, y_test, epochs=20)

    rounded_prediction = qnnet.predict(x_test).round()
    result = rounded_prediction.astype(int)

    # NOTE(review): if ``metrics`` is sklearn.metrics, roc_auc_score
    # expects (y_true, y_score); the prediction is passed first here -
    # confirm that this order is intended.
    roc_curve_score = metrics.roc_auc_score(result, y_test)
    self.assertAlmostEqual(0.92, roc_curve_score, places=2)
def test_quasi_newton_sr1(self):
    # QuasiNewton with update_function="sr1" has to give a ROC AUC score
    # of roughly 0.92 (2 decimal places) after 10 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    connection = [
        layers.Input(10),
        layers.Sigmoid(30, weight=init.Orthogonal()),
        layers.Sigmoid(1, weight=init.Orthogonal()),
    ]
    qnnet = algorithms.QuasiNewton(
        connection=connection,
        shuffle_data=True,
        show_epoch=20,
        verbose=False,
        update_function="sr1",
        h0_scale=2,
    )
    qnnet.train(x_train, y_train, x_test, y_test, epochs=10)

    prediction = qnnet.predict(x_test)
    result = prediction.round()

    # NOTE(review): if ``metrics`` is sklearn.metrics, roc_auc_score
    # expects (y_true, y_score); the prediction is passed first here -
    # confirm that this order is intended.
    roc_curve_score = metrics.roc_auc_score(result, y_test)
    self.assertAlmostEqual(0.92, roc_curve_score, places=2)
def test_compare_bp_and_hessian(self):
    # Hands GradientDescent and HessianDiagonal (min_eigval=0.01) to
    # ``compare_networks`` with identical settings.
    x_train, _, y_train, _ = simple_classification()

    comparison_options = dict(
        # Network configurations
        connection=[
            layers.Sigmoid(10, init_method='bounded', bounds=(-1, 1)),
            layers.Sigmoid(20, init_method='bounded', bounds=(-1, 1)),
            layers.Output(1)
        ],
        step=0.1,
        shuffle_data=True,
        verbose=False,
        # Test configurations
        epochs=50,
        show_comparison_plot=False,
    )

    # Test classes
    first_network = algorithms.GradientDescent
    second_network = partial(algorithms.HessianDiagonal, min_eigval=0.01)

    # Test data
    train_data = (x_train, y_train)

    compare_networks(
        first_network, second_network, train_data, **comparison_options)
def test_quasi_newton_psb(self):
    # QuasiNewton with update_function='psb' has to give a ROC AUC score
    # of roughly 0.92 (2 decimal places) after 10 epochs.
    x_train, x_test, y_train, y_test = simple_classification()

    connection = [
        layers.Input(10),
        layers.Sigmoid(30, init_method='ortho'),
        layers.Sigmoid(1, init_method='ortho'),
    ]
    qnnet = algorithms.QuasiNewton(
        connection=connection,
        shuffle_data=True,
        show_epoch=20,
        verbose=False,
        update_function='psb',
        h0_scale=2,
        gradient_tol=1e-10,
    )
    qnnet.train(x_train, y_train, x_test, y_test, epochs=10)

    prediction = qnnet.predict(x_test)
    result = prediction.round()

    # NOTE(review): if ``metrics`` is sklearn.metrics, roc_auc_score
    # expects (y_true, y_score); the prediction is passed first here -
    # confirm that this order is intended.
    roc_curve_score = metrics.roc_auc_score(result, y_test)
    self.assertAlmostEqual(0.92, roc_curve_score, places=2)