def test_neuralNetwork_sgd():
    from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1,
                       neurons=3,
                       outputs=1,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    learning_rate = 0.001

    yhat = nn.forward_pass(X)
    nn.backpropagation(yhat.T, target.T)
    nn.learning_rate = learning_rate

    # Take one SGD step and record how every weight and bias changed.
    initial_params = copy.deepcopy(nn.weights + nn.biases)
    nn.sgd()
    grad = nn.d_weights + nn.d_biases
    params = nn.weights + nn.biases
    change = [p - i_p for p, i_p in zip(params, initial_params)]

    # The changes should match the updates computed by sklearn's optimizer.
    skl_sgd = SGDOptimizer(params=initial_params,
                           learning_rate_init=learning_rate,
                           nesterov=False,
                           momentum=1.0)
    upd = skl_sgd._get_updates(grad)

    for update_nn, update_skl in zip(change, upd):
        assert update_nn == pytest.approx(update_skl)
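# For reference, sklearn's SGDOptimizer (with nesterov=False) computes
# velocity = momentum * velocity - learning_rate * grad and returns the
# velocities as the updates. The velocities start at zero, so the first
# update reduces to plain gradient descent, -learning_rate * grad, for any
# momentum value; that is what makes a single nn.sgd() step directly
# comparable above. A minimal sketch of that first-step update
# (hypothetical helper, not part of the test suite):
def _plain_sgd_first_update(grads, learning_rate):
    """First SGD update with zero initial velocities."""
    return [-learning_rate * g for g in grads]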
def test_neuralNetwork_adam():
    from sklearn.neural_network._stochastic_optimizers import AdamOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1,
                       neurons=3,
                       outputs=1,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    learning_rate = 0.001

    yhat = nn.forward_pass(X)
    nn.backpropagation(yhat.T, target.T)
    nn.learning_rate = learning_rate

    # Take one Adam step with our implementation.
    nn.initializeAdam()
    nn.adam()

    # The recorded parameter changes should match sklearn's Adam updates.
    skl_adam = AdamOptimizer(params=nn.param, learning_rate_init=learning_rate)
    upd = skl_adam._get_updates(nn.grad)

    for update_nn, update_skl in zip(nn.change, upd):
        assert update_nn == pytest.approx(update_skl)
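# For reference, the update rule being validated is Adam (Kingma & Ba,
# 2014): bias-corrected first and second moment estimates scale each step.
# A minimal single-array sketch, assuming sklearn's default
# hyperparameters (hypothetical helper, not part of the test suite):
def _adam_update(m, v, grad, t, learning_rate,
                 beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    """One Adam step; returns updated moments and the parameter update."""
    m = beta_1 * m + (1 - beta_1) * grad        # first moment estimate
    v = beta_2 * v + (1 - beta_2) * grad ** 2   # second moment estimate
    # Fold both bias corrections into an effective learning rate.
    lr_t = learning_rate * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    return m, v, -lr_t * m / (np.sqrt(v) + epsilon)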
def test_neuralNetwork_backpropagation_multiple_outputs():
    # Similar to the test_neuralNetwork_backpropagation() test, but with
    # multiple samples and features, X having dimensions
    # (n_samples, n_features) = (3, 2), as well as the target having
    # dimensions (n_outputs) = (2).
    from sklearn.neural_network import MLPRegressor

    X = np.array([[0, 1], [1, 2], [2, 3]])
    y = np.array([[0, 1], [2, 3], [3, 4]])

    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       learning_rate='constant',
                       learning_rate_init=1e-20,
                       max_iter=1,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')

    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    nn = NeuralNetwork(inputs=2,
                       outputs=2,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    # Copy the sklearn weights and biases into our network so both nets
    # compute identical forward passes.
    for i, w in enumerate(W_skl):
        nn.weights[i] = w
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # ========================================================================
    # Replicate the setup sklearn performs internally before _backprop,
    # allocating the activation, delta, and gradient arrays.
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = [n_features] + hidden_layer_sizes + [mlp.n_outputs_]
    activations = [X]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================

    activations = mlp._forward_pass(activations)
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    loss, coef_grads, intercept_grads = mlp._backprop(
        X, y, activations, deltas, coef_grads, intercept_grads)

    yhat = nn.forward_pass(X.T)
    nn.backpropagation(yhat.T, y)

    # The gradients of both implementations should agree.
    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))
    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(np.squeeze(coef_grads[i]))
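# An implementation-independent complement to the sklearn comparison is a
# central finite-difference check of individual gradient entries. A
# minimal sketch (hypothetical `loss` closure that recomputes the network
# loss after w is mutated in place; not part of the test suite):
def _numerical_gradient(loss, w, i, j, h=1e-5):
    """Central-difference estimate of d(loss)/d(w[i, j])."""
    w[i, j] += h
    loss_plus = loss()
    w[i, j] -= 2 * h
    loss_minus = loss()
    w[i, j] += h  # restore the original value
    return (loss_plus - loss_minus) / (2 * h)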
def trainNeuralNetwork(reload=False, wantDeskew=True):
    # Get the MNIST data, either by loading saved data or getting it new.
    X_train, labels_train, X_test, labels_test = get_data(reload, wantDeskew)
    print("X_train.shape", X_train.shape)
    print("labels_train.shape", labels_train.shape)

    # ___________ Initialize the neural network. ____________
    neural_net = NeuralNetwork(n_in=X_train.shape[1],
                               n_hid=1200,
                               n_out=10,
                               eta=0.1,
                               decay_const=0.8,
                               alpha=0.9,
                               l2=0.07,
                               batch_size=50,
                               n_epochs=15)

    # Give training data to neural_net to store and further preprocess.
    neural_net.set_data(X_train, labels_train)

    # Create arrays for plotting results.
    accuracy = []
    loss = []

    # Split the training indices into minibatches.
    n_train = int(50e3)
    n_batches = n_train // neural_net.batch_size
    batches = np.array_split(np.arange(n_train), n_batches)
    print("Splitting into", len(batches), "batches of size",
          neural_net.batch_size)

    epochs = np.arange(neural_net.n_epochs)
    n_iter_total = len(epochs) * len(batches)
    x_axis = np.arange(0, n_iter_total, n_iter_total // 100)
    print("Preparing to collect", len(x_axis), "points to plot.")
    print("Beginning", n_iter_total,
          "iterations of minibatch gradient descent.")

    for i in epochs:
        print('\n========== EPOCH {} =========='.format(i))
        neural_net.new_epoch(i)
        for j, batch in enumerate(batches):
            # Tell the neural net which data to train with.
            neural_net.set_active(batch)
            # Calculate values along the feedforward pass.
            X, S_h, H, S_o, O = neural_net.forward_pass()
            # Record the training loss and accuracy at ~100 evenly spaced
            # iterations for plotting.
            if i * len(batches) + j in x_axis:
                print(".", end=" ")
                sys.stdout.flush()
                loss.append(neural_net.get_loss())
                accuracy.append(neural_net.train_accuracy())
            # Update weights via backprop.
            neural_net.train(X, H, O)

    # Save Kaggle predictions in a CSV file. (Toggle this block off to
    # skip writing predictions.)
    if True:
        pred_labels_test = neural_net.predict_test(util.preprocess(X_test))
        Id = np.reshape(np.arange(1, 1 + X_test.shape[0]),
                        (X_test.shape[0], 1))
        Category = np.reshape(pred_labels_test, (X_test.shape[0], 1))
        columns = np.hstack((Id, Category))
        np.savetxt('predictions.csv', columns, delimiter=',',
                   header='Id, Category', fmt='%d')

    neural_net.print_results()
    util.plot_error(x_axis, loss, accuracy, neural_net.get_params())
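# Example invocation, assuming the module is run as a script and the MNIST
# files expected by get_data() are in place:
if __name__ == '__main__':
    trainNeuralNetwork(reload=False, wantDeskew=True)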