def test_neuralNetwork_sgd():
    """Check that NeuralNetwork.sgd() applies the same first parameter
    update as sklearn's SGDOptimizer (nesterov off, zero initial velocity)."""
    from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1,
                       neurons=3,
                       outputs=1,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    eta = 0.001
    prediction = nn.forward_pass(X)
    nn.backpropagation(prediction.T, target.T)
    nn.learning_rate = eta

    # Snapshot the parameters before stepping so the applied update can be
    # recovered afterwards as (after - before).
    params_before = copy.deepcopy(nn.weights + nn.biases)
    nn.sgd()
    gradients = nn.d_weights + nn.d_biases
    params_after = nn.weights + nn.biases
    applied_updates = [after - before
                       for after, before in zip(params_after, params_before)]

    reference = SGDOptimizer(params=params_before,
                             learning_rate_init=eta,
                             nesterov=False,
                             momentum=1.0)
    expected_updates = reference._get_updates(gradients)

    for ours, theirs in zip(applied_updates, expected_updates):
        assert ours == pytest.approx(theirs)
def test_neuralNetwork_adam():
    """Check that NeuralNetwork.adam() produces the same parameter update
    as sklearn's AdamOptimizer for one optimization step."""
    from sklearn.neural_network._stochastic_optimizers import AdamOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1,
                       neurons=3,
                       outputs=1,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    eta = 0.001
    prediction = nn.forward_pass(X)
    nn.backpropagation(prediction.T, target.T)
    nn.learning_rate = eta

    # One Adam step with our implementation; nn.param / nn.grad / nn.change
    # are populated by initializeAdam() + adam().
    nn.initializeAdam()
    nn.adam()

    reference = AdamOptimizer(params=nn.param, learning_rate_init=eta)
    expected_updates = reference._get_updates(nn.grad)

    for ours, theirs in zip(nn.change, expected_updates):
        assert ours == pytest.approx(theirs)
def test_neuralNetwork_backpropagation_multiple_outputs():
    """Backpropagation gradients must match sklearn's MLPRegressor when the
    data has several samples/features, X of shape (n_samples, n_features)
    = (3, 2), and the target has n_outputs = 2.
    """
    from sklearn.neural_network import MLPRegressor

    X = np.array([[0, 1], [1, 2], [2, 3]])
    y = np.array([[0, 1], [2, 3], [3, 4]])

    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       learning_rate='constant',
                       learning_rate_init=1e-20,
                       max_iter=1,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Fitting once forces sklearn to allocate its weight/bias matrices.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)
    skl_weights = mlp.coefs_
    skl_biases = mlp.intercepts_

    # Build our network with the exact same parameters as sklearn's.
    nn = NeuralNetwork(inputs=2,
                       outputs=2,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')
    for layer, w in enumerate(skl_weights):
        nn.weights[layer] = w
    for layer, b in enumerate(skl_biases):
        nn.biases[layer] = np.expand_dims(b, axis=1)

    # ========================================================================
    # Replicate the scratch buffers that MLPRegressor._backprop expects,
    # mirroring the setup done inside sklearn's own fit path.
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])

    activations = [X]
    activations.extend(np.empty((batch_size, n_fan_out))
                       for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [np.empty((n_fan_in_, n_fan_out_))
                  for n_fan_in_, n_fan_out_ in zip(layer_units[:-1],
                                                   layer_units[1:])]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================

    activations = mlp._forward_pass(activations)
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    loss, coef_grads, intercept_grads = mlp._backprop(
        X, y, activations, deltas, coef_grads, intercept_grads)

    # Our network takes samples as columns, hence the transposes.
    yhat = nn.forward_pass(X.T)
    nn.backpropagation(yhat.T, y)

    for layer, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[layer]))
    for layer, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(
            np.squeeze(coef_grads[layer]))
def test_neuralNetwork_backpropagation():
    """Backpropagation gradients must match sklearn's MLPRegressor for a
    single-input, single-output network with randomized parameters.

    We re-use the test_neuralNetwork_network networks and this time check
    that the computed backpropagation derivatives are equal.
    """
    from sklearn.neural_network import MLPRegressor

    X = [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]]
    y = [0, 2, 4, 6, 8, 10]
    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    # Throw away all the fitted values, randomize W and b matrices.
    np.random.seed(18)
    for i, coeff in enumerate(mlp.coefs_):
        mlp.coefs_[i] = np.random.normal(size=coeff.shape)
    for i, bias in enumerate(mlp.intercepts_):
        mlp.intercepts_[i] = np.random.normal(size=bias.shape)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    # silent=True for consistency with the other tests in this file, so the
    # test run is not polluted with network output.
    nn = NeuralNetwork(inputs=1,
                       outputs=1,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')
    nn.weights = W_skl
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # From the sklearn source, we need to set up some lists to use the
    # _backprop function in MLPRegressor, see:
    #
    # https://github.com/scikit-learn/scikit-learn/blob/bac89c2/sklearn/neural_network/multilayer_perceptron.py#L355
    #
    # ========================================================================
    # Initialize lists
    X = np.array([[1.125982598]])
    y = np.array([8.29289285])
    mlp.predict(X)

    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    # Make sure self.hidden_layer_sizes is a list
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])

    activations = [X]
    activations.extend(np.empty((batch_size, n_fan_out))
                       for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [np.empty((n_fan_in_, n_fan_out_))
                  for n_fan_in_, n_fan_out_ in zip(layer_units[:-1],
                                                   layer_units[1:])]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================

    activations = mlp._forward_pass(activations)
    loss, coef_grads, intercept_grads = mlp._backprop(
        X, y, activations, deltas, coef_grads, intercept_grads)

    yhat = nn(X)
    nn.backpropagation(yhat, y)

    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))
    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(
            np.squeeze(coef_grads[i]))