hidden_layer_sizes = list(hidden_layer_sizes)
layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
activations = [X]
activations.extend(np.empty((batch_size, n_fan_out))
                   for n_fan_out in layer_units[1:])
deltas = [np.empty_like(a_layer) for a_layer in activations]
coef_grads = [np.empty((n_fan_in_, n_fan_out_))
              for n_fan_in_, n_fan_out_ in zip(layer_units[:-1],
                                               layer_units[1:])]
intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
# ==========================================================================

print(mlp.out_activation_)

activations = mlp._forward_pass(activations)
loss, coef_grads, intercept_grads = mlp._backprop(
    X, target, activations, deltas, coef_grads, intercept_grads)

nn = NeuralNetwork(X_data=X,
                   Y_data=target,
                   n_hidden_neurons=3,
                   n_categories=1,
                   activation_func_out='identity',
                   activation_func='relu',
                   cost_func='MSE')

# Copy the weights and biases from the scikit-learn network to your own.
nn.hidden_weights, nn.output_weights = mlp.coefs_
nn.hidden_bias, nn.output_bias = mlp.intercepts_
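# --------------------------------------------------------------------------
# A sketch of how the comparison could be finished from here. The method and
# attribute names used on the custom network below (feed_forward,
# backpropagation, *_weights_gradient, *_bias_gradient) are assumptions about
# its API and are not taken from the snippet above.
nn.feed_forward()        # hypothetical: forward pass over nn.X_data
nn.backpropagation()     # hypothetical: fills the gradient attributes below

# sklearn's _backprop averages gradients over the batch; depending on whether
# the custom network sums or averages, a 1/n_samples factor may be needed.
assert np.allclose(nn.hidden_weights_gradient, coef_grads[0])
assert np.allclose(nn.output_weights_gradient, coef_grads[1])
assert np.allclose(nn.hidden_bias_gradient, intercept_grads[0])
assert np.allclose(nn.output_bias_gradient, intercept_grads[1])
# --------------------------------------------------------------------------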
def test_gradient_mlpregressor(self):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
    y = numpy.arange(X.shape[0]).astype(numpy.float32)
    y = y.reshape((-1, 1))
    reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                       activation='logistic',
                       momentum=0, nesterovs_momentum=False,
                       alpha=0)
    reg.fit(X, y.ravel())

    onx = to_onnx(reg, X, target_opset=opset)
    onx = onnx_rename_weights(onx)
    inits = ["I0_coefficient", 'I1_intercepts', 'I2_coefficient1',
             'I3_intercepts1']

    xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
        numpy.float32) / 10
    yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, learning_rate=1e-5,
        warm_start=True, max_iter=2, batch_size=10)
    train_session.fit(X, y)
    state = train_session.get_state()
    state_np = [st.numpy() for st in state]

    # gradient scikit-learn
    coef_grads = state_np[::2]
    intercept_grads = state_np[1::2]
    layer_units = [3, 5, 1]
    activations = [xp] + [None] * (len(layer_units) - 1)
    deltas = [None] * (len(activations) - 1)

    skl_pred = reg.predict(xp)

    batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
        xp, yp, activations, deltas, coef_grads, intercept_grads)
    deltas = activations[-1] - yp

    # gradient onnxcustom
    ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
    ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
    ort_state = [ort_xp] + state
    prediction = train_session.train_function_.forward(
        ort_state, training=True)

    ort_pred = prediction[0].numpy()
    self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

    loss, loss_gradient = train_session.learning_loss.loss_gradient(
        train_session.device, ort_yp, prediction[0])

    gradient = train_session.train_function_.backward([loss_gradient])

    # comparison
    self.assertEqualArray(
        batch_loss, loss.numpy() / xp.shape[0], decimal=3)
    self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

    # do not use iterator for gradient, it may crash
    ort_grad = [gradient[i].numpy() / xp.shape[0]
                for i in range(len(gradient))][1:]
    self.assertEqualArray(
        intercept_grads[1], ort_grad[3].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
    self.assertEqualArray(
        intercept_grads[0], ort_grad[1].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
def test_neuralNetwork_backpropagation():
    # We re-use the test_neuralNetwork_network networks and this time check
    # that the computed backpropagation derivatives are equal.
    from sklearn.neural_network import MLPRegressor

    X = [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]]
    y = [0, 2, 4, 6, 8, 10]
    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    # Throw away all the fitted values, randomize W and b matrices.
    np.random.seed(18)
    for i, coeff in enumerate(mlp.coefs_):
        mlp.coefs_[i] = np.random.normal(size=coeff.shape)
    for i, bias in enumerate(mlp.intercepts_):
        mlp.intercepts_[i] = np.random.normal(size=bias.shape)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    nn = NeuralNetwork(inputs=1,
                       outputs=1,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=False)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    nn.weights = W_skl
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # From the sklearn source, we need to set up some lists to use the
    # _backprop function in MLPRegressor, see:
    #
    # https://github.com/scikit-learn/scikit-learn/blob/bac89c2/sklearn/neural_network/multilayer_perceptron.py#L355
    #
    # ========================================================================
    # Initialize lists
    X = np.array([[1.125982598]])
    y = np.array([8.29289285])
    mlp.predict(X)
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    # Make sure self.hidden_layer_sizes is a list
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
    activations = [X]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================

    activations = mlp._forward_pass(activations)
    loss, coef_grads, intercept_grads = mlp._backprop(X, y, activations,
                                                      deltas, coef_grads,
                                                      intercept_grads)

    yhat = nn(X)
    nn.backpropagation(yhat, y)

    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))
    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(np.squeeze(coef_grads[i]))
def test_neuralNetwork_backpropagation_multiple_outputs():
    # Similar to the test_neuralNetwork_backpropagation() test, but with
    # multiple samples and features, X having dimensions
    # (n_samples, n_features) = (3,2), as well as the target having
    # dimensions (n_outputs) = (2).
    from sklearn.neural_network import MLPRegressor

    X = np.array([[0, 1], [1, 2], [2, 3]])
    y = np.array([[0, 1], [2, 3], [3, 4]])
    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       learning_rate='constant',
                       learning_rate_init=1e-20,
                       max_iter=1,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    nn = NeuralNetwork(inputs=2,
                       outputs=2,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    for i, w in enumerate(W_skl):
        nn.weights[i] = w
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # ========================================================================
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
    activations = [X]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================

    activations = mlp._forward_pass(activations)
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    loss, coef_grads, intercept_grads = mlp._backprop(X, y, activations,
                                                      deltas, coef_grads,
                                                      intercept_grads)

    yhat = nn.forward_pass(X.T)
    nn.backpropagation(yhat.T, y)

    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))
    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(np.squeeze(coef_grads[i]))
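# --------------------------------------------------------------------------
# For reference, the output-layer quantities the two tests above compare. With
# an identity output activation, squared loss and alpha=0, sklearn's _backprop
# computes (gradients averaged over the batch):
#
#     delta_out           = y_hat - y
#     coef_grads[-1]      = a_hidden.T @ delta_out / n_samples
#     intercept_grads[-1] = delta_out.mean(axis=0)
#
# A self-contained sketch of that rule on made-up arrays (not part of the
# original tests):
import numpy as np

a_hidden = np.array([[0.2, 0.7, 0.1],
                     [0.5, 0.4, 0.9],
                     [0.3, 0.8, 0.6]])                    # last hidden layer, (3, 3)
y_hat = np.array([[0.1, 0.9], [1.8, 3.2], [2.9, 4.1]])    # predictions, (3, 2)
y_true = np.array([[0.0, 1.0], [2.0, 3.0], [3.0, 4.0]])   # targets, (3, 2)

delta_out = y_hat - y_true
coef_grad_out = a_hidden.T @ delta_out / y_true.shape[0]  # shape (3, 2)
intercept_grad_out = delta_out.mean(axis=0)               # shape (2,)
print(coef_grad_out.shape, intercept_grad_out.shape)
# --------------------------------------------------------------------------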
def test_regressor(X_train, y_train, X_test, y_test, nn_layers,
                   sk_hidden_layers, input_activation, output_activation,
                   alpha=0.0):
    if input_activation == "sigmoid":
        sk_input_activation = "logistic"
    else:
        sk_input_activation = input_activation
    if output_activation == "sigmoid":
        sk_output_activation = "logistic"
    else:
        sk_output_activation = output_activation

    mlp = MLPRegressor(
        solver='sgd',                         # Stochastic gradient descent.
        activation=sk_input_activation,       # Skl name for sigmoid.
        alpha=alpha,                          # No regularization for simplicity.
        hidden_layer_sizes=sk_hidden_layers)  # Full NN size is (1,3,3,1).
    mlp.out_activation_ = sk_output_activation

    # Force sklearn to set up all the necessary matrices by fitting a data
    # set. We don't care if it converges or not, so let's ignore raised
    # warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X_train, y_train)

    # =====================================================================
    n_samples, n_features = X_train.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
    activations = [X_test]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [
        np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]
    ]
    # =====================================================================

    mlp.out_activation_ = sk_output_activation
    activations = mlp._forward_pass(activations)
    loss, coef_grads, intercept_grads = mlp._backprop(
        X_test, y_test, activations, deltas, coef_grads, intercept_grads)

    # Instantiate my own MLP.
    nn = MultilayerPerceptron(nn_layers,
                              activation=input_activation,
                              output_activation=output_activation,
                              alpha=alpha)

    # Copy the weights and biases from the scikit-learn network to your
    # own.
    for i, w in enumerate(mlp.coefs_):
        nn.weights[i] = cp.deepcopy(w.T)
    for i, b in enumerate(mlp.intercepts_):
        nn.biases[i] = cp.deepcopy(b.T.reshape(-1, 1))

    # Call your own backpropagation function, and you're ready to compare
    # with the scikit-learn code.
    y_sklearn = mlp.predict(X_test)
    y = nn.predict(cp.deepcopy(X_test).T)

    # Assert that the forward pass is correct.
    assert np.allclose(y, y_sklearn), ("Prediction {} != {}".format(
        y, y_sklearn))

    delta_w, delta_b = nn._back_propagate(X_test.T, y_test)

    # Assert that the activations are correct in back propagation.
    for i, a in enumerate(nn.activations):
        print(i, a.T, activations[i])
        assert np.allclose(a.T, activations[i]), "error in layer {}".format(i)
    else:
        print("Activations are correct.")

    # Assert that the bias derivatives are correct in back propagation.
    for i, derivative_bias in enumerate(delta_b):
        print(i, derivative_bias.T, intercept_grads[i])
        assert np.allclose(
            derivative_bias.T,
            intercept_grads[i]), ("error in layer {}".format(i))
    else:
        print("Biases derivatives are correct.")

    # Assert that the weight derivatives are correct in back propagation.
    for i, derivative_weight in enumerate(delta_w):
        print(i, derivative_weight.T, coef_grads[i])
        assert np.allclose(derivative_weight.T,
                           coef_grads[i]), "error in layer {}".format(i)
    else:
        print("Weight derivatives are correct.")

    print("Test complete\n")
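# --------------------------------------------------------------------------
# A possible way to invoke test_regressor; the data and the 1-3-3-1 layout are
# made up for illustration, and nn_layers is assumed to take the layer sizes
# as a list (an assumption about the MultilayerPerceptron API).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_train = rng.uniform(size=(100, 1))
    y_train = 2.0 * X_train.ravel() + 1.0
    X_test = rng.uniform(size=(100, 1))
    y_test = 2.0 * X_test.ravel() + 1.0

    test_regressor(X_train, y_train, X_test, y_test,
                   nn_layers=[1, 3, 3, 1],       # assumed format
                   sk_hidden_layers=(3, 3),
                   input_activation="sigmoid",
                   output_activation="identity")
# --------------------------------------------------------------------------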