Example #1
# Assumes numpy imported as np, an input batch X of shape (n_samples, n_features),
# a matching target array, and a fitted MLPRegressor `mlp`.
# Set up the scratch lists that sklearn's private _backprop expects,
# mirroring the initialization sklearn performs internally (see Examples #3-#5).
n_samples, n_features = X.shape
batch_size = n_samples
hidden_layer_sizes = mlp.hidden_layer_sizes
if not hasattr(hidden_layer_sizes, "__iter__"):
    hidden_layer_sizes = [hidden_layer_sizes]
hidden_layer_sizes = list(hidden_layer_sizes)
layer_units = [n_features] + hidden_layer_sizes + [mlp.n_outputs_]
activations = [X]
activations.extend(np.empty((batch_size, n_fan_out))
                   for n_fan_out in layer_units[1:])
deltas = [np.empty_like(a_layer) for a_layer in activations]
coef_grads = [np.empty((n_fan_in_, n_fan_out_))
              for n_fan_in_, n_fan_out_ in zip(layer_units[:-1],
                                               layer_units[1:])]
intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
# ==========================================================================

# MLPRegressor sets out_activation_ to 'identity' after fitting.
print(mlp.out_activation_)

activations = mlp._forward_pass(activations)
loss, coef_grads, intercept_grads = mlp._backprop(
    X, target, activations, deltas, coef_grads, intercept_grads)


nn = NeuralNetwork(X_data=X,
                   Y_data=target,
                   n_hidden_neurons=3,
                   n_categories=1,
                   activation_func_out='identity',
                   activation_func='relu',
                   cost_func='MSE')

# Copy the weights and biases from the scikit-learn network to your own.
nn.hidden_weights, nn.output_weights = mlp.coefs_
nn.hidden_bias, nn.output_bias = mlp.intercepts_
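
Example #1 stops after copying the weights, so the actual gradient comparison is left implicit. Below is a minimal, self-contained sketch of that comparison (my own addition, not part of the original code): a manual NumPy forward and backward pass for one logistic hidden layer with an identity output, checked against what scikit-learn's private _forward_pass/_backprop return. The private API and the batch-averaging convention are assumptions about scikit-learn internals and may differ between versions; the data and layer sizes here are arbitrary.

import warnings

import numpy as np
from sklearn.neural_network import MLPRegressor

X = np.array([[0.0, 1.0], [1.0, 2.0], [2.0, 3.0]])
y = np.array([[0.5], [1.5], [2.5]])

mlp = MLPRegressor(solver='sgd', alpha=0.0, max_iter=1,
                   hidden_layer_sizes=(3,), activation='logistic')
with warnings.catch_warnings():
    warnings.simplefilter("ignore")       # ignore the convergence warning
    mlp.fit(X, y.ravel())

# Manual forward pass: one logistic hidden layer, identity output.
W0, W1 = mlp.coefs_
b0, b1 = mlp.intercepts_
a1 = 1.0 / (1.0 + np.exp(-(X @ W0 + b0)))
pred = a1 @ W1 + b1

# Manual backward pass for the squared loss, averaged over the batch.
n = X.shape[0]
delta1 = pred - y                            # output-layer error
dW1, db1 = a1.T @ delta1 / n, delta1.mean(axis=0)
delta0 = (delta1 @ W1.T) * a1 * (1.0 - a1)   # logistic derivative
dW0, db0 = X.T @ delta0 / n, delta0.mean(axis=0)

# Same scratch-list set-up as above, then scikit-learn's own gradients.
layer_units = [X.shape[1], 3, mlp.n_outputs_]
activations = [X] + [np.empty((n, u)) for u in layer_units[1:]]
deltas = [np.empty_like(a) for a in activations]
coef_grads = [np.empty((i, o))
              for i, o in zip(layer_units[:-1], layer_units[1:])]
intercept_grads = [np.empty(u) for u in layer_units[1:]]
activations = mlp._forward_pass(activations)
loss, coef_grads, intercept_grads = mlp._backprop(
    X, y, activations, deltas, coef_grads, intercept_grads)

assert np.allclose(dW0, coef_grads[0]) and np.allclose(dW1, coef_grads[1])
assert np.allclose(db0, intercept_grads[0]) and np.allclose(db1, intercept_grads[1])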

Example #2
    def test_gradient_mlpregressor(self):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
        y = numpy.arange(X.shape[0]).astype(numpy.float32)
        y = y.reshape((-1, 1))
        reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                           activation='logistic',
                           momentum=0, nesterovs_momentum=False,
                           alpha=0)
        reg.fit(X, y.ravel())

        onx = to_onnx(reg, X, target_opset=opset)
        onx = onnx_rename_weights(onx)
        inits = ["I0_coefficient", 'I1_intercepts', 'I2_coefficient1',
                 'I3_intercepts1']

        xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
            numpy.float32) / 10
        yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

        train_session = OrtGradientForwardBackwardOptimizer(
            onx, inits, learning_rate=1e-5,
            warm_start=True, max_iter=2, batch_size=10)
        train_session.fit(X, y)
        state = train_session.get_state()
        state_np = [st.numpy() for st in state]

        # gradient scikit-learn

        coef_grads = state_np[::2]
        intercept_grads = state_np[1::2]
        layer_units = [3, 5, 1]
        activations = [xp] + [None] * (len(layer_units) - 1)
        deltas = [None] * (len(activations) - 1)

        skl_pred = reg.predict(xp)

        batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
            xp, yp, activations, deltas,
            coef_grads, intercept_grads)
        deltas = activations[-1] - yp

        # gradient onnxcustom

        ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
        ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
        ort_state = [ort_xp] + state
        prediction = train_session.train_function_.forward(
            ort_state, training=True)

        ort_pred = prediction[0].numpy()
        self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

        loss, loss_gradient = train_session.learning_loss.loss_gradient(
            train_session.device, ort_yp, prediction[0])

        gradient = train_session.train_function_.backward([loss_gradient])

        # comparison

        self.assertEqualArray(
            batch_loss, loss.numpy() / xp.shape[0], decimal=3)
        self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

        # do not use iterator for gradient, it may crash
        ort_grad = [gradient[i].numpy() / xp.shape[0]
                    for i in range(len(gradient))][1:]
        self.assertEqualArray(
            intercept_grads[1], ort_grad[3].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
        self.assertEqualArray(
            intercept_grads[0], ort_grad[1].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
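
A note on the divisions by xp.shape[0] above: scikit-learn's _backprop averages the loss and the gradients over the batch (it divides by n_samples internally), while the values returned by the ONNX training session are apparently batch sums, so the test rescales them before comparing. A tiny standalone illustration of that sum-versus-mean relation (not part of the test):

import numpy as np

pred = np.array([[0.2], [1.1]])    # model outputs for a batch of two samples
y = np.array([[0.5], [-0.5]])      # targets
delta = pred - y                   # per-sample output error

grad_sum = delta.sum(axis=0)       # bias gradient of a summed squared loss
grad_mean = delta.mean(axis=0)     # bias gradient of a batch-averaged loss (sklearn)
assert np.allclose(grad_sum / len(pred), grad_mean)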
Example #3
def test_neuralNetwork_backpropagation():
    # We re-use the test_neuralNetwork_network networks and this time check
    # that the computed backpropagation derivatives are equal.

    from sklearn.neural_network import MLPRegressor

    X = [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]]
    y = [0, 2, 4, 6, 8, 10]
    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    # Throw away all the fitted values, randomize W and b matrices.

    np.random.seed(18)
    for i, coeff in enumerate(mlp.coefs_):
        mlp.coefs_[i] = np.random.normal(size=coeff.shape)
    for i, bias in enumerate(mlp.intercepts_):
        mlp.intercepts_[i] = np.random.normal(size=bias.shape)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    nn = NeuralNetwork(inputs=1,
                       outputs=1,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=False)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')
    nn.weights = W_skl
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # Following the sklearn source, we need to set up some lists before calling
    # the _backprop method of MLPRegressor, see:
    #
    #    https://github.com/scikit-learn/scikit-learn/blob/bac89c2/sklearn/neural_network/multilayer_perceptron.py#L355
    #
    # ========================================================================
    # Initialize lists
    X = np.array([[1.125982598]])
    y = np.array([8.29289285])
    mlp.predict(X)
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    # Make sure self.hidden_layer_sizes is a list
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
    activations = [X]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================
    activations = mlp._forward_pass(activations)
    loss, coef_grads, intercept_grads = mlp._backprop(X, y, activations,
                                                      deltas, coef_grads,
                                                      intercept_grads)

    yhat = nn(X)
    nn.backpropagation(yhat, y)

    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))

    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(np.squeeze(coef_grads[i]))
Example #4
def test_neuralNetwork_backpropagation_multiple_outputs():
    # Similar to the test_neuralNetwork_backpropagation() test, but with
    # multiple samples and features, X having dimensions
    # (n_samples, n_features) = (3,2), as well as the target having dimensions
    # (n_outputs) = (2)

    from sklearn.neural_network import MLPRegressor

    X = np.array([[0, 1], [1, 2], [2, 3]])
    y = np.array([[0, 1], [2, 3], [3, 4]])
    mlp = MLPRegressor(solver='sgd',
                       alpha=0.0,
                       learning_rate='constant',
                       learning_rate_init=1e-20,
                       max_iter=1,
                       hidden_layer_sizes=(3, 3),
                       random_state=1,
                       activation='logistic')
    # Force sklearn to set up all the matrices by fitting a data set.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mlp.fit(X, y)

    W_skl = mlp.coefs_
    b_skl = mlp.intercepts_

    nn = NeuralNetwork(inputs=2,
                       outputs=2,
                       layers=3,
                       neurons=3,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    for i, w in enumerate(W_skl):
        nn.weights[i] = w
    for i, b in enumerate(b_skl):
        nn.biases[i] = np.expand_dims(b, axis=1)

    # ========================================================================
    n_samples, n_features = X.shape
    batch_size = n_samples
    hidden_layer_sizes = mlp.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)
    layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
    activations = [X]
    activations.extend(
        np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]
    coef_grads = [
        np.empty((n_fan_in_, n_fan_out_))
        for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
    ]
    intercept_grads = [np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]]
    # ========================================================================
    activations = mlp._forward_pass(activations)
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    loss, coef_grads, intercept_grads = mlp._backprop(X, y, activations,
                                                      deltas, coef_grads,
                                                      intercept_grads)

    yhat = nn.forward_pass(X.T)
    nn.backpropagation(yhat.T, y)

    for i, d_bias in enumerate(nn.d_biases):
        assert np.squeeze(d_bias) == pytest.approx(
            np.squeeze(intercept_grads[i]))

    for i, d_weight in enumerate(nn.d_weights):
        assert np.squeeze(d_weight) == pytest.approx(np.squeeze(coef_grads[i]))
Example #5
    def test_regressor(X_train,
                       y_train,
                       X_test,
                       y_test,
                       nn_layers,
                       sk_hidden_layers,
                       input_activation,
                       output_activation,
                       alpha=0.0):

        if input_activation == "sigmoid":
            sk_input_activation = "logistic"
        else:
            sk_input_activation = input_activation

        if output_activation == "sigmoid":
            sk_output_activation = "logistic"
        else:
            sk_output_activation = output_activation

        mlp = MLPRegressor(
            solver='sgd',  # Stochastic gradient descent.
            activation=sk_input_activation,  # sklearn calls sigmoid 'logistic'.
            alpha=alpha,  # No regularization for simplicity.
            hidden_layer_sizes=sk_hidden_layers)  # Full NN size is (1,3,3,1).

        mlp.out_activation_ = sk_output_activation

        # Force sklearn to set up all the necessary matrices by fitting a data
        # set. We don't care whether it converges or not, so let's ignore any
        # raised warnings.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mlp.fit(X_train, y_train)

        # =====================================================================
        n_samples, n_features = X_train.shape
        batch_size = n_samples
        hidden_layer_sizes = mlp.hidden_layer_sizes
        if not hasattr(hidden_layer_sizes, "__iter__"):
            hidden_layer_sizes = [hidden_layer_sizes]
        hidden_layer_sizes = list(hidden_layer_sizes)
        layer_units = ([n_features] + hidden_layer_sizes + [mlp.n_outputs_])
        activations = [X_test]
        activations.extend(
            np.empty((batch_size, n_fan_out)) for n_fan_out in layer_units[1:])
        deltas = [np.empty_like(a_layer) for a_layer in activations]
        coef_grads = [
            np.empty((n_fan_in_, n_fan_out_))
            for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])
        ]
        intercept_grads = [
            np.empty(n_fan_out_) for n_fan_out_ in layer_units[1:]
        ]
        # =====================================================================

        mlp.out_activation_ = sk_output_activation
        activations = mlp._forward_pass(activations)
        loss, coef_grads, intercept_grads = mlp._backprop(
            X_test, y_test, activations, deltas, coef_grads, intercept_grads)

        # Instantiate my own MLP
        nn = MultilayerPerceptron(nn_layers,
                                  activation=input_activation,
                                  output_activation=output_activation,
                                  alpha=alpha)

        # Copy the weights and biases from the scikit-learn network to your
        # own.
        for i, w in enumerate(mlp.coefs_):
            nn.weights[i] = cp.deepcopy(w.T)
        for i, b in enumerate(mlp.intercepts_):
            nn.biases[i] = cp.deepcopy(b.T.reshape(-1, 1))

        # Call your own backpropagation function, and you're ready to compare
        # with the scikit-learn code.
        y_sklearn = mlp.predict(X_test)
        y = nn.predict(cp.deepcopy(X_test).T)

        # Asserts that the forward pass is correct
        assert np.allclose(y, y_sklearn), ("Prediction {} != {}".format(
            y, y_sklearn))

        delta_w, delta_b = nn._back_propagate(X_test.T, y_test)

        # Assert that the activations are correct in back propagation
        for i, a in enumerate(nn.activations):
            print(i, a.T, activations[i])
            assert np.allclose(a.T,
                               activations[i]), "error in layer {}".format(i)
        else:
            print("Activations are correct.")

        # Assert that the bias derivatives are correct in back propagation
        for i, derivative_bias in enumerate(delta_b):
            print(i, derivative_bias.T, intercept_grads[i])
            assert np.allclose(
                derivative_bias.T,
                intercept_grads[i]), ("error in layer {}".format(i))
        else:
            print("Biases derivatives are correct.")

        # Assert that the weight derivatives are correct in back propagation
        for i, derivative_weight in enumerate(delta_w):
            print(i, derivative_weight.T, coef_grads[i])
            assert np.allclose(derivative_weight.T,
                               coef_grads[i]), "error in layer {}".format(i)
        else:
            print("Weight derivatives are correct.")

        print("Test complete\n")