    def test_returns_jacobian_matrix_of_valid_shape(self):
        z = np.array([1, 2, -2], float)
        j = Softmax.gradient(z)
        self.assertTupleEqual(j.shape, (3, 3))

        z = np.array([1, 2], float)
        j = Softmax.gradient(z)
        self.assertTupleEqual(j.shape, (2, 2))
class Activation_SoftMax(Layer):
    """A layer that applies an activation operation to the input.
    Parameters:
    -----------
    name: string
        The name of the activation function that will be used.
    """
    def __init__(self, input_shape=None):
        self.layer_name = 'softmax'
        self.input_shape = input_shape
        self.activation_func = Softmax()
        self.trainable = False

    def initialize(self):
        # Softmax does not change the shape of its input, so the output
        # shape simply mirrors the input shape.
        self.output_shape = self.input_shape

    def get_output_shape(self):
        return self.output_shape

    def forward(self, Z, training=True):
        self.layer_input = Z
        return self.activation_func(Z)

    def backward(self, dA):
        Z = self.layer_input
        # Stacked softmax Jacobian as returned by the activation's gradient().
        dact = self.activation_func.gradient(Z)

        # Chain rule: contract the upstream gradient dA with the Jacobian.
        # The broadcast relies on dact having one extra leading axis relative
        # to dA (e.g. dA of shape (n, m) against a Jacobian of shape (n, n, m));
        # summing over axis 1 then collapses the Jacobian axis.
        dZ = np.sum(np.multiply(dA, dact), axis=1)
        assert dZ.shape == Z.shape

        return dZ
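
# Hedged usage sketch (not from the original source). The orientation of Z is
# an assumption here: the backward() broadcast above suggests columns are
# samples, i.e. Z has shape (n_units, n_samples).
#
# softmax_layer = Activation_SoftMax(input_shape=(3,))
# softmax_layer.initialize()
# Z = np.random.randn(3, 4)                     # 3 logits, 4 samples
# A = softmax_layer.forward(Z)                  # probabilities, same shape as Z
# dZ = softmax_layer.backward(np.ones_like(A))  # gradient w.r.t. Z, shape (3, 4)
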
    def test_derivatives_with_different_indices_in_jacobian_matrix(self):
        z = np.array([1, -1.5], float)
        j = Softmax.gradient(z)
        s = Softmax.activation(z)

        # Off-diagonal entries of the softmax Jacobian are -s_i * s_j,
        # so the matrix is symmetric.
        self.assertEqual(j[0, 1], -s[0] * s[1])
        self.assertEqual(j[1, 0], -s[1] * s[0])
    def test_get_final_layer_error_for_arrays(self):
        cross_entropy = cost_functions.CrossEntropyCost(self.net)

        z_last = np.array([3, -1], float)
        z_last_prime = Softmax.gradient(z_last)

        y = np.array([0, 0.5], float)
        a_last = Softmax.activation(z_last)
        nabla = cross_entropy.get_final_layer_error(a_last, y, z_last_prime)

        self.assertAlmostEqual(nabla[0], a_last[0] - y[0], places=2)
        self.assertAlmostEqual(nabla[1], a_last[1] - y[1], places=2)
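
# Hedged numerical sanity check (not from the original source): for a softmax
# output combined with the cross-entropy cost and a one-hot target y (entries
# summing to 1), the final-layer error reduces to a_last - y, because the
# -y_i/a_i factor from the cross-entropy derivative cancels against the a_i
# terms of the softmax Jacobian. Plain numpy, independent of CrossEntropyCost:
z_chk = np.array([3.0, -1.0])
a_chk = np.exp(z_chk) / np.exp(z_chk).sum()        # softmax activation
y_chk = np.array([0.0, 1.0])                       # one-hot target
jac_chk = np.diag(a_chk) - np.outer(a_chk, a_chk)  # softmax Jacobian
delta_chk = jac_chk.dot(-y_chk / a_chk)            # chain rule through the Jacobian
print(np.allclose(delta_chk, a_chk - y_chk))       # expected: True
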
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer.
    Unrolled to display the whole forward and backward pass.
    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit   = 1 / math.sqrt(n_features)
        self.W  = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit   = 1 / math.sqrt(self.n_hidden)
        self.V  = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):

        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0  # (n_samples, n_features).(n_features, n_hidden) + (1, n_hidden) -> (n_samples, n_hidden)
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t. the input of the output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)  # (n_samples, n_outputs)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)             # (n_hidden, n_samples).(n_samples, n_outputs) -> (n_hidden, n_outputs)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)  # (1, n_outputs)
            # HIDDEN LAYER
            # Grad. w.r.t. the input of the hidden layer: (n_samples, n_hidden)
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V  -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W  -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0
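            # i.e. each parameter P in {V, v0, W, w0} is updated as
            #     P <- P - learning_rate * dL/dP
            # (plain batch gradient descent on the cross-entropy loss)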

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
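
# Hedged usage sketch (not part of the original source): the targets passed to
# fit() must be one-hot encoded so that y.shape == (n_samples, n_outputs).
X_demo = np.random.rand(100, 8)                        # 100 samples, 8 features
y_demo = np.eye(3)[np.random.randint(0, 3, size=100)]  # one-hot targets, 3 classes
mlp = MultilayerPerceptron(n_hidden=16, n_iterations=300, learning_rate=0.01)
mlp.fit(X_demo, y_demo)
class_pred = np.argmax(mlp.predict(X_demo), axis=1)    # predicted class indices
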
# Testing the softmax derivative against finite differences.
# Assumes numpy is imported as np and that `soft` is a softmax activation
# instance (defined elsewhere) whose gradient() returns the stacked Jacobian
# indexed by the slicing below.

test_matrix = np.random.rand(5, 3)
print(test_matrix.shape)

# Step and tolerance chosen so the finite-difference error stays well
# below the comparison threshold.
Delta = 1e-6
tolerance = 1e-5

# Perturb row 0 of the input and compare the finite-difference quotient
# against the corresponding Jacobian slice.
displaced = test_matrix.copy()
displaced[0, :] += Delta
ans = np.abs((soft(displaced) - soft(test_matrix)) / Delta
             - soft.gradient(test_matrix)[:, 0, :]) < tolerance
print(ans)

# Same check for row 2.
displaced = test_matrix.copy()
displaced[2, :] += Delta
ans = np.abs((soft(displaced) - soft(test_matrix)) / Delta
             - soft.gradient(test_matrix)[:, 2, :]) < tolerance
print(ans)
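
# A more systematic version of the check above, as a helper function (hedged
# sketch, not from the original source). It follows the same indexing
# convention as the scratch check: the quotient obtained by perturbing row i
# of the input is compared against gradient(...)[:, i, :].
def check_softmax_jacobian(activation, logits, delta=1e-6, tol=1e-4):
    """Return True if finite differences agree with activation.gradient()."""
    jac = activation.gradient(logits)
    base = activation(logits)
    for i in range(logits.shape[0]):
        displaced = logits.copy()
        displaced[i, :] += delta
        fd = (activation(displaced) - base) / delta
        if not np.all(np.abs(fd - jac[:, i, :]) < tol):
            return False
    return True

# print(check_softmax_jacobian(soft, np.random.rand(5, 3)))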

# Cross-checking the cross-entropy loss with and without the softmax included

from loss_functions import MultiClassCrossEntropy
import numpy as np
nb_classes = 5