Example #1
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()
    def test_returns_jacobian_matrix_of_valid_shape(self):
        z = np.array([1, 2, -2], float)
        j = Softmax.gradient(z)
        self.assertTupleEqual(j.shape, (3, 3))

        z = np.array([1, 2], float)
        j = Softmax.gradient(z)
        self.assertTupleEqual(j.shape, (2, 2))
    def test_derivatives_with_different_indices_in_jacobian_matrix(self):
        z = np.array([1, -1.5], float)
        j = Softmax.gradient(z)

        s = Softmax.activation(z)
        self.assertEqual(j[0, 1], s[0] * s[1])

        s = Softmax.activation(z)
        self.assertEqual(j[1, 0], s[1] * s[0])
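For reference, the textbook softmax Jacobian is J[i, j] = s[i] * (delta_ij - s[j]), so the diagonal entries are s[i] * (1 - s[i]) and the off-diagonal entries are -s[i] * s[j]. A minimal NumPy sketch of that convention (independent of the Softmax class under test, whose sign convention for the off-diagonal terms may differ):

import numpy as np

def softmax_jacobian(z):
    # Textbook convention: J[i, j] = s[i] * (delta_ij - s[j])
    e = np.exp(z - z.max())
    s = e / e.sum()
    return np.diag(s) - np.outer(s, s)

z = np.array([1, -1.5], float)
J = softmax_jacobian(z)
print(J.shape)                          # (2, 2)
print(np.allclose(J.sum(axis=1), 0))    # each row of the Jacobian sums to 0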
Example #4
    def test_get_final_layer_error_for_arrays(self):
        cross_entropy = cost_functions.CrossEntropyCost(self.net)

        z_last = np.array([3, -1], float)
        z_last_prime = Softmax.gradient(z_last)

        y = np.array([0, 0.5], float)
        a_last = Softmax.activation(z_last)
        nabla = cross_entropy.get_final_layer_error(a_last, y, z_last_prime)

        self.assertAlmostEqual(nabla[0], a_last[0] - y[0], places=2)
        self.assertAlmostEqual(nabla[1], a_last[1] - y[1], places=2)
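The quantity asserted here, a_last - y, is the standard softmax plus cross-entropy simplification: for a target y that sums to one, the gradient of -sum(y * log(softmax(z))) with respect to z is exactly softmax(z) - y. A standalone finite-difference check of that identity (using an assumed one-hot target, not the CrossEntropyCost class above):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def ce_loss(z, y):
    return -np.sum(y * np.log(softmax(z)))

z = np.array([3.0, -1.0])
y = np.array([1.0, 0.0])     # one-hot target (illustrative assumption)

eps = 1e-6
grad_fd = np.array([
    (ce_loss(z + eps * np.eye(2)[j], y) - ce_loss(z - eps * np.eye(2)[j], y)) / (2 * eps)
    for j in range(2)
])
print(np.allclose(grad_fd, softmax(z) - y, atol=1e-6))   # True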
    def test_for_2_element_vectors(self):
        z = np.array([1, 2], float)
        a = Softmax.activation(z)
        self.assertTrue(np.allclose(
            a,
            np.array([0.268941, 0.731058], float),
        ))

        z = np.array([0, 2], float)
        a = Softmax.activation(z)
        self.assertTrue(
            np.allclose(
                a,
                np.array([0.1192029, 0.880797], float),
            ))
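The expected constants come straight from the definition softmax(z)_i = exp(z_i) / sum_j exp(z_j), e.g. for z = [1, 2]:

import math

e1, e2 = math.exp(1), math.exp(2)
print(e1 / (e1 + e2), e2 / (e1 + e2))   # ~0.268941 0.731059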
Example #6
    def _forward_pass(self, x, third_layer_activation):
        '''
        Performs forward pass of the network.

        a_i - result of applying the weights to the data from the previous layer
        z_i - result of applying the activation

        We save the results for further backward pass.
        :param x: input data
        :param third_layer_activation: which activation to apply to third layer
        :return: multi-class prediction (n_class, 1)
        '''
        self.x = x
        self.a_1 = self.w1.dot(x) + self.b1
        self.z_1 = Tanh.activation(self.a_1)

        self.a_2 = self.w2.dot(self.z_1) + self.b2
        self.z_2 = Relu.activation(self.a_2)
        self.z_2_with_skip_connection = self.z_2 + self.w_s.dot(x)

        self.a_3 = self.w_out.dot(self.z_2_with_skip_connection) + self.b_out
        if third_layer_activation == 'Softmax':
            self.y_pred = Softmax.activation(self.a_3)
        elif third_layer_activation == 'Tanh':
            self.y_pred = Tanh.activation(self.a_3)
        else:
            raise ValueError("Unknown activation type for 3rd layer")
        return self.y_pred
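The docstring above describes a three-layer pass with a skip connection from the input to the second hidden layer. A self-contained sketch of the same computation (all sizes and weight names here are illustrative assumptions, not the surrounding class's attributes):

import numpy as np

def tanh(a): return np.tanh(a)
def relu(a): return np.maximum(a, 0.0)
def softmax(a):
    e = np.exp(a - a.max())
    return e / e.sum()

rng = np.random.default_rng(0)
n_in, n_h1, n_h2, n_class = 4, 5, 6, 3                      # illustrative sizes
x = rng.standard_normal((n_in, 1))
w1, b1 = rng.standard_normal((n_h1, n_in)), np.zeros((n_h1, 1))
w2, b2 = rng.standard_normal((n_h2, n_h1)), np.zeros((n_h2, 1))
w_s = rng.standard_normal((n_h2, n_in))                     # skip connection: input -> 2nd hidden layer
w_out, b_out = rng.standard_normal((n_class, n_h2)), np.zeros((n_class, 1))

z_1 = tanh(w1 @ x + b1)                                     # first hidden layer
z_2 = relu(w2 @ z_1 + b2) + w_s @ x                         # second hidden layer plus skip connection
y_pred = softmax(w_out @ z_2 + b_out)                       # (n_class, 1), sums to 1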
class Activation_SoftMax(Layer):
    """A layer that applies an activation operation to the input.
    Parameters:
    -----------
    input_shape: tuple
        The shape of the input that the layer expects.
    """
    def __init__(self, input_shape=None):
        self.layer_name = 'softmax'
        self.input_shape = input_shape
        self.activation_func = Softmax()
        self.trainable = False

    def initialize(self):
        # Just to set the output shape, but not needed below
        self.output_shape = self.input_shape

    def get_output_shape(self):
        return self.output_shape

    def forward(self, Z, training=True):
        self.layer_input = Z
        return self.activation_func(Z)

    def backward(self, dA):
        Z = self.layer_input
        dact = self.activation_func.gradient(Z)
        #assert Z.shape == dact.shape

        dZ = np.sum(np.multiply(dA, dact), axis=1)
        assert dZ.shape == Z.shape

        return dZ
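A minimal usage sketch for the layer above (assuming the same callable Softmax with a .gradient method used elsewhere on this page, and that it normalizes each row of a batch):

import numpy as np

layer = Activation_SoftMax(input_shape=(3,))
layer.initialize()

Z = np.array([[1.0, 2.0, -2.0]])
A = layer.forward(Z)      # forward pass: probabilities
print(A.sum(axis=1))      # each row should sum to 1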
Example #8
    def predict(self, input_word_index, pause_duration=None):
        assert self.initialized, "initialize or load before using"

        self.t_lstm.predict(input_word_index, pause_duration, compute_only_features=True)

        self.m_tm1 = self.m
        self.h_tm1 = self.h

        r = np.dot(self.h_tm1, self.Wr)
        z1 = np.dot(self.t_lstm.h, self.W)
        if self.use_pauses:
            z1 += np.dot(pause_duration[:,np.newaxis], self.Wp)

        z = self.slice(r, self.hidden_size, 0) + self.slice(z1, self.hidden_size, 0)
        self.i  = self.hidden_activation.y(z)

        z = self.slice(r, self.hidden_size, 1) + self.slice(z1, self.hidden_size, 1) + self.m_tm1 * self.Wip
        self.ig = Sigmoid.y(z)
        
        z = self.slice(r, self.hidden_size, 2) + self.slice(z1, self.hidden_size, 2) + self.m_tm1 * self.Wfp
        self.fg = Sigmoid.y(z)
        
        self.m = self.i * self.ig + self.m_tm1 * self.fg
        
        z = self.slice(r, self.hidden_size, 3) + self.slice(z1, self.hidden_size, 3) + self.m * self.Wop
        self.og = Sigmoid.y(z)
        
        self.z = self.hidden_activation.y(self.m)
        self.h = self.z * self.og

        z_y = np.dot(self.h, self.Wy)
        self.y = Softmax.y(z=z_y)

        self._remember_state(pause_duration)
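This example (and Example #9 below) relies on a slice helper to pull the i-th gate block out of the concatenated pre-activations. A plausible version of that helper, offered only as an assumption rather than the project's actual self.slice method:

def gate_slice(z, size, i):
    # Return the i-th block of width `size` along the last axis, e.g. the
    # input / input-gate / forget-gate / output-gate section.
    return z[..., i * size:(i + 1) * size]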
Example #9
    def predict(self,
                input_word_index,
                pause_duration=None,
                compute_only_features=False):
        assert self.initialized, "initialize or load before using"

        self.m_tm1 = self.m
        self.h_tm1 = self.h

        r = np.dot(self.h_tm1, self.Wr)

        z = self.We[input_word_index]
        if self.use_pauses:
            z += np.dot(pause_duration[:, np.newaxis], self.Wp)
        self.x = self.hidden_activation.y(z)

        z1 = np.dot(self.x, self.W)

        z = self.slice(r, self.hidden_size, 0) + self.slice(
            z1, self.hidden_size, 0)
        self.i = self.hidden_activation.y(z)

        z = self.slice(r, self.hidden_size, 1) + self.slice(
            z1, self.hidden_size, 1) + self.m_tm1 * self.Wip
        self.ig = Sigmoid.y(z)

        z = self.slice(r, self.hidden_size, 2) + self.slice(
            z1, self.hidden_size, 2) + self.m_tm1 * self.Wfp
        self.fg = Sigmoid.y(z)

        self.m = self.i * self.ig + self.m_tm1 * self.fg

        z = self.slice(r, self.hidden_size, 3) + self.slice(
            z1, self.hidden_size, 3) + self.m * self.Wop
        self.og = Sigmoid.y(z)

        self.z = self.hidden_activation.y(self.m)
        self.h = self.z * self.og

        if not compute_only_features:
            z_y = np.dot(self.h, self.Wy)
            self.y = Softmax.y(z=z_y)

        if self.use_pauses:
            self._remember_state(input_word_index, pause_duration[:,
                                                                  np.newaxis])
        else:
            self._remember_state(input_word_index)
Example #10
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer.
    Unrolled to display the whole forward and backward pass.
    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit   = 1 / math.sqrt(n_features)
        self.W  = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit   = 1 / math.sqrt(self.n_hidden)
        self.V  = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):

        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0  #(1079*64)(64,16)+(1,16) -> (1079,16)+(1,16)->(1079,16)
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) #(1079,10)(1079,10)->(1079,10)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)    # (16,1079)(1079,10)->(16,10)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)  # (1,10)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer     # (1079,10)
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V  -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W  -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
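A minimal usage sketch for the classifier above (assuming one-hot encoded targets, and that the Sigmoid, Softmax and CrossEntropy helpers it uses are available):

import numpy as np

X = np.random.rand(120, 8)                 # illustrative data
labels = np.random.randint(0, 3, size=120)
y = np.eye(3)[labels]                      # one-hot targets, shape (120, 3)

clf = MultilayerPerceptron(n_hidden=16, n_iterations=500, learning_rate=0.01)
clf.fit(X, y)
probs = clf.predict(X)                     # (120, 3) rows of class probabilities
predicted = probs.argmax(axis=1)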
    def __init__(self, input_shape=None):
        self.layer_name = 'softmax'
        self.input_shape = input_shape
        self.activation_func = Softmax()
        self.trainable = False
    def test_results_add_to_1(self):
        z = np.array([-3, 0.1, 1, 20], float)
        a = Softmax.activation(z)
        self.assertAlmostEqual(a.sum(), 1)
    def test_activations_in_correct_range(self):
        z = np.array([-1000, 0.1, 2, 200], float)
        a = Softmax.activation(z)

        self.assertTrue(np.all(0 <= a) and np.all(a <= 1))
    def test_returns_array_of_valid_shape(self):
        z = np.array([1, 2], float)
        a = Softmax.activation(z)
        self.assertTupleEqual(a.shape, z.shape)
    def test_on_vectors_with_huge_components(self):
        z = np.array([np.finfo(float).max, 2, np.finfo(float).max / 2], float)
        # won't raise OverflowError
        a = Softmax.activation(z)
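The overflow-safety exercised by this test is usually obtained by shifting the logits by their maximum before exponentiating. A minimal sketch of that standard trick (not necessarily how this Softmax is implemented):

import numpy as np

def stable_softmax(z):
    shifted = z - np.max(z)   # largest exponent becomes exp(0) == 1
    e = np.exp(shifted)
    return e / e.sum()

z = np.array([np.finfo(float).max, 2, np.finfo(float).max / 2], float)
print(stable_softmax(z))      # finite result that sums to 1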
import numpy as np
import sys
sys.path.append('../../network')
from activation_functions import Softmax

soft = Softmax()

# Numerically check Softmax.gradient against finite differences.
test_matrix = np.random.rand(5, 3)

delta = 1e-9       # finite-difference step
tolerance = 1e-7   # accepted deviation from the analytic gradient

# Displace row 0 of the input by delta and compare the finite-difference
# quotient with slice [:, 0, :] of the analytic gradient.
displaced = test_matrix.copy()
displaced[0, :] += delta
finite_diff = (soft(displaced) - soft(test_matrix)) / delta
ans = np.abs(finite_diff - soft.gradient(test_matrix)[:, 0, :]) < tolerance
print(ans)

# Repeat for row 2 and slice [:, 2, :] of the analytic gradient.
displaced = test_matrix.copy()
displaced[2, :] += delta
finite_diff = (soft(displaced) - soft(test_matrix)) / delta
ans = np.abs(finite_diff - soft.gradient(test_matrix)[:, 2, :]) < tolerance
print(ans)