Example #1
    def _gradient(self, weights, lambda_value):
        thetas = list(reshape_vector(weights, self.theta_shapes))
        activations = self._activations(thetas)
        l = lambda_value

        # Backpropagated error terms, output layer first:
        # sigmas = [sigma3, sigma2, ...]
        sigmas = [activations[-1] - self.Y]
        for ind, layer in enumerate(self.z[-2::-1]):
            if self.add_bias:
                layer = add_bias(layer)
            sigma = np.dot(sigmas[-1], thetas[-1 - ind]) * sigmoid_gradient(layer)
            # Drop the error term for the bias unit; it is not propagated back.
            sigmas.append(del_bias(sigma) if self.add_bias else sigma)

        # deltas = [delta1, delta2, ...]
        deltas = []
        for activation, sigma in zip(activations, sigmas[::-1]):
            deltas.append(np.dot(sigma.T, activation))

        # gradients = [theta1_grad, theta2_grad, ...]
        gradients = []
        for theta, delta in zip(thetas, deltas):
            # Zero the bias column so the bias weights are not regularized.
            theta = del_bias(theta)
            theta = add_bias(theta, values_function=np.zeros)
            gradient = delta / self.m + (l / self.m) * theta
            gradients.append(gradient.T.ravel())

        return np.concatenate(gradients)
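
The snippet above leans on several helpers that are not shown: add_bias, del_bias, sigmoid_gradient, and reshape_vector. Below is a minimal sketch of how they might look, inferred from how _gradient uses them; the exact implementations in the source project may differ (for example in flattening order):

import numpy as np

def add_bias(matrix, values_function=np.ones):
    # Prepend a bias column (ones by default, zeros when zeroing regularization).
    bias = values_function((matrix.shape[0], 1))
    return np.hstack([bias, matrix])

def del_bias(matrix):
    # Drop the bias column added by add_bias.
    return matrix[:, 1:]

def sigmoid_gradient(z):
    # Derivative of the sigmoid evaluated at z.
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)

def reshape_vector(vector, shapes):
    # Slice one flat weight vector back into matrices of the given shapes.
    start = 0
    for rows, cols in shapes:
        end = start + rows * cols
        yield vector[start:end].reshape(rows, cols)
        start = end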
Example #2
    def predict(self, input_layer):
        # Forward pass: propagate the input through every layer.
        output_layer = input_layer
        thetas = reshape_vector(self.weights, self.theta_shapes)
        for theta in thetas:
            if self.add_bias:
                # Prepend the bias unit before applying the weights.
                output_layer = np.append(1, output_layer)

            output_layer = self.h(theta.dot(output_layer))

        return output_layer
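
The loop is easy to exercise outside the class. Here is a self-contained sketch of the same forward pass, with made-up weights and a plain sigmoid standing in for self.h (all names and shapes below are illustrative assumptions, not part of the source):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Made-up weights: 3 inputs -> 4 hidden units -> 2 outputs, bias column included.
rng = np.random.default_rng(0)
thetas = [rng.standard_normal((4, 3 + 1)),
          rng.standard_normal((2, 4 + 1))]

layer = np.array([0.2, 0.7, 0.1])  # one input vector
for theta in thetas:
    layer = np.append(1, layer)    # prepend the bias unit, as in predict()
    layer = sigmoid(theta.dot(layer))

print(layer)  # output-layer activations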
Example #3
    def _cost_function(self, weights, lambda_value):
        thetas = list(reshape_vector(weights, self.theta_shapes))
        activations = self._activations(thetas)
        l = lambda_value

        p = self._penalty(thetas)  # Regularization penalty from the weights
        h = activations[-1]  # Output layer
        r = (l * p) / (2 * self.m)  # Regularization term
        # Cross-entropy cost averaged over the m training examples.
        cost = np.sum(np.sum(-self.Y * np.log(h)
                             - (1 - self.Y) * np.log(1 - h), axis=1)) / self.m + r

        return cost
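
In the usual notation this computes the regularized cross-entropy cost over m examples and K output units, assuming _penalty(thetas) sums the squared non-bias weights (the standard choice; the snippet does not show it):

J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Bigl[-y_k^{(i)}\log h_k^{(i)} - \bigl(1 - y_k^{(i)}\bigr)\log\bigl(1 - h_k^{(i)}\bigr)\Bigr] + \frac{\lambda}{2m}\sum_{j}\Theta_j^2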
Example #4
    def test_reshape_vector(self):
        # Round trip: flatten the matrices, then reshape and compare elementwise.
        vector, shapes = flatten_matrices(THETAS)
        matrices = list(reshape_vector(vector, shapes))
        for m, w in zip(matrices, THETAS):
            self.assertTrue((m == w).all())
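
The test relies on flatten_matrices being the exact inverse of reshape_vector. A minimal sketch consistent with that round trip, assuming row-major flattening (the source project may use a different order):

import numpy as np

def flatten_matrices(matrices):
    # Record each matrix's shape, then concatenate everything into one vector
    # so reshape_vector(vector, shapes) can rebuild the originals.
    shapes = [m.shape for m in matrices]
    vector = np.concatenate([m.ravel() for m in matrices])
    return vector, shapes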