def _gradient(self, weights, lambda_value):
    """Compute the flattened gradient of the regularized cost via backpropagation.

    Parameters
    ----------
    weights : 1-D array of all theta values; reshaped per ``self.theta_shapes``.
    lambda_value : regularization strength (lambda).

    Returns
    -------
    1-D numpy array: each layer's gradient, transposed and raveled, then
    concatenated — matching the layout of the flattened thetas.
    """
    thetas = list(reshape_vector(weights, self.theta_shapes))
    activations = self._activations(thetas)
    lam = lambda_value  # renamed from `l`: ambiguous single-letter name (PEP 8 E741)

    # Backpropagated error terms, output layer first:
    # sigmas = [sigma_L, sigma_{L-1}, ...]
    sigmas = [activations[-1] - self.Y]
    for ind, layer in enumerate(self.z[-2::-1]):
        if self.add_bias:
            layer = add_bias(layer)
        sigma = np.dot(sigmas[-1], thetas[-1 - ind]) * sigmoid_gradient(layer)
        sigmas.append(del_bias(sigma))

    # Unregularized gradient accumulators, input layer first:
    # deltas = [delta1, delta2, ...]
    deltas = [np.dot(sigma.T, activation)
              for activation, sigma in zip(activations, sigmas[::-1])]

    # Regularized gradients: the bias column is replaced with zeros so the
    # bias weights are not penalized.
    gradients = []
    for theta, delta in zip(thetas, deltas):
        theta = add_bias(del_bias(theta), values_function=np.zeros)
        # Plain scalar multiplication instead of np.dot(scalar, array):
        # identical result, clearer intent.
        gradient = delta / self.m + (lam / self.m) * theta
        gradients.append(gradient.T.ravel())
    return np.concatenate(gradients)
def _gradient(self, weights, lambda_value):
    """Return the gradient of the regularized cost, flattened to one vector.

    Uses backpropagation: error terms (sigmas) are computed from the output
    layer backwards, combined with the forward activations into per-layer
    deltas, then regularized (bias terms excluded) and concatenated.

    Parameters
    ----------
    weights : flat 1-D weight vector, reshaped per ``self.theta_shapes``.
    lambda_value : regularization strength.
    """
    thetas = list(reshape_vector(weights, self.theta_shapes))
    activations = self._activations(thetas)
    lam = lambda_value  # `l` is an ambiguous name (PEP 8 E741)

    # sigmas = [sigma_L, sigma_{L-1}, ...] — output-layer error first.
    sigmas = [activations[-1] - self.Y]
    for ind, layer in enumerate(self.z[-2::-1]):
        if self.add_bias:
            layer = add_bias(layer)
        sigma = np.dot(sigmas[-1], thetas[-1 - ind]) * sigmoid_gradient(layer)
        sigmas.append(del_bias(sigma))

    # deltas = [delta1, delta2, ...] — reverse sigmas to pair with activations.
    deltas = [np.dot(sigma.T, activation)
              for activation, sigma in zip(activations, sigmas[::-1])]

    # gradients = [theta1_grad, theta2_grad, ...]; zero the bias column so
    # bias weights are not regularized.
    gradients = []
    for theta, delta in zip(thetas, deltas):
        theta = add_bias(del_bias(theta), values_function=np.zeros)
        # scalar * array replaces np.dot(scalar, array): same value, idiomatic.
        gradient = delta / self.m + (lam / self.m) * theta
        gradients.append(gradient.T.ravel())
    return np.concatenate(gradients)
def predict(self, input_layer):
    """Feed *input_layer* forward through every layer and return the output.

    Reshapes ``self.weights`` into per-layer theta matrices and applies the
    activation ``self.h`` layer by layer, prepending a bias unit when
    ``self.add_bias`` is set.
    """
    current = input_layer
    for theta in reshape_vector(self.weights, self.theta_shapes):
        if self.add_bias:
            current = np.append(1, current)  # prepend the bias unit
        current = self.h(theta.dot(current))
    return current
def _cost_function(self, weights, lambda_value):
    """Regularized cross-entropy cost for the given flat weight vector.

    Parameters
    ----------
    weights : 1-D array of all theta values; reshaped per ``self.theta_shapes``.
    lambda_value : regularization strength.

    Returns
    -------
    Scalar cost: mean cross-entropy over the m examples plus the
    regularization term ``lambda * penalty / (2m)``.
    """
    thetas = list(reshape_vector(weights, self.theta_shapes))
    activations = self._activations(thetas)
    lam = lambda_value  # renamed from `l`: ambiguous name (PEP 8 E741)
    penalty = self._penalty(thetas)
    h = activations[-1]  # hypothesis: output-layer activations
    regularization = (lam * penalty) / (2 * self.m)
    # A single np.sum over all elements replaces the redundant
    # np.sum(np.sum(..., axis=1)) — identical value.
    cost = np.sum(-self.Y * np.log(h) - (1 - self.Y) * np.log(1 - h)) / self.m
    return cost + regularization
def _cost_function(self, weights, lambda_value):
    """Compute the regularized cross-entropy cost of the network.

    The hypothesis ``h`` is the final activation layer produced by
    ``self._activations``; the cost is the mean cross-entropy against
    ``self.Y`` plus ``lambda * penalty / (2m)``.
    """
    thetas = list(reshape_vector(weights, self.theta_shapes))
    activations = self._activations(thetas)
    lam = lambda_value  # `l` is ambiguous (PEP 8 E741)
    penalty = self._penalty(thetas)
    h = activations[-1]  # output layer
    regularization = (lam * penalty) / (2 * self.m)
    # np.sum over the whole array equals the original nested
    # np.sum(np.sum(..., axis=1)) — collapsed for clarity.
    cross_entropy = -self.Y * np.log(h) - (1 - self.Y) * np.log(1 - h)
    return np.sum(cross_entropy) / self.m + regularization
def test_reshape_vector(self):
    """Round-trip check: flattening THETAS then reshaping restores them."""
    flat, original_shapes = flatten_matrices(THETAS)
    restored = reshape_vector(flat, original_shapes)
    for rebuilt, expected in zip(restored, THETAS):
        self.assertTrue((rebuilt == expected).all())