import numpy as np


def nn_gradient(Wi, Wo, alpha_i, alpha_o, train_input, train_target):
    """Calculate the partial derivatives of the quadratic cost function
    with respect to the weights. Derivatives of the quadratic weight
    decay terms are included.

    Args:
        Wi: Matrix with input-to-hidden weights.
        Wo: Matrix with hidden-to-output weights.
        alpha_i: Weight decay parameter for the input weights.
        alpha_o: Weight decay parameter for the output weights.
        train_input: Matrix with examples as rows.
        train_target: Matrix with target values as rows.

    Returns:
        dWi: Matrix with the gradient for the input weights.
        dWo: Matrix with the gradient for the output weights.

    Examples:
        Calculate gradients

        >>> dWi, dWo = nn_gradient(...)
    """
    # Determine the number of samples
    exam, inp = train_input.shape

    # =======================
    # FORWARD PASS
    # =======================
    # Calculate hidden and output unit activations
    Vj, yj = nn_forward(Wi, Wo, train_input)

    # =======================
    # BACKWARD PASS
    # =======================
    # Calculate the derivatives by backpropagating the errors
    # from the desired outputs

    # Output unit deltas
    delta_o = -(np.atleast_2d(train_target).T - yj)

    # Hidden unit deltas; (1 - Vj**2) is the tanh derivative, and the
    # last column of Wo holds the bias weights, so it is excluded
    delta_h = (1.0 - np.power(Vj, 2)) * (delta_o.dot(Wo[:, :-1]))

    # Partial derivatives for the output weights
    dWo = delta_o.T.dot(np.concatenate((Vj, np.ones((exam, 1))), 1))

    # Partial derivatives for the input weights
    dWi = delta_h.T.dot(np.concatenate((train_input, np.ones((exam, 1))), 1))

    # Add the derivative of the weight decay terms
    dWi = dWi + alpha_i * Wi
    dWo = dWo + alpha_o * Wo

    return dWi, dWo
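# nn_forward is called above but not defined in this section. From the
# backward pass one can infer the architecture: the (1 - Vj**2) factor
# implies tanh hidden units, the appended ones column implies a bias
# weight in the last column of Wi and Wo, and the absence of an output
# nonlinearity derivative implies linear outputs. A minimal sketch under
# those assumptions (an inference, not the confirmed source function):
def nn_forward(Wi, Wo, input):
    """Sketch of the forward pass assumed by nn_gradient."""
    ones = np.ones((input.shape[0], 1))
    # Hidden unit activations: tanh of the bias-augmented input
    Vj = np.tanh(np.concatenate((input, ones), 1).dot(Wi.T))
    # Output unit activations: linear in the bias-augmented hidden layer
    yj = np.concatenate((Vj, ones), 1).dot(Wo.T)
    return Vj, yj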
def nn_cost_quad(Wi, Wo, input, target):
    """Calculate the value of the quadratic cost function,
    i.e. 0.5 * (sum of squared errors).
    """
    # Calculate network outputs for all examples
    Vj, yj = nn_forward(Wi, Wo, input)

    # Calculate the deviations from the desired outputs
    ej = target - yj

    # Calculate the sum of squared errors
    error = 0.5 * np.sum(np.power(ej, 2))

    return error
def nn_cost_quad_decay(Wi, Wo, input, target, alpha_i=1.0, alpha_o=1.0):
    """Calculate the value of the quadratic cost function with weight decay,
    i.e. 0.5 * (sum of squared errors) + 0.5 * alpha * norm(weights).
    """
    # Calculate network outputs for all examples
    Vj, yj = nn_forward(Wi, Wo, input)

    # Calculate the deviations from the desired outputs
    ej = target - yj

    # Sum of squared errors plus the quadratic weight decay penalties
    error = (0.5 * np.sum(np.power(ej, 2))
             + 0.5 * alpha_i * weight_norm(Wi)
             + 0.5 * alpha_o * weight_norm(Wo))

    return error
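# A standard way to sanity-check nn_gradient against nn_cost_quad_decay is
# a central finite-difference comparison. The helper below is illustrative,
# not from the source. It assumes weight_norm(W) is the sum of squared
# entries (so 0.5 * alpha * weight_norm differentiates to alpha * W,
# matching the decay term added in nn_gradient), and that the target is a
# 1-D vector for a single output unit, matching the atleast_2d(...).T
# handling inside nn_gradient.
def check_gradient(Wi, Wo, alpha_i, alpha_o, X, t, eps=1e-6):
    """Compare dWi from nn_gradient with a numerical gradient of the cost."""
    T = np.atleast_2d(t).T  # column-vector form for the cost function
    dWi, _ = nn_gradient(Wi, Wo, alpha_i, alpha_o, X, t)
    num_dWi = np.zeros_like(Wi)
    for i in range(Wi.shape[0]):
        for j in range(Wi.shape[1]):
            Wp, Wm = Wi.copy(), Wi.copy()
            Wp[i, j] += eps
            Wm[i, j] -= eps
            num_dWi[i, j] = (nn_cost_quad_decay(Wp, Wo, X, T, alpha_i, alpha_o)
                             - nn_cost_quad_decay(Wm, Wo, X, T, alpha_i, alpha_o)) / (2 * eps)
    # Maximum absolute discrepancy; should be tiny (roughly O(eps**2))
    return np.max(np.abs(dWi - num_dWi))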
def nn_train(nn, option, train_x, train_y):
    iteration = option.iteration
    batch_size = option.batch_size
    m = train_x.shape[0]
    num_batches = m // batch_size  # integer division; leftover samples are dropped

    for k in range(iteration):
        # Shuffle the sample indices once per epoch
        kk = np.random.permutation(m)
        for l in range(num_batches):
            # The end index (l + 1) * batch_size could also be extended to
            # len(kk) on the final batch, to include the leftover samples
            batch_x = train_x[kk[l * batch_size:(l + 1) * batch_size], :]
            batch_y = train_y[kk[l * batch_size:(l + 1) * batch_size], :]
            nn = nn_forward(nn, batch_x, batch_y)
            nn = nn_backpropagation(nn, batch_y)
            nn = nn_applygradient(nn)
    return nn
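# The shuffle-and-slice pattern above is easy to verify in isolation. This
# standalone snippet (illustrative, not from the source) shows that each
# epoch visits every retained sample exactly once, in random order, and
# that the m % batch_size trailing samples are dropped:
#
#     import numpy as np
#
#     m, batch_size = 10, 4
#     kk = np.random.permutation(m)     # one shuffled index order per epoch
#     num_batches = m // batch_size     # 2 full batches; 2 samples dropped
#     for l in range(num_batches):
#         idx = kk[l * batch_size:(l + 1) * batch_size]
#         print(l, idx)                 # disjoint slices, no sample repeated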
# Initialize network weights; `range` is an initialization scale here
# (note that it shadows the Python builtin of the same name)
Wi = range * np.random.randn(Nh, Ni + 1)
Wo = range * np.random.randn(No, Nh + 1)
# dWi, dWo = nn_gradient(Wi, Wo, alpha_i, alpha_o, train_input, train_target)

# Determine the number of samples
exam, inp = train_input.shape

# ###################### #
# #### FORWARD PASS #### #
# ###################### #
# Calculate hidden and output unit activations
Vj, yj = nn_forward(Wi, Wo, train_input)

# ###################### #
# #### BACKWARD PASS ### #
# ###################### #
# Calculate the derivatives by backpropagating the errors
# from the desired outputs

# Output unit deltas
delta_o = -(train_target - yj)

# Hidden unit deltas; the last column of Wo holds the bias weights
r, c = Wo.shape
delta_h = (1.0 - np.power(Vj, 2)) * (delta_o.dot(Wo[:, :c - 1]))
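# The commented-out nn_gradient call above suggests how these pieces
# combine into plain batch gradient descent. A minimal loop might look
# like the sketch below; the learning rate and iteration count are
# illustrative choices, not from the source.
eta = 0.01  # illustrative step size
for it in range(1000):
    dWi, dWo = nn_gradient(Wi, Wo, alpha_i, alpha_o, train_input, train_target)
    Wi = Wi - eta * dWi  # steepest-descent update on both weight matrices
    Wo = Wo - eta * dWo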
def predict(self, input):
    """Predict network outputs for the given input examples."""
    Vj, yj = nn_forward(self.Wi, self.Wo, input)
    return yj
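# Hypothetical usage, assuming the surrounding class stores the trained
# weight matrices as self.Wi and self.Wo (as the method body implies):
#
#     yhat = net.predict(test_input)   # one row of outputs per example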