def output_gradient(y_vector, y_hat_vector):
    gradient = []
    for i in range(len(y_vector)):
        y = y_vector.vector[i]
        y_hat = y_hat_vector.vector[i]
        g = -((y / y_hat) - ((1 - y) / (1 - y_hat)))
        gradient.append(g)
    return algebra.Vector(gradient)
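The expression -(y / y_hat - (1 - y) / (1 - y_hat)) is the derivative of the binary cross-entropy loss with respect to each prediction. A minimal spot check, assuming the algebra.Vector class used throughout these listings; the sample values are illustrative only:

# Hypothetical spot check of the cross-entropy gradient
# (assumes algebra.Vector from the other listings).
y = algebra.Vector([1.0, 0.0])
y_hat = algebra.Vector([0.9, 0.2])
output_gradient(y, y_hat).print()  # roughly [-1.11, 1.25]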
def predict(X, Y):
    # `memory` is the module-level dict that train() fills with the learned
    # layer count, parameters and activation function.
    if memory:
        layers = memory['layers']
        params = memory['params']
        act_fun = memory['act_fun']

        y_hats = []
        for example_idx in range(len(X)):
            x = algebra.Vector(X[example_idx])
            y = algebra.Vector(Y[example_idx])
            y_hat, _ = propagation.net_forward_prop(layers, x, params, act_fun)
            y_hats.append(y_hat.vector)

            print('\nExample #{}'.format(example_idx))
            y.print()
            y_hat.print()

        accuracy = metrics.accuracy(y_hats, Y)
        print('\nAccuracy: {}'.format(accuracy))
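A sketch of how predict might be called after training. The data layout (plain Python lists of float lists) is an assumption based on how train() and predict() index X and Y, not something stated explicitly in the source:

# Hypothetical usage; assumes train() has already populated `memory`
# and that each example is a plain list of floats.
X_test = [[0.0, 1.0], [1.0, 1.0]]
Y_test = [[1.0], [0.0]]
predict(X_test, Y_test)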
def train(X, Y, act_fun, act_fun_back, architecture, loss_metric,
          learning_rate, epochs, metrics_period):
    layers = len(architecture)
    params = init_params(architecture)
    iterations = 0

    for epoch in range(epochs):
        # One forward pass, one backward pass and one parameter update
        # per training example (stochastic gradient descent).
        for example_idx in range(len(X)):
            x = algebra.Vector(X[example_idx])
            y = algebra.Vector(Y[example_idx])

            y_hat, layer_outputs = propagation.net_forward_prop(
                layers, x, params, act_fun)
            output_gradient = propagation.output_gradient(y, y_hat)
            param_gradients = propagation.net_back_prop(
                layers, layer_outputs, output_gradient, params, act_fun_back)
            update_params(layers, params, param_gradients, learning_rate)
            iterations += 1

            # Metrics
            if iterations % metrics_period == 0:
                m_y_hat_list = []
                for m_idx in range(len(X)):
                    m_x = algebra.Vector(X[m_idx])
                    m_y_hat, _ = propagation.net_forward_prop(
                        layers, m_x, params, act_fun)
                    m_y_hat_list.append(m_y_hat.vector)
                loss = metrics.loss_function(m_y_hat_list, Y, loss_metric)
                accuracy = metrics.accuracy(m_y_hat_list, Y)
                print(
                    'Epoch: {}\tIter: {}k\t\tLoss: {}\t\tAccuracy: {}'.format(
                        epoch + 1, iterations / 1000, loss, accuracy))

    memory['layers'] = layers
    memory['params'] = params
    memory['act_fun'] = act_fun
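A sketch of a full training call. The XOR-style dataset, the [2, 3, 1] architecture, the 'cross_entropy' loss_metric string and the hyperparameter values are all assumptions for illustration; the source does not show which loss_metric values metrics.loss_function accepts:

# Hypothetical training run: 2 inputs, one hidden layer of 3 units, 1 output.
# Dataset, loss_metric value and hyperparameters are illustrative assumptions.
X_train = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
Y_train = [[0.0], [1.0], [1.0], [0.0]]
train(X_train, Y_train,
      activation.tanh, activation.tanh_back,
      architecture=[2, 3, 1],
      loss_metric='cross_entropy',
      learning_rate=0.1,
      epochs=1000,
      metrics_period=1000)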
def init_params(architecture):
    params = {}
    for i in range(1, len(architecture)):
        curr_size = architecture[i]
        prev_size = architecture[i - 1]

        # weight matrix (curr_size x prev_size)
        weights = algebra.Matrix([[random_param() for _ in range(prev_size)]
                                  for _ in range(curr_size)])
        params['W{}'.format(i)] = weights

        # bias vector (curr_size)
        biases = algebra.Vector([random_param() for _ in range(curr_size)])
        params['b{}'.format(i)] = biases

    return params
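A quick sketch of what init_params produces for a small network; the [2, 3, 1] architecture is an assumed example, and it relies on random_param() and algebra.Matrix/Vector from the other listings:

# Hypothetical inspection of the parameter dictionary for a [2, 3, 1] network.
params = init_params([2, 3, 1])
print(sorted(params.keys()))  # ['W1', 'W2', 'b1', 'b2']
# W1 is 3x2 (curr_size x prev_size), b1 has 3 entries; W2 is 1x3, b2 has 1 entry.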
def tanh_derivative(z):
    ones = algebra.Vector([1.0 for _ in range(len(z))])
    t = tanh(z)
    return ones - t.mul_element_wise(t)
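tanh_derivative relies on the identity tanh'(z) = 1 - tanh(z)^2. A small numeric check, assuming the tanh helper defined just below and the element-wise Vector operations used above; the input values are illustrative:

# Hypothetical sanity check of tanh'(z) = 1 - tanh(z)^2.
z = algebra.Vector([0.0, 1.0])
tanh_derivative(z).print()  # roughly [1.0, 0.42]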
def tanh(z):
    return algebra.Vector([math.tanh(item) for item in z.vector])
import math

import algebra


def tanh(z):
    return algebra.Vector([math.tanh(item) for item in z.vector])


def tanh_derivative(z):
    ones = algebra.Vector([1.0 for _ in range(len(z))])
    t = tanh(z)
    return ones - t.mul_element_wise(t)


def tanh_back(dA, z):
    return dA.mul_element_wise(tanh_derivative(z))


if __name__ == '__main__':
    v = algebra.Vector([0.0, 0.5, 1.0, 2.0, 8.0])
    tanh(v).print()

    dA = algebra.Vector([0.0, 0.1, 0.2, 0.3, 0.4])
    tanh_back(dA, v).print()
def output_gradient(y_vector, y_hat_vector):
    gradient = []
    for i in range(len(y_vector)):
        y = y_vector.vector[i]
        y_hat = y_hat_vector.vector[i]
        g = -((y / y_hat) - ((1 - y) / (1 - y_hat)))
        gradient.append(g)
    return algebra.Vector(gradient)


if __name__ == '__main__':
    act_fun = activation.tanh
    act_back = activation.tanh_back

    A_prev = algebra.Vector([0.1, 0.2])
    W_curr = algebra.Matrix([[0.5, 0.6], [0.6, 0.7], [0.6, 0.5]])
    b_curr = algebra.Vector([0.4, 0.5, 0.5])

    print('\nForward prop (layer)')
    Z_curr, A_curr = layer_forward_prop(A_prev, W_curr, b_curr, act_fun)
    print('A_curr:')
    A_curr.print()

    print('\nBack prop (layer)')
    layer_gradient = algebra.Vector([0.1, 0.2, 0.1])
    dW_curr, db_curr, dA_prev = layer_back_prop(layer_gradient, W_curr, Z_curr,
                                                A_prev, act_back)
    print('dW_curr:')
    dW_curr.print()