import numpy as np
import wandb

# Local modules: parameter initialization, forward/backward passes and metrics
import init_methods
import forward_propagation
import back_propagation
import accuracy_loss


def mgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta,
        init_strategy, batch_size, alpha=0):
    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)

    gamma = 0.9
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)
    prev_W, prev_b = init_methods.random_init2(d, hl, ol)

    iteration = 0
    while iteration < epochs:
        num_points_seen = 0
        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1

            # Forward propagation
            h, a = forward_propagation.forward_propagation(
                W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0

                # Update prev_W, prev_b, W and b
                if iteration == 0:
                    for i in range(1, len(W)):
                        W[i] = W[i] - eta * grad_W[i] - eta * alpha * W[i]
                        b[i] = b[i] - eta * grad_b[i]
                        prev_W[i] = eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = eta * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        prev_W[i] = np.multiply(
                            gamma, prev_W[i]) + eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = np.multiply(gamma, prev_b[i]) + eta * grad_b[i]
                        W[i] = W[i] - prev_W[i]
                        b[i] = b[i] - prev_b[i]

                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })
            # print("\n\niteration number ", iteration, " Training Accuracy: ", train_acc, " Training Loss: ", train_loss)
            # print("\n\niteration number ", iteration, " validation Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1

    return W, b
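
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original training loop: the momentum
# update used inside mgd(), reduced to a single parameter array. The helper
# name `momentum_step` and its flat-array interface are assumptions made for
# illustration only.
def momentum_step(param, grad, velocity, eta, gamma=0.9, alpha=0.0):
    """One momentum update with an optional L2 (weight-decay) term."""
    # velocity accumulates an exponentially decaying sum of past update steps
    velocity = gamma * velocity + eta * (grad + alpha * param)
    return param - velocity, velocity
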
def vgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs=100, eta=0.1,
        init_strategy="xavier", alpha=0):
    print("Function Invoked: vgd")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    t, n_hl = 0, len(hl)
    while t < epochs:
        gW, gb = [], []
        for index, (x, y) in enumerate(zip(train_x, train_y)):
            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W, h, x, y, _y, n_hl, ac, lf)

            if index == 0:
                gW = _gW
                gb = _gb
            else:
                gW = list(np.add(gW, _gW))
                gb = list(np.add(gb, _gb))

        # Update bias
        for index, (_b, _gb) in enumerate(zip(b, gb)):
            b[index] = _b - eta * np.array(_gb)

        # Update weights
        for index, (_W, _gW) in enumerate(zip(W, gW)):
            W[index] = _W - eta * (np.array(_gW) + alpha * _W)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
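
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original training loop: the core of
# vgd() is a plain full-batch gradient step with an optional L2 term.
# `gd_step` is a hypothetical helper name used only for illustration.
def gd_step(param, grad, eta, alpha=0.0):
    """One vanilla gradient-descent update: param <- param - eta * (grad + alpha * param)."""
    return param - eta * (grad + alpha * param)
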
def adam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs=100, eta=0.1,
         init_strategy="xavier", batch_size=1):
    print("Function Invoked: adam")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    n_hl = len(hl)
    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b, m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (
        n_hl + 2), [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:
        gW, gb = [], []
        for index, (x, y) in enumerate(zip(train_x, train_y)):
            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W, h, x, y, _y, n_hl, ac, lf)

            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta,
                               n_hl, batch_size, m_W, m_b, v_W, v_b, gW, gb,
                               W, b, ac)
                gW, gb = [], []

        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                           batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
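
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original training loop: a bias-corrected
# Adam update for a single parameter array, in the standard formulation that
# the (not shown here) update_adam_Wb() helper is expected to implement. The
# name `adam_step` and its interface are assumptions; epsilon is placed inside
# the square root to match the convention visible elsewhere in this file.
def adam_step(param, grad, m, v, t, eta, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """One Adam update; t is the 1-based update count."""
    m = beta1 * m + (1 - beta1) * grad             # first-moment estimate
    v = beta2 * v + (1 - beta2) * np.square(grad)  # second-moment estimate
    m_hat = m / (1 - beta1 ** t)                   # bias correction
    v_hat = v / (1 - beta2 ** t)
    param = param - (eta / np.sqrt(v_hat + epsilon)) * m_hat
    return param, m, v
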
def rmsprop(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta,
            init_strategy, batch_size):
    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)

    hist_W, hist_b = init_methods.random_init2(d, hl, ol)
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)
    epsilon, beta1 = 1e-8, 0.95

    iteration = 0
    while iteration < epochs:
        num_points_seen = 0
        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1

            # Forward propagation
            h, a = forward_propagation.forward_propagation(
                W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0

                if iteration == 0:
                    for i in range(1, len(W)):
                        hist_W[i] = (1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = (1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (
                            eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (
                            eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        hist_W[i] = beta1 * hist_W[i] + (
                            1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = beta1 * hist_b[i] + (
                            1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (
                            eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (
                            eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]

                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })
            # print("\n\niteration number ", iteration, " Training Accuracy: ", train_acc, " Training Loss: ", train_loss)
            # print("\n\niteration number ", iteration, " validation Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1

    return W, b
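
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original training loop: the
# per-parameter RMSProp rule used inside rmsprop(), written as a stand-alone
# helper. `rmsprop_step` is a hypothetical name used only for illustration.
def rmsprop_step(param, grad, hist, eta, beta=0.95, epsilon=1e-8):
    """One RMSProp update: scale the step by a running average of squared gradients."""
    hist = beta * hist + (1 - beta) * np.square(grad)
    return param - (eta / np.sqrt(hist + epsilon)) * grad, hist
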
def nadam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs=100,
          eta=0.1, init_strategy="xavier", batch_size=1):
    print("Function Invoked: nadam")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    n_hl = len(hl)
    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b, m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (
        n_hl + 2), [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:
        gW, gb, W_look_ahead, b_look_ahead = [], [], [np.array(
            [])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

        for index, (x, y) in enumerate(zip(train_x, train_y)):
            if index % batch_size == 0:
                if t == 0 and index == 0:
                    W_look_ahead = np.copy(W)
                    b_look_ahead = np.copy(b)
                else:
                    for _index, (_b, _m_b, _v_b) in enumerate(zip(b, m_b, v_b)):
                        _m_b_hat = (beta1 * _m_b) / (1 - np.power(beta1, count + 1))
                        _v_b_hat = (beta2 * _v_b) / (1 - np.power(beta2, count + 1))
                        b_look_ahead[_index] = _b - (
                            eta / np.sqrt(_v_b_hat + epsilon)) * _m_b_hat

                    for _index, (_W, _m_W, _v_W) in enumerate(zip(W, m_W, v_W)):
                        _m_W_hat = (beta1 * _m_W) / (1 - np.power(beta1, count + 1))
                        _v_W_hat = (beta2 * _v_W) / (1 - np.power(beta2, count + 1))
                        W_look_ahead[_index] = _W - (
                            eta / np.sqrt(_v_W_hat + epsilon)) * _m_W_hat

            # Forward propagation
            h, a = forward_propagation.forward_propagation(
                W_look_ahead, b_look_ahead, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W_look_ahead, h, x, y, _y, n_hl, ac, lf)

            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta,
                                n_hl, batch_size, m_W, m_b, v_W, v_b, gW, gb,
                                W, b, ac)
                gW, gb, W_look_ahead, b_look_ahead = [], [], [np.array(
                    [])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                            batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
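
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original training loop: Nadam combines
# Adam's bias-corrected moment estimates with a Nesterov-style look-ahead, as
# nadam() does before each forward pass. This single-array version follows the
# standard Nadam formulation; the name `nadam_step` and its interface are
# assumptions, and the (not shown) update_nadam_Wb() helper may differ in detail.
def nadam_step(param, grad, m, v, t, eta, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """One Nadam update; t is the 1-based update count."""
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # Nesterov correction: mix the bias-corrected momentum with the current gradient
    m_nesterov = beta1 * m_hat + (1 - beta1) * grad / (1 - beta1 ** t)
    param = param - (eta / np.sqrt(v_hat + epsilon)) * m_nesterov
    return param, m, v
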