def _forward_step(self, x, prev_hidden_state, prev_cell_state):
    """Forward pass for a single time step of the LSTM layer.

    Args:
        x (np.array): Input data of shape (N, D)
        prev_hidden_state (np.array): Previous hidden state of shape (N, H)
        prev_cell_state (np.array): Previous cell state of shape (N, H)

    Returns:
        tuple:
            - next_hidden_state: Next hidden state, of shape (N, H)
            - next_cell_state: Next cell state, of shape (N, H)
            - cache: Tuple of values needed for back-propagation
    """
    _, H = prev_hidden_state.shape

    # Compute activations
    acts = np.dot(x, self.Wx) + np.dot(prev_hidden_state, self.Wh) + self.b

    # Compute the internal gates
    input_gate = sigmoid(acts[:, 0:H])
    forget_gate = sigmoid(acts[:, H:2 * H])
    output_gate = sigmoid(acts[:, 2 * H:3 * H])
    gain_gate = np.tanh(acts[:, 3 * H:4 * H])

    # Compute next states
    next_cell_state = forget_gate * prev_cell_state + input_gate * gain_gate
    next_hidden_state = output_gate * np.tanh(next_cell_state)

    # Cache the results
    cache = (x, next_hidden_state, next_cell_state, input_gate, forget_gate,
             output_gate, gain_gate, prev_hidden_state, prev_cell_state)

    return next_hidden_state, next_cell_state, cache
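# A minimal shape sanity check for _forward_step. This is a sketch, not part of
# the original source: `lstm` stands for a hypothetical layer instance whose
# parameters Wx (D, 4H), Wh (H, 4H), and b (4H,) stack the input, forget,
# output, and gain gate weights, matching the slicing above.
N, D, H = 2, 5, 4
lstm.Wx = np.random.randn(D, 4 * H)
lstm.Wh = np.random.randn(H, 4 * H)
lstm.b = np.zeros(4 * H)
h1, c1, _ = lstm._forward_step(np.random.randn(N, D),
                               np.random.randn(N, H),
                               np.random.randn(N, H))
assert h1.shape == (N, H) and c1.shape == (N, H)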
def predict_with_complex_net(X, V, W):
    # This function takes in the weights of a neural net that has been trained
    # using the 'learn_complex_net' function and returns the prediction that
    # the net makes using these weights.
    A = np.dot(X, V)
    B = sigmoid(A)
    C = np.dot(B, W)
    P = sigmoid(C)
    return P
def back_propagation(self, x, y):
    """
    Calculates the gradient of the cost function Cx for some batch of sample inputs
    :param x: a 784-dimensional vector representing the input image (28x28=784)
    :param y: a 10-dimensional unit vector representing the correct digit
    :return nabla_b, nabla_w: the bias and weight gradients representing nabla_Cx
    """
    nabla_w = [0 for i in self.weights]
    nabla_b = [0 for i in self.biases]

    # calculate z values and activations for the backwards pass
    activation = x
    activations = [x]
    z_vector = []
    for b, w in zip(self.biases, self.weights):
        z = mmath.dot(w, activation) + b
        z_vector.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    # backwards pass through the network
    delta = quadratic_cost(activations[-1], y) * sigmoid_prime(z_vector[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = mmath.dot(delta, activations[-2].transpose())

    # don't include input layer
    for layer in range(2, self.layer_count):
        z = z_vector[-layer]
        sp = sigmoid_prime(z)
        delta = mmath.dot(self.weights[-layer + 1].transpose(), delta) * sp
        nabla_b[-layer] = delta
        nabla_w[-layer] = mmath.dot(delta, activations[-layer - 1].transpose())

    return nabla_b, nabla_w
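# Minimal sketches of the helpers back_propagation assumes (the names come from
# the code above; the bodies are assumptions). Note that for the delta formula
# above to be the standard one, quadratic_cost must return the cost
# *derivative* (a - y) of C = 0.5 * ||a - y||^2, despite its name.
def sigmoid_prime(z):
    # derivative of the sigmoid function
    return sigmoid(z) * (1 - sigmoid(z))

def quadratic_cost(output_activations, y):
    # derivative of the quadratic cost with respect to the output activations
    return output_activations - y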
def feed_forward(self, act):
    """
    "Feeds" the input forward given an input, act.
    :param act: input for the network
    :return act: the network's output activation
    """
    for b, w in zip(self.biases, self.weights):
        act = sigmoid(mmath.dot(w, act) + b)
    return act
def feed_forward_evaluation(self, act):
    """
    "Feeds" the input forward given an input, act. Modified to simplify testing
    :param act: input for the network
    :return act: the network's output activation
    """
    # for each pair bias, weight compute act = sigmoid(weight*act + bias)
    # (dot product and vector addition)
    for bias, weight in zip(self.biases, self.weights):
        act = sigmoid(mmath.dot(weight, act) + bias)
    return act
def get_model_preds(X_train, y_train, P_train, X_test, y_test, P_test,
                    model_name):
    lin_model = LogisticRegression(C=C, solver='sag', max_iter=2000)
    lin_model.fit(X_train, y_train)
    y_test_scores = sigmoid(
        (X_test.dot(lin_model.coef_.T) + lin_model.intercept_).flatten())
    y_hats[model_name] = y_test_scores
    print('logistic regression evaluation...')
    performance = list(evaluate_performance_sim(y_test, y_test_scores, P_test))
    return lin_model, y_test_scores, performance
def learning_by_gradient_descent(y, tx, w, gamma):
    """
    Do one step of gradient descent using logistic regression.
    Return the loss and the updated w.
    """
    loss = compute_loss_neg_log_likelihood(y, tx, w)
    gradient = np.dot(tx.T, sigmoid(np.dot(tx, w)) - y)
    w -= gamma * gradient
    return w, loss
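# A sketch of the loss helper used above, consistent with the gradient
# tx.T @ (sigmoid(tx @ w) - y); the exact body in the original project may
# differ (e.g. by a 1/N normalization factor).
def compute_loss_neg_log_likelihood(y, tx, w):
    pred = sigmoid(np.dot(tx, w))
    return -np.sum(y * np.log(pred) + (1 - y) * np.log(1 - pred))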
def get_model_preds(X_train, y_train, P_train, X_test, y_test, P_test,
                    model_name):
    lin_model = RandomForestClassifier()
    lin_model.fit(X_train, y_train)
    # RandomForestClassifier has no coef_/intercept_, so score via
    # predict_proba rather than a sigmoid over a linear decision function.
    y_test_scores = lin_model.predict_proba(X_test)[:, 1]
    y_hats[model_name] = y_test_scores
    print('random forest evaluation...')
    performance = list(evaluate_performance_sim(y_test, y_test_scores, P_test))
    return lin_model, y_test_scores, performance
def learn_complex_net(X, y):
    # This function learns the weights for a neural net that calculates one set
    # of "intermediate inputs" and then uses those inputs to make a prediction.
    # The multiplications are set up so that in each iteration, the weights are
    # indeed updated in the correct direction. To understand why, follow the
    # arguments here: http://sethweidman.com/neural_net_post_2
    np.random.seed(2)
    V = np.random.randn(3, 4)
    W = np.random.randn(4, 1)
    for j in range(50000):
        A = np.dot(X, V)
        B = sigmoid(A)
        C = np.dot(B, W)
        P = sigmoid(C)
        L = 0.5 * (y - P) ** 2
        dLdP = -1.0 * (y - P)
        dPdC = sigmoid(C) * (1 - sigmoid(C))
        dLdC = dLdP * dPdC
        dCdW = B.T
        dLdW = np.dot(dCdW, dLdC)
        dCdB = W.T
        dLdB = np.dot(dLdC, dCdB)
        dBdA = sigmoid(A) * (1 - sigmoid(A))
        dLdA = dLdB * dBdA
        dAdV = X.T
        dLdV = np.dot(dAdV, dLdA)
        W -= dLdW
        V -= dLdV
    return V, W
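# Hypothetical usage tying learn_complex_net to predict_with_complex_net above
# (the toy data is chosen here, not from the original source; X must have 3
# columns and y one column to match the shapes of V and W).
X = np.array([[0., 0., 1.], [0., 1., 1.], [1., 0., 1.], [1., 1., 1.]])
y = np.array([[0.], [1.], [1.], [0.]])
V, W = learn_complex_net(X, y)
print(predict_with_complex_net(X, V, W))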
def learn_simple_net(X, y):
    # This function learns the weights for the simplest possible "neural net":
    # one with no hidden layer. This is conceptually equivalent to a logistic
    # regression.
    # The multiplications are set up so that in each iteration, the weights are
    # indeed updated in the correct direction. To understand why, follow the
    # argument here: http://sethweidman.com/neural_net_post
    np.random.seed(1)
    W = np.random.randn(3, 1)
    for i in range(500):
        A = np.dot(X, W)
        P = sigmoid(A)
        L = 0.5 * (y - P) ** 2
        if i % 50 == 0:
            print(P)
        dLdP = -1.0 * (y - P)
        dPdA = sigmoid(A) * (1.0 - sigmoid(A))
        dLdA = dLdP * dPdA
        dAdW = X.T
        dLdW = np.dot(dAdW, dLdA)
        W -= dLdW
    return W
def feed_forward(self, act):
    """
    "Feeds" the input forward given an input, act.
    :param act: input for the network
    :return z_vector, activations: vector of z s.t. z = w*act + b, list of
        activation values where each activation is sigmoid(z)
    """
    z_vector = []
    activations = [act]
    act_tracker = act
    # for each pair b, w compute z = w*act + b (dot product and vector
    # addition), then apply the sigmoid
    for b, w in zip(self.biases, self.weights):
        z = mmath.dot(w, act_tracker) + b
        z_vector.append(z)
        act_tracker = sigmoid(z)
        activations.append(act_tracker)
    return z_vector, activations
def _activations(self, thetas):
    if self.add_bias:
        input_layer = add_bias(self.X)
    else:
        input_layer = self.X

    # activations = [a1, a2, ...]
    activations = [input_layer]
    self.z = []

    # Process hidden layers
    for i in range(len(thetas)):
        self.z.append(np.dot(activations[-1], thetas[i].T))
        activations.append(sigmoid(self.z[-1]))
        # Don't add bias terms on the last layer
        if self.add_bias and i < len(thetas) - 1:
            activations[-1] = add_bias(activations[-1])

    return activations
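# A minimal sketch of the add_bias helper assumed by _activations: prepend a
# column of ones so that the first column of each theta acts as the bias term.
# This is an assumption; the project's own helper may append instead.
def add_bias(X):
    return np.hstack([np.ones((X.shape[0], 1)), X])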
def extract(q, candidates, em, transforms, b, n=3):
    '''
    Extract the n top ranked hypernyms among candidates
    input:
        q: the embedding of the query
        candidates: a list of candidate hypernym strings
        em: dictionary containing all word embeddings
        transforms: the k projection matrices (k is a module-level constant)
        b: bias
    '''
    # p: all k projections of the query q
    p = [np.dot(transforms[i], q).T for i in range(k)]

    # Normalize all projections
    for i in range(k):
        p[i] = p[i] / np.sqrt(np.sum(p[i] ** 2))

    # s: similarities of all projections of q with each candidate. Checking
    # against every candidate takes a while, but is manageable.
    s = [(np.dot(p, em[h]), h) for h in candidates]

    # For each candidate, keep the projection with the highest similarity,
    # then rank candidates by the sigmoid of that similarity plus its bias.
    maxsims = [(_s[np.argmax(_s)], np.argmax(_s), h) for _s, h in s]
    sigmoids = [(sigmoid(np.add(_s, b[idx])), h, idx) for _s, idx, h in maxsims]
    sigmoids.sort()
    return sigmoids[-n:]
def logistic_error(y, tx, w):
    """
    Logistic error

    Parameters
    ----------
    y : Vector
        Output.
    tx : Matrix
        Input.
    w : Vector
        Weights.

    Returns
    -------
    loss : Scalar
        Loss function.
    """
    a = helpers.sigmoid(tx @ w)
    loss = -(1 / tx.shape[0]) * np.sum((y * np.log(a)) + ((1 - y) * np.log(1 - a)))
    return loss
def GetValue(self, values=None):
    """
    When setting Neuron.value it would be wise to scale the value to
    between 0 and 1 so that the sigmoid function can do its thing better
    """
    # This "if" is used so that Neurons can be used for the input
    # layer and input can be set by simply setting the value
    if self.value is not None:
        return self.value  # helpers.sigmoid(self.value)

    bias_weight_index = len(self.weights) - 1

    # Find the sum of weighted inputs
    input_sum = 0
    # Use passed in values if given
    if values:
        # This won't work if the weights and inputs
        # lists are different lengths
        assert len(self.weights) - 1 == len(values)
        for i, value in enumerate(values):
            if i < bias_weight_index:
                input_sum += value * self.weights[i]
    else:
        # This won't work if the weights and inputs
        # lists are different lengths
        assert len(self.weights) - 1 == len(self.inputs)
        for i, neuron in enumerate(self.inputs):
            if i < bias_weight_index:
                input_sum += neuron.GetValue() * self.weights[i]

    input_sum += self.weights[bias_weight_index] * self.bias
    return helpers.sigmoid(input_sum)
# Load the input sample
with open(json_file, mode='r') as file:
    input_data = json.load(file)

# Transform input data to usable input to CNN
[feat_a, feat_b], _ = get_feat_and_label(input_data)
feat_a = np.expand_dims(feat_a, axis=0)
feat_b = np.expand_dims(feat_b, axis=0)

# Run prediction
y_hat = model.predict([feat_a, feat_b])
y_hat = y_hat[0, :, :]  # Single batch assumed

# Find row with highest confidence
conf_logits, bboxes, _ = np.split(y_hat, [1, 5], axis=1)
confs = sigmoid(conf_logits)
obj_row = int(np.argmax(confs))
max_conf = float(confs[obj_row])

# If high enough confidence, draw rectangle on second frame
if max_conf > 0.5:
    # Convert from CNN output to (top-left x, top-left y, width, height) w.r.t. `feat_b`
    x, y, w, h = preds_to_bbox(bboxes[obj_row], obj_row, grid_sz=64, anchor_sz=96)
    # Parameters corresponding to `bbox_a`
    x_a, y_a, w_a, h_a = input_data['bbox_a']
# Short "unit" test
if __name__ == "__main__":
    n1 = Neuron()
    n1.value = 2
    n2 = Neuron()
    n2.value = 3
    n3 = Neuron([0.5, 0.25, 1], [n1, n2])
    print(n3.GetValue())
    print(n3.GetValue() == helpers.sigmoid(2 * 0.5 + 3 * 0.25 + -1 * 1))
# 1. compute the predictions for one sample, 2. compare them with what was
# expected, then backpropagate errors through the NN, changing Theta_j.
# Return to step one for the next sample.
for w in range(m_train):
    """
    %%%%%% Forward propagation %%%%%%
    Using sigmoid (logistic) activation function
    a_j[i]  : activation of unit i in layer j
    Theta_j : weights matrix controlling the mapping of the j-th layer to j+1
              Theta_j has dim s_(j+1) x (s_j)
    """
    a_1 = np.transpose(X_train[w])  # activation values for first layer
    a_1 = np.reshape(a_1, (len(a_1), 1))
    a_2 = sigmoid(np.matmul(Theta_1, a_1))
    a_2 = np.reshape(a_2, (len(a_2), 1))
    a_3 = sigmoid(np.matmul(Theta_2, a_2))
    a_3 = np.reshape(a_3, (len(a_3), 1))
    a_4 = sigmoid(np.matmul(Theta_3, a_3))  # prediction layer
    a_4 = np.reshape(a_4, (len(a_4), 1))

    # accumulate cost function (a_4 is already a sigmoid activation, so take
    # its log directly rather than applying sigmoid a second time)
    temp = reshape_class(y_train[w])
    len_temp = len(temp)
    J_part1 = np.multiply(temp, np.log(a_4))
    J_part2 = np.multiply((np.ones((len_temp, 1)) - temp),
                          np.log(np.ones((len(a_4), 1)) - a_4))
def test_in_one(n_dim, batch_size, n_iter, C, alpha, compute_emd=True,
                k_nbrs=3, emd_method=emd_samples):
    global X, P, y, df, X_test
    reps = {}
    X_no_p = df.drop(['Y', 'P'], axis=1).values

    # declare variables
    X = torch.tensor(X).float()
    P = torch.tensor(P).long()

    # train-test split
    data_train, data_test = split_data_np(
        (X.data.cpu().numpy(), P.data.cpu().numpy(), y), 0.7)
    X_train, P_train, y_train = data_train
    X_test, P_test, y_test = data_test
    X_train_no_p = X_train[:, :-1]
    X_test_no_p = X_test[:, :-1]
    X_u = X[P == 1]
    X_n = X[P == 0]

    # AE.
    model_ae = FairRep(len(X[0]), n_dim)
    train_rep(model_ae, 0.01, X, P, n_iter, 10, batch_size, alpha=0, C_reg=0,
              compute_emd=compute_emd, adv=False, verbose=True)

    # AE_P.
    model_ae_P = FairRep(len(X[0]) - 1, n_dim - 1)
    train_rep(model_ae_P, 0.01, X_no_p, P, n_iter, 10, batch_size, alpha=0,
              C_reg=0, compute_emd=compute_emd, adv=False, verbose=True)

    # NFR.
    model_name = 'compas_Original'
    model_nfr = FairRep(len(X[0]), n_dim)
    # X and P are already tensors at this point; no re-conversion is needed.
    train_rep(model_nfr, 0.01, X, P, n_iter, 10, batch_size, alpha=alpha,
              C_reg=0, compute_emd=compute_emd, adv=True, verbose=True)

    results = {}
    print('begin testing.')
    X_ori_np = X.data.cpu().numpy()

    # Original.
    print('logistic regression on the original...')
    lin_model, y_test_scores, performance = get_model_preds(
        X_train, y_train, P_train, X_test, y_test, P_test, model_name)
    y_hats[model_name] = get_preds_on_full_dataset(X, lin_model)
    reps[model_name] = None
    print(X_train.shape, X_test.shape)
    save_decision_boundary_plot(np.concatenate((X_train, X_test)),
                                np.concatenate((y_train, y_test)),
                                np.concatenate((P_train, P_test)), model_name)
    performance.append(emd_method(X_n, X_u))
    performance.append(
        get_consistency(X.data.cpu().numpy(), lin_model, n_neighbors=k_nbrs))
    performance.append(stat_diff(X.data.cpu().numpy(), P, lin_model))
    performance.append(equal_odds(X.data.cpu().numpy(), y, P, lin_model))
    # make_cal_plot(X.data.cpu().numpy(), y, P, lin_model, model_name)
    results[model_name] = performance

    # Original-P.
    model_name = 'compas_Original-P'
    print('logistic regression on the original-P')
    lin_model, y_test_scores, performance = get_model_preds(
        X_train_no_p, y_train, P_train, X_test_no_p, y_test, P_test,
        model_name)
    y_hats[model_name] = get_preds_on_full_dataset(X[:, :-1], lin_model)
    reps[model_name] = None
    save_decision_boundary_plot(np.concatenate((X_train_no_p, X_test_no_p)),
                                np.concatenate((y_train, y_test)),
                                np.concatenate((P_train, P_test)), model_name)
    performance.append(emd_method(X_n[:, :-1], X_u[:, :-1]))
    print('calculating consistency...')
    performance.append(
        get_consistency(X[:, :-1].data.cpu().numpy(), lin_model,
                        n_neighbors=k_nbrs))
    print('calculating stat diff...')
    performance.append(stat_diff(X[:, :-1].data.cpu().numpy(), P, lin_model))
    performance.append(
        equal_odds(X[:, :-1].data.cpu().numpy(), y, P, lin_model))
    # make_cal_plot(X[:, :-1].data.cpu().numpy(), y, P, lin_model, model_name)
    results[model_name] = performance

    # use encoder
    model_name = 'compas_AE'
    U_0 = model_ae.encoder(X[P == 0]).data
    U_1 = model_ae.encoder(X[P == 1]).data
    U = model_ae.encoder(X).data
    U_np = U.cpu().numpy()
    data_train, data_test = split_data_np((U_np, P.data.cpu().numpy(), y), 0.7)
    X_train, P_train, y_train = data_train
    X_test, P_test, y_test = data_test
    print('logistic regression on AE...')
    lin_model = LogisticRegression(C=C, solver='sag', max_iter=2000)
    lin_model.fit(X_train, y_train)
    save_decision_boundary_plot(np.concatenate((X_train, X_test)),
                                np.concatenate((y_train, y_test)),
                                np.concatenate((P_train, P_test)), model_name)
    y_test_scores = sigmoid(
        (X_test.dot(lin_model.coef_.T) + lin_model.intercept_).flatten())
    y_hats[model_name] = get_preds_on_full_dataset(U, lin_model)
    reps[model_name] = U

    def calc_perf(y_test, y_test_scores, P_test, U, U_0, U_1, U_np, lin_model,
                  X_test, model_name):
        print('logistic regression evaluation...')
        performance = list(
            evaluate_performance_sim(y_test, y_test_scores, P_test))
        print('calculating emd...')
        performance.append(emd_method(U_0, U_1))
        print('calculating consistency...')
        performance.append(
            get_consistency(U_np, lin_model, n_neighbors=k_nbrs,
                            based_on=X_ori_np))
        print('calculating stat diff...')
        performance.append(stat_diff(X_test, P_test, lin_model))
        print('calculating equal odds...')
        performance.append(equal_odds(X_test, y_test, P_test, lin_model))
        # make_cal_plot(X_test, y_test, P_test, lin_model, model_name)
        return performance

    performance = calc_perf(y_test, y_test_scores, P_test, U, U_0, U_1, U_np,
                            lin_model, X_test, model_name)
    results[model_name] = performance

    # AE minus P
    model_name = 'compas_AE_P'
    U_0 = model_ae_P.encoder(X[:, :-1][P == 0]).data
    U_1 = model_ae_P.encoder(X[:, :-1][P == 1]).data
    U = model_ae_P.encoder(X[:, :-1]).data
    print('ae-p emd afterwards: ' + str(emd_method(U_0, U_1)))
    U_np = U.cpu().numpy()
    data_train, data_test = split_data_np((U_np, P.data.cpu().numpy(), y), 0.7)
    X_train, P_train, y_train = data_train
    X_test, P_test, y_test = data_test
    print('logistic regression on AE-P...')
    lin_model = LogisticRegression(C=C, solver='sag', max_iter=2000)
    lin_model.fit(X_train, y_train)
    save_decision_boundary_plot(np.concatenate((X_train, X_test)),
                                np.concatenate((y_train, y_test)),
                                np.concatenate((P_train, P_test)), model_name)
    y_test_scores = sigmoid(
        (X_test.dot(lin_model.coef_.T) + lin_model.intercept_).flatten())
    y_hats[model_name] = get_preds_on_full_dataset(U, lin_model)
    reps[model_name] = U
    performance = calc_perf(y_test, y_test_scores, P_test, U, U_0, U_1, U_np,
                            lin_model, X_test, model_name)
    results[model_name] = performance
    # NFR.
    model_name = 'compas_NFR'
    U_0 = model_nfr.encoder(X[P == 0]).data
    U_1 = model_nfr.encoder(X[P == 1]).data
    U = model_nfr.encoder(X).data
    print('nfr emd afterwards: ' + str(emd_method(U_0, U_1)))
    U_np = U.cpu().numpy()
    data_train, data_test = split_data_np((U_np, P.data.cpu().numpy(), y), 0.7)
    X_train, P_train, y_train = data_train
    X_test, P_test, y_test = data_test
    print('logistic regression on NFR...')
    lin_model = LogisticRegression(C=C, solver='sag', max_iter=2000)
    lin_model.fit(X_train, y_train)
    save_decision_boundary_plot(np.concatenate((X_train, X_test)),
                                np.concatenate((y_train, y_test)),
                                np.concatenate((P_train, P_test)), model_name)
    y_test_scores = sigmoid(
        (X_test.dot(lin_model.coef_.T) + lin_model.intercept_).flatten())
    y_hats[model_name] = get_preds_on_full_dataset(U, lin_model)
    reps[model_name] = U
    performance = calc_perf(y_test, y_test_scores, P_test, U, U_0, U_1, U_np,
                            lin_model, X_test, model_name)
    results[model_name] = performance

    return results, y_hats, reps
def get_preds_on_full_dataset(x_context, lin_model):
    return sigmoid(((x_context.numpy()).dot(lin_model.coef_.T)
                    + lin_model.intercept_).flatten())
def test_sigmoid(self):
    self.assertEqual(sigmoid(0), 0.5)
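# A minimal sigmoid that satisfies this test, since sigmoid(0) = 1/(1+1) = 0.5.
# This body is an assumption; the project's own helper may differ, e.g. in
# numerical stability for large negative inputs.
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))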
# expected, then backpropagate errors through the NN
# changing Theta_j. Return to step one for the next sample.
for w in range(m_train):
    """
    %%%%%% Forward propagation %%%%%%
    Using sigmoid (logistic) activation function
    a_j[i]  : activation of unit i in layer j
    Theta_j : weights matrix controlling the mapping of the j-th layer to j+1
              Theta_j has dim s_(j+1) x (s_j)
    """
    a_1 = np.transpose(X_train[w])  # activation values for first layer
    a_1 = np.reshape(a_1, (len(a_1), 1))
    a_2 = sigmoid(np.matmul(Theta_1, a_1))
    a_2 = np.reshape(a_2, (len(a_2), 1))
    a_3 = sigmoid(np.matmul(Theta_2, a_2))
    a_3 = np.reshape(a_3, (len(a_3), 1))
    a_4 = sigmoid(np.matmul(Theta_3, a_3))  # prediction layer
    a_4 = np.reshape(a_4, (len(a_4), 1))

    # accumulate cost function (if re-enabled: a_4 is already a sigmoid
    # activation, so take its log directly)
    temp = reshape_class(y_train[w])
    len_temp = len(temp)
    # J_part1 = np.multiply(temp, np.log(a_4))
    # J_part2 = np.multiply((np.ones((len_temp, 1)) - temp),
    #                       np.log(np.ones((len(a_4), 1)) - a_4))
    # J = J + sum(J_part1 + J_part2) / (-m_train)
def compute_loss_log_reg(y, tx, w):
    return -(y.T.dot(np.log(sigmoid(tx.dot(w))))
             + (1 - y).T.dot(np.log(1 - sigmoid(tx.dot(w)))))
def activate(self, z):
    return sigmoid(z)
def compute_gradient_log_reg(y, tx, w):
    return tx.T.dot(sigmoid(tx.dot(w)) - y)
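# A quick finite-difference check that compute_gradient_log_reg matches the
# numerical gradient of compute_loss_log_reg above (a sketch; assumes numpy
# and a sigmoid helper are in scope).
rng = np.random.default_rng(0)
tx = rng.normal(size=(20, 3))
y = (rng.uniform(size=20) > 0.5).astype(float)
w = rng.normal(size=3)
eps = 1e-6
num_grad = np.zeros_like(w)
for j in range(len(w)):
    e = np.zeros_like(w)
    e[j] = eps
    num_grad[j] = (compute_loss_log_reg(y, tx, w + e)
                   - compute_loss_log_reg(y, tx, w - e)) / (2 * eps)
assert np.allclose(num_grad, compute_gradient_log_reg(y, tx, w), atol=1e-4)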
def hypothesis(self, x, w, b=0):
    return sigmoid(torch.mm(x, w) + b)
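# Hypothetical usage of hypothesis (a sketch; `model` stands for any object
# exposing this method, and the sigmoid helper is assumed to accept torch
# tensors).
x = torch.randn(4, 3)   # 4 samples, 3 features
w = torch.randn(3, 1)   # weight column vector
probs = model.hypothesis(x, w, b=0.5)  # shape (4, 1), values in (0, 1)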
plotDecisionBoundary(theta, X_padded, y)
plt.hold(False)  # prevents further drawing on plot (removed in recent matplotlib)
plt.show(block=False)

input('Program paused. Press enter to continue.\n')

## ============== Part 4: Predict and Accuracies ==============
# After learning the parameters, you'll likely want to use the model to
# predict outcomes on unseen data. In this part, you will use the logistic
# regression model to predict the probability that a student with a score of
# 45 on exam 1 and a score of 85 on exam 2 will be admitted.
#
# Furthermore, you will compute the training and test set accuracies of
# our model.

# Predict probability for a student with score 45 on exam 1
# and score 85 on exam 2
prob = sigmoid(np.dot(np.array([1, 45, 85]), theta))
print('For a student with scores 45 and 85, '
      'we predict an admission probability of {:f}'.format(prob))

# Compute accuracy on our training set
p = predict(theta, X_padded)
print('Train Accuracy: {:f}'.format(np.mean(p == y) * 100))

input('Program paused. Press enter to continue.\n')
def one_iter(examples, transforms, b, learning_rate=0.01):
    '''
    Runs one iteration through all provided examples
    '''
    total_loss = 0
    for q, h, t in examples:
        # project the query with all of the projection matrices
        p = [np.dot(transforms[i], q).T for i in range(k)]

        # Normalize the projections
        for i in range(k):
            p[i] = p[i] / np.sqrt(np.sum(p[i] ** 2))

        # compute similarities between all projections and the candidate
        s = np.dot(p, h)

        # Apply the sigmoid function and find the transformation that resulted
        # in the closest projection to the candidate
        sigmoids = sigmoid(np.add(s, b))
        maxsim = np.argmax(sigmoids)

        # y: predicted similarity
        # x: the corresponding projected vector
        y = sigmoids[maxsim]
        x = p[maxsim]
        if y == 0 or y == 1:
            print("Perfect hit")
            continue

        # Compute the loss and update the corresponding projection matrix
        # in accordance with gradient descent.
        loss = t * np.log(y) - (np.subtract(1, t) * np.log(np.subtract(1, y)))
        total_loss += -abs(loss)
        gradient = np.dot(x.T, loss)
        prev_theta = transforms[maxsim]
        transforms[maxsim] = np.subtract(prev_theta,
                                         np.multiply(learning_rate, gradient))

        # Update the bias
        b_loss = min(1, max(0, y + b[maxsim])) - t
        prev_b = b[maxsim]
        b[maxsim] = np.subtract(prev_b, np.multiply(learning_rate, b_loss))

    return total_loss