def gradient_descent(y, tx, initial_w, max_iters, gamma, mae=False):
    """Gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w, mae=mae)]
    w = initial_w
    for n_iter in range(max_iters):
        # compute gradient and loss
        gradient = compute_gradient(y, tx, w, mae)
        # update w by gradient
        w = w - gamma * gradient
        loss = compute_loss(y, tx, w, mae=mae)
        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("[gradient descent] ({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
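# The snippets in this collection rely on helper functions such as compute_loss
# and compute_gradient that live elsewhere in the respective repositories. Below
# is a minimal sketch of what an MSE/MAE version of these helpers might look
# like, matching the `mae` flag used by gradient_descent above; the exact
# signatures and normalisation constants in the real helpers may differ
# (other snippets pass keywords like loss_method= or metric= instead).
import numpy as np


def compute_loss(y, tx, w, mae=False):
    """MSE loss (or MAE if mae=True) of the linear model w on (y, tx)."""
    e = y - tx.dot(w)
    if mae:
        return np.mean(np.abs(e))
    return np.mean(e ** 2) / 2


def compute_gradient(y, tx, w, mae=False):
    """Gradient of the MSE loss (or a subgradient of the MAE loss)."""
    e = y - tx.dot(w)
    if mae:
        return -tx.T.dot(np.sign(e)) / len(y)
    return -tx.T.dot(e) / len(y)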
def cross_validation(y, tx, method, params, threshold=0, k=5, seed=0):
    """Run k-fold cross-validation for the given method and parameters.

    Returns the per-fold scores, the per-fold weights, the mean loss,
    and the mean score.
    """
    k_indices = build_k_indices(y, k, seed)
    s = np.zeros(k)
    l = np.zeros(k)
    weights = []
    for i in range(k):
        # use the i'th subgroup as test set, the others as training set
        te_indice = k_indices[i]
        tr_indice = k_indices[~(np.arange(k_indices.shape[0]) == i)]
        tr_indice = tr_indice.reshape(-1)
        y_test = y[te_indice]
        y_train = y[tr_indice]
        tx_test = tx[te_indice]
        tx_train = tx[tr_indice]
        w, _ = run_method(y_train, tx_train, method, params)
        weights.append(w)
        if method in ["LR", "RLR"]:
            l[i] = compute_loss(y_test, tx_test, w, loss_method='sigmoid')
            s[i] = logistic_score(y_test, tx_test, w, threshold=threshold)
        else:
            l[i] = compute_loss(y_test, tx_test, w, loss_method='rmse')
            s[i] = regresssion_score(y_test, tx_test, w, threshold=threshold)
    return s, weights, np.mean(l), np.mean(s)
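# build_k_indices is another assumed helper. A minimal sketch of the usual
# implementation (shuffle the row indices and split them into k folds) is
# shown here; the real helper may differ in details such as how it handles a
# dataset size that is not a multiple of k.
def build_k_indices(y, k_fold, seed):
    """Return an array of shape (k_fold, N // k_fold) of shuffled row indices."""
    num_row = y.shape[0]
    interval = int(num_row / k_fold)
    np.random.seed(seed)
    indices = np.random.permutation(num_row)
    k_indices = [indices[k * interval:(k + 1) * interval] for k in range(k_fold)]
    return np.array(k_indices)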
def backtracing(y, tx, w, gradient, beta):
    """Backtracking line search: shrink the step size t by a factor beta
    until the sufficient-decrease (Armijo-type) condition is satisfied."""
    p = np.linalg.norm(gradient) ** 2
    t = 1
    loss = co.compute_loss(y, tx, w)
    while co.compute_loss(y, tx, w - t * gradient) > loss - t / 2 * p:
        t = beta * t
    return t
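# A sketch of how the step size returned by backtracing might be plugged into
# a plain gradient-descent loop. gd_with_backtracking is a hypothetical wrapper
# (not part of the original code); it reuses the co.compute_loss module assumed
# by the snippet above and a compute_gradient helper like the one sketched
# earlier, and beta=0.5 is only an illustrative shrink factor.
def gd_with_backtracking(y, tx, initial_w, max_iters, beta=0.5):
    w = initial_w
    for _ in range(max_iters):
        gradient = compute_gradient(y, tx, w)
        gamma = backtracing(y, tx, w, gradient, beta)  # adaptive step size
        w = w - gamma * gradient
    return w, co.compute_loss(y, tx, w)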
def stochastic_gradient_descent(y, tx, initial_w, batch_size, gamma, max_iters):
    """Stochastic gradient descent algorithm."""
    threshold = 1e-3  # determines convergence. To be tuned
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size, 1, True):
            current_grad = compute_gradient(minibatch_y, minibatch_tx, w)
            current_loss = compute_loss(y, tx, w)
            # Moving in the direction of negative gradient
            w = w - gamma * current_grad
            # store w and loss
            ws.append(np.copy(w))
            losses.append(current_loss)
        # Convergence criterion: stop once the loss change between the two
        # most recent iterations falls below the threshold
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
        print("Gradient Descent({bi}): loss={l}".format(bi=n_iter, l=current_loss))
    return losses, ws
def least_squares(y, tx, loss_method='mse'):
    """calculate the least squares solution."""
    # weights
    w = np.linalg.solve(tx.T.dot(tx), tx.T.dot(y))
    # loss, default using MSE
    loss = compute_loss(y, tx, w, loss_method=loss_method)
    return w, loss
def logistic_trials(y, tx, tx_sub, degree_range, partitions=2):
    ## Split data into test and training sets
    ## If partitions > 2, use k-fold cross-validation
    glob_tx_tr, glob_tx_te, glob_y_tr, glob_y_te = split_data(tx, y, 0.8)
    ## Result containers: models, (test) losses, accuracies and predictions
    models = []
    losses = []
    accuracies = []
    predictions = []
    ## Loop over the range of degrees
    degrees = range(degree_range[0], degree_range[1])
    for degree in degrees:
        print("Trying degree", degree, ":")
        tx_tr, tx_te, tx_pred = expand(degree, glob_tx_tr, glob_tx_te, tx_sub)
        initial_w = np.ones(tx_tr.shape[1])
        w, loss = logistic_regression(glob_y_tr, tx_tr, initial_w, MAX_ITERS, GAMMA)
        print("\tTraining Loss = ", loss)
        y_test = predict_labels(w, tx_te)
        test_loss = compute_loss(glob_y_te, tx_te, w, func="logistic")
        accuracy = compute_accuracy((y_test + 1) / 2, glob_y_te)
        y_pred = predict_labels(w, tx_pred)
        print("\tTest Loss = ", test_loss, " Test Accuracy = ", accuracy)
        models.append(("logistic_SGD", degree, w))
        losses.append(test_loss)
        accuracies.append(accuracy)
        predictions.append(y_pred)
    return models, losses, accuracies, predictions
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """
    Implements stochastic gradient descent with a batch size of 1.
    @param y : raw output variable
    @param tx : raw input variable, might be a polynomial basis obtained from the input x
    @param initial_w : the initial guess
    @param max_iters : the maximum number of iterations that the algorithm will run for
    @param gamma : the size of the step in each of the iterations
    @return : weights that describe the generated model and the loss associated with them
    """
    shuffle = True
    w = initial_w
    n_iter = 0
    batch_size = 1
    np.random.seed(1)
    while n_iter < max_iters:
        for minibatch_y, minibatch_tx in batch_iter(
                y, tx, np.random.randint(1, max_iters + 1), batch_size, shuffle):
            w = w - gamma * sgd_h.compute_stoch_gradient(
                minibatch_y, minibatch_tx, w)
            loss = co.compute_loss(minibatch_y, minibatch_tx, w)
            if n_iter % 50 == 0:
                print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(
                    bi=n_iter, ti=max_iters - 1, l=loss))
            n_iter += 1
            if n_iter >= max_iters:
                return (w, loss)
    return (w, loss)
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)
        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
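# batch_iter and compute_stoch_gradient are assumed helpers in the SGD snippets.
# Below is a minimal sketch of a typical batch_iter (yield num_batches
# minibatches of size batch_size, optionally after shuffling the data); the
# actual helper in each repository may differ in argument order or defaults.
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Generate minibatches of (y, tx) pairs."""
    data_size = len(y)
    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]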
def update_sgd(y, tx, w, ws, losses, gamma, lambda_=0, batch_size=1, method='mse'):
    """One stochastic gradient step with an optional L2 (ridge) penalty."""
    for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
        # compute a stochastic gradient (plus the ridge penalty term)
        grad = compute_gradient(y_batch, tx_batch, w, loss_method=method) + lambda_ * w
        # update w through the stochastic gradient update
        w = w - gamma * grad
        # calculate the penalized loss
        loss = compute_loss(y, tx, w, loss_method=method) + 0.5 * lambda_ * np.sum(w ** 2)
        # store w and loss
        ws.append(w)
        losses.append(loss)
    return grad, w, loss, ws, losses
def grid_search(y, tx, w0, w1, loss_type):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for i in range(len(w0)):
        for j in range(len(w1)):
            w = np.array([w0[i], w1[j]])
            losses[i, j] = compute_loss(y, tx, w, loss_type)
    return losses
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for i, w0_ in enumerate(w0):
        for j, w1_ in enumerate(w1):
            loss = costs.compute_loss(y, tx, [w0_, w1_])
            losses[i, j] = loss
    return losses
def least_squares(y, tx):
    """Least squares regression using normal equations."""
    x_t = tx.T
    w = np.dot(np.dot(np.linalg.inv(np.dot(x_t, tx)), x_t), y)
    loss = compute_loss(y, tx, w)
    return w, loss
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    # compute loss for each combination of w0 and w1
    for ind_row, row in enumerate(w0):
        for ind_col, col in enumerate(w1):
            w = np.array([row, col])
            losses[ind_row, ind_col] = compute_loss(y, tx, w)
    return losses
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w)]
    w = initial_w
    for y_batch, tx_batch in batch_iter(y, tx, batch_size, max_iters):
        # the stochastic gradient is computed on the minibatch only
        gradient = compute_gradient(y_batch, tx_batch, w)
        w = w - gamma * gradient
        loss = compute_loss(y, tx, w)
        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Stochastic Gradient Descent: loss={l}, w0={w0}, w1={w1}".format(
            l=loss, w0=w[0], w1=w[1]))
    return losses, ws
def ridge_regression(y, tx, lambda_):
    # computing the weights by using the closed-form formula
    lambda_prime = 2 * tx.shape[0] * lambda_
    w = np.dot(
        np.dot(
            np.linalg.inv(
                np.dot(tx.T, tx) + lambda_prime * np.identity(tx.shape[1])),
            tx.T), y)
    # return w with the corresponding loss
    return w, compute_loss(y, tx, w)
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    # initializing the weights
    w = initial_w
    for i in range(max_iters):
        # computing the gradient
        gradient = compute_gradient(y, tx, w)
        # updating the weights
        w = w - gamma * gradient
    # return w with the corresponding loss
    return w, compute_loss(y, tx, w)
def least_squares(y, tx):
    """calculate the least squares solution."""
    # Least squares: returns the optimal weights and the corresponding loss.
    # Computes (tx^T * tx)^{-1} * tx^T * y via np.linalg.solve.
    xtx = np.dot(tx.T, tx)
    w = np.linalg.solve(xtx, np.dot(tx.T, y))
    loss = co.compute_loss(y, tx, w)
    return w, loss
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    # compute loss for each combination of w0 and w1
    for i in range(len(w0)):
        for j in range(len(w1)):
            losses[i, j] = costs.compute_loss(y, tx, (w0[i], w1[j]))
    return losses
def ridge_regression(y, tx, lamb):
    """implement ridge regression."""
    # Get w by solving the linear system Ax = b
    A = np.transpose(tx).dot(tx) + 2 * lamb * len(y) * np.identity(tx.shape[1])
    b = np.transpose(tx).dot(y)
    w_opt = np.linalg.solve(A, b)
    loss = compute_loss(y, tx, w_opt)
    return loss, w_opt
def ridge_regression(y, tx, lambda_, loss_method='mse'):
    """implement ridge regression.

    Minimization of the penalized mean squared error with the ridge
    regularization.
    """
    aI = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    # weights
    w = np.linalg.solve(tx.T.dot(tx) + aI, tx.T.dot(y))
    # loss, default using MSE
    loss = compute_loss(y, tx, w, loss_method=loss_method)
    return w, loss
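# A small usage sketch for the ridge_regression variant above: fit a noisy
# linear trend and compare a tiny and a large regularization strength. The
# synthetic data, the lambda values, and the _ridge_regression_demo name are
# purely illustrative and not part of the original code; compute_loss is
# assumed to default to MSE as in the snippet above.
def _ridge_regression_demo():
    np.random.seed(0)
    x = np.random.rand(100)
    tx = np.c_[np.ones(100), x]                 # design matrix with a bias column
    y = 3 + 2 * x + 0.1 * np.random.randn(100)  # noisy linear target
    for lambda_ in (1e-5, 1e2):
        w, loss = ridge_regression(y, tx, lambda_)
        print("lambda={:g}: w={}, loss={:.4f}".format(lambda_, w, loss))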
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for i in range(len(w0)):
        for j in range(len(w1)):
            losses[i, j] = costs.compute_loss(y, tx, [w0[i], w1[j]])
    return losses
def least_squares(y, tx):
    """calculate the least squares solution."""
    # Solve the normal equations as Ax = b
    A = np.transpose(tx).dot(tx)
    b = np.transpose(tx).dot(y)
    w_opt = np.linalg.solve(A, b)
    loss = compute_loss(y, tx, w_opt)
    return loss, w_opt
def update_gd(y, tx, w, ws, losses, gamma, lambda_=0, method='mse'):
    """One full-gradient descent step with an optional L2 (ridge) penalty."""
    # compute gradient (plus the ridge penalty term)
    grad = compute_gradient(y, tx, w, loss_method=method) + lambda_ * w
    # gradient descent update of w
    w = w - gamma * grad
    # calculate the penalized loss
    loss = compute_loss(y, tx, w, loss_method=method) + 0.5 * lambda_ * np.sum(w ** 2)
    ws.append(w)
    losses.append(loss)
    return grad, w, loss, ws, losses
def ridge_regression(y, tx, lambda_):
    """Ridge regression using normal equations."""
    lambda_prime = 2 * lambda_ * len(y)
    tx_t = tx.T
    w = np.dot(
        np.linalg.inv(tx_t.dot(tx) + lambda_prime * np.eye(tx.shape[1])),
        tx_t.dot(y))
    loss = compute_loss(y, tx, w, metric='mse')
    return w, loss
def least_squares(y, tx):
    """
    Implements least squares.
    @param y : raw output variable
    @param tx : raw input variable, might be a polynomial basis obtained from the input x
    @return : weights that describe the generated model and the loss associated with them
    """
    xtx = np.dot(tx.T, tx)
    w = np.linalg.solve(xtx, np.dot(tx.T, y))
    loss = co.compute_loss(y, tx, w)
    return (w, loss)
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Gradient descent algorithm."""
    w = initial_w
    for n_iter in range(max_iters):
        gradient = compute_gradient(y, tx, w)
        w = w - gamma * gradient
    loss = compute_loss(y, tx, w)
    # loss = calculate_nll(y, tx, w)
    return w, loss
def ridge_regression(y, tx, lambda_):
    """Ridge regression using normal equations."""
    x_t = tx.T
    lambd = lambda_ * 2 * len(y)
    w = np.dot(
        np.dot(np.linalg.inv(np.dot(x_t, tx) + lambd * np.eye(tx.shape[1])),
               x_t), y)
    loss = compute_loss(y, tx, w)
    return w, loss
def least_squares(y, tx):
    """calculate the least squares solution."""
    # flatten y if it comes in as a column vector
    if len(y.shape) == 2:
        y = y.reshape((max(y.shape)))
    A = np.dot(tx.T, tx)
    b = np.dot(tx.T, y)
    w = np.linalg.solve(A, b)
    loss = compute_loss(y, tx, w)
    return w, loss
def least_squares(y, tx):
    """calculate the least squares solution."""
    # closed-form solution via the normal equations
    # w = np.dot(np.dot(np.linalg.inv(np.dot(tx.T, tx)), tx.T), y)
    A = np.dot(tx.T, tx)
    b = np.dot(tx.T, y)
    w = np.linalg.solve(A, b)
    loss = compute_loss(y, tx, w)
    # loss = calculate_nll(y, tx, w)
    return w, loss
def ridge_regression(y, tx, lambda_):
    """
    Implements ridge regression.
    @param y : raw output variable
    @param tx : raw input variable, might be a polynomial basis obtained from the input x
    @param lambda_ : parameter to penalize the large weights
    @return : weights that describe the generated model and the loss associated with them
    """
    w = np.linalg.solve(
        np.dot(tx.T, tx) + lambda_ * np.identity(tx.shape[1]),
        np.dot(tx.T, y))
    loss = co.compute_loss(y, tx, w)
    return (w, loss)