import numpy as np


def least_squares_SGD(y, tx, initial_w, batch_size, max_iter, gamma):
    """
    Least squares stochastic gradient descent.

    :param y: labels
    :param tx: features
    :param initial_w: initial weights
    :param batch_size: 1 if SGD
    :param max_iter: max number of iterations
    :param gamma: learning rate
    :return w: final weights
    :return loss: final loss
    """
    ws = []
    losses = []
    w = initial_w
    for n_iter in range(max_iter):
        # draw one random batch (ph is the project's helper module)
        a = list(ph.batch_iter(y, tx, batch_size, num_batches=1, shuffle=True))
        y2, tx2 = a[0][0], a[0][1]
        # compute gradient & loss
        grad = ph.compute_stoch_gradient(y2, tx2, w)
        loss = ph.compute_loss(y2, tx2, w)
        # update weights
        w = w - gamma * grad
        # store w and loss
        ws.append(w)
        losses.append(loss)
    return ws[-1], losses[-1]
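
# These implementations all depend on a `batch_iter` generator (above via the
# helper module `ph`, below as a bare name) that is not defined in this file.
# The sketch below is a guess inferred purely from the call sites (signature
# with batch_size, num_batches and shuffle); the real project helper may
# differ in details such as wrap-around behavior.
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield `num_batches` minibatches (y_batch, tx_batch) of size `batch_size`."""
    data_size = len(y)
    indices = np.random.permutation(data_size) if shuffle else np.arange(data_size)
    for batch_num in range(num_batches):
        start = (batch_num * batch_size) % data_size
        end = min(start + batch_size, data_size)
        yield y[indices[start:end]], tx[indices[start:end]]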
def update_sgd(y, tx, w, ws, losses, gamma, lambda_=0, batch_size=1, method='mse'):
    for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
        # compute a stochastic gradient
        grad = compute_gradient(y_batch, tx_batch, w, loss_method=method) + lambda_ * w
        # update w through the stochastic gradient update
        w = w - gamma * grad
        # calculate loss
        loss = compute_loss(y, tx, w, loss_method=method) + 0.5 * lambda_ * np.sum(w**2)
        # store w and loss
        ws.append(w)
        losses.append(loss)
    return grad, w, loss, ws, losses
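
# `update_sgd` performs one SGD step and threads the weight/loss histories
# through its arguments, so it is presumably driven by an outer loop. A
# hypothetical driver, not taken from the original code:
def sgd_driver(y, tx, initial_w, max_iters, gamma, lambda_=0):
    w, ws, losses = initial_w, [], []
    for _ in range(max_iters):
        grad, w, loss, ws, losses = update_sgd(y, tx, w, ws, losses,
                                               gamma, lambda_=lambda_)
    return ws[-1], losses[-1]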
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, _print=True):
    """
    Linear regression (least squares) using stochastic gradient descent
    (batch size of 1).

    :param y: Labels
    :param tx: Feature matrix
    :param initial_w: Initial weight vector
    :param max_iters: Max iterations
    :param gamma: Step size
    :param _print: Print progress every 1000 iterations
    :return: weights, loss
    """
    w = initial_w
    batch_size = 1
    for n_iter, data in enumerate(batch_iter(y, tx, batch_size, num_batches=max_iters)):
        # Fetch batch data
        y_batch, tx_batch = data
        dw = compute_mse_gradient(y_batch, tx_batch, w)
        loss = compute_mse_loss(y_batch, tx_batch, w)
        # Update weights
        w = w - gamma * dw
        if n_iter % 1000 == 0 and _print:
            print("Gradient Descent({bi}/{ti}): loss={l}".format(
                bi=n_iter, ti=max_iters - 1, l=loss))
    # Report the loss of the final weights on the full dataset
    loss = compute_mse_loss(y, tx, w)
    return w, loss
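
# This variant assumes `compute_mse_gradient` and `compute_mse_loss`. For
# least squares these would implement L(w) = (1/2N) * ||y - X w||^2 and its
# gradient -(1/N) * X^T (y - X w). A minimal sketch under that assumption
# (the 1/2N scaling is a convention; the original helpers may scale
# differently):
def compute_mse_loss(y, tx, w):
    e = y - tx.dot(w)               # residuals e = y - Xw
    return e.dot(e) / (2 * len(y))  # mean squared error, 1/2N convention

def compute_mse_gradient(y, tx, w):
    e = y - tx.dot(w)
    return -tx.T.dot(e) / len(y)    # gradient of the MSE loss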
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    batch_size = 1  # default value as indicated in project description
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size, num_batches=1):
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
    loss = get_mse_loss(y, tx, w)
    return w, loss
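
# The unpacking `grad, _ = compute_stoch_gradient(...)` above implies the
# helper returns a pair; (gradient, residual) is assumed here:
def compute_stoch_gradient(y, tx, w):
    err = y - tx.dot(w)               # residual on the minibatch
    grad = -tx.T.dot(err) / len(err)  # stochastic estimate of the MSE gradient
    return grad, err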
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, loss_fn=calculate_mse):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    batch_size = 1
    for mini_y, mini_x in batch_iter(y, tx, batch_size, max_iters):
        gradient = compute_gradient(mini_y, mini_x, w)
        w = w - gamma * gradient
    loss = loss_fn(compute_error(y, tx, w))
    return w, loss
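
# Here the loss is factored into a residual step (`compute_error`) and a
# reduction (`calculate_mse`) so that other reductions (e.g. MAE) can be
# swapped in through `loss_fn`. A sketch of the two assumed helpers:
def compute_error(y, tx, w):
    return y - tx.dot(w)              # residual vector e = y - Xw

def calculate_mse(e):
    return e.dot(e) / (2 * len(e))    # MSE with the 1/2N convention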
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, verbose=True):
    """
    Linear regression using stochastic gradient descent.

    :param y: np.array with the labels
    :param tx: np.array with the features
    :param initial_w: np.array with the initial weights
    :param max_iters: int, maximum number of iterations
    :param gamma: float, step size
    :param verbose: boolean, prints losses every 100 iterations
    :returns: w: np.array with the optimal weights
        loss: float, optimal loss
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    threshold = 1e-8
    for i in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=1, num_batches=1):
            # Compute loss
            err = calculate_error(y_batch, tx_batch, w)
            loss = calculate_mse(err)
            # Compute the gradient for the MSE loss
            gradient_vector = calculate_gradient(tx_batch, err)
            # Update weights out of place: an in-place `w -= ...` would
            # mutate initial_w and make every entry of ws alias one array
            w = w - gamma * gradient_vector
            ws.append(w)
            losses.append(loss)
        if verbose and i % 100 == 0:
            print("Current iteration of SGD={i}, loss={loss:.4f}".format(
                i=i, loss=loss))
        # convergence criterion: stop once successive losses change negligibly
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    return ws[-1], losses[-1]
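
# A quick smoke test of the variant above on synthetic data (data,
# hyperparameters and the seed are illustrative only):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    tx = np.c_[np.ones(200), rng.normal(size=(200, 2))]  # bias + 2 features
    w_true = np.array([1.0, -2.0, 0.5])
    y = tx.dot(w_true) + 0.01 * rng.normal(size=200)
    w, loss = least_squares_SGD(y, tx, np.zeros(3), max_iters=1000,
                                gamma=0.05, verbose=False)
    print(w, loss)  # w should end up close to w_true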
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression using stochastic gradient descent."""
    # Define parameters to store w and loss
    w = initial_w
    losses = []
    threshold = 1e-8
    for n_iter in range(max_iters):
        # get a random minibatch of data
        for minibatch_y, minibatch_x in batch_iter(y, tx, 1):
            # compute loss and gradient
            loss = compute_ls_loss(minibatch_y, minibatch_x, w)
            grad = compute_ls_gradient(minibatch_y, minibatch_x, w)
            # update w by gradient
            w = w - gamma * grad
        # log info
        # print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(
        #     bi=n_iter, ti=max_iters - 1, l=loss))
        # convergence criterion
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    return w, loss
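
# With batch size 1, consecutive stochastic losses fluctuate, so a test like
# |losses[-1] - losses[-2]| < 1e-8 rarely triggers before max_iters. A common
# alternative (an illustrative variant, not part of the original code) is to
# compare moving averages of the loss:
def has_converged(losses, window=50, threshold=1e-8):
    if len(losses) < 2 * window:
        return False
    prev = np.mean(losses[-2 * window:-window])
    curr = np.mean(losses[-window:])
    return abs(prev - curr) < threshold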
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size):
    """Stochastic gradient descent algorithm."""
    # initialization
    w_tot = [initial_w]
    loss_tot = []
    n_iter = 0
    # optimization loop
    while n_iter < max_iters:
        # pick samples at random
        batches = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)
        for samples in batches:
            # read samples
            y_tmp = samples[0]
            tx_tmp = samples[1]
            # compute gradient
            grad = compute_gradient_mse(y_tmp, tx_tmp, w_tot[-1])
            # update w
            w = w_tot[-1] - gamma * grad
            # get new loss
            loss = compute_mse_reg(y_tmp, tx_tmp, w)
            # store w and loss
            w_tot.append(w)
            loss_tot.append(loss)
            n_iter = n_iter + 1
    return w_tot[-1], loss_tot[-1]
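
# This variant and the next assume `compute_gradient_mse` and
# `compute_mse_reg`. Despite its name, `compute_mse_reg` is called without a
# penalty parameter, so it presumably reduces to the plain MSE (or uses a
# lambda fixed elsewhere). A sketch under the plain-MSE reading:
def compute_gradient_mse(y, tx, w):
    e = y - tx.dot(w)
    return -tx.T.dot(e) / len(y)

def compute_mse_reg(y, tx, w, lambda_=0):
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(y)) + lambda_ * w.dot(w)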
import matplotlib.pyplot as plt  # used for the loss curves in debug mode


def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size,
                      threshold=1e-2, debug_mode=0):
    """Stochastic gradient descent algorithm."""
    # initialization
    w_tot = [initial_w]
    loss_tot = []
    n_iter = 0
    continue_ = True
    # optimization loop
    while continue_:
        # pick samples at random
        batches = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)
        for samples in batches:
            # read samples
            y_tmp = samples[0]
            tx_tmp = samples[1]
            # compute gradient
            grad = compute_gradient_mse(y_tmp, tx_tmp, w_tot[-1])
            # update w
            w = w_tot[-1] - gamma * grad
            # get new loss
            loss = compute_mse_reg(y_tmp, tx_tmp, w)
            # store w and loss
            w_tot.append(w)
            loss_tot.append(loss)
            # check the stopping criteria
            n_iter = n_iter + 1
            continue_ = n_iter < max_iters and np.linalg.norm(grad) > threshold
            if debug_mode and n_iter % max_iters == 0:
                # norm of the gradient
                print('n_iter:', n_iter, ', ||grad|| =', np.linalg.norm(grad))
                # plot the loss curve to check convergence
                plt.plot(loss_tot)
                plt.xlabel('iteration')
                plt.ylabel('loss')
                plt.show()
    if debug_mode:
        # plot the final loss curve to check convergence
        print('--------------------- final iteration')
        plt.plot(loss_tot)
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.show()
    return w_tot, loss_tot
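
# Unlike the other variants, this one returns the full weight and loss
# histories rather than the final pair, which is convenient for diagnostics.
# A hypothetical caller that extracts the usual (w, loss) result:
def fit_final(y, tx, initial_w, max_iters, gamma, batch_size=1):
    w_tot, loss_tot = least_squares_SGD(y, tx, initial_w, max_iters,
                                        gamma, batch_size)
    return w_tot[-1], loss_tot[-1]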