def least_squares_SGD(y, tx, initial_w, batch_size, max_iter, gamma):
    """
    Least squares linear regression using stochastic gradient descent
    
    :param y: labels
    :param tx: features
    :param initial_w: initial weights
    :param batch_size: size of each minibatch (1 for plain SGD)
    :param max_iter: max number of iterations
    :param gamma: learning rate
    :return w: final weight vector
    :return loss: final loss
    """   
    ws = []
    losses = []
    w = initial_w
    for n_iter in range(max_iter):
        # compute random batch
        a = ph.batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)
        a = list(a)
        tx2, y2 = a[0][1], a[0][0]
        # compute gradient & loss
        grad = ph.compute_stoch_gradient(y2, tx2, w)
        loss = ph.compute_loss(y2, tx2, w)
        # update weights
        w = w - gamma * grad
        # store w and loss
        ws.append(w)
        losses.append(loss)

    return ws[-1], losses[-1]
def update_sgd(y,
               tx,
               w,
               ws,
               losses,
               gamma,
               lambda_=0,
               batch_size=1,
               method='mse'):
    """Run one SGD update (with optional L2 regularization via lambda_),
    append the new weights and the full-data loss to ws/losses, and
    return (grad, w, loss, ws, losses)."""
    for y_batch, tx_batch in batch_iter(y,
                                        tx,
                                        batch_size=batch_size,
                                        num_batches=1):
        # compute a stochastic gradient
        grad = compute_gradient(y_batch, tx_batch, w,
                                loss_method=method) + lambda_ * w
        # update w through the stochastic gradient update
        w = w - gamma * grad
        # calculate loss
        loss = compute_loss(y, tx, w,
                            loss_method=method) + 0.5 * lambda_ * np.sum(w**2)
        # store w and loss
        ws.append(w)
        losses.append(loss)
    return grad, w, loss, ws, losses
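Note: every snippet on this page calls a `batch_iter` helper that is not reproduced here. Below is a minimal sketch consistent with how it is called (it yields `(y_batch, tx_batch)` pairs, optionally shuffled); only the signature is taken from the call sites, the body is an assumption.

import numpy as np

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield num_batches minibatches (y_batch, tx_batch) of size batch_size.

    Sketch only: the signature matches the call sites on this page, but the
    body is an assumed, typical implementation rather than the original helper.
    """
    data_size = len(y)
    if shuffle:
        idx = np.random.permutation(data_size)
        y, tx = y[idx], tx[idx]
    for batch_num in range(num_batches):
        start = batch_num * batch_size
        end = min(start + batch_size, data_size)
        if start < end:
            yield y[start:end], tx[start:end]
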
Example #3
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, _print=True):
    """
    Linear regression (Least squares) using stochastic gradient descent (batch size of 1)
    
    :param y: Labels
    :param tx: Feature Matrix
    :param initial_w: Initial weight vector
    :param max_iters: Max iterations
    :param gamma: Step size
    :param _print: If True, print the loss every 1000 iterations
    :return: weights, loss
    """
    w = initial_w
    batch_size = 1
    for n_iter, data in enumerate(batch_iter(y, tx, batch_size, num_batches=max_iters)):
        # Fetch batch data
        y_batch, tx_batch = data
        dw = compute_mse_gradient(y_batch, tx_batch, w)
        loss = compute_mse_loss(y_batch, tx_batch, w)
        
        # Update weights
        w = w - gamma * dw
        if n_iter % 1000 == 0 and _print:
            print("Gradient Descent({bi}/{ti}): loss={l}".format(
                    bi=n_iter, ti=max_iters - 1, l=loss))
    
    loss = compute_mse_loss(y, tx, w)
    return w, loss
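Example #3 assumes `compute_mse_gradient` and `compute_mse_loss` helpers that are not shown. A hedged sketch of what they typically compute for linear regression (error e = y - tx @ w, loss e.e / 2N, gradient -tx.T @ e / N); names match the call sites but the bodies are assumptions, not the original helpers.

import numpy as np

def compute_mse_loss(y, tx, w):
    # Assumed MSE loss: L(w) = 1/(2N) * sum((y - tx @ w)**2)
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(y))

def compute_mse_gradient(y, tx, w):
    # Assumed MSE gradient: -tx.T @ e / N
    e = y - tx.dot(w)
    return -tx.T.dot(e) / len(y)
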
Example #4
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    batch_size = 1  # default value as indicated in project description
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size, num_batches=1):
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
            loss = get_mse_loss(y, tx, w)
    return w, loss
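Example #4 unpacks `grad, _ = compute_stoch_gradient(...)`, so that helper presumably returns the gradient together with the error vector, while `get_mse_loss` monitors the full-data loss. A sketch under those assumptions:

import numpy as np

def compute_stoch_gradient(y, tx, w):
    # Assumed to return (gradient, error); the pair is inferred from the
    # `grad, _ = ...` unpacking in Example #4.
    err = y - tx.dot(w)
    return -tx.T.dot(err) / len(err), err

def get_mse_loss(y, tx, w):
    # Assumed full-data MSE used for monitoring.
    err = y - tx.dot(w)
    return err.dot(err) / (2 * len(err))
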
Example #5
def least_squares_SGD(y,
                      tx,
                      initial_w,
                      max_iters,
                      gamma,
                      loss_fn=calculate_mse):
    """Stochastic gradient descent algorithm."""
    w = initial_w
    batch_size = 1
    for mini_y, mini_x in batch_iter(y, tx, batch_size, max_iters):
        gradient = compute_gradient(mini_y, mini_x, w)
        w = w - gamma * gradient
    loss = loss_fn(compute_error(y, tx, w))
    return w, loss
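Examples #5 and #6 separate the residual from the loss: an error vector (`compute_error` / `calculate_error`), a scalar MSE computed from it (`calculate_mse`), and a gradient taken either from the precomputed error (`calculate_gradient`) or directly from the data (`compute_gradient`). A sketch consistent with those call sites; all bodies are assumptions.

import numpy as np

def compute_error(y, tx, w):
    # Residual e = y - tx @ w (Example #6 calls this calculate_error).
    return y - tx.dot(w)

def calculate_mse(err):
    # MSE from a precomputed residual: 1/(2N) * sum(e**2).
    return err.dot(err) / (2 * len(err))

def calculate_gradient(tx, err):
    # MSE gradient from a precomputed residual: -tx.T @ e / N.
    return -tx.T.dot(err) / len(err)

def compute_gradient(y, tx, w):
    # Gradient taken directly from (y, tx, w), as used in Example #5.
    return calculate_gradient(tx, compute_error(y, tx, w))
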
Example #6
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, verbose=True):
    """
    Linear regression using stochastic gradient descent.

    :param y: np.array with the labels
    :param tx: np.array with the features
    :param initial_w: np.array with the initial weights
    :param max_iters: int, maximum number of iterations
    :param gamma: float, step size
    :param verbose: boolean, prints losses every 100 iterations
    :returns:
        w: np.array with the optimal weights
        loss: float, optimal loss
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    threshold = 1e-8

    for i in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=1,
                                            num_batches=1):
            # Compute loss
            err = calculate_error(y_batch, tx_batch, w)
            loss = calculate_mse(err)

            # Compute the gradient for mse loss
            gradient_vector = calculate_gradient(tx_batch, err)

            # Update weights (avoid in-place `-=` so the entries stored in ws
            # are not all references to the same mutated array)
            w = w - gamma * gradient_vector

            ws.append(w)
            losses.append(loss)

        if verbose and i % 100 == 0:
            print("Current iteration of SGD={i}, loss={loss:.4f}".format(
                i=i, loss=loss))

        # convergence criterion (checked in the outer loop so it actually
        # stops the descent, not just the single-batch inner loop)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    return ws[-1], losses[-1]
Example #7
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression using stochastic gradient descent."""
    # Define parameters to store w and loss
    w = initial_w
    losses = []
    threshold = 1e-8
    for n_iter in range(max_iters):
        # get a random minibatch of data
        for minibatch_y, minibatch_x in batch_iter(y, tx, 1):
            # compute loss and gradient
            loss = compute_ls_loss(minibatch_y, minibatch_x, w)
            grad = compute_ls_gradient(minibatch_y, minibatch_x, w)
            # update w by gradient
            w = w - gamma * grad
        # log info
        # print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(
        #     bi=n_iter, ti=max_iters - 1, l=loss))
        # converge criterion
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break

    return (w, loss)
Example #8
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size):
    """ Stochastic gradient descent algorithm. """

    # initialization
    w_tot = [initial_w]
    loss_tot = []
    n_iter = 0

    # optimization loop
    while n_iter < max_iters:

        # pick randomly samples
        batches = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)

        for samples in batches:

            # read samples
            y_tmp = samples[0]
            tx_tmp = samples[1]

            # compute gradient
            grad = compute_gradient_mse(y_tmp, tx_tmp, w_tot[-1])

            # update w
            w = w_tot[-1] - gamma * grad

            # get new loss
            loss = compute_mse_reg(y_tmp, tx_tmp, w)

        # store w and loss
        w_tot.append(w)
        loss_tot.append(loss)

        n_iter = n_iter + 1

    return w_tot[-1], loss_tot[-1]
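Examples #8 and #9 depend on `compute_gradient_mse` and `compute_mse_reg`, which are not shown. A sketch consistent with the call sites; the optional L2 term in `compute_mse_reg` is an assumption suggested only by the `_reg` suffix and defaults to zero.

import numpy as np

def compute_gradient_mse(y, tx, w):
    # Assumed MSE gradient: -tx.T @ (y - tx @ w) / N.
    err = y - tx.dot(w)
    return -tx.T.dot(err) / len(err)

def compute_mse_reg(y, tx, w, lambda_=0.0):
    # Assumed MSE loss with an optional L2 penalty (plain MSE by default).
    err = y - tx.dot(w)
    return err.dot(err) / (2 * len(err)) + lambda_ * w.dot(w)
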
Example #9
def least_squares_SGD(y,
                      tx,
                      initial_w,
                      max_iters,
                      gamma,
                      batch_size,
                      threshold=1e-2,
                      debug_mode=0):
    """ Stochastic gradient descent algorithm. """

    # initialization
    w_tot = [initial_w]
    loss_tot = []
    n_iter = 0
    continue_ = True

    # optimization loop
    while continue_:

        # pick randomly samples
        batches = batch_iter(y, tx, batch_size, num_batches=1, shuffle=True)

        for samples in batches:

            # read samples
            y_tmp = samples[0]
            tx_tmp = samples[1]

            # compute gradient
            grad = compute_gradient_mse(y_tmp, tx_tmp, w_tot[-1])

            # update w
            w = w_tot[-1] - gamma * grad

            # get new loss
            loss = compute_mse_reg(y_tmp, tx_tmp, w)

        # store w and loss
        w_tot.append(w)
        loss_tot.append(loss)

        # check for stopping criteria
        n_iter = n_iter + 1
        continue_ = n_iter < max_iters and np.linalg.norm(grad) > threshold

        if debug_mode and n_iter % max_iters == 0:

            # norm of the grad
            print('n_iter:', n_iter, ', ||grad|| =', np.linalg.norm(grad))

            # check if convergence
            plt.plot(loss_tot)
            plt.xlabel('iteration')
            plt.ylabel('loss')
            plt.show()

    if debug_mode:

        # check if convergence
        print('--------------------- final iteration')
        plt.plot(loss_tot)
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.show()

    return w_tot, loss_tot
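A minimal end-to-end usage sketch for the variant in Example #9, using synthetic data and the helper sketches above (`batch_iter`, `compute_gradient_mse`, `compute_mse_reg`); all numbers are illustrative and `debug_mode` is left at 0 so matplotlib is not needed.

import numpy as np

np.random.seed(42)
N, D = 1000, 3
tx = np.c_[np.ones(N), np.random.randn(N, D)]      # features with a bias column
w_true = np.array([0.5, -1.0, 2.0, 0.3])
y = tx.dot(w_true) + 0.01 * np.random.randn(N)     # noisy linear targets

# Example #9 returns the full weight and loss histories.
w_hist, loss_hist = least_squares_SGD(y, tx,
                                      initial_w=np.zeros(tx.shape[1]),
                                      max_iters=2000, gamma=0.01,
                                      batch_size=32)
print("final loss:", loss_hist[-1])
print("final w   :", w_hist[-1])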