Example no. 1
def gradient_descent(y, tx, initial_w, max_iters, gamma, mae=False):
    """Gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w, mae=mae)]
    w = initial_w

    for n_iter in range(max_iters):
        # compute the gradient at the current w
        gradient = compute_gradient(y, tx, w, mae)

        # update w by taking a step against the gradient and record the new loss
        w = w - gamma * gradient
        loss = compute_loss(y, tx, w, mae=mae)

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("[gradient descent] ({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".
              format(bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws
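A minimal usage sketch for the function above, assuming simple MSE/MAE versions of compute_loss and compute_gradient (those helpers are not shown in this listing, so the definitions below are illustrative assumptions, not the originals):

import numpy as np

def compute_loss(y, tx, w, mae=False):
    # MSE (default) or MAE of the linear model tx @ w
    e = y - tx.dot(w)
    return np.mean(np.abs(e)) if mae else np.mean(e ** 2) / 2

def compute_gradient(y, tx, w, mae=False):
    # gradient of the MSE (a subgradient of the MAE when mae=True)
    e = y - tx.dot(w)
    if mae:
        return -tx.T.dot(np.sign(e)) / len(y)
    return -tx.T.dot(e) / len(y)

# toy 1D problem with a bias column: y = 3 + 2x
x = np.linspace(0, 1, 50)
tx = np.c_[np.ones_like(x), x]
y = 3 + 2 * x
losses, ws = gradient_descent(y, tx, np.zeros(2), max_iters=50, gamma=0.7)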
def cross_validation(y, tx, method, params, threshold=0, k=5, seed=0):
    """Run k-fold cross-validation for the given method and parameters.
    Returns the per-fold scores, the per-fold weights, the mean test loss
    and the mean score."""
    k_indices = build_k_indices(y, k, seed)
    s = np.zeros(k)
    l = np.zeros(k)

    weights = []
    scores = []
    for i in range(k):
        # get k'th subgroup in test, others in train
        te_indice = k_indices[i]
        tr_indice = k_indices[~(np.arange(k_indices.shape[0]) == i)]
        tr_indice = tr_indice.reshape(-1)
        y_test = y[te_indice]
        y_train = y[tr_indice]
        tx_test = tx[te_indice]
        tx_train = tx[tr_indice]

        w, _ = run_method(y_train, tx_train, method, params)
        weights.append(w)

        if method in ["LR", "RLR"]:
            l[i] = compute_loss(y_test, tx_test, w, loss_method='sigmoid')
            s[i] = logistic_score(y_test, tx_test, w, threshold=threshold)
        else:
            l[i] = compute_loss(y_test, tx_test, w, loss_method='rmse')
            s[i] = regresssion_score(y_test, tx_test, w, threshold=threshold)

    return s, weights, np.mean(l), np.mean(s)
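cross_validation depends on a build_k_indices helper that is not reproduced here. A minimal version consistent with how k_indices is used above (shuffled row indices split into k equal folds) could look like the following; it is an assumption, not the original helper:

import numpy as np

def build_k_indices(y, k, seed):
    # shuffle the row indices and split them into k folds of equal size
    num_rows = y.shape[0]
    fold_size = num_rows // k
    np.random.seed(seed)
    indices = np.random.permutation(num_rows)
    return np.array([indices[i * fold_size:(i + 1) * fold_size]
                     for i in range(k)])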
def backtracing(y, tx, w, gradient, beta):
    """Backtracking line search: shrink the step size t until the Armijo
    sufficient-decrease condition holds."""
    p = np.linalg.norm(gradient) ** 2
    t = 1
    loss = co.compute_loss(y, tx, w)
    loss_mod = co.compute_loss(y, tx, w - t * gradient)
    while loss_mod > loss - t / 2 * p:
        # shrink t first, then re-evaluate the loss at the new candidate step
        t = beta * t
        loss_mod = co.compute_loss(y, tx, w - t * gradient)
    return t
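The loop above implements the Armijo sufficient-decrease test f(w - t*g) <= f(w) - (t/2)*||g||^2 for the step size t. A standalone illustration on a simple quadratic (the names f, grad_f and backtracking_step are illustrative only):

import numpy as np

def backtracking_step(f, grad_f, w, beta=0.5):
    # shrink t until f(w - t*g) <= f(w) - (t/2)*||g||^2 holds
    g = grad_f(w)
    p = np.linalg.norm(g) ** 2
    t = 1.0
    while f(w - t * g) > f(w) - t / 2 * p:
        t *= beta
    return t

f = lambda w: 0.5 * np.sum((w - 1) ** 2)   # quadratic with minimum at w = 1
grad_f = lambda w: w - 1
print(backtracking_step(f, grad_f, np.array([5.0, -3.0])))   # accepts t = 1.0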
Example no. 4
def stochastic_gradient_descent(y, tx, initial_w, batch_size, gamma,
                                max_iters):
    """Stochastic gradient descent algorithm."""
    threshold = 1e-3  # determines convergence. To be tuned

    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size, 1,
                                                    True):
            current_grad = compute_gradient(minibatch_y, minibatch_tx, w)
            current_loss = compute_loss(y, tx, w)
            # Moving in the direction of negative gradient
            w = w - gamma * current_grad
            # store w and loss
            ws.append(np.copy(w))
            losses.append(current_loss)
            # Convergence criterion: current_loss was just appended, so compare
            # with the previously stored loss and stop once the change is small
            if len(losses) > 1 and np.abs(current_loss -
                                          losses[-2]) < threshold:
                return losses, ws
        print("Gradient Descent({bi}): loss={l}".format(bi=n_iter,
                                                        l=current_loss))
    return losses, ws
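All of the stochastic examples in this listing rely on a batch_iter generator that is never shown. A minimal version consistent with how it is called here, batch_iter(y, tx, batch_size, num_batches, shuffle), is sketched below as an assumption of what the helper does:

import numpy as np

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield num_batches minibatches of size batch_size from (y, tx)."""
    data_size = len(y)
    if shuffle:
        perm = np.random.permutation(data_size)
        y, tx = y[perm], tx[perm]
    for batch_num in range(num_batches):
        start = (batch_num * batch_size) % data_size
        end = min(start + batch_size, data_size)
        if start < end:
            yield y[start:end], tx[start:end]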
def least_squares(y, tx, loss_method='mse'):
    """calculate the least squares solution."""
    # weights
    w = np.linalg.solve(((tx.T).dot(tx)), (tx.T).dot(y))
    # loss, default using MSE
    loss = compute_loss(y, tx, w, loss_method=loss_method)
    return w, loss
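The np.linalg.solve call above solves the normal equations (tx^T tx) w = tx^T y. As a quick standalone sanity check with made-up data (not part of the original example), the result agrees with NumPy's least-squares solver:

import numpy as np

rng = np.random.default_rng(0)
tx = np.c_[np.ones(100), rng.normal(size=(100, 3))]   # design matrix with a bias column
y = tx @ np.array([1.0, 2.0, -1.0, 0.5]) + 0.01 * rng.normal(size=100)

w_normal = np.linalg.solve(tx.T @ tx, tx.T @ y)        # normal equations
w_lstsq, *_ = np.linalg.lstsq(tx, y, rcond=None)       # QR/SVD-based reference
assert np.allclose(w_normal, w_lstsq)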
Example no. 6
def logistic_trials(y, tx, tx_sub, degree_range, partitions=2):
    ## Split data into test and training sets
    ## If partitions > 2, use k-fold cross-validation
    glob_tx_tr, glob_tx_te, glob_y_tr, glob_y_te = split_data(tx, y, 0.8)

    ## Result containers: models, test losses, accuracies and predictions
    models = []
    losses = []
    accuracies = []
    predictions = []

    ## Loops over range of degrees
    degrees = range(degree_range[0], degree_range[1])
    for degree in degrees:
        print("Trying degree", degree, ":")

        tx_tr, tx_te, tx_pred = expand(degree, glob_tx_tr, glob_tx_te, tx_sub)
        initial_w = np.ones(tx_tr.shape[1])

        w, loss = logistic_regression(glob_y_tr, tx_tr, initial_w, MAX_ITERS,
                                      GAMMA)
        print("\tTraining Loss = ", loss)

        y_test = predict_labels(w, tx_te)
        test_loss = compute_loss(glob_y_te, tx_te, w, func="logistic")
        accuracy = compute_accuracy((y_test + 1) / 2, glob_y_te)
        y_pred = predict_labels(w, tx_pred)

        print("\tTest Loss = ", test_loss, " Test Accuracy = ", accuracy)
        models.append(("logistic_SGD", degree, w))
        losses.append(test_loss)
        accuracies.append(accuracy)
        predictions.append(y_pred)
    return models, losses, accuracies, predictions
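The (y_test + 1) / 2 step above maps predicted labels from {-1, 1} to {0, 1} before scoring them against glob_y_te. The compute_accuracy helper is not shown; a minimal version consistent with that usage (an assumption) is simply the fraction of matching labels:

import numpy as np

def compute_accuracy(y_pred, y_true):
    # fraction of predictions that match the reference labels
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))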
Example no. 7
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """
    Implements stochastic gradient descent with a batch size of 1.
        @param y : raw output variable
        @param tx : raw input variable, might be a polynomial basis obtained from the input x
        @param initial_w : the initial guess
        @param max_iters : the maximum number of iterations that the algorithm will run for
        @param gamma: the size of the step in each of the iterations
        @return : weights that describe the generated model and the loss associated with them
    """
    shuffle = True

    w = initial_w
    n_iter = 0
    batch_size = 1
    np.random.seed(1)

    while n_iter < max_iters:
        # batch_iter is assumed to take (y, tx, batch_size, num_batches, shuffle)
        # as in the other examples; pass unit-size batches and a random number of
        # batches per pass (np.random.randint replaces the deprecated
        # np.random.random_integers)
        for minibatch_y, minibatch_tx in batch_iter(
                y, tx, batch_size, np.random.randint(1, max_iters + 1),
                shuffle):
            w = w - gamma * sgd_h.compute_stoch_gradient(
                minibatch_y, minibatch_tx, w)
            loss = co.compute_loss(minibatch_y, minibatch_tx, w)
            if n_iter % 50 == 0:
                print(
                    "Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(
                        bi=n_iter, ti=max_iters - 1, l=loss))
            n_iter += 1
            if (n_iter >= max_iters):
                return (w, loss)

    return (w, loss)
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters,
                                gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w

    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y,
                                            tx,
                                            batch_size=batch_size,
                                            num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)

        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
def update_sgd(y,
               tx,
               w,
               ws,
               losses,
               gamma,
               lambda_=0,
               batch_size=1,
               method='mse'):
    for y_batch, tx_batch in batch_iter(y,
                                        tx,
                                        batch_size=batch_size,
                                        num_batches=1):
        # compute a stochastic gradient
        grad = compute_gradient(y_batch, tx_batch, w,
                                loss_method=method) + lambda_ * w
        # update w through the stochastic gradient update
        w = w - gamma * grad
        # calculate loss
        loss = compute_loss(y, tx, w,
                            loss_method=method) + 0.5 * lambda_ * np.sum(w**2)
        # store w and loss
        ws.append(w)
        losses.append(loss)
    return grad, w, loss, ws, losses
Example no. 10
def grid_search(y, tx, w0, w1, loss_type):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for i in range(len(w0)):
        for j in range(len(w1)):
            w = np.array([w0[i], w1[j]])
            losses[i, j] = compute_loss(y, tx, w, loss_type)
    return losses
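A hedged usage sketch for the grid search (with an assumed MSE loss, since compute_loss is not shown): build the two parameter grids, fill the loss matrix, and read the best (w0, w1) pair back with np.unravel_index:

import numpy as np

def mse_loss(y, tx, w):
    e = y - tx.dot(np.asarray(w))
    return np.mean(e ** 2) / 2

# illustrative two-parameter model: intercept w0, slope w1
x = np.linspace(-1, 1, 30)
tx = np.c_[np.ones_like(x), x]
y = 1.5 + 0.5 * x

w0_grid = np.linspace(-2, 2, 41)
w1_grid = np.linspace(-2, 2, 41)
losses = np.array([[mse_loss(y, tx, [w0, w1]) for w1 in w1_grid]
                   for w0 in w0_grid])

i, j = np.unravel_index(np.argmin(losses), losses.shape)
print("best w0 =", w0_grid[i], "best w1 =", w1_grid[j])   # 1.5 and 0.5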
Example no. 11
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for i, w0_ in enumerate(w0):
        for j, w1_ in enumerate(w1):
            loss = costs.compute_loss(y, tx, [w0_, w1_])
            losses[i, j] = loss

    return losses
Example no. 12
def least_squares(y, tx):
    """ Least squares regression using normal equations
    """
    x_t = tx.T

    w = np.dot(np.dot(np.linalg.inv(np.dot(x_t, tx)), x_t), y)
    loss = compute_loss(y, tx, w)

    return w, loss
Example no. 13
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    # compute loss for each combination of w0 and w1.
    for ind_row, row in enumerate(w0):
        for ind_col, col in enumerate(w1):
            w = np.array([row, col])
            losses[ind_row, ind_col] = compute_loss(y, tx, w)
    return losses
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters,
                                gamma):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = [compute_loss(y, tx, initial_w)]
    w = initial_w
    for y_batch, tx_batch in batch_iter(y, tx, batch_size, max_iters):
        gradient = compute_gradient(y_batch, tx_batch, w)

        w = w - gamma * gradient
        loss = compute_loss(y, tx, w)

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Stochastic Gradient Descent: loss={l}, w0={w0}, w1={w1}".format(
            l=loss, w0=w[0], w1=w[1]))

    return losses, ws
Example no. 15
def ridge_regression(y, tx, lambda_):
    # computing the weights by using the formula
    lambda_prime = 2 * tx.shape[0] * lambda_
    w = np.dot(
        np.dot(
            np.linalg.inv(
                np.dot(tx.T, tx) + lambda_prime * np.identity(tx.shape[1])),
            tx.T), y)
    # return w with the corresponding loss
    return w, compute_loss(y, tx, w)
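As a standalone sanity check of the formula above (made-up data, not part of the example): with lambda_ = 0 the penalized normal equations reduce to ordinary least squares, and a larger lambda_ shrinks the weights:

import numpy as np

def ridge_weights(y, tx, lambda_):
    # w = (X^T X + lambda' I)^(-1) X^T y with lambda' = 2 * N * lambda
    lambda_prime = 2 * tx.shape[0] * lambda_
    return np.linalg.solve(tx.T @ tx + lambda_prime * np.eye(tx.shape[1]),
                           tx.T @ y)

rng = np.random.default_rng(1)
tx = rng.normal(size=(200, 5))
y = tx @ np.array([3.0, -2.0, 0.0, 1.0, 0.5]) + 0.1 * rng.normal(size=200)

w_ols = np.linalg.solve(tx.T @ tx, tx.T @ y)
assert np.allclose(ridge_weights(y, tx, 0.0), w_ols)
assert np.linalg.norm(ridge_weights(y, tx, 1.0)) < np.linalg.norm(w_ols)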
Example no. 16
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    # initializing the weights
    w = initial_w
    for i in range(max_iters):
        # computing the gradient
        gradient = compute_gradient(y, tx, w)
        # updating the weights
        w = w - gamma * gradient
    # return w with the corresponding loss
    return w, compute_loss(y, tx, w)
Example no. 17
def least_squares(y, tx):
    """calculate the least squares solution."""
    # Least squares, returns mse, and optimal weights
    # Computes (tx^{T}*tx)^{-1}*tx^{T}*y

    #x_inv=np.linalg.inv(np.dot(tx.T,tx))
    xtx = np.dot(tx.T, tx)
    w = np.linalg.solve(xtx, np.dot(tx.T, y))
    loss = co.compute_loss(y, tx, w)
    return w, loss
Example no. 18
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))

    #compute loss for each combination of w0 and w1
    for i in range(0, len(w0)):
        for j in range(0, len(w1)):
            losses[i, j] = costs.compute_loss(y, tx, (w0[i], w1[j]))

    return losses
Example no. 19
def ridge_regression(y, tx, lamb):
    """implement ridge regression."""
    # Get w solving Ax = b
    A = np.transpose(tx).dot(tx) + 2 * lamb * len(y) * np.identity(tx.shape[1])
    b = np.transpose(tx).dot(y)
    w_opt = np.linalg.solve(A, b)

    loss = compute_loss(y, tx, w_opt)

    return loss, w_opt
def ridge_regression(y, tx, lambda_, loss_method='mse'):
    """implement ridge regression.
    Minimization of the penalized mean squared error with the ridge regularization.
    """
    aI = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    # weights
    w = np.linalg.solve(tx.T.dot(tx) + aI, tx.T.dot(y))
    # loss, default using MSE
    loss = compute_loss(y, tx, w, loss_method=loss_method)
    return w, loss
Example no. 21
def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    # compute loss for each combination of w0 and w1
    for i in range(len(w0)):
        for j in range(len(w1)):
            losses[i, j] = costs.compute_loss(y, tx, [w0[i], w1[j]])
    return losses
Example no. 22
def least_squares(y, tx):
    """calculate the least squares solution."""
    #Solve as Ax = b
    A = np.transpose(tx).dot(tx)
    b = np.transpose(tx).dot(y)
    w_opt = np.linalg.solve(A, b)

    loss = compute_loss(y, tx, w_opt)

    return loss, w_opt
def update_gd(y, tx, w, ws, losses, gamma, lambda_=0, method='mse'):
    # compute gradient
    grad = compute_gradient(y, tx, w, loss_method=method) + lambda_ * w
    # gradient w by descent update
    w = w - gamma * grad
    # calculate loss
    loss = compute_loss(y, tx, w,
                        loss_method=method) + 0.5 * lambda_ * np.sum(w**2)
    ws.append(w)
    losses.append(loss)
    return grad, w, loss, ws, losses
Example no. 24
def ridge_regression(y, tx, lambda_):
    """Ridge regression using normal equations"""
    lambda_prime = 2 * lambda_ * len(y)
    tx_t = tx.T

    w = np.dot(
        np.linalg.inv(tx_t.dot(tx) + lambda_prime * np.eye(tx.shape[1])),
        tx_t.dot(y))
    loss = compute_loss(y, tx, w, metric='mse')

    return w, loss
Example no. 25
def least_squares(y, tx):
    """
    Implements least squares.
        @param y : raw output variable
        @param tx : raw input variable, might be a polynomial basis obtained from the input x
        @return : weights that describe the generated model and the loss associated with them
    """
    xtx = np.dot(tx.T, tx)
    w = np.linalg.solve(xtx, np.dot(tx.T, y))
    loss = co.compute_loss(y, tx, w)
    return (w, loss)
Example no. 26
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Gradient descent algorithm."""
    w = initial_w

    for n_iter in range(max_iters):
        gradient = compute_gradient(y, tx, w)
        w = w - gamma * gradient

    loss = compute_loss(y, tx, w)
    #	loss = calculate_nll(y, tx, w)

    return w, loss
Example no. 27
def ridge_regression(y, tx, lambda_):
    """ Ridge regression using normal equations
    """
    x_t = tx.T
    lambd = lambda_ * 2 * len(y)

    w = np.dot(
        np.dot(np.linalg.inv(np.dot(x_t, tx) + lambd * np.eye(tx.shape[1])),
               x_t), y)
    loss = compute_loss(y, tx, w)

    return w, loss
Example no. 28
def least_squares(y, tx):
    """calculate the least squares solution."""
    if len(y.shape) == 2:
        y = y.reshape((max(y.shape)))
    A = np.dot(tx.T, tx)
    b = np.dot(tx.T, y)

    w = np.linalg.solve(A, b)

    loss = compute_loss(y, tx, w)

    return w, loss
Example no. 29
def least_squares(y, tx):
    """calculate the least squares solution."""
    #w = np.dot(np.dot(np.linalg.inv(np.dot(tx.T, tx)), tx.T), y)

    A = np.dot(tx.T, tx)
    b = np.dot(tx.T, y)

    w = np.linalg.solve(A, b)

    loss = compute_loss(y, tx, w)
    #	loss = calculate_nll(y, tx, w)

    return w, loss
Example no. 30
def ridge_regression(y, tx, lambda_):
    """ 
    Implements ridge regression.
        @param y : raw output variable
        @param tx : raw input variable, might be a polynomial basis obtained from the input x
        @param lambda_ : parameter to penalize the large weights
        @return : weights that describe the generated model and the loss associated with them
    """
    w = np.linalg.solve(
        np.dot(tx.T, tx) + lambda_ * np.identity(tx.shape[1]), np.dot(tx.T, y))

    loss = co.compute_loss(y, tx, w)

    return (w, loss)
def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w

    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)

        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws