Example #1
def cross_validation(y, x, k_indices, k, lambda_, degree):
    """return the loss of ridge regression."""
    # ***************************************************
    # get k'th subgroup in test, others in train
    x_test = x[k_indices[k]]
    y_test = y[k_indices[k]]
    
    # indices of the remaining groups (all rows of k_indices except row k)
    ind = np.delete(np.arange(k_indices.shape[0]), k)
    
    new_ind = np.ravel(k_indices[ind])
    x_train = x[new_ind]
    y_train = y[new_ind]
    

    # form data with polynomial features up to the given degree
    x_test_poly = build_poly(x_test, degree).T
    x_train_poly = build_poly(x_train, degree).T
    
    # find weights
    w = ridge_regression(y_train, x_train_poly, lambda_)

    # calculate the loss for train and test data
    loss_tr = np.sqrt(2*compute_mse(y_train,x_train_poly, w))
    loss_te = np.sqrt(2*compute_mse(y_test,x_test_poly, w))
    # ***************************************************
    return loss_tr, loss_te
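
Every example on this page calls a build_poly helper that is not shown. As a rough sketch (an assumption, not the original project's code): for a 1-D input it usually builds a matrix whose j-th column is x raised to the power j; whether samples end up in rows or columns varies between projects, which is why Example #1 transposes the result.

import numpy as np

def build_poly(x, degree):
    # hypothetical sketch: column j holds x**j for j = 0..degree
    return np.vander(np.asarray(x), degree + 1, increasing=True)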
Example #2
def cross_validation(y, x, k_indices, k, lambda_, degree,
                     cross_features_degree, compute_weightsFunction,
                     compute_lossFunction):
    """
    selects kth group of indices as test set and rest as training set,
    builds the polynomial features up to degree d
    computes the weights based on the training set with the specified function
    returns losses of training set and testing set with the specified function
    """

    # determine the indices in the training set and those in the test set
    tr_indices = np.concatenate(
        (k_indices[:k].ravel(), k_indices[k + 1:].ravel()))
    te_indices = k_indices[k]

    # select training and testing x and y
    x_tr = x[tr_indices]
    y_tr = y[tr_indices]
    x_te = x[te_indices]
    y_te = y[te_indices]

    # build polynomial features
    x_poly_tr = build_poly(x_tr, degree, cross_features_degree)
    x_poly_te = build_poly(x_te, degree, cross_features_degree)

    # find weights using the training data only
    weights_tr = compute_weightsFunction(y_tr, x_poly_tr, lambda_)

    # compute the losses for cross validation
    loss_tr = compute_lossFunction(y_tr, x_poly_tr,
                                   weights_tr)  # compute without lambda
    loss_te = compute_lossFunction(y_te, x_poly_te, weights_tr)

    return loss_tr, loss_te
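
The k_indices argument used throughout is typically produced by a small helper along these lines (a sketch under the usual k-fold convention; build_k_indices and its seed handling are assumptions, not code from the example's project):

import numpy as np

def build_k_indices(y, k_fold, seed):
    # shuffle the row indices and split them into k_fold equally sized groups
    num_row = y.shape[0]
    interval = num_row // k_fold
    np.random.seed(seed)
    indices = np.random.permutation(num_row)
    return np.array([indices[i * interval:(i + 1) * interval]
                     for i in range(k_fold)])

Each fold index k is then passed to cross_validation in a loop, and the returned losses are averaged over the folds to score one (lambda_, degree) pair.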
Example #3
def cross_validation(y, x, k_indices, k, lambda_, degree):
    """return the average train and test loss of ridge regression over the k folds."""

    loss_tr = []
    loss_te = []
    for j in range(k):  # here k is the number of folds, not a fold index

        index_te = k_indices[j]

        ind = np.ones(k_indices.shape[0], bool)
        ind[j] = False
        index_tr = k_indices[ind].flatten()

        x_tr = x[index_tr]
        x_te = x[index_te]
        y_tr = y[index_tr]
        y_te = y[index_te]

        xpoly_tr = build_poly(x_tr, degree)
        xpoly_te = build_poly(x_te, degree)

        w_s = ridge_regression(y_tr, xpoly_tr, lambda_)

        loss_tr.append(compute_mse(y_tr, xpoly_tr, w_s))
        loss_te.append(compute_mse(y_te, xpoly_te, w_s))

    return np.mean(loss_tr), np.mean(loss_te)
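
For reference, the ridge_regression and compute_mse helpers these ridge examples assume commonly look like the following sketch (the 2*N scaling of lambda_ is one convention among several; with it, sqrt(2 * mse) in Example #1 is exactly the RMSE):

import numpy as np

def ridge_regression(y, tx, lambda_):
    # closed-form ridge solution of (X^T X + 2 N lambda I) w = X^T y
    n, d = tx.shape
    a = tx.T.dot(tx) + 2 * n * lambda_ * np.eye(d)
    return np.linalg.solve(a, tx.T.dot(y))

def compute_mse(y, tx, w):
    # mean squared error e^T e / (2 N)
    e = y - tx.dot(w)
    return e.dot(e) / (2 * len(e))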
Example #4
def cross_validation_lr(y, x, k_indices, k, gamma, lambda_, max_iters, degree):
    """Return the classification error of regularized logistic regression for one fold of the k-fold cross validation.

    @param y : raw output variable
    @param x : raw input variable; may be a polynomial basis built from the original input
    @param k_indices : the indices of the data points belonging to each of the k groups of the cross validation
    @param k : the (1-based) index of the group used for testing
    @param gamma : the step size used by the gradient updates
    @param lambda_ : the penalization parameter
    @param max_iters : the maximum number of iterations of the logistic regression
    @param degree : the degree of the polynomial basis
    @return loss_tr : the classification error made on the training data
    @return loss_te : the classification error made on the testing data
    """
    # 1. DIVIDE THE DATA INTO SUBGROUPS
    # get the k'th subgroup as test set, the others as training set (k is 1-based here):
    x_test = np.array(x[k_indices[k - 1]])
    y_test = np.array(y[k_indices[k - 1]])
    x_train = np.empty((0, x.shape[1]))
    y_train = np.empty((0, 1))
    # this loop collects the remaining groups into the training set
    for k_iter, validation_points in enumerate(k_indices):
        if (k_iter != k - 1):
            x_train = np.append(x_train, x[validation_points], axis=0)
            y_train = np.append(y_train, y[validation_points])

    # 2. FORMAT THE DATA
    # sanitize and standardize the training data, then apply the same median, mean and standard deviation to the test data
    x_train = count_NaN(x_train)
    x_test = count_NaN(x_test)

    x_train, median_train = sanitize_NaN(x_train)
    x_test, median_test = sanitize_NaN(x_test, median_train)

    x_train, mean_tr, std_tr = standardize(x_train)
    x_test, mean_te, std_te = standardize(x_test, mean_tr, std_tr)

    # form data with polynomial degree:
    x_train_poly = build_poly(x_train, degree)
    x_test_poly = build_poly(x_test, degree)

    # 3. RUN THE MODEL AND COMPUTE THE ERROR
    # regularized logistic regression:
    w_rlr = regularized_logistic_regression(y_train, x_train_poly, gamma,
                                            lambda_, max_iters)

    # calculate the classification error for train and test data:
    # |(2y - 1) - prediction| equals 2 for each misclassified point, hence the division by 2 * N
    loss_tr = sum(
        abs((2 * (y_train) - 1) -
            predict_labels(w_rlr, x_train_poly))) / (2 * len(y_train))
    loss_te = sum(abs((2 * y_test - 1) -
                      predict_labels(w_rlr, x_test_poly))) / (2 * len(y_test))

    return loss_tr, loss_te
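
The error expression above works because y is in {0, 1}, so 2*y - 1 lives in {-1, 1} and can be compared directly against the predicted labels. predict_labels is assumed to threshold the linear score, roughly as in this sketch (an assumption, not the project's actual helper):

import numpy as np

def predict_labels(weights, data):
    # hypothetical sketch: map the linear score to {-1, 1} by thresholding at 0
    scores = data.dot(weights)
    return np.where(scores <= 0, -1, 1)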
Example #5
def plot_fitted_curve(y, x, weights, degree, ax):
    """plot the fitted curve."""
    ax.scatter(x, y, color='b', s=12, facecolors='none', edgecolors='r')
    xvals = np.arange(min(x) - 0.1, max(x) + 0.1, 0.1)
    tx = build_poly(xvals, degree)
    f = tx.dot(weights)
    ax.plot(xvals, f)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_title("Polynomial degree " + str(degree))
Example #6
def cross_validation_log_reg(y, x, k_indices, k, lambda_, degree, method='penalized'):
    """return the classification accuracy of logistic regression."""
    # ***************************************************
    # get k'th subgroup in test, others in train
    x_test = x[k_indices[k]]
    y_test = y[k_indices[k]]
    
    # indices of the remaining groups (all rows of k_indices except row k)
    ind = np.delete(np.arange(k_indices.shape[0]), k)
    
    new_ind = np.ravel(k_indices[ind])
    x_train = x[new_ind]
    y_train = y[new_ind]
    

    # form data with polynomial features up to the given degree
    x_test_poly = build_poly(x_test, degree)
    x_train_poly = build_poly(x_train, degree)
    
    # initialize weights
    initial_w = np.zeros(x_train_poly.shape[1])

    # find weights
    w = running_gradient(y_train, x_train_poly, initial_w, lambda_, method)

    # calculate the classification accuracy for train and test data
    y_pred = predict_labels(w, x_test_poly)
    test_score = calculate_classification_accuracy(y_test, y_pred)

    y_pred_train = predict_labels(w, x_train_poly)
    train_score = calculate_classification_accuracy(y_train, y_pred_train)
    
    # ***************************************************
    return test_score, train_score
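
calculate_classification_accuracy is not shown; under the usual definition it is simply the fraction of matching labels, e.g. (a sketch, assuming both label arrays share the same {-1, 1} encoding):

import numpy as np

def calculate_classification_accuracy(y_true, y_pred):
    # hypothetical sketch: fraction of predictions that match the true labels
    return np.mean(y_true == y_pred)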