Example #1
0
def plot_error(normalized):
    """Plot training and testing error against polynomial degree.

    Reads the module-level ``values`` array: column 1 supplies the targets
    and columns 7 onward supply the input features. The first 100 rows form
    the training split and the remaining rows the testing split. For each
    degree from 1 to 6 a model is fitted with ``a1.linear_regression`` and
    scored with ``a1.evaluate_regression``; the two resulting error curves
    are drawn on a single figure whose y-axis is labelled 'RMS'.

    Args:
        normalized: The string ``'yes'`` to run the features through
            ``a1.normalize_data`` before splitting; any other value uses
            the features unchanged.
    """
    raw_features = values[:, 7:]
    if normalized == 'yes':
        inputs = a1.normalize_data(raw_features)
    else:
        inputs = raw_features
    all_targets = values[:, 1]

    # Leading rows train the model, the rest are held out for testing.
    n_train = 100
    train_inputs, test_inputs = inputs[0:n_train, :], inputs[n_train:, :]
    train_targets, test_targets = all_targets[0:n_train], all_targets[n_train:]

    # Complete the linear_regression and evaluate_regression functions of
    # assignment1.py and pass the required parameters to them.
    degrees = list(range(1, 7))
    train_errors = []
    test_errors = []
    for degree in degrees:
        w, train_err = a1.linear_regression(
            train_inputs, train_targets, 'polynomial', 0, degree, 'yes', 0, 0)
        _, test_err = a1.evaluate_regression(
            test_inputs, test_targets, w, degree, 'polynomial', 'yes', 0, 0)
        train_errors.append(float(train_err))
        test_errors.append(float(test_err))

    # Draw both curves on the same axes.
    plt.rcParams.update({'font.size': 15})
    plt.plot(degrees, train_errors)
    plt.plot(degrees, test_errors)
    plt.ylabel('RMS')
    plt.legend(['Training error', 'Testing error'])
    plt.title('Fit with polynomials, no regularization')
    plt.xlabel('Polynomial degree')
    plt.show()
Example #2
0
def main():
    """Plot the mean cross-validation error for a range of lambda values.

    Loads the data via ``a1.load_unicef_data``, normalizes the features
    (columns 7 onward) and keeps the first 100 rows of features and of the
    target column (column 1). For every candidate lambda, ten folds are
    built with ``setXes`` and scored with ``crossValidationCal``; the
    ``[0, 0]`` entry of each returned matrix is averaged over the folds.
    Each average is printed next to its lambda and the averages are drawn
    against lambda on a logarithmic x-axis.
    """
    # Get input data
    (countries, features, values) = a1.load_unicef_data()

    n_samples = 100
    n_folds = 10
    fold_size = n_samples // n_folds

    targets = values[:, 1]
    x = a1.normalize_data(values[:, 7:])
    x = x[0:n_samples, :]
    # Two-index slicing: assumes the target column is 2-D (e.g. a numpy
    # matrix column) -- TODO confirm against a1.load_unicef_data.
    targets = targets[0:n_samples, :]

    # Presumably regularization weights; they are forwarded untouched to
    # crossValidationCal -- verify against that helper.
    landas = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    avgErrorForEachL = []
    for landa in landas:
        avgErrorRMS = 0
        for fold in range(n_folds):
            # setXes receives the row offset at which this fold starts.
            x_train, t_train, x_validation, t_validation = setXes(
                fold * fold_size, x, targets)
            fold_error = crossValidationCal(x_train, t_train, landa,
                                            x_validation, t_validation)
            avgErrorRMS += fold_error[0, 0]
        avgErrorRMS = avgErrorRMS / n_folds
        # A single formatted string prints the same text under Python 2 and
        # Python 3; the former `print a, b` statement is a SyntaxError on 3.
        print('%s %s' % (avgErrorRMS, landa))
        avgErrorForEachL.append(avgErrorRMS)
    plt.semilogx(landas, avgErrorForEachL)

    plt.show()
Example #3
0
def PolynomialRegression(bias):
    """Fit polynomial models of degree 1-6 and plot train/test RMS error.

    Loads the data via ``a1.load_unicef_data``, normalizes the features
    (columns 7 onward) and splits features and targets (column 1) into the
    first ``N_TRAIN`` rows for training and the remaining rows for testing.
    For each degree the per-sample errors returned by
    ``a1.linear_regression`` and ``a1.evaluate_regression`` are reduced to
    a root-mean-square value, printed, and finally plotted.

    Args:
        bias: Forwarded unchanged as the last argument of both ``a1`` calls
            and appended to the plot title. Its exact meaning is defined by
            ``a1`` -- presumably a bias-term switch; verify there.
    """
    (countries, features, values) = a1.load_unicef_data()
    targets = values[:, 1]
    x = a1.normalize_data(values[:, 7:])

    N_TRAIN = 100
    ALL = 195
    # Single source of truth for the test-set size; it was previously
    # repeated as the literal 95 in the RMS computation.
    n_test = ALL - N_TRAIN

    x_train = x[0:N_TRAIN, :]
    x_test = x[N_TRAIN:, :]
    t_train = targets[0:N_TRAIN]
    t_test = targets[N_TRAIN:]

    degrees = list(range(1, 7))
    train_error = {}
    test_error = {}
    for degree in degrees:
        (w, t_err) = a1.linear_regression(x_train, t_train, 'polynomial', 0,
                                          degree, 0, 1, N_TRAIN, bias)
        (t_est, te_err) = a1.evaluate_regression('polynomial', x_test, w,
                                                 t_test, degree, n_test, bias)
        print('degree = ', degree)
        print(t_err)
        # Root of the mean error over the samples in each split.
        train_error[degree] = np.sqrt(np.sum(t_err) / N_TRAIN)
        print('sum=', np.sum(t_est, axis=0))
        print('train_error = ', train_error[degree])
        test_error[degree] = np.sqrt(np.sum(te_err) / n_test)

    for degree in degrees:
        print(train_error[degree])

    plt.rcParams.update({'font.size': 15})
    plt.plot(degrees, [train_error[degree] for degree in degrees])
    plt.plot(degrees, [test_error[degree] for degree in degrees])
    plt.ylabel('RMS')
    plt.legend(['Training error', 'Testing error'])
    # str() keeps the title working when bias is not already a string.
    plt.title('Fit with polynomials, no regularization, bias:' + str(bias))
    plt.xlabel('Polynomial degree')
    plt.show()
def main():
    """Draw two stacked panels of error curves against polynomial degree.

    Loads the data via ``a1.load_unicef_data`` and takes column 1 as the
    targets and columns 7 onward as the features. ``calculateToPlot`` is
    called once with the raw features (upper panel, subplot 211) and once
    with the features passed through ``a1.normalize_data`` (lower panel,
    subplot 212). Both curves returned by each call are drawn on the same
    panel against the degrees 1 to 6.
    """
    # Get input data
    (countries, features, values) = a1.load_unicef_data()

    targets = values[:, 1]
    x = values[:, 7:]

    degree_axis = np.matrix([[1], [2], [3], [4], [5], [6]])
    # NOTE(review): the first curve plotted is the one named
    # PlotMatrixRMSErorr and the second PlotMatrixRMSErorrTest in the
    # original code, so this label order may be swapped -- confirm against
    # calculateToPlot before changing it.
    legend_labels = ['Test error', 'Training error']

    # Upper panel: features used as loaded (no normalization).
    first_curve, second_curve = calculateToPlot(x, targets)
    plt.figure(1)
    for curve in (first_curve, second_curve):
        plt.subplot(211)
        plt.plot(degree_axis, curve)
        plt.ylabel('RMS')
        plt.legend(legend_labels)
        plt.title('Fit with polynomials, no regularization')
        plt.xlabel('Polynomial degree')

    # Lower panel: same computation on normalized features.
    x = a1.normalize_data(x)
    first_curve, second_curve = calculateToPlot(x, targets)
    for position, curve in enumerate((first_curve, second_curve)):
        plt.subplot(212)
        plt.plot(degree_axis, curve)
        plt.ylabel('RMS')
        plt.legend(legend_labels)
        if position == 0:
            # The title is only set while drawing the first curve.
            plt.title('Fit with polynomials, no regularization and normalize')
        plt.xlabel('Polynomial degree')

    plt.show()
Example #5
0
#!/usr/bin/env python

import assignment1 as a1
import numpy as np
import matplotlib.pyplot as plt

# Load the data, normalize the features (columns 7 onward) and keep only the
# first N_TRAIN rows of features and of the target column (column 1).
(countries, features, values) = a1.load_unicef_data()

x = values[:, 7:]
x = a1.normalize_data(x)

N_TRAIN = 100
FOLD_SIZE = 10  # rows held out per fold; N_TRAIN / FOLD_SIZE folds in total
targets = values[:N_TRAIN, 1]
x = x[0:N_TRAIN, :]
lambda_list = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]
average_list = []

# Iterate over lambda_list itself rather than a second copy of the literal,
# so the two can never drift apart.
for lam in lambda_list:
    # Running total of the per-fold validation error for this lambda
    # (named error_sum so the builtin sum() is not shadowed).
    error_sum = 0
    for fold in range(1, N_TRAIN // FOLD_SIZE + 1):
        fold_start = (fold - 1) * FOLD_SIZE
        fold_stop = fold * FOLD_SIZE
        # Rows [fold_start, fold_stop) are held out for validation ...
        x_vali = x[fold_start:fold_stop, :]
        t_vali = targets[fold_start:fold_stop]
        # ... and the rows before and after them form the training set.
        x_train = np.vstack((x[0:fold_start, :], x[fold_stop:, :]))
        t_train = np.vstack((targets[0:fold_start], targets[fold_stop:]))
        (w, train_err) = a1.linear_regression(x_train, t_train, 'polynomial',
                                              lam, 2, 'yes', 0, 0)
        (t_est, test_err) = a1.evaluate_regression(x_vali, t_vali, w, 2,
                                                   'polynomial', 'yes', 0, 0)
        error_sum = error_sum + float(test_err)
        else:
            # Fallback branch: nothing is trimmed from the end of the
            # training range (g is subtracted from N_TRAIN below). The
            # matching `if` lies above this excerpt.
            g = 0

        # Rows [10*k, 10*(k+1)) are held out as the test block for fold k;
        # features come from columns 7 onward and targets from column 1.
        if k == 0:
            # First fold: no rows precede the held-out block, so the
            # training rows are one contiguous slice after it.
            x_train = values[10*(k+1):N_TRAIN-g,7:]
            x_test = values[10*k:10*(k+1),7:]
            t_train = values[10*(k+1):N_TRAIN-g,1]
            t_test = values[10*k:10*(k+1),1]
        else:
            # Later folds: the rows after the held-out block are stacked on
            # top of the rows before it.
            x_train = np.vstack((values[10 * (k + 1):N_TRAIN - g, 7:],values[:10 * k, 7:]))
            x_test = values[10 * k:10 * (k + 1), 7:]
            t_train = np.vstack((values[10 * (k + 1):N_TRAIN - g, 1],values[:10 * k, 1]))
            t_test = values[10 * k:10 * (k + 1), 1]

        if (Normalized):
            # a1.normalize_data is called separately on each of the four
            # arrays, targets included.
            x_train = a1.normalize_data(x_train)
            x_test = a1.normalize_data(x_test)
            t_train = a1.normalize_data(t_train)
            t_test = a1.normalize_data(t_test)
        else:
            # Bare string expression: a no-op standing in for `pass`.
            ""
        # TO DO:: Complete the linear_regression and evaluate_regression functions of the assignment1.py
        (w, tr_err) = a1.linear_regression(x_train, t_train, basis, reg_lambda[j], d)
        (t_est, te_err) = a1.evaluate_regression(x_test, t_test, w, basis, d)


        # Add one tenth of this fold's errors to the running totals keyed by
        # the current lambda -- presumably averaging over 10 folds; confirm
        # against the enclosing loop, which is not visible here.
        train_err[reg_lambda[j]] = train_err[reg_lambda[j]] + tr_err/10
        test_err[reg_lambda[j]] = test_err[reg_lambda[j]] + te_err/10

        # Manually advanced counters: k indexes the fold, j indexes
        # reg_lambda. The loops that own them start above this excerpt.
        k += 1
    j += 1
import numpy as np
import matplotlib.pyplot as plt

# constants
# N_TRAIN: number of leading rows used as the training split below.
N_TRAIN = 100
# FOLD_COUNT: number of iterations of the inner fold loop.
FOLD_COUNT = 10
# POLYNOMIAL_DEGREE, INCLUDE_BIAS and VALIDATION_COUNT are not referenced in
# the visible part of this script -- presumably consumed further down.
POLYNOMIAL_DEGREE = 2
INCLUDE_BIAS = True
VALIDATION_COUNT = 10
# One validation error slot is allocated per entry (see validation_errors);
# presumably these are regularization weights -- confirm at the point of use.
LAMBDA_VALUES = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]

(countries, features, values) = a1.load_unicef_data()

# Column 1 holds the targets; columns 7 through 39 are taken as features.
targets = values[:, 1]
x = values[:, 7:40]
x_n = a1.normalize_data(x)  # x normalized
# First N_TRAIN rows train the model, the remainder are held out for testing.
x_n_train = x_n[0:N_TRAIN, :]
x_n_test = x_n[N_TRAIN:, :]

t_train = targets[0:N_TRAIN]
t_test = targets[N_TRAIN:]

# One accumulator per lambda value, initialised to zero.
validation_errors = np.zeros(len(LAMBDA_VALUES))

for lambda_index in range(len(LAMBDA_VALUES)):

    # Folds are visited in descending order: FOLD_COUNT, ..., 1.
    for fold in range(FOLD_COUNT, 0, -1):
        # indexes for partitioning
        # NOTE(review): the literal 10 matches VALIDATION_COUNT; presumably
        # the constant was meant to be used here -- confirm.
        idx1 = (fold - 1) * 10
        idx2 = fold * 10