def plot_error(normalized):
    """Plot training and test RMS error for polynomial fits of degree 1 to 6.

    Reads the module-level ``values`` array: column 1 is used as the target
    and columns 7 onward as the input features. The first 100 rows are the
    training set and all remaining rows are the test set.

    Args:
        normalized: the string ``'yes'`` to pass the feature columns through
            ``a1.normalize_data`` before splitting; any other value leaves
            the features untouched.
    """
    n_train = 100
    degrees = list(range(1, 7))

    raw_features = values[:, 7:]
    x = a1.normalize_data(raw_features) if normalized == 'yes' else raw_features
    targets = values[:, 1]

    x_train, x_test = x[:n_train, :], x[n_train:, :]
    t_train, t_test = targets[:n_train], targets[n_train:]

    # One entry per polynomial degree, kept in the same order as `degrees`.
    train_rms = []
    test_rms = []
    for degree in degrees:
        w, train_err = a1.linear_regression(
            x_train, t_train, 'polynomial', 0, degree, 'yes', 0, 0)
        _, test_err = a1.evaluate_regression(
            x_test, t_test, w, degree, 'polynomial', 'yes', 0, 0)
        train_rms.append(float(train_err))
        test_rms.append(float(test_err))

    # Training curve is drawn first so it matches the first legend entry.
    plt.rcParams.update({'font.size': 15})
    plt.plot(degrees, train_rms)
    plt.plot(degrees, test_rms)
    plt.ylabel('RMS')
    plt.legend(['Training error', 'Testing error'])
    plt.title('Fit with polynomials, no regularization')
    plt.xlabel('Polynomial degree')
    plt.show()
def main():
    """Average a 10-fold validation error for each ``landa`` and plot it.

    Loads the data through ``a1.load_unicef_data``, normalises the feature
    columns (7 onward) with ``a1.normalize_data`` and keeps the first 100
    rows together with the matching rows of target column 1. For every
    value in ``landas`` it calls ``setXes`` with the offsets 0, 10, ..., 90
    and hands each resulting split to ``crossValidationCal``; element
    ``[0, 0]`` of that result is averaged over the 10 calls. Each average
    is printed next to its ``landa`` and the averages are finally drawn
    against ``landas`` with a logarithmic x axis.
    """
    n_folds = 10
    fold_size = 10  # presumably rows per validation fold -- confirm in setXes

    # Get input data
    (countries, features, values) = a1.load_unicef_data()
    targets = values[:, 1]
    x = values[:, 7:]
    x = a1.normalize_data(x)

    # Only the first 100 rows take part in cross-validation. `targets` is
    # indexed with two subscripts, so it must be two-dimensional here.
    x = x[0:100, :]
    targets = targets[0:100, :]

    landas = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    avgErrorForEachL = []
    for landa in landas:
        avgErrorRMS = 0
        for fold in range(n_folds):
            x_train, t_train, x_validation, t_validation = setXes(
                fold * fold_size, x, targets)
            fold_result = crossValidationCal(
                x_train, t_train, landa, x_validation, t_validation)
            avgErrorRMS += fold_result[0, 0]
        avgErrorRMS = avgErrorRMS / n_folds
        # %-formatting prints "<error> <landa>" identically on Python 2 and
        # Python 3; the former `print a, b` statement only parses on 2.
        print("%s %s" % (avgErrorRMS, landa))
        avgErrorForEachL.append(avgErrorRMS)

    # NOTE(review): landas[0] is 0, which has no position on a log-scaled
    # axis -- check how the first point is rendered.
    plt.semilogx(landas, avgErrorForEachL)
    plt.show()
def PolynomialRegression(bias):
    """Fit polynomials of degree 1 to 6 and plot training/test RMS error.

    Loads the data through ``a1.load_unicef_data``, uses column 1 as the
    target and the normalised columns 7 onward as features. The first
    ``N_TRAIN`` rows are the training set and the remaining rows the test
    set. For each degree the error values returned by
    ``a1.linear_regression`` and ``a1.evaluate_regression`` are summed,
    divided by the number of rows in the corresponding set and
    square-rooted; the two resulting curves are plotted against the degree.
    Intermediate values are printed along the way.

    Args:
        bias: forwarded unchanged as the last argument of both ``a1`` calls
            and appended to the plot title.
    """
    (countries, features, values) = a1.load_unicef_data()
    targets = values[:, 1]
    x = values[:, 7:]
    x = a1.normalize_data(x)

    N_TRAIN = 100
    # NOTE(review): assumes the data set has exactly 195 rows; x_test below
    # takes every remaining row regardless -- confirm against the data.
    ALL = 195
    n_test = ALL - N_TRAIN

    x_train = x[0:N_TRAIN, :]
    x_test = x[N_TRAIN:, :]
    t_train = targets[0:N_TRAIN]
    t_test = targets[N_TRAIN:]

    degree_axis = list(range(1, 7))
    train_error = {}
    test_error = {}
    for degrees in degree_axis:
        (w, t_err) = a1.linear_regression(
            x_train, t_train, 'polynomial', 0, degrees, 0, 1, N_TRAIN, bias)
        (t_est, te_err) = a1.evaluate_regression(
            'polynomial', x_test, w, t_test, degrees, n_test, bias)
        print('degree = ', degrees)
        print(t_err)
        # Divide by the size of each split (100 and 95) instead of
        # repeating those numbers as literals.
        train_error[degrees] = np.sqrt(np.sum(t_err) / N_TRAIN)
        print('sum=', np.sum(t_est, axis=0))
        print('train_error = ', train_error[degrees])
        test_error[degrees] = np.sqrt(np.sum(te_err) / n_test)

    for i in degree_axis:
        print(train_error[i])
    print(type(train_error))

    plt.rcParams.update({'font.size': 15})
    plt.plot(degree_axis, [train_error[d] for d in degree_axis])
    plt.plot(degree_axis, [test_error[d] for d in degree_axis])
    plt.ylabel('RMS')
    plt.legend(['Training error', 'Testing error'])
    # str() keeps the title working when bias is not already a string
    # (e.g. True or 1); a str argument yields the same title as before.
    plt.title('Fit with polynomials, no regularization, bias:' + str(bias))
    plt.xlabel('Polynomial degree')
    plt.show()
def _plot_rms_panel(position, degrees, train_rms, test_rms, title):
    """Draw one subplot holding a training and a test RMS curve."""
    plt.subplot(position)
    # Labels are attached to the curves themselves so the legend cannot
    # drift out of step with the order in which the curves are drawn.
    plt.plot(degrees, train_rms, label='Training error')
    plt.plot(degrees, test_rms, label='Test error')
    plt.ylabel('RMS')
    plt.legend()
    plt.title(title)
    plt.xlabel('Polynomial degree')


def main():
    """Plot RMS error against polynomial degree, raw and normalised.

    Loads the data through ``a1.load_unicef_data``, takes column 1 as the
    target and columns 7 onward as features, and calls ``calculateToPlot``
    twice: once on the raw features (upper panel) and once on the features
    after ``a1.normalize_data`` (lower panel). Each panel shows both curves
    returned by ``calculateToPlot`` for degrees 1 to 6.

    NOTE(review): the curve labels assume ``calculateToPlot`` returns
    (training error, test error), as the original variable names
    ``PlotMatrixRMSErorr`` / ``PlotMatrixRMSErorrTest`` suggest -- confirm.
    The previous positional legend ``['Test error', 'Training error']``
    labelled the first-drawn curve "Test error" and the ``...Test`` curve
    "Training error".
    """
    # Get input data
    (countries, features, values) = a1.load_unicef_data()
    targets = values[:, 1]
    x = values[:, 7:]

    # Upper panel: features used as loaded.
    train_rms, test_rms = calculateToPlot(x, targets)
    plotMatrixDegree = np.matrix([[1], [2], [3], [4], [5], [6]])
    plt.figure(1)
    _plot_rms_panel(211, plotMatrixDegree, train_rms, test_rms,
                    'Fit with polynomials, no regularization')

    # Lower panel: same computation on normalised features.
    x = a1.normalize_data(x)
    train_rms, test_rms = calculateToPlot(x, targets)
    _plot_rms_panel(212, plotMatrixDegree, train_rms, test_rms,
                    'Fit with polynomials, no regularization and normalize')

    plt.show()
#!/usr/bin/env python import assignment1 as a1 import numpy as np import matplotlib.pyplot as plt (countries, features, values) = a1.load_unicef_data() x = values[:, 7:] x = a1.normalize_data(x) N_TRAIN = 100 targets = values[:N_TRAIN, 1] x = x[0:N_TRAIN, :] lambda_list = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000] average_list = [] for i in [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]: sum = 0 for fold in range(1, 11): x_vali = x[(fold - 1) * 10:fold * 10, :] t_vali = targets[(fold - 1) * 10:fold * 10] x_train = np.vstack((x[0:(fold - 1) * 10, :], x[10 * fold:, :])) t_train = np.vstack((targets[0:(fold - 1) * 10], targets[10 * fold:])) (w, train_err) = a1.linear_regression(x_train, t_train, 'polynomial', i, 2, 'yes', 0, 0) (t_est, test_err) = a1.evaluate_regression(x_vali, t_vali, w, 2, 'polynomial', 'yes', 0, 0) #print(test_err) sum = sum + float(test_err) #print(sum)
else: g = 0 if k == 0: x_train = values[10*(k+1):N_TRAIN-g,7:] x_test = values[10*k:10*(k+1),7:] t_train = values[10*(k+1):N_TRAIN-g,1] t_test = values[10*k:10*(k+1),1] else: x_train = np.vstack((values[10 * (k + 1):N_TRAIN - g, 7:],values[:10 * k, 7:])) x_test = values[10 * k:10 * (k + 1), 7:] t_train = np.vstack((values[10 * (k + 1):N_TRAIN - g, 1],values[:10 * k, 1])) t_test = values[10 * k:10 * (k + 1), 1] if (Normalized): x_train = a1.normalize_data(x_train) x_test = a1.normalize_data(x_test) t_train = a1.normalize_data(t_train) t_test = a1.normalize_data(t_test) else: "" # TO DO:: Complete the linear_regression and evaluate_regression functions of the assignment1.py (w, tr_err) = a1.linear_regression(x_train, t_train, basis, reg_lambda[j], d) (t_est, te_err) = a1.evaluate_regression(x_test, t_test, w, basis, d) train_err[reg_lambda[j]] = train_err[reg_lambda[j]] + tr_err/10 test_err[reg_lambda[j]] = test_err[reg_lambda[j]] + te_err/10 k += 1 j += 1
# Script fragment: defines the cross-validation constants, loads and splits
# the data, and begins a loop over LAMBDA_VALUES and folds. The fold loop
# body continues beyond the statements shown here.
import numpy as np
import matplotlib.pyplot as plt

# constants
N_TRAIN = 100
FOLD_COUNT = 10
POLYNOMIAL_DEGREE = 2
INCLUDE_BIAS = True
VALIDATION_COUNT = 10
LAMBDA_VALUES = [0, 0.01, 0.1, 1, 10, 100, 1000, 10000]

# NOTE(review): `a1` is used below but no import for it is visible here.
(countries, features, values) = a1.load_unicef_data()
# Column 1 is the target; columns 7 through 39 are the input features.
targets = values[:, 1]
x = values[:, 7:40]
x_n = a1.normalize_data(x)  # x normalized
# First N_TRAIN rows form the training set, the remaining rows the test set.
x_n_train = x_n[0:N_TRAIN, :]
x_n_test = x_n[N_TRAIN:, :]
t_train = targets[0:N_TRAIN]
t_test = targets[N_TRAIN:]
# One slot per entry of LAMBDA_VALUES, initialised to zero.
validation_errors = np.zeros(len(LAMBDA_VALUES))
for lambda_index in range(len(LAMBDA_VALUES)):
    # Folds are visited from FOLD_COUNT down to 1.
    for fold in range(FOLD_COUNT, 0, -1):
        # indexes for partitioning
        # NOTE(review): the window width is the literal 10 rather than
        # VALIDATION_COUNT, which has the same value.
        idx1 = (fold - 1) * 10
        idx2 = fold * 10