def parameter_search_rbf(inputs, targets, test_fraction):
    """
    Grid-search the RBF scale and the regularisation strength.

    Every (scale, lambda) pair is trained and evaluated on one shared
    train/test split. The pair with the lowest test error is printed, and
    the train/test error curves passing through that optimum are plotted
    (one against scale, one against lambda).

    parameters
    ----------
    inputs - 2d array of input data, one row per data point
    targets - target values, partitioned alongside the design matrix
    test_fraction - fraction of the data held out for testing
    """
    N = inputs.shape[0]
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        N, test_fraction=test_fraction)
    # each data point is kept as a basis-function centre with probability
    # sample_fraction (so roughly 15% of the data become centres)
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=N, p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # widths of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # regularisation strengths
    # 2d arrays of errors: rows index scales, columns index reg_params
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    for i, scale in enumerate(scales):
        # the feature mapping depends on the scale, so rebuild it each time
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                designmtx, targets, train_part, test_part)
        for j, reg_param in enumerate(reg_params):
            train_error, test_error = train_and_test(
                train_designmtx, train_targets, test_designmtx,
                test_targets, reg_param=reg_param)
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # argmin works on the flattened array; unravel_index converts that flat
    # position back into the (scale index, reg_param index) pair
    best_i, best_j = np.unravel_index(
        np.argmin(test_errors), test_errors.shape)
    print("Best joint choice of parameters:")
    print(
        "\tscale %.2g and lambda = %.2g" %
        (scales[best_i], reg_params[best_j]))
    # error across scales at the best regularisation choice
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors[:, best_j], test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and error across regularisation choices at the best scale
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors[best_i, :],
        test_errors[best_i, :])
    ax.set_xscale('log')
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    Plot RBF train/test error against regularisation strength.

    A fixed-scale RBF design matrix is built from randomly chosen centres
    and evaluated for a range of regularisation parameters. The test error
    of the plain linear model is drawn as a dotted horizontal baseline and
    the figure is saved to ../plots/rbf_vs_linear.pdf.

    parameters
    ----------
    inputs - 2d array of input data, one row per data point
    targets - target values for the data points
    test_fraction - fraction of the data held out for testing
    test_error_linear - test error of the linear model (baseline height)
    """
    n = inputs.shape[0]
    # each data point becomes a centre with probability 0.10
    centres = inputs[
        np.random.choice([False, True], size=n, p=[0.90, 0.10]), :]
    print("centres shape = %r" % (centres.shape, ))
    # the width (analogous to standard deviation) of the basis functions
    scale = 8.5
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)
    # NOTE(review): this partition is only used for the shape printout
    # below; the evaluation loop hands the full design matrix and
    # test_fraction to simple_evaluation_linear_model instead.
    train_part, test_part = train_and_test_split(
        n, test_fraction=test_fraction)
    train_design_matrix, train_targets, test_design_matrix, test_targets = \
        train_and_test_partition(
            design_matrix, targets, train_part, test_part)
    # shapes of the train and test parts, for debugging
    print("training design matrix shape = %r" % (train_design_matrix.shape, ))
    print("testing design matrix shape = %r" % (test_design_matrix.shape, ))
    print("training targets shape = %r" % (train_targets.shape, ))
    print("testing targets shape = %r" % (test_targets.shape, ) + "\n")
    # the rbf feature mapping performance
    reg_params = np.logspace(-15, 5, 20)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg. parameter " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            design_matrix, targets, test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    # a straight dotted line showing the linear model's test error
    x_lim = ax.get_xlim()
    ax.plot(x_lim, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
    ax.set_title('Evaluating RBF Performance')
    # savefig's keyword for the output type is `format` (there is no `fmt`)
    fig.savefig("../plots/rbf_vs_linear.pdf", format="pdf")
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    Plot RBF train/test error against regularisation strength.

    A fixed-scale RBF design matrix is built from randomly chosen centres
    and evaluated for a range of regularisation parameters. The test error
    of the plain linear model is drawn as a dotted horizontal baseline.

    parameters
    ----------
    inputs - 2d array of input data, one row per data point
    targets - target values for the data points
    test_fraction - fraction of the data held out for testing
    test_error_linear - test error of the linear model (baseline height)
    """
    N = inputs.shape[0]
    # each data point becomes a centre with probability 0.1
    centres = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    print("centres.shape = %r" % (centres.shape, ))
    scale = 10.  # width of the basis functions
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # NOTE(review): this partition is only used for the shape printout
    # below; the evaluation loop hands the full design matrix and
    # test_fraction to simple_evaluation_linear_model instead.
    train_part, test_part = train_and_test_split(
        N, test_fraction=test_fraction)
    train_designmtx, train_targets, test_designmtx, test_targets = \
        train_and_test_partition(
            designmtx, targets, train_part, test_part)
    # output the shapes of the train and test parts for debugging
    print("train_designmtx.shape = %r" % (train_designmtx.shape, ))
    print("test_designmtx.shape = %r" % (test_designmtx.shape, ))
    print("train_targets.shape = %r" % (train_targets.shape, ))
    print("test_targets.shape = %r" % (test_targets.shape, ))
    # the rbf feature mapping performance
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg_para " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            designmtx, targets, test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    # raw string: "\l" is not a valid escape sequence in a normal literal
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    # we also want to plot a straight line showing the linear performance
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
def main(inputs, targets, scale, best_no_centres, test_fraction=0.20):
    """
    Fit a Bayesian linear regression on RBF features and visualise it.

    The inputs are standardised and split into train and test parts, RBF
    centres are sampled from the training inputs, and the posterior over
    the weights is computed. The function prints train/test RMSE of the
    posterior mean, plots the mean, 20 posterior samples and the predictive
    band along one input column, saves the figure, and finally prints the
    negative joint log probability of the test targets.

    parameters
    ----------
    inputs - 2d array of raw input data, one row per data point
    targets - target values for the data points
    scale - width of the RBF basis functions
    best_no_centres - probability with which each training point is kept
        as a centre (i.e. the expected fraction of centres)
    test_fraction - fraction of the data held out for testing
    """
    # fixed seed so the pseudo-random split and centres are reproducible
    np.random.seed(30)
    print("\n")
    std_inputs = standardise(inputs)
    train_part, test_part = train_and_test_split(
        std_inputs.shape[0], test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = \
        train_and_test_partition(std_inputs, targets, train_part, test_part)
    n_train, n_dims = train_inputs.shape[0], train_inputs.shape[1]

    # centres of the rbf basis functions: a random subset of the train inputs
    keep_as_centre = np.random.choice(
        [False, True], size=n_train,
        p=[1 - best_no_centres, best_no_centres])
    centres = train_inputs[keep_as_centre, :]
    print("centres shape = %r" % (centres.shape, ))
    # the basis function widths are treated as fixed for simplicity;
    # wrapping in a tuple keeps the format safe if scale is a sequence
    print("scale = %r" % (scale, ))

    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # plotting the basis functions themselves for reference
    display_basis_functions(feature_mapping, n_dims)

    # beta is the noise precision (reciprocal of the assumed noise variance)
    beta = np.reciprocal(0.40365849982557295)
    # The most common target value is used both as alpha and as the prior
    # mean. It is computed once; np.ravel copes with `mode` returning
    # either a length-1 array or a scalar for the modal value.
    targets_mode = np.ravel(mode(targets)[0])[0]
    alpha = targets_mode

    design_matrix = feature_mapping(train_inputs)
    # number of features = width of the design matrix
    M = design_matrix.shape[1]
    # prior mean and covariance over the weights
    m0 = np.full(M, targets_mode, dtype=float)
    S0 = alpha * np.identity(M)
    # posterior over the weights
    mN, SN = calculate_weights_posterior(
        design_matrix, train_targets, beta, m0, S0)

    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    train_output = mean_approx(train_inputs)
    test_output = mean_approx(test_inputs)
    bayesian_mean_train_error = root_mean_squared_error(
        train_targets, train_output)
    bayesian_mean_test_error = root_mean_squared_error(
        test_targets, test_output)
    print("Root mean squared errors:")
    print("Train error of posterior mean (applying Bayesian inference): %r"
          % bayesian_mean_train_error)
    print("Test error of posterior mean (applying Bayesian inference): %r"
          % bayesian_mean_test_error)

    # one input variable is shown on the x axis as an example
    plot_column = 10
    fig, ax, lines = plot_function_and_data(
        std_inputs[:, plot_column], targets)
    # evaluation grid: every input column sweeps the same 101 points in
    # [-5, 5] simultaneously
    xs = np.tile(np.linspace(-5, 5, 101)[:, np.newaxis], (1, n_dims))
    ys = mean_approx(xs)
    line, = ax.plot(xs[:, plot_column], ys, 'r-')
    lines.append(line)
    ax.set_ylim([0, 10])
    # a number of functions sampled from the posterior
    for _ in range(20):
        weights_sample = np.random.multivariate_normal(mN, SN)
        sample_approx = construct_feature_mapping_approx(
            feature_mapping, weights_sample)
        sample_ys = sample_approx(xs)
        line, = ax.plot(xs[:, plot_column], sample_ys, 'm', linewidth=0.5)
        lines.append(line)
    ax.legend(lines, ['data', 'mean approx', 'samples'])

    # the predictive distribution along the grid
    new_designmtx = feature_mapping(xs)
    ys, sigma2Ns = predictive_distribution(new_designmtx, beta, mN, SN)
    print("(sigma2Ns**0.5).shape = %r" % ((sigma2Ns**0.5).shape, ))
    print("np.sqrt(sigma2Ns).shape = %r" % (np.sqrt(sigma2Ns).shape, ))
    print("ys.shape = %r" % (ys.shape, ))
    ax.plot(xs[:, plot_column], ys, 'r', linewidth=3)
    lower = ys - np.sqrt(sigma2Ns)
    upper = ys + np.sqrt(sigma2Ns)
    print("lower.shape = %r" % (lower.shape, ))
    print("upper.shape = %r" % (upper.shape, ))
    ax.fill_between(xs[:, plot_column], lower, upper, alpha=0.2, color='r')
    ax.set_title('Posterior Mean, Samples, and Predictive Distribution')
    ax.set_xlabel('standardised alcohol content')
    ax.set_ylabel('p(t|x)')
    fig.tight_layout()
    # savefig's keyword for the output type is `format` (there is no `fmt`)
    fig.savefig("../plots/bayesian/bayesian_rbf.png", format="png")
    plt.show()

    # Negative joint log probability of the test targets under the
    # predictive distribution, treating each prediction as a Gaussian with
    # the returned mean and variance:
    #   -sum_n log N(t_n | y_n, sigma2_n)
    # (assumes predictive_distribution returns one variance per test point,
    # as its use for the plotted band above suggests - TODO confirm)
    test_design_matrix = feature_mapping(test_inputs)
    predictions, prediction_sigma2 = predictive_distribution(
        test_design_matrix, beta, mN, SN)
    residuals = np.ravel(test_targets) - np.ravel(predictions)
    variances = np.ravel(prediction_sigma2)
    log_probabilities = -0.5 * (
        np.log(2 * np.pi * variances) + residuals ** 2 / variances)
    sum_joint_log_probabilities = float(-np.sum(log_probabilities))
    print("Error as negative joint log probability: %r"
          % sum_joint_log_probabilities)
def parameter_search_rbf(inputs, targets, test_fraction):
    """
    Grid-search RBF scale and regularisation, then vary the centre count.

    Every (scale, lambda) pair is trained and evaluated on one shared
    train/test split; the pair with the lowest test error is printed and
    the error curves through that optimum are plotted and saved. Using
    that best pair, the number of centres is then varied from 1 to 19 and
    the resulting train/test errors are plotted and saved as well.

    parameters
    ----------
    inputs - 2d array of input data, one row per data point
    targets - target values, partitioned alongside the design matrix
    test_fraction - fraction of the data held out for testing
    """
    n = inputs.shape[0]
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        n, test_fraction=test_fraction)
    # each data point becomes a centre with probability sample_fraction
    sample_fraction = 0.10
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=n, p=p), :]
    print("\ncentres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 6, 20)  # widths of the basis functions
    reg_params = np.logspace(-15, 5, 20)  # regularisation strengths
    # 2d arrays of errors: rows index scales, columns index reg_params
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    for i, scale in enumerate(scales):
        # the feature mapping depends on the scale, so rebuild it each time
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                designmtx, targets, train_part, test_part)
        for j, reg_param in enumerate(reg_params):
            train_error, test_error = train_and_test(
                train_designmtx, train_targets, test_designmtx,
                test_targets, reg_param=reg_param)
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error

    # argmin works on the flattened array; unravel_index converts that flat
    # position back into the (scale index, reg_param index) pair
    best_i, best_j = np.unravel_index(
        np.argmin(test_errors), test_errors.shape)
    print("\nBest joint choice of parameters:")
    print(
        "\tscale %.2g and lambda = %.2g" %
        (scales[best_i], reg_params[best_j]))

    # error across scales at the best regularisation choice
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors[:, best_j], test_errors[:, best_j])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Scales')
    # savefig's keyword for the output type is `format` (there is no `fmt`)
    fig.savefig("../plots/rbf_searching_scales.pdf", format="pdf")
    # ...and error across regularisation choices at the best scale
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors[best_i, :],
        test_errors[best_i, :])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Reg Params')
    fig.savefig("../plots/rbf_searching_reg_params.pdf", format="pdf")

    # using the best parameters found above, vary the number of centres
    best_reg_param = reg_params[best_j]
    best_scale = scales[best_i]
    n_centres_seq = np.arange(1, 20)
    centre_train_errors = []
    centre_test_errors = []
    for n_centres in n_centres_seq:
        # NOTE(review): these centres are evenly spaced 1d points in
        # [0, 1], unlike the data-sampled rows used in the grid search -
        # confirm this is what construct_rbf_feature_mapping expects here.
        grid_centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(
            grid_centres, best_scale)
        design_matrix = feature_mapping(inputs)
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                design_matrix, targets, train_part, test_part)
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            best_reg_param)
        centre_train_errors.append(train_error)
        centre_test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        "no. centres", n_centres_seq, centre_train_errors,
        centre_test_errors)
    ax.set_title('Train vs Test Error Across Centre Number')
    fig.savefig("../plots/rbf_searching_number_centres.pdf", format="pdf")
def main(ifname, delimiter=None, columns=None, has_header=True,
         test_fraction=0.25):
    """
    Explore the data set and compare five regression models on one split.

    After printing a correlation table, a histogram of "quality" and a
    chi-square feature screening, the function fits linear regression,
    regularised linear regression, kNN regression, RBF regression and a
    polynomial model on the same train/test split. For each model it
    prints the errors, an r-squared score and the first 20 predictions,
    and shows the plots at the end.

    parameters
    ----------
    ifname - path of the data file
    delimiter - column delimiter, forwarded to import_data; the pandas
        read used for the exploratory analysis falls back to ";" when None
    columns - columns to import, forwarded to import_data
    has_header - whether the file has a header row (for import_data)
    test_fraction - fraction of the data held out for testing
    """
    from scipy.stats import chi2_contingency

    data, field_names = import_data(
        ifname, delimiter=delimiter, has_header=has_header, columns=columns)

    # ---- Exploratory Data Analysis (EDA) ----
    # read the same file that import_data was given, not a hard-coded path
    raw_data = pd.read_csv(ifname, sep=";" if delimiter is None else delimiter)
    # correlation coefficients: |r| = 1 is the strongest relation, 0 the
    # weakest
    print(raw_data.corr())
    # histogram of the target, to see how it is distributed
    plt.hist(raw_data["quality"], range=(1, 10), edgecolor='black',
             linewidth=1)
    plt.xlabel('quality')
    plt.ylabel('amount of samples')
    plt.title("distribution of red wine quality")

    # ---- feature selection ----
    class ChiSquare:
        """Chi-square test of independence between two dataframe columns."""

        def __init__(self, dataframe):
            self.df = dataframe
            self.p = None  # P-Value
            self.chi2 = None  # Chi Test Statistic
            self.dof = None
            self.dfObserved = None
            self.dfExpected = None

        def _print_chisquare_result(self, colX, alpha):
            # a p-value below alpha is reported as an important predictor
            if self.p < alpha:
                result = "{0} is IMPORTANT for Prediction".format(colX)
            else:
                result = ("{0} is NOT an important predictor. "
                          "(Discard {0} from model)".format(colX))
            print(result)

        def TestIndependence(self, colX, colY, alpha=0.05):
            # both columns are treated as categorical (cast to str)
            X = self.df[colX].astype(str)
            Y = self.df[colY].astype(str)
            self.dfObserved = pd.crosstab(Y, X)
            chi2, p, dof, expected = chi2_contingency(
                self.dfObserved.values)
            self.p = p
            self.chi2 = chi2
            self.dof = dof
            self.dfExpected = pd.DataFrame(
                expected, columns=self.dfObserved.columns,
                index=self.dfObserved.index)
            self._print_chisquare_result(colX, alpha)
            print('self:%s' % (self), self.chi2, self.p)

    cT = ChiSquare(raw_data)
    testColumns = [
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ]
    for var in testColumns:
        cT.TestIndependence(colX=var, colY="quality")

    # ---- split data into inputs and targets ----
    # the copy keeps the in-place edits below from touching `data`
    inputs = data[:, :11].copy()
    targets = data[:, 11]

    # draw plots of the (not yet normalised) data set
    exploratory_plots(np.column_stack((inputs, targets)), field_names)

    # column 0 is replaced by a constant column of ones (the bias term)
    inputs[:, 0] = np.ones(len(targets))
    # standardise the selected feature columns: volatile acidity,
    # citric acid, density, sulphates, alcohol
    for col in (1, 2, 7, 9, 10):
        column = inputs[:, col]
        inputs[:, col] = (column - np.mean(column)) / np.std(column)

    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        inputs.shape[0], test_fraction=test_fraction)

    def rsquare(true_values, predicted_values):
        """Return the coefficient of determination of the predictions."""
        true_values = np.ravel(true_values)
        predicted_values = np.ravel(predicted_values)
        ss_tot = np.sum((true_values - np.mean(true_values)) ** 2)
        ss_res = np.sum((true_values - predicted_values) ** 2)
        return 1 - (ss_res / ss_tot)

    print(
        '---------------------------Linear Regression-----------------------------------'
    )
    # keep the bias column and the five standardised features
    inputs = inputs[:, [0, 1, 2, 7, 9, 10]]
    train_inputs, train_targets, test_inputs, test_targets = \
        train_and_test_partition(inputs, targets, train_part, test_part)
    weights = ml_weights(train_inputs, train_targets)
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    print("LR-train_weights", weights)
    print("LR-train_error", train_error)
    print("LR-test_error", test_error)
    print("LR-rsquare score", rsquare(test_targets, test_predicts))
    print("LR-prediction:", test_predicts[:20], "LR-original",
          test_targets[:20])

    print(
        '----------------Regularised Linear Regression-----------------------------'
    )
    # same inputs and split as above, so the partition is reused
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        reg_weights = regularised_ml_weights(
            train_inputs, train_targets, reg_param)
        train_predicts = linear_model_predict(train_inputs, reg_weights)
        test_predicts = linear_model_predict(test_inputs, reg_weights)
        train_errors.append(
            root_mean_squared_error(train_targets, train_predicts))
        test_errors.append(
            root_mean_squared_error(test_targets, test_predicts))
    # best lambda
    test_errors = np.array(test_errors)
    best_l = np.argmin(test_errors)
    print("RLR-Best joint choice of parameters:")
    print("RLR-lambda = %.2g" % (reg_params[best_l]))
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')
    # refit with the best lambda itself (best_l is only its index) and
    # predict with those weights so the score matches the chosen model
    reg_weights = regularised_ml_weights(
        train_inputs, train_targets, reg_params[best_l])
    test_predicts = linear_model_predict(test_inputs, reg_weights)
    print("RLR-train_weights", reg_weights)
    print("RLR-train_error", train_errors[best_l])
    print("RLR-test_error", test_errors[best_l])
    print("RLR-rsquare score", rsquare(test_targets, test_predicts))
    print("RLR-prediction:", test_predicts[:20], "RLR-original",
          test_targets[:20])

    print(
        '-----------------------------kNN Regression------------------------------------'
    )
    # drop the x0 = 1 column
    inputs = inputs[:, [1, 2, 3, 4, 5]]
    train_inputs, train_targets, test_inputs, test_targets = \
        train_and_test_partition(inputs, targets, train_part, test_part)
    train_errors = []
    test_errors = []
    K = range(2, 9)
    for k in K:
        knn_approx = construct_knn_approx(train_inputs, train_targets, k)
        train_knn_predicts = knn_approx(train_inputs)
        test_knn_predicts = knn_approx(test_inputs)
        train_errors.append(
            root_mean_squared_error(train_knn_predicts, train_targets))
        test_errors.append(
            root_mean_squared_error(test_knn_predicts, test_targets))
    # best k
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_k = np.argmin(test_errors)
    print("Best joint choice of parameters:")
    print("k = %.2g" % (K[best_k]))
    fig, ax = plot_train_test_errors("K", K, train_errors, test_errors)
    ax.set_xticks(np.arange(min(K), max(K) + 1, 1.0))
    # report and refit the selected k, not the last or a hard-coded one
    print("kNN-train_error", train_errors[best_k])
    print("kNN-test_error", test_errors[best_k])
    knn_approx = construct_knn_approx(
        train_inputs, train_targets, k=K[best_k])
    test_predicts = knn_approx(test_inputs)
    print("kNN-rsquare score", rsquare(test_targets, test_predicts))
    print("kNN-y_predicts", test_predicts[:20], 'y_original',
          test_targets[:20])

    print(
        '----------------------------RBF Function-------------------------------------'
    )
    # each data point becomes a centre with probability sample_fraction
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[
        np.random.choice([False, True], size=inputs.shape[0], p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # widths of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # regularisation strengths
    # 2d arrays of errors: rows index scales, columns index reg_params
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    for i, scale in enumerate(scales):
        # the feature mapping depends on the scale, so rebuild it each time
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(designmtx, targets, train_part,
                                     test_part)
        for j, reg_param in enumerate(reg_params):
            train_error, test_error = train_and_test(
                train_designmtx, train_targets, test_designmtx,
                test_targets, reg_param=reg_param)
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # argmin works on the flattened array; unravel_index converts that flat
    # position back into the (scale index, reg_param index) pair
    best_i, best_j = np.unravel_index(
        np.argmin(test_errors), test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale= %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # error across scales at the best regularisation choice
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors[:, best_j], test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and error across regularisation choices at the best scale
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors[best_i, :],
        test_errors[best_i, :])
    ax.set_xscale('log')
    # rebuild the design matrices for the best scale; the ones left over
    # from the loop belong to the last scale tried
    feature_mapping = construct_rbf_feature_mapping(centres, scales[best_i])
    designmtx = feature_mapping(inputs)
    train_designmtx, train_targets, test_designmtx, test_targets = \
        train_and_test_partition(designmtx, targets, train_part, test_part)
    reg_weights = regularised_ml_weights(
        train_designmtx, train_targets, reg_params[best_j])
    test_predicts = np.array(
        linear_model_predict(test_designmtx, reg_weights)).flatten()
    print("RBF-train_error", train_errors[best_i, best_j])
    print("RBF-test_error", test_errors[best_i, best_j])
    print("RBF-rsquare score", rsquare(test_targets, test_predicts))
    print('RBF_y_predicts: ', test_predicts[:20], 'rbf_y_originals: ',
          test_targets[:20])

    print(
        '-----------------------------Polynomial---------------------------------------'
    )

    def summed_monomials(degree):
        """Sum the monomial expansions of every input column."""
        total = 0
        for col in range(inputs.shape[1]):
            total = total + expand_to_monomials(inputs[:, col], degree)
        return np.array(total)

    degrees = range(1, 10)
    train_errors = []
    test_errors = []
    for degree in degrees:
        processed_inputs = summed_monomials(degree)
        processed_train_inputs, train_targets, processed_test_inputs, \
            test_targets = train_and_test_partition(
                processed_inputs, targets, train_part, test_part)
        train_error, test_error = train_and_test(
            processed_train_inputs, train_targets, processed_test_inputs,
            test_targets, reg_param=None)
        train_errors.append(train_error)
        test_errors.append(test_error)
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_d = np.argmin(test_errors)
    # report the errors of the selected degree
    print("Polynomial-train error: ", train_errors[best_d])
    print("Polynomial-test error: ", test_errors[best_d])
    print("Best joint choice of degree:")
    final_degree = degrees[best_d]
    print("degree = %.2g" % (final_degree))
    fig, ax = plot_train_test_errors(
        "Degree", degrees, train_errors, test_errors)
    ax.set_xticks(np.arange(min(degrees), max(degrees) + 1, 1.0))

    # refit with the final degree to obtain predictions
    processed_inputs = summed_monomials(final_degree)
    processed_train_inputs, train_targets, processed_test_inputs, \
        test_targets = train_and_test_partition(
            processed_inputs, targets, train_part, test_part)
    # The errors above were computed with reg_param=None, so fit
    # unregularised weights rather than reusing a regularisation value
    # left over from the RBF loop.
    # NOTE(review): assumes train_and_test(reg_param=None) is the plain
    # maximum-likelihood fit that ml_weights performs - confirm.
    weights = ml_weights(processed_train_inputs, train_targets)
    test_predicts = prediction_function(
        processed_test_inputs, weights, final_degree)
    print("Polynomial-rsquare score", rsquare(test_targets, test_predicts))
    print('Polynomial-y_predicts: ', test_predicts[:20],
          'Polynomial-y_original: ', test_targets[:20])
    plt.show()
def main(ifname=None, delimiter=None, columns=None):
    """
    Run the full model-comparison pipeline on the data file.

    The data are imported and 20% are held out as a test set. The training
    part is divided into 5 cross-validation folds, and the exploratory
    plots, linear regression, Bayesian regression, kNN and three RBF
    variants are run in turn before the figures are shown.

    parameters
    ----------
    ifname - path of the data file (default 'datafile.csv')
    delimiter - column delimiter of the file (default ';')
    columns - indices of the columns to import (default the first 12)
    """
    # fill in defaults only for arguments the caller left unspecified
    if delimiter is None:
        delimiter = ';'
    if columns is None:
        columns = np.arange(12)
    if ifname is None:
        ifname = 'datafile.csv'
    data, field_names = import_data(
        ifname, delimiter=delimiter, has_header=True, columns=columns)
    # last column holds the targets, the first 11 columns the inputs
    targets = data[:, -1]
    inputs = data[:, 0:11]
    # the test fraction was decided to be 0.2: 20% of the data is held out
    test_fraction = 0.2
    train_part, test_part = train_and_test_split(
        data.shape[0], test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = \
        train_and_test_partition(inputs, targets, train_part, test_part)

    # cross-validation folds over the training data
    # (presumably a sequence of (train_filter, test_filter) boolean array
    # pairs, one per fold - verify against create_cv_folds)
    num_folds = 5
    folds = create_cv_folds(train_inputs.shape[0], num_folds)

    # first of all some exploratory plots
    exploratory_plots()
    # linear regression
    linear_regression_entry_point(
        field_names, train_inputs, train_targets, folds, test_fraction)
    # Bayesian regression
    bayesian_regression_entry_point(data)
    # kNN
    kNN_entry_point(data, field_names)
    # finally the RBF models; the linear model provides the baseline error
    train_error_linear, test_error_linear = simple_linear_regression(
        train_inputs, train_targets, folds, test_fraction, test_inputs,
        test_targets)
    # RBF regression with normalisation but without cross-validation
    parameter_search_rbf_without_cross(
        train_inputs, train_targets, test_fraction, test_error_linear,
        normalize=True)
    # RBF regression with cross-validation and normalisation
    parameter_search_rbf_cross(
        train_inputs, train_targets, folds, test_error_linear, test_inputs,
        test_targets)
    # RBF regression with cross-validation but without normalisation
    parameter_search_rbf_cross(
        train_inputs, train_targets, folds, test_error_linear, test_inputs,
        test_targets, normalize=False)
    plt.show()
def parameter_search_rbf_without_cross(inputs, targets, test_fraction,
                                       test_error_linear, normalize=True):
    """
    Grid-search centre fraction, RBF scale and regularisation strength.

    All combinations are trained and evaluated on one fixed train/test
    split (no cross-validation). The best combination is printed and three
    plots are drawn, each varying one parameter while the other two stay
    at their best values, with the linear model's test error as a dotted
    baseline.

    parameters
    ----------
    inputs - 2d array of input data, one row per data point
    targets - target values for the data points
    test_fraction - fraction of the data held out for testing
    test_error_linear - test error of the linear model (baseline height)
    normalize - if True, standardise every input column first
    """
    if normalize:
        # standardise a float copy so the caller's array is left untouched
        # (radial basis functions are more helpful on normalised inputs)
        inputs = np.array(inputs, dtype=float)
        inputs = (inputs - np.mean(inputs, axis=0)) / np.std(inputs, axis=0)
    N = inputs.shape[0]
    # candidate probabilities with which a data point becomes a centre
    sample_fractions = np.array([0.05, 0.1, 0.15, 0.2, 0.25])
    scales = np.logspace(0, 4, 20)  # widths of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # regularisation strengths
    # 3d arrays of errors indexed by (sample fraction, scale, reg_param)
    grid_shape = (sample_fractions.size, scales.size, reg_params.size)
    train_mean_errors = np.empty(grid_shape)
    test_mean_errors = np.empty(grid_shape)
    # one train/test split shared by all experiments
    train_part, test_part = train_and_test_split(
        N, test_fraction=test_fraction)

    best_k = best_i = best_j = 0
    lowest_test_error = np.inf
    # loop over the possible centre fractions (5%, 10%, 15%, 20%, 25%)
    for k, sample_fraction in enumerate(sample_fractions):
        p = (1 - sample_fraction, sample_fraction)
        centres = inputs[np.random.choice([False, True], size=N, p=p), :]
        for i, scale in enumerate(scales):
            # the feature mapping depends on the scale, so rebuild it
            feature_mapping = construct_rbf_feature_mapping(centres, scale)
            designmtx = feature_mapping(inputs)
            # split the design matrix and targets using the shared
            # train/test partition
            train_designmtx, train_targets, test_designmtx, test_targets = \
                train_and_test_partition(
                    designmtx, targets, train_part, test_part)
            for j, reg_param in enumerate(reg_params):
                train_error, test_error, weights = train_and_test(
                    train_designmtx, train_targets, test_designmtx,
                    test_targets, reg_param=reg_param)
                train_mean_errors[k, i, j] = train_error
                test_mean_errors[k, i, j] = test_error
                # remember the indices of the lowest test error so far
                current_error = np.mean(test_error)
                if current_error < lowest_test_error:
                    lowest_test_error = current_error
                    best_k, best_i, best_j = k, i, j

    print("The value with the lowest error is:",
          test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fractions %.2g scale %.2g and lambda = %.2g"
          % (sample_fractions[best_k], scales[best_i], reg_params[best_j]))

    # error across scales at the best regularisation choice and centres
    fig, ax = plot_train_test_errors(
        "scale", scales, train_mean_errors[best_k, :, best_j],
        test_mean_errors[best_k, :, best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres',
                 fontsize=10)
    xlim = ax.get_xlim()  # span of the linear-regression baseline
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')

    # ...error across regularisation choices at the best scale and centres
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_mean_errors[best_k, best_i, :],
        test_mean_errors[best_k, best_i, :])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres',
                 fontsize=10)
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')

    # ...and error across centre fractions at the best reg. parameter and
    # the best scale
    fig, ax = plot_train_test_errors(
        "sample fractions", sample_fractions,
        train_mean_errors[:, best_i, best_j],
        test_mean_errors[:, best_i, best_j])
    fig.suptitle(
        'RBF regression for the best scale parameter & reg. parameter',
        fontsize=10)
    ax.set_xlim([0.05, 0.25])
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')