def train_acc(data_path, algorithm_name):
    """Fit the requested classifier on the training split and return its
    training accuracy.

    Parameters
    ----------
    data_path : str
        Path handed to ``data.run`` to load the dataset.
    algorithm_name : str
        One of ``"gnb"``, ``"lda"``, ``"qda"``.

    Returns
    -------
    float or str
        Fraction of training samples classified correctly; the string
        ``"NO Implement"`` for an unknown algorithm (kept for backward
        compatibility with existing callers).
    """
    print(data_path)
    # Only the training split is scored; test_x/test_y are intentionally unused.
    x, y, test_x, test_y = data.run(data_path)

    # Dispatch table instead of an if/elif chain.
    algorithms = {"gnb": GNB, "lda": LDA, "qda": QDA}
    algo_cls = algorithms.get(algorithm_name)
    if algo_cls is None:
        print("NO Implement")
        return "NO Implement"
    clf = algo_cls()
    print(algorithm_name + " instance.")  # same message as before, e.g. "gnb instance."

    clf.fit(x, y)
    train_result = clf.predict(x)
    # Guard: the original divided by zero on an empty training set.
    if len(y) == 0:
        return 0.0
    # Count matches with zip/sum instead of index arithmetic.
    num = sum(1 for pred, true in zip(train_result, y) if pred == true)
    return num / len(y)
def models(modeltype='FDA'):
    """Train one model per dataset found under ROOT.

    Scans ROOT for files whose name ends in 'train', fits a fresh model of
    the requested type on each, and returns {dataset_name: fitted_model}.
    Raises ValueError when *modeltype* is not one of the supported names.
    """
    trained = {}
    for fname in sorted(os.listdir(ROOT)):
        # Only the *.train files define a dataset; everything else is skipped.
        if not fname.endswith('train'):
            continue
        dataset_name = fname.split('.')[0]
        xy = file_to_dict(fname)
        features, labels = xy['X'], xy['Y']
        if modeltype == 'FDA':
            estimator = FDA()
        elif modeltype == 'LogReg':
            estimator = LogReg()
        elif modeltype == 'LinReg':
            estimator = LinReg()
        elif modeltype == 'QDA':
            estimator = QDA()
        else:
            raise ValueError("model not implemented")
        # Store first, then fit in place (same order as before).
        trained[dataset_name] = estimator
        estimator.fit(features, labels)
    return trained
def main():
    """Entry point: preprocess the data, fit a QDA model, report test accuracy."""
    # Preprocessing: load, clean, and split the dataset.
    pre = preprocessing()
    pre.process_data()
    xtrain, xtest, ytrain, ytest = pre.divide_data()

    # Visualization step is available but disabled by default.
    # pre.visualize_data()

    # Model fitting (QDA).
    print("Modeling ...")
    model = QDA()
    model.train(xtrain, ytrain)
    # model.test(xtest)
    acc = model.get_accuracy(xtest, ytest)
    print("testing accuracy : ", "{0:.4f}".format(acc))
    print("*********************************************")
def main(dataset, compute_errors=True, plot_boundaries=True, save=False):
    """
    Fit the four models on the training sets, depending on the parameters
    compute the accuracy et plot the boundary
    args ::
        dataset : array(str)
    """
    x_train, y_train = read_file("data/" + dataset + ".train")
    x_test, y_test = read_file("data/" + dataset + ".test")

    # Note: "LogisiticRegression" is the project class's actual spelling.
    model_names = ["LDA", "LinearRegression", "LogisiticRegression", "QDA"]
    models = [
        LDA(x_train, y_train),
        LinearRegression(x_train, y_train),
        LogisiticRegression(x_train, y_train),
        QDA(x_train, y_train),
    ]

    for model_name, model in zip(model_names, models):
        model.fit()

        if compute_errors:
            # Score on the training set, then on the held-out test set.
            preds = [model.predict(sample) for sample in x_train]
            e = accuracy(y_train, preds)
            print("Accuracy with " + model_name)
            print("Training: ", e)
            preds = [model.predict(sample) for sample in x_test]
            e = accuracy(y_test, preds)
            print("Testing: ", e)

        if plot_boundaries:
            # Training-set figure.
            model.plot_boundary()
            plt.scatter(model.x[:, 0], model.x[:, 1], c=model.y, s=1)
            plt.title("Model: " + model_name + ", " + dataset + " (Train)")
            if save:
                plt.savefig("figs/" + model_name + "_" + dataset[-1] + "Train.png")
            plt.show()
            # Test-set figure.
            model.plot_boundary()
            plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, s=1)
            plt.title("Model: " + model_name + ", " + dataset + " (Test)")
            if save:
                plt.savefig("figs/" + model_name + "_" + dataset[-1] + "Test.png")
            plt.show()
# Linear-regression panel: overlay the classifier's decision regions on the
# combined train+test scatter (axis ax[2]); test points are the tail of X_combined.
# NOTE(review): the test_idx stop mixes X_train and X_A_train — this looks like a
# typo for X_train.shape[0] + X_test.shape[0] (cf. the QDA call below); confirm.
plot_decision_regions(X=X_combined, y=y_combined, classifier=linr_clf, test_idx=range(X_train.shape[0], X_A_train.shape[0] + X_test.shape[0]), ax=ax[2])
ax[2].set_xlabel("x1", fontsize="large")
ax[2].set_ylabel("x2", fontsize="large")
ax[2].legend(loc="upper right", fontsize="large")
ax[2].set_title("Linear regression (normal equation) on dataset %s" % l, fontsize="x-large", fontweight="bold")
""" Run QDA """
# Fit the generative QDA classifier and compute train/test misclassification rates.
QDA_clf = QDA()
QDA_clf.fit(X_train, y_train)
qda_train_error = np.mean(QDA_clf.predict(X_train).flatten() != y_train)
qda_test_error = np.mean(QDA_clf.predict(X_test).flatten() != y_test)
# QDA panel (axis ax[3]); here test_idx correctly spans the X_test tail.
plot_decision_regions(X=X_combined, y=y_combined, classifier=QDA_clf, test_idx=range(X_train.shape[0], X_train.shape[0] + X_test.shape[0]), ax=ax[3])
ax[3].set_xlabel("x1", fontsize="large")
ax[3].set_ylabel("x2", fontsize="large")
ax[3].legend(loc="upper right", fontsize="large")
# NOTE: this call is truncated in the visible source (continues beyond this chunk).
ax[3].set_title("Generative model (QDA) on dataset %s" % l,
# Synthetic 3-class Gaussian dataset: one (m, 2) blob per class, labels 0/1/2.
x1 = np.random.normal(loc=[1, 2], scale=(0.5, 0.2), size=(m, 2))
y1 = np.zeros((m, 1))
x2 = np.random.normal(loc=[5, 4], scale=(0.6, 1.0), size=(m, 2))
y2 = np.ones((m, 1))
x3 = np.random.normal(loc=[10, 3], scale=(0.4, 0.1), size=(m, 2))
y3 = np.ones((m, 1)) * 2

# Stack the blobs into one design matrix / label column and split.
x = np.concatenate([x1, x2, x3])
y = np.concatenate([y1, y2, y3])
x_train, x_test, y_train, y_test = train_test_split(x, y)

# Fit QDA; predict() yields a 2-D array here, so argmax over axis 1
# selects the predicted class label.
qda = QDA()
qda.fit(x_train, y_train)
train_acc = 100 * (qda.predict(x_train).argmax(1) == y_train.squeeze()).mean()
test_acc = 100 * (qda.predict(x_test).argmax(1) == y_test.squeeze()).mean()
print(f'Train Accuracy : {train_acc}%')
print(f'Test Accuracy : {test_acc}%')

# plot: evaluate the model on a dense square grid spanning the data range.
grid_axis = np.arange(x.min(), x.max(), 0.1)
xx1, xx2 = np.meshgrid(grid_axis, grid_axis)
xx = np.c_[xx1.ravel(), xx2.ravel()]
predict = qda.predict(xx).argmax(1)
# NOTE(review): this chunk is the interior of an if-statement whose condition
# lies above this view; the trailing `else` below pairs with that unseen `if`.
    # Logistic regression: fit, plot, and report train/test accuracy.
    log_reg = LogisticRegression()
    log_reg.fit(X_train, y_train)
    plot_results(log_reg, params['idx_dataset'], X_train, y_train, X_test,
                 y_test)
    print('The accuracy on train (test) dataset {} for LogReg: {} ({})'.
          format(params['idx_dataset'], log_reg.score(X_train, y_train),
                 log_reg.score(X_test, y_test)))

    # Linear regression
    lin_reg = LinearRegression()
    lin_reg.fit(X_train, y_train)
    plot_results(lin_reg, params['idx_dataset'], X_train, y_train, X_test,
                 y_test)
    print('The accuracy on train (test) dataset {} for LinReg: {} ({})'.
          format(params['idx_dataset'], lin_reg.score(X_train, y_train),
                 lin_reg.score(X_test, y_test)))

    # Quadratic Discriminant Analysis (QDA)
    qda = QDA()
    qda.fit(X_train, y_train)
    plot_results(qda, params['idx_dataset'], X_train, y_train, X_test, y_test)
    print(
        'The accuracy on train (test) dataset {} for QDA: {} ({})'.format(
            params['idx_dataset'], qda.score(X_train, y_train),
            qda.score(X_test, y_test)))
else:
    # No classifier selected: plot the raw train/test data only.
    plot_results(None, params['idx_dataset'], X_train, y_train, X_test,
                 y_test)
def main():
    """Train LDA, logistic-regression, linear-regression and QDA classifiers
    on the three HWK1 datasets (A, B, C), print the learnt parameters and
    misclassification errors, then plot decision boundaries.

    NOTE(review): this block is Python 2 (print statements).
    """
    datasets = ['A', 'B', 'C']
    # One fitted model per dataset, keyed by dataset letter.
    lda_models = {}
    qda_models = {}
    logistic_reg_models = {}
    lin_reg_models = {}
    for dataset in datasets:
        # Expected layout: classification{A,B,C}.{train,test}, tab-separated 2-D points.
        path_train = "classification_data_HWK1/classification_data_HWK1/classification%s.train" % dataset
        path_test = "classification_data_HWK1/classification_data_HWK1/classification%s.test" % dataset
        data_x_train, data_y_train = data.parse_data_with_labels(
            os.path.abspath(path_train), dimension=2, delimiter="\t")
        data_x_test, data_y_test = data.parse_data_with_labels(
            os.path.abspath(path_test), dimension=2, delimiter="\t")

        # LDA
        lda_models[dataset] = LDA(data_x_train, data_y_train, data_x_test,
                                  data_y_test, dataset_name=dataset)
        lda_models[dataset].train()
        print "\nLDA_Dataset_%s:\n" % dataset
        print "The bernoulli parameter pi is \n%s\n" % lda_models[dataset].pi
        print "The mean for the class {y=0} is \n%s\n" % lda_models[
            dataset].mu_0
        print "The mean for the class {y=1} is \n%s\n" % lda_models[
            dataset].mu_1
        print "Sigma is: \n%s\n" % lda_models[dataset].sigma
        # compute_misclassification_err() is indexed [0] for train, [1] for test.
        print "Training misclassification error is: %.2f %%\n" % (
            lda_models[dataset].compute_misclassification_err()[0] * 100)
        print "Test misclassification error is: %.2f %%\n" % (
            lda_models[dataset].compute_misclassification_err()[1] * 100)

        # Logistic Regression
        print "\nLogistic_Regression_Dataset_%s:\n" % dataset
        # Initial weight vector for the iterative solver.
        w0 = np.array([[0, 0, 1]]).T
        logistic_reg_models[dataset] = LogisticRegression(data_x_train,
                                                          data_y_train,
                                                          w0,
                                                          data_x_test,
                                                          data_y_test,
                                                          dataset_name=dataset,
                                                          nb_iterations=20,
                                                          lambda_val=0.01)
        logistic_reg_models[dataset].train()
        print "\nThe learnt parameter w is: \n%s\n" % logistic_reg_models[
            dataset].w
        print "\nThe learnt parameter b is: \n%s\n" % logistic_reg_models[
            dataset].b
        print "Training misclassification error is: %.2f %%\n" % (
            logistic_reg_models[dataset].compute_misclassification_err()[0] *
            100.)
        print "Test misclassification error is: %.2f %%\n" % (
            logistic_reg_models[dataset].compute_misclassification_err()[1] *
            100.)

        # Linear Regression
        print "\nLinear_Regression_Dataset_%s\n" % dataset
        lin_reg_models[dataset] = LinearRegression(data_x_train,
                                                   data_y_train,
                                                   data_x_test,
                                                   data_y_test,
                                                   dataset_name=dataset,
                                                   lambda_val=0)
        lin_reg_models[dataset].train()
        print "The learnt parameter w is: \n%s\n" % lin_reg_models[dataset].w
        # NOTE(review): this prints the LOGISTIC model's b inside the linear
        # regression section — likely a copy-paste slip for
        # lin_reg_models[dataset].b; confirm that attribute exists before fixing.
        print "\nThe learnt parameter b is: \n%s\n" % logistic_reg_models[
            dataset].b
        print "The variance of Y computed for the latter w is: \n%s\n" % lin_reg_models[
            dataset].variance
        print "Training misclassification error is: %.2f %%\n" % (
            lin_reg_models[dataset].compute_misclassification_err()[0] * 100.)
        print "Test misclassification error is: %.2f %%\n" % (
            lin_reg_models[dataset].compute_misclassification_err()[1] * 100.)

        # QDA
        qda_models[dataset] = QDA(data_x_train, data_y_train, data_x_test,
                                  data_y_test, dataset_name=dataset)
        qda_models[dataset].train()
        print "\nQDA_Dataset_%s:\n" % dataset
        print "The bernoulli parameter pi is \n%s\n" % qda_models[dataset].pi
        print "The mean for the class {y=0} is \n%s\n" % qda_models[
            dataset].mu_0
        print "The mean for the class {y=1} is \n%s\n" % qda_models[
            dataset].mu_1
        print "Sigma for the class {y=0} is: \n%s\n" % qda_models[
            dataset].sigma_0
        print "Sigma for the class {y=1} is: \n%s\n" % qda_models[
            dataset].sigma_1
        print "Training misclassification error is: %.2f %%\n" % (
            qda_models[dataset].compute_misclassification_err()[0] * 100)
        print "Test misclassification error is: %.2f %%\n" % (
            qda_models[dataset].compute_misclassification_err()[1] * 100)

    # Plot training-set figures per model family: A and B on the top row,
    # C across the bottom (subplot 212 spans both bottom cells).
    for model in [lda_models, logistic_reg_models, lin_reg_models, qda_models]:
        plt.subplot(221)
        model['A'].plot()
        plt.subplot(222)
        model['B'].plot()
        plt.subplot(212)
        model['C'].plot()
        plt.show()
        # Convergence curves only exist for the logistic-regression models.
        if model == logistic_reg_models:
            plt.subplot(221)
            model['A'].plot_convergence_func()
            plt.subplot(222)
            model['B'].plot_convergence_func()
            plt.subplot(212)
            model['C'].plot_convergence_func()
            plt.show()
        # Same layout again, this time plotting the held-out test data.
        plt.subplot(221)
        model['A'].plot(test_mode=True)
        plt.subplot(222)
        model['B'].plot(test_mode=True)
        plt.subplot(212)
        model['C'].plot(test_mode=True)
        plt.show()