def multiFisher():
    """Load the multi-class iris data and split it into train and test sets.

    Reads ``iris.data.txt`` through ``loadData.DataLoader``, converts the
    features and labels to NumPy arrays, and divides them with
    ``generateData.DataGenerator.randDivide``. The resulting split is only
    bound to local names; nothing is returned.
    """
    filePath = "iris.data.txt"
    bX, bY = loadData.DataLoader(filePath).multiClassifyGetData()
    mX = np.array(bX)
    # Reshape the labels into a single column. -1 lets NumPy infer the row
    # count, so the function is no longer tied to a file of exactly 150 rows.
    mY = np.array(bY).reshape(-1, 1)
    trainX, trainY, testX, testY = generateData.DataGenerator(mX, mY).randDivide()
def binaryFisher():
    """Train, plot, and test a two-class Fisher linear discriminant.

    Steps performed:

    1. Load the two-class data from ``iris.data.txt`` and split it with
       ``generateData.DataGenerator.randDivide``; every part is cast to float.
    2. Ask ``fisher.FisherLinearDiscriminant`` for ``getMiu`` and ``getSw``
       of class 1 and class 2 (presumably the class mean / sample count and
       the class scatter matrix -- confirm against the ``fisher`` module).
    3. Compute the projection vector ``w = Sw^-1 (miu2 - miu1)`` where
       ``Sw = S1 + S2``.
    4. Plot ``w`` and run ``modelTest`` with the count-weighted average of
       the two ``miu`` values.

    Nothing is returned.
    """
    filePath = "iris.data.txt"
    bX, bY = loadData.DataLoader(filePath).binaryClassifyGetData()
    bX = np.array(bX)
    # Reshape the labels into a single column; -1 infers the row count, so
    # the function is not tied to a file of exactly 100 samples.
    bY = np.array(bY).reshape(-1, 1)

    trainX, trainY, testX, testY = (
        part.astype(float)
        for part in generateData.DataGenerator(bX, bY).randDivide()
    )

    def newModel():
        # A fresh instance for every use, exactly as the original code did,
        # so no state is shared between the individual calls.
        return fisher.FisherLinearDiscriminant(trainX, trainY, testX, testY)

    miu1, length1 = newModel().getMiu(1)
    miu2, length2 = newModel().getMiu(2)

    S1 = newModel().getSw(miu1, 1)
    S2 = newModel().getSw(miu2, 2)
    # np.asmatrix is the same function the removed ``np.mat`` alias pointed
    # to; it keeps the ``.I`` (inverse) attribute used below.
    Sw = np.asmatrix(S1 + S2)

    # Column vector of the difference of the two class values; -1 infers the
    # feature count instead of assuming exactly 4 features.
    miu = (miu2 - miu1).reshape(-1, 1)
    w = np.dot(Sw.I, miu)

    newModel().plot(w)

    # Average of miu1 and miu2 weighted by the two returned lengths.
    miuAll = (length1 * miu1 + length2 * miu2) / (length1 + length2)
    newModel().modelTest(w, miuAll)
def getaccuracy(ytest, predictions):
    """Return the percentage of positions where the two sequences agree.

    Compares ``ytest[i]`` with ``predictions[i]`` for every index of
    ``ytest`` and returns ``100 * matches / len(ytest)`` as a float.

    Raises:
        ZeroDivisionError: if ``ytest`` is empty.
        IndexError: if ``predictions`` is shorter than ``ytest``.
    """
    correct = 0
    for i in range(len(ytest)):
        if ytest[i] == predictions[i]:
            correct += 1
    return (correct/float(len(ytest)))*100.0


def geterror(ytest, predictions):
    """Return the error percentage, i.e. 100 minus ``getaccuracy``."""
    return (100.0 - getaccuracy(ytest, predictions))


if __name__ == '__main__':
    dataFile = 'dataset/bank.csv'
    dataloader = ld.DataLoader(dataFile)
    numruns = 1

    # Follow the same testing format as in Assignments.
    # Maps a display name to the classifier instance to evaluate.
    classalgs = {'Random': al.Classifier(), # Baseline Algorithm
                 'Linear SVM': al.SVMClassifier(), # Linear SVM
                 'Logistic Regression L2 regularizer': al.LogisticRegressionClassifier({'regularizer':'l2', 'regularizerValue':0.01}),
                 'Logistic Regression No regularizer': al.LogisticRegressionClassifier(),
                 'Neural Network': al.NeuralNetwork(),
                }
    numalgs = len(classalgs)

    # Candidate parameter settings, one dict per configuration.
    parameters = (
        {'regwgt':0.0, 'nh':(50, ), 'regularizerValue': 0.01 },
        {'regwgt':0.0, 'nh':(100, ), 'regularizerValue': 0.1 },
import numpy as np


def trans_label(label_matrix):
    """Convert a matrix of per-sample label vectors into a list of 0/1 labels.

    Args:
        label_matrix: an ``np.matrix`` (``getA`` is called on its rows) whose
            columns each hold one sample's label vector with at least two
            entries.

    Returns:
        A list with one int per column of ``label_matrix``: ``0`` when the
        second entry of that column equals ``1.0``, otherwise ``1``.
    """
    # Iterating the transpose yields one 1 x k row matrix per sample.
    return [0 if label.getA()[0][1] == 1.0 else 1 for label in label_matrix.T]


if __name__ == "__main__":
    file_path = r'iris.txt'
    x_train, x_test, y_train, y_test = loadData.DataLoader(
        file_path).get_train_test_data()
    y_train_list = trans_label(y_train)
    y_test_list = trans_label(y_test)

    # np.asmatrix replaces the ``np.mat`` alias, which no longer exists in
    # NumPy 2.0; both names refer to the same conversion. The label lists
    # are passed as column matrices.
    logistic_model = logistic_bayes_model.LogisticBayesClassification(
        x_train, np.asmatrix(y_train_list).T,
        x_test, np.asmatrix(y_test_list).T)
    weight = logistic_model.get_weight()
    # logistic_model.load_model()
    logistic_model.save_model()

    # Each test sample is one column of x_test; print the prediction next to
    # the expected label.
    for i in range(x_test.shape[1]):
        print(logistic_model.predict(x_test[:, i:i + 1]))
        print(y_test_list[i])
        print('\n')
    print("accuracy:")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import loadData
import dataGenerate
import AIC_Logistic
import showPlt

if __name__ == '__main__':
    # Read the samples and their labels from the data file.
    loader = loadData.DataLoader("iris.data.txt")
    features, labels = loader.loadData()

    # Randomly divide the data into train and test parts (testRatio=0.3).
    splitter = dataGenerate.DataGenerator(features, labels)
    trainX, trainY, testX, testY = splitter.randDivide(testRatio=0.3)

    # Fit the logistic model: phi() produces the input handed to the
    # 150-iteration training run.
    model = AIC_Logistic.AicLogistic(trainX, trainY, testX, testY)
    weights = model.logisticTrain(model.phi(), numIter=150)

    # Plot the training data together with the learned weights, then
    # evaluate the weights with the model's own test routine.
    showPlt.ShowPlt(trainX, trainY, weights).plotMap()
    model.logisticTest(weights)
import generateData
import linearRegression


def get_RMSE(data_Y, pre_Y):
    """Return the root-mean-square error between two indexable sequences.

    The squared differences ``(data_Y[i] - pre_Y[i]) ** 2`` are summed over
    every index of ``data_Y``, divided by ``len(data_Y)``, and the square
    root of that mean is returned.

    Raises:
        ZeroDivisionError: if ``data_Y`` is empty.
        IndexError: if ``pre_Y`` is shorter than ``data_Y``.
    """
    count = len(data_Y)
    # Start from 0.0 so the accumulation is floating point from the outset.
    squared_error = sum(
        ((data_Y[idx] - pre_Y[idx]) ** 2 for idx in range(count)), 0.0)
    return (squared_error / count) ** 0.5


if __name__ == "__main__":
    file_path = 'data.txt'
    # Load the data from the file.
    X, Y = loadData.DataLoader(file_path).get_data()
    # Split the data into a training set and a test set.
    train_data_X, train_data_Y, test_data_X, test_data_Y = \
        generateData.DataGenerator(X, Y).rand_divide()
    # Train the model.
    liner_model = linearRegression.LinearRegression(train_data_X, train_data_Y)
    liner_model.train_model()
    # Predict every test sample and write one prediction per line to pre.txt.
    pre_Y = []
    f1 = open('pre.txt', 'w')
    for i in test_data_X:
        pre = liner_model.predict_model(i)
        f1.write(str(pre) + '\n')