def load_file(self):
    try:
        vector_list = ld.LoadFile().read_file()
    except Exception:
        # Report the failure in the status label and stop; without this
        # early return, vector_list would be unbound below
        self.error_lab.config(text="File not loaded")
        return
    rw.ResultWindow(vector_list)
def getStandardedMatData(rawData):
    # Split the raw data into a data set and a label set, convert both
    # to matrices, then standardize the data set
    dataSet, labelSet = LoadFile.splitData(rawData)
    dataSet = np.mat(dataSet)
    labelSet = np.mat(labelSet).T
    dataSet = standardize(dataSet)
    return dataSet, labelSet
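`standardize` is not defined in this snippet; a minimal z-score sketch that works on the `np.mat` objects used above could look like the following (the column-wise mean/std choice is an assumption):

import numpy as np

def standardize(dataSet):
    # Hypothetical helper: z-score each column of a numpy matrix
    means = dataSet.mean(axis=0)
    stds = dataSet.std(axis=0)
    return (dataSet - means) / stds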
def kFoldCrossValidation(rawData, processFunc, k):
    # Split the data into k folds; train on k-1 folds, test on the
    # remaining fold, and keep the model with the lowest test error.
    # processFunc is accepted but currently unused.
    rawData = pd.DataFrame(rawData)
    n = np.shape(rawData)[0]
    step = n // k
    bestModel = None
    leastErr = float('inf')
    for i in range(k):
        if i == (k - 1):
            # The last fold absorbs any leftover rows
            testData = rawData.iloc[step * i: n, :]
            trainData = rawData.iloc[: step * i, :]
        else:
            testData = rawData.iloc[step * i: step * (i + 1), :]
            trainData = pd.concat([rawData.iloc[: step * i, :],
                                   rawData.iloc[step * (i + 1):]])
        testData, testLabel = LoadFile.splitData(testData)
        trainData, trainLabel = LoadFile.splitData(trainData)
        model = getModel(trainData, trainLabel)
        err = testModel(model, testData, testLabel)
        if err < leastErr:
            leastErr = err
            bestModel = model
    return bestModel
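`getModel` and `testModel` are referenced above but not defined in this snippet. As one hypothetical pairing, a least-squares fit scored by mean squared error on the held-out fold could back them:

import numpy as np

def getModel(trainData, trainLabel):
    # Hypothetical stand-in: ordinary least squares via the pseudo-inverse
    X = np.array(trainData, dtype=float)
    y = np.array(trainLabel, dtype=float)
    return np.linalg.pinv(X) @ y

def testModel(model, testData, testLabel):
    # Hypothetical stand-in: mean squared error on the held-out fold
    X = np.array(testData, dtype=float)
    y = np.array(testLabel, dtype=float)
    return float(np.mean((X @ model - y) ** 2))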
# HAItech, 2018.02.14
from useful import *
from random import *
from LoadFile import *
from PrintFile import *

courses = LoadFile("/root/a/useful1.txt")
space_time = SpaceTime()
length = len(courses)  # length = 3
mincredit = 9
bixiucredit = 3  # credits from compulsory courses
sequence = range(1, length)
aimcredit = 16.5
weight = 1

POP = GeneratePopulation(courses, sequence, length)
lower = aimcredit * weight + mincredit + bixiucredit
record = [[] for score in range(int(aimcredit * (weight + 1) - lower) * 10)]

# Stop once the best score has not improved over the last 4999 generations
Maxscore = [-1] * 5000
Maxscore[-1] = 0
while Maxscore[-1] != Maxscore[-4999]:
    POP, record = Fitness(POP, courses, aimcredit, weight, record, lower)
    Maxscore.append(POP.maxscore)
    POP = ReproduceOffspring(POP, sequence, length)
    POP.generation = POP.generation + 1

c = PrintFile("/root/a/result.txt", record, courses)
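The `Maxscore` list acts as a stall detector: the loop exits once the newest best fitness equals the value recorded 4999 generations earlier. The same idea, isolated into a helper (names are illustrative, not from the project):

def has_stalled(scores, window=4999):
    # True once the newest best score equals the one `window` entries back
    return len(scores) >= window and scores[-1] == scores[-window]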
import LoadFile

if __name__ == "__main__":
    try:
        shpfilePath = r"E:\data\moto_trackdata_4city_shp\755\000159_20171127_20171203.shp"
        # Note the trailing comma: args is presumably a one-element tuple
        shapeFac = LoadFile.LoadTraceDataFromShapefileFactory(
            args=(shpfilePath,))
        # Trace data ordered by person, time, motion type
        OnetraceDF = shapeFac.getTraceData()
        # Trace segment stop-cluster detection
    except Exception as e:
        print(e)
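`LoadTraceDataFromShapefileFactory` is project code whose internals are not shown. A minimal sketch of what loading a shapefile into a DataFrame might involve, assuming the pyshp (`shapefile`) and pandas packages (the helper name is hypothetical and the field schema is whatever the file carries):

import shapefile
import pandas as pd

def load_trace_dataframe(path):
    # Read attribute records from a shapefile into a pandas DataFrame
    sf = shapefile.Reader(path)
    fields = [f[0] for f in sf.fields[1:]]  # skip the DeletionFlag field
    rows = [list(rec) for rec in sf.records()]
    return pd.DataFrame(rows, columns=fields)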
def testOne(self):
    self.assertEqual(
        LoadFile.get_responses(),
        {'436': '436 <FAIL>##ROUTER=KIMENG##',
         '802': '802 <APP_ID=d26d9b499041e735d00c84676b58959a|SEC_CODES=|ROUND=2|ROUND-OPTION=3>##ROUTER=KIMENG,HTTPS=1,CLIENT_TYPE=ANDROID,VERSION=1.0.1,PLATFORM_MODE=development,TIME_OUT=25,MESSAGE_ID=428642b311979cfd9e40ebe8d19d8b07,PROTO_REV=2,ECHO=8f55f93d5f6b5f6f79a6b2abc6893565_0##'})
def setUp(self):
    LoadFile.load()
    return math.sqrt(z)

def minmaxNormalize(dataSet):
    # Map each feature onto [0, 1]
    dataSet = pd.DataFrame(dataSet)
    minDf = dataSet.min()
    maxDf = dataSet.max()
    normalizedSet = (dataSet - minDf) / (maxDf - minDf)
    return normalizedSet

def zscoreStanderize(dataSet):
    # Standardize the data with the z-score method
    dataSet = pd.DataFrame(dataSet)
    meanDf = dataSet.mean()
    stdDf = dataSet.std()
    standerizedSet = (dataSet - meanDf) / stdDf
    return standerizedSet

if __name__ == "__main__":
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    # rawData = LoadFile.loadCSV(filePath + "Pima.csv")
    rawData = LoadFile.loadCSV(filePath + "diabetesN.csv")
    dataSet, labelSet = LoadFile.splitData(rawData)
    # dataSet = solveMissingData(dataSet)
    # dataSet = minmaxNormalize(dataSet)
    dataSet = zscoreStanderize(dataSet)
    print(dataSet)
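A quick check of the two scalers on a toy frame (illustrative data, not from the course files):

import pandas as pd

toy = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 40.0]})
print(minmaxNormalize(toy))   # each column mapped onto [0, 1]
print(zscoreStanderize(toy))  # each column with mean 0, sample std 1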
    sortResult = sorted(labelsCnt.items(), key=operator.itemgetter(1), reverse=True)
    return sortResult[0][0]

# Split the data set and labels on a feature threshold via boolean indexing
def splitToFeat(dataSet, labelSet, feat, val):
    dataSet = np.array(dataSet)
    labelSet = np.array(labelSet)
    leftData = dataSet[np.nonzero(dataSet[:, feat] < val)[0]]
    leftLabel = labelSet[np.nonzero(dataSet[:, feat] < val)[0]]
    rightData = dataSet[np.nonzero(dataSet[:, feat] >= val)[0]]
    rightLabel = labelSet[np.nonzero(dataSet[:, feat] >= val)[0]]
    return leftData, leftLabel, rightData, rightLabel

if __name__ == '__main__':
    # Read the file (the data set is one found online)
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    rawData = LoadFile.loadCSV(filePath + "cartDS.csv")
    # Preprocess
    dataSet, labelSet = LoadFile.splitData(rawData)
    tree = buildTree(dataSet, labelSet)
    # Classify a test sample
    testVec = np.array([7, 3.2, 4.7, 1.4])
    print(classify(tree, testVec))
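`buildTree` is not shown here; one hypothetical way it could choose a split is to scan every (feature, value) pair with `splitToFeat` and keep the one minimizing the weighted Gini impurity (a sketch, not necessarily the author's criterion):

import numpy as np

def giniImpurity(labels):
    # 1 minus the sum of squared class proportions
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - np.sum(p ** 2)

def chooseBestSplit(dataSet, labelSet):
    # Hypothetical helper: exhaustive search over (feature, value) pairs
    dataSet = np.array(dataSet)
    labelSet = np.array(labelSet)
    n = len(labelSet)
    bestGini, bestFeat, bestVal = float('inf'), None, None
    for feat in range(dataSet.shape[1]):
        for val in np.unique(dataSet[:, feat]):
            _, lLab, _, rLab = splitToFeat(dataSet, labelSet, feat, val)
            if len(lLab) == 0 or len(rLab) == 0:
                continue
            gini = (len(lLab) * giniImpurity(lLab) +
                    len(rLab) * giniImpurity(rLab)) / n
            if gini < bestGini:
                bestGini, bestFeat, bestVal = gini, feat, val
    return bestFeat, bestVal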
import LoadFile
import GetLCS
import ListOperate
import time

start = time.perf_counter()
symbol_list_readfile = LoadFile.loadfile()
symbol_list = LoadFile.loadfile()
symbol_list = list(set(symbol_list))  # drop duplicate symbols
ExtractKeyWord = []
result_index = []
symbol_list_old = []
print(symbol_list)

# Pairwise similarity matrix, initialized with a sentinel value
matrix = [[999 for i in range(len(symbol_list))] for j in range(len(symbol_list))]
while len(symbol_list) != 1:  # and ListOperate.GetMaxElement(matrix) > 0.1 and not ListOperate.CmpList(symbol_list, symbol_list_old):
    for i in range(0, len(symbol_list)):
        for j in range(0, len(symbol_list)):
            if i == j:
                matrix[i][j] = 0
            else:
                # Similarity = LCS length / length of the longer string
                result = GetLCS.GetLCS2(symbol_list[i], symbol_list[j])
                matrix[i][j] = len(result) / max(len(symbol_list[i]), len(symbol_list[j]))
    print(matrix)
    x, y = ListOperate.GetMaxElementIndex(matrix)
    print("symbol_list is " + str(symbol_list))
    print("x=" + str(x) + " y=" + str(y) + " symbol_list_length=" + str(len(symbol_list)))
    result = GetLCS.GetLCSandIndex(symbol_list[x], symbol_list[y])
    print(result)
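`GetLCS.GetLCS2` is project code; the way its result's `len()` is used above is consistent with a standard dynamic-programming longest common subsequence that returns the subsequence itself. A textbook sketch, not the project's implementation:

def lcs(a, b):
    # Classic O(len(a) * len(b)) longest-common-subsequence table,
    # carrying the subsequence string in each cell
    m, n = len(a), len(b)
    dp = [[""] * (n + 1) for _ in range(m + 1)]
    for i in range(m):
        for j in range(n):
            if a[i] == b[j]:
                dp[i + 1][j + 1] = dp[i][j] + a[i]
            else:
                dp[i + 1][j + 1] = max(dp[i][j + 1], dp[i + 1][j], key=len)
    return dp[m][n]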
                     np.diag((1 - px).getA1()) * dataSet) / n + 0.001
        weights = weights - Hessian.I * grad
    return weights

def calcAccuracy(rawData, lrFunc, alpha=0.001, maxCycles=500):
    # Compute the regression's accuracy on the training set
    dataSet, labelSet = getStandardedMatData(rawData)
    dataSize = np.shape(dataSet)[0]
    weights = lrFunc(rawData, alpha, maxCycles)
    px = sigmoid(dataSet * weights)
    # Count how many training samples the learned function misclassifies
    trainErrCnt = 0
    for i, pxi in enumerate(px):
        if pxi < 0.5 and labelSet[i] == 1:
            trainErrCnt += 1
        elif pxi >= 0.5 and labelSet[i] == 0:
            trainErrCnt += 1
    trainAccuracy = 1 - trainErrCnt / dataSize
    return trainAccuracy

if __name__ == "__main__":
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    rawData = LoadFile.loadCSV(filePath + "diabetesN.csv")
    # rawData = pd.read_table(filePath + 'testSet.txt', header=None)
    print(calcAccuracy(rawData, newtonLogisticRegression))
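`sigmoid` is assumed by both functions, and the truncated Hessian expression at the top belongs to a Newton-Raphson update. A sketch of the standard definitions, not necessarily the author's exact code:

import numpy as np

def sigmoid(x):
    # Logistic function, elementwise on a numpy matrix
    return 1.0 / (1.0 + np.exp(-x))

def newtonStep(dataSet, labelSet, weights):
    # One Newton-Raphson update for logistic regression:
    # gradient = X^T (p - y) / n,  Hessian = X^T W X / n,  W = diag(p(1-p))
    n = np.shape(dataSet)[0]
    px = sigmoid(dataSet * weights)
    grad = dataSet.T * (px - labelSet) / n
    W = np.mat(np.diag(np.multiply(px, 1 - px).getA1()))
    Hessian = dataSet.T * W * dataSet / n + 0.001  # ridge term, as above
    return weights - Hessian.I * grad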