Example #1
    def load_file(self):
        try:
            vector_list = ld.LoadFile().read_file()
        except Exception:
            self.error_lab.config(text="File not loaded")
            return  # nothing to display if the file failed to load

        rw.ResultWindow(vector_list)
Example #2
import numpy as np

import LoadFile


def getStandardedMatData(rawData):
    # Split the raw data into feature and label sets, convert both to
    # matrices, then standardize the features
    dataSet, labelSet = LoadFile.splitData(rawData)
    dataSet = np.mat(dataSet)
    labelSet = np.mat(labelSet).T
    dataSet = standardize(dataSet)
    return dataSet, labelSet
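The standardize helper is referenced above but not included in this excerpt. A minimal sketch, assuming it performs column-wise z-score standardization:

import numpy as np

def standardize(dataSet):
    # Hypothetical column-wise z-score standardization
    arr = np.asarray(dataSet, dtype=float)
    std = arr.std(axis=0)
    std[std == 0] = 1.0  # guard constant columns against division by zero
    return np.mat((arr - arr.mean(axis=0)) / std)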
Example #3
import numpy as np
import pandas as pd


def kFoldCrossValidation(rawData, processFunc, k):
    rawData = pd.DataFrame(rawData)
    n = np.shape(rawData)[0]
    step = n // k
    bestModel = None
    leastErr = float('inf')  # start at +inf so the first fold is always kept
    for i in range(k):
        if i == (k-1):
            testData = rawData.iloc[step*i: n, :]
            trainData = rawData.iloc[: step*i, :]
        else:
            testData = rawData.iloc[step*i: step*(i+1), :]
            # DataFrame.append was removed in pandas 2.0; concatenate instead
            trainData = pd.concat([rawData.iloc[: step*i, :],
                                   rawData.iloc[step*(i+1):]])
        testData, testLabel = LoadFile.splitData(testData)
        trainData, trainLabel = LoadFile.splitData(trainData)
        model = getModel(trainData, trainLabel)
        err = testModel(model, testData, testLabel)
        if err < leastErr:
            leastErr = err
            bestModel = model
    return bestModel  # return the best fold's model, not the last one trained
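Because the folds are taken as contiguous row blocks, data ordered by class would give skewed folds. A minimal sketch of shuffling the rows first (the shuffleRows name is an assumption; DataFrame.sample is standard pandas):

import pandas as pd

def shuffleRows(rawData, seed=42):
    # Shuffle the rows so every fold sees a representative mix of samples
    return pd.DataFrame(rawData).sample(frac=1, random_state=seed).reset_index(drop=True)

# Usage: bestModel = kFoldCrossValidation(shuffleRows(rawData), processFunc, k=10)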
Example #4
#HAItech,2018.02.14
from useful import *
from random import *
from LoadFile import *
from PrintFile import *

courses = LoadFile("/root/a/useful1.txt")

space_time = SpaceTime()

length = len(courses)
# length=3
mincredit = 9
bixiucredit = 3
sequence = range(1, length)
aimcredit = 16.5
weight = 1
POP = GeneratePopulation(courses, sequence, length)
lower = aimcredit * weight + mincredit + bixiucredit
record = [[] for score in range(int(aimcredit * (weight + 1) - lower) * 10)]

Maxscore = [-1] * 5000
Maxscore[-1] = 0
# Stop once the best score equals the value from 4,998 generations earlier
while Maxscore[-1] != Maxscore[-4999]:
    POP, record = Fitness(POP, courses, aimcredit, weight, record, lower)

    Maxscore.append(POP.maxscore)
    POP = ReproduceOffspring(POP, sequence, length)
    POP.generation += 1

c = PrintFile("/root/a/result.txt", record, courses)
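The Maxscore list implements a sliding convergence test: the loop stops when the newest best score equals the best score from 4,998 generations earlier. A bounded-deque equivalent of the same stopping rule, as a sketch rather than the original code:

from collections import deque

WINDOW = 4999  # mirrors comparing Maxscore[-1] with Maxscore[-4999]
history = deque([-1] * (WINDOW - 1) + [0], maxlen=WINDOW)
while history[-1] != history[0]:
    # ... run Fitness / ReproduceOffspring for one generation ...
    history.append(POP.maxscore)  # POP comes from the surrounding script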
Example #5
import LoadFile

if __name__ == "__main__":
    try:
        shpfilePath = r"E:\data\moto_trackdata_4city_shp\755\000159_20171127_20171203.shp"
        shapeFac = LoadFile.LoadTraceDataFromShapefileFactory(
            args=(shpfilePath,))  # trailing comma makes args a one-element tuple
        # trace data ordered by person, time, motiontype
        OnetraceDF = shapeFac.getTraceData()
        # trace segment / stop-cluster detection
    except Exception as e:  # Python 3 syntax; "except Exception, e" is Python 2 only
        print(e)             # Exception.message no longer exists in Python 3
Example #6
    def testOne(self):
        self.assertEqual(
            LoadFile.get_responses(),
            {'436': '436 <FAIL>##ROUTER=KIMENG##',
             '802': '802 <APP_ID=d26d9b499041e735d00c84676b58959a|SEC_CODES=|ROUND=2|ROUND-OPTION=3>##ROUTER=KIMENG,HTTPS=1,CLIENT_TYPE=ANDROID,VERSION=1.0.1,PLATFORM_MODE=development,TIME_OUT=25,MESSAGE_ID=428642b311979cfd9e40ebe8d19d8b07,PROTO_REV=2,ECHO=8f55f93d5f6b5f6f79a6b2abc6893565_0##'})
Example #7
    def setUp(self):
        LoadFile.load()
Example #8
    return math.sqrt(z)


def minmaxNormalize(dataSet):
    # Map the data into [0, 1]
    dataSet = pd.DataFrame(dataSet)
    minDf = dataSet.min()
    maxDf = dataSet.max()
    normalizedSet = (dataSet - minDf) / (maxDf - minDf)
    return normalizedSet


def zscoreStanderize(dataSet):
    # Standardize the data with the z-score method
    dataSet = pd.DataFrame(dataSet)
    meanDf = dataSet.mean()
    stdDf = dataSet.std()
    standerizedSet = (dataSet - meanDf) / stdDf
    return standerizedSet


if __name__ == "__main__":
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    # rawData = LoadFile.loadCSV(filePath + "Pima.csv")
    rawData = LoadFile.loadCSV(filePath + "diabetesN.csv")
    dataSet, labelSet = LoadFile.splitData(rawData)
    # dataSet = solveMissingData(dataSet)
    # dataSet = minmaxNormalize(dataSet)
    dataSet = zscoreStanderize(dataSet)
    print(dataSet)
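Both scalers divide by a per-column spread, so a constant column yields NaN. A guarded min-max variant as a sketch (the name minmaxNormalizeSafe is hypothetical):

import pandas as pd

def minmaxNormalizeSafe(dataSet):
    # Constant columns have zero range; map them to 0 instead of NaN
    dataSet = pd.DataFrame(dataSet)
    span = (dataSet.max() - dataSet.min()).replace(0, 1)
    return (dataSet - dataSet.min()) / span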
Example #9
    sortResult = sorted(labelsCnt.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
    return sortResult[0][0]


# Split the data and labels into left/right subsets on feature `feat` at threshold `val`
def splitToFeat(dataSet, labelSet, feat, val):
    dataSet = np.array(dataSet)
    labelSet = np.array(labelSet)
    leftData = dataSet[np.nonzero(dataSet[:, feat] < val)[0]]
    leftLabel = labelSet[np.nonzero(dataSet[:, feat] < val)[0]]
    rightData = dataSet[np.nonzero(dataSet[:, feat] >= val)[0]]
    rightLabel = labelSet[np.nonzero(dataSet[:, feat] >= val)[0]]
    return leftData, leftLabel, rightData, rightLabel


if __name__ == '__main__':
    # Read in the file
    # (uses a data set found online)
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    rawData = LoadFile.loadCSV(filePath + "cartDS.csv")

    # Preprocessing
    dataSet, labelSet = LoadFile.splitData(rawData)
    tree = buildTree(dataSet, labelSet)

    # Test data
    testVec = np.array([7, 3.2, 4.7, 1.4])
    print(classify(tree, testVec))
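A tiny worked example of splitToFeat on made-up values:

import numpy as np

toyData = np.array([[1.0, 5.0],
                    [2.0, 3.0],
                    [3.0, 8.0]])
toyLabel = np.array([0, 1, 1])
# Split on feature 1 at threshold 5.0: rows with feat < 5 go left, the rest right
lD, lL, rD, rL = splitToFeat(toyData, toyLabel, feat=1, val=5.0)
# lD == [[2., 3.]], lL == [1]; rD == [[1., 5.], [3., 8.]], rL == [0, 1]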
Example #10
import LoadFile
import GetLCS
import ListOperate
import time
start = time.perf_counter()  # time.clock() was removed in Python 3.8

symbol_list_readfile = LoadFile.loadfile()

# Deduplicate without loading the file a second time
symbol_list = list(set(symbol_list_readfile))
ExtractKeyWord = []
result_index = []
symbol_list_old = []
print(symbol_list)

matrix = [[999 for i in range(len(symbol_list))] for j in range(len(symbol_list))]
while len(symbol_list) != 1:  # and ListOperate.GetMaxElement(matrix) > 0.1 and not ListOperate.CmpList(symbol_list, symbol_list_old):
    for i in range(0, len(symbol_list)):
        for j in range(0, len(symbol_list)):
            if i == j:
                matrix[i][j] = 0
            else:
                result = GetLCS.GetLCS2(symbol_list[i], symbol_list[j])
                # Similarity: LCS length normalized by the longer sequence
                matrix[i][j] = len(result) / max(len(symbol_list[i]), len(symbol_list[j]))
    print(matrix)
    x, y = ListOperate.GetMaxElementIndex(matrix)
    print("symbol_list is " + str(symbol_list))
    print("x=" + str(x) + " y=" + str(y) + " symbol_list_length=" + str(len(symbol_list)))
    result = GetLCS.GetLCSandIndex(symbol_list[x], symbol_list[y])
    print(result)
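GetLCS.GetLCS2 is defined outside this excerpt. Assuming it returns the longest common subsequence of two sequences (only its length is used above), a standard dynamic-programming sketch of that computation:

def lcs(a, b):
    # Classic O(len(a) * len(b)) longest-common-subsequence DP
    m, n = len(a), len(b)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if a[i - 1] == b[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    # Walk the table backwards to recover one LCS
    out, i, j = [], m, n
    while i and j:
        if a[i - 1] == b[j - 1]:
            out.append(a[i - 1])
            i -= 1
            j -= 1
        elif dp[i - 1][j] >= dp[i][j - 1]:
            i -= 1
        else:
            j -= 1
    return out[::-1]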
Example #11
                   np.diag((1-px).getA1()) * dataSet) / n + 0.001
        weights = weights - Hessian.I * grad
    return weights


def calcAccuracy(rawData, lrFunc, alpha=0.001, maxCycles=500):
    # Compute the classification accuracy of the trained regression
    dataSet, labelSet = getStandardedMatData(rawData)
    dataSize = np.shape(dataSet)[0]

    weights = lrFunc(rawData, alpha, maxCycles)
    px = sigmoid(dataSet * weights)

    # Count how many training samples the trained regression misclassifies
    trainErrCnt = 0
    for i, pxi in enumerate(px):
        if pxi < 0.5 and labelSet[i] == 1:
            trainErrCnt += 1
        elif pxi >= 0.5 and labelSet[i] == 0:
            trainErrCnt += 1
    trainAccuracy = 1 - trainErrCnt / dataSize
    return trainAccuracy


if __name__ == "__main__":
    filePath = "F:/2020AI_SummerCamp/dataSet/"
    rawData = LoadFile.loadCSV(filePath + "diabetesN.csv")
    # rawData = pd.read_table(filePath + 'testSet.txt', header=None)

    print(calcAccuracy(rawData, newtonLogisticRegression))
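The newtonLogisticRegression body is truncated above; the visible fragment matches a Newton step whose Hessian is X^T diag(p(1-p)) X / n. A self-contained sketch under that assumption (the ridge term 0.001 * I stands in for the fragment's scalar + 0.001, and sigmoid is assumed to be the usual logistic function):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def newtonLogisticRegressionSketch(dataSet, labelSet, maxCycles=10):
    # dataSet: n x m np.matrix; labelSet: n x 1 np.matrix of 0/1 labels
    n, m = np.shape(dataSet)
    weights = np.mat(np.zeros((m, 1)))
    for _ in range(maxCycles):
        px = sigmoid(dataSet * weights)                # n x 1 predicted probabilities
        grad = dataSet.T * (px - labelSet) / n         # m x 1 gradient
        W = np.diag(np.multiply(px, 1 - px).getA1())   # n x n diag(p(1 - p))
        Hessian = dataSet.T * W * dataSet / n + 0.001 * np.eye(m)
        weights = weights - Hessian.I * grad           # Newton update
    return weights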