Beispiel #1
0
def train():
    """Read "training.data", preprocess it, and pass it to ``trainModel``.

    The data object is run through ``ProD.wordSeg``,
    ``ProD.delHighFre_useless`` and ``ProD.delHighFre_psg`` in that order,
    then ``trainModel`` is called with it and the file name
    "trainData.model".
    """
    corpus = ReadFile.QAData("training.data")
    corpus.readFile()

    # Return values of the preprocessing steps are ignored, so each step
    # presumably updates ``corpus`` in place -- confirm in ProcessData.
    ProD.wordSeg(corpus)
    ProD.delHighFre_useless(corpus)
    ProD.delHighFre_psg(corpus)

    model_path = "trainData.model"
    trainModel(corpus, model_path)
Beispiel #2
0
def train():
    """Read "training.data", preprocess it, and pass it to ``trainModel``.

    The data object is run through ``ProD.wordSeg``,
    ``ProD.delHighFre_useless`` and ``ProD.delHighFre_psg`` in that order,
    then ``trainModel`` is called with it and the file name
    "trainData.model".
    """
    corpus = ReadFile.QAData("training.data")
    corpus.readFile()

    # Return values of the preprocessing steps are ignored, so each step
    # presumably updates ``corpus`` in place -- confirm in ProcessData.
    ProD.wordSeg(corpus)
    ProD.delHighFre_useless(corpus)
    ProD.delHighFre_psg(corpus)

    model_path = "trainData.model"
    trainModel(corpus, model_path)


if __name__ == "__main__":
    # Script entry point: reads "training.data", runs the ProD preprocessing
    # steps, calls test.calFre(), and then calls trainModel with the file
    # name "trainData.model".
    # NOTE(review): apart from the extra test.calFre() call this repeats the
    # body of train() above.
    # '''
    fileName = "trainData.model"
    test = ReadFile.QAData("training.data")
    test.readFile()

    # The preprocessing calls below run in this fixed order on the same
    # object; their return values are not used.
    ProD.wordSeg(test)
    ProD.delHighFre_useless(test)
    ProD.delHighFre_psg(test)
    test.calFre()
    trainModel(test, fileName)
    # '''

    # Disabled alternative steps, kept commented out by the original author.
    # '''
    # print(model.most_similar("性格"))
    # writeScore1(test, "score.data", "trainData.model")
    # writeScore2(test,"score.data")
    # '''
                corrAns = aIndex
                break
        highscore = float(scoreDic[qIndex][corrAns])
        flag = 1
        for i in range(1, len(scoreDic[qIndex])):
            if highscore < float(scoreDic[qIndex][i]):
                flag = 0
                showNum += 1
                print("\n\n=========================================")
                print(qAData.questionList[qIndex])
                print(qAData.qAnswersDic[qIndex][corrAns], "\t",
                      scoreDic[qIndex][corrAns])
                print(qAData.qAnswersDic[qIndex][i], "\t", scoreDic[qIndex][i])
                break
        '''
        if flag==0:
            print("\n\n=========================================")
            print(qAData.questionList[qIndex])
            for aIndex in range(len(scoreDic[qIndex])):
                print(qAData.qAnswersDic[qIndex][aIndex], "\t", scoreDic[qIndex][aIndex])
        '''
        if showNum >= Num:
            break


if __name__ == "__main__":
    # Script entry point: reads "develop.data", obtains scores for it from
    # "score.data" through readScores, and passes both to showErr.
    test = ReadFile.QAData("develop.data")
    test.readFile()
    scoreDic = readScores(test, "score.data")
    # The third argument is presumably the maximum number of mis-ranked
    # questions to print -- confirm against showErr's signature.
    showErr(test, scoreDic, 100)
Beispiel #4
0
    python QA_main.py fileName scoreFile
參數說明:
    fileName:問答句測試集文件名
    scoreFile:得分輸出文件名
'''

import ReadFile
import ProcessData as ProD
import WordVec
import QueTypeWay
import sys



if __name__ == "__main__":
    # Command-line entry point. Expects exactly two arguments: the input
    # data file name and the score output file name.
    #
    # The argument count is checked explicitly rather than with `assert`:
    # assertions are removed when Python runs with -O, and a failed assert
    # gives the user no hint about the expected invocation.
    if len(sys.argv) != 3:
        sys.exit("usage: python QA_main.py fileName scoreFile")
    fileName = sys.argv[1]
    scoreFile = sys.argv[2]

    # WordVec.train() runs before the input file is processed; writeScore1
    # below is given "trainData.model", presumably the file train() produces
    # -- confirm in WordVec.
    WordVec.train()

    test = ReadFile.QAData(fileName)
    test.readFile()

    # Preprocessing steps are applied in this fixed order on the same object.
    ProD.wordSeg(test)
    ProD.delHighFre_useless(test)
    ProD.delHighFre_psg(test)
    QueTypeWay.digitToSeg(test)
    test.calFre()

    WordVec.writeScore1(test, scoreFile, "trainData.model")
    # Alternative scoring calls left disabled by the original author:
    #WordVec.writeScore2(test, scoreFile)
    #WordVec.writeScore3(test, scoreFile, "trainData.model")
    print("OK, DONE.")
def calQueSegFre(qAData, fileName):
    """Write the most frequent question tokens and their frequencies.

    Counts every token in every entry of ``qAData.questionList``, calls
    ``delFile(fileName)``, and then calls ``writeFile`` once per token that
    passes the frequency filter, in order of descending count.  Each record
    written is ``[token, count, count / total_number_of_tokens]``.

    Args:
        qAData: object exposing ``questionList``, an iterable whose items
            are themselves iterables of hashable tokens (presumably the
            segmented questions -- confirm against ReadFile.QAData).
        fileName: name passed unchanged to ``delFile`` and ``writeFile``.
    """
    from collections import Counter

    counts = Counter(
        token for question in qAData.questionList for token in question
    )
    total = sum(counts.values())

    delFile(fileName)
    # most_common() sorts by descending count and keeps first-seen order
    # among tokens with equal counts, matching a stable reverse sort.
    for token, count in counts.most_common():
        # NOTE(review): the filter divides by the number of DISTINCT tokens,
        # while the ratio written below divides by the TOTAL token count --
        # confirm that this difference is intended.
        if (float(count) / len(counts)) > (freThres / 10):
            writeFile(fileName, [token, count, float(count) / total])


if __name__ == "__main__":
    # Script entry point: reads "training.data" and writes one statistics
    # file per stage. The order matters, because each statistics call sees
    # the data as left by the preprocessing steps that ran before it.
    dataFile = "training.data"
    test = ReadFile.QAData(dataFile)
    test.readFile()

    # Statistics on the data as read, before ProD.wordSeg is applied.
    calWordFre(test, "calWordFre.data")

    ProD.wordSeg(test)
    calSegFre(test, "calSegFre.data")

    ProD.delHighFre_useless(test)
    calQueSegFre(test, "queSegFre.data")
    statQueAns(test, "statQueAns.data")

    # Use the len() builtin rather than calling __len__ directly.
    print(len(test.questionList))