コード例 #1
0
import time

# Aggregate word frequencies over every .txt file in the "data" folder that
# sits next to this script, printing the time spent on each file.
start_time = time.time()
thisTime = start_time  # start of the timing window for the current file
files = []  # not used in the visible part of this script
dataFolder = os.path.dirname(os.path.abspath(__file__)) + "/data"
resultFolder = os.path.dirname(os.path.abspath(__file__)) + "/result"
count = 0  # not used in the visible part of this script
commonWordList = {}  # first tuple item -> running sum of second tuple item
for i in os.listdir(dataFolder):
    if not i.endswith('.txt'):
        continue

    thisFile = os.path.join(dataFolder, i)
    # The context manager closes the handle even when one of the processing
    # steps below raises, which the previous open()/close() pair did not.
    with open(thisFile, "r", encoding="utf8") as reflection:
        processData = PreProcess(reflection.read())
        # NOTE(review): the stream was fully consumed by the read() above, so
        # this second read() returns "" and getWordList receives an empty
        # string as its first argument. Kept as-is because the meaning of
        # that parameter is not visible here - confirm what should be passed.
        wordList = processData.getWordList(reflection.read(), True)
        wordFrequency = processData.wordFrequency(wordList)

        # Presumably (word, count) pairs; only items 0 and 1 are read.
        for wordTuple in wordFrequency:
            word, frequency = wordTuple[0], wordTuple[1]
            commonWordList[word] = commonWordList.get(word, 0) + frequency

        print("--- %s seconds ---" % (time.time() - thisTime))
        thisTime = time.time()

# Open the results CSV in append mode and write the header row.
# NOTE(review): with "a+" the header line is appended again on every run of
# the script - confirm that repeated headers are intended.
# NOTE(review): the handle is not closed in the visible part of the script -
# verify that it is closed once the rows have been written.
result = open(resultFolder + "/wordfrequency.csv", "a+")
result.write("Word,WordCount\n")
# NOTE(review): this name shadows the built-in iter(); its use is not visible
# here.
iter = 0
コード例 #2
0
# Process every .txt file in the "data" folder next to this script: build a
# word list, pass it through the PreProcess helpers and findErrors, and print
# a per-file report.
dataFolder = os.path.dirname(os.path.abspath(__file__)) + "/data"
count = 0  # only referenced by the disabled skip logic below (in visible code)
for i in os.listdir(dataFolder):
    if i.endswith('.txt'):
        # Disabled debugging aid: would skip the first three matching files.
        # if count != 3:
        #     count = count + 1
        #     continue
        thisFile = os.path.join(dataFolder, i)
        # NOTE(review): no close() for this handle appears in the visible part
        # of the loop - verify it is closed further down.
        reflection = open(thisFile, "r", encoding='utf8')

        print("\n\n")

        print(os.path.basename(reflection.name))
        processData = PreProcess(reflection.read())

        # Each step below feeds the previous step's result into the next
        # PreProcess helper.
        wordList = processData.getWordList(True, True)
        # Disabled timing output:
        # print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime))
        withoutContractions = processData.removeContractions(wordList)
        # Disabled timing/debug output:
        # print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime))
        # print(withoutContractions)
        lemmaWordList = processData.lemmatizeWordList(withoutContractions)
        # NOTE(review): thisTime is not assigned anywhere in the visible part
        # of this script and is not refreshed inside the visible loop body -
        # presumably it is set earlier; verify, and confirm whether the
        # elapsed time is meant to be per file.
        print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime))

        # The result is read below through its 'errorCount' and "errorList" keys.
        spellErrors = findErrors(lemmaWordList, thisTime)

        print("File: " + os.path.basename(reflection.name))
        # The count reported here is the length of the initial wordList, not
        # of lemmaWordList.
        print("Words calculated: " + str(len(wordList)))
        print("Error Word Count: " + str(spellErrors['errorCount']))
        # Column headers for the per-error rows.
        print("ErrorWord\t\t\tCorrection\t\t\tSuggestions")
        print("---------\t\t\t----------\t\t\t-----------")
        for eachError in spellErrors["errorList"]: