import time

# NOTE(review): `os` and `PreProcess` are used below but are not imported in
# the visible part of this file -- presumably they are brought into scope
# elsewhere; confirm.

# Aggregate word counts over every ``.txt`` file in the ``data`` folder next
# to this script, then open the CSV that receives the combined counts.

# ``start_time`` marks the start of the whole run; ``thisTime`` is reset after
# each file so every timing line reports the time spent on that file alone.
start_time = time.time()
thisTime = start_time

files = []
dataFolder = os.path.dirname(os.path.abspath(__file__)) + "/data"
resultFolder = os.path.dirname(os.path.abspath(__file__)) + "/result"
count = 0

# word -> count summed across all processed files.
commonWordList = {}

for i in os.listdir(dataFolder):
    if i.endswith('.txt'):
        thisFile = os.path.join(dataFolder, i)

        # The context manager closes the file even when one of the PreProcess
        # calls raises; the previous open()/close() pair leaked the handle on
        # any exception.
        with open(thisFile, "r", encoding="utf8") as reflection:
            processData = PreProcess(reflection.read())
            # NOTE(review): the read() above already consumed the whole file,
            # so this second read() always yields "". The argument is kept
            # as-is to preserve behaviour; check what getWordList expects as
            # its first argument.
            wordList = processData.getWordList(reflection.read(), True)

        wordFrequency = processData.wordFrequency(wordList)
        for wordTuple in wordFrequency:
            # Each entry is indexed as (word, count); add the count to the
            # running total, starting from 0 for a word not seen before.
            commonWordList[wordTuple[0]] = (
                commonWordList.get(wordTuple[0], 0) + wordTuple[1]
            )

        print("--- %s seconds ---" % (time.time() - thisTime))
        thisTime = time.time()

# NOTE(review): "a+" appends, so the header row below is written again on
# every run of the script -- confirm whether "w" was intended.
# The handle is intentionally left open here: it is not closed anywhere in the
# visible code, presumably because later code writes the rows and closes it.
result = open(resultFolder + "/wordfrequency.csv", "a+")
result.write("Word,WordCount\n")
# NOTE(review): `iter` shadows the builtin of the same name; the name is kept
# because code outside this view may rely on it.
iter = 0
dataFolder = os.path.dirname(os.path.abspath(__file__)) + "/data" count = 0 for i in os.listdir(dataFolder): if i.endswith('.txt'): # if count != 3: # count = count + 1 # continue thisFile = os.path.join(dataFolder, i) reflection = open(thisFile, "r", encoding='utf8') print("\n\n") print(os.path.basename(reflection.name)) processData = PreProcess(reflection.read()) wordList = processData.getWordList(True, True) # print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime)) withoutContractions = processData.removeContractions(wordList) # print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime)) # print(withoutContractions) lemmaWordList = processData.lemmatizeWordList(withoutContractions) print("WordList Time: --- %s seconds ---\n\n\n\n\n" % (time.time() - thisTime)) spellErrors = findErrors(lemmaWordList, thisTime) print("File: " + os.path.basename(reflection.name)) print("Words calculated: " + str(len(wordList))) print("Error Word Count: " + str(spellErrors['errorCount'])) print("ErrorWord\t\t\tCorrection\t\t\tSuggestions") print("---------\t\t\t----------\t\t\t-----------") for eachError in spellErrors["errorList"]: