def createWordDocCount():
    """Write per-document word counts to ``../initialResult/wordNumber.txt``.

    Does nothing when the output file already exists (the corpus is not even
    loaded in that case). Otherwise every document returned by
    ``document.getAllFilesContent()`` is split on single spaces and one line
    is emitted per document: ``word:count`` pairs in sorted word order, each
    pair followed by a space, the line terminated by CR LF.

    Tokens equal to a lone newline character are skipped. Because the split
    is on a single space, runs of spaces produce empty-string tokens, which
    are counted and written like any other token.
    """
    fname = "../initialResult/wordNumber.txt"
    if os.path.isfile(fname):
        return

    # Build the whole payload before touching the file system: if anything
    # here raises, no empty/partial file is left behind to make the
    # existence check above skip regeneration on the next run.
    lines = []
    for fileContent in document.getAllFilesContent():
        counts = Counter(fileContent.split(" "))
        entries = [
            word + ":" + str(counts[word]) + " "
            for word in sorted(counts)
            if word != "\n"
        ]
        lines.append("".join(entries) + "\r\n")

    # The context manager closes the handle even if the write fails.
    with open(fname, 'w') as f:
        f.write("".join(lines))
def createDocLength():
    """Write the token count of every document to ``./initialResult/docLength.txt``.

    Does nothing when the output file already exists (the corpus is not even
    loaded in that case). Otherwise one line is written per document returned
    by ``document.getAllFilesContent()``: the number of pieces obtained by
    splitting the document on single spaces, terminated by CR LF.
    """
    # NOTE(review): this path starts with "./" while the sibling generators
    # in this file write under "../initialResult/" - confirm which is
    # intended. Left unchanged so existing readers of this file keep working.
    fname = "./initialResult/docLength.txt"
    if os.path.isfile(fname):
        return

    # Compute everything first so a failure cannot leave a truncated file
    # that the existence check would later mistake for a finished one.
    lengths = [
        str(len(doc.split(" "))) + "\r\n"
        for doc in document.getAllFilesContent()
    ]

    # The context manager closes the handle even if the write fails.
    with open(fname, 'w') as f:
        f.writelines(lengths)
def createDictionary():
    """Write the corpus vocabulary to ``../initialResult/dictionary.txt``.

    Does nothing when the output file already exists (the corpus is not even
    loaded in that case). Otherwise every item returned by
    ``document.getAllFilesContent()`` is converted with ``str`` and split on
    whitespace; the distinct words are written in sorted order, separated by
    CR LF (no trailing separator).
    """
    fname = "../initialResult/dictionary.txt"
    if os.path.isfile(fname):
        return

    # Split each document on its own. Concatenating the documents with no
    # separator first would fuse the last word of one document with the
    # first word of the next whenever no whitespace sits between them.
    wordSet = set()
    for doc in document.getAllFilesContent():
        wordSet.update(str(doc).split())

    # Sorted so the file content is reproducible between runs; plain set
    # iteration order is arbitrary.
    payload = "\r\n".join(sorted(wordSet))

    # Opened only after the payload is ready, and closed by the context
    # manager, so a failure cannot leave a stale empty file behind.
    with open(fname, 'w') as f:
        f.write(payload)