def getTypes():
    """Return all corpus tokens as one HTML fragment.

    Every item yielded by ``CCorpusManager.getTokenList()`` is emitted in
    order, each immediately followed by a ``<br/>`` tag.

    Returns:
        str: The concatenated tokens, or an empty string when the token
        list is empty.
    """
    # A single join avoids the quadratic cost of repeated ``+=`` on a string.
    # NOTE(review): tokens are emitted verbatim (no HTML escaping) -- confirm
    # that the corpus content is trusted before rendering this in a page.
    return "".join(
        strWord + "<br/>" for strWord in CCorpusManager.getTokenList()
    )
def getSourceSimilarity(pStrWord):
    """Tokenize every corpus document and return the combined token list.

    Documents are read through ``CFileManager.readFromFile`` using the
    stringified indices ``1 .. CCorpusManager.getDocumentsCount()`` and each
    one is split with ``word_tokenize``.

    Args:
        pStrWord: Currently unused. It was the query word for a similarity
            lookup that is disabled (see the TODO below).

    Returns:
        list: Tokens of all documents, in document order.
    """
    lstTokens = []
    for intCount in range(1, CCorpusManager.getDocumentsCount() + 1):
        lstTokens.extend(word_tokenize(CFileManager.readFromFile(str(intCount))))
    # TODO: the intended similarity lookup is disabled. The earlier draft
    # built nltk.text.ContextIndex([word.lower() for word in lstTokens]) and
    # returned objCI.similar_words(pStrWord); until that is restored the raw
    # token list is returned instead.
    return lstTokens
def getDocumentCount():
    """Return the value of ``CCorpusManager.getDocumentsCount()`` as a string."""
    intTotal = CCorpusManager.getDocumentsCount()
    return str(intTotal)
def getFullSource():
    """Lazily yield the text of every corpus document, one chunk per document.

    Documents are read through ``CFileManager.readFromFile`` using the
    stringified indices ``1 .. CCorpusManager.getDocumentsCount()``.

    Yields:
        str: ``"</br>"`` followed by the content of the next document.
    """
    intLast = CCorpusManager.getDocumentsCount()
    for intDocIndex in range(1, intLast + 1):
        strContent = CFileManager.readFromFile(str(intDocIndex))
        # NOTE(review): the "</br>" prefix differs from the "<br/>" separator
        # used by getTypes(); kept as-is because callers may depend on it.
        yield "</br>" + strContent