def getSourceSimilarity(pStrWord):
    """Return the tokens of every corpus document as one flat list.

    Each document is loaded by passing its id, as a string, to
    ``CFileManager.readFromFile``; ids run from 1 up to and including
    ``CCorpusManager.getDocumentsCount()``. Every text is split with
    ``word_tokenize`` and the results are concatenated in id order.

    Args:
        pStrWord: Currently unused. The word-similarity lookup this
            function is named after is not implemented (see TODO below);
            the parameter is kept so existing callers keep working.

    Returns:
        list: All tokens of all documents, in document order.
    """
    # TODO: implement the similar-word lookup. The disabled draft built an
    # nltk.text.ContextIndex from the lower-cased tokens and returned
    # objCI.similar_words(pStrWord).
    lstTokens = []
    for intCount in range(1, CCorpusManager.getDocumentsCount() + 1):
        lstTokens.extend(word_tokenize(CFileManager.readFromFile(str(intCount))))
    return lstTokens
def getSourcePOS(pIntId):
    """Return the POS tags of the stored document identified by *pIntId*.

    The document text is read with ``CFileManager.readFromFile`` (the id is
    converted to a string first), wrapped in a ``CSource`` whose first and
    third constructor arguments are the literal ``"A"``, and the result of
    that object's ``getPOSTags()`` is returned unchanged.

    Args:
        pIntId: Identifier of the document; passed through ``str()``.

    Returns:
        Whatever ``CSource.getPOSTags()`` produces for the document text.
    """
    document_text = CFileManager.readFromFile(str(pIntId))
    return CSource("A", document_text, "A").getPOSTags()
def getFullSource():
    """Yield the text of every corpus document, each prefixed by ``"</br>"``.

    Documents are read one at a time, lazily, with ids 1 up to and including
    ``CCorpusManager.getDocumentsCount()``; the count is queried once, when
    iteration starts.

    Yields:
        The string ``"</br>"`` concatenated with the document's content as
        returned by ``CFileManager.readFromFile``.
    """
    # NOTE(review): "</br>" is not a standard HTML tag ("<br>" / "<br/>" is);
    # left untouched because consumers may rely on the exact string.
    document_total = CCorpusManager.getDocumentsCount()
    for document_id in range(1, document_total + 1):
        document_text = CFileManager.readFromFile(str(document_id))
        yield "</br>" + document_text
def getSourceTokens(pIntId):
    """Return the tokens of the stored document identified by *pIntId*.

    The document text is read with ``CFileManager.readFromFile`` (the id is
    converted to a string first), wrapped in a ``CSource`` whose first and
    third constructor arguments are the literal ``"A"``, and the result of
    that object's ``getToken()`` is returned unchanged.

    Args:
        pIntId: Identifier of the document; passed through ``str()``.

    Returns:
        Whatever ``CSource.getToken()`` produces for the document text.
    """
    document_text = CFileManager.readFromFile(str(pIntId))
    return CSource("A", document_text, "A").getToken()
def getSource(pIntId):
    """Fetch a source row and replace its first field with file content.

    The row comes from ``CDALSource.getSource(pIntId)``. Its element at
    index 0 is converted to a string and handed to
    ``CFileManager.readFromFile``; the returned content is written back to
    index 0 of the same row object (the row is modified in place).

    Args:
        pIntId: Identifier forwarded to ``CDALSource.getSource``.

    Returns:
        The row object obtained from ``CDALSource.getSource``, with index 0
        now holding the content read from the file.
    """
    source_row = CDALSource.getSource(pIntId)
    file_key = str(source_row[0])
    source_row[0] = CFileManager.readFromFile(file_key)
    return source_row