def getSourceSimilarity(pStrWord):
	"""Return the tokens of every corpus document as one flat list.

	Documents are addressed by the ids 1..CCorpusManager.getDocumentsCount()
	(inclusive). Each id is converted to a string, handed to
	CFileManager.readFromFile, and the result is split with word_tokenize.
	Tokens are appended in document-id order.

	NOTE(review): pStrWord is accepted but currently unused. A disabled
	variant of this function built an nltk.text.ContextIndex over the
	lower-cased tokens and returned objCI.similar_words(pStrWord); until
	that is restored, callers receive the raw token list.
	"""
	intTotal = CCorpusManager.getDocumentsCount()
	lstTokens = []

	for intId in range(1, intTotal + 1):
		lstTokens.extend(word_tokenize(CFileManager.readFromFile(str(intId))))

	return lstTokens
def getSourcePOS(pIntId):
	"""Return the POS tags of the document identified by pIntId.

	The id is stringified and passed to CFileManager.readFromFile; the
	result is wrapped in a CSource (constructed with the placeholder
	arguments "A" in the first and third positions) and its getPOSTags()
	result is returned unchanged.
	"""
	strContent = CFileManager.readFromFile(str(pIntId))
	return CSource("A", strContent, "A").getPOSTags()
def getFullSource():
	"""Lazily yield every corpus document, each prefixed with "</br>".

	Iterates the ids 1..CCorpusManager.getDocumentsCount() (inclusive) and,
	for each one, yields the "</br>" marker concatenated with whatever
	CFileManager.readFromFile returns for the stringified id. Nothing is
	read until the generator is consumed.
	"""
	intTotal = CCorpusManager.getDocumentsCount()
	for intId in range(1, intTotal + 1):
		strBody = CFileManager.readFromFile(str(intId))
		yield "</br>" + strBody
def getSourceTokens(pIntId):
	"""Return the tokens of the document identified by pIntId.

	The id is stringified and passed to CFileManager.readFromFile; the
	result is wrapped in a CSource (constructed with the placeholder
	arguments "A" in the first and third positions) and its getToken()
	result is returned unchanged.
	"""
	strContent = CFileManager.readFromFile(str(pIntId))
	return CSource("A", strContent, "A").getToken()
def getSource(pIntId):
	"""Fetch the source row for pIntId with its first field expanded.

	The row comes from CDALSource.getSource(pIntId). Its element at index 0
	is stringified, passed to CFileManager.readFromFile, and overwritten in
	place with the result; the same (mutated) row object is returned.

	NOTE(review): assumes the row is a mutable, non-None sequence -- confirm
	against CDALSource.getSource.
	"""
	objRecord = CDALSource.getSource(pIntId)

	strFileKey = str(objRecord[0])
	objRecord[0] = CFileManager.readFromFile(strFileKey)

	return objRecord