def getDocsTermsByClass(items):
    """Return one flat list with the terms of every item in ``items``.

    For each item, the values at index 5 and index 6 are joined with a single
    space, split into words by ``utils.extractWords`` and then passed through
    ``utils.arrayToLowerAndUnique`` (presumably lower-casing and
    de-duplicating within that one item -- confirm against utils).

    The per-item results are concatenated in input order, so a term that
    occurs in several items appears several times in the returned list.

    NOTE(review): an earlier revision hinted that ``items`` is the output of
    ``readDocsByClass(v)`` -- confirm with callers.
    """
    return [
        term
        for doc in items
        for term in utils.arrayToLowerAndUnique(
            utils.extractWords(doc[5] + ' ' + doc[6])
        )
    ]
def getTestFileToArray():
    """Read 'test.txt' and return its words as produced by utils.extractWords.

    The whole file is concatenated into one string, passed through
    ``utils.toLower`` and then split by ``utils.extractWords``; whatever
    that helper returns is returned unchanged.
    """
    fp = utils.openFile('test.txt')
    try:
        # Joining once avoids rebuilding the string for every line, and the
        # local no longer shadows the builtin ``str``.
        text = ''.join(fp)
    finally:
        # utils.openFile is not visible from here, so release the handle only
        # if it actually exposes close() (true for regular file objects).
        close = getattr(fp, 'close', None)
        if close is not None:
            close()
    return utils.extractWords(utils.toLower(text))
def getVocabulary():
    """Return the terms of every training item as one flat list.

    Loads the training items with ``readTrainingDataToArray()`` and hands
    them to ``getDocsTermsByClass()``, which performs exactly the per-item
    extraction this function previously duplicated inline (join fields 5 and
    6, ``utils.extractWords``, ``utils.arrayToLowerAndUnique``).

    NOTE(review): terms are only normalised per item, so the same term can
    occur more than once in the result when it appears in several items --
    confirm that callers taking ``len()`` of this list expect that.
    """
    return getDocsTermsByClass(readTrainingDataToArray())
def test():
    """Smoke-run the utils file helpers against 'test.txt'.

    Opens the file with ``utils.openFile``, reads it into a string with
    ``utils.readFileToString`` and runs ``utils.extractWords`` on the text.
    The results are discarded, so this only shows that the calls complete
    without raising. Returns None.
    """
    fp = utils.openFile('test.txt')
    # Named ``text`` rather than ``str`` so the builtin is not shadowed.
    text = utils.readFileToString(fp)
    utils.extractWords(text)