def getDocsTermsByClass(items):
    """Flatten the terms of every document in ``items`` into a single list.

    For each item, fields 5 and 6 are joined with one space, split into
    words with ``utils.extractWords`` and passed through
    ``utils.arrayToLowerAndUnique``. The per-item results are concatenated
    in iteration order.

    NOTE(review): fields 5 and 6 are presumably the text columns of a
    document row -- confirm against the caller. Any de-duplication happens
    per item only, so the same term can occur once for each item that
    contains it.

    Args:
        items: iterable of indexable records with string fields at
            positions 5 and 6.

    Returns:
        A new list holding the processed terms of all items.
    """
    return [
        term
        for record in items
        for term in utils.arrayToLowerAndUnique(
            utils.extractWords(record[5] + ' ' + record[6])
        )
    ]
def getTestFileToArray():
    """Read ``test.txt`` and return the words of its lower-cased content.

    The file is opened with ``utils.openFile``, all of its lines are
    concatenated, the text is lower-cased with ``utils.toLower`` and then
    split with ``utils.extractWords``.

    Returns:
        Whatever ``utils.extractWords`` produces for the lower-cased text.
    """
    fp = utils.openFile('test.txt')
    try:
        # Iterating the handle yields its lines; join them in one pass
        # instead of growing a string with repeated ``+=``.
        text = ''.join(fp)
    finally:
        # Release the handle even if reading fails.
        # NOTE(review): assumes utils.openFile returns a file-like object
        # exposing close() -- confirm.
        fp.close()
    return utils.extractWords(utils.toLower(text))
def getVocabulary():
    """Build the list of terms found in the training data.

    Every record returned by ``readTrainingDataToArray`` contributes the
    words of its fields 5 and 6 (joined with one space), extracted with
    ``utils.extractWords`` and passed through
    ``utils.arrayToLowerAndUnique``.

    NOTE(review): terms are processed per record only, so a word present
    in several records presumably appears several times in the result.
    Confirm whether callers that take ``len()`` of this list expect
    distinct terms.

    Returns:
        A list with the processed terms of all training records, in
        record order.
    """
    vocabulary = []
    for record in readTrainingDataToArray():
        words = utils.extractWords(record[5] + ' ' + record[6])
        vocabulary += utils.arrayToLowerAndUnique(words)
    return vocabulary
# Example #4
0
def test():
    """Ad-hoc smoke check: load ``test.txt`` and run the word extractor.

    Nothing is asserted or returned; the function exists to exercise the
    ``utils`` helpers by hand. The disabled lines below are manual
    debugging probes.
    """
    handle = utils.openFile('test.txt')
    text = utils.readFileToString(handle)
    words = utils.extractWords(text)
    samples = ['Hello', 'hello', 'heLLO', 'Minh']
    # print utils.arrayToLowerAndUnique(samples)
    # print words
    # print helpers.getExamples()[1]
    # print helpers.getDocsByClass(2)
    # helpers.saveTerms("hello world", 1, 4.0)
    # print len(helpers.getVocabulary())