예제 #1
0
def learnNaiveBayesText():
    """Estimate smoothed per-class term probabilities and store them.

    For every class returned by ``helpers.getV()`` and every word in
    ``helpers.getVocabulary()``, this computes the add-one estimate
    ``(nk + 1) / (n + len(vocabulary))``, where ``nk`` is the number of
    times the word occurs in the class's terms and ``n`` is the length of
    that term collection. Each estimate is printed with a running index and
    passed to ``helpers.saveTerms(word, class, probability)``.

    Returns:
        None. Results are only printed and handed to ``helpers.saveTerms``.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    print('... Learning Text ... \n')

    vocabulary = helpers.getVocabulary()
    vocabulary_count = len(vocabulary)
    examples = helpers.getExamplesCount()

    # Running index over every (class, word) pair; used only in the output.
    counter = 0

    # Start learning
    for v in helpers.getV():
        docs = helpers.getDocsByClass(v)
        docs_count = len(docs)
        docs_terms = helpers.getDocsTermsByClass(docs)

        # Prior probability of class v.
        # NOTE(review): p_v is computed but never saved or used, so the
        # stored model carries no class prior. Persist it once a helper for
        # that exists -- TODO confirm the intended storage API.
        p_v = float(docs_count) / float(examples)

        # Number of entries in docs_terms. The original comment called this
        # the count of distinct words; presumably it is the total number of
        # term occurrences for the class -- confirm against
        # helpers.getDocsTermsByClass.
        n = len(docs_terms)

        # Tally every term once per class instead of rescanning docs_terms
        # for each vocabulary word. Assumes docs_terms is a sequence of
        # hashable terms (e.g. a list of strings) -- TODO confirm.
        term_counts = Counter(docs_terms)

        # The smoothing denominator is the same for every word of the class.
        denominator = float(n + vocabulary_count)

        # For each word wk in the vocabulary
        for w in vocabulary:
            nk = term_counts[w]  # Counter yields 0 for unseen words
            p_w_v = float(nk + 1) / denominator
            print('#' + str(counter) + ':' + str(p_w_v))
            counter += 1
            # Save the estimate for this (word, class) pair.
            helpers.saveTerms(w, v, p_w_v)
    # End learning

    print('!!!! Finished !!!!\n')
예제 #2
0
def classifyNaiveBayesText():
    """Score the test document against every class and report the winner.

    The words from ``helpers.getTestFileToArray()`` are intersected with
    ``helpers.getVocabulary()``. For each class from ``helpers.getV()`` the
    score is the product of ``helpers.getTermScore(word, class)`` over that
    intersection. Every class score is printed, followed by the class with
    the highest score.

    Returns:
        None. All results are printed.
    """
    print('... Classifying document ... \n')

    # start classification
    vocabulary = helpers.getVocabulary()
    words = helpers.getTestFileToArray()
    # Distinct test-document words that also appear in the vocabulary; a
    # word repeated in the document contributes to the score only once.
    shared_terms = set(vocabulary) & set(words)

    best_class = None
    best_score = None

    for v in helpers.getV():
        # NOTE(review): a plain product of many small probabilities can
        # underflow to 0.0 on long documents, making classes tie; summing
        # logarithms would avoid that but changes the printed scores.
        vnb = 1
        for term in shared_terms:
            vnb *= helpers.getTermScore(term, v)
        print(str(v) + ':' + str(vnb))

        # Strict '>' keeps the first class on ties, matching max().
        if best_score is None or vnb > best_score:
            best_class = v
            best_score = vnb

    # Report the predicted class. The previous code printed max(results),
    # i.e. the winning score without its class, and raised ValueError when
    # there were no classes; now 'None' is printed in that case.
    print('The result is : ' + str(best_class))
    # end classification

    print('!!!! Finished !!!!\n')
    print('===== Result =====\n')
    print('==================\n')