Ejemplo n.º 1
0
        for doc in doclist:
            print 'test doc: '+doc.encode('gbk')+'\n'
            
            doc_catalog_pro = {}
            wordlist = doclist[doc]
            
            for catalog_try in catalog_p:
                likely = catalog_p[catalog_try]
                for word, value in features:
                    if word in wordlist:
                        likely += catalog_word_p[catalog_try][word]
                doc_catalog_pro[catalog_try] = likely
            
            nb_prediction[doc] = doc_catalog_pro
    
    catalog_predicted = get_predicted_catalog(nb_prediction)
    return catalog_predicted


if __name__ == '__main__':
    ans = predict_nb(test_doc_to_word, FeatureFiltering.global_features)
    print 'finished predicted by naive bayes'
    true_positive, predicted_num, accuracy = KNNPredict.get_accuracy(ans)
    file = open('E:\\TextClassificationData\\nb_content.txt', 'w')
    file.write('true_positive, predicted_num, accuracy, %d %d %6lf \n' \
    %(true_positive, predicted_num, accuracy))
    for doc in ans:
        file.write(doc.encode('gbk')+'\n')
        file.write('    '+ans[doc].encode('gbk')+'\n')
    file.close()
    
Ejemplo n.º 2
0
        for doc in doclist:
            print 'test doc: ' + doc.encode('gbk') + '\n'

            doc_catalog_pro = {}
            wordlist = doclist[doc]

            for catalog_try in catalog_p:
                likely = catalog_p[catalog_try]
                for word, value in features:
                    if word in wordlist:
                        likely += catalog_word_p[catalog_try][word]
                doc_catalog_pro[catalog_try] = likely

            nb_prediction[doc] = doc_catalog_pro

    catalog_predicted = get_predicted_catalog(nb_prediction)
    return catalog_predicted


if __name__ == '__main__':
    ans = predict_nb(test_doc_to_word, FeatureFiltering.global_features)
    print 'finished predicted by naive bayes'
    true_positive, predicted_num, accuracy = KNNPredict.get_accuracy(ans)
    file = open('E:\\TextClassificationData\\nb_content.txt', 'w')
    file.write('true_positive, predicted_num, accuracy, %d %d %6lf \n' \
    %(true_positive, predicted_num, accuracy))
    for doc in ans:
        file.write(doc.encode('gbk') + '\n')
        file.write('    ' + ans[doc].encode('gbk') + '\n')
    file.close()
Ejemplo n.º 3
0
# coding: GBK
import KNNPredict
import Training
import configuration

if __name__ == '__main__':
    testFileVectorFile = open('E:\\TextClassificationData\\test_content.txt',
                              'w')
    testFileVector = None

    print 'began to get testFileVector'
    testFileVector = KNNPredict.getDocVector(KNNPredict.test_files_to_words,
                                             Training.featureVector)
    print 'finished getting testFileVector'

    print 'began to predicting by knn method'
    result, prediction = KNNPredict.KNN(KNNPredict.training_doc_vector,
                                        testFileVector,
                                        configuration.top_k_number)
    true_positive, predicted, accuracy = KNNPredict.get_accuracy(prediction)
    print 'finished predicting'

    testFileVectorFile.write('%d %d accuracy:%.6lf' %
                             (true_positive, predicted, accuracy) + '\n')
    for doc in result:
        testFileVectorFile.write(
            str(doc).encode('gbk') + ' ' + str(prediction[doc]).encode('gbk') +
            '\n')
        vector = result[doc]
        for neighbor in vector:
            testFileVectorFile.write('    ' + neighbor[0].encode('gbk') + '\n')
Ejemplo n.º 4
0
# coding: GBK
import KNNPredict
import Training
import configuration


if __name__ == '__main__':
    testFileVectorFile = open('E:\\TextClassificationData\\test_content.txt', 'w')
    testFileVector = None
    
    print 'began to get testFileVector'
    testFileVector = KNNPredict.getDocVector(KNNPredict.test_files_to_words, Training.featureVector)
    print 'finished getting testFileVector'
    
    print 'began to predicting by knn method'
    result, prediction = KNNPredict.KNN(KNNPredict.training_doc_vector, testFileVector, configuration.top_k_number)
    true_positive, predicted, accuracy = KNNPredict.get_accuracy(prediction)
    print 'finished predicting'
    
    testFileVectorFile.write('%d %d accuracy:%.6lf'%(true_positive, predicted, accuracy)+'\n')
    for doc in result:
        testFileVectorFile.write(str(doc).encode('gbk')+' '+str(prediction[doc]).encode('gbk')+'\n')
        vector = result[doc]
        for neighbor in vector:
            testFileVectorFile.write('    '+neighbor[0].encode('gbk')+'\n')
            testFileVectorFile.write('    '+str(neighbor[1])+'\n')
    testFileVectorFile.close()