for doc in doclist: print 'test doc: '+doc.encode('gbk')+'\n' doc_catalog_pro = {} wordlist = doclist[doc] for catalog_try in catalog_p: likely = catalog_p[catalog_try] for word, value in features: if word in wordlist: likely += catalog_word_p[catalog_try][word] doc_catalog_pro[catalog_try] = likely nb_prediction[doc] = doc_catalog_pro catalog_predicted = get_predicted_catalog(nb_prediction) return catalog_predicted if __name__ == '__main__': ans = predict_nb(test_doc_to_word, FeatureFiltering.global_features) print 'finished predicted by naive bayes' true_positive, predicted_num, accuracy = KNNPredict.get_accuracy(ans) file = open('E:\\TextClassificationData\\nb_content.txt', 'w') file.write('true_positive, predicted_num, accuracy, %d %d %6lf \n' \ %(true_positive, predicted_num, accuracy)) for doc in ans: file.write(doc.encode('gbk')+'\n') file.write(' '+ans[doc].encode('gbk')+'\n') file.close()
for doc in doclist: print 'test doc: ' + doc.encode('gbk') + '\n' doc_catalog_pro = {} wordlist = doclist[doc] for catalog_try in catalog_p: likely = catalog_p[catalog_try] for word, value in features: if word in wordlist: likely += catalog_word_p[catalog_try][word] doc_catalog_pro[catalog_try] = likely nb_prediction[doc] = doc_catalog_pro catalog_predicted = get_predicted_catalog(nb_prediction) return catalog_predicted if __name__ == '__main__': ans = predict_nb(test_doc_to_word, FeatureFiltering.global_features) print 'finished predicted by naive bayes' true_positive, predicted_num, accuracy = KNNPredict.get_accuracy(ans) file = open('E:\\TextClassificationData\\nb_content.txt', 'w') file.write('true_positive, predicted_num, accuracy, %d %d %6lf \n' \ %(true_positive, predicted_num, accuracy)) for doc in ans: file.write(doc.encode('gbk') + '\n') file.write(' ' + ans[doc].encode('gbk') + '\n') file.close()
# coding: GBK
import KNNPredict
import Training
import configuration

# Python 2 script: classify the test documents with KNN and write a report.
# Steps, all delegated to KNNPredict:
#   1. getDocVector(test_files_to_words, Training.featureVector)
#   2. KNN(training_doc_vector, <test vectors>, configuration.top_k_number)
#   3. get_accuracy(prediction)
# The report starts with one summary line, followed by one line per document
# and one line per neighbor of that document.

if __name__ == '__main__':
    # The file is opened before the computation, as in the original script, so
    # an unwritable path fails immediately. The `with` block guarantees the
    # handle is flushed and closed on every exit path; the original never
    # closed it.
    with open('E:\\TextClassificationData\\test_content.txt', 'w') as testFileVectorFile:
        print('began to get testFileVector')
        testFileVector = KNNPredict.getDocVector(KNNPredict.test_files_to_words,
                                                 Training.featureVector)
        print('finished getting testFileVector')

        print('began to predicting by knn method')
        result, prediction = KNNPredict.KNN(KNNPredict.training_doc_vector,
                                            testFileVector,
                                            configuration.top_k_number)
        true_positive, predicted, accuracy = KNNPredict.get_accuracy(prediction)
        print('finished predicting')

        # Summary line: the three values returned by get_accuracy.
        testFileVectorFile.write(
            '%d %d accuracy:%.6lf' % (true_positive, predicted, accuracy) + '\n')

        for doc in result:
            # NOTE(review): under Python 2, str(x).encode('gbk') raises for a
            # unicode x holding non-ASCII characters (implicit ASCII codec).
            # Confirm the types KNNPredict.KNN returns before relying on this
            # for non-ASCII document names.
            testFileVectorFile.write(
                str(doc).encode('gbk') + ' '
                + str(prediction[doc]).encode('gbk') + '\n')
            # One line per neighbor: its first element, GBK-encoded
            # (presumably the neighbor's document name -- verify in KNNPredict).
            for neighbor in result[doc]:
                testFileVectorFile.write(' ' + neighbor[0].encode('gbk') + '\n')
# coding: GBK
import KNNPredict
import Training
import configuration

# Python 2 script: classify the test documents with KNN and write a report.
# Steps, all delegated to KNNPredict:
#   1. getDocVector(test_files_to_words, Training.featureVector)
#   2. KNN(training_doc_vector, <test vectors>, configuration.top_k_number)
#   3. get_accuracy(prediction)
# The report starts with one summary line, followed by one line per document
# and two lines per neighbor of that document.

if __name__ == '__main__':
    # The file is opened before the computation, as in the original script, so
    # an unwritable path fails immediately. The `with` block replaces the
    # trailing close(), which was skipped whenever an earlier statement raised.
    with open('E:\\TextClassificationData\\test_content.txt', 'w') as testFileVectorFile:
        print('began to get testFileVector')
        testFileVector = KNNPredict.getDocVector(KNNPredict.test_files_to_words,
                                                 Training.featureVector)
        print('finished getting testFileVector')

        print('began to predicting by knn method')
        result, prediction = KNNPredict.KNN(KNNPredict.training_doc_vector,
                                            testFileVector,
                                            configuration.top_k_number)
        true_positive, predicted, accuracy = KNNPredict.get_accuracy(prediction)
        print('finished predicting')

        # Summary line: the three values returned by get_accuracy.
        testFileVectorFile.write(
            '%d %d accuracy:%.6lf' % (true_positive, predicted, accuracy) + '\n')

        for doc in result:
            # NOTE(review): under Python 2, str(x).encode('gbk') raises for a
            # unicode x holding non-ASCII characters (implicit ASCII codec).
            # Confirm the types KNNPredict.KNN returns before relying on this
            # for non-ASCII document names.
            testFileVectorFile.write(
                str(doc).encode('gbk') + ' '
                + str(prediction[doc]).encode('gbk') + '\n')
            # Two lines per neighbor: element 0 GBK-encoded, then element 1 via
            # str() (presumably name and similarity score -- verify in
            # KNNPredict).
            for neighbor in result[doc]:
                testFileVectorFile.write(' ' + neighbor[0].encode('gbk') + '\n')
                testFileVectorFile.write(' ' + str(neighbor[1]) + '\n')