# Example #1
def __test_articles(filenames, articles, labels):
    """Classify ``articles`` and print each prediction next to its true label.

    Args:
        filenames: Sequence of file names, one per article.
        articles: Article inputs, passed unchanged to
            ``supervisedTest.static_classifier_test``.
        labels: Sequence of true categories, one per article.

    A warning is printed when the three inputs differ in length. Only the
    positions present in every sequence (and in the prediction result) are
    reported, so a length mismatch no longer ends in an ``IndexError``.
    """
    print('training...')
    # Presumably returns one predicted label per article -- confirm against
    # supervisedTest.
    testPredictedLabel = supervisedTest.static_classifier_test(articles)

    # The previous check compared filenames with articles twice, so a wrong
    # number of labels was never detected.
    if not (len(filenames) == len(articles) == len(labels)):
        print("number of file names and articles and labels don't match")

    # zip() stops at the shortest input; when all lengths agree this visits
    # exactly the same rows as indexing over range(len(filenames)).
    for filename, label, predicted in zip(filenames, labels, testPredictedLabel):
        print('-------------------------------')
        # Explicit tuples keep the output identical to the former
        # comma-separated print statements and stay safe if a value is
        # itself a tuple.
        print('%s %s' % ('File name:', filename))
        print('%s %s' % ('True category: ', label))
        print('%s %s' % ('Predict category: ', predicted))
from cluster_name import cluster_name
from collections import defaultdict
from sklearn.naive_bayes import MultinomialNB
from supervisedTest import static_classifier_test
from bson.objectid import ObjectId
from sklearn.svm import SVC

# Fetch the articles via app.getArticlesByTimeStamp, using a cutoff of
# four hours before now.
new_articles = app.getArticlesByTimeStamp(time.time() - 4 * 3600)  # last four hours

# Articles without a recognised category, and their texts, kept in the same
# order so predictions can later be paired back with their articles.
unlabeled_articles = []
unlabeled_texts = []

cleanCategoriesDict = defaultdict(list)  # Maps a clean category to a list of article IDs

for article in new_articles:
    cleanCategory = cluster_name(article.category)
    if article.text is not None:
        if cleanCategory != '':  # a category was already detected
            cleanCategoriesDict[cleanCategory].append(article.id)  # Add the article ID
        else:
            # No category was matched: queue the article for the classifier.
            # NOTE(review): this branch had no body in the original (a syntax
            # error) and the two lists were never filled; the appends below
            # follow from how the lists are consumed afterwards -- confirm.
            unlabeled_articles.append(article)
            unlabeled_texts.append(article.text)

# Presumably returns one label per text, in input order -- confirm against
# supervisedTest.
predicted_labels = static_classifier_test(unlabeled_texts)

# Walk each prediction together with the article it was made for; zip() stops
# at the shorter of the two sequences.
# NOTE(review): new_cat is not used in the visible code -- the step that stores
# the predicted category may be missing or outside this view.
for new_cat, article in zip(predicted_labels, unlabeled_articles):
    # Identity test is the idiomatic None check and cannot be affected by a
    # custom __ne__ on the id object.
    if getattr(article, 'id', None) is not None:
        print(article)