Beispiel #1
0
def get_classifier(feed_key):
    """Return the classifier for ``feed_key``, rebuilding it on a cache miss.

    The classifier is looked up in memcache under ``"classifier_" + feed_key``.
    On a miss a new ``Classifier`` is created, seeded with the ham/spam totals
    stored under ``SPAM_COUNT_KEY`` (when that record exists), and filled with
    one ``WordInfo`` per ``WordInfoEntity`` returned by the ancestor query on
    ``feed_key``. The totals are then raised to at least the largest per-word
    count seen, and the classifier is added to memcache under
    ``classifier.key`` before being returned.
    """
    logging.info("Getting classifier for feed " + feed_key)
    cache_key = "classifier_" + feed_key
    classifier = memcache.get(cache_key)

    # Cache hit: nothing to rebuild.
    if classifier is not None:
        return classifier

    classifier = Classifier(cache_key)
    logging.info("Reloading classifier " + str(classifier.key))

    # Seed the message totals from the stored counters, if present.
    totals = SpamCounts.get_by_key_name(SPAM_COUNT_KEY)
    if totals:
        classifier.nham = totals.nham
        classifier.nspam = totals.nspam

    entities = db.GqlQuery(
        "SELECT * FROM WordInfoEntity WHERE ANCESTOR IS :1", feed_key)

    loaded = 0
    top_spam = 0
    top_ham = 0
    for entity in entities:
        word = WordInfo()
        word.spamcount = entity.spamcount
        word.hamcount = entity.hamcount
        # Track the largest per-word counts seen across all entities.
        top_spam = max(top_spam, entity.spamcount)
        top_ham = max(top_ham, entity.hamcount)
        classifier.wordinfo[entity.word] = word
        loaded += 1

    # The totals are never left below the largest single-word count.
    if top_spam > classifier.nspam:
        classifier.nspam = top_spam
    if top_ham > classifier.nham:
        classifier.nham = top_ham

    logging.info("Max spamcount = %s, with nspam = %s", top_spam, classifier.nspam)
    logging.info("Max hamcount = %s with nham = %s", top_ham, classifier.nham)
    logging.info("Loaded %s entities", loaded)

    memcache.add(classifier.key, classifier)
    return classifier