def get_classifier(feed_key):
    """Return the classifier for ``feed_key``, rebuilding it on a cache miss.

    The classifier is looked up in memcache under ``"classifier_" + feed_key``.
    When nothing is cached, a new ``Classifier`` is created and populated:

    * ``nham`` / ``nspam`` are copied from the ``SpamCounts`` entity stored
      under ``SPAM_COUNT_KEY``, if that entity exists;
    * one ``WordInfo`` per ``WordInfoEntity`` whose ancestor is ``feed_key``
      is placed in ``classifier.wordinfo``, keyed by the entity's word;
    * ``nspam`` / ``nham`` are raised to the largest per-word spam / ham
      count seen whenever that count exceeds the current total.

    The rebuilt classifier is then offered to memcache via ``memcache.add``
    under ``classifier.key`` and returned.

    Args:
        feed_key: Identifier of the feed; concatenated onto string literals
            and passed as the ancestor argument of the GQL query.

    Returns:
        The cached classifier, or the freshly rebuilt one.
    """
    logging.info("Getting classifier for feed " + feed_key)
    classifier_key = "classifier_" + feed_key

    # Fast path: a cached classifier is handed back untouched.
    cached = memcache.get(classifier_key)
    if cached is not None:
        return cached

    classifier = Classifier(classifier_key)
    logging.info("Reloading classifier " + str(classifier.key))

    # Seed the message totals from the stored counters when they exist.
    totals = SpamCounts.get_by_key_name(SPAM_COUNT_KEY)
    if totals:
        classifier.nham = totals.nham
        classifier.nspam = totals.nspam

    entities = db.GqlQuery(
        "SELECT * FROM WordInfoEntity WHERE ANCESTOR IS :1", feed_key)

    loaded = 0
    top_spam = 0
    top_ham = 0
    for entity in entities:
        record = WordInfo()
        # Track the largest per-word counts seen so far.
        if entity.spamcount > top_spam:
            top_spam = entity.spamcount
        if entity.hamcount > top_ham:
            top_ham = entity.hamcount
        record.spamcount = entity.spamcount
        record.hamcount = entity.hamcount
        classifier.wordinfo[entity.word] = record
        loaded += 1

    # Bump a total whenever some word's count exceeds it.
    # NOTE(review): presumably this keeps per-word counts from exceeding the
    # message totals used in probability calculations -- confirm.
    if top_spam > classifier.nspam:
        classifier.nspam = top_spam
    if top_ham > classifier.nham:
        classifier.nham = top_ham

    logging.info("Max spamcount = %s, with nspam = %s",
                 top_spam, classifier.nspam)
    logging.info("Max hamcount = %s with nham = %s",
                 top_ham, classifier.nham)
    logging.info("Loaded %s entities", loaded)

    # NOTE(review): stored under classifier.key, which presumably equals
    # classifier_key as passed to the Classifier constructor -- confirm.
    memcache.add(classifier.key, classifier)
    return classifier