def show_score(self, id):
    """Render the Bayes score page for a single feed entry.

    When no user is signed in (``c.user`` is falsy) the request is
    redirected to the sign-in action with the current URL as ``return_to``.
    Otherwise the entry and its feed are loaded, a ``Guesser`` is built for
    that feed and user, and the template context ``c`` is populated with:

    * ``c.entry`` / ``c.feed`` -- the loaded entry and its feed,
    * ``c.score`` -- the value returned by ``guesser.guess`` for the entry,
    * ``c.is_spam`` -- the result of ``guesser.is_spam`` for the entry,
    * ``c.pool`` -- the 'spam' pool data as returned by the trainer,
    * ``c.pool_data_spam`` / ``c.pool_data_ham`` -- pool data ordered by the
      second item of each row, highest first,
    * ``c.tokens`` -- the set of tokens the trainer extracts from the entry.

    :param id: identifier passed to ``meta.find`` to load the
        ``model.FeedEntry``.
    :returns: the rendered ``bayes/score.mako`` template, or the sign-in
        redirect.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))

    import operator

    c.entry = meta.find(model.FeedEntry, id)
    feed = meta.find(model.Feed, c.entry.feed_id)
    c.feed = feed

    guesser = Guesser(feed, c.user, config)

    # Score the entry once and reuse the result for both the debug log and
    # the template; nothing changes the classifier between the two uses.
    guess = guesser.guess(c.entry)
    log.debug("guess: %s" % guess)
    log.debug("c.entry.title: %s" % c.entry.title)
    c.score = guess

    c.pool = guesser.trainer.poolData('spam')
    c.is_spam = guesser.is_spam(c.entry)

    # sort() followed by reverse() is kept on purpose: sort(reverse=True)
    # would order rows with equal counts differently.
    by_count = operator.itemgetter(1)

    c.pool_data_spam = guesser.trainer.poolData('spam')
    c.pool_data_spam.sort(key=by_count)
    c.pool_data_spam.reverse()

    c.pool_data_ham = guesser.trainer.poolData('ham')
    c.pool_data_ham.sort(key=by_count)
    c.pool_data_ham.reverse()

    c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))
    return render('bayes/score.mako')
def mixed_rss_with_report(self, user_id, id):
    """Build a feed of ham entries interleaved with spam reports.

    Loads the user and the feed, fetches the feed, and classifies every
    entry from the user's stored ``report_offset`` onwards (all entries when
    no offset is stored). The entries are handed to a ``Reporter``; each box
    in its ``entry_queue`` of type ``'ham'`` becomes one feed item, each box
    of type ``'spam'`` is passed to ``add_spam_report``. Afterwards the new
    offset reported by the ``Reporter`` is stored on the user's settings and
    the session is committed.

    :param user_id: identifier passed to ``meta.find`` to load the
        ``model.User`` the feed is generated for.
    :param id: identifier passed to ``meta.find`` to load the ``model.Feed``.
    :returns: the result of ``feed.writeString('utf-8')``.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)

    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)

    # The return value of fetch() is not used here.
    # NOTE(review): presumably this refreshes the stored entries -- confirm.
    feed_data.fetch()

    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )

    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)

    guesser = Guesser(feed_data, c.rss_user, config)

    settings = c.rss_user.get_bayes_feed_setting(feed_data.id)
    meta.Session.add(settings)

    delta = h.timedelta_from_string(settings.summarize_at)
    log.debug("delta %s" % delta)

    if not settings.report_offset:
        # No offset stored yet: start from the very first entry.
        entries = feed_data.get_entries().order_by(model.FeedEntry.id).all()
        log.warning("no report_offset available, read %s entries" % len(entries))
    else:
        entries = (
            feed_data.get_entries()
            .filter(model.FeedEntry.id >= settings.report_offset)
            .order_by(model.FeedEntry.id)
            .all()
        )

    from rssmonster.lib.reporter import Reporter
    reporter = Reporter(None, None, delta, 30)

    for entry in entries:
        reporter.add_item(entry, guesser.is_spam(entry))

    for entry_box in reporter.entry_queue:
        box_type = entry_box['type']
        if box_type == 'ham':
            # The summary template reads the entry from the template context.
            c.entry = entry_box['entry']
            c.entry.is_spam = guesser.is_spam(c.entry)
            feed.add_item(
                title=c.entry.title,
                link=c.entry.link,
                description=render('bayes/rss_summary.mako'),
                unique_id=c.entry.uid,
                pubdate=c.entry.updated,
            )
        elif box_type == 'spam':
            add_spam_report(feed, entry_box['entries'])

    # Remember where the next report has to start.
    settings.report_offset = reporter.offset_id()
    log.debug("settings.report_offset: %s" % settings.report_offset)
    log.debug("holding back: %s" % len(reporter.spam_entries))

    meta.Session.commit()
    return feed.writeString('utf-8')
def __mixed_rss__(self, user_id, id):
    """Build a feed of the 30 most recently updated entries, spam included.

    Loads the user and the feed, fetches the feed, and classifies the 30
    entries with the latest ``updated`` value. Entries classified as spam
    keep their place in the feed but get a ``[SPAM]`` title prefix. The
    response content type is set to ``application/atom+xml``.

    :param user_id: identifier passed to ``meta.find`` to load the
        ``model.User`` the feed is generated for.
    :param id: identifier passed to ``meta.find`` to load the ``model.Feed``.
    :returns: the result of ``feed.writeString('utf-8')``.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)

    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)

    # The return value of fetch() is not used here.
    # NOTE(review): presumably this refreshes the stored entries -- confirm.
    feed_data.fetch()

    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )

    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)

    guesser = Guesser(feed_data, c.rss_user, config)

    entries = (
        feed_data.get_entries()
        .order_by(model.FeedEntry.updated.desc())
        .limit(30)
    )
    for entry in entries:
        # The summary template reads the entry from the template context.
        c.entry = entry
        c.entry.is_spam = guesser.is_spam(entry)

        if c.entry.is_spam:
            title = "[SPAM] %s" % entry.title
        else:
            title = entry.title

        feed.add_item(
            title=title,
            link=entry.link,
            description=render('bayes/rss_summary.mako'),
            unique_id=entry.uid,
            pubdate=entry.updated,
        )

    response.content_type = 'application/atom+xml'
    return feed.writeString('utf-8')
def redo(self, id):
    """Clear the classifier of a feed and retrain it from stored classifications.

    When no user is signed in (``c.user`` is falsy) the request is
    redirected to the sign-in action with the current URL as ``return_to``.
    Otherwise the ``Guesser`` for the feed and user is cleared and every
    ``model.Classification`` joined to an entry of this feed is replayed
    through ``self.__mark_as__``. Before each entry is replayed the
    classifier is asked for its verdict (with ``use_classified=False``); if
    the verdict already matches the stored pool the entry is counted as
    needlessly trained. The result is reported to the user via ``h.flash``.

    :param id: identifier of the ``model.Feed``; used both to load the feed
        and to filter the classifications by ``feed_id``.
    :returns: the result of ``h.go_back()``, or the sign-in redirect.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))

    c.feed = meta.find(model.Feed, id)

    classifications = (
        meta.Session
        .query(model.Classification)
        .join(model.FeedEntry)
        .filter_by(feed_id=id)
    )

    guesser = Guesser(c.feed, c.user, config)
    guesser.clear()

    cnt = 0
    needles_cnt = 0
    for classification in classifications:
        # Ask the classifier once per entry; the verdict cannot change
        # between the spam check and the ham check.
        predicted_spam = guesser.is_spam(classification.entry,
                                         use_classified=False)
        if predicted_spam and classification.pool == 'spam':
            needles_cnt += 1
        elif not predicted_spam and classification.pool == 'ham':
            needles_cnt += 1

        self.__mark_as__(classification.entry, classification.pool,
                         guesser, True)
        cnt += 1

    guesser.save()
    log.debug("redo: replayed %s classifications, %s needlessly trained"
              % (cnt, needles_cnt))

    if needles_cnt > 0:
        h.flash("%d entries were needlessly trained (total: %s)" % (needles_cnt, cnt))
    else:
        h.flash("learned %s entries" % cnt)
    return h.go_back()