def show_score(self, id):
    """Render the Bayes score breakdown page for a single feed entry.

    Requires a signed-in user; otherwise redirects to the login page.
    Exposes the entry, its feed, the classifier score, the sorted
    spam/ham token pools and the entry's tokens to ``bayes/score.mako``.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))
    c.entry = meta.find(model.FeedEntry, id)
    feed = meta.find(model.Feed, c.entry.feed_id)
    c.feed = feed
    guesser = Guesser(feed, c.user, config)
    guess = guesser.guess(c.entry)
    log.debug("guess: %s" % guess)
    log.debug("c.entry.title: %s" % c.entry.title)
    # BUG FIX: the original assigned str(guess) and then immediately
    # overwrote it with a second, identical guesser.guess() call.
    # Keep a single guess; the final value matches the original's.
    c.score = guess
    c.pool = guesser.trainer.poolData('spam')
    c.is_spam = guesser.is_spam(c.entry)
    import operator
    # Sort token pools by score, highest first (sort(reverse=True)
    # replaces the original sort() + reverse() pair).
    c.pool_data_spam = guesser.trainer.poolData('spam')
    c.pool_data_spam.sort(key=operator.itemgetter(1), reverse=True)
    c.pool_data_ham = guesser.trainer.poolData('ham')
    c.pool_data_ham.sort(key=operator.itemgetter(1), reverse=True)
    c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))
    return render('bayes/score.mako')
def mark_as_ham(self, id):
    """Train the classifier with entry *id* as ham.

    Redirects anonymous users to the login page; otherwise delegates
    the actual training to ``__mark_as__``.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))
    entry = meta.find(model.FeedEntry, id)
    parent_feed = meta.find(model.Feed, entry.feed_id)
    classifier = Guesser(parent_feed, c.user, config)
    return self.__mark_as__(entry, 'ham', classifier)
def mixed_rss_with_report(self, user_id, id):
    """Build an RSS/Atom feed where ham entries appear individually and
    spam entries are batched into periodic summary reports.

    Uses the per-user ``summarize_at`` setting to window entries via
    ``Reporter``; advances ``settings.report_offset`` past everything
    that was published and commits the session.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)
    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)
    # Refresh entries from the remote feed; the added-count is not used
    # here (the original also imported an unused, shadowed `feed` module).
    feed_data.fetch()
    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )
    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)
    guesser = Guesser(feed_data, c.rss_user, config)
    settings = c.rss_user.get_bayes_feed_setting(feed_data.id)
    meta.Session.add(settings)
    delta = h.timedelta_from_string(settings.summarize_at)
    log.debug("delta %s" % delta)
    if not settings.report_offset:
        # First run for this user/feed: consider every entry.
        entries = feed_data.get_entries().order_by(model.FeedEntry.id).all()
        log.warn("no report_offset available, read %s entries" % len(entries))
    else:
        # Resume from the last reported entry onwards.
        entries = feed_data.get_entries()\
            .filter(model.FeedEntry.id >= settings.report_offset)\
            .order_by(model.FeedEntry.id).all()
    from rssmonster.lib.reporter import Reporter
    reporter = Reporter(None, None, delta, 30)
    for entry in entries:
        reporter.add_item(entry, guesser.is_spam(entry))
    for entry_box in reporter.entry_queue:
        if entry_box['type'] == 'ham':
            c.entry = entry_box['entry']
            c.entry.is_spam = guesser.is_spam(c.entry)
            feed.add_item(title=c.entry.title,
                          link=c.entry.link,
                          description=render('bayes/rss_summary.mako'),
                          unique_id=c.entry.uid,
                          pubdate=c.entry.updated)
        elif entry_box['type'] == 'spam':
            add_spam_report(feed, entry_box['entries'])
    settings.report_offset = reporter.offset_id()
    log.debug("settings.report_offset: %s" % settings.report_offset)
    log.debug("holding back: %s" % len(reporter.spam_entries))
    meta.Session.commit()
    # CONSISTENCY FIX: the sibling __mixed_rss__ endpoint sets the
    # content type; this RSS endpoint should do the same.
    response.content_type = 'application/atom+xml'
    return feed.writeString('utf-8')
def show_guesser(self, id):
    """Render the classifier-detail page for feed *id*.

    Requires a signed-in user. Exposes the spam and ham token pools
    (sorted by score, highest first), page actions, and the user's
    stopwords for this feed to ``bayes/guesser.mako``.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))
    c.feed = meta.find(model.Feed, id)
    guesser = Guesser(c.feed, c.user, config)
    import operator
    # sort(reverse=True) replaces the original sort() + reverse() pair;
    # dead commented-out encoding workaround removed.
    c.pool_data_spam = guesser.trainer.poolData('spam')
    c.pool_data_spam.sort(key=operator.itemgetter(1), reverse=True)
    c.pool_data_ham = guesser.trainer.poolData('ham')
    c.pool_data_ham.sort(key=operator.itemgetter(1), reverse=True)
    c.actions = [{'link': h.url_for(controller='feed', action='show_feed', id=id),
                  'text': 'Feed Details'}]
    c.stopwords = meta.Session\
        .query(model.Stopword)\
        .filter_by(feed_id=id, user_id=c.user.id)
    return render('bayes/guesser.mako')
def mixed_rss(self, user_id, id):
    """Dispatch to the plain or report-style mixed RSS builder.

    If the user's ``summarize_at`` setting is truthy, spam is batched
    into reports via ``mixed_rss_with_report``; otherwise every entry
    is rendered inline via ``__mixed_rss__``.
    """
    c.rss_user = meta.find(model.User, user_id)
    settings = c.rss_user.get_bayes_feed_setting(id)
    log.debug("settings: %s" % settings)
    log.debug("settings.summarize_at: %s" % settings.summarize_at)
    if not settings.summarize_at:
        return self.__mixed_rss__(user_id, id)
    return self.mixed_rss_with_report(user_id, id)
def __mixed_rss__(self, user_id, id):
    """Build an Atom feed of the 30 newest entries, marking spam titles.

    Entries the classifier judges as spam get a ``[SPAM]`` title prefix;
    every entry's description is rendered via ``bayes/rss_summary.mako``.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)
    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)
    # Refresh entries from the remote feed; the added-count is not used
    # (the original also imported an unused, shadowed `feed` module).
    feed_data.fetch()
    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )
    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)
    guesser = Guesser(feed_data, c.rss_user, config)
    entries = feed_data.get_entries()\
        .order_by(model.FeedEntry.updated.desc()).limit(30)
    for entry in entries:
        c.entry = entry
        c.entry.is_spam = guesser.is_spam(entry)
        if c.entry.is_spam:
            item_title = "[SPAM] %s" % entry.title
        else:
            item_title = entry.title
        feed.add_item(title=item_title,
                      link=entry.link,
                      description=render('bayes/rss_summary.mako'),
                      unique_id=entry.uid,
                      pubdate=entry.updated)
    response.content_type = 'application/atom+xml'
    return feed.writeString('utf-8')
def pipe(self, id):
    """Pass feed *id* through unmodified as an Atom feed.

    Fetches fresh entries first (the fetch count was bound but unused
    in the original), then re-serializes every stored entry.
    """
    feed_data = meta.find(model.Feed, id)
    feed_data.fetch()
    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description=feed_data.subtitle,
        language=feed_data.language,
    )
    for entry in feed_data.get_entries():
        feed.add_item(title=entry.title,
                      link=entry.link,
                      description=entry.summary,
                      unique_id=entry.uid)
    response.content_type = 'application/atom+xml'
    return feed.writeString('utf-8')
def redo(self, id):
    """Retrain the classifier for feed *id* from stored classifications.

    Clears the classifier, replays every saved classification through
    ``__mark_as__``, and flashes how many entries were "needlessly"
    trained (i.e. the fresh classifier already guessed their pool).
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))
    c.feed = meta.find(model.Feed, id)
    query = meta.Session\
        .query(model.Classification)\
        .join(model.FeedEntry)\
        .filter_by(feed_id=id)
    guesser = Guesser(c.feed, c.user, config)
    guesser.clear()
    cnt = 0
    needles_cnt = 0
    for entry in query:
        # PERF: guess once per entry; the original called is_spam()
        # twice (once per branch condition).
        guessed_spam = guesser.is_spam(entry.entry, use_classified=False)
        if guessed_spam and entry.pool == 'spam':
            needles_cnt += 1
        elif not guessed_spam and entry.pool == 'ham':
            needles_cnt += 1
        self.__mark_as__(entry.entry, entry.pool, guesser, True)
        cnt += 1
    guesser.save()
    # (leftover log.debug("FOOOOOO") debug cruft removed)
    if needles_cnt > 0:
        h.flash("%d entries were needlessly trained (total: %s)"
                % (needles_cnt, cnt))
    else:
        h.flash("learned %s entries" % cnt)
    return h.go_back()
def show_feed(self, id, page=1):
    """Render a paginated view of feed *id* with per-entry spam scores.

    Also collects up to 10 most-recent spam and 10 most-recent ham
    entries (scanning at most 500 rows) and the RSS link variants for
    the sidebar. Requires a signed-in user.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin', id=None,
                            return_to=url.current()))
    c.feed = meta.find(model.Feed, id)
    guesser = bayes.Guesser(c.feed, c.user, config)
    query = c.feed.get_entries().order_by(model.FeedEntry.updated.desc())
    from webhelpers import paginate
    c.page = paginate.Page(query, page)
    for e in c.page.items:
        e.is_spam = guesser.is_spam(e)
        e.score = guesser.guess(e)
    # Scan recent entries for the "last spam / last ham" sidebars,
    # stopping early once both lists are full.
    c.last_spam_entries = []
    c.last_ham_entries = []
    for i, e in enumerate(query.limit(500)):
        e.is_spam = guesser.is_spam(e)
        if e.is_spam and len(c.last_spam_entries) < 10:
            c.last_spam_entries.append(e)
        elif not e.is_spam and len(c.last_ham_entries) < 10:
            c.last_ham_entries.append(e)
        if len(c.last_spam_entries) >= 10 and len(c.last_ham_entries) >= 10:
            log.debug("breaking loop after %s rows" % i)
            break
    # (dead commented-out webhelpers.pagination experiments removed)
    # NOTE(review): these url_for calls pass no feed id — presumably
    # Pylons routing fills it from the current request; verify.
    c.rss_feeds = [
        {'title': 'Unmodified',
         'link': h.url_for(controller='feed', action='pipe')},
        {'title': 'Mixed',
         'link': h.url_for(controller='bayes', action='mixed_rss',
                           user_id=c.user.id)},
        {'title': 'Mixed with Report',
         'link': h.url_for(controller='bayes', action='mixed_rss_with_report',
                           user_id=c.user.id)},
    ]
    return render('feed/show_feed.mako')
def show_record(self, id):
    """Render the raw record page for feed *id*."""
    c.feed = meta.find(model.Feed, id)
    return render('feed/record.mako')
def update(self, id):
    """Fetch new entries for feed *id*, flash the count, and go back."""
    target_feed = meta.find(model.Feed, id)
    added = target_feed.fetch()
    h.flash("added %s entries" % added)
    return h.go_back()