def show_guesser(self, id):
    """Render the Bayes guesser debug page for feed *id*.

    Redirects anonymous visitors to the sign-in page.  Otherwise loads
    the feed, builds a :class:`Guesser` for the current user, exposes
    the spam/ham token pools sorted by score (highest first) and the
    user's per-feed stopwords to the template.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin',
                            id=None, return_to=url.current()))
    c.feed = meta.find(model.Feed, id)
    guesser = Guesser(c.feed, c.user, config)
    import operator
    # poolData() returns (token, score) pairs; show the strongest
    # indicators first.
    c.pool_data_spam = guesser.trainer.poolData('spam')
    c.pool_data_spam.sort(key=operator.itemgetter(1), reverse=True)
    c.pool_data_ham = guesser.trainer.poolData('ham')
    c.pool_data_ham.sort(key=operator.itemgetter(1), reverse=True)
    c.actions = [{'link': h.url_for(controller='feed', action='show_feed', id=id),
                  'text': 'Feed Details'}]
    c.stopwords = meta.Session\
        .query(model.Stopword)\
        .filter_by(feed_id=id, user_id=c.user.id)
    return render('bayes/guesser.mako')
def show_score(self, id):
    """Render the Bayes score/debug page for feed entry *id*.

    Redirects anonymous visitors to the sign-in page.  Otherwise loads
    the entry and its feed, classifies the entry once, and exposes the
    score, spam flag, sorted token pools and the entry's relevant
    tokens to the template.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin',
                            id=None, return_to=url.current()))
    c.entry = meta.find(model.FeedEntry, id)
    feed = meta.find(model.Feed, c.entry.feed_id)
    c.feed = feed
    guesser = Guesser(feed, c.user, config)
    # Classify once and reuse the result; the original computed
    # guess() twice and discarded an intermediate str() conversion.
    guess = guesser.guess(c.entry)
    log.debug("guess: %s" % guess)
    log.debug("c.entry.title: %s" % c.entry.title)
    c.score = guess
    c.pool = guesser.trainer.poolData('spam')
    c.is_spam = guesser.is_spam(c.entry)
    import operator
    # (token, score) pairs, strongest indicators first.
    c.pool_data_spam = guesser.trainer.poolData('spam')
    c.pool_data_spam.sort(key=operator.itemgetter(1), reverse=True)
    c.pool_data_ham = guesser.trainer.poolData('ham')
    c.pool_data_ham.sort(key=operator.itemgetter(1), reverse=True)
    c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))
    return render('bayes/score.mako')
def mixed_rss_with_report(self, user_id, id):
    """Serialize feed *id* for user *user_id* with spam folded into reports.

    Fetches new entries, classifies each one, emits ham entries as
    normal feed items and batches spam entries into summary items via
    ``add_spam_report``.  The per-user ``report_offset`` is advanced and
    committed so already-reported entries are not re-emitted.

    Returns the UTF-8 serialized feed string.
    NOTE(review): unlike ``__mixed_rss__`` this does not set
    ``response.content_type`` — confirm whether that is intentional.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)
    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)
    import feed  # NOTE(review): module name is shadowed by the local below
    # fetch() pulls new entries into the database; its return value is
    # not needed here.
    feed_data.fetch()
    feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )
    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)
    guesser = Guesser(feed_data, c.rss_user, config)
    settings = c.rss_user.get_bayes_feed_setting(feed_data.id)
    meta.Session.add(settings)
    delta = h.timedelta_from_string(settings.summarize_at)
    log.debug("delta %s" % delta)
    # Resume from the stored offset if we have one; otherwise process
    # the whole history.
    if not settings.report_offset:
        entries = feed_data.get_entries().order_by(model.FeedEntry.id).all()
        log.warn("no report_offset available, read %s entries" % len(entries))
    else:
        entries = feed_data.get_entries()\
            .filter(model.FeedEntry.id >= settings.report_offset)\
            .order_by(model.FeedEntry.id).all()
    from rssmonster.lib.reporter import Reporter
    reporter = Reporter(None, None, delta, 30)
    for entry in entries:
        reporter.add_item(entry, guesser.is_spam(entry))
    for entry_box in reporter.entry_queue:
        if entry_box['type'] == 'ham':
            # Ham entries become ordinary feed items.
            c.entry = entry_box['entry']
            c.entry.is_spam = guesser.is_spam(c.entry)
            feed.add_item(title=c.entry.title,
                          link=c.entry.link,
                          description=render('bayes/rss_summary.mako'),
                          unique_id=c.entry.uid,
                          pubdate=c.entry.updated)
        elif entry_box['type'] == 'spam':
            # Spam entries are collapsed into a single summary item.
            add_spam_report(feed, entry_box['entries'])
    settings.report_offset = reporter.offset_id()
    log.debug("settings.report_offset: %s" % settings.report_offset)
    log.debug("holding back: %s" % len(reporter.spam_entries))
    meta.Session.commit()
    return feed.writeString('utf-8')
def add(self):
    """Show the add-feed form, or create a feed from the submitted URL.

    GET (or a POST without a ``url`` parameter) renders the form;
    otherwise the feed is persisted and the client is redirected to
    the feed list.
    """
    feed_url = request.params.get('url')
    if not feed_url:
        return render('feed/add.mako')
    new_feed = model.Feed()
    new_feed.url = feed_url
    meta.Session.add(new_feed)
    meta.Session.commit()
    return redirect(url(controller='feed', action='show_list'))
def __mixed_rss__(self, user_id, id):
    """Serialize feed *id* for user *user_id*, tagging spam in the title.

    Fetches new entries, classifies the 30 most recent ones and emits
    every entry, prefixing spam titles with ``[SPAM]``.  Sets the Atom
    content type and returns the UTF-8 serialized feed string.
    """
    c.rss_user = meta.find(model.User, user_id)
    log.debug("c.rss_user: %s" % c.rss_user)
    feed_data = meta.find(model.Feed, id)
    log.debug("feed_data.id %s" % feed_data.id)
    import feed
    feed_data.fetch()
    out_feed = h.DefaultFeed(
        title=feed_data.title,
        link=feed_data.link,
        description="TESTING",
        language=feed_data.language,
    )
    c.base_url = config['base_url']
    log.debug('c.base_url: %s' % c.base_url)
    guesser = Guesser(feed_data, c.rss_user, config)
    recent = feed_data.get_entries()\
        .order_by(model.FeedEntry.updated.desc())\
        .limit(30)
    for entry in recent:
        c.entry = entry
        c.entry.is_spam = guesser.is_spam(entry)
        item_title = "[SPAM] %s" % entry.title if c.entry.is_spam else entry.title
        out_feed.add_item(title=item_title,
                          link=entry.link,
                          description=render('bayes/rss_summary.mako'),
                          unique_id=entry.uid,
                          pubdate=entry.updated)
    response.content_type = 'application/atom+xml'
    return out_feed.writeString('utf-8')
def add_spam_report(feed, spam_entries):
    """Append one summary item covering *spam_entries* to *feed*.

    No-op for an empty list.  The item's pubdate is the newest
    ``updated`` timestamp found among the entries and its unique id is
    the hex digest of the report title.
    """
    if not spam_entries:
        return
    c.entries = spam_entries
    c.baseurl = config['base_url']
    hasher = md5()
    # Use the newest entry timestamp as the report's publication date.
    updated = None
    for entry in spam_entries:
        if entry.updated and (not updated or entry.updated > updated):
            updated = entry.updated
    title = "RssMonster - Spam Summary - %s" % spam_entries[0].updated
    # NOTE(review): under Python 3 this needs title.encode('utf-8').
    hasher.update(title)
    feed.add_item(title=title,
                  link="http://example.com",
                  description=render('bayes/spam_report.mako'),
                  unique_id=hasher.hexdigest(),
                  pubdate=updated)
def show_feed(self, id, page=1):
    """Render the detail page for feed *id*.

    Redirects anonymous visitors to the sign-in page.  Otherwise builds
    a paginated entry listing (each entry annotated with its spam flag
    and score), collects up to 10 recent spam and 10 recent ham entries
    for the sidebar, and exposes the RSS export links.
    """
    if not c.user:
        return redirect(url(controller='login', action='signin',
                            id=None, return_to=url.current()))
    c.feed = meta.find(model.Feed, id)
    guesser = bayes.Guesser(c.feed, c.user, config)
    query = c.feed.get_entries().order_by(model.FeedEntry.updated.desc())
    from webhelpers import paginate
    c.page = paginate.Page(query, page)
    for e in c.page.items:
        e.is_spam = guesser.is_spam(e)
        e.score = guesser.guess(e)
    # Scan at most 500 recent entries for the 10-newest spam/ham lists;
    # stop early once both lists are full.
    c.last_spam_entries = []
    c.last_ham_entries = []
    for i, e in enumerate(query.limit(500)):
        e.is_spam = guesser.is_spam(e)
        if len(c.last_spam_entries) < 10 and e.is_spam:
            c.last_spam_entries.append(e)
        if len(c.last_ham_entries) < 10 and not e.is_spam:
            c.last_ham_entries.append(e)
        if len(c.last_spam_entries) >= 10 and len(c.last_ham_entries) >= 10:
            log.debug("breaking loop after %s rows" % i)
            break
    c.rss_feeds = [
        {'title': 'Unmodified',
         'link': h.url_for(controller='feed', action='pipe')},
        {'title': 'Mixed',
         'link': h.url_for(controller='bayes', action='mixed_rss',
                           user_id=c.user.id)},
        {'title': 'Mixed with Report',
         'link': h.url_for(controller='bayes', action='mixed_rss_with_report',
                           user_id=c.user.id)},
    ]
    return render('feed/show_feed.mako')
def show_list(self):
    """Render the listing page for all feeds."""
    c.feeds = meta.Session.query(model.Feed).all()
    return render('feed/list.mako')
def show_record(self, id):
    """Render the raw record page for feed *id*."""
    c.feed = meta.find(model.Feed, id)
    return render('feed/record.mako')