def show_guesser(self, id):
    """Render the guesser overview page for the feed identified by ``id``.

    When no user is signed in, the request is redirected to the sign-in
    page with the current URL as ``return_to``.  Otherwise the template
    context ``c`` receives:

    * ``feed`` -- the feed looked up through ``meta.find``;
    * ``pool_data_spam`` / ``pool_data_ham`` -- the trainer's pool data for
      each pool, ordered by descending value of element 1 of every row
      (presumably the token count -- confirm against ``poolData``);
    * ``actions`` -- a single link back to the feed details page;
    * ``stopwords`` -- the (unevaluated) stopword query for this feed and
      the current user.

    Returns the rendered ``bayes/guesser.mako`` template.
    """
    if not c.user:
        signin_url = url(controller='login', action='signin', id=None,
                         return_to=url.current())
        return redirect(signin_url)

    c.feed = meta.find(model.Feed, id)
    guesser = Guesser(c.feed, c.user, config)

    import operator

    def order_descending(rows):
        # Ascending sort followed by an in-place reverse: this is not the
        # same as sort(reverse=True), which would keep ties in their
        # original relative order instead of flipping them.
        rows.sort(key=operator.itemgetter(1))
        rows.reverse()

    c.pool_data_spam = guesser.trainer.poolData('spam')
    order_descending(c.pool_data_spam)

    c.pool_data_ham = guesser.trainer.poolData('ham')
    order_descending(c.pool_data_ham)

    feed_details_url = h.url_for(controller='feed', action='show_feed', id=id)
    c.actions = [{'link': feed_details_url, 'text': 'Feed Details'}]

    stopword_query = meta.Session.query(model.Stopword)
    c.stopwords = stopword_query.filter_by(feed_id=id, user_id=c.user.id)

    return render('bayes/guesser.mako')
def __mark_as__(self, entry, pool, guesser, force=False):
    """Classify ``entry`` as ``pool`` for the current user and train on it.

    The current user's ``Classification`` row for the entry is created or
    updated, the session is committed, the guesser's trainer is trained
    with the entry's relevant text under ``pool`` and the guesser is saved.

    Parameters:
        entry: the feed entry to classify; its ``id`` and ``feed_id`` are
            read.
        pool: target pool, either ``'spam'`` or ``'ham'``.
        guesser: guesser whose trainer is trained and which is then saved.
        force: when true the entry is re-trained even if the database says
            it is already classified as ``pool``; no flash message is
            emitted and nothing is returned.

    Returns:
        The result of ``h.go_back`` towards the feed page when ``force`` is
        false, otherwise ``None``.

    Raises:
        ValueError: if ``pool`` is neither ``'spam'`` nor ``'ham'``.
    """
    # Validate before touching the database or the trainer: previously an
    # unknown pool was committed and trained first, and only then rejected
    # with an (invalid) string exception.
    if pool not in ('spam', 'ham'):
        raise ValueError("bad pool")

    log.debug("entry.id: %s", entry.id)

    def back_to_feed():
        # Built lazily so the URL is only generated when a redirect is due.
        return h.go_back(h.url_for(controller='feed', action='show_feed',
                                   id=entry.feed_id))

    classy = meta.Session\
        .query(model.Classification)\
        .filter_by(user_id=c.user.id, entry_id=entry.id).first()

    if not classy:
        classy = model.Classification()
        classy.user_id = c.user.id
        classy.entry_id = entry.id
    elif classy.pool == pool and not force:
        h.flash("entry was already classified as %s" % pool)
        return back_to_feed()

    classy.pool = pool
    meta.Session.add(classy)
    meta.Session.commit()

    guesser.trainer.train(pool, __relevant__(entry), entry.id)
    # NOTE: a re-classified entry is not untrained from the opposite pool;
    # that step was already disabled (commented out) in this method.
    guesser.save()

    if force:
        return None

    h.flash("now known as %s: %s" % (pool, entry.id))
    return back_to_feed()
def mark_actions(self, return_to, user):
    """Return the classification links that apply to this entry for ``user``.

    Looks up ``user``'s ``Classification`` row for this entry and offers
    the pools the entry can still be moved to:

    * not classified yet -- "Spam" and "Ham" links, in that order;
    * classified as ``'spam'`` -- only a "Ham" link;
    * classified as ``'ham'`` -- only a "Spam" link.

    Parameters:
        return_to: value passed through as ``return_to`` to every link.
        user: the user whose classification is consulted (``user.id``).

    Returns:
        A list of dicts with the keys ``'title'`` and ``'link'``.

    Raises:
        ValueError: if the stored classification has an unknown pool.
    """
    import rssmonster.lib.helpers as h
    from classification import Classification

    def mark_link(title, action_name):
        # One action entry pointing at the bayes controller for this entry.
        return {'title': title,
                'link': h.url_for(controller='bayes', action=action_name,
                                  id=self.id, return_to=return_to)}

    classy = meta.Session\
        .query(Classification)\
        .filter_by(user_id=user.id, entry_id=self.id).first()

    # The result list used to be appended to without ever being created,
    # which raised NameError on every call; each branch now builds it.
    if not classy:
        return [mark_link('Spam', 'mark_as_spam'),
                mark_link('Ham', 'mark_as_ham')]
    if classy.pool == 'spam':
        return [mark_link('Ham', 'mark_as_ham')]
    if classy.pool == 'ham':
        return [mark_link('Spam', 'mark_as_spam')]

    # String exceptions are not valid; raise a real exception type.
    raise ValueError("bad pool")
def show_feed(self, id, page=1):
    """Render the detail page of the feed identified by ``id``.

    When no user is signed in, the request is redirected to the sign-in
    page with the current URL as ``return_to``.  Otherwise the template
    context ``c`` receives:

    * ``feed`` -- the feed looked up through ``meta.find``;
    * ``page`` -- a ``paginate.Page`` over the feed's entries, most
      recently updated first, whose items carry ``is_spam`` and ``score``
      as computed by the guesser;
    * ``last_spam_entries`` / ``last_ham_entries`` -- up to 10 entries of
      each kind, taken from at most the first 500 entries in the same
      order;
    * ``rss_feeds`` -- links to the unmodified and the mixed RSS outputs.

    Returns the rendered ``feed/show_feed.mako`` template.
    """
    if not c.user:
        signin_url = url(controller='login', action='signin', id=None,
                         return_to=url.current())
        return redirect(signin_url)

    c.feed = meta.find(model.Feed, id)
    classifier = bayes.Guesser(c.feed, c.user, config)
    newest_first = c.feed.get_entries().order_by(
        model.FeedEntry.updated.desc())

    from webhelpers import paginate
    c.page = paginate.Page(newest_first, page)
    for shown in c.page.items:
        shown.is_spam = classifier.is_spam(shown)
        shown.score = classifier.guess(shown)

    wanted = 10  # how many entries to collect per pool
    recent_spam = c.last_spam_entries = []
    recent_ham = c.last_ham_entries = []

    # ``seen`` is the zero-based position of the entry being examined.
    for seen, entry in enumerate(newest_first.limit(500)):
        entry.is_spam = classifier.is_spam(entry)

        bucket = recent_spam if entry.is_spam else recent_ham
        if len(bucket) < wanted:
            bucket.append(entry)

        # Stop scanning as soon as both lists are full.
        if len(recent_spam) >= wanted and len(recent_ham) >= wanted:
            log.debug("breaking loop after %s rows" % seen)
            break

    unmodified_url = h.url_for(controller='feed', action='pipe')
    mixed_url = h.url_for(controller='bayes', action='mixed_rss',
                          user_id=c.user.id)
    mixed_report_url = h.url_for(controller='bayes',
                                 action='mixed_rss_with_report',
                                 user_id=c.user.id)
    c.rss_feeds = [
        {'title': 'Unmodified', 'link': unmodified_url},
        {'title': 'Mixed', 'link': mixed_url},
        {'title': 'Mixed with Report', 'link': mixed_report_url},
    ]

    return render('feed/show_feed.mako')