Esempio n. 1
0
    def show_score(self, id):
        """Render the Bayes score page for a single feed entry.

        When no user is signed in the visitor is redirected to the sign-in
        page.  Otherwise the entry and its feed are loaded, a ``Guesser``
        is built for the current user, and the template context ``c`` is
        filled with the guess, the spam verdict, the spam/ham pool data
        (ordered by the item at index 1, largest first) and the set of
        tokens of the entry.

        :param id: identifier used to look up the ``model.FeedEntry``.
        :returns: the redirect result, or the rendered
            ``bayes/score.mako`` template.
        """
        if not c.user:
            return redirect(url(controller='login', action='signin', id=None, return_to=url.current()))

        c.entry = meta.find(model.FeedEntry, id)

        feed = meta.find(model.Feed, c.entry.feed_id)
        c.feed = feed
        guesser = Guesser(feed, c.user, config)
        guess = guesser.guess(c.entry)

        # Values are passed as logging arguments: nothing is formatted when
        # debug logging is off, and a tuple-valued guess cannot break the
        # "%" operator.
        log.debug("guess: %s", guess)
        log.debug("c.entry.title: %s", c.entry.title)

        # guess() is evaluated once and reused; the template receives the
        # raw guess object, not its string form.
        c.score = guess
        c.pool = guesser.trainer.poolData('spam')
        c.is_spam = guesser.is_spam(c.entry)

        import operator
        second_item = operator.itemgetter(1)

        # sort() followed by reverse() is kept on purpose: it orders ties
        # differently from sort(reverse=True).
        c.pool_data_spam = guesser.trainer.poolData('spam')
        c.pool_data_spam.sort(key=second_item)
        c.pool_data_spam.reverse()

        c.pool_data_ham = guesser.trainer.poolData('ham')
        c.pool_data_ham.sort(key=second_item)
        c.pool_data_ham.reverse()

        c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))

        return render('bayes/score.mako')
Esempio n. 2
0
    def mark_as_ham(self, id):
        """Hand a feed entry to ``__mark_as__`` with the ``'ham'`` pool.

        Visitors without a signed-in user are redirected to the sign-in
        page instead.

        :param id: identifier used to look up the ``model.FeedEntry``.
        :returns: the redirect result, or whatever ``__mark_as__`` returns.
        """
        if not c.user:
            signin_url = url(controller='login', action='signin', id=None, return_to=url.current())
            return redirect(signin_url)

        ham_entry = meta.find(model.FeedEntry, id)
        owning_feed = meta.find(model.Feed, ham_entry.feed_id)
        # The guesser is built for the entry's feed and the current user.
        ham_guesser = Guesser(owning_feed, c.user, config)
        return self.__mark_as__(ham_entry, 'ham', ham_guesser)
Esempio n. 3
0
    def mixed_rss_with_report(self, user_id, id):
        """Serve a feed with individual ham items and grouped spam reports.

        The feed ``id`` is fetched and its entries are classified with a
        ``Guesser`` built for user ``user_id``.  Entries are taken from the
        user's stored ``report_offset`` onwards (all entries when no offset
        is stored) and pushed through a ``Reporter``.  Queue items of type
        ``'ham'`` become regular feed items; items of type ``'spam'`` are
        handed to ``add_spam_report``.  Afterwards the new offset reported
        by the ``Reporter`` is stored on the settings and committed.

        :param user_id: identifier used to look up the ``model.User``.
        :param id: identifier used to look up the ``model.Feed``.
        :returns: the generated feed serialised with ``writeString('utf-8')``.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s", c.rss_user)
        feed_data = meta.find(model.Feed, id)
        log.debug("feed_data.id %s", feed_data.id)

        # NOTE(review): the name bound by this import is rebound to the
        # generated feed right below; presumably the import is only kept
        # for its side effects -- confirm before removing.
        import feed
        feed_data.fetch()

        feed = h.DefaultFeed(
            title=feed_data.title,
            link=feed_data.link,
            description="TESTING",
            language=feed_data.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s', c.base_url)

        guesser = Guesser(feed_data, c.rss_user, config)
        settings = c.rss_user.get_bayes_feed_setting(feed_data.id)
        meta.Session.add(settings)
        delta = h.timedelta_from_string(settings.summarize_at)
        log.debug("delta %s", delta)

        if not settings.report_offset:
            entries = feed_data.get_entries().order_by(model.FeedEntry.id).all()
            log.warning("no report_offset available, read %s entries", len(entries))
        else:
            entries = feed_data.get_entries().filter(model.FeedEntry.id >= settings.report_offset).order_by(model.FeedEntry.id).all()

        from rssmonster.lib.reporter import Reporter
        reporter = Reporter(None, None, delta, 30)
        for entry in entries:
            reporter.add_item(entry, guesser.is_spam(entry))

        for entry_box in reporter.entry_queue:
            if entry_box['type'] == 'ham':
                # The summary template reads the current entry from ``c``.
                c.entry = entry_box['entry']
                c.entry.is_spam = guesser.is_spam(c.entry)
                feed.add_item(title=c.entry.title,
                              link=c.entry.link,
                              description=render('bayes/rss_summary.mako'),
                              unique_id=c.entry.uid,
                              pubdate=c.entry.updated)

            elif entry_box['type'] == 'spam':
                add_spam_report(feed, entry_box['entries'])

        settings.report_offset = reporter.offset_id()
        log.debug("settings.report_offset: %s", settings.report_offset)
        log.debug("holding back: %s", len(reporter.spam_entries))
        meta.Session.commit()

        # Same content type as the sibling __mixed_rss__ action, so both
        # variants of the mixed feed are served alike.
        response.content_type = 'application/atom+xml'
        return feed.writeString('utf-8')
Esempio n. 4
0
    def show_guesser(self, id):
        """Render the guesser overview page of a feed.

        Redirects to the sign-in page when no user is signed in.  Otherwise
        the template context receives the feed, the spam and ham pool data
        (ordered by the item at index 1, largest first), a single
        "Feed Details" action link and the stopword query for this feed and
        user.

        :param id: identifier used to look up the ``model.Feed``.
        :returns: the redirect result, or the rendered
            ``bayes/guesser.mako`` template.
        """
        if not c.user:
            signin_url = url(controller='login', action='signin', id=None, return_to=url.current())
            return redirect(signin_url)

        c.feed = meta.find(model.Feed, id)
        guesser = Guesser(c.feed, c.user, config)

        import operator
        second_item = operator.itemgetter(1)

        def ranked_pool(pool_name):
            # In-place ascending sort followed by an in-place reverse, on
            # the very list that poolData() returned.
            rows = guesser.trainer.poolData(pool_name)
            rows.sort(key=second_item)
            rows.reverse()
            return rows

        c.pool_data_spam = ranked_pool('spam')
        c.pool_data_ham = ranked_pool('ham')

        details_link = h.url_for(controller='feed', action='show_feed', id=id)
        c.actions = [{'link': details_link, 'text': 'Feed Details'}]

        stopword_query = meta.Session.query(model.Stopword)
        c.stopwords = stopword_query.filter_by(feed_id=id, user_id=c.user.id)

        return render('bayes/guesser.mako')
Esempio n. 5
0
    def mixed_rss(self, user_id, id):
        """Dispatch to the mixed feed variant selected by the user's settings.

        When the user's Bayes settings for this feed have a truthy
        ``summarize_at`` the request goes to ``mixed_rss_with_report``,
        otherwise to ``__mixed_rss__``.

        :param user_id: identifier used to look up the ``model.User``.
        :param id: feed identifier, passed on unchanged.
        :returns: whatever the selected action returns.
        """
        c.rss_user = meta.find(model.User, user_id)
        feed_settings = c.rss_user.get_bayes_feed_setting(id)
        log.debug("settings: %s" % feed_settings)
        log.debug("settings.summarize_at: %s" % feed_settings.summarize_at)

        if feed_settings.summarize_at:
            action = self.mixed_rss_with_report
        else:
            action = self.__mixed_rss__
        return action(user_id, id)
Esempio n. 6
0
    def __mixed_rss__(self, user_id, id):
        """Serve the 30 most recently updated entries, tagging spam titles.

        The feed is fetched first; every selected entry is classified with
        a ``Guesser`` built for the given user, and entries judged to be
        spam get a ``[SPAM]`` prefix in their title.

        :param user_id: identifier used to look up the ``model.User``.
        :param id: identifier used to look up the ``model.Feed``.
        :returns: the generated feed serialised with ``writeString('utf-8')``.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s" % c.rss_user)
        feed_data = meta.find(model.Feed, id)
        log.debug("feed_data.id %s" % feed_data.id)

        import feed
        feed_data.fetch()

        outgoing = h.DefaultFeed(
            title=feed_data.title,
            link=feed_data.link,
            description="TESTING",
            language=feed_data.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s' % c.base_url)

        guesser = Guesser(feed_data, c.rss_user, config)
        newest_first = model.FeedEntry.updated.desc()
        entries = feed_data.get_entries().order_by(newest_first).limit(30)

        for entry in entries:
            # The summary template reads the current entry from ``c``.
            c.entry = entry
            c.entry.is_spam = guesser.is_spam(entry)

            item_title = ("[SPAM] %s" % entry.title) if c.entry.is_spam else entry.title

            outgoing.add_item(
                title=item_title,
                link=entry.link,
                description=render('bayes/rss_summary.mako'),
                unique_id=entry.uid,
                pubdate=entry.updated,
            )

        response.content_type = 'application/atom+xml'
        return outgoing.writeString('utf-8')
Esempio n. 7
0
    def pipe(self, id):
        """Fetch a feed and re-serve all of its entries unmodified.

        :param id: identifier used to look up the ``model.Feed``.
        :returns: the generated feed serialised with ``writeString('utf-8')``.
        """
        source = meta.find(model.Feed, id)
        source.fetch()

        outgoing = h.DefaultFeed(
            title=source.title,
            link=source.link,
            description=source.subtitle,
            language=source.language,
        )

        for item in source.get_entries():
            outgoing.add_item(
                title=item.title,
                link=item.link,
                description=item.summary,
                unique_id=item.uid,
            )

        response.content_type = 'application/atom+xml'
        return outgoing.writeString('utf-8')
Esempio n. 8
0
    def redo(self, id):
        """Clear a feed's guesser and replay every stored classification.

        Redirects to the sign-in page when no user is signed in.  For each
        ``model.Classification`` joined to an entry of this feed, the
        guesser is first asked (with ``use_classified=False``) whether it
        already agrees with the stored pool; such entries are counted as
        needlessly trained.  Every classification is then replayed through
        ``__mark_as__`` and the guesser is saved.

        :param id: identifier of the ``model.Feed`` to retrain.
        :returns: the redirect result, or the result of ``h.go_back()``.
        """
        if not c.user:
            return redirect(url(controller='login', action='signin', id=None, return_to=url.current()))

        c.feed = meta.find(model.Feed, id)

        query = meta.Session\
                .query(model.Classification)\
                .join(model.FeedEntry)\
                .filter_by(feed_id=id)

        guesser = Guesser(c.feed, c.user, config)
        guesser.clear()

        cnt = 0
        needles_cnt = 0
        for entry in query:
            # Classify once per entry and reuse the verdict for both pool
            # comparisons.
            predicted_spam = guesser.is_spam(entry.entry, use_classified=False)
            if predicted_spam:
                already_agrees = entry.pool == 'spam'
            else:
                already_agrees = entry.pool == 'ham'

            if already_agrees:
                needles_cnt += 1

            self.__mark_as__(entry.entry, entry.pool, guesser, True)
            cnt += 1

        guesser.save()
        log.debug("redo: replayed %s classifications, %s needless", cnt, needles_cnt)

        if needles_cnt > 0:
            h.flash("%d entries were needlessly trained (total: %s)" % (needles_cnt, cnt))
        else:
            h.flash("learned %s entries" % cnt)

        return h.go_back()
Esempio n. 9
0
    def show_feed(self, id, page=1):
        """Render the detail page of a feed with paginated, classified entries.

        Redirects to the sign-in page when no user is signed in.  Entries
        on the requested page get ``is_spam`` and ``score`` attributes.  In
        addition, up to 10 spam and up to 10 ham entries are collected from
        the 500 most recently updated entries, and three feed links
        (unmodified, mixed, mixed with report) are prepared for the
        template.

        :param id: identifier used to look up the ``model.Feed``.
        :param page: page handed to ``paginate.Page``; defaults to 1.
        :returns: the redirect result, or the rendered
            ``feed/show_feed.mako`` template.
        """
        if not c.user:
            signin_url = url(controller='login', action='signin', id=None, return_to=url.current())
            return redirect(signin_url)

        c.feed = meta.find(model.Feed, id)
        guesser = bayes.Guesser(c.feed, c.user, config)
        query = c.feed.get_entries().order_by(model.FeedEntry.updated.desc())

        from webhelpers import paginate
        c.page = paginate.Page(query, page)

        for shown in c.page.items:
            shown.is_spam = guesser.is_spam(shown)
            shown.score = guesser.guess(shown)

        c.last_spam_entries = []
        c.last_ham_entries = []
        for rows_before, row in enumerate(query.limit(500)):
            row.is_spam = guesser.is_spam(row)

            # Each row goes into exactly one bucket, as long as that bucket
            # still holds fewer than 10 entries.
            if row.is_spam:
                bucket = c.last_spam_entries
            else:
                bucket = c.last_ham_entries
            if len(bucket) < 10:
                bucket.append(row)

            # Stop scanning as soon as both buckets are full.
            if len(c.last_spam_entries) >= 10 and len(c.last_ham_entries) >= 10:
                log.debug("breaking loop after %s rows" % rows_before)
                break

        unmodified_link = h.url_for(controller='feed', action='pipe')
        mixed_link = h.url_for(controller='bayes', action='mixed_rss', user_id=c.user.id)
        report_link = h.url_for(controller='bayes', action='mixed_rss_with_report', user_id=c.user.id)
        c.rss_feeds = [
            {'title': 'Unmodified', 'link': unmodified_link},
            {'title': 'Mixed', 'link': mixed_link},
            {'title': 'Mixed with Report', 'link': report_link},
        ]

        return render('feed/show_feed.mako')
Esempio n. 10
0
 def show_record(self, id):
     """Render the record page of a feed.

     :param id: identifier used to look up the ``model.Feed``.
     :returns: the rendered ``feed/record.mako`` template.
     """
     feed_record = meta.find(model.Feed, id)
     c.feed = feed_record
     return render('feed/record.mako')
Esempio n. 11
0
 def update(self, id):
     """Fetch a feed, flash the result of the fetch and go back.

     :param id: identifier used to look up the ``model.Feed``.
     :returns: the result of ``h.go_back()``.
     """
     target = meta.find(model.Feed, id)
     added = target.fetch()
     message = "added %s entries" % added
     h.flash(message)
     return h.go_back()