Example #1
0
    def show_score(self, id):
        """Render the Bayes scoring details for a single feed entry.

        Visitors without ``c.user`` are redirected to the sign-in action.
        Otherwise the entry is looked up by ``id``, its feed is looked up via
        ``entry.feed_id``, a ``Guesser`` is built for that feed and the
        current user, and the results are placed on the template context
        ``c`` before ``bayes/score.mako`` is rendered.

        :param id: identifier passed to ``meta.find(model.FeedEntry, ...)``.
        :returns: the rendered ``bayes/score.mako`` template, or the redirect
            to the sign-in action when ``c.user`` is falsy.
        """
        if not c.user:
            return redirect(url(controller='login', action='signin', id=None, return_to=url.current()))

        import operator

        c.entry = meta.find(model.FeedEntry, id)

        feed = meta.find(model.Feed, c.entry.feed_id)
        c.feed = feed
        guesser = Guesser(feed, c.user, config)

        # Classify once and reuse the result; the previous code stored
        # str(guess) and then immediately overwrote it with a second call.
        # NOTE(review): assumes guesser.guess() is a read-only scoring call.
        guess = guesser.guess(c.entry)

        log.debug("guess: %s", guess)
        log.debug("c.entry.title: %s", c.entry.title)

        c.score = guess
        c.pool = guesser.trainer.poolData('spam')
        c.is_spam = guesser.is_spam(c.entry)

        # Order each pool by the second element of its items, largest first.
        # sort() followed by reverse() is kept (instead of reverse=True) so
        # that items with equal keys come out in the same order as before.
        by_second = operator.itemgetter(1)

        c.pool_data_spam = guesser.trainer.poolData('spam')
        c.pool_data_spam.sort(key=by_second)
        c.pool_data_spam.reverse()

        c.pool_data_ham = guesser.trainer.poolData('ham')
        c.pool_data_ham.sort(key=by_second)
        c.pool_data_ham.reverse()

        # Distinct tokens the trainer extracts from the entry's relevant text.
        c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))

        return render('bayes/score.mako')
Example #2
0
    def mixed_rss_with_report(self, user_id, id):
        """Serve a feed for one user in which spam is folded into reports.

        The feed ``id`` is fetched, its entries (starting at the user's stored
        ``report_offset`` when one exists) are classified and pushed through a
        ``Reporter``. Queue boxes of type ``'ham'`` become individual feed
        items; boxes of type ``'spam'`` are handed to ``add_spam_report``.
        The new report offset is stored on the user's settings and committed.

        :param user_id: identifier passed to ``meta.find(model.User, ...)``.
        :param id: identifier passed to ``meta.find(model.Feed, ...)``.
        :returns: the generated feed serialised with ``writeString('utf-8')``.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s" % c.rss_user)
        source = meta.find(model.Feed, id)
        log.debug("feed_data.id %s" % source.id)

        # NOTE(review): the imported module is not used in this method; the
        # import is kept in case it has import-time side effects -- confirm.
        import feed
        source.fetch()

        rss = h.DefaultFeed(
            title=source.title,
            link=source.link,
            description="TESTING",
            language=source.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s' % c.base_url)

        guesser = Guesser(source, c.rss_user, config)
        settings = c.rss_user.get_bayes_feed_setting(source.id)
        meta.Session.add(settings)
        delta = h.timedelta_from_string(settings.summarize_at)
        log.debug("delta %s" % delta)

        # With a stored offset only entries from that id onwards are read;
        # without one every entry of the feed is read.
        if settings.report_offset:
            pending = source.get_entries().filter(model.FeedEntry.id >= settings.report_offset)
            entries = pending.order_by(model.FeedEntry.id).all()
        else:
            entries = source.get_entries().order_by(model.FeedEntry.id).all()
            log.warn("no report_offset available, read %s entries" % len(entries))

        from rssmonster.lib.reporter import Reporter
        # NOTE(review): meaning of the (None, None, delta, 30) arguments is
        # defined by Reporter and not visible here.
        reporter = Reporter(None, None, delta, 30)
        for candidate in entries:
            reporter.add_item(candidate, guesser.is_spam(candidate))

        for box in reporter.entry_queue:
            kind = box['type']

            if kind == 'spam':
                add_spam_report(rss, box['entries'])
                continue

            if kind != 'ham':
                continue

            # The summary template reads the entry from the template context.
            ham_entry = box['entry']
            c.entry = ham_entry
            ham_entry.is_spam = guesser.is_spam(ham_entry)
            rss.add_item(
                title=ham_entry.title,
                link=ham_entry.link,
                description=render('bayes/rss_summary.mako'),
                unique_id=ham_entry.uid,
                pubdate=ham_entry.updated,
            )

        settings.report_offset = reporter.offset_id()
        log.debug("settings.report_offset: %s" % settings.report_offset)
        log.debug("holding back: %s" % len(reporter.spam_entries))
        meta.Session.commit()
        return rss.writeString('utf-8')
Example #3
0
    def __mixed_rss__(self, user_id, id):
        """Serve a feed for one user with spam entries tagged in the title.

        The feed ``id`` is fetched and its 30 most recently updated entries
        are classified with the user's ``Guesser``. Every entry is emitted as
        a feed item; entries judged spam get a ``[SPAM]`` title prefix.

        :param user_id: identifier passed to ``meta.find(model.User, ...)``.
        :param id: identifier passed to ``meta.find(model.Feed, ...)``.
        :returns: the generated feed serialised with ``writeString('utf-8')``;
            ``response.content_type`` is set to ``application/atom+xml``.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s" % c.rss_user)
        source = meta.find(model.Feed, id)
        log.debug("feed_data.id %s" % source.id)

        # NOTE(review): the imported module is not used in this method; the
        # import is kept in case it has import-time side effects -- confirm.
        import feed
        source.fetch()

        rss = h.DefaultFeed(
            title=source.title,
            link=source.link,
            description="TESTING",
            language=source.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s' % c.base_url)

        guesser = Guesser(source, c.rss_user, config)
        newest_first = source.get_entries().order_by(model.FeedEntry.updated.desc())

        for item in newest_first.limit(30):
            # The summary template reads the entry from the template context.
            c.entry = item
            item.is_spam = guesser.is_spam(item)

            item_title = "[SPAM] %s" % item.title if item.is_spam else item.title

            rss.add_item(
                title=item_title,
                link=item.link,
                description=render('bayes/rss_summary.mako'),
                unique_id=item.uid,
                pubdate=item.updated,
            )

        response.content_type = 'application/atom+xml'
        return rss.writeString('utf-8')
Example #4
0
    def redo(self, id):
        """Replay the stored classifications of one feed into a cleared guesser.

        Visitors without ``c.user`` are redirected to the sign-in action.
        Otherwise ``guesser.clear()`` is called and every
        ``model.Classification`` joined to an entry of feed ``id`` is passed
        to ``self.__mark_as__`` with its stored pool. Before each entry is
        replayed, the guesser is asked for its own verdict; entries it already
        places in the stored pool are counted as needlessly trained. The
        outcome is reported through ``h.flash``.

        :param id: identifier of the feed, used for ``meta.find(model.Feed,
            ...)`` and for the ``feed_id`` filter.
        :returns: the result of ``h.go_back()``, or the redirect to the
            sign-in action when ``c.user`` is falsy.
        """
        if not c.user:
            return redirect(url(controller='login', action='signin', id=None, return_to=url.current()))

        c.feed = meta.find(model.Feed, id)

        query = (
            meta.Session
            .query(model.Classification)
            .join(model.FeedEntry)
            .filter_by(feed_id=id)
        )

        guesser = Guesser(c.feed, c.user, config)
        guesser.clear()

        cnt = 0
        needles_cnt = 0
        for classification in query:
            # Ask the guesser once per entry (it used to be asked up to twice).
            # The verdict must be taken before __mark_as__ runs for this entry.
            predicted_spam = guesser.is_spam(classification.entry, use_classified=False)
            if predicted_spam:
                already_right = classification.pool == 'spam'
            else:
                already_right = classification.pool == 'ham'
            if already_right:
                needles_cnt += 1

            # NOTE(review): meaning of the trailing True is defined by
            # __mark_as__, which is not visible here.
            self.__mark_as__(classification.entry, classification.pool, guesser, True)
            cnt += 1

        guesser.save()
        log.debug("redo finished: %s entries replayed, %s needless", cnt, needles_cnt)

        if needles_cnt > 0:
            h.flash("%d entries were needlessly trained (total: %s)" % (needles_cnt, cnt))
        else:
            h.flash("learned %s entries" % cnt)

        return h.go_back()