Ejemplo n.º 1
0
    def show_guesser(self, id):
        """Render the Bayes guesser page for the feed identified by ``id``.

        A request without ``c.user`` is redirected to the ``signin`` action of
        the ``login`` controller, carrying the current URL as ``return_to``.

        Template context set for ``bayes/guesser.mako``: ``c.feed``,
        ``c.pool_data_spam``, ``c.pool_data_ham``, ``c.actions`` and
        ``c.stopwords``.
        """
        if not c.user:
            signin_url = url(controller='login', action='signin', id=None,
                             return_to=url.current())
            return redirect(signin_url)

        c.feed = meta.find(model.Feed, id)
        trainer = Guesser(c.feed, c.user, config).trainer

        import operator
        second_field = operator.itemgetter(1)

        # Each pool list is sorted ascending on its second field and then
        # reversed in place, so the largest values come first.
        # presumably field 1 is a per-token count -- verify against poolData()
        for pool_name, context_attr in (('spam', 'pool_data_spam'),
                                        ('ham', 'pool_data_ham')):
            pool_rows = trainer.poolData(pool_name)
            setattr(c, context_attr, pool_rows)
            pool_rows.sort(key=second_field)
            pool_rows.reverse()

        feed_details_link = h.url_for(controller='feed', action='show_feed', id=id)
        c.actions = [{'link': feed_details_link, 'text': 'Feed Details'}]

        # Stored as built by filter_by(); no .all() is called here.
        stopword_query = meta.Session.query(model.Stopword)
        c.stopwords = stopword_query.filter_by(feed_id=id, user_id=c.user.id)

        return render('bayes/guesser.mako')
Ejemplo n.º 2
0
    def show_score(self, id):
        """Render the Bayes score page for the feed entry identified by ``id``.

        A request without ``c.user`` is redirected to the ``signin`` action of
        the ``login`` controller, carrying the current URL as ``return_to``.

        Template context set for ``bayes/score.mako``: ``c.entry``, ``c.feed``,
        ``c.score``, ``c.pool``, ``c.is_spam``, ``c.pool_data_spam``,
        ``c.pool_data_ham`` and ``c.tokens``.
        """
        if not c.user:
            return redirect(url(controller='login', action='signin', id=None, return_to=url.current()))

        c.entry = meta.find(model.FeedEntry, id)

        feed = meta.find(model.Feed, c.entry.feed_id)
        c.feed = feed
        guesser = Guesser(feed, c.user, config)
        guess = guesser.guess(c.entry)

        # Lazy %-args: formatting happens only when DEBUG is enabled, and a
        # tuple-valued guess cannot break the format string.
        log.debug("guess: %s", guess)
        log.debug("c.entry.title: %s", c.entry.title)

        # Reuse the guess computed above. The entry used to be classified a
        # second time, right after a str(guess) assignment that was
        # immediately overwritten.
        c.score = guess
        c.pool = guesser.trainer.poolData('spam')
        c.is_spam = guesser.is_spam(c.entry)

        import operator
        by_second_field = operator.itemgetter(1)

        # Sort ascending on the second field, then reverse in place: largest
        # values first. Kept as sort()+reverse() so that the relative order of
        # equal keys stays exactly as before.
        c.pool_data_spam = guesser.trainer.poolData('spam')
        c.pool_data_spam.sort(key=by_second_field)
        c.pool_data_spam.reverse()

        c.pool_data_ham = guesser.trainer.poolData('ham')
        c.pool_data_ham.sort(key=by_second_field)
        c.pool_data_ham.reverse()

        # Distinct tokens the trainer extracts from the entry's relevant text.
        c.tokens = set(guesser.trainer.getTokens(__relevant__(c.entry)))

        return render('bayes/score.mako')
Ejemplo n.º 3
0
    def mixed_rss_with_report(self, user_id, id):
        """Return feed ``id`` as a feed string with spam grouped into reports.

        Entries are loaded in ascending id order, starting at the user's stored
        ``report_offset`` when one exists (all entries otherwise), classified
        with the user's ``Guesser`` and handed to a ``Reporter``. The
        reporter's queue is then written out: ``'ham'`` boxes become regular
        items, ``'spam'`` boxes become one summary item via
        ``add_spam_report``. The reporter's new offset is stored on the user's
        settings and committed.

        :param user_id: id of the ``model.User`` the feed is built for.
        :param id: id of the ``model.Feed`` to serve.
        :returns: the result of ``feed.writeString('utf-8')``.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s", c.rss_user)
        feed_data = meta.find(model.Feed, id)
        log.debug("feed_data.id %s", feed_data.id)

        # NOTE(review): the module bound here is shadowed by the local ``feed``
        # assigned below; the import is kept in case it is needed for its
        # side effects -- confirm.
        import feed
        # The return value was never used, so it is no longer bound to a name.
        feed_data.fetch()

        feed = h.DefaultFeed(
            title=feed_data.title,
            link=feed_data.link,
            description="TESTING",
            language=feed_data.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s', c.base_url)

        guesser = Guesser(feed_data, c.rss_user, config)
        settings = c.rss_user.get_bayes_feed_setting(feed_data.id)
        meta.Session.add(settings)
        delta = h.timedelta_from_string(settings.summarize_at)
        log.debug("delta %s", delta)

        if not settings.report_offset:
            entries = feed_data.get_entries().order_by(model.FeedEntry.id).all()
            # Logger.warn is a deprecated alias of Logger.warning.
            log.warning("no report_offset available, read %s entries", len(entries))
        else:
            entries = feed_data.get_entries().filter(model.FeedEntry.id >= settings.report_offset).order_by(model.FeedEntry.id).all()

        from rssmonster.lib.reporter import Reporter
        # NOTE(review): meaning of the Reporter arguments (None, None, delta,
        # 30) is not visible from here -- confirm against Reporter.
        reporter = Reporter(None, None, delta, 30)
        for entry in entries:
            reporter.add_item(entry, guesser.is_spam(entry))

        for entry_box in reporter.entry_queue:
            if entry_box['type'] == 'ham':
                # The summary template reads the current entry from ``c``.
                c.entry = entry_box['entry']
                c.entry.is_spam = guesser.is_spam(c.entry)
                feed.add_item(title=c.entry.title,
                              link=c.entry.link,
                              description=render('bayes/rss_summary.mako'),
                              unique_id=c.entry.uid,
                              pubdate=c.entry.updated)

            elif entry_box['type'] == 'spam':
                add_spam_report(feed, entry_box['entries'])

        # Store the reporter's offset so the next request starts from it.
        settings.report_offset = reporter.offset_id()
        log.debug("settings.report_offset: %s", settings.report_offset)
        log.debug("holding back: %s", len(reporter.spam_entries))
        meta.Session.commit()
        return feed.writeString('utf-8')
Ejemplo n.º 4
0
 def add(self):
     """Create a feed from the ``url`` request parameter.

     Without a non-empty ``url`` parameter the add form
     (``feed/add.mako``) is rendered. Otherwise a ``model.Feed`` with that
     URL is added to the session and committed, and the client is
     redirected to the ``show_list`` action of the ``feed`` controller.
     """
     feed_url = request.params.get('url')
     if not feed_url:
         return render('feed/add.mako')

     new_feed = model.Feed()
     new_feed.url = feed_url

     session = meta.Session
     session.add(new_feed)
     session.commit()

     list_url = url(controller='feed', action='show_list')
     return redirect(list_url)
Ejemplo n.º 5
0
    def __mixed_rss__(self, user_id, id):
        """Return feed ``id`` as a feed string with spam items tagged.

        The 30 most recently updated entries are classified with the
        ``Guesser`` of user ``user_id``. Entries judged spam get a
        ``[SPAM]`` title prefix. The response content type is set to
        ``application/atom+xml``.

        :returns: the result of ``writeString('utf-8')`` on the built feed.
        """
        c.rss_user = meta.find(model.User, user_id)
        log.debug("c.rss_user: %s" % c.rss_user)
        source_feed = meta.find(model.Feed, id)
        log.debug("feed_data.id %s" % source_feed.id)

        # NOTE(review): nothing below uses this module object; presumably the
        # import is wanted for its side effects -- confirm.
        import feed
        # presumably refreshes the stored entries -- the result is not used
        source_feed.fetch()

        rss_out = h.DefaultFeed(
            title=source_feed.title,
            link=source_feed.link,
            description="TESTING",
            language=source_feed.language,
        )

        c.base_url = config['base_url']
        log.debug('c.base_url: %s' % c.base_url)

        guesser = Guesser(source_feed, c.rss_user, config)
        newest_first = source_feed.get_entries().order_by(model.FeedEntry.updated.desc())

        for entry in newest_first.limit(30):
            # The summary template reads the current entry from ``c``.
            c.entry = entry
            entry.is_spam = guesser.is_spam(entry)

            item_title = ("[SPAM] %s" % entry.title) if entry.is_spam else entry.title

            rss_out.add_item(title=item_title,
                             link=entry.link,
                             description=render('bayes/rss_summary.mako'),
                             unique_id=entry.uid,
                             pubdate=entry.updated)

        response.content_type = 'application/atom+xml'
        return rss_out.writeString('utf-8')
Ejemplo n.º 6
0
def add_spam_report(feed, spam_entries):
    """Append one summary item for ``spam_entries`` to ``feed``.

    Does nothing when ``spam_entries`` is empty (or ``None``). Otherwise the
    entries are exposed to the ``bayes/spam_report.mako`` template through
    ``c.entries`` and a single item is added whose:

    * title embeds the ``updated`` value of the first entry,
    * unique id is the MD5 hex digest of that title,
    * pubdate is the greatest truthy ``updated`` value among the entries
      (``None`` if no entry has one).

    :param feed: feed object providing ``add_item``.
    :param spam_entries: indexable sequence of entries with an ``updated``
        attribute.
    """
    if not spam_entries:
        return

    c.entries = spam_entries
    # NOTE(review): other handlers set ``c.base_url``; this one sets
    # ``c.baseurl`` -- confirm which spelling the report template reads.
    c.baseurl = config['base_url']

    # Newest truthy ``updated`` value across all entries.
    updated = None
    for entry in spam_entries:
        if entry.updated and (not updated or entry.updated > updated):
            updated = entry.updated

    title = "RssMonster - Spam Summary - %s" % spam_entries[0].updated

    # The id is derived from the title alone, so two reports whose first
    # entry has the same ``updated`` value share a unique_id.
    hasher = md5()
    hasher.update(title)

    # NOTE(review): the link is a hard-coded placeholder.
    feed.add_item(title=title,
                  link="http://example.com",
                  description=render('bayes/spam_report.mako'),
                  unique_id=hasher.hexdigest(),
                  pubdate=updated)
Ejemplo n.º 7
0
    def show_feed(self, id, page=1):
        """Render the detail page of the feed identified by ``id``.

        A request without ``c.user`` is redirected to the ``signin`` action of
        the ``login`` controller, carrying the current URL as ``return_to``.

        Template context set for ``feed/show_feed.mako``:

        * ``c.feed`` -- the feed record,
        * ``c.page`` -- a ``paginate.Page`` over the entries, newest first;
          each item on the page gets ``is_spam`` and ``score`` attributes,
        * ``c.last_spam_entries`` / ``c.last_ham_entries`` -- up to 10 entries
          each, taken from at most the 500 newest entries,
        * ``c.rss_feeds`` -- titles and links of the available RSS variants.
        """
        if not c.user:
            signin_url = url(controller='login', action='signin', id=None,
                             return_to=url.current())
            return redirect(signin_url)

        c.feed = meta.find(model.Feed, id)
        guesser = bayes.Guesser(c.feed, c.user, config)
        newest_first = c.feed.get_entries().order_by(model.FeedEntry.updated.desc())

        # Pagination reference:
        # http://bel-epa.com/pylonsdocs/thirdparty/webhelpers/paginate.html
        from webhelpers import paginate
        c.page = paginate.Page(newest_first, page)

        for shown in c.page.items:
            shown.is_spam = guesser.is_spam(shown)
            shown.score = guesser.guess(shown)

        # The local names alias the very lists stored on ``c``.
        c.last_spam_entries = recent_spam = []
        c.last_ham_entries = recent_ham = []
        for rows_seen, candidate in enumerate(newest_first.limit(500)):
            candidate.is_spam = guesser.is_spam(candidate)

            if candidate.is_spam:
                if len(recent_spam) < 10:
                    recent_spam.append(candidate)
            elif len(recent_ham) < 10:
                recent_ham.append(candidate)

            # Stop scanning once both lists hold 10 entries.
            if len(recent_spam) >= 10 and len(recent_ham) >= 10:
                log.debug("breaking loop after %s rows" % rows_seen)
                break

        c.rss_feeds = [
            dict(title='Unmodified',
                 link=h.url_for(controller='feed', action='pipe')),
            dict(title='Mixed',
                 link=h.url_for(controller='bayes', action='mixed_rss', user_id=c.user.id)),
            dict(title='Mixed with Report',
                 link=h.url_for(controller='bayes', action='mixed_rss_with_report', user_id=c.user.id)),
        ]

        return render('feed/show_feed.mako')
Ejemplo n.º 8
0
 def show_list(self):
     """Render ``feed/list.mako`` with every ``model.Feed`` row in ``c.feeds``."""
     c.feeds = meta.Session.query(model.Feed).all()
     return render('feed/list.mako')
Ejemplo n.º 9
0
 def show_record(self, id):
     """Render ``feed/record.mako`` for the feed looked up by ``id``.

     The result of ``meta.find(model.Feed, id)`` is exposed to the template
     as ``c.feed``.
     """
     record = meta.find(model.Feed, id)
     c.feed = record
     return render('feed/record.mako')