Python getNewFeedEntriesの例、agglib.getNewFeedEntries Pythonの例

コード例 #1

0

ファイルを表示

def main():
    """
    Build aggregator report pages with Bayes rating links.
    """
    # Create a new Bayes guesser
    guesser = Bayes()

    # Attempt to load Bayes data, ignoring IOError on first run.
    try:
        guesser.load(BAYES_DATA_FN)
    except IOError:
        pass

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]
    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    # Score the new entries using the Bayesian guesser
    entries = scoreEntries(guesser, entries)

    # Write out the current run's aggregator report.
    out_fn = time.strftime(HTML_FN)
    writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL,
                        ENTRY_TMPL, PAGE_TMPL)

    # Close the databases and save the current guesser's state to disk.
    closeDBs(feed_db, entry_db)
    guesser.save(BAYES_DATA_FN)

コード例 #2

0

ファイルを表示

ファイル: agg04_emailsubs.py プロジェクト: lmorchard/hacking_rss_and_atom

def main(): 
    """
    Poll subscribed feeds and email out entries.
    """
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)

    feeds   = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]
    
    entries = getNewFeedEntries(feeds, feed_db, entry_db)
    
    if len(entries) > 0:
        emailEntries(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, entries)
    
    closeDBs(feed_db, entry_db)

コード例 #3

0

ファイルを表示

ファイル: agg04_emailsubs.py プロジェクト: openstake/hacking_rss_and_atom

def main():
    """
    Poll subscribed feeds and email out entries.
    """
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)

    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]

    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    if len(entries) > 0:
        emailEntries(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, entries)

    closeDBs(feed_db, entry_db)

コード例 #4

0

ファイルを表示

ファイル: agg03_emailsubs.py プロジェクト: lmorchard/hacking_rss_and_atom

def main(): 
    """
    Poll subscribed feeds and produce aggregator page.
    """
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)

    feeds   = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]
    
    entries = getNewFeedEntries(feeds, feed_db, entry_db)
    
    if len(entries) > 0:
        out_fn = HTML_FN % time.strftime("%Y%m%d-%H%M%S")
        writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, 
            ENTRY_TMPL, PAGE_TMPL)
        emailAggregatorPage(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, out_fn)
    
    closeDBs(feed_db, entry_db)

コード例 #5

0

ファイルを表示

def main():
    """
    Poll subscribed feeds and produce aggregator page.
    """
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)

    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]

    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    if len(entries) > 0:
        out_fn = HTML_FN % time.strftime("%Y%m%d-%H%M%S")
        writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL,
                            ENTRY_TMPL, PAGE_TMPL)
        emailAggregatorPage(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, out_fn)

    closeDBs(feed_db, entry_db)

コード例 #6

0

ファイルを表示

def main(): 
    """
    Poll subscribed feeds and send off IMs
    """
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)

    # Create a new IM connection.
    conn = IM_CLASS(IM_USER, IM_PASSWD)
    conn.connect()
    
    # Read in the subscriptions
    feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]
    
    # Iterate through subscribed feeds.
    for feed in feeds:
        # Get new entries for the current feed and send them off
        entries = getNewFeedEntries([feed], feed_db, entry_db)
        if len(entries) > 0:
            sendEntriesViaIM(conn, IM_TO, entries, IM_CHUNK, 
                    FEED_HDR_TMPL, ENTRY_TMPL, MSG_TMPL)
    
    closeDBs(feed_db, entry_db)

コード例 #7

0

ファイルを表示

def main():
    """
    Perform a test run of the FeedFilter using defaults.
    """
    # Create a new Bayes guesser, attempt to load data
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]
    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    # Build the feed filter.
    f = BayesFilter(guesser, entries)
    f.FEED_META['feed.title'] = FEED_TITLE
    f.FEED_META['feed.tagline'] = FEED_TAGLINE

    # Output the feed as both RSS and Atom.
    open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss())
    open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom())

    # Close the databases and save the current guesser's state to disk.
    closeDBs(feed_db, entry_db)

コード例 #8

0

ファイルを表示

ファイル: ch15_bayes_filter.py プロジェクト: lmorchard/hacking_rss_and_atom

def main():
    """
    Perform a test run of the FeedFilter using defaults.
    """
    # Create a new Bayes guesser, attempt to load data
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)
    
    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds   = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]
    entries = getNewFeedEntries(feeds, feed_db, entry_db)
    
    # Build the feed filter.
    f = BayesFilter(guesser, entries)
    f.FEED_META['feed.title']   = FEED_TITLE
    f.FEED_META['feed.tagline'] = FEED_TAGLINE
    
    # Output the feed as both RSS and Atom.
    open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss())
    open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom())
    
    # Close the databases and save the current guesser's state to disk.
    closeDBs(feed_db, entry_db)

コード例 #9

0

ファイルを表示

ファイル: ch17_feed_reposter.py プロジェクト: openstake/hacking_rss_and_atom

def main():
    """
    Process new feed entries and repost to the blog API.
    """
    # Get a handle on the blog API server
    srv = xmlrpclib.ServerProxy(API_URI, verbose=0)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]
    for e in getNewFeedEntries(feeds, feed_db, entry_db):

        # Get the entry and feed metadata.
        feed, entry = e.data.feed, e.entry

        # Build a blog post title using feed and entry titles.
        title = u'%s &#8212; %s' % (feed.get(
            'title', u'untitled'), entry.get('title', u'untitled'))

        # Generate an ISO8601 date using the feed entry modification,
        # with current date/time as default.
        date = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                             entry.get('modified_parsed', time.gmtime()))

        # Build blog post body content from what's available in the
        # feed entry.
        content_out = []
        if entry.has_key('summary'):
            content_out.append(entry.summary)
        content_out.extend(
            [c.value for c in entry.get('content', []) if 'html' in c.type])
        content = '<br />\n'.join(content_out)

        # Build the blog post content from feed and entry.
        desc = u"""
            %(content)s
            <br />
            [ <a href="%(entry.link)s">Originally</a> posted 
              at <a href="%(feed.link)s">%(feed.title)s</a> ]
        """ % {
            'content': content,
            'entry.link': entry.get('link', u''),
            'feed.title': feed.get('title', u''),
            'feed.link': feed.get('link', u''),
        }

        # Build post item data, call blog API via XML-RPC
        post = {
            'title': title,
            'dateCreated': date,
            'description': desc,
            'mt_convert_breaks': False
        }
        try:
            srv.metaWeblog.newPost(API_BLOGID, API_USER, API_PASSWD, post,
                                   True)
            print "Posted %s" % title
        except KeyboardInterrupt:
            raise
        except:
            print "Problem posting %s" % title

コード例 #10

0

ファイルを表示

ファイル: ch17_feed_blog.py プロジェクト: openstake/hacking_rss_and_atom

def main():
    """
    Fire up the feed blog generator, write the static HTML to disk.
    """
    # Try to load up entry history, start with an empty list in
    # case of any problems.
    try:
        entries = pickle.load(open(HISTORY_FN, 'rb'))
    except:
        entries = []

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()]

    # Gather new entries from all feeds.
    subs_details = []
    for feed_uri in feeds:

        # HACK: Grab 'custom' feed record details before agglib update.
        if feed_db.has_key(feed_uri):
            feed_rec = feed_db[feed_uri]
            feed_link = feed_rec.get('link', '#')
            feed_title = feed_rec.get('title', 'untitled')

        # Get new entries, if any.
        new_entries = getNewFeedEntries([feed_uri], feed_db, entry_db)

        # If there's no record of the feed in the DB, skip it.
        if not feed_db.has_key(feed_uri): continue

        # Update feed record details from fresh feed, if any entries found.
        if len(new_entries) > 0:
            feed = new_entries[0].data.feed
            feed_link = feed.get('link', '#')
            feed_title = feed.get('title', 'untitled')

        # HACK: Update 'custom' feed record details after agglib update.
        feed_rec = feed_db[feed_uri]
        feed_rec['link'] = feed_link
        feed_rec['title'] = feed_title
        feed_db[feed_uri] = feed_rec

        # Add details for this feed to the sidebar content.
        subs_details.append({
            'feed.link': feed_link,
            'feed.title': feed_title,
            'feed.url': feed_uri
        })

        # Skip ahead if no new entries found.
        if len(new_entries) < 1: continue

        # Make sure entries have a modified date, using now by default.
        for e in new_entries:
            if not e.entry.has_key('modified_parsed'):
                e.entry['modified_parsed'] = time.gmtime()

        # Tack the list of new entries onto the head of the main list.
        entries = new_entries + entries

    # Sort the subscription details, build the sidebar content.
    subs_details.sort(lambda a, b: cmp(a['feed.title'], b['feed.title']))
    subs_out = [SUBSCRIPTION_TMPL % x for x in subs_details]

    # Sort all the entries, truncate to desired length.
    entries.sort()
    entries = entries[:MAX_ENTRIES]

    # Write out the current run's aggregator report.
    out_fn = time.strftime(ARCHIVE_FN)
    writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL,
                        ENTRY_TMPL, PAGE_TMPL)

    # Build the page template from the template template.
    out = SHELL_TMPL % {
        'subs': '\n'.join(subs_out),
        'main': open(out_fn).read()
    }
    open(BLOG_FN, 'w').write(out)

    # Close the databases and save the entry history back out to disk.
    closeDBs(feed_db, entry_db)
    pickle.dump(entries, open(HISTORY_FN, 'wb'))

コード例 #11

0

ファイルを表示

ファイル: ch17_feed_reposter.py プロジェクト: lmorchard/hacking_rss_and_atom

def main():
    """
    Process new feed entries and repost to the blog API.
    """
    # Get a handle on the blog API server
    srv = xmlrpclib.ServerProxy(API_URI, verbose=0)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds   = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]
    for e in getNewFeedEntries(feeds, feed_db, entry_db):
        
        # Get the entry and feed metadata.
        feed, entry = e.data.feed, e.entry
        
        # Build a blog post title using feed and entry titles.
        title = u'%s &#8212; %s' % ( feed.get('title', u'untitled'),
                                     entry.get('title', u'untitled') )      
        
        # Generate an ISO8601 date using the feed entry modification,
        # with current date/time as default.
        date = time.strftime('%Y-%m-%dT%H:%M:%SZ', 
                             entry.get('modified_parsed', 
                                       time.gmtime()))
        
        # Build blog post body content from what's available in the
        # feed entry.
        content_out = []
        if entry.has_key('summary'):
            content_out.append(entry.summary)
        content_out.extend([ c.value for c in entry.get('content', [])
                             if 'html' in c.type ])
        content = '<br />\n'.join(content_out) 
            
        # Build the blog post content from feed and entry.
        desc = u"""
            %(content)s
            <br />
            [ <a href="%(entry.link)s">Originally</a> posted 
              at <a href="%(feed.link)s">%(feed.title)s</a> ]
        """ % {
            'content'       : content,
            'entry.link'    : entry.get('link', u''),
            'feed.title'    : feed.get('title', u''),
            'feed.link'     : feed.get('link', u''),
        }
        
        # Build post item data, call blog API via XML-RPC
        post  = {
            'title'             : title,
            'dateCreated'       : date,
            'description'       : desc,
            'mt_convert_breaks' : False
        }
        try:
            srv.metaWeblog.newPost(API_BLOGID, API_USER, API_PASSWD,
                                   post, True)
            print "Posted %s" % title
        except KeyboardInterrupt:
            raise
        except:
            print "Problem posting %s" % title

コード例 #12

0

ファイルを表示

ファイル: ch17_feed_blog.py プロジェクト: lmorchard/hacking_rss_and_atom

def main():
    """
    Fire up the feed blog generator, write the static HTML to disk.
    """
    # Try to load up entry history, start with an empty list in
    # case of any problems.
    try:    entries = pickle.load(open(HISTORY_FN, 'rb'))
    except: entries = []
    
    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds   = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ]

    # Gather new entries from all feeds.
    subs_details = []
    for feed_uri in feeds:

        # HACK: Grab 'custom' feed record details before agglib update.
        if feed_db.has_key(feed_uri):
            feed_rec   = feed_db[feed_uri]
            feed_link  = feed_rec.get('link',  '#')
            feed_title = feed_rec.get('title', 'untitled')
            
        # Get new entries, if any.
        new_entries = getNewFeedEntries([feed_uri], feed_db, entry_db)

        # If there's no record of the feed in the DB, skip it.
        if not feed_db.has_key(feed_uri): continue
        
        # Update feed record details from fresh feed, if any entries found.
        if len(new_entries) > 0:
            feed       = new_entries[0].data.feed
            feed_link  = feed.get('link',  '#')
            feed_title = feed.get('title', 'untitled')
        
        # HACK: Update 'custom' feed record details after agglib update.
        feed_rec = feed_db[feed_uri]
        feed_rec['link']  = feed_link
        feed_rec['title'] = feed_title
        feed_db[feed_uri] = feed_rec
        
        # Add details for this feed to the sidebar content.
        subs_details.append({
            'feed.link'  : feed_link,
            'feed.title' : feed_title,
            'feed.url'   : feed_uri
        })
        
        # Skip ahead if no new entries found.
        if len(new_entries) < 1: continue
            
        # Make sure entries have a modified date, using now by default.
        for e in new_entries:
            if not e.entry.has_key('modified_parsed'):
                e.entry['modified_parsed'] = time.gmtime()

        # Tack the list of new entries onto the head of the main list.
        entries = new_entries + entries
    
    # Sort the subscription details, build the sidebar content.
    subs_details.sort(lambda a,b: cmp( a['feed.title'], 
                                       b['feed.title'] ))
    subs_out = [ SUBSCRIPTION_TMPL % x for x in subs_details ]
    
    # Sort all the entries, truncate to desired length.
    entries.sort()
    entries = entries[:MAX_ENTRIES]

    # Write out the current run's aggregator report.
    out_fn = time.strftime(ARCHIVE_FN)
    writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, 
        ENTRY_TMPL, PAGE_TMPL)
    
    # Build the page template from the template template.
    out = SHELL_TMPL % {
        'subs' : '\n'.join(subs_out),
        'main' : open(out_fn).read()
    }
    open(BLOG_FN, 'w').write(out)
    
    # Close the databases and save the entry history back out to disk.
    closeDBs(feed_db, entry_db)
    pickle.dump(entries, open(HISTORY_FN, 'wb'))