def main(): """ Build aggregator report pages with Bayes rating links. """ # Create a new Bayes guesser guesser = Bayes() # Attempt to load Bayes data, ignoring IOError on first run. try: guesser.load(BAYES_DATA_FN) except IOError: pass # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] entries = getNewFeedEntries(feeds, feed_db, entry_db) # Score the new entries using the Bayesian guesser entries = scoreEntries(guesser, entries) # Write out the current run's aggregator report. out_fn = time.strftime(HTML_FN) writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, ENTRY_TMPL, PAGE_TMPL) # Close the databases and save the current guesser's state to disk. closeDBs(feed_db, entry_db) guesser.save(BAYES_DATA_FN)
def main(): """ Poll subscribed feeds and email out entries. """ feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] entries = getNewFeedEntries(feeds, feed_db, entry_db) if len(entries) > 0: emailEntries(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, entries) closeDBs(feed_db, entry_db)
def main(): """ Poll subscribed feeds and email out entries. """ feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] entries = getNewFeedEntries(feeds, feed_db, entry_db) if len(entries) > 0: emailEntries(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, entries) closeDBs(feed_db, entry_db)
def main(): """ Poll subscribed feeds and produce aggregator page. """ feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] entries = getNewFeedEntries(feeds, feed_db, entry_db) if len(entries) > 0: out_fn = HTML_FN % time.strftime("%Y%m%d-%H%M%S") writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, ENTRY_TMPL, PAGE_TMPL) emailAggregatorPage(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, out_fn) closeDBs(feed_db, entry_db)
def main(): """ Poll subscribed feeds and produce aggregator page. """ feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] entries = getNewFeedEntries(feeds, feed_db, entry_db) if len(entries) > 0: out_fn = HTML_FN % time.strftime("%Y%m%d-%H%M%S") writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, ENTRY_TMPL, PAGE_TMPL) emailAggregatorPage(FROM_ADDR, TO_ADDR, SUBJECT, SMTP_HOST, out_fn) closeDBs(feed_db, entry_db)
def main(): """ Poll subscribed feeds and send off IMs """ feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) # Create a new IM connection. conn = IM_CLASS(IM_USER, IM_PASSWD) conn.connect() # Read in the subscriptions feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] # Iterate through subscribed feeds. for feed in feeds: # Get new entries for the current feed and send them off entries = getNewFeedEntries([feed], feed_db, entry_db) if len(entries) > 0: sendEntriesViaIM(conn, IM_TO, entries, IM_CHUNK, FEED_HDR_TMPL, ENTRY_TMPL, MSG_TMPL) closeDBs(feed_db, entry_db)
def main(): """ Perform a test run of the FeedFilter using defaults. """ # Create a new Bayes guesser, attempt to load data guesser = Bayes() guesser.load(BAYES_DATA_FN) # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] entries = getNewFeedEntries(feeds, feed_db, entry_db) # Build the feed filter. f = BayesFilter(guesser, entries) f.FEED_META['feed.title'] = FEED_TITLE f.FEED_META['feed.tagline'] = FEED_TAGLINE # Output the feed as both RSS and Atom. open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss()) open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom()) # Close the databases and save the current guesser's state to disk. closeDBs(feed_db, entry_db)
def main(): """ Perform a test run of the FeedFilter using defaults. """ # Create a new Bayes guesser, attempt to load data guesser = Bayes() guesser.load(BAYES_DATA_FN) # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] entries = getNewFeedEntries(feeds, feed_db, entry_db) # Build the feed filter. f = BayesFilter(guesser, entries) f.FEED_META['feed.title'] = FEED_TITLE f.FEED_META['feed.tagline'] = FEED_TAGLINE # Output the feed as both RSS and Atom. open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss()) open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom()) # Close the databases and save the current guesser's state to disk. closeDBs(feed_db, entry_db)
def main(): """ Process new feed entries and repost to the blog API. """ # Get a handle on the blog API server srv = xmlrpclib.ServerProxy(API_URI, verbose=0) # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] for e in getNewFeedEntries(feeds, feed_db, entry_db): # Get the entry and feed metadata. feed, entry = e.data.feed, e.entry # Build a blog post title using feed and entry titles. title = u'%s — %s' % (feed.get( 'title', u'untitled'), entry.get('title', u'untitled')) # Generate an ISO8601 date using the feed entry modification, # with current date/time as default. date = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.get('modified_parsed', time.gmtime())) # Build blog post body content from what's available in the # feed entry. content_out = [] if entry.has_key('summary'): content_out.append(entry.summary) content_out.extend( [c.value for c in entry.get('content', []) if 'html' in c.type]) content = '<br />\n'.join(content_out) # Build the blog post content from feed and entry. desc = u""" %(content)s <br /> [ <a href="%(entry.link)s">Originally</a> posted at <a href="%(feed.link)s">%(feed.title)s</a> ] """ % { 'content': content, 'entry.link': entry.get('link', u''), 'feed.title': feed.get('title', u''), 'feed.link': feed.get('link', u''), } # Build post item data, call blog API via XML-RPC post = { 'title': title, 'dateCreated': date, 'description': desc, 'mt_convert_breaks': False } try: srv.metaWeblog.newPost(API_BLOGID, API_USER, API_PASSWD, post, True) print "Posted %s" % title except KeyboardInterrupt: raise except: print "Problem posting %s" % title
def main(): """ Fire up the feed blog generator, write the static HTML to disk. """ # Try to load up entry history, start with an empty list in # case of any problems. try: entries = pickle.load(open(HISTORY_FN, 'rb')) except: entries = [] # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [x.strip() for x in open(FEEDS_FN, "r").readlines()] # Gather new entries from all feeds. subs_details = [] for feed_uri in feeds: # HACK: Grab 'custom' feed record details before agglib update. if feed_db.has_key(feed_uri): feed_rec = feed_db[feed_uri] feed_link = feed_rec.get('link', '#') feed_title = feed_rec.get('title', 'untitled') # Get new entries, if any. new_entries = getNewFeedEntries([feed_uri], feed_db, entry_db) # If there's no record of the feed in the DB, skip it. if not feed_db.has_key(feed_uri): continue # Update feed record details from fresh feed, if any entries found. if len(new_entries) > 0: feed = new_entries[0].data.feed feed_link = feed.get('link', '#') feed_title = feed.get('title', 'untitled') # HACK: Update 'custom' feed record details after agglib update. feed_rec = feed_db[feed_uri] feed_rec['link'] = feed_link feed_rec['title'] = feed_title feed_db[feed_uri] = feed_rec # Add details for this feed to the sidebar content. subs_details.append({ 'feed.link': feed_link, 'feed.title': feed_title, 'feed.url': feed_uri }) # Skip ahead if no new entries found. if len(new_entries) < 1: continue # Make sure entries have a modified date, using now by default. for e in new_entries: if not e.entry.has_key('modified_parsed'): e.entry['modified_parsed'] = time.gmtime() # Tack the list of new entries onto the head of the main list. entries = new_entries + entries # Sort the subscription details, build the sidebar content. subs_details.sort(lambda a, b: cmp(a['feed.title'], b['feed.title'])) subs_out = [SUBSCRIPTION_TMPL % x for x in subs_details] # Sort all the entries, truncate to desired length. entries.sort() entries = entries[:MAX_ENTRIES] # Write out the current run's aggregator report. out_fn = time.strftime(ARCHIVE_FN) writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, ENTRY_TMPL, PAGE_TMPL) # Build the page template from the template template. out = SHELL_TMPL % { 'subs': '\n'.join(subs_out), 'main': open(out_fn).read() } open(BLOG_FN, 'w').write(out) # Close the databases and save the entry history back out to disk. closeDBs(feed_db, entry_db) pickle.dump(entries, open(HISTORY_FN, 'wb'))
def main(): """ Process new feed entries and repost to the blog API. """ # Get a handle on the blog API server srv = xmlrpclib.ServerProxy(API_URI, verbose=0) # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] for e in getNewFeedEntries(feeds, feed_db, entry_db): # Get the entry and feed metadata. feed, entry = e.data.feed, e.entry # Build a blog post title using feed and entry titles. title = u'%s — %s' % ( feed.get('title', u'untitled'), entry.get('title', u'untitled') ) # Generate an ISO8601 date using the feed entry modification, # with current date/time as default. date = time.strftime('%Y-%m-%dT%H:%M:%SZ', entry.get('modified_parsed', time.gmtime())) # Build blog post body content from what's available in the # feed entry. content_out = [] if entry.has_key('summary'): content_out.append(entry.summary) content_out.extend([ c.value for c in entry.get('content', []) if 'html' in c.type ]) content = '<br />\n'.join(content_out) # Build the blog post content from feed and entry. desc = u""" %(content)s <br /> [ <a href="%(entry.link)s">Originally</a> posted at <a href="%(feed.link)s">%(feed.title)s</a> ] """ % { 'content' : content, 'entry.link' : entry.get('link', u''), 'feed.title' : feed.get('title', u''), 'feed.link' : feed.get('link', u''), } # Build post item data, call blog API via XML-RPC post = { 'title' : title, 'dateCreated' : date, 'description' : desc, 'mt_convert_breaks' : False } try: srv.metaWeblog.newPost(API_BLOGID, API_USER, API_PASSWD, post, True) print "Posted %s" % title except KeyboardInterrupt: raise except: print "Problem posting %s" % title
def main(): """ Fire up the feed blog generator, write the static HTML to disk. """ # Try to load up entry history, start with an empty list in # case of any problems. try: entries = pickle.load(open(HISTORY_FN, 'rb')) except: entries = [] # Open up the databases, load the subscriptions, get new entries. feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN) feeds = [ x.strip() for x in open(FEEDS_FN, "r").readlines() ] # Gather new entries from all feeds. subs_details = [] for feed_uri in feeds: # HACK: Grab 'custom' feed record details before agglib update. if feed_db.has_key(feed_uri): feed_rec = feed_db[feed_uri] feed_link = feed_rec.get('link', '#') feed_title = feed_rec.get('title', 'untitled') # Get new entries, if any. new_entries = getNewFeedEntries([feed_uri], feed_db, entry_db) # If there's no record of the feed in the DB, skip it. if not feed_db.has_key(feed_uri): continue # Update feed record details from fresh feed, if any entries found. if len(new_entries) > 0: feed = new_entries[0].data.feed feed_link = feed.get('link', '#') feed_title = feed.get('title', 'untitled') # HACK: Update 'custom' feed record details after agglib update. feed_rec = feed_db[feed_uri] feed_rec['link'] = feed_link feed_rec['title'] = feed_title feed_db[feed_uri] = feed_rec # Add details for this feed to the sidebar content. subs_details.append({ 'feed.link' : feed_link, 'feed.title' : feed_title, 'feed.url' : feed_uri }) # Skip ahead if no new entries found. if len(new_entries) < 1: continue # Make sure entries have a modified date, using now by default. for e in new_entries: if not e.entry.has_key('modified_parsed'): e.entry['modified_parsed'] = time.gmtime() # Tack the list of new entries onto the head of the main list. entries = new_entries + entries # Sort the subscription details, build the sidebar content. subs_details.sort(lambda a,b: cmp( a['feed.title'], b['feed.title'] )) subs_out = [ SUBSCRIPTION_TMPL % x for x in subs_details ] # Sort all the entries, truncate to desired length. entries.sort() entries = entries[:MAX_ENTRIES] # Write out the current run's aggregator report. out_fn = time.strftime(ARCHIVE_FN) writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL, ENTRY_TMPL, PAGE_TMPL) # Build the page template from the template template. out = SHELL_TMPL % { 'subs' : '\n'.join(subs_out), 'main' : open(out_fn).read() } open(BLOG_FN, 'w').write(out) # Close the databases and save the entry history back out to disk. closeDBs(feed_db, entry_db) pickle.dump(entries, open(HISTORY_FN, 'wb'))