def run(self):
    """
    Extract an RSS Feed and create articles.

    Generator. Reads the feed at ``self.options['feed_url']`` through
    ``rss.get_entries`` and yields each entry, tagged with
    ``type='article'``, when either ``self.max_date_last_run`` is falsy
    or the entry's ``created`` value is greater than it.

    Side effects: ``self.publish_dates`` is reset to an empty list when
    iteration starts, and the ``created`` value of every yielded entry
    is appended to it.
    """
    feed_url = self.options['feed_url']

    # NOTE(review): get_entries is given an empty domain list; the org's
    # domains (self.org.get('domains')) are not passed -- confirm this is
    # intended.
    entries = rss.get_entries(feed_url, [])
    self.publish_dates = []

    # iterate through RSS entries.
    for article in entries:
        article['type'] = 'article'  # set this type as article.

        # since we poll often, we can assume the current time is a good
        # approximation of an article publish date when none is given.
        if not article.get('created'):
            article['created'] = dates.now()

        # if we haven't run before, yield everything; otherwise only
        # yield articles newer than the previous run.
        if (not self.max_date_last_run
                or article['created'] > self.max_date_last_run):
            self.publish_dates.append(article['created'])
            yield article
def test_bulk_content_items(
        feed_url='http://feeds.propublica.org/propublica/main',
        domains=None):
    """
    Parse an rss feed and bulk create content items via article extraction.

    :param feed_url: URL of the RSS feed to read entries from.
    :param domains: passed through to ``rss.get_entries`` as its second
        argument; defaults to ``['propublica.org']`` when omitted.
    """
    # build the default per call so a single list object is never shared
    # between calls.
    if domains is None:
        domains = ['propublica.org']

    data = []
    for entry in rss.get_entries(feed_url, domains):
        entry['type'] = 'article'
        # entries without a url are left out of the request.
        if entry.get('url'):
            data.append(entry)

    # submit all collected entries in one bulk request.
    api.content.bulk_create(data=data)
def test_bulk_content_items(
        feed_url='http://feeds.propublica.org/propublica/main',
        domains=None):
    """
    Parse an rss feed and bulk create content items via article extraction.

    :param feed_url: URL of the RSS feed to read entries from.
    :param domains: passed through to ``rss.get_entries`` as its second
        argument; defaults to ``['propublica.org']`` when omitted.
    """
    # a fresh default list per call avoids sharing one mutable object
    # across invocations.
    if domains is None:
        domains = ['propublica.org']

    data = []
    for entry in rss.get_entries(feed_url, domains):
        entry['type'] = 'article'
        # only entries that carry a url are submitted.
        if entry.get('url'):
            data.append(entry)

    # send everything in a single bulk request.
    api.content.bulk_create(data=data)
def test_bulk_events(
        feed_url='http://feeds.propublica.org/propublica/main',
        domains=None):
    """
    Parse an rss feed and bulk create events.

    Collects the feed entries that have a url, submits them with
    ``api.events.bulk_create``, passes the returned ``status_url`` to
    ``poll_status_url`` and prints the elapsed wall-clock time for the
    request plus the polling call.

    :param feed_url: URL of the RSS feed to read entries from.
    :param domains: passed through to ``rss.get_entries`` as its second
        argument; defaults to ``['propublica.org']`` when omitted.
    """
    # build the default per call so a single list object is never shared
    # between calls.
    if domains is None:
        domains = ['propublica.org']

    data = []
    for entry in rss.get_entries(feed_url, domains):
        # NOTE(review): entries are tagged 'article' even though they are
        # submitted as events -- confirm this is the intended type.
        entry['type'] = 'article'
        if entry.get('url'):
            data.append(entry)

    start = time.time()

    # make request and return status url
    res = api.events.bulk_create(data)
    poll_status_url(res.get('status_url'))

    end = time.time()
    # single-argument parenthesised print emits the same text on
    # Python 2 and Python 3.
    print("Bulk Loading {} Events Took {} seconds"
          .format(len(data), round((end - start), 2)))