コード例 #1
0
ファイル: rss_feed.py プロジェクト: jjelosua/newslynx-core
    def run(self):
        """
        Yield article entries pulled from the configured RSS feed.

        Reads ``self.options['feed_url']``, tags every entry with
        ``type = 'article'`` and back-fills a missing ``created`` value with
        ``dates.now()``. When ``self.max_date_last_run`` is falsy every entry
        is yielded; otherwise only entries whose ``created`` is greater than
        it. The ``created`` value of each yielded entry is appended to
        ``self.publish_dates``, which is reset to an empty list before the
        entries are iterated.
        """
        feed_url = self.options['feed_url']
        # NOTE(review): looked up but not used below -- an empty list is what
        # gets passed to rss.get_entries. Confirm whether that is intended.
        org_domains = self.org.get('domains', [])
        feed_entries = rss.get_entries(feed_url, [])
        self.publish_dates = []

        for entry in feed_entries:
            entry['type'] = 'article'

            # The feed is polled often, so the poll time stands in for a
            # missing publish date.
            if not entry.get('created'):
                entry['created'] = dates.now()

            # Skip entries that are not newer than the previous run; when no
            # previous run date is set, nothing is skipped.
            if self.max_date_last_run and \
                    not (entry['created'] > self.max_date_last_run):
                continue

            self.publish_dates.append(entry['created'])
            yield entry
コード例 #2
0
def test_bulk_content_items(
        feed_url='http://feeds.propublica.org/propublica/main',
        domains=['propublica.org']):
    """
    Build a bulk content-item payload from an RSS feed and submit it.

    Every entry returned by ``rss.get_entries`` is tagged with
    ``type = 'article'``; only entries with a truthy ``url`` are included in
    the payload passed to ``api.content.bulk_create``.
    """
    payload = []
    for item in rss.get_entries(feed_url, domains):
        # the tag is applied to every entry, including ones skipped below
        item['type'] = 'article'
        if not item.get('url'):
            continue
        payload.append(item)

    # NOTE(review): neither the timestamp nor the response is read again in
    # this function as shown.
    started_at = time.time()
    # submit all collected entries in a single bulk request
    response = api.content.bulk_create(data=payload)
コード例 #3
0
def test_bulk_content_items(feed_url='http://feeds.propublica.org/propublica/main',
                            domains=['propublica.org']):
    """
    Fetch RSS entries and send those that have a url to the bulk
    content-item endpoint.

    All fetched entries get ``type = 'article'``; entries whose ``url`` is
    missing or falsy are left out of the ``api.content.bulk_create`` request.
    """
    feed_entries = rss.get_entries(feed_url, domains)

    with_url = []
    for feed_entry in feed_entries:
        feed_entry['type'] = 'article'
        has_url = feed_entry.get('url')
        if has_url:
            with_url.append(feed_entry)

    # NOTE(review): the start time and the bulk_create result are assigned
    # but not used in the visible part of this function.
    t0 = time.time()
    # one bulk request for the whole batch
    result = api.content.bulk_create(data=with_url)
コード例 #4
0
def test_bulk_events(feed_url='http://feeds.propublica.org/propublica/main', domains=['propublica.org']):
    """
    Parse an rss feed, bulk create events from it and report the timing.

    Every entry returned by ``rss.get_entries`` is tagged with
    ``type = 'article'``; those with a truthy ``url`` are sent in a single
    ``api.events.bulk_create`` call. The ``status_url`` of the response is
    handed to ``poll_status_url`` and the elapsed time (rounded to two
    decimals) is printed together with the number of entries sent.
    """
    data = []
    for entry in rss.get_entries(feed_url, domains):
        entry['type'] = 'article'
        if entry.get('url'):
            data.append(entry)

    start = time.time()
    # submit the batch, then hand the returned status url to the poller
    res = api.events.bulk_create(data)
    poll_status_url(res.get('status_url'))
    end = time.time()

    # Single-argument, parenthesised print: valid both as the Python 2 print
    # statement and as the Python 3 print function, with identical output.
    print("Bulk Loading {} Events Took {} seconds"
          .format(len(data), round((end - start), 2)))
コード例 #5
0
def test_bulk_events(feed_url='http://feeds.propublica.org/propublica/main',
                     domains=['propublica.org']):
    """
    Bulk create events from an rss feed and print how long it took.

    Entries come from ``rss.get_entries(feed_url, domains)``. Each one is
    tagged with ``type = 'article'`` and kept only if its ``url`` is truthy.
    The kept entries go to ``api.events.bulk_create``; the ``status_url``
    from its response is passed to ``poll_status_url``. The timer covers the
    bulk_create call and the polling, not the feed parsing.
    """
    data = []
    for entry in rss.get_entries(feed_url, domains):
        entry['type'] = 'article'
        if entry.get('url'):
            data.append(entry)

    start = time.time()
    res = api.events.bulk_create(data)
    poll_status_url(res.get('status_url'))
    elapsed = round((time.time() - start), 2)

    # The original used the Python 2-only print statement. A parenthesised
    # single argument prints the same text on Python 2 and also parses on
    # Python 3.
    message = "Bulk Loading {} Events Took {} seconds"\
        .format(len(data), elapsed)
    print(message)