Esempio n. 1
0
def fetch_wordpress_default_rss(source, markup, url, add_rpc, got_result):
    """Probe ``url + "/?feed=rss"`` and report a parsed feed if one is found.

    Args:
        source: Passed through unchanged to ``rss_fetch``.
        markup: Unused by this function.
        url: Base URL; ``"/?feed=rss"`` is appended to build the feed URL.
        add_rpc: Called with whatever ``url_fetch_async`` returns.
        got_result: Called with the truthy result delivered by ``rss_fetch``,
            after its ``method`` attribute has been set to
            ``'wordpress_default_rss'``. Never called for falsy results.
    """
    feed_url = url + "/?feed=rss"

    def _tag_and_forward(parsed):
        # Falsy results are dropped silently; only real ones reach the caller.
        if not parsed:
            return
        parsed.method = 'wordpress_default_rss'
        got_result(parsed)

    def _on_fetched(feed_markup):
        # Nothing to parse when the fetch produced no markup.
        if not feed_markup:
            return
        rss_fetch(source, feed_markup, feed_url, add_rpc, _tag_and_forward)

    add_rpc(url_fetch_async(feed_url, _on_fetched))
Esempio n. 2
0
def fetch_hardcoded_rss_url(source, markup, url, add_rpc, got_result):
    """Fetch a feed from a hard-coded URL table, if ``url`` has an entry.

    The table is keyed by ``strip_url_prefix(url)`` (presumably the URL
    reduced to its bare host -- confirm against that helper). When there is
    no entry, nothing is fetched and ``got_result`` is never called.

    Args:
        source: Passed through unchanged to ``rss_fetch``.
        markup: Unused by this function.
        url: URL of the site whose feed is wanted.
        add_rpc: Called with whatever ``url_fetch_async`` returns.
        got_result: Called with the truthy result delivered by ``rss_fetch``,
            after its ``method`` attribute has been set to
            ``'hardcoded_rss'``. Never called for falsy results.
    """
    lookup = {
        'news.ycombinator.com': 'http://hnrss.org/newest?points=25',
        'newyorker.com': 'http://www.newyorker.com/feed/everything',
        'longform.org': 'http://longform.org/feed.rss'
    }
    rss_url = lookup.get(strip_url_prefix(url))
    if rss_url:
        def callback(feed_markup):
            # A failed or empty fetch delivers falsy markup; there is
            # nothing to parse in that case, so do not call rss_fetch.
            if not feed_markup:
                return

            def _got_result(res):
                if res:
                    res.method = 'hardcoded_rss'
                    got_result(res)
            rss_fetch(source, feed_markup, rss_url, add_rpc, _got_result)
        add_rpc(url_fetch_async(rss_url, callback))
Esempio n. 3
0
def fetch_linked_rss(source, markup, url, add_rpc, got_result):
    soup = BeautifulSoup(markup, 'lxml')
    link = soup.find('link', attrs={'rel': 'alternate', 'type': ['application/rss+xml', 'application/atom+xml']})
    if link and type(link) == bs4.element.Tag and link['href']:
        feed_url = urljoin(url, link['href'])
        print 'Found rss URL: ', feed_url
        def callback(feed_markup):
            if feed_markup:
                def _got_result(result):
                    if result:
                        result.method = 'linked_rss'
                        return got_result(result)
                    else:
                        print 'failed to parse markup'
                return rss_fetch(source, feed_markup, feed_url, add_rpc, _got_result)
            else:
                print "Error fetching linked rss {0}".format(feed_url)
        add_rpc(url_fetch_async(feed_url, callback))
    return None