def fetch_wordpress_default_rss(source, markup, url, add_rpc, got_result):
    """Try the feed at ``url + "/?feed=rss"`` and report what it yields.

    The feed URL is requested with ``url_fetch_async`` and the value that
    call returns is handed to ``add_rpc``. When the fetch callback receives
    truthy markup, that markup is passed to ``rss_fetch`` together with a
    callback that sets ``method = 'wordpress_default_rss'`` on truthy
    results and then forwards the result (truthy or not) to ``got_result``.
    When the fetched markup is falsy, nothing further happens and
    ``got_result`` is not called.

    ``markup`` is accepted but not used by this function.
    """
    feed_link = url + "/?feed=rss"

    def on_parsed(parsed):
        # Label successful results with the strategy that produced them.
        if parsed:
            parsed.method = 'wordpress_default_rss'
        got_result(parsed)

    def on_fetched(feed_markup):
        if not feed_markup:
            return
        rss_fetch(source, feed_markup, feed_link, add_rpc, on_parsed)

    add_rpc(url_fetch_async(feed_link, on_fetched))
def fetch_hardcoded_rss_url(source, markup, url, add_rpc, got_result):
    """Fetch a hand-picked feed URL for the sites in the lookup table.

    ``url`` is normalised with ``strip_url_prefix`` and looked up in a small
    table of known sites. If there is no entry, the function does nothing.
    Otherwise the mapped feed URL is requested with ``url_fetch_async`` and
    the value that call returns is handed to ``add_rpc``.

    When the fetch callback receives truthy markup, it is passed to
    ``rss_fetch`` together with a callback that sets
    ``method = 'hardcoded_rss'`` on truthy results and then forwards the
    result (truthy or not) to ``got_result``. Falsy markup is ignored, so
    ``got_result`` is not called in that case.

    ``markup`` is accepted but not used by this function.
    """
    lookup = {
        'news.ycombinator.com': 'http://hnrss.org/newest?points=25',
        'newyorker.com': 'http://www.newyorker.com/feed/everything',
        'longform.org': 'http://longform.org/feed.rss',
    }
    rss_url = lookup.get(strip_url_prefix(url))
    if not rss_url:
        return

    def _got_result(res):
        # Label successful results with the strategy that produced them.
        if res:
            res.method = 'hardcoded_rss'
        got_result(res)

    def callback(feed_markup):
        # Only parse when the fetch actually produced markup; the other
        # fetchers in this file guard the same way before calling rss_fetch.
        if feed_markup:
            rss_fetch(source, feed_markup, rss_url, add_rpc, _got_result)

    add_rpc(url_fetch_async(rss_url, callback))
def fetch_linked_rss(source, markup, url, add_rpc, got_result): soup = BeautifulSoup(markup, 'lxml') link = soup.find('link', attrs={'rel': 'alternate', 'type': ['application/rss+xml', 'application/atom+xml']}) if link and type(link) == bs4.element.Tag and link['href']: feed_url = urljoin(url, link['href']) print 'Found rss URL: ', feed_url def callback(feed_markup): if feed_markup: def _got_result(result): if result: result.method = 'linked_rss' return got_result(result) else: print 'failed to parse markup' return rss_fetch(source, feed_markup, feed_url, add_rpc, _got_result) else: print "Error fetching linked rss {0}".format(feed_url) add_rpc(url_fetch_async(feed_url, callback)) return None