def test_feeds_helpers():
    '''Test helper functions for feed extraction.

    Checks URL validation and the detection of feed links declared in
    ``<link rel="alternate">`` elements of an HTML document.
    '''
    # URL validation: an article URL is accepted, while an empty string
    # and a bare homepage URL are expected to be rejected
    assert feeds.validate_url('http://example.org/article1/') is True
    assert feeds.validate_url('') is False
    assert feeds.validate_url('http://example.org/') is False

    # RSS feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>'
    )) == 1
    # Atom feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>'
    )) == 1
    # attributes in a different order, unclosed <link> element
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>'
    )) == 1

    # no comments wanted
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>'
    )) == 0
def test_feeds_helpers():
    '''Test helper functions for feed extraction.

    Checks the detection of feed links in HTML documents for a given
    domain and base URL, then feed discovery on unusable URLs.
    '''
    # every determine_feed() call below uses the same domain and base URL
    domain, baseurl = 'example.org', 'https://example.org'

    # RSS feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>',
        domain, baseurl)) == 1
    # Atom feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>',
        domain, baseurl)) == 1
    # Atom feed link with the type attribute placed last
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/feed/" type="application/atom+xml"/></meta><body/></html>',
        domain, baseurl)) == 1
    # feed hosted on another domain, unclosed <link> element
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>',
        domain, baseurl)) == 1

    # no comments wanted
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>',
        domain, baseurl)) == 0

    # invalid links
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" href="12345tralala" title="RSS" type="application/rss+xml"></meta><body/></html>',
        domain, baseurl)) == 0

    # feed discovery
    # NOTE(review): the second URL looks like it requires network access
    # to httpbin.org -- confirm how find_feed_urls() fetches pages
    assert feeds.find_feed_urls('http://') == []
    assert feeds.find_feed_urls('https://httpbin.org/status/404') == []
def test_json_extraction():
    '''Test link extraction from a JSON feed.

    Checks that a JSON feed declared in a ``<link>`` element is detected,
    then that links are extracted from JSON feed content, both from a
    fixture file and from a minimal inline document.
    '''
    # find link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/json" title="JSON Feed" href="https://www.jsonfeed.org/feed.json" />></meta><body/></html>',
        'jsonfeed.org', 'https://www.jsonfeed.org')) == 1

    # extract data
    filepath = os.path.join(RESOURCES_DIR, 'feed.json')
    # read the fixture as UTF-8 explicitly instead of relying on the
    # platform default encoding (JSON documents are expected to be UTF-8)
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    links = feeds.extract_links(teststring, 'npr.org', 'https://npr.org', '')
    assert len(links) == 25

    # id as a backup: the only item has an "id" and a "title" but no "url"
    links = feeds.extract_links(
        r'{"version":"https:\/\/jsonfeed.org\/version\/1","items":[{"id":"https://www.example.org/1","title":"Test"}]}',
        'example.org', 'https://example.org', '')
    assert len(links) == 1
def test_feeds_helpers():
    '''Test helper functions for feed extraction.

    Checks the detection of feed links in ``<link>`` and ``<a>`` elements
    for a given domain and base URL, feed discovery on unusable URLs and
    the handling of Feedburner/Google feed links.
    '''
    # every helper call below uses the same domain and base URL
    domain, baseurl = 'example.org', 'https://example.org'

    # nothing useful: empty document, then a <link> without href
    assert len(feeds.determine_feed('', domain, baseurl)) == 0
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed"/></meta><body/></html>',
        domain, baseurl)) == 0

    # useful
    # RSS feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>',
        domain, baseurl)) == 1
    # Atom feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>',
        domain, baseurl)) == 1
    # Atom feed link with the type attribute placed last
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/feed/" type="application/atom+xml"/></meta><body/></html>',
        domain, baseurl)) == 1
    # no type attribute at all, only the href is available
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/atom/"/></meta><body/></html>',
        domain, baseurl)) == 1
    # feed hosted on another domain, unclosed <link> element
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>',
        domain, baseurl)) == 1

    # no comments wanted
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>',
        domain, baseurl)) == 0

    # invalid links
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" href="12345tralala" title="RSS" type="application/rss+xml"></meta><body/></html>',
        domain, baseurl)) == 0

    # detecting in <a>-elements
    assert feeds.determine_feed(
        '<html><body><a href="https://example.org/feed.xml"><body/></html>',
        domain, baseurl) == ['https://example.org/feed.xml']
    assert feeds.determine_feed(
        '<html><body><a href="https://example.org/feed.atom"><body/></html>',
        domain, baseurl) == ['https://example.org/feed.atom']
    assert feeds.determine_feed(
        '<html><body><a href="https://example.org/rss"><body/></html>',
        domain, baseurl) == ['https://example.org/rss']

    # feed discovery
    # NOTE(review): the second URL looks like it requires network access
    # to httpbin.org -- confirm how find_feed_urls() fetches pages
    assert feeds.find_feed_urls('http://') == []
    assert feeds.find_feed_urls('https://httpbin.org/status/404') == []

    # Feedburner/Google links
    assert feeds.handle_link_list(
        ['https://feedproxy.google.com/ABCD'],
        domain, baseurl) == ['https://feedproxy.google.com/ABCD']
    # override failed checks
    assert feeds.handle_link_list(
        ['https://feedburner.com/kat/1'],
        domain, baseurl) == ['https://feedburner.com/kat/1']
def test_feeds_helpers():
    '''Test helper functions for feed extraction.

    Checks URL validation and the detection of an RSS feed link declared
    in a ``<link rel="alternate">`` element.
    '''
    # URL validation: an article URL is accepted, while an empty string
    # and a bare homepage URL are expected to be rejected
    assert feeds.validate_url('http://example.org/article1/') is True
    assert feeds.validate_url('') is False
    assert feeds.validate_url('http://example.org/') is False

    # RSS feed link
    assert len(feeds.determine_feed(
        '<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>'
    )) == 1