def test_atom_extraction():
    '''Test link extraction from an Atom feed.

    Reads ``feed1.atom`` from ``RESOURCES_DIR`` and expects at least one
    extracted link, then expects no link from a lone ``<link>`` element
    carrying ``rel="self"``.
    '''
    filepath = os.path.join(RESOURCES_DIR, 'feed1.atom')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(feeds.extract_links(teststring)) > 0

    # A single rel="self" link element must not be reported as a link.
    assert len(
        feeds.extract_links(
            '<link type="application/atom+xml" rel="self" href="https://www.dwds.de/api/feed/themenglossar/Corona"/>'
        )
    ) == 0
def test_atom_extraction():
    '''Test link extraction from an Atom feed.

    Covers, in order: an empty HTML document (no links expected), the
    ``feed1.atom`` file from ``RESOURCES_DIR`` (at least one link expected),
    and two ``rel="self"`` link elements prefixed with ``XMLDECL`` (no links
    expected), the second of which uses the ``123://api.exe`` href.
    '''
    # Plain HTML without any feed content.
    assert len(
        feeds.extract_links('<html></html>', 'example.org', 'https://example.org', '')
    ) == 0

    filepath = os.path.join(RESOURCES_DIR, 'feed1.atom')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(
        feeds.extract_links(teststring, 'example.org', 'https://example.org', '')
    ) > 0

    # rel="self" link element: nothing should be extracted.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link type="application/atom+xml" rel="self" href="https://www.dwds.de/api/feed/themenglossar/Corona"/>',
            'dwds.de',
            'https://www.dwds.de',
            '',
        )
    ) == 0

    # href whose scheme is "123": nothing should be extracted.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link type="application/atom+xml" rel="self" href="123://api.exe"/>',
            'example.org',
            'https://example.org',
            '',
        )
    ) == 0
def test_rss_extraction():
    '''Test link extraction from a RSS feed.

    Exercises ``feeds.extract_links`` on small inline ``<link>`` snippets
    prefixed with ``XMLDECL`` and then on the ``feed2.rss`` file from
    ``RESOURCES_DIR``.
    '''
    # A single article link is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>http://example.org/article1/</link>',
            'example.org',
            'http://example.org/',
            '',
        )
    ) == 1

    # Link to http://example.org/ with http://example.org as third and
    # fourth argument: nothing is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>http://example.org/</link>',
            'example.org',
            'http://example.org',
            'http://example.org',
        )
    ) == 0

    # Link element carrying rel="self": nothing is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link rel="self">http://example.org/article1/</link>',
            'example.org',
            'http://example.org/',
            '',
        )
    ) == 0

    # A path-only link comes back as an absolute URL.
    assert feeds.extract_links(
        XMLDECL + '<link>/api/feed/themenglossar/Corona</link>',
        'www.dwds.de',
        'https://www.dwds.de',
        'https://www.dwds.de',
    ) == ['https://www.dwds.de/api/feed/themenglossar/Corona']

    filepath = os.path.join(RESOURCES_DIR, 'feed2.rss')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(
        feeds.extract_links(teststring, 'example.com', 'https://example.org', '')
    ) > 0
def test_rss_extraction():
    '''Test link extraction from a RSS feed.

    Exercises ``feeds.extract_links`` on small inline ``<link>`` snippets
    prefixed with ``XMLDECL`` (plain, CDATA-wrapped, whitespace-padded,
    homepage-only and path-only links) and then on the ``feed2.rss`` file
    from ``RESOURCES_DIR``.
    '''
    # A single article link is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>http://example.org/article1/</link>',
            'example.org',
            'http://example.org/',
            '',
        )
    ) == 1

    # CDATA
    assert feeds.extract_links(
        XMLDECL + '<link><![CDATA[http://example.org/article1/]]></link>',
        'example.org',
        'http://example.org/',
        '',
    ) == ['http://example.org/article1/']

    # spaces
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>\r\n https://www.ak-kurier.de/akkurier/www/artikel/108815-sinfonisches-blasorchester-spielt-1500-euro-fuer-kinder-in-drk-krankenhaus-kirchen-ein </link>',
            'ak-kurier.de',
            'https://www.ak-kurier.de/',
            '',
        )
    ) == 1

    # Link to http://example.org/ with http://example.org as third and
    # fourth argument: nothing is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>http://example.org/</link>',
            'example.org',
            'http://example.org',
            'http://example.org',
        )
    ) == 0

    # Link to the bare https://example.org address: nothing is returned.
    assert len(
        feeds.extract_links(
            XMLDECL + '<link>https://example.org</link>',
            'example.org',
            'http://example.org/',
            '',
        )
    ) == 0

    # A path-only link comes back as an absolute URL.
    assert feeds.extract_links(
        XMLDECL + '<link>/api/feed/themenglossar/Corona</link>',
        'www.dwds.de',
        'https://www.dwds.de',
        'https://www.dwds.de',
    ) == ['https://www.dwds.de/api/feed/themenglossar/Corona']

    filepath = os.path.join(RESOURCES_DIR, 'feed2.rss')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(
        feeds.extract_links(teststring, 'example.com', 'https://example.org', '')
    ) > 0
def test_json_extraction():
    '''Test link extraction from a JSON feed.

    First checks that ``feeds.determine_feed`` reports one feed for an HTML
    page advertising an ``application/json`` alternate link, then that
    ``feeds.extract_links`` returns 25 links for the ``feed.json`` file from
    ``RESOURCES_DIR`` and one link for an item that only has an ``id``.
    '''
    # find link
    # NOTE(review): the literal contains "/>>" (an extra ">") after the link
    # element; left untouched because it is test input -- confirm whether it
    # is intentional.
    assert len(
        feeds.determine_feed(
            '<html><meta><link rel="alternate" type="application/json" title="JSON Feed" href="https://www.jsonfeed.org/feed.json" />></meta><body/></html>',
            'jsonfeed.org',
            'https://www.jsonfeed.org',
        )
    ) == 1

    # extract data
    filepath = os.path.join(RESOURCES_DIR, 'feed.json')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    links = feeds.extract_links(teststring, 'npr.org', 'https://npr.org', '')
    assert len(links) == 25

    # id as a backup
    links = feeds.extract_links(
        r'{"version":"https:\/\/jsonfeed.org\/version\/1","items":[{"id":"https://www.example.org/1","title":"Test"}]}',
        'example.org',
        'https://example.org',
        '',
    )
    assert len(links) == 1
def test_rss_extraction():
    '''Test link extraction from a RSS feed.

    Reads ``feed2.rss`` from ``RESOURCES_DIR`` and expects
    ``feeds.extract_links`` to return at least one link for its content.
    '''
    filepath = os.path.join(RESOURCES_DIR, 'feed2.rss')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(feeds.extract_links(teststring)) > 0
def test_atom_extraction():
    '''Test link extraction from an Atom feed.

    Reads ``feed1.atom`` from ``RESOURCES_DIR`` and expects
    ``feeds.extract_links`` to return at least one link for its content.
    '''
    filepath = os.path.join(RESOURCES_DIR, 'feed1.atom')
    # Explicit encoding: the result must not depend on the locale default.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    assert len(feeds.extract_links(teststring)) > 0