Exemple #1
0
def test_feeds_helpers():
    '''Exercise the URL validator and the feed detection helper.

    First checks ``feeds.validate_url`` against three URLs, then checks how
    many feed links ``feeds.determine_feed`` reports for small HTML pages.
    '''
    url_verdicts = (
        ('http://example.org/article1/', True),
        ('', False),
        ('http://example.org/', False),
    )
    for candidate, verdict in url_verdicts:
        assert feeds.validate_url(candidate) is verdict

    feed_counts = (
        # RSS and Atom <link> declarations are each expected to yield one feed
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>', 1),
        # no comments wanted: the comments feed must not be reported
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>', 0),
    )
    for markup, expected_total in feed_counts:
        assert len(feeds.determine_feed(markup)) == expected_total
Exemple #2
0
def test_feeds_helpers():
    '''Exercise feed detection in HTML and feed discovery from a URL.

    Each HTML sample is passed to ``feeds.determine_feed`` together with the
    same domain and base URL; the number of links returned is compared with
    the expected count. ``feeds.find_feed_urls`` is then expected to return
    an empty list for the two URLs tried at the end.
    '''
    domain, baseurl = 'example.org', 'https://example.org'
    feed_counts = (
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        # same Atom link with the type attribute placed last
        ('<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/feed/" type="application/atom+xml"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>', 1),
        # no comments wanted
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>', 0),
        # invalid links
        ('<html><meta><link rel="alternate" href="12345tralala" title="RSS" type="application/rss+xml"></meta><body/></html>', 0),
    )
    for markup, expected_total in feed_counts:
        found = feeds.determine_feed(markup, domain, baseurl)
        assert len(found) == expected_total

    # feed discovery
    for address in ('http://', 'https://httpbin.org/status/404'):
        assert feeds.find_feed_urls(address) == []
Exemple #3
0
def test_json_extraction():
    '''Test link extraction from a JSON feed.

    Covers three things: detection of a JSON feed declared in an HTML
    ``<link>`` element, extraction of links from the ``feed.json`` resource
    file (25 links expected), and extraction from an item that only carries
    an ``id`` and a ``title`` (one link expected).
    '''
    # find link
    assert len(
        feeds.determine_feed(
            '<html><meta><link rel="alternate" type="application/json" title="JSON Feed" href="https://www.jsonfeed.org/feed.json" />></meta><body/></html>',
            'jsonfeed.org', 'https://www.jsonfeed.org')) == 1
    # extract data
    filepath = os.path.join(RESOURCES_DIR, 'feed.json')
    # Read the fixture as UTF-8 explicitly: without an encoding argument
    # open() falls back to the platform's locale encoding, which makes the
    # test outcome depend on the machine it runs on.
    with open(filepath, encoding='utf-8') as f:
        teststring = f.read()
    links = feeds.extract_links(teststring, 'npr.org', 'https://npr.org', '')
    assert len(links) == 25
    # id as a backup
    links = feeds.extract_links(
        r'{"version":"https:\/\/jsonfeed.org\/version\/1","items":[{"id":"https://www.example.org/1","title":"Test"}]}',
        'example.org', 'https://example.org', '')
    assert len(links) == 1
Exemple #4
0
def test_feeds_helpers():
    '''Exercise the feed helpers: detection, discovery and link filtering.

    ``feeds.determine_feed`` is checked first by the number of links it
    returns for a series of HTML samples, then by the exact list returned
    for pages that only contain an ``<a>`` element. ``feeds.find_feed_urls``
    and ``feeds.handle_link_list`` are checked afterwards. The same domain
    and base URL are used for every call that takes them.
    '''
    domain, baseurl = 'example.org', 'https://example.org'

    feed_counts = (
        # nothing useful: empty input, then a <link> without href
        ('', 0),
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed"/></meta><body/></html>', 0),
        # useful
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" type="application/atom+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/feed/" type="application/atom+xml"/></meta><body/></html>', 1),
        # no type attribute at all on this one
        ('<html><meta><link rel="alternate" title="Feed" href="https://example.org/blog/atom/"/></meta><body/></html>', 1),
        ('<html><meta><link rel="alternate" href="https://www.theguardian.com/international/rss" title="RSS" type="application/rss+xml"></meta><body/></html>', 1),
        # no comments wanted
        ('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/comments-feed/"/></meta><body/></html>', 0),
        # invalid links
        ('<html><meta><link rel="alternate" href="12345tralala" title="RSS" type="application/rss+xml"></meta><body/></html>', 0),
    )
    for markup, expected_total in feed_counts:
        found = feeds.determine_feed(markup, domain, baseurl)
        assert len(found) == expected_total

    # detecting in <a>-elements
    anchor_pages = (
        ('<html><body><a href="https://example.org/feed.xml"><body/></html>',
         ['https://example.org/feed.xml']),
        ('<html><body><a href="https://example.org/feed.atom"><body/></html>',
         ['https://example.org/feed.atom']),
        ('<html><body><a href="https://example.org/rss"><body/></html>',
         ['https://example.org/rss']),
    )
    for markup, expected_links in anchor_pages:
        assert feeds.determine_feed(markup, domain, baseurl) == expected_links

    # feed discovery
    for address in ('http://', 'https://httpbin.org/status/404'):
        assert feeds.find_feed_urls(address) == []

    # Feedburner/Google links first, then a link overriding failed checks:
    # both are expected to come back unchanged
    for external in ('https://feedproxy.google.com/ABCD',
                     'https://feedburner.com/kat/1'):
        kept = feeds.handle_link_list([external], domain, baseurl)
        assert kept == [external]
Exemple #5
0
def test_feeds_helpers():
    '''Test helper functions for feed extraction'''
    assert feeds.validate_url('http://example.org/article1/') is True
    assert feeds.validate_url('') is False
    assert feeds.validate_url('http://example.org/') is False
    assert len(feeds.determine_feed('<html><meta><link rel="alternate" type="application/rss+xml" title="Feed" href="https://example.org/blog/feed/"/></meta><body/></html>')) == 1