Exemple #1
0
 def test_is_feed(self):
     """Check ``is_feed`` against feed content and against plain HTML.

     A finder built with ``self.regular_feed_page`` as its html must report
     that it is a feed; one built with ``self.regular_html_template`` must not.
     """
     # Positive case: the supplied content is the feed fixture.
     feed_finder = feed_seeker.FeedSeeker(
         self.base_url, html=self.regular_feed_page)
     assert feed_finder.is_feed()

     # Negative case: the supplied content is the ordinary HTML fixture.
     page_finder = feed_seeker.FeedSeeker(
         self.base_url, html=self.regular_html_template)
     assert not page_finder.is_feed()
Exemple #2
0
    def test_generate_feed_urls_not_a_page(self):
        """Expect no feed URLs when the seeker targets an unknown path.

        The seeker is pointed at ``self.base_url + '/what_is_this_even'`` and
        ``generate_feed_urls`` must yield nothing.
        """
        # Fixture setup; presumably registers the mocked responses - the
        # returned values are not needed by this test.
        _unused_feeds, _ = self.generate_responses()

        missing_page_url = self.base_url + '/what_is_this_even'
        seeker = feed_seeker.FeedSeeker(missing_page_url)
        discovered = list(seeker.generate_feed_urls())

        assert not discovered
Exemple #3
0
    def test_generate_feed_urls_on_feed(self):
        """Expect exactly one URL when the seeker targets a feed directly.

        The seeker is pointed at ``self.base_url`` joined with the first entry
        returned by ``self.generate_responses()``.
        """
        feeds, _ = self.generate_responses()
        first_feed_url = self.base_url + feeds[0]

        seeker = feed_seeker.FeedSeeker(first_feed_url)
        discovered = list(seeker.generate_feed_urls())

        assert len(discovered) == 1
Exemple #4
0
 def test_guess_feed_links(self):
     """Check that guessed feed links exist and all contain the base URL."""
     seeker = feed_seeker.FeedSeeker(
         self.base_url, html=self.regular_html_template)
     candidates = list(seeker.guess_feed_links())

     # Guesses are produced even for an empty page.
     assert candidates
     for candidate in candidates:
         assert self.base_url in candidate
Exemple #5
0
 def test_html_property(self):
     """Check that ``html`` returns the body served for the base URL.

     A mocked GET response for ``self.base_url`` is registered with
     ``self.regular_html_template`` as its body; a seeker created without
     explicit html must expose that same text through ``html``.
     """
     expected_html = self.regular_html_template
     responses.add(
         responses.GET, self.base_url, body=expected_html, status=200)

     seeker = feed_seeker.FeedSeeker(self.base_url)
     assert seeker.html == expected_html
Exemple #6
0
    def test_generate_feed_urls_max_links(self):
        """Check that ``max_links`` caps the number of generated feed URLs.

        At least one URL must come back, no more than ``max_links``, and the
        cap must be smaller than the number of feeds returned by
        ``self.generate_responses()`` so that the limit is actually exercised.
        """
        feeds, _ = self.generate_responses()
        max_links = 2

        seeker = feed_seeker.FeedSeeker(self.base_url)
        discovered = list(seeker.generate_feed_urls(max_links=max_links))
        discovered_count = len(discovered)

        assert discovered_count > 0
        assert discovered_count <= max_links
        # Sanity check on the fixture: the cap must be below the feed count.
        assert max_links < len(feeds)
Exemple #7
0
    def test_find_link_feeds(self):
        """Check that feed entries placed in the page head are all reported.

        The same rendered ``self.rss_feed_template`` entry is put in the head
        four times; ``find_link_feeds`` must yield four results and
        ``find_anchor_feeds`` none.
        """
        num_feeds = 4
        # note that we do NOT eliminate duplicates at this level
        head_entries = [
            self.rss_feed_template.format('http://whatever.com')
            for _ in range(num_feeds)
        ]

        html = self.regular_html_template.format(
            head='\n'.join(head_entries), body='')
        seeker = feed_seeker.FeedSeeker(self.base_url, html=html)

        link_feeds = list(seeker.find_link_feeds())
        assert len(link_feeds) == num_feeds
        anchor_feeds = list(seeker.find_anchor_feeds())
        assert len(anchor_feeds) == 0
Exemple #8
0
    def test_find_anchor_feeds(self):
        """Check that feed-like anchors in the page body are reported.

        Four anchors with ``.rss`` hrefs and one ordinary anchor are placed in
        the body; ``find_link_feeds`` must yield nothing and
        ``find_anchor_feeds`` must yield four distinct results.
        """
        num_feeds = 4
        # we should find these four links
        anchors = [
            '<a href="http://{}.rss"></a>'.format(index)
            for index in range(num_feeds)
        ]
        # but will not flag this one, since it does not look like a feed
        anchors += ['<a href="https://not_an_example.com"></a>']

        html = self.regular_html_template.format(
            head='', body='\n'.join(anchors))
        seeker = feed_seeker.FeedSeeker(self.base_url, html=html)

        link_feeds = list(seeker.find_link_feeds())
        assert len(link_feeds) == 0
        distinct_anchor_feeds = set(seeker.find_anchor_feeds())
        assert len(distinct_anchor_feeds) == num_feeds
Exemple #9
0
 def test_empty_page(self):
     """Check that ``find_feed_url`` returns ``None`` for the bare template."""
     seeker = feed_seeker.FeedSeeker(
         self.base_url, html=self.regular_html_template)

     # The page has no links, so no feed URL can be found.
     found_url = seeker.find_feed_url()
     assert found_url is None
Exemple #10
0
 def test_find_internal_links(self):
     """Check that exactly one internal link is found on the base page.

     The seeker is created with ``html=None``; the single expected link
     comes from what ``self.generate_responses()`` sets up.
     """
     self.generate_responses()
     seeker = feed_seeker.FeedSeeker(self.base_url, html=None)

     # One internal link is expected, from `self.generate_responses`.
     assert len(seeker.find_internal_links()) == 1