def test_nofollow_variants(self):
    """Check that robots meta "nofollow" is honored regardless of case
    and position among other comma-separated directives, while
    "noindex, follow" still allows following."""
    cfg = linkcheck.configuration.Configuration()
    agg = linkcheck.director.get_aggregate(cfg)
    page = get_url_from("http://example.org", 0, agg)
    page.content_type = "text/html"
    # (markup, whether following links is allowed)
    cases = (
        ('<meta name="robots" content="nofollow">', False),
        ('<meta name="robots" content="nocache, Nofollow, noimageindex">', False),
        ('<meta name="robots" content="noindex, follow">', True),
    )
    for markup, allowed in cases:
        page.soup = make_soup(markup)
        self.assertEqual(page.content_allows_robots(), allowed)
def encoding_test(self, html, expected):
    """Assert that Beautiful Soup reports *expected* as the detected encoding.

    NOTE: Beautiful Soup prefers cchardet, then chardet, when either is
    installed, so detection results for html lacking a valid charset can
    vary with which detector library is available.
    """
    parsed = htmlsoup.make_soup(html)
    self.assertEqual(parsed.original_encoding, expected)
def _test_no_link(self, content):
    """Parse *content* and fail the test if any link at all is reported."""
    def fail_on_link(url, line, column, name, base):
        # Any invocation means the parser found a link it should not have.
        self.assertTrue(False, 'URL %r found' % url)
    soup = htmlsoup.make_soup(content)
    linkparse.find_links(soup, fail_on_link, linkparse.LinkTags)
def _test_one_link(self, content, url):
    """Parse *content* and verify exactly one link matching *url* is found."""
    self.count_url = 0
    soup = htmlsoup.make_soup(content)
    # _test_one_url returns a callback that increments self.count_url
    # for each matching link.
    linkparse.find_links(soup, self._test_one_url(url), linkparse.LinkTags)
    self.assertEqual(self.count_url, 1)
def test_parse(self, _in, _out):
    """Pretty-print the parsed *_in* markup and compare against *_out*."""
    # All test patterns are parsed in a single pass.
    rendered = StringIO()
    pretty_print_html(rendered, htmlsoup.make_soup(_in))
    self.check_results(_in, _out, rendered)
def encoding_test(self, html, expected):
    """Verify that make_soup detects *expected* as the encoding of *html*."""
    detected = htmlsoup.make_soup(html).original_encoding
    self.assertEqual(detected, expected)