def test_0100_empty(self):
    """Check that inputs containing no records scan to nothing.

    The inputs cover: no lines at all, one empty line, two empty lines,
    whitespace-only lines and a line holding only a comment.
    """
    recordless_inputs = (
        [],
        [''],
        ['', ''],
        [' ', '\t'],
        ['#comment'],
    )
    with no_log(logger):
        for lines in recordless_inputs:
            # assertCountEqual consumes the scanner's result while the
            # no_log block is still active.
            self.assertCountEqual(scan_robots_txt(lines, logger), ())
def test_trivial(self):
    """Check that a plain ASCII input decodes cleanly without logging."""
    # Pass a one-shot generator rather than a sequence, as the original
    # test did, so the candidates can only be iterated once.
    candidates = (pair for pair in [('us-ascii', 'header')])
    with no_log(logger):
        decoded = decode_and_report(b'Hello', candidates, logger)
    text, encoding = decoded
    self.assertEqual(text, 'Hello')
    self.assertEqual(encoding, 'us-ascii')
def test_none(self):
    """Check that candidates whose encoding is None are skipped.

    Only the middle candidate names an encoding, so the UTF-8 payload
    must be decoded with it and 'utf-8' must be reported back.
    """
    payload = b'smile \xf0\x9f\x98\x83'
    candidates = (
        (None, 'HTTP header'),
        ('utf-8', 'XML declaration'),
        (None, 'Unicode BOM'),
    )
    with no_log(logger):
        decoded = decode_and_report(payload, candidates, logger)
    text, encoding = decoded
    self.assertEqual(text, 'smile \U0001f603')
    self.assertEqual(encoding, 'utf-8')
def test_0400_unescape_valid(self):
    """Check that correctly percent-escaped paths are unescaped.

    Each case pairs the path as written in a 'disallow' line with the
    path expected in the parsed rule.
    """
    cases = (
        ('/a%3cd.html', '/a<d.html'),
        ('/%7Ejoe/', '/~joe/'),
        # An escaped slash is expected to stay escaped.
        ('/a%2fb.html', '/a%2fb.html'),
        # UTF-8 sequences of two, three and four bytes.
        ('/%C2%A2', '/\u00A2'),
        ('/%e2%82%ac', '/\u20AC'),
        ('/%F0%90%8d%88', '/\U00010348'),
    )

    # One record: the user-agent on line 1, the disallow lines from 2 on.
    record = [(1, 'user-agent', '*')]
    record.extend(
        (lineno, 'disallow', escaped)
        for lineno, (escaped, _) in enumerate(cases, start=2)
    )
    expected = {'*': [(False, unescaped) for _, unescaped in cases]}

    with no_log(logger):
        parsed = parse_robots_txt([record], logger)
    self.assertEqual(parsed, expected)
def test_0100_empty(self):
    """Check that parsing an empty record set gives an empty mapping."""
    with no_log(logger):
        parsed = parse_robots_txt((), logger)
    self.assertEqual(parsed, {})
def test_0200_example(self):
    """Check that scanning the example lines gives the example records."""
    with no_log(logger):
        # Materialize the result inside the block, as the original did,
        # so the scan is fully consumed while no_log is active.
        scanned = [*scan_robots_txt(EXAMPLE_LINES, logger)]
    self.assertEqual(scanned, EXAMPLE_RECORDS)
def test_0200_example(self):
    """Check that parsing the example records gives the example map."""
    with no_log(logger):
        parsed = parse_robots_txt(EXAMPLE_RECORDS, logger)
    self.assertEqual(parsed, EXAMPLE_MAP)