# NOTE: the module path below is a placeholder, not the repo's actual module
# name; parse() and TestAgent come from the robots.txt parser under test.
from robotsparser import parse, TestAgent  # hypothetical import


def test_parse_1():
    testdata = ['User-agent: *', 'Disallow: /']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert not t.can_fetch('*', '/')
    assert not t.can_fetch('*', '/allow.html')
    assert not t.can_fetch('Googlebot', '/allow.html')
    assert not t.can_fetch('*', 'http://example.com/')

def test_parse_1_rulesets():
    # Same input as test_parse_1, inspecting the parsed structure directly.
    testdata = ["User-agent: *", "Disallow: /"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "*"
    assert len(r.rulesets[0].rules) == 1
    assert r.rulesets[0].rules[0] == (False, "/")
    assert len(r.sitemaps) == 0

def test_parse_4():
    testdata = ["User-agent: Googlebot", "Allow: /"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert len(r.rulesets[0].rules) == 1
    assert r.rulesets[0].rules[0] == (True, "/")
    assert len(r.sitemaps) == 0

def test_parse_10():
    testdata = ['# default', 'Disallow: / # all', 'Allow: /allow.html']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert not t.can_fetch('*', '/')
    assert t.can_fetch('*', '/allow.html')
    assert t.can_fetch('Googlebot', '/allow.html')
    assert not t.can_fetch('*', 'http://example.com/')

def test_parse_10_rulesets():
    # Same input as test_parse_10: rules with no User-agent line fall under
    # the default "*" ruleset, and inline '#' comments are stripped.
    testdata = ["# default", "Disallow: / # all", "Allow: /allow.html"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 0

def test_parse_6():
    testdata = ["User-agent: Googlebot", "User-agent: *",
                "Disallow: /", "Allow: /allow.html"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 2
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert r.rulesets[0].robot_names[1] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 0

def test_parse_7():
    testdata = ['User-agent: Googlebot', 'User-agent: *', 'Disallow: /',
                'Allow: /allow.html',
                'Sitemap: https://www.example.com/sitemap.xml']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert not t.can_fetch('*', '/')
    assert t.can_fetch('*', '/allow.html')
    assert t.can_fetch('Googlebot', '/allow.html')
    assert not t.can_fetch('*', 'http://example.com/')

def test_parse_13():
    testdata = ['User-agent: *',
                'Disallow: /あ.html',
                'Disallow: /う.html$',
                'Disallow: /え.html?',
                'Allow: /え.html?名前=*']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    # '/%E3%81%82.html' is the percent-encoded spelling of '/あ.html';
    # both must hit the same Disallow rule.
    assert not t.can_fetch('*', '/あ.html')
    assert not t.can_fetch('*', '/%E3%81%82.html')
    assert t.can_fetch('*', '/い.html')
    assert not t.can_fetch('*', '/う.html')
    assert not t.can_fetch('*', '/え.html?年齢=不詳')
    assert t.can_fetch('*', '/え.html?名前=なまえ')
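
# The first two assertions in test_parse_13 treat '/あ.html' and its UTF-8
# percent-encoding '/%E3%81%82.html' as the same path, which implies the
# matcher normalizes percent-encoded paths before comparing. A minimal sketch
# of one way to do that, using only the standard library (normalize_path is
# illustrative, not the parser's actual helper):
from urllib.parse import unquote


def normalize_path(path):
    # unquote() decodes UTF-8 percent-escapes, so both spellings of the
    # path compare equal after normalization.
    return unquote(path)


assert normalize_path('/%E3%81%82.html') == '/あ.html'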

def test_parse_12():
    testdata = ['User-agent: *', 'Disallow: /a', 'Allow: /a/b',
                'Disallow: /a/b', 'Allow: /a/b/c', 'Disallow: /a/b/c']
    r = parse(testdata)
    # The third TestAgent argument changes how conflicting Allow/Disallow
    # rules are resolved: the same rules and path give different answers
    # under each value.
    t1 = TestAgent('https://www.example.com/', r, 1)
    t2 = TestAgent('https://www.example.com/', r, 2)
    t3 = TestAgent('https://www.example.com/', r, 3)
    assert t1.can_fetch('*', '/a/b/c')
    assert not t2.can_fetch('*', '/a/b/c')
    assert t3.can_fetch('*', '/a/b/c')

def test_parse_7_rulesets():
    # Same input as test_parse_7, inspecting the parsed structure directly.
    testdata = [
        "User-agent: Googlebot",
        "User-agent: *",
        "Disallow: /",
        "Allow: /allow.html",
        "Sitemap: https://www.example.com/sitemap.xml",
    ]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 2
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert r.rulesets[0].robot_names[1] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 1
    assert r.sitemaps[0] == "https://www.example.com/sitemap.xml"
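
# The structural assertions above (r.rulesets, .robot_names, .rules,
# r.sitemaps) imply a result shape like the following minimal sketch.
# This is an assumption for illustration, with field types inferred from
# the tests rather than taken from the actual implementation:
from dataclasses import dataclass, field
from typing import List, Tuple


@dataclass
class RuleSet:
    robot_names: List[str] = field(default_factory=list)
    # Each rule is (allow, path_pattern); e.g. (False, "/") disallows
    # everything and (True, "/allow.html") carves out an exception.
    rules: List[Tuple[bool, str]] = field(default_factory=list)


@dataclass
class RobotsTxt:
    rulesets: List[RuleSet] = field(default_factory=list)
    sitemaps: List[str] = field(default_factory=list)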

def test_parse_11():
    testdata = ['User-agent: Googlebot',
                'Disallow: /disallow.html$',
                'Allow: /allow.html$',
                'Disallow: /search?q=',
                'Allow: /search?q=*&*$',
                'User-agent: *',
                'Disallow: /',
                'Allow: /test/',
                'Allow: /allow.html',
                'Sitemap: https://www.example.com/sitemap.xml']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('Googlebot', '/')
    assert t.can_fetch('Googlebot', '/allow.html')
    assert not t.can_fetch('Googlebot', '/disallow.html')
    assert t.can_fetch('Googlebot', '/disallow.html?')
    assert not t.can_fetch('Googlebot', '/search?q=')
    assert not t.can_fetch('Googlebot', '/search?q=a')
    assert t.can_fetch('Googlebot', '/search?q=a&x=b')
    assert t.can_fetch('*', '/allow.html')
    assert t.can_fetch('*', '/allow.html?a=b')
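
# None of the tests pin down how TestAgent resolves conflicting rules (and
# test_parse_12 shows the strategy is selectable), but the expectations in
# test_parse_11 are consistent with Google-style precedence: the rule with
# the longest matching pattern wins, and Allow wins ties. Below is a minimal
# sketch of that strategy, assuming '*' matches any run of characters and a
# trailing '$' anchors the end of the path; longest_match_can_fetch is
# illustrative, not the repo's implementation.
import re


def _rule_regex(pattern):
    # Translate a robots.txt path pattern into a regex: '*' becomes '.*',
    # everything else is escaped, and a trailing '$' anchors the end.
    anchored = pattern.endswith('$')
    body = pattern[:-1] if anchored else pattern
    regex = '.*'.join(re.escape(part) for part in body.split('*'))
    return regex + ('$' if anchored else '')


def longest_match_can_fetch(rules, path):
    # rules is a list of (allow, pattern) tuples, as in RuleSet.rules above.
    best = None  # (pattern_length, allow); tuple order makes Allow win ties
    for allow, pattern in rules:
        if re.match(_rule_regex(pattern), path):
            candidate = (len(pattern), allow)
            if best is None or candidate > best:
                best = candidate
    return True if best is None else best[1]


# Reproduces the Googlebot expectations from test_parse_11: the short
# Disallow prefix loses to the longer wildcard Allow when both match.
googlebot_rules = [(False, '/disallow.html$'), (True, '/allow.html$'),
                   (False, '/search?q='), (True, '/search?q=*&*$')]
assert longest_match_can_fetch(googlebot_rules, '/search?q=a') is False
assert longest_match_can_fetch(googlebot_rules, '/search?q=a&x=b') is True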