# NOTE: these examples assume `parse` and `TestAgent` are imported from the
# robots.txt parser module under test; the import lines are not shown in the
# original snippets.
def test_parse_1():
    testdata = ['User-agent: *',
                'Disallow: /']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('*', '/') == False
    assert t.can_fetch('*', '/allow.html') == False
    assert t.can_fetch('Googlebot', '/allow.html') == False
    assert t.can_fetch('*', 'http://example.com/') == False
Example 2
def test_parse_1():
    testdata = ["User-agent: *", "Disallow: /"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "*"
    assert len(r.rulesets[0].rules) == 1
    assert r.rulesets[0].rules[0] == (False, "/")
    assert len(r.sitemaps) == 0
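The structural assertions in these examples imply a parse result shaped roughly like the sketch below: a list of rulesets, each with the robot names it applies to and its rules as (allow, path) tuples, plus a top-level list of sitemap URLs. Only the attribute names (rulesets, robot_names, rules, sitemaps) come from the tests; the class names and types here are assumptions for illustration.

from dataclasses import dataclass, field
from typing import List, Tuple


@dataclass
class Ruleset:
    # User-agent names this ruleset applies to, e.g. ["Googlebot", "*"]
    robot_names: List[str] = field(default_factory=list)
    # (allow, path) tuples: (False, "/") for Disallow, (True, "/allow.html") for Allow
    rules: List[Tuple[bool, str]] = field(default_factory=list)


@dataclass
class RobotsTxt:
    rulesets: List[Ruleset] = field(default_factory=list)
    # URLs collected from "Sitemap:" lines, kept separate from any ruleset
    sitemaps: List[str] = field(default_factory=list)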
Example 3
def test_parse_4():
    testdata = ["User-agent: Googlebot", "Allow: /"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert len(r.rulesets[0].rules) == 1
    assert r.rulesets[0].rules[0] == (True, "/")
    assert len(r.sitemaps) == 0
def test_parse_10():
    testdata = ['# default',
                'Disallow: / # all',
                'Allow: /allow.html']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('*', '/') == False
    assert t.can_fetch('*', '/allow.html') == True
    assert t.can_fetch('Googlebot', '/allow.html') == True
    assert t.can_fetch('*', 'http://example.com/') == False
Example 5
def test_parse_10():
    testdata = ["# default", "Disallow: / # all", "Allow: /allow.html"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 1
    assert r.rulesets[0].robot_names[0] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 0
Example 6
def test_parse_6():
    testdata = ["User-agent: Googlebot", "User-agent: *", "Disallow: /", "Allow: /allow.html"]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 2
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert r.rulesets[0].robot_names[1] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 0
def test_parse_7():
    testdata = ['User-agent: Googlebot',
                'User-agent: *',
                'Disallow: /',
                'Allow: /allow.html',
                'Sitemap: https://www.example.com/sitemap.xml']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('*', '/') == False
    assert t.can_fetch('*', '/allow.html') == True
    assert t.can_fetch('Googlebot', '/allow.html') == True
    assert t.can_fetch('*', 'http://example.com/') == False
def test_parse_13():
    testdata = ['User-agent: *',
                'Disallow: /あ.html',
                'Disallow: /う.html$',
                'Disallow: /え.html?',
                'Allow: /え.html?名前=*']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('*', '/あ.html') == False
    assert t.can_fetch('*', '/%E3%81%82.html') == False
    assert t.can_fetch('*', '/い.html') == True
    assert t.can_fetch('*', '/う.html') == False
    assert t.can_fetch('*', '/え.html?年齢=不詳') == False
    assert t.can_fetch('*', '/え.html?名前=なまえ') == True
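test_parse_13 expects /あ.html and its percent-encoded form /%E3%81%82.html to be treated as the same path. A minimal sketch of one way to get that behaviour, assuming paths are decoded to a common form before rule matching (the actual parser may normalise differently):

from urllib.parse import unquote


def normalize_path(path: str) -> str:
    # Decode percent-escapes so "/%E3%81%82.html" and "/あ.html" compare equal.
    return unquote(path, encoding="utf-8")


assert normalize_path("/%E3%81%82.html") == normalize_path("/あ.html")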
def test_parse_12():
    testdata = ['User-agent: *',
                'Disallow: /a',
                'Allow: /a/b',
                'Disallow: /a/b',
                'Allow: /a/b/c',
                'Disallow: /a/b/c']
    r = parse(testdata)
    t1 = TestAgent('https://www.example.com/', r, 1)
    t2 = TestAgent('https://www.example.com/', r, 2)
    t3 = TestAgent('https://www.example.com/', r, 3)
    assert t1.can_fetch('*', '/a/b/c') == True
    assert t2.can_fetch('*', '/a/b/c') == False
    assert t3.can_fetch('*', '/a/b/c') == True
Example 10
def test_parse_7():
    testdata = [
        "User-agent: Googlebot",
        "User-agent: *",
        "Disallow: /",
        "Allow: /allow.html",
        "Sitemap: https://www.example.com/sitemap.xml",
    ]
    r = parse(testdata)
    assert len(r.rulesets) == 1
    assert len(r.rulesets[0].robot_names) == 2
    assert r.rulesets[0].robot_names[0] == "Googlebot"
    assert r.rulesets[0].robot_names[1] == "*"
    assert len(r.rulesets[0].rules) == 2
    assert r.rulesets[0].rules[0] == (False, "/")
    assert r.rulesets[0].rules[1] == (True, "/allow.html")
    assert len(r.sitemaps) == 1
    assert r.sitemaps[0] == "https://www.example.com/sitemap.xml"
Example 11
def test_parse_11():
    testdata = ['User-agent: Googlebot',
                'Disallow: /disallow.html$',
                'Allow: /allow.html$',
                'Disallow: /search?q=',
                'Allow: /search?q=*&*$',
                'User-agent: *',
                'Disallow: /',
                'Allow: /test/',
                'Allow: /allow.html',
                'Sitemap: https://www.example.com/sitemap.xml']
    r = parse(testdata)
    t = TestAgent('https://www.example.com/', r)
    assert t.can_fetch('Googlebot', '/') == True
    assert t.can_fetch('Googlebot', '/allow.html') == True
    assert t.can_fetch('Googlebot', '/disallow.html') == False
    assert t.can_fetch('Googlebot', '/disallow.html?') == True
    assert t.can_fetch('Googlebot', '/search?q=') == False
    assert t.can_fetch('Googlebot', '/search?q=a') == False
    assert t.can_fetch('Googlebot', '/search?q=a&x=b') == True
    assert t.can_fetch('*', '/allow.html') == True
    assert t.can_fetch('*', '/allow.html?a=b') == True
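test_parse_11 and test_parse_13 rely on the usual robots.txt wildcard conventions: * matches any run of characters and a trailing $ anchors the rule at the end of the URL, otherwise a rule is a prefix match. A minimal sketch of that matching translated to a regular expression (an illustration of the convention, not this project's implementation):

import re


def rule_matches(rule_path: str, url_path: str) -> bool:
    # Escape the rule, turn escaped "*" back into ".*", and anchor at the
    # start; a trailing "$" additionally anchors at the end of the URL path.
    anchored = rule_path.endswith("$")
    pattern = re.escape(rule_path[:-1] if anchored else rule_path)
    pattern = "^" + pattern.replace(r"\*", ".*") + ("$" if anchored else "")
    return re.search(pattern, url_path) is not None


# Mirrors a few of the expectations asserted in test_parse_11:
assert rule_matches("/disallow.html$", "/disallow.html")
assert not rule_matches("/disallow.html$", "/disallow.html?")
assert rule_matches("/search?q=*&*$", "/search?q=a&x=b")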