Example #1
def test_status_forbidden_allow(self):
    '''Test that if the flag is given, we allow all sites when robots.txt
    is forbidden'''
    rules = Rules('http://example.com/robots.txt',
                  401,
                  '',
                  0,
                  disallow_forbidden=False)
    self.assertTrue(rules.allowed('/foo', 't'))
    self.assertTrue(rules.allowed('http://example.com/foo', 't'))
Example #2
class ReppyWrapper(python_common.web.robots_txt.parser_base.RobotsTxtParser):
    '''Answer robots.txt queries with reppy's Rules parser when content is
    available; otherwise fall back to the base-class behaviour.'''

    def __init__(self, content=None, expires=None):
        super(ReppyWrapper, self).__init__(content, expires)
        if content:
            # Parse the fetched robots.txt body with reppy (treated as a 200).
            self.parser = Rules('robots.txt', 200, content, self.expires)
        else:
            # No content to parse: delegate every query to the base class.
            self.parser = None
            self.my_super = super(ReppyWrapper, self)

    def allowed(self, user_agent, url):
        # Note the argument order: reppy's Rules.allowed expects (url, agent).
        if self.parser:
            return self.parser.allowed(url, user_agent)
        return self.my_super.allowed(user_agent, url)

    def delay(self, user_agent):
        if self.parser:
            return self.parser.delay(user_agent)
        return self.my_super.delay(user_agent)

    @property
    def expired(self):
        return self.parser.expired if self.parser else self.my_super.expired

    @property
    def sitemaps(self):
        return self.parser.sitemaps if self.parser else self.my_super.sitemaps
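
For orientation, here is a minimal, hypothetical sketch of how the Rules object wrapped above might be queried directly. The import path (reppy.parser), the sample robots.txt body, the user-agent string, and the commented outcomes are assumptions for illustration; only the constructor shape and the allowed/delay calls are taken from the code above.

# Hypothetical usage sketch; assumes Rules is importable from reppy.parser
# and that the body below is parsed as ordinary robots.txt directives.
from reppy.parser import Rules

content = (
    'User-agent: *\n'
    'Crawl-delay: 2\n'
    'Disallow: /private\n'
)
# Same constructor shape as in ReppyWrapper: (url, status, content, expires).
rules = Rules('http://example.com/robots.txt', 200, content, 0)

rules.allowed('/private', 'my-bot')                   # disallowed path
rules.allowed('http://example.com/other', 'my-bot')   # not matched, so allowed
rules.delay('my-bot')                                  # the Crawl-delay value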
Example #3
def test_status_allowed(self):
    '''If no robots.txt exists, we're given free range'''
    rules = Rules('http://example.com/robots.txt', 404, '', 0)
    self.assertTrue(rules.allowed('/foo', 't'))
    self.assertTrue(rules.allowed('http://example.com/foo', 't'))
Example #4
def test_status_disallowed(self):
    '''Make sure that when we get a disallowed status, we believe
    we're not allowed to crawl a site'''
    rules = Rules('http://example.com/robots.txt', 401, '', 0)
    self.assertTrue(not rules.allowed('/foo', 't'))
    self.assertTrue(not rules.allowed('http://example.com/foo', 't'))
Example #6
def test_status_forbidden(self):
    '''Make sure that when we get a forbidden status, we believe
    we're not allowed to crawl a site'''
    rules = Rules('http://example.com/robots.txt', 401, '', 0)
    self.assertTrue(not rules.allowed('/foo', 't'))
    self.assertTrue(not rules.allowed('http://example.com/foo', 't'))
Example #7
def test_status_forbidden_allow(self):
    '''Test that if the flag is given, we allow all sites when robots.txt
    is forbidden'''
    rules = Rules('http://example.com/robots.txt', 401, '', 0, disallow_forbidden=False)
    self.assertTrue(rules.allowed('/foo', 't'))
    self.assertTrue(rules.allowed('http://example.com/foo', 't'))
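
Taken together, these tests pin down how Rules interprets the fetch status: a 404 (no robots.txt) allows everything, a 401 is treated as a blanket disallow, and passing disallow_forbidden=False turns that blanket disallow back into a blanket allow. A compact, hedged restatement of the same calls follows; the import path is an assumption, as above.

from reppy.parser import Rules  # assumed import path

url, agent = 'http://example.com/robots.txt', 't'
Rules(url, 404, '', 0).allowed('/foo', agent)            # True: no robots.txt at all
Rules(url, 401, '', 0).allowed('/foo', agent)            # False: forbidden status
Rules(url, 401, '', 0,
      disallow_forbidden=False).allowed('/foo', agent)   # True: flag overrides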