Esempio n. 1
0
    def check(self, hostkey, relurl):
        """Report whether `relurl` on host `hostkey` may be fetched.

        The decision is made from the robots.txt text cached in
        `self.robots`, whose entries are unpacked as
        `(robotstxt, expiration)` pairs keyed by `hostkey`.

        Args:
            hostkey: Cache key for the host; it is also used as the URL
                prefix when building the robots.txt URL and the URL to test.
            relurl: Path appended to `hostkey` to form the URL to test.

        Returns:
            None if no robots.txt is cached for `hostkey` or the parser
            reports itself expired; otherwise the result of the parser's
            `can_fetch` (True if allowed to fetch, False if not).
        """
        robotstxt, expiration = self.robots.get(hostkey, (None, None))

        if robotstxt is None:
            return None

        # FIXME: mtime?  we need to let robots.txt expire.
        # NOTE(review): `expiration` is read from the cache but never
        # consulted below -- presumably it should drive the expiry check.

        robotparser = RobotExclusionRulesParser()

        # Fixed: this previously referenced the undefined name
        # `robotsparser`, raising NameError for every cached host.
        # NOTE(review): the check runs on a freshly constructed parser,
        # before any robots.txt has been parsed -- confirm this is intended.
        if robotparser.is_expired():
            return None

        robotparser.seturl(hostkey + '/robots.txt')
        robotparser.parse(robotstxt.splitlines())
        return robotparser.can_fetch(hostkey + relurl)