def check(self, hostkey, relurl):
    """
    Check whether fetching is allowed by the cached robots.txt for a host.

    Parameters:
        hostkey: scheme+host key used to look up the cached robots.txt
                 (also used as the URL prefix, e.g. 'http://example.com').
        relurl:  path portion of the URL to test, appended to hostkey.

    Returns:
        True if the robots.txt rules allow fetching, False if they forbid it,
        or None if we have no (or only a stale) robots.txt entry for this host.
    """
    import time

    robotstxt, expiration = self.robots.get(hostkey, (None, None))
    if robotstxt is None:
        return None
    # A stale cache entry is treated the same as a missing one, so the
    # caller knows to re-fetch robots.txt rather than trust old rules.
    # NOTE(review): assumes `expiration` is a Unix timestamp set by the
    # code that populates self.robots — confirm against the writer.
    if expiration is not None and time.time() >= expiration:
        return None
    robotparser = RobotExclusionRulesParser()
    robotparser.seturl(hostkey + '/robots.txt')
    robotparser.parse(robotstxt.splitlines())
    return robotparser.can_fetch(hostkey + relurl)