def add_handler(self, url_handler):
    """Register a single URL handler under its own URL pattern.

    ``url_handler[0]`` is taken as the URL pattern; a trailing ``$`` is
    appended when the pattern does not already end with one. The compiled
    pattern is stored in ``self.handlers`` together with a one-element list
    holding the handler spec.

    Args:
        url_handler: Either a tuple/list of 2 to 4 items, which is expanded
            into ``URLSpec(*url_handler)``, or any other object, which is
            stored unchanged. In both cases it must support ``[0]`` and the
            resulting spec must expose a ``name`` attribute.

    Raises:
        AssertionError: If a tuple/list ``url_handler`` does not have 2, 3
            or 4 items.
    """
    url_pattern = url_handler[0]
    if not url_pattern.endswith("$"):
        url_pattern += "$"

    # The group list is registered first and filled in below; because the
    # same list object is stored in self.handlers, the later append is
    # visible there.
    handlers = []
    entry = (re.compile(url_pattern), handlers)

    # The wildcard pattern '.*$' must keep the lowest priority: when it is
    # the last entry, new patterns go right before it instead of after it.
    if self.handlers and self.handlers[-1][0].pattern == '.*$':
        self.handlers.insert(-1, entry)
    else:
        self.handlers.append(entry)

    spec = url_handler
    if isinstance(spec, (tuple, list)):
        # A plain sequence carries the URLSpec constructor arguments.
        assert len(spec) in (2, 3, 4)
        spec = URLSpec(*spec)
    handlers.append(spec)

    if spec.name:
        # Warn only when this name is really being replaced. The previous
        # check tested whether *any* named handler existed, so it warned on
        # every named handler after the first one.
        if spec.name in self.named_handlers:
            Log4Spider.warnLog(
                "Multiple handlers named %s; replacing previous value",
                spec.name)
        self.named_handlers[spec.name] = spec
def add_handler(self, url_handler):
    """Register a single URL handler under its own URL pattern.

    ``url_handler[0]`` is taken as the URL pattern; a trailing ``$`` is
    appended when the pattern does not already end with one. The compiled
    pattern is stored in ``self.handlers`` together with a one-element list
    holding the handler spec.

    Args:
        url_handler: Either a tuple/list of 2 to 4 items, which is expanded
            into ``URLSpec(*url_handler)``, or any other object, which is
            stored unchanged. In both cases it must support ``[0]`` and the
            resulting spec must expose a ``name`` attribute.

    Raises:
        AssertionError: If a tuple/list ``url_handler`` does not have 2, 3
            or 4 items.
    """
    url_pattern = url_handler[0]
    if not url_pattern.endswith("$"):
        url_pattern += "$"

    # The group list is registered first and filled in below; because the
    # same list object is stored in self.handlers, the later append is
    # visible there.
    handlers = []
    entry = (re.compile(url_pattern), handlers)

    # The wildcard pattern '.*$' must keep the lowest priority: when it is
    # the last entry, new patterns go right before it instead of after it.
    if self.handlers and self.handlers[-1][0].pattern == '.*$':
        self.handlers.insert(-1, entry)
    else:
        self.handlers.append(entry)

    spec = url_handler
    if isinstance(spec, (tuple, list)):
        # A plain sequence carries the URLSpec constructor arguments.
        assert len(spec) in (2, 3, 4)
        spec = URLSpec(*spec)
    handlers.append(spec)

    if spec.name:
        # Warn only when this name is really being replaced. The previous
        # check tested whether *any* named handler existed, so it warned on
        # every named handler after the first one.
        if spec.name in self.named_handlers:
            Log4Spider.warnLog(
                "Multiple handlers named %s; replacing previous value",
                spec.name
            )
        self.named_handlers[spec.name] = spec
def prepare_cul_opts(obj):
    """Make a curl handle write its response body to a file in the static dir.

    The target file name is derived from ``self.env['url']``: the URL's host
    followed by its path with every ``/`` replaced by ``-``. The static
    directory (``self.app.settings["static_path"]``) is created when missing.

    Args:
        obj: Object whose ``setopt`` is called with ``pycurl.WRITEFUNCTION``
            and the write method of the opened target file.

    Raises:
        OSError: If the static directory cannot be created, or the target
            file cannot be opened for writing.
    """
    # NOTE(review): `self` is not a parameter here; it is resolved from the
    # surrounding scope - presumably this function is defined inside a
    # method. Confirm against the enclosing code.
    parsed = urlparse(self.env['url'])
    pic_name = parsed.netloc + parsed.path.replace("/", "-")

    static_path = self.app.settings["static_path"]
    try:
        # makedirs also creates missing parent directories, and attempting
        # the creation directly avoids the check-then-create race of
        # exists() followed by mkdir().
        os.makedirs(static_path)
    except OSError:
        # An already existing directory is fine; anything else is an error.
        if not os.path.isdir(static_path):
            raise

    pic_path = "%s/%s" % (static_path, pic_name)
    # NOTE(review): the message has no %s placeholder for pic_path - confirm
    # how Log4Spider.warnLog combines its arguments.
    Log4Spider.warnLog("PicDown path: ", pic_path)

    # The file must stay open while the transfer writes to it, so it is not
    # closed here; it is only released once the bound `write` method is no
    # longer referenced.
    obj.setopt(pycurl.WRITEFUNCTION, open(pic_path, "wb").write)