def url_extrator(self, response):
    """Extract a URL queue from href values and add it to ``self.target_url``.

    Every ``regex.URL_REGEX`` match found in ``response`` (for example
    ``href="/b/a"``) is handed to a ``Filter`` in ``"url"`` mode together
    with ``self.requests_seen``. The queue returned by ``Filter.extractor``
    (URLs other than special files) is then put on ``self.target_url``.

    The method is best-effort: if any step fails, the failure is logged at
    DEBUG level and the response is skipped. It never raises and always
    returns ``None``.

    Args:
        response: Content to scan for URLs.
            NOTE(review): presumably the decoded response body -- confirm
            against the caller.
    """
    # Local import: the file's top-level import block is not visible from
    # this block, so the dependency is kept self-contained.
    import logging

    try:
        url_matches = regex.URL_REGEX.finditer(response)  # e.g. href="/b/a"
        # Initialise the filter over the raw matches.
        url_filter = Filter(url_matches, "url", self.requests_seen)
        # Queue of URLs, excluding special files.
        target_queue = url_filter.extractor(self.logger_type, self.target)
        self.target_url.put(target_queue)
    except Exception:
        # Previously `pass`: keep skipping the response instead of crashing,
        # but leave a trace so a broken extraction can be diagnosed.
        logging.getLogger(__name__).debug(
            "url_extrator: failed to extract URLs from response",
            exc_info=True,
        )
def url_extrator(self, response):
    """Collect URLs matched in ``response`` and enqueue them for crawling.

    Steps, in order:
      1. Run ``regex.URL_REGEX.finditer`` over ``response``.
      2. Wrap the match iterator in ``Filter(..., "url", self.requests_seen)``.
      3. Call ``Filter.extractor(self.logger_type, self.target)`` to obtain
         the resulting queue.
      4. Put that queue on ``self.target_url``.

    Failures in any step are swallowed on purpose (best-effort), but are
    logged at DEBUG level rather than silently discarded. The method never
    raises and always returns ``None``.

    Args:
        response: Content to scan for URLs.
            NOTE(review): presumably the decoded response body -- confirm
            against the caller.
    """
    # Local import: the file's top-level import block is not visible from
    # this block, so the dependency is kept self-contained.
    import logging

    try:
        matches = regex.URL_REGEX.finditer(response)
        link_filter = Filter(matches, "url", self.requests_seen)
        extracted_queue = link_filter.extractor(self.logger_type, self.target)
        self.target_url.put(extracted_queue)
    except Exception:
        # Previously `pass`: still skip the response, but record why so the
        # failure is diagnosable when debug logging is enabled.
        logging.getLogger(__name__).debug(
            "url_extrator: failed to extract URLs from response",
            exc_info=True,
        )