Ejemplo n.º 1
0
 def url_extrator(self, response):
     """Extract URLs from ``response`` and enqueue them on ``self.target_url``.

     Every match of ``regex.URL_REGEX`` in ``response`` (e.g. ``href="/b/a"``)
     is handed to a ``Filter`` together with ``self.requests_seen``; the
     queue returned by ``Filter.extractor`` is then put on
     ``self.target_url``.

     This is a best-effort step: any exception is caught so that one bad
     response does not stop the caller. The failure is logged at DEBUG
     level instead of being discarded silently.

     Args:
         response: Text to scan for URLs (passed to ``finditer``).

     Returns:
         None.
     """
     # Function-scope import: the file's import block is outside this view.
     import logging

     try:
         url_matches = regex.URL_REGEX.finditer(response)  # e.g. href="/b/a"
         # Initialise the filter with the matches and the seen-requests record.
         url_filter = Filter(url_matches, "url", self.requests_seen)
         # Queue of URLs with special files excluded (per the original note).
         target_queue = url_filter.extractor(self.logger_type, self.target)
         self.target_url.put(target_queue)
     except Exception:
         # Keep the never-raise contract, but leave a trace for debugging.
         logging.getLogger(__name__).debug(
             "url_extrator: failed to extract URLs", exc_info=True
         )
Ejemplo n.º 2
0
 def url_extrator(self, response):
     """Scan ``response`` for URLs and put the resulting queue on ``self.target_url``.

     Matches of ``regex.URL_REGEX`` are wrapped in a ``Filter`` (tagged
     ``"url"`` and given ``self.requests_seen``), and whatever
     ``Filter.extractor`` returns is put on ``self.target_url``.

     The method never raises: failures are caught and reported at DEBUG
     level rather than being dropped without a trace.

     Args:
         response: Text to search with ``regex.URL_REGEX.finditer``.

     Returns:
         None.
     """
     # Function-scope import: the file's import block is outside this view.
     import logging

     try:
         matches = regex.URL_REGEX.finditer(response)
         # NOTE(review): presumably requests_seen lets Filter skip URLs
         # that were already visited -- confirm against Filter.
         link_filter = Filter(matches, "url", self.requests_seen)
         extracted = link_filter.extractor(self.logger_type, self.target)
         self.target_url.put(extracted)
     except Exception:
         # Best-effort by design; log instead of swallowing silently.
         logging.getLogger(__name__).debug(
             "url_extrator: failed to extract URLs", exc_info=True
         )