コード例 #1
0
ファイル: middlewares.py プロジェクト: hankya/questions
 def process_response(self, request, response, spider):
     """Rewrite the ``href`` of every link in a text response to an absolute URL.

     A response whose type is exactly ``Response`` and whose URL does not
     match ``_file_pattern`` is first wrapped in an ``HtmlResponse`` so that
     its body can be parsed as markup.

     Args:
         request: The request that produced ``response`` (unused here).
         response: The downloaded response to post-process.
         spider: The spider the response is intended for (unused here).

     Returns:
         The response unchanged when it exposes no ``body_as_unicode`` or its
         body is blank; otherwise a copy whose body is the re-serialized
         document with every ``<a href>`` resolved against the base URL.
     """
     # Exact type check: only a plain Response is wrapped; instances of
     # subclasses are left as they are.
     if type(response) is Response and not _file_pattern.match(response.url):
         response = HtmlResponse(response.url, body=response.body)

     if not hasattr(response, 'body_as_unicode'):
         return response

     text = response.body_as_unicode()
     # The HTML parser rejects empty documents; nothing to rewrite anyway.
     if not text.strip():
         return response

     hdoc = html.fromstring(text)
     # The base URL does not change while links are rewritten: compute it once.
     base_url = get_base_url(response)
     # Only anchors that carry an href are touched. Joining a missing href
     # would yield the base URL and turn named anchors into self-links.
     for link in hdoc.xpath('//a[@href]'):
         link.set('href', urlparse.urljoin(base_url, link.get('href')))
     return response.replace(body=html.tostring(hdoc, encoding='unicode'))