コード例 #1
0
 def process_response(self, request, response, spider):
     """Answer a captcha page by re-submitting the request, else pass through.

     If ``response.url`` does not contain ``captcha`` the response is
     returned unchanged. Otherwise the captcha page is fetched again with
     ``requests`` to obtain cookies, the captcha form data and a token are
     collected through ``self.get_captcha_data`` / ``self.get_token``, and
     a ``FormRequest`` posting that data back to ``response.url`` is
     returned so it gets scheduled in place of the response.

     Args:
         request: The request that produced ``response``; its ``callback``
             is reused for the re-submitted request.
         response: The downloaded response to inspect.
         spider: Not used by this method.

     Returns:
         A ``FormRequest`` when the captcha flow succeeds, otherwise the
         original ``response`` (also when any step of the flow raises).

     Side effects:
         Stores the cookies obtained from the captcha page on
         ``self.cookies``.
     """
     # Not a captcha page: hand the response on untouched.
     if 'captcha' not in response.url:
         print("ok" * 20)
         return response

     try:
         # Remember the original callback so the retry lands in the same
         # parsing code.
         callback = request.callback

         # Hit the captcha page once with requests to collect its cookies
         # (the original note says these carry a captcha_uid).
         # NOTE(review): this is a blocking call with no timeout.
         captcha_page = requests.get(response.url, headers=self.headers)
         self.cookies = requests.utils.dict_from_cookiejar(
             captcha_page.cookies)

         # Build the captcha image URL from everything before '?t'.
         captcha_img_url = response.url.split('?t')[0] + 'captcha-image'
         # Second argument is the URL piece between the first '-' and the
         # following '/' (presumably the captcha id; confirm against
         # get_captcha_data).
         captcha_key = captcha_img_url.split('-')[1].split('/')[0]
         captcha_data = self.get_captcha_data(captcha_img_url, captcha_key)

         # The token lookup needs the cdnversion value embedded in the page.
         # A missing match raises AttributeError, handled below.
         cdnversion = re.search(r'cdnversion=(.*?)"',
                                response.text).group(1)
         token = self.get_token(response.url, cdnversion)
         captcha_data.update({'token': str(token)})

         # Re-submit as a form POST. Cookies and the Host header are set on
         # the new request itself; the previous code assigned the header
         # dict onto the `requests` module, so it never reached the request.
         return FormRequest(response.url,
                            formdata=captcha_data,
                            callback=callback,
                            dont_filter=True,
                            cookies=self.cookies,
                            headers={b'Host': b'search.fang.com'})
     except Exception:
         # Best effort: report the failure and let the captcha response
         # continue through the pipeline instead of crashing the crawl.
         traceback.print_exc()
         return response