def process_response(self, request, response, spider):
    """Answer a captcha page by re-posting the form, else pass through.

    When ``response.url`` contains ``"captcha"``, the captcha page is
    fetched again with ``requests`` to collect its cookies, the form data
    and token are obtained from ``self.get_captcha_data`` and
    ``self.get_token``, and a ``FormRequest`` posting that data back to
    ``response.url`` is returned with the original request's callback.

    Side effect: ``self.cookies`` is overwritten with the cookies returned
    by the ``requests`` call.

    Args:
        request: The request that produced ``response``; only its
            ``callback`` is read.
        response: The downloaded response to inspect.
        spider: Unused here.

    Returns:
        A new ``FormRequest`` when a captcha page was detected and the
        form could be built; otherwise ``response`` unchanged. If anything
        fails while handling the captcha, the traceback is printed and
        ``response`` is returned.
    """
    try:
        # Not a captcha page: hand the response on untouched.
        if 'captcha' not in response.url:
            print("ok" * 20)
            return response

        # Remember the original callback so the retried request is parsed
        # by the same spider method.
        callback = request.callback

        # Fetch the captcha page once with requests to obtain its cookies
        # (the original note says these carry ``captcha_uid``).
        # NOTE(review): this is a synchronous call with no timeout.
        captcha_page = requests.get(response.url, headers=self.headers)
        self.cookies = requests.utils.dict_from_cookiejar(captcha_page.cookies)

        # Build the captcha image URL from the part of the URL before '?t'.
        captcha_img_url = response.url.split('?t')[0] + 'captcha-image'
        # Second argument is the text between the first '-' and the next
        # '/' of the image URL; its meaning is defined by
        # get_captcha_data -- TODO confirm.
        captcha_data = self.get_captcha_data(
            captcha_img_url,
            captcha_img_url.split('-')[1].split('/')[0],
        )

        # The token lookup needs the cdnversion value embedded in the page.
        version_match = re.search(r'cdnversion=(.*?)"', response.text)
        if version_match is None:
            # Fail with a clear message instead of an opaque AttributeError
            # on ``None.group``; handled by the except clause below.
            raise ValueError('cdnversion not found in captcha page')
        cdnversion = version_match.group(1)
        token = self.get_token(response.url, cdnversion)
        captcha_data.update({'token': str(token)})

        # Build the Scrapy form request. Cookies and the Host header are
        # passed to the constructor so they are attached to this request
        # (the previous code set ``headers`` on the ``requests`` module by
        # mistake, so the header was never sent).
        return FormRequest(
            response.url,
            formdata=captcha_data,
            callback=callback,
            dont_filter=True,
            cookies=self.cookies,
            headers={b'Host': b'search.fang.com'},
        )
    except Exception:
        # Best effort: report the failure and let the original response
        # continue through the middleware chain.
        traceback.print_exc()
        return response