def process_response(self, request, response, spider): if 'dont_retry' in request.meta: print 'dont retry in meta' return response #print "response.status = %s" % (response.status) #print "request.url = %s" % (request.url) #if not (str(request.url).find("SearchGroupedFlightsJSONMinimum")>-1): # if str(response.body).find("[null,null]")>-1: # print "response.body = %s" % (response.body) # reason = response_status_message(400) # return self._retry(request, reason, spider) or response uuids = re.findall('\w{8}-\w{4}-\w{4}-\w{4}-\w{12}', response.body) price = re.findall('[0-9]*\.[0-9]{2}RoundTrip', response.body) if(len(uuids)>0): print "uuids: %s" % (uuids) if response.status in [200] and (str(request.url).find("SearchGroupedFlightsJSONMinimum")>-1) and int(request.meta.get('dormiu_bool', 0))<1: reason = response_status_message(response.status) segundos = random.randint(10, 15) print "Espera a resposta:" print "Dormindo %ss..." % (segundos) #time.sleep(15) time.sleep(segundos) #retryreq = request.copy() #retryreq.meta['dormiu_bool'] = 1 request.meta['dormiu_bool'] = 1 #return self._retry(retryreq, reason, spider) or response if response.status in self.retry_http_codes: print "Voltou erro 400, tenta de novo!" reason = response_status_message(response.status) return self._retry(request, reason, spider) or response if uuids[0]=='00000000-0000-0000-0000-000000000000': print "Uid 000, tenta de novo!" retries_uuid = request.meta.get('retry_times_uuid', 0) + 1 if retries_uuid <= self.max_retry_wrong_uuid: request.meta['retry_times_uuid'] = retries_uuid print "uuids error!: %s" % (uuids) print "uuids retry count: %s" % (retries_uuid) reason = response_status_message(400) return self._retry(request, reason, spider) or response if not (str(request.url).find("SearchGroupedFlightsJSONMinimum")>-1): print "Nao tem preco ainda, tenta de novo" print "price: %s" % (price) if not len(price)>0: print "dorme e espera preco!" 
#dorme um pouco time.sleep(random.randint(2, 7)) reason = response_status_message(400) return self._retry(request, reason, spider) or response return response
def _redirect(self, redirected, request, spider, reason):
    # Decide whether to follow a redirect, re-issue the original request,
    # or give up. `redirected` is the pre-built redirect Request.
    reason = response_status_message(reason)
    # redirect_ttl counts down per hop; redirect_times counts hops taken
    ttl = request.meta.setdefault('redirect_ttl', self.max_redirect_times)
    redirects = request.meta.get('redirect_times', 0) + 1
    # Amazon special case: a redirect leading off-amazon re-issues the
    # ORIGINAL request instead of following the redirect.
    # NOTE(review): the url[11:17] slice assumes an "https://www." prefix
    # on every redirect URL - confirm against actual traffic.
    if spider.name == "amazon" and redirected.url[11:17] != "amazon" and redirects <= self.max_redirect_times:
        spider.logger.info("redirect to wrong url: %s" % redirected.url)
        new_request = request.copy()
        new_request.dont_filter = True
        new_request.meta["redirect_times"] = redirects
        spider.logger.info("in _redirect redirect_times: %s re-yield response.request: %s" % (redirects, request.url))
        return new_request
    if ttl and redirects <= self.max_redirect_times:
        # Within budget: follow the redirect, carrying bookkeeping meta along.
        redirected.meta['redirect_times'] = redirects
        redirected.meta['redirect_ttl'] = ttl - 1
        redirected.meta['redirect_urls'] = request.meta.get('redirect_urls', []) + \
            [request.url]
        redirected.dont_filter = request.dont_filter
        redirected.priority = request.priority + self.priority_adjust
        self.logger.debug("Redirecting %s to %s from %s for %s times "%(reason, redirected.url, request.url, redirected.meta.get("redirect_times")))
        return redirected
    else:
        # Budget exhausted: record the failure and drop the request.
        self.logger.debug("Discarding %s: max redirections reached"%request.url)
        request.meta["url"] = request.url
        if request.meta.get("callback") == "parse":
            spider.crawler.stats.inc_total_pages(crawlid=request.meta['crawlid'], spiderid=request.meta['spiderid'], appid=request.meta['appid'])
            spider._logger.info(" in redicrect request error to failed pages url:%s, exception:%s, meta:%s" % (request.url, reason, request.meta))
        self.stats.set_failed_download_value(request.meta, reason)
        raise IgnoreRequest("max redirections reached")
def process_response(self, request, response, spider):
    """Retry responses whose status is in retry_http_codes, unless disabled."""
    if 'dont_retry' in request.meta:
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """On a retryable status: drop the proxy that produced it, then retry."""
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # 删除该代理
    self.delete_proxy(request.meta.get('proxy', False))
    print('返回值异常, 进行重试...')
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Treat 301/302 as throttling: sleep a few minutes, then retry."""
    if response.status not in (301, 302):
        return super(SleepRetryMiddleware, self).process_response(request, response, spider)
    print('retry ' + response.url)
    sleep(360)  # few minutes
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry every non-200 response unless retrying was disabled."""
    log.msg('KxRetry process_response ===========')
    if 'dont_retry' in request.meta:
        return response
    if response.status == 200:
        return response
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Re-fetch responses with specific HTTP codes (mainly 500, 599, ...)."""
    if "proxy" in request.meta:
        logger.debug("Use proxy: " + request.meta["proxy"] + "to crawler")
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # drop the proxy that failed before re-queueing the request
    self._del_invaild_proxy(request)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry middleware that honours per-request extra retry codes.

    ``request.meta['retry_http_codes']`` may list additional status codes
    to retry for this request only.
    """
    if request.meta.get('dont_retry', False):
        return response
    retry_http_codes = self.retry_http_codes
    temporary_codes = request.meta.get('retry_http_codes', [])
    if temporary_codes:
        # BUG FIX: the original used `retry_http_codes |= set(...)`, which
        # mutates self.retry_http_codes in place (set.__ior__) and leaks the
        # per-request codes into every later request. Build a new set instead.
        retry_http_codes = set(retry_http_codes) | {int(x) for x in temporary_codes}
    if response.status in retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry configured codes, stripping proxy auth before re-queueing."""
    if 'dont_retry' in request.meta:
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # the auth header may belong to a proxy that just failed; drop it so
    # the retried request can be re-authorized cleanly
    request.headers.pop('Proxy-Authorization', None)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry redirect statuses; halt on 403/414 until the operator reacts."""
    if response.status in [300, 301, 302, 303]:
        try:
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response  # 重试
        except Exception as e:
            raise IgnoreRequest
    elif response.status in [403, 414]:
        logger.error("%s! Stopping..." % response.status)
        os.system("pause")
        # BUG FIX: this branch previously fell through and returned None,
        # which is not a valid process_response result; hand the response on.
        return response
    else:
        return response
def process_response(self, request, response, spider):
    """On retryable statuses drop the (dead) proxy, then retry."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # 删除该代理
    print('代理失效:', request.meta.get('proxy'))
    self.delete_proxy(request.meta.get('proxy'))
    self.logger.warning('返回值异常, 进行重试...')
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry on configured codes; pause the spider after too many errors."""
    if 'dont_retry' in request.meta:
        return response
    if response.status not in self.retry_http_codes:
        return response
    # record this failure and suspend the whole spider once the
    # exception counter for the current key passes the threshold
    key = self.genKey()
    incAttr(self.status, key)
    if self.maxExceptionTime and self.status[key] >= self.maxExceptionTime:
        time.sleep(self.suspendTime)
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry bad statuses and autohome 'safety' verification pages."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        return self._retry(request, response_status_message(response.status), spider) or response
    # autohome redirects blocked clients to its safety/verification host
    if response.url.startswith("http://safety.autohome.com.cn"):
        return self._retry(request, "userverify retry ", spider) or response
    return response
def enrich_base_data(self, item_loader, response):
    """Populate the loader with bookkeeping fields shared by every item."""
    meta = response.meta
    item_loader.add_value('spiderid', meta.get('spiderid'))
    item_loader.add_value('url', response.request.url)
    item_loader.add_value("seed", meta.get("seed", ""))
    item_loader.add_value("timestamp", time.strftime("%Y%m%d%H%M%S"))
    item_loader.add_value('status_code', response.status)
    item_loader.add_value("status_msg", response_status_message(response.status))
    # strip the leading host label: "www.example.com" -> "example.com"
    item_loader.add_value('domain', urlparse(response.url).hostname.split(".", 1)[1])
    item_loader.add_value('crawlid', meta.get('crawlid'))
    item_loader.add_value('response_url', response.url)
def process_response(self, request, response, spider):
    """Retry through a (possibly refreshed) proxy on retryable statuses."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # refresh the proxy if it expired, then route the retry through it
    self._check_expire()
    request.meta['proxy'] = 'http://' + self.proxy
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry configured error codes and 301/302 redirects (likely captcha).

    FIX: the original nested the logic under `if not dont_retry` and ended
    with two consecutive `return response` statements, the second of which
    was unreachable dead code; flattened with guard clauses.
    """
    if request.meta.get('dont_retry', False):
        return response
    reason = response_status_message(response.status)
    if response.status in self.retry_http_codes:  # 捕获错误
        print(f"======出现{response.status}错误, url:{response.url}")
        return self._retry(request, reason, spider) or response
    if response.status in [301, 302]:  # 重定向错误 可能出现验证码
        print(f"======出现{response.status}错误(重定向), url:{response.url}")
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry error statuses; on 403 additionally discard the banned proxy."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    if response.status == 403:
        # 单独处理封IP的情况 删除代理重新请求
        proxy_spider = request.meta.get('proxy')
        # BUG FIX: meta.get('proxy') is None when the request went out
        # without a proxy; .split() on None raised AttributeError.
        if proxy_spider:
            proxy_redis = proxy_spider.split("//")[1]
            self.delete_proxy(proxy_redis)
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    if response.status in [301, 302, 400, 503]:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry error statuses; on 403/416 also discard the banned proxy."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    if response.status in [403, 416]:
        # 单独处理封IP的情况 删除代理重新请求
        proxy_spider = request.meta.get('proxy')
        # guard: the request may have gone out without a proxy (meta is None)
        if proxy_spider:
            proxy_redis = proxy_spider.split("//")[1]
            logger.info('IP被封,删除代理重试')
            self.delete_proxy(proxy_redis)
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    # BUG FIX: was `response.status == [503, 504]` - an int compared to a
    # list is always False, so 503/504 were never retried. Use `in`.
    if response.status in [503, 504]:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry bad statuses, and 200 pages matching the spider's retry_xpath."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        return self._retry(request, response_status_message(response.status), spider) or response
    # this is your check
    if response.status == 200 and response.xpath(spider.retry_xpath):
        reason = 'response got xpath "{}"'.format(spider.retry_xpath)
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """On HTTP 429 pause the whole engine for ~5.5 minutes, then retry."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status != 429:
        return response
    print(f'request{spider.counter}')
    print(f'accounts{print_coll()}')
    print(f'429 {time.localtime()}')
    # stop scheduling entirely while we wait out the rate limit
    self.crawler.engine.pause()
    time.sleep(331)
    self.crawler.engine.unpause()
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_douban_response(self, request, response, spider):
    # Retry blocked/redirected statuses, routing the retry through a
    # freshly fetched proxy when the proxy service has any available.
    if response.status in [403, 414, 302]:
        reason = response_status_message(response.status)
        print('change ip proxy and retrying...')
        # NOTE(review): blocking HTTP call inside a middleware; the proxy
        # service response is fetched once and parsed twice below.
        proxyres = requests.get('http://proxy.nghuyong.top').text
        totalproxies = json.loads(proxyres)['num']
        if (totalproxies > 0):
            proxylist = json.loads(proxyres)['data']
            proxy = random.choice(proxylist)
            request.meta['proxy'] = "http://" + proxy['ip_and_port']
        # NOTE(review): no `or response` fallback - once the retry budget is
        # exhausted _retry returns None, an invalid middleware result; confirm.
        return self._retry(request, reason, spider)
    else:
        return response
def _enrich_base_data(self, response):
    """Build a new item pre-filled with the bookkeeping fields every item shares."""
    meta = response.meta
    item = self.get_item_cls()()
    item['spiderid'] = meta['spiderid']
    item['workerid'] = self.worker_id
    item['url'] = meta["url"]
    item["seed"] = meta.get("seed", "")
    item["timestamp"] = time.strftime("%Y%m%d%H%M%S")
    item['status_code'] = response.status
    item["status_msg"] = response_status_message(response.status)
    # drop the first host label: "www.example.com" -> "example.com"
    item['domain'] = urlparse(response.url).hostname.split(".", 1)[1]
    item['crawlid'] = meta['crawlid']
    item['response_url'] = response.url
    return item
def process_response(self, request, response, spider):
    """Retry bad statuses after discarding the request's proxy setting."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    logger.error("del proxy retry")
    # remove the proxy if present so the retry is scheduled without it
    # (original retried identically whether or not the key existed)
    request.meta.pop('proxy', None)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Penalise the proxy that produced an error status, then retry."""
    if response.status in [400, 403, 404, 429, 500, 502, 503, 504]:
        self.TIMES = 3
        logger.error("%s! error..." % response.status)
        try:
            updateIPPOOLS(self.rconn, request.meta['proxy'].replace('http://', ''), request.meta['status'], -1)
        except Exception:
            # FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; bookkeeping stays best-effort
            # (the request may carry no proxy/status meta).
            pass
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response  # 重试
    else:
        return response
def process_response(self, request, response, spider):
    """Retry on error codes, sleeping the spider after repeated failures."""
    if 'dont_retry' in request.meta:
        return response
    if response.status in self.retry_http_codes:
        # bump the per-key failure counter and, once it crosses the
        # threshold, suspend the whole spider for a while
        key = self.genKey()
        incAttr(self.status, key)
        too_many = self.maxExceptionTime and self.status[key] >= self.maxExceptionTime
        if too_many:
            time.sleep(self.suspendTime)
        return self._retry(request, response_status_message(response.status), spider) or response
    return response
def process_response(self, request, response, spider):
    """Log the status, drop the proxy and retry on retryable codes."""
    print("返回码:")
    print(response.status)
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    # 删除代理
    print("重试中间件:")
    print(reason)
    self.delete_proxy(request.meta.get('proxy', False))
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry redirects and auth errors; refresh the cookie on 403/414/400."""
    if response.status in [200]:
        return response
    elif response.status in [300, 301, 302, 303]:
        try:
            redirect_url = bytes.decode(response.headers["location"])
            if "/service/captcha" in redirect_url:
                print('a' * 30)
            # FIX: the captcha and non-captcha branches previously
            # duplicated the identical retry code; merged to one path.
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response
        except Exception as e:
            raise IgnoreRequest
    elif response.status in [403, 414, 400]:
        # auth problem: refresh the cookie tied to this account code
        code = request.meta['code']
        self.cookie.upDateCookie(code)
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    else:
        return response
def process_response(self, request, response, spider):
    """Swap in a fresh proxy and retry any listed or non-200 status."""
    print(response.text)
    if request.meta.get('dont_retry', False):
        return response
    print('我是新的')
    # De Morgan of the original `in retry_http_codes or status != 200`
    if response.status not in self.retry_http_codes and response.status == 200:
        return response
    reason = response_status_message(response.status)
    # NOTE(review): this lock is acquired but never released here - unless
    # it is released elsewhere, the second retry will deadlock; confirm.
    self.lock.acquire()
    self.proxy = get_proxy()
    print('ip我是重写的', self.proxy)
    self.logger.warning('返回值异常, 进行重试...')
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry responses with retryable statuses, logging retry progress."""
    if request.meta.get("dont_retry", False):
        return response
    if response.status in self.retry_http_codes:
        self.loger.info(
            "request url: %s ,response status %s, max try time:%s , have done: %s" %
            (request.url, response.status, self.max_retry_times,
             request.meta.get("retry_times", 0)))
        reason = response_status_message(response.status)
        # BUG FIX: was `... or request` - returning the Request object from
        # process_response re-downloads it forever once retries are
        # exhausted; fall back to the response instead.
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry listed statuses, with extra logging when the status is 403."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    if response.status == 403:
        # request.meta['dont_merge_cookies'] = True
        spider.logger.error("Response 403 Retry, " + log_simple_response(response))
        spider.logger.error("403 Retry Set dont_merge_cookies True, " + log_simple_request(request))
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry bad statuses while feeding stats to the proxy evaluator."""
    has_proxy = 'proxy' in request.meta
    if 'dont_retry' in request.meta:
        return response
    if response.status not in self.retry_http_codes:
        # response was successful
        if has_proxy:
            self.proxy_ev.inc_successes(request.meta['proxy'])
        return response
    reason = response_status_message(response.status)
    if has_proxy:
        self.proxy_ev.inc_failure(request.meta['proxy'])
    return self._retry(request, reason, spider) or response
def errback_httpbin(self, failure):
    """Log request failures to the error store, labelled by failure type.

    FIX: the six branches each duplicated the same
    ``save_error_log(json.dumps({...}))`` call; each branch now only builds
    the message and a single exit point performs the dump.
    """
    request = failure.request
    if failure.check(HttpError):
        # only HttpError carries a response we can report the status of
        response = failure.value.response
        errmsg = 'errback <%s> %s , response status:%s' % (
            request.url, failure.value, response_status_message(response.status))
    elif failure.check(ResponseFailed):
        errmsg = 'errback <%s> ResponseFailed' % request.url
    elif failure.check(ConnectionRefusedError):
        errmsg = 'errback <%s> ConnectionRefusedError' % request.url
    elif failure.check(ResponseNeverReceived):
        errmsg = 'errback <%s> ResponseNeverReceived' % request.url
    elif failure.check(TCPTimedOutError, TimeoutError):
        errmsg = 'errback <%s> TimeoutError' % request.url
    else:
        errmsg = 'errback <%s> OtherError' % request.url
    self.save_error_log(
        json.dumps({
            'meta': request.meta,
            'errmsg': errmsg
        }))
def process_response(self, request, response, spider):
    """Retry failures; for a 404 on the songlist spider, skip to the next album."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status not in self.retry_http_codes:
        return response
    reason = response_status_message(response.status)
    if response.status == 404 and isinstance(spider, SonglistSpider):
        # this album is gone - advance to the next id in the spider's list
        spider.current_music += 1
        next_url = ('https://music.douban.com/subject/' +
                    str(spider.musiclist[spider.current_music]) + '/')
        next_request = request.replace(url=next_url)
        return self._retry(next_request, reason, spider) or response
    time.sleep(10)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Log every response and retry those with retryable statuses."""
    logger.info('[' + str(response.status) + '] ' + response.url +
                ' proxy:' + str(request.meta.get('proxy', '')))
    if request.meta.get('dont_retry', False):
        logger.info("request.meta.get('dont_retry') is set to 'True'. No need to retry.")
        return response
    if response.status not in self.retry_http_codes:
        return response
    logger.info("Retry from XHRetryMiddleware.process_response: " + request.url)
    reason = response_status_message(response.status)
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Swap proxies and retry whenever the status is not 200."""
    if response.status == 200:
        return response
    self.logger('状态码 %s 异常' % response.status)
    # allow as many retries as there are proxies left in the pool
    max_retry_times = len(IpProxy.ips)
    reason = response_status_message(response.status)
    ip = request.meta['proxy']
    request.meta['proxy'] = self.resetip(ip)
    request.meta['max_retry_times'] = max_retry_times
    self.logger('ip %s 替换为: %s 最大重连次数为: %s'%(ip, request.meta['proxy'], max_retry_times))
    return self._retry(request, reason, spider) or response
def process_response(self, request, response, spider):
    """Retry bad statuses; for the XML detail spider also retry API 'Ack' failures."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    if spider.name == 'detail_xml_redis_spider':
        data = dict(xmltodict.parse(response.text))
        data = data.get('GetItemResponse')
        # BUG FIX: when 'Ack' was absent the original still did
        # `print(data['Ack'])`, raising KeyError; also guard data being None
        # when 'GetItemResponse' itself is missing.
        ack = data.get('Ack') if data else None
        if ack is None or ack == 'Failure':
            print(ack)
            spider.logger.info(ack)
            return self._retry(request, 'Ack Error', spider) or response
    return response
def process_response(self, request, response, spider):
    """Close the spider outright on 429; otherwise retry the usual codes."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status == 429:
        # rate-limited: shut the spider down instead of hammering the API
        self.crawler.engine.close_spider(spider)
    elif response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry permanent redirects and cookie-related blocks; pause on 403/414."""
    reason = response_status_message(response.status)
    if response.status in [300, 301, 302, 303]:
        if reason == '301 Moved Permanently':
            return self._retry(request, reason, spider) or response
        raise IgnoreRequest
    if response.status in [403, 414]:
        logger.error("%s! Stopping..." % response.status)
        os.system("pause")
        # refresh the (probably banned) account cookie, then retry
        update_cookie(request.meta['account_text'], self.rconn, spider.name, request.cookies)
        return self._retry(request, reason, spider) or response  # 重试
    return response
def process_response(self, request, response, spider):
    """Retry bad statuses and, unless opted out, empty-bodied 200 responses."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        return self._retry(request, response_status_message(response.status), spider) or response
    # retry when response's content is empty, response's status is 200
    # and request meta set empty_body False
    allow_empty = request.meta.get('empty_body', False)
    if response.status == 200 and not response.body and not allow_empty:
        return self._retry(request, '200 empty_response_body', spider) or response
    return response
def process_response(self, request, response, spider):
    """Retry configured codes; wrap other 4xx/5xx in a sentinel response."""
    # 重试
    if response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        # 在此处进行自己的操作,如删除不可用代理,打日志等
        return self._retry(request, reason, spider) or response
    # 捕获状态码为40x/50x的response
    if str(response.status).startswith(('4', '5')):
        # 随意封装,直接返回response,spider代码中根据url==''来处理response
        return HtmlResponse(url='4050')
    # 其他状态码不处理
    return response
def process_response(self, request, response, spider):
    """Retry listed statuses; close the spider after repeated 500s."""
    logger.info('process_response function processed http code %s', response.status)
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    if response.status == 500:
        # class-level counter: give up after more than 6 server errors
        BizdirectoryDownloaderMiddleware.counter += 1
        if BizdirectoryDownloaderMiddleware.counter > 6:
            raise CloseSpider('Too Many Error:500')
    return response
def process_response(self, request, response, spider):
    # 判断cookie是否有效 (check whether the session cookie is still valid)
    if not request.url.endswith('.jpg'):
        # presumably this notification element only renders for logged-in
        # sessions - TODO confirm against the target site's markup
        status = response.xpath("./*//i[@class='i-notification mt9 mb9']").extract_first(default=None)
        if status == None:
            print('更换Cookie')
            # re-login in a separate process (web_driver_login, presumably
            # selenium-based) to refresh the cookie before retrying
            p = Process(target=web_driver_login)
            p.start()
            p.join()
            print('更换成功')
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response  # 重试
        return response
    return response
def handle_error(self, failure):
    """Handle an error due to a non-success status code or other reason.

    If link checking is enabled, saves the broken URL and referrers.
    """
    try:
        logging.info("Handle error response status code: {}".format(failure.value.response))
        logging.info("Url that failed: {}".format(
            failure.value.response.request.url))
    except AttributeError:
        # FIX: was a bare `except:`; only an AttributeError (failure.value
        # without a .response) is expected here.
        logging.error("Could not print handle error status code.")
    # If we should not do link check or failure is ignore request
    # and it is not a http error we know it is a last-modified check.
    if (not self.scanner.scan_object.do_link_check or
            (isinstance(failure.value, IgnoreRequest) and not isinstance(
                failure.value, HttpError))):
        logging.info("We do not do link check or failure is an instance of "
                     "IgnoreRequest: {}".format(failure.value))
        return
    if hasattr(failure.value, "response"):
        response = failure.value.response
        url = response.request.url
        status_code = response.status
        status_message = response_status_message(status_code)
        if "redirect_urls" in response.request.meta:
            # Set URL to the original URL, not the URL after redirection
            url = response.request.meta["redirect_urls"][0]
        referer_header = response.request.headers.get("referer", None)
    else:
        # no response at all (e.g. DNS/connection failure)
        url = failure.request.url
        status_code = -1
        status_message = "%s" % failure.value
        referer_header = None
    broken_url = self.broken_url_save(status_code, status_message, url)
    self.broken_url_objects[url] = broken_url
    # Associate referrer using referer header
    if referer_header is not None:
        self.associate_url_referrer(referer_header, broken_url)
    self.associate_url_referrers(broken_url)
def process_response(self, request, response, spider):
    """Handle weibo redirects: refresh/remove cookies per target, then retry."""
    if response.status in [300, 301, 302, 303]:
        try:
            redirect_url = response.headers["location"]
            if "login.weibo" in redirect_url or "login.sina" in redirect_url:
                # Cookie失效 (cookie expired) - refresh it
                logger.warning("One Cookie need to be updating...")
                updateCookie(request.meta['accountText'], self.rconn, spider.name)
            elif "weibo.cn/security" in redirect_url:
                # 账号被限 (account restricted) - drop its cookie
                logger.warning("One Account is locked! Remove it!")
                removeCookie(request.meta["accountText"], self.rconn, spider.name)
            elif "weibo.cn/pub" in redirect_url:
                logger.warning(
                    "Redirect to 'http://weibo.cn/pub'!( Account:%s )" %
                    request.meta["accountText"].split("--")[0])
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response  # 重试
        except Exception as e:  # FIX: was py2-only `except Exception, e`
            raise IgnoreRequest
    # BUG FIX: non-redirect statuses previously fell off the end returning
    # None, which is not a valid downloader-middleware result.
    return response
def process_response(self, request, response, spider):
    """Retry bad statuses, and empty listing pages through a fresh proxy."""
    if request.meta.get('dont_retry', False):
        return response
    if response.status in self.retry_http_codes:
        return self._retry(request, response_status_message(response.status), spider) or response
    # customiz' here: an empty listing table means we got a blocked/blank page
    if not response.xpath('//table[@class="list_2_tab"]/tbody/tr'):
        logging.info('>>>>>>>> 替换代理重试')
        request.meta['proxy'] = self.get_proxy()
        return self._retry(request, response.body, spider) or response
    return response
def process_response(self, request, response, spider):
    # If the request meta dict has a fallback_url property... (only the
    # first original_img_url request has the fallback_url property)
    if request.meta.get('fallback_url') is not None:
        # If the response returned a status code that requires us to use
        # the fallback_url...
        if response.status in self.fallback_http_codes:
            reason = response_status_message(response.status)
            log.msg(format="Trying fallback for %(request)s (fallbackurl is %(fallback_url)s): %(reason)s",
                    level=log.DEBUG, spider=spider, request=request,
                    fallback_url=request.meta.get('fallback_url'), reason=reason)
            return Request(request.meta.get('fallback_url'))
        else:
            # Try to open the image data to check it's valid.
            try:
                im = Image.open(StringIO(response.body))
            except IOError as e:
                # use fallback_url if image can't be opened.
                log.msg(format="Trying fallback for %(request)s (fallbackurl is %(fallback_url)s) because image could not be opened: %(reason)s",
                        level=log.DEBUG, spider=spider, request=request,
                        fallback_url=request.meta.get('fallback_url'), reason=e)
                return Request(request.meta.get('fallback_url'))
    # no fallback configured, or the image opened fine: pass the response on
    return response
def process_response(self, request, response, spider):
    """Retry every non-200 response.

    BUG FIX: ``_retry`` returns ``None`` once the retry budget is spent;
    without the ``or response`` fallback this middleware then returned
    ``None``, which is not a valid ``process_response`` result.
    """
    if response.status != 200:
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
    return response
def test_response_status_message(self):
    """Known codes map to their reason phrase; unknown codes to 'Unknown Status'."""
    expected = {
        200: '200 OK',
        404: '404 Not Found',
        573: "573 Unknown Status",
    }
    for code, message in expected.items():
        self.assertEqual(response_status_message(code), message)
def process_response(self, request, response, spider):
    """Run the forbidden-handler for blocked statuses; always pass the response on."""
    if response.status in self.forbidden_http_codes:
        self._forbidden(request, response_status_message(response.status), spider)
    return response