def retry_if_cloudflare(response, args):
    """Attempt to clear a Cloudflare challenge and download the page again.

    Parameters:
        response: the response that is inspected by ``Cloudflare``.
        args: mapping of keyword arguments for ``downloadpage``; it must
            contain ``'url'``. On a successful authorization its
            ``'bypass_cloudflare'`` entry is set to ``False`` in place.

    Returns:
        ``response`` unchanged when no challenge is detected or when the
        authorization fails / is retried too often; otherwise the
        ``__dict__`` of the object returned by the new ``downloadpage`` call.
    """
    challenge = Cloudflare(response)
    if not challenge.is_cloudflare:
        return response

    logger.info("cloudflare detectado, esperando %s segundos..." % challenge.wait_time)
    clearance_url = challenge.get_url()
    logger.info("Autorizando... url: %s" % clearance_url)

    # The authorization request reuses the caller's arguments, but targets the
    # challenge URL, does not follow redirects and sends only a Referer header.
    clearance_args = args.copy()
    clearance_args.update({
        'url': clearance_url,
        'follow_redirects': False,
        'headers': {'Referer': args['url']},
    })
    clearance = downloadpage(**clearance_args)

    if clearance.sucess:
        logger.info("Autorización correcta, descargando página")
        # Turn the bypass off for the final download of the real page.
        args['bypass_cloudflare'] = False
        return downloadpage(**args).__dict__

    challenged_again = clearance.code == 403 and clearance.headers.get('cf-chl-bypass')
    if not challenged_again:
        logger.info("No se ha podido autorizar")
        return response

    # Count how many frames of this function are on the call stack so that the
    # retry below cannot recurse without bound.
    nested_calls = sum(
        1 for frame in inspect.stack() if frame[3] == 'retry_if_cloudflare'
    )
    if nested_calls > 2:
        logger.info("No se ha podido autorizar. Demasiados intentos")
        return response

    logger.info("Reintentando...")
    return downloadpage(**args).__dict__
# NOTE(review): this is a run of statements whose enclosing definition is not
# visible here; `response`, `bypass_cloudflare`, `request_headers`, `post`,
# `headers`, `timeout`, `follow_redirects`, `cookies`, `replace_headers` and
# `add_referer` are all bound outside this fragment.

# Decompress the body when the server declared it as gzip-encoded.
logger.info("Encoding: %s" % (response["headers"].get('content-encoding')))
if response["headers"].get('content-encoding') == 'gzip':
    logger.info("Descomprimiendo...")
    try:
        response["data"] = gzip.GzipFile(fileobj=StringIO(response["data"])).read()
        logger.info("Descomprimido")
    except Exception:
        # Best effort: keep the raw data if it cannot be decompressed, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare "except:" would.
        logger.info("No se ha podido descomprimir")

# Anti Cloudflare
if bypass_cloudflare:
    cf = Cloudflare(response)
    if cf.is_cloudflare:
        logger.info("cloudflare detectado, esperando %s segundos..." % cf.wait_time)
        auth_url = cf.get_url()
        logger.info("Autorizando... url: %s" % auth_url)
        # First request the authorization URL; only when that succeeds is the
        # original page downloaded again.
        if downloadpage(auth_url, headers=request_headers, replace_headers=True).sucess:
            logger.info("Autorización correcta, descargando página")
            resp = downloadpage(url=response["url"], post=post, headers=headers, timeout=timeout,
                                follow_redirects=follow_redirects, cookies=cookies,
                                replace_headers=replace_headers, add_referer=add_referer)
            # Overwrite the current response, field by field, with the new one.
            response["sucess"] = resp.sucess
            response["code"] = resp.code
            response["error"] = resp.error
            response["headers"] = resp.headers
            response["data"] = resp.data
            response["time"] = resp.time
            response["url"] = resp.url
        else:
            logger.info("No se ha podido autorizar")
# NOTE(review): this is a run of statements whose enclosing definition is not
# visible here; `response`, `bypass_cloudflare`, `request_headers`, `post`,
# `headers`, `timeout`, `follow_redirects`, `cookies`, `replace_headers` and
# `add_referer` are all bound outside this fragment.

# Decompress the body when the server declared it as gzip-encoded.
logger.info("Encoding: %s" % (response["headers"].get('content-encoding')))
if response["headers"].get('content-encoding') == 'gzip':
    logger.info("Descomprimiendo...")
    try:
        response["data"] = gzip.GzipFile(fileobj=StringIO(response["data"])).read()
        logger.info("Descomprimido")
    except Exception:
        # Best effort: keep the raw data if it cannot be decompressed, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare "except:" would.
        logger.info("No se ha podido descomprimir")

# Anti Cloudflare
if bypass_cloudflare:
    cf = Cloudflare(response)
    if cf.is_cloudflare:
        logger.info("cloudflare detectado, esperando %s segundos..." % cf.wait_time)
        auth_url = cf.get_url()
        logger.info("Autorizando... url: %s" % auth_url)
        # First request the authorization URL; only when that succeeds is the
        # original page downloaded again.
        if downloadpage(auth_url, headers=request_headers, replace_headers=True).sucess:
            logger.info("Autorización correcta, descargando página")
            resp = downloadpage(url=response["url"], post=post, headers=headers, timeout=timeout,
                                follow_redirects=follow_redirects, cookies=cookies,
                                replace_headers=replace_headers, add_referer=add_referer)
            # Overwrite the current response, field by field, with the new one.
            response["sucess"] = resp.sucess
            response["code"] = resp.code
            response["error"] = resp.error
            response["headers"] = resp.headers
            response["data"] = resp.data
            response["time"] = resp.time
            response["url"] = resp.url
        else:
            logger.info("No se ha podido autorizar")
def anti_cloudflare(response, request_headers, url, post, headers, timeout, follow_redirects, cookies,
                    replace_headers, add_referer, only_headers, bypass_cloudflare, count_retries,
                    count_retries_tot, random_headers, ignore_response_code, alfa_s, proxy, proxy_web,
                    proxy_addr_forced, forced_proxy, proxy_retries_counter, proxy_retries):
    """Try to authorize against Cloudflare and refresh ``response`` in place.

    ``response`` is indexed like a mapping (``response["url"]``). When a
    Cloudflare challenge is detected, ``count_retries`` is incremented, the
    authorization URL is requested (falling back to ``url`` when that answers
    403) and, if the authorization succeeds, the page is downloaded again and
    its fields are copied into ``response``.

    ``alfa_s`` silences the log messages when truthy. The parameters
    ``only_headers``, ``bypass_cloudflare``, ``random_headers``,
    ``ignore_response_code`` and ``proxy_retries_counter`` are accepted but
    not read by this function.

    Returns:
        tuple ``(response, count_retries)``.
    """
    from core.cloudflare import Cloudflare

    challenge = Cloudflare(response)
    if not challenge.is_cloudflare:
        return (response, count_retries)

    count_retries += 1
    verbose = not alfa_s

    if verbose:
        logger.info("cloudflare detectado, esperando %s segundos..." % challenge.wait_time)
    clearance_url = challenge.get_url()
    if verbose:
        logger.info("Autorizando... intento %d url: %s" % (count_retries, clearance_url))

    # Retry/proxy bookkeeping forwarded unchanged to every downloadpage call.
    forwarded = dict(
        count_retries=count_retries,
        count_retries_tot=count_retries_tot,
        proxy=proxy,
        proxy_web=proxy_web,
        forced_proxy=forced_proxy,
        proxy_addr_forced=proxy_addr_forced,
        proxy_retries=proxy_retries,
        alfa_s=alfa_s,
    )
    # Extra options used only by the authorization requests.
    clearance_kwargs = dict(
        forwarded,
        headers=request_headers,
        replace_headers=True,
        ignore_response_code=True,
    )

    clearance = downloadpage(clearance_url, **clearance_kwargs)
    if clearance.code == 403:
        # The authorization URL was refused: request the original URL instead.
        clearance = downloadpage(url, **clearance_kwargs)

    if not clearance.sucess:
        if verbose:
            logger.info("No se ha podido autorizar")
        return (response, count_retries)

    if verbose:
        logger.info("Autorización correcta, descargando página")
    fresh = downloadpage(url=response["url"], post=post, headers=headers, timeout=timeout,
                         follow_redirects=follow_redirects, cookies=cookies,
                         replace_headers=replace_headers, add_referer=add_referer,
                         **forwarded)

    # Copy the new result into the caller's response, field by field.
    for field in ("sucess", "code", "error", "headers", "data", "time", "url"):
        response[field] = getattr(fresh, field)

    return (response, count_retries)