def _doLogin(self):
    # Log in to TorrentLeech with the credentials configured in sickbeard.
    # NOTE(review): this chunk ends inside the error path; the success
    # handling presumably continues beyond the lines visible here — confirm.
    login_params = {
        'username': sickbeard.TORRENTLEECH_USERNAME,
        'password': sickbeard.TORRENTLEECH_PASSWORD,
        'remember_me': 'on',
        'login': '******'
    }
    # Fresh cloudscraper session: transparently solves Cloudflare challenges.
    self.session = cloudscraper.create_scraper()
    logger.log("[" + self.name + "] Attempting to Login")
    try:
        response = self.session.post(
            "{0}user/account/login".format(self.url),
            data=login_params,
            timeout=30,
            verify=False
        )
    # Python 2 except-comma syntax, kept as-is (file-wide convention).
    except (exceptions.ConnectionError, exceptions.HTTPError), e:
        logger.log("[{0}] {1} Error: {2}".format(
            self.name, self.funcName(), e
        ), logger.ERROR
        )
        return False
def _doLogin(self):
    """Authenticate with TorrentDay.

    Tries, in order: an e-mail login link (if configured), stored
    uid/pass auth cookies, and finally a captcha-bypass login when an
    anti-captcha key plus username/password are configured.

    @return: True when logged in and the rss uid/passkey were extracted,
        False otherwise.
    @rtype: bool
    """
    if not self.session:
        self.session = cloudscraper.create_scraper()
    if sickbeard.TORRENTDAY_EMAIL_URL:
        self._handleEmailLink()
    if sickbeard.TORRENTDAY_UID and sickbeard.TORRENTDAY_PASS:
        self.checkAuthCookies()
    # If browsing still redirects to login.php, the cookies are not valid.
    response = self.session.get(self.url + '/browse.php')
    if 'login.php' in response.url:
        if sickbeard.TORRENTDAY_ANTICAPTCHA_KEY and sickbeard.TORRENTDAY_USERNAME and sickbeard.TORRENTDAY_PASSWORD:
            if not self._bypassCaptcha():
                return False
        else:
            # BUG FIX: log message typo "authenicate" -> "authenticate".
            logger.log(
                "[{0}] {1} Appears we cannot authenticate with TorrentDay.".
                format(self.name, self.funcName()), logger.ERROR)
            return False
    # The rss uid/passkey are required to build authenticated feed URLs.
    if not self._getPassKey() or not self.rss_uid or not self.rss_passkey:
        logger.log(
            "[{0}] {1} Could not extract rss uid/passkey... aborting.".
            format(self.name, self.funcName()), logger.ERROR)
        return False
    return True
def __init__(self):
    # Torrentz meta-search provider (public tracker, no login required).
    generic.TorrentProvider.__init__(self, "Torrentz")
    # NOTE(review): the cache is constructed before self.url/self.name are
    # assigned below — presumably TORRENTZCache only stores the provider
    # reference at construction time; verify before reordering.
    self.cache = TORRENTZCache(self)
    self.url = 'https://torrentz2.eu/'
    self.name = "Torrentz"
    self.supportsBacklog = True
    # cloudscraper session solves Cloudflare anti-bot challenges.
    self.session = cloudscraper.create_scraper()
    # Helper returning the caller's function name, used in log messages.
    self.funcName = lambda n=0: sys._getframe(n + 1).f_code.co_name + "()"
    logger.log("[" + self.name + "] initializing...")
def _doLogin(self):
    """Log in to IPTorrents with the configured username/password.

    Selects the working mirror first (switchURL), then POSTs the
    credentials through a fresh cloudscraper session.
    On a connection/HTTP error the session is reset and False is returned.
    """
    self.switchURL()
    self.session = cloudscraper.create_scraper()
    logger.log("[{}] Attempting to Login".format(self.name))
    try:
        response = self.session.post("{}/take_login.php".format(self.url),
                                     data={
                                         'username': sickbeard.IPTORRENTS_USERNAME,
                                         'password': sickbeard.IPTORRENTS_PASSWORD,
                                     },
                                     timeout=30,
                                     verify=False)
    except (exceptions.ConnectionError, exceptions.HTTPError) as e:
        self.session = None
        # BUG FIX: was "...".foramt(...) — an AttributeError raised here
        # masked the original connection error instead of logging it.
        logger.log(
            "[{}] {} Error: {}".format(self.name, self.funcName(), str(e)),
            logger.ERROR)
        return False
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request, if it contains nothing the default headers will be used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Indicates if redirects are to be followed.
    @type follow_redirects: bool
    @param cookies: Indicates whether cookies are to be used.
    @type cookies: bool
    @param replace_headers: If True, headers passed by the "headers" parameter will completely replace
        the default headers. If False, the headers passed by the "headers" parameter will modify the
        headers by default.
    @type replace_headers: bool
    @param add_referer: Indicates whether to add the "Referer" header using the domain of the url as a value.
    @type add_referer: bool
    @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the method of selecting random headers.
    @type random_headers: bool
    @param ignore_response_code: If True, ignore the method for WebErrorException for error like 404
        error in veseriesonline, but it is a functional data
    @type ignore_response_code: bool
    @param use_requests: Use requests.session()
    @type use_requests: bool
    @return: Result of the petition
    @rtype: HTTPResponse

    HTTPResponse.sucess:  bool   True: Request successful | False: Error when making the request
    HTTPResponse.code:    int    Server response code or error code if an error occurs
    HTTPResponse.error:   str    Description of the error in case of an error
    HTTPResponse.headers: dict   Dictionary with server response headers
    HTTPResponse.data:    str    Response obtained from server
    HTTPResponse.json:    dict   Response obtained from the server in json format
    HTTPResponse.time:    float  Time taken to make the request
    """
    url = scrapertools.unescape(url)
    domain = urlparse.urlparse(url).netloc
    global CF_LIST
    CF = False
    # Domains known to need Cloudflare handling get a cloudscraper session;
    # everything else uses a plain requests session.
    if domain in FORCE_CLOUDSCRAPER_LIST:
        from lib import cloudscraper
        session = cloudscraper.create_scraper()
        CF = True
    else:
        from lib import requests
        session = requests.session()
    # Cloudflare-listed domains are fetched through the web.archive.org
    # "save" endpoint instead of directly.
    if domain in CF_LIST or opt.get('CF', False):
        url = 'https://web.archive.org/save/' + url
        CF = True
    if config.get_setting('resolver_dns') and not opt.get(
            'use_requests', False):
        from specials import resolverdns
        session.mount('https://', resolverdns.CipherSuiteAdapter(domain, CF))
    req_headers = default_headers.copy()
    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])
    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)
    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''
    session.verify = opt.get('verify', True)
    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)
    proxy_data = {'dict': {}}
    inicio = time.time()
    if opt.get(
            'timeout',
            None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None
    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file',
                                                            None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'],
                                                     keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']
                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(
                                opt['file'])
                        files = {
                            'file': (opt['file_name'], open(opt['file'], 'rb'))
                        }
                        file_name = opt['file']
                    else:
                        files = {
                            'file': (opt.get('file_name', 'Default'),
                                     opt['file'])
                        }
                        file_name = opt.get('file_name',
                                            'Default') + ', Buffer de memoria'
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url,
                                       data=payload,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       files=files,
                                       timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url,
                                   allow_redirects=opt.get(
                                       'follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url,
                                  allow_redirects=opt.get(
                                      'follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['sucess'] = False
                info_dict.append(('Success', 'False'))
                response['code'] = str(e)
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finalizado en', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                req.status_code = str(e)
    else:
        # Empty url: return an empty, unsuccessful response.
        response['data'] = ''
        response['sucess'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)
    response_code = req.status_code
    # Cloudflare blocked the plain request: remember the domain in the
    # persisted CF list and retry through the archive.org path.
    if req.headers.get('Server', '').startswith(
            'cloudflare') and response_code in [
                429, 503, 403
            ] and not opt.get('CF', False):
        if domain not in CF_LIST:
            opt["CF"] = True
            with open(CF_LIST_PATH, "a") as CF_File:
                CF_File.write("%s\n" % domain)
        logger.debug("CF retry... for domain: %s" % domain)
        return downloadpage(url, **opt)
    response['data'] = req.content if req.content else ''
    if CF:
        # Strip the web.archive.org "/save/" prefix from links in the page.
        import re
        response['data'] = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1',
                                  response['data'])
    response['url'] = req.url
    if type(response['data']) != str:
        response['data'] = response['data'].decode('UTF-8')
    if not response['data']:
        response['data'] = ''
    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()
    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies
    info_dict, response = fill_fields_post(info_dict, req, response,
                                           req_headers, inicio)
    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))
    # is_channel = inspect.getmodule(inspect.currentframe().f_back)
    # is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
    # if is_channel and isinstance(response_code, int):
    #     if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
    #         if response_code > 399:
    #             show_infobox(info_dict)
    #             raise WebErrorException(urlparse.urlparse(url)[1])
    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)
    return type('HTTPResponse', (), response)
# -*- coding: utf-8 -*- # ------------------------------------------------------------ # Canale per AnimeUnity # ------------------------------------------------------------ import requests, json, copy from core import support from platformcode import autorenumber try: from lib import cloudscraper except: from lib import cloudscraper host = support.config.get_channel_url() response = cloudscraper.create_scraper().get(host + '/archivio') csrf_token = support.match(response.text, patron='name="csrf-token" content="([^"]+)"').match headers = { 'content-type': 'application/json;charset=UTF-8', 'x-csrf-token': csrf_token, 'Cookie': '; '.join([x.name + '=' + x.value for x in response.cookies]) } @support.menu def mainlist(item): top = [('Ultimi Episodi', ['', 'news'])] menu = [('Anime {bullet bold}', ['', 'menu', {}, 'tvshow']), ('Film {submenu}', ['', 'menu', {
def downloadpage(url, **opt):
    """
    Open a url and return the data obtained.

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request; if empty the default headers are used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Whether redirects are followed.
    @type follow_redirects: bool
    @param cookies: Whether cookies are used.
    @type cookies: bool
    @param replace_headers: If True, the headers passed via the "headers" parameter completely
        replace the default headers. If False, they only update the default headers.
    @type replace_headers: bool
    @param add_referer: Whether to add the "Referer" header using the url's domain as value.
    @type add_referer: bool
    @param only_headers: If True, only the headers are downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the random header selection method.
    @type random_headers: bool
    @param ignore_response_code: If True, skip the WebErrorException path for errors such as a 404
        that still carries functional data.
    @type ignore_response_code: bool
    @return: Result of the request
    @rtype: HTTPResponse

    HTTPResponse.sucess:  bool   True: request completed correctly | False: error
    HTTPResponse.code:    int    Server response code, or error code when an error occurs
    HTTPResponse.error:   str    Description of the error, if any
    HTTPResponse.headers: dict   Dictionary with the server response headers
    HTTPResponse.data:    str    Response obtained from the server
    HTTPResponse.json:    dict   Response obtained from the server in json format
    HTTPResponse.time:    float  Time taken to make the request
    """
    logger.info()
    from . import scrapertools
    load_cookies()
    import requests
    # Default headers, unless the caller overrides them
    req_headers = default_headers.copy()
    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            # BUG FIX: was dict(opt('headers')) — calling the kwargs dict,
            # which raised TypeError whenever replace_headers=True.
            req_headers = dict(opt['headers'])
    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)
    # Retry loop: one pass per proxy retry allowed by 'proxy_retries'
    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1
        domain = urlparse.urlparse(url)[1]
        global CS_stat
        if domain in CF_LIST or opt.get(
                'CF', False):  # In the CF list, or forced by the caller
            from lib import cloudscraper
            session = cloudscraper.create_scraper(
            )  # The domain needs CloudScraper
            session.verify = True
            CS_stat = True
        else:
            session = requests.session()
            session.verify = False
            CS_stat = False
        if opt.get('cookies', True):
            session.cookies = cj
        session.headers.update(req_headers)
        # Prepare the url in case a proxy is needed, or if
        # 'proxy_addr_forced' is sent by the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxy_addr_forced', {}):
            session.proxies = opt['proxy_addr_forced']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']
        inicio = time.time()
        if opt.get(
                'timeout', None
        ) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None
        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get(
                        'file', None) is not None:
                    if opt.get('post', None) is not None:
                        ### Convert string post in dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'],
                                                         keep_blank_values=1)
                                payload = dict()
                                for key, value in list(post.items()):
                                    try:
                                        payload[key] = value[0]
                                    except:
                                        payload[key] = ''
                            else:
                                payload = opt['post']
                    ### Verifies 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('file', None) is not None:
                        if len(opt['file']) < 256 and os.path.isfile(
                                opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(
                                    opt['file'])
                            files = {
                                'file':
                                (opt['file_name'], open(opt['file'], 'rb'))
                            }
                            file_name = opt['file']
                        else:
                            files = {
                                'file': (opt.get('file_name', 'Default'),
                                         opt['file'])
                            }
                            file_name = opt.get(
                                'file_name',
                                'Default') + ', Buffer de memoria'
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           timeout=opt['timeout'])
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           files=files,
                                           timeout=opt['timeout'])
                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get(
                                          'follow_redirects', True),
                                      timeout=opt['timeout'])
            except Exception as e:
                if not opt.get('ignore_response_code',
                               False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finalizado en', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                    import traceback
                    logger.error(traceback.format_exc(1))
                    return type('HTTPResponse', (), response)
                else:
                    req = requests.Response()
                    req.status_code = str(e)
        else:
            # Empty url: return an empty, unsuccessful response.
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            return type('HTTPResponse', (), response)
        response_code = req.status_code
        # Cloudflare rejected a plain request: persist the domain in the CF
        # list and retry with CloudScraper.
        if req.headers.get('Server', '').startswith(
                'cloudflare') and response_code in [
                    429, 503, 403
                ] and not opt.get('CF', False):
            domain = urlparse.urlparse(url)[1]
            if domain not in CF_LIST:
                opt["CF"] = True
                with open(CF_LIST_PATH, "a") as CF_File:
                    CF_File.write("%s\n" % domain)
            logger.debug("CF retry... for domain: %s" % domain)
            return downloadpage(url, **opt)
        response['data'] = req.content
        try:
            encoding = req.encoding
            if not encoding:
                encoding = 'utf8'
            if PY3 and isinstance(response['data'], bytes) and 'Content-Type' in req.headers \
                    and ('text/' in req.headers['Content-Type'] or 'json' in req.headers['Content-Type'] \
                    or 'xml' in req.headers['Content-Type']):
                response['data'] = response['data'].decode(encoding)
        except:
            import traceback
            logger.error(traceback.format_exc(1))
        try:
            if PY3 and isinstance(response['data'], bytes) and 'Content-Type' in req.headers \
                    and (not 'application' in req.headers['Content-Type'] \
                    or 'javascript' in req.headers['Content-Type']):
                response['data'] = "".join(
                    chr(x) for x in bytes(response['data']))
        except:
            import traceback
            logger.error(traceback.format_exc(1))
        try:
            # Repair common mojibake sequences in text responses.
            if 'Content-Type' in req.headers and ('text/' in req.headers['Content-Type'] \
                    or 'json' in req.headers['Content-Type'] or 'xml' in req.headers['Content-Type']):
                response['data'] = response['data'].replace('Á', 'Á').replace('É', 'É')\
                        .replace('Í', 'Í').replace('Ó', 'Ó').replace('Ú', 'Ú')\
                        .replace('Ü', 'Ü').replace('¡', '¡').replace('¿', '¿')\
                        .replace('Ñ', 'Ñ').replace('ñ', 'n').replace('ü', 'ü')\
                        .replace('á', 'á').replace('é', 'é').replace('í', 'í')\
                        .replace('ó', 'ó').replace('ú', 'ú').replace('ª', 'ª')\
                        .replace('º', 'º')
        except:
            import traceback
            logger.error(traceback.format_exc(1))
        response['url'] = req.url
        if not response['data']:
            response['data'] = ''
        try:
            response['json'] = to_utf8(req.json())
        except:
            response['json'] = dict()
        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies
        info_dict, response = fill_fields_post(info_dict, req, response,
                                               req_headers, inicio)
        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))
        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(
            str(is_channel), "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])
        if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
            show_infobox(info_dict)
        # On a proxy error, refresh the proxy list and retry up to the
        # number given in proxy_retries
        response['data'], response['sucess'], url, opt = proxy_post_processing(
            url, proxy_data, response, opt)
        if opt.get('out_break', False):
            break
    return type('HTTPResponse', (), response)
def downloadpage(url, **opt):
    """
    Open a url and return the data obtained.

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request; if empty the default headers are used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Whether redirects are followed.
    @type follow_redirects: bool
    @param cookies: Whether cookies are used.
    @type cookies: bool
    @param replace_headers: If True, the headers passed via the "headers" parameter completely
        replace the default headers. If False, they only update the default headers.
    @type replace_headers: bool
    @param add_referer: Whether to add the "Referer" header using the url's domain as value.
    @type add_referer: bool
    @param only_headers: If True, only the headers are downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the random header selection method.
    @type random_headers: bool
    @param ignore_response_code: If True, skip the WebErrorException path for errors such as a 404
        that still carries functional data.
    @type ignore_response_code: bool
    @return: Result of the request
    @rtype: HTTPResponse

    HTTPResponse.sucess:  bool   True: request completed correctly | False: error
    HTTPResponse.code:    int    Server response code, or error code when an error occurs
    HTTPResponse.error:   str    Description of the error, if any
    HTTPResponse.headers: dict   Dictionary with the server response headers
    HTTPResponse.data:    str    Response obtained from the server
    HTTPResponse.json:    dict   Response obtained from the server in json format
    HTTPResponse.time:    float  Time taken to make the request
    """
    logger.info()
    load_cookies()
    import requests
    from lib import cloudscraper
    # Default headers, unless the caller overrides them
    req_headers = default_headers.copy()
    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            # BUG FIX: was dict(opt('headers')) — calling the kwargs dict,
            # which raised TypeError whenever replace_headers=True.
            req_headers = dict(opt['headers'])
    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)
    # Retry loop: one pass per proxy retry allowed by 'proxy_retries'
    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1
        # This variant always uses CloudScraper for the session.
        session = cloudscraper.create_scraper()
        session.verify = False
        if opt.get('cookies', True):
            session.cookies = cj
        session.headers.update(req_headers)
        # Prepare the url in case a proxy is needed, or if 'proxies' is sent
        # by the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxies', None) is not None:
            session.proxies = opt['proxies']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']
        inicio = time.time()
        if opt.get(
                'timeout', None
        ) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None
        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get(
                        'file', None) is not None:
                    if opt.get('post', None) is not None:
                        ### Convert string post in dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'],
                                                         keep_blank_values=1)
                                payload = dict()
                                for key, value in post.items():
                                    try:
                                        payload[key] = value[0]
                                    except:
                                        payload[key] = ''
                            else:
                                payload = opt['post']
                    ### Verifies 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('file', None) is not None:
                        if os.path.isfile(opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(
                                    opt['file'])
                            files = {
                                'file':
                                (opt['file_name'], open(opt['file'], 'rb'))
                            }
                            file_name = opt['file']
                        else:
                            files = {
                                'file': (opt.get('file_name', 'Default'),
                                         opt['file'])
                            }
                            file_name = opt.get(
                                'file_name',
                                'Default') + ', Buffer de memoria'
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           timeout=opt['timeout'])
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           files=files,
                                           timeout=opt['timeout'])
                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get(
                                          'follow_redirects', True),
                                      timeout=opt['timeout'])
            # MODERNIZED: was the Python2-only "except Exception, e" form;
            # "as e" is valid on Python 2.6+ and Python 3.
            except Exception as e:
                if not opt.get('ignore_response_code',
                               False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finalizado en', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                    return type('HTTPResponse', (), response)
                else:
                    req = requests.Response()
                    req.status_code = str(e)
        else:
            # Empty url: return an empty, unsuccessful response.
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            return type('HTTPResponse', (), response)
        response_code = req.status_code
        response['data'] = req.content
        response['url'] = req.url
        if not response['data']:
            response['data'] = ''
        try:
            response['json'] = to_utf8(req.json())
        except:
            response['json'] = dict()
        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies
        info_dict, response = fill_fields_post(info_dict, req, response,
                                               req_headers, inicio)
        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))
        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(
            str(is_channel), "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])
        if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
            show_infobox(info_dict)
        # On a proxy error, refresh the proxy list and retry up to the
        # number given in proxy_retries
        response['data'], response['sucess'], url, opt = proxy_post_processing(
            url, proxy_data, response, opt)
        if opt.get('out_break', False):
            break
def downloadpage(url, **opt):
    """
    Open a url and return the data obtained.

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent via POST.
    @type post: str (json data), dict
    @param headers: Headers for the request; if empty, the default headers are used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Whether redirects should be followed.
    @type follow_redirects: bool
    @param cookies: Whether cookies should be used.
    @type cookies: bool
    @param replace_headers: If True, the headers passed in the "headers" parameter completely
        replace the default headers. If False, they only update the default headers.
    @type replace_headers: bool
    @param add_host: Whether to add the Host header first, like a regular browser would.
        Disabled by default; only use it with problematic sites (it causes trouble with proxies).
    @type add_host: bool
    @param add_referer: Whether to add the "Referer" header using the url's domain as value.
    @type add_referer: bool
    @param referer: If set, adds the "Referer" header using the given value.
    @type referer: str
    @param only_headers: If True, only the headers are downloaded, omitting the url content.
    @type only_headers: bool
    @param random_headers: If True, a random set of headers is selected.
    @type random_headers: bool
    @param ignore_response_code: If True, skips raising WebErrorException for errors such as
        a 404 that still carries usable data (e.g. veseriesonline).
    @type ignore_response_code: bool
    @param hide_infobox: If True, the info box is not logged on a successful request.
    @type hide_infobox: bool
    @param soup: If True, a BeautifulSoup element is set on the soup attribute of HTTPResponse.
    @type soup: bool
    @return: Result of the request.
    @rtype: HTTPResponse

    Parameter             | Type     | Description
    ----------------------|----------|-------------------------------------------------------------------------------
    HTTPResponse.sucess:  | bool     | True: request performed correctly | False: error performing the request
    HTTPResponse.code:    | int      | Server response code, or error code if an error occurred
    HTTPResponse.error:   | str      | Description of the error, if an error occurred
    HTTPResponse.headers: | dict     | Dictionary with the server response headers
    HTTPResponse.data:    | str      | Response obtained from the server
    HTTPResponse.json:    | dict     | Response obtained from the server in json format
    HTTPResponse.soup:    | bs4/None | BeautifulSoup object if requested, None otherwise
    HTTPResponse.time:    | float    | Time taken to perform the request
    """
    global CF_LIST
    if not opt.get('alfa_s', False):
        logger.info()
    from . import scrapertools
    load_cookies(opt.get('alfa_s', False))
    cf_ua = config.get_setting('cf_assistant_ua', None)
    url = url.strip()

    # Default headers, unless something else is specified
    req_headers = OrderedDict()
    if opt.get('add_host', False):
        req_headers['Host'] = urlparse.urlparse(url).netloc
    req_headers.update(default_headers.copy())
    if opt.get('add_referer', False):
        req_headers['Referer'] = "/".join(url.split("/")[:3])
    if isinstance(opt.get('referer'), str) and '://' in opt.get('referer'):
        req_headers['Referer'] = opt.get('referer')

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            # FIX: was dict(opt('headers')) — calling the opt dict raised
            # TypeError whenever replace_headers=True was passed.
            req_headers = dict(opt['headers'])

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()

    if not PY3:
        url = urllib.quote(url.encode('utf-8'), safe="%/:=&?~#+!$,;'@()*[]")
    else:
        url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    if opt.get('forced_proxy_opt', None) and channel_proxy_list(url):
        if opt['forced_proxy_opt'] in ['ProxyCF', 'ProxyDirect']:
            if 'cliver' not in url:
                opt['forced_proxy_opt'] = 'ProxyJSON'
            else:
                opt['forced_proxy'] = opt['forced_proxy_opt']
        else:
            opt['forced_proxy'] = opt['forced_proxy_opt']

    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1

        domain = urlparse.urlparse(url)[1]
        global CS_stat
        if (domain in CF_LIST or opt.get('CF', False)) and opt.get('CF_test', True):
            # Domain is on the CF list or flagged by the caller: it needs CloudScraper
            from lib import cloudscraper
            session = cloudscraper.create_scraper()
            session.verify = True
            CS_stat = True
            if cf_ua and cf_ua != 'Default' and get_cookie(url, 'cf_clearance'):
                req_headers['User-Agent'] = cf_ua
        else:
            session = requests.session()
            session.verify = False
            CS_stat = False

        if opt.get('cookies', True):
            session.cookies = cj
        if not opt.get('keep_alive', True):
            #session.keep_alive = opt['keep_alive']
            req_headers['Connection'] = "close"

        # Prepare the url in case a proxy is needed, or "proxy_addr_forced" is sent by the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxy_addr_forced', {}):
            session.proxies = opt['proxy_addr_forced']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']

        if opt.get('headers_proxy', {}):
            req_headers.update(dict(opt['headers_proxy']))

        session.headers = req_headers.copy()

        inicio = time.time()

        if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None

        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get('file', None) is not None or opt.get('files', {}):
                    if opt.get('post', None) is not None:
                        ### Convert string post in dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except Exception:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                                payload = dict()
                                for key, value in list(post.items()):
                                    try:
                                        payload[key] = value[0]
                                    except Exception:
                                        payload[key] = ''
                            else:
                                payload = opt['post']

                    ### Verifies 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('files', {}):
                        files = opt['files']
                        file_name = opt.get('file_name', 'File Object')
                    elif opt.get('file', None) is not None:
                        if len(opt['file']) < 256 and os.path.isfile(opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(opt['file'])
                            files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                            file_name = opt['file']
                        else:
                            files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                            file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'

                    info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get('follow_redirects', True),
                                           timeout=opt.get('timeout', None),
                                           params=opt.get('params', {}))
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get('follow_redirects', True),
                                           files=files,
                                           timeout=opt.get('timeout', None),
                                           params=opt.get('params', {}))
                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt.get('timeout', None),
                                       params=opt.get('params', {}))
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get('follow_redirects', True),
                                      timeout=opt.get('timeout', None),
                                      params=opt.get('params', {}))
            except Exception as e:
                if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finalizado en', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                    import traceback
                    logger.error(traceback.format_exc(1))
                    return type('HTTPResponse', (), response)
                else:
                    # Best-effort mode: fake a Response carrying the error as status
                    req = requests.Response()
                    req.status_code = str(e)
        else:
            # Empty url: return an empty, unsuccessful response
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            response['soup'] = None
            return type('HTTPResponse', (), response)

        response_code = req.status_code

        # Cloudflare challenge detected on a domain not yet on the CF list:
        # persist the domain and retry once through CloudScraper
        if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
                and not opt.get('CF', False) and opt.get('CF_test', True):
            domain = urlparse.urlparse(url)[1]
            if domain not in CF_LIST:
                CF_LIST += [domain]
                opt["CF"] = True
                with open(CF_LIST_PATH, "a") as CF_File:
                    CF_File.write("%s\n" % domain)
                logger.debug("CF retry... for domain: %s" % domain)
                return downloadpage(url, **opt)

        # CF Assistant (server identifies itself as 'Alfa'): retry once with cf_v2
        if req.headers.get('Server', '') == 'Alfa' and response_code in [429, 503, 403] \
                and not opt.get('cf_v2', False) and opt.get('CF_test', True):
            opt["cf_v2"] = True
            logger.debug("CF Assistant retry... for domain: %s" % urlparse.urlparse(url)[1])
            return downloadpage(url, **opt)

        response['data'] = req.content

        # Decode textual payloads on PY3 using the explicit or detected encoding
        try:
            response['encoding'] = str(req.encoding).lower() if req.encoding and req.encoding is not None else None
            if opt.get('encoding') and opt.get('encoding') is not None:
                encoding = opt["encoding"]
            else:
                encoding = response['encoding']
            if not encoding:
                encoding = 'utf-8'
            if PY3 and isinstance(response['data'], bytes) and encoding is not None \
                    and ('text/' in req.headers.get('Content-Type', '')
                         or 'json' in req.headers.get('Content-Type', '')
                         or 'xml' in req.headers.get('Content-Type', '')):
                response['data'] = response['data'].decode(encoding)
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        # Fallback: map remaining bytes to a str one code point per byte
        try:
            if PY3 and isinstance(response['data'], bytes) \
                    and not ('application' in req.headers.get('Content-Type', '')
                             or 'javascript' in req.headers.get('Content-Type', '')
                             or 'image' in req.headers.get('Content-Type', '')):
                response['data'] = "".join(chr(x) for x in bytes(response['data']))
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        # Repair common mojibake sequences in textual responses
        try:
            if 'text/' in req.headers.get('Content-Type', '') \
                    or 'json' in req.headers.get('Content-Type', '') \
                    or 'xml' in req.headers.get('Content-Type', ''):
                response['data'] = response['data'].replace('Á', 'Á').replace('É', 'É')\
                    .replace('Í', 'Í').replace('Ó', 'Ó').replace('Ú', 'Ú')\
                    .replace('Ü', 'Ü').replace('¡', '¡').replace('¿', '¿')\
                    .replace('Ñ', 'Ñ').replace('ñ', 'n').replace('ü', 'ü')\
                    .replace('á', 'á').replace('é', 'é').replace('í', 'í')\
                    .replace('ó', 'ó').replace('ú', 'ú').replace('ª', 'ª')\
                    .replace('º', 'º')
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        response['url'] = req.url
        if not response['data']:
            response['data'] = ''

        response['soup'] = None
        if opt.get("soup", False):
            try:
                from bs4 import BeautifulSoup
                response["soup"] = BeautifulSoup(req.content, "html5lib",
                                                 from_encoding=opt.get('encoding', response['encoding']))
            except Exception:
                import traceback
                logger.error("Error creando sopa")
                logger.error(traceback.format_exc())

        # Parse json unless the payload is clearly binary or json_to_utf8 is disabled
        try:
            if 'bittorrent' not in req.headers.get('Content-Type', '') \
                    and 'octet-stream' not in req.headers.get('Content-Type', '') \
                    and 'zip' not in req.headers.get('Content-Type', '') \
                    and opt.get('json_to_utf8', True):
                response['json'] = to_utf8(req.json())
            else:
                response['json'] = dict()
        except Exception:
            response['json'] = dict()

        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies

        if response['code'] == 200:
            response['sucess'] = True
        else:
            response['sucess'] = False

        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))

        # Raise WebErrorException only when called from a channel module
        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])

        info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
        if not 'api.themoviedb' in url and not 'api.trakt' in url and not opt.get('alfa_s', False) \
                and not opt.get("hide_infobox"):
            show_infobox(info_dict)

        # On proxy error, refresh the proxy list and retry up to proxy_retries times
        response, url, opt = proxy_post_processing(url, proxy_data, response, opt)

        # If the proxy layer orders to leave the loop, break out
        if opt.get('out_break', False):
            break

    return type('HTTPResponse', (), response)
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request, if it contains nothing the default headers will be used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Indicates if redirects are to be followed.
    @type follow_redirects: bool
    @param cookies: Indicates whether cookies are to be used.
    @type cookies: bool
    @param replace_headers: If True, headers passed by the "headers" parameter will completely
        replace the default headers. If False, the headers passed by the "headers" parameter
        will modify the headers by default.
    @type replace_headers: bool
    @param add_referer: Indicates whether to add the "Referer" header using the domain of the
        url as a value.
    @type add_referer: bool
    @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the method of selecting random headers.
    @type random_headers: bool
    @param ignore_response_code: If True, ignore the method for WebErrorException for error
        like 404 error in veseriesonline, but it is a functional data
    @type ignore_response_code: bool
    @param use_requests: Use requests.session()
    @type: bool
    @return: Result of the petition
    @rtype: HTTPResponse

    HTTPResponse.success: bool   True: Request successful | False: Error when making the request
    HTTPResponse.code:    int    Server response code or error code if an error occurs
    HTTPResponse.error:   str    Description of the error in case of an error
    HTTPResponse.headers: dict   Dictionary with server response headers
    HTTPResponse.data:    str    Response obtained from server
    HTTPResponse.json:    dict   Response obtained from the server in json format
    HTTPResponse.time:    float  Time taken to make the request
    """
    url = scrapertools.unescape(url)
    parse = urlparse.urlparse(url)
    domain = parse.netloc

    # Choose the HTTP client: cloudscraper (CF-aware) on request, plain requests otherwise
    if opt.get('cloudscraper'):
        from lib import cloudscraper
        session = cloudscraper.create_scraper()
    else:
        from lib import requests
        session = requests.session()
    # Optional custom DNS resolution / TLS cipher adapter for this domain
    if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
        from core import resolverdns
        session.mount('https://', resolverdns.CipherSuiteAdapter(domain))

    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    # Direct-IP override: keep the Host header but swap the netloc for a fixed IP
    if domain in directIP.keys() and not opt.get('disable_directIP', False):
        req_headers['Host'] = domain
        url = urlparse.urlunparse(parse._replace(netloc=directIP.get(domain)))

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()

    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''

    session.verify = opt.get('verify', True)

    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)

    # No proxy layer in this variant; kept for fill_fields_pre/post compatibility
    proxy_data = {'dict': {}}

    inicio = time.time()

    if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None

    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file', None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict (a valid json string is passed through as-is)
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']

                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(opt['file'])
                        files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                        file_name = opt['file']
                    else:
                        # Not a path on disk: treat opt['file'] as an in-memory buffer
                        files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                        file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'

                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url,
                                       data=payload,
                                       allow_redirects=opt.get('follow_redirects', True),
                                       files=files,
                                       timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url,
                                   allow_redirects=opt.get('follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url,
                                  allow_redirects=opt.get('follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['success'] = False
                info_dict.append(('Success', 'False'))
                import traceback
                # NOTE(review): the full traceback (not the status) is stored in 'code' here,
                # unlike the success path where 'code' is the numeric status — confirm callers expect this
                response['code'] = traceback.format_exc()
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                # Best-effort mode: fake a Response carrying the error text as its status
                req.status_code = str(e)
    else:
        # Empty url: return an empty, unsuccessful response
        response['data'] = ''
        response['success'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)

    response_code = req.status_code
    response['url'] = req.url
    response['data'] = req.content if req.content else ''

    # Decode bytes payloads; utf-8 first, ISO-8859-1 as fallback
    if type(response['data']) != str:
        try:
            response['data'] = response['data'].decode('utf-8')
        except:
            response['data'] = response['data'].decode('ISO-8859-1')

    # Cloudflare challenge page detected (GET only): retry through the proxytranslate service
    if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
            and not opt.get('CF', False) and 'Ray ID' in response['data'] and not opt.get('post', None):
        logger.debug("CF retry... for domain: %s" % domain)
        from lib import proxytranslate
        gResp = proxytranslate.process_request_proxy(url)
        if gResp:
            req = gResp['result']
            response_code = req.status_code
            response['url'] = gResp['url']
            response['data'] = gResp['data']

    if not response['data']:
        response['data'] = ''

    # NOTE(review): bare except — swallows every json parsing failure silently
    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()

    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies

    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)

    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))

    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)

    # NOTE(review): logs the page URL only when the global debug setting is OFF,
    # and passes url as a second positional arg to logger.info — confirm both are intended
    if not config.get_setting("debug"):
        logger.info('Page URL:', url)

    return type('HTTPResponse', (), response)