Esempio n. 1
0
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    if None is session:
        session = requests.session()
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))

    # request session headers
    req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session paramaters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/", parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(sickbeard.PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.log('Proxy error, aborted the request using %s' % msg, logger.DEBUG)
                return
            elif proxy_address:
                logger.log('Using %s' % msg, logger.DEBUG)
                session.proxies = {
                    'http': proxy_address,
                    'https': proxy_address
                }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(u"Requested url " + url + " returned status code is " + str(
                resp.status_code) + ': ' + clients.http_error_code[resp.status_code], logger.DEBUG)
            return

    except requests.exceptions.HTTPError, e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
        return
Esempio n. 2
0
def getURL(url,
           post_data=None,
           params=None,
           headers=None,
           timeout=30,
           session=None,
           json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session,
                           cache=caches.FileCache(
                               os.path.join(cache_dir, 'sessions')))

    # request session headers
    req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session paramaters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/",
                           parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            session.proxies = {
                "http": sickbeard.PROXY_SETTING,
                "https": sickbeard.PROXY_SETTING,
            }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(
                u"Requested url " + url + " returned status code is " +
                str(resp.status_code) + ': ' +
                clients.http_error_code[resp.status_code], logger.DEBUG)
            return

    except requests.exceptions.HTTPError, e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url,
                   logger.WARNING)
        return
Esempio n. 3
0
def get_url(
        url,  # type: AnyStr
        post_data=None,  # type: Optional
        params=None,  # type: Optional
        headers=None,  # type: Optional[Dict]
        timeout=30,  # type: int
        session=None,  # type: Optional[requests.Session]
        parse_json=False,  # type: bool
        raise_status_code=False,  # type: bool
        raise_exceptions=False,  # type: bool
        as_binary=False,  # type: bool
        encoding=None,  # type: Optional[AnyStr]
        **kwargs):
    # type: (...) -> Optional[Union[AnyStr, bool, bytes, Dict, Tuple[Union[Dict, List], requests.Session]]]
    """
    Either
    1) Returns a byte-string retrieved from the url provider.
    2) Return True/False if success after using kwargs 'savefile' set to file pathname.
    3) Returns Tuple response, session if success after setting kwargs 'resp_sess' True.
    4) JSON Dict if parse_json=True.

    :param url: url
    :param post_data: post data
    :param params:
    :param headers: headers to add
    :param timeout: timeout
    :param session: optional session object
    :param parse_json: return JSON Dict
    :param raise_status_code: raise exception for status codes
    :param raise_exceptions: raise exceptions
    :param as_binary: return bytes instead of text
    :param encoding: overwrite encoding return header if as_binary is False
    :param kwargs:
    :return:
    """

    response_attr = ('text', 'content')[as_binary]

    # selectively mute some errors
    mute = filter_list(lambda x: kwargs.pop(x, False), [
        'mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout',
        'mute_http_error'
    ])

    # reuse or instantiate request session
    resp_sess = kwargs.pop('resp_sess', None)
    if None is session:
        session = CloudflareScraper.create_scraper()
        session.headers.update({'User-Agent': USER_AGENT})

    # download and save file or simply fetch url
    savename = kwargs.pop('savename', None)
    if savename:
        # session streaming
        session.stream = True

    if not kwargs.pop('nocache', False):
        cache_dir = CACHE_DIR or get_system_temp_dir()
        session = CacheControl(sess=session,
                               cache=caches.FileCache(
                                   ek.ek(os.path.join, cache_dir, 'sessions')))

    provider = kwargs.pop('provider', None)

    # handle legacy uses of `json` param
    if kwargs.get('json'):
        parse_json = kwargs.pop('json')

    # session master headers
    req_headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip,deflate'
    }
    if headers:
        req_headers.update(headers)
    if hasattr(session, 'reserved') and 'headers' in session.reserved:
        req_headers.update(session.reserved['headers'] or {})
    session.headers.update(req_headers)

    # session parameters
    session.params = params

    # session ssl verify
    session.verify = False

    # don't trust os environments (auth, proxies, ...)
    session.trust_env = False

    response = None
    try:
        # sanitise url
        parsed = list(urlparse(url))
        parsed[2] = re.sub('/{2,}', '/',
                           parsed[2])  # replace two or more / with one
        url = urlunparse(parsed)

        # session proxies
        if PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.debug('Proxy error, aborted the request using %s' % msg)
                return
            elif proxy_address:
                logger.debug('Using %s' % msg)
                session.proxies = {
                    'http': proxy_address,
                    'https': proxy_address
                }

        # decide if we get or post data to server
        if post_data or 'post_json' in kwargs:
            if True is post_data:
                post_data = None

            if post_data:
                kwargs.setdefault('data', post_data)

            if 'post_json' in kwargs:
                kwargs.setdefault('json', kwargs.pop('post_json'))

            response = session.post(url, timeout=timeout, **kwargs)
        else:
            response = session.get(url, timeout=timeout, **kwargs)
            if response.ok and not response.content and 'url=' in response.headers.get(
                    'Refresh', '').lower():
                url = response.headers.get('Refresh').lower().split(
                    'url=')[1].strip('/')
                if not url.startswith('http'):
                    parsed[2] = '/%s' % url
                    url = urlunparse(parsed)
                response = session.get(url, timeout=timeout, **kwargs)

        # if encoding is not in header try to use best guess
        # ignore downloads with savename
        if not savename and not as_binary:
            if encoding:
                response.encoding = encoding
            elif not response.encoding or 'charset' not in response.headers.get(
                    'Content-Type', ''):
                response.encoding = response.apparent_encoding

        # noinspection PyProtectedMember
        if provider and provider._has_signature(response.text):
            return getattr(response, response_attr)

        if raise_status_code:
            response.raise_for_status()

        if not response.ok:
            http_err_text = 'CloudFlare Ray ID' in response.text and \
                            'CloudFlare reports, "Website is offline"; ' or ''
            if response.status_code in http_error_code:
                http_err_text += http_error_code[response.status_code]
            elif response.status_code in range(520, 527):
                http_err_text += 'Origin server connection failure'
            else:
                http_err_text = 'Custom HTTP error code'
                if 'mute_http_error' not in mute:
                    logger.debug(
                        u'Response not ok. %s: %s from requested url %s' %
                        (response.status_code, http_err_text, url))
            return

    except requests.exceptions.HTTPError as e:
        if raise_status_code:
            response.raise_for_status()
        logger.warning(u'HTTP error %s while loading URL%s' %
                       (e.errno, _maybe_request_url(e)))
        return
    except requests.exceptions.ConnectionError as e:
        if 'mute_connect_err' not in mute:
            logger.warning(u'Connection error msg:%s while loading URL%s' %
                           (ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except requests.exceptions.ReadTimeout as e:
        if 'mute_read_timeout' not in mute:
            logger.warning(u'Read timed out msg:%s while loading URL%s' %
                           (ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except (requests.exceptions.Timeout, socket.timeout) as e:
        if 'mute_connect_timeout' not in mute:
            logger.warning(
                u'Connection timed out msg:%s while loading URL %s' %
                (ex(e), _maybe_request_url(e, url)))
        if raise_exceptions:
            raise e
        return
    except (BaseException, Exception) as e:
        if ex(e):
            logger.warning(
                u'Exception caught while loading URL %s\r\nDetail... %s\r\n%s'
                % (url, ex(e), traceback.format_exc()))
        else:
            logger.warning(
                u'Unknown exception while loading URL %s\r\nDetail... %s' %
                (url, traceback.format_exc()))
        if raise_exceptions:
            raise e
        return

    if parse_json:
        try:
            data_json = response.json()
            if resp_sess:
                return ({}, data_json)[isinstance(data_json,
                                                  (dict, list))], session
            return ({}, data_json)[isinstance(data_json, (dict, list))]
        except (TypeError, Exception) as e:
            logger.warning(u'JSON data issue from URL %s\r\nDetail... %s' %
                           (url, ex(e)))
            if raise_exceptions:
                raise e
            return None

    if savename:
        try:
            write_file(savename,
                       response,
                       raw=True,
                       raise_exceptions=raise_exceptions)
        except (BaseException, Exception) as e:
            if raise_exceptions:
                raise e
            return
        return True

    if resp_sess:
        return getattr(response, response_attr), session

    return getattr(response, response_attr)
Esempio n. 4
0
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False, **kwargs):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    if None is session:
        session = requests.session()
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))

    # request session headers
    req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session paramaters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/", parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(sickbeard.PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.log('Proxy error, aborted the request using %s' % msg, logger.DEBUG)
                return
            elif proxy_address:
                logger.log('Using %s' % msg, logger.DEBUG)
                session.proxies = {
                    'http': proxy_address,
                    'https': proxy_address
                }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout, **kwargs)
        else:
            resp = session.get(url, timeout=timeout, **kwargs)

        if not resp.ok:
            if resp.status_code in clients.http_error_code:
                http_err_text = clients.http_error_code[resp.status_code]
            elif resp.status_code in range(520, 527):
                http_err_text = 'CloudFlare to origin server connection failure'
            else:
                http_err_text = 'Custom HTTP error code'
            logger.log(u'Requested url %s returned status code is %s: %s'
                       % (url, resp.status_code, http_err_text), logger.DEBUG)
            return

    except requests.exceptions.HTTPError as e:
        logger.log(u'HTTP error %s while loading URL %s' % (e.errno, url), logger.WARNING)
        return
    except requests.exceptions.ConnectionError as e:
        logger.log(u'Internet connection error msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except requests.exceptions.ReadTimeout as e:
        logger.log(u'Read timed out msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except (requests.exceptions.Timeout, socket.timeout) as e:
        logger.log(u'Connection timed out msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except Exception as e:
        if e.message:
            logger.log(u'Exception caught while loading URL %s\r\nDetail... %s\r\n%s' % (url, str(e.message), traceback.format_exc()), logger.WARNING)
        else:
            logger.log(u'Unknown exception while loading URL %s\r\nDetail... %s' % (url, traceback.format_exc()), logger.WARNING)
        return

    if json:
        return resp.json()

    return resp.content
Esempio n. 5
0
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False, **kwargs):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    if None is session:
        session = requests.session()
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))

    # request session headers
    req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session paramaters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/", parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(sickbeard.PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.log('Proxy error, aborted the request using %s' % msg, logger.DEBUG)
                return
            elif proxy_address:
                logger.log('Using %s' % msg, logger.DEBUG)
                session.proxies = {
                    'http': proxy_address,
                    'https': proxy_address
                }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout, **kwargs)
        else:
            resp = session.get(url, timeout=timeout, **kwargs)

        if not resp.ok:
            if resp.status_code in clients.http_error_code:
                http_err_text = clients.http_error_code[resp.status_code]
            elif resp.status_code in range(520, 527):
                http_err_text = 'CloudFlare to origin server connection failure'
            else:
                http_err_text = 'Custom HTTP error code'
            logger.log(u'Requested url %s returned status code is %s: %s'
                       % (url, resp.status_code, http_err_text), logger.DEBUG)
            return

    except requests.exceptions.HTTPError as e:
        logger.log(u'HTTP error %s while loading URL %s' % (e.errno, url), logger.WARNING)
        return
    except requests.exceptions.ConnectionError as e:
        logger.log(u'Internet connection error msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except requests.exceptions.ReadTimeout as e:
        logger.log(u'Read timed out msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except (requests.exceptions.Timeout, socket.timeout) as e:
        logger.log(u'Connection timed out msg:%s while loading URL %s' % (str(e.message), url), logger.WARNING)
        return
    except Exception as e:
        if e.message:
            logger.log(u'Exception caught while loading URL %s\r\nDetail... %s\r\n%s' % (url, str(e.message), traceback.format_exc()), logger.WARNING)
        else:
            logger.log(u'Unknown exception while loading URL %s\r\nDetail... %s' % (url, traceback.format_exc()), logger.WARNING)
        return

    if json:
        return resp.json()

    return resp.content