Ejemplo n.º 1
0
def _init_session(session=None, **kwargs):
    """Return an HTTP session, building and configuring one if none is given.

    A caller-supplied ``session`` is returned untouched.  Otherwise a new
    session is created and configured from ``kwargs``:

    * ``asynchronous`` -- truthy to build a ``FuturesSession`` instead of a
      plain ``Session``; ``max_workers`` (default 8) sizes its pool.
    * ``proxies`` -- assigned to ``session.proxies`` when truthy.
    * ``retry`` (default 5), ``backoff_factor`` (default 0.3) and
      ``status_forcelist`` (default ``[429, 500, 502, 503, 504]``) --
      forwarded to ``Retry``.
    * ``verify`` -- assigned to ``session.verify`` whenever it is supplied,
      including an explicit ``False``.
    * ``timeout`` (default ``DEFAULT_TIMEOUT``) -- passed to the
      ``TimeoutHTTPAdapter`` mounted for ``https://``.
    * ``user_agent`` -- ``User-Agent`` header value; a random entry of
      ``USER_AGENT_LIST`` when omitted.
    * ``headers`` -- dict of extra headers merged over the module defaults.

    Returns:
        The session that was passed in, or the newly configured one.
    """
    if session is not None:
        return session

    if kwargs.get("asynchronous"):
        session = FuturesSession(max_workers=kwargs.get("max_workers", 8))
    else:
        session = Session()
    if kwargs.get("proxies"):
        session.proxies = kwargs.get("proxies")

    retry_options = {
        "total": kwargs.get("retry", 5),
        "backoff_factor": kwargs.get("backoff_factor", 0.3),
        "status_forcelist": kwargs.get("status_forcelist",
                                       [429, 500, 502, 503, 504]),
    }
    retry_methods = ["HEAD", "GET", "OPTIONS", "POST", "TRACE"]
    try:
        # urllib3 >= 1.26 spells the option ``allowed_methods``; the old
        # ``method_whitelist`` keyword was removed in urllib3 2.0.
        retries = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # Older urllib3 only understands the original keyword name.
        retries = Retry(method_whitelist=retry_methods, **retry_options)

    # Compare against None so an explicit ``verify=False`` is honoured
    # instead of being dropped by a truthiness test.
    verify = kwargs.get("verify")
    if verify is not None:
        session.verify = verify

    session.mount(
        "https://",
        TimeoutHTTPAdapter(max_retries=retries,
                           timeout=kwargs.get("timeout", DEFAULT_TIMEOUT)),
    )
    # TODO: Figure out how to utilize this within the validate_response
    # TODO: This will be a much better way of handling bad requests than
    # TODO: what I'm currently doing.
    # session.hooks['response'] = \
    #     [lambda response, *args, **kwargs: response.raise_for_status()]
    user_agent = kwargs.get("user_agent", random.choice(USER_AGENT_LIST))

    # Work on a copy: the module-level ``headers`` mapping is shared, so
    # writing into it would leak one call's headers into every later call.
    session_headers = dict(headers)
    session_headers["User-Agent"] = user_agent
    extra_headers = kwargs.get("headers")
    if extra_headers and isinstance(extra_headers, dict):
        # Caller-supplied headers take precedence over the defaults.
        session_headers.update(extra_headers)
    session.headers.update(session_headers)
    return session
Ejemplo n.º 2
0
def _init_session(session, **kwargs):
    """Return an HTTP session, building and configuring one if needed.

    A non-``None`` ``session`` is returned unchanged.  When ``session`` is
    ``None`` a new one is created from ``kwargs``:

    * ``asynchronous`` -- truthy to build a ``FuturesSession`` (pool size
      ``max_workers``, default 8) instead of a plain ``Session``.
    * ``proxies`` -- assigned to ``session.proxies`` when truthy.
    * ``timeout`` (default ``DEFAULT_TIMEOUT``) -- passed to the
      ``TimeoutHTTPAdapter`` mounted for ``https://``.

    The new session gets a fixed retry policy (3 attempts, backoff factor 1,
    on status 429/500/502/503/504) and a ``User-Agent`` header picked at
    random from ``USER_AGENT_LIST``.

    Returns:
        The session that was passed in, or the newly configured one.
    """
    if session is not None:
        return session

    if kwargs.get('asynchronous'):
        session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
    else:
        session = Session()
    if kwargs.get('proxies'):
        session.proxies = kwargs.get('proxies')

    retry_options = {
        'total': 3,
        'backoff_factor': 1,
        'status_forcelist': [429, 500, 502, 503, 504],
    }
    retry_methods = ["HEAD", "GET", "OPTIONS", "POST", "TRACE"]
    try:
        # urllib3 >= 1.26 spells the option ``allowed_methods``; the old
        # ``method_whitelist`` keyword was removed in urllib3 2.0.
        retries = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # Older urllib3 only understands the original keyword name.
        retries = Retry(method_whitelist=retry_methods, **retry_options)

    session.mount('https://', TimeoutHTTPAdapter(
        max_retries=retries,
        timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
    # TODO: Figure out how to utilize this within the validate_response
    # TODO: This will be a much better way of handling bad requests than
    # TODO: what I'm currently doing.
    # session.hooks['response'] = \
    #     [lambda response, *args, **kwargs: response.raise_for_status()]
    session.headers.update({
        "User-Agent": random.choice(USER_AGENT_LIST)
    })
    return session
Ejemplo n.º 3
0
 def get_async_tor_proxy_session(self):
     """Return a ``FuturesSession`` whose traffic goes through local Tor.

     Both ``http`` and ``https`` requests are sent to the SOCKS proxy at
     ``127.0.0.1:9050``.

     Returns:
         A new ``FuturesSession`` with ``proxies`` set.
     """
     from requests_futures.sessions import FuturesSession

     session = FuturesSession()
     # Tor uses the 9050 port as the default socks port.  The ``socks5h``
     # scheme makes the proxy resolve hostnames; plain ``socks5`` resolves
     # them on the client, which sends DNS lookups outside the Tor circuit.
     tor_proxy = 'socks5h://127.0.0.1:9050'
     session.proxies = {
         'http': tor_proxy,
         'https': tor_proxy,
     }
     return session
Ejemplo n.º 4
0
def _init_session(session, **kwargs):
    """Return a session with a retrying adapter mounted for ``http://``.

    When ``session`` is ``None`` a new one is created: a ``FuturesSession``
    (pool size ``max_workers``, default 8) if ``asynchronous`` is truthy,
    otherwise a plain ``Session``; a truthy ``proxies`` kwarg is assigned to
    ``session.proxies``.  The retry adapter is mounted on every call, so a
    caller-supplied session is modified as well.

    Returns:
        The configured session.
    """
    if session is None:
        wants_async = kwargs.get('asynchronous')
        session = (
            FuturesSession(max_workers=kwargs.get('max_workers', 8))
            if wants_async
            else Session()
        )
        proxy_map = kwargs.get('proxies')
        if proxy_map:
            session.proxies = proxy_map

    # NOTE(review): only the ``http://`` prefix gets the retry adapter here;
    # confirm whether ``https://`` traffic was meant to be retried too.
    retry_policy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[502, 503, 504],
    )
    adapter = HTTPAdapter(max_retries=retry_policy)
    session.mount('http://', adapter)
    return session
Ejemplo n.º 5
0
def _init_session(session, **kwargs):
    """Return an HTTP session, building and configuring one if needed.

    A non-``None`` ``session`` is returned unchanged.  When ``session`` is
    ``None`` a new one is created from ``kwargs``:

    * ``asynchronous`` -- truthy to build a ``FuturesSession`` (pool size
      ``max_workers``, default 8) instead of a plain ``Session``.
    * ``proxies`` -- assigned to ``session.proxies`` when truthy.
    * ``retry`` (default 5), ``backoff_factor`` (default 0.3) and
      ``status_forcelist`` (default ``[429, 500, 502, 503, 504]``) --
      forwarded to ``Retry``.
    * ``verify`` -- assigned to ``session.verify`` whenever it is supplied,
      including an explicit ``False``.
    * ``timeout`` (default ``DEFAULT_TIMEOUT``) -- passed to the
      ``TimeoutHTTPAdapter`` mounted for ``https://``.
    * ``user_agent`` -- ``User-Agent`` header value; a random entry of
      ``USER_AGENT_LIST`` when omitted.

    Returns:
        The session that was passed in, or the newly configured one.
    """
    if session is not None:
        return session

    if kwargs.get('asynchronous'):
        session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
    else:
        session = Session()
    if kwargs.get('proxies'):
        session.proxies = kwargs.get('proxies')

    retry_options = {
        'total': kwargs.get('retry', 5),
        'backoff_factor': kwargs.get('backoff_factor', .3),
        'status_forcelist': kwargs.get('status_forcelist',
                                       [429, 500, 502, 503, 504]),
    }
    retry_methods = ['HEAD', 'GET', 'OPTIONS', 'POST', 'TRACE']
    try:
        # urllib3 >= 1.26 spells the option ``allowed_methods``; the old
        # ``method_whitelist`` keyword was removed in urllib3 2.0.
        retries = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # Older urllib3 only understands the original keyword name.
        retries = Retry(method_whitelist=retry_methods, **retry_options)

    # Compare against None so an explicit ``verify=False`` is honoured
    # instead of being dropped by a truthiness test.
    verify = kwargs.get('verify')
    if verify is not None:
        session.verify = verify

    session.mount(
        'https://',
        TimeoutHTTPAdapter(max_retries=retries,
                           timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
    # TODO: Figure out how to utilize this within the validate_response
    # TODO: This will be a much better way of handling bad requests than
    # TODO: what I'm currently doing.
    # session.hooks['response'] = \
    #     [lambda response, *args, **kwargs: response.raise_for_status()]
    user_agent = kwargs.get('user_agent', random.choice(USER_AGENT_LIST))
    # if kwargs.get('cookies'):
    #     cookies = get_cookies(user_agent)
    #     [session.cookies.set(c['name'], c['value']) for c in cookies]
    #     session_headers.update({
    #         'cookie': '; '.join([
    #             item['name'] + "=" + item['value'] for item in cookies
    #         ])
    #     })

    # Work on a copy: the module-level ``headers`` mapping is shared, so
    # writing the User-Agent into it would leak into every later call.
    session_headers = dict(headers)
    session_headers['User-Agent'] = user_agent
    session.headers.update(session_headers)
    return session
Ejemplo n.º 6
0
 def _get_session(self):
     """Build a ``FuturesSession`` authenticated with this object's API key.

     The key is used as both the user name and the password of the HTTP
     basic-auth credentials.  ``self.proxies`` is assigned to the session
     only when it is truthy and has a truthy ``'http'`` or ``'https'`` entry.

     Returns:
         The newly created session.
     """
     new_session = FuturesSession()
     new_session.auth = HTTPBasicAuth(self.api_key, self.api_key)

     proxy_cfg = self.proxies
     if not proxy_cfg:
         return new_session
     if proxy_cfg.get('http') or proxy_cfg.get('https'):
         new_session.proxies = proxy_cfg
     return new_session