def _init_session(session=None, **kwargs): session_headers = headers if session is None: if kwargs.get("asynchronous"): session = FuturesSession(max_workers=kwargs.get("max_workers", 8)) else: session = Session() if kwargs.get("proxies"): session.proxies = kwargs.get("proxies") retries = Retry( total=kwargs.get("retry", 5), backoff_factor=kwargs.get("backoff_factor", 0.3), status_forcelist=kwargs.get("status_forcelist", [429, 500, 502, 503, 504]), method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"], ) if kwargs.get("verify"): session.verify = kwargs.get("verify") session.mount( "https://", TimeoutHTTPAdapter(max_retries=retries, timeout=kwargs.get("timeout", DEFAULT_TIMEOUT)), ) # TODO: Figure out how to utilize this within the validate_response # TODO: This will be a much better way of handling bad requests than # TODO: what I'm currently doing. # session.hooks['response'] = \ # [lambda response, *args, **kwargs: response.raise_for_status()] user_agent = kwargs.get("user_agent", random.choice(USER_AGENT_LIST)) session_headers["User-Agent"] = user_agent if kwargs.get("headers") and isinstance(kwargs.get("headers"), dict): session_headers.update(**headers) session.headers.update(**session_headers) return session
def _init_session(session, **kwargs): if session is None: if kwargs.get('asynchronous'): session = FuturesSession(max_workers=kwargs.get('max_workers', 8)) else: session = Session() if kwargs.get('proxies'): session.proxies = kwargs.get('proxies') retries = Retry( total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504], method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"]) session.mount('https://', TimeoutHTTPAdapter( max_retries=retries, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT))) # TODO: Figure out how to utilize this within the validate_response # TODO: This will be a much better way of handling bad requests than # TODO: what I'm currently doing. # session.hooks['response'] = \ # [lambda response, *args, **kwargs: response.raise_for_status()] session.headers.update({ "User-Agent": random.choice(USER_AGENT_LIST) }) return session
def get_async_tor_proxy_session(self):
    """Return a ``FuturesSession`` that sends HTTP and HTTPS through local Tor.

    The session's ``proxies`` point at a SOCKS proxy on 127.0.0.1:9050,
    which is Tor's default SOCKS port.
    """
    from requests_futures.sessions import FuturesSession

    # ``socks5h`` (not ``socks5``) makes the proxy resolve hostnames. With
    # plain ``socks5`` DNS lookups happen on the client, which leaks the
    # target hostname outside Tor and cannot resolve .onion addresses.
    tor_proxy_url = 'socks5h://127.0.0.1:9050'

    session = FuturesSession()
    session.proxies = {
        'http': tor_proxy_url,
        'https': tor_proxy_url,
    }
    return session
def _init_session(session, **kwargs): if session is None: if kwargs.get('asynchronous'): session = FuturesSession(max_workers=kwargs.get('max_workers', 8)) else: session = Session() if kwargs.get('proxies'): session.proxies = kwargs.get('proxies') retries = \ Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504]) session.mount('http://', HTTPAdapter(max_retries=retries)) return session
def _init_session(session, **kwargs): session_headers = headers if session is None: if kwargs.get('asynchronous'): session = FuturesSession(max_workers=kwargs.get('max_workers', 8)) else: session = Session() if kwargs.get('proxies'): session.proxies = kwargs.get('proxies') retries = Retry( total=kwargs.get('retry', 5), backoff_factor=kwargs.get('backoff_factor', .3), status_forcelist=kwargs.get('status_forcelist', [429, 500, 502, 503, 504]), method_whitelist=['HEAD', 'GET', 'OPTIONS', 'POST', 'TRACE']) if kwargs.get('verify'): session.verify = kwargs.get('verify') session.mount( 'https://', TimeoutHTTPAdapter(max_retries=retries, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT))) # TODO: Figure out how to utilize this within the validate_response # TODO: This will be a much better way of handling bad requests than # TODO: what I'm currently doing. # session.hooks['response'] = \ # [lambda response, *args, **kwargs: response.raise_for_status()] user_agent = kwargs.get('user_agent', random.choice(USER_AGENT_LIST)) # if kwargs.get('cookies'): # cookies = get_cookies(user_agent) # [session.cookies.set(c['name'], c['value']) for c in cookies] # session_headers.update({ # 'cookie': '; '.join([ # item['name'] + "=" + item['value'] for item in cookies # ]) # }) session_headers['User-Agent'] = user_agent session.headers.update(**session_headers) return session
def _get_session(self):
    """Create a ``FuturesSession`` set up with this object's credentials.

    The session uses HTTP basic auth with ``self.api_key`` as both the
    username and the password. ``self.proxies`` is attached only when it is
    truthy and has a truthy ``'http'`` or ``'https'`` entry.
    """
    futures_session = FuturesSession()
    futures_session.auth = HTTPBasicAuth(self.api_key, self.api_key)

    proxy_map = self.proxies
    if not proxy_map:
        return futures_session

    if proxy_map.get('http') or proxy_map.get('https'):
        futures_session.proxies = proxy_map
    return futures_session