def request(self, path, proxy=None):
    """Send one request for ``path`` and return it wrapped in ``Response``.

    The target URL is ``self.url + self.base_path + path``. Redirects are
    followed by hand (at most five of them, i.e. six requests in total) and
    only when ``self.redirect`` is truthy.

    Args:
        path: Path appended to the base URL.
        proxy: Optional proxy URL. When omitted, a random entry of
            ``self.proxylist`` is used, falling back to ``self.proxy``.
            A proxy given without a scheme is treated as ``http://``.

    Returns:
        The ``Response`` built from the last response received.

    Raises:
        RequestException: On any failure; the argument is a dict of the form
            ``{"message": <description>}``.

    Side effects:
        Overwrites ``self.headers["User-Agent"]`` when ``self.random_agents``
        is set. On an SSL error, resets ``self.url`` to ``self.base_url``,
        calls ``self.set_adapter()`` and retries recursively.
    """
    result = None
    error = None
    max_requests = 6  # the first request plus up to five redirects

    try:
        if not proxy:
            if self.proxylist:
                proxy = random.choice(self.proxylist)
            elif self.proxy:
                proxy = self.proxy

        if proxy:
            if not proxy.startswith(
                ("http://", "https://", "socks5://", "socks5h://", "socks4://", "socks4a://")
            ):
                proxy = "http://" + proxy

            if proxy.startswith("https://"):
                proxies = {"https": proxy}
            else:
                proxies = {"https": proxy, "http": proxy}
        else:
            proxies = None

        url = self.url + self.base_path + path

        if self.random_agents:
            self.headers["User-Agent"] = random.choice(self.random_agents)

        # We can't just do `allow_redirects=True` because we set the Host
        # header in the request headers, which would be kept in the next
        # requests (following redirects).
        for hop in range(max_requests):
            headers = self.headers.copy()
            if hop != 0:
                url = urljoin(url, result.redirect)
                headers["Host"] = url.split("/")[2]

            request = requests.Request(
                self.httpmethod,
                url=url,
                headers=headers,
                auth=self.auth,
                data=self.data,
            )
            prepare = request.prepare()
            # Send the URL exactly as built above rather than the one
            # produced by prepare().
            prepare.url = url

            response = self.session.send(
                prepare,
                proxies=proxies,
                allow_redirects=False,
                timeout=self.timeout,
                stream=True,
                verify=False,
            )
            result = Response(response)

            # Stop as soon as there is nothing (more) to follow, so a final
            # response that arrives on the last allowed request is returned.
            if not (self.redirect and result.redirect):
                break
        else:
            # Every allowed request was answered with yet another redirect.
            raise requests.exceptions.TooManyRedirects

    except requests.exceptions.SSLError:
        # NOTE(review): the retry is unbounded; it presumably relies on
        # set_adapter() making the next attempt succeed - confirm.
        self.url = self.base_url
        self.set_adapter()
        return self.request(path, proxy=proxy)

    except requests.exceptions.TooManyRedirects:
        error = "Too many redirects: {0}".format(self.base_url)

    except requests.exceptions.ProxyError:
        error = "Error with the proxy: {0}".format(proxy)

    # Timeouts are handled before ConnectionError because ConnectTimeout is a
    # subclass of it and would otherwise never reach this handler.
    except (
        requests.exceptions.ConnectTimeout,
        requests.exceptions.ReadTimeout,
        requests.exceptions.Timeout,
        http.client.IncompleteRead,
        socket.timeout,
    ):
        error = "Request timeout: {0}".format(self.base_url)

    except requests.exceptions.ConnectionError:
        error = "Cannot connect to: {0}:{1}".format(self.host, self.port)

    # InvalidProxyURL is a subclass of InvalidURL, so it has to come first.
    except requests.exceptions.InvalidProxyURL:
        error = "Invalid proxy URL: {0}".format(proxy)

    except requests.exceptions.InvalidURL:
        error = "Invalid URL: {0}".format(self.base_url)

    except Exception:
        error = "There was a problem in the request to: {0}".format(self.base_url)

    if error:
        raise RequestException({"message": error})

    return result
def request(self, path, proxy=None):
    """Send a request for ``path``, retrying up to ``self.maxRetries`` times.

    The target URL is ``self.url + self.basePath + path``. Redirect handling
    is delegated to the session through ``allow_redirects=self.redirect``.

    Args:
        path: Path appended to the base URL.
        proxy: Optional proxy URL. When omitted, a random entry of
            ``self.proxylist`` is used, falling back to ``self.proxy``.
            A proxy given without a scheme is treated as ``http://``. The
            proxy picked on the first attempt is reused for the retries.

    Returns:
        A ``Response`` built from the status code, reason, headers and body
        of the first successful attempt. ``None`` only if
        ``self.maxRetries`` allows no attempt at all.

    Raises:
        RequestException: When the last attempt failed; the argument is a
            dict of the form ``{"message": <description>}``.

    Side effects:
        Overwrites ``self.headers["User-Agent"]`` when ``self.randomAgents``
        is set, and rebuilds ``self.url`` from protocol/host/port after an
        SSL error.
    """
    result = None
    error = None
    # Fallback so the error messages below always have a URL to show, even
    # if an attempt fails before the full URL is assembled.
    url = self.url

    for _ in range(self.maxRetries):
        # Only the outcome of the current attempt matters: without this
        # reset, a failure followed by a successful retry would still raise.
        error = None

        try:
            if not proxy:
                if self.proxylist:
                    proxy = random.choice(self.proxylist)
                elif self.proxy:
                    proxy = self.proxy

            if proxy:
                if not proxy.startswith(
                    ("http://", "https://", "socks5://", "socks5h://", "socks4://", "socks4a://")
                ):
                    proxy = "http://" + proxy

                if proxy.startswith("https://"):
                    proxies = {"https": proxy}
                else:
                    proxies = {"https": proxy, "http": proxy}
            else:
                proxies = None

            url = self.url + self.basePath + path

            if self.randomAgents:
                self.headers["User-Agent"] = random.choice(self.randomAgents)

            request = requests.Request(
                self.httpmethod,
                url=url,
                headers=dict(self.headers),
                data=self.data,
            )
            prepare = request.prepare()
            response = self.session.send(
                prepare,
                proxies=proxies,
                allow_redirects=self.redirect,
                timeout=self.timeout,
                verify=False,
            )
            result = Response(
                response.status_code,
                response.reason,
                response.headers,
                response.content,
            )
            break

        except requests.exceptions.SSLError:
            self.url = "{0}://{1}:{2}/".format(self.protocol, self.host, self.port)
            # Recorded so that running out of retries on SSL errors raises
            # instead of silently returning None.
            error = "There was a problem in the request to: {0}".format(url)
            continue

        except requests.exceptions.TooManyRedirects:
            error = "Too many redirects: {0}".format(url)

        except requests.exceptions.ProxyError:
            error = "Error with the proxy: {0}".format(proxy)

        # Timeouts are handled before ConnectionError because ConnectTimeout
        # is a subclass of it and would otherwise never reach this handler.
        except (
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
            requests.exceptions.Timeout,
            http.client.IncompleteRead,
            socket.timeout,
        ):
            error = "Request timeout: {0}".format(url)

        except requests.exceptions.ConnectionError:
            error = "Cannot connect to: {0}:{1}".format(self.host, self.port)

        # InvalidProxyURL is a subclass of InvalidURL, so it has to come first.
        except requests.exceptions.InvalidProxyURL:
            error = "Invalid proxy URL: {0}".format(proxy)

        except requests.exceptions.InvalidURL:
            error = "Invalid URL: {0}".format(url)

        except Exception:
            error = "There was a problem in the request to: {0}".format(url)

    if error:
        raise RequestException({"message": error})

    return result
def request(self, path, proxy=None):
    """Send a request for ``path``, following redirects by hand, with retries.

    The target URL is ``self.url + self.base_path + path``. Up to
    ``self.max_retries + 1`` attempts are made; within one attempt at most
    ``MAX_REDIRECTS`` requests are sent, and redirects are followed only when
    ``self.redirect`` is truthy.

    Args:
        path: Path appended to the base URL.
        proxy: Optional proxy URL. When omitted, a random entry of
            ``self.proxylist`` is used, falling back to ``self.proxy``.
            A proxy that starts with none of ``PROXY_SCHEMES`` is treated as
            ``http://``. The proxy picked on the first attempt is reused for
            the retries.

    Returns:
        The ``Response`` built from the final response and the list of URLs
        that were followed to reach it.

    Raises:
        RequestException: When every attempt failed. Called with a short
            description and the text of the last underlying exception.

    Side effects:
        Overwrites ``self.headers["User-Agent"]`` when ``self.random_agents``
        is set. On an SSL error, resets ``self.url`` to ``self.base_url``,
        calls ``self.set_adapter()`` and retries recursively.
    """
    err_msg = None
    simple_err_msg = None

    for _ in range(self.max_retries + 1):
        result = None
        redirects = []

        try:
            if not proxy:
                if self.proxylist:
                    proxy = random.choice(self.proxylist)
                elif self.proxy:
                    proxy = self.proxy

            if proxy:
                if not proxy.startswith(PROXY_SCHEMES):
                    proxy = "http://" + proxy

                if proxy.startswith("https://"):
                    proxies = {"https": proxy}
                else:
                    proxies = {"https": proxy, "http": proxy}
            else:
                proxies = None

            url = self.url + self.base_path + path

            if self.random_agents:
                self.headers["User-Agent"] = random.choice(self.random_agents)

            # We can't just do `allow_redirects=True` because we set the Host
            # header in the request headers, which would be kept in the next
            # requests (following redirects).
            headers = self.headers.copy()
            for _hop in range(MAX_REDIRECTS):
                request = requests.Request(
                    self.httpmethod,
                    url=url,
                    headers=headers,
                    auth=self.auth,
                    data=self.data,
                )
                prepare = request.prepare()
                # Send the URL exactly as built above rather than the one
                # produced by prepare().
                prepare.url = url

                response = self.session.send(
                    prepare,
                    proxies=proxies,
                    allow_redirects=False,
                    timeout=self.timeout,
                    stream=True,
                    verify=False,
                )
                result = Response(response, redirects)

                # A response with nothing (more) to follow is the answer,
                # whichever request of the budget produced it.
                if not (self.redirect and result.redirect):
                    break

                url = urljoin(url, result.redirect)
                headers["Host"] = url.split("/")[2]
                redirects.append(url)
            else:
                # The whole budget was spent and the server is still
                # redirecting.
                raise requests.exceptions.TooManyRedirects

            return result

        except requests.exceptions.SSLError:
            self.url = self.base_url
            self.set_adapter()
            # Hand the outcome of the fallback attempt back to the caller
            # instead of discarding it.
            # NOTE(review): the recursion is unbounded; it presumably relies
            # on set_adapter() making the next attempt succeed - confirm.
            return self.request(path, proxy=proxy)

        except Exception as e:
            err_msg = str(e)

            # `e` is an exception instance, so it has to be classified with
            # isinstance(); comparing it to the classes never matches.
            # Subclasses are tested before their parents (ConnectTimeout is
            # a ConnectionError, InvalidProxyURL is an InvalidURL).
            if isinstance(e, requests.exceptions.TooManyRedirects):
                simple_err_msg = "Too many redirects: {0}".format(self.base_url)
            elif isinstance(e, requests.exceptions.ProxyError):
                simple_err_msg = "Error with the proxy: {0}".format(proxy)
            elif isinstance(
                e,
                (
                    requests.exceptions.ConnectTimeout,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.Timeout,
                    http.client.IncompleteRead,
                    socket.timeout,
                ),
            ):
                simple_err_msg = "Request timeout: {0}".format(self.base_url)
            elif isinstance(e, requests.exceptions.ConnectionError):
                simple_err_msg = "Cannot connect to: {0}:{1}".format(
                    self.host, self.port
                )
            elif isinstance(e, requests.exceptions.InvalidProxyURL):
                simple_err_msg = "Invalid proxy URL: {0}".format(proxy)
            elif isinstance(e, requests.exceptions.InvalidURL):
                simple_err_msg = "Invalid URL: {0}".format(self.base_url)
            elif isinstance(
                e,
                (
                    requests.exceptions.ChunkedEncodingError,
                    requests.exceptions.StreamConsumedError,
                    requests.exceptions.UnrewindableBodyError,
                ),
            ):
                simple_err_msg = "Failed to read response body: {0}".format(
                    self.base_url
                )
            else:
                simple_err_msg = "There was a problem in the request to: {0}".format(
                    self.base_url
                )

    raise RequestException(simple_err_msg, err_msg)