def __next__(self):
    """Return the next entry, passed through ``safequote``.

    The read of ``self.entries[self.index]`` and the increment of
    ``self.index`` happen while ``self.condition`` is held.

    Raises:
        StopIteration: when ``self.index`` is past the end of
            ``self.entries``.
    """
    # The with-statement releases the condition on every exit path,
    # including unexpected exceptions, so the lock can never be left held.
    with self.condition:
        try:
            path = self.entries[self.index]
        except IndexError:
            raise StopIteration

        self.index += 1

    # Quoting only touches the local value, so it runs outside the lock.
    return safequote(path)
def __next__(self, base_path=None):
    """Return the next path, passed through ``safequote``.

    ``base_path`` is forwarded unchanged to ``self.next_with_index``. That
    call is expected to yield exactly two items; only the second one (the
    path) is used here.
    """
    indexed_entry = self.next_with_index(base_path)
    # Two-target unpacking keeps the "exactly two items" requirement.
    _position, raw_path = indexed_entry
    return safequote(raw_path)
def __init__(
    self,
    url,
    max_pool=1,
    max_retries=5,
    timeout=20,
    ip=None,
    proxy=None,
    proxylist=None,
    redirect=False,
    request_by_hostname=False,
    httpmethod="get",
    data=None,
    scheme=None,
    random_agents=None,
):
    """Parse ``url`` and store the request settings on the instance.

    Args:
        url: Target URL. If it contains no ``://``, it is re-parsed with
            ``scheme`` (or the placeholder ``"unknown"``) prepended.
        max_pool, max_retries, timeout, ip, proxy, proxylist, redirect,
        request_by_hostname, httpmethod, data, random_agents: stored
            unchanged on the attribute of the same name.
        scheme: Scheme to assume when ``url`` has none. When it is falsy,
            the scheme is obtained from ``self.get_scheme``.

    Raises:
        RequestException: if the URL scheme is not ``http``/``https``, or
            the port in the URL is not an integer.
    """
    self.httpmethod = httpmethod
    self.data = data
    self.headers = {}

    parsed = urlparse(url)

    # If no scheme specified, mark it as "unknown" so it can be detected later
    if "://" not in url:
        parsed = urlparse("{0}://{1}".format(scheme or "unknown", url))

    self.base_path = parsed.path
    if parsed.path.startswith("/"):
        self.base_path = parsed.path[1:]

    # Safe quote all special characters in base_path to prevent from being encoded
    self.base_path = safequote(self.base_path)
    self.host = parsed.netloc.split(":")[0]

    port_for_scheme = {"http": 80, "https": 443, "unknown": 0}

    if parsed.scheme not in ("unknown", "https", "http"):
        # self.scheme is not assigned yet at this point, so the message
        # must be built from the parsed value.
        raise RequestException(
            "Unsupported URI scheme: {0}".format(parsed.scheme)
        )

    # If no port specified, set default (80, 443)
    try:
        self.port = int(parsed.netloc.split(":")[1])
    except IndexError:
        self.port = port_for_scheme[parsed.scheme]
    except ValueError:
        raise RequestException(
            "Invalid port number: {0}".format(parsed.netloc.split(":")[1])
        )

    # If no scheme is found, detect it by port number
    if parsed.scheme != "unknown":
        self.scheme = parsed.scheme
    else:
        self.scheme = self.get_scheme(self.port)

    # If the user neither provide the port nor scheme, guess them based
    # on standard website characteristics
    if not self.scheme:
        self.scheme = "https" if self.get_scheme(443) == "https" else "http"
        self.port = port_for_scheme[self.scheme]

    # Set the Host header explicitly from the parsed host
    self.headers["Host"] = self.host

    # Include port in Host header if it's non-standard
    if (self.scheme == "https" and self.port != 443) or (
        self.scheme == "http" and self.port != 80
    ):
        self.headers["Host"] += ":{0}".format(self.port)

    self.max_retries = max_retries
    self.max_pool = max_pool
    self.timeout = timeout
    self.pool = None
    self.proxy = proxy
    self.proxylist = proxylist
    self.redirect = redirect
    self.random_agents = random_agents
    self.auth = None
    self.request_by_hostname = request_by_hostname
    self.ip = ip
    # The Host header value already carries the port when it is non-standard
    self.base_url = self.url = "{0}://{1}/".format(
        self.scheme,
        self.headers["Host"],
    )
def __init__(
    self,
    url,
    max_pool=1,
    max_retries=5,
    timeout=20,
    ip=None,
    proxy=None,
    proxylist=None,
    redirect=False,
    request_by_hostname=False,
    httpmethod="get",
    data=None,
    scheme=None,
):
    """Parse ``url``, resolve the target address and prepare the session.

    Args:
        url: Target URL. If it contains no ``://``, it is re-parsed with
            ``scheme`` prepended (``http`` when ``scheme`` is falsy).
        max_pool, max_retries, timeout, proxy, proxylist, redirect,
        request_by_hostname, httpmethod, data: stored unchanged on the
            attribute of the same name.
        ip: Address to use instead of resolving the host name.
        scheme: Scheme to assume when ``url`` has none.

    Raises:
        RequestException: if the scheme is not ``http``/``https``, the
            host name cannot be resolved, or the port is not an integer.
    """
    self.httpmethod = httpmethod
    self.data = data
    self.headers = {}

    parsed = urlparse(url)

    # If no protocol specified, set http by default
    if "://" not in url:
        parsed = urlparse("{0}://{1}".format(scheme or "http", url))

    # If protocol is not supported (checked for explicit and assumed schemes alike)
    if parsed.scheme not in ("https", "http"):
        raise RequestException({
            "message": "Unsupported URL scheme: {0}".format(parsed.scheme)
        })

    self.base_path = parsed.path
    if parsed.path.startswith("/"):
        self.base_path = parsed.path[1:]

    # Safe quote all special characters in base_path to prevent from being encoded
    self.base_path = safequote(self.base_path)
    self.protocol = parsed.scheme
    self.host = parsed.netloc.split(":")[0]

    # Always define the attribute: the proxy branch below performs no
    # resolution, and self.url is built from it further down.
    self.ip = None

    # Resolve DNS to decrease overhead
    if ip:
        self.ip = ip
    # A proxy could have a different DNS that would resolve the name. Therefore,
    # resolving the name when using proxy to raise an error is pointless
    elif not proxy and not proxylist:
        try:
            self.ip = socket.gethostbyname(self.host)
        except socket.gaierror:
            # Check if hostname resolves to IPv6 address only
            try:
                self.ip = socket.getaddrinfo(
                    self.host, None, socket.AF_INET6
                )[0][4][0]
            except socket.gaierror:
                raise RequestException({"message": "Couldn't resolve DNS"})

    # If no port specified, set default (80, 443)
    try:
        self.port = int(parsed.netloc.split(":")[1])
    except IndexError:
        self.port = 443 if self.protocol == "https" else 80
    except ValueError:
        raise RequestException({
            "message": "Invalid port number: {0}".format(
                parsed.netloc.split(":")[1]
            )
        })

    # Set the Host header, this will be overwritten if the user has already set the header
    self.headers["Host"] = self.host

    # Include port in Host header if it's non-standard
    if (self.protocol == "https" and self.port != 443) or (
        self.protocol == "http" and self.port != 80
    ):
        self.headers["Host"] += ":{0}".format(self.port)

    self.max_retries = max_retries
    self.max_pool = max_pool
    self.timeout = timeout
    self.pool = None
    self.proxy = proxy
    self.proxylist = proxylist
    self.redirect = redirect
    self.random_agents = None
    self.auth = None
    self.request_by_hostname = request_by_hostname
    self.session = requests.Session()

    # Fall back to the host name when no address was resolved (proxy in use),
    # instead of failing on a missing attribute.
    use_hostname = self.request_by_hostname or self.ip is None
    self.url = "{0}://{1}:{2}/".format(
        self.protocol,
        self.host if use_hostname else self.ip,
        self.port,
    )
    self.base_url = "{0}://{1}:{2}/".format(
        self.protocol,
        self.host,
        self.port,
    )
    self.set_adapter()