def attempt_url_fix(u: str) -> str:
    """Attempt to fix common mistakes made while passing a url.

    Repairs applied, in order:

    * a ``URL`` instance is returned as its string form, untouched;
    * surrounding whitespace is removed;
    * a protocol-relative url (``//host/path``) is given the ``http:`` scheme;
    * a url for which ``_parse`` reports no scheme is prefixed with
      ``http://``;
    * the misspelled scheme ``htttp`` is replaced with ``http``.

    Args:
        u: the url text (or a ``URL`` instance) to repair.

    Returns:
        str: the repaired url.
    """
    if isinstance(u, URL):
        return str(u)

    # Strip before inspecting the text: leading whitespace would otherwise
    # defeat the "//" check, misalign the scheme slice below, and end up
    # embedded in the result (e.g. "http:// example.com").
    u = u.strip()

    if u.startswith("//"):
        # Only the scheme name is missing; the colon must be supplied too,
        # otherwise "//host" becomes the scheme-less text "http//host".
        u = f"http:{u}"

    p = _parse(u)
    if not p.scheme:
        u = f"http://{u}"
    elif p.scheme == "htttp":
        # Drop the misspelled "htttp:" prefix and keep everything after it.
        u = "http:" + u[len("htttp:"):]
    return u
def update_url_meta_data(self) -> None:
    """Get general meta data about the url.

    Issues a HEAD request for ``str(self)`` (following redirects) and keeps
    the response headers and the final url. If the request raises, or the
    response status is an error, the data is obtained from
    ``_abort_request_after(str(self), 1500)`` instead.

    Side effects:
        Sets ``self.has_meta_data`` to ``True``, stores the headers in
        ``self._m_headers`` and replaces ``self._parsed`` with the parsed
        form of the resulting url.
    """
    headers: dict
    url: str
    try:
        ret = req.head(str(self), headers=basic_headers, allow_redirects=True)
        ret.raise_for_status()
        headers, url = ret.headers, ret.url
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        # NOTE(review): 1500 is presumably a size or time limit for the
        # fallback request - confirm against _abort_request_after.
        headers, url = _abort_request_after(str(self), 1500)
    self.has_meta_data = True
    self._m_headers = headers
    self._parsed = _parse(url)
def fetch(
    self,
    _method: str = "get",
    refetch: bool = False,
    update_on_redirect: bool = True,
    **kwargs,
):
    """Use requests library functions on the url.

    Accepts all kwargs that request takes.

    Args:
        _method (str, optional): method of request (case-insensitive).
            Defaults to "get".
        refetch (bool, optional): if the url has to be fetched again.
            When False and a previous response is stored,
            ``warn_refetch(self)`` is called before the request is made.
            Defaults to False.
        update_on_redirect (bool, optional): update url's value in case a
            redirect is faced. Defaults to True.
        **kwargs: forwarded to the session method. ``headers`` defaults to
            ``basic_headers`` and ``allow_redirects`` defaults to True; both
            may be overridden by the caller.

    Raises:
        AttributeError: trying to use a method that request library does
            not support

    Returns:
        Response: Response object from the requests library
    """
    # NOTE(review): this is a truthiness test on the stored response, not an
    # "is not None" check - confirm that is the intended condition.
    if self.request and not refetch:
        warn_refetch(self)

    method = _method.lower()
    if not hasattr(req, method):
        raise AttributeError(
            f"Requests library does not support method {method}"
        )

    # Fill in defaults instead of passing them as explicit keywords next to
    # **kwargs: a caller-supplied allow_redirects would otherwise raise
    # "got multiple values for keyword argument".
    kwargs.setdefault("headers", basic_headers)
    kwargs.setdefault("allow_redirects", True)

    res = getattr(self.session, method)(str(self), **kwargs)
    if update_on_redirect:
        # res.url is the final url after any redirects were followed.
        self._parsed = _parse(res.url)
    self.request = res
    return res
def fetch(
    self,
    _method: str = "get",
    refetch: bool = False,
    update_on_redirect: bool = True,
    **kwargs,
):
    """Use requests library functions on the url.

    Accepts all kwargs that request takes.

    Args:
        _method: name of the request method, case-insensitive.
        refetch: when False and a previous response is stored,
            ``warn_refetch(self)`` is called before the request is made.
        update_on_redirect: when True, ``self._parsed`` is replaced with the
            parsed url of the response.
        **kwargs: forwarded to the session method. ``headers`` defaults to
            ``basic_headers`` and ``allow_redirects`` defaults to True; both
            may be overridden by the caller.

    Raises:
        AttributeError: the requests library has no attribute named after
            the requested method. AttributeError is a subclass of Exception,
            so handlers written for Exception keep working.

    Returns:
        The response object produced by the session call; it is also stored
        on ``self.request``.
    """
    if self.request and not refetch:
        warn_refetch(self)

    method = _method.lower()
    if not hasattr(req, method):
        raise AttributeError(
            f"Requests library does not support method {method}"
        )

    # Defaults are merged into kwargs rather than passed as explicit keywords
    # so that a caller-supplied allow_redirects does not raise
    # "got multiple values for keyword argument".
    kwargs.setdefault("headers", basic_headers)
    kwargs.setdefault("allow_redirects", True)

    res = getattr(self.session, method)(str(self), **kwargs)
    if update_on_redirect:
        self._parsed = _parse(res.url)
    self.request = res
    return res
def __init__(self, _u: str):
    """Build the instance from a raw url value.

    A fresh session is created, the raw value is run through
    ``attempt_url_fix``, and the parsed, normalised result is stored on
    ``self._parsed``.

    Args:
        _u: the raw url to wrap.

    Raises:
        ValueError: ``_u`` is falsey (for example empty or ``None``).
    """
    self.session = req.Session()
    if not _u:
        raise ValueError("Cannot generate URL from a falsey value")
    fixed_url: str = self.attempt_url_fix(_u)
    parsed = _parse(fixed_url)
    self._parsed = _normalise_url(parsed)