def isresource(url, *args, **kwargs):
    """Return True when *url* appears to point at a non-HTML resource.

    A HEAD request is issued (following redirects and skipping TLS
    verification unless the caller overrides those keyword arguments).
    The decision is then made, in order, from:

    1. a ``Content-Disposition`` header being present -> resource;
    2. the media type of the ``Content-Type`` header;
    3. when no ``Content-Type`` is sent, the MIME type guessed from the
       file name in the URL (only if that name has an extension).

    Anything whose media type does not mention ``html`` (including the case
    where no media type could be determined) is reported as a resource.

    Args:
        url: The URL to probe; normalised through ``format.url`` first.
        *args: Extra positional arguments forwarded to ``requests.head``.
        **kwargs: Extra keyword arguments forwarded to ``requests.head``.

    Returns:
        bool: True for a resource, False for an HTML page.

    Raises:
        Whatever ``requests.head`` raises; no exception is handled here.
    """
    url = format.url(url)
    kwargs.setdefault('allow_redirects', True)
    kwargs.setdefault('verify', False)
    response = requests.head(url, *args, **kwargs)
    headers = response.headers

    if 'content-disposition' in headers:
        return True

    mime = ""
    content_type = headers.get('content-type')
    if content_type:
        # The media type is everything before the first ';'. Splitting on
        # "charset=" (as was done previously) produced an empty media type
        # whenever the header carried no charset parameter, so a plain
        # "text/html" response was misreported as a resource.
        mime = content_type.partition(';')[0].strip().lower()
    else:
        from . import parse
        name = parse.name(url)
        ext = os.path.splitext(name)[1]
        if ext:
            # NOTE(review): assumes parse.mime returns a value that supports
            # the `in` test below (e.g. a string) -- confirm.
            mime = parse.mime(name)

    return 'html' not in mime
def url_to_name(url, strict=False):
    """Derive a short name from *url*.

    Candidates are tried in order until one is non-empty:

    1. the last ``/``-separated segment of the path;
    2. the text after the first ``=`` of the query string (or the whole
       query when it has no ``=``), cut at the first ``&``;
    3. the first dot-separated label of the network location.

    Args:
        url: The URL to name; normalised through ``format.url`` first.
        strict: When true, return the candidate with surrounding whitespace
            stripped. Otherwise pass it through ``format.name``.

    Returns:
        The stripped candidate, or the result of ``format.name(candidate)``.
    """
    parts = urllib.parse.urlparse(format.url(url))

    candidate = parts.path.rsplit('/', 1)[-1]
    if not candidate:
        candidate = parts.query.split('=', 1)[-1].split('&', 1)[0]
    if not candidate:
        candidate = parts.netloc.split('.', 1)[0]

    if strict:
        return candidate.strip()
    return format.name(candidate)
def name(url):
    """Return a whitespace-stripped name derived from *url*.

    The first non-empty value among the following is used: the last path
    segment; the first query value (text after the first ``=`` up to the
    first ``&``, or the leading part of the query when it has no ``=``);
    the first dot-separated label of the network location.

    Args:
        url: The URL to name; normalised through ``format.url`` first.

    Returns:
        str: The chosen candidate with surrounding whitespace removed
        (an empty string when every candidate is empty).
    """
    parsed = urllib.parse.urlparse(format.url(url))

    # `or` evaluates lazily, so later fallbacks are only computed when the
    # earlier candidates are empty.
    chosen = (
        parsed.path.split('/')[-1]
        or parsed.query.split('=', 1)[-1].split('&', 1)[0]
        or parsed.netloc.split('.', 1)[0]
    )
    return chosen.strip()
def isonline(url, *args, **kwargs):
    """Probe *url* with a HEAD request and report whether it is reachable.

    Redirects are followed and TLS verification is skipped unless the caller
    overrides ``allow_redirects`` / ``verify`` in *kwargs*.

    Args:
        url: The URL to probe; normalised through ``format.url`` first.
        *args: Extra positional arguments forwarded to ``requests.head``.
        **kwargs: Extra keyword arguments forwarded to ``requests.head``.

    Returns:
        True when the request succeeds without an error status, or when it
        fails with ``requests.TooManyRedirects``.
        None when a ``requests.ConnectionError`` / ``requests.ConnectTimeout``
        is raised.
        False for any other ``requests.RequestException`` (which includes
        the error raised by ``raise_for_status``).
    """
    target = format.url(url)
    kwargs.setdefault('allow_redirects', True)
    kwargs.setdefault('verify', False)

    try:
        response = requests.head(target, *args, **kwargs)
        response.raise_for_status()
    except requests.TooManyRedirects:
        # A redirect loop still counts as online.
        return True
    except (requests.ConnectionError, requests.ConnectTimeout):
        return None
    except requests.RequestException:
        return False
    return True
def domain(url):
    """Return the result of ``tld.get_tld`` for the normalised *url*.

    The URL is first passed through ``format.url``. ``fail_silently=True`` is
    forwarded to ``tld.get_tld``; presumably this makes it return ``None``
    rather than raise for unrecognised input -- confirm against the ``tld``
    documentation.
    """
    normalized = format.url(url)
    return tld.get_tld(normalized, fail_silently=True)
def isurl(url):
    """Return True when *url* (after ``format.url`` normalisation) is valid.

    ``validators.url`` signals failure by *returning* a falsy
    ``ValidationFailure`` object rather than raising it, so the result is
    coerced to a strict ``bool``. Callers then always receive ``True`` or
    ``False``, never the failure object.

    Args:
        url: The URL to validate.

    Returns:
        bool: True if the URL validates, False otherwise.
    """
    url = format.url(url)
    try:
        return bool(validators.url(url))
    except validators.ValidationFailure:
        # Kept for backward compatibility in case the validator is
        # configured to raise instead of returning the failure object.
        return False