Exemple #1
0
def isresource(url, *args, **kwargs):
    """Return True when *url* looks like a resource rather than an HTML page.

    A HEAD request is sent to the normalized URL. The URL is treated as a
    resource when the response carries a ``Content-Disposition`` header, or
    when its media type does not contain ``html``. If the response has no
    ``Content-Type`` header, the media type is guessed from the file name
    extracted from the URL (only when that name has an extension).

    Args:
        url: The URL to probe; it is passed through ``format.url`` first.
        *args: Extra positional arguments forwarded to ``requests.head``.
        **kwargs: Extra keyword arguments forwarded to ``requests.head``.
            ``allow_redirects`` defaults to True and ``verify`` to False
            unless the caller supplies them.

    Returns:
        bool: True if the URL appears to be a non-HTML resource.
    """
    url = format.url(url)

    kwargs.setdefault('allow_redirects', True)
    # NOTE(review): certificate verification is off by default here; callers
    # that need TLS validation must pass verify=True explicitly.
    kwargs.setdefault('verify', False)
    r = requests.head(url, *args, **kwargs)

    if 'content-disposition' in r.headers:
        return True

    mime = ""
    content = r.headers.get('content-type')
    if content:
        # Keep only the media type. Splitting on "charset=" (as before) left
        # `mime` empty for headers without a charset parameter, so a plain
        # "text/html" response was wrongly reported as a resource.
        mime = content.partition(";")[0].strip().lower()
    else:
        from . import parse

        name = parse.name(url)
        ext = os.path.splitext(name)[1]
        if ext:
            # Fall back to "" so an unknown type cannot break the `in` test.
            mime = parse.mime(name) or ""

    return 'html' not in mime
Exemple #2
0
def url_to_name(url, strict=False):
    """Derive a short name from *url*.

    Candidates are tried in order and the first non-empty one wins:

    1. the last ``/``-separated segment of the path;
    2. the text after the first ``=`` of the query string (or the whole
       query when it has no ``=``), cut at the first ``&``;
    3. the part of the network location before its first ``.``.

    Args:
        url: The URL to inspect; it is passed through ``format.url`` first.
        strict: When true, return the candidate with surrounding whitespace
            stripped; otherwise return it processed by ``format.name``.

    Returns:
        The derived name.
    """
    parts = urllib.parse.urlparse(format.url(url))

    candidate = (
        parts.path.rsplit('/', 1)[-1]
        or parts.query.split('=', 1)[-1].partition('&')[0]
        or parts.netloc.partition('.')[0]
    )

    if strict:
        return candidate.strip()
    return format.name(candidate)
Exemple #3
0
def name(url):
    """Extract a whitespace-stripped name from *url*.

    The URL is normalized with ``format.url`` and parsed. The last path
    segment is returned if non-empty; otherwise the query value (text after
    the first ``=``, up to the first ``&``); otherwise the leading label of
    the network location.
    """
    parsed = urllib.parse.urlparse(format.url(url))

    # Preferred source: the final component of the path.
    last_segment = parsed.path.rsplit('/', 1)[-1]
    if last_segment:
        return last_segment.strip()

    # Next: whatever follows the first '=' in the query, cut at the first '&'.
    after_equals = parsed.query.split('=', 1)[-1]
    query_value = after_equals.partition('&')[0]
    if query_value:
        return query_value.strip()

    # Last resort: the host part before its first dot.
    return parsed.netloc.partition('.')[0].strip()
Exemple #4
0
def isonline(url, *args, **kwargs):
    """Probe *url* with a HEAD request and report whether it is reachable.

    Args:
        url: The URL to probe; it is passed through ``format.url`` first.
        *args: Extra positional arguments forwarded to ``requests.head``.
        **kwargs: Extra keyword arguments forwarded to ``requests.head``.
            ``allow_redirects`` defaults to True and ``verify`` to False
            unless the caller supplies them.

    Returns:
        True when the request succeeds with a non-error status or fails with
        too many redirects; None on a connection error or connect timeout;
        False on any other ``requests`` exception (including the HTTP error
        raised for an error status).
    """
    target = format.url(url)

    kwargs.setdefault('allow_redirects', True)
    kwargs.setdefault('verify', False)

    try:
        response = requests.head(target, *args, **kwargs)
        response.raise_for_status()
    except requests.TooManyRedirects:
        # A redirect loop still counts as online.
        return True
    except (requests.ConnectionError, requests.ConnectTimeout):
        # Could not reach the host at all: status is unknown.
        return None
    except requests.RequestException:
        return False

    return True
Exemple #5
0
def domain(url):
    """Return ``tld.get_tld`` applied to the normalized form of *url*.

    The URL is first passed through ``format.url``. ``fail_silently=True`` is
    given to ``tld.get_tld`` (presumably so that an unrecognised URL yields
    an empty result instead of raising; confirm against the tld docs).
    """
    normalized = format.url(url)
    return tld.get_tld(normalized, fail_silently=True)
Exemple #6
0
def isurl(url):
    """Return True if *url*, once normalized, is a valid URL, else False.

    The URL is passed through ``format.url`` and then checked with
    ``validators.url``.

    Returns:
        bool: Always a real boolean.
    """
    url = format.url(url)
    try:
        # validators.url signals failure by *returning* a falsy failure
        # object rather than raising it, so coerce the result to a plain
        # bool instead of leaking that object to callers.
        return bool(validators.url(url))
    except validators.ValidationFailure:
        # Kept for safety in case a validators version raises the failure.
        return False