Python norms Examples

Programming Language: Python

Namespace/Package Name: urlnorm.urlnorm

Method/Function: norms

Examples at hotexamples.com: 2

Python norms - 2 examples found. These are the top-rated real-world Python examples of urlnorm.urlnorm.norms extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: util.py Project: planBrk/domaincrawler

    def normalize_with_domain(self, raw_url):
        """Return a normalized form of *raw_url* with scheme and host filled in.

        The URL is passed through ``web_domain_to_scheme_url`` and ``norms``
        and then parsed. Only the scheme, netloc and path are kept; the
        params, query and fragment components are replaced by empty strings.

        * An empty scheme is replaced by ``HTTP``.
        * An empty netloc is replaced by ``self._netloc`` when the scheme
          was empty or is one of ``ACCEPTABLE_SCHEMES``.
        * A single trailing ``PATH_SEPARATOR`` is removed from the path.

        Returns ``None`` when *raw_url* is ``None``.
        """
        if raw_url is None:
            return None

        canonical = norms(web_domain_to_scheme_url(raw_url))
        parsed = urlparse(canonical)

        # Keep only the first three components; the rest are blanked below.
        scheme, netloc, path = parsed[0:3]

        scheme_missing = is_empty(parsed.scheme)
        if scheme_missing:
            scheme = HTTP

        netloc_missing = is_empty(parsed.netloc)
        if netloc_missing and (scheme_missing or scheme in ACCEPTABLE_SCHEMES):
            netloc = self._netloc

        if path.endswith(PATH_SEPARATOR):
            path = path[:-1]

        return urlunparse((scheme, netloc, path, '', '', ''))

Example #2

Show file

File: util.py Project: planBrk/domaincrawler

def extract_domain_port(reference_url):
    """Extract the ``(domain, port)`` pair from *reference_url*.

    The input is stripped, lower-cased and passed through
    ``web_domain_to_scheme_url`` before being parsed. The scheme and
    hostname of that parsed URL are validated first; the returned values
    come from a second parse of the ``norms``-normalized URL.

    The port is reported as ``None`` when it equals ``HTTP_DEFAULT_PORT``,
    and a leading ``WWW_PREFIX`` is removed from the hostname.

    Raises:
        ValueError: if the input is empty (per ``is_empty``), the scheme is
            not in ``ACCEPTABLE_SCHEMES``, the hostname is empty, or the
            resulting domain fails ``_is_valid_domain``.
    """
    if is_empty(reference_url):
        raise ValueError("Input URL for domain extraction cannot be null")

    candidate = web_domain_to_scheme_url(reference_url.strip().lower())
    parsed = urlparse(candidate)

    scheme = parsed.scheme
    if scheme is not None and scheme.strip().lower() not in ACCEPTABLE_SCHEMES:
        raise ValueError("The URL scheme must be http or https")

    if is_empty(parsed.hostname):
        raise ValueError("Null or empty domain. Expected domain to be specified in the URL tuple %s " % str(parsed))

    # Port and hostname are taken from the normalized URL, not the raw parse.
    normalized = urlsplit(norms(candidate))
    port = normalized.port
    if port == HTTP_DEFAULT_PORT:
        port = None

    host = normalized.hostname
    if host.startswith(WWW_PREFIX):
        host = host[len(WWW_PREFIX):]

    if not _is_valid_domain(host):
        raise ValueError("Invalid domain provided in the URL")
    return host, port