Example #1
0
def get_url_info(url, guess_and_check, max_depth):
    # Create a new Link instance
    link = Link()
    # Save the extracted URL
    link.url = url
    # Remove markdown and other artifacts from the URL
    link.url_clean = remove_markdown(url)
    # Check if the clean URL is valid, if so continue with the next steps
    link.url_clean_is_valid = check_if_valid_url(link.url_clean)
    if link.url_clean_is_valid:
        link.is_amp = check_if_amp(link.url_clean)
        if link.is_amp:
            link.is_cached = check_if_cached(link.url_clean)
            link.domain = tldextract.extract(link.url_clean).domain
            link = get_canonical(link, guess_and_check, max_depth)

    return link