def get_url_info(url, guess_and_check, max_depth):
    """Collect information about an extracted URL into a new ``Link``.

    The raw URL is stored on the link, a cleaned copy (markdown and other
    artifacts removed) is stored next to it, and the cleaned copy is then
    validated. Only valid URLs are checked for AMP, and only AMP URLs get
    the cache check, the domain extraction and the canonical lookup.

    Args:
        url: The URL exactly as it was extracted.
        guess_and_check: Passed through unchanged to ``get_canonical``.
        max_depth: Passed through unchanged to ``get_canonical``.

    Returns:
        The populated ``Link``; for AMP URLs this is whatever
        ``get_canonical`` returns for that link.
    """
    # NOTE(review): the original line structure of this function was lost;
    # the nesting below assumes the cache check, domain extraction and
    # canonical lookup all apply to AMP URLs only -- confirm.
    info = Link()

    # Keep the URL as extracted alongside the cleaned-up version
    info.url = url
    info.url_clean = remove_markdown(url)
    clean_url = info.url_clean

    # Nothing more to do when the cleaned URL is not valid
    info.url_clean_is_valid = check_if_valid_url(clean_url)
    if not info.url_clean_is_valid:
        return info

    # The remaining steps are only relevant for AMP URLs
    info.is_amp = check_if_amp(clean_url)
    if not info.is_amp:
        return info

    info.is_cached = check_if_cached(clean_url)
    info.domain = tldextract.extract(clean_url).domain
    return get_canonical(info, guess_and_check, max_depth)