def apply_disavow(disavow_entries, urls_list):
    """
    Using a disavow file, tests which of a file of urls would be disavowed
    and which wouldn't.

    Args:
        disavow_entries: Mapping that may hold a 'urls' entry (individual
            links) and/or a 'domains' entry. Either key may be absent.
        urls_list: The urls to test against the disavow entries.

    Returns:
        A dict with:
          * 'disavowed' / 'non_disavowed': the cleaned test urls, split by
            whether they matched a disavowed link or domain.
          * 'total_disavowed_links' / 'total_remaining_links': the lengths
            of those two lists.
          * 'urls_entered_to_test' / 'unique_urls_entered_to_test': counts
            reported by normalize_and_dedupe_with_counts for urls_list.
          * 'disavow_links_entered' / 'unique_disavow_links_entered': only
            present when disavow_entries has a 'urls' key.
          * 'domains_entered' / 'unique_domains_entered': only present when
            disavow_entries has a 'domains' key.
    """
    # Sets give O(1) membership tests inside the loop below.
    # NOTE(review): assumes cleaned urls/domains are hashable (strings).
    disavow_links = set()
    disavow_domains = set()
    disavow_domains_details = None
    output_dict = {}

    if 'urls' in disavow_entries:
        disavow_links_details = normalize_and_dedupe_with_counts(
            disavow_entries['urls'])
        disavow_links = set(disavow_links_details.clean_urls)
        output_dict['disavow_links_entered'] = (
            disavow_links_details.urls_entered)
        output_dict['unique_disavow_links_entered'] = (
            disavow_links_details.unique_urls_entered)

    if 'domains' in disavow_entries:
        disavow_domains_details = normalize_and_dedupe_with_counts(
            disavow_entries['domains'])
        disavow_domains = set(
            subdomains(disavow_domains_details.clean_urls))

    urls_to_test_details = normalize_and_dedupe_with_counts(urls_list)

    disavowed_urls = []
    non_disavowed_urls = []
    for url in urls_to_test_details.clean_urls:
        if url in disavow_links or subdomain(url) in disavow_domains:
            disavowed_urls.append(url)
        else:
            non_disavowed_urls.append(url)

    output_dict['disavowed'] = disavowed_urls
    output_dict['non_disavowed'] = non_disavowed_urls
    # Previously these two keys were read unconditionally, which raised
    # UnboundLocalError whenever 'domains' was missing from disavow_entries.
    # They are now emitted only when domains were supplied, mirroring how
    # the 'urls' counts are handled above.
    if disavow_domains_details is not None:
        output_dict['domains_entered'] = disavow_domains_details.urls_entered
        output_dict['unique_domains_entered'] = (
            disavow_domains_details.unique_urls_entered)
    output_dict.update({
        'urls_entered_to_test': urls_to_test_details.urls_entered,
        'unique_urls_entered_to_test':
            urls_to_test_details.unique_urls_entered,
        'total_disavowed_links': len(disavowed_urls),
        'total_remaining_links': len(non_disavowed_urls),
    })
    return output_dict
def apply_disavow(disavow_entries, urls_list):
    """
    Using a disavow file, tests which of a file of urls would be disavowed
    and which wouldn't.

    Args:
        disavow_entries: Mapping that may hold a 'urls' entry (individual
            links) and/or a 'domains' entry. Either key may be absent.
        urls_list: The urls to test against the disavow entries.

    Returns:
        A dict with:
          * 'disavowed' / 'non_disavowed': the cleaned test urls, split by
            whether they matched a disavowed link or domain.
          * 'total_disavowed_links' / 'total_remaining_links': the lengths
            of those two lists.
          * 'urls_entered_to_test' / 'unique_urls_entered_to_test': counts
            reported by normalize_and_dedupe_with_counts for urls_list.
          * 'disavow_links_entered' / 'unique_disavow_links_entered': only
            present when disavow_entries has a 'urls' key.
          * 'domains_entered' / 'unique_domains_entered': only present when
            disavow_entries has a 'domains' key.
    """
    # Sets give O(1) membership tests inside the loop below.
    # NOTE(review): assumes cleaned urls/domains are hashable (strings).
    disavow_links = set()
    disavow_domains = set()
    disavow_domains_details = None
    output_dict = {}

    if 'urls' in disavow_entries:
        disavow_links_details = normalize_and_dedupe_with_counts(
            disavow_entries['urls'])
        disavow_links = set(disavow_links_details.clean_urls)
        output_dict['disavow_links_entered'] = (
            disavow_links_details.urls_entered)
        output_dict['unique_disavow_links_entered'] = (
            disavow_links_details.unique_urls_entered)

    if 'domains' in disavow_entries:
        disavow_domains_details = normalize_and_dedupe_with_counts(
            disavow_entries['domains'])
        disavow_domains = set(
            subdomains(disavow_domains_details.clean_urls))

    urls_to_test_details = normalize_and_dedupe_with_counts(urls_list)

    disavowed_urls = []
    non_disavowed_urls = []
    for url in urls_to_test_details.clean_urls:
        if url in disavow_links or subdomain(url) in disavow_domains:
            disavowed_urls.append(url)
        else:
            non_disavowed_urls.append(url)

    output_dict['disavowed'] = disavowed_urls
    output_dict['non_disavowed'] = non_disavowed_urls
    # Previously these two keys were read unconditionally, which raised
    # UnboundLocalError whenever 'domains' was missing from disavow_entries.
    # They are now emitted only when domains were supplied, mirroring how
    # the 'urls' counts are handled above.
    if disavow_domains_details is not None:
        output_dict['domains_entered'] = disavow_domains_details.urls_entered
        output_dict['unique_domains_entered'] = (
            disavow_domains_details.unique_urls_entered)
    output_dict.update({
        'urls_entered_to_test': urls_to_test_details.urls_entered,
        'unique_urls_entered_to_test':
            urls_to_test_details.unique_urls_entered,
        'total_disavowed_links': len(disavowed_urls),
        'total_remaining_links': len(non_disavowed_urls),
    })
    return output_dict
def disavow_file_to_dict(file_contents, domain_limit=False):
    """
    Parse the contents of a disavow file and summarise it as a dictionary.

    The file is split into url and domain entries. Urls already matched by
    a domain entry are dropped via apply_disavow, and, when domain_limit is
    truthy, apply_domain_limit is run over the remaining urls and the
    domains it returns are added to the domain entries.

    Args:
        file_contents: Raw disavow file contents, handed to
            import_from_file_contents.
        domain_limit: Falsy to skip the domain-limit step; otherwise passed
            through to apply_domain_limit.

    Returns:
        A dict with 'domain_entries', 'url_entries', 'urls_entered_count',
        'urls_disavowed_count', 'unique_urls_entered_count' and
        'domain_entries_entered_count'.
    """
    parsed = import_from_file_contents(file_contents)

    url_stats = normalize_and_dedupe_with_counts(parsed['urls'])
    remaining_urls = url_stats.clean_urls

    domain_stats = normalize_and_dedupe_with_counts(parsed['domains'])
    existing_domains = subdomains(domain_stats.clean_urls)

    if existing_domains:
        # Keep only the urls that the domain entries do not already cover.
        coverage = apply_disavow({"domains": parsed['domains']},
                                 parsed['urls'])
        remaining_urls = coverage['non_disavowed']

    all_domains = set(existing_domains)

    if domain_limit:
        remaining_urls, added_domains = apply_domain_limit(
            remaining_urls, domain_limit)
        all_domains.update(added_domains)
        if existing_domains:
            # NOTE(review): the values returned here are never read, so
            # they have no effect on the result below. Confirm whether
            # 'domain_entries' was meant to be built from them instead.
            _existing, _added = remove_redundant_domains(
                existing_domains, added_domains)

    return {
        'domain_entries': list(all_domains),
        'url_entries': remaining_urls,
        'urls_entered_count': url_stats.urls_entered,
        'urls_disavowed_count': len(remaining_urls),
        'unique_urls_entered_count': url_stats.unique_urls_entered,
        'domain_entries_entered_count': domain_stats.urls_entered,
    }
def disavow_file_to_dict(file_contents, domain_limit=False):
    """
    Parse the contents of a disavow file and summarise it as a dictionary.

    The file is split into url and domain entries. Urls already matched by
    a domain entry are dropped via apply_disavow, and, when domain_limit is
    truthy, apply_domain_limit is run over the remaining urls and the
    domains it returns are added to the domain entries.

    Args:
        file_contents: Raw disavow file contents, handed to
            import_from_file_contents.
        domain_limit: Falsy to skip the domain-limit step; otherwise passed
            through to apply_domain_limit.

    Returns:
        A dict with 'domain_entries', 'url_entries', 'urls_entered_count',
        'urls_disavowed_count', 'unique_urls_entered_count' and
        'domain_entries_entered_count'.
    """
    parsed = import_from_file_contents(file_contents)

    url_stats = normalize_and_dedupe_with_counts(parsed['urls'])
    remaining_urls = url_stats.clean_urls

    domain_stats = normalize_and_dedupe_with_counts(parsed['domains'])
    existing_domains = subdomains(domain_stats.clean_urls)

    if existing_domains:
        # Keep only the urls that the domain entries do not already cover.
        coverage = apply_disavow({"domains": parsed['domains']},
                                 parsed['urls'])
        remaining_urls = coverage['non_disavowed']

    all_domains = set(existing_domains)

    if domain_limit:
        remaining_urls, added_domains = apply_domain_limit(
            remaining_urls, domain_limit)
        all_domains.update(added_domains)
        if existing_domains:
            # NOTE(review): the values returned here are never read, so
            # they have no effect on the result below. Confirm whether
            # 'domain_entries' was meant to be built from them instead.
            _existing, _added = remove_redundant_domains(
                existing_domains, added_domains)

    return {
        'domain_entries': list(all_domains),
        'url_entries': remaining_urls,
        'urls_entered_count': url_stats.urls_entered,
        'urls_disavowed_count': len(remaining_urls),
        'unique_urls_entered_count': url_stats.unique_urls_entered,
        'domain_entries_entered_count': domain_stats.urls_entered,
    }