def testGettingLinks(self):
    """Print an archive link for each record found for susodigital.com.

    Queries ``ArchiveOrg.get_url_info`` with ``min_size=1`` and
    ``limit=-100`` and prints the result of
    ``ArchiveOrg.get_archive_link`` for every returned record. Nothing is
    asserted; the output is meant to be inspected by hand.
    """
    records = ArchiveOrg.get_url_info(
        "http://susodigital.com",
        min_size=1,
        limit=-100,
    )
    for record in records:
        print(ArchiveOrg.get_archive_link(record))
def testGetBestProfile(self):
    """Print the best archive profile link and its rate for susodigital.com.

    Calls ``ArchiveOrg.get_best_archive`` with fixed search settings,
    converts the returned archive record to a link and prints it together
    with the returned rate. Nothing is asserted.
    """
    # NOTE(review): a method with this name is defined more than once in
    # the visible source; if both live in the same class, only the last
    # definition is kept by Python.
    search_options = {
        "root_domain": "susodigital.com",
        "thread_size": 100,
        "profile_check": 10,
        "pass_threshold": 0.7,
        "res_limit": 2000,
    }
    best_archive, pass_rate = ArchiveOrg.get_best_archive(**search_options)
    print(
        "best profile:",
        ArchiveOrg.get_archive_link(best_archive),
        "rate:",
        pass_rate,
    )
def testGettingLinksVariation(self):
    """Count how many archived bbc.co.uk resources fail ``test_response``.

    Steps:
      1. Take the first record returned by ``ArchiveOrg.get_url_info`` for
         the site and, when it is an ``ArchiveStruct``, remember its
         ``date_stamp`` (otherwise an empty string is used).
      2. Fetch up to 2000 records via ``ArchiveOrg.get_domain_urls``,
         overwrite each record's ``date_stamp`` with the remembered one and
         build its archive link.
      3. Run ``test_response`` on every link using a 100-worker thread pool
         and print the total and the number of results equal to ``False``.

    Nothing is asserted; the printed counts are meant to be inspected.
    """
    # NOTE(review): a method with this name is defined more than once in
    # the visible source; if both live in the same class, only the last
    # definition is kept by Python.
    url = "http://bbc.co.uk"
    latest = ArchiveOrg.get_url_info(url, min_size=1, limit=-1)[0]
    timestamp = latest.date_stamp if isinstance(latest, ArchiveStruct) else ""

    info = ArchiveOrg.get_domain_urls(url, limit=2000)
    res_count = len(info)
    links = []
    for item in info:
        item.date_stamp = timestamp
        links.append(ArchiveOrg.get_archive_link(item))

    # Create the pool only once there is work for it, and always shut it
    # down: previously the 100 worker threads were never closed or joined.
    test_pool = pool.ThreadPool(processes=100)
    try:
        results = [
            test_pool.apply_async(func=test_response, args=(x,))
            for x in links
        ]
        returned = [y.get() for y in results]
    finally:
        test_pool.close()
        test_pool.join()

    # Deliberately ``== False`` rather than ``not result``: only results
    # equal to False count as broken (e.g. ``None`` is not counted).
    broken_res_count = sum(
        1 for result in returned if result == False  # noqa: E712
    )
    print("total:", res_count, " broken res:", broken_res_count)
def testGettingLinksVariation(self):
    """Report how many archived bbc.co.uk links are flagged by ``test_response``.

    The date stamp of the first ``ArchiveOrg.get_url_info`` record (when
    that record is an ``ArchiveStruct``; otherwise ``""``) is written onto
    every record returned by ``ArchiveOrg.get_domain_urls`` (limit 2000).
    The resulting archive links are checked concurrently with
    ``test_response`` on a 100-worker thread pool, and the total count and
    the count of results equal to ``False`` are printed.

    Nothing is asserted; the printed counts are meant to be inspected.
    """
    # NOTE(review): a method with this name is defined more than once in
    # the visible source; if both live in the same class, only the last
    # definition is kept by Python.
    url = "http://bbc.co.uk"
    latest = ArchiveOrg.get_url_info(url, min_size=1, limit=-1)[0]
    timestamp = ""
    if isinstance(latest, ArchiveStruct):
        timestamp = latest.date_stamp

    info = ArchiveOrg.get_domain_urls(url, limit=2000)
    res_count = len(info)
    links = []
    for item in info:
        item.date_stamp = timestamp
        links.append(ArchiveOrg.get_archive_link(item))

    # The pool used to be created up front and never released, leaking its
    # worker threads. It is now created right before use and always
    # closed and joined, even if a check raises.
    test_pool = pool.ThreadPool(processes=100)
    try:
        pending = [
            test_pool.apply_async(func=test_response, args=(link,))
            for link in links
        ]
        returned = [job.get() for job in pending]
    finally:
        test_pool.close()
        test_pool.join()

    # Keep the original ``== False`` comparison so that only results equal
    # to False are counted (``None`` or other falsy values are not).
    broken_res_count = 0
    for result in returned:
        if result == False:  # noqa: E712
            broken_res_count += 1
    print("total:", res_count, " broken res:", broken_res_count)
def testGetBestProfile(self):
    """Look up the best archive for susodigital.com and print its link and rate.

    ``ArchiveOrg.get_best_archive`` returns an ``(archive, rate)`` pair;
    the archive is turned into a link with ``ArchiveOrg.get_archive_link``
    and both values are printed. Nothing is asserted.
    """
    # NOTE(review): a method with this name is defined more than once in
    # the visible source; if both live in the same class, this later
    # definition is the one Python keeps.
    outcome = ArchiveOrg.get_best_archive(
        root_domain="susodigital.com",
        thread_size=100,
        profile_check=10,
        pass_threshold=0.7,
        res_limit=2000,
    )
    chosen_archive, chosen_rate = outcome
    link_to_profile = ArchiveOrg.get_archive_link(chosen_archive)
    print("best profile:", link_to_profile, "rate:", chosen_rate)