def test_get_random_urls():
    """Requesting five random URLs yields exactly five entries in "url_list"."""
    rest.delete_full_database(full=True)
    rest.create_database(min_url_amount=10, max_url_amount=10)

    random_urls = rest.get_random_urls(amount=5)
    print(random_urls)

    assert len(random_urls["url_list"]) == 5
def test_save_reservations_with_old_entries():
    """save_reservations succeeds even when a reservation is two days stale."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=10)

    fetcher_uuid = rest.get_first_fetcher_uuid()
    simple_frontier = rest.get_simple_frontier(fetcher_uuid)
    fqdn = simple_frontier["url_frontiers"][0]["fqdn"]

    frontier_response = pyd_models.FrontierResponse(
        uuid=fetcher_uuid,
        response_url=simple_frontier["response_url"],
        latest_return=simple_frontier["latest_return"],
        url_frontiers_count=simple_frontier["url_frontiers_count"],
        urls_count=simple_frontier["urls_count"],
        url_frontiers=simple_frontier["url_frontiers"],
    )

    reservation = (
        db.query(db_models.FetcherReservation)
        .filter(db_models.FetcherReservation.fetcher_uuid == fetcher_uuid)
        .filter(db_models.FetcherReservation.fqdn == fqdn)
        .first()
    )
    # Age the reservation by two days so it counts as an "old" entry.
    reservation.latest_return = datetime.now(tz=timezone.utc) - timedelta(days=2)
    db.commit()
    db.refresh(reservation)

    assert frontier.save_reservations(
        db, frontier_response, datetime.now(tz=timezone.utc)
    )
def test_get_fetcher_hashes():
    """Each fetcher contributes c.ch_hash_amount entries to the hash list."""
    expected_fetchers = 10
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=expected_fetchers)

    hashes = database.get_fetcher_hashes(db)
    print(hashes)

    assert len(hashes) == expected_fetchers * c.ch_hash_amount
def test_create_fqdn_list():
    """create_fqdn_list returns as many FQDNs as the request asks for."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=10)

    first_uuid = rest.get_first_fetcher_uuid()
    request = pyd_models.FrontierRequest(fetcher_uuid=first_uuid, amount=2, length=2)

    assert len(frontier.create_fqdn_list(db, request)) == 2
def test_get_referencing_urls():
    """One new connection increments both url_amount and url_ref_amount by one."""
    rest.delete_full_database(full=True)
    rest.create_database()
    # NOTE(review): presumably lets the initial create settle before sampling
    # the baseline stats — confirm whether this wait is still needed.
    sleep(1)

    before = rest.get_stats()
    rest.create_database(connection_amount=1)
    after = rest.get_stats()

    assert after["url_amount"] == before["url_amount"] + 1
    assert after["url_ref_amount"] == before["url_ref_amount"] + 1
def test_get_fetcher_hash_ranges():
    """Hash ranges for the max-hash fetcher have the expected shape and ordering."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=5)

    max_hash_uuid = db_query.get_fetcher_uuid_with_max_hash(db)
    hash_ranges = database.get_fetcher_hash_ranges(db, max_hash_uuid)
    print(hash_ranges)

    assert len(hash_ranges) == c.ch_hash_amount
    # The final range wraps around to the smallest hash on the ring.
    assert hash_ranges[-1][-1] == db_query.get_min_hash(db)
    assert hash_ranges[0][1] < hash_ranges[0][2]
    assert hash_ranges[-1][1] > hash_ranges[0][1]
def test_create_fqdn_list_with_consistent_hashing():
    """In consistent-hashing mode, the max-hash fetcher gets a plausible share."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=100)

    target_uuid = db_query.get_fetcher_uuid_with_max_hash(db)
    request = pyd_models.FrontierRequest(
        fetcher_uuid=target_uuid,
        amount=0,
        long_term_part_mode=enum.LONGPART.consistent_hashing,
    )

    fqdn_list = frontier.create_fqdn_list(db, request)
    print(fqdn_list)

    # 100 FQDNs across 3 fetchers: the share should be neither tiny nor huge.
    assert 5 < len(fqdn_list) < 60
def test_get_fqdn_list_with_fqdn_hash():
    """In fqdn_hash mode the reported frontier count matches the DB count for index 0."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=50)

    first_uuid = rest.get_first_fetcher_uuid()
    response = rest.get_frontier(
        json_dict={
            "fetcher_uuid": first_uuid,
            "amount": 0,
            "length": 0,
            "long_term_part_mode": enum.LONGPART.fqdn_hash,
        }
    )

    expected_count = (
        db.query(db_models.Frontier)
        .filter(db_models.Frontier.fqdn_hash_fetcher_index == 0)
        .count()
    )
    assert response["url_frontiers_count"] == expected_count
def test_consistent_hashing_uniformly_distributed():
    """URL counts are spread roughly evenly across fetchers by consistent hashing.

    Builds the hash ring from the fetcher hashes, counts the frontier entries
    owned by each arc, sums them per fetcher uuid, and asserts the variance of
    the per-fetcher counts stays within 5x the mean.
    """
    fetcher_amount = 3
    fqdn_amount = 50
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=fetcher_amount, fqdn_amount=fqdn_amount)

    fetcher_hashes = database.get_fetcher_hashes(db)
    hashes_sorted = sorted(fetcher_hashes, key=lambda k: k["hash"])

    # Each hash owns the arc up to the next hash; the last wraps to the first.
    hash_ranges = []
    for i in range(len(hashes_sorted) - 1):
        hash_ranges.append(
            dict(
                uuid=hashes_sorted[i]["uuid"],
                min_hash=hashes_sorted[i]["hash"],
                max_hash=hashes_sorted[i + 1]["hash"],
            )
        )
    hash_ranges.append(
        dict(
            uuid=hashes_sorted[-1]["uuid"],
            min_hash=hashes_sorted[-1]["hash"],
            max_hash=hashes_sorted[0]["hash"],
        )
    )

    hash_ranges_sorted = sorted(hash_ranges, key=lambda k: k["min_hash"])

    # Count the frontier entries falling into each arc.  BUGFIX: the loop
    # variable no longer shadows the list it iterates over, and the query is
    # built once with the branch only selecting the predicate.
    for hash_range in hash_ranges_sorted:
        if hash_range["min_hash"] < hash_range["max_hash"]:
            predicate = and_(
                db_models.Frontier.fqdn_hash >= hash_range["min_hash"],
                db_models.Frontier.fqdn_hash < hash_range["max_hash"],
            )
        else:
            # Wrap-around arc: everything above min_hash or below max_hash.
            predicate = or_(
                db_models.Frontier.fqdn_hash >= hash_range["min_hash"],
                db_models.Frontier.fqdn_hash < hash_range["max_hash"],
            )
        hash_range["url_count"] = (
            db.query(func.count(db_models.Frontier.fqdn))
            .filter(predicate)
            .first()[0]
        )

    # Sum the arc counts per fetcher uuid.
    counts_per_fetcher = defaultdict(int)
    for hash_range in hash_ranges_sorted:
        counts_per_fetcher[hash_range["uuid"]] += hash_range["url_count"]
    url_counts = list(counts_per_fetcher.values())

    assert len(hash_ranges_sorted) == fetcher_amount * c.ch_hash_amount

    mean = sum(url_counts) / len(url_counts)
    variance = sum((xi - mean) ** 2 for xi in url_counts) / len(url_counts)
    assert variance <= 5 * mean
def test_query_fqdn_hash_range():
    """get_fqdn_hash_range returns a float for a populated database."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=50)

    assert isinstance(frontier.get_fqdn_hash_range(db), float)