Beispiel #1
0
def test_get_random_urls():
    """Ask for five random URLs and expect exactly five in ``url_list``."""
    requested = 5

    # Reset the database, then repopulate it with the minimum and maximum
    # URL amount both pinned to 10.
    rest.delete_full_database(full=True)
    rest.create_database(min_url_amount=10, max_url_amount=10)

    payload = rest.get_random_urls(amount=requested)
    print(payload)

    returned_urls = payload["url_list"]
    assert len(returned_urls) == requested
Beispiel #2
0
def test_save_reservations_with_old_entries():
    """Check that ``frontier.save_reservations`` succeeds with a stale entry.

    A frontier is requested for the first fetcher, the reservation row for
    the first returned FQDN is back-dated by two days, and the frontier
    response is then passed to ``frontier.save_reservations`` again. The
    call is expected to return a truthy value.
    """
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=10)

    fetcher_uuid = rest.get_first_fetcher_uuid()

    response = rest.get_simple_frontier(fetcher_uuid)

    fqdn = response["url_frontiers"][0]["fqdn"]

    # Rebuild the response model from the returned dict so it can be handed
    # to ``save_reservations`` below.
    frontier_response = pyd_models.FrontierResponse(
        uuid=fetcher_uuid,
        response_url=response["response_url"],
        latest_return=response["latest_return"],
        url_frontiers_count=response["url_frontiers_count"],
        urls_count=response["urls_count"],
        url_frontiers=response["url_frontiers"],
    )

    reservation_item = (db.query(db_models.FetcherReservation).filter(
        db_models.FetcherReservation.fetcher_uuid == fetcher_uuid).filter(
            db_models.FetcherReservation.fqdn == fqdn).first())
    # ``first()`` yields None when no row matches; fail with a readable
    # message instead of an AttributeError on the assignment below.
    assert reservation_item is not None, (
        f"no reservation found for fetcher {fetcher_uuid!r} and fqdn {fqdn!r}")

    # Back-date the reservation so it counts as an old entry.
    reservation_item.latest_return = datetime.now(tz=timezone.utc) - timedelta(
        days=2)
    db.commit()
    db.refresh(reservation_item)

    assert frontier.save_reservations(db, frontier_response,
                                      datetime.now(tz=timezone.utc))
Beispiel #3
0
def test_get_fetcher_hashes():
    """Expect ``c.ch_hash_amount`` hash entries per created fetcher."""
    fetcher_amount = 10

    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=fetcher_amount)

    hashes = database.get_fetcher_hashes(db)
    print(hashes)

    hash_total = len(hashes)
    assert hash_total == fetcher_amount * c.ch_hash_amount
Beispiel #4
0
def test_create_fqdn_list():
    """A frontier request with ``amount=2`` must produce two FQDN entries."""
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=10)

    first_fetcher = rest.get_first_fetcher_uuid()
    request_kwargs = {"fetcher_uuid": first_fetcher, "amount": 2, "length": 2}
    frontier_request = pyd_models.FrontierRequest(**request_kwargs)

    created = frontier.create_fqdn_list(db, frontier_request)
    assert len(created) == 2
Beispiel #5
0
def test_get_referencing_urls():
    """Creating one connection must raise both URL counters by exactly one.

    The stats are sampled before and after a second ``create_database`` call
    with ``connection_amount=1``; ``url_amount`` and ``url_ref_amount`` are
    each expected to grow by one.
    """
    rest.delete_full_database(full=True)
    rest.create_database()

    # NOTE(review): presumably gives the initial population time to settle
    # before the baseline is read -- confirm.
    sleep(1)
    baseline = rest.get_stats()

    rest.create_database(connection_amount=1)

    current = rest.get_stats()

    for counter in ("url_amount", "url_ref_amount"):
        assert current[counter] == baseline[counter] + 1
Beispiel #6
0
def test_get_fetcher_hash_ranges():
    """Validate the hash ranges returned for the fetcher with the max hash.

    Checks that there are ``c.ch_hash_amount`` ranges, that the last element
    of the last range equals the minimum hash in the database, and that the
    entries at index 1 and 2 are ordered as asserted below.
    """
    fetcher_amount = 5
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=fetcher_amount)

    top_fetcher = db_query.get_fetcher_uuid_with_max_hash(db)
    ranges = database.get_fetcher_hash_ranges(db, top_fetcher)

    print(ranges)
    assert len(ranges) == c.ch_hash_amount

    # NOTE(review): each range looks like (something, lower, upper) --
    # confirm against get_fetcher_hash_ranges.
    last_range = ranges[-1]
    assert last_range[-1] == db_query.get_min_hash(db)

    first_range = ranges[0]
    assert first_range[1] < first_range[2]
    assert last_range[1] > first_range[1]
Beispiel #7
0
def test_generate_example_db_avg_visited_date():
    """Generate an example database and check ``avg_freshness`` is a string.

    Posts a generation request (no fetchers, 10 FQDNs, 10 URLs each,
    ``visited_ratio`` 0.5) to the database endpoint, expects HTTP 202, and
    then reads the stats endpoint.
    """
    rest.delete_full_database(full=True)
    response = client.post(
        c.database_endpoint,
        json={
            "fetcher_amount": 0,
            "fqdn_amount": 10,
            "min_url_amount": 10,
            "max_url_amount": 10,
            "visited_ratio": 0.5,
        },
    )
    # Verify the request was accepted before waiting, so a rejected request
    # fails immediately and is not masked by a later error on the stats.
    assert response.status_code == status.HTTP_202_ACCEPTED

    # NOTE(review): presumably the generation runs in the background after
    # the 202 response, hence the wait -- confirm.
    sleep(3)
    stats = client.get(c.stats_endpoint).json()
    assert isinstance(stats["avg_freshness"], str)
Beispiel #8
0
def test_create_fqdn_list_with_consistent_hashing():
    """In consistent-hashing mode the FQDN list size must lie between 5 and 60.

    Uses 3 fetchers and 100 FQDNs, and requests the list for the fetcher
    returned by ``get_fetcher_uuid_with_max_hash``. Both bounds are exclusive.
    """
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=100)

    top_fetcher = db_query.get_fetcher_uuid_with_max_hash(db)
    frontier_request = pyd_models.FrontierRequest(
        fetcher_uuid=top_fetcher,
        amount=0,
        long_term_part_mode=enum.LONGPART.consistent_hashing,
    )
    assigned = frontier.create_fqdn_list(db, frontier_request)

    print(assigned)

    assert 5 < len(assigned) < 60
Beispiel #9
0
def test_get_fqdn_list_with_fqdn_hash():
    """Compare the frontier count in ``fqdn_hash`` mode with the database.

    The ``url_frontiers_count`` reported for the first fetcher must equal
    the number of ``Frontier`` rows whose ``fqdn_hash_fetcher_index`` is 0.
    """
    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=3, fqdn_amount=50)

    request_body = {
        "fetcher_uuid": rest.get_first_fetcher_uuid(),
        "amount": 0,
        "length": 0,
        "long_term_part_mode": enum.LONGPART.fqdn_hash,
    }
    frontier_reply = rest.get_frontier(json_dict=request_body)
    reported = frontier_reply["url_frontiers_count"]

    index_is_zero = db_models.Frontier.fqdn_hash_fetcher_index == 0
    stored = db.query(db_models.Frontier).filter(index_is_zero).count()

    assert reported == stored
Beispiel #10
0
def test_consistent_hashing_uniformly_distributed():
    """Check that FQDNs are spread evenly across fetchers by hash range.

    The fetcher hashes are sorted and turned into ``[min_hash, max_hash)``
    segments, with the last segment wrapping around to the first hash. The
    ``Frontier`` rows falling into each segment are counted and summed per
    fetcher uuid. The variance of those sums must not exceed five times
    their mean.
    """
    fetcher_amount = 3
    fqdn_amount = 50

    rest.delete_full_database(full=True)
    rest.create_database(fetcher_amount=fetcher_amount,
                         fqdn_amount=fqdn_amount)

    ring = sorted(database.get_fetcher_hashes(db),
                  key=lambda entry: entry["hash"])

    # Each hash opens a segment that ends at the next hash on the ring.
    segments = [{
        "uuid": start["uuid"],
        "min_hash": start["hash"],
        "max_hash": stop["hash"],
    } for start, stop in zip(ring, ring[1:])]
    # The final segment wraps from the largest hash back to the smallest.
    segments.append({
        "uuid": ring[-1]["uuid"],
        "min_hash": ring[-1]["hash"],
        "max_hash": ring[0]["hash"],
    })

    ordered_segments = sorted(segments, key=lambda seg: seg["min_hash"])

    for segment in ordered_segments:
        lower = segment["min_hash"]
        upper = segment["max_hash"]
        # A regular segment needs both bounds to hold; a wrapping segment
        # (lower >= upper) matches hashes on either side of the seam.
        combine = and_ if lower < upper else or_
        in_segment = combine(
            db_models.Frontier.fqdn_hash >= lower,
            db_models.Frontier.fqdn_hash < upper,
        )
        counted = db.query(func.count(
            db_models.Frontier.fqdn)).filter(in_segment)
        segment["url_count"] = counted.first()[0]

    # Sum the segment counts per fetcher.
    totals_by_uuid = defaultdict(int)
    for segment in ordered_segments:
        totals_by_uuid[segment["uuid"]] += segment["url_count"]

    url_counts = list(totals_by_uuid.values())

    expected_segments = fetcher_amount * c.ch_hash_amount
    assert len(ordered_segments) == expected_segments

    sample_size = len(url_counts)
    mean = sum(url_counts) / sample_size
    variance = sum((count - mean)**2 for count in url_counts) / sample_size

    assert variance <= 5 * mean
Beispiel #11
0
def test_query_fqdn_hash_range():
    """``frontier.get_fqdn_hash_range`` must return a ``float``."""
    seed_options = {"fetcher_amount": 3, "fqdn_amount": 50}

    rest.delete_full_database(full=True)
    rest.create_database(**seed_options)

    hash_range = frontier.get_fqdn_hash_range(db)
    assert isinstance(hash_range, float)