def test_delete_duplicates_multiple_accounts(event_producer, db_create_host, db_get_host, inventory_config):
    """Identical canonical facts under two different accounts are not deduplicated.

    One host is stored per account with the same canonical facts; the
    duplicate-deletion run must report zero deletions and both hosts must
    still be retrievable afterwards.
    """
    shared_facts = {
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
    }

    # Store the hosts one account at a time: the first is persisted before the second is built.
    host_ids = []
    for account_number in ("111111", "222222"):
        candidate = minimal_db_host(canonical_facts=shared_facts, account=account_number)
        host_ids.append(db_create_host(host=candidate).id)

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]
    with multi_session_guard(db_sessions):
        removed = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    assert removed == 0
    for host_id in host_ids:
        assert db_get_host(host_id)
def test_delete_duplicates_elevated_ids_not_matching(
    event_producer, db_create_host, db_get_host, inventory_config, tested_id
):
    """Nothing is deleted when every host carries a different ``tested_id`` value.

    Three groups of hosts are stored -- the full fact set, only ``tested_id``,
    and the full set plus ``ip_addresses`` -- each host receiving a freshly
    generated ``tested_id``.  The run must delete nothing and every host must
    still be retrievable.
    """
    facts = {
        "provider_id": generate_uuid(),
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
    }
    # Shape the fact set for the ID under test.
    if tested_id == "provider_id":
        facts["provider_type"] = "aws"
    elif tested_id == "insights_id":
        facts.pop("provider_id", None)
    elif tested_id == "subscription_manager_id":
        facts.pop("provider_id", None)
        facts.pop("insights_id", None)

    hosts_per_group = 10
    stored_hosts = []

    def _store(host_facts):
        # Build a host from the given facts, persist it and remember it.
        stored_hosts.append(db_create_host(host=minimal_db_host(canonical_facts=host_facts)))

    # Hosts with the same amount of canonical facts
    for _ in range(hosts_per_group):
        facts[tested_id] = generate_uuid()
        _store(facts)

    # Hosts with less canonical facts
    for _ in range(hosts_per_group):
        _store({tested_id: generate_uuid()})

    # Hosts with more canonical facts
    for _ in range(hosts_per_group):
        facts[tested_id] = generate_uuid()
        facts["ip_addresses"] = ["10.0.0.10"]
        _store(facts)

    for stored in stored_hosts:
        assert db_get_host(stored.id)

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]
    with multi_session_guard(db_sessions):
        removed = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    assert removed == 0
    for stored in stored_hosts:
        assert db_get_host(stored.id)
def test_delete_duplicates_without_elevated_matching(event_producer, db_create_host, db_get_host, inventory_config):
    """All hosts built from the same bios/satellite/fqdn/ip/mac facts collapse into the last one created.

    One host is stored per single fact, followed by ``host_count`` hosts with
    the complete fact set.  After the run every host except the most recently
    created one must be gone.
    """
    full_facts = {
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
        "ip_addresses": ["10.0.0.1"],
        "mac_addresses": ["aa:bb:cc:dd:ee:ff"],
    }

    host_count = 10
    stored_hosts = []

    # Hosts with less canonical facts: one host per individual fact
    for fact_name, fact_value in full_facts.items():
        partial_host = minimal_db_host(canonical_facts={fact_name: fact_value})
        stored_hosts.append(db_create_host(host=partial_host))

    # Disabled pending a known issue; kept for reference.
    # # Hosts with more canonical facts
    # for fact in ELEVATED_IDS:
    #     facts = deepcopy(canonical_facts)
    #     facts[fact] = generate_uuid()
    #     if fact == "provider_id":
    #         facts["provider_type"] = "aws"
    #     host = minimal_db_host(canonical_facts=facts)
    #     created_hosts.append(db_create_host(host=host))  <-- Issue with missing elevated ID

    # Hosts with the same amount of canonical facts
    for _ in range(host_count):
        stored_hosts.append(db_create_host(host=minimal_db_host(canonical_facts=full_facts)))

    for stored in stored_hosts:
        assert db_get_host(stored.id)

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]
    with multi_session_guard(db_sessions):
        removed = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    # Issue with missing elevated ID
    # assert deleted_hosts_count == host_count + len(canonical_facts) + len(ELEVATED_IDS) - 1
    assert removed == host_count + len(full_facts) - 1

    # Everything but the last created host is expected to have been deleted.
    *expected_gone, expected_kept = stored_hosts
    for stored in expected_gone:
        assert not db_get_host(stored.id)
    assert db_get_host(expected_kept.id)
def test_delete_duplicates_customer_scenario_2(event_producer, db_create_host, db_get_host, inventory_config):
    """Three puptoo reports of one machine that differ only in IP addresses and staleness.

    The first two hosts use the ``stale_warning`` timestamp, the third the
    ``fresh`` one.  The run must delete the first two and keep the third.
    """
    timestamps = get_staleness_timestamps()

    rhsm_id = generate_uuid()
    facts = {
        "insights_id": generate_uuid(),
        "subscription_manager_id": rhsm_id,
        "bios_uuid": generate_uuid(),
        "satellite_id": rhsm_id,
        "fqdn": "rozrhjrad01.base.srvco.net",
        "ip_addresses": ["10.230.230.10", "10.230.230.13"],
        "mac_addresses": ["00:50:56:ac:56:45", "00:50:56:ac:48:61", "00:00:00:00:00:00"],
    }
    host_kwargs = {
        "stale_timestamp": timestamps["stale_warning"],
        "reporter": "puptoo",
        "canonical_facts": facts,
    }

    def _store_current():
        # Persist a host reflecting the current state of host_kwargs / facts.
        return db_create_host(host=minimal_db_host(**host_kwargs))

    first = _store_current()

    facts["ip_addresses"] = ["10.230.230.3", "10.230.230.4"]
    second = _store_current()

    facts["ip_addresses"] = ["10.230.230.1", "10.230.230.4"]
    host_kwargs["stale_timestamp"] = timestamps["fresh"]
    third = _store_current()

    for stored in (first, second, third):
        assert db_get_host(stored.id)

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]
    with multi_session_guard(db_sessions):
        removed = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    assert removed == 2
    assert not db_get_host(first.id)
    assert not db_get_host(second.id)
    assert db_get_host(third.id)
def test_reaper_shutdown_handler(event_datetime_mock, db_create_host, db_get_hosts, inventory_config):
    """The reaper leaves work undone once the shutdown handler answers True.

    Three culled hosts are stored and ``shut_down`` is mocked to answer
    ``False`` and then ``True``.  Afterwards exactly one host must remain and
    two events must have been written to the producer mock.
    """
    culled_at = get_staleness_timestamps()["culled"].isoformat()
    expected_total = 3
    host_ids = [
        db_create_host(minimal_db_host(stale_timestamp=culled_at, reporter="some reporter")).id
        for _ in range(expected_total)
    ]
    assert db_get_hosts(host_ids).count() == expected_total

    producer = mock.Mock()
    # First poll says "keep going", the second one says "shut down".
    shutdown_after_first_poll = mock.Mock(**{"shut_down.side_effect": (False, True)})

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    host_reaper_run(
        inventory_config,
        mock.Mock(),
        db.session,
        producer,
        shutdown_handler=shutdown_after_first_poll,
    )

    assert db_get_hosts(host_ids).count() == 1
    assert producer.write_event.call_count == 2
 def _db_create_host(identity=SYSTEM_IDENTITY, host=None, extra_data=None):
     """Persist a host through ``db.session`` and return it.

     When ``host`` is falsy, a new one is built with ``minimal_db_host`` for
     the account number found in ``identity``, forwarding ``extra_data`` as
     keyword arguments.
     """
     if not host:
         host = minimal_db_host(account=identity["account_number"], **(extra_data or {}))
     db.session.add(host)
     db.session.commit()
     return host
def test_culled_host_is_removed(event_producer_mock, event_datetime_mock,
                                db_create_host, db_get_host, inventory_config):
    """A host stored with the "culled" stale timestamp is gone after a reaper run.

    The delete event captured by the producer mock is then checked with
    ``assert_delete_event_is_valid``.
    """
    culled_at = get_staleness_timestamps()["culled"]
    stored = db_create_host(
        minimal_db_host(stale_timestamp=culled_at.isoformat(), reporter="some reporter"))

    assert db_get_host(stored.id)

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    never_shut_down = mock.Mock(**{"shut_down.return_value": False})
    host_reaper_run(
        inventory_config,
        mock.Mock(),
        db.session,
        event_producer_mock,
        shutdown_handler=never_shut_down,
    )

    assert not db_get_host(stored.id)

    assert_delete_event_is_valid(
        event_producer=event_producer_mock,
        host=stored,
        timestamp=event_datetime_mock,
    )
def test_synchronize_host_event(event_producer_mock, event_datetime_mock,
                                db_create_host, db_get_host, inventory_config):
    """The synchronizer run keeps the host in the DB and produces a valid synchronize event."""
    culled_at = get_staleness_timestamps()["culled"]
    stored = db_create_host(
        minimal_db_host(stale_timestamp=culled_at.isoformat(), reporter="some reporter"))

    assert db_get_host(stored.id)

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    never_shut_down = mock.Mock(**{"shut_down.return_value": False})
    host_synchronizer_run(
        inventory_config,
        mock.Mock(),
        db.session,
        event_producer_mock,
        shutdown_handler=never_shut_down,
    )

    # The host must still exist: the event synchronizer has to find it in order to produce an update event.
    assert db_get_host(stored.id)

    assert_synchronize_event_is_valid(
        event_producer=event_producer_mock,
        host=stored,
        timestamp=event_datetime_mock,
    )
def test_non_culled_host_is_not_removed(
    event_producer_mock, event_datetime_mock, db_create_host, db_get_hosts, inventory_config
):
    """Hosts with stale_warning, stale and fresh timestamps survive a reaper run.

    The same IDs must be returned before and after the run, and the producer
    mock's ``event`` attribute must stay ``None``.
    """
    timestamps = get_staleness_timestamps()
    stored_hosts = [
        db_create_host(minimal_db_host(stale_timestamp=timestamps[state].isoformat(), reporter="some reporter"))
        for state in ("stale_warning", "stale", "fresh")
    ]
    expected_ids = [stored.id for stored in stored_hosts]

    def _ids_in_db():
        # IDs currently returned by the DB for the hosts created above.
        return [found.id for found in db_get_hosts(expected_ids)]

    assert expected_ids == _ids_in_db()

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    host_reaper_run(
        inventory_config,
        mock.Mock(),
        db.session,
        event_producer_mock,
        shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
    )

    assert expected_ids == _ids_in_db()
    assert event_producer_mock.event is None
def test_find_host_using_superset_canonical_fact_match(db_create_host):
    """A host is found when searching with its facts plus one extra ``satellite_id``."""
    stored_facts = {"fqdn": "fred", "bios_uuid": generate_uuid()}

    # The search facts are a superset: everything the host has, plus a satellite_id.
    search_facts = {**stored_facts, "satellite_id": generate_uuid()}

    stored = db_create_host(host=minimal_db_host(canonical_facts=stored_facts))
    assert_host_exists_in_db(stored.id, search_facts)
def test_delete_duplicate_host(event_producer_mock, db_create_host, db_get_host, inventory_config):
    """Of two hosts with identical canonical facts, the older one is deleted and the newer one kept."""
    # make two hosts that are the same
    shared_facts = {
        "provider_type": ProviderType.AWS,  # Doesn't matter
        "provider_id": generate_uuid(),
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
    }
    # Both objects are built up front; they are then persisted old-first.
    older, newer = (minimal_db_host(canonical_facts=shared_facts) for _ in range(2))
    stored_older = db_create_host(host=older)
    stored_newer = db_create_host(host=newer)

    assert stored_older.id != stored_newer.id
    # Remember the ID now, before the run that is expected to delete this host.
    older_id = stored_older.id
    assert stored_older.canonical_facts["provider_id"] == stored_newer.canonical_facts["provider_id"]

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]

    with multi_session_guard(db_sessions):
        num_deleted = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer_mock,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    print("deleted this many hosts:")
    print(num_deleted)

    assert num_deleted == 1
    assert db_get_host(stored_newer.id)
    assert not db_get_host(older_id)
Example #12
0
def test_creates_delete_event_when_missing_from_db(mocker, db_create_host,
                                                   inventory_config,
                                                   num_existing, num_missing):
    """One delete event is written for every queued host that is not in the DB.

    ``num_existing`` hosts are stored and queued as "created" events;
    ``num_missing`` hosts are only queued (as "updated" events) and never
    stored.  The rebuild run must write exactly ``num_missing`` events, each a
    delete for one of the missing IDs.
    """
    producer = mock.Mock()
    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    queued_events = []
    absent_ids = []

    # Set up DB and event queue, alternating stored and never-stored hosts
    # until both quotas are filled.
    for round_no in range(max(num_existing, num_missing)):
        if round_no < num_existing:
            stored = minimal_db_host()
            db_create_host(host=stored)
            queued_events.append(build_event(EventType.created, stored))
        if round_no < num_missing:
            ghost = minimal_db_host()
            ghost.id = generate_uuid()
            queued_events.append(build_event(EventType.updated, ghost))
            absent_ids.append(ghost.id)

    consumer = create_kafka_consumer_mock(mocker, inventory_config, 1, 0, 1, queued_events)

    rebuild_events_run(
        inventory_config,
        mock.Mock(),
        db.session,
        consumer,
        producer,
        shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
    )

    assert producer.write_event.call_count == num_missing

    # The count check above guarantees the slice covers every recorded call.
    for recorded_call in producer.write_event.call_args_list[:num_missing]:
        payload = json.loads(recorded_call[0][0])
        assert payload["type"] == "delete"
        assert payload["id"] in absent_ids
Example #13
0
def test_find_host_using_subset_canonical_fact_match(db_create_host):
    """A host is found when searching with only its ``fqdn``."""
    hostname = "fred.flintstone.com"
    stored = db_create_host(
        minimal_db_host(
            canonical_facts={"fqdn": hostname, "bios_uuid": generate_uuid(), "rhel_machine_id": generate_uuid()}
        )
    )

    # Search with a strict subset of the stored canonical facts.
    assert_host_exists_in_db(stored.id, {"fqdn": hostname})
def test_find_correct_host_when_similar_canonical_facts(db_create_host):
    """The search resolves to the second host when only the ``insights_id`` points at it.

    The search facts reuse the first host's ``fqdn`` and ``bios_uuid`` but the
    second host's ``insights_id``.
    """
    first_facts = {"fqdn": "fred", "bios_uuid": generate_uuid(), "insights_id": generate_uuid()}
    second_facts = {"fqdn": "george", "bios_uuid": generate_uuid(), "insights_id": generate_uuid()}
    search_facts = {**first_facts, "insights_id": second_facts["insights_id"]}

    db_create_host(host=minimal_db_host(canonical_facts=first_facts))
    second_stored = db_create_host(host=minimal_db_host(canonical_facts=second_facts))

    assert_host_exists_in_db(second_stored.id, search_facts)
Example #15
0
def test_find_host_using_subscription_manager_id_match(db_create_host):
    """A host is found through its ``subscription_manager_id`` even though the ``bios_uuid`` differs."""
    rhsm_id = generate_uuid()
    stored_facts = {"fqdn": "fred", "bios_uuid": generate_uuid(), "subscription_manager_id": rhsm_id}

    # Change the bios_uuid so that falling back to subset match will fail
    search_facts = {"bios_uuid": generate_uuid(), "subscription_manager_id": rhsm_id}

    stored = db_create_host(minimal_db_host(canonical_facts=stored_facts))

    assert_host_exists_in_db(stored.id, search_facts)
Example #16
0
def test_find_host_using_elevated_ids_match(db_create_host, host_create_order, expected_host):
    """Searching with both IDs resolves to the host at index ``expected_host`` of the creation order.

    Two fact sets exist -- one with only a ``subscription_manager_id``, one
    with only an ``insights_id``.  ``host_create_order`` lists the indexes of
    the fact sets in the order the hosts are stored.
    """
    fact_sets = ({"subscription_manager_id": generate_uuid()}, {"insights_id": generate_uuid()})

    stored_hosts = [
        db_create_host(minimal_db_host(canonical_facts=fact_sets[index])) for index in host_create_order
    ]

    # The search facts are the union of both fact sets.
    search_facts = {}
    for fact_set in fact_sets:
        search_facts.update(fact_set)

    assert_host_exists_in_db(stored_hosts[expected_host].id, search_facts)
Example #17
0
def test_find_host_using_insights_id_match(db_create_host):
    """A host is found through its ``insights_id`` although ``fqdn`` and ``bios_uuid`` differ."""
    insights_id = generate_uuid()
    stored_facts = {"fqdn": "fred", "bios_uuid": generate_uuid(), "insights_id": insights_id}

    # Change the canonical facts except the insights_id...match on insights_id
    search_facts = {"fqdn": "barney", "bios_uuid": generate_uuid(), "insights_id": insights_id}

    stored = db_create_host(minimal_db_host(canonical_facts=stored_facts))

    assert_host_exists_in_db(stored.id, search_facts)
def test_elevated_id_priority_order_match(db_create_host, changing_id):
    """The stored host is still found after the ``changing_id`` fact gets a new value in the search."""
    stored_facts = {
        "provider_id": generate_uuid(),
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
    }
    stored = db_create_host(host=minimal_db_host(canonical_facts=stored_facts))

    # Same facts as the stored host, except for a fresh value under changing_id.
    search_facts = {**stored_facts, changing_id: generate_uuid()}

    assert_host_exists_in_db(stored.id, search_facts)
def test_no_merge_when_different_facts(db_create_host):
    """Facts that share nothing with the stored host do not resolve to any host."""
    stored_facts = {"fqdn": "fred", "bios_uuid": generate_uuid(), "insights_id": generate_uuid()}
    unrelated_facts = {"fqdn": "george", "bios_uuid": generate_uuid(), "subscription_manager_id": generate_uuid()}

    db_create_host(host=minimal_db_host(canonical_facts=stored_facts))

    assert_host_missing_from_db(unrelated_facts)
def test_find_host_canonical_fact_subset_match_different_elevated_ids(
        db_create_host):
    """A host stored with an ``insights_id`` is found by its ``fqdn`` plus an unrelated ``subscription_manager_id``."""
    common_facts = {"fqdn": "fred", "bios_uuid": generate_uuid()}
    stored_facts = {**common_facts, "insights_id": generate_uuid()}

    # Create the subset of canonical facts to search by
    search_facts = {"fqdn": "fred", "subscription_manager_id": generate_uuid()}

    stored = db_create_host(host=minimal_db_host(canonical_facts=stored_facts))

    assert_host_exists_in_db(stored.id, search_facts)
    def _db_create_multiple_hosts(hosts=None, how_many=10, extra_data=None):
        """Add several hosts to ``db.session`` and commit them together.

        Args:
            hosts: Optional list of ready-made host objects.  When a list
                (or list subclass) is given, exactly those hosts are stored
                and ``how_many`` / ``extra_data`` are ignored.
            how_many: Number of hosts to build with ``minimal_db_host`` when
                ``hosts`` is not a list.
            extra_data: Optional keyword arguments forwarded to
                ``minimal_db_host`` for each generated host.

        Returns:
            A new list with the hosts that were added, in insertion order.
        """
        extra_data = extra_data or {}
        # isinstance() rather than an exact type comparison, so list subclasses are accepted too.
        if isinstance(hosts, list):
            created_hosts = list(hosts)
        else:
            created_hosts = [minimal_db_host(**extra_data) for _ in range(how_many)]

        for host in created_hosts:
            db.session.add(host)

        # A single commit covers every host added above.
        db.session.commit()

        return created_hosts
def test_elevated_id_priority_order_nomatch(db_create_host, changing_id):
    """A different value under ``changing_id`` prevents the search from matching the stored host.

    The stored and the search facts share ``insights_id`` and
    ``subscription_manager_id`` as a base, but each gets its own freshly
    generated value for ``changing_id``.
    """
    common_facts = {
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
    }
    stored_facts = {**common_facts, changing_id: generate_uuid()}
    search_facts = {**common_facts, changing_id: generate_uuid()}

    stored = db_create_host(host=minimal_db_host(canonical_facts=stored_facts))

    # Sanity check first: the host is findable through its own facts.
    assert_host_exists_in_db(stored.id, stored_facts)
    assert_host_missing_from_db(search_facts)
    def _db_create_multiple_hosts(identity=SYSTEM_IDENTITY,
                                  hosts=None,
                                  how_many=10,
                                  extra_data=None):
        """Add several hosts to ``db.session`` and commit them together.

        Args:
            identity: Mapping whose ``"account_number"`` entry is used as the
                account of every generated host.
            hosts: Optional list of ready-made host objects.  When a list
                (or list subclass) is given, exactly those hosts are stored
                and the other arguments are ignored.
            how_many: Number of hosts to build with ``minimal_db_host`` when
                ``hosts`` is not a list.
            extra_data: Optional keyword arguments forwarded to
                ``minimal_db_host`` for each generated host.

        Returns:
            A new list with the hosts that were added, in insertion order.
        """
        extra_data = extra_data or {}
        # isinstance() rather than an exact type comparison, so list subclasses are accepted too.
        if isinstance(hosts, list):
            created_hosts = list(hosts)
        else:
            created_hosts = [
                minimal_db_host(account=identity["account_number"], **extra_data)
                for _ in range(how_many)
            ]

        for host in created_hosts:
            db.session.add(host)

        # A single commit covers every host added above.
        db.session.commit()

        return created_hosts
def test_culled_edge_host_is_not_removed(event_producer_mock, db_create_host,
                                         db_get_host, inventory_config):
    """A culled host whose system profile says ``host_type == "edge"`` survives the reaper run."""
    culled_at = get_staleness_timestamps()["culled"]
    edge_host = minimal_db_host(
        stale_timestamp=culled_at,
        reporter="some reporter",
        system_profile_facts={"host_type": "edge"},
    )
    stored = db_create_host(host=edge_host)

    assert db_get_host(stored.id)

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE
    never_shut_down = mock.Mock(**{"shut_down.return_value": False})
    host_reaper_run(
        inventory_config,
        mock.Mock(),
        db.session,
        event_producer_mock,
        shutdown_handler=never_shut_down,
    )

    assert db_get_host(stored.id)
Example #25
0
def test_no_delete_when_hosts_present(mocker, db_create_host,
                                      inventory_config):
    """No event is written when every queued host is present in the DB."""
    producer = mock.Mock()
    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE

    # Store four hosts and queue a "created" event for each of them.
    queued_events = []
    for _ in range(4):
        stored = minimal_db_host()
        db_create_host(host=stored)
        queued_events.append(build_event(EventType.created, stored))

    consumer = create_kafka_consumer_mock(mocker, inventory_config, 1, 0, 1, queued_events)

    rebuild_events_run(
        inventory_config,
        mock.Mock(),
        db.session,
        consumer,
        producer,
        shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
    )

    assert producer.write_event.call_count == 0
def test_delete_duplicates_last_modified(event_producer, db_create_multiple_hosts, db_get_host, inventory_config):
    """Among many identical hosts, only the one updated afterwards survives the run.

    Intent: the deletion script always keeps the host with the latest
    'modified_on' date.  One randomly chosen host gets a new display name
    after creation and must be the single survivor.
    """
    shared_facts = {
        "provider_id": generate_uuid(),
        "insights_id": generate_uuid(),
        "subscription_manager_id": generate_uuid(),
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
        "provider_type": "aws",
    }
    host_count = 100

    duplicates = [minimal_db_host(canonical_facts=shared_facts) for _ in range(host_count)]
    host_ids = [stored.id for stored in db_create_multiple_hosts(hosts=duplicates)]
    touched_host = update_host_in_db(choice(host_ids), display_name="new-display-name")
    for host_id in host_ids:
        assert db_get_host(host_id)

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]
    with multi_session_guard(db_sessions):
        removed = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    assert removed == host_count - 1
    for host_id in host_ids:
        # Only the updated host may still be present.
        if host_id == touched_host.id:
            assert db_get_host(host_id)
        else:
            assert not db_get_host(host_id)
def test_delete_duplicates_customer_scenario_1(event_producer, db_create_host, db_get_host, inventory_config):
    """Five reports of one machine, built by mutating a single fact dict step by step.

    All five hosts share ``insights_id``, ``subscription_manager_id`` and
    ``satellite_id``; they differ in IP/MAC addresses, fqdn, reporter,
    staleness and in whether ``bios_uuid`` is present.  The run must delete
    the first four hosts and keep the fifth (the last one created).

    NOTE: ``host_data["canonical_facts"]`` is the same dict object as
    ``canonical_facts`` and is mutated in place between creations, so the
    order of the statements below matters.
    """
    staleness_timestamps = get_staleness_timestamps()

    rhsm_id = generate_uuid()
    bios_uuid = generate_uuid()
    canonical_facts = {
        "insights_id": generate_uuid(),
        "subscription_manager_id": rhsm_id,
        "bios_uuid": bios_uuid,
        "satellite_id": rhsm_id,
        "fqdn": "rn001018",
        "ip_addresses": ["10.230.230.3"],
        "mac_addresses": ["00:50:56:ab:5a:22", "00:00:00:00:00:00"],
    }
    host_data = {
        "stale_timestamp": staleness_timestamps["stale_warning"],
        "reporter": "puptoo",
        "canonical_facts": canonical_facts,
    }
    # Host 1: puptoo, stale_warning, full fact set including bios_uuid.
    host1 = minimal_db_host(**host_data)
    created_host1 = db_create_host(host=host1)

    # Host 2: different IP, no bios_uuid, "stale" timestamp.
    host_data["canonical_facts"]["ip_addresses"] = ["10.230.230.30"]
    host_data["canonical_facts"].pop("bios_uuid")
    host_data["stale_timestamp"] = staleness_timestamps["stale"]
    host2 = minimal_db_host(**host_data)
    created_host2 = db_create_host(host=host2)

    # Host 3: same as host 2 but with the original IP address.
    host_data["canonical_facts"]["ip_addresses"] = ["10.230.230.3"]
    host3 = minimal_db_host(**host_data)
    created_host3 = db_create_host(host=host3)

    # Host 4: reported by yupana, fresh, bios_uuid restored, one MAC, longer fqdn.
    host_data["reporter"] = "yupana"
    host_data["canonical_facts"]["ip_addresses"] = ["10.230.230.1"]
    host_data["canonical_facts"]["mac_addresses"] = ["00:50:56:ab:5a:22"]
    host_data["canonical_facts"]["bios_uuid"] = bios_uuid
    host_data["canonical_facts"]["fqdn"] = "rn001018.bcbst.com"
    host_data["stale_timestamp"] = staleness_timestamps["fresh"]
    host4 = minimal_db_host(**host_data)
    created_host4 = db_create_host(host=host4)

    # Host 5: puptoo again, still fresh, no bios_uuid, short fqdn, both MACs.
    host_data["reporter"] = "puptoo"
    host_data["canonical_facts"]["ip_addresses"] = ["10.230.230.15"]
    host_data["canonical_facts"]["mac_addresses"] = ["00:50:56:ab:5a:22", "00:00:00:00:00:00"]
    host_data["canonical_facts"].pop("bios_uuid")
    host_data["canonical_facts"]["fqdn"] = "rn001018"
    host5 = minimal_db_host(**host_data)
    created_host5 = db_create_host(host=host5)

    # Sanity check: all five hosts are in the DB before the run.
    assert db_get_host(created_host1.id)
    assert db_get_host(created_host2.id)
    assert db_get_host(created_host3.id)
    assert db_get_host(created_host4.id)
    assert db_get_host(created_host5.id)

    # The run takes three separate sessions, unpacked positionally below.
    Session = _init_db(inventory_config)
    sessions = [Session() for _ in range(3)]
    with multi_session_guard(sessions):
        deleted_hosts_count = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *sessions,
            event_producer,
            # shut_down() always answers False, so the run is never asked to stop.
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )

    # Only the last created host is expected to remain.
    assert deleted_hosts_count == 4
    assert not db_get_host(created_host1.id)
    assert not db_get_host(created_host2.id)
    assert not db_get_host(created_host3.id)
    assert not db_get_host(created_host4.id)
    assert db_get_host(created_host5.id)
def test_delete_dupe_more_hosts_than_chunk_size(
    event_producer_mock, db_get_host, db_create_multiple_hosts, db_create_host, inventory_config
):
    """Duplicate pairs on both sides of a batch larger than three script chunks are each reduced to the newer host.

    A first old/new pair is stored, then ``script_chunk_size * 3 + 15``
    unrelated hosts, then a second old/new pair.  The run must delete exactly
    the two older hosts.
    """

    def _new_facts():
        # A fresh set of IDs shared by one old/new pair.
        return {
            "provider_id": generate_uuid(),
            "insights_id": generate_uuid(),
            "subscription_manager_id": generate_uuid(),
        }

    first_pair_facts = _new_facts()
    second_pair_facts = _new_facts()

    filler_count = inventory_config.script_chunk_size * 3 + 15

    # create host before big chunk. Hosts are ordered by modified date so creation
    # order is important
    older_1 = minimal_db_host(canonical_facts=first_pair_facts)
    newer_1 = minimal_db_host(canonical_facts=first_pair_facts)
    stored_older_1 = db_create_host(host=older_1)
    stored_newer_1 = db_create_host(host=newer_1)

    # create big chunk of hosts
    db_create_multiple_hosts(how_many=filler_count)

    # create another host after
    older_2 = minimal_db_host(canonical_facts=second_pair_facts)
    newer_2 = minimal_db_host(canonical_facts=second_pair_facts)
    stored_older_2 = db_create_host(host=older_2)
    stored_newer_2 = db_create_host(host=newer_2)

    assert stored_older_1.id != stored_newer_1.id
    assert stored_older_2.id != stored_newer_2.id

    threadctx.request_id = UNKNOWN_REQUEST_ID_VALUE

    session_factory = _init_db(inventory_config)
    db_sessions = [session_factory() for _ in range(3)]

    with multi_session_guard(db_sessions):
        num_deleted = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *db_sessions,
            event_producer_mock,
            shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
        )
    assert num_deleted == 2

    assert db_get_host(stored_newer_1.id)
    assert not db_get_host(stored_older_1.id)

    assert db_get_host(stored_newer_2.id)
    assert not db_get_host(stored_older_2.id)
def test_delete_duplicates_multiple_scenarios(
    event_producer, db_create_host, db_create_multiple_hosts, db_get_host, inventory_config, script_function
):
    """Exercise the duplicate-deletion script on several host groups within a single run.

    Host groups, in creation order:

    * customer scenario - three hosts sharing all IDs, differing in ``ip_addresses`` (and the
      last one in ``stale_timestamp``);
    * elevated ID matching - hosts sharing a single ``insights_id``;
    * elevated IDs not matching - hosts that each get their own ``insights_id``;
    * no elevated IDs, facts matching - hosts built from one shared set of canonical facts;
    * no elevated IDs, facts not matching - hosts that each get their own ``fqdn``.

    Filler hosts (``script_chunk_size`` of them, twice) are created in between so that the
    script has to delete duplicates in multiple chunks. In the customer scenario and in both
    matching groups only the host created last is expected to survive; the non-matching
    groups must stay untouched.

    ``script_function`` selects the entry point: ``"run"`` calls ``host_delete_duplicates_run``
    and also checks the returned count; any other value calls ``host_delete_duplicates_main``.
    """

    def _create_host_id(**host_kwargs):
        # Persist a minimal host built from the given keyword arguments and return its ID.
        return db_create_host(host=minimal_db_host(**host_kwargs)).id

    def _gen_canonical_facts():
        # Fresh canonical facts in which every value is unique.
        return {
            "insights_id": generate_uuid(),
            "subscription_manager_id": generate_uuid(),
            "bios_uuid": generate_uuid(),
            "satellite_id": generate_uuid(),
            "fqdn": generate_random_string(),
        }

    chunk_size = inventory_config.script_chunk_size

    # Customer scenario
    staleness_timestamps = get_staleness_timestamps()

    rhsm_id = generate_uuid()
    bios_uuid = generate_uuid()
    customer_facts = {
        "insights_id": generate_uuid(),
        "subscription_manager_id": rhsm_id,
        "bios_uuid": bios_uuid,
        "satellite_id": rhsm_id,
        "fqdn": "rozrhjrad01.base.srvco.net",
        "ip_addresses": ["10.230.230.10", "10.230.230.13"],
        "mac_addresses": ["00:50:56:ac:56:45", "00:50:56:ac:48:61", "00:00:00:00:00:00"],
    }
    # Every host gets its own facts dict, so a dict already handed to a created host is
    # never modified afterwards.
    customer_created_host1 = _create_host_id(
        stale_timestamp=staleness_timestamps["stale_warning"],
        reporter="puptoo",
        canonical_facts=customer_facts,
    )
    customer_created_host2 = _create_host_id(
        stale_timestamp=staleness_timestamps["stale_warning"],
        reporter="puptoo",
        canonical_facts={**customer_facts, "ip_addresses": ["10.230.230.3", "10.230.230.4"]},
    )
    customer_created_host3 = _create_host_id(
        stale_timestamp=staleness_timestamps["fresh"],
        reporter="puptoo",
        canonical_facts={**customer_facts, "ip_addresses": ["10.230.230.1", "10.230.230.4"]},
    )
    customer_deleted_hosts = [customer_created_host1, customer_created_host2]

    assert db_get_host(customer_created_host1)
    assert db_get_host(customer_created_host2)
    assert db_get_host(customer_created_host3)

    # Matching elevated ID
    elevated_matching_host_count = 10
    elevated_id = generate_uuid()
    elevated_matching_created_hosts = []

    # Hosts with the same amount of canonical facts
    for _ in range(elevated_matching_host_count):
        facts = {**_gen_canonical_facts(), "insights_id": elevated_id}
        elevated_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Hosts with less canonical facts
    for _ in range(elevated_matching_host_count):
        facts = {"insights_id": elevated_id}
        elevated_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Create a lot of hosts to test that the script deletes duplicates in multiple chunks
    db_create_multiple_hosts(how_many=chunk_size)

    # Hosts with more canonical facts
    for _ in range(elevated_matching_host_count):
        facts = {
            **_gen_canonical_facts(),
            "insights_id": elevated_id,
            "ip_addresses": [f"10.0.0.{randint(1, 255)}"],
        }
        elevated_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    for host_id in elevated_matching_created_hosts:
        assert db_get_host(host_id)

    # Elevated IDs not matching
    elevated_not_matching_canonical_facts = _gen_canonical_facts()
    elevated_not_matching_host_count = 10
    elevated_not_matching_created_hosts = []

    # Hosts with the same amount of canonical facts
    for _ in range(elevated_not_matching_host_count):
        facts = {**elevated_not_matching_canonical_facts, "insights_id": generate_uuid()}
        elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Hosts with less canonical facts
    for _ in range(elevated_not_matching_host_count):
        facts = {"insights_id": generate_uuid()}
        elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Hosts with more canonical facts
    for _ in range(elevated_not_matching_host_count):
        facts = {
            **elevated_not_matching_canonical_facts,
            "insights_id": generate_uuid(),
            "ip_addresses": ["10.0.0.10"],
        }
        elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    for host_id in elevated_not_matching_created_hosts:
        assert db_get_host(host_id)

    # Without elevated IDs - canonical facts matching
    without_elevated_matching_canonical_facts = {
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
        "ip_addresses": ["10.0.0.1"],
        "mac_addresses": ["aa:bb:cc:dd:ee:ff"],
    }

    without_elevated_matching_host_count = 10
    without_elevated_matching_created_hosts = []

    # Hosts with less canonical facts: one host per single shared fact
    for fact_name, fact_value in without_elevated_matching_canonical_facts.items():
        facts = {fact_name: fact_value}
        without_elevated_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Create a lot of hosts to test that the script deletes duplicates in multiple chunks
    db_create_multiple_hosts(how_many=chunk_size)

    # Hosts with the same amount of canonical facts
    for _ in range(without_elevated_matching_host_count):
        without_elevated_matching_created_hosts.append(
            _create_host_id(canonical_facts=without_elevated_matching_canonical_facts)
        )

    for host_id in without_elevated_matching_created_hosts:
        assert db_get_host(host_id)

    # Without elevated IDs - canonical facts not matching
    without_elevated_not_matching_canonical_facts = {
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
        "ip_addresses": ["0.0.0.0"],
        "mac_addresses": ["aa:bb:cc:dd:ee:ff"],
    }

    without_elevated_not_matching_host_count = 10
    without_elevated_not_matching_created_hosts = []

    # Hosts with the same amount of canonical facts
    for _ in range(without_elevated_not_matching_host_count):
        facts = deepcopy(without_elevated_not_matching_canonical_facts)
        facts["fqdn"] = generate_random_string()
        without_elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Hosts with less canonical facts
    for _ in range(without_elevated_not_matching_host_count):
        facts = {"fqdn": generate_random_string()}
        without_elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    # Hosts with more canonical facts
    for elevated_fact in ELEVATED_IDS:
        facts = deepcopy(without_elevated_not_matching_canonical_facts)
        facts["fqdn"] = generate_random_string()
        facts[elevated_fact] = generate_uuid()
        if elevated_fact == "provider_id":
            facts["provider_type"] = "aws"
        without_elevated_not_matching_created_hosts.append(_create_host_id(canonical_facts=facts))

    for host_id in without_elevated_not_matching_created_hosts:
        assert db_get_host(host_id)

    # In each matching group every host except the one created last is expected to go.
    elevated_matching_deleted_hosts = elevated_matching_created_hosts[:-1]
    without_elevated_matching_deleted_hosts = without_elevated_matching_created_hosts[:-1]

    if script_function == "run":
        Session = _init_db(inventory_config)
        sessions = [Session() for _ in range(3)]
        with multi_session_guard(sessions):
            deleted_hosts_count = host_delete_duplicates_run(
                inventory_config,
                mock.Mock(),
                *sessions,
                event_producer,
                shutdown_handler=mock.Mock(**{"shut_down.return_value": False}),
            )
        # Derived from the groups themselves, so the expectation keeps matching the
        # per-host assertions below if any group size changes.
        expected_deleted_count = (
            len(customer_deleted_hosts)
            + len(elevated_matching_deleted_hosts)
            + len(without_elevated_matching_deleted_hosts)
        )
        assert deleted_hosts_count == expected_deleted_count
    else:
        host_delete_duplicates_main(mock.Mock())

    for host_id in customer_deleted_hosts:
        assert not db_get_host(host_id)
    assert db_get_host(customer_created_host3)

    for host_id in elevated_matching_deleted_hosts:
        assert not db_get_host(host_id)
    assert db_get_host(elevated_matching_created_hosts[-1])

    for host_id in elevated_not_matching_created_hosts:
        assert db_get_host(host_id)

    for host_id in without_elevated_matching_deleted_hosts:
        assert not db_get_host(host_id)
    assert db_get_host(without_elevated_matching_created_hosts[-1])

    for host_id in without_elevated_not_matching_created_hosts:
        assert db_get_host(host_id)
def test_delete_duplicates_without_elevated_not_matching(
    event_producer, db_create_host, db_get_host, inventory_config, tested_fact
):
    """Check that hosts differing in ``tested_fact`` are all kept by the duplicate-deletion run.

    Three batches are created, each host with its own freshly generated ``tested_fact`` value:
    hosts carrying the full shared fact set, hosts carrying only ``tested_fact``, and hosts
    carrying the full set plus one entry of ``ELEVATED_IDS``. The run is expected to report
    zero deletions and every created host must still be retrievable afterwards.
    """

    def _random_ip_list():
        return [f"{randint(1, 255)}.{randint(0, 255)}.{randint(0, 255)}.{randint(1, 255)}"]

    def _random_mac_list():
        hex_digits = "0123456789abcdef"
        return [":".join(choice(hex_digits) + choice(hex_digits) for _ in range(6))]

    # Value generators per fact name; any fact not listed here gets a UUID.
    value_generators = {
        "fqdn": generate_random_string,
        "ip_addresses": _random_ip_list,
        "mac_addresses": _random_mac_list,
    }
    new_tested_value = value_generators.get(tested_fact, generate_uuid)

    shared_facts = {
        "bios_uuid": generate_uuid(),
        "satellite_id": generate_uuid(),
        "fqdn": generate_random_string(),
        "ip_addresses": ["0.0.0.0"],
        "mac_addresses": ["aa:bb:cc:dd:ee:ff"],
    }

    hosts_per_batch = 10
    created_hosts = []

    def _create_and_record(facts):
        # Persist a host with the given canonical facts and remember the created object.
        created_hosts.append(db_create_host(host=minimal_db_host(canonical_facts=facts)))

    # Hosts with the same amount of canonical facts
    for _ in range(hosts_per_batch):
        full_facts = deepcopy(shared_facts)
        full_facts[tested_fact] = new_tested_value()
        _create_and_record(full_facts)

    # Hosts with less canonical facts
    for _ in range(hosts_per_batch):
        _create_and_record({tested_fact: new_tested_value()})

    # Hosts with more canonical facts
    for elevated_fact in ELEVATED_IDS:
        extended_facts = deepcopy(shared_facts)
        extended_facts[tested_fact] = new_tested_value()
        extended_facts[elevated_fact] = generate_uuid()
        if elevated_fact == "provider_id":
            extended_facts["provider_type"] = "aws"
        _create_and_record(extended_facts)

    for created in created_hosts:
        assert db_get_host(created.id)

    session_factory = _init_db(inventory_config)
    sessions = [session_factory(), session_factory(), session_factory()]
    never_shut_down = mock.Mock()
    never_shut_down.shut_down.return_value = False
    with multi_session_guard(sessions):
        deleted_hosts_count = host_delete_duplicates_run(
            inventory_config,
            mock.Mock(),
            *sessions,
            event_producer,
            shutdown_handler=never_shut_down,
        )

    assert deleted_hosts_count == 0
    for created in created_hosts:
        assert db_get_host(created.id)