def synchronize_hosts(select_query, event_producer, chunk_size, config, interrupt=lambda: False):
    query = select_query.order_by(Host.id)
    host_list = query.limit(chunk_size).all()

    while len(host_list) > 0 and not interrupt():
        for host in host_list:
            serialized_host = serialize_host(host, Timestamps.from_config(config), EGRESS_HOST_FIELDS)
            event = build_event(EventType.updated, serialized_host)
            insights_id = host.canonical_facts.get("insights_id")
            headers = message_headers(EventType.updated, insights_id)
            # in case of a failed update event, event_producer logs the message.
            event_producer.write_event(event, str(host.id), headers, Topic.events)
            synchronize_host_count.inc()
            yield host.id

        try:
            # pace the events production speed as flush completes sending all buffered records.
            event_producer._kafka_producer.flush(300)
        except KafkaTimeoutError:
            raise KafkaTimeoutError(f"KafkaTimeoutError: failure to flush {chunk_size} records within 300 seconds")

        # load next chunk using keyset pagination
        host_list = query.filter(Host.id > host_list[-1].id).limit(chunk_size).all()
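# Driver sketch for synchronize_hosts: the generator only does work while it is
# consumed, so a caller must iterate it. The signal wiring and the function name
# below are assumptions for illustration, not part of the code above.
import signal

def run_synchronizer(select_query, event_producer, config, chunk_size=1000):
    shutdown_requested = False

    def _request_shutdown(signum, frame):
        # flip the flag; synchronize_hosts checks interrupt() between chunks
        nonlocal shutdown_requested
        shutdown_requested = True

    signal.signal(signal.SIGTERM, _request_shutdown)

    synced = 0
    for _host_id in synchronize_hosts(
        select_query, event_producer, chunk_size, config, interrupt=lambda: shutdown_requested
    ):
        synced += 1
    return synced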
def handle_message(message, event_producer): validated_operation_msg = parse_operation_message(message) platform_metadata = validated_operation_msg.get("platform_metadata") or {} # create a dummy identity for working around the identity requirement for CRUD operations identity = Identity(USER_IDENTITY) # set account_number in dummy idenity to the actual account_number received in the payload identity.account_number = validated_operation_msg["data"]["account"] request_id = platform_metadata.get("request_id", "-1") initialize_thread_local_storage(request_id) payload_tracker = get_payload_tracker(request_id=request_id) with PayloadTrackerContext(payload_tracker, received_status_message="message received", current_operation="handle_message"): output_host, host_id, insights_id, add_results = add_host( validated_operation_msg["data"], identity) event_type = add_host_results_to_event_type(add_results) event = build_event(event_type, output_host, platform_metadata=platform_metadata) headers = message_headers(add_results, insights_id) event_producer.write_event(event, str(host_id), headers, Topic.egress) # for transition to platform.inventory.events if inventory_config().secondary_topic_enabled: event_producer.write_event(event, str(host_id), headers, Topic.events)
def delete_hosts(select_query, event_producer, chunk_size, interrupt=lambda: False):
    while select_query.count():
        for host in select_query.limit(chunk_size):
            host_id = host.id
            with delete_host_processing_time.time():
                _delete_host(select_query.session, host)

            host_deleted = _deleted_by_this_query(host)
            if host_deleted:
                delete_host_count.inc()
                event = build_event(EventType.delete, host)
                insights_id = host.canonical_facts.get("insights_id")
                headers = message_headers(EventType.delete, insights_id)
                event_producer.write_event(event, str(host.id), headers, wait=True)

            yield host_id, host_deleted

            if interrupt():
                return
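# Reaper-style driver sketch for delete_hosts above; consuming the generator is
# what actually performs the deletions. Names here are illustrative only.
def run_host_reaper(select_query, event_producer, chunk_size, logger):
    deleted = 0
    for host_id, host_deleted in delete_hosts(select_query, event_producer, chunk_size):
        if host_deleted:
            deleted += 1
            logger.info(f"Deleted host {host_id}")
        else:
            # another process won the race; _deleted_by_this_query returned False
            logger.info(f"Host {host_id} was already deleted elsewhere")
    return deleted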
def delete_duplicate_hosts(
    accounts_session, hosts_session, misc_session, chunk_size, logger, event_producer, interrupt=lambda: False
):
    total_deleted = 0
    hosts_query = hosts_session.query(Host)
    account_query = accounts_session.query(Host.account)

    logger.info(f"Total number of hosts in inventory: {hosts_query.count()}")
    logger.info(f"Total number of accounts in inventory: {account_query.distinct(Host.account).count()}")

    for account in account_query.distinct(Host.account).yield_per(chunk_size):
        logger.info(f"Processing account {account}")
        unique_list = list()
        duplicate_list = list()
        misc_query = misc_session.query(Host).filter(Host.account == account)

        def unique(host_list):
            if host_list[0].id not in unique_list:
                unique_list.append(host_list[0].id)
                logger.info(f"{host_list[0].id} is unique, total: {len(unique_list)}")
            if len(host_list) > 1:
                for host_id in [h.id for h in host_list[1:] if h.id not in unique_list and h.id not in duplicate_list]:
                    duplicate_list.append(host_id)
                    logger.info(f"{host_id} is a potential duplicate")

        for host in (
            hosts_query.filter(Host.account == account).order_by(Host.modified_on.desc()).yield_per(chunk_size)
        ):
            canonical_facts = host.canonical_facts
            elevated_cfs = {key: value for key, value in canonical_facts.items() if key in ELEVATED_CANONICAL_FACT_FIELDS}
            # host_matches must be initialized; a host with neither elevated nor
            # regular canonical facts would otherwise leave it unbound
            host_matches = None
            if elevated_cfs:
                logger.info(f"find by elevated canonical facts: {elevated_cfs}")
                host_matches = find_host_list_by_elevated_canonical_facts(elevated_cfs, misc_query, logger)
            else:
                regular_cfs = {key: value for key, value in canonical_facts.items() if key not in ELEVATED_CANONICAL_FACT_FIELDS}
                logger.info(f"find by regular canonical facts: {regular_cfs}")
                if regular_cfs:
                    host_matches = find_host_list_by_regular_canonical_facts(regular_cfs, misc_query, logger)

            if host_matches:
                unique(host_matches)

        hosts_session.expunge_all()
        accounts_session.expunge_all()

        # build the delete events before the rows disappear; the loop variable
        # `host` from the scan above would otherwise be stale here
        delete_events = []
        for duplicate_host in misc_query.filter(Host.id.in_(duplicate_list)).all():
            insights_id = duplicate_host.canonical_facts.get("insights_id")
            delete_events.append((build_event(EventType.delete, duplicate_host), str(duplicate_host.id), insights_id))

        # delete duplicate hosts
        _delete_hosts_by_id_list(misc_session, duplicate_list)
        for host_id in duplicate_list:
            log_host_delete_succeeded(logger, host_id, "DEDUP")
            delete_duplicate_host_count.inc()
        for event, event_key, insights_id in delete_events:
            headers = message_headers(EventType.delete, insights_id)
            event_producer.write_event(event, event_key, headers, wait=True)
        total_deleted += len(duplicate_list)

        # honor the interrupt callback between accounts
        if interrupt():
            break

    return total_deleted
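# _delete_hosts_by_id_list is referenced above but not defined in this section;
# the sketch below is an assumption about what it plausibly does with the
# session it is handed, not the codebase's actual implementation.
def _delete_hosts_by_id_list(session, host_id_list):
    # bulk delete by primary key; synchronize_session="fetch" keeps the
    # session's in-memory state consistent with the rows removed
    session.query(Host).filter(Host.id.in_(host_id_list)).delete(synchronize_session="fetch")
    session.commit()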
def handle_message(message, event_producer): validated_operation_msg = parse_operation_message(message) platform_metadata = validated_operation_msg.get("platform_metadata") or {} request_id = platform_metadata.get("request_id", "-1") initialize_thread_local_storage(request_id) payload_tracker = get_payload_tracker(request_id=request_id) with PayloadTrackerContext( payload_tracker, received_status_message="message received", current_operation="handle_message" ): (output_host, add_results) = add_host(validated_operation_msg["data"]) event_type = add_host_results_to_event_type(add_results) event = build_event(event_type, output_host, platform_metadata=platform_metadata) event_producer.write_event(event, output_host["id"], message_headers(add_results), Topic.egress) # for transition to platform.inventory.events if inventory_config().secondary_topic_enabled: event_producer.write_event(event, output_host["id"], message_headers(add_results), Topic.events)
def delete_hosts(select_query, event_producer):
    while select_query.count():
        for host in select_query.limit(CHUNK_SIZE):
            host_id = host.id
            with delete_host_processing_time.time():
                _delete_host(select_query.session, host)

            host_deleted = _deleted_by_this_query(host)
            if host_deleted:
                delete_host_count.inc()
                event = build_event(EventType.delete, host)
                event_producer.write_event(event, str(host.id), message_headers(EventType.delete), Topic.events)

            yield host_id, host_deleted
def sync_event_message(message, session, event_producer):
    if message["type"] != EventType.delete.name:
        query = session.query(Host).filter(
            (Host.account == message["host"]["account"]) & (Host.id == UUID(message["host"]["id"]))
        )
        # If the host doesn't exist in the DB, produce a Delete event.
        if not query.count():
            host = deserialize_host({k: v for k, v in message["host"].items() if v}, schema=LimitedHostSchema)
            host.id = message["host"]["id"]
            event = build_event(EventType.delete, host)
            insights_id = host.canonical_facts.get("insights_id")
            headers = message_headers(EventType.delete, insights_id)
            event_producer.write_event(event, host.id, headers, wait=True)

    return
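# The message shape sync_event_message expects can be inferred from the lookups
# above: a "type" matching an EventType name plus a serialized "host" carrying
# at least "id" and "account". All values below are placeholders.
example_message = {
    "type": "updated",
    "host": {
        "id": "00000000-0000-0000-0000-000000000000",
        "account": "000001",
        "insights_id": "00000000-0000-0000-0000-000000000001",
    },
}
# sync_event_message(example_message, session, event_producer)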
def _delete_host(session, event_producer, host):
    delete_query = session.query(Host).filter(Host.id == host.id)
    if kafka_available():
        delete_query.delete(synchronize_session="fetch")
        host_deleted = _deleted_by_this_query(host)
        if host_deleted:
            delete_host_count.inc()
            event = build_event(EventType.delete, host)
            insights_id = host.canonical_facts.get("insights_id")
            headers = message_headers(EventType.delete, insights_id)
            event_producer.write_event(event, str(host.id), headers, wait=True)
            delete_query.session.commit()
        else:
            delete_query.session.rollback()
        return host_deleted
    else:
        logger.error(f"Host {host.id} NOT deleted because Kafka server is not available.")
        raise KafkaError("Kafka server not available. Stopping host deletions.")
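# Caller sketch for _delete_host, assuming the caller resolves the Host row
# first; the function name and the not-found handling are illustrative only.
def delete_host_by_id(session, event_producer, host_id):
    host = session.query(Host).filter(Host.id == host_id).one_or_none()
    if host is None:
        return False
    # propagates KafkaError when the broker is unreachable, as raised above
    return _delete_host(session, event_producer, host)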
def handle_message(message, event_producer, message_operation=add_host):
    validated_operation_msg = parse_operation_message(message)
    platform_metadata = validated_operation_msg.get("platform_metadata", {})

    request_id = platform_metadata.get("request_id", UNKNOWN_REQUEST_ID_VALUE)
    initialize_thread_local_storage(request_id)

    payload_tracker = get_payload_tracker(request_id=request_id)

    with PayloadTrackerContext(payload_tracker, received_status_message="message received", current_operation="handle_message"):
        try:
            host = validated_operation_msg["data"]
            output_host, host_id, insights_id, operation_result = message_operation(host, platform_metadata)
            event_type = operation_results_to_event_type(operation_result)
            event = build_event(event_type, output_host, platform_metadata=platform_metadata)
            headers = message_headers(operation_result, insights_id)
            event_producer.write_event(event, str(host_id), headers)
        except ValidationException as ve:
            logger.error(
                "Validation error while adding or updating host: %s",
                ve,
                extra={"host": {"reporter": host.get("reporter")}},
            )
            raise
        except ValueError as ve:
            logger.error("Value error while adding or updating host: %s", ve, extra={"reporter": host.get("reporter")})
            raise
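# Minimal consume-loop sketch for the handle_message variants above, using
# kafka-python; the topic name, deserializer, and wiring are assumptions about
# the surrounding service, not shown in this section.
import json
from kafka import KafkaConsumer

def run_ingress_consumer(event_producer, bootstrap_servers, shutdown=lambda: False):
    consumer = KafkaConsumer(
        "platform.inventory.host-ingress",  # assumed ingress topic name
        bootstrap_servers=bootstrap_servers,
        value_deserializer=lambda value: json.loads(value.decode("utf-8")),
    )
    for record in consumer:
        if shutdown():
            break
        handle_message(record.value, event_producer)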
def _emit_patch_event(serialized_host, host_id, insights_id):
    headers = message_headers(EventType.updated, insights_id)
    event = build_event(EventType.updated, serialized_host)
    current_app.event_producer.write_event(event, str(host_id), headers, Topic.events)
def _emit_patch_event(host):
    key = host["id"]
    headers = message_headers(EventType.updated)
    event = build_event(EventType.updated, host)
    current_app.event_producer.write_event(event, key, headers, Topic.events)
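# Caller sketch for the three-argument _emit_patch_event variant above; it
# reuses serialize_host and the canonical_facts lookup seen in synchronize_hosts.
# The function name here is an assumption for illustration.
def emit_patch_event_for(host, config):
    serialized_host = serialize_host(host, Timestamps.from_config(config), EGRESS_HOST_FIELDS)
    insights_id = host.canonical_facts.get("insights_id")
    _emit_patch_event(serialized_host, host.id, insights_id)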