Example 1
def step_impl(context, number_of_snapshots, match_type, snapshot_type):
    s3_qualified_prefix = os.path.join(
        context.mongo_snapshot_path,
        context.test_run_name,
        context.formatted_date,
        snapshot_type,
    )
    topic_names = [
        template_helper.get_topic_name(topic["topic"])
        for topic in context.topics_for_test
    ]

    snapshot_count = int(number_of_snapshots)

    if snapshot_count == 0:
        for result in aws_helper.assert_no_snapshots_in_s3_threaded(
                topic_names, context.mongo_snapshot_bucket,
                s3_qualified_prefix, 60):
            console_printer.print_info(
                f"Asserted no snapshots created in s3 with key of {result}")
    else:
        snapshot_string = "snapshots" if snapshot_count > 1 else "snapshot"
        for result in aws_helper.assert_snapshots_in_s3_threaded(
            topic_names,
            context.mongo_snapshot_bucket,
            s3_qualified_prefix,
            snapshot_count,
            context.timeout,
            match_type == "exact",
        ):
            console_printer.print_info(
                f"Asserted exactly {number_of_snapshots} {snapshot_string} created in s3 with key of {result}"
            )
Example 2
def dynamodb_clear_ingest_start(context, snapshot_type, topics_list):
    console_printer.print_info("Executing 'dynamodb_clear_ingest_start' fixture")
    updated_topics = message_helper.get_consolidated_topics_list(
        topics_list,
        snapshot_type,
        context.default_topic_list_full_delimited,
        context.default_topic_list_incremental_delimited,
        [
            context.generate_snapshots_topics_override,
            context.send_snapshots_topics_override,
        ],
    )
    correlation_id = (
        snapshots_helper.get_snapshot_run_correlation_id(
            context.test_run_name, snapshot_type
        )
        if not context.send_snapshots_correlation_id_override
        else context.send_snapshots_correlation_id_override
    )

    for topic in updated_topics:
        topic_name = template_helper.get_topic_name(topic)

        export_status_helper.delete_item_in_export_status_table(
            context.dynamo_db_export_status_table_name, topic_name, correlation_id
        )
Example 3
def get_metadata_for_id_and_timestamp_from_file(table_name,
                                                file_path,
                                                topic_name,
                                                wrap_id=False):
    """Returns the metadata for a given id and a tuple of the id and timestamp searched for.

    Arguments:
    table_name -- the table name to check
    file_path -- the file containing the id
    topic_name -- the topic name to get metadata for
    wrap_id -- True if the id should be wrapped with an "id" object (default False)
    """
    console_printer.print_info(
        f"Retrieving metadata for id from file in '{file_path}' in metadata table '{table_name}' with topic name of '{topic_name}'"
    )

    qualified_topic_name = template_helper.get_topic_name(topic_name)
    record_id = file_helper.get_id_object_from_json_file(file_path)
    record_timestamp = file_helper.get_timestamp_as_long_from_json_file(
        file_path)
    id_string = json.dumps(record_id)

    if wrap_id:
        id_string = json.dumps({"id": id_string})

    id_string_qualified = id_string.replace(" ", "")

    results = get_metadata_for_specific_id_and_timestamp_in_topic(
        table_name, id_string_qualified, record_timestamp,
        qualified_topic_name)

    return (id_string_qualified, record_timestamp, results)
Example 4
def get_metadata_for_specific_id_and_timestamp_in_topic(
        table_name, id_string, timestamp, topic_name):
    """Returns the metadata for a given id and timestamp.

    Arguments:
    table_name -- the table name to check
    id_string -- the json dumped id string
    timestamp -- the timestamp as an int
    topic_name -- the topic name to get metadata for
    """
    console_printer.print_info(
        f"Retrieving metadata for id of '{id_string}' in metadata table '{table_name}' with topic name of '{topic_name}' and timestamp of '{str(timestamp)}'"
    )

    qualified_topic_name = template_helper.get_topic_name(topic_name)

    payload_dict = {
        "table-name": table_name,
        "hbase-id-like": id_string,
        "topic-name-equals": qualified_topic_name,
        "hbase-timestamp-equals": timestamp,
    }
    payload_json = json.dumps(payload_dict)

    return invoke_lambda.invoke_ingestion_metadata_query_lambda(payload_json)
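For illustration, here is a minimal sketch of the query payload this helper builds before invoking the ingestion-metadata query lambda. Only the key names come from the function above; the table, id, timestamp, and topic values are made up for the example.

import json

# Hypothetical inputs - a real caller takes these from generated test data.
table_name = "example_metadata_table"
id_string = '{"id":"e1f2a3b4"}'
timestamp = 1585662088400
topic_name = "db.example.collection"

payload_dict = {
    "table-name": table_name,
    "hbase-id-like": id_string,
    "topic-name-equals": topic_name,
    "hbase-timestamp-equals": timestamp,
}

# This JSON string is what would be handed to
# invoke_lambda.invoke_ingestion_metadata_query_lambda(payload_json).
payload_json = json.dumps(payload_dict)
print(payload_json)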
Example 5
def step_impl(context, table, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)

    context.latest_metadata_store_ids = []
    wrap_id_value = id_format == "wrapped"
    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables)

    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder)
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path)

        (
            record_id,
            record_timestamp,
            results,
        ) = metadata_store_helper.get_metadata_for_id_and_timestamp_from_file(
            table_name, latest_file_path, topic_name, wrap_id_value)

        console_printer.print_info(
            f"Received {len(results)} responses in topic '{topic_name}'")
        console_printer.print_info(
            f"Actual metadata store results are '{results}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting exactly one response received for id of '{record_id}' and timestamp of '{str(record_timestamp)}' in topic '{topic_name}'"
        )

        assert (
            len(results) == 1
        ), "Metadata table result not returned, try restarting the k2hb consumers"

        results_iterator = iter(results.items())
        result_row_key_value_pair = next(results_iterator)
        result_row_key = result_row_key_value_pair[0]
        result_row_value = result_row_key_value_pair[1]

        console_printer.print_info(
            f"Asserting the key value for the result in topic '{topic_name}'")

        assert record_id in result_row_key

        console_printer.print_info(
            f"Asserted key value of '{result_row_key}' contains expected id of '{record_id}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting the field values for the result in topic '{topic_name}'"
        )

        assert record_id in result_row_value["hbase_id"]
        assert record_timestamp == result_row_value["hbase_timestamp"]
        assert topic_name == result_row_value["topic_name"]

        context.latest_metadata_store_ids.append(
            [topic_name, record_id, record_timestamp])
Example 6
def step_impl(context, message_type, date, key):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    topic_prefix = streaming_data_helper.generate_topic_prefix(message_type)

    qualified_key = None if key == "None" else key
    date_qualified = (None if date == "None" else datetime.strptime(
        date, "%Y-%m-%dT%H:%M:%S.%f"))
    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        generated_files = kafka_data_generator.generate_kafka_files(
            test_run_name=context.test_run_name,
            s3_input_bucket=context.s3_ingest_bucket,
            # Note: input_template and output_template are not defined in this
            # excerpt; they are assumed to be provided by the surrounding step
            # or module.
            input_template_name=input_template,
            output_template_name=output_template,
            new_uuid=qualified_key,
            local_files_temp_folder=os.path.join(context.temp_folder,
                                                 topic_name),
            fixture_files_root=context.fixture_path_local,
            s3_output_prefix=context.s3_temp_output_path,
            record_count=1,
            topic_name=topic["topic"],
            snapshots_output_folder=context.snapshot_files_hbase_records_temp_folder,
            seconds_timeout=context.timeout,
            fixture_data_folder=folder,
            custom_base_timestamp=date_qualified,
        )

        files_to_send_to_kafka_broker = [
            generated_file[0] for generated_file in generated_files
        ]
        aws_helper.send_files_to_kafka_producer_sns(
            dynamodb_table_name=context.dynamo_db_table_name,
            s3_input_bucket=context.s3_ingest_bucket,
            aws_acc_id=context.aws_acc,
            sns_topic_name=context.aws_sns_topic_name,
            fixture_files=files_to_send_to_kafka_broker,
            message_key=key,
            topic_name=topic["topic"],
            topic_prefix=topic_prefix,
            region=context.aws_region_main,
        )
Example 7
def step_impl(context, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)

    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder)
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path)

        wrap_id_value = id_format == "wrapped"

        file_comparer.assert_specific_file_stored_in_hbase(
            topic_name,
            latest_file_path,
            60,
            record_expected_in_hbase=False,
            wrap_id=wrap_id_value,
        )
Example 8
def step_impl(context, dlq_file_template):
    for topic in context.topics_for_test:
        dlq_file = None
        for dlq_files_and_topic_tuple in context.kafka_generated_dlq_output_files:
            if topic["topic"] == dlq_files_and_topic_tuple[0]:
                for dlq_file_for_topic in dlq_files_and_topic_tuple[1]:
                    if dlq_file_template in dlq_file_for_topic:
                        dlq_file = dlq_file_for_topic

        if dlq_file is None:
            raise AssertionError(
                f"No generated dlq file could be found for dlq template of {dlq_file_template}"
            )

        expected_file_content = file_helper.get_contents_of_file(
            dlq_file, True)
        id_object = file_helper.get_id_object_from_json_file(dlq_file)

        test_run_topic_name = template_helper.get_topic_name(topic["topic"])
        file_comparer.assert_specific_id_missing_in_hbase(
            test_run_topic_name, id_object, 5, True)
Example 9
def step_impl(context, dlq_file_template, table, id_format):
    wrap_id_value = id_format == "wrapped"
    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables)

    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])

        dlq_file = None
        for dlq_files_and_topic_tuple in context.kafka_generated_dlq_output_files:
            if topic["topic"] == dlq_files_and_topic_tuple[0]:
                for dlq_file_for_topic in dlq_files_and_topic_tuple[1]:
                    if dlq_file_template in dlq_file_for_topic:
                        dlq_file = dlq_file_for_topic

        if dlq_file is None:
            raise AssertionError(
                f"No generated dlq file could be found for dlq template of {dlq_file_template} in topic '{topic_name}'"
            )

        id_object = file_helper.get_id_object_from_json_file(dlq_file)
        id_string = json.dumps(id_object)

        if wrap_id_value:
            id_string = json.dumps({"id": id_string})

        id_string_qualified = id_string.replace(" ", "")

        results = metadata_store_helper.get_metadata_for_specific_id_in_topic(
            table_name, id_string_qualified, topic_name)

        console_printer.print_info(
            f"Received {len(results)} responses in topic '{topic_name}'")
        console_printer.print_info(
            f"Actual metadata store results are '{results}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting no response received for id of '{id_string_qualified}' in topic '{topic_name}'"
        )

        assert len(results) == 0
Example 10
def assert_specific_file_stored_in_hbase_threaded(
        topics,
        output_folder,
        timeout,
        record_expected_in_hbase=True,
        wrap_id=False):
    """Checks the specific files in stored in HBase for the given topics and raises assertion errors if not using threads.

    Keyword arguments:
    topics -- full topic names as an array
    output_folder -- the output folder base for the generated historic data
    timeout -- the timeout in seconds
    record_expected_in_hbase -- true if the record should be in HBase and false if it should not (default True)
    wrap_id -- True if the id should be wrapped with an "id" object (default False)
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_results = []

        for topic in topics:
            console_printer.print_info(
                f"Looking for HBase data for topic {topic}")
            output_folder_qualified = os.path.join(output_folder, topic)
            topic_qualified = template_helper.get_topic_name(topic)
            for output_file in os.listdir(output_folder_qualified):
                future_results.append(
                    executor.submit(
                        assert_specific_file_stored_in_hbase,
                        topic_qualified,
                        os.path.join(output_folder_qualified, output_file),
                        timeout,
                        record_expected_in_hbase=record_expected_in_hbase,
                        wrap_id=wrap_id,
                    ))

        wait(future_results)
        for future in future_results:
            try:
                yield future.result()
            except Exception as ex:
                raise AssertionError(ex)
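Because this function is a generator, a caller must iterate it for the per-file checks to run and for any assertion error to surface. A minimal usage sketch, assuming illustrative topic names and an output folder containing one generated file per topic (the yielded value is simply whatever each underlying per-file assertion returns):

# Illustrative values only.
topics = ["db.example.collection-one", "db.example.collection-two"]
output_folder = "/tmp/test-run/generated-historic-data"

for result in assert_specific_file_stored_in_hbase_threaded(
    topics,
    output_folder,
    timeout=60,
    record_expected_in_hbase=True,
    wrap_id=False,
):
    # Each result corresponds to one file that was checked in HBase.
    console_printer.print_info(f"HBase check completed with result '{result}'")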
Example 11
def step_impl(
    context,
    record_count,
    input_file_name,
    output_file_name,
    key_method,
    days_offset,
):
    context.uploaded_id = uuid.uuid4()

    output_template = None if output_file_name == "None" else output_file_name

    for topic in context.topics_for_test:
        key = None
        if key_method.lower() == "static":
            key = context.uploaded_id
        elif key_method.lower() == "topic":
            key = uuid.uuid4()

        topic_name = template_helper.get_topic_name(topic["topic"])
        timestamp_override = (
            datetime.now() + timedelta(days=int(days_offset))
            if days_offset and days_offset.lower() != "none"
            else None
        )

        corporate_data_generator.generate_corporate_data_files(
            context.test_run_name,
            context.corporate_storage_s3_bucket_id,
            input_file_name,
            output_template,
            key,
            os.path.join(context.temp_folder, topic_name),
            context.fixture_path_local,
            context.cdl_data_load_s3_base_prefix_tests,
            record_count,
            topic["topic"],
            context.timeout,
            timestamp_override,
        )
Example 12
def wait_for_statuses_in_export_status_table(timeout, export_status_table_name,
                                             topics, correlation_id,
                                             desired_statuses):
    """Returns true or false for if the items for the given correlation id after waiting for the timeout and topic list match given status.

    Keyword arguments:
    timeout -- the timeout in seconds
    export_status_table_name -- the export table name
    topics -- the array of topics to check
    correlation_id -- the correlation id
    desired_statuses -- an array of allowed statuses
    """
    count = 1
    matched_topics = []

    console_printer.print_info(
        f"Checking all export statuses for all topics match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}'"
    )

    while len(matched_topics) != len(topics) and count <= timeout:
        for topic in topics:
            if topic not in matched_topics:
                topic_name = template_helper.get_topic_name(topic)

                key_dict = {
                    "CorrelationId": {"S": f"{correlation_id}"},
                    "CollectionName": {"S": f"{topic_name}"},
                }

                item_details = aws_helper.get_item_from_dynamodb(
                    export_status_table_name, key_dict)
                if "Item" not in item_details:
                    console_printer.print_debug(
                        f"No export status found for key dict of '{key_dict}'")
                    continue

                collection_status = item_details["Item"]["CollectionStatus"][
                    "S"]
                if collection_status not in desired_statuses:
                    console_printer.print_debug(
                        f"Status was '{collection_status}' which did not match any of '{desired_statuses}'"
                    )
                    continue

                console_printer.print_info(
                    f"Export status matches one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}' and topic '{topic_name}'"
                )
                matched_topics.append(topic)
        time.sleep(1)
        count += 1

    if len(matched_topics) != len(topics):
        console_printer.print_info(
            f"All export statuses for one or more topics did match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}' after '{timeout}' seconds"
        )
        return False

    console_printer.print_info(
        f"All export statuses match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}'"
    )
    return True
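A hedged sketch of how a calling step might use this helper and fail the scenario when the statuses never converge. The table name, topic names, correlation id, and any status value other than "Exported" (which appears in Example 17) are illustrative only:

# Illustrative values only; real steps take these from the behave context.
topics = ["db.example.collection-one", "db.example.collection-two"]
correlation_id = "example_test_run_full"

statuses_matched = wait_for_statuses_in_export_status_table(
    timeout=300,
    export_status_table_name="example-export-status-table",
    topics=topics,
    correlation_id=correlation_id,
    desired_statuses=["Exported", "Sent"],
)

if not statuses_matched:
    raise AssertionError(
        f"Export statuses did not reach a desired state for correlation id '{correlation_id}'"
    )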
Example 13
def step_impl(context, snapshot_type):
    topic_names = [
        template_helper.get_topic_name(topic["topic"])
        for topic in context.topics_for_test
    ]

    for topic_name in topic_names:
        console_printer.print_info(
            f"Checking snapshots for topic '{topic_name}'")

        generated_file = (
            snapshot_data_generator.generate_snapshot_file_from_hbase_records(
                context.test_run_name,
                topic_name,
                context.snapshot_files_hbase_records_temp_folder,
                context.snapshot_files_temp_folder,
            ))

        expected_records = snapshots_helper.get_locally_generated_snapshot_file_records(
            generated_file)
        expected_records.sort()

        console_printer.print_info(
            f"Checking snapshots from s3 at base path '{context.snapshot_s3_output_path}' "
            +
            f"with db name of '{context.db_name}', topic name of '{topic_name}' and date of '{context.formatted_date}'"
        )

        s3_qualified_prefix = os.path.join(
            context.mongo_snapshot_path,
            context.test_run_name,
            context.formatted_date,
            snapshot_type,
        )

        console_printer.print_info(
            f"Checking generated snapshots from s3 at path '{s3_qualified_prefix}'"
        )

        generated_snapshots_count = len(
            aws_helper.get_s3_file_object_keys_matching_pattern(
                context.mongo_snapshot_bucket,
                s3_qualified_prefix,
                f"^{s3_qualified_prefix}/{topic_name}-\d{{3}}-\d{{3}}-\d+.txt.gz.enc$",
            ))

        console_printer.print_info(
            f"Found '{generated_snapshots_count}' generated snapshots from s3 at path '{s3_qualified_prefix}'"
        )

        snapshot_s3_full_output_path = (
            snapshots_helper.generate_snapshot_output_s3_path(
                context.snapshot_s3_output_path,
                topic_name,
                context.db_name,
                context.formatted_date,
                snapshot_type,
            ))

        console_printer.print_info(
            f"Checking snapshots from s3 at path '{snapshot_s3_full_output_path}'"
        )

        if not snapshots_helper.wait_for_snapshots_to_be_sent_to_s3(
                context.timeout,
                generated_snapshots_count,
                context.snapshot_s3_output_bucket,
                snapshot_s3_full_output_path,
        ):
            raise AssertionError(
                f"Snapshots found at '{snapshot_s3_full_output_path}' was not above or matching expected minimum of '{generated_snapshots_count}'"
            )

        console_printer.print_info(
            f"Length of the expected record array is '{len(expected_records)}'"
        )

        console_printer.print_info(
            f"Getting hbase records from snapshots from s3 at path '{snapshot_s3_full_output_path}'"
        )

        actual_records = snapshots_helper.retrieve_records_from_snapshots(
            context.snapshot_s3_output_bucket, snapshot_s3_full_output_path)
        actual_records.sort()
        console_printer.print_info(
            f"Length of the actual record array is '{len(actual_records)}'")

        console_printer.print_info(
            "Asserting the length of the two record arrays")
        console_printer.print_info(f"Expected: {expected_records}")
        console_printer.print_info(f"Actual: {actual_records}")
        assert len(actual_records) >= len(expected_records)

        missing_snapshots = []
        console_printer.print_info(
            "Asserting the values of the expected snapshots")

        for expected_record_number in range(0, len(expected_records)):
            console_printer.print_info(
                f"Asserting the values of expected record number '{expected_record_number}'"
            )

            expected_json = json.loads(
                expected_records[expected_record_number])

            record_found = False
            for actual_record_number in range(0, len(actual_records)):
                actual_json = json.loads(actual_records[actual_record_number])

                if expected_json == actual_json:
                    console_printer.print_info(
                        f"Expected json is '{expected_json}' and actual json (record number '{actual_record_number}') is '{actual_json}'"
                    )
                    record_found = True
                    break

            if not record_found:
                missing_snapshots.append(expected_record_number)

        if len(missing_snapshots) > 0:
            console_printer.print_info(
                f"The following snapshots were not found: '{missing_snapshots}'"
            )

        console_printer.print_info("Individual assertions complete")
        console_printer.print_info("Asserting no records are mismatched")

        assert len(missing_snapshots) == 0
Example 14
def step_impl(context, table):
    if context.data_streaming_tests_skip_reconciling:
        console_printer.print_warning_text(
            f"Not verifying reconciliation due to DATA_STREAMING_TESTS_SKIP_RECONCILING being set to '{str(context.data_streaming_tests_skip_reconciling)}'"
        )
        return

    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables)

    timeout_seconds = 600
    console_printer.print_info(
        f"Checking that all ids are reconciled within '{str(timeout_seconds)}' seconds"
    )

    for latest_metadata_store_id_for_topic in context.latest_metadata_store_ids:
        reconciled = False
        timeout_time = time.time() + timeout_seconds
        while time.time() < timeout_time and not reconciled:
            topic_name = template_helper.get_topic_name(
                latest_metadata_store_id_for_topic[0])
            console_printer.print_info(
                f"Checking that latest id is reconciled for topic '{topic_name}'"
            )

            results = metadata_store_helper.get_metadata_for_specific_id_and_timestamp_in_topic(
                table_name,
                latest_metadata_store_id_for_topic[1],
                latest_metadata_store_id_for_topic[2],
                topic_name,
            )
            assert len(results) == 1

            results_iterator = iter(results.items())
            result_row_key_value_pair = next(results_iterator)
            result_row_key = result_row_key_value_pair[0]
            result_row_value = result_row_key_value_pair[1]

            console_printer.print_info(
                f"Actual metadata store result is '{result_row_value}' in topic '{topic_name}'"
            )
            console_printer.print_info(
                f"Asserting the reconciled field values for the result in topic '{topic_name}'"
            )

            if result_row_value["reconciled_result"] == 1:
                console_printer.print_info(
                    f"Asserting reconciled timestamp is set in topic '{topic_name}'"
                )
                assert result_row_value["reconciled_timestamp"] is not None

                console_printer.print_info(
                    f"Latest id has been reconciled for topic '{topic_name}'")
                reconciled = True
                break

            time.sleep(5)
        if not reconciled:
            raise AssertionError(
                f"Latest id is not reconciled for topic '{topic_name}' after '{str(timeout_seconds)}' seconds"
            )
Example 15
def step_impl(context, streaming_type, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)

    manifest_bucket = context.k2hb_manifest_write_s3_bucket

    valid_prefixes = {
        "main": context.k2hb_main_manifest_write_s3_prefix,
        "equalities": context.k2hb_equality_manifest_write_s3_prefix,
        "audit": context.k2hb_audit_manifest_write_s3_prefix,
    }
    manifest_base_prefix = valid_prefixes.get(streaming_type, "NOT_SET")

    if manifest_base_prefix == "NOT_SET":
        raise AssertionError(
            f"Could not find manifest prefix for streaming of '{streaming_type}'"
        )

    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder)
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path)

        wrap_id_value = id_format == "wrapped"
        file_pattern = f"^.*_.*_\d+-.*_.*_\d+.txt$"

        console_printer.print_info(
            f"Looking for manifest files in '{manifest_bucket}' bucket with prefix of '{manifest_base_prefix}' and pattern of '{file_pattern}'"
        )

        manifest_files = aws_helper.retrieve_files_from_s3(
            manifest_bucket,
            manifest_base_prefix,
            file_pattern,
        )

        console_printer.print_info(
            f"Found '{len(manifest_files)}' manifest files")

        manifest_lines = []
        for manifest_file in manifest_files:
            manifest_lines_in_file = manifest_file.splitlines()
            manifest_lines.extend([
                manifest_line_in_file.replace('""', '"')
                for manifest_line_in_file in manifest_lines_in_file
            ])

        record_id = file_helper.get_id_object_from_json_file(latest_file_path)
        record_timestamp = file_helper.get_timestamp_as_long_from_json_file(
            latest_file_path)

        expected = streaming_manifests_helper.generate_correct_manifest_line(
            record_id, record_timestamp, topic_name, wrap_id=wrap_id_value)

        console_printer.print_info(
            f"Expecting manifest line with data of '{expected}'")
        console_printer.print_info(
            f"Actual manifest lines were '{manifest_lines}'")

        assert expected in manifest_lines
Example 16
def step_impl(
    context,
    record_count,
    message_type,
    input_file_name,
    output_file_name,
    dlq_file_name,
    snapshot_record_file_name,
    encrypt_in_sender,
    wait_for_sending,
    key_method,
):
    context.uploaded_id = uuid.uuid4()

    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    topic_prefix = streaming_data_helper.generate_topic_prefix(message_type)

    skip_encryption = "true" if encrypt_in_sender == "false" else "false"
    output_template = None if output_file_name == "None" else output_file_name
    dlq_template = None if dlq_file_name == "None" else dlq_file_name
    snapshot_record_file_name = (None if snapshot_record_file_name == "None"
                                 else snapshot_record_file_name)
    wait_for_sending_bool = wait_for_sending.lower() == "true"

    message_volume = (context.kafka_message_volume
                      if context.kafka_message_volume else "1")
    random_keys = context.kafka_random_key if context.kafka_random_key else "false"

    context.kafka_generated_dlq_output_files = []

    for topic in context.topics_for_test:
        key = None
        if key_method.lower() == "static":
            key = context.uploaded_id
        elif key_method.lower() == "topic":
            key = uuid.uuid4()

        topic_name = template_helper.get_topic_name(topic["topic"])

        generated_files = kafka_data_generator.generate_kafka_files(
            test_run_name=context.test_run_name,
            s3_input_bucket=context.s3_ingest_bucket,
            input_template_name=input_file_name,
            output_template_name=output_template,
            new_uuid=key,
            local_files_temp_folder=os.path.join(context.temp_folder,
                                                 topic_name),
            fixture_files_root=context.fixture_path_local,
            s3_output_prefix=context.s3_temp_output_path,
            record_count=record_count,
            topic_name=topic["topic"],
            snapshots_output_folder=context.snapshot_files_hbase_records_temp_folder,
            seconds_timeout=context.timeout,
            fixture_data_folder=folder,
            dlq_template_name=dlq_template,
            snapshot_record_template_name=snapshot_record_file_name,
        )

        files_to_send_to_kafka_broker = [
            generated_file[0] for generated_file in generated_files
        ]
        aws_helper.send_files_to_kafka_producer_sns(
            dynamodb_table_name=context.dynamo_db_table_name,
            s3_input_bucket=context.s3_ingest_bucket,
            aws_acc_id=context.aws_acc,
            sns_topic_name=context.aws_sns_topic_name,
            fixture_files=files_to_send_to_kafka_broker,
            message_key=context.uploaded_id,
            topic_name=topic["topic"],
            topic_prefix=topic_prefix,
            region=context.aws_region_main,
            skip_encryption=skip_encryption,
            kafka_message_volume=message_volume,
            kafka_random_key=random_keys,
            wait_for_job_completion=wait_for_sending_bool,
        )

        dlq_files_for_topic = []
        for generated_file in generated_files:
            if len(generated_file) > 3:
                dlq_files_for_topic.append(generated_file[3])

        context.kafka_generated_dlq_output_files.append(
            (topic["topic"], dlq_files_for_topic))
Example 17
def step_impl(context, snapshot_type):
    topics = message_helper.get_consolidated_topics_list(
        [topic["topic"] for topic in context.topics_for_test],
        snapshot_type,
        context.default_topic_list_full_delimited,
        context.default_topic_list_incremental_delimited,
        [context.send_snapshots_topics_override],
    )

    formatted_date = (context.formatted_date
                      if not context.send_snapshots_date_override else
                      context.send_snapshots_date_override)
    correlation_id = (snapshots_helper.get_snapshot_run_correlation_id(
        context.test_run_name, snapshot_type)
                      if not context.send_snapshots_correlation_id_override
                      else context.send_snapshots_correlation_id_override)
    reprocess_files = context.send_snapshots_reprocess_files
    s3_qualified_prefix = os.path.join(context.mongo_snapshot_path,
                                       formatted_date, snapshot_type)

    console_printer.print_info(
        f"Looking for snapshots present in '{s3_qualified_prefix}'")

    for topic in topics:
        topic_qualified = template_helper.get_topic_name(topic)
        topic_name = template_helper.remove_any_pipe_values_from_topic_name(
            topic_qualified)

        snapshot_pattern = (
            f"^{s3_qualified_prefix}/{topic_name}-\d{{3}}-\d{{3}}-\d+.txt.gz.enc$"
        )

        console_printer.print_info(
            f"Looking for snapshots using pattern '{snapshot_pattern}'")

        generated_snapshot_keys = aws_helper.get_s3_file_object_keys_matching_pattern(
            context.mongo_snapshot_bucket,
            f"{s3_qualified_prefix}/{topic_name}-",
            snapshot_pattern,
        )

        generated_snapshots_count = len(generated_snapshot_keys)

        console_printer.print_info(
            f"Found '{generated_snapshots_count}' snapshots")

        export_status_helper.update_item_in_export_status_table(
            context.dynamo_db_export_status_table_name,
            topic_name,
            correlation_id,
            "Exported",
            generated_snapshots_count,
            0,
            0,
        )

        for generated_snapshot_key in generated_snapshot_keys:
            message_helper.send_start_snapshot_sending_message(
                context.aws_sqs_queue_snapshot_sender,
                generated_snapshot_key,
                topic_name,
                correlation_id,
                reprocess_files,
                formatted_date,
                snapshot_type,
            )