Example #1
    def test_two_sequential_reads(self, connector_config,
                                  configured_catalog_for_incremental,
                                  cursor_paths,
                                  docker_runner: ConnectorRunner):
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

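        # First sync: no input state; collect the emitted records and state messages.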
        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), "First incremental sync should produce records younger or equal to cursor value from the state"

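        # Second sync: resume from the latest emitted state and check the returned records against it.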
        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value >= state_value
            ), "Second incremental sync should produce records older or equal to cursor value from the state"
Example #2
    def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner):
        configured_catalog = incremental_only_catalog(configured_catalog)
        output = docker_runner.call_read_with_state(config=connector_config, catalog=configured_catalog, state=future_state)
        records = filter_output(output, type_=Type.RECORD)
        states = filter_output(output, type_=Type.STATE)

        assert not records, "The sync should produce no records when run with the state with abnormally large values"
        assert states, "The sync should produce at least one STATE message"
Example #3
    def test_two_sequential_reads(
        self,
        inputs: IncrementalConfig,
        connector_config: SecretDict,
        configured_catalog_for_incremental: ConfiguredAirbyteCatalog,
        cursor_paths: dict[str, list[str]],
        docker_runner: ConnectorRunner,
    ):
        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should produce records younger or equal to cursor value from the state. Stream: {stream_name}"

        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value, stream_name in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert compare_cursor_with_threshold(
                record_value, state_value, threshold_days
            ), f"Second incremental sync should produce records older or equal to cursor value from the state. Stream: {stream_name}"
Example #4
    def test_read_sequential_slices(self, inputs: IncrementalConfig,
                                    connector_config,
                                    configured_catalog_for_incremental,
                                    cursor_paths,
                                    docker_runner: ConnectorRunner):
        """
        Incremental test that calls the read method without a state checkpoint. Then we partition the results by stream and
        slice checkpoints, resulting in batches of messages that look like:
        <state message>
        <record message>
        ...
        <record message>

        Using these batches, we then make additional read method calls using the state message and verify the correctness of the
        messages in the response.
        """
        if inputs.skip_comprehensive_incremental_tests:
            pytest.skip(
                "Skipping new incremental test based on acceptance-test-config.yml"
            )
            return

        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should produce records younger or equal to cursor value from the state. Stream: {stream_name}"

        # Create partitions made up of one state message followed by any records that come before the next state
        filtered_messages = [
            message for message in output
            if message.type == Type.STATE or message.type == Type.RECORD
        ]
        right_index = len(filtered_messages)
        checkpoint_messages = []
        for index, message in reversed(list(enumerate(filtered_messages))):
            if message.type == Type.STATE:
                message_group = (filtered_messages[index],
                                 filtered_messages[index + 1:right_index])
                checkpoint_messages.insert(0, message_group)
                right_index = index

        # We sometimes have duplicate identical state messages in a stream which we can filter out to speed things up
        checkpoint_messages = [
            message for index, message in enumerate(checkpoint_messages)
            if message not in checkpoint_messages[:index]
        ]

        # To avoid spamming APIs we only test a fraction of slices
        num_slices_to_test = 1 if len(checkpoint_messages) <= 5 else len(checkpoint_messages) // 5
        for message_batch in checkpoint_messages[::num_slices_to_test]:
            assert len(message_batch) > 0 and message_batch[0].type == Type.STATE
            current_state = message_batch[0]
            output = docker_runner.call_read_with_state(
                connector_config, configured_catalog_for_incremental,
                current_state.state.data)
            records = filter_output(output, type_=Type.RECORD)

            for record_value, state_value, stream_name in records_with_state(
                    records, current_state.state.data, stream_mapping,
                    cursor_paths):
                assert compare_cursor_with_threshold(
                    record_value, state_value, threshold_days
                ), f"Second incremental sync should produce records older or equal to cursor value from the state. Stream: {stream_name}"