Example #1
    def test_two_sequential_reads(self, connector_config,
                                  configured_catalog_for_incremental,
                                  cursor_paths,
                                  docker_runner: ConnectorRunner):
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), "First incremental sync should only produce records with a cursor value at or before the state's cursor value"

        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value >= state_value
            ), "Second incremental sync should only produce records with a cursor value at or after the state's cursor value"
Example #2
    def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner):
        configured_catalog = incremental_only_catalog(configured_catalog)
        output = docker_runner.call_read_with_state(config=connector_config, catalog=configured_catalog, state=future_state)
        records = filter_output(output, type_=Type.RECORD)
        states = filter_output(output, type_=Type.STATE)

        assert not records, "The sync should produce no records when started from a state with abnormally large cursor values"
        assert states, "The sync should produce at least one STATE message"
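
incremental_only_catalog is not shown either; a plausible sketch, assuming it simply drops streams that do not declare incremental support (SyncMode is the same enum used in Example #3):

def incremental_only_catalog(configured_catalog: ConfiguredAirbyteCatalog) -> ConfiguredAirbyteCatalog:
    # Keep only the streams whose source supports incremental sync
    configured_catalog.streams = [
        stream
        for stream in configured_catalog.streams
        if SyncMode.incremental in stream.stream.supported_sync_modes
    ]
    return configured_catalog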
Example #3
    def test_airbyte_trace_message_on_failure(self, connector_config, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
        if not inputs.expect_trace_message_on_failure:
            pytest.skip("Skipping `test_airbyte_trace_message_on_failure` because `inputs.expect_trace_message_on_failure=False`")
            return

        invalid_configured_catalog = ConfiguredAirbyteCatalog(
            streams=[
                # create ConfiguredAirbyteStream without validation
                ConfiguredAirbyteStream.construct(
                    stream=AirbyteStream(
                        name="__AIRBYTE__stream_that_does_not_exist",
                        json_schema={"type": "object", "properties": {"f1": {"type": "string"}}},
                        supported_sync_modes=[SyncMode.full_refresh],
                    ),
                    sync_mode="INVALID",
                    destination_sync_mode="INVALID",
                )
            ]
        )

        output = docker_runner.call_read(connector_config, invalid_configured_catalog, raise_container_error=False)
        trace_messages = filter_output(output, Type.TRACE)
        error_trace_messages = list(filter(lambda m: m.trace.type == TraceType.ERROR, trace_messages))

        assert len(error_trace_messages) >= 1, "Connector should emit at least one error trace message"
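
filter_output appears in every example but is never defined here. Given how it is called (positionally in this example, via type_= elsewhere), it is very likely a plain type filter over the connector's message stream; a minimal sketch:

def filter_output(records, type_):
    # Keep only the AirbyteMessages of the requested type (RECORD, STATE, TRACE, ...)
    return [message for message in records if message.type == type_]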
Example #4
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteRecordMessage],
        docker_runner: ConnectorRunner,
        detailed_logger,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [message.record for message in filter_output(output, Type.RECORD)]

        assert records, "At least one record should be read using provided catalog"

        if inputs.validate_schema:
            self._validate_schema(records=records, configured_catalog=configured_catalog)

        self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
        for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert (
                    pk_value is not None
                ), f"Primary key subkeys {repr(pk_path)} have null values or are missing from {record.stream} stream records."

        # TODO: remove this condition after https://github.com/airbytehq/airbyte/issues/8312 is done
        if inputs.validate_data_points:
            self._validate_field_appears_at_least_once(records=records, configured_catalog=configured_catalog)

        if expected_records:
            self._validate_expected_records(
                records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
            )
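
primary_keys_for_records is also undefined in these snippets. A minimal sketch, assuming each configured stream may declare source_defined_primary_key as a list of key paths into the record data (a real AirbyteStream field):

def primary_keys_for_records(streams, records):
    for stream in streams:
        pk_paths = stream.stream.source_defined_primary_key or []
        if not pk_paths:
            continue
        for record in records:
            if record.stream != stream.stream.name:
                continue
            pks = {}
            for pk_path in pk_paths:
                # Follow the key path into the record, tolerating missing keys
                value = record.data
                for key in pk_path:
                    value = value.get(key) if isinstance(value, dict) else None
                pks[tuple(pk_path)] = value
            yield pks, record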
Example #5
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteMessage],
        docker_runner: ConnectorRunner,
        detailed_logger,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [message.record for message in filter_output(output, Type.RECORD)]

        assert records, "At least one record should be read using provided catalog"

        if inputs.validate_schema:
            self._validate_schema(records=records, configured_catalog=configured_catalog)

        self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
        for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert pk_value is not None, (
                    f"Primary key subkeys {repr(pk_path)} have null values or are missing from {record.stream} stream records."
                )

        if expected_records:
            self._validate_expected_records(
                records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
            )
Example #6
def actual_connector_spec_fixture(request: BaseTest, docker_runner):
    if not request.instance.spec_cache:
        output = docker_runner.call_spec()
        spec_messages = filter_output(output, Type.SPEC)
        assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
        spec = spec_messages[0].spec
        request.instance.spec_cache = spec
    return request.instance.spec_cache
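
For the caching above to work on first access, the owning test class is presumably expected to pre-declare the attribute; a sketch of the assumed declaration (the class name is illustrative):

class TestSpec(BaseTest):
    # Populated lazily by actual_connector_spec_fixture on first use
    spec_cache: ConnectorSpecification = None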
Example #7
    def test_discover(self, connector_config, docker_runner: ConnectorRunner):
        """Verify that discover produce correct schema."""
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = filter_output(output, Type.CATALOG)

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
        assert catalog_messages[0].catalog, "Message should have catalog"
        assert catalog_messages[0].catalog.streams, "Catalog should contain streams"
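
A hypothetical per-stream tightening of the same test, written as lines that could be appended to the test body (not part of the original; name and json_schema are real AirbyteStream fields, visible in Example #3):

        for stream in catalog_messages[0].catalog.streams:
            assert stream.name, "Every discovered stream should have a name"
            assert stream.json_schema, "Every discovered stream should declare a JSON schema"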
Example #8
    def test_check(self, connector_config, inputs: ConnectionTestConfig, docker_runner: ConnectorRunner):
        if inputs.status == ConnectionTestConfig.Status.Succeed:
            output = docker_runner.call_check(config=connector_config)
            con_messages = filter_output(output, Type.CONNECTION_STATUS)

            assert len(con_messages) == 1, "Connection status message should be emitted exactly once"
            assert con_messages[0].connectionStatus.status == Status.SUCCEEDED
        elif inputs.status == ConnectionTestConfig.Status.Failed:
            output = docker_runner.call_check(config=connector_config)
            con_messages = filter_output(output, Type.CONNECTION_STATUS)

            assert len(con_messages) == 1, "Connection status message should be emitted exactly once"
            assert con_messages[0].connectionStatus.status == Status.FAILED
        elif inputs.status == ConnectionTestConfig.Status.Exception:
            with pytest.raises(ContainerError) as err:
                docker_runner.call_check(config=connector_config)

            assert err.value.exit_status != 0, "Connector should exit with error code"
            assert "Traceback" in err.value.stderr, "Connector should print exception"
Example #9
def actual_connector_spec_fixture(request: BaseTest, docker_runner):
    if not request.spec_cache:
        output = docker_runner.call_spec()
        spec_messages = filter_output(output, Type.SPEC)
        assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
        assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT"), "AIRBYTE_ENTRYPOINT must be set in the Dockerfile"
        assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") == " ".join(
            docker_runner.entry_point
        ), "AIRBYTE_ENTRYPOINT should equal the space-joined entrypoint"
        spec = spec_messages[0].spec
        request.spec_cache = spec
    return request.spec_cache
Example #10
    def test_two_sequential_reads(
        self,
        inputs: IncrementalConfig,
        connector_config: SecretDict,
        configured_catalog_for_incremental: ConfiguredAirbyteCatalog,
        cursor_paths: dict[str, list[str]],
        docker_runner: ConnectorRunner,
    ):
        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should only produce records with a cursor value at or before the state's cursor value. Stream: {stream_name}"

        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value, stream_name in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert compare_cursor_with_threshold(
                record_value, state_value, threshold_days
            ), f"Second incremental sync should only produce records with a cursor value at or after the state's cursor value (within the threshold). Stream: {stream_name}"
Example #11
    def test_read_sequential_slices(self, inputs: IncrementalConfig,
                                    connector_config,
                                    configured_catalog_for_incremental,
                                    cursor_paths,
                                    docker_runner: ConnectorRunner):
        """
        Incremental test that calls the read method without a state checkpoint. Then we partition the results by stream and
        slice checkpoints, resulting in batches of messages that look like:
        <state message>
        <record message>
        ...
        <record message>

        Using these batches, we then make additional read method calls using the state message and verify the correctness of the
        messages in the response.
        """
        if inputs.skip_comprehensive_incremental_tests:
            pytest.skip(
                "Skipping new incremental test based on acceptance-test-config.yml"
            )

        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should only produce records with a cursor value at or before the state's cursor value. Stream: {stream_name}"

        # Create partitions made up of one state message followed by any records that come before the next state
        filtered_messages = [
            message for message in output
            if message.type == Type.STATE or message.type == Type.RECORD
        ]
        right_index = len(filtered_messages)
        checkpoint_messages = []
        for index, message in reversed(list(enumerate(filtered_messages))):
            if message.type == Type.STATE:
                message_group = (filtered_messages[index],
                                 filtered_messages[index + 1:right_index])
                checkpoint_messages.insert(0, message_group)
                right_index = index

        # We sometimes have duplicate identical state messages in a stream which we can filter out to speed things up
        checkpoint_messages = [
            message for index, message in enumerate(checkpoint_messages)
            if message not in checkpoint_messages[:index]
        ]

        # To avoid spamming APIs we only test a fraction of slices
        num_slices_to_test = 1 if len(checkpoint_messages) <= 5 else len(checkpoint_messages) // 5
        for message_batch in checkpoint_messages[::num_slices_to_test]:
            assert len(message_batch) > 0 and message_batch[0].type == Type.STATE
            current_state = message_batch[0]
            output = docker_runner.call_read_with_state(
                connector_config, configured_catalog_for_incremental,
                current_state.state.data)
            records = filter_output(output, type_=Type.RECORD)

            for record_value, state_value, stream_name in records_with_state(
                    records, current_state.state.data, stream_mapping,
                    cursor_paths):
                assert compare_cursor_with_threshold(
                    record_value, state_value, threshold_days
                ), f"Second incremental sync should only produce records with a cursor value at or after the state's cursor value (within the threshold). Stream: {stream_name}"
Example #12
    def test_discover(self, connector_config, docker_runner: ConnectorRunner):
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = filter_output(output, Type.CATALOG)

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"