Example #1
    def test_two_sequential_reads(self, connector_config,
                                  configured_catalog_for_incremental,
                                  cursor_paths,
                                  docker_runner: ConnectorRunner):
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), "First incremental sync should produce records younger than or equal to the cursor value from the state"

        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value >= state_value
            ), "Second incremental sync should produce records older than or equal to the cursor value from the state"
Example #2
    def test_check(self, connector_config, inputs: ConnectionTestConfig,
                   docker_runner: ConnectorRunner):
        if inputs.status == ConnectionTestConfig.Status.Succeed:
            output = docker_runner.call_check(config=connector_config)
            con_messages = [
                message for message in output
                if message.type == Type.CONNECTION_STATUS
            ]

            assert len(
                con_messages
            ) == 1, "Connection status message should be emitted exactly once"
            assert con_messages[0].connectionStatus.status == Status.SUCCEEDED
        elif inputs.status == ConnectionTestConfig.Status.Failed:
            output = docker_runner.call_check(config=connector_config)
            con_messages = [
                message for message in output
                if message.type == Type.CONNECTION_STATUS
            ]

            assert len(
                con_messages
            ) == 1, "Connection status message should be emitted exactly once"
            assert con_messages[0].connectionStatus.status == Status.FAILED
        elif inputs.status == ConnectionTestConfig.Status.Exception:
            with pytest.raises(ContainerError) as err:
                docker_runner.call_check(config=connector_config)

            assert err.value.exit_status != 0, "Connector should exit with error code"
            assert "Traceback" in err.value.stderr.decode(
                "utf-8"), "Connector should print exception"
Example #3
    def test_sequential_reads(self, connector_config, configured_catalog, docker_runner: ConnectorRunner):
        configured_catalog = full_refresh_only_catalog(configured_catalog)
        output = docker_runner.call_read(connector_config, configured_catalog)
        records_1 = [message.record.data for message in output if message.type == Type.RECORD]

        output = docker_runner.call_read(connector_config, configured_catalog)
        records_2 = [message.record.data for message in output if message.type == Type.RECORD]
        serialize = partial(json.dumps, sort_keys=True)

        assert not (
            set(map(serialize, records_1)) - set(map(serialize, records_2))
        ), "The two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
Example #4
    def test_sequential_reads(
        self,
        inputs: ConnectionTestConfig,
        connector_config: SecretDict,
        configured_catalog: ConfiguredAirbyteCatalog,
        docker_runner: ConnectorRunner,
        detailed_logger: Logger,
    ):
        ignored_fields = getattr(inputs, "ignored_fields") or {}
        configured_catalog = full_refresh_only_catalog(configured_catalog)
        output = docker_runner.call_read(connector_config, configured_catalog)
        records_1 = [
            message.record for message in output if message.type == Type.RECORD
        ]
        records_by_stream_1 = defaultdict(list)
        for record in records_1:
            records_by_stream_1[record.stream].append(record.data)

        output = docker_runner.call_read(connector_config, configured_catalog)
        records_2 = [
            message.record for message in output if message.type == Type.RECORD
        ]
        records_by_stream_2 = defaultdict(list)
        for record in records_2:
            records_by_stream_2[record.stream].append(record.data)

        pks_by_stream = primary_keys_by_stream(configured_catalog)

        for stream in records_by_stream_1:
            if pks_by_stream.get(stream):
                serializer = partial(primary_keys_only,
                                     pks=pks_by_stream.get(stream))
            else:
                serializer = partial(make_hashable,
                                     exclude_fields=ignored_fields.get(stream))
            stream_records_1 = records_by_stream_1.get(stream)
            stream_records_2 = records_by_stream_2.get(stream)
            # Using the symmetric difference of the serialized record sets:
            # any element unique to one read means the two reads disagree
            output_diff = set(map(serializer, stream_records_1)).symmetric_difference(
                set(map(serializer, stream_records_2))
            )
            if output_diff:
                msg = f"{stream}: the two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
                detailed_logger.info(msg)
                detailed_logger.info("First read")
                detailed_logger.log_json_list(stream_records_1)
                detailed_logger.info("Second read")
                detailed_logger.log_json_list(stream_records_2)
                detailed_logger.info("Difference")
                detailed_logger.log_json_list(output_diff)
                pytest.fail(msg)
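The serializers in Example #4 have to turn record dicts into hashable values so they can be diffed as sets. make_hashable might look roughly like the sketch below; the exclude_fields handling is an assumption based on the call site:

import json
from typing import Mapping, Optional, Sequence


def make_hashable(record: Mapping, exclude_fields: Optional[Sequence[str]] = None) -> str:
    # Sketch: deterministic JSON serialization so records can live in a set;
    # ignored fields are dropped before comparison.
    exclude_fields = set(exclude_fields or [])
    return json.dumps(
        {key: value for key, value in record.items() if key not in exclude_fields},
        sort_keys=True,
    )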
Example #5
    def test_match_expected(self, connector_spec: ConnectorSpecification,
                            connector_config: SecretDict,
                            docker_runner: ConnectorRunner):
        output = docker_runner.call_spec()
        spec_messages = [
            message for message in output if message.type == Type.SPEC
        ]

        assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
        if connector_spec:
            assert spec_messages[0].spec == connector_spec, "Spec should be equal to the one in spec.json file"

        assert docker_runner.env_variables.get(
            "AIRBYTE_ENTRYPOINT"
        ), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
        assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") == " ".join(
            docker_runner.entry_point
        ), "env should be equal to space-joined entrypoint"

        # Getting rid of technical variables that start with an underscore
        config = {
            key: value
            for key, value in connector_config.data.items()
            if not key.startswith("_")
        }

        validate(instance=config,
                 schema=spec_messages[0].spec.connectionSpecification)
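The validate call at the end of Example #5 is presumably jsonschema.validate, which raises on a config that does not match the connector's connectionSpecification. A tiny illustration with a hypothetical spec:

from jsonschema import validate

# hypothetical connection spec, for illustration only
spec_schema = {"type": "object", "required": ["api_key"], "properties": {"api_key": {"type": "string"}}}
validate(instance={"api_key": "secret"}, schema=spec_schema)  # raises jsonschema.ValidationError on mismatch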
Example #6
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteMessage],
        docker_runner: ConnectorRunner,
        detailed_logger,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [message.record for message in filter_output(output, Type.RECORD)]

        assert records, "At least one record should be read using provided catalog"

        if inputs.validate_schema:
            self._validate_schema(records=records, configured_catalog=configured_catalog)

        self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
        for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert pk_value is not None, (
                    f"Primary key subkeys {repr(pk_path)} have null values or are not present in {record.stream} stream records."
                )

        if expected_records:
            self._validate_expected_records(
                records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
            )
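primary_keys_for_records apparently pairs each record with the values found at its stream's primary-key paths. A rough sketch under that assumption (stream.primary_key as a list of key paths, record.data as a dict):

from typing import Any, Dict, Iterable, Tuple


def primary_keys_for_records(streams, records) -> Iterable[Tuple[Dict[str, Any], Any]]:
    # Sketch: yield ({pk_path: value, ...}, record) for every record whose
    # stream declares a primary key.
    pks_by_stream = {stream.stream.name: stream.primary_key or [] for stream in streams}
    for record in records:
        pk_paths = pks_by_stream.get(record.stream)
        if not pk_paths:
            continue
        pks = {}
        for path in pk_paths:  # each path is a list of keys, e.g. ["id"]
            value: Any = record.data
            for key in path:
                value = value.get(key) if isinstance(value, dict) else None
            pks["/".join(path)] = value
        yield pks, record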
Example #7
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteRecordMessage],
        docker_runner: ConnectorRunner,
        detailed_logger,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [message.record for message in filter_output(output, Type.RECORD)]

        assert records, "At least one record should be read using provided catalog"

        if inputs.validate_schema:
            self._validate_schema(records=records, configured_catalog=configured_catalog)

        self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
        for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert (
                    pk_value is not None
                ), f"Primary key subkeys {repr(pk_path)} have null values or are not present in {record.stream} stream records."

        # TODO: remove this condition after https://github.com/airbytehq/airbyte/issues/8312 is done
        if inputs.validate_data_points:
            self._validate_field_appears_at_least_once(records=records, configured_catalog=configured_catalog)

        if expected_records:
            self._validate_expected_records(
                records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
            )
Example #8
    def test_airbyte_trace_message_on_failure(self, connector_config, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
        if not inputs.expect_trace_message_on_failure:
            pytest.skip("Skipping `test_airbyte_trace_message_on_failure` because `inputs.expect_trace_message_on_failure=False`")
            return

        invalid_configured_catalog = ConfiguredAirbyteCatalog(
            streams=[
                # create ConfiguredAirbyteStream without validation
                ConfiguredAirbyteStream.construct(
                    stream=AirbyteStream(
                        name="__AIRBYTE__stream_that_does_not_exist",
                        json_schema={"type": "object", "properties": {"f1": {"type": "string"}}},
                        supported_sync_modes=[SyncMode.full_refresh],
                    ),
                    sync_mode="INVALID",
                    destination_sync_mode="INVALID",
                )
            ]
        )

        output = docker_runner.call_read(connector_config, invalid_configured_catalog, raise_container_error=False)
        trace_messages = filter_output(output, Type.TRACE)
        error_trace_messages = list(filter(lambda m: m.trace.type == TraceType.ERROR, trace_messages))

        assert len(error_trace_messages) >= 1, "Connector should emit at least one error trace message"
Example #9
    def test_spec(self, connector_spec: ConnectorSpecification, docker_runner: ConnectorRunner):
        output = docker_runner.call_spec()
        spec_messages = [message for message in output if message.type == Type.SPEC]

        assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
        if connector_spec:
            assert spec_messages[0].spec == connector_spec, "Spec should be equal to the one in spec.json file"
Example #10
    def test_connector_image_without_env(self, connector_image_without_env, tmp_path):
        docker_runner = ConnectorRunner(image_name=connector_image_without_env, volume=tmp_path)
        assert not docker_runner.env_variables.get(
            "AIRBYTE_ENTRYPOINT"
        ), "this test should fail if AIRBYTE_ENTRYPOINT is defined"
Example #11
    def test_discover(self, connector_config, docker_runner: ConnectorRunner):
        """Verify that discover produce correct schema."""
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = filter_output(output, Type.CATALOG)

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
        assert catalog_messages[0].catalog, "Message should have catalog"
        assert catalog_messages[0].catalog.streams, "Catalog should contain streams"
Example #12
    def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner):
        configured_catalog = incremental_only_catalog(configured_catalog)
        output = docker_runner.call_read_with_state(config=connector_config, catalog=configured_catalog, state=future_state)
        records = filter_output(output, type_=Type.RECORD)
        states = filter_output(output, type_=Type.STATE)

        assert not records, "The sync should produce no records when run with the state with abnormally large values"
        assert states, "The sync should produce at least one STATE message"
Example #13
    def test_discover(self, connector_config, docker_runner: ConnectorRunner):
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = [
            message for message in output if message.type == Type.CATALOG
        ]

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
Example #14
    def test_correct_connector_image(self, correct_connector_image, tmp_path):
        docker_runner = ConnectorRunner(image_name=correct_connector_image, volume=tmp_path)
        assert docker_runner.env_variables.get(
            "AIRBYTE_ENTRYPOINT"
        ), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
        assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") == " ".join(
            docker_runner.entry_point
        ), "env should be equal to space-joined entrypoint"
Example #15
def pull_docker_image(acceptance_test_config) -> None:
    """Startup fixture to pull docker image"""
    image_name = acceptance_test_config.connector_image
    config_filename = "acceptance-test-config.yml"
    try:
        ConnectorRunner(image_name=image_name, volume=Path("."))
    except errors.ImageNotFound:
        pytest.exit(
            f"Docker image `{image_name}` not found, please check your {config_filename} file",
            returncode=1)
Example #16
    def test_discover(self, connector_config, catalog, docker_runner: ConnectorRunner):
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = [message for message in output if message.type == Type.CATALOG]

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
        if catalog:
            for stream1, stream2 in zip(catalog_messages[0].catalog.streams, catalog.streams):
                assert stream1.json_schema == stream2.json_schema, f"Streams: {stream1.name} vs {stream2.name}, stream schemas should match"
                stream1.json_schema = None
                stream2.json_schema = None
                assert stream1.dict() == stream2.dict(), f"Streams {stream1.name} and {stream2.name}, stream configs should match"
Example #17
    def test_docker_image_env_ne_entrypoint(self, connector_image_with_ne_properties, tmp_path):
        docker_runner = ConnectorRunner(image_name=connector_image_with_ne_properties, volume=tmp_path)
        assert docker_runner.env_variables.get(
            "AIRBYTE_ENTRYPOINT"
        ), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
        assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") != " ".join(
            docker_runner.entry_point
        ), 'This test should fail if we have " ".join(ENTRYPOINT) == ENV'
Example #18
    def test_two_sequential_reads(
        self,
        inputs: IncrementalConfig,
        connector_config: SecretDict,
        configured_catalog_for_incremental: ConfiguredAirbyteCatalog,
        cursor_paths: dict[str, list[str]],
        docker_runner: ConnectorRunner,
    ):
        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should produce records younger than or equal to the cursor value from the state. Stream: {stream_name}"

        output = docker_runner.call_read_with_state(
            connector_config,
            configured_catalog_for_incremental,
            state=latest_state)
        records_2 = filter_output(output, type_=Type.RECORD)

        for record_value, state_value, stream_name in records_with_state(
                records_2, latest_state, stream_mapping, cursor_paths):
            assert compare_cursor_with_threshold(
                record_value, state_value, threshold_days
            ), f"Second incremental sync should produce records older than or equal to the cursor value from the state. Stream: {stream_name}"
Example #19
def catalog_schemas_fixture(connector_config, docker_runner: ConnectorRunner,
                            cached_schemas) -> MutableMapping[str, Any]:
    """JSON schemas for each stream"""
    if not cached_schemas:
        output = docker_runner.call_discover(config=connector_config)
        catalogs = [
            message.catalog for message in output
            if message.type == Type.CATALOG
        ]
        for stream in catalogs[-1].streams:
            cached_schemas[stream.name] = stream.json_schema

    return cached_schemas
Example #20
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteMessage],
        docker_runner: ConnectorRunner,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [
            message.record for message in output if message.type == Type.RECORD
        ]
        counter = Counter(record.stream for record in records)

        all_streams = set(stream.stream.name
                          for stream in configured_catalog.streams)
        streams_with_records = set(counter.keys())
        streams_without_records = all_streams - streams_with_records

        assert records, "At least one record should be read using provided catalog"

        for pks, record in primary_keys_for_records(
                streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert pk_value is not None, (
                    f"Primary key subkeys {repr(pk_path)} "
                    f"have null values or are not present in {record.stream} stream records."
                )

        if inputs.validate_output_from_all_streams:
            assert (
                not streams_without_records
            ), f"All streams should return some records, streams without records: {streams_without_records}"

        if expected_records:
            actual_by_stream = self.group_by_stream(records)
            expected_by_stream = self.group_by_stream(expected_records)
            for stream_name, expected in expected_by_stream.items():
                actual = actual_by_stream.get(stream_name, [])

                self.compare_records(
                    stream_name=stream_name,
                    actual=actual,
                    expected=expected,
                    extra_fields=inputs.expect_records.extra_fields,
                    exact_order=inputs.expect_records.exact_order,
                    extra_records=inputs.expect_records.extra_records,
                )
Example #21
    def test_read(self, connector_config, configured_catalog,
                  inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [
            message.record for message in output if message.type == Type.RECORD
        ]
        counter = Counter(record.stream for record in records)

        all_streams = set(stream.stream.name
                          for stream in configured_catalog.streams)
        streams_with_records = set(counter.keys())
        streams_without_records = all_streams - streams_with_records

        assert records, "At least one record should be read using provided catalog"

        if inputs.validate_output_from_all_streams:
            assert (
                not streams_without_records
            ), f"All streams should return some records, streams without records: {streams_without_records}"
Example #22
    def test_discover(self, connector_config, docker_runner: ConnectorRunner):
        output = docker_runner.call_discover(config=connector_config)
        catalog_messages = filter_output(output, Type.CATALOG)

        assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
Example #23
def docker_runner_fixture(image_tag, tmp_path) -> ConnectorRunner:
    return ConnectorRunner(image_tag, volume=tmp_path)
Example #24
    def test_read(
        self,
        connector_config,
        configured_catalog,
        inputs: BasicReadTestConfig,
        expected_records: List[AirbyteMessage],
        docker_runner: ConnectorRunner,
    ):
        output = docker_runner.call_read(connector_config, configured_catalog)
        records = [
            message.record for message in output if message.type == Type.RECORD
        ]
        counter = Counter(record.stream for record in records)
        if inputs.validate_schema:
            bar = "-" * 80
            streams_errors = verify_records_schema(records, configured_catalog)
            for stream_name, errors in streams_errors.items():
                errors = map(str, errors.values())
                str_errors = f"\n{bar}\n".join(errors)
                logging.error(
                    f"The {stream_name} stream has the following schema errors:\n{str_errors}"
                )

            if streams_errors:
                pytest.fail(
                    f"Please check your json_schema in selected streams {streams_errors.keys()}."
                )

        all_streams = set(stream.stream.name
                          for stream in configured_catalog.streams)
        streams_with_records = set(counter.keys())
        streams_without_records = all_streams - streams_with_records

        assert records, "At least one record should be read using provided catalog"

        for pks, record in primary_keys_for_records(
                streams=configured_catalog.streams, records=records):
            for pk_path, pk_value in pks.items():
                assert pk_value is not None, (
                    f"Primary key subkeys {repr(pk_path)} "
                    f"have null values or are not present in {record.stream} stream records."
                )

        if inputs.validate_output_from_all_streams:
            assert (
                not streams_without_records
            ), f"All streams should return some records, streams without records: {streams_without_records}"

        if expected_records:
            actual_by_stream = self.group_by_stream(records)
            expected_by_stream = self.group_by_stream(expected_records)
            for stream_name, expected in expected_by_stream.items():
                actual = actual_by_stream.get(stream_name, [])

                self.compare_records(
                    stream_name=stream_name,
                    actual=actual,
                    expected=expected,
                    extra_fields=inputs.expect_records.extra_fields,
                    exact_order=inputs.expect_records.exact_order,
                    extra_records=inputs.expect_records.extra_records,
                )
Example #25
    def test_read_sequential_slices(self, inputs: IncrementalConfig,
                                    connector_config,
                                    configured_catalog_for_incremental,
                                    cursor_paths,
                                    docker_runner: ConnectorRunner):
        """
        Incremental test that makes calls the read method without a state checkpoint. Then we partition the results by stream and
        slice checkpoints resulting in batches of messages that look like:
        <state message>
        <record message>
        ...
        <record message>

        Using these batches, we then make additional read method calls using the state message and verify the correctness of the
        messages in the response.
        """
        if inputs.skip_comprehensive_incremental_tests:
            pytest.skip(
                "Skipping new incremental test based on acceptance-test-config.yml"
            )
            return

        threshold_days = getattr(inputs, "threshold_days") or 0
        stream_mapping = {
            stream.stream.name: stream
            for stream in configured_catalog_for_incremental.streams
        }

        output = docker_runner.call_read(connector_config,
                                         configured_catalog_for_incremental)
        records_1 = filter_output(output, type_=Type.RECORD)
        states_1 = filter_output(output, type_=Type.STATE)

        assert states_1, "Should produce at least one state"
        assert records_1, "Should produce at least one record"

        latest_state = states_1[-1].state.data
        for record_value, state_value, stream_name in records_with_state(
                records_1, latest_state, stream_mapping, cursor_paths):
            assert (
                record_value <= state_value
            ), f"First incremental sync should produce records younger than or equal to the cursor value from the state. Stream: {stream_name}"

        # Create partitions made up of one state message followed by any records that come before the next state
        filtered_messages = [
            message for message in output
            if message.type == Type.STATE or message.type == Type.RECORD
        ]
        right_index = len(filtered_messages)
        checkpoint_messages = []
        for index, message in reversed(list(enumerate(filtered_messages))):
            if message.type == Type.STATE:
                message_group = (filtered_messages[index],
                                 filtered_messages[index + 1:right_index])
                checkpoint_messages.insert(0, message_group)
                right_index = index

        # We sometimes have duplicate identical state messages in a stream which we can filter out to speed things up
        checkpoint_messages = [
            message for index, message in enumerate(checkpoint_messages)
            if message not in checkpoint_messages[:index]
        ]

        # To avoid spamming APIs we only test a fraction of slices
        num_slices_to_test = 1 if len(checkpoint_messages) <= 5 else len(checkpoint_messages) // 5
        for message_batch in checkpoint_messages[::num_slices_to_test]:
            assert len(message_batch) > 0 and message_batch[0].type == Type.STATE
            current_state = message_batch[0]
            output = docker_runner.call_read_with_state(
                connector_config, configured_catalog_for_incremental,
                current_state.state.data)
            records = filter_output(output, type_=Type.RECORD)

            for record_value, state_value, stream_name in records_with_state(
                    records, current_state.state.data, stream_mapping,
                    cursor_paths):
                assert compare_cursor_with_threshold(
                    record_value, state_value, threshold_days
                ), f"Second incremental sync should produce records older than or equal to the cursor value from the state. Stream: {stream_name}"
Example #26
def pull_docker_image(acceptance_test_config) -> None:
    """Startup fixture to pull docker image"""
    print("Pulling docker image", acceptance_test_config.connector_image)
    ConnectorRunner(image_name=acceptance_test_config.connector_image,
                    volume=Path("."))
    print("Pulling completed")