def test_two_sequential_reads(self, connector_config, configured_catalog_for_incremental, cursor_paths, docker_runner: ConnectorRunner):
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), "First incremental sync should produce records younger or equal to cursor value from the state"

    output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state)
    records_2 = filter_output(output, type_=Type.RECORD)

    for record_value, state_value in records_with_state(records_2, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value >= state_value
        ), "Second incremental sync should produce records older or equal to cursor value from the state"
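# `filter_output` and `records_with_state` are imported from the shared acceptance-test
# utilities and are not defined in this module. A minimal sketch of what `filter_output`
# is assumed to do (a hedged illustration, not the actual implementation):
def filter_output(records, type_):
    """Keep only the Airbyte messages of the requested type."""
    return [message for message in records if message.type == type_]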
def test_check(self, connector_config, inputs: ConnectionTestConfig, docker_runner: ConnectorRunner):
    if inputs.status == ConnectionTestConfig.Status.Succeed:
        output = docker_runner.call_check(config=connector_config)
        con_messages = [message for message in output if message.type == Type.CONNECTION_STATUS]

        assert len(con_messages) == 1, "Connection status message should be emitted exactly once"
        assert con_messages[0].connectionStatus.status == Status.SUCCEEDED
    elif inputs.status == ConnectionTestConfig.Status.Failed:
        output = docker_runner.call_check(config=connector_config)
        con_messages = [message for message in output if message.type == Type.CONNECTION_STATUS]

        assert len(con_messages) == 1, "Connection status message should be emitted exactly once"
        assert con_messages[0].connectionStatus.status == Status.FAILED
    elif inputs.status == ConnectionTestConfig.Status.Exception:
        with pytest.raises(ContainerError) as err:
            docker_runner.call_check(config=connector_config)

        assert err.value.exit_status != 0, "Connector should exit with error code"
        assert "Traceback" in err.value.stderr.decode("utf-8"), "Connector should print exception"
def test_sequential_reads(self, connector_config, configured_catalog, docker_runner: ConnectorRunner):
    configured_catalog = full_refresh_only_catalog(configured_catalog)
    output = docker_runner.call_read(connector_config, configured_catalog)
    records_1 = [message.record.data for message in output if message.type == Type.RECORD]

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_2 = [message.record.data for message in output if message.type == Type.RECORD]

    serialize = partial(json.dumps, sort_keys=True)
    assert not (
        set(map(serialize, records_1)) - set(map(serialize, records_2))
    ), "The two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
def test_sequential_reads(
    self,
    inputs: ConnectionTestConfig,
    connector_config: SecretDict,
    configured_catalog: ConfiguredAirbyteCatalog,
    docker_runner: ConnectorRunner,
    detailed_logger: Logger,
):
    ignored_fields = getattr(inputs, "ignored_fields") or {}
    configured_catalog = full_refresh_only_catalog(configured_catalog)

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_1 = [message.record for message in output if message.type == Type.RECORD]
    records_by_stream_1 = defaultdict(list)
    for record in records_1:
        records_by_stream_1[record.stream].append(record.data)

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_2 = [message.record for message in output if message.type == Type.RECORD]
    records_by_stream_2 = defaultdict(list)
    for record in records_2:
        records_by_stream_2[record.stream].append(record.data)

    pks_by_stream = primary_keys_by_stream(configured_catalog)

    for stream in records_by_stream_1:
        # Compare by primary keys when they are defined; otherwise hash the whole record
        # (minus ignored fields) so the two reads can be compared as sets.
        if pks_by_stream.get(stream):
            serializer = partial(primary_keys_only, pks=pks_by_stream.get(stream))
        else:
            serializer = partial(make_hashable, exclude_fields=ignored_fields.get(stream))
        stream_records_1 = records_by_stream_1.get(stream)
        stream_records_2 = records_by_stream_2.get(stream)
        output_diff = set(map(serializer, stream_records_1)).symmetric_difference(set(map(serializer, stream_records_2)))
        if output_diff:
            msg = f"{stream}: the two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
            detailed_logger.info(msg)
            detailed_logger.info("First read")
            detailed_logger.log_json_list(stream_records_1)
            detailed_logger.info("Second read")
            detailed_logger.log_json_list(stream_records_2)
            detailed_logger.info("Difference")
            detailed_logger.log_json_list(output_diff)
            pytest.fail(msg)
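# `primary_keys_only` and `make_hashable` come from the shared utilities and are not
# shown here. A minimal sketch of the behaviour assumed above, treating records as plain
# dicts keyed by top-level field names (hypothetical helpers, for illustration only):
import json


def primary_keys_only(record, pks):
    """Reduce a record to a hashable tuple of its primary-key values."""
    return tuple((key, record.get(key)) for key in pks)


def make_hashable(record, exclude_fields=None):
    """Serialize a record to a canonical JSON string, dropping ignored fields."""
    exclude_fields = exclude_fields or []
    return json.dumps({key: value for key, value in record.items() if key not in exclude_fields}, sort_keys=True)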
def test_match_expected(self, connector_spec: ConnectorSpecification, connector_config: SecretDict, docker_runner: ConnectorRunner):
    output = docker_runner.call_spec()
    spec_messages = [message for message in output if message.type == Type.SPEC]

    assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
    if connector_spec:
        assert spec_messages[0].spec == connector_spec, "Spec should be equal to the one in spec.json file"

    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT"), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") == " ".join(
        docker_runner.entry_point
    ), "env should be equal to space-joined entrypoint"

    # Getting rid of technical variables that start with an underscore
    config = {key: value for key, value in connector_config.data.items() if not key.startswith("_")}

    validate(instance=config, schema=spec_messages[0].spec.connectionSpecification)
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
    detailed_logger,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in filter_output(output, Type.RECORD)]

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_schema:
        self._validate_schema(records=records, configured_catalog=configured_catalog)
    self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or not present in {record.stream} stream records."
            )

    if expected_records:
        self._validate_expected_records(
            records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
        )
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteRecordMessage],
    docker_runner: ConnectorRunner,
    detailed_logger,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in filter_output(output, Type.RECORD)]

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_schema:
        self._validate_schema(records=records, configured_catalog=configured_catalog)
    self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert (
                pk_value is not None
            ), f"Primary key subkeys {repr(pk_path)} have null values or not present in {record.stream} stream records."

    # TODO: remove this condition after https://github.com/airbytehq/airbyte/issues/8312 is done
    if inputs.validate_data_points:
        self._validate_field_appears_at_least_once(records=records, configured_catalog=configured_catalog)

    if expected_records:
        self._validate_expected_records(
            records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
        )
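# `primary_keys_for_records` is imported from the shared utilities. A simplified sketch
# of the pairing it is assumed to yield, limited to top-level primary-key fields
# (hypothetical helper, for illustration only):
def primary_keys_for_records(streams, records):
    pks_by_stream = {
        stream.stream.name: stream.stream.source_defined_primary_key or [] for stream in streams
    }
    for record in records:
        # Each primary key is a path (a list of keys); only the top-level key is resolved here.
        pks = {"/".join(path): record.data.get(path[0]) for path in pks_by_stream.get(record.stream, [])}
        if pks:
            yield pks, record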
def test_airbyte_trace_message_on_failure(self, connector_config, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
    if not inputs.expect_trace_message_on_failure:
        pytest.skip("Skipping `test_airbyte_trace_message_on_failure` because `inputs.expect_trace_message_on_failure=False`")
        return

    invalid_configured_catalog = ConfiguredAirbyteCatalog(
        streams=[
            # create ConfiguredAirbyteStream without validation
            ConfiguredAirbyteStream.construct(
                stream=AirbyteStream(
                    name="__AIRBYTE__stream_that_does_not_exist",
                    json_schema={"type": "object", "properties": {"f1": {"type": "string"}}},
                    supported_sync_modes=[SyncMode.full_refresh],
                ),
                sync_mode="INVALID",
                destination_sync_mode="INVALID",
            )
        ]
    )

    output = docker_runner.call_read(connector_config, invalid_configured_catalog, raise_container_error=False)
    trace_messages = filter_output(output, Type.TRACE)
    error_trace_messages = list(filter(lambda m: m.trace.type == TraceType.ERROR, trace_messages))

    assert len(error_trace_messages) >= 1, "Connector should emit at least one error trace message"
def test_spec(self, connector_spec: ConnectorSpecification, docker_runner: ConnectorRunner):
    output = docker_runner.call_spec()
    spec_messages = [message for message in output if message.type == Type.SPEC]

    assert len(spec_messages) == 1, "Spec message should be emitted exactly once"
    if connector_spec:
        assert spec_messages[0].spec == connector_spec, "Spec should be equal to the one in spec.json file"
def test_connector_image_without_env(self, connector_image_without_env, tmp_path):
    docker_runner = ConnectorRunner(image_name=connector_image_without_env, volume=tmp_path)
    assert not docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT"), "this test should fail if AIRBYTE_ENTRYPOINT defined"
def test_discover(self, connector_config, docker_runner: ConnectorRunner):
    """Verify that discover produces a correct schema."""
    output = docker_runner.call_discover(config=connector_config)
    catalog_messages = filter_output(output, Type.CATALOG)

    assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
    assert catalog_messages[0].catalog, "Message should have catalog"
    assert catalog_messages[0].catalog.streams, "Catalog should contain streams"
def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner):
    configured_catalog = incremental_only_catalog(configured_catalog)
    output = docker_runner.call_read_with_state(config=connector_config, catalog=configured_catalog, state=future_state)
    records = filter_output(output, type_=Type.RECORD)
    states = filter_output(output, type_=Type.STATE)

    assert not records, "The sync should produce no records when run with the state with abnormally large values"
    assert states, "The sync should produce at least one STATE message"
def test_discover(self, connector_config, docker_runner: ConnectorRunner):
    output = docker_runner.call_discover(config=connector_config)
    catalog_messages = [message for message in output if message.type == Type.CATALOG]

    assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
def test_correct_connector_image(self, correct_connector_image, tmp_path):
    docker_runner = ConnectorRunner(image_name=correct_connector_image, volume=tmp_path)
    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT"), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") == " ".join(
        docker_runner.entry_point
    ), "env should be equal to space-joined entrypoint"
def pull_docker_image(acceptance_test_config) -> None:
    """Startup fixture to pull docker image"""
    image_name = acceptance_test_config.connector_image
    config_filename = "acceptance-test-config.yml"
    try:
        ConnectorRunner(image_name=image_name, volume=Path("."))
    except errors.ImageNotFound:
        pytest.exit(f"Docker image `{image_name}` not found, please check your {config_filename} file", returncode=1)
def test_discover(self, connector_config, catalog, docker_runner: ConnectorRunner):
    output = docker_runner.call_discover(config=connector_config)
    catalog_messages = [message for message in output if message.type == Type.CATALOG]

    assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
    if catalog:
        for stream1, stream2 in zip(catalog_messages[0].catalog.streams, catalog.streams):
            assert stream1.json_schema == stream2.json_schema, f"Streams: {stream1.name} vs {stream2.name}, stream schemas should match"
            stream1.json_schema = None
            stream2.json_schema = None
            assert stream1.dict() == stream2.dict(), f"Streams {stream1.name} and {stream2.name}, stream configs should match"
def test_docker_image_env_ne_entrypoint(self, connector_image_with_ne_properties, tmp_path):
    docker_runner = ConnectorRunner(image_name=connector_image_with_ne_properties, volume=tmp_path)
    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT"), "AIRBYTE_ENTRYPOINT must be set in dockerfile"
    assert docker_runner.env_variables.get("AIRBYTE_ENTRYPOINT") != " ".join(
        docker_runner.entry_point
    ), 'This test should fail if AIRBYTE_ENTRYPOINT equals " ".join(entry_point)'
def test_two_sequential_reads(
    self,
    inputs: IncrementalConfig,
    connector_config: SecretDict,
    configured_catalog_for_incremental: ConfiguredAirbyteCatalog,
    cursor_paths: dict[str, list[str]],
    docker_runner: ConnectorRunner,
):
    threshold_days = getattr(inputs, "threshold_days") or 0
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), f"First incremental sync should produce records younger or equal to cursor value from the state. Stream: {stream_name}"

    output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state)
    records_2 = filter_output(output, type_=Type.RECORD)

    for record_value, state_value, stream_name in records_with_state(records_2, latest_state, stream_mapping, cursor_paths):
        assert compare_cursor_with_threshold(
            record_value, state_value, threshold_days
        ), f"Second incremental sync should produce records older or equal to cursor value from the state. Stream: {stream_name}"
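# `compare_cursor_with_threshold` is imported from the shared utilities. A hedged sketch
# of the comparison it is assumed to perform: with a non-zero `threshold_days`, date-like
# cursor values may lag the state cursor by up to that many days (illustrative only,
# assuming `pendulum` is available for parsing):
import pendulum


def compare_cursor_with_threshold(record_value, state_value, threshold_days: int) -> bool:
    if threshold_days:
        record_date = pendulum.parse(str(record_value))
        state_date = pendulum.parse(str(state_value))
        return record_date >= state_date.subtract(days=threshold_days)
    return record_value >= state_value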
def catalog_schemas_fixture(connector_config, docker_runner: ConnectorRunner, cached_schemas) -> MutableMapping[str, Any]:
    """JSON schemas for each stream"""
    if not cached_schemas:
        output = docker_runner.call_discover(config=connector_config)
        catalogs = [message.catalog for message in output if message.type == Type.CATALOG]
        for stream in catalogs[-1].streams:
            cached_schemas[stream.name] = stream.json_schema

    return cached_schemas
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or not present in {record.stream} stream records."
            )

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"

    if expected_records:
        actual_by_stream = self.group_by_stream(records)
        expected_by_stream = self.group_by_stream(expected_records)
        for stream_name, expected in expected_by_stream.items():
            actual = actual_by_stream.get(stream_name, [])
            self.compare_records(
                stream_name=stream_name,
                actual=actual,
                expected=expected,
                extra_fields=inputs.expect_records.extra_fields,
                exact_order=inputs.expect_records.exact_order,
                extra_records=inputs.expect_records.extra_records,
            )
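# `self.group_by_stream` is defined elsewhere on the test class; it is assumed to bucket
# record messages by their stream name, roughly like this standalone sketch (hypothetical):
from collections import defaultdict


def group_by_stream(records):
    """Group AirbyteRecordMessage data payloads by stream name."""
    result = defaultdict(list)
    for record in records:
        result[record.stream].append(record.data)
    return result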
def test_read(self, connector_config, configured_catalog, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"
def test_discover(self, connector_config, docker_runner: ConnectorRunner):
    output = docker_runner.call_discover(config=connector_config)
    catalog_messages = filter_output(output, Type.CATALOG)

    assert len(catalog_messages) == 1, "Catalog message should be emitted exactly once"
def docker_runner_fixture(image_tag, tmp_path) -> ConnectorRunner:
    return ConnectorRunner(image_tag, volume=tmp_path)
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    if inputs.validate_schema:
        bar = "-" * 80
        streams_errors = verify_records_schema(records, configured_catalog)
        for stream_name, errors in streams_errors.items():
            errors = map(str, errors.values())
            str_errors = f"\n{bar}\n".join(errors)
            logging.error(f"The {stream_name} stream has the following schema errors:\n{str_errors}")

        if streams_errors:
            pytest.fail(f"Please check your json_schema in selected streams {streams_errors.keys()}.")

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or not present in {record.stream} stream records."
            )

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"

    if expected_records:
        actual_by_stream = self.group_by_stream(records)
        expected_by_stream = self.group_by_stream(expected_records)
        for stream_name, expected in expected_by_stream.items():
            actual = actual_by_stream.get(stream_name, [])
            self.compare_records(
                stream_name=stream_name,
                actual=actual,
                expected=expected,
                extra_fields=inputs.expect_records.extra_fields,
                exact_order=inputs.expect_records.exact_order,
                extra_records=inputs.expect_records.extra_records,
            )
def test_read_sequential_slices(
    self, inputs: IncrementalConfig, connector_config, configured_catalog_for_incremental, cursor_paths, docker_runner: ConnectorRunner
):
    """
    Incremental test that first calls the read method without a state checkpoint. The results are then
    partitioned by stream and slice checkpoints, producing batches of messages that look like:
    <state message>
    <record message>
    ...
    <record message>

    Using these batches, we make additional read calls seeded with each state message and verify the
    correctness of the messages in the response.
    """
    if inputs.skip_comprehensive_incremental_tests:
        pytest.skip("Skipping new incremental test based on acceptance-test-config.yml")
        return

    threshold_days = getattr(inputs, "threshold_days") or 0
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), f"First incremental sync should produce records younger or equal to cursor value from the state. Stream: {stream_name}"

    # Create partitions made up of one state message followed by any records that come before the next state
    filtered_messages = [message for message in output if message.type == Type.STATE or message.type == Type.RECORD]
    right_index = len(filtered_messages)
    checkpoint_messages = []
    for index, message in reversed(list(enumerate(filtered_messages))):
        if message.type == Type.STATE:
            message_group = (filtered_messages[index], filtered_messages[index + 1 : right_index])
            checkpoint_messages.insert(0, message_group)
            right_index = index

    # We sometimes have duplicate identical state messages in a stream which we can filter out to speed things up
    checkpoint_messages = [message for index, message in enumerate(checkpoint_messages) if message not in checkpoint_messages[:index]]

    # To avoid spamming APIs we only test a fraction of slices
    num_slices_to_test = 1 if len(checkpoint_messages) <= 5 else len(checkpoint_messages) // 5
    for message_batch in checkpoint_messages[::num_slices_to_test]:
        assert len(message_batch) > 0 and message_batch[0].type == Type.STATE
        current_state = message_batch[0]
        output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, current_state.state.data)
        records = filter_output(output, type_=Type.RECORD)

        for record_value, state_value, stream_name in records_with_state(records, current_state.state.data, stream_mapping, cursor_paths):
            assert compare_cursor_with_threshold(
                record_value, state_value, threshold_days
            ), f"Second incremental sync should produce records older or equal to cursor value from the state. Stream: {stream_name}"
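# To illustrate the checkpoint partitioning above: given interleaved messages
#   [STATE s1, RECORD r1, RECORD r2, STATE s2, RECORD r3]
# the backwards walk pairs each state with the records that follow it, producing
#   [(s1, [r1, r2]), (s2, [r3])].
# The standalone sketch below mimics that logic on plain tuples (an illustration only,
# not part of the test suite):
def partition_by_state(messages):
    right_index = len(messages)
    checkpoints = []
    for index in range(len(messages) - 1, -1, -1):
        kind, _ = messages[index]
        if kind == "STATE":
            checkpoints.insert(0, (messages[index], messages[index + 1 : right_index]))
            right_index = index
    return checkpoints


assert partition_by_state(
    [("STATE", "s1"), ("RECORD", "r1"), ("RECORD", "r2"), ("STATE", "s2"), ("RECORD", "r3")]
) == [
    (("STATE", "s1"), [("RECORD", "r1"), ("RECORD", "r2")]),
    (("STATE", "s2"), [("RECORD", "r3")]),
]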
def pull_docker_image(acceptance_test_config) -> None:
    """Startup fixture to pull docker image"""
    print("Pulling docker image", acceptance_test_config.connector_image)
    ConnectorRunner(image_name=acceptance_test_config.connector_image, volume=Path("."))
    print("Pulling completed")