def __init__(self, config: KafkaSinkConfig, ctx):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()

    # Build the Schema Registry client from the sink's connection settings.
    schema_registry_conf = {
        "url": self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
        # Tuple-encode unions so the Avro serializer can handle them.
        tuple_encoding = mce.to_obj(tuples=True)
        return tuple_encoding

    avro_serializer = AvroSerializer(
        SCHEMA_JSON_STR, schema_registry_client, to_dict=convert_mce_to_dict
    )

    # String keys, Avro-encoded MCE values; extra producer settings from the
    # config are merged in last so they can override these defaults.
    producer_config = {
        "bootstrap.servers": self.config.connection.bootstrap,
        "key.serializer": StringSerializer("utf_8"),
        "value.serializer": avro_serializer,
        **self.config.connection.producer_config,
    }
    self.producer = SerializingProducer(producer_config)
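# A minimal sketch (not the sink's actual write path) of how one MCE could be
# pushed through the SerializingProducer configured above. The standalone
# function shape, its name, and the explicit `topic` argument are assumptions
# for illustration; in the sink itself the topic would come from config and
# delivery would be handled asynchronously via a callback rather than flush().
def send_mce_example(producer: SerializingProducer, topic: str, mce: MetadataChangeEvent) -> None:
    producer.poll(0)  # serve delivery reports queued by earlier produce() calls
    producer.produce(topic=topic, value=mce)  # value is Avro-encoded via convert_mce_to_dict
    producer.flush()  # block until delivery; fine for a one-off example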
def __init__(self, ctx: PipelineContext, config: FileSinkConfig):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()

    fpath = pathlib.Path(self.config.filename)
    self.file = fpath.open("w")
    self.file.write("[\n")
    self.wrote_something = False
def __init__(self, ctx: PipelineContext, config: FileSinkConfig):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()

    fpath = pathlib.Path(self.config.filename)
    logger.info(f"Will write to {fpath}")
    self.file = fpath.open("w")
    self.file.write("[\n")
    self.wrote_something = False
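# Hedged sketch of the write/close path implied by the "[\n" header above:
# records are appended as JSON objects separated by ",\n", and close() emits
# the matching "]". The method names, `record_envelope.record.to_obj()`, and
# the SinkReport helper are assumptions modeled on the surrounding snippets,
# not confirmed source; requires `import json` at module scope.
def write_record_async(self, record_envelope, write_callback):
    if self.wrote_something:
        self.file.write(",\n")  # separator between array entries
    json.dump(record_envelope.record.to_obj(), self.file, indent=4)
    self.wrote_something = True
    self.report.report_record_written(record_envelope)  # assumed SinkReport helper
    write_callback.on_success(record_envelope, {})

def close(self):
    self.file.write("\n]")  # terminate the JSON array opened in __init__
    self.file.close()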
def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()
    self.emitter = DatahubRestEmitter(
        self.config.server,
        self.config.token,
        connect_timeout_sec=self.config.timeout_sec,  # reuse timeout_sec for connect timeout
        read_timeout_sec=self.config.timeout_sec,
    )
    self.emitter.test_connection()
def test_kafka_callback_class(self, mock_w_callback, mock_re):
    callback = KafkaCallback(
        SinkReport(), record_envelope=mock_re, write_callback=mock_w_callback
    )
    mock_error = MagicMock()
    mock_message = MagicMock()

    callback.kafka_callback(mock_error, mock_message)
    assert mock_w_callback.on_failure.call_count == 1
    mock_w_callback.on_failure.assert_called_with(mock_re, None, {"error": mock_error})

    callback.kafka_callback(None, mock_message)
    mock_w_callback.on_success.assert_called_once_with(mock_re, {"msg": mock_message})
def test_kafka_callback_class(self, mock_w_callback, mock_re):
    callback = _KafkaCallback(
        SinkReport(), record_envelope=mock_re, write_callback=mock_w_callback
    )
    mock_error = MagicMock()
    mock_message = MagicMock()

    callback.kafka_callback(mock_error, mock_message)
    mock_w_callback.on_failure.assert_called_once()
    assert mock_w_callback.on_failure.call_args[0][0] == mock_re
    assert mock_w_callback.on_failure.call_args[0][1] == mock_error

    callback.kafka_callback(None, mock_message)
    mock_w_callback.on_success.assert_called_once()
    assert mock_w_callback.on_success.call_args[0][0] == mock_re
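# Hedged reconstruction of the callback class the second test exercises: a
# non-None err is routed to on_failure with the error in both the positional
# slot and the metadata dict, a None err to on_success with the message. The
# attribute names and the SinkReport bookkeeping calls are assumptions derived
# from the constructor call in the tests, not confirmed source.
class _KafkaCallback:
    def __init__(self, reporter, record_envelope, write_callback):
        self.reporter = reporter
        self.record_envelope = record_envelope
        self.write_callback = write_callback

    def kafka_callback(self, err, msg):
        if err is not None:
            self.reporter.report_failure(err)  # assumed reporting call
            self.write_callback.on_failure(self.record_envelope, err, {"error": err})
        else:
            self.reporter.report_record_written(self.record_envelope)  # assumed
            self.write_callback.on_success(self.record_envelope, {"msg": msg})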
def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()
    self.emitter = DatahubRestEmitter(
        self.config.server,
        self.config.token,
        connect_timeout_sec=self.config.timeout_sec,  # reuse timeout_sec for connect timeout
        read_timeout_sec=self.config.timeout_sec,
        extra_headers=self.config.extra_headers,
        ca_certificate_path=self.config.ca_certificate_path,
    )
    self.emitter.test_connection()
    self.executor = concurrent.futures.ThreadPoolExecutor(
        max_workers=self.config.max_threads
    )
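# Hedged sketch of how the ThreadPoolExecutor created above might be used:
# each record is emitted from a worker thread so HTTP latency does not block
# the ingestion loop. `_emit_and_callback` is a hypothetical helper, and the
# emitter/report method names are assumptions based on the snippets here.
def write_record_async(self, record_envelope, write_callback):
    self.executor.submit(self._emit_and_callback, record_envelope, write_callback)

def _emit_and_callback(self, record_envelope, write_callback):
    try:
        self.emitter.emit_mce(record_envelope.record)  # assumed emitter entry point
        self.report.report_record_written(record_envelope)
        write_callback.on_success(record_envelope, {})
    except Exception as e:
        self.report.report_failure({"error": str(e)})  # assumed reporting shape
        write_callback.on_failure(record_envelope, e, {})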
def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()
    self.emitter = DatahubRestEmitter(self.config.server, self.config.token)
def __init__(self, config: KafkaSinkConfig, ctx):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()
    self.emitter = DatahubKafkaEmitter(self.config)
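# Hedged sketch of how this emitter-backed variant might forward records:
# DatahubKafkaEmitter exposes an async emit that takes an (err, msg) delivery
# callback, the same shape _KafkaCallback.kafka_callback accepts above. The
# exact method names and wiring are assumptions, not confirmed source.
def write_record_async(self, record_envelope, write_callback):
    self.emitter.emit_mce_async(
        record_envelope.record,
        callback=_KafkaCallback(
            self.report, record_envelope, write_callback
        ).kafka_callback,
    )

def close(self):
    self.emitter.flush()  # drain buffered messages before shutdown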