Example #1
    def __init__(self, config: KafkaSinkConfig, ctx):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()

        schema_registry_conf = {
            "url": self.config.connection.schema_registry_url,
            **self.config.connection.schema_registry_config,
        }
        schema_registry_client = SchemaRegistryClient(schema_registry_conf)

        def convert_mce_to_dict(mce: MetadataChangeEvent, ctx):
            # ctx is the SerializationContext that confluent-kafka passes to
            # every to_dict callback; it is not needed for this conversion.
            tuple_encoding = mce.to_obj(tuples=True)
            return tuple_encoding

        avro_serializer = AvroSerializer(
            SCHEMA_JSON_STR,
            schema_registry_client,
            to_dict=convert_mce_to_dict,
        )

        producer_config = {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": avro_serializer,
            **self.config.connection.producer_config,
        }

        self.producer = SerializingProducer(producer_config)
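For context, a minimal sketch of how such a sink might hand one event to the producer. The method name, topic, and key choice are assumptions for illustration; produce(), poll(), and flush() are the actual confluent_kafka SerializingProducer API.

    def write_record(self, mce: MetadataChangeEvent) -> None:
        # produce() enqueues the record; the AvroSerializer configured above
        # runs here and registers/looks up the schema in the registry.
        self.producer.produce(
            topic="MetadataChangeEvent_v4",  # assumed topic name
            key=str(mce.proposedSnapshot.urn),  # assumed key choice
            value=mce,
        )
        # poll(0) services pending delivery callbacks without blocking.
        self.producer.poll(0)

    def close(self) -> None:
        # flush() blocks until every queued message is delivered or fails.
        self.producer.flush()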
Example #2
File: file.py Project: ydyzs/datahub
    def __init__(self, ctx: PipelineContext, config: FileSinkConfig):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()

        fpath = pathlib.Path(self.config.filename)
        self.file = fpath.open("w")
        # Open a JSON array; records are appended one at a time and the
        # closing bracket is written when the sink is closed.
        self.file.write("[\n")
        self.wrote_something = False
Example #3
    def __init__(self, ctx: PipelineContext, config: FileSinkConfig):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()

        fpath = pathlib.Path(self.config.filename)
        logger.info(f"Will write to {fpath}")
        self.file = fpath.open("w")
        self.file.write("[\n")
        self.wrote_something = False
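The opening "[\n" and the wrote_something flag imply complementary write and close methods that finish the JSON array. A plausible sketch (method names and the callback protocol are assumptions modeled on the other sink examples; json must be imported):

    def write_record_async(self, record_envelope, write_callback):
        record = record_envelope.record
        if self.wrote_something:
            # Separate records with a comma so the file stays valid JSON.
            self.file.write(",\n")
        json.dump(record.to_obj(), self.file, indent=4)
        self.wrote_something = True
        write_callback.on_success(record_envelope, {})

    def close(self):
        # Terminate the JSON array opened in __init__.
        self.file.write("\n]")
        self.file.close()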
Example #4
 def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
     super().__init__(ctx)
     self.config = config
     self.report = SinkReport()
     self.emitter = DatahubRestEmitter(
         self.config.server,
         self.config.token,
         connect_timeout_sec=self.config.timeout_sec,  # reuse timeout_sec for connect timeout
         read_timeout_sec=self.config.timeout_sec,
     )
     self.emitter.test_connection()
Example #5
 def test_kafka_callback_class(self, mock_w_callback, mock_re):
     callback = KafkaCallback(SinkReport(),
                              record_envelope=mock_re,
                              write_callback=mock_w_callback)
     mock_error = MagicMock()
     mock_message = MagicMock()
     callback.kafka_callback(mock_error, mock_message)
     assert mock_w_callback.on_failure.call_count == 1
     # Mock's called_with() / called_once_with() are not assertions (they
     # silently create new mock attributes), so use the assert_* variants,
     # and the expected metadata must be a dict, not a set literal.
     mock_w_callback.on_failure.assert_called_with(
         mock_re, None, {"error": mock_error}
     )
     callback.kafka_callback(None, mock_message)
     mock_w_callback.on_success.assert_called_once_with(
         mock_re, {"msg": mock_message}
     )
Example #6
 def test_kafka_callback_class(self, mock_w_callback, mock_re):
     callback = _KafkaCallback(
         SinkReport(), record_envelope=mock_re, write_callback=mock_w_callback
     )
     mock_error = MagicMock()
     mock_message = MagicMock()
     callback.kafka_callback(mock_error, mock_message)
     mock_w_callback.on_failure.assert_called_once()
     assert mock_w_callback.on_failure.call_args[0][0] == mock_re
     assert mock_w_callback.on_failure.call_args[0][1] == mock_error
     callback.kafka_callback(None, mock_message)
     mock_w_callback.on_success.assert_called_once()
     assert mock_w_callback.on_success.call_args[0][0] == mock_re
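For reference, a plausible shape of the callback under test, inferred purely from the assertions above; the attribute names are assumptions, not the actual DataHub source:

 class _KafkaCallback:
     def __init__(self, report: SinkReport, record_envelope, write_callback):
         self.report = report
         self.record_envelope = record_envelope
         self.write_callback = write_callback

     def kafka_callback(self, err, msg):
         if err is not None:
             # Delivery failed: surface the broker error to the caller.
             self.write_callback.on_failure(
                 self.record_envelope, err, {"error": err, "msg": msg}
             )
         else:
             # Delivery succeeded: hand the broker message back.
             self.write_callback.on_success(
                 self.record_envelope, {"msg": msg}
             )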
Example #7
 def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
     super().__init__(ctx)
     self.config = config
     self.report = SinkReport()
     self.emitter = DatahubRestEmitter(
         self.config.server,
         self.config.token,
         connect_timeout_sec=self.config.timeout_sec,  # reuse timeout_sec for connect timeout
         read_timeout_sec=self.config.timeout_sec,
         extra_headers=self.config.extra_headers,
         ca_certificate_path=self.config.ca_certificate_path,
     )
     self.emitter.test_connection()
     self.executor = concurrent.futures.ThreadPoolExecutor(
         max_workers=self.config.max_threads
     )
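A minimal sketch of how the executor might be used to emit without blocking the pipeline; write_record_async and the callback wiring are assumptions, while submit() and add_done_callback() are the standard concurrent.futures API. Because emit() is a synchronous HTTP call, max_threads effectively bounds the number of in-flight requests.

 def write_record_async(self, record_envelope, write_callback):
     # Hand the blocking HTTP emit off to the thread pool.
     future = self.executor.submit(self.emitter.emit, record_envelope.record)

     def _done(fut):
         # Future.exception() returns None when the call succeeded.
         exc = fut.exception()
         if exc is not None:
             write_callback.on_failure(record_envelope, exc, {"error": str(exc)})
         else:
             write_callback.on_success(record_envelope, {})

     future.add_done_callback(_done)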
Example #8
 def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
     super().__init__(ctx)
     self.config = config
     self.report = SinkReport()
     self.emitter = DatahubRestEmitter(self.config.server,
                                       self.config.token)
Example #9
 def __init__(self, config: KafkaSinkConfig, ctx):
     super().__init__(ctx)
     self.config = config
     self.report = SinkReport()
     self.emitter = DatahubKafkaEmitter(self.config)
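The corresponding write path would simply delegate to the emitter. A sketch assuming DatahubKafkaEmitter exposes an async emit method that takes a delivery callback (hedged, from memory of the library; reusing the _KafkaCallback shape from Example #6):

 def write_record_async(self, record_envelope, write_callback):
     # The emitter owns serialization and the underlying producer; the
     # delivery report is routed back through the sink's callback.
     self.emitter.emit_mce_async(
         record_envelope.record,
         _KafkaCallback(self.report, record_envelope, write_callback).kafka_callback,
     )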