def test_kafka_sink_config(self, mock_producer, mock_context):
    """Create and close a sink, then check the mocked producer was built once."""
    sink_config = {"connection": {"bootstrap": "foobar:9092"}}
    sink = DatahubKafkaSink.create(sink_config, mock_context)
    sink.close()

    # The (mocked) producer constructor should have been invoked exactly once.
    assert mock_producer.call_count == 1
def test_kafka_sink_write(self, mock_k_callback, mock_producer, mock_context):
    """Write a lineage MCE through the sink and check the producer interaction.

    Verifies that the mocked producer is polled once, that ``produce`` is
    called once with the MCE as ``value`` and a truthy ``key``, and that the
    ``on_delivery`` hook is the ``kafka_callback`` of the mocked callback
    wrapper instance.
    """
    # NOTE(review): another method named ``test_kafka_sink_write`` appears
    # later in this file; if both live in the same class, the later
    # definition shadows this one and this test never runs -- confirm.
    producer = mock_producer.return_value
    k_callback = mock_k_callback.return_value
    write_callback = MagicMock(spec=WriteCallback)

    sink_config = {"connection": {"bootstrap": "foobar:9092"}}
    sink = DatahubKafkaSink.create(sink_config, mock_context)

    upstream_urns = [
        builder.make_dataset_urn("bigquery", "upstream1"),
        builder.make_dataset_urn("bigquery", "upstream2"),
    ]
    downstream_urn = builder.make_dataset_urn("bigquery", "downstream1")
    mce = builder.make_lineage_mce(upstream_urns, downstream_urn)

    envelope = RecordEnvelope(record=mce, metadata={})
    sink.write_record_async(envelope, write_callback)

    # The producer's poll() should have been called exactly once.
    producer.poll.assert_called_once()

    # Check (via the class helper) how the Kafka callback wrapper was built.
    self.validate_kafka_callback(mock_k_callback, envelope, write_callback)

    # Validate that the producer's produce() received the right arguments.
    producer.produce.assert_called_once()
    _positional, produce_kwargs = producer.produce.call_args
    assert produce_kwargs["value"] == mce
    assert produce_kwargs["key"]  # produce call should include a Kafka key
    assert produce_kwargs["on_delivery"] == k_callback.kafka_callback
def test_kafka_sink_write(self, mock_k_callback, mock_producer, mock_context):
    """Write an opaque record through the sink and check the producer calls.

    Uses ``sentinel`` as a placeholder record, then verifies that the mocked
    producer was polled once and that the ``on_delivery`` argument passed to
    ``produce`` is the ``kafka_callback`` of the mocked callback wrapper
    instance.
    """
    # NOTE(review): an earlier method in this file has the same name; if both
    # live in the same class, this definition shadows the earlier one --
    # confirm whether that is intended.
    producer = mock_producer.return_value
    k_callback = mock_k_callback.return_value
    write_callback = MagicMock(spec=WriteCallback)

    sink_config = {"connection": {"bootstrap": "foobar:9092"}}
    sink = DatahubKafkaSink.create(sink_config, mock_context)

    envelope = RecordEnvelope(record=sentinel, metadata={})
    sink.write_record_async(envelope, write_callback)

    # poll() should have been called exactly once.
    assert producer.poll.call_count == 1

    # Check (via the class helper) how the Kafka callback wrapper was built.
    self.validate_kafka_callback(mock_k_callback, envelope, write_callback)

    # Validate the delivery callback handed to the producer's produce().
    _positional, produce_kwargs = producer.produce.call_args
    assert produce_kwargs["on_delivery"] == k_callback.kafka_callback
def test_kafka_sink_mcp(self, mock_producer, mock_callback):
    """Send a MetadataChangeProposalWrapper through the sink and close it.

    Builds a dataset-profile proposal, writes it via ``write_record_async``,
    closes the sink, and checks how many times the mocked producer was
    constructed.
    """
    from datahub.emitter.mcp import MetadataChangeProposalWrapper

    proposal = MetadataChangeProposalWrapper(
        entityType="dataset",
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:mysql,User.UserAccount,PROD)",
        changeType=models.ChangeTypeClass.UPSERT,
        aspectName="datasetProfile",
        aspect=models.DatasetProfileClass(
            rowCount=2000,
            columnCount=15,
            timestampMillis=1626995099686,
        ),
    )

    sink_config = {"connection": {"bootstrap": "localhost:9092"}}
    context = PipelineContext(run_id="test")
    sink = DatahubKafkaSink.create(sink_config, context)

    envelope = RecordEnvelope(record=proposal, metadata={})
    sink.write_record_async(envelope, mock_callback)
    sink.close()

    # The (mocked) producer constructor is expected to be called twice.
    # NOTE(review): presumably the sink builds one producer per record kind;
    # confirm against the sink/emitter implementation.
    assert mock_producer.call_count == 2
def test_kafka_sink_close(self, mock_producer, mock_context):
    """Closing a sink should flush the mocked producer instance exactly once."""
    sink = DatahubKafkaSink.create({}, mock_context)
    sink.close()

    # ``return_value`` is the instance produced by the mocked constructor.
    mock_producer.return_value.flush.assert_called_once()