Example #1
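DataHub's Kafka sink (datahub.ingestion.sink.datahub_kafka); KafkaSinkConfig and the _KafkaCallback helper are defined alongside this class in the same module.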
class DatahubKafkaSink(Sink):
    config: KafkaSinkConfig
    report: SinkReport
    emitter: DatahubKafkaEmitter

    def __init__(self, config: KafkaSinkConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()
        self.emitter = DatahubKafkaEmitter(self.config)

    @classmethod
    def create(cls, config_dict: dict,
               ctx: PipelineContext) -> "DatahubKafkaSink":
        config = KafkaSinkConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def handle_work_unit_start(self, workunit: WorkUnit) -> None:
        pass

    def handle_work_unit_end(self, workunit: WorkUnit) -> None:
        self.emitter.flush()

    def write_record_async(
        self,
        record_envelope: RecordEnvelope[
            Union[
                MetadataChangeEvent,
                MetadataChangeProposal,
                MetadataChangeProposalWrapper,
            ]
        ],
        write_callback: WriteCallback,
    ) -> None:
        record = record_envelope.record
        if isinstance(record, MetadataChangeEvent):
            self.emitter.emit_mce_async(
                record,
                callback=_KafkaCallback(self.report, record_envelope,
                                        write_callback).kafka_callback,
            )
        elif isinstance(record, (MetadataChangeProposalWrapper,
                                 MetadataChangeProposalClass)):
            self.emitter.emit_mcp_async(
                record,
                callback=_KafkaCallback(self.report, record_envelope,
                                        write_callback).kafka_callback,
            )
        else:
            raise ValueError(
                f"The datahub-kafka sink only supports MetadataChangeEvent/MetadataChangeProposal[Wrapper] classes, not {type(record)}"
            )

    def get_report(self):
        return self.report

    def close(self) -> None:
        self.emitter.flush()
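In a pipeline this sink is constructed through its create classmethod from the recipe's sink config. A minimal standalone sketch, assuming placeholder connection values and that PipelineContext needs only a run_id here:

from datahub.ingestion.api.common import PipelineContext

# Placeholder connection values; point these at your brokers and registry.
ctx = PipelineContext(run_id="demo-run")
sink = DatahubKafkaSink.create(
    {
        "connection": {
            "bootstrap": "broker:9092",
            "schema_registry_url": "http://schema-registry:8081",
        }
    },
    ctx,
)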
Example #2
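A variant of the same sink in which write_record_async accepts MetadataChangeEvent records only.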
class DatahubKafkaSink(Sink):
    config: KafkaSinkConfig
    report: SinkReport
    emitter: DatahubKafkaEmitter

    def __init__(self, config: KafkaSinkConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()
        self.emitter = DatahubKafkaEmitter(self.config)

    @classmethod
    def create(cls, config_dict: dict,
               ctx: PipelineContext) -> "DatahubKafkaSink":
        config = KafkaSinkConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def handle_work_unit_start(self, workunit: WorkUnit) -> None:
        pass

    def handle_work_unit_end(self, workunit: WorkUnit) -> None:
        self.emitter.flush()

    def write_record_async(
        self,
        record_envelope: RecordEnvelope[MetadataChangeEvent],
        write_callback: WriteCallback,
    ) -> None:
        mce = record_envelope.record
        self.emitter.emit_mce_async(
            mce,
            callback=_KafkaCallback(self.report, record_envelope,
                                    write_callback).kafka_callback,
        )

    def get_report(self):
        return self.report

    def close(self) -> None:
        self.emitter.flush()
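Driving this variant directly looks roughly like the sketch below, assuming a sink built as in the sketch under Example #1. PrintingCallback is a made-up WriteCallback subclass for illustration, and lineage_mce stands in for an MCE such as the one built in Example #5:

from datahub.ingestion.api.common import RecordEnvelope

# Illustrative callback; real pipelines supply their own WriteCallback.
class PrintingCallback(WriteCallback):
    def on_success(self, record_envelope, success_metadata):
        print("wrote record")

    def on_failure(self, record_envelope, failure_exception, failure_metadata):
        print("write failed:", failure_exception)


sink.write_record_async(RecordEnvelope(record=lineage_mce, metadata={}),
                        PrintingCallback())
sink.close()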
Example #3
def make_emitter(self) -> DatahubKafkaEmitter:
    sink_config = self._get_config()
    return DatahubKafkaEmitter(sink_config)
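_get_config is not shown in this excerpt; whatever it returns must be a config object the emitter accepts. For reference, one can be built directly from a dict in the same shape as Example #5 (connection values are placeholders):

from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig

# Placeholder connection values, mirroring Example #5's config format.
sink_config = KafkaEmitterConfig.parse_obj(
    {
        "connection": {
            "bootstrap": "broker:9092",
            "schema_registry_url": "http://schema-registry:8081",
        }
    }
)
emitter = DatahubKafkaEmitter(sink_config)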
Example #4
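The sink's constructor in isolation; the same wiring as in Examples #1 and #2.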
def __init__(self, config: KafkaSinkConfig, ctx):
    super().__init__(ctx)
    self.config = config
    self.report = SinkReport()
    self.emitter = DatahubKafkaEmitter(self.config)
Example #5
import datahub.emitter.mce_builder as builder
from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig

# Construct a lineage object.
lineage_mce = builder.make_lineage_mce(
    [
        builder.make_dataset_urn("bigquery", "upstream1"),
        builder.make_dataset_urn("bigquery", "upstream2"),
    ],
    builder.make_dataset_urn("bigquery", "downstream"),
)

# Create an emitter to DataHub's Kafka broker.
emitter = DatahubKafkaEmitter(
    KafkaEmitterConfig.parse_obj(
        # This is the same config format as the standard Kafka sink's YAML.
        {
            "connection": {
                "bootstrap": "broker:9092",
                "producer_config": {},
                "schema_registry_url": "http://schema-registry:8081",
            }
        }
    )
)


# Emit metadata!
def callback(err, msg):
    if err:
        # Handle the metadata emission error.
        print("error:", err)


emitter.emit_mce_async(lineage_mce, callback)
emitter.flush()
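emit_mce_async only enqueues the event on the underlying Kafka producer; the closing flush() blocks until every queued message has been delivered or has failed, so the callback will have fired by the time it returns. Assuming the emitter passes through confluent-kafka's delivery-report arguments (which the (err, msg) signature suggests), a success branch can report where the record landed; a minimal sketch:

def verbose_callback(err, msg):
    if err:
        # Delivery failed; err describes the Kafka error.
        print("error:", err)
    else:
        # Delivery succeeded; msg exposes topic/partition/offset details.
        print("delivered to", msg.topic(), "partition", msg.partition())


emitter.emit_mce_async(lineage_mce, verbose_callback)
emitter.flush()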