def construct_lineage_workunits(
    self, connector: ConnectorManifest
) -> Iterable[MetadataWorkUnit]:
    """Yield one upstream-lineage work unit per lineage entry of ``connector``.

    For every entry in ``connector.lineages`` an MCE is built whose dataset
    snapshot is keyed by the target dataset URN and carries a single
    ``UpstreamLineage`` aspect pointing at the source dataset (lineage type
    ``TRANSFORMED``). Both URNs are built with ``self.config.env``. Each work
    unit is passed to ``self.report.report_workunit`` before it is yielded.

    Nothing is yielded when ``connector.lineages`` is empty or ``None``.
    """
    entries = connector.lineages
    if not entries:
        return

    for entry in entries:
        upstream_name = entry.source_dataset
        upstream_platform = entry.source_platform
        downstream_name = entry.target_dataset
        downstream_platform = entry.target_platform

        # Downstream (target) URN identifies the snapshot being updated.
        downstream_urn = builder.make_dataset_urn(
            downstream_platform, downstream_name, self.config.env
        )
        upstream_urn = builder.make_dataset_urn(
            upstream_platform, upstream_name, self.config.env
        )

        upstream = models.UpstreamClass(
            dataset=upstream_urn,
            type=models.DatasetLineageTypeClass.TRANSFORMED,
        )
        lineage_aspect = models.UpstreamLineageClass(upstreams=[upstream])
        snapshot = models.DatasetSnapshotClass(
            urn=downstream_urn,
            aspects=[lineage_aspect],
        )
        event = models.MetadataChangeEventClass(proposedSnapshot=snapshot)

        # The work unit id is the bare source dataset name.
        # NOTE(review): ids repeat if two lineage entries share a source
        # dataset -- confirm whether downstream consumers rely on uniqueness.
        workunit = MetadataWorkUnit(id=upstream_name, mce=event)
        self.report.report_workunit(workunit)
        yield workunit
@pytest.mark.parametrize( "record,path,snapshot", [ ( # Simple test. models. MetadataChangeEventClass(proposedSnapshot=models.DatasetSnapshotClass( urn= "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", aspects=[ models.UpstreamLineageClass(upstreams=[ models.UpstreamClass( auditStamp=basicAuditStamp, dataset= "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)", type="TRANSFORMED", ), models.UpstreamClass( auditStamp=basicAuditStamp, dataset= "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)", type="TRANSFORMED", ), ]) ], ), ), "/entities?action=ingest", { "entity": { "value": {
def get_lineage_metadata_change_event_proposal(
    entities: List[EntityNodeConfig], preserve_upstream: bool
) -> Iterable[MetadataChangeProposalWrapper]:
    """
    Generate ``upstreamLineage`` change proposals for the given entity nodes.

    Each entity node that declares at least one upstream and has a supported
    entity type produces exactly one UPSERT proposal. Nodes without upstreams,
    or whose own entity type is unsupported, produce nothing.

    :param entities: Entity nodes (an entity plus its upstream nodes) to
        build proposals for
    :param preserve_upstream: When true, the entity's current
        ``upstreamLineage`` aspect is fetched with ``get_aspects_for_entity``
        and its upstreams are placed ahead of the newly declared ones
    :return: A lazily evaluated stream of metadata change proposals
    """

    def _get_entity_urn(entity_config: EntityConfig) -> Optional[str]:
        """Build the URN for ``entity_config``.

        Only the ``"dataset"`` type is handled; any other type is logged and
        ``None`` is returned.
        """
        if entity_config.type != "dataset":
            logger.warning(
                f"Entity type: {entity_config.type} is not supported!")
            return None

        return make_dataset_urn_with_platform_instance(
            platform=entity_config.platform,
            name=entity_config.name,
            env=entity_config.env,
            platform_instance=entity_config.platform_instance,
        )

    for node in entities:
        # Nodes that declare no upstreams never result in an MCP.
        if not node.upstream:
            continue

        downstream = node.entity
        logger.info(
            f"Upstream detected for {downstream}. Extracting urn...")
        downstream_urn = _get_entity_urn(downstream)
        if not downstream_urn:
            logger.warning(
                f"Entity type: {downstream.type} is unsupported. Entity node {downstream.name} and its "
                f"upstream lineages will be skipped")
            continue

        collected: List[models.UpstreamClass] = []

        if preserve_upstream:
            # Seed the list with whatever lineage is already stored.
            # NOTE(review): stored and newly declared upstreams are merged
            # without de-duplication -- confirm that is intended.
            existing_lineage = get_aspects_for_entity(
                entity_urn=downstream_urn,
                aspects=["upstreamLineage"],
                typed=True,
            ).get("upstreamLineage")
            if existing_lineage:
                # mypy rejects this call with
                # `Argument 1 to "list" has incompatible type "Optional[Any]";
                # expected "Iterable[UpstreamClass]"`, hence the ignore.
                collected.extend(
                    existing_lineage.get("upstreams")  # type: ignore
                )

        for upstream_node in node.upstream:
            parent = upstream_node.entity
            parent_urn = _get_entity_urn(parent)
            if not parent_urn:
                logger.warning(
                    f"Entity type: {parent.type} is unsupported. Upstream lineage will be skipped "
                    f"for {parent.name}->{downstream.name}")
                continue

            collected.append(
                models.UpstreamClass(
                    dataset=parent_urn,
                    type=models.DatasetLineageTypeClass.TRANSFORMED,
                    auditStamp=auditStamp,
                )
            )

        # A proposal is emitted even when every declared upstream was skipped.
        lineage_aspect = models.UpstreamLineageClass(upstreams=collected)
        yield MetadataChangeProposalWrapper(
            entityType=downstream.type,
            changeType=models.ChangeTypeClass.UPSERT,
            entityUrn=downstream_urn,
            aspectName="upstreamLineage",
            aspect=lineage_aspect,
        )