def test_operation_processor_not_matching():
    """Check that no tag, term or owner aspect is produced for these properties.

    The properties below are intended not to match any rule returned by
    ``get_operation_defs()``.
    """
    # no property matches to the rules
    unmatched_props = {
        "user_owner_test": "*****@*****.**",
        "group.owner_test": "*****@*****.**",
        "governance.team_owner": "Binance",
        "pii": False,
        "int_property": 3,
        "double_property": 25,
    }

    aspect_map = OperationProcessor(get_operation_defs()).process(unmatched_props)

    # None of the three operations may have produced an aspect.
    for operation in ("add_tag", "add_term", "add_owner"):
        assert operation not in aspect_map
def test_operation_processor_matching():
    """Check the tag, term and owner aspects produced for matching properties.

    The processor is built with ``owner_source_type="SOURCE_CONTROL"`` and
    ``strip_owner_email_id=True``; the test pins the exact tag/term URNs, the
    set of allowed owner URNs and the source type carried by every owner.
    """
    matching_props = {
        "user_owner": "*****@*****.**",
        "user_owner_2": "test_user_2",
        "group.owner": "*****@*****.**",
        "governance.team_owner": "Finance",
        "pii": True,
        "int_property": 1,
        "double_property": 2.5,
        "tag": "Finance",
    }

    aspect_map = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SOURCE_CONTROL",
        strip_owner_email_id=True,
    ).process(matching_props)

    # Every operation must have contributed an aspect.
    for operation in ("add_tag", "add_term", "add_owner"):
        assert operation in aspect_map

    tag_aspect: GlobalTags = aspect_map["add_tag"]
    tags_added = [association.tag for association in tag_aspect.tags]

    term_aspect: GlossaryTermsClass = aspect_map["add_term"]
    terms_added = [association.urn for association in term_aspect.terms]

    # Exactly three tags, and each expected URN is among them.
    assert len(tags_added) == 3
    for expected_tag in (
        "urn:li:tag:has_pii_test",
        "urn:li:tag:int_property",
        "urn:li:tag:Finance",
    ):
        assert expected_tag in tags_added

    # Exactly two glossary terms, and each expected URN is among them.
    assert len(terms_added) == 2
    for expected_term in (
        "urn:li:glossaryTerm:Finance.test",
        "urn:li:glossaryTerm:double_property",
    ):
        assert expected_term in terms_added

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    assert len(ownership_aspect.owners) == 3

    allowed_owner_urns = {
        "urn:li:corpuser:test_user",
        "urn:li:corpuser:test_user_2",
        "urn:li:corpGroup:test.group",
    }
    for owner_entry in ownership_aspect.owners:
        assert owner_entry.owner in allowed_owner_urns
        # The source must be set and carry the configured source type.
        assert owner_entry.source
        assert owner_entry.source.type == OwnershipSourceTypeClass.SOURCE_CONTROL
def test_operation_processor_no_email_strip_source_type_not_null():
    """Check the single owner produced when ``strip_owner_email_id`` is False.

    The processor is built with ``owner_source_type="SERVICE"``; the test pins
    the resulting owner URN and the source type attached to it.
    """
    owner_only_props = {
        "user_owner": "*****@*****.**",
    }

    aspect_map = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SERVICE",
        strip_owner_email_id=False,
    ).process(owner_only_props)

    assert "add_owner" in aspect_map

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    assert len(ownership_aspect.owners) == 1

    new_owner: OwnerClass = ownership_aspect.owners[0]
    assert new_owner.owner == "urn:li:corpuser:[email protected]"
    # The source must be present and carry the configured source type.
    assert new_owner.source
    assert new_owner.source.type == "SERVICE"
def create_platform_mces(
    self,
    dbt_nodes: List[DBTNode],
    additional_custom_props_filtered: Dict[str, str],
    manifest_nodes_raw: Dict[str, Dict[str, Any]],
    mce_platform: str,
) -> Iterable[MetadataWorkUnit]:
    """Yield work units (MCEs and sub-type work units) for the given dbt nodes.

    dbt ingestion produces both "dbt" entities and entities for the
    underlying platform, so this method is meant to be invoked once per
    target platform; ``mce_platform`` selects which set is generated.

    Behaviour per node:

    * ``mce_platform == DBT_PLATFORM``: the base aspects are extended with
      the dbt upstream lineage and, when the node has raw SQL, a view
      properties aspect. A sub-type work unit is emitted as well.
    * any other platform, ``disable_dbt_node_creation`` is False: the
      platform entity is kept sparse. Ephemeral and test nodes are
      skipped; non-source nodes get only a lineage aspect pointing at
      the matching dbt entity; source nodes end up with no aspects and are
      therefore skipped.
    * any other platform, ``disable_dbt_node_creation`` is True: the
      platform entity keeps the base aspects plus the platform upstream
      lineage.

    Nodes that end up with no aspects produce no MCE. When
    ``write_semantics`` is ``"PATCH"`` the MCE is passed through
    ``get_patched_mce`` before being wrapped in a work unit.

    Args:
        dbt_nodes: Nodes to convert.
        additional_custom_props_filtered: Extra custom properties forwarded
            to ``_generate_base_aspects``.
        manifest_nodes_raw: Raw manifest nodes, forwarded to the lineage
            helpers.
        mce_platform: Platform the generated entities belong to.

    Yields:
        One ``MetadataWorkUnit`` per emitted sub-type and per emitted MCE.
    """
    meta_processor = OperationProcessor(
        self.config.meta_mapping,
        self.config.tag_prefix,
        "SOURCE_CONTROL",
        self.config.strip_user_ids_from_email,
    )

    for node in dbt_nodes:
        dataset_urn = get_urn_from_dbtNode(
            node.database,
            node.schema,
            node.name,
            mce_platform,
            self.config.env,
        )

        # Meta mapping only runs when it is enabled and the node has meta.
        meta_aspects: Dict[str, Any] = (
            meta_processor.process(node.meta)
            if self.config.enable_meta_mapping and node.meta
            else {}
        )

        aspects = self._generate_base_aspects(
            node, additional_custom_props_filtered, mce_platform, meta_aspects
        )

        if mce_platform == DBT_PLATFORM:
            # Upstream lineage between dbt nodes.
            dbt_lineage = self._create_lineage_aspect_for_dbt_node(
                node, manifest_nodes_raw
            )
            if dbt_lineage:
                aspects.append(dbt_lineage)

            # View properties exist only for nodes that carry raw SQL.
            if node.raw_sql:
                aspects.append(self._create_view_properties_aspect(node))

            # Sub-type is emitted as its own work unit. The report call sits
            # after the yield, so it runs once the consumer resumes the
            # generator.
            subtype_wu = self._create_subType_wu(node, dataset_urn)
            if subtype_wu:
                yield subtype_wu
                self.report.report_workunit(subtype_wu)
        elif not self.config.disable_dbt_node_creation:
            # dbt node creation is enabled: the platform entity stays empty
            # apart from a lineage aspect.
            if node.materialization == "ephemeral" or node.node_type == "test":
                continue

            aspects = []
            # Platform entities of type "source" are not linked to the dbt
            # node, because in that case the platform table existed first.
            if node.node_type != "source":
                dbt_urn = get_urn_from_dbtNode(
                    node.database,
                    node.schema,
                    node.name,
                    DBT_PLATFORM,
                    self.config.env,
                )
                aspects.append(get_upstream_lineage([dbt_urn]))
        else:
            # dbt node creation is disabled: add the upstream lineage between
            # platform nodes to the base aspects.
            platform_lineage = self._create_lineage_aspect_for_platform_node(
                node, manifest_nodes_raw
            )
            if platform_lineage:
                aspects.append(platform_lineage)

        # Nothing to emit for this node.
        if not aspects:
            continue

        snapshot = DatasetSnapshot(urn=dataset_urn, aspects=aspects)
        mce = MetadataChangeEvent(proposedSnapshot=snapshot)
        if self.config.write_semantics == "PATCH":
            mce = self.get_patched_mce(mce)

        wu = MetadataWorkUnit(id=snapshot.urn, mce=mce)
        self.report.report_workunit(wu)
        yield wu