Example #1
0
def test_operation_processor_not_matching():
    """Properties that match none of the rules must not produce any aspect."""
    # Every key/value below is chosen so that no operation rule applies.
    unmatched_props = {
        "user_owner_test": "*****@*****.**",
        "group.owner_test": "*****@*****.**",
        "governance.team_owner": "Binance",
        "pii": False,
        "int_property": 3,
        "double_property": 25,
    }
    produced = OperationProcessor(get_operation_defs()).process(unmatched_props)
    for operation in ("add_tag", "add_term", "add_owner"):
        assert operation not in produced
Example #2
0
def test_operation_processor_matching():
    """Matching properties must yield the tag, term and owner aspects."""
    matching_props = {
        "user_owner": "*****@*****.**",
        "user_owner_2": "test_user_2",
        "group.owner": "*****@*****.**",
        "governance.team_owner": "Finance",
        "pii": True,
        "int_property": 1,
        "double_property": 2.5,
        "tag": "Finance",
    }
    result = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SOURCE_CONTROL",
        strip_owner_email_id=True,
    ).process(matching_props)

    # All three operations are expected to have fired.
    for operation in ("add_tag", "add_term", "add_owner"):
        assert operation in result

    global_tags: GlobalTags = result["add_tag"]
    tags_added = [association.tag for association in global_tags.tags]
    glossary_terms: GlossaryTermsClass = result["add_term"]
    terms_added = [association.urn for association in glossary_terms.terms]

    # Exactly three tags, and each expected urn is among them.
    assert len(tags_added) == 3
    for expected_tag in (
        "urn:li:tag:has_pii_test",
        "urn:li:tag:int_property",
        "urn:li:tag:Finance",
    ):
        assert expected_tag in tags_added

    # Exactly two glossary terms, and each expected urn is among them.
    assert len(terms_added) == 2
    for expected_term in (
        "urn:li:glossaryTerm:Finance.test",
        "urn:li:glossaryTerm:double_property",
    ):
        assert expected_term in terms_added

    ownership: OwnershipClass = result["add_owner"]
    assert len(ownership.owners) == 3
    allowed_owner_urns = {
        "urn:li:corpuser:test_user",
        "urn:li:corpuser:test_user_2",
        "urn:li:corpGroup:test.group",
    }
    for owner in ownership.owners:
        assert owner.owner in allowed_owner_urns
        # Every owner must carry the source type passed to the processor.
        assert owner.source
        assert owner.source.type == OwnershipSourceTypeClass.SOURCE_CONTROL
Example #3
0
def test_operation_processor_no_email_strip_source_type_not_null():
    """Check the single owner produced when ``strip_owner_email_id`` is
    False and an owner source type is supplied."""
    result = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SERVICE",
        strip_owner_email_id=False,
    ).process({"user_owner": "*****@*****.**"})
    assert "add_owner" in result

    ownership: OwnershipClass = result["add_owner"]
    assert len(ownership.owners) == 1
    only_owner: OwnerClass = ownership.owners[0]
    assert only_owner.owner == "urn:li:corpuser:[email protected]"
    # The source must be set and carry the type given to the processor.
    assert only_owner.source
    assert only_owner.source.type == "SERVICE"
Example #4
0
    def create_platform_mces(
        self,
        dbt_nodes: List[DBTNode],
        additional_custom_props_filtered: Dict[str, str],
        manifest_nodes_raw: Dict[str, Dict[str, Any]],
        mce_platform: str,
    ) -> Iterable[MetadataWorkUnit]:
        """
        Create the dataset MCE work units for ``dbt_nodes`` on ``mce_platform``.

        dbt ingestion creates "dbt" entities as well as entities for the
        underlying platform, so this function is called once per platform and
        behaves differently depending on ``mce_platform``:

        * ``mce_platform == DBT_PLATFORM``: the aspects returned by
          ``_generate_base_aspects`` are extended with the dbt upstream lineage
          and, when the node has raw SQL, a view-properties aspect. A sub-type
          work unit is emitted first when one is created for the node.
        * any other platform, ``disable_dbt_node_creation = False``: the
          platform entity is emitted "empty" - its only aspect is an upstream
          lineage pointing at the matching dbt entity. Ephemeral and test
          nodes are skipped; "source" nodes get no lineage and therefore
          produce no work unit.
        * any other platform, ``disable_dbt_node_creation = True``: the
          platform entity carries the aspects from ``_generate_base_aspects``
          plus its platform upstream lineage.

        Nodes that end up with no aspects produce no dataset work unit. Every
        yielded work unit is also passed to ``self.report.report_workunit``.
        """
        action_processor = OperationProcessor(
            self.config.meta_mapping,
            self.config.tag_prefix,
            "SOURCE_CONTROL",
            self.config.strip_user_ids_from_email,
        )

        for node in dbt_nodes:
            node_datahub_urn = get_urn_from_dbtNode(
                node.database,
                node.schema,
                node.name,
                mce_platform,
                self.config.env,
            )

            meta_aspects: Dict[str, Any] = {}
            if self.config.enable_meta_mapping and node.meta:
                meta_aspects = action_processor.process(node.meta)

            aspects = self._generate_base_aspects(
                node, additional_custom_props_filtered, mce_platform,
                meta_aspects)

            if mce_platform == DBT_PLATFORM:
                # add upstream lineage
                upstream_lineage_class = self._create_lineage_aspect_for_dbt_node(
                    node, manifest_nodes_raw)
                if upstream_lineage_class:
                    aspects.append(upstream_lineage_class)

                # add view properties aspect
                if node.raw_sql:
                    view_prop_aspect = self._create_view_properties_aspect(
                        node)
                    aspects.append(view_prop_aspect)

                # emit subtype mcp
                sub_type_wu = self._create_subType_wu(node, node_datahub_urn)
                if sub_type_wu:
                    # Report before yielding (as done for the dataset work
                    # unit below) so the unit is recorded even if the consumer
                    # stops iterating right after receiving it.
                    self.report.report_workunit(sub_type_wu)
                    yield sub_type_wu

            elif not self.config.disable_dbt_node_creation:
                # dbt node creation is enabled, so the platform entity is
                # created empty and only gets the lineage/key aspect.
                if node.materialization == "ephemeral" or node.node_type == "test":
                    continue
                aspects = []

                # This code block is run when we are generating entities of platform type.
                # We will not link the platform node to the dbt node for type "source" because
                # in this case the platform table existed first.
                if node.node_type != "source":
                    upstream_dbt_urn = get_urn_from_dbtNode(
                        node.database,
                        node.schema,
                        node.name,
                        DBT_PLATFORM,
                        self.config.env,
                    )
                    upstreams_lineage_class = get_upstream_lineage(
                        [upstream_dbt_urn])
                    aspects.append(upstreams_lineage_class)
            else:
                # dbt node creation is disabled: keep the base aspects and
                # add the platform-level upstream lineage.
                platform_upstream_aspect = (
                    self._create_lineage_aspect_for_platform_node(
                        node, manifest_nodes_raw))
                if platform_upstream_aspect:
                    aspects.append(platform_upstream_aspect)

            # Nothing to emit for this node (e.g. a "source" node in the
            # lineage-only platform pass).
            if not aspects:
                continue
            dataset_snapshot = DatasetSnapshot(urn=node_datahub_urn,
                                               aspects=aspects)
            mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
            if self.config.write_semantics == "PATCH":
                mce = self.get_patched_mce(mce)
            wu = MetadataWorkUnit(id=dataset_snapshot.urn, mce=mce)
            self.report.report_workunit(wu)
            yield wu