Exemple #1
0
    def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]:
        """Build the ownership aspect for the user identified by ``creator_id``.

        Requests ``/api/user/{creator_id}`` below the configured
        ``connect_uri`` and makes the ``email`` found in the response the
        single ``DATAOWNER``.

        Returns ``None`` when the request fails with an ``HTTPError`` (the
        failure is recorded on ``self.report``) or when the response carries
        no email address.
        """
        user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}"
        try:
            user_info_response = self.session.get(user_info_url)
            user_info_response.raise_for_status()
            user_details = user_info_response.json()
        except HTTPError as http_error:
            self.report.report_failure(
                key=f"metabase-user-{creator_id}",
                reason=f"Unable to retrieve User info. "
                f"Reason: {str(http_error)}",
            )
            return None

        # Without an email there is nothing to identify the owner by. Falling
        # back to an empty string would still be handed to make_user_urn and
        # (presumably - confirm against the builder) give a URN with no user.
        email = user_details.get("email")
        if not email:
            return None

        owner_urn = builder.make_user_urn(email)
        if owner_urn is None:
            return None

        return OwnershipClass(owners=[
            OwnerClass(
                owner=owner_urn,
                type=OwnershipTypeClass.DATAOWNER,
            )
        ])
Exemple #2
0
 def _get_owners_aspect(self, node: DBTNode) -> OwnershipClass:
     """Return an ownership aspect with ``node.owner`` as its only data owner."""
     node_owner = OwnerClass(
         owner=f"urn:li:corpuser:{node.owner}",
         type=OwnershipTypeClass.DATAOWNER,
     )
     return OwnershipClass(owners=[node_owner])
def create_metadata_work_unit(timestamp):
    """Build the work unit for the ``datalake_grilled.Barbeque`` glue dataset.

    ``timestamp`` is used as the time of every audit stamp in the snapshot.
    The snapshot's aspects are, in order: status, ownership, dataset
    properties and schema metadata.
    """
    status = Status(removed=False)

    ownership = OwnershipClass(
        owners=[
            OwnerClass(
                owner="urn:li:corpuser:Susan", type=OwnershipTypeClass.DATAOWNER
            )
        ],
        lastModified=AuditStampClass(
            time=timestamp, actor="urn:li:corpuser:datahub"
        ),
    )

    properties = DatasetPropertiesClass(
        description="Grilled Food",
        customProperties={},
        uri=None,
        tags=[],
    )

    size_field = SchemaField(
        fieldPath="Size",
        nativeDataType="int",
        type=SchemaFieldDataType(type=NumberTypeClass()),
        description="Maximum attendees permitted",
        nullable=True,
        recursive=False,
    )

    schema = SchemaMetadata(
        schemaName="datalake_grilled.Barbeque",
        version=0,
        fields=[size_field],
        platform="urn:li:dataPlatform:glue",
        # Two separate stamp objects, so the two fields never share state.
        created=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        lastModified=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        hash="",
        platformSchema=MySqlDDL(tableSchema=""),
    )

    snapshot = DatasetSnapshot(
        urn="urn:li:dataset:(urn:li:dataPlatform:glue,datalake_grilled.Barbeque,PROD)",
        aspects=[],
    )
    for aspect in (status, ownership, properties, schema):
        snapshot.aspects.append(aspect)

    return MetadataWorkUnit(
        id="glue-datalake_grilled.Barbeque",
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
 def __init__(self, config: PatternDatasetOwnershipConfig,
              ctx: PipelineContext):
     """Set up the parent transformer with pattern-derived owners.

     The owner lookup handed to the parent evaluates ``config.owner_pattern``
     against the ``urn`` of whatever object it is called with and wraps
     every resulting value as a ``DATAOWNER``.
     """
     pattern = config.owner_pattern

     def owners_for(entity):
         matched = pattern.value(entity.urn)
         return [
             OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER)
             for owner_urn in matched
         ]

     super().__init__(
         AddDatasetOwnershipConfig(
             get_owners_to_add=owners_for,
             default_actor=config.default_actor,
         ),
         ctx,
     )
    def __init__(self, config: AddCustomOwnershipConfig, ctx: PipelineContext):
        """Store the context and config and load the owners to add.

        ``config.owners_json`` is the path of a JSON file; each element of
        the decoded document becomes the ``owner`` of a ``DATAOWNER`` entry
        in ``self.owners``.
        """
        self.ctx = ctx
        self.config = config

        # JSON text is UTF-8 by specification; do not depend on the
        # platform's locale encoding when decoding the file.
        with open(self.config.owners_json, "r", encoding="utf-8") as f:
            raw_owner_urns = json.load(f)

        self.owners = [
            OwnerClass(owner=owner, type=OwnershipTypeClass.DATAOWNER)
            for owner in raw_owner_urns
        ]
Exemple #6
0
 def get_owner() -> Optional[OwnershipClass]:
     """Return ownership built from the ``Owner`` entry of ``table``.

     ``table`` is a free variable resolved from the surrounding scope.
     Gives ``None`` when the ``Owner`` value is missing or falsy.
     """
     table_owner = table.get("Owner")
     if not table_owner:
         return None

     return OwnershipClass(owners=[
         OwnerClass(
             owner=f"urn:li:corpuser:{table_owner}",
             type=OwnershipTypeClass.DATAOWNER,
         )
     ])
Exemple #7
0
 def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
     """Build the MCE for a tag registered in ``LookerUtil.tag_definitions``.

     The tag snapshot carries an ownership aspect naming the ``datahub``
     user as data owner, followed by the registered definition of the tag.
     Asserts that ``tag_urn`` is a registered tag.
     """
     assert tag_urn in LookerUtil.tag_definitions

     datahub_owner = OwnerClass(
         owner="urn:li:corpuser:datahub",
         type=OwnershipTypeClass.DATAOWNER,
     )
     tag_snapshot = TagSnapshotClass(
         urn=tag_urn,
         aspects=[
             OwnershipClass(owners=[datahub_owner]),
             LookerUtil.tag_definitions[tag_urn],
         ],
     )
     return MetadataChangeEvent(proposedSnapshot=tag_snapshot)
Exemple #8
0
    def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
        """Return an ownership aspect naming ``user`` as data owner.

        Gives ``None`` when ``user`` is ``None``.
        """
        if user is None:
            return None

        data_owner = OwnerClass(
            owner=builder.make_user_urn(user),
            type=OwnershipTypeClass.DATAOWNER,
        )
        return OwnershipClass(owners=[data_owner])
    def __init__(self, config: SimpleDatasetOwnershipConfig,
                 ctx: PipelineContext):
        """Set up the parent transformer with a fixed list of owners.

        Every URN in ``config.owner_urns`` becomes a ``DATAOWNER``; the same
        list object is returned by the owner lookup on every call.
        """
        fixed_owners = []
        for owner_urn in config.owner_urns:
            fixed_owners.append(
                OwnerClass(owner=owner_urn,
                           type=OwnershipTypeClass.DATAOWNER))

        super().__init__(
            AddDatasetOwnershipConfig(
                get_owners_to_add=lambda _: fixed_owners,
                default_actor=config.default_actor,
            ),
            ctx,
        )
Exemple #10
0
 def get_owner() -> OwnershipClass:
     """Return ownership built from the ``Owner`` entry of ``table``.

     ``table`` is a free variable resolved from the surrounding scope.
     The owner list is empty when the ``Owner`` value is missing or falsy.
     """
     table_owner = table.get("Owner")
     owner_entries = ([
         OwnerClass(
             owner=f"urn:li:corpuser:{table_owner}",
             type=OwnershipTypeClass.DATAOWNER,
         )
     ] if table_owner else [])
     return OwnershipClass(owners=owner_entries)
Exemple #11
0
def create_owners_list_from_urn_list(owner_urns: List[str],
                                     source_type: str) -> List[OwnerClass]:
    """Wrap each URN in ``owner_urns`` as a ``DATAOWNER`` entry.

    When ``source_type`` is truthy, every entry shares one
    ``OwnershipSourceClass`` built from it; otherwise the source is ``None``.
    """
    shared_source = (OwnershipSourceClass(
        type=source_type) if source_type else None)

    return [
        OwnerClass(
            owner=urn,
            type=OwnershipTypeClass.DATAOWNER,
            source=shared_source,
        ) for urn in owner_urns
    ]
 def getOwners(
     self,
     key: str,
     owner_pattern: KeyValuePattern,
     ownership_type: Optional[str] = None,
 ) -> List[OwnerClass]:
     """Return one owner entry per value ``owner_pattern`` yields for ``key``.

     The type of each entry is the result of passing ``ownership_type``
     through ``builder.validate_ownership_type``.
     """
     matched_owners: List[OwnerClass] = []
     for owner_urn in owner_pattern.value(key):
         # The type is validated once per owner, so nothing is validated
         # when the pattern yields no owners.
         matched_owners.append(
             OwnerClass(
                 owner=owner_urn,
                 type=builder.validate_ownership_type(ownership_type),
             ))
     return matched_owners
Exemple #13
0
    def get_group_wu(
        self, group_details: "DescribeModelPackageGroupOutputTypeDef"
    ) -> MetadataWorkUnit:
        """
        Get a workunit for a model group.

        Also records the group's ARN -> name mapping in
        ``self.group_arn_to_name``. The snapshot carries the group
        properties, an ownership aspect (one data owner when the details
        name a ``CreatedBy.UserProfileName``, otherwise none) and a browse
        path under ``/sagemaker/``.
        """

        # keys left out of the custom properties because they are extracted
        excluded_keys = {"ModelPackageGroupName", "CreationTime"}

        group_arn = group_details["ModelPackageGroupArn"]
        group_name = group_details["ModelPackageGroupName"]
        self.group_arn_to_name[group_arn] = group_name

        owners = []
        created_by = group_details.get("CreatedBy", {})
        if created_by.get("UserProfileName") is not None:
            creator = OwnerClass(
                owner=
                f"urn:li:corpuser:{group_details['CreatedBy']['UserProfileName']}",
                type=OwnershipTypeClass.DATAOWNER,
            )
            owners.append(creator)

        group_urn = builder.make_ml_model_group_urn("sagemaker", group_name,
                                                    self.env)

        # epoch milliseconds; the current time stands in for a missing value
        creation_time = group_details.get("CreationTime", datetime.now())
        created_at = int(creation_time.timestamp() * 1000)

        custom_properties = {}
        for key, value in group_details.items():
            if key not in excluded_keys:
                custom_properties[key] = str(value)

        properties = MLModelGroupPropertiesClass(
            createdAt=created_at,
            description=group_details.get("ModelPackageGroupDescription"),
            customProperties=custom_properties,
        )

        group_snapshot = MLModelGroupSnapshot(
            urn=group_urn,
            aspects=[
                properties,
                OwnershipClass(owners),
                BrowsePathsClass(paths=[f"/sagemaker/{group_name}"]),
            ],
        )

        # make the MCE and workunit
        return MetadataWorkUnit(
            id=group_name,
            mce=MetadataChangeEvent(proposedSnapshot=group_snapshot),
        )
Exemple #14
0
    def _aggregate_owners(self, node: DBTNode,
                          meta_owner_aspects: Any) -> List[OwnerClass]:
        """Collect the owners of ``node``, sorted by owner URN.

        Includes ``node.owner`` as a data owner when it is set, plus the
        owners of ``meta_owner_aspects`` when that is truthy and
        ``self.config.enable_meta_mapping`` is on.
        """
        collected: List[OwnerClass] = []

        if node.owner:
            collected.append(
                OwnerClass(
                    owner=f"urn:li:corpuser:{node.owner}",
                    type=OwnershipTypeClass.DATAOWNER,
                ))

        if meta_owner_aspects and self.config.enable_meta_mapping:
            collected.extend(meta_owner_aspects.owners)

        return sorted(collected, key=lambda entry: entry.owner)
Exemple #15
0
 def get_ownership(
         self,
         looker_dashboard: LookerDashboard) -> Optional[OwnershipClass]:
     """Return ownership for the dashboard's owner, if one can be resolved.

     Gives ``None`` when the dashboard has no owner or when the owner's
     ``_get_urn`` returns ``None``.
     """
     dashboard_owner = looker_dashboard.owner
     if dashboard_owner is None:
         return None

     owner_urn = dashboard_owner._get_urn(
         self.source_config.strip_user_ids_from_email)
     if owner_urn is None:
         return None

     return OwnershipClass(owners=[
         OwnerClass(
             owner=owner_urn,
             type=OwnershipTypeClass.DATAOWNER,
         )
     ])
    def __init__(self, config: SimpleDatasetOwnershipConfig,
                 ctx: PipelineContext):
        """Set up the parent transformer with a fixed list of owners.

        ``config.ownership_type`` is validated once and applied to every URN
        in ``config.owner_urns``; the same list object is returned by the
        owner lookup on every call.
        """
        validated_type = builder.validate_ownership_type(
            config.ownership_type)

        fixed_owners = []
        for owner_urn in config.owner_urns:
            fixed_owners.append(
                OwnerClass(owner=owner_urn, type=validated_type))

        super().__init__(
            AddDatasetOwnershipConfig(
                get_owners_to_add=lambda _: fixed_owners,
                default_actor=config.default_actor,
            ),
            ctx,
        )
Exemple #17
0
def add_owner_to_entity_wu(entity_type: str, entity_urn: str,
                           owner_urn: str) -> Iterable[MetadataWorkUnit]:
    """Yield a single work unit that upserts ``owner_urn`` as data owner.

    The work unit wraps an ``ownership`` aspect proposal for ``entity_urn``
    and is identified as ``"{owner_urn}-to-{entity_urn}"``.
    """
    data_owner = OwnerClass(
        owner=owner_urn,
        type=OwnershipTypeClass.DATAOWNER,
    )
    proposal = MetadataChangeProposalWrapper(
        entityType=entity_type,
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=f"{entity_urn}",
        aspectName="ownership",
        aspect=OwnershipClass(owners=[data_owner]),
    )
    yield MetadataWorkUnit(id=f"{owner_urn}-to-{entity_urn}", mcp=proposal)
Exemple #18
0
    def _make_dashboard_and_chart_mces(
        self, looker_dashboard: LookerDashboard
    ) -> List[MetadataChangeEvent]:
        """Convert a Looker dashboard into MCEs.

        Returns the MCEs of the dashboard's elements (one per element, via
        ``_make_chart_mce``) followed by the MCE of the dashboard itself.
        The dashboard snapshot carries, in order: dashboard info, ownership
        by the configured actor, and a status reflecting ``is_deleted``.
        """
        actor = self.source_config.actor
        sys_time = get_sys_time()

        chart_mces = []
        for element in looker_dashboard.dashboard_elements:
            chart_mces.append(self._make_chart_mce(element))

        dashboard_urn = f"urn:li:dashboard:({self.source_config.platform_name},{looker_dashboard.get_urn_dashboard_id()})"
        dashboard_snapshot = DashboardSnapshot(
            urn=dashboard_urn,
            aspects=[],
        )

        audit_stamps = ChangeAuditStamps(
            created=AuditStamp(time=sys_time, actor=actor),
            lastModified=AuditStamp(time=sys_time, actor=actor),
        )

        # A missing description is stored as an empty string.
        description = looker_dashboard.description
        if description is None:
            description = ""

        dashboard_info = DashboardInfoClass(
            description=description,
            title=looker_dashboard.title,
            charts=[chart.proposedSnapshot.urn for chart in chart_mces],
            lastModified=audit_stamps,
            dashboardUrl=looker_dashboard.url(self.source_config.base_url),
        )

        ownership = OwnershipClass(
            owners=[OwnerClass(owner=actor, type=OwnershipTypeClass.DATAOWNER)],
            lastModified=AuditStampClass(
                time=sys_time, actor=self.source_config.actor
            ),
        )

        status = Status(removed=looker_dashboard.is_deleted)

        for aspect in (dashboard_info, ownership, status):
            dashboard_snapshot.aspects.append(aspect)

        return chart_mces + [
            MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
        ]
Exemple #19
0
 def generate_ownership_aspect(self):
     """Return a one-element list holding this object's ownership aspect.

     Each entry of ``self.owners`` (none when it is falsy) becomes a
     ``DEVELOPER`` owner with a ``SERVICE`` source. The aspect is stamped
     with time 0 and the orchestrator's user URN as actor.
     """
     developer_owners = []
     for owner_name in (self.owners or []):
         developer_owners.append(
             OwnerClass(
                 owner=builder.make_user_urn(owner_name),
                 type=OwnershipTypeClass.DEVELOPER,
                 source=OwnershipSourceClass(
                     type=OwnershipSourceTypeClass.SERVICE,
                 ),
             ))

     orchestrator_stamp = AuditStampClass(
         time=0,
         actor=builder.make_user_urn(self.orchestrator),
     )
     return [
         OwnershipClass(owners=developer_owners,
                        lastModified=orchestrator_stamp)
     ]
 def get_owner(time: int) -> OwnershipClass:
     """Return ownership built from the ``Owner`` entry of ``table``.

     ``table`` is a free variable resolved from the surrounding scope.
     The owner list is empty when the ``Owner`` value is missing or falsy.
     The aspect is stamped with ``time`` and the ``datahub`` user as actor.
     """
     table_owner = table.get("Owner")
     owner_entries = ([
         OwnerClass(
             owner=f"urn:li:corpuser:{table_owner}",
             type=OwnershipTypeClass.DATAOWNER,
         )
     ] if table_owner else [])

     modified_stamp = AuditStampClass(
         time=time,
         actor="urn:li:corpuser:datahub",
     )
     return OwnershipClass(owners=owner_entries, lastModified=modified_stamp)
Exemple #21
0
def create_ownership_aspect_mce(
        directive: Directive) -> MetadataChangeEventClass:
    """Build an MCE that sets the owners of the directive's table.

    Each name in ``directive.owners`` is cleaned, converted to a URN and
    added as a ``DATAOWNER``. The aspect is stamped with the current time
    in epoch milliseconds and the ``datahub`` user as actor.
    """
    data_owners = []
    for owner_name in directive.owners:
        data_owners.append(
            OwnerClass(
                owner=owner_name_to_urn(clean_owner_name(owner_name)),
                type=OwnershipTypeClass.DATAOWNER,
            ))

    ownership = OwnershipClass(
        owners=data_owners,
        lastModified=AuditStampClass(
            time=int(time.time() * 1000),
            actor="urn:li:corpuser:datahub",
        ),
    )
    dataset_snapshot = DatasetSnapshotClass(
        urn=dataset_name_to_urn(directive.table),
        aspects=[ownership],
    )
    return MetadataChangeEventClass(proposedSnapshot=dataset_snapshot)
Exemple #22
0
def make_ownership_aspect_from_urn_list(
    owner_urns: List[str],
    source_type: Optional[Union[str,
                                OwnershipSourceTypeClass]]) -> OwnershipClass:
    """Build an ownership aspect naming every URN as a ``DATAOWNER``.

    Args:
        owner_urns: URNs of the owners; each must start with
            ``urn:li:corpuser:`` or ``urn:li:corpGroup:``.
        source_type: when truthy, wrapped in one ``OwnershipSourceClass``
            shared by all owners; otherwise the owners have no source.

    Raises:
        AssertionError: if a URN has neither of the accepted prefixes.
    """
    # startswith accepts a tuple, so one call covers both accepted prefixes.
    accepted_prefixes = ("urn:li:corpuser:", "urn:li:corpGroup:")
    for owner_urn in owner_urns:
        assert owner_urn.startswith(accepted_prefixes)

    ownership_source_type: Union[None, OwnershipSourceClass] = None
    if source_type:
        ownership_source_type = OwnershipSourceClass(type=source_type)

    owners_list = [
        OwnerClass(
            owner=owner_urn,
            type=OwnershipTypeClass.DATAOWNER,
            source=ownership_source_type,
        ) for owner_urn in owner_urns
    ]
    return OwnershipClass(owners=owners_list)
Exemple #23
0
    def __to_datahub_dashboard(
        self,
        dashboard: PowerBiAPI.Dashboard,
        chart_mcps: List[MetadataChangeProposalWrapper],
        user_mcps: List[MetadataChangeProposalWrapper],
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBi dashboard to Datahub dashboard

        Produces five proposals for the dashboard, returned in the order:
        browse paths, dashboard info, status, dashboard key, ownership.
        """

        dashboard_urn = builder.make_dashboard_urn(self.__config.platform_name,
                                                   dashboard.get_urn_part())

        chart_urn_list: List[str] = self.to_urn_set(chart_mcps)
        user_urn_list: List[str] = self.to_urn_set(user_mcps)

        def dashboard_mcp(aspect_name, aspect):
            # Every proposal below targets this same dashboard entity.
            return self.new_mcp(
                entity_type=Constant.DASHBOARD,
                entity_urn=dashboard_urn,
                aspect_name=aspect_name,
                aspect=aspect,
            )

        # DashboardInfo mcp
        display_name = dashboard.displayName or ""
        info_mcp = dashboard_mcp(
            Constant.DASHBOARD_INFO,
            DashboardInfoClass(
                description=display_name,
                title=display_name,
                charts=chart_urn_list,
                lastModified=ChangeAuditStamps(),
                dashboardUrl=dashboard.webUrl,
                customProperties={
                    "chartCount": str(len(dashboard.tiles)),
                    "workspaceName": dashboard.workspace_name,
                    # NOTE(review): filled from the dashboard's own id, not
                    # from a workspace attribute - confirm this is intended.
                    "workspaceId": dashboard.id,
                },
            ),
        )

        # removed status mcp
        removed_status_mcp = dashboard_mcp(Constant.STATUS,
                                           StatusClass(removed=False))

        # dashboardKey mcp
        dashboard_key_mcp = dashboard_mcp(
            Constant.DASHBOARD_KEY,
            DashboardKeyClass(
                dashboardTool=self.__config.platform_name,
                dashboardId=Constant.DASHBOARD_ID.format(dashboard.id),
            ),
        )

        # Dashboard owner MCP: every non-None user URN becomes a CONSUMER
        consumers = []
        for user_urn in user_urn_list:
            if user_urn is not None:
                consumers.append(
                    OwnerClass(owner=user_urn,
                               type=OwnershipTypeClass.CONSUMER))
        owner_mcp = dashboard_mcp(Constant.OWNERSHIP,
                                  OwnershipClass(owners=consumers))

        # Dashboard browsePaths
        browse_path_mcp = dashboard_mcp(
            Constant.BROWSERPATH,
            BrowsePathsClass(
                paths=["/powerbi/{}".format(self.__config.workspace_id)]),
        )

        return [
            browse_path_mcp,
            info_mcp,
            removed_status_mcp,
            dashboard_key_mcp,
            owner_mcp,
        ]
Exemple #24
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield one work unit per node loaded from the manifest and catalog.

        Each node's snapshot always carries dataset properties. It also
        carries ownership when the node has an owner, global tags when it
        has tags, upstream lineage when ``get_upstream_lineage`` returns
        one, and schema metadata when ``config.load_schemas`` is set. Every
        work unit is reported to ``self.report`` before it is yielded.
        """
        (
            nodes,
            manifest_schema,
            manifest_version,
            catalog_schema,
            catalog_version,
        ) = loadManifestAndCatalog(
            self.config.manifest_path,
            self.config.catalog_path,
            self.config.sources_path,
            self.config.load_schemas,
            self.config.use_identifiers,
            self.config.tag_prefix,
            self.config.target_platform,
            self.config.env,
            self.config.node_type_pattern,
            self.report,
        )

        # Schema/version values that could not be determined are left out.
        shared_props = {}
        for prop_name, prop_value in (
            ("manifest_schema", manifest_schema),
            ("manifest_version", manifest_version),
            ("catalog_schema", catalog_schema),
            ("catalog_version", catalog_version),
        ):
            if prop_value is not None:
                shared_props[prop_name] = prop_value

        for node in nodes:
            snapshot = DatasetSnapshot(urn=node.datahub_urn, aspects=[])

            # Show both texts only when they are present and differ.
            if node.comment and node.description and node.comment != node.description:
                description = f"{self.config.target_platform} comment: {node.comment}\n\ndbt model description: {node.description}"
            else:
                description = node.comment or node.description or None

            merged_props = dict(get_custom_properties(node))
            merged_props.update(shared_props)

            snapshot.aspects.append(
                DatasetPropertiesClass(
                    description=description,
                    customProperties=merged_props,
                    tags=node.tags,
                ))

            if node.owner:
                node_owner = OwnerClass(
                    owner=f"urn:li:corpuser:{node.owner}",
                    type=OwnershipTypeClass.DATAOWNER,
                )
                snapshot.aspects.append(OwnershipClass(owners=[node_owner]))

            if node.tags:
                tag_links = []
                for tag in node.tags:
                    tag_links.append(TagAssociationClass(f"urn:li:tag:{tag}"))
                snapshot.aspects.append(GlobalTagsClass(tags=tag_links))

            lineage = get_upstream_lineage(node.upstream_urns)
            if lineage is not None:
                snapshot.aspects.append(lineage)

            if self.config.load_schemas:
                snapshot.aspects.append(
                    get_schema_metadata(self.report, node,
                                        self.config.target_platform))

            work_unit = MetadataWorkUnit(
                id=snapshot.urn,
                mce=MetadataChangeEvent(proposedSnapshot=snapshot),
            )
            self.report.report_workunit(work_unit)

            yield work_unit
    OwnershipClass,
    OwnershipTypeClass,
)

# Module logger; the root logger is configured to emit INFO and above.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Inputs -> owner, ownership_type, dataset
owner_to_add = make_user_urn("jdoe")
ownership_type = OwnershipTypeClass.DATAOWNER
dataset_urn = make_dataset_urn(platform="hive",
                               name="realestate_db.sales",
                               env="PROD")

# Some objects to help with conditional pathways later:
# the single owner entry, and an ownership aspect holding only that entry.
owner_class_to_add = OwnerClass(owner=owner_to_add, type=ownership_type)
ownership_to_add = OwnershipClass(owners=[owner_class_to_add])

# First we get the current owners.
# The graph client is pointed at a server on localhost port 8080.
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))

# Annotated as Optional: the lookup may come back without an ownership
# aspect, which is why the result is truth-tested before it is used.
current_owners: Optional[OwnershipClass] = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="ownership",
    aspect_type=OwnershipClass,
)

# Starts out False; presumably set by the checks that follow when the
# ownership needs to be written back - verify against the rest of the script.
need_write = False
if current_owners:
    if (owner_to_add, ownership_type) not in [(x.owner, x.type)
 def getOwners(self, key, owner_pattern):
     """Return one ``DATAOWNER`` entry per value ``owner_pattern`` yields for ``key``."""
     matched_owners = []
     for owner_urn in owner_pattern.value(key):
         matched_owners.append(
             OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER))
     return matched_owners