def _get_entity_workunit(
    self, feature_view: FeatureView, entity: Entity
) -> MetadataWorkUnit:
    """Generate an MLPrimaryKey work unit for a Feast entity.

    The entity is emitted as an MLPrimaryKey snapshot carrying a status
    aspect (removed=False) and a properties aspect with description,
    data type and data sources derived from the owning feature view.
    """
    qualified_view_name = f"{self.feature_store.project}.{feature_view.name}"

    # Build the properties aspect first, then assemble the snapshot with
    # its full aspect list in one go.
    key_properties = MLPrimaryKeyPropertiesClass(
        description=entity.description,
        dataType=self._get_field_type(entity.value_type, entity.name),
        sources=self._get_data_sources(feature_view),
    )
    snapshot = MLPrimaryKeySnapshot(
        urn=builder.make_ml_primary_key_urn(qualified_view_name, entity.name),
        aspects=[StatusClass(removed=False), key_properties],
    )

    return MetadataWorkUnit(
        id=entity.name,
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
    """Yield the dataJobInputOutput aspect for this data job, followed by a
    status aspect for each inlet/outlet dataset so those entities exist
    (are materialized) on the server."""
    yield MetadataChangeProposalWrapper(
        entityType="datajob",
        entityUrn=str(self.urn),
        aspectName="dataJobInputOutput",
        aspect=DataJobInputOutputClass(
            inputDatasets=[str(urn) for urn in self.inlets],
            outputDatasets=[str(urn) for urn in self.outlets],
            inputDatajobs=[str(urn) for urn in self.upstream_urns],
        ),
        changeType=ChangeTypeClass.UPSERT,
    )

    # Force entity materialization: a bare status aspect per referenced dataset.
    for iolet in [*self.inlets, *self.outlets]:
        yield MetadataChangeProposalWrapper(
            entityType="dataset",
            entityUrn=str(iolet),
            aspectName="status",
            aspect=StatusClass(removed=False),
            changeType=ChangeTypeClass.UPSERT,
        )
def transform_aspect(
    self, entity_urn: str, aspect_name: str, aspect: Optional[builder.Aspect]
) -> Optional[builder.Aspect]:
    """Force the status aspect's ``removed`` flag to the configured value.

    :param entity_urn: urn of the entity being transformed (unused here).
    :param aspect_name: name of the incoming aspect (unused here).
    :param aspect: the existing status aspect, or None if the entity has none.
    :return: a StatusClass with ``removed`` set to ``self.config.removed``.
    """
    assert aspect is None or isinstance(aspect, StatusClass)
    # Fix: test `is not None` explicitly rather than relying on object
    # truthiness — the asserted contract is "None or StatusClass", and
    # truthiness would silently misbehave if StatusClass ever defined
    # __bool__/__len__.
    status_aspect: StatusClass = (
        aspect if aspect is not None else StatusClass(removed=None)
    )
    status_aspect.removed = self.config.removed
    return status_aspect  # type: ignore
def _get_feature_workunit(
    self,
    feature_view: Union[FeatureView, OnDemandFeatureView],
    feature: Feature,
) -> MetadataWorkUnit:
    """Generate an MLFeature work unit for a Feast feature.

    Sources are resolved from the owning view: plain feature views use
    their own data sources; on-demand views contribute URNs for their
    request data sources plus the sources of any projected feature views.
    """
    qualified_view_name = f"{self.feature_store.project}.{feature_view.name}"

    snapshot = MLFeatureSnapshot(
        urn=builder.make_ml_feature_urn(qualified_view_name, feature.name),
        aspects=[StatusClass(removed=False)],
    )

    sources = []
    if isinstance(feature_view, FeatureView):
        sources = self._get_data_sources(feature_view)
    elif isinstance(feature_view, OnDemandFeatureView):
        # Request data sources map directly onto dataset URNs.
        request_sources = feature_view.input_request_data_sources
        if request_sources is not None:
            for request_source in request_sources.values():
                platform, name = self._get_data_source_details(request_source)
                sources.append(
                    builder.make_dataset_urn(
                        platform,
                        name,
                        self.source_config.environment,
                    )
                )
        # Projected feature views contribute their own upstream sources.
        projections = feature_view.input_feature_view_projections
        if projections is not None:
            for projection in projections.values():
                upstream_view = self.feature_store.get_feature_view(
                    projection.name
                )
                sources.extend(self._get_data_sources(upstream_view))

    snapshot.aspects.append(
        MLFeaturePropertiesClass(
            description=feature.labels.get("description"),
            dataType=self._get_field_type(feature.dtype, feature.name),
            sources=sources,
        )
    )

    return MetadataWorkUnit(
        id=feature.name,
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
def _generate_base_aspects( self, node: DBTNode, additional_custom_props_filtered: Dict[str, str], mce_platform: str, meta_aspects: Dict[str, Any], ) -> List[Any]: """ There are some common aspects that get generated for both dbt node and platform node depending on whether dbt node creation is enabled or not. """ # create an empty list of aspects and keep adding to it. Initializing with Any to avoid a # large union of aspect types. aspects: List[Any] = [] # add dataset properties aspect dbt_properties = self._create_dataset_properties_aspect( node, additional_custom_props_filtered) aspects.append(dbt_properties) # add status aspect status = StatusClass(removed=False) aspects.append(status) # add owners aspect # we need to aggregate owners added by meta properties and the owners that are coming from server. meta_owner_aspects = meta_aspects.get(Constants.ADD_OWNER_OPERATION) aggregated_owners = self._aggregate_owners(node, meta_owner_aspects) if aggregated_owners: aspects.append(OwnershipClass(owners=aggregated_owners)) # add tags aspects meta_tags_aspect = meta_aspects.get(Constants.ADD_TAG_OPERATION) aggregated_tags = self._aggregate_tags(node, meta_tags_aspect) if aggregated_tags: aspects.append( mce_builder.make_global_tag_aspect_with_tag_list( aggregated_tags)) # add meta term aspects if (meta_aspects.get(Constants.ADD_TERM_OPERATION) and self.config.enable_meta_mapping): aspects.append(meta_aspects.get(Constants.ADD_TERM_OPERATION)) # add schema metadata aspect schema_metadata = get_schema_metadata(self.report, node, mce_platform) # When generating these aspects for a dbt node, we will always include schema information. When generating # these aspects for a platform node (which only happens when disable_dbt_node_creation is set to true) we # honor the flag. if mce_platform == DBT_PLATFORM: aspects.append(schema_metadata) else: if self.config.load_schemas: aspects.append(schema_metadata) return aspects
def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
    """Force the configured ``removed`` value onto the status aspect of a
    dataset MCE; non-dataset snapshots pass through untouched."""
    # Only dataset snapshots are managed by this transformer.
    if not isinstance(mce.proposedSnapshot, DatasetSnapshotClass):
        return mce

    # Reuse the existing status aspect if present, otherwise add one.
    status = builder.get_or_add_aspect(mce, StatusClass(removed=None))
    status.removed = self.config.removed
    return mce
def __to_datahub_dataset(
        self, dataset: Optional[PowerBiAPI.Dataset]
) -> List[MetadataChangeProposalWrapper]:
    """
    Map PowerBi dataset to datahub dataset. Here we are mapping each table
    of the PowerBi Dataset to a Datahub dataset. A PowerBi Tile has a single
    dataset, but the corresponding Datahub chart might have many input sources.
    """
    dataset_mcps: List[MetadataChangeProposalWrapper] = []
    # Nothing to emit when the tile has no dataset attached.
    if dataset is None:
        return dataset_mcps

    # We only support relational PowerBi DataSources; anything else is skipped
    # with a warning.
    if (dataset.datasource is None
            or dataset.datasource.metadata.is_relational is False):
        LOGGER.warning(
            "Dataset {}({}) is not created from relational datasource".
            format(dataset.name, dataset.id))
        return dataset_mcps

    LOGGER.info("Converting dataset={}(id={}) to datahub dataset".format(
        dataset.name, dataset.id))

    # One DataHub dataset per PowerBi table.
    for table in dataset.tables:
        # Create an URN for dataset: <database>.<schema>.<table> on the
        # platform mapped from the PowerBi datasource type.
        ds_urn = builder.make_dataset_urn(
            platform=self.__config.dataset_type_mapping[
                dataset.datasource.type],
            name="{}.{}.{}".format(dataset.datasource.database,
                                   table.schema_name, table.name),
            env=self.__config.env,
        )
        LOGGER.info("{}={}".format(Constant.Dataset_URN, ds_urn))

        # Create datasetProperties mcp
        ds_properties = DatasetPropertiesClass(description=table.name)
        info_mcp = self.new_mcp(
            entity_type=Constant.DATASET,
            entity_urn=ds_urn,
            aspect_name=Constant.DATASET_PROPERTIES,
            aspect=ds_properties,
        )

        # Status mcp with removed=False — marks the dataset as present,
        # NOT as removed (the original comment here was misleading).
        status_mcp = self.new_mcp(
            entity_type=Constant.DATASET,
            entity_urn=ds_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )
        dataset_mcps.extend([info_mcp, status_mcp])

    return dataset_mcps
def _delete_one_urn(
    urn: str,
    soft: bool = False,
    dry_run: bool = False,
    entity_type: str = "dataset",
    cached_session_host: Optional[Tuple[sessions.Session, str]] = None,
    cached_emitter: Optional[rest_emitter.DatahubRestEmitter] = None,
    run_id: str = "delete-run-id",
    deletion_timestamp: Optional[int] = None,
) -> DeletionResult:
    """Delete a single urn, either softly (status aspect) or hard (delete endpoint).

    :param urn: the entity urn to delete.
    :param soft: when True, emit a status(removed=True) aspect instead of
        issuing a hard delete.
    :param dry_run: when True, only log what would happen.
    :param entity_type: entity type used for the soft-delete MCP.
    :param cached_session_host: reusable (session, host) pair for hard deletes.
    :param cached_emitter: reusable REST emitter for soft deletes.
    :param run_id: run id recorded in the soft-delete system metadata.
    :param deletion_timestamp: epoch timestamp recorded as lastObserved;
        defaults to the current time *at call time*.
    """
    # Bug fix: the previous default `deletion_timestamp=_get_current_time()`
    # was evaluated once at function-definition (import) time, so every call
    # relying on the default stamped the process start time. Use a None
    # sentinel and resolve the timestamp per call instead.
    if deletion_timestamp is None:
        deletion_timestamp = _get_current_time()

    deletion_result = DeletionResult()
    deletion_result.num_entities = 1
    deletion_result.num_records = UNKNOWN_NUM_RECORDS  # Default is unknown

    if soft:
        # Soft delete: mark the entity removed via a status aspect.
        if not cached_emitter:
            _, gms_host = cli_utils.get_session_and_host()
            token = cli_utils.get_token()
            emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host,
                                                      token=token)
        else:
            emitter = cached_emitter
        if not dry_run:
            emitter.emit_mcp(
                MetadataChangeProposalWrapper(
                    entityType=entity_type,
                    changeType=ChangeTypeClass.UPSERT,
                    entityUrn=urn,
                    aspectName="status",
                    aspect=StatusClass(removed=True),
                    systemMetadata=SystemMetadataClass(
                        runId=run_id, lastObserved=deletion_timestamp
                    ),
                )
            )
        else:
            logger.info(f"[Dry-run] Would soft-delete {urn}")
    else:
        # Hard delete via the entities delete endpoint.
        if not dry_run:
            payload_obj = {"urn": urn}
            urn, rows_affected = cli_utils.post_delete_endpoint(
                payload_obj,
                "/entities?action=delete",
                cached_session_host=cached_session_host,
            )
            deletion_result.num_records = rows_affected
        else:
            logger.info(f"[Dry-run] Would hard-delete {urn}")
            deletion_result.num_records = UNKNOWN_NUM_RECORDS  # since we don't know how many rows will be affected

    deletion_result.end()
    return deletion_result
def _to_mce(  # noqa: C901
    self,
    config: LookerCommonConfig,
    reporter: SourceReport,
) -> Optional[MetadataChangeEvent]:
    """Build the MetadataChangeEvent for this Looker explore.

    The snapshot carries, in order: browse paths, status (removed=False),
    dataset properties (description + looker custom properties), upstream
    lineage when upstream views are known, and schema metadata when fields
    are known.
    """
    # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
    # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
    dataset_snapshot = DatasetSnapshot(
        urn=self.get_explore_urn(config),
        aspects=[],  # we append to this list later on
    )
    browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
    dataset_snapshot.aspects.append(browse_paths)
    dataset_snapshot.aspects.append(StatusClass(removed=False))

    # Custom properties always tag the dataset as a Looker explore.
    custom_properties = {"looker.type": "explore"}
    if self.label is not None:
        custom_properties["looker.explore.label"] = str(self.label)
    dataset_props = DatasetPropertiesClass(
        description=self.description,
        customProperties=custom_properties,
    )
    dataset_snapshot.aspects.append(dataset_props)

    if self.upstream_views is not None:
        # Upstream views can only be resolved within a known project.
        assert self.project_name is not None
        upstreams = [
            UpstreamClass(
                dataset=LookerViewId(
                    project_name=self.project_name,
                    model_name=self.model_name,
                    view_name=view_name,
                ).get_urn(config),
                type=DatasetLineageTypeClass.VIEW,
            )
            for view_name in self.upstream_views
        ]
        upstream_lineage = UpstreamLineage(upstreams=upstreams)
        dataset_snapshot.aspects.append(upstream_lineage)
    if self.fields is not None:
        schema_metadata = LookerUtil._get_schema(
            platform_name=config.platform_name,
            schema_name=self.name,
            view_fields=self.fields,
            reporter=reporter,
        )
        dataset_snapshot.aspects.append(schema_metadata)
    mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
    return mce
def soft_delete_item(urn: str, type: str) -> Iterable[MetadataWorkUnit]:
    """Soft-delete a stale entity by emitting a status(removed=True) MCP.

    Yields a single MetadataWorkUnit wrapping the status aspect, after
    recording the work unit and the soft deletion on the report.

    NOTE(review): this function references `self` without declaring it as a
    parameter, so it is presumably a closure defined inside a method of the
    source class — confirm before relocating it. The `type` parameter also
    shadows the `type` builtin; renaming it would change the keyword-call
    interface, so it is left as-is.
    """
    logger.info(
        f"Soft-deleting stale entity of type {type} - {urn}.")
    mcp = MetadataChangeProposalWrapper(
        entityType=type,
        entityUrn=urn,
        changeType=ChangeTypeClass.UPSERT,
        aspectName="status",
        aspect=StatusClass(removed=True),
    )
    wu = MetadataWorkUnit(id=f"soft-delete-{type}-{urn}", mcp=mcp)
    # Bookkeeping on the ingestion report (via the enclosing scope's `self`).
    self.report.report_workunit(wu)
    self.report.report_stale_entity_soft_deleted(urn)
    yield wu
def to_datahub_user(
        self, user: PowerBiAPI.User) -> List[MetadataChangeProposalWrapper]:
    """Map a PowerBi user to DataHub corpuser MCPs (info, status, key)."""
    LOGGER.info("Converting user {}(id={}) to datahub's user".format(
        user.displayName, user.id))

    # Create an URN for the user.
    user_urn = builder.make_user_urn(user.get_urn_part())

    return [
        # corpUserInfo aspect
        self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.CORP_USER_INFO,
            aspect=CorpUserInfoClass(
                displayName=user.displayName,
                email=user.emailAddress,
                title=user.displayName,
                active=True,
            ),
        ),
        # status aspect (removed=False)
        self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        ),
        # corpUserKey aspect
        self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.CORP_USER_KEY,
            aspect=CorpUserKeyClass(username=user.id),
        ),
    ]
def get_workunits(self) -> Iterable[WorkUnit]:
    """Return a single hard-coded test work unit: a status aspect for a
    fixed elasticsearch dataset urn."""
    dataset_urn = DatasetUrn.create_from_ids(
        platform_id="elasticsearch",
        table_name="fooIndex",
        env="PROD",
    )
    status_mcp = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=str(dataset_urn),
        aspectName="status",
        aspect=StatusClass(removed=False),
    )
    return [workunit.MetadataWorkUnit(id="test-workunit", mcp=status_mcp)]
def _get_on_demand_feature_view_workunit(
    self, on_demand_feature_view: OnDemandFeatureView
) -> MetadataWorkUnit:
    """Generate an MLFeatureTable work unit for a Feast on-demand feature view."""
    qualified_name = (
        f"{self.feature_store.project}.{on_demand_feature_view.name}"
    )

    table_properties = MLFeatureTablePropertiesClass(
        mlFeatures=[
            builder.make_ml_feature_urn(qualified_name, feature.name)
            for feature in on_demand_feature_view.features
        ],
        # On-demand views carry no entities, hence no primary keys.
        mlPrimaryKeys=[],
    )
    snapshot = MLFeatureTableSnapshot(
        urn=builder.make_ml_feature_table_urn("feast", qualified_name),
        aspects=[
            BrowsePathsClass(
                paths=[f"/feast/{self.feature_store.project}/{qualified_name}"]
            ),
            StatusClass(removed=False),
            table_properties,
        ],
    )

    return MetadataWorkUnit(
        id=qualified_name,
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkUnit:
    """Generate an MLFeatureTable work unit for a Feast feature view."""
    qualified_name = f"{self.feature_store.project}.{feature_view.name}"

    table_properties = MLFeatureTablePropertiesClass(
        mlFeatures=[
            builder.make_ml_feature_urn(qualified_name, feature.name)
            for feature in feature_view.features
        ],
        # Each Feast entity on the view becomes a primary-key urn.
        mlPrimaryKeys=[
            builder.make_ml_primary_key_urn(qualified_name, entity_name)
            for entity_name in feature_view.entities
        ],
    )
    snapshot = MLFeatureTableSnapshot(
        urn=builder.make_ml_feature_table_urn("feast", qualified_name),
        aspects=[
            BrowsePathsClass(
                paths=[f"/feast/{self.feature_store.project}/{qualified_name}"]
            ),
            StatusClass(removed=False),
            table_properties,
        ],
    )

    return MetadataWorkUnit(
        id=qualified_name,
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
def _to_metadata_events(  # noqa: C901
    self, config: LookerCommonConfig, reporter: SourceReport, base_url: str
) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
    """Convert this Looker explore into metadata events.

    Returns a two-element list: an MCE carrying the dataset snapshot
    (browse paths, status, properties with external URL, optional lineage
    and schema) plus an MCP setting the "explore" subtype.
    """
    # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
    # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
    dataset_snapshot = DatasetSnapshot(
        urn=self.get_explore_urn(config),
        aspects=[],  # we append to this list later on
    )
    browse_paths = BrowsePathsClass(
        paths=[self.get_explore_browse_path(config)])
    dataset_snapshot.aspects.append(browse_paths)
    dataset_snapshot.aspects.append(StatusClass(removed=False))

    custom_properties = {}
    if self.label is not None:
        custom_properties["looker.explore.label"] = str(self.label)
    if self.source_file is not None:
        custom_properties["looker.explore.file"] = str(self.source_file)
    dataset_props = DatasetPropertiesClass(
        description=self.description,
        customProperties=custom_properties,
    )
    # Link back to the explore page in the Looker UI.
    dataset_props.externalUrl = self._get_url(base_url)
    dataset_snapshot.aspects.append(dataset_props)

    if self.upstream_views is not None:
        # Upstream views can only be resolved within a known project.
        assert self.project_name is not None
        # sorted() keeps the lineage aspect deterministic across runs.
        upstreams = [
            UpstreamClass(
                dataset=LookerViewId(
                    project_name=self.project_name,
                    model_name=self.model_name,
                    view_name=view_name,
                ).get_urn(config),
                type=DatasetLineageTypeClass.VIEW,
            )
            for view_name in sorted(self.upstream_views)
        ]
        upstream_lineage = UpstreamLineage(upstreams=upstreams)
        dataset_snapshot.aspects.append(upstream_lineage)
    if self.fields is not None:
        schema_metadata = LookerUtil._get_schema(
            platform_name=config.platform_name,
            schema_name=self.name,
            view_fields=self.fields,
            reporter=reporter,
        )
        # Skip the aspect when schema generation yielded nothing.
        if schema_metadata is not None:
            dataset_snapshot.aspects.append(schema_metadata)
    mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
    mcp = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=dataset_snapshot.urn,
        aspectName="subTypes",
        aspect=SubTypesClass(typeNames=["explore"]),
    )
    return [mce, mcp]
def test_mark_status_dataset(tmp_path):
    """End-to-end checks for the mark_dataset_status transformer across
    direct transform() calls and full pipeline runs mixing MCEs and MCPs."""
    dataset = make_generic_dataset()

    # Direct transform: removed=True must set the status aspect on the MCE.
    transformer = MarkDatasetStatus.create(
        {"removed": True},
        PipelineContext(run_id="test"),
    )
    removed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
        ]))
    assert len(removed) == 1
    status_aspect = builder.get_aspect_if_available(removed[0].record,
                                                    models.StatusClass)
    assert status_aspect
    assert status_aspect.removed is True

    # Direct transform: removed=False must clear it again.
    transformer = MarkDatasetStatus.create(
        {"removed": False},
        PipelineContext(run_id="test"),
    )
    not_removed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
        ]))
    assert len(not_removed) == 1
    status_aspect = builder.get_aspect_if_available(not_removed[0].record,
                                                    models.StatusClass)
    assert status_aspect
    assert status_aspect.removed is False

    # MCP only: the transformer runs inside a full pipeline.
    mcp = make_generic_dataset_mcp(
        aspect_name="datasetProperties",
        aspect=DatasetPropertiesClass(description="Test dataset"),
    )
    events_file = create_and_run_test_pipeline(
        events=[mcp],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )
    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="datasetProperties",
        aspect_field_matcher={"description": "Test dataset"},
        file=events_file,
    ) == 1)
    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE only
    test_aspect = DatasetPropertiesClass(description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[make_generic_dataset(aspects=[test_aspect])],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )
    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)
    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE (non-matching) + MCP (matching)
    test_aspect = DatasetPropertiesClass(description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_aspect]),
            make_generic_dataset_mcp(),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )
    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)
    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE (matching) + MCP (non-matching)
    test_status_aspect = StatusClass(removed=False)
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_status_aspect]),
            make_generic_dataset_mcp(
                aspect_name="datasetProperties",
                aspect=DatasetPropertiesClass(description="test dataset"),
            ),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )
    # assert MCE was transformed
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=StatusClass(removed=True),
        aspect_type=StatusClass,
        file=events_file,
    ) == 1)
    # assert MCP aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="datasetProperties",
        aspect_field_matcher={"description": "test dataset"},
        file=events_file,
    ) == 1)

    # MCE (non-matching) + MCP (non-matching)
    test_mcp_aspect = GlobalTagsClass(
        tags=[TagAssociationClass(tag="urn:li:tag:test")])
    test_dataset_props_aspect = DatasetPropertiesClass(
        description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_dataset_props_aspect]),
            make_generic_dataset_mcp(aspect_name="globalTags",
                                     aspect=test_mcp_aspect),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )
    # assert MCE was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_dataset_props_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)
    # assert MCP aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="globalTags",
        aspect_field_matcher={"tags": [{
            "tag": "urn:li:tag:test"
        }]},
        file=events_file,
    ) == 1)
    # assert MCP Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)
def get_status_removed_aspect() -> StatusClass:
    """Build a Status aspect with removed=False.

    NOTE(review): despite the name, this marks the entity as NOT removed —
    confirm with the call sites whether the name/behavior mismatch is
    intentional before renaming or changing it.
    """
    status = StatusClass(removed=False)
    return status
def __to_datahub_chart(
    self, tile: PowerBiAPI.Tile, ds_mcps: List[MetadataChangeProposalWrapper]
) -> List[MetadataChangeProposalWrapper]:
    """Map a PowerBi tile to DataHub chart MCPs (info, status, key)."""
    LOGGER.info("Converting tile {}(id={}) to chart".format(
        tile.title, tile.id))

    # Create an URN for the chart.
    chart_urn = builder.make_chart_urn(self.__config.platform_name,
                                       tile.get_urn_part())
    LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

    # Input dataset urns derived from the dataset MCPs.
    input_dataset_urns: List[str] = self.to_urn_set(ds_mcps)

    def tile_custom_properties(tile: PowerBiAPI.Tile) -> dict:
        return {
            "datasetId": tile.dataset.id if tile.dataset else "",
            "reportId": tile.report.id if tile.report else "",
            "datasetWebUrl": tile.dataset.webUrl
            if tile.dataset is not None else "",
            "createdFrom": tile.createdFrom.value,
        }

    # chartInfo aspect; externalUrl is set only when the tile was created
    # from a report.
    info_mcp = self.new_mcp(
        entity_type=Constant.CHART,
        entity_urn=chart_urn,
        aspect_name=Constant.CHART_INFO,
        aspect=ChartInfoClass(
            title=tile.title or "",
            description=tile.title or "",
            lastModified=ChangeAuditStamps(),
            inputs=input_dataset_urns,
            externalUrl=tile.report.webUrl if tile.report else None,
            customProperties={**tile_custom_properties(tile)},
        ),
    )

    # status aspect (removed=False: chart is present)
    status_mcp = self.new_mcp(
        entity_type=Constant.CHART,
        entity_urn=chart_urn,
        aspect_name=Constant.STATUS,
        aspect=StatusClass(removed=False),
    )

    # chartKey aspect
    key_mcp = self.new_mcp(
        entity_type=Constant.CHART,
        entity_urn=chart_urn,
        aspect_name=Constant.CHART_KEY,
        aspect=ChartKeyClass(
            dashboardTool=self.__config.platform_name,
            chartId=Constant.CHART_ID.format(tile.id),
        ),
    )

    return [info_mcp, status_mcp, key_mcp]
def test_add_dataset_browse_paths():
    """Exercise AddDatasetBrowsePathTransformer: fresh path creation,
    template expansion against a pre-existing path, and replace_existing."""
    dataset = make_generic_dataset()

    # Case 1: dataset with no browse path — the transformer emits the
    # browse-path aspect as a separate record after the dataset.
    transformer = AddDatasetBrowsePathTransformer.create(
        {"path_templates": ["/abc"]},
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    browse_path_aspect = transformed[1].record.aspect
    assert browse_path_aspect
    assert browse_path_aspect.paths == ["/abc"]

    # use an mce with a pre-existing browse path
    dataset_mce = make_generic_dataset(
        aspects=[StatusClass(removed=False), browse_path_aspect])

    # Case 2: templates are appended after the existing "/abc" path, with
    # PLATFORM/DATASET_PARTS/ENV placeholders expanded for the test dataset.
    transformer = AddDatasetBrowsePathTransformer.create(
        {
            "path_templates": [
                "/PLATFORM/foo/DATASET_PARTS/ENV",
                "/ENV/PLATFORM/bar/DATASET_PARTS/",
            ]
        },
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset_mce, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    assert len(transformed) == 2
    browse_path_aspect = builder.get_aspect_if_available(
        transformed[0].record, BrowsePathsClass)
    assert browse_path_aspect
    assert browse_path_aspect.paths == [
        "/abc",
        "/bigquery/foo/example1/prod",
        "/prod/bigquery/bar/example1/",
    ]

    # Case 3: replace_existing=True drops the pre-existing paths entirely.
    transformer = AddDatasetBrowsePathTransformer.create(
        {
            "path_templates": [
                "/xyz",
            ],
            "replace_existing": True,
        },
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset_mce, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    assert len(transformed) == 2
    browse_path_aspect = builder.get_aspect_if_available(
        transformed[0].record, BrowsePathsClass)
    assert browse_path_aspect
    assert browse_path_aspect.paths == [
        "/xyz",
    ]
def __to_datahub_dashboard(
    self,
    dashboard: PowerBiAPI.Dashboard,
    chart_mcps: List[MetadataChangeProposalWrapper],
    user_mcps: List[MetadataChangeProposalWrapper],
) -> List[MetadataChangeProposalWrapper]:
    """Map a PowerBi dashboard to DataHub dashboard MCPs
    (browse path, info, status, key, ownership)."""
    dashboard_urn = builder.make_dashboard_urn(self.__config.platform_name,
                                               dashboard.get_urn_part())

    # Chart and user urns derived from the already-built MCPs.
    chart_urns: List[str] = self.to_urn_set(chart_mcps)
    user_urns: List[str] = self.to_urn_set(user_mcps)

    def chart_custom_properties(dashboard: PowerBiAPI.Dashboard) -> dict:
        return {
            "chartCount": str(len(dashboard.tiles)),
            "workspaceName": dashboard.workspace_name,
            "workspaceId": dashboard.id,
        }

    # dashboardInfo aspect
    info_mcp = self.new_mcp(
        entity_type=Constant.DASHBOARD,
        entity_urn=dashboard_urn,
        aspect_name=Constant.DASHBOARD_INFO,
        aspect=DashboardInfoClass(
            description=dashboard.displayName or "",
            title=dashboard.displayName or "",
            charts=chart_urns,
            lastModified=ChangeAuditStamps(),
            dashboardUrl=dashboard.webUrl,
            customProperties={**chart_custom_properties(dashboard)},
        ),
    )

    # status aspect (removed=False: dashboard is present)
    status_mcp = self.new_mcp(
        entity_type=Constant.DASHBOARD,
        entity_urn=dashboard_urn,
        aspect_name=Constant.STATUS,
        aspect=StatusClass(removed=False),
    )

    # dashboardKey aspect
    key_mcp = self.new_mcp(
        entity_type=Constant.DASHBOARD,
        entity_urn=dashboard_urn,
        aspect_name=Constant.DASHBOARD_KEY,
        aspect=DashboardKeyClass(
            dashboardTool=self.__config.platform_name,
            dashboardId=Constant.DASHBOARD_ID.format(dashboard.id),
        ),
    )

    # ownership aspect: every resolved user becomes a CONSUMER owner.
    owner_mcp = self.new_mcp(
        entity_type=Constant.DASHBOARD,
        entity_urn=dashboard_urn,
        aspect_name=Constant.OWNERSHIP,
        aspect=OwnershipClass(owners=[
            OwnerClass(owner=user_urn, type=OwnershipTypeClass.CONSUMER)
            for user_urn in user_urns if user_urn is not None
        ]),
    )

    # browsePaths aspect
    browse_path_mcp = self.new_mcp(
        entity_type=Constant.DASHBOARD,
        entity_urn=dashboard_urn,
        aspect_name=Constant.BROWSERPATH,
        aspect=BrowsePathsClass(
            paths=["/powerbi/{}".format(self.__config.workspace_id)]),
    )

    # Preserve the original emission order: browse path first, then
    # info/status/key/owner.
    return [
        browse_path_mcp,
        info_mcp,
        status_mcp,
        key_mcp,
        owner_mcp,
    ]