Example #1
    def _get_entity_workunit(
        self, feature_view: FeatureView, entity: Entity
    ) -> MetadataWorkUnit:
        """
        Generate an MLPrimaryKey work unit for a Feast entity.
        """

        feature_view_name = f"{self.feature_store.project}.{feature_view.name}"

        entity_snapshot = MLPrimaryKeySnapshot(
            urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
            aspects=[StatusClass(removed=False)],
        )

        entity_snapshot.aspects.append(
            MLPrimaryKeyPropertiesClass(
                description=entity.description,
                dataType=self._get_field_type(entity.value_type, entity.name),
                sources=self._get_data_sources(feature_view),
            )
        )

        mce = MetadataChangeEvent(proposedSnapshot=entity_snapshot)

        return MetadataWorkUnit(id=entity.name, mce=mce)
Example #2
    def generate_data_input_output_mcp(
            self) -> Iterable[MetadataChangeProposalWrapper]:
        mcp = MetadataChangeProposalWrapper(
            entityType="datajob",
            entityUrn=str(self.urn),
            aspectName="dataJobInputOutput",
            aspect=DataJobInputOutputClass(
                inputDatasets=[str(urn) for urn in self.inlets],
                outputDatasets=[str(urn) for urn in self.outlets],
                inputDatajobs=[str(urn) for urn in self.upstream_urns],
            ),
            changeType=ChangeTypeClass.UPSERT,
        )
        yield mcp

        # Force entity materialization
        for iolet in self.inlets + self.outlets:
            mcp = MetadataChangeProposalWrapper(
                entityType="dataset",
                entityUrn=str(iolet),
                aspectName="status",
                aspect=StatusClass(removed=False),
                changeType=ChangeTypeClass.UPSERT,
            )

            yield mcp
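
The wrappers yielded above can be pushed to DataHub with the REST emitter, mirroring the pattern in Example #8. A minimal sketch, assuming a hypothetical local GMS endpoint and an already-constructed datajob object that exposes this method:

    from datahub.emitter.rest_emitter import DatahubRestEmitter

    # Hypothetical endpoint; point this at your own GMS server (and pass a token if needed).
    emitter = DatahubRestEmitter(gms_server="http://localhost:8080")

    # datajob is assumed to be an instance of the class shown above.
    for mcp in datajob.generate_data_input_output_mcp():
        emitter.emit_mcp(mcp)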
Example #3
 def transform_aspect(
         self, entity_urn: str, aspect_name: str,
         aspect: Optional[builder.Aspect]) -> Optional[builder.Aspect]:
     assert aspect is None or isinstance(aspect, StatusClass)
     status_aspect: StatusClass = aspect if aspect else StatusClass(
         removed=None)
     status_aspect.removed = self.config.removed
     return status_aspect  # type: ignore
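
This hook looks like the aspect-level counterpart of the MarkDatasetStatus transformer exercised in Example #16. A minimal sketch of driving it directly, under that assumption and with a hypothetical dataset URN:

    from datahub.ingestion.api.common import PipelineContext
    from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus
    from datahub.metadata.schema_classes import StatusClass

    transformer = MarkDatasetStatus.create({"removed": True}, PipelineContext(run_id="test"))
    aspect = transformer.transform_aspect(
        entity_urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,example1,PROD)",  # hypothetical URN
        aspect_name="status",
        aspect=StatusClass(removed=False),
    )
    # The transformer overwrites the removed flag with its configured value.
    assert isinstance(aspect, StatusClass) and aspect.removed is True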
Example #4
    def _get_feature_workunit(
        self,
        feature_view: Union[FeatureView, OnDemandFeatureView],
        feature: Feature,
    ) -> MetadataWorkUnit:
        """
        Generate an MLFeature work unit for a Feast feature.
        """
        feature_view_name = f"{self.feature_store.project}.{feature_view.name}"

        feature_snapshot = MLFeatureSnapshot(
            urn=builder.make_ml_feature_urn(feature_view_name, feature.name),
            aspects=[StatusClass(removed=False)],
        )

        feature_sources = []

        if isinstance(feature_view, FeatureView):
            feature_sources = self._get_data_sources(feature_view)
        elif isinstance(feature_view, OnDemandFeatureView):
            if feature_view.input_request_data_sources is not None:
                for request_source in feature_view.input_request_data_sources.values():
                    source_platform, source_name = self._get_data_source_details(
                        request_source
                    )

                    feature_sources.append(
                        builder.make_dataset_urn(
                            source_platform,
                            source_name,
                            self.source_config.environment,
                        )
                    )

            if feature_view.input_feature_view_projections is not None:
                for (
                    feature_view_projection
                ) in feature_view.input_feature_view_projections.values():
                    feature_view_source = self.feature_store.get_feature_view(
                        feature_view_projection.name
                    )

                    feature_sources.extend(self._get_data_sources(feature_view_source))

        feature_snapshot.aspects.append(
            MLFeaturePropertiesClass(
                description=feature.labels.get("description"),
                dataType=self._get_field_type(feature.dtype, feature.name),
                sources=feature_sources,
            )
        )

        mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot)

        return MetadataWorkUnit(id=feature.name, mce=mce)
Example #5
    def _generate_base_aspects(
        self,
        node: DBTNode,
        additional_custom_props_filtered: Dict[str, str],
        mce_platform: str,
        meta_aspects: Dict[str, Any],
    ) -> List[Any]:
        """
        Generate the aspects that are common to both the dbt node and the platform node,
        depending on whether dbt node creation is enabled.
        """

        # Create an empty list of aspects and keep adding to it. Typed as List[Any] to avoid a
        # large union of aspect types.
        aspects: List[Any] = []

        # add dataset properties aspect
        dbt_properties = self._create_dataset_properties_aspect(
            node, additional_custom_props_filtered)
        aspects.append(dbt_properties)

        # add status aspect
        status = StatusClass(removed=False)
        aspects.append(status)
        # add owners aspect
        # aggregate owners added by meta properties with the owners coming from the server
        meta_owner_aspects = meta_aspects.get(Constants.ADD_OWNER_OPERATION)
        aggregated_owners = self._aggregate_owners(node, meta_owner_aspects)
        if aggregated_owners:
            aspects.append(OwnershipClass(owners=aggregated_owners))

        # add tags aspects
        meta_tags_aspect = meta_aspects.get(Constants.ADD_TAG_OPERATION)
        aggregated_tags = self._aggregate_tags(node, meta_tags_aspect)
        if aggregated_tags:
            aspects.append(
                mce_builder.make_global_tag_aspect_with_tag_list(
                    aggregated_tags))

        # add meta term aspects
        if (meta_aspects.get(Constants.ADD_TERM_OPERATION)
                and self.config.enable_meta_mapping):
            aspects.append(meta_aspects.get(Constants.ADD_TERM_OPERATION))

        # add schema metadata aspect
        schema_metadata = get_schema_metadata(self.report, node, mce_platform)
        # When generating these aspects for a dbt node, we will always include schema information. When generating
        # these aspects for a platform node (which only happens when disable_dbt_node_creation is set to true), we
        # honor the load_schemas flag.
        if mce_platform == DBT_PLATFORM:
            aspects.append(schema_metadata)
        else:
            if self.config.load_schemas:
                aspects.append(schema_metadata)
        return aspects
Example #6
 def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
     if not isinstance(mce.proposedSnapshot, DatasetSnapshotClass):
         return mce
     status_aspect = builder.get_or_add_aspect(
         mce,
         StatusClass(
             removed=None,
         ),
     )
     status_aspect.removed = self.config.removed
     return mce
Example #7
    def __to_datahub_dataset(
        self, dataset: Optional[PowerBiAPI.Dataset]
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map a PowerBI dataset to DataHub datasets: each table of the PowerBI dataset becomes its own DataHub dataset.
        A PowerBI tile references a single dataset, but the corresponding DataHub chart may have many input sources.
        """

        dataset_mcps: List[MetadataChangeProposalWrapper] = []
        if dataset is None:
            return dataset_mcps

        # We only support relational PowerBI data sources
        if (dataset.datasource is None
                or dataset.datasource.metadata.is_relational is False):
            LOGGER.warning(
                "Dataset {}({}) is not created from relational datasource".
                format(dataset.name, dataset.id))
            return dataset_mcps

        LOGGER.info("Converting dataset={}(id={}) to datahub dataset".format(
            dataset.name, dataset.id))

        for table in dataset.tables:
            # Create a URN for the dataset
            ds_urn = builder.make_dataset_urn(
                platform=self.__config.dataset_type_mapping[
                    dataset.datasource.type],
                name="{}.{}.{}".format(dataset.datasource.database,
                                       table.schema_name, table.name),
                env=self.__config.env,
            )
            LOGGER.info("{}={}".format(Constant.Dataset_URN, ds_urn))
            # Create datasetProperties mcp
            ds_properties = DatasetPropertiesClass(description=table.name)

            info_mcp = self.new_mcp(
                entity_type=Constant.DATASET,
                entity_urn=ds_urn,
                aspect_name=Constant.DATASET_PROPERTIES,
                aspect=ds_properties,
            )

            # Status mcp (removed=False)
            status_mcp = self.new_mcp(
                entity_type=Constant.DATASET,
                entity_urn=ds_urn,
                aspect_name=Constant.STATUS,
                aspect=StatusClass(removed=False),
            )

            dataset_mcps.extend([info_mcp, status_mcp])

        return dataset_mcps
Example #8
def _delete_one_urn(
    urn: str,
    soft: bool = False,
    dry_run: bool = False,
    entity_type: str = "dataset",
    cached_session_host: Optional[Tuple[sessions.Session, str]] = None,
    cached_emitter: Optional[rest_emitter.DatahubRestEmitter] = None,
    run_id: str = "delete-run-id",
    deletion_timestamp: int = _get_current_time(),
) -> DeletionResult:

    deletion_result = DeletionResult()
    deletion_result.num_entities = 1
    deletion_result.num_records = UNKNOWN_NUM_RECORDS  # Default is unknown

    if soft:
        # Add removed aspect
        if not cached_emitter:
            _, gms_host = cli_utils.get_session_and_host()
            token = cli_utils.get_token()
            emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token)
        else:
            emitter = cached_emitter
        if not dry_run:
            emitter.emit_mcp(
                MetadataChangeProposalWrapper(
                    entityType=entity_type,
                    changeType=ChangeTypeClass.UPSERT,
                    entityUrn=urn,
                    aspectName="status",
                    aspect=StatusClass(removed=True),
                    systemMetadata=SystemMetadataClass(
                        runId=run_id, lastObserved=deletion_timestamp
                    ),
                )
            )
        else:
            logger.info(f"[Dry-run] Would soft-delete {urn}")
    else:
        if not dry_run:
            payload_obj = {"urn": urn}
            urn, rows_affected = cli_utils.post_delete_endpoint(
                payload_obj,
                "/entities?action=delete",
                cached_session_host=cached_session_host,
            )
            deletion_result.num_records = rows_affected
        else:
            logger.info(f"[Dry-run] Would hard-delete {urn}")
            deletion_result.num_records = UNKNOWN_NUM_RECORDS  # since we don't know how many rows will be affected

    deletion_result.end()
    return deletion_result
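
A hedged usage sketch of the helper above: with soft=True and dry_run=True nothing is emitted or deleted, although the call still expects a configured DataHub CLI session. The URN is hypothetical:

    result = _delete_one_urn(
        urn="urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",  # hypothetical URN
        soft=True,
        dry_run=True,
    )
    # num_records stays UNKNOWN_NUM_RECORDS for a dry run; num_entities is always 1 here.
    print(result.num_entities, result.num_records)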
Example #9
    def _to_mce(  # noqa: C901
        self,
        config: LookerCommonConfig,
        reporter: SourceReport,
    ) -> Optional[MetadataChangeEvent]:
        # We only generate MCEs for explores that contain from clauses and do NOT contain joins.
        # All other explores (passthrough explores and joins) resolve lineage correctly on their own and don't need additional nodes in the graph.

        dataset_snapshot = DatasetSnapshot(
            urn=self.get_explore_urn(config),
            aspects=[],  # we append to this list later on
        )
        browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
        dataset_snapshot.aspects.append(browse_paths)
        dataset_snapshot.aspects.append(StatusClass(removed=False))

        custom_properties = {"looker.type": "explore"}
        if self.label is not None:
            custom_properties["looker.explore.label"] = str(self.label)
        dataset_props = DatasetPropertiesClass(
            description=self.description,
            customProperties=custom_properties,
        )
        dataset_snapshot.aspects.append(dataset_props)
        if self.upstream_views is not None:
            assert self.project_name is not None
            upstreams = [
                UpstreamClass(
                    dataset=LookerViewId(
                        project_name=self.project_name,
                        model_name=self.model_name,
                        view_name=view_name,
                    ).get_urn(config),
                    type=DatasetLineageTypeClass.VIEW,
                )
                for view_name in self.upstream_views
            ]
            upstream_lineage = UpstreamLineage(upstreams=upstreams)
            dataset_snapshot.aspects.append(upstream_lineage)
        if self.fields is not None:
            schema_metadata = LookerUtil._get_schema(
                platform_name=config.platform_name,
                schema_name=self.name,
                view_fields=self.fields,
                reporter=reporter,
            )
            dataset_snapshot.aspects.append(schema_metadata)

        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
        return mce
Example #10
            def soft_delete_item(urn: str,
                                 type: str) -> Iterable[MetadataWorkUnit]:

                logger.info(
                    f"Soft-deleting stale entity of type {type} - {urn}.")
                mcp = MetadataChangeProposalWrapper(
                    entityType=type,
                    entityUrn=urn,
                    changeType=ChangeTypeClass.UPSERT,
                    aspectName="status",
                    aspect=StatusClass(removed=True),
                )
                wu = MetadataWorkUnit(id=f"soft-delete-{type}-{urn}", mcp=mcp)
                self.report.report_workunit(wu)
                self.report.report_stale_entity_soft_deleted(urn)
                yield wu
Example #11
    def to_datahub_user(
            self,
            user: PowerBiAPI.User) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBi user to datahub user
        """

        LOGGER.info("Converting user {}(id={}) to datahub's user".format(
            user.displayName, user.id))

        # Create a URN for the user
        user_urn = builder.make_user_urn(user.get_urn_part())

        user_info_instance = CorpUserInfoClass(
            displayName=user.displayName,
            email=user.emailAddress,
            title=user.displayName,
            active=True,
        )

        info_mcp = self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.CORP_USER_INFO,
            aspect=user_info_instance,
        )

        # Status mcp (removed=False)
        status_mcp = self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )

        user_key = CorpUserKeyClass(username=user.id)

        user_key_mcp = self.new_mcp(
            entity_type=Constant.CORP_USER,
            entity_urn=user_urn,
            aspect_name=Constant.CORP_USER_KEY,
            aspect=user_key,
        )

        return [info_mcp, status_mcp, user_key_mcp]
Example #12
 def get_workunits(self) -> Iterable[WorkUnit]:
     return [
         workunit.MetadataWorkUnit(
             id="test-workunit",
             mcp=MetadataChangeProposalWrapper(
                 entityType="dataset",
                 changeType=ChangeTypeClass.UPSERT,
                 entityUrn=str(
                     DatasetUrn.create_from_ids(
                         platform_id="elasticsearch",
                         table_name="fooIndex",
                         env="PROD",
                     )
                 ),
                 aspectName="status",
                 aspect=StatusClass(removed=False),
             ),
         )
     ]
Example #13
    def _get_on_demand_feature_view_workunit(
        self, on_demand_feature_view: OnDemandFeatureView
    ) -> MetadataWorkUnit:
        """
        Generate an MLFeatureTable work unit for a Feast on-demand feature view.
        """

        on_demand_feature_view_name = (
            f"{self.feature_store.project}.{on_demand_feature_view.name}"
        )

        on_demand_feature_view_snapshot = MLFeatureTableSnapshot(
            urn=builder.make_ml_feature_table_urn("feast", on_demand_feature_view_name),
            aspects=[
                BrowsePathsClass(
                    paths=[
                        f"/feast/{self.feature_store.project}/{on_demand_feature_view_name}"
                    ]
                ),
                StatusClass(removed=False),
            ],
        )

        on_demand_feature_view_snapshot.aspects.append(
            MLFeatureTablePropertiesClass(
                mlFeatures=[
                    builder.make_ml_feature_urn(
                        on_demand_feature_view_name,
                        feature.name,
                    )
                    for feature in on_demand_feature_view.features
                ],
                mlPrimaryKeys=[],
            )
        )

        mce = MetadataChangeEvent(proposedSnapshot=on_demand_feature_view_snapshot)

        return MetadataWorkUnit(id=on_demand_feature_view_name, mce=mce)
Example #14
    def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkUnit:
        """
        Generate an MLFeatureTable work unit for a Feast feature view.
        """

        feature_view_name = f"{self.feature_store.project}.{feature_view.name}"

        feature_view_snapshot = MLFeatureTableSnapshot(
            urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
            aspects=[
                BrowsePathsClass(
                    paths=[f"/feast/{self.feature_store.project}/{feature_view_name}"]
                ),
                StatusClass(removed=False),
            ],
        )

        feature_view_snapshot.aspects.append(
            MLFeatureTablePropertiesClass(
                mlFeatures=[
                    builder.make_ml_feature_urn(
                        feature_view_name,
                        feature.name,
                    )
                    for feature in feature_view.features
                ],
                mlPrimaryKeys=[
                    builder.make_ml_primary_key_urn(feature_view_name, entity_name)
                    for entity_name in feature_view.entities
                ],
            )
        )

        mce = MetadataChangeEvent(proposedSnapshot=feature_view_snapshot)

        return MetadataWorkUnit(id=feature_view_name, mce=mce)
Example #15
    def _to_metadata_events(  # noqa: C901
        self, config: LookerCommonConfig, reporter: SourceReport,
        base_url: str) -> Optional[List[Union[MetadataChangeEvent,
                                              MetadataChangeProposalWrapper]]]:
        # We only generate MCEs for explores that contain from clauses and do NOT contain joins.
        # All other explores (passthrough explores and joins) resolve lineage correctly on their own and don't need additional nodes in the graph.

        dataset_snapshot = DatasetSnapshot(
            urn=self.get_explore_urn(config),
            aspects=[],  # we append to this list later on
        )
        browse_paths = BrowsePathsClass(
            paths=[self.get_explore_browse_path(config)])
        dataset_snapshot.aspects.append(browse_paths)
        dataset_snapshot.aspects.append(StatusClass(removed=False))

        custom_properties = {}
        if self.label is not None:
            custom_properties["looker.explore.label"] = str(self.label)
        if self.source_file is not None:
            custom_properties["looker.explore.file"] = str(self.source_file)
        dataset_props = DatasetPropertiesClass(
            description=self.description,
            customProperties=custom_properties,
        )
        dataset_props.externalUrl = self._get_url(base_url)

        dataset_snapshot.aspects.append(dataset_props)
        if self.upstream_views is not None:
            assert self.project_name is not None
            upstreams = [
                UpstreamClass(
                    dataset=LookerViewId(
                        project_name=self.project_name,
                        model_name=self.model_name,
                        view_name=view_name,
                    ).get_urn(config),
                    type=DatasetLineageTypeClass.VIEW,
                ) for view_name in sorted(self.upstream_views)
            ]
            upstream_lineage = UpstreamLineage(upstreams=upstreams)
            dataset_snapshot.aspects.append(upstream_lineage)
        if self.fields is not None:
            schema_metadata = LookerUtil._get_schema(
                platform_name=config.platform_name,
                schema_name=self.name,
                view_fields=self.fields,
                reporter=reporter,
            )
            if schema_metadata is not None:
                dataset_snapshot.aspects.append(schema_metadata)

        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
        mcp = MetadataChangeProposalWrapper(
            entityType="dataset",
            changeType=ChangeTypeClass.UPSERT,
            entityUrn=dataset_snapshot.urn,
            aspectName="subTypes",
            aspect=SubTypesClass(typeNames=["explore"]),
        )

        return [mce, mcp]
Example #16
def test_mark_status_dataset(tmp_path):
    dataset = make_generic_dataset()

    transformer = MarkDatasetStatus.create(
        {"removed": True},
        PipelineContext(run_id="test"),
    )
    removed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
        ]))
    assert len(removed) == 1
    status_aspect = builder.get_aspect_if_available(removed[0].record,
                                                    models.StatusClass)
    assert status_aspect
    assert status_aspect.removed is True

    transformer = MarkDatasetStatus.create(
        {"removed": False},
        PipelineContext(run_id="test"),
    )
    not_removed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
        ]))
    assert len(not_removed) == 1
    status_aspect = builder.get_aspect_if_available(not_removed[0].record,
                                                    models.StatusClass)
    assert status_aspect
    assert status_aspect.removed is False

    mcp = make_generic_dataset_mcp(
        aspect_name="datasetProperties",
        aspect=DatasetPropertiesClass(description="Test dataset"),
    )
    events_file = create_and_run_test_pipeline(
        events=[mcp],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )

    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="datasetProperties",
        aspect_field_matcher={"description": "Test dataset"},
        file=events_file,
    ) == 1)

    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE only
    test_aspect = DatasetPropertiesClass(description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[make_generic_dataset(aspects=[test_aspect])],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )

    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)

    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE (non-matching) + MCP (matching)
    test_aspect = DatasetPropertiesClass(description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_aspect]),
            make_generic_dataset_mcp(),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )

    # assert dataset properties aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)

    # assert Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)

    # MCE (matching) + MCP (non-matching)
    test_status_aspect = StatusClass(removed=False)
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_status_aspect]),
            make_generic_dataset_mcp(
                aspect_name="datasetProperties",
                aspect=DatasetPropertiesClass(description="test dataset"),
            ),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )

    # assert MCE was transformed
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=StatusClass(removed=True),
        aspect_type=StatusClass,
        file=events_file,
    ) == 1)

    # assert MCP aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="datasetProperties",
        aspect_field_matcher={"description": "test dataset"},
        file=events_file,
    ) == 1)

    # MCE (non-matching) + MCP (non-matching)
    test_mcp_aspect = GlobalTagsClass(
        tags=[TagAssociationClass(tag="urn:li:tag:test")])
    test_dataset_props_aspect = DatasetPropertiesClass(
        description="Test dataset")
    events_file = create_and_run_test_pipeline(
        events=[
            make_generic_dataset(aspects=[test_dataset_props_aspect]),
            make_generic_dataset_mcp(aspect_name="globalTags",
                                     aspect=test_mcp_aspect),
        ],
        transformers=[{
            "type": "mark_dataset_status",
            "config": {
                "removed": True
            }
        }],
        path=tmp_path,
    )

    # assert MCE was preserved
    assert (tests.test_helpers.mce_helpers.assert_entity_mce_aspect(
        entity_urn=mcp.entityUrn or "",
        aspect=test_dataset_props_aspect,
        aspect_type=DatasetPropertiesClass,
        file=events_file,
    ) == 1)

    # assert MCP aspect was preserved
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="globalTags",
        aspect_field_matcher={"tags": [{
            "tag": "urn:li:tag:test"
        }]},
        file=events_file,
    ) == 1)

    # assert MCP Status aspect was generated
    assert (tests.test_helpers.mce_helpers.assert_for_each_entity(
        entity_type="dataset",
        aspect_name="status",
        aspect_field_matcher={"removed": True},
        file=events_file,
    ) == 1)
Example #17
def get_status_removed_aspect() -> StatusClass:
    return StatusClass(removed=False)
Example #18
    def __to_datahub_chart(
        self, tile: PowerBiAPI.Tile,
        ds_mcps: List[MetadataChangeProposalWrapper]
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBi tile to datahub chart
        """
        LOGGER.info("Converting tile {}(id={}) to chart".format(
            tile.title, tile.id))
        # Create a URN for the chart
        chart_urn = builder.make_chart_urn(self.__config.platform_name,
                                           tile.get_urn_part())

        LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

        ds_input: List[str] = self.to_urn_set(ds_mcps)

        def tile_custom_properties(tile: PowerBiAPI.Tile) -> dict:
            custom_properties = {
                "datasetId":
                tile.dataset.id if tile.dataset else "",
                "reportId":
                tile.report.id if tile.report else "",
                "datasetWebUrl":
                tile.dataset.webUrl if tile.dataset is not None else "",
                "createdFrom":
                tile.createdFrom.value,
            }

            return custom_properties

        # Create chartInfo mcp
        # Set chartUrl only if tile is created from Report
        chart_info_instance = ChartInfoClass(
            title=tile.title or "",
            description=tile.title or "",
            lastModified=ChangeAuditStamps(),
            inputs=ds_input,
            externalUrl=tile.report.webUrl if tile.report else None,
            customProperties={**tile_custom_properties(tile)},
        )

        info_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.CHART_INFO,
            aspect=chart_info_instance,
        )

        # Status mcp (removed=False)
        status_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )

        # Chart key mcp
        chart_key_instance = ChartKeyClass(
            dashboardTool=self.__config.platform_name,
            chartId=Constant.CHART_ID.format(tile.id),
        )

        chartkey_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.CHART_KEY,
            aspect=chart_key_instance,
        )

        return [info_mcp, status_mcp, chartkey_mcp]
Example #19
def test_add_dataset_browse_paths():
    dataset = make_generic_dataset()

    transformer = AddDatasetBrowsePathTransformer.create(
        {"path_templates": ["/abc"]},
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    browse_path_aspect = transformed[1].record.aspect
    assert browse_path_aspect
    assert browse_path_aspect.paths == ["/abc"]

    # use an mce with a pre-existing browse path
    dataset_mce = make_generic_dataset(
        aspects=[StatusClass(removed=False), browse_path_aspect])

    transformer = AddDatasetBrowsePathTransformer.create(
        {
            "path_templates": [
                "/PLATFORM/foo/DATASET_PARTS/ENV",
                "/ENV/PLATFORM/bar/DATASET_PARTS/",
            ]
        },
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset_mce, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    assert len(transformed) == 2
    browse_path_aspect = builder.get_aspect_if_available(
        transformed[0].record, BrowsePathsClass)
    assert browse_path_aspect
    assert browse_path_aspect.paths == [
        "/abc",
        "/bigquery/foo/example1/prod",
        "/prod/bigquery/bar/example1/",
    ]

    transformer = AddDatasetBrowsePathTransformer.create(
        {
            "path_templates": [
                "/xyz",
            ],
            "replace_existing": True,
        },
        PipelineContext(run_id="test"),
    )
    transformed = list(
        transformer.transform([
            RecordEnvelope(dataset_mce, metadata={}),
            RecordEnvelope(EndOfStream(), metadata={}),
        ]))
    assert len(transformed) == 2
    browse_path_aspect = builder.get_aspect_if_available(
        transformed[0].record, BrowsePathsClass)
    assert browse_path_aspect
    assert browse_path_aspect.paths == [
        "/xyz",
    ]
Example #20
    def __to_datahub_dashboard(
        self,
        dashboard: PowerBiAPI.Dashboard,
        chart_mcps: List[MetadataChangeProposalWrapper],
        user_mcps: List[MetadataChangeProposalWrapper],
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBi dashboard to Datahub dashboard
        """

        dashboard_urn = builder.make_dashboard_urn(self.__config.platform_name,
                                                   dashboard.get_urn_part())

        chart_urn_list: List[str] = self.to_urn_set(chart_mcps)
        user_urn_list: List[str] = self.to_urn_set(user_mcps)

        def chart_custom_properties(dashboard: PowerBiAPI.Dashboard) -> dict:
            return {
                "chartCount": str(len(dashboard.tiles)),
                "workspaceName": dashboard.workspace_name,
                "workspaceId": dashboard.id,
            }

        # DashboardInfo mcp
        dashboard_info_cls = DashboardInfoClass(
            description=dashboard.displayName or "",
            title=dashboard.displayName or "",
            charts=chart_urn_list,
            lastModified=ChangeAuditStamps(),
            dashboardUrl=dashboard.webUrl,
            customProperties={**chart_custom_properties(dashboard)},
        )

        info_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.DASHBOARD_INFO,
            aspect=dashboard_info_cls,
        )

        # Status mcp (removed=False)
        removed_status_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )

        # dashboardKey mcp
        dashboard_key_cls = DashboardKeyClass(
            dashboardTool=self.__config.platform_name,
            dashboardId=Constant.DASHBOARD_ID.format(dashboard.id),
        )

        # Dashboard key
        dashboard_key_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.DASHBOARD_KEY,
            aspect=dashboard_key_cls,
        )

        # Dashboard Ownership
        owners = [
            OwnerClass(owner=user_urn, type=OwnershipTypeClass.CONSUMER)
            for user_urn in user_urn_list if user_urn is not None
        ]
        ownership = OwnershipClass(owners=owners)
        # Dashboard owner MCP
        owner_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.OWNERSHIP,
            aspect=ownership,
        )

        # Dashboard browsePaths
        browse_path = BrowsePathsClass(
            paths=["/powerbi/{}".format(self.__config.workspace_id)])
        browse_path_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.BROWSERPATH,
            aspect=browse_path,
        )

        return [
            browse_path_mcp,
            info_mcp,
            removed_status_mcp,
            dashboard_key_mcp,
            owner_mcp,
        ]