Ejemplo n.º 1
0
    def get_feature_table_wu(self, ingest_table):
        """
        Build the MLFeatureTable workunit describing one Feast feature table.

        Parameters
        ----------
            ingest_table:
                ingested Feast table; its "name", "features" and "entities"
                entries are read, and every feature/entity is looked up by
                its own "name" entry

        Returns
        -------
            a MetadataWorkUnit whose id is the table name and whose MCE
            carries the table snapshot
        """

        table_name = ingest_table["name"]

        table_urn = builder.make_ml_feature_table_urn("feast", table_name)

        # URNs of the features that belong to this table
        feature_urns = []
        for feature in ingest_table["features"]:
            feature_urns.append(
                builder.make_ml_feature_urn(table_name, feature["name"])
            )

        # each entity becomes a primary key; when there are several they
        # act together as a composite key
        primary_key_urns = []
        for entity in ingest_table["entities"]:
            primary_key_urns.append(
                builder.make_ml_primary_key_urn(table_name, entity["name"])
            )

        table_properties = MLFeatureTablePropertiesClass(
            mlFeatures=feature_urns,
            mlPrimaryKeys=primary_key_urns,
        )

        featuretable_snapshot = MLFeatureTableSnapshot(
            urn=table_urn,
            aspects=[table_properties],
        )

        # wrap the snapshot in an MCE and hand it back as a workunit
        return MetadataWorkUnit(
            id=table_name,
            mce=MetadataChangeEvent(proposedSnapshot=featuretable_snapshot),
        )
Ejemplo n.º 2
0
    def _get_feature_workunit(
        self,
        feature_view: Union[FeatureView, OnDemandFeatureView],
        feature: Feature,
    ) -> MetadataWorkUnit:
        """
        Build the MLFeature work unit for a single Feast feature.

        The feature is addressed as "<project>.<feature view name>" plus its
        own name. Its upstream sources depend on the kind of view:

        * FeatureView: whatever ``self._get_data_sources`` returns for it.
        * OnDemandFeatureView: one dataset URN per request data source,
          followed by the data sources of every feature view referenced
          through the input feature view projections.

        Any other view type yields an empty source list.
        """
        qualified_view_name = f"{self.feature_store.project}.{feature_view.name}"

        feature_snapshot = MLFeatureSnapshot(
            urn=builder.make_ml_feature_urn(qualified_view_name, feature.name),
            aspects=[StatusClass(removed=False)],
        )

        if isinstance(feature_view, FeatureView):
            upstream_urns = self._get_data_sources(feature_view)
        elif isinstance(feature_view, OnDemandFeatureView):
            upstream_urns = []

            # request-time inputs are registered as datasets of their own
            request_sources = feature_view.input_request_data_sources
            if request_sources is not None:
                for request_source in request_sources.values():
                    platform, dataset_name = self._get_data_source_details(
                        request_source
                    )
                    upstream_urns.append(
                        builder.make_dataset_urn(
                            platform,
                            dataset_name,
                            self.source_config.environment,
                        )
                    )

            # projections point at other feature views; inherit their sources
            projections = feature_view.input_feature_view_projections
            if projections is not None:
                for projection in projections.values():
                    projected_view = self.feature_store.get_feature_view(
                        projection.name
                    )
                    upstream_urns.extend(self._get_data_sources(projected_view))
        else:
            upstream_urns = []

        feature_properties = MLFeaturePropertiesClass(
            description=feature.labels.get("description"),
            dataType=self._get_field_type(feature.dtype, feature.name),
            sources=upstream_urns,
        )
        feature_snapshot.aspects.append(feature_properties)

        return MetadataWorkUnit(
            id=feature.name,
            mce=MetadataChangeEvent(proposedSnapshot=feature_snapshot),
        )
Ejemplo n.º 3
0
    def get_feature_wu(self, ingest_table, ingest_feature):
        """
        Build the MLFeature workunit describing one Feast feature.

        Parameters
        ----------
            ingest_table:
                ingested Feast table (only its "name" is read)
            ingest_feature:
                ingested Feast feature; supplies "name", "type" and, for the
                batch and stream sources, the "<kind>_source",
                "<kind>_source_platform" and "<kind>_source_name" entries

        Returns
        -------
            a MetadataWorkUnit whose id is the feature name
        """

        feature_urn = builder.make_ml_feature_urn(
            ingest_table["name"], ingest_feature["name"]
        )

        # (presence key, platform key, dataset-name key) for each kind of
        # source, in the order the sources are emitted
        source_keys = (
            ("batch_source", "batch_source_platform", "batch_source_name"),
            ("stream_source", "stream_source_platform", "stream_source_name"),
        )

        upstream_urns = []
        for presence_key, platform_key, name_key in source_keys:
            # a source that was not configured is stored as None
            if ingest_feature[presence_key] is None:
                continue
            upstream_urns.append(
                builder.make_dataset_urn(
                    ingest_feature[platform_key],
                    ingest_feature[name_key],
                    self.config.env,
                )
            )

        # feature type and upstream sources
        feature_properties = MLFeaturePropertiesClass(
            dataType=self.get_field_type(
                ingest_feature["type"], ingest_feature["name"]
            ),
            sources=upstream_urns,
        )

        feature_snapshot = MLFeatureSnapshot(
            urn=feature_urn,
            aspects=[feature_properties],
        )

        # wrap the snapshot in an MCE and hand it back as a workunit
        return MetadataWorkUnit(
            id=ingest_feature["name"],
            mce=MetadataChangeEvent(proposedSnapshot=feature_snapshot),
        )
Ejemplo n.º 4
0
    def get_feature_group_wu(
        self, feature_group_details: Dict[str, Any]
    ) -> MetadataWorkUnit:
        """
        Generate an MLFeatureTable workunit for a SageMaker feature group.

        Parameters
        ----------
            feature_group_details:
                ingested SageMaker feature group from get_feature_group_details();
                must contain "FeatureGroupName", "RecordIdentifierFeatureName",
                "FeatureDefinitions", "FeatureGroupArn", "CreationTime" and
                "FeatureGroupStatus" ("Description" is optional)

        Returns
        -------
            a MetadataWorkUnit whose id is the feature group name

        Raises
        ------
            KeyError
                if one of the required entries listed above is missing
        """

        feature_group_name = feature_group_details["FeatureGroupName"]
        record_identifier_name = feature_group_details["RecordIdentifierFeatureName"]

        feature_group_snapshot = MLFeatureTableSnapshot(
            urn=builder.make_ml_feature_table_urn("sagemaker", feature_group_name),
            aspects=[
                # browse paths are rooted at "/", matching the paths emitted
                # for other platforms (e.g. "/feast/...")
                BrowsePathsClass(paths=[f"/sagemaker/{feature_group_name}"]),
            ],
        )

        feature_group_snapshot.aspects.append(
            MLFeatureTablePropertiesClass(
                description=feature_group_details.get("Description"),
                # non-primary key features: everything except the record identifier
                mlFeatures=[
                    builder.make_ml_feature_urn(
                        feature_group_name,
                        feature["FeatureName"],
                    )
                    for feature in feature_group_details["FeatureDefinitions"]
                    if feature["FeatureName"] != record_identifier_name
                ],
                # the record identifier is the group's single primary key
                mlPrimaryKeys=[
                    builder.make_ml_primary_key_urn(
                        feature_group_name,
                        record_identifier_name,
                    )
                ],
                # additional metadata
                customProperties={
                    "arn": feature_group_details["FeatureGroupArn"],
                    # stringified because custom properties are plain text
                    "creation_time": str(feature_group_details["CreationTime"]),
                    "status": feature_group_details["FeatureGroupStatus"],
                },
            )
        )

        # make the MCE and workunit
        mce = MetadataChangeEvent(proposedSnapshot=feature_group_snapshot)
        return MetadataWorkUnit(id=feature_group_name, mce=mce)
Ejemplo n.º 5
0
    def _get_on_demand_feature_view_workunit(
        self, on_demand_feature_view: OnDemandFeatureView
    ) -> MetadataWorkUnit:
        """
        Build the MLFeatureTable work unit for a Feast on-demand feature view.

        The table is named "<project>.<view name>", lists every feature of
        the view and is emitted with an empty primary key list.
        """

        project = self.feature_store.project
        qualified_name = f"{project}.{on_demand_feature_view.name}"

        browse_paths = BrowsePathsClass(paths=[f"/feast/{project}/{qualified_name}"])
        status = StatusClass(removed=False)

        feature_urns = []
        for feature in on_demand_feature_view.features:
            feature_urns.append(
                builder.make_ml_feature_urn(qualified_name, feature.name)
            )

        table_properties = MLFeatureTablePropertiesClass(
            mlFeatures=feature_urns,
            mlPrimaryKeys=[],
        )

        on_demand_feature_view_snapshot = MLFeatureTableSnapshot(
            urn=builder.make_ml_feature_table_urn("feast", qualified_name),
            aspects=[browse_paths, status, table_properties],
        )

        return MetadataWorkUnit(
            id=qualified_name,
            mce=MetadataChangeEvent(proposedSnapshot=on_demand_feature_view_snapshot),
        )
Ejemplo n.º 6
0
    def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkUnit:
        """
        Build the MLFeatureTable work unit for a Feast feature view.

        The table is named "<project>.<view name>"; its features come from
        ``feature_view.features`` and each entry of ``feature_view.entities``
        becomes one primary key.
        """

        project = self.feature_store.project
        qualified_name = f"{project}.{feature_view.name}"

        browse_paths = BrowsePathsClass(paths=[f"/feast/{project}/{qualified_name}"])
        status = StatusClass(removed=False)

        feature_urns = []
        for feature in feature_view.features:
            feature_urns.append(
                builder.make_ml_feature_urn(qualified_name, feature.name)
            )

        primary_key_urns = []
        for entity_name in feature_view.entities:
            primary_key_urns.append(
                builder.make_ml_primary_key_urn(qualified_name, entity_name)
            )

        table_properties = MLFeatureTablePropertiesClass(
            mlFeatures=feature_urns,
            mlPrimaryKeys=primary_key_urns,
        )

        feature_view_snapshot = MLFeatureTableSnapshot(
            urn=builder.make_ml_feature_table_urn("feast", qualified_name),
            aspects=[browse_paths, status, table_properties],
        )

        return MetadataWorkUnit(
            id=qualified_name,
            mce=MetadataChangeEvent(proposedSnapshot=feature_view_snapshot),
        )
Ejemplo n.º 7
0
    def get_feature_wu(
        self, feature_group_details: Dict[str, Any], feature: Dict[str, Any]
    ) -> MetadataWorkUnit:
        """
        Generate an MLFeature workunit for a SageMaker feature.

        The feature named by "RecordIdentifierFeatureName" is emitted as an
        MLPrimaryKey; every other feature is emitted as an MLFeature. Both
        carry the same upstream sources: the offline store's S3 location and,
        when a data catalog is configured for it, the matching Glue table.

        Parameters
        ----------
            feature_group_details:
                ingested SageMaker feature group from get_feature_group_details()
            feature:
                ingested SageMaker feature ("FeatureName" and "FeatureType"
                are read)

        Returns
        -------
            a MetadataWorkUnit whose id is "<feature group name>-<feature name>"

        Raises
        ------
            KeyError
                if a required entry is missing from either dictionary
        """

        feature_group_name = feature_group_details["FeatureGroupName"]
        feature_name = feature["FeatureName"]

        # if the feature acts as the record identifier, then we ingest it as an MLPrimaryKey
        # the RecordIdentifierFeatureName is guaranteed to exist as it's required on creation
        is_record_identifier = (
            feature_group_details["RecordIdentifierFeatureName"] == feature_name
        )

        feature_sources = []

        if "OfflineStoreConfig" in feature_group_details:
            offline_store_config = feature_group_details["OfflineStoreConfig"]

            # remove S3 prefix (s3://)
            s3_name = offline_store_config["S3StorageConfig"]["S3Uri"][len("s3://"):]

            if s3_name.endswith("/"):
                s3_name = s3_name[:-1]

            feature_sources.append(
                builder.make_dataset_urn(
                    "s3",
                    s3_name,
                    self.env,
                )
            )

            if "DataCatalogConfig" in offline_store_config:
                data_catalog_config = offline_store_config["DataCatalogConfig"]

                # if Glue catalog associated with offline store
                glue_database = data_catalog_config["Database"]
                glue_table = data_catalog_config["TableName"]

                full_table_name = f"{glue_database}.{glue_table}"

                self.report.report_warning(
                    full_table_name,
                    f"""Note: table {full_table_name} is an AWS Glue object.
                        To view full table metadata, run Glue ingestion
                        (see https://datahubproject.io/docs/metadata-ingestion/#aws-glue-glue)""",
                )

                # built with the same helper as the S3 URN above instead of
                # assembling the URN string by hand
                feature_sources.append(
                    builder.make_dataset_urn(
                        "glue",
                        full_table_name,
                        self.env,
                    )
                )

        # note that there's also an OnlineStoreConfig field, but this
        # lacks enough metadata to create a dataset
        # (only specifies the security config and whether it's enabled at all)

        # resolved once; both snapshot kinds below use the same data type
        data_type = self.get_feature_type(feature["FeatureType"], feature_name)

        if is_record_identifier:
            primary_key_snapshot: MLPrimaryKeySnapshot = MLPrimaryKeySnapshot(
                urn=builder.make_ml_primary_key_urn(feature_group_name, feature_name),
                aspects=[
                    MLPrimaryKeyPropertiesClass(
                        dataType=data_type,
                        sources=feature_sources,
                    ),
                ],
            )

            # make the MCE
            mce = MetadataChangeEvent(proposedSnapshot=primary_key_snapshot)
        else:
            # create snapshot instance for the feature
            feature_snapshot: MLFeatureSnapshot = MLFeatureSnapshot(
                urn=builder.make_ml_feature_urn(feature_group_name, feature_name),
                aspects=[
                    MLFeaturePropertiesClass(
                        dataType=data_type,
                        sources=feature_sources,
                    )
                ],
            )

            # make the MCE
            mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot)

        return MetadataWorkUnit(
            id=f"{feature_group_name}-{feature_name}",
            mce=mce,
        )