Beispiel #1
0
def test_get_column_names_preserves_feature_ordering():
    """The feature list from `_get_column_names` must keep the schema's declared order."""
    declared_order = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]

    entity = Entity(
        "my-entity",
        description="My entity",
        value_type=ValueType.STRING,
    )
    feature_view = FeatureView(
        name="my-fv",
        entities=["my-entity"],
        ttl=timedelta(days=1),
        batch_source=BigQuerySource(table="non-existent-mock"),
        schema=[Field(name=column, dtype=String) for column in declared_order],
    )

    # Only the second element of the returned 4-tuple is checked here.
    _, feature_names, _, _ = _get_column_names(feature_view, [entity])
    assert feature_names == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
Beispiel #2
0
def test_on_demand_features_type_inference():
    """Check `infer_features` for on demand feature views declared three ways.

    The first view uses the keyword `sources`/`schema` form and must infer
    without raising. The second (deprecated `inputs`/`features` keywords) and
    the third (deprecated positional arguments) are expected to raise.
    """
    # Request source shared by every view declared in this test.
    request_source = RequestSource(
        name="date_request",
        schema=[Field(name="some_date", dtype=UnixTimestamp)],
    )

    @on_demand_feature_view(
        sources=[request_source],
        schema=[
            Field(name="output", dtype=UnixTimestamp),
            Field(name="string_output", dtype=String),
        ],
    )
    def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
        some_date = features_df["some_date"]
        result = pd.DataFrame()
        result["output"] = some_date
        result["string_output"] = some_date.astype(pd.StringDtype())
        return result

    # Not raising is the success criterion for the well-formed view.
    test_view.infer_features()

    @on_demand_feature_view(
        # Note: we deliberately use `inputs` instead of `sources` to test that `inputs`
        # still works correctly, even though it is deprecated.
        # TODO(felixwang9817): Remove references to `inputs` once it is fully deprecated.
        inputs={"date_request": request_source},
        features=[
            Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP),
            Feature(name="object_output", dtype=ValueType.STRING),
        ],
    )
    def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame:
        some_date = features_df["some_date"]
        result = pd.DataFrame()
        result["output"] = some_date
        # Casting with plain `str` is what this case relies on to trigger the error below.
        result["object_output"] = some_date.astype(str)
        return result

    with pytest.raises(ValueError, match="Value with native type object"):
        invalid_test_view.infer_features()

    @on_demand_feature_view(
        # Note: we deliberately use positional arguments here to test that they work correctly,
        # even though positional arguments are deprecated in favor of keyword arguments.
        # TODO(felixwang9817): Remove positional arguments once they are fully deprecated.
        [
            Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP),
            Feature(name="missing", dtype=ValueType.STRING),
        ],
        {"date_request": request_source},
    )
    def test_view_with_missing_feature(
            features_df: pd.DataFrame) -> pd.DataFrame:
        # The declared "missing" feature is intentionally never produced.
        result = pd.DataFrame()
        result["output"] = features_df["some_date"]
        return result

    with pytest.raises(SpecifiedFeaturesNotPresentError):
        test_view_with_missing_feature.infer_features()
Beispiel #3
0
def test_apply_feature_view_integration(test_feature_store):
    """Apply one feature view, read it back via list and get, then delete it."""
    view_name = "my_feature_view_1"
    entity_name = "fs1_my_entity_1"

    file_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    feature_view = FeatureView(
        name=view_name,
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=[entity_name],
        tags={"team": "matchmaking"},
        batch_source=file_source,
        ttl=timedelta(minutes=5),
    )

    # (name, dtype) pairs expected at feature positions 0..3 after a round trip.
    expected_fields = [
        ("fs1_my_feature_1", Int64),
        ("fs1_my_feature_2", String),
        ("fs1_my_feature_3", Array(String)),
        ("fs1_my_feature_4", Array(Bytes)),
    ]

    def assert_round_tripped(view):
        """Check the name, the first four features and the first entity of `view`."""
        assert view.name == view_name
        for position, (field_name, field_dtype) in enumerate(expected_fields):
            assert view.features[position].name == field_name
            assert view.features[position].dtype == field_dtype
        assert view.entities[0] == entity_name

    # Register the feature view.
    test_feature_store.apply([feature_view])

    # It must be the only view listed.
    listed_views = test_feature_store.list_feature_views()
    assert len(listed_views) == 1
    assert_round_tripped(listed_views[0])

    # Fetching it by name must give the same contents.
    assert_round_tripped(test_feature_store.get_feature_view("my_feature_view_1"))

    # After deletion nothing is listed any more.
    test_feature_store.delete_feature_view("my_feature_view_1")
    listed_views = test_feature_store.list_feature_views()
    assert len(listed_views) == 0

    test_feature_store.teardown()
Beispiel #4
0
def test_reapply_feature_view_success(test_feature_store, dataframe_source):
    """Re-applying an unchanged view keeps its materialization intervals; a changed one resets them."""

    def build_view(source, field):
        """Create "my_feature_view_1" with a single-field schema on top of `source`."""
        return FeatureView(
            name="my_feature_view_1",
            schema=[field],
            entities=["id"],
            batch_source=source,
            ttl=timedelta(minutes=5),
        )

    def stored_interval_count(view):
        """Number of materialization intervals recorded on the stored copy of `view`."""
        stored = test_feature_store.get_feature_view(view.name)
        return len(stored.materialization_intervals)

    with prep_file_source(df=dataframe_source,
                          event_timestamp_column="ts_1") as file_source:
        entity = Entity(
            name="id",
            join_keys=["id_join_key"],
            value_type=ValueType.STRING,
        )
        string_view = build_view(file_source, Field(name="string_col", dtype=String))

        # Freshly registered: nothing has been materialized yet.
        test_feature_store.apply([string_view, entity])
        assert stored_interval_count(string_view) == 0

        # One materialization run records one interval.
        test_feature_store.materialize(datetime(2020, 1, 1), datetime(2021, 1, 1))
        assert stored_interval_count(string_view) == 1

        # Applying the identical definition again keeps that interval.
        test_feature_store.apply([string_view])
        assert stored_interval_count(string_view) == 1

        # Applying a changed schema under the same name clears the intervals.
        int_view = build_view(file_source, Field(name="int64_col", dtype=Int64))
        test_feature_store.apply([int_view])
        assert stored_interval_count(int_view) == 0

        test_feature_store.teardown()
def create_driver_hourly_stats_feature_view(source):
    """Build the "driver_stats" feature view reading from `source`.

    The view is keyed on "driver_id", has a two hour TTL, and exposes
    `conv_rate`, `acc_rate` (Float32) and `avg_daily_trips` (Int32).
    """
    stat_columns = (
        ("conv_rate", Float32),
        ("acc_rate", Float32),
        ("avg_daily_trips", Int32),
    )
    return FeatureView(
        name="driver_stats",
        entities=["driver_id"],
        schema=[
            Field(name=column, dtype=column_type)
            for column, column_type in stat_columns
        ],
        source=source,
        ttl=timedelta(hours=2),
    )
Beispiel #6
0
def test_feature_view_kw_args_normal():
    """Smoke test: a keyword-only FeatureView can be projected into a FeatureService."""
    source = FileSource(name="my-file-source", path="test.parquet")
    float_fields = [
        Field(name=feature_name, dtype=Float32)
        for feature_name in ("feature1", "feature2")
    ]
    view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=float_fields,
        source=source,
    )

    # Constructing the service without an exception is the whole check.
    projection = view[["feature1", "feature2"]]
    _ = FeatureService(name="my-feature-service", features=[projection])
Beispiel #7
0
def test_apply_object_and_read(test_feature_store):
    """Objects read back from the store equal what was applied and differ from their siblings."""
    assert isinstance(test_feature_store, FeatureStore)

    source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    def make_entity(entity_name):
        """Create a STRING entity that differs from its sibling only by name."""
        return Entity(name=entity_name,
                      value_type=ValueType.STRING,
                      description="something")

    def make_view(view_name):
        """Create a feature view that differs from its sibling only by name."""
        return FeatureView(
            name=view_name,
            schema=[
                Field(name="fs1_my_feature_1", dtype=Int64),
                Field(name="fs1_my_feature_2", dtype=String),
                Field(name="fs1_my_feature_3", dtype=Array(String)),
                Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
            ],
            entities=["fs1_my_entity_1"],
            tags={"team": "matchmaking"},
            batch_source=source,
            ttl=timedelta(minutes=5),
        )

    first_entity = make_entity("fs1_my_entity_1")
    second_entity = make_entity("fs1_my_entity_2")
    first_view = make_view("my_feature_view_1")
    second_view = make_view("my_feature_view_2")

    # Register everything in one call.
    test_feature_store.apply(
        [first_view, first_entity, second_view, second_entity])

    stored_view = test_feature_store.get_feature_view("my_feature_view_1")
    stored_entity = test_feature_store.get_entity("fs1_my_entity_1")

    # The stored objects match what was applied under the same name...
    assert first_view == stored_view
    assert first_entity == stored_entity
    # ...and do not compare equal to the differently named siblings.
    assert second_view != stored_view
    assert second_entity != stored_entity

    test_feature_store.teardown()
Beispiel #8
0
    def from_proto(data_source: DataSourceProto):
        """Build a RequestSource from its protobuf representation.

        If only `request_data_options.deprecated_schema` is populated, a
        DeprecationWarning is issued and the schema is passed on as a dict of
        field name to ValueType. Otherwise the schema is the list of Fields
        parsed from `request_data_options.schema`.

        Args:
            data_source: The data source protobuf to convert.

        Returns:
            The RequestSource built from the protobuf.
        """
        options = data_source.request_data_options
        legacy_schema = options.deprecated_schema
        field_protos = options.schema

        if legacy_schema and not field_protos:
            warnings.warn(
                "Schema in RequestSource is changing type. The schema data type Dict[str, ValueType] is being deprecated in Feast 0.23. "
                "Please use List[Field] instead for the schema",
                DeprecationWarning,
            )
            schema = {
                field_name: ValueType(raw_type)
                for field_name, raw_type in legacy_schema.items()
            }
        else:
            schema = [Field.from_proto(proto) for proto in field_protos]

        return RequestSource(
            name=data_source.name,
            schema=schema,
            description=data_source.description,
            tags=dict(data_source.tags),
            owner=data_source.owner,
        )
Beispiel #9
0
def test_update_feature_views_with_inferred_features():
    """Inference must drop entity join-key columns from a view's schema and features."""
    source = FileSource(name="test", path="test path")
    first_entity = Entity(name="test1", join_keys=["test_column_1"])
    second_entity = Entity(name="test2", join_keys=["test_column_2"])

    single_entity_view = FeatureView(
        name="test1",
        entities=[first_entity],
        schema=[
            Field(name="feature", dtype=Float32),
            Field(name="test_column_1", dtype=String),
        ],
        source=source,
    )
    double_entity_view = FeatureView(
        name="test2",
        entities=[first_entity, second_entity],
        schema=[
            Field(name="feature", dtype=Float32),
            Field(name="test_column_1", dtype=String),
            Field(name="test_column_2", dtype=String),
        ],
        source=source,
    )

    def assert_field_count(view, expected):
        """Both `schema` and `features` of `view` must hold `expected` items."""
        assert len(view.schema) == expected
        assert len(view.features) == expected

    assert_field_count(single_entity_view, 2)

    # The entity field should be deleted from the schema and features of the feature view.
    update_feature_views_with_inferred_features(
        [single_entity_view],
        [first_entity],
        RepoConfig(provider="local", project="test"),
    )
    assert_field_count(single_entity_view, 1)

    assert_field_count(double_entity_view, 3)

    # The entity fields should be deleted from the schema and features of the feature view.
    update_feature_views_with_inferred_features(
        [double_entity_view],
        [first_entity, second_entity],
        RepoConfig(provider="local", project="test"),
    )
    assert_field_count(double_entity_view, 1)
    def from_proto(proto: FeatureViewProjectionProto):
        """Build a FeatureViewProjection from its protobuf representation.

        Args:
            proto: The feature view projection protobuf to convert.

        Returns:
            A FeatureViewProjection carrying the proto's name, alias, join key
            map, and one Field per entry in `proto.feature_columns`.
        """
        projection = FeatureViewProjection(
            name=proto.feature_view_name,
            name_alias=proto.feature_view_name_alias,
            features=[],
            join_key_map=dict(proto.join_key_map),
        )
        # Fill the (initially empty) feature list in proto order.
        projection.features.extend(
            Field.from_proto(column) for column in proto.feature_columns
        )
        return projection
Beispiel #11
0
    def from_proto(cls, feature_view_proto: FeatureViewProto):
        """
        Creates a feature view from a protobuf representation of a feature view.

        Args:
            feature_view_proto: A protobuf representation of a feature view.

        Returns:
            A feature view (an instance of ``cls``) based on the feature view protobuf.
        """
        spec = feature_view_proto.spec
        meta = feature_view_proto.meta

        batch_source = DataSource.from_proto(spec.batch_source)
        stream_source = None
        if spec.HasField("stream_source"):
            stream_source = DataSource.from_proto(spec.stream_source)

        # A zero-length proto duration is passed on as an explicit zero timedelta.
        if spec.ttl.ToNanoseconds() == 0:
            ttl = timedelta(days=0)
        else:
            ttl = spec.ttl.ToTimedelta()

        feature_view = cls(
            name=spec.name,
            entities=list(spec.entities),
            schema=[Field.from_proto(field_proto) for field_proto in spec.features],
            description=spec.description,
            tags=dict(spec.tags),
            owner=spec.owner,
            online=spec.online,
            ttl=ttl,
            source=batch_source,
        )
        if stream_source:
            feature_view.stream_source = stream_source

        # FeatureViewProjections are not saved in the FeatureView proto.
        # Create the default projection.
        feature_view.projection = FeatureViewProjection.from_definition(
            feature_view)

        # Timestamps are only copied over when the proto actually carries them.
        if meta.HasField("created_timestamp"):
            feature_view.created_timestamp = meta.created_timestamp.ToDatetime()
        if meta.HasField("last_updated_timestamp"):
            feature_view.last_updated_timestamp = (
                meta.last_updated_timestamp.ToDatetime())

        # Each interval is stored as a (start, end) pair passed through utils.make_tzaware.
        feature_view.materialization_intervals.extend(
            (
                utils.make_tzaware(interval.start_time.ToDatetime()),
                utils.make_tzaware(interval.end_time.ToDatetime()),
            )
            for interval in meta.materialization_intervals
        )

        return feature_view
Beispiel #12
0
    def __init__(
        self,
        *args,
        name: Optional[str] = None,
        schema: Optional[Union[Dict[str, ValueType], List[Field]]] = None,
        description: Optional[str] = "",
        tags: Optional[Dict[str, str]] = None,
        owner: Optional[str] = "",
    ):
        """Creates a RequestSource object.

        Args:
            *args: Deprecated positional form; at most `name` and `schema`, in
                that order. Positional values override the keyword values.
            name: The name of the request source.
            schema: Either a list of Fields or (deprecated) a dict mapping
                field names to ValueTypes. A dict is converted to a list of
                Fields before being stored on `self.schema`.
            description (optional): A human-readable description.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            owner (optional): The owner of the request source.

        Raises:
            ValueError: If more than two positional arguments are given, or if
                the schema is missing or empty.
            TypeError: If the schema is neither a dict nor a list.
        """
        positional_attributes = ["name", "schema"]
        _name = name
        _schema = schema
        if args:
            warnings.warn(
                ("Request source parameters should be specified as a keyword argument instead of a positional arg. "
                 "Feast 0.23+ will not support positional arguments to construct request sources"
                 ),
                DeprecationWarning,
            )
            if len(args) > len(positional_attributes):
                raise ValueError(
                    f"Only {', '.join(positional_attributes)} are allowed as positional args when defining "
                    f"request sources, for backwards compatibility.")
            if len(args) >= 1:
                _name = args[0]
            if len(args) >= 2:
                _schema = args[1]

        super().__init__(name=_name,
                         description=description,
                         tags=tags,
                         owner=owner)
        if not _schema:
            raise ValueError("Schema needs to be provided for Request Source")
        if isinstance(_schema, dict):
            warnings.warn(
                "Schema in RequestSource is changing type. The schema data type Dict[str, ValueType] is being deprecated in Feast 0.23. "
                "Please use List[Field] instead for the schema",
                DeprecationWarning,
            )
            # Normalize the deprecated dict form so `self.schema` is always a list of Fields.
            self.schema = [
                Field(name=field_name,
                      dtype=VALUE_TYPES_TO_FEAST_TYPES[value_type])
                for field_name, value_type in _schema.items()
            ]
        elif isinstance(_schema, list):
            self.schema = _schema
        else:
            # TypeError is still an Exception, so existing `except Exception` callers keep working.
            raise TypeError(
                "Schema type must be either dictionary or list, not " +
                str(type(_schema)))
Beispiel #13
0
def test_hash():
    """Feature services hash alike exactly when name, features and description all match."""
    source = FileSource(name="my-file-source", path="test.parquet")
    view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=source,
    )

    def make_service(feature_names, **extra):
        """Create "my-feature-service" over the given projection of `view`."""
        return FeatureService(
            name="my-feature-service",
            features=[view[feature_names]],
            **extra,
        )

    both_features = make_service(["feature1", "feature2"])
    both_features_again = make_service(["feature1", "feature2"])
    one_feature = make_service(["feature1"])
    one_feature_described = make_service(["feature1"], description="test")

    # Identical definitions collapse to a single set member.
    assert len({both_features, both_features_again}) == 1

    # A different feature selection is a different member.
    assert len({both_features, one_feature}) == 2

    # So is a different description.
    assert len({one_feature, one_feature_described}) == 2

    # All four together: one duplicate pair, hence three distinct members.
    everything = {
        both_features,
        both_features_again,
        one_feature,
        one_feature_described,
    }
    assert len(everything) == 3
Beispiel #14
0
    def infer_features(self):
        """
        Infers the set of features associated to this feature view from the input source.

        An empty dataframe with one typed column per source field is passed
        through the udf; the columns of the result become the inferred
        features. If features were already specified, they are only checked
        against the inferred ones; otherwise the inferred ones are stored.

        Raises:
            SpecifiedFeaturesNotPresentError: A specified feature is not among
                the inferred features.
            RegistryInferenceFailure: The set of features could not be inferred.
        """
        sample_df = pd.DataFrame()

        # Feature view sources contribute each feature twice: once prefixed with
        # the projection name and once under the bare feature name.
        for projection in self.source_feature_view_projections.values():
            for feature in projection.features:
                pandas_dtype = feast_value_type_to_pandas_type(
                    feature.dtype.to_value_type())
                for column in (f"{projection.name}__{feature.name}",
                               f"{feature.name}"):
                    sample_df[column] = pd.Series(dtype=pandas_dtype)

        # Request sources contribute one column per schema field.
        for request_source in self.source_request_sources.values():
            for field in request_source.schema:
                pandas_dtype = feast_value_type_to_pandas_type(
                    field.dtype.to_value_type())
                sample_df[f"{field.name}"] = pd.Series(dtype=pandas_dtype)

        output_df: pd.DataFrame = self.udf(sample_df)

        inferred_features = [
            Field(
                name=column,
                dtype=from_value_type(
                    python_type_to_feast_value_type(
                        column, type_name=str(column_dtype))),
            )
            for column, column_dtype in zip(output_df.columns, output_df.dtypes)
        ]

        if self.features:
            missing_features = [
                specified for specified in self.features
                if specified not in inferred_features
            ]
            if missing_features:
                raise SpecifiedFeaturesNotPresentError(
                    [missing.name for missing in missing_features], self.name)
        else:
            self.features = inferred_features

        if not self.features:
            raise RegistryInferenceFailure(
                "OnDemandFeatureView",
                f"Could not infer Features for the feature view '{self.name}'.",
            )
Beispiel #15
0
    def __init__(
        self,
        name: str,
        request_data_source: RequestSource,
        description: str = "",
        tags: Optional[Dict[str, str]] = None,
        owner: str = "",
    ):
        """
        Creates a RequestFeatureView object.

        Always emits a DeprecationWarning, since request feature views are
        deprecated in favor of request data sources.

        Args:
            name: The unique name of the request feature view.
            request_data_source: The request data source that specifies the schema and
                features of the request feature view.
            description (optional): A human-readable description.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            owner (optional): The owner of the request feature view, typically the email
                of the primary maintainer.
        """
        warnings.warn(
            "Request feature view is deprecated. "
            "Please use request data source instead",
            DeprecationWarning,
        )

        # A dict schema is expanded into one Field per entry; any other schema is used as given.
        source_schema = request_data_source.schema
        if isinstance(source_schema, Dict):
            view_features = [
                Field(name=field_name, dtype=field_dtype)
                for field_name, field_dtype in source_schema.items()
            ]
        else:
            view_features = source_schema

        super().__init__(
            name=name,
            features=view_features,
            description=description,
            tags=tags,
            owner=owner,
        )
        self.request_source = request_data_source
Beispiel #16
0
    def to_proto(self) -> DataSourceProto:
        """Convert this request source into a DataSourceProto.

        Returns:
            A DataSourceProto of type REQUEST_SOURCE carrying this source's
            name, description, tags and owner, with one field proto per schema
            entry under `request_data_options.schema`.
        """
        if isinstance(self.schema, Dict):
            # NOTE(review): the type mapping is indexed with `value.value` here;
            # confirm that matches the key type of VALUE_TYPES_TO_FEAST_TYPES.
            field_protos = [
                Field(name=key,
                      dtype=VALUE_TYPES_TO_FEAST_TYPES[value.value]).to_proto()
                for key, value in self.schema.items()
            ]
        else:
            field_protos = [field.to_proto() for field in self.schema]

        proto = DataSourceProto(
            name=self.name,
            type=DataSourceProto.REQUEST_SOURCE,
            description=self.description,
            tags=self.tags,
            owner=self.owner,
        )
        proto.request_data_options.schema.extend(field_protos)
        return proto
Beispiel #17
0
    def __init__(  # noqa: C901
        self,
        *args,
        name: Optional[str] = None,
        features: Optional[List[Feature]] = None,
        sources: Optional[List[Union[BatchFeatureView, StreamFeatureView,
                                     RequestSource,
                                     FeatureViewProjection, ]]] = None,
        udf: Optional[MethodType] = None,
        inputs: Optional[Dict[str, Union[FeatureView, FeatureViewProjection,
                                         RequestSource]]] = None,
        schema: Optional[List[Field]] = None,
        description: str = "",
        tags: Optional[Dict[str, str]] = None,
        owner: str = "",
    ):
        """
        Creates an OnDemandFeatureView object.

        Args:
            *args: Deprecated positional form, in the order `name`, `features`,
                `inputs`, `udf`. Positional name, features and udf override the
                keyword values; positional inputs are added to the sources.
            name: The unique name of the on demand feature view.
            features (deprecated): The list of features in the output of the on demand
                feature view, after the transformation has been applied.
            sources (optional): A list of input sources, which may be feature views,
                feature view projections, or request data sources.
                These sources serve as inputs to the udf.
            udf (optional): The user defined transformation function, which must take pandas
                dataframes as inputs.
            inputs (optional): (Deprecated) A map from input source names to the actual input sources,
                which may be feature views, feature view projections, or request data sources.
                These sources serve as inputs to the udf, which will refer to them by name.
            schema (optional): The list of features in the output of the on demand feature
                view, after the transformation has been applied.
            description (optional): A human-readable description.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            owner (optional): The owner of the on demand feature view, typically the email
                of the primary maintainer.

        Raises:
            ValueError: If both `sources` and `inputs` are given, if an input has an
                unsupported type, if too many positional arguments are given, or if
                the name, the sources or the udf are missing.
        """
        positional_attributes = ["name", "features", "inputs", "udf"]

        features_deprecation_message = (
            "The `features` parameter is being deprecated in favor of the `schema` parameter. "
            "Please switch from using `features` to `schema`. This will also requiring switching "
            "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
            "support the `features` parameter.")
        inputs_deprecation_message = (
            "The `inputs` parameter is being deprecated. Please use `sources` instead. "
            "Feast 0.21 and onwards will not support the `inputs` parameter."
        )

        def _sources_from_inputs(input_map):
            """Translate a deprecated `inputs` mapping into a list of sources."""
            converted = []
            for source in input_map.values():
                if isinstance(source, FeatureView):
                    converted.append(feature_view_to_batch_feature_view(source))
                elif isinstance(source, (RequestSource, FeatureViewProjection)):
                    converted.append(source)
                else:
                    raise ValueError(
                        "input can only accept FeatureView, FeatureViewProjection, or RequestSource"
                    )
            return converted

        _name = name

        _schema = schema or []
        if len(_schema) == 0 and features is not None:
            _schema = [Field.from_feature(feature) for feature in features]
        if features is not None:
            warnings.warn(features_deprecation_message, DeprecationWarning)

        # Work on a copy: positional `inputs` are appended below, and that must
        # never grow the list object the caller passed as `sources`.
        _sources = list(sources) if sources else []
        if inputs and sources:
            raise ValueError(
                "At most one of `sources` or `inputs` can be specified.")
        elif inputs:
            warnings.warn(inputs_deprecation_message, DeprecationWarning)
            _sources.extend(_sources_from_inputs(inputs))
        _udf = udf

        if args:
            warnings.warn(
                ("On demand feature view parameters should be specified as keyword arguments "
                 "instead of positional arguments. Feast 0.23 and onwards will not support "
                 "positional arguments in on demand feature view definitions."
                 ),
                DeprecationWarning,
            )
            if len(args) > len(positional_attributes):
                raise ValueError(
                    f"Only {', '.join(positional_attributes)} are allowed as positional args "
                    f"when defining feature views, for backwards compatibility."
                )
            if len(args) >= 1:
                _name = args[0]
            if len(args) >= 2:
                _schema = args[1]
                # Convert Features to Fields.
                if len(_schema) > 0 and isinstance(_schema[0], Feature):
                    _schema = [
                        Field.from_feature(feature) for feature in _schema
                    ]
                warnings.warn(features_deprecation_message, DeprecationWarning)
            if len(args) >= 3:
                _sources.extend(_sources_from_inputs(args[2]))
                warnings.warn(inputs_deprecation_message, DeprecationWarning)
            if len(args) >= 4:
                _udf = args[3]

        if not _name:
            raise ValueError(
                "The name of the on demand feature view must be specified.")

        if not _sources:
            raise ValueError("The `sources` parameter must be specified.")

        super().__init__(
            name=_name,
            features=_schema,
            description=description,
            tags=tags,
            owner=owner,
        )

        # Split the sources by kind; anything that is neither a request source
        # nor a projection is expected to expose a `.projection` attribute.
        self.source_feature_view_projections: Dict[str,
                                                   FeatureViewProjection] = {}
        self.source_request_sources: Dict[str, RequestSource] = {}
        for odfv_source in _sources:
            if isinstance(odfv_source, RequestSource):
                self.source_request_sources[odfv_source.name] = odfv_source
            elif isinstance(odfv_source, FeatureViewProjection):
                self.source_feature_view_projections[
                    odfv_source.name] = odfv_source
            else:
                self.source_feature_view_projections[
                    odfv_source.name] = odfv_source.projection

        if _udf is None:
            raise ValueError("The `udf` parameter must be specified.")
        self.udf = _udf
Beispiel #18
0
def on_demand_feature_view(
    *args,
    features: Optional[List[Feature]] = None,
    sources: Optional[List[Union[BatchFeatureView, StreamFeatureView,
                                 RequestSource,
                                 FeatureViewProjection, ]]] = None,
    inputs: Optional[Dict[str, Union[FeatureView, RequestSource]]] = None,
    schema: Optional[List[Field]] = None,
    description: str = "",
    tags: Optional[Dict[str, str]] = None,
    owner: str = "",
):
    """
    Creates an OnDemandFeatureView object with the given user function as udf.

    The resulting view is named after the decorated function.

    Args:
        *args (deprecated): At most two positional arguments, read as `features`
            and `inputs` in that order. Accepted for backwards compatibility only.
        features (deprecated): The list of features in the output of the on demand
            feature view, after the transformation has been applied.
        sources (optional): A list of input sources, which may be feature views,
            feature view projections, or request data sources.
            These sources serve as inputs to the udf, which will refer to them by name.
        inputs (deprecated): A map from input source names to the actual input sources,
            which may be feature views, feature view projections, or request data sources.
            Cannot be combined with `sources`.
        schema (optional): The list of features in the output of the on demand feature
            view, after the transformation has been applied.
        description (optional): A human-readable description.
        tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
        owner (optional): The owner of the on demand feature view, typically the email
            of the primary maintainer.

    Returns:
        A decorator that wraps the user function in an OnDemandFeatureView.

    Raises:
        ValueError: Both `sources` and `inputs` are specified, an input is not a
            FeatureView, FeatureViewProjection, or RequestSource, more than two
            positional arguments are passed, or no source is specified at all.
    """
    positional_attributes = ["features", "inputs"]

    features_deprecation_message = (
        "The `features` parameter is being deprecated in favor of the `schema` parameter. "
        "Please switch from using `features` to `schema`. This will also requiring switching "
        "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
        "support the `features` parameter.")
    inputs_deprecation_message = (
        "The `inputs` parameter is being deprecated. Please use `sources` instead. "
        "Feast 0.21 and onwards will not support the `inputs` parameter.")

    def _sources_from_inputs(inputs_map):
        """Converts a deprecated `inputs` mapping into a list of sources."""
        converted = []
        for source in inputs_map.values():
            if isinstance(source, FeatureView):
                converted.append(feature_view_to_batch_feature_view(source))
            elif isinstance(source, (RequestSource, FeatureViewProjection)):
                converted.append(source)
            else:
                raise ValueError(
                    "input can only accept FeatureView, FeatureViewProjection, or RequestSource"
                )
        return converted

    _schema = schema or []
    if not _schema and features is not None:
        _schema = [Field.from_feature(feature) for feature in features]
    if features is not None:
        warnings.warn(features_deprecation_message, DeprecationWarning)

    # Work on a copy so that sources appended below never leak into the
    # list object owned by the caller.
    _sources = list(sources) if sources else []
    if inputs and sources:
        raise ValueError(
            "At most one of `sources` or `inputs` can be specified.")
    elif inputs:
        warnings.warn(inputs_deprecation_message, DeprecationWarning)
        _sources.extend(_sources_from_inputs(inputs))

    if args:
        warnings.warn(
            ("On demand feature view parameters should be specified as keyword arguments "
             "instead of positional arguments. Feast 0.23 and onwards will not support "
             "positional arguments in on demand feature view definitions."),
            DeprecationWarning,
        )
        if len(args) > len(positional_attributes):
            raise ValueError(
                f"Only {', '.join(positional_attributes)} are allowed as positional args "
                f"when defining feature views, for backwards compatibility.")
        if len(args) >= 1:
            _schema = args[0]
            # Convert Features to Fields.
            if len(_schema) > 0 and isinstance(_schema[0], Feature):
                _schema = [Field.from_feature(feature) for feature in _schema]
            warnings.warn(features_deprecation_message, DeprecationWarning)
        if len(args) >= 2:
            # Warn exactly once for the positional `inputs`, regardless of how
            # many sources the mapping holds (including none).
            warnings.warn(inputs_deprecation_message, DeprecationWarning)
            _sources.extend(_sources_from_inputs(args[1]))

    if not _sources:
        raise ValueError("The `sources` parameter must be specified.")

    def decorator(user_function):
        on_demand_feature_view_obj = OnDemandFeatureView(
            name=user_function.__name__,
            sources=_sources,
            schema=_schema,
            udf=user_function,
            description=description,
            tags=tags,
            owner=owner,
        )
        # Carry the function's metadata (name, docstring, ...) over to the view.
        functools.update_wrapper(wrapper=on_demand_feature_view_obj,
                                 wrapped=user_function)
        return on_demand_feature_view_obj

    return decorator
Beispiel #19
0
    def from_proto(cls,
                   on_demand_feature_view_proto: OnDemandFeatureViewProto):
        """
        Builds an on demand feature view from its protobuf representation.

        Args:
            on_demand_feature_view_proto: The protobuf message to convert.

        Returns:
            An OnDemandFeatureView object reconstructed from the protobuf.
        """
        spec = on_demand_feature_view_proto.spec
        meta = on_demand_feature_view_proto.meta

        # Rebuild every source according to which member of the `source`
        # oneof is populated.
        sources = []
        for _, source_proto in spec.sources.items():
            source_kind = source_proto.WhichOneof("source")
            if source_kind == "feature_view":
                # A full feature view is reduced to its projection.
                restored = FeatureView.from_proto(
                    source_proto.feature_view).projection
            elif source_kind == "feature_view_projection":
                restored = FeatureViewProjection.from_proto(
                    source_proto.feature_view_projection)
            else:
                # Every remaining case is read as a request data source.
                restored = RequestSource.from_proto(
                    source_proto.request_data_source)
            sources.append(restored)

        output_fields = [
            Field(
                name=feature.name,
                dtype=from_value_type(ValueType(feature.value_type)),
            ) for feature in spec.features
        ]

        # NOTE(review): dill.loads unpickles the stored udf, which can run
        # arbitrary code; the proto must come from a trusted registry.
        udf = dill.loads(spec.user_defined_function.body)

        odfv = cls(
            name=spec.name,
            schema=output_fields,
            sources=sources,
            udf=udf,
            description=spec.description,
            tags=dict(spec.tags),
            owner=spec.owner,
        )

        # FeatureViewProjections are not saved in the OnDemandFeatureView proto.
        # Create the default projection.
        odfv.projection = FeatureViewProjection.from_definition(odfv)

        # Timestamps are copied only when the proto actually carries them.
        if meta.HasField("created_timestamp"):
            odfv.created_timestamp = meta.created_timestamp.ToDatetime()
        if meta.HasField("last_updated_timestamp"):
            odfv.last_updated_timestamp = (
                meta.last_updated_timestamp.ToDatetime())

        return odfv
Beispiel #20
0
def test_inputs_parameter_deprecation_in_odfv():
    """Checks the deprecated `inputs` argument of `on_demand_feature_view`.

    Covers three cases: `inputs` alone emits a DeprecationWarning and still
    registers the request source, `inputs` together with `sources` raises a
    ValueError, and the request source is stored under its own name rather
    than under the key used in the `inputs` mapping.
    """
    request_source_name = "date_request"
    date_request = RequestSource(
        name=request_source_name,
        schema=[Field(name="some_date", dtype=UnixTimestamp)],
    )

    def output_schema():
        # A fresh list per view, so the views never share a schema object.
        return [
            Field(name="output", dtype=UnixTimestamp),
            Field(name="string_output", dtype=String),
        ]

    # Case 1: `inputs` alone warns but still builds the view.
    with pytest.warns(DeprecationWarning):

        @on_demand_feature_view(
            inputs={"date_request": date_request},
            schema=output_schema(),
        )
        def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
            result = pd.DataFrame()
            dates = features_df["some_date"]
            result["output"] = dates
            result["string_output"] = dates.astype(pd.StringDtype())
            return result

    odfv = test_view
    registered = odfv.source_request_sources
    assert odfv.name == "test_view"
    assert len(registered) == 1
    assert registered[request_source_name].name == "date_request"
    assert registered[request_source_name].schema == date_request.schema

    # Case 2: `inputs` and `sources` are mutually exclusive.
    with pytest.raises(ValueError):

        @on_demand_feature_view(
            inputs={"date_request": date_request},
            sources=[date_request],
            schema=output_schema(),
        )
        def incorrect_testview(features_df: pd.DataFrame) -> pd.DataFrame:
            result = pd.DataFrame()
            dates = features_df["some_date"]
            result["output"] = dates
            result["string_output"] = dates.astype(pd.StringDtype())
            return result

    # Case 3: the mapping key ("odfv") is ignored; the source is looked up
    # by its own name.
    @on_demand_feature_view(
        inputs={"odfv": date_request},
        schema=output_schema(),
    )
    def test_correct_view(features_df: pd.DataFrame) -> pd.DataFrame:
        result = pd.DataFrame()
        dates = features_df["some_date"]
        result["output"] = dates
        result["string_output"] = dates.astype(pd.StringDtype())
        return result

    odfv = test_correct_view
    assert odfv.name == "test_correct_view"
    assert (odfv.source_request_sources[request_source_name].schema ==
            date_request.schema)
Beispiel #21
0
    def __init__(
        self,
        *args,
        name: Optional[str] = None,
        entities: Optional[Union[List[Entity], List[str]]] = None,
        ttl: Optional[Union[Duration, timedelta]] = None,
        batch_source: Optional[DataSource] = None,
        stream_source: Optional[DataSource] = None,
        features: Optional[List[Feature]] = None,
        tags: Optional[Dict[str, str]] = None,
        online: bool = True,
        description: str = "",
        owner: str = "",
        schema: Optional[List[Field]] = None,
        source: Optional[DataSource] = None,
    ):
        """
        Creates a FeatureView object.

        Args:
            *args (deprecated): At most three positional arguments, read as `name`,
                `entities` and `ttl` in that order. When given, they take precedence
                over the keyword arguments of the same name.
            name: The unique name of the feature view.
            entities: The list of entities with which this group of features is associated.
                Entity objects are stored by name; when no entity is given, a dummy
                entity name is used instead.
            ttl: The amount of time this group of features lives. A ttl of 0 indicates that
                this group of features lives forever. Note that large ttl's or a ttl of 0
                can result in extremely computationally intensive queries.
            batch_source: The batch source of data where this group of features is stored.
            stream_source (optional): The stream source of data where this group of features
                is stored.
            features (deprecated): The list of features defined as part of this feature view.
                Only used when `schema` is empty.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            online (optional): A boolean indicating whether online retrieval is enabled for
                this feature view.
            description (optional): A human-readable description.
            owner (optional): The owner of the feature view, typically the email of the
                primary maintainer.
            schema (optional): The schema of the feature view, including feature, timestamp,
                and entity columns.
            source (optional): The source of data for this group of features. May be a stream source, or a batch source.
                If a stream source, the source should contain a batch_source for backfills & batch materialization.

        Raises:
            ValueError: More than three positional arguments are passed, the name is
                missing, `ttl` is neither a Duration, a timedelta, nor None, or a
                field mapping conflicts with an Entity or a Feature.
        """

        positional_attributes = ["name", "entities", "ttl"]

        _name = name
        _entities = entities
        _ttl = ttl

        if args:
            # The two sentences are separated by a space so the emitted message
            # does not read "positional arg.Feast 0.23+".
            warnings.warn(
                ("feature view parameters should be specified as a keyword argument instead of a positional arg. "
                 "Feast 0.23+ will not support positional arguments to construct feature views"
                 ),
                DeprecationWarning,
            )
            if len(args) > len(positional_attributes):
                raise ValueError(
                    f"Only {', '.join(positional_attributes)} are allowed as positional args when defining "
                    f"feature views, for backwards compatibility.")
            if len(args) >= 1:
                _name = args[0]
            if len(args) >= 2:
                _entities = args[1]
            if len(args) >= 3:
                _ttl = args[2]

        if not _name:
            raise ValueError("feature view name needs to be specified")

        self.name = _name
        # Entities are stored by name, whether given as Entity objects or strings.
        self.entities = (
            [e.name if isinstance(e, Entity) else e
             for e in _entities] if _entities else [DUMMY_ENTITY_NAME])

        # NOTE(review): expected to set `self.batch_source`, which the field
        # mapping check below reads; confirm against `_initialize_sources`.
        self._initialize_sources(_name, batch_source, stream_source, source)

        if isinstance(_ttl, Duration):
            self.ttl = timedelta(seconds=int(_ttl.seconds))
            warnings.warn(
                ("The option to pass a Duration object to the ttl parameter is being deprecated. "
                 "Please pass a timedelta object instead. Feast 0.21 and onwards will not support "
                 "Duration objects."),
                DeprecationWarning,
            )
        elif isinstance(_ttl, timedelta) or _ttl is None:
            self.ttl = _ttl
        else:
            raise ValueError(
                f"unknown value type specified for ttl {type(_ttl)}")

        if features is not None:
            warnings.warn(
                ("The `features` parameter is being deprecated in favor of the `schema` parameter. "
                 "Please switch from using `features` to `schema`. This will also requiring switching "
                 "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
                 "support the `features` parameter."),
                DeprecationWarning,
            )

        # `schema` wins over the deprecated `features` when both are given.
        _schema = schema or []
        if not _schema and features is not None:
            _schema = [Field.from_feature(feature) for feature in features]
        self.schema = _schema

        # TODO(felixwang9817): Infer which fields in the schema are features, timestamps,
        # and entities. For right now we assume that all fields are features, since the
        # current `features` parameter only accepts feature columns.
        _features = _schema

        # An entity or feature name must not also be a key of the batch
        # source's field mapping.
        cols = list(self.entities) + [field.name for field in _features]
        field_mapping = self.batch_source.field_mapping
        if field_mapping is not None:
            for col in cols:
                if col in field_mapping:
                    raise ValueError(
                        f"The field {col} is mapped to {field_mapping[col]} for this data source. "
                        f"Please either remove this field mapping or use {field_mapping[col]} as the "
                        f"Entity or Feature name.")

        super().__init__(
            name=_name,
            features=_features,
            description=description,
            tags=tags,
            owner=owner,
        )
        self.online = online
        self.materialization_intervals = []