# Example 1
def test_create_stream_feature_view():
    """A StreamFeatureView needs a streaming source: a Kafka source is
    accepted, while omitting the source or passing a batch-only
    FileSource must raise ValueError."""
    kafka_source = KafkaSource(
        name="kafka",
        event_timestamp_column="",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        topic="topic",
        batch_source=FileSource(path="some path"),
    )

    # Happy path: a Kafka-backed stream source is accepted.
    StreamFeatureView(
        name="test batch feature view",
        entities=[],
        ttl=timedelta(days=30),
        source=kafka_source,
    )

    # No source at all is rejected.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view",
            entities=[],
            ttl=timedelta(days=30),
        )

    # A batch-only file source is rejected as well.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view",
            entities=[],
            ttl=timedelta(days=30),
            source=FileSource(path="some path"),
        )
# Example 2
    def test_feature_table_import_export_yaml(self, batch_source):
        """Round-trip a FeatureTable through its YAML representation and
        verify the reconstructed table compares equal to the original."""
        kafka = KafkaSource(
            field_mapping={
                "ride_distance": "ride_distance",
                "ride_duration": "ride_duration",
            },
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat(class_path="class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
        )

        original = FeatureTable(
            name="car_driver",
            features=[
                Feature(name="ride_distance", dtype=ValueType.FLOAT),
                Feature(name="ride_duration", dtype=ValueType.STRING),
            ],
            entities=["car_driver_entity"],
            labels={"team": "matchmaking"},
            batch_source=batch_source,
            stream_source=kafka,
        )

        # Serialize to a YAML string, then rebuild a table from it ...
        roundtripped = FeatureTable.from_yaml(original.to_yaml())

        # ... and equality must be upheld across the round trip.
        assert original == roundtripped
# Example 3
    def test_apply_feature_table_success(self, test_client):
        """Apply one FeatureTable through the client and check that listing
        returns it with every feature name, dtype and entity intact."""
        test_client.set_project("project1")

        # Batch (file) half of the table's sources.
        file_source = FileSource(
            file_format="parquet",
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )

        # Streaming (Kafka) half.
        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            class_path="random/path/to/class",
            topic="test_topic",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
        )

        table = FeatureTable(
            name="my-feature-table-1",
            features=[
                Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
                Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
                Feature(name="fs1-my-feature-3", dtype=ValueType.STRING_LIST),
                Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST),
            ],
            entities=["fs1-my-entity-1"],
            labels={"team": "matchmaking"},
            batch_source=file_source,
            stream_source=kafka_source,
        )

        # Register the feature table with Core.
        test_client.apply_feature_table(table)

        feature_tables = test_client.list_feature_tables()

        # Exactly one table comes back, with every field round-tripped.
        assert len(feature_tables) == 1
        listed = feature_tables[0]
        assert (
            listed.name == "my-feature-table-1"
            and listed.features[0].name == "fs1-my-feature-1"
            and listed.features[0].dtype == ValueType.INT64
            and listed.features[1].name == "fs1-my-feature-2"
            and listed.features[1].dtype == ValueType.STRING
            and listed.features[2].name == "fs1-my-feature-3"
            and listed.features[2].dtype == ValueType.STRING_LIST
            and listed.features[3].name == "fs1-my-feature-4"
            and listed.features[3].dtype == ValueType.BYTES_LIST
            and listed.entities[0] == "fs1-my-entity-1"
        )
def feature_table():
    """Build a minimal FeatureTable ("ft") backed by a Kafka stream source
    with empty connection settings; no entities or features attached."""
    source = KafkaSource(
        topic="t",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        event_timestamp_column="",
    )
    return FeatureTable(name="ft", entities=[], features=[], stream_source=source)
# Example 5
def basic_featuretable():
    """Assemble a fully-populated FeatureTable: file batch source, Kafka
    stream source, two features, two entities, labels, 1-hour max_age."""
    # Both sources remap the same three upstream fields; each gets its
    # own copy so the sources do not share a dict instance.
    field_mapping = {
        "dev_entity": "dev_entity_field",
        "dev_feature_float": "dev_feature_float_field",
        "dev_feature_string": "dev_feature_string_field",
    }
    file_source = FileSource(
        field_mapping=dict(field_mapping),
        file_format="PARQUET",
        file_url="gs://example/feast/*",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
        date_partition_column="datetime",
    )
    kafka_source = KafkaSource(
        field_mapping=dict(field_mapping),
        bootstrap_servers="localhost:9094",
        class_path="random/path/to/class",
        topic="test_topic",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
    )
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=file_source,
        stream_source=kafka_source,
        labels={"key1": "val1", "key2": "val2"},
    )
# Example 6
    def _create_ft(self, client: Client, features) -> None:
        """Register the ``driver_car_id`` entity and a feature table named
        ``self.table_name`` (file batch source + Kafka stream source)
        with Feast Core via *client*."""
        driver_entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )
        # The entity is applied first so the table can reference it.
        client.apply_entity(driver_entity)

        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )
        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat("class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
        )

        # Register the feature table with Core.
        client.apply_feature_table(
            FeatureTable(
                name=self.table_name,
                features=features,
                entities=["driver_car_id"],
                labels={"team": "matchmaking"},
                batch_source=file_source,
                stream_source=kafka_source,
            )
        )
# Example 7
    def test_apply_feature_table_integration(self, test_client):
        """End-to-end: apply a feature table, list it, fetch it by name,
        delete it, and confirm each step observes the expected state."""
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )

        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat("class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
        )

        table = FeatureTable(
            name="my-feature-table-1",
            features=[
                Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
                Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
                Feature(name="fs1-my-feature-3", dtype=ValueType.STRING_LIST),
                Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST),
            ],
            entities=["fs1-my-entity-1"],
            labels={"team": "matchmaking"},
            batch_source=file_source,
            stream_source=kafka_source,
        )

        # Register the feature table with Core.
        test_client.apply(table)

        def matches_expected(ft):
            # Name, each feature's name/dtype, and the entity round-trip.
            return (
                ft.name == "my-feature-table-1"
                and ft.features[0].name == "fs1-my-feature-1"
                and ft.features[0].dtype == ValueType.INT64
                and ft.features[1].name == "fs1-my-feature-2"
                and ft.features[1].dtype == ValueType.STRING
                and ft.features[2].name == "fs1-my-feature-3"
                and ft.features[2].dtype == ValueType.STRING_LIST
                and ft.features[3].name == "fs1-my-feature-4"
                and ft.features[3].dtype == ValueType.BYTES_LIST
                and ft.entities[0] == "fs1-my-entity-1"
            )

        # Listing returns exactly the one table just applied.
        feature_tables = test_client.list_feature_tables()
        assert len(feature_tables) == 1 and matches_expected(feature_tables[0])

        # Fetching by name returns the same table.
        fetched = test_client.get_feature_table("my-feature-table-1")
        assert matches_expected(fetched)

        # After deletion the registry is empty again.
        test_client.delete_feature_table("my-feature-table-1")
        assert len(test_client.list_feature_tables()) == 0
# Example 8
def test_default_data_source_kw_arg_warning():
    """Positional construction of data sources is deprecated.

    For each source class, verify that:
      * positional construction emits a DeprecationWarning but still
        populates the expected attributes, and
      * leaving out a required argument raises ValueError.
    """
    # source_class = request.param
    # KafkaSource: positional args warn; attributes still land correctly.
    with pytest.warns(DeprecationWarning):
        source = KafkaSource(
            "name", "column", "bootstrap_servers", ProtoFormat("class_path"), "topic"
        )
        assert source.name == "name"
        assert source.timestamp_field == "column"
        assert source.kafka_options.bootstrap_servers == "bootstrap_servers"
        assert source.kafka_options.topic == "topic"
    # Omitting the message format -> ValueError.
    with pytest.raises(ValueError):
        KafkaSource("name", "column", "bootstrap_servers", topic="topic")

    # KinesisSource: same deprecation path for positional construction.
    with pytest.warns(DeprecationWarning):
        source = KinesisSource(
            "name",
            "column",
            "c_column",
            ProtoFormat("class_path"),
            "region",
            "stream_name",
        )
        assert source.name == "name"
        assert source.timestamp_field == "column"
        assert source.created_timestamp_column == "c_column"
        assert source.kinesis_options.region == "region"
        assert source.kinesis_options.stream_name == "stream_name"

    # Omitting the ProtoFormat argument -> ValueError.
    with pytest.raises(ValueError):
        KinesisSource(
            "name", "column", "c_column", region="region", stream_name="stream_name"
        )

    # RequestSource: positional name + Field list warn.
    with pytest.warns(DeprecationWarning):
        source = RequestSource(
            "name", [Field(name="val_to_add", dtype=Int64)], description="description"
        )
        assert source.name == "name"
        assert source.description == "description"

    # Omitting the Field list -> ValueError.
    with pytest.raises(ValueError):
        RequestSource("name")

    # PushSource: positional name + batch source warn.
    with pytest.warns(DeprecationWarning):
        source = PushSource(
            "name",
            BigQuerySource(name="bigquery_source", table="table"),
            description="description",
        )
        assert source.name == "name"
        assert source.description == "description"
        assert source.batch_source.name == "bigquery_source"

    # Omitting the batch source -> ValueError.
    with pytest.raises(ValueError):
        PushSource("name")

    # No name warning for DataSource
    with pytest.warns(UserWarning):
        source = KafkaSource(
            event_timestamp_column="column",
            bootstrap_servers="bootstrap_servers",
            message_format=ProtoFormat("class_path"),
            topic="topic",
        )