Example #1
# Imports assumed for this snippet: FeatureView and ValueType come from the
# Feast SDK, while test_feature_store, dataframe_source, prep_file_source and
# the simple_bq_source_* helpers are fixtures/utilities from Feast's own test suite.
from datetime import timedelta

from feast import FeatureView, ValueType


def test_feature_view_inference_success(test_feature_store, dataframe_source):
    with prep_file_source(
        df=dataframe_source, event_timestamp_column="ts_1"
    ) as file_source:
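        # No features are declared on the views; Feast should infer the
        # schema from each batch source (file, BQ table ref, and BQ query).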
        fv1 = FeatureView(
            name="fv1",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=file_source,
            tags={},
        )

        fv2 = FeatureView(
            name="fv2",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=simple_bq_source_using_table_ref_arg(dataframe_source, "ts_1"),
            tags={},
        )

        fv3 = FeatureView(
            name="fv3",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=simple_bq_source_using_query_arg(dataframe_source, "ts_1"),
            tags={},
        )

        test_feature_store.apply([fv1, fv2, fv3])  # Register Feature Views
        feature_view_1 = test_feature_store.list_feature_views()[0]
        feature_view_2 = test_feature_store.list_feature_views()[1]
        feature_view_3 = test_feature_store.list_feature_views()[2]

        actual_file_source = {
            (feature.name, feature.dtype) for feature in feature_view_1.features
        }
        actual_bq_using_table_ref_arg_source = {
            (feature.name, feature.dtype) for feature in feature_view_2.features
        }
        actual_bq_using_query_arg_source = {
            (feature.name, feature.dtype) for feature in feature_view_3.features
        }
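        # Inferred (name, dtype) pairs expected from the source dataframe's columns.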
        expected = {
            ("float_col", ValueType.DOUBLE),
            ("int64_col", ValueType.INT64),
            ("string_col", ValueType.STRING),
        }

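        # All three sources should yield the same inferred feature schema.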
        assert (
            expected
            == actual_file_source
            == actual_bq_using_table_ref_arg_source
            == actual_bq_using_query_arg_source
        )

        test_feature_store.teardown()
Example #2
# Imports assumed for this snippet; the paths follow the Feast SDK layout at
# the time, and simple_dataset_1 plus the source helpers are fixtures/utilities
# from Feast's own test suite.
import pytest

from feast.errors import RegistryInferenceFailure
from feast.inference import update_data_sources_with_inferred_event_timestamp_col
from feast.repo_config import RepoConfig


def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1):
    # Build a copy of the dataset with a second, equally viable event timestamp
    # column; inference should fail when more than one candidate exists.
    df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True)
    df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"]

    with prep_file_source(df=simple_dataset_1) as file_source:
        data_sources = [
            file_source,
            simple_bq_source_using_table_ref_arg(simple_dataset_1),
            simple_bq_source_using_query_arg(simple_dataset_1),
        ]
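        # None of the sources declares an event_timestamp_column; inference
        # should pick the single viable candidate, "ts_1".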
        update_data_sources_with_inferred_event_timestamp_col(
            data_sources, RepoConfig(provider="local", project="test"))
        actual_event_timestamp_cols = [
            source.event_timestamp_column for source in data_sources
        ]

        assert actual_event_timestamp_cols == ["ts_1", "ts_1", "ts_1"]

    with prep_file_source(df=df_with_two_viable_timestamp_cols) as file_source:
        with pytest.raises(RegistryInferenceFailure):
            # two viable event_timestamp_columns
            update_data_sources_with_inferred_event_timestamp_col(
                [file_source], RepoConfig(provider="local", project="test"))