def test_feature_view_inference_success(test_feature_store, dataframe_source):
    """Verify that applied feature views expose the expected feature schema.

    Three feature views are declared without an explicit feature list, each
    backed by a different batch source built from ``dataframe_source`` (the
    file source from ``prep_file_source`` plus the two ``simple_bq_source_*``
    helpers). After ``test_feature_store.apply`` the first three registered
    views must all report the same set of ``(name, dtype)`` pairs.

    Args:
        test_feature_store: Store under test; this test calls its ``apply``,
            ``list_feature_views`` and ``teardown`` methods.
        dataframe_source: Data handed to every source helper; ``"ts_1"`` is
            passed as its event timestamp column.
    """
    with prep_file_source(
        df=dataframe_source, event_timestamp_column="ts_1"
    ) as file_source:
        fv1 = FeatureView(
            name="fv1",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=file_source,
            tags={},
        )
        fv2 = FeatureView(
            name="fv2",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=simple_bq_source_using_table_ref_arg(
                dataframe_source, "ts_1"
            ),
            tags={},
        )
        fv3 = FeatureView(
            name="fv3",
            entities=["id"],
            ttl=timedelta(minutes=5),
            online=True,
            batch_source=simple_bq_source_using_query_arg(
                dataframe_source, "ts_1"
            ),
            tags={},
        )

        try:
            test_feature_store.apply([fv1, fv2, fv3])  # Register Feature Views

            # Query the store once rather than once per inspected view.
            registered_views = test_feature_store.list_feature_views()
            feature_view_1 = registered_views[0]
            feature_view_2 = registered_views[1]
            feature_view_3 = registered_views[2]

            actual_file_source = {
                (feature.name, feature.dtype)
                for feature in feature_view_1.features
            }
            actual_bq_using_table_ref_arg_source = {
                (feature.name, feature.dtype)
                for feature in feature_view_2.features
            }
            actual_bq_using_query_arg_source = {
                (feature.name, feature.dtype)
                for feature in feature_view_3.features
            }
            expected = {
                ("float_col", ValueType.DOUBLE),
                ("int64_col", ValueType.INT64),
                ("string_col", ValueType.STRING),
            }

            assert (
                expected
                == actual_file_source
                == actual_bq_using_table_ref_arg_source
                == actual_bq_using_query_arg_source
            )
        finally:
            # Run teardown even when apply or the assertion fails, so a
            # failing run still cleans up whatever the store's teardown
            # is responsible for.
            test_feature_store.teardown()
def test_update_data_sources_with_inferred_event_timestamp_col(
    simple_dataset_1,
):
    """Exercise event-timestamp-column inference on data sources.

    First, sources built from ``simple_dataset_1`` without an explicit event
    timestamp column are passed to
    ``update_data_sources_with_inferred_event_timestamp_col``; each must end
    up with ``event_timestamp_column == "ts_1"``.

    Second, a copy of the dataset with an extra ``ts_2`` column (a duplicate
    of ``ts_1``) is used; the same call must raise
    ``RegistryInferenceFailure``.
    """
    # Same data plus a second column holding the "ts_1" values.
    ambiguous_df = simple_dataset_1.copy(deep=True)
    ambiguous_df["ts_2"] = simple_dataset_1["ts_1"]

    with prep_file_source(df=simple_dataset_1) as file_source:
        table_ref_source = simple_bq_source_using_table_ref_arg(
            simple_dataset_1
        )
        query_source = simple_bq_source_using_query_arg(simple_dataset_1)
        sources_under_test = [file_source, table_ref_source, query_source]

        local_config = RepoConfig(provider="local", project="test")
        update_data_sources_with_inferred_event_timestamp_col(
            sources_under_test, local_config
        )

        inferred_columns = []
        for data_source in sources_under_test:
            inferred_columns.append(data_source.event_timestamp_column)

        assert inferred_columns == ["ts_1", "ts_1", "ts_1"]

    with prep_file_source(df=ambiguous_df) as file_source, pytest.raises(
        RegistryInferenceFailure
    ):
        # two viable event_timestamp_columns
        update_data_sources_with_inferred_event_timestamp_col(
            [file_source], RepoConfig(provider="local", project="test")
        )