def test_create_stream_feature_view():
    stream_source = KafkaSource(
        name="kafka",
        event_timestamp_column="",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        topic="topic",
        batch_source=FileSource(path="some path"),
    )

    # A stream source with a batch source attached is valid.
    StreamFeatureView(
        name="test batch feature view",
        entities=[],
        ttl=timedelta(days=30),
        source=stream_source,
    )

    # Omitting the source entirely should fail.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view", entities=[], ttl=timedelta(days=30)
        )

    # A plain batch source is not a valid source for a stream feature view.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view",
            entities=[],
            ttl=timedelta(days=30),
            source=FileSource(path="some path"),
        )
def test_infer_odfv_list_features(environment, infer_features, tmp_path):
    fake_embedding = [1.0, 1.0]
    items_df = pd.DataFrame(
        data={
            "item_id": [0],
            "embedding_float": [fake_embedding],
            "embedding_double": [fake_embedding],
            "event_timestamp": [pd.Timestamp(datetime.utcnow())],
            "created": [pd.Timestamp(datetime.utcnow())],
        }
    )
    output_path = f"{tmp_path}/items.parquet"
    items_df.to_parquet(output_path)
    fake_items_src = FileSource(
        path=output_path,
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )
    items = create_item_embeddings_feature_view(fake_items_src)
    sim_odfv = similarity_feature_view(
        {"items": items, "input_request": create_similarity_request_data_source()},
        infer_features=infer_features,
    )

    store = environment.feature_store
    store.apply([item(), items, sim_odfv])
    odfv = store.get_on_demand_feature_view("similarity")
    assert len(odfv.features) == 2
def test_feature_view_kw_args_normal():
    file_source = FileSource(name="my-file-source", path="test.parquet")
    feature_view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    _ = FeatureService(
        name="my-feature-service", features=[feature_view[["feature1", "feature2"]]]
    )
def from_proto(data_source: DataSourceProto) -> Any:
    """
    Converts data source config in protobuf spec to a DataSource class object.

    Args:
        data_source: A protobuf representation of a DataSource.

    Returns:
        A DataSource class object.

    Raises:
        ValueError: The type of DataSource could not be identified.
    """
    if data_source.data_source_class_type:
        cls = get_data_source_class_from_type(data_source.data_source_class_type)
        return cls.from_proto(data_source)

    if data_source.file_options.file_format and data_source.file_options.file_url:
        from feast.infra.offline_stores.file_source import FileSource

        data_source_obj = FileSource.from_proto(data_source)
    elif (
        data_source.bigquery_options.table_ref or data_source.bigquery_options.query
    ):
        from feast.infra.offline_stores.bigquery_source import BigQuerySource

        data_source_obj = BigQuerySource.from_proto(data_source)
    elif data_source.redshift_options.table or data_source.redshift_options.query:
        from feast.infra.offline_stores.redshift_source import RedshiftSource

        data_source_obj = RedshiftSource.from_proto(data_source)
    elif (
        data_source.kafka_options.bootstrap_servers
        and data_source.kafka_options.topic
        and data_source.kafka_options.message_format
    ):
        data_source_obj = KafkaSource.from_proto(data_source)
    elif (
        data_source.kinesis_options.record_format
        and data_source.kinesis_options.region
        and data_source.kinesis_options.stream_name
    ):
        data_source_obj = KinesisSource.from_proto(data_source)
    else:
        raise ValueError("Could not identify the source type being added.")

    return data_source_obj
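# A minimal round-trip sketch of how this dispatcher is typically exercised:
# serialize a concrete source to its proto form and recover it via from_proto.
# The FileSource arguments below are illustrative assumptions, not fixtures
# from this repo; newer protos carry data_source_class_type, so dispatch may
# take the first branch rather than the file_options heuristic.
def _example_from_proto_round_trip():
    from feast.infra.offline_stores.file_source import FileSource

    source = FileSource(name="my-file-source", path="test.parquet")
    restored = from_proto(source.to_proto())
    assert isinstance(restored, FileSource)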
def test_hash():
    file_source = FileSource(name="my-file-source", path="test.parquet")
    feature_view_1 = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    feature_view_2 = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    feature_view_3 = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[Field(name="feature1", dtype=Float32)],
        source=file_source,
    )
    feature_view_4 = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[Field(name="feature1", dtype=Float32)],
        source=file_source,
        description="test",
    )

    # Identical definitions hash equal, so views 1 and 2 collapse into one entry.
    s1 = {feature_view_1, feature_view_2}
    assert len(s1) == 1

    # A different schema (view 3) or description (view 4) yields a distinct hash.
    s2 = {feature_view_1, feature_view_3}
    assert len(s2) == 2

    s3 = {feature_view_3, feature_view_4}
    assert len(s3) == 2

    s4 = {feature_view_1, feature_view_2, feature_view_3, feature_view_4}
    assert len(s4) == 3
def _localize_feature_view(self, feature_view: FeatureView):
    """
    Ensures that the `FeatureView` object points to files on the local disk.
    """
    if not isinstance(feature_view.batch_source, FileSource):
        return

    # Copy the parquet file to a local file and repoint the batch source at it.
    file_source: FileSource = feature_view.batch_source
    random_local_path = (
        FlyteContext.current_context().file_access.get_random_local_path(
            file_source.path
        )
    )
    FlyteContext.current_context().file_access.get_data(
        file_source.path,
        random_local_path,
        is_multipart=True,
    )
    feature_view.batch_source = FileSource(
        path=random_local_path,
        event_timestamp_column=file_source.event_timestamp_column,
    )
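# A minimal sketch of the same localization idea using only the standard
# library (illustrative assumption: the source path is a plain local or
# mounted file, so shutil.copy can stand in for Flyte's file_access layer):
import os
import shutil
import tempfile

def localize_file_source(source: FileSource) -> FileSource:
    # Materialize a copy in a fresh temp directory, then rebuild the source
    # around the local path, preserving the event timestamp column.
    local_path = os.path.join(tempfile.mkdtemp(), os.path.basename(source.path))
    shutil.copy(source.path, local_path)
    return FileSource(
        path=local_path,
        event_timestamp_column=source.event_timestamp_column,
    )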
def test_hash():
    file_source = FileSource(name="my-file-source", path="test.parquet")
    feature_view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    sources = [feature_view]
    on_demand_feature_view_1 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf1,
    )
    on_demand_feature_view_2 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf1,
    )
    on_demand_feature_view_3 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf2,
    )
    on_demand_feature_view_4 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf2,
        description="test",
    )

    # Identical definitions hash equal; changing the udf or the description
    # produces a distinct hash.
    s1 = {on_demand_feature_view_1, on_demand_feature_view_2}
    assert len(s1) == 1

    s2 = {on_demand_feature_view_1, on_demand_feature_view_3}
    assert len(s2) == 2

    s3 = {on_demand_feature_view_3, on_demand_feature_view_4}
    assert len(s3) == 2

    s4 = {
        on_demand_feature_view_1,
        on_demand_feature_view_2,
        on_demand_feature_view_3,
        on_demand_feature_view_4,
    }
    assert len(s4) == 3