def test_feature_table_import_export_yaml(self, batch_source):
    """Round-trip a feature table through YAML and expect an equal object back."""
    # Each stream column maps onto a feature of the same name.
    kafka_field_mapping = {
        "ride_distance": "ride_distance",
        "ride_duration": "ride_duration",
    }
    stream_source = KafkaSource(
        field_mapping=kafka_field_mapping,
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat(class_path="class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
    )

    ride_features = [
        Feature(name="ride_distance", dtype=ValueType.FLOAT),
        Feature(name="ride_duration", dtype=ValueType.STRING),
    ]
    original_table = FeatureTable(
        name="car_driver",
        features=ride_features,
        entities=["car_driver_entity"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Serialise to a YAML string, then rebuild a table from that string.
    restored_table = FeatureTable.from_yaml(original_table.to_yaml())

    # The rebuilt table must compare equal to the one we started from.
    assert original_table == restored_table
def apply_feature_table(self, feature_table: FeatureTable, project: str, commit: bool = True):
    """
    Registers a single feature table with Feast

    The table is validated, converted to its proto form and stamped with
    ``project``. Any entry in the cached registry that has the same name and
    project is removed before the new proto is appended.

    Args:
        feature_table: Feature table that will be registered
        project: Feast project that this feature table belongs to
        commit: When true, ``self.commit()`` is called once the cached
            registry has been updated
    """
    feature_table.is_valid()
    new_table_proto = feature_table.to_proto()
    new_table_proto.spec.project = project

    self._prepare_registry_for_changes()
    assert self.cached_registry_proto

    registered_tables = self.cached_registry_proto.feature_tables
    table_name = new_table_proto.spec.name

    # Position of the first entry with the same (name, project), if any.
    stale_position = next(
        (
            position
            for position, registered in enumerate(registered_tables)
            if registered.spec.name == table_name
            and registered.spec.project == project
        ),
        None,
    )
    if stale_position is not None:
        del registered_tables[stale_position]

    registered_tables.append(new_table_proto)

    if commit:
        self.commit()
def apply_feature_table(self, feature_table: FeatureTable, project: str):
    """
    Registers a single feature table with Feast

    The table is validated and converted to a proto, then an updater
    callback is handed to ``self._registry_store.update_registry``. The
    callback replaces the first entry with the same name and project, or
    simply appends when there is no such entry.

    Args:
        feature_table: Feature table that will be registered
        project: Feast project that this feature table belongs to
    """
    feature_table.is_valid()
    new_table_proto = feature_table.to_proto()
    new_table_proto.spec.project = project

    def updater(registry_proto: RegistryProto):
        tables = registry_proto.feature_tables
        for position, current in enumerate(tables):
            same_name = current.spec.name == new_table_proto.spec.name
            if same_name and current.spec.project == project:
                # Drop the outdated entry; only the first match is replaced.
                del tables[position]
                break
        # Reached both after a replacement and when nothing matched.
        tables.append(new_table_proto)
        return registry_proto

    self._registry_store.update_registry(updater)
def test_feature_table_import_export_yaml(self):
    """Round-trip a proto-backed feature table through YAML and expect equality."""
    # Batch source: built from SourceType(1) with file options.
    batch_source = DataSource(
        type=SourceType(1).name,
        field_mapping={
            "ride_distance": "ride_distance",
            "ride_duration": "ride_duration",
        },
        options=FileOptions(file_format="avro", file_url="data/test.avro"),
        timestamp_column="ts_col",
        date_partition_column="date_partition_col",
    )

    # Stream source: built from SourceType(3) with Kafka options.
    kafka_options = KafkaOptions(
        bootstrap_servers="localhost:9094",
        class_path="random/path/to/class",
        topic="test_topic",
    )
    stream_source = DataSource(
        type=SourceType(3).name,
        field_mapping={
            "ride_distance": "ride_distance",
            "ride_duration": "ride_duration",
        },
        options=kafka_options,
        timestamp_column="ts_col",
    )

    # This variant of FeatureTable is fed protos rather than SDK objects.
    feature_protos = [
        FeatureV2(name="ride_distance", dtype=ValueType.FLOAT).to_proto(),
        FeatureV2(name="ride_duration", dtype=ValueType.STRING).to_proto(),
    ]
    original_table = FeatureTable(
        name="car_driver",
        features=feature_protos,
        entities=["car_driver_entity"],
        labels={"team": "matchmaking"},
        batch_source=batch_source.to_proto(),
        stream_source=stream_source.to_proto(),
    )

    # Serialise to a YAML string, then rebuild a table from that string.
    yaml_text = original_table.to_yaml()
    restored_table = FeatureTable.from_yaml(yaml_text)

    # The rebuilt table must compare equal to the one we started from.
    assert original_table == restored_table
def get_feature_table(self, name: str, project: str = None) -> FeatureTable:
    """
    Retrieves a feature table.

    When telemetry is enabled a usage record is logged first. The lookup is
    served by ``self._registry`` when the object-store registry is in use,
    and by a ``GetFeatureTable`` gRPC call otherwise.

    Args:
        project: Feast project that this feature table belongs to;
            falls back to ``self.project`` when None
        name: Name of feature table

    Returns:
        Returns either the specified feature table, or raises an exception if
        none is found
    """
    if self._telemetry_enabled:
        log_usage(
            "get_feature_table",
            self._telemetry_id,
            datetime.utcnow(),
            self.version(sdk_only=True),
        )

    project = self.project if project is None else project

    if self._use_object_store_registry:
        return self._registry.get_feature_table(name, project)

    try:
        core_service = self._core_service
        request = GetFeatureTableRequest(project=project, name=name.strip())
        response = core_service.GetFeatureTable(
            request, metadata=self._get_grpc_metadata(),
        )  # type: GetFeatureTableResponse
    except grpc.RpcError as e:
        # Re-raise carrying only the details text of the gRPC error.
        raise grpc.RpcError(e.details())
    return FeatureTable.from_proto(response.table)
def list_feature_tables(
    self, project: str = None, labels: Dict[str, str] = None
) -> List[FeatureTable]:
    """
    Retrieve a list of feature tables from Feast Core

    Args:
        project: Filter feature tables based on project name; falls back to
            ``self.project`` when None
        labels: Labels passed to the request filter alongside the project;
            None (the default) is sent as an empty dict

    Returns:
        List of feature tables, each with its ``_client`` set to this client
    """
    if project is None:
        project = self.project

    # A fresh dict per call avoids sharing one mutable default object
    # between every invocation of this method.
    if labels is None:
        labels = {}

    # Named `table_filter` so the builtin `filter` is not shadowed.
    table_filter = ListFeatureTablesRequest.Filter(project=project, labels=labels)

    # Get latest feature tables from Feast Core
    feature_table_protos = self._core_service.ListFeatureTables(
        ListFeatureTablesRequest(filter=table_filter),
        metadata=self._get_grpc_metadata(),
    )  # type: ListFeatureTablesResponse

    # Extract feature tables and return
    feature_tables = []
    for feature_table_proto in feature_table_protos.tables:
        feature_table = FeatureTable.from_proto(feature_table_proto)
        feature_table._client = self
        feature_tables.append(feature_table)
    return feature_tables
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    """
    Ingest a dataframe into a BigQuery-backed feature table and read it back.

    The test registers both entities and the feature table, ingests
    ``bq_dataframe``, polls BigQuery until the staging table exists, and then
    asserts that a ``SELECT *`` over that table equals the ingested frame.

    Args:
        feast_client: Client used to apply the objects and ingest the data
        customer_entity: Entity applied before the feature table
        driver_entity: Entity applied before the feature table
        bq_dataframe: Data that is ingested and later compared against
        bq_dataset: BigQuery dataset in which the staging table lives
        pytestconfig: Source of the ``bq_project`` option
    """
    bq_project = pytestconfig.getoption("bq_project")
    # Use the documented %S (seconds) directive. Lowercase %s is a
    # platform-specific extension that is not available everywhere.
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%S}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ for table until the table has been created
    def try_get_table():
        # Returns (table, True) once the table exists, (None, False) until then.
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
def get_feature_table(self, name: str, project: str = None) -> FeatureTable:
    """
    Retrieves a feature table.

    Issues a ``GetFeatureTable`` gRPC call and converts the table in the
    response into a ``FeatureTable``.

    Args:
        project: Feast project that this feature table belongs to;
            falls back to ``self.project`` when None
        name: Name of feature table

    Returns:
        Returns either the specified feature table, or raises an exception if
        none is found
    """
    project = self.project if project is None else project

    try:
        core_service = self._core_service
        request = GetFeatureTableRequest(project=project, name=name.strip())
        response = core_service.GetFeatureTable(
            request, metadata=self._get_grpc_metadata(),
        )  # type: GetFeatureTableResponse
    except grpc.RpcError as e:
        # Re-raise carrying only the details text of the gRPC error.
        raise grpc.RpcError(e.details())

    return FeatureTable.from_proto(response.table)
def alltypes_featuretable():
    """Build the "alltypes" feature table: one feature per listed value type."""
    parquet_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    # (feature name, value type) pairs, in registration order.
    feature_specs = [
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("double_list_feature", ValueType.DOUBLE_LIST),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
    ]
    all_features = [
        Feature(name=feature_name, dtype=value_type)
        for feature_name, value_type in feature_specs
    ]

    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=all_features,
        max_age=Duration(seconds=3600),
        batch_source=parquet_source,
        labels={"cat": "alltypes"},
    )
def feature_table_create(filename):
    """
    Create or update a feature table
    """
    # Build one FeatureTable per dict yielded by the YAML loader.
    loaded_tables = []
    for table_dict in yaml_loader(filename):
        loaded_tables.append(FeatureTable.from_dict(table_dict))

    # The whole list is handed to the client in a single call.
    client = Client()  # type: Client
    client.apply_feature_table(loaded_tables)
def test_add_feature(self, batch_source):
    """Check that add_feature appends the new feature after the existing ones."""
    initial_features = [
        Feature(name="ride_distance", dtype=ValueType.FLOAT),
        Feature(name="ride_duration", dtype=ValueType.STRING),
    ]
    table = FeatureTable(
        name="car_driver",
        features=initial_features,
        entities=["car_driver_entity"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    extra_feature = Feature(name="new_ride_distance", dtype=ValueType.FLOAT)
    table.add_feature(extra_feature)

    # Exactly three features, in insertion order.
    observed_names = [feature.name for feature in table.features]
    assert observed_names == [
        "ride_distance",
        "ride_duration",
        "new_ride_distance",
    ]
def test_apply_feature_table_success(self, test_client):
    """Apply one feature table and check that listing returns it intact."""
    test_client.set_project("project1")

    # Create Feature Tables
    batch_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    stream_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        class_path="random/path/to/class",
        topic="test_topic",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
    )

    # (name, dtype) of every feature, in the order they are declared.
    expected_features = [
        ("fs1-my-feature-1", ValueType.INT64),
        ("fs1-my-feature-2", ValueType.STRING),
        ("fs1-my-feature-3", ValueType.STRING_LIST),
        ("fs1-my-feature-4", ValueType.BYTES_LIST),
    ]
    ft1 = FeatureTable(
        name="my-feature-table-1",
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in expected_features
        ],
        entities=["fs1-my-entity-1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Register Feature Table with Core
    test_client.apply_feature_table(ft1)

    # List Feature Tables
    listed_tables = test_client.list_feature_tables()
    assert len(listed_tables) == 1

    listed = listed_tables[0]
    assert listed.name == "my-feature-table-1"
    for position, (feature_name, value_type) in enumerate(expected_features):
        assert listed.features[position].name == feature_name
        assert listed.features[position].dtype == value_type
    assert listed.entities[0] == "fs1-my-entity-1"
def feature_table():
    """Return a feature table named "ft" with no entities or features and a Kafka stream source."""
    kafka_source = KafkaSource(
        topic="t",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        event_timestamp_column="",
    )
    return FeatureTable(
        name="ft", entities=[], features=[], stream_source=kafka_source
    )
def _apply_feature_table(self, project: str, feature_table: FeatureTable):
    """
    Registers a single feature table with Feast

    The table is validated, sent to Feast Core via ``ApplyFeatureTable``, and
    then updated in place from the table that Core returns.

    Args:
        project: Project name placed on the apply request
        feature_table: Feature table that will be registered
    """
    feature_table.is_valid()
    table_spec = feature_table.to_spec_proto()

    # Convert the feature table to a request and send to Feast Core
    try:
        core_service = self._core_service
        request = ApplyFeatureTableRequest(project=project, table_spec=table_spec)  # type: ignore
        response = core_service.ApplyFeatureTable(
            request,
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: ApplyFeatureTableResponse
    except grpc.RpcError as e:
        # Re-raise carrying only the details text of the gRPC error.
        raise grpc.RpcError(e.details())

    # Copy the state of the returned table onto the caller's object.
    applied = FeatureTable.from_proto(response.table)
    feature_table._update_from_feature_table(applied)
def bq_featuretable(bq_table_id):
    """Build "basic_featuretable" backed by the BigQuery table `bq_table_id`."""
    bq_source = BigQuerySource(table_ref=bq_table_id, timestamp_column="datetime")
    dev_features = [
        Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
        Feature(name="dev_feature_string", dtype=ValueType.STRING),
    ]
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=dev_features,
        max_age=Duration(seconds=3600),
        batch_source=bq_source,
    )
def list_feature_tables(self, project: str) -> List[FeatureTable]:
    """
    Retrieve a list of feature tables from the registry

    Args:
        project: Only tables whose spec carries this project name are returned

    Returns:
        List of feature tables, in registry order
    """
    return [
        FeatureTable.from_proto(table_proto)
        for table_proto in self._get_registry_proto().feature_tables
        if table_proto.spec.project == project
    ]
def get_feature_table(self, name: str, project: str) -> FeatureTable:
    """
    Retrieves a feature table.

    Args:
        name: Name of feature table
        project: Feast project that this feature table belongs to

    Returns:
        The first registry entry whose spec matches both name and project

    Raises:
        FeatureTableNotFoundException: when no entry matches
    """
    for candidate in self._get_registry_proto().feature_tables:
        candidate_spec = candidate.spec
        if candidate_spec.name == name:
            if candidate_spec.project == project:
                return FeatureTable.from_proto(candidate)
    raise FeatureTableNotFoundException(project, name)
def basic_featuretable():
    """Build "basic_featuretable" with a file batch source and a Kafka stream source."""
    # Both sources use the same column mapping; each gets its own copy.
    column_mapping = {
        "dev_entity": "dev_entity_field",
        "dev_feature_float": "dev_feature_float_field",
        "dev_feature_string": "dev_feature_string_field",
    }

    file_source = FileSource(
        field_mapping=dict(column_mapping),
        file_format="PARQUET",
        file_url="gs://example/feast/*",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
        date_partition_column="datetime",
    )
    kafka_source = KafkaSource(
        field_mapping=dict(column_mapping),
        bootstrap_servers="localhost:9094",
        class_path="random/path/to/class",
        topic="test_topic",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
    )

    dev_features = [
        Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
        Feature(name="dev_feature_string", dtype=ValueType.STRING),
    ]
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=dev_features,
        max_age=Duration(seconds=3600),
        batch_source=file_source,
        stream_source=kafka_source,
        labels={"key1": "val1", "key2": "val2"},
    )
def _create_ft(self, client: Client, features) -> None:
    """
    Register the "driver_car_id" entity and a feature table named
    ``self.table_name`` that carries the given features.

    Args:
        client: Client used to apply the entity and the feature table
        features: Features placed on the created feature table
    """
    # Register Entity with Core
    driver_entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking"},
    )
    client.apply_entity(driver_entity)

    # Create Feature Tables
    parquet_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    kafka_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
    )
    table = FeatureTable(
        name=self.table_name,
        features=features,
        entities=["driver_car_id"],
        labels={"team": "matchmaking"},
        batch_source=parquet_source,
        stream_source=kafka_source,
    )

    # Register Feature Table with Core
    client.apply_feature_table(table)
created_timestamp_column="timestamp", date_partition_column="date_partition_col", ) # first feature table for testing, with all of Feast's datatypes table_1 = FeatureTable( name="test_feature_table_all_feature_dtypes", features=[ Feature(name="test_BYTES_feature", dtype=ValueType.BYTES), Feature(name="test_STRING_feature", dtype=ValueType.STRING), Feature(name="test_INT32_feature", dtype=ValueType.INT32), Feature(name="test_INT64_feature", dtype=ValueType.INT64), Feature(name="test_DOUBLE_feature", dtype=ValueType.DOUBLE), Feature(name="test_FLOAT_feature", dtype=ValueType.FLOAT), Feature(name="test_BOOL_feature", dtype=ValueType.BOOL), Feature(name="test_BYTES_LIST_feature", dtype=ValueType.BYTES_LIST), Feature(name="test_STRING_LIST_feature", dtype=ValueType.STRING_LIST), Feature(name="test_INT32_LIST_feature", dtype=ValueType.INT32_LIST), Feature(name="test_INT64_LIST_feature", dtype=ValueType.INT64_LIST), Feature(name="test_DOUBLE_LIST_feature", dtype=ValueType.DOUBLE_LIST), Feature(name="test_FLOAT_LIST_feature", dtype=ValueType.FLOAT_LIST), Feature(name="test_BOOL_LIST_feature", dtype=ValueType.BOOL_LIST), ], entities=["dummy_entity_1", "dummy_entity_2"], labels={"team": "matchmaking"}, batch_source=batch_source, ) # second feature table for testing, with just a single feature table_2 = FeatureTable( name="test_feature_table_single_feature", features=[
def _mock_feature_table(self, labels: dict = None, add_stream_source: bool = False) -> None:
    """
    Stub the extractor's client so it serves one "driver_trips" feature table.

    ``list_feature_tables`` is set to return a single table built from a
    spec dict, and ``get_entity`` to return the "driver_id" entity.

    Args:
        labels: Labels placed in the table spec; None (the default) means an
            empty dict
        add_stream_source: When true, a Kafka stream source with an Avro
            schema is added to the table spec
    """
    # A fresh dict per call: the value is embedded in the spec below, so a
    # shared mutable default could leak state between calls.
    if labels is None:
        labels = {}

    table_spec = {
        "name": "driver_trips",
        "entities": ["driver_id"],
        "features": [{"name": "trips_today", "valueType": "INT32"}],
        "labels": labels,
        "batchSource": {
            "type": "BATCH_FILE",
            "fileOptions": {
                "fileFormat": {"parquetFormat": {}},
                "fileUrl": "file:///some/location",
            },
        },
    }

    if add_stream_source:
        # The Avro schema is embedded in the spec as a JSON string.
        avro_schema_json = json.dumps(
            {
                "type": "record",
                "name": "DriverTrips",
                "fields": [
                    {"name": "driver_id", "type": "long"},
                    {"name": "trips_today", "type": "int"},
                    {
                        "name": "datetime",
                        "type": {"type": "long", "logicalType": "timestamp-micros"},
                    },
                ],
            }
        )

        table_spec["streamSource"] = {
            "type": "STREAM_KAFKA",
            "eventTimestampColumn": "datetime",
            "createdTimestampColumn": "datetime",
            "kafkaOptions": {
                "bootstrapServers": "broker1",
                "topic": "driver_trips",
                "messageFormat": {
                    "avroFormat": {
                        "schemaJson": avro_schema_json,
                    }
                },
            },
        }

    self.extractor._client.list_feature_tables.return_value = [
        FeatureTable.from_dict(
            {
                "spec": table_spec,
                "meta": {"createdTimestamp": "2020-01-01T00:00:00Z"},
            }
        )
    ]
    self.extractor._client.get_entity.return_value = Entity.from_dict(
        {
            "spec": {
                "name": "driver_id",
                "valueType": "INT64",
                "description": "Internal identifier of the driver",
            }
        }
    )
def test_apply_feature_table_integration(self, test_client):
    """Apply a feature table, then verify it via list, get and delete."""
    # Create Feature Tables
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    stream_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
    )

    # (name, dtype) of every feature, in the order they are declared.
    expected_features = [
        ("fs1-my-feature-1", ValueType.INT64),
        ("fs1-my-feature-2", ValueType.STRING),
        ("fs1-my-feature-3", ValueType.STRING_LIST),
        ("fs1-my-feature-4", ValueType.BYTES_LIST),
    ]
    ft1 = FeatureTable(
        name="my-feature-table-1",
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in expected_features
        ],
        entities=["fs1-my-entity-1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    def assert_matches_ft1(table):
        # Checks run in order: name, each feature's name and dtype, first entity.
        assert table.name == "my-feature-table-1"
        for position, (feature_name, value_type) in enumerate(expected_features):
            assert table.features[position].name == feature_name
            assert table.features[position].dtype == value_type
        assert table.entities[0] == "fs1-my-entity-1"

    # Register Feature Table with Core
    test_client.apply(ft1)

    # List Feature Tables
    listed_tables = test_client.list_feature_tables()
    assert len(listed_tables) == 1
    assert_matches_ft1(listed_tables[0])

    # Fetch the same table by name
    fetched_table = test_client.get_feature_table("my-feature-table-1")
    assert_matches_ft1(fetched_table)

    # After deletion nothing should be listed any more
    test_client.delete_feature_table("my-feature-table-1")
    remaining_tables = test_client.list_feature_tables()
    assert len(remaining_tables) == 0