Beispiel #1
0
    def test_feature_table_import_export_yaml(self, batch_source):
        """Round-trip a feature table through YAML and check it is unchanged."""
        column_mapping = {
            "ride_distance": "ride_distance",
            "ride_duration": "ride_duration",
        }
        kafka_source = KafkaSource(
            field_mapping=column_mapping,
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat(class_path="class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
        )
        ride_features = [
            Feature(name="ride_distance", dtype=ValueType.FLOAT),
            Feature(name="ride_duration", dtype=ValueType.STRING),
        ]
        original_table = FeatureTable(
            name="car_driver",
            features=ride_features,
            entities=["car_driver_entity"],
            labels={"team": "matchmaking"},
            batch_source=batch_source,
            stream_source=kafka_source,
        )

        # Serialise to YAML text, then rebuild a table from that text.
        restored_table = FeatureTable.from_yaml(original_table.to_yaml())

        # The rebuilt table must compare equal to the one we started from.
        assert original_table == restored_table
Beispiel #2
0
    def apply_feature_table(self,
                            feature_table: FeatureTable,
                            project: str,
                            commit: bool = True):
        """
        Add a feature table to the cached registry, replacing any entry with
        the same name in the same project.

        Args:
            feature_table: Feature table that will be registered
            project: Feast project that this feature table belongs to
            commit: When True, ``self.commit()`` is called after the cached
                registry has been updated
        """
        feature_table.is_valid()
        new_proto = feature_table.to_proto()
        new_proto.spec.project = project
        self._prepare_registry_for_changes()
        assert self.cached_registry_proto

        registered = self.cached_registry_proto.feature_tables
        for position, current in enumerate(registered):
            same_name = current.spec.name == new_proto.spec.name
            if same_name and current.spec.project == project:
                # Drop the stale entry; the replacement is appended below.
                del registered[position]
                break

        registered.append(new_proto)
        if commit:
            self.commit()
Beispiel #3
0
    def apply_feature_table(self, feature_table: FeatureTable, project: str):
        """
        Register a feature table through the registry store, replacing any
        entry with the same name in the same project.

        Args:
            feature_table: Feature table that will be registered
            project: Feast project that this feature table belongs to
        """
        feature_table.is_valid()
        new_proto = feature_table.to_proto()
        new_proto.spec.project = project

        def updater(registry_proto: RegistryProto):
            # Remove the first entry matching (name, project), if there is one,
            # then append the new proto in either case.
            tables = registry_proto.feature_tables
            for position, current in enumerate(tables):
                if (current.spec.name == new_proto.spec.name
                        and current.spec.project == project):
                    del tables[position]
                    break
            tables.append(new_proto)
            return registry_proto

        self._registry_store.update_registry(updater)
Beispiel #4
0
    def test_feature_table_import_export_yaml(self):
        """Round-trip a proto-built feature table through YAML and compare."""
        ride_columns = {
            "ride_distance": "ride_distance",
            "ride_duration": "ride_duration",
        }

        file_options = FileOptions(file_format="avro", file_url="data/test.avro")
        batch_source = DataSource(
            type=SourceType(1).name,
            # Each source gets its own copy of the mapping.
            field_mapping=dict(ride_columns),
            options=file_options,
            timestamp_column="ts_col",
            date_partition_column="date_partition_col",
        )

        kafka_options = KafkaOptions(
            bootstrap_servers="localhost:9094",
            class_path="random/path/to/class",
            topic="test_topic",
        )
        stream_source = DataSource(
            type=SourceType(3).name,
            field_mapping=dict(ride_columns),
            options=kafka_options,
            timestamp_column="ts_col",
        )

        distance = FeatureV2(name="ride_distance", dtype=ValueType.FLOAT)
        duration = FeatureV2(name="ride_duration", dtype=ValueType.STRING)
        original_table = FeatureTable(
            name="car_driver",
            features=[distance.to_proto(), duration.to_proto()],
            entities=["car_driver_entity"],
            labels={"team": "matchmaking"},
            batch_source=batch_source.to_proto(),
            stream_source=stream_source.to_proto(),
        )

        # Serialise to YAML text, then rebuild a table from that text.
        restored_table = FeatureTable.from_yaml(original_table.to_yaml())

        # The rebuilt table must compare equal to the one we started from.
        assert original_table == restored_table
Beispiel #5
0
    def get_feature_table(self, name: str, project: str = None) -> FeatureTable:
        """
        Look up a single feature table by name.

        Args:
            name: Name of feature table
            project: Feast project that this feature table belongs to;
                ``self.project`` is used when None

        Returns:
            The feature table returned by the registry, or built from the
            core service response

        Raises:
            grpc.RpcError: carrying the original error's details, when the
                core service call fails
        """
        if self._telemetry_enabled:
            log_usage(
                "get_feature_table",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )

        target_project = self.project if project is None else project

        # Registry-backed lookup short-circuits the gRPC path entirely.
        if self._use_object_store_registry:
            return self._registry.get_feature_table(name, target_project)

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        return FeatureTable.from_proto(response.table)
Beispiel #6
0
    def list_feature_tables(
        self, project: str = None, labels: Dict[str, str] = None
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name;
                ``self.project`` is used when None
            labels: Label key/value pairs passed to the request filter;
                None is treated as an empty dict

        Returns:
            List of feature tables, each with ``_client`` set to this client
        """

        if project is None:
            project = self.project

        # Build a fresh dict per call instead of sharing one mutable default
        # object across every invocation.
        if labels is None:
            labels = {}

        # Named request_filter so the builtin `filter` is not shadowed.
        request_filter = ListFeatureTablesRequest.Filter(
            project=project, labels=labels
        )

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=request_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and return
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables
Beispiel #7
0
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    """Ingest a dataframe into a BigQuery-backed table and read it back."""
    bq_project = pytestconfig.getoption("bq_project")
    # NOTE(review): lowercase `%s` is not among the strftime directives that
    # Python documents and is platform-dependent -- confirm `%S` was not meant.
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%s}"

    staging_source = BigQuerySource(
        table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
        event_timestamp_column="datetime",
        created_timestamp_column="timestamp",
    )
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=staging_source,
    )

    # Register both entities first, then the feature table itself.
    for registrable in (customer_entity, driver_entity, ft):
        feast_client.apply(registrable)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    def try_get_table():
        # Returns (table, True) once the table exists, (None, False) until then.
        try:
            return (
                bq_client.get_table(
                    bigquery.TableReference(
                        bigquery.DatasetReference(bq_project, bq_dataset),
                        bq_table_id,
                    )
                ),
                True,
            )
        except NotFound:
            return None, False

    # Poll BQ until the staging table has been created.
    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"
    query_df = bq_client.query(query_string).to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
Beispiel #8
0
    def get_feature_table(self,
                          name: str,
                          project: str = None) -> FeatureTable:
        """
        Fetch a single feature table from the core service.

        Args:
            name: Name of feature table
            project: Feast project that this feature table belongs to;
                ``self.project`` is used when None

        Returns:
            The feature table built from the service response

        Raises:
            grpc.RpcError: carrying the original error's details, when the
                service call fails
        """
        lookup_project = self.project if project is None else project

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=lookup_project,
                                       name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        else:
            return FeatureTable.from_proto(response.table)
Beispiel #9
0
def alltypes_featuretable():
    """Build the "alltypes" feature table with one feature per listed type."""
    typed_columns = [
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("double_list_feature", ValueType.DOUBLE_LIST),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
    ]
    parquet_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=[
            Feature(name=column, dtype=kind) for column, kind in typed_columns
        ],
        max_age=Duration(seconds=3600),
        batch_source=parquet_source,
        labels={"cat": "alltypes"},
    )
Beispiel #10
0
def feature_table_create(filename):
    """
    Load feature table definitions from ``filename`` and apply them through
    a new Feast client.
    """
    parsed_tables = []
    for table_dict in yaml_loader(filename):
        parsed_tables.append(FeatureTable.from_dict(table_dict))

    client = Client()  # type: Client
    client.apply_feature_table(parsed_tables)
Beispiel #11
0
    def test_add_feature(self, batch_source):
        """Adding a feature appends it after the existing features."""
        initial_features = [
            Feature(name="ride_distance", dtype=ValueType.FLOAT),
            Feature(name="ride_duration", dtype=ValueType.STRING),
        ]
        table = FeatureTable(
            name="car_driver",
            features=initial_features,
            entities=["car_driver_entity"],
            labels={"team": "matchmaking"},
            batch_source=batch_source,
        )

        extra_feature = Feature(name="new_ride_distance", dtype=ValueType.FLOAT)
        table.add_feature(extra_feature)

        features = table.features
        # Length is checked first so the indexed checks below cannot overrun.
        assert len(features) == 3
        assert features[0].name == "ride_distance"
        assert features[1].name == "ride_duration"
        assert features[2].name == "new_ride_distance"
Beispiel #12
0
    def test_apply_feature_table_success(self, test_client):
        """Apply one feature table and verify it is listed back unchanged."""
        test_client.set_project("project1")

        expected_features = [
            ("fs1-my-feature-1", ValueType.INT64),
            ("fs1-my-feature-2", ValueType.STRING),
            ("fs1-my-feature-3", ValueType.STRING_LIST),
            ("fs1-my-feature-4", ValueType.BYTES_LIST),
        ]

        file_source = FileSource(
            file_format="parquet",
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )
        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            class_path="random/path/to/class",
            topic="test_topic",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
        )

        ft1 = FeatureTable(
            name="my-feature-table-1",
            features=[
                Feature(name=feature_name, dtype=feature_dtype)
                for feature_name, feature_dtype in expected_features
            ],
            entities=["fs1-my-entity-1"],
            labels={"team": "matchmaking"},
            batch_source=file_source,
            stream_source=kafka_source,
        )

        # Register Feature Table with Core
        test_client.apply_feature_table(ft1)

        feature_tables = test_client.list_feature_tables()

        # Exactly one table comes back, matching what was applied.
        assert len(feature_tables) == 1
        assert feature_tables[0].name == "my-feature-table-1"
        for position, (feature_name, feature_dtype) in enumerate(expected_features):
            assert feature_tables[0].features[position].name == feature_name
            assert feature_tables[0].features[position].dtype == feature_dtype
        assert feature_tables[0].entities[0] == "fs1-my-entity-1"
def feature_table():
    """Build a minimal "ft" feature table with only a Kafka stream source."""
    kafka_source = KafkaSource(
        topic="t",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        event_timestamp_column="",
    )
    return FeatureTable(
        name="ft",
        entities=[],
        features=[],
        stream_source=kafka_source,
    )
Beispiel #14
0
    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Send one feature table to Feast Core and sync the local object with
        the table returned in the response.

        Args:
            project: Project name sent in the apply request
            feature_table: Feature table that will be registered

        Raises:
            grpc.RpcError: carrying the original error's details, when the
                Core call fails
        """

        feature_table.is_valid()
        table_spec = feature_table.to_spec_proto()

        # Build the request and send it to Feast Core
        try:
            request = ApplyFeatureTableRequest(project=project, table_spec=table_spec)  # type: ignore
            response = self._core_service.ApplyFeatureTable(
                request,
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Copy the returned table's state back onto the caller's object.
        feature_table._update_from_feature_table(
            FeatureTable.from_proto(response.table)
        )
Beispiel #15
0
def bq_featuretable(bq_table_id):
    """Build "basic_featuretable" backed by the given BigQuery table ref."""
    driver_features = [
        Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
        Feature(name="dev_feature_string", dtype=ValueType.STRING),
    ]
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=driver_features,
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=bq_table_id,
            timestamp_column="datetime",
        ),
    )
Beispiel #16
0
    def list_feature_tables(self, project: str) -> List[FeatureTable]:
        """
        Collect every feature table in the registry that belongs to a project.

        Args:
            project: Only tables whose spec project equals this are returned

        Returns:
            List of feature tables, in registry order
        """
        return [
            FeatureTable.from_proto(table_proto)
            for table_proto in self._get_registry_proto().feature_tables
            if table_proto.spec.project == project
        ]
Beispiel #17
0
    def get_feature_table(self, name: str, project: str) -> FeatureTable:
        """
        Find a feature table in the registry by name and project.

        Args:
            name: Name of feature table
            project: Feast project that this feature table belongs to

        Returns:
            The first registry entry matching both name and project

        Raises:
            FeatureTableNotFoundException: when no entry matches
        """
        for candidate in self._get_registry_proto().feature_tables:
            spec = candidate.spec
            if spec.name != name:
                continue
            if spec.project != project:
                continue
            return FeatureTable.from_proto(candidate)
        raise FeatureTableNotFoundException(project, name)
Beispiel #18
0
def basic_featuretable():
    """Build "basic_featuretable" with a file batch source and Kafka stream."""
    column_mapping = {
        "dev_entity": "dev_entity_field",
        "dev_feature_float": "dev_feature_float_field",
        "dev_feature_string": "dev_feature_string_field",
    }

    file_source = FileSource(
        # Each source gets its own copy of the mapping.
        field_mapping=dict(column_mapping),
        file_format="PARQUET",
        file_url="gs://example/feast/*",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
        date_partition_column="datetime",
    )
    kafka_source = KafkaSource(
        field_mapping=dict(column_mapping),
        bootstrap_servers="localhost:9094",
        class_path="random/path/to/class",
        topic="test_topic",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
    )

    dev_features = [
        Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
        Feature(name="dev_feature_string", dtype=ValueType.STRING),
    ]
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=dev_features,
        max_age=Duration(seconds=3600),
        batch_source=file_source,
        stream_source=kafka_source,
        labels={"key1": "val1", "key2": "val2"},
    )
Beispiel #19
0
    def _create_ft(self, client: Client, features) -> None:
        """
        Register the "driver_car_id" entity and a feature table named
        ``self.table_name`` carrying the given features.

        Args:
            client: Client used to apply the entity and the feature table
            features: Features passed straight through to the feature table
        """
        # Register Entity with Core
        client.apply_entity(
            Entity(
                name="driver_car_id",
                description="Car driver id",
                value_type=ValueType.STRING,
                labels={"team": "matchmaking"},
            )
        )

        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )
        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat("class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
        )

        # Register Feature Table with Core
        client.apply_feature_table(
            FeatureTable(
                name=self.table_name,
                features=features,
                entities=["driver_car_id"],
                labels={"team": "matchmaking"},
                batch_source=file_source,
                stream_source=kafka_source,
            )
        )
Beispiel #20
0
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    # first feature table for testing, with all of Feast's datatypes
    table_1 = FeatureTable(
        name="test_feature_table_all_feature_dtypes",
        features=[
            Feature(name="test_BYTES_feature", dtype=ValueType.BYTES),
            Feature(name="test_STRING_feature", dtype=ValueType.STRING),
            Feature(name="test_INT32_feature", dtype=ValueType.INT32),
            Feature(name="test_INT64_feature", dtype=ValueType.INT64),
            Feature(name="test_DOUBLE_feature", dtype=ValueType.DOUBLE),
            Feature(name="test_FLOAT_feature", dtype=ValueType.FLOAT),
            Feature(name="test_BOOL_feature", dtype=ValueType.BOOL),
            Feature(name="test_BYTES_LIST_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="test_STRING_LIST_feature", dtype=ValueType.STRING_LIST),
            Feature(name="test_INT32_LIST_feature", dtype=ValueType.INT32_LIST),
            Feature(name="test_INT64_LIST_feature", dtype=ValueType.INT64_LIST),
            Feature(name="test_DOUBLE_LIST_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="test_FLOAT_LIST_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="test_BOOL_LIST_feature", dtype=ValueType.BOOL_LIST),
        ],
        entities=["dummy_entity_1", "dummy_entity_2"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
    )

    # second feature table for testing, with just a single feature
    table_2 = FeatureTable(
        name="test_feature_table_single_feature",
        features=[
    def _mock_feature_table(self,
                            labels: dict = {},
                            add_stream_source: bool = False) -> None:
        table_spec = {
            "name": "driver_trips",
            "entities": ["driver_id"],
            "features": [{
                "name": "trips_today",
                "valueType": "INT32"
            }],
            "labels": labels,
            "batchSource": {
                "type": "BATCH_FILE",
                "fileOptions": {
                    "fileFormat": {
                        "parquetFormat": {}
                    },
                    "fileUrl": "file:///some/location",
                },
            },
        }

        if add_stream_source:
            avro_schema_json = json.dumps({
                "type":
                "record",
                "name":
                "DriverTrips",
                "fields": [
                    {
                        "name": "driver_id",
                        "type": "long"
                    },
                    {
                        "name": "trips_today",
                        "type": "int"
                    },
                    {
                        "name": "datetime",
                        "type": {
                            "type": "long",
                            "logicalType": "timestamp-micros"
                        },
                    },
                ],
            })

            table_spec["streamSource"] = {
                "type": "STREAM_KAFKA",
                "eventTimestampColumn": "datetime",
                "createdTimestampColumn": "datetime",
                "kafkaOptions": {
                    "bootstrapServers": "broker1",
                    "topic": "driver_trips",
                    "messageFormat": {
                        "avroFormat": {
                            "schemaJson": avro_schema_json,
                        }
                    },
                },
            }

        self.extractor._client.list_feature_tables.return_value = [
            FeatureTable.from_dict({
                "spec": table_spec,
                "meta": {
                    "createdTimestamp": "2020-01-01T00:00:00Z"
                },
            })
        ]
        self.extractor._client.get_entity.return_value = Entity.from_dict({
            "spec": {
                "name": "driver_id",
                "valueType": "INT64",
                "description": "Internal identifier of the driver",
            }
        })
Beispiel #22
0
    def test_apply_feature_table_integration(self, test_client):
        """Apply, list, get and delete one feature table end to end."""
        expected_features = [
            ("fs1-my-feature-1", ValueType.INT64),
            ("fs1-my-feature-2", ValueType.STRING),
            ("fs1-my-feature-3", ValueType.STRING_LIST),
            ("fs1-my-feature-4", ValueType.BYTES_LIST),
        ]

        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url="file://feast/*",
            event_timestamp_column="ts_col",
            created_timestamp_column="timestamp",
            date_partition_column="date_partition_col",
        )
        kafka_source = KafkaSource(
            bootstrap_servers="localhost:9094",
            message_format=ProtoFormat("class.path"),
            topic="test_topic",
            event_timestamp_column="ts_col",
        )

        ft1 = FeatureTable(
            name="my-feature-table-1",
            features=[
                Feature(name=feature_name, dtype=feature_dtype)
                for feature_name, feature_dtype in expected_features
            ],
            entities=["fs1-my-entity-1"],
            labels={"team": "matchmaking"},
            batch_source=file_source,
            stream_source=kafka_source,
        )

        # Register Feature Table with Core
        test_client.apply(ft1)

        # Listing returns exactly the table that was applied.
        feature_tables = test_client.list_feature_tables()
        assert len(feature_tables) == 1
        assert feature_tables[0].name == "my-feature-table-1"
        for position, (feature_name, feature_dtype) in enumerate(expected_features):
            assert feature_tables[0].features[position].name == feature_name
            assert feature_tables[0].features[position].dtype == feature_dtype
        assert feature_tables[0].entities[0] == "fs1-my-entity-1"

        # Fetching by name returns the same content.
        feature_table = test_client.get_feature_table("my-feature-table-1")
        assert feature_table.name == "my-feature-table-1"
        for position, (feature_name, feature_dtype) in enumerate(expected_features):
            assert feature_table.features[position].name == feature_name
            assert feature_table.features[position].dtype == feature_dtype
        assert feature_table.entities[0] == "fs1-my-entity-1"

        # After deletion nothing is listed any more.
        test_client.delete_feature_table("my-feature-table-1")
        feature_tables = test_client.list_feature_tables()
        assert len(feature_tables) == 0