コード例 #1
0
ファイル: test_feature_set.py プロジェクト: kiranvajja/feast
    def test_feature_set_ingest_success(self, dataframe, client):
        """Check that ingesting a dataframe calls the message producer.

        Builds a "driver-feature-set" with three features and one entity,
        points it at a Kafka source, swaps the message producer for a mock,
        registers the set through ``client`` and ingests ``dataframe``.
        The test passes when the mocked producer's ``send`` was called.
        """
        feature_set = FeatureSet("driver-feature-set")

        # Features are added first, in this order, followed by the entity.
        feature_specs = (
            ("feature_1", ValueType.FLOAT),
            ("feature_2", ValueType.STRING),
            ("feature_3", ValueType.INT64),
        )
        for feature_name, value_type in feature_specs:
            feature_set.add(Feature(name=feature_name, dtype=value_type))
        feature_set.add(Entity(name="entity_id", dtype=ValueType.INT64))

        feature_set.source = KafkaSource(
            topic="feature-topic",
            brokers="127.0.0.1",
        )

        # Replace the producer with a mock so that calls to send() can be
        # observed by the assertion at the end of the test.
        feature_set._message_producer = MagicMock()
        feature_set._message_producer.send = MagicMock()

        # Register with Feast core, then ingest the data into Feast.
        client.apply(feature_set)
        feature_set.ingest(dataframe=dataframe)

        # The producer must have been asked to send at least once.
        feature_set._message_producer.send.assert_called()
コード例 #2
0
ファイル: test_feature_set.py プロジェクト: kiranvajja/feast
    def test_feature_set_types_success(self, client, dataframe):
        """Check ingestion for a feature set declaring many value types.

        Creates an "all_types" feature set with a ``user_id`` entity and one
        feature per scalar and list ``ValueType`` listed below, attaches a
        Kafka source and a mocked message producer, registers the set through
        ``client`` and ingests ``dataframe``. The test passes when the mocked
        producer's ``send`` was called.
        """
        # (feature name, value type) pairs, in declaration order.
        typed_features = (
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        )

        feature_set = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=feature_name, dtype=value_type)
                for feature_name, value_type in typed_features
            ],
            max_age=Duration(seconds=3600),
        )

        feature_set.source = KafkaSource(
            topic="feature-topic",
            brokers="127.0.0.1",
        )

        # Replace the producer with a mock so that calls to send() can be
        # observed by the assertion at the end of the test.
        feature_set._message_producer = MagicMock()
        feature_set._message_producer.send = MagicMock()

        # Register with Feast core, then ingest the data into Feast.
        client.apply(feature_set)
        feature_set.ingest(dataframe=dataframe)

        # The producer must have been asked to send at least once.
        feature_set._message_producer.send.assert_called()
コード例 #3
0
ファイル: test_feature_set.py プロジェクト: kiranvajja/feast
    def test_feature_set_ingest_failure(self, client, dataframe, exception):
        """Check that the ingest flow raises the expected exception type.

        The whole flow (creating the feature set, inferring its fields from
        ``dataframe``, registering it and ingesting) runs inside
        ``pytest.raises(exception)``, so the test passes when any of these
        steps raises ``exception``.
        """
        with pytest.raises(exception):
            # Feature set backed by a fake broker and a mocked producer.
            failing_fs = FeatureSet("driver-feature-set")
            failing_fs.source = KafkaSource(
                topic="feature-topic",
                brokers="fake.broker.com",
            )
            failing_fs._message_producer = MagicMock()
            failing_fs._message_producer.send = MagicMock()

            # Derive the fields from the dataset, mapping the "entity_id"
            # column to an entity named "entity".
            entity_mapping = {
                "entity_id": Entity(name="entity", dtype=ValueType.INT64)
            }
            failing_fs.update_from_dataset(
                dataframe,
                column_mapping=entity_mapping,
            )

            # Register with Feast core, then ingest the data into Feast.
            client.apply(failing_fs)
            failing_fs.ingest(dataframe=dataframe)
コード例 #4
0
def test_all_types(client):
    """Round-trip an "all_types" feature set through ingestion and serving.

    Looks up version 1 of the ``all_types`` feature set and registers it
    first when it does not exist yet. It then ingests a three-row dataframe
    with one column per declared feature and polls online serving until the
    first element of ``float_list_feature`` returned for the first row's
    ``user_id`` matches the value that was sent (within ``FLOAT_TOLERANCE``).

    Args:
        client: Feast client used to look up / register the feature set and
            to query online features.

    Raises:
        Exception: If the feature set still cannot be retrieved after it has
            been registered.
        TimeoutError: If serving has not returned the ingested value before
            the polling deadline expires.
    """
    all_types_fs = client.get_feature_set(name="all_types", version=1)

    if all_types_fs is None:
        # Register new feature set if it doesn't exist
        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature",
                        dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature",
                        dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register feature set
        client.apply(all_types_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(10)
        all_types_fs = client.get_feature_set(name="all_types", version=1)

        if all_types_fs is None:
            raise Exception(
                "Client cannot retrieve 'all_types_fs' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    row_count = 3
    all_types_df = pd.DataFrame({
        # Timezone-aware UTC timestamps; datetime.utcnow() is deprecated.
        "datetime": [datetime.now(tz=pytz.utc) for _ in range(row_count)],
        "user_id": [1001, 1002, 1003],
        "int32_feature": [np.int32(1), np.int32(2), np.int32(3)],
        "int64_feature": [np.int64(1), np.int64(2), np.int64(3)],
        # np.float was only an alias of the builtin float and has been
        # removed from NumPy (1.24+); plain floats give the same values.
        "float_feature": [0.1, 0.2, 0.3],
        "double_feature": [np.float64(0.1),
                           np.float64(0.2),
                           np.float64(0.3)],
        "string_feature": ["one", "two", "three"],
        "bytes_feature": [b"one", b"two", b"three"],
        "bool_feature": [True, False, False],
        # Every list column holds the same array in each row; a fresh array
        # object is created per row.
        "int32_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int32) for _ in range(row_count)
        ],
        "int64_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int64) for _ in range(row_count)
        ],
        "float_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32)
            for _ in range(row_count)
        ],
        "double_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64)
            for _ in range(row_count)
        ],
        "string_list_feature": [
            np.array(["one", "two", "three"]) for _ in range(row_count)
        ],
        "bytes_list_feature": [
            np.array([b"one", b"two", b"three"]) for _ in range(row_count)
        ],
        "bool_list_feature": [
            np.array([True, False, True]) for _ in range(row_count)
        ],
    })

    # Ingest the data, then give it a moment before querying serving
    all_types_fs.ingest(dataframe=all_types_df)
    time.sleep(3)

    # Request arguments do not change between polls, so build them once.
    entity_rows = [
        GetOnlineFeaturesRequest.EntityRow(
            fields={
                "user_id": Value(int64_val=all_types_df.iloc[0]["user_id"])
            })
    ]
    feature_ids = [
        "all_types:1:float_feature",
        "all_types:1:int64_feature",
        "all_types:1:int32_feature",
        "all_types:1:string_feature",
        "all_types:1:bytes_feature",
        "all_types:1:bool_feature",
        "all_types:1:double_feature",
        "all_types:1:float_list_feature",
        "all_types:1:int64_list_feature",
        "all_types:1:int32_list_feature",
        "all_types:1:string_list_feature",
        "all_types:1:bytes_list_feature",
        "all_types:1:bool_list_feature",
        "all_types:1:double_list_feature",
    ]
    sent_float_list = all_types_df.iloc[0]["float_list_feature"]

    # Poll serving for feature values until the correct values are returned.
    # The deadline keeps the test from hanging forever when serving never
    # returns the ingested data.
    poll_interval_seconds = 1
    poll_timeout_seconds = 300
    deadline = time.monotonic() + poll_timeout_seconds

    while True:
        # Wait for values to appear in Serving
        time.sleep(poll_interval_seconds)

        response = client.get_online_features(
            entity_rows=entity_rows,
            feature_ids=feature_ids,
        )  # type: GetOnlineFeaturesResponse

        if response is not None:
            returned_float_list = (
                response.field_values[0].fields[
                    "all_types:1:float_list_feature"].float_list_val.val)

            # TODO: Add tests for each value and type
            # An empty list is treated as "not there yet" instead of
            # failing on the index lookup.
            if returned_float_list and math.isclose(returned_float_list[0],
                                                    sent_float_list[0],
                                                    abs_tol=FLOAT_TOLERANCE):
                break

        if time.monotonic() >= deadline:
            raise TimeoutError(
                "Serving did not return the ingested 'all_types' feature "
                "values within {} seconds.".format(poll_timeout_seconds))