Ejemplo n.º 1
0
def run_offline_online_store_consistency_test(
    fs: FeatureStore, fv: FeatureView
) -> None:
    """Materialize ``fv`` in two steps and verify feature values after each.

    A fixed window (5 hours ago to 2 hours ago) is materialized first, then
    ``materialize_incremental()`` is run up to the current time. After each
    step the expected value per driver is verified through
    ``check_offline_and_online_features``.

    Args:
        fs: Feature store to materialize into and to check against.
        fv: Feature view under test.
    """
    current_time = datetime.utcnow()

    # Deliberately mix a tz-aware start with a tz-naive end so that both
    # kinds of timestamp are exercised by materialize().
    window_start = (current_time - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = current_time - timedelta(hours=2)
    fs.materialize(
        feature_views=[fv.name], start_date=window_start, end_date=window_end
    )

    # (driver_id, expected_value) pairs checked right after materialize().
    # Driver 3 is included here to record its value *before* the incremental
    # run, so the later check proves the incremental run changed it.
    expectations_after_materialize = (
        (1, 0.3),
        (2, None),
        (3, 4),
    )
    for driver, expected in expectations_after_materialize:
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=driver,
            event_timestamp=window_end,
            expected_value=expected,
        )

    # Bring the store up to the current time ...
    fs.materialize_incremental(feature_views=[fv.name], end_date=current_time)

    # ... and confirm that driver 3 now has the newer value.
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=current_time, expected_value=5
    )
Ejemplo n.º 2
0
def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Materialize ``fv`` and assert the values returned by online lookups.

    First a fixed window (5 hours ago to 2 hours ago) is materialized and the
    ``value`` feature is checked for drivers 1 and 3. Then
    ``materialize_incremental()`` is run up to the current time and driver 3
    is checked again for its newer value.

    Args:
        fs: Feature store to materialize into and to query.
        fv: Feature view under test; its ``name`` builds the feature
            reference and the response key.
    """

    def assert_online_value(driver_id, expected):
        # Look up the "value" feature for one driver and compare it with the
        # expected number using an absolute tolerance of 1e-6.
        entity_rows = [{"driver_id": driver_id}]
        response = fs.get_online_features([f"{fv.name}:value"], entity_rows)
        served = response.to_dict()[f"{fv.name}__value"][0]
        assert abs(served - expected) < 1e-6

    now = datetime.utcnow()

    # Deliberately mix a tz-aware start with a tz-naive end so that both
    # kinds of timestamp are exercised by materialize().
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize([fv.name], window_start, window_end)

    # Result of materialize().
    assert_online_value(1, 0.3)

    # Value of driver 3 prior to materialize_incremental().
    assert_online_value(3, 4)

    # Incremental run up to the current time.
    fs.materialize_incremental([fv.name], now)

    # Driver 3 must now have the newer value.
    assert_online_value(3, 5)
Ejemplo n.º 3
0
def materialize_incremental_command(end_ts: str, views: List[str]):
    """
    Run an incremental materialization job to ingest new data into the online store. Feast will read
    all data from the previously ingested point to END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be incrementally materialized.

    END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    # NOTE(review): the docstring above is kept verbatim because it reads like
    # CLI help text (presumably surfaced by a command decorator that is not
    # visible here) — confirm before rewording it.

    # Validate the current working directory as a repo before touching it.
    cli_check_repo(Path.cwd())
    feature_store = FeatureStore(repo_path=str(Path.cwd()))

    # An empty (falsy) selection is forwarded as None.
    selected_views = views or None
    # END_TS arrives as a string; datetime.fromisoformat() raises ValueError
    # if it is not a valid ISO 8601 timestamp.
    cutoff = datetime.fromisoformat(end_ts)

    feature_store.materialize_incremental(
        feature_views=selected_views,
        end_date=cutoff,
    )
    def test_bigquery_table_to_datastore_correctness(self):
        """Load rows into BigQuery, materialize them and check online values.

        A five-row dataset is uploaded to a fresh BigQuery table, a feature
        view backed by that table is applied to a feature store configured
        with the ``gcp`` provider, and the ``value`` feature returned by
        ``get_online_features`` is asserted after ``materialize()`` and again
        after ``materialize_incremental()``.
        """
        # --- dataset -------------------------------------------------------
        now = datetime.utcnow()
        ts = pd.Timestamp(now).round("ms")
        # Event-time offsets (seconds before ``ts``), one per row below.
        seconds_before_ts = (4, 0, 3, 4, 1)
        data = {
            "id": [1, 2, 1, 3, 3],
            "value": [0.1, 0.2, 0.3, 4, 5],
            "ts_1": [ts - timedelta(seconds=s) for s in seconds_before_ts],
            "created_ts": [ts] * 5,
        }
        df = pd.DataFrame.from_dict(data)

        # --- upload to BigQuery --------------------------------------------
        job_config = bigquery.LoadJobConfig()
        # The epoch-seconds suffix gives each run its own table name.
        table_id = f"{self.gcp_project}.{self.bigquery_dataset}.table_correctness_{int(time.time())}"
        load_job = self.client.load_table_from_dataframe(
            df, table_id, job_config=job_config
        )
        load_job.result()  # block until the load job has finished

        # --- feature view and store ----------------------------------------
        value_features = [Feature("value", ValueType.FLOAT)]
        bq_source = BigQuerySource(
            event_timestamp_column="ts",
            table_ref=table_id,
            created_timestamp_column="created_ts",
            # Table columns are renamed: ts_1 -> ts, id -> driver_id.
            field_mapping={"ts_1": "ts", "id": "driver_id"},
            date_partition_column="",
        )
        fv = FeatureView(
            name="test_bq_table_correctness",
            entities=["driver_id"],
            features=value_features,
            ttl=timedelta(minutes=5),
            input=bq_source,
        )
        config = RepoConfig(
            metadata_store="./metadata.db",
            project=f"test_bq_table_correctness_{int(time.time())}",
            provider="gcp",
        )
        fs = FeatureStore(config=config)
        fs.apply([fv])

        def online_value(driver_id):
            # Fetch the "value" feature currently served for one driver.
            rows = [{"driver_id": driver_id}]
            response = fs.get_online_features([f"{fv.name}:value"], rows)
            return response.to_dict()[f"{fv.name}:value"][0]

        # --- materialize() over [now - 5s, now - 2s] -----------------------
        window_start = now - timedelta(seconds=5)
        window_end = now - timedelta(seconds=2)
        fs.materialize([fv.name], window_start, window_end)

        # Driver 1 has rows at ts-4s (0.1) and ts-3s (0.3); 0.3 is expected.
        assert abs(online_value(1) - 0.3) < 1e-6

        # Driver 3 before the incremental run: only the ts-4s row (4) lies
        # inside the window, the ts-1s row (5) does not.
        assert abs(online_value(3) - 4) < 1e-6

        # --- materialize_incremental() up to ``now`` -----------------------
        fs.materialize_incremental([fv.name], now)

        # Driver 3 must now have the value of its ts-1s row.
        assert abs(online_value(3) - 5) < 1e-6