# Example 1
# 0
def run_offline_online_store_consistency_test(fs: FeatureStore,
                                              fv: FeatureView) -> None:
    """Check that materialize() and materialize_incremental() load the
    expected values for the given feature view into both stores."""
    reference_time = datetime.now()

    use_full_feature_names = True
    verify_offline_store: bool = True

    # Materialize a window ending two hours ago. The start is tz-aware and
    # the end tz-naive on purpose, to exercise both timestamp flavors.
    window_start = (reference_time - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = reference_time - timedelta(hours=2)
    fs.materialize(
        feature_views=[fv.name],
        start_date=window_start,
        end_date=window_end,
    )

    # Verify the result of materialize(). For driver 3 the value 4 is the
    # pre-incremental value, checked again after materialize_incremental().
    for d_id, expected in ((1, 0.3), (2, None), (3, 4)):
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=d_id,
            event_timestamp=window_end,
            expected_value=expected,
            full_feature_names=use_full_feature_names,
            check_offline_store=verify_offline_store,
        )

    # Incrementally materialize up to "now" and confirm driver 3 was updated.
    fs.materialize_incremental(feature_views=[fv.name], end_date=reference_time)

    check_offline_and_online_features(
        fs=fs,
        fv=fv,
        driver_id=3,
        event_timestamp=reference_time,
        expected_value=5,
        full_feature_names=use_full_feature_names,
        check_offline_store=verify_offline_store,
    )
# Example 2
# 0
def main():
    """End-to-end AWS demo: apply the feature definitions, materialize
    features into the online store (DynamoDB), and read them back."""
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    fs = FeatureStore(repo_path=".")

    # Deploy the feature store to AWS
    print("Deploying feature store to AWS...")
    fs.apply([driver, driver_hourly_stats_view])

    # Select features
    features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"]

    print("Loading features into the online store...")
    fs.materialize_incremental(end_date=datetime.now())

    print("Retrieving online features...")

    # Retrieve features from the online store (DynamoDB).
    # NOTE: `features=` is the current keyword; `feature_refs=` was deprecated
    # and removed in Feast (the Snowflake example in this file already uses it).
    online_features = fs.get_online_features(
        features=features,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()

    print(pd.DataFrame.from_dict(online_features))
# Example 3
# 0
def main():
    """End-to-end Snowflake demo: apply the feature definitions, build a
    training frame from historical features, materialize to the online
    store, and read online features back."""
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    fs = FeatureStore(repo_path=".")

    # Deploy the feature store to Snowflake
    print("Deploying feature store to Snowflake...")
    fs.apply([driver, driver_stats_fv])

    # Feature references to fetch
    features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"]

    # Entity dataframe: three driver ids with timestamps spread over the last
    # three days. Historical features are joined onto these rows.
    timestamps = [
        pd.Timestamp(ts, unit="ms", tz="UTC").round("ms")
        for ts in pd.date_range(
            start=datetime.now() - timedelta(days=3),
            end=datetime.now(),
            periods=3,
        )
    ]
    entity_df = pd.DataFrame(
        {"event_timestamp": timestamps, "driver_id": [1001, 1002, 1003]}
    )

    print("Retrieving training data...")

    # Retrieve historical features by joining the entity dataframe to the Snowflake table source
    training_df = fs.get_historical_features(
        features=features, entity_df=entity_df
    ).to_df()

    print()
    print(training_df)

    print()
    print("Loading features into the online store...")
    fs.materialize_incremental(end_date=datetime.now())

    print()
    print("Retrieving online features...")

    # Retrieve features from the online store
    online_features = fs.get_online_features(
        features=features,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()

    print()
    print(pd.DataFrame.from_dict(online_features))
# Example 4
# 0
# Entity for the driver: effectively the primary key used to fetch features.
driver = Entity(
    name="driver_id",
    description="driver id",
    value_type=ValueType.INT64,
)

# Feature view serving the three stats columns from the parquet source (which
# also contains a driver_id column and timestamps) to our model online.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400 * 365),  # one-year TTL
    features=[
        Feature(name="conv_rate", dtype=ValueType.DOUBLE),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    online=True,
    batch_source=driver_hourly_stats,
    tags={},
)

# Register the definitions, then incrementally materialize up to now.
fs = FeatureStore("")
fs.apply([driver_hourly_stats_view, driver])

now = datetime.now()
fs.materialize_incremental(now)