Example #1
0
def run_offline_online_store_consistency_test(fs: FeatureStore,
                                              fv: FeatureView) -> None:
    """Materialize ``fv`` in two steps and check the stored values after each.

    A fixed window (5 hours ago to 2 hours ago) is loaded first with
    ``fs.materialize()``; the remainder up to "now" is then loaded with
    ``fs.materialize_incremental()``. After each step,
    ``check_offline_and_online_features`` is called for selected driver ids
    with the value expected at that point.

    Args:
        fs: Feature store under test.
        fv: Feature view whose data is materialized and checked.
    """
    reference_time = datetime.now()

    def verify(driver_id, event_timestamp, expected_value):
        # Every check requests full feature names and also checks the
        # offline store.
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=driver_id,
            event_timestamp=event_timestamp,
            expected_value=expected_value,
            full_feature_names=True,
            check_offline_store=True,
        )

    # Deliberately mix a tz-aware start with a tz-naive end so that both
    # kinds of timestamp are exercised by materialize().
    window_start = (reference_time - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = reference_time - timedelta(hours=2)
    fs.materialize(
        feature_views=[fv.name],
        start_date=window_start,
        end_date=window_end,
    )

    # Values expected right after materialize(). Driver 3 is recorded here as
    # the "prior" value that materialize_incremental() is expected to replace.
    for driver_id, expected in ((1, 0.3), (2, None), (3, 4)):
        verify(driver_id, window_end, expected)

    # Load everything up to the reference time.
    fs.materialize_incremental(feature_views=[fv.name],
                               end_date=reference_time)

    # Driver 3 should now carry the newer value.
    verify(3, reference_time, 5)
Example #2
0
def setup_feature_store():
    """Prepares the local environment for a FeatureStore docstring test.

    Initializes a "local" repo in ``feature_repo``, registers the
    ``driver_id`` entity and the ``driver_hourly_stats`` feature view backed
    by ``feature_repo/data/driver_stats.parquet``, and materializes the
    window from 3 hours ago to 10 minutes ago (UTC).
    """
    from datetime import datetime, timedelta

    from feast import Entity, Feature, FeatureStore, FeatureView, FileSource, ValueType
    from feast.repo_operations import init_repo

    init_repo("feature_repo", "local")
    store = FeatureStore(repo_path="feature_repo")

    driver_entity = Entity(
        name="driver_id",
        value_type=ValueType.INT64,
        description="driver id",
    )
    stats_source = FileSource(
        path="feature_repo/data/driver_stats.parquet",
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )

    # (feature name, value type) pairs exposed by the view, in order.
    feature_specs = (
        ("conv_rate", ValueType.FLOAT),
        ("acc_rate", ValueType.FLOAT),
        ("avg_daily_trips", ValueType.INT64),
    )
    stats_view = FeatureView(
        name="driver_hourly_stats",
        entities=["driver_id"],
        ttl=timedelta(seconds=86400),  # one day
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in feature_specs
        ],
        batch_source=stats_source,
    )
    store.apply([stats_view, driver_entity])

    window_start = datetime.utcnow() - timedelta(hours=3)
    window_end = datetime.utcnow() - timedelta(minutes=10)
    store.materialize(start_date=window_start, end_date=window_end)
Example #3
0
def store_online(feature_store: FeatureStore) -> FeatureStore:
    """Materialize the last ~250 days of features and return the same store.

    The window runs from 250 days before the current UTC time to 10 minutes
    before it.

    Args:
        feature_store: Store on which ``materialize`` is called.

    Returns:
        The ``feature_store`` object that was passed in.
    """
    window_start = datetime.utcnow() - timedelta(days=250)
    window_end = datetime.utcnow() - timedelta(minutes=10)
    feature_store.materialize(start_date=window_start, end_date=window_end)
    return feature_store
Example #4
0
)

def _build_benchmark_view(index):
    """Return the benchmark feature view with the given index.

    Each view carries ten INT64 features named ``feature_<10 * index + j>``
    for ``j`` in 0..9, so feature names are unique across all views.
    """
    one_day = Duration(seconds=86400)
    feature_columns = [
        Feature(name=f"feature_{10 * index + j}", dtype=ValueType.INT64)
        for j in range(10)
    ]
    return FeatureView(
        name=f"feature_view_{index}",
        entities=["entity"],
        ttl=one_day,
        features=feature_columns,
        online=True,
        batch_source=generated_data_source,
    )


# 25 synthetic feature views (250 features in total) for the benchmark.
benchmark_feature_views = [_build_benchmark_view(i) for i in range(25)]

# A single feature service that groups every benchmark view.
benchmark_feature_service = FeatureService(
    name="benchmark_feature_service",
    features=benchmark_feature_views,
)

# Register all objects with the feature store rooted at the current directory.
fs = FeatureStore(".")
fs.apply([
    driver_hourly_stats_view,
    driver,
    entity,
    benchmark_feature_service,
    *benchmark_feature_views,
])

# Materialize everything from `start` up to the current local time.
now = datetime.now()
fs.materialize(start, now)
print("Materialization finished")
Example #5
0
def construct_test_environment(
    test_repo_config: TestRepoConfig,
    create_and_apply: bool = False,
    materialize: bool = False,
) -> Environment:
    """
    Build a throwaway feature repo from the test repo config, optionally apply and
    materialize it, and yield the resulting environment to the caller.

    This is a generator function: it yields exactly one ``Environment`` and, once
    resumed or closed, tears down the offline data source creator and the feature store.
    The user is *not* expected to perform any clean up actions.

    NOTE(review): the ``-> Environment`` annotation describes the yielded value;
    presumably this function is wrapped (e.g. by ``contextlib.contextmanager`` or a
    pytest fixture) outside the lines visible here - confirm.

    :param test_repo_config: configuration naming the offline store creator class,
        the online store and the provider to use.
    :param create_and_apply: when True, the driver and customer entities and their
        feature views are applied to the feature store before yielding.
    :param materialize: when True, the feature store is materialized from
        ``environment.start_date`` to ``environment.end_date`` before yielding.
    :return: yields the ``Environment`` wrapping the constructed feature store.
    """
    df = create_dataset()

    # Random suffix keeps projects created by different test runs apart.
    project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}"

    # offline_store_creator is a dotted path: "<module>.<class name>".
    module_name, config_class_name = test_repo_config.offline_store_creator.rsplit(
        ".", 1)

    offline_creator: DataSourceCreator = importer.get_class_from_type(
        module_name, config_class_name, "DataSourceCreator")(project)
    ds = offline_creator.create_data_source(project,
                                            df,
                                            field_mapping={
                                                "ts_1": "ts",
                                                "id": "driver_id"
                                            })
    offline_store = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    # The registry file lives in a temporary directory that is removed when the
    # generator finishes.
    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=project,
            provider=test_repo_config.provider,
            offline_store=offline_store,
            online_store=online_store,
            repo_path=repo_dir_name,
        )
        fs = FeatureStore(config=config)
        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source=ds,
            data_source_creator=offline_creator,
        )

        try:
            if create_and_apply:
                entities = [driver(), customer()]
                fvs = [
                    environment.driver_stats_feature_view(),
                    environment.customer_feature_view(),
                ]
                fs.apply(fvs + entities)

            if materialize:
                fs.materialize(environment.start_date, environment.end_date)

            yield environment
        finally:
            # Run both teardowns even if the first one raises; previously a
            # failure in offline_creator.teardown() skipped fs.teardown() and
            # left the feature store's infrastructure behind.
            try:
                offline_creator.teardown()
            finally:
                fs.teardown()