예제 #1
0
파일: cli.py 프로젝트: qooba/feast
def serve_transformations_command(ctx: click.Context, port: int):
    """[Experimental] Start the feature consumption server locally on a given port."""
    # The repo location is read from the click context under the "CHDIR" key.
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)

    FeatureStore(repo_path=str(repo_path)).serve_transformations(port)
예제 #2
0
파일: cli.py 프로젝트: qooba/feast
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))

    # Regular, request and on-demand feature views are listed together.
    all_views = [
        *store.list_feature_views(),
        *store.list_request_feature_views(),
        *store.list_on_demand_feature_views(),
    ]

    rows = []
    for view in all_views:
        entity_names = set()
        if isinstance(view, FeatureView):
            entity_names.update(view.entities)
        elif isinstance(view, OnDemandFeatureView):
            # Collect the entities of every input that is a plain FeatureView.
            for source_view in view.inputs.values():
                if isinstance(source_view, FeatureView):
                    entity_names.update(source_view.entities)
        # An empty entity set is shown as "n/a".
        rows.append([view.name, entity_names or "n/a", type(view).__name__])

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES", "TYPE"], tablefmt="plain")
    print(rendered)
예제 #3
0
def run_offline_online_store_consistency_test(
    fs: FeatureStore, fv: FeatureView
) -> None:
    """Materialize ``fv`` in two steps and check online/offline values after each step."""
    now = datetime.utcnow()

    # materialize(): the window start is tz-aware and the window end is
    # tz-naive, so that both kinds of timestamp are exercised.
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize(
        feature_views=[fv.name], start_date=window_start, end_date=window_end
    )

    # Expected values after materialize(); driver 3 is the "prior" value that
    # materialize_incremental() is expected to supersede below.
    expectations_after_materialize = ((1, 0.3), (2, None), (3, 4))
    for driver, expected in expectations_after_materialize:
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=driver,
            event_timestamp=window_end,
            expected_value=expected,
        )

    # materialize_incremental() up to "now", then re-check driver 3.
    fs.materialize_incremental(feature_views=[fv.name], end_date=now)
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=now, expected_value=5
    )
예제 #4
0
def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield a local-provider FeatureStore plus a feature view backed by a parquet file.

    The dataset from ``create_dataset()`` is written to a parquet file inside a
    temporary directory, wrapped in a ``FileSource`` and applied to a store
    whose registry and online store also live in temporary directories.

    Yields:
        A ``(fs, fv)`` tuple. All temporary directories are removed when the
        generator is resumed or closed after the yield.
    """
    # The parquet file lives in its own temporary directory. The previous
    # NamedTemporaryFile + close() approach deleted the file on close(), and the
    # file that to_parquet() then recreated at that path was never cleaned up.
    with tempfile.TemporaryDirectory() as source_dir_name:
        parquet_path = Path(source_dir_name) / "dataset.parquet"
        df = create_dataset()
        df.to_parquet(str(parquet_path))
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{parquet_path}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={
                "ts_1": "ts",
                "id": "driver_id"
            },
        )
        fv = get_feature_view(file_source)
        with tempfile.TemporaryDirectory(
        ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                # Random suffix gives every invocation its own project name.
                project=
                f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="local",
                online_store=OnlineStoreConfig(local=LocalOnlineStoreConfig(
                    path=str(Path(data_dir_name) / "online_store.db"))),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv])

            yield fs, fv
예제 #5
0
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
) -> None:
    """Assert that the online and offline stores both return ``expected_value``.

    Args:
        fs: Feature store to query.
        fv: Feature view whose ``value`` feature is checked.
        driver_id: Driver to look up.
        event_timestamp: Timestamp used for the historical (offline) lookup.
        expected_value: Value expected within 1e-6, or ``None`` when no value
            is expected for this driver.

    Raises:
        AssertionError: If either store returns something else.
    """
    feature_ref = f"{fv.name}:value"
    result_key = f"{fv.name}__value"

    # Check online store
    response_dict = fs.get_online_features(
        [feature_ref], [{"driver": driver_id}]
    ).to_dict()

    # Compare against None explicitly: a plain truthiness test would treat an
    # expected value of 0 / 0.0 as "no value expected".
    if expected_value is not None:
        assert abs(response_dict[result_key][0] - expected_value) < 1e-6
    else:
        assert response_dict[result_key][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        feature_refs=[feature_ref],
    ).to_df()

    if expected_value is not None:
        assert abs(df.to_dict()[result_key][0] - expected_value) < 1e-6
    else:
        # Replace null cells with None so the identity check below works.
        df = df.where(pd.notnull(df), None)
        assert df.to_dict()[result_key][0] is None
예제 #6
0
def prep_dynamodb_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield an AWS-provider FeatureStore (DynamoDB online store) and a parquet-backed view.

    The dataset from ``create_dataset()`` is written to a parquet file inside a
    temporary directory and wrapped in a ``FileSource``. The feature view and a
    ``driver`` entity are applied to a store configured with a DynamoDB online
    store in ``us-west-2`` and a file offline store.

    Yields:
        A ``(fs, fv)`` tuple. The temporary directories are removed when the
        generator is resumed or closed after the yield.
    """
    # The parquet file lives in its own temporary directory. The previous
    # NamedTemporaryFile + close() approach deleted the file on close(), and the
    # file that to_parquet() then recreated at that path was never cleaned up.
    with tempfile.TemporaryDirectory() as source_dir_name:
        parquet_path = Path(source_dir_name) / "dataset.parquet"
        df = create_dataset()
        df.to_parquet(str(parquet_path))
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{parquet_path}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)
        e = Entity(
            name="driver",
            description="id for driver",
            join_key="driver_id",
            value_type=ValueType.INT32,
        )
        with tempfile.TemporaryDirectory() as repo_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                # Random suffix gives every invocation its own project name.
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="aws",
                online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
                offline_store=FileOfflineStoreConfig(),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv, e])

            yield fs, fv
예제 #7
0
def test_non_local_feature_repo() -> None:
    """
    Apply a sample feature repo through the CLI runner, check that three
    feature views are registered, then tear the repo down again.
    """
    cli = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:
        repo_root = Path(repo_dir_name)

        # Write the feature_store.yaml for the temporary repo.
        (repo_root / "feature_store.yaml").write_text(
            dedent("""
        project: foo
        registry: data/registry.db
        provider: local
        online_store:
            path: data/online_store.db
        """))

        # Copy the sample definitions that sit next to this test into the repo.
        sample_definitions = (
            Path(__file__).parent / "example_feature_repo_1.py"
        ).read_text()
        (repo_root / "example.py").write_text(sample_definitions)

        apply_result = cli.run(["apply"], cwd=repo_root)
        assertpy.assert_that(apply_result.returncode).is_equal_to(0)

        store = FeatureStore(repo_path=str(repo_root))
        assertpy.assert_that(store.list_feature_views()).is_length(3)

        teardown_result = cli.run(["teardown"], cwd=repo_root)
        assertpy.assert_that(teardown_result.returncode).is_equal_to(0)
예제 #8
0
def test_usage_on(dummy_exporter, enabling_toggle):
    """Apply one entity and check the three usage events recorded in ``dummy_exporter``."""
    _reload_feast()
    # Imported only after the reload above, as in the original ordering.
    from feast.feature_store import FeatureStore

    # Entrypoints expected in the exporter, in recording order.
    expected_entrypoints = (
        "feast.infra.local.LocalRegistryStore.get_registry_proto",
        "feast.infra.local.LocalRegistryStore.update_registry_proto",
        "feast.feature_store.FeatureStore.apply",
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        repo_config = RepoConfig(
            registry=os.path.join(temp_dir, "registry.db"),
            project="fake_project",
            provider="local",
            online_store=SqliteOnlineStoreConfig(
                path=os.path.join(temp_dir, "online.db")),
        )
        test_feature_store = FeatureStore(config=repo_config)
        driver_car = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            tags={"team": "matchmaking"},
        )

        test_feature_store.apply([driver_car])

        assert len(dummy_exporter) == 3
        for position, entrypoint in enumerate(expected_entrypoints):
            # Subset check: the event must at least carry this entrypoint.
            wanted = {"entrypoint": entrypoint}
            assert wanted.items() <= dummy_exporter[position].items()
예제 #9
0
파일: cli.py 프로젝트: feast-dev/feast
def serve_command(ctx: click.Context, host: str, port: int,
                  no_access_log: bool):
    """Start a feature server locally on a given port."""
    # The repo location is read from the click context under the "CHDIR" key.
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)

    feature_store = FeatureStore(repo_path=str(repo_path))
    feature_store.serve(host, port, no_access_log)
def prep_bq_fs_and_fv(
    bq_source_type: str, ) -> Iterator[Tuple[FeatureStore, FeatureView]]:
    """Yield a GCP-provider FeatureStore and a BigQuery-backed feature view.

    The dataset from ``create_dataset()`` is loaded into a new table in the
    ``test_ingestion`` BigQuery dataset. ``bq_source_type`` decides whether the
    source references the table directly (``"table"``) or through a
    ``SELECT *`` query (``"query"``).
    """
    bq_client = bigquery.Client()
    gcp_project = bq_client.project
    bigquery_dataset = "test_ingestion"

    # Ensure the dataset exists, then give its tables a default expiration.
    dataset = bigquery.Dataset(f"{gcp_project}.{bigquery_dataset}")
    bq_client.create_dataset(dataset, exists_ok=True)
    two_weeks_ms = 1000 * 60 * 60 * 24 * 14
    dataset.default_table_expiration_ms = two_weeks_ms
    bq_client.update_dataset(dataset, ["default_table_expiration_ms"])

    df = create_dataset()

    # Upload the dataframe into a table named after the current timestamp.
    job_config = bigquery.LoadJobConfig()
    table_ref = f"{gcp_project}.{bigquery_dataset}.{bq_source_type}_correctness_{int(time.time())}"
    query = f"SELECT * FROM `{table_ref}`"
    load_job = bq_client.load_table_from_dataframe(
        df, table_ref, job_config=job_config)
    load_job.result()

    # Only the argument matching the requested source type is populated.
    source_table_ref = None
    source_query = None
    if bq_source_type == "table":
        source_table_ref = table_ref
    if bq_source_type == "query":
        source_query = query

    bigquery_source = BigQuerySource(
        table_ref=source_table_ref,
        query=source_query,
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        date_partition_column="",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
    )

    fv = get_feature_view(bigquery_source)
    driver_entity = Entity(
        name="driver",
        description="id for driver",
        join_key="driver_id",
        value_type=ValueType.INT32,
    )
    with tempfile.TemporaryDirectory() as repo_dir_name:
        registry_path = Path(repo_dir_name) / "registry.db"
        project_name = f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}"
        config = RepoConfig(
            registry=str(registry_path),
            project=project_name,
            provider="gcp",
            online_store=DatastoreOnlineStoreConfig(
                namespace="integration_test"),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv, driver_entity])

        yield fs, fv
예제 #11
0
def feature_view_list():
    """
    List all feature views
    """
    # The repo is taken to be the current working directory.
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    rows = [[view.name, view.entities] for view in store.list_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES"], tablefmt="plain")
    print(rendered)
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
    full_feature_names: bool,
) -> None:
    """Assert that the online and offline stores both return ``expected_value``.

    Args:
        fs: Feature store to query.
        fv: Feature view whose ``value`` feature is checked.
        driver_id: Driver to look up.
        event_timestamp: Timestamp used for the historical (offline) lookup.
        expected_value: Value expected within 1e-6, or ``None`` when no value
            is expected (``None`` online, NaN offline).
        full_feature_names: Passed through to both lookups; when true the
            result is read from ``"<view name>__value"``, otherwise ``"value"``.

    Raises:
        AssertionError: If either store returns something else.
    """
    feature_ref = f"{fv.name}:value"
    # The result key depends on whether full feature names were requested.
    result_key = f"{fv.name}__value" if full_feature_names else "value"

    # Check online store
    response_dict = fs.get_online_features(
        [feature_ref],
        [{
            "driver": driver_id
        }],
        full_feature_names=full_feature_names,
    ).to_dict()

    # Compare against None explicitly: a plain truthiness test would treat an
    # expected value of 0 / 0.0 as "no value expected".
    if expected_value is not None:
        assert abs(response_dict[result_key][0] - expected_value) < 1e-6
    else:
        assert response_dict[result_key][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict({
            "driver_id": [driver_id],
            "event_timestamp": [event_timestamp]
        }),
        features=[feature_ref],
        full_feature_names=full_feature_names,
    ).to_df()

    if expected_value is not None:
        assert abs(df.to_dict()[result_key][0] - expected_value) < 1e-6
    else:
        # A missing offline value is expected to show up as NaN.
        assert math.isnan(df.to_dict()[result_key][0])
예제 #13
0
파일: cli.py 프로젝트: qooba/feast
def endpoint(ctx: click.Context):
    """
    Display feature server endpoints.
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))

    server_endpoint = store.get_feature_server_endpoint()
    if server_endpoint is None:
        _logger.info("There is no active feature server.")
        return

    # Style/Fore wrap the endpoint in bright green and reset afterwards.
    _logger.info(
        f"Feature server endpoint: {Style.BRIGHT + Fore.GREEN}{server_endpoint}{Style.RESET_ALL}"
    )
예제 #14
0
파일: cli.py 프로젝트: qooba/feast
def on_demand_feature_view_list(ctx: click.Context):
    """
    [Experimental] List all on demand feature views
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))
    # One single-column row per on-demand feature view.
    rows = [[view.name] for view in store.list_on_demand_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME"], tablefmt="plain")
    print(rendered)
예제 #15
0
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))
    # One row per feature view: its name and its entities.
    rows = [[view.name, view.entities] for view in store.list_feature_views()]

    from tabulate import tabulate

    rendered = tabulate(rows, headers=["NAME", "ENTITIES"], tablefmt="plain")
    print(rendered)
예제 #16
0
파일: cli.py 프로젝트: tleyden/feast
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """

    def _parse_as_utc(timestamp: str) -> datetime:
        # Naive timestamps are taken to be UTC already. Timestamps carrying an
        # explicit offset are converted to UTC; blindly replacing tzinfo would
        # discard the offset and shift the requested window.
        parsed = datetime.fromisoformat(timestamp)
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=utc)
        return parsed.astimezone(utc)

    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        # An empty selection is passed on as None rather than as an empty list.
        feature_views=views or None,
        start_date=_parse_as_utc(start_ts),
        end_date=_parse_as_utc(end_ts),
    )
예제 #17
0
파일: cli.py 프로젝트: danielsiwiec/feast
def materialize_incremental_command(end_ts: str, views: List[str]):
    """
    Run an incremental materialization job to ingest new data into the online store. Feast will read
    all data from the previously ingested point to END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be incrementally materialized.

    END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    # The repo is taken to be the current working directory.
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))

    # An empty selection is passed on as None rather than as an empty list.
    selected_views = views if views else None
    # The timestamp is parsed as-is; no timezone is attached here.
    end_date = datetime.fromisoformat(end_ts)
    store.materialize_incremental(feature_views=selected_views, end_date=end_date)
예제 #18
0
def apply_total_with_repo_instance(
    store: FeatureStore,
    project: str,
    registry: Registry,
    repo: RepoContents,
    skip_source_validation: bool,
):
    """Validate batch sources (unless skipped), apply the repo to ``store`` and log the diff."""
    if not skip_source_validation:
        # Validate the batch source of every feature view against the store config.
        batch_sources = [view.batch_source for view in repo.feature_views]
        for source in batch_sources:
            source.validate(store.config)

    # Split registry/repo objects into what to apply and what to delete.
    objects = extract_objects_for_apply_delete(project, registry, repo)
    all_to_apply, all_to_delete, views_to_delete, views_to_keep = objects

    diff = store.apply(
        all_to_apply, objects_to_delete=all_to_delete, partial=False
    )

    log_cli_output(diff, views_to_delete, views_to_keep)
예제 #19
0
def entity_list():
    """
    List all entities
    """
    # The repo is taken to be the current working directory.
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    rows = [
        [item.name, item.description, item.value_type]
        for item in store.list_entities()
    ]

    from tabulate import tabulate

    column_names = ["NAME", "DESCRIPTION", "TYPE"]
    print(tabulate(rows, headers=column_names, tablefmt="plain"))
예제 #20
0
def feature_view_describe(name: str):
    """
    Describe a feature view
    """
    # The repo is taken to be the current working directory.
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))

    try:
        view = store.get_feature_view(name)
    except FeastObjectNotFoundException as not_found:
        # Report the lookup failure and stop with a non-zero exit status.
        print(not_found)
        exit(1)

    # Parse the view's string form as YAML and re-emit it in block style,
    # keeping the original key order.
    parsed = yaml.safe_load(str(view))
    print(yaml.dump(parsed, default_flow_style=False, sort_keys=False))
예제 #21
0
def entity_describe(name: str):
    """
    Describe an entity
    """
    # The repo is taken to be the current working directory.
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))

    try:
        found_entity = store.get_entity(name)
    except FeastObjectNotFoundException as not_found:
        # Report the lookup failure and stop with a non-zero exit status.
        print(not_found)
        exit(1)

    # Parse the entity's string form as YAML and re-emit it in block style,
    # keeping the original key order.
    parsed = yaml.safe_load(str(found_entity))
    print(yaml.dump(parsed, default_flow_style=False, sort_keys=False))
예제 #22
0
def entity_list(ctx: click.Context):
    """
    List all entities
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))
    rows = [
        [item.name, item.description, item.value_type]
        for item in store.list_entities()
    ]

    from tabulate import tabulate

    column_names = ["NAME", "DESCRIPTION", "TYPE"]
    print(tabulate(rows, headers=column_names, tablefmt="plain"))
예제 #23
0
def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Run materialize() then materialize_incremental() and check the online values."""

    def _online_value(driver_id: int):
        # Fetch the view's "value" feature for one driver from the online store.
        response = fs.get_online_features(
            [f"{fv.name}:value"], [{"driver_id": driver_id}]
        ).to_dict()
        return response[f"{fv.name}__value"][0]

    now = datetime.utcnow()

    # materialize(): the window start is tz-aware and the window end is
    # tz-naive, so that both kinds of timestamp are exercised.
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize([fv.name], window_start, window_end)

    # Result of materialize().
    assert abs(_online_value(1) - 0.3) < 1e-6

    # Prior value for driver 3, before the incremental run.
    assert abs(_online_value(3) - 4) < 1e-6

    # materialize_incremental() up to "now".
    incremental_end = now - timedelta(seconds=0)
    fs.materialize_incremental([fv.name], incremental_end)

    # Result of materialize_incremental().
    assert abs(_online_value(3) - 5) < 1e-6
예제 #24
0
def feature_store_with_local_registry():
    """Return a local-provider FeatureStore whose registry and SQLite store use temp files.

    Both files are created with ``mkstemp()`` and are not deleted here.
    """
    import os

    registry_fd, registry_path = mkstemp()
    online_store_fd, online_store_path = mkstemp()
    # mkstemp() returns an open OS-level descriptor along with the path. Only
    # the paths are used below, so close both descriptors instead of leaking them.
    os.close(registry_fd)
    os.close(online_store_fd)
    return FeatureStore(config=RepoConfig(
        registry=registry_path,
        project="default",
        provider="local",
        online_store=SqliteOnlineStoreConfig(path=online_store_path),
    ))
예제 #25
0
def feature_store_with_s3_registry():
    """Return an AWS-provider FeatureStore whose registry is stored under an S3 path."""
    # The millisecond timestamp in the path gives each call its own registry location.
    timestamp_ms = int(time.time() * 1000)
    registry_uri = f"s3://feast-integration-tests/registries/{timestamp_ms}/registry.db"
    repo_config = RepoConfig(
        registry=registry_uri,
        project="default",
        provider="aws",
        online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
        offline_store=FileOfflineStoreConfig(),
    )
    return FeatureStore(config=repo_config)
예제 #26
0
def entity_describe(ctx: click.Context, name: str):
    """
    Describe an entity
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))

    try:
        found_entity = store.get_entity(name)
    except FeastObjectNotFoundException as not_found:
        # Report the lookup failure and stop with a non-zero exit status.
        print(not_found)
        exit(1)

    # Parse the entity's string form as YAML and re-emit it in block style,
    # keeping the original key order.
    parsed = yaml.safe_load(str(found_entity))
    print(yaml.dump(parsed, default_flow_style=False, sort_keys=False))
예제 #27
0
def materialize_command(ctx: click.Context, start_ts: str, end_ts: str,
                        views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))

    # An empty selection is passed on as None rather than as an empty list.
    selected_views = views if views else None
    # Both bounds are parsed from ISO 8601 and passed through utils.make_tzaware.
    window_start = utils.make_tzaware(datetime.fromisoformat(start_ts))
    window_end = utils.make_tzaware(datetime.fromisoformat(end_ts))
    store.materialize(
        feature_views=selected_views,
        start_date=window_start,
        end_date=window_end,
    )
예제 #28
0
def test_apply_remote_repo():
    """Return a local-provider FeatureStore whose registry and SQLite store use temp files.

    Both files are created with ``mkstemp()`` and are not deleted here.
    """
    import os

    registry_fd, registry_path = mkstemp()
    online_store_fd, online_store_path = mkstemp()
    # mkstemp() returns an open OS-level descriptor along with the path. Only
    # the paths are used below, so close both descriptors instead of leaking them.
    os.close(registry_fd)
    os.close(online_store_fd)
    return FeatureStore(config=RepoConfig(
        registry=registry_path,
        project="default",
        provider="local",
        online_store=SqliteOnlineStoreConfig(path=online_store_path),
    ))
예제 #29
0
파일: cli.py 프로젝트: feast-dev/feast
def data_source_list(ctx: click.Context):
    """
    List all data sources
    """
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    store = FeatureStore(repo_path=str(repo_path))
    # One row per data source: its name and its class.
    rows = [[source.name, source.__class__] for source in store.list_data_sources()]

    from tabulate import tabulate

    # The warning is emitted on every call, before the table is printed.
    warnings.warn(
        "Listing data sources will only work properly if all data sources have names or table names specified. "
        "Starting Feast 0.21, data source unique names will be required to encourage data source discovery",
        RuntimeWarning,
    )
    rendered = tabulate(rows, headers=["NAME", "CLASS"], tablefmt="plain")
    print(rendered)
예제 #30
0
 def feature_store_with_local_registry(self):
     """Return a local-provider FeatureStore whose metadata and online stores use temp files.

     Both files are created with ``mkstemp()`` and are not deleted here.
     """
     import os

     registry_fd, registry_path = mkstemp()
     online_store_fd, online_store_path = mkstemp()
     # mkstemp() returns an open OS-level descriptor along with the path. Only
     # the paths are used below, so close both descriptors instead of leaking them.
     os.close(registry_fd)
     os.close(online_store_fd)
     return FeatureStore(config=RepoConfig(
         metadata_store=registry_path,
         project="default",
         provider="local",
         online_store=OnlineStoreConfig(local=LocalOnlineStoreConfig(
             path=online_store_path)),
     ))