Esempio n. 1
0
def test_partial() -> None:
    """
    Add another table to existing repo using partial apply API. Make sure both the table
    applied via CLI apply and the new table are passing RW test.
    """

    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:

        driver_locations_source = BigQuerySource(
            table_ref="feast-oss.public.drivers",
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        driver_locations_100 = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=Duration(seconds=86400 * 1),
            features=[
                Feature(name="lat", dtype=ValueType.FLOAT),
                Feature(name="lon", dtype=ValueType.STRING),
                Feature(name="name", dtype=ValueType.STRING),
            ],
            online=True,
            batch_source=driver_locations_source,
            tags={},
        )

        store.apply([driver_locations_100])

        basic_rw_test(store, view_name="driver_locations")
        basic_rw_test(store, view_name="driver_locations_100")
Esempio n. 2
0
def test_missing_bq_source_fail() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(10))
    runner = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: gcp
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            get_example_repo("example_feature_repo_with_missing_bq_source.py"))

        returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
        assert returncode == 1
        assert b"DataSourceNotFoundException" in output
Esempio n. 3
0
def test_partial() -> None:
    """
    Add another table to existing repo using partial apply API. Make sure both the table
    applied via CLI apply and the new table are passing RW test.
    """

    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:

        driver_locations_source = BigQuerySource(
            table="feast-oss.public.drivers",
            timestamp_field="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        driver_locations_100 = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=timedelta(days=1),
            schema=[
                Field(name="lat", dtype=Float32),
                Field(name="lon", dtype=String),
                Field(name="name", dtype=String),
            ],
            online=True,
            batch_source=driver_locations_source,
            tags={},
        )

        store.apply([driver_locations_100])

        basic_rw_test(store, view_name="driver_locations")
        basic_rw_test(store, view_name="driver_locations_100")
Esempio n. 4
0
def test_cli_apply_duplicated_featureview_names() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            get_example_repo(
                "example_feature_repo_with_duplicated_featureview_names.py"))
        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert (rc != 0
                and b"Please ensure that all feature view names are unique"
                in output)
def test_cli_apply_imported_featureview_with_duplication() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
        repo_example_2 = repo_path / "example_2.py"
        repo_example_2.write_text(
            "from datetime import timedelta\n"
            "from example import driver_hourly_stats, driver_hourly_stats_view\n"
            "from feast import FeatureService, FeatureView\n"
            "a_feature_service = FeatureService(\n"
            "   name='driver_locations_service',\n"
            "   features=[driver_hourly_stats_view],\n"
            ")\n"
            "driver_hourly_stats_view_2 = FeatureView(\n"
            "   name='driver_hourly_stats',\n"
            "   entities=['driver_id'],\n"
            "   ttl=timedelta(days=1),\n"
            "   online=True,\n"
            "   batch_source=driver_hourly_stats,\n"
            "   tags={'dummy': 'true'})\n"
        )

        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0
        assert (
            b"More than one feature view with name driver_hourly_stats found." in output
        )
Esempio n. 6
0
def test_feature_service_read() -> None:
    """
    Read feature values from the FeatureStore using a FeatureService.
    """

    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:

        basic_rw_test(
            store,
            view_name="driver_locations",
            feature_service_name="driver_locations_service",
        )
Esempio n. 7
0
def test_basic() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(10))
    runner = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
            project: {project_id}
            registry: {data_path / "registry.db"}
            provider: aws
            online_store:
                type: dynamodb
                region: us-west-2
            offline_store:
              type: redshift
              cluster_id: feast-integration-tests
              region: us-west-2
              user: admin
              database: feast
              s3_staging_location: s3://feast-integration-tests/redshift
              iam_role: arn:aws:iam::402087665549:role/redshift_s3_access_role
            """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))

        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        result = runner.run(["teardown"], cwd=repo_path)
        assert result.returncode == 0
Esempio n. 8
0
def test_basic() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(10))
    runner = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: local
        offline_store:
            type: bigquery
        online_store:
            type: redis
            connection_string: localhost:6379,db=0
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))

        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        result = runner.run(["teardown"], cwd=repo_path)
        assert result.returncode == 0
def test_cli_apply_imported_featureview() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
        repo_example_2 = repo_path / "example_2.py"
        repo_example_2.write_text(
            "from example import driver_hourly_stats_view\n"
            "from feast import FeatureService\n"
            "a_feature_service = FeatureService(\n"
            "   name='driver_locations_service',\n"
            "   features=[driver_hourly_stats_view],\n"
            ")\n"
        )

        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc == 0
        assert b"Created feature service driver_locations_service" in output
def test_read_pre_applied() -> None:
    """
    Read feature values from the FeatureStore using a FeatureService.
    """
    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:

        assert len(store.list_feature_services()) == 1
        fs = store.get_feature_service("driver_locations_service")
        assert len(fs.tags) == 1
        assert fs.tags["release"] == "production"

        fv = store.get_feature_view("driver_locations")

        fs = FeatureService(name="new_feature_service", features=[fv[["lon"]]])

        store.apply([fs])

        assert len(store.list_feature_services()) == 2
        store.get_feature_service("new_feature_service")
Esempio n. 11
0
def test_nullable_online_store(test_nullable_online_store) -> None:
    project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}"
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        try:
            repo_path = Path(repo_dir_name)
            feature_store_yaml = make_feature_store_yaml(
                project, test_nullable_online_store, repo_path)

            repo_config = repo_path / "feature_store.yaml"

            repo_config.write_text(dedent(feature_store_yaml))

            repo_example = repo_path / "example.py"
            repo_example.write_text(
                get_example_repo("example_feature_repo_1.py"))
            result = runner.run(["apply"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
        finally:
            runner.run(["teardown"], cwd=repo_path)
Esempio n. 12
0
def test_non_local_feature_repo() -> None:
    """
    Test running apply on a sample repo, and make sure the infra gets created.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:

        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                """
        project: foo
        registry: data/registry.db
        provider: local
        online_store:
            path: data/online_store.db
        offline_store:
            type: bigquery
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))

        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        fs = FeatureStore(repo_path=str(repo_path))
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
Esempio n. 13
0
def test_connection_error() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(10))
    runner = CliRunner()
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: local
        offline_store:
            type: file
        online_store:
            type: redis
            connection_string: localhost:6379,db=0=
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_2.py"))

        result = runner.run(["apply"], cwd=repo_path)
        assert result.returncode == 0

        # Redis does not support names for its databases.
        with pytest.raises(redis.exceptions.ResponseError):
            basic_rw_test(
                FeatureStore(repo_path=str(repo_path), config=None),
                view_name="driver_hourly_stats",
            )
Esempio n. 14
0
def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes):
    with tempfile.TemporaryDirectory(
    ) as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo(example_repo_file_name))
        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0 and expected_error in output
Esempio n. 15
0
def test_workflow() -> None:
    """
    Test running apply on a sample repo, and make sure the infra gets created.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        offline_store:
            type: bigquery
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))

        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # entity & feature view list commands should succeed
        result = runner.run(["entities", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-views", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-services", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # entity & feature view describe commands should succeed when objects exist
        result = runner.run(["entities", "describe", "driver"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(
            ["feature-views", "describe", "driver_locations"], cwd=repo_path
        )
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(
            ["feature-services", "describe", "driver_locations_service"], cwd=repo_path
        )
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # entity & feature view describe commands should fail when objects don't exist
        result = runner.run(["entities", "describe", "foo"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-views", "describe", "foo"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-services", "describe", "foo"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
Esempio n. 16
0
def test_e2e_local() -> None:
    """
    A more comprehensive than "basic" test, using local provider.

    1. Create a repo.
    2. Apply
    3. Ingest some data to online store from parquet
    4. Read from the online store to make sure it made it there.
    """

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:

        # Generate some test data in parquet format.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)

        driver_entities = [1001, 1002, 1003, 1004, 1005]
        driver_df = driver_data.create_driver_hourly_stats_df(
            driver_entities, start_date, end_date
        )
        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True)

        global_df = driver_data.create_global_daily_stats_df(start_date, end_date)
        global_stats_path = os.path.join(data_dir, "global_stats.parquet")
        global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True)

        # Note that runner takes care of running apply/teardown for us here.
        # We patch python code in example_feature_repo_2.py to set the path to Parquet files.
        with runner.local_repo(
            get_example_repo("example_feature_repo_2.py")
            .replace("%PARQUET_PATH%", driver_stats_path)
            .replace("%PARQUET_PATH_GLOBAL%", global_stats_path),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            r = runner.run(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date - timedelta(days=7))

            # feast materialize-incremental
            r = runner.run(
                ["materialize-incremental", end_date.isoformat()],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date)

        # Test a failure case when the parquet file doesn't include a join key
        with runner.local_repo(
            get_example_repo("example_feature_repo_with_entity_join_key.py").replace(
                "%PARQUET_PATH%", driver_stats_path
            ),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            returncode, output = runner.run_with_output(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert returncode != 0
            assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output)
Esempio n. 17
0
def test_universal_cli(test_repo_config) -> None:
    project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}"

    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        feature_store_yaml = make_feature_store_yaml(project, test_repo_config,
                                                     repo_dir_name)
        repo_path = Path(repo_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(dedent(feature_store_yaml))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))
        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # Store registry contents, to be compared later.
        fs = FeatureStore(repo_path=str(repo_path))
        registry_dict = fs.registry.to_dict(project=project)

        # entity & feature view list commands should succeed
        result = runner.run(["entities", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-views", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-services", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # entity & feature view describe commands should succeed when objects exist
        result = runner.run(["entities", "describe", "driver"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-views", "describe", "driver_locations"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(
            ["feature-services", "describe", "driver_locations_service"],
            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        # entity & feature view describe commands should fail when objects don't exist
        result = runner.run(["entities", "describe", "foo"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-views", "describe", "foo"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-services", "describe", "foo"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        # Confirm that registry contents have not changed.
        assertpy.assert_that(registry_dict).is_equal_to(
            fs.registry.to_dict(project=project))

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
Esempio n. 18
0
def test_online() -> None:
    """
    Test reading from the online store in local mode.
    """
    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:
        # Write some data to two tables

        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined")

        provider = store._get_provider()

        driver_key = EntityKeyProto(join_keys=["driver_id"],
                                    entity_values=[ValueProto(int64_val=1)])
        provider.online_write_batch(
            config=store.config,
            table=driver_locations_fv,
            data=[(
                driver_key,
                {
                    "lat": ValueProto(double_val=0.1),
                    "lon": ValueProto(string_val="1.0"),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        customer_key = EntityKeyProto(
            join_keys=["customer_id"],
            entity_values=[ValueProto(string_val="5")])
        provider.online_write_batch(
            config=store.config,
            table=customer_profile_fv,
            data=[(
                customer_key,
                {
                    "avg_orders_day": ValueProto(float_val=1.0),
                    "name": ValueProto(string_val="John"),
                    "age": ValueProto(int64_val=3),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        customer_key = EntityKeyProto(
            join_keys=["customer_id", "driver_id"],
            entity_values=[
                ValueProto(string_val="5"),
                ValueProto(int64_val=1)
            ],
        )
        provider.online_write_batch(
            config=store.config,
            table=customer_driver_combined_fv,
            data=[(
                customer_key,
                {
                    "trips": ValueProto(int64_val=7)
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        # Retrieve two features using two keys, one valid one non-existing
        result = store.get_online_features(
            features=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[
                {
                    "driver_id": 1,
                    "customer_id": "5"
                },
                {
                    "driver_id": 1,
                    "customer_id": 5
                },
            ],
            full_feature_names=False,
        ).to_dict()

        assert "lon" in result
        assert "avg_orders_day" in result
        assert "name" in result
        assert result["driver_id"] == [1, 1]
        assert result["customer_id"] == ["5", "5"]
        assert result["lon"] == ["1.0", "1.0"]
        assert result["avg_orders_day"] == [1.0, 1.0]
        assert result["name"] == ["John", "John"]
        assert result["trips"] == [7, 7]

        # Ensure features are still in result when keys not found
        result = store.get_online_features(
            features=["customer_driver_combined:trips"],
            entity_rows=[{
                "driver_id": 0,
                "customer_id": 0
            }],
            full_feature_names=False,
        ).to_dict()

        assert "trips" in result

        # invalid table reference
        with pytest.raises(FeatureViewNotFoundException):
            store.get_online_features(
                features=["driver_locations_bad:lon"],
                entity_rows=[{
                    "driver_id": 1
                }],
                full_feature_names=False,
            )

        # Create new FeatureStore object with fast cache invalidation
        cache_ttl = 1
        fs_fast_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=cache_ttl),
            online_store=store.config.online_store,
            project=store.project,
            provider=store.config.provider,
        ))

        # Should download the registry and cache it permanently (or until manually refreshed)
        result = fs_fast_ttl.get_online_features(
            features=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver_id": 1,
                "customer_id": 5
            }],
            full_feature_names=False,
        ).to_dict()
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # Wait for registry to expire
        time.sleep(cache_ttl)

        # Will try to reload registry because it has expired (it will fail because we deleted the actual registry file)
        with pytest.raises(FileNotFoundError):
            fs_fast_ttl.get_online_features(
                features=[
                    "driver_locations:lon",
                    "customer_profile:avg_orders_day",
                    "customer_profile:name",
                    "customer_driver_combined:trips",
                ],
                entity_rows=[{
                    "driver_id": 1,
                    "customer_id": 5
                }],
                full_feature_names=False,
            ).to_dict()

        # Restore registry.db so that we can see if it actually reloads registry
        os.rename(store.config.registry + "_fake", store.config.registry)

        # Test if registry is actually reloaded and whether results return
        result = fs_fast_ttl.get_online_features(
            features=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver_id": 1,
                "customer_id": 5
            }],
            full_feature_names=False,
        ).to_dict()
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # Create a registry with infinite cache (for users that want to manually refresh the registry)
        fs_infinite_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=0),
            online_store=store.config.online_store,
            project=store.project,
            provider=store.config.provider,
        ))

        # Should return results (and fill the registry cache)
        result = fs_infinite_ttl.get_online_features(
            features=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver_id": 1,
                "customer_id": 5
            }],
            full_feature_names=False,
        ).to_dict()
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # Wait a bit so that an arbitrary TTL would take effect
        time.sleep(2)

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # TTL is infinite so this method should use registry cache
        result = fs_infinite_ttl.get_online_features(
            features=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver_id": 1,
                "customer_id": 5
            }],
            full_feature_names=False,
        ).to_dict()
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # Force registry reload (should fail because file is missing)
        with pytest.raises(FileNotFoundError):
            fs_infinite_ttl.refresh_registry()

        # Restore registry.db so that teardown works
        os.rename(store.config.registry + "_fake", store.config.registry)
Esempio n. 19
0
def test_universal_cli(environment: Environment):
    project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}"
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        try:
            repo_path = Path(repo_dir_name)
            feature_store_yaml = make_feature_store_yaml(
                project, environment.test_repo_config, repo_path)

            repo_config = repo_path / "feature_store.yaml"

            repo_config.write_text(dedent(feature_store_yaml))

            repo_example = repo_path / "example.py"
            repo_example.write_text(
                get_example_repo("example_feature_repo_1.py"))
            result = runner.run(["apply"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)

            # Store registry contents, to be compared later.
            fs = FeatureStore(repo_path=str(repo_path))
            registry_dict = fs.registry.to_dict(project=project)
            # Save only the specs, not the metadata.
            registry_specs = {
                key: [fco["spec"] if "spec" in fco else fco for fco in value]
                for key, value in registry_dict.items()
            }

            # entity & feature view list commands should succeed
            result = runner.run(["entities", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["feature-views", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["feature-services", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["data-sources", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)

            # entity & feature view describe commands should succeed when objects exist
            result = runner.run(["entities", "describe", "driver"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(
                ["feature-views", "describe", "driver_locations"],
                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(
                ["feature-services", "describe", "driver_locations_service"],
                cwd=repo_path,
            )
            assertpy.assert_that(result.returncode).is_equal_to(0)
            assertpy.assert_that(fs.list_feature_views()).is_length(4)
            result = runner.run(
                ["data-sources", "describe", "customer_profile_source"],
                cwd=repo_path,
            )
            assertpy.assert_that(result.returncode).is_equal_to(0)
            assertpy.assert_that(fs.list_data_sources()).is_length(4)

            # entity & feature view describe commands should fail when objects don't exist
            result = runner.run(["entities", "describe", "foo"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["feature-views", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["feature-services", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["data-sources", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)

            # Doing another apply should be a no op, and should not cause errors
            result = runner.run(["apply"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            basic_rw_test(
                FeatureStore(repo_path=str(repo_path), config=None),
                view_name="driver_locations",
            )

            # Confirm that registry contents have not changed.
            registry_dict = fs.registry.to_dict(project=project)
            assertpy.assert_that(registry_specs).is_equal_to({
                key: [fco["spec"] if "spec" in fco else fco for fco in value]
                for key, value in registry_dict.items()
            })

            result = runner.run(["teardown"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
        finally:
            runner.run(["teardown"], cwd=repo_path)
Esempio n. 20
0
def test_online_to_df():
    """
    Test dataframe conversion. Make sure the response columns and rows are
    the same order as the request.
    """

    driver_ids = [1, 2, 3]
    customer_ids = [4, 5, 6]
    name = "foo"
    lon_multiply = 1.0
    lat_multiply = 0.1
    age_multiply = 10
    avg_order_day_multiply = 1.0

    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"),
                           "bigquery") as store:
        # Write three tables to online store
        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined")
        provider = store._get_provider()

        for (d, c) in zip(driver_ids, customer_ids):
            """
            driver table:
                                    lon                    lat
                1                   1.0                    0.1
                2                   2.0                    0.2
                3                   3.0                    0.3
            """
            driver_key = EntityKeyProto(
                join_keys=["driver_id"],
                entity_values=[ValueProto(int64_val=d)])
            provider.online_write_batch(
                config=store.config,
                table=driver_locations_fv,
                data=[(
                    driver_key,
                    {
                        "lat": ValueProto(double_val=d * lat_multiply),
                        "lon": ValueProto(string_val=str(d * lon_multiply)),
                    },
                    datetime.utcnow(),
                    datetime.utcnow(),
                )],
                progress=None,
            )
            """
            customer table
            customer     avg_orders_day          name        age
                4           4.0                  foo4         40
                5           5.0                  foo5         50
                6           6.0                  foo6         60
            """
            customer_key = EntityKeyProto(
                join_keys=["customer_id"],
                entity_values=[ValueProto(string_val=str(c))])
            provider.online_write_batch(
                config=store.config,
                table=customer_profile_fv,
                data=[(
                    customer_key,
                    {
                        "avg_orders_day":
                        ValueProto(float_val=c * avg_order_day_multiply),
                        "name":
                        ValueProto(string_val=name + str(c)),
                        "age":
                        ValueProto(int64_val=c * age_multiply),
                    },
                    datetime.utcnow(),
                    datetime.utcnow(),
                )],
                progress=None,
            )
            """
            customer_driver_combined table
            customer  driver    trips
                4       1       4
                5       2       10
                6       3       18
            """
            combo_keys = EntityKeyProto(
                join_keys=["customer_id", "driver_id"],
                entity_values=[
                    ValueProto(string_val=str(c)),
                    ValueProto(int64_val=d)
                ],
            )
            provider.online_write_batch(
                config=store.config,
                table=customer_driver_combined_fv,
                data=[(
                    combo_keys,
                    {
                        "trips": ValueProto(int64_val=c * d)
                    },
                    datetime.utcnow(),
                    datetime.utcnow(),
                )],
                progress=None,
            )

        # Get online features in dataframe
        result_df = store.get_online_features(
            features=[
                "driver_locations:lon",
                "driver_locations:lat",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_profile:age",
                "customer_driver_combined:trips",
            ],
            # Reverse the row order
            entity_rows=[{
                "driver_id": d,
                "customer_id": c
            } for (d, c) in zip(reversed(driver_ids), reversed(customer_ids))],
        ).to_df()
        """
        Construct the expected dataframe with reversed row order like so:
        driver  customer     lon    lat     avg_orders_day      name        age     trips
            3       6        3.0    0.3         6.0             foo6        60       18
            2       5        2.0    0.2         5.0             foo5        50       10
            1       4        1.0    0.1         4.0             foo4        40       4
        """
        df_dict = {
            "driver_id": driver_ids,
            "customer_id": [str(c) for c in customer_ids],
            "lon": [str(d * lon_multiply) for d in driver_ids],
            "lat": [d * lat_multiply for d in driver_ids],
            "avg_orders_day":
            [c * avg_order_day_multiply for c in customer_ids],
            "name": [name + str(c) for c in customer_ids],
            "age": [c * age_multiply for c in customer_ids],
            "trips": [d * c for (d, c) in zip(driver_ids, customer_ids)],
        }
        # Requested column order
        ordered_column = [
            "driver_id",
            "customer_id",
            "lon",
            "lat",
            "avg_orders_day",
            "name",
            "age",
            "trips",
        ]
        expected_df = pd.DataFrame(
            {k: reversed(v)
             for (k, v) in df_dict.items()})
        assert_frame_equal(result_df[ordered_column], expected_df)