Ejemplo n.º 1
0
def test_3rd_party_providers() -> None:
    """
    Test running apply on third party providers
    """
    runner = CliRunner()
    # Check with incorrect built-in provider name (no dots)
    with setup_third_party_provider_repo("feast123") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Provider 'feast123' is not implemented")
    # Check with incorrect third-party provider name (with dots)
    with setup_third_party_provider_repo("feast_foo.provider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import provider module 'feast_foo'")
    # Check with incorrect third-party provider name (with dots)
    with setup_third_party_provider_repo("foo.FooProvider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(1)
        assertpy.assert_that(output).contains(
            b"Could not import provider 'FooProvider' from module 'foo'")
    # Check with correct third-party provider name
    with setup_third_party_provider_repo(
            "foo.provider.FooProvider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(0)
Ejemplo n.º 2
0
def test_missing_bq_source_fail() -> None:
    project_id = "".join(
        random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
    )
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(
            dedent(
                f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: gcp
        """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            (
                Path(__file__).parent / "example_feature_repo_with_missing_bq_source.py"
            ).read_text()
        )

        returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
        assert returncode == 1
        assert b"DataSourceNotFoundException" in output
Ejemplo n.º 3
0
def test_e2e_local() -> None:
    """
    A more comprehensive than "basic" test, using local provider.

    1. Create a repo.
    2. Apply
    3. Ingest some data to online store from parquet
    4. Read from the online store to make sure it made it there.
    """

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:

        # Generate some test data in parquet format.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)

        driver_entities = [1001, 1002, 1003, 1004, 1005]
        driver_df = driver_data.create_driver_hourly_stats_df(
            driver_entities, start_date, end_date)

        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(path=driver_stats_path,
                             allow_truncated_timestamps=True)

        # Note that runner takes care of running apply/teardown for us here.
        # We patch python code in example_feature_repo_2.py to set the path to Parquet files.
        with runner.local_repo(
                get_example_repo("example_feature_repo_2.py").replace(
                    "%PARQUET_PATH%", driver_stats_path),
                "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            r = runner.run(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df,
                                    end_date - timedelta(days=7))

            # feast materialize-incremental
            r = runner.run(
                ["materialize-incremental",
                 end_date.isoformat()],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date)

        # Test a failure case when the parquet file doesn't include a join key
        with runner.local_repo(
                get_example_repo(
                    "example_feature_repo_with_entity_join_key.py").replace(
                        "%PARQUET_PATH%", driver_stats_path),
                "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            returncode, output = runner.run_with_output(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert returncode != 0
            assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(
                output)