def test_historical_features_with_missing_request_data(
    environment, universal_data_sources, full_feature_names
):
    store = environment.feature_store
    (_, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    store.apply([driver(), customer(), location(), *feature_views.values()])

    # If request data is missing that's needed for on demand transform, throw an error
    with pytest.raises(RequestDataNotFoundInEntityDfException):
        store.get_historical_features(
            entity_df=datasets.entity_df,
            features=[
                "customer_profile:current_balance",
                "customer_profile:avg_passenger_count",
                "customer_profile:lifetime_trip_count",
                "conv_rate_plus_100:conv_rate_plus_100",
                "conv_rate_plus_100:conv_rate_plus_val_to_add",
                "global_stats:num_rides",
                "global_stats:avg_ride_length",
                "field_mapping:feature_name",
            ],
            full_feature_names=full_feature_names,
        )
def test_push_features_and_read(environment, universal_data_sources):
    store = environment.feature_store
    (_, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    store.apply([driver(), customer(), location(), *feature_views.values()])

    data = {
        "location_id": [1],
        "temperature": [4],
        "event_timestamp": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
        "created": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
    }
    df_ingest = pd.DataFrame(data)

    store.push("location_stats_push_source", df_ingest)

    online_resp = store.get_online_features(
        features=["pushable_location_stats:temperature"],
        entity_rows=[{"location_id": 1}],
    )
    online_resp_dict = online_resp.to_dict()
    assert online_resp_dict["location_id"] == [1]
    assert online_resp_dict["temperature"] == [4]
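
# For context, a minimal sketch (not the repo's actual definition) of the push
# source and feature view that test_push_features_and_read assumes. The string
# names match the test above; `_location_entity` and the `batch_source`
# parameter are illustrative placeholders.
def _example_pushable_location_stats(location_batch_source):
    from feast import Entity, FeatureView, Field, PushSource
    from feast.types import Int64

    _location_entity = Entity(name="location", join_keys=["location_id"])
    push_source = PushSource(
        name="location_stats_push_source",
        batch_source=location_batch_source,  # any offline batch source works here
    )
    return FeatureView(
        name="pushable_location_stats",
        entities=[_location_entity],
        schema=[Field(name="temperature", dtype=Int64)],
        source=push_source,
    )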
def test_registration_and_retrieval_from_custom_s3_endpoint(universal_data_sources):
    config = IntegrationTestRepoConfig(
        offline_store_creator="tests.integration.feature_repos.universal.data_sources.file.S3FileDataSourceCreator"
    )

    import os

    if "AWS_ACCESS_KEY_ID" in os.environ:
        raise Exception(
            "AWS_ACCESS_KEY_ID has already been set in the environment. Setting it again may cause a conflict. "
            "It may be better to deduplicate AWS configuration or use sub-processes for isolation"
        )

    os.environ["AWS_ACCESS_KEY_ID"] = "AKIAIOSFODNN7EXAMPLE"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"

    with construct_test_environment(config) as environment:
        fs = environment.feature_store

        entities, datasets, data_sources = universal_data_sources
        feature_views = construct_universal_feature_views(data_sources)

        feast_objects = []
        feast_objects.extend(feature_views.values())
        feast_objects.extend([driver(), customer()])
        fs.apply(feast_objects)
        fs.materialize(environment.start_date, environment.end_date)

        out = fs.get_online_features(
            features=["driver_stats:conv_rate"], entity_rows=[{"driver": 5001}]
        ).to_dict()
        assert out["conv_rate"][0] is not None

    del os.environ["AWS_ACCESS_KEY_ID"]
    del os.environ["AWS_SECRET_ACCESS_KEY"]
def test_historical_retrieval_with_validation(environment, universal_data_sources):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)
    store.apply([driver(), customer(), location(), *feature_views.values()])

    # Create two identical retrieval jobs
    entity_df = datasets.entity_df.drop(
        columns=["order_id", "origin_id", "destination_id"]
    )
    reference_job = store.get_historical_features(
        entity_df=entity_df,
        features=_features,
    )
    job = store.get_historical_features(
        entity_df=entity_df,
        features=_features,
    )

    # Save a dataset using the reference job and retrieve it
    store.create_saved_dataset(
        from_=reference_job,
        name="my_training_dataset",
        storage=environment.data_source_creator.create_saved_dataset_destination(),
    )
    saved_dataset = store.get_saved_dataset("my_training_dataset")

    # If validation passes, no exception is raised at this point
    reference = saved_dataset.as_reference(profiler=configurable_profiler)
    job.to_df(validation_reference=reference)
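
# `configurable_profiler` is defined elsewhere in the test suite. A plausible
# minimal sketch, assuming Feast's great_expectations-based DQM profiler and
# GE's UserConfigurableProfiler; the ignored columns and excluded expectations
# below are illustrative, not the repo's exact configuration.
from great_expectations.core import ExpectationSuite
from great_expectations.dataset import PandasDataset
from great_expectations.profile.user_configurable_profiler import (
    UserConfigurableProfiler,
)

from feast.dqm.profilers.ge_profiler import ge_profiler


@ge_profiler
def example_configurable_profiler(dataset: PandasDataset) -> ExpectationSuite:
    # Profile every feature column, skipping the timestamp column and overly
    # strict table-shape expectations that would fail on any resampled data.
    return UserConfigurableProfiler(
        profile_dataset=dataset,
        ignored_columns=["event_timestamp"],
        excluded_expectations=[
            "expect_table_columns_to_match_ordered_list",
            "expect_table_row_count_to_be_between",
        ],
    ).build_suite()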
def test_online_retrieval_with_event_timestamps(
    environment, universal_data_sources, full_feature_names
):
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    fs.apply([driver(), feature_views.driver, feature_views.global_fv])

    # fake data to ingest into the Online Store
    data = {
        "driver_id": [1, 2],
        "conv_rate": [0.5, 0.3],
        "acc_rate": [0.6, 0.4],
        "avg_daily_trips": [4, 5],
        "event_timestamp": [
            pd.to_datetime(1646263500, utc=True, unit="s"),
            pd.to_datetime(1646263600, utc=True, unit="s"),
        ],
        "created": [
            pd.to_datetime(1646263500, unit="s"),
            pd.to_datetime(1646263600, unit="s"),
        ],
    }
    df_ingest = pd.DataFrame(data)

    # directly ingest data into the Online Store
    fs.write_to_online_store("driver_stats", df_ingest)

    response = fs.get_online_features(
        features=[
            "driver_stats:avg_daily_trips",
            "driver_stats:acc_rate",
            "driver_stats:conv_rate",
        ],
        entity_rows=[{"driver_id": 1}, {"driver_id": 2}],
    )
    df = response.to_df(True)
    assertpy.assert_that(len(df)).is_equal_to(2)
    assertpy.assert_that(df["driver_id"].iloc[0]).is_equal_to(1)
    assertpy.assert_that(df["driver_id"].iloc[1]).is_equal_to(2)
    assertpy.assert_that(
        df["avg_daily_trips" + TIMESTAMP_POSTFIX].iloc[0]
    ).is_equal_to(1646263500)
    assertpy.assert_that(
        df["avg_daily_trips" + TIMESTAMP_POSTFIX].iloc[1]
    ).is_equal_to(1646263600)
    assertpy.assert_that(df["acc_rate" + TIMESTAMP_POSTFIX].iloc[0]).is_equal_to(
        1646263500
    )
    assertpy.assert_that(df["acc_rate" + TIMESTAMP_POSTFIX].iloc[1]).is_equal_to(
        1646263600
    )
    assertpy.assert_that(df["conv_rate" + TIMESTAMP_POSTFIX].iloc[0]).is_equal_to(
        1646263500
    )
    assertpy.assert_that(df["conv_rate" + TIMESTAMP_POSTFIX].iloc[1]).is_equal_to(
        1646263600
    )
def test_online_retrieval(environment, universal_data_sources, benchmark):
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    feature_service = FeatureService(
        "convrate_plus100",
        features=[
            feature_views["driver"][["conv_rate"]],
            feature_views["driver_odfv"],
        ],
    )

    feast_objects = []
    feast_objects.extend(feature_views.values())
    feast_objects.extend([driver(), customer(), location(), feature_service])
    fs.apply(feast_objects)
    fs.materialize(environment.start_date, environment.end_date)

    sample_drivers = random.sample(entities["driver"], 10)
    sample_customers = random.sample(entities["customer"], 10)

    entity_rows = [
        {"driver": d, "customer_id": c, "val_to_add": 50}
        for (d, c) in zip(sample_drivers, sample_customers)
    ]

    feature_refs = [
        "driver_stats:conv_rate",
        "driver_stats:avg_daily_trips",
        "customer_profile:current_balance",
        "customer_profile:avg_passenger_count",
        "customer_profile:lifetime_trip_count",
        "conv_rate_plus_100:conv_rate_plus_100",
        "conv_rate_plus_100:conv_rate_plus_val_to_add",
        "global_stats:num_rides",
        "global_stats:avg_ride_length",
    ]
    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
    # Remove the on demand feature view output features, since they're not present in the source dataframe
    unprefixed_feature_refs.remove("conv_rate_plus_100")
    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")

    benchmark(
        fs.get_online_features,
        features=feature_refs,
        entity_rows=entity_rows,
    )
@pytest.fixture
def setup_python_fs_client():
    config = IntegrationTestRepoConfig()
    environment = construct_test_environment(config)
    fs = environment.feature_store
    try:
        entities, datasets, data_sources = construct_universal_test_data(environment)
        feature_views = construct_universal_feature_views(data_sources)
        feast_objects: List[FeastObject] = []
        feast_objects.extend(feature_views.values())
        feast_objects.extend([driver(), customer(), location()])
        fs.apply(feast_objects)
        fs.materialize(environment.start_date, environment.end_date)
        client = TestClient(get_app(fs))
        yield client
    finally:
        fs.teardown()
        environment.data_source_creator.teardown()
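
# Example of how a test might exercise the fixture above: POST to the python
# feature server's /get-online-features endpoint via the TestClient. The
# payload shape follows Feast's HTTP API (features list plus a mapping of
# join keys to value lists); treat the exact fields and the sample entity id
# as a sketch rather than a contract.
def test_get_online_features_via_http(setup_python_fs_client):
    client = setup_python_fs_client
    response = client.post(
        "/get-online-features",
        json={
            "features": ["driver_stats:conv_rate"],
            "entities": {"driver_id": [5001]},
        },
    )
    assert response.status_code == 200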
def test_historical_retrieval_fails_on_validation(environment, universal_data_sources):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)
    store.apply([driver(), customer(), location(), *feature_views.values()])

    entity_df = datasets.entity_df.drop(
        columns=["order_id", "origin_id", "destination_id"]
    )
    reference_job = store.get_historical_features(
        entity_df=entity_df,
        features=_features,
    )
    store.create_saved_dataset(
        from_=reference_job,
        name="my_other_dataset",
        storage=environment.data_source_creator.create_saved_dataset_destination(),
    )

    job = store.get_historical_features(
        entity_df=entity_df,
        features=_features,
    )

    with pytest.raises(ValidationFailed) as exc_info:
        job.to_df(
            validation_reference=store.get_saved_dataset(
                "my_other_dataset"
            ).as_reference(profiler=profiler_with_unrealistic_expectations)
        )

    failed_expectations = exc_info.value.report.errors
    assert len(failed_expectations) == 2

    assert failed_expectations[0].check_name == "expect_column_max_to_be_between"
    assert failed_expectations[0].column_name == "current_balance"

    assert failed_expectations[1].check_name == "expect_column_values_to_be_in_set"
    assert failed_expectations[1].column_name == "avg_passenger_count"
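
# `profiler_with_unrealistic_expectations` must produce exactly the two
# failures asserted above. A minimal sketch with deliberately impossible
# bounds; the concrete numbers are illustrative, not the repo's actual values.
@ge_profiler
def example_profiler_with_unrealistic_expectations(
    dataset: PandasDataset,
) -> ExpectationSuite:
    # current_balance is generated as a non-negative amount, so a negative
    # max range can never hold
    dataset.expect_column_max_to_be_between("current_balance", -1000, -100)
    # avg_passenger_count is likewise never negative
    dataset.expect_column_values_to_be_in_set(
        "avg_passenger_count", value_set={-1, -2}
    )
    return dataset.get_expectation_suite()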
def test_online_store_cleanup(environment, universal_data_sources):
    """
    Some online store implementations (like Redis) store features from different
    feature views, but with common entities, together. This can lead to deletion
    of all features attached to the entity when only one feature view was the
    deletion target (see https://github.com/feast-dev/feast/issues/2150).

    Plan:
    1. Register two feature views with the common entity "driver"
    2. Materialize data
    3. Check that features are available (via online retrieval)
    4. Delete one feature view
    5. Check that features for the other are still available
    6. Delete the other feature view (and create it again)
    7. Verify that features for both feature views were deleted
    """
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    driver_stats_fv = construct_universal_feature_views(data_sources).driver

    driver_entities = entities.driver_vals
    df = pd.DataFrame(
        {
            "ts_1": [environment.end_date] * len(driver_entities),
            "created_ts": [environment.end_date] * len(driver_entities),
            "driver_id": driver_entities,
            "value": np.random.random(size=len(driver_entities)),
        }
    )

    ds = environment.data_source_creator.create_data_source(
        df, destination_name="simple_driver_dataset"
    )

    simple_driver_fv = driver_feature_view(
        data_source=ds, name="test_universal_online_simple_driver"
    )

    fs.apply([driver(), simple_driver_fv, driver_stats_fv])
    fs.materialize(
        environment.start_date - timedelta(days=1),
        environment.end_date + timedelta(days=1),
    )
    expected_values = df.sort_values(by="driver_id")

    features = [f"{simple_driver_fv.name}:value"]
    entity_rows = [{"driver_id": driver_id} for driver_id in sorted(driver_entities)]

    online_features = fs.get_online_features(
        features=features, entity_rows=entity_rows
    ).to_dict()
    assert np.allclose(expected_values["value"], online_features["value"])

    fs.apply(
        objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False
    )

    online_features = fs.get_online_features(
        features=features, entity_rows=entity_rows
    ).to_dict()
    assert np.allclose(expected_values["value"], online_features["value"])

    fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False)

    def eventually_apply() -> Tuple[None, bool]:
        try:
            fs.apply([simple_driver_fv])
        except BotoCoreError:
            return None, False

        return None, True

    # The online store backend might apply schema updates with eventual
    # consistency, so recreating a table that was just deleted may need retries
    wait_retry_backoff(eventually_apply, timeout_secs=60)

    online_features = fs.get_online_features(
        features=features, entity_rows=entity_rows
    ).to_dict()
    assert all(v is None for v in online_features["value"])
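
# A minimal sketch of the `wait_retry_backoff` helper as used above, derived
# from its call site: retry a function returning (result, done) with
# exponential backoff until it reports done or the timeout elapses. The
# backoff constants are illustrative.
import time


def example_wait_retry_backoff(retry_fn, timeout_secs=60):
    delay = 1.0
    start = time.monotonic()
    while True:
        result, done = retry_fn()
        if done:
            return result
        if time.monotonic() - start > timeout_secs:
            raise TimeoutError("retry_fn did not succeed within timeout")
        time.sleep(delay)
        delay = min(delay * 2, 10.0)  # cap the backoff interval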
def test_online_retrieval(environment, universal_data_sources, full_feature_names):
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    feature_service = FeatureService(
        "convrate_plus100",
        features=[
            feature_views.driver[["conv_rate"]],
            feature_views.driver_odfv,
            feature_views.customer[["current_balance"]],
        ],
    )
    feature_service_entity_mapping = FeatureService(
        name="entity_mapping",
        features=[
            feature_views.location.with_name("origin").with_join_key_map(
                {"location_id": "origin_id"}
            ),
            feature_views.location.with_name("destination").with_join_key_map(
                {"location_id": "destination_id"}
            ),
        ],
    )

    feast_objects = []
    feast_objects.extend(feature_views.values())
    feast_objects.extend(
        [
            driver(),
            customer(),
            location(),
            feature_service,
            feature_service_entity_mapping,
        ]
    )
    fs.apply(feast_objects)
    fs.materialize(
        environment.start_date - timedelta(days=1),
        environment.end_date + timedelta(days=1),
    )

    entity_sample = datasets.orders_df.sample(10)[
        ["customer_id", "driver_id", "order_id", "event_timestamp"]
    ]
    orders_df = datasets.orders_df[
        (
            datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
            & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
        )
    ]

    sample_drivers = entity_sample["driver_id"]
    drivers_df = datasets.driver_df[
        datasets.driver_df["driver_id"].isin(sample_drivers)
    ]

    sample_customers = entity_sample["customer_id"]
    customers_df = datasets.customer_df[
        datasets.customer_df["customer_id"].isin(sample_customers)
    ]

    location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
    sample_location_pairs = location_pairs[
        np.random.choice(len(location_pairs), 10)
    ].T.tolist()
    origins_df = datasets.location_df[
        datasets.location_df["location_id"].isin(sample_location_pairs[0])
    ]
    destinations_df = datasets.location_df[
        datasets.location_df["location_id"].isin(sample_location_pairs[1])
    ]

    global_df = datasets.global_df

    entity_rows = [
        {"driver_id": d, "customer_id": c, "val_to_add": 50}
        for (d, c) in zip(sample_drivers, sample_customers)
    ]

    feature_refs = [
        "driver_stats:conv_rate",
        "driver_stats:avg_daily_trips",
        "customer_profile:current_balance",
        "customer_profile:avg_passenger_count",
        "customer_profile:lifetime_trip_count",
        "conv_rate_plus_100:conv_rate_plus_100",
        "conv_rate_plus_100:conv_rate_plus_val_to_add",
        "order:order_is_success",
        "global_stats:num_rides",
        "global_stats:avg_ride_length",
    ]
    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
    # Remove the on demand feature view output features, since they're not present in the source dataframe
    unprefixed_feature_refs.remove("conv_rate_plus_100")
    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")

    online_features_dict = get_online_features_dict(
        environment=environment,
        features=feature_refs,
        entity_rows=entity_rows,
        full_feature_names=full_feature_names,
    )

    # Test that the on demand feature views compute properly even if the
    # dependent conv_rate feature isn't requested.
    online_features_no_conv_rate = get_online_features_dict(
        environment=environment,
        features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"],
        entity_rows=entity_rows,
        full_feature_names=full_feature_names,
    )
    assert online_features_no_conv_rate is not None

    keys = set(online_features_dict.keys())
    expected_keys = set(
        f.replace(":", "__") if full_feature_names else f.split(":")[-1]
        for f in feature_refs
    ) | {"customer_id", "driver_id"}
    assert (
        keys == expected_keys
    ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"

    tc = unittest.TestCase()
    for i, entity_row in enumerate(entity_rows):
        df_features = get_latest_feature_values_from_dataframes(
            driver_df=drivers_df,
            customer_df=customers_df,
            orders_df=orders_df,
            global_df=global_df,
            entity_row=entity_row,
        )

        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
        tc.assertAlmostEqual(
            online_features_dict[
                response_feature_name(
                    "conv_rate_plus_100", feature_refs, full_feature_names
                )
            ][i],
            df_features["conv_rate"] + 100,
            delta=0.0001,
        )
        tc.assertAlmostEqual(
            online_features_dict[
                response_feature_name(
                    "conv_rate_plus_val_to_add", feature_refs, full_feature_names
                )
            ][i],
            df_features["conv_rate"] + df_features["val_to_add"],
            delta=0.0001,
        )
        for unprefixed_feature_ref in unprefixed_feature_refs:
            tc.assertAlmostEqual(
                df_features[unprefixed_feature_ref],
                online_features_dict[
                    response_feature_name(
                        unprefixed_feature_ref, feature_refs, full_feature_names
                    )
                ][i],
                delta=0.0001,
            )

    # Check what happens for missing values
    missing_responses_dict = get_online_features_dict(
        environment=environment,
        features=feature_refs,
        entity_rows=[{"driver_id": 0, "customer_id": 0, "val_to_add": 100}],
        full_feature_names=full_feature_names,
    )
    assert missing_responses_dict is not None
    for unprefixed_feature_ref in unprefixed_feature_refs:
        if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
            tc.assertIsNone(
                missing_responses_dict[
                    response_feature_name(
                        unprefixed_feature_ref, feature_refs, full_feature_names
                    )
                ][0]
            )

    # Check what happens for missing request data
    with pytest.raises(RequestDataNotFoundInEntityRowsException):
        get_online_features_dict(
            environment=environment,
            features=feature_refs,
            entity_rows=[{"driver_id": 0, "customer_id": 0}],
            full_feature_names=full_feature_names,
        )

    assert_feature_service_correctness(
        environment,
        feature_service,
        entity_rows,
        full_feature_names,
        drivers_df,
        customers_df,
        orders_df,
        global_df,
    )

    entity_rows = [
        {"origin_id": origin, "destination_id": destination}
        for (_driver, _customer, origin, destination) in zip(
            sample_drivers, sample_customers, *sample_location_pairs
        )
    ]
    assert_feature_service_entity_mapping_correctness(
        environment,
        feature_service_entity_mapping,
        entity_rows,
        full_feature_names,
        origins_df,
        destinations_df,
    )
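
# `response_feature_name` is a shared helper; a minimal sketch consistent with
# the three-argument form called above: resolve an unprefixed feature name to
# the response key, which is "<feature_view>__<name>" when full_feature_names
# is enabled and the bare name otherwise.
from typing import List


def example_response_feature_name(
    feature: str, feature_refs: List[str], full_feature_names: bool
) -> str:
    if full_feature_names:
        for ref in feature_refs:
            view_name, _, feature_name = ref.partition(":")
            if feature_name == feature:
                return f"{view_name}__{feature_name}"
    return feature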
def test_historical_features_persisting(
    environment, universal_data_sources, full_feature_names
):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    store.apply([driver(), customer(), location(), *feature_views.values()])

    entity_df = datasets.entity_df.drop(
        columns=["order_id", "origin_id", "destination_id"]
    )

    job = store.get_historical_features(
        entity_df=entity_df,
        features=[
            "customer_profile:current_balance",
            "customer_profile:avg_passenger_count",
            "customer_profile:lifetime_trip_count",
            "order:order_is_success",
            "global_stats:num_rides",
            "global_stats:avg_ride_length",
            "field_mapping:feature_name",
        ],
        full_feature_names=full_feature_names,
    )

    saved_dataset = store.create_saved_dataset(
        from_=job,
        name="saved_dataset",
        storage=environment.data_source_creator.create_saved_dataset_destination(),
        tags={"env": "test"},
    )

    event_timestamp = DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
    expected_df = get_expected_training_df(
        datasets.customer_df,
        feature_views.customer,
        datasets.driver_df,
        feature_views.driver,
        datasets.orders_df,
        feature_views.order,
        datasets.location_df,
        feature_views.location,
        datasets.global_df,
        feature_views.global_fv,
        datasets.field_mapping_df,
        feature_views.field_mapping,
        entity_df,
        event_timestamp,
        full_feature_names,
    ).drop(
        columns=[
            response_feature_name("conv_rate_plus_100", full_feature_names),
            response_feature_name("conv_rate_plus_100_rounded", full_feature_names),
            response_feature_name("avg_daily_trips", full_feature_names),
            response_feature_name("conv_rate", full_feature_names),
            "origin__temperature",
            "destination__temperature",
        ]
    )

    assert_frame_equal(
        expected_df,
        saved_dataset.to_df(),
        keys=[event_timestamp, "driver_id", "customer_id"],
    )
    assert_frame_equal(
        job.to_df(),
        saved_dataset.to_df(),
        keys=[event_timestamp, "driver_id", "customer_id"],
    )
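
# pandas.testing.assert_frame_equal has no `keys` parameter, so the calls
# above go through a local test helper. A minimal sketch, assuming it sorts
# and reindexes both frames by the given key columns (mirroring the
# sort/drop_duplicates pattern used elsewhere in this suite) before delegating
# to pandas:
from pandas.testing import assert_frame_equal as pd_assert_frame_equal


def example_assert_frame_equal(expected_df, actual_df, keys):
    expected = (
        expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True)
    )
    actual = (
        actual_df[expected_df.columns]
        .sort_values(by=keys)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    pd_assert_frame_equal(expected, actual, check_dtype=False)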
def test_historical_features_with_entities_from_query(
    environment, universal_data_sources, full_feature_names
):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    orders_table = table_name_from_data_source(data_sources.orders)
    if not orders_table:
        pytest.skip("Offline source is not sql-based")

    data_source_creator = environment.test_repo_config.offline_store_creator
    if data_source_creator.__name__ == SnowflakeDataSourceCreator.__name__:
        entity_df_query = f"""
        SELECT "customer_id", "driver_id", "order_id", "origin_id", "destination_id", "event_timestamp"
        FROM "{orders_table}"
        """
    else:
        entity_df_query = f"""
        SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp
        FROM {orders_table}
        """

    store.apply([driver(), customer(), location(), *feature_views.values()])

    job_from_sql = store.get_historical_features(
        entity_df=entity_df_query,
        features=[
            "customer_profile:current_balance",
            "customer_profile:avg_passenger_count",
            "customer_profile:lifetime_trip_count",
            "order:order_is_success",
            "global_stats:num_rides",
            "global_stats:avg_ride_length",
            "field_mapping:feature_name",
        ],
        full_feature_names=full_feature_names,
    )

    start_time = datetime.utcnow()
    actual_df_from_sql_entities = job_from_sql.to_df()
    end_time = datetime.utcnow()
    print(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'")

    event_timestamp = (
        DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
        if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in datasets.orders_df.columns
        else "e_ts"
    )
    full_expected_df = get_expected_training_df(
        datasets.customer_df,
        feature_views.customer,
        datasets.driver_df,
        feature_views.driver,
        datasets.orders_df,
        feature_views.order,
        datasets.location_df,
        feature_views.location,
        datasets.global_df,
        feature_views.global_fv,
        datasets.field_mapping_df,
        feature_views.field_mapping,
        datasets.entity_df,
        event_timestamp,
        full_feature_names,
    )

    # Not requesting the on demand transform with an entity_df query (can't add request data in them)
    expected_df_query = full_expected_df.drop(
        columns=[
            response_feature_name("conv_rate_plus_100", full_feature_names),
            response_feature_name("conv_rate_plus_100_rounded", full_feature_names),
            response_feature_name("avg_daily_trips", full_feature_names),
            response_feature_name("conv_rate", full_feature_names),
            "origin__temperature",
            "destination__temperature",
        ]
    )
    assert_frame_equal(
        expected_df_query,
        actual_df_from_sql_entities,
        keys=[event_timestamp, "order_id", "driver_id", "customer_id"],
    )

    table_from_sql_entities = job_from_sql.to_arrow().to_pandas()
    for col in table_from_sql_entities.columns:
        expected_df_query[col] = expected_df_query[col].astype(
            table_from_sql_entities[col].dtype
        )

    assert_frame_equal(
        expected_df_query,
        table_from_sql_entities,
        keys=[event_timestamp, "order_id", "driver_id", "customer_id"],
    )
def test_historical_features(environment, universal_data_sources, full_feature_names):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    entity_df_with_request_data = datasets.entity_df.copy(deep=True)
    entity_df_with_request_data["val_to_add"] = [
        i for i in range(len(entity_df_with_request_data))
    ]
    entity_df_with_request_data["driver_age"] = [
        i + 100 for i in range(len(entity_df_with_request_data))
    ]

    feature_service = FeatureService(
        name="convrate_plus100",
        features=[feature_views.driver[["conv_rate"]], feature_views.driver_odfv],
    )
    feature_service_entity_mapping = FeatureService(
        name="entity_mapping",
        features=[
            feature_views.location.with_name("origin").with_join_key_map(
                {"location_id": "origin_id"}
            ),
            feature_views.location.with_name("destination").with_join_key_map(
                {"location_id": "destination_id"}
            ),
        ],
    )

    store.apply(
        [
            driver(),
            customer(),
            location(),
            feature_service,
            feature_service_entity_mapping,
            *feature_views.values(),
        ]
    )

    event_timestamp = (
        DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
        if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in datasets.orders_df.columns
        else "e_ts"
    )
    full_expected_df = get_expected_training_df(
        datasets.customer_df,
        feature_views.customer,
        datasets.driver_df,
        feature_views.driver,
        datasets.orders_df,
        feature_views.order,
        datasets.location_df,
        feature_views.location,
        datasets.global_df,
        feature_views.global_fv,
        datasets.field_mapping_df,
        feature_views.field_mapping,
        entity_df_with_request_data,
        event_timestamp,
        full_feature_names,
    )

    # The shadow entity features are only needed for the FeatureService test
    expected_df = full_expected_df.drop(
        columns=["origin__temperature", "destination__temperature"],
    )

    job_from_df = store.get_historical_features(
        entity_df=entity_df_with_request_data,
        features=[
            "driver_stats:conv_rate",
            "driver_stats:avg_daily_trips",
            "customer_profile:current_balance",
            "customer_profile:avg_passenger_count",
            "customer_profile:lifetime_trip_count",
            "conv_rate_plus_100:conv_rate_plus_100",
            "conv_rate_plus_100:conv_rate_plus_100_rounded",
            "conv_rate_plus_100:conv_rate_plus_val_to_add",
            "order:order_is_success",
            "global_stats:num_rides",
            "global_stats:avg_ride_length",
            "field_mapping:feature_name",
        ],
        full_feature_names=full_feature_names,
    )

    start_time = datetime.utcnow()
    actual_df_from_df_entities = job_from_df.to_df()
    print(f"actual_df_from_df_entities shape: {actual_df_from_df_entities.shape}")
    end_time = datetime.utcnow()
    print(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")

    assert sorted(expected_df.columns) == sorted(actual_df_from_df_entities.columns)
    assert_frame_equal(
        expected_df,
        actual_df_from_df_entities,
        keys=[event_timestamp, "order_id", "driver_id", "customer_id"],
    )

    assert_feature_service_correctness(
        store,
        feature_service,
        full_feature_names,
        entity_df_with_request_data,
        expected_df,
        event_timestamp,
    )
    assert_feature_service_entity_mapping_correctness(
        store,
        feature_service_entity_mapping,
        full_feature_names,
        entity_df_with_request_data,
        full_expected_df,
        event_timestamp,
    )

    table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas()
    assert_frame_equal(
        expected_df,
        table_from_df_entities,
        keys=[event_timestamp, "order_id", "driver_id", "customer_id"],
    )
def test_online_retrieval(environment, universal_data_sources, full_feature_names):
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    feature_service = FeatureService(
        "convrate_plus100",
        features=[
            feature_views["driver"][["conv_rate"]],
            feature_views["driver_odfv"],
            feature_views["driver_age_request_fv"],
        ],
    )
    feature_service_entity_mapping = FeatureService(
        name="entity_mapping",
        features=[
            feature_views["location"].with_name("origin").with_join_key_map(
                {"location_id": "origin_id"}
            ),
            feature_views["location"].with_name("destination").with_join_key_map(
                {"location_id": "destination_id"}
            ),
        ],
    )

    feast_objects = []
    feast_objects.extend(feature_views.values())
    feast_objects.extend(
        [
            driver(),
            customer(),
            location(),
            feature_service,
            feature_service_entity_mapping,
        ]
    )
    fs.apply(feast_objects)
    fs.materialize(
        environment.start_date - timedelta(days=1),
        environment.end_date + timedelta(days=1),
    )

    entity_sample = datasets["orders"].sample(10)[
        ["customer_id", "driver_id", "order_id", "event_timestamp"]
    ]
    orders_df = datasets["orders"][
        (
            datasets["orders"]["customer_id"].isin(entity_sample["customer_id"])
            & datasets["orders"]["driver_id"].isin(entity_sample["driver_id"])
        )
    ]

    sample_drivers = entity_sample["driver_id"]
    drivers_df = datasets["driver"][
        datasets["driver"]["driver_id"].isin(sample_drivers)
    ]

    sample_customers = entity_sample["customer_id"]
    customers_df = datasets["customer"][
        datasets["customer"]["customer_id"].isin(sample_customers)
    ]

    location_pairs = np.array(list(itertools.permutations(entities["location"], 2)))
    sample_location_pairs = location_pairs[
        np.random.choice(len(location_pairs), 10)
    ].T
    origins_df = datasets["location"][
        datasets["location"]["location_id"].isin(sample_location_pairs[0])
    ]
    destinations_df = datasets["location"][
        datasets["location"]["location_id"].isin(sample_location_pairs[1])
    ]

    global_df = datasets["global"]

    entity_rows = [
        {"driver": d, "customer_id": c, "val_to_add": 50, "driver_age": 25}
        for (d, c) in zip(sample_drivers, sample_customers)
    ]

    feature_refs = [
        "driver_stats:conv_rate",
        "driver_stats:avg_daily_trips",
        "customer_profile:current_balance",
        "customer_profile:avg_passenger_count",
        "customer_profile:lifetime_trip_count",
        "conv_rate_plus_100:conv_rate_plus_100",
        "conv_rate_plus_100:conv_rate_plus_val_to_add",
        "order:order_is_success",
        "global_stats:num_rides",
        "global_stats:avg_ride_length",
        "driver_age:driver_age",
    ]
    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
    # Remove the on demand feature view output features, since they're not present in the source dataframe
    unprefixed_feature_refs.remove("conv_rate_plus_100")
    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")

    online_features = fs.get_online_features(
        features=feature_refs,
        entity_rows=entity_rows,
        full_feature_names=full_feature_names,
    )
    assert online_features is not None

    online_features_dict = online_features.to_dict()
    keys = online_features_dict.keys()
    assert (
        len(keys) == len(feature_refs) + 3
    )  # Add three for the driver id and customer id entity keys, plus the val_to_add request data.
    for feature in feature_refs:
        # full_feature_names does not apply to request feature views
        if full_feature_names and feature != "driver_age:driver_age":
            assert feature.replace(":", "__") in keys
        else:
            assert feature.rsplit(":", 1)[-1] in keys

    assert (
        "driver_stats" not in keys
        and "customer_profile" not in keys
        and "order" not in keys
        and "global_stats" not in keys
    )

    tc = unittest.TestCase()
    for i, entity_row in enumerate(entity_rows):
        df_features = get_latest_feature_values_from_dataframes(
            driver_df=drivers_df,
            customer_df=customers_df,
            orders_df=orders_df,
            global_df=global_df,
            entity_row=entity_row,
        )

        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
        tc.assertAlmostEqual(
            online_features_dict[
                response_feature_name("conv_rate_plus_100", full_feature_names)
            ][i],
            df_features["conv_rate"] + 100,
            delta=0.0001,
        )
        tc.assertAlmostEqual(
            online_features_dict[
                response_feature_name("conv_rate_plus_val_to_add", full_feature_names)
            ][i],
            df_features["conv_rate"] + df_features["val_to_add"],
            delta=0.0001,
        )
        for unprefixed_feature_ref in unprefixed_feature_refs:
            tc.assertAlmostEqual(
                df_features[unprefixed_feature_ref],
                online_features_dict[
                    response_feature_name(unprefixed_feature_ref, full_feature_names)
                ][i],
                delta=0.0001,
            )

    # Check what happens for missing values
    missing_responses_dict = fs.get_online_features(
        features=feature_refs,
        entity_rows=[
            {"driver": 0, "customer_id": 0, "val_to_add": 100, "driver_age": 125}
        ],
        full_feature_names=full_feature_names,
    ).to_dict()
    assert missing_responses_dict is not None
    for unprefixed_feature_ref in unprefixed_feature_refs:
        if unprefixed_feature_ref not in {
            "num_rides",
            "avg_ride_length",
            "driver_age",
        }:
            tc.assertIsNone(
                missing_responses_dict[
                    response_feature_name(unprefixed_feature_ref, full_feature_names)
                ][0]
            )

    # Check what happens for missing request data
    with pytest.raises(RequestDataNotFoundInEntityRowsException):
        fs.get_online_features(
            features=feature_refs,
            entity_rows=[{"driver": 0, "customer_id": 0}],
            full_feature_names=full_feature_names,
        ).to_dict()

    # Also when only part of the request data is provided
    with pytest.raises(RequestDataNotFoundInEntityRowsException):
        fs.get_online_features(
            features=feature_refs,
            entity_rows=[{"driver": 0, "customer_id": 0, "val_to_add": 20}],
            full_feature_names=full_feature_names,
        ).to_dict()

    assert_feature_service_correctness(
        fs,
        feature_service,
        entity_rows,
        full_feature_names,
        drivers_df,
        customers_df,
        orders_df,
        global_df,
    )

    entity_rows = [
        {
            "driver": driver_val,
            "customer_id": customer_val,
            "origin_id": origin,
            "destination_id": destination,
        }
        for (driver_val, customer_val, origin, destination) in zip(
            sample_drivers, sample_customers, *sample_location_pairs
        )
    ]
    assert_feature_service_entity_mapping_correctness(
        fs,
        feature_service_entity_mapping,
        entity_rows,
        full_feature_names,
        drivers_df,
        customers_df,
        orders_df,
        origins_df,
        destinations_df,
    )
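
# `get_latest_feature_values_from_dataframes` is shared test tooling; a
# minimal sketch consistent with its call sites above: for one entity row,
# take each source dataframe's most recent record (by event_timestamp) for
# that entity and merge them into one dict of expected feature values. The
# entity-key fallback and empty-order handling are illustrative.
def example_get_latest_feature_values_from_dataframes(
    driver_df, customer_df, orders_df, global_df, entity_row
):
    # entity rows key the driver by join key or by entity name, depending on
    # the test suite version
    driver_id = entity_row.get("driver_id", entity_row.get("driver"))
    customer_id = entity_row["customer_id"]

    latest = {}
    driver_rows = driver_df[driver_df["driver_id"] == driver_id]
    latest.update(driver_rows.sort_values("event_timestamp").iloc[-1].to_dict())

    customer_rows = customer_df[customer_df["customer_id"] == customer_id]
    latest.update(customer_rows.sort_values("event_timestamp").iloc[-1].to_dict())

    order_rows = orders_df[
        (orders_df["driver_id"] == driver_id)
        & (orders_df["customer_id"] == customer_id)
    ]
    if not order_rows.empty:
        latest.update(order_rows.sort_values("event_timestamp").iloc[-1].to_dict())

    # global stats have no entity key; just take the latest row
    latest.update(global_df.sort_values("event_timestamp").iloc[-1].to_dict())

    # request data (val_to_add, driver_age) passes straight through
    latest.update(entity_row)
    return latest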
def test_historical_features(environment, universal_data_sources, full_feature_names):
    store = environment.feature_store
    (entities, datasets, data_sources) = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    customer_df, driver_df, location_df, orders_df, global_df, entity_df = (
        datasets["customer"],
        datasets["driver"],
        datasets["location"],
        datasets["orders"],
        datasets["global"],
        datasets["entity"],
    )

    entity_df_with_request_data = entity_df.copy(deep=True)
    entity_df_with_request_data["val_to_add"] = [
        i for i in range(len(entity_df_with_request_data))
    ]
    entity_df_with_request_data["driver_age"] = [
        i + 100 for i in range(len(entity_df_with_request_data))
    ]

    (
        customer_fv,
        driver_fv,
        driver_odfv,
        location_fv,
        order_fv,
        global_fv,
        driver_age_request_fv,
    ) = (
        feature_views["customer"],
        feature_views["driver"],
        feature_views["driver_odfv"],
        feature_views["location"],
        feature_views["order"],
        feature_views["global"],
        feature_views["driver_age_request_fv"],
    )

    feature_service = FeatureService(
        name="convrate_plus100",
        features=[
            feature_views["driver"][["conv_rate"]],
            driver_odfv,
            driver_age_request_fv,
        ],
    )
    feature_service_entity_mapping = FeatureService(
        name="entity_mapping",
        features=[
            location_fv.with_name("origin").with_join_key_map(
                {"location_id": "origin_id"}
            ),
            location_fv.with_name("destination").with_join_key_map(
                {"location_id": "destination_id"}
            ),
        ],
    )

    feast_objects = []
    feast_objects.extend(
        [
            customer_fv,
            driver_fv,
            driver_odfv,
            location_fv,
            order_fv,
            global_fv,
            driver_age_request_fv,
            driver(),
            customer(),
            location(),
            feature_service,
            feature_service_entity_mapping,
        ]
    )
    store.apply(feast_objects)

    entity_df_query = None
    orders_table = table_name_from_data_source(data_sources["orders"])
    if orders_table:
        entity_df_query = f"SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp FROM {orders_table}"

    event_timestamp = (
        DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
        if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in orders_df.columns
        else "e_ts"
    )
    full_expected_df = get_expected_training_df(
        customer_df,
        customer_fv,
        driver_df,
        driver_fv,
        orders_df,
        order_fv,
        location_df,
        location_fv,
        global_df,
        global_fv,
        entity_df_with_request_data,
        event_timestamp,
        full_feature_names,
    )

    # The shadow entity features are only needed for the FeatureService test
    expected_df = full_expected_df.drop(
        columns=["origin__temperature", "destination__temperature"],
    )

    if entity_df_query:
        job_from_sql = store.get_historical_features(
            entity_df=entity_df_query,
            features=[
                "driver_stats:conv_rate",
                "driver_stats:avg_daily_trips",
                "customer_profile:current_balance",
                "customer_profile:avg_passenger_count",
                "customer_profile:lifetime_trip_count",
                "order:order_is_success",
                "global_stats:num_rides",
                "global_stats:avg_ride_length",
            ],
            full_feature_names=full_feature_names,
        )

        start_time = datetime.utcnow()
        actual_df_from_sql_entities = job_from_sql.to_df()
        end_time = datetime.utcnow()
        print(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'")

        # Not requesting the on demand transform with an entity_df query (can't add request data in them)
        expected_df_query = expected_df.drop(
            columns=[
                "conv_rate_plus_100",
                "conv_rate_plus_100_rounded",
                "val_to_add",
                "conv_rate_plus_val_to_add",
                "driver_age",
            ]
        )
        assert sorted(expected_df_query.columns) == sorted(
            actual_df_from_sql_entities.columns
        )

        actual_df_from_sql_entities = (
            actual_df_from_sql_entities[expected_df_query.columns]
            .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
            .drop_duplicates()
            .reset_index(drop=True)
        )
        expected_df_query = (
            expected_df_query.sort_values(
                by=[event_timestamp, "order_id", "driver_id", "customer_id"]
            )
            .drop_duplicates()
            .reset_index(drop=True)
        )
        assert_frame_equal(
            actual_df_from_sql_entities,
            expected_df_query,
            check_dtype=False,
        )

        table_from_sql_entities = job_from_sql.to_arrow()
        df_from_sql_entities = (
            table_from_sql_entities.to_pandas()[expected_df_query.columns]
            .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
            .drop_duplicates()
            .reset_index(drop=True)
        )

        for col in df_from_sql_entities.columns:
            expected_df_query[col] = expected_df_query[col].astype(
                df_from_sql_entities[col].dtype
            )

        assert_frame_equal(expected_df_query, df_from_sql_entities)

    job_from_df = store.get_historical_features(
        entity_df=entity_df_with_request_data,
        features=[
            "driver_stats:conv_rate",
            "driver_stats:avg_daily_trips",
            "customer_profile:current_balance",
            "customer_profile:avg_passenger_count",
            "customer_profile:lifetime_trip_count",
            "conv_rate_plus_100:conv_rate_plus_100",
            "conv_rate_plus_100:conv_rate_plus_100_rounded",
            "conv_rate_plus_100:conv_rate_plus_val_to_add",
            "order:order_is_success",
            "global_stats:num_rides",
            "global_stats:avg_ride_length",
            "driver_age:driver_age",
        ],
        full_feature_names=full_feature_names,
    )

    start_time = datetime.utcnow()
    actual_df_from_df_entities = job_from_df.to_df()
    print(f"actual_df_from_df_entities shape: {actual_df_from_df_entities.shape}")
    end_time = datetime.utcnow()
    print(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")

    assert sorted(expected_df.columns) == sorted(actual_df_from_df_entities.columns)
    expected_df: pd.DataFrame = (
        expected_df.sort_values(
            by=[event_timestamp, "order_id", "driver_id", "customer_id"]
        )
        .drop_duplicates()
        .reset_index(drop=True)
    )
    actual_df_from_df_entities = (
        actual_df_from_df_entities[expected_df.columns]
        .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
        .drop_duplicates()
        .reset_index(drop=True)
    )

    assert_frame_equal(
        expected_df,
        actual_df_from_df_entities,
        check_dtype=False,
    )
    assert_feature_service_correctness(
        store,
        feature_service,
        full_feature_names,
        entity_df_with_request_data,
        expected_df,
        event_timestamp,
    )
    assert_feature_service_entity_mapping_correctness(
        store,
        feature_service_entity_mapping,
        full_feature_names,
        entity_df_with_request_data,
        full_expected_df,
        event_timestamp,
    )

    table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas()

    columns_expected_in_table = expected_df.columns.tolist()
    table_from_df_entities = (
        table_from_df_entities[columns_expected_in_table]
        .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
        .drop_duplicates()
        .reset_index(drop=True)
    )
    assert_frame_equal(actual_df_from_df_entities, table_from_df_entities)

    # If request data is missing that's needed for on demand transform, throw an error
    with pytest.raises(RequestDataNotFoundInEntityDfException):
        store.get_historical_features(
            entity_df=entity_df,
            features=[
                "driver_stats:conv_rate",
                "driver_stats:avg_daily_trips",
                "customer_profile:current_balance",
                "customer_profile:avg_passenger_count",
                "customer_profile:lifetime_trip_count",
                "conv_rate_plus_100:conv_rate_plus_100",
                "conv_rate_plus_100:conv_rate_plus_val_to_add",
                "global_stats:num_rides",
                "global_stats:avg_ride_length",
            ],
            full_feature_names=full_feature_names,
        )

    # If request data is missing that's needed for a request feature view, throw an error
    with pytest.raises(RequestDataNotFoundInEntityDfException):
        store.get_historical_features(
            entity_df=entity_df,
            features=[
                "driver_stats:conv_rate",
                "driver_stats:avg_daily_trips",
                "customer_profile:current_balance",
                "customer_profile:avg_passenger_count",
                "customer_profile:lifetime_trip_count",
                "driver_age:driver_age",
                "global_stats:num_rides",
                "global_stats:avg_ride_length",
            ],
            full_feature_names=full_feature_names,
        )
def test_online_retrieval(environment, universal_data_sources, full_feature_names):
    fs = environment.feature_store
    entities, datasets, data_sources = universal_data_sources
    feature_views = construct_universal_feature_views(data_sources)

    feature_service = FeatureService(
        "convrate_plus100",
        features=[
            feature_views["driver"][["conv_rate"]],
            feature_views["driver_odfv"],
        ],
    )

    feast_objects = []
    feast_objects.extend(feature_views.values())
    feast_objects.extend([driver(), customer(), feature_service])
    fs.apply(feast_objects)
    fs.materialize(
        environment.start_date - timedelta(days=1),
        environment.end_date + timedelta(days=1),
    )

    entity_sample = datasets["orders"].sample(10)[
        ["customer_id", "driver_id", "order_id", "event_timestamp"]
    ]
    orders_df = datasets["orders"][
        (
            datasets["orders"]["customer_id"].isin(entity_sample["customer_id"])
            & datasets["orders"]["driver_id"].isin(entity_sample["driver_id"])
        )
    ]

    sample_drivers = entity_sample["driver_id"]
    drivers_df = datasets["driver"][
        datasets["driver"]["driver_id"].isin(sample_drivers)
    ]

    sample_customers = entity_sample["customer_id"]
    customers_df = datasets["customer"][
        datasets["customer"]["customer_id"].isin(sample_customers)
    ]

    global_df = datasets["global"]

    entity_rows = [
        {"driver": d, "customer_id": c, "val_to_add": 50}
        for (d, c) in zip(sample_drivers, sample_customers)
    ]

    feature_refs = [
        "driver_stats:conv_rate",
        "driver_stats:avg_daily_trips",
        "customer_profile:current_balance",
        "customer_profile:avg_passenger_count",
        "customer_profile:lifetime_trip_count",
        "conv_rate_plus_100:conv_rate_plus_100",
        "conv_rate_plus_100:conv_rate_plus_val_to_add",
        "order:order_is_success",
        "global_stats:num_rides",
        "global_stats:avg_ride_length",
    ]
    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
    # Remove the on demand feature view output features, since they're not present in the source dataframe
    unprefixed_feature_refs.remove("conv_rate_plus_100")
    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")

    online_features = fs.get_online_features(
        features=feature_refs,
        entity_rows=entity_rows,
        full_feature_names=full_feature_names,
    )
    assert online_features is not None

    online_features_dict = online_features.to_dict()
    keys = online_features_dict.keys()
    assert (
        len(keys) == len(feature_refs) + 3
    )  # Add three for the driver id and customer id entity keys, plus the val_to_add request data.
    for feature in feature_refs:
        if full_feature_names:
            assert feature.replace(":", "__") in keys
        else:
            assert feature.rsplit(":", 1)[-1] in keys

    assert (
        "driver_stats" not in keys
        and "customer_profile" not in keys
        and "order" not in keys
        and "global_stats" not in keys
    )

    tc = unittest.TestCase()
    for i, entity_row in enumerate(entity_rows):
        df_features = get_latest_feature_values_from_dataframes(
            drivers_df, customers_df, orders_df, global_df, entity_row
        )

        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
        assert (
            online_features_dict[
                response_feature_name("conv_rate_plus_100", full_feature_names)
            ][i]
            == df_features["conv_rate"] + 100
        )
        assert (
            online_features_dict[
                response_feature_name("conv_rate_plus_val_to_add", full_feature_names)
            ][i]
            == df_features["conv_rate"] + df_features["val_to_add"]
        )
        for unprefixed_feature_ref in unprefixed_feature_refs:
            tc.assertEqual(
                df_features[unprefixed_feature_ref],
                online_features_dict[
                    response_feature_name(unprefixed_feature_ref, full_feature_names)
                ][i],
            )

    # Check what happens for missing values
    missing_responses_dict = fs.get_online_features(
        features=feature_refs,
        entity_rows=[{"driver": 0, "customer_id": 0, "val_to_add": 100}],
        full_feature_names=full_feature_names,
    ).to_dict()
    assert missing_responses_dict is not None
    for unprefixed_feature_ref in unprefixed_feature_refs:
        if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
            tc.assertIsNone(
                missing_responses_dict[
                    response_feature_name(unprefixed_feature_ref, full_feature_names)
                ][0]
            )

    # Check what happens for missing request data
    with pytest.raises(RequestDataNotFoundInEntityRowsException):
        fs.get_online_features(
            features=feature_refs,
            entity_rows=[{"driver": 0, "customer_id": 0}],
            full_feature_names=full_feature_names,
        ).to_dict()

    assert_feature_service_correctness(
        fs,
        feature_service,
        entity_rows,
        full_feature_names,
        drivers_df,
        customers_df,
        orders_df,
        global_df,
    )
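
# `assert_feature_service_correctness` is shared test tooling; a minimal
# sketch consistent with the call above, assuming the two-argument form of
# response_feature_name used in this file. It retrieves features through the
# FeatureService handle (get_online_features accepts a FeatureService in
# place of raw feature refs) and spot-checks the on demand output against
# values recomputed from the source dataframes.
def example_assert_feature_service_correctness(
    fs,
    feature_service,
    entity_rows,
    full_feature_names,
    drivers_df,
    customers_df,
    orders_df,
    global_df,
):
    service_response = fs.get_online_features(
        features=feature_service,
        entity_rows=entity_rows,
        full_feature_names=full_feature_names,
    ).to_dict()
    key = response_feature_name("conv_rate_plus_100", full_feature_names)
    for i, entity_row in enumerate(entity_rows):
        expected = get_latest_feature_values_from_dataframes(
            drivers_df, customers_df, orders_df, global_df, entity_row
        )
        # the service includes conv_rate_plus_100 from the on demand view
        assert abs(service_response[key][i] - (expected["conv_rate"] + 100)) < 1e-4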