Example #1
def get_historical_features(features: str, entity_df_path: str,
                            destination: str):
    """
    Get historical features
    """
    import pandas

    from feast import Client  # assumed import; likely done at module level in the original source

    client = Client()

    # TODO: clean this up
    entity_df = pandas.read_csv(
        entity_df_path,
        sep=None,
        engine="python",
    )

    entity_df["event_timestamp"] = pandas.to_datetime(
        entity_df["event_timestamp"])

    uploaded_df = client.stage_dataframe(entity_df, "event_timestamp",
                                         "created_timestamp")

    job = client.get_historical_features(
        features.split(","),
        uploaded_df,
    )
    print(job.get_output_file_uri())
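
A minimal sketch of how this command body might be exercised, assuming a configured Feast deployment; the feature references ("driver:avg_trips", "driver:rating") and the "driver_id" join key are hypothetical. The entity dataframe only needs an event_timestamp column plus the join keys of the requested features:

import pandas

# Hypothetical entity dataframe; "driver_id" is an assumed join key and the
# timestamps are illustrative.
entity_df = pandas.DataFrame({
    "driver_id": [1001, 1002],
    "event_timestamp": ["2021-04-12 10:59:42", "2021-04-12 08:12:10"],
    "created_timestamp": ["2021-04-12 10:59:42", "2021-04-12 08:12:10"],
})
entity_df.to_csv("entities.csv", index=False)

# Feature references are passed as a single comma-separated string.
get_historical_features(
    features="driver:avg_trips,driver:rating",
    entity_df_path="entities.csv",
    destination="",
)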
Example #2
def get_historical_features(features: str, entity_df_path: str,
                            entity_df_dtype: str, destination: str):
    """
    Get historical features. This CLI command is mostly for testing/easy demos; use the
    corresponding API method in production.

    The main reason why this command is unlikely to be more broadly useful is that we make quite a
    few assumptions about the entity dataframe, namely:
        * it has to have an `event_timestamp` column
        * it has to parse cleanly with `pandas.read_csv()` with no extra tuning of data types
    """
    import json

    import pandas

    from feast import Client  # assumed import; likely done at module level in the original source

    client = Client()

    if entity_df_dtype:
        # Column dtypes are passed as a JSON object mapping column names to pandas dtypes
        dtype = json.loads(entity_df_dtype)
        # sep=None with the python engine lets pandas sniff the delimiter
        entity_df = pandas.read_csv(entity_df_path,
                                    sep=None,
                                    engine="python",
                                    dtype=dtype)
    else:
        entity_df = pandas.read_csv(entity_df_path, sep=None, engine="python")

    entity_df["event_timestamp"] = pandas.to_datetime(
        entity_df["event_timestamp"])

    uploaded_df = client.stage_dataframe(entity_df, "event_timestamp")

    job = client.get_historical_features(
        features.split(","),
        uploaded_df,
    )
    print(job.get_output_file_uri())
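
A minimal sketch of the dtype override path, assuming an entities.csv like the one sketched after Example #1 and a hypothetical "driver_id" column that should be read as a string rather than inferred as an integer; entity_df_dtype is a JSON object mapping column names to pandas dtypes:

get_historical_features(
    features="driver:avg_trips,driver:rating",  # hypothetical feature references
    entity_df_path="entities.csv",
    entity_df_dtype='{"driver_id": "str"}',     # force driver_id to be read as a string
    destination="",
)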