def get_historical_features(features: str, entity_df_path: str, destination: str):
    """Fetch historical feature values for the entities listed in a CSV file.

    Reads the entity dataframe from ``entity_df_path`` (delimiter is
    auto-detected), parses its ``event_timestamp`` column, stages it via the
    client, and launches a historical-retrieval job.  The job's output file
    URI is printed to stdout.

    Args:
        features: Comma-separated list of feature references to retrieve.
        entity_df_path: Path to a CSV file containing the entity dataframe;
            it must include an ``event_timestamp`` column.
        destination: Accepted for interface compatibility; not used in the
            visible body — TODO confirm whether downstream wiring consumes it.
    """
    import pandas

    client = Client()

    # TODO: clean this up
    # sep=None + the python engine lets pandas sniff the delimiter.
    frame = pandas.read_csv(entity_df_path, sep=None, engine="python")
    frame["event_timestamp"] = pandas.to_datetime(frame["event_timestamp"])

    staged = client.stage_dataframe(frame, "event_timestamp", "created_timestamp")
    retrieval_job = client.get_historical_features(features.split(","), staged)
    print(retrieval_job.get_output_file_uri())
def get_historical_features(features: str, entity_df_path: str, entity_df_dtype: str, destination: str):
    """Get historical features.

    This CLI command is mostly for testing/easy demos; use the corresponding
    API method in production.  The main reason why this command is unlikely to
    be more broadly useful is that we make quite a few assumptions about the
    entity dataframe, namely:
        * it has to have an ``event_timestamp`` column
        * it has to parse cleanly by ``pandas.read_csv()`` with no extra
          tuning of data types

    Args:
        features: Comma-separated list of feature references to retrieve.
        entity_df_path: Path to a CSV file with the entity dataframe.
        entity_df_dtype: Optional JSON-encoded mapping of column name to
            dtype, forwarded to ``pandas.read_csv(dtype=...)``; ignored when
            empty.
        destination: Accepted for interface compatibility; not used in the
            visible body — TODO confirm whether downstream wiring consumes it.
    """
    import pandas

    client = Client()

    # Delimiter is sniffed (sep=None requires the python engine); an explicit
    # dtype mapping is applied only when the caller supplied one.
    read_kwargs = {"sep": None, "engine": "python"}
    if entity_df_dtype:
        read_kwargs["dtype"] = json.loads(entity_df_dtype)
    frame = pandas.read_csv(entity_df_path, **read_kwargs)
    frame["event_timestamp"] = pandas.to_datetime(frame["event_timestamp"])

    staged = client.stage_dataframe(frame, "event_timestamp")
    retrieval_job = client.get_historical_features(features.split(","), staged)
    print(retrieval_job.get_output_file_uri())