def test_load_features_data():
    """Verify that MatrixBuilder.load_features_data returns feature frames
    matching expectations assembled by hand from the test fixtures."""
    as_of = [
        datetime.datetime(2016, 1, 1, 0, 0),
        datetime.datetime(2016, 2, 1, 0, 0),
    ]
    # Expected entity/date index frame, built straight from the fixtures.
    ids_dates = create_entity_date_df(
        labels=labels,
        states=states,
        as_of_dates=as_of,
        label_name="booking",
        label_type="binary",
        label_timespan="1 month",
    )
    feature_columns = [["f1", "f2"], ["f3", "f4"]]

    def expected_frame(table, names):
        # One expected feature frame, indexed by (entity_id, as_of_date).
        frame = pd.DataFrame(table, columns=["entity_id", "as_of_date"] + names)
        frame["as_of_date"] = convert_string_column_to_date(frame["as_of_date"])
        merged = ids_dates.merge(
            right=frame, how="left", on=["entity_id", "as_of_date"]
        )
        return merged.set_index(["entity_id", "as_of_date"])

    features_dfs = [
        expected_frame(table, names)
        for table, names in zip(features_tables, feature_columns)
    ]

    # Stand up a throwaway Postgres populated with the fake feature data.
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        create_schemas(
            engine=engine, features_tables=features_tables, labels=labels, states=states
        )
        with get_matrix_storage_engine() as matrix_storage_engine:
            builder = MatrixBuilder(
                db_config=db_config,
                matrix_storage_engine=matrix_storage_engine,
                experiment_hash=experiment_hash,
                engine=engine,
            )
            # Materialize the entity-date table the loader will join against.
            entity_date_table_name = builder.make_entity_date_table(
                as_of_times=as_of,
                label_type="binary",
                label_name="booking",
                state="active",
                matrix_type="train",
                matrix_uuid="my_uuid",
                label_timespan="1 month",
            )
            feature_dictionary = {
                "features{}".format(i): names
                for i, names in enumerate(feature_columns)
            }
            returned_features_dfs = builder.load_features_data(
                as_of_times=as_of,
                feature_dictionary=feature_dictionary,
                entity_date_table_name=entity_date_table_name,
                matrix_uuid="my_uuid",
            )
            # Each returned frame must match its hand-built counterpart cell-for-cell.
            for returned, expected in zip(returned_features_dfs, features_dfs):
                assert (returned == expected).all().all()
def test_load_features_data():
    """Check that MatrixBuilder.load_features_data reproduces manually merged
    feature frames for the two-state fixture configuration.

    NOTE(review): this redefines test_load_features_data — if both variants are
    visible at module scope only the later one runs; confirm which is intended.
    """
    query_dates = [
        datetime.datetime(2016, 1, 1, 0, 0),
        datetime.datetime(2016, 2, 1, 0, 0),
    ]
    # Expected entity/date index, built directly from the test fixtures.
    ids_dates = create_entity_date_df(
        labels=labels,
        states=states,
        as_of_dates=query_dates,
        state_one=True,
        state_two=True,
        label_name="booking",
        label_type="binary",
        label_timespan="1 month",
    )
    feature_names = [["f1", "f2"], ["f3", "f4"]]
    features_dfs = []
    for table, names in zip(features_tables, feature_names):
        raw = pd.DataFrame(table, columns=["entity_id", "as_of_date"] + names)
        raw["as_of_date"] = convert_string_column_to_date(raw["as_of_date"])
        joined = ids_dates.merge(
            right=raw, how="left", on=["entity_id", "as_of_date"]
        )
        features_dfs.append(joined.set_index(["entity_id", "as_of_date"]))

    # Disposable Postgres instance holding the fake feature tables.
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        create_schemas(
            engine=engine,
            features_tables=features_tables,
            labels=labels,
            states=states,
        )
        with get_matrix_storage_engine() as matrix_storage_engine:
            builder = MatrixBuilder(
                db_config=db_config,
                matrix_storage_engine=matrix_storage_engine,
                engine=engine,
            )
            # Create the entity-date table that load_features_data joins on.
            entity_date_table_name = builder.make_entity_date_table(
                as_of_times=query_dates,
                label_type="binary",
                label_name="booking",
                state="state_one AND state_two",
                matrix_type="train",
                matrix_uuid="my_uuid",
                label_timespan="1 month",
            )
            feature_dictionary = {
                "features{}".format(i): names
                for i, names in enumerate(feature_names)
            }
            returned_features_dfs = builder.load_features_data(
                as_of_times=query_dates,
                feature_dictionary=feature_dictionary,
                entity_date_table_name=entity_date_table_name,
                matrix_uuid="my_uuid",
            )
            # Compare every returned frame against the expected one, cell by cell.
            for returned, expected in zip(returned_features_dfs, features_dfs):
                assert (returned == expected).all().all()