# ---------------- features = model.transform(population_table=population_table, peripheral_tables=[peripheral_table]) # ---------------- features = model.transform(population_table=population_table, peripheral_tables=[peripheral_table], df_name="features") # ---------------- yhat = model.predict(population_table=population_table, peripheral_tables=[peripheral_table]) # ---------------- print(model.to_sql()) # ---------------- scores = model.score(population_table=population_table, peripheral_tables=[peripheral_table]) print(scores) # ---------------- engine.delete_project("examples")
def test_relboost_same_units():
    """Check that units set on the columns and units passed to the model agree.

    The same artificial dataset is generated twice from the same seed:

    1. Column run: every entry of ``units`` is set on the matching column of
       both tables via ``set_unit`` and an empty mapping is handed to
       ``_fit_model``.
    2. Model run: the tables are left untouched and ``units`` itself is
       handed to ``_fit_model`` (presumably forwarded to the RelboostModel -
       confirm against ``_fit_model``).

    The scores and the predictions of both runs must be identical.
    """

    # ----------------------------------------------------------------

    engine.set_project("examples")

    # Everything after the project has been set runs inside try/finally so
    # that the project is deleted even if a fit or an assertion fails.
    try:
        seed = 33231

        # Single source of truth for the units used by both runs.
        units = {"column_01": "column_01"}

        # ----------------------------------------------------------------
        # Generate artificial dataset
        # The problem we create looks like this:
        #
        # SELECT COUNT( * )
        # FROM POPULATION t1
        # LEFT JOIN PERIPHERAL t2
        # ON t1.join_key = t2.join_key
        # WHERE (
        #    ( t1.time_stamp - t2.time_stamp <= 0.5 )
        # ) AND t2.time_stamp <= t1.time_stamp
        # GROUP BY t1.join_key,
        #          t1.time_stamp;
        #
        # NOTE(review): this query filters on time_stamp rather than on
        # column_01 - confirm it really describes what
        # datasets.make_same_units_numerical generates.

        # ----------------------------------------------------------------
        # Assign the units to the columns

        population_table_columns, peripheral_table_columns = (
            datasets.make_same_units_numerical(random_state=seed)
        )

        for column_name, unit in units.items():
            population_table_columns.set_unit(column_name, unit)
            peripheral_table_columns.set_unit(column_name, unit)

        population_placeholder_columns = (
            population_table_columns.to_placeholder()
        )
        peripheral_placeholder_columns = (
            peripheral_table_columns.to_placeholder()
        )

        population_placeholder_columns.join(
            peripheral_placeholder_columns,
            "join_key",
            "time_stamp",
        )

        # ----------------------------------------------------------------

        # Only the predictions and the scores are compared below; the fitted
        # model and the features are discarded.
        _, _, yhat_columns, scores_columns = _fit_model(
            population_table_columns,
            peripheral_table_columns,
            population_placeholder_columns,
            peripheral_placeholder_columns,
            seed,
            {},
        )

        # ----------------------------------------------------------------
        # Assign units to Model

        population_table_model, peripheral_table_model = (
            datasets.make_same_units_numerical(random_state=seed)
        )

        population_placeholder_model = population_table_model.to_placeholder()
        peripheral_placeholder_model = peripheral_table_model.to_placeholder()

        population_placeholder_model.join(
            peripheral_placeholder_model,
            "join_key",
            "time_stamp",
        )

        # ----------------------------------------------------------------

        _, _, yhat_model, scores_model = _fit_model(
            population_table_model,
            peripheral_table_model,
            population_placeholder_model,
            peripheral_placeholder_model,
            seed,
            units,
        )

        # ----------------------------------------------------------------
        # Check whether the results are the same.

        assert scores_model == scores_columns

        assert (yhat_model == yhat_columns).all()

    # ----------------------------------------------------------------

    finally:
        engine.delete_project("examples")