def _fit_model(population_df, peripheral_df, population_ph, peripheral_ph, seed, units):
    """Build a Multirel model, fit it, and evaluate it on the training tables.

    The model is configured with the ``Count`` and ``Sum`` aggregations, a
    square loss and a linear-regression predictor. The same pair of tables
    is used for fitting, transforming, predicting and scoring.

    Args:
        population_df: Table passed as ``population_table`` to every call.
        peripheral_df: Table passed (wrapped in a one-element list) as
            ``peripheral_tables`` to every call.
        population_ph: Placeholder passed as ``population`` to the model.
        peripheral_ph: Placeholder passed (wrapped in a one-element list)
            as ``peripheral`` to the model.
        seed: Forwarded unchanged as the model's ``seed`` argument.
        units: Forwarded unchanged as the model's ``units`` argument.

    Returns:
        A 4-tuple ``(model, features, yhat, scores)``: the fitted model and
        the results of its ``transform``, ``predict`` and ``score`` calls.
    """

    def _tables():
        # Build fresh keyword arguments (and a fresh list) for each call,
        # mirroring four independent literal calls.
        return {
            "population_table": population_df,
            "peripheral_tables": [peripheral_df],
        }

    linear_predictor = predictors.LinearRegression()

    settings = {
        "aggregation": [aggregations.Count, aggregations.Sum],
        "population": population_ph,
        "peripheral": [peripheral_ph],
        "loss_function": loss_functions.SquareLoss(),
        "predictor": linear_predictor,
        "num_features": 10,
        "share_aggregations": 1.0,
        "max_length": 3,
        "num_threads": 1,
        "seed": seed,
        "units": units,
    }

    # send() is called before fit(), exactly as in the step-by-step form.
    fitted = models.MultirelModel(**settings).send().fit(**_tables())

    features = fitted.transform(**_tables())
    yhat = fitted.predict(**_tables())
    scores = fitted.score(**_tables())

    return fitted, features, yhat, scores
n_jobs=6, max_depth=7, reg_lambda=0.0) #predictor = predictors.LogisticRegression() model = models.MultirelModel( population=population_placeholder, peripheral=[expd_placeholder, memd_placeholder], loss_function=loss_functions.CrossEntropyLoss(), aggregation=[ aggregations.Avg, aggregations.Count, aggregations.CountDistinct, aggregations.CountMinusCountDistinct, aggregations.Max, aggregations.Median, aggregations.Min, aggregations.Sum, aggregations.Var ], num_features=10, share_aggregations=0.2, #feature_selector=feature_selector, predictor=predictor, allow_sets=True, max_length=3, min_num_samples=200, num_threads=4, shrinkage=0.1, include_categorical=True).send() # ----------------------------------------------------------------------------- # Fit model model = model.fit(population_table=df_population_training, peripheral_tables=[df_expd, df_memd])
# ----------------
# Build model

population_placeholder = data.Placeholder(name="numerical_population")
peripheral_placeholder = data.Placeholder(name="numerical_peripheral")

# Join the peripheral placeholder onto the population placeholder, passing
# "join_key" and "time_stamp" positionally.
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

predictor = predictors.LinearRegression()

model = models.MultirelModel(
    # Placeholders.
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    # Loss, predictor and aggregations.
    loss_function=loss_functions.SquareLoss(),
    predictor=predictor,
    aggregation=[aggregations.Count, aggregations.Sum],
    # Remaining hyperparameters.
    num_features=10,
    share_aggregations=1.0,
    max_length=3,
    num_threads=0,
)
model = model.send()

# ----------------
# Fit on the engine-side tables.

model = model.fit(
    population_table=population_on_engine,
    peripheral_tables=[peripheral_on_engine],
)

# ----------------
# Generate the features for the same tables.

features = model.transform(
    population_table=population_on_engine,
    peripheral_tables=[peripheral_on_engine],
)
# t1.time_stamp; population_table, peripheral_table = datasets.make_numerical() population_placeholder = population_table.to_placeholder() peripheral_placeholder = peripheral_table.to_placeholder() population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp") predictor = predictors.LinearRegression() model = models.MultirelModel( name="MyModel", aggregation=[aggregations.Count, aggregations.Sum], population=population_placeholder, peripheral=[peripheral_placeholder], loss_function=loss_functions.SquareLoss(), predictor=predictor, include_categorical=True, num_features=10, share_aggregations=1.0, max_length=1, num_threads=0).send() # ---------------- model = model.fit(population_table=population_table, peripheral_tables=[peripheral_table]) # ---------------- model.transform(population_table=population_table, peripheral_tables=[peripheral_table],
reg_lambda=500) predictor = predictors.XGBoostClassifier(booster="gbtree", n_estimators=100, n_jobs=6, max_depth=7, reg_lambda=500) model = models.MultirelModel(population=population_placeholder, peripheral=[expd_placeholder, memd_placeholder], loss_function=loss_functions.CrossEntropyLoss(), aggregation=[ aggregations.Avg, aggregations.Count, aggregations.CountDistinct, aggregations.CountMinusCountDistinct, aggregations.Max, aggregations.Median, aggregations.Min, aggregations.Sum, aggregations.Var ], feature_selector=feature_selector, predictor=predictor, allow_sets=True, num_threads=3).send() # ---------------- # Build a hyperparameter space param_space = dict() param_space["grid_factor"]: [1.0, 16.0] param_space["max_length"]: [1, 10]
import getml.loss_functions as loss_functions

model = models.MultirelModel(
    # The same placeholder serves as the population table and as the only
    # peripheral table.
    population=CE_placeholder,
    peripheral=[CE_placeholder],
    # Loss, predictor and aggregations.
    loss_function=loss_functions.CrossEntropyLoss(),
    predictor=predictor,
    aggregation=[
        aggregations.Avg,
        aggregations.Count,
        aggregations.CountDistinct,
        aggregations.CountMinusCountDistinct,
        aggregations.Max,
        aggregations.Median,
        aggregations.Min,
        aggregations.Sum,
    ],
    # Remaining hyperparameters.
    use_timestamps=True,
    num_features=70,
    max_length=7,
    min_num_samples=100,
    shrinkage=0.1,
    grid_factor=1.0,
    regularization=0.0,
    round_robin=False,
    share_aggregations=0.04,
    share_conditions=0.8,
    sampling_factor=1.0,
)
model = model.send()
population_placeholder.join( peripheral_placeholder, join_key="join_key", time_stamp="time_stamp_lagged", other_time_stamp="time_stamp", ) predictor = predictors.LinearRegression() model = models.MultirelModel( aggregation=[aggregations.Count, aggregations.Sum], population=population_placeholder, peripheral=[peripheral_placeholder], loss_function=loss_functions.SquareLoss(), predictor=predictor, min_num_samples=1, num_features=10, share_aggregations=1.0, max_length=2, num_threads=4, delta_t=1.0 # Define the time delta ).send() # ---------------- model = model.fit(population_table=population_on_engine, peripheral_tables=[peripheral_on_engine]) # ---------------- features = model.transform(population_table=population_on_engine,