def _fit_model(population_df, peripheral_df, population_ph, peripheral_ph, seed, units):
    """Build and fit a Multirel model, then evaluate it on the same tables.

    A ``models.MultirelModel`` using the ``Count`` and ``Sum`` aggregations,
    a ``SquareLoss`` loss function and a ``LinearRegression`` predictor is
    constructed from the two placeholders, ``.send()`` is called on it, and
    the result is fitted on the given tables. The fitted model is then used
    to call ``transform``, ``predict`` and ``score`` on those same tables.

    Args:
        population_df: Table passed as ``population_table`` to every call.
        peripheral_df: Table passed as the single entry of
            ``peripheral_tables`` to every call.
        population_ph: Placeholder passed to the model as ``population``.
        peripheral_ph: Placeholder passed to the model as the single entry
            of ``peripheral``.
        seed: Forwarded unchanged to the model as ``seed``.
        units: Forwarded unchanged to the model as ``units``.

    Returns:
        A tuple ``(model, features, yhat, scores)`` holding the object
        returned by ``fit`` and the results of ``transform``, ``predict``
        and ``score`` respectively.
    """

    def table_kwargs():
        # Build the keyword arguments afresh on every use so that each call
        # receives its own list object, exactly as if written out inline.
        return {
            "population_table": population_df,
            "peripheral_tables": [peripheral_df],
        }

    # The predictor is created before the model expression is evaluated.
    regressor = predictors.LinearRegression()

    unfitted = models.MultirelModel(
        aggregation=[aggregations.Count, aggregations.Sum],
        population=population_ph,
        peripheral=[peripheral_ph],
        loss_function=loss_functions.SquareLoss(),
        predictor=regressor,
        num_features=10,
        share_aggregations=1.0,
        max_length=3,
        num_threads=1,
        seed=seed,
        units=units,
    ).send()

    # `fit` returns the model object that is used from here on.
    model = unfitted.fit(**table_kwargs())

    features = model.transform(**table_kwargs())
    yhat = model.predict(**table_kwargs())
    scores = model.score(**table_kwargs())

    return model, features, yhat, scores
# Example #2
# 0
                                         n_jobs=6,
                                         max_depth=7,
                                         reg_lambda=0.0)

# Alternative predictor (disabled):
# predictor = predictors.LogisticRegression()

# Multirel model over two peripheral tables, trained with a cross-entropy
# loss. `.send()` is called on the freshly constructed model.
model = models.MultirelModel(
    population=population_placeholder,
    peripheral=[expd_placeholder, memd_placeholder],
    loss_function=loss_functions.CrossEntropyLoss(),
    aggregation=[
        aggregations.Avg,
        aggregations.Count,
        aggregations.CountDistinct,
        aggregations.CountMinusCountDistinct,
        aggregations.Max,
        aggregations.Median,
        aggregations.Min,
        aggregations.Sum,
        aggregations.Var,
    ],
    num_features=10,
    share_aggregations=0.2,
    # feature_selector=feature_selector,  (disabled)
    predictor=predictor,
    allow_sets=True,
    max_length=3,
    min_num_samples=200,
    num_threads=4,
    shrinkage=0.1,
    include_categorical=True,
).send()

# -----------------------------------------------------------------------------
# Fit model

# `fit` returns the model object, which replaces the unfitted one.
model = model.fit(
    population_table=df_population_training,
    peripheral_tables=[df_expd, df_memd],
)
# Example #3
# 0
# ----------------
# Build model

# Named placeholders for the population and the peripheral table.
population_placeholder = data.Placeholder(name="numerical_population")
peripheral_placeholder = data.Placeholder(name="numerical_peripheral")

# Join the peripheral placeholder onto the population placeholder; both
# strings are passed positionally.
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

predictor = predictors.LinearRegression()

model = models.MultirelModel(
    aggregation=[
        aggregations.Count,
        aggregations.Sum,
    ],
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    loss_function=loss_functions.SquareLoss(),
    predictor=predictor,
    num_features=10,
    share_aggregations=1.0,
    max_length=3,
    num_threads=0,
).send()

# ----------------
# Fit the model; `fit` returns the model object used afterwards.

model = model.fit(
    population_table=population_on_engine,
    peripheral_tables=[peripheral_on_engine],
)

# ----------------
# Call `transform` on the same tables and keep the result.

features = model.transform(
    population_table=population_on_engine,
    peripheral_tables=[peripheral_on_engine],
)
#          t1.time_stamp;

# Load the numerical example data set as a population/peripheral table pair.
population_table, peripheral_table = datasets.make_numerical()

# Derive placeholders from the tables and join them; both strings are passed
# positionally.
population_placeholder = population_table.to_placeholder()
peripheral_placeholder = peripheral_table.to_placeholder()
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

predictor = predictors.LinearRegression()

# Named Multirel model with Count and Sum aggregations, a square loss and the
# linear-regression predictor created above.
model = models.MultirelModel(
    name="MyModel",
    aggregation=[aggregations.Count, aggregations.Sum],
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    loss_function=loss_functions.SquareLoss(),
    predictor=predictor,
    include_categorical=True,
    num_features=10,
    share_aggregations=1.0,
    max_length=1,
    num_threads=0).send()

# ----------------

# `fit` returns the model object, which replaces the unfitted one.
model = model.fit(population_table=population_table,
                  peripheral_tables=[peripheral_table])

# ----------------

# Only the two table arguments are passed here. `reg_lambda` is a predictor
# hyperparameter and must not be forwarded to `transform`.
model.transform(population_table=population_table,
                peripheral_tables=[peripheral_table])

# Gradient-boosted tree classifier used as the model's predictor.
predictor = predictors.XGBoostClassifier(
    booster="gbtree",
    n_estimators=100,
    n_jobs=6,
    max_depth=7,
    reg_lambda=500,
)

# Multirel model over two peripheral tables with a cross-entropy loss, a
# separate feature selector and the predictor defined above.
model = models.MultirelModel(
    population=population_placeholder,
    peripheral=[expd_placeholder, memd_placeholder],
    loss_function=loss_functions.CrossEntropyLoss(),
    aggregation=[
        aggregations.Avg,
        aggregations.Count,
        aggregations.CountDistinct,
        aggregations.CountMinusCountDistinct,
        aggregations.Max,
        aggregations.Median,
        aggregations.Min,
        aggregations.Sum,
        aggregations.Var,
    ],
    feature_selector=feature_selector,
    predictor=predictor,
    allow_sets=True,
    num_threads=3,
).send()

# ----------------
# Build a hyperparameter space

# Maps a hyperparameter name to a two-element list.
# NOTE(review): presumably [lower, upper] search bounds - confirm against
# the hyperparameter-optimization API that consumes this dict.
param_space = {}

# These must be assignments (`=`). A bare `target: value` line is only an
# annotation and would leave the dictionary empty.
param_space["grid_factor"] = [1.0, 16.0]
param_space["max_length"] = [1, 10]
import getml.loss_functions as loss_functions

# Multirel model in which the same placeholder serves as the population table
# and as the only peripheral table. `.send()` is called on the new model.
model = models.MultirelModel(
    # Data model, predictor and loss.
    population=CE_placeholder,
    peripheral=[CE_placeholder],
    predictor=predictor,
    loss_function=loss_functions.CrossEntropyLoss(),
    # Aggregations made available to the model.
    aggregation=[
        aggregations.Avg, aggregations.Count,
        aggregations.CountDistinct, aggregations.CountMinusCountDistinct,
        aggregations.Max, aggregations.Median,
        aggregations.Min, aggregations.Sum,
    ],
    # Remaining hyperparameters, passed as keyword arguments.
    use_timestamps=True,
    num_features=70,
    max_length=7,
    min_num_samples=100,
    shrinkage=0.1,
    grid_factor=1.0,
    regularization=0.0,
    round_robin=False,
    share_aggregations=0.04,
    share_conditions=0.8,
    sampling_factor=1.0,
).send()

# Join the peripheral placeholder onto the population placeholder, naming the
# join key and the time-stamp columns of both sides explicitly.
population_placeholder.join(
    peripheral_placeholder,
    join_key="join_key",
    time_stamp="time_stamp_lagged",
    other_time_stamp="time_stamp",
)

predictor = predictors.LinearRegression()

model = models.MultirelModel(
    aggregation=[
        aggregations.Count,
        aggregations.Sum,
    ],
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    loss_function=loss_functions.SquareLoss(),
    predictor=predictor,
    min_num_samples=1,
    num_features=10,
    share_aggregations=1.0,
    max_length=2,
    num_threads=4,
    # The time delta is set explicitly for this model.
    delta_t=1.0,
).send()

# ----------------
# Fit the model; `fit` returns the model object used afterwards.

model = model.fit(
    population_table=population_on_engine,
    peripheral_tables=[peripheral_on_engine],
)

# ----------------

features = model.transform(population_table=population_on_engine,