population_table = population_table.rename( index=str, columns={"time_stamp_population": "time_stamp"}) peripheral_table = peripheral_table.rename( index=str, columns={"time_stamp_peripheral": "time_stamp"}) # Replace NaN targets with 0.0 - target values may never be NaN!. population_table.targets = np.where(np.isnan(population_table['targets']), 0, population_table['targets']) # ---------------- # Build model population_placeholder = data.Placeholder(name="POPULATION", numerical=["column_01"], join_keys=["join_key"], targets=["targets"]) peripheral_placeholder = data.Placeholder(name="PERIPHERAL", numerical=["column_01"], join_keys=["join_key"]) population_placeholder.join(peripheral_placeholder, "join_key") predictor = predictors.LinearRegression() #predictor = predictors.XGBoostRegressor() model = models.RelboostModel(population=population_placeholder, peripheral=[peripheral_placeholder], loss_function=loss_functions.SquareLoss(),
# Here we load the first part of the pandas.DataFrame... population_on_engine = data.DataFrame(name="POPULATION", roles={ "join_key": ["join_key"], "numerical": ["column_01"], "time_stamp": ["time_stamp"], "target": ["targets"] }).read_pandas(population_table[:20]) # ...and now we load the second part population_on_engine.read_pandas(population_table[20:], append=True) # ---------------- # Build model population_placeholder = data.Placeholder(name="numerical_population") peripheral_placeholder = data.Placeholder(name="numerical_peripheral") population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp") predictor = predictors.LinearRegression() model = models.MultirelModel( aggregation=[aggregations.Count, aggregations.Sum], population=population_placeholder, peripheral=[peripheral_placeholder], loss_function=loss_functions.SquareLoss(), predictor=predictor, num_features=10, share_aggregations=1.0,
"target": ["column_01"], "time_stamp": ["time_stamp_lagged"] }).read_pandas(time_series) peripheral_on_engine = data.DataFrame(name="PERIPHERAL", roles={ "join_key": ["join_key"], "numerical": ["column_01"], "time_stamp": ["time_stamp", "upper_time_stamp"] }).read_pandas(time_series) # ---------------- # Build model population_placeholder = data.Placeholder(name="TIME_SERIES") peripheral_placeholder = data.Placeholder(name="TIME_SERIES") population_placeholder.join(peripheral_placeholder, join_key="join_key", time_stamp="time_stamp_lagged", other_time_stamp="time_stamp", upper_time_stamp="upper_time_stamp") predictor = predictors.LinearRegression() model = models.MultirelModel( aggregation=[aggregations.Count, aggregations.Sum], population=population_placeholder, peripheral=[peripheral_placeholder],