def __prepare_sales_train_ds(ds):
    """Build the feature frame used to train the sales model.

    The ``Date`` column is parsed from ``dd/mm/YYYY`` text and rewritten as
    ``YYYY-mm-dd`` text; ``Day`` (English weekday name) and ``Month``
    (two-digit month string) are derived from it.  ``Month``, ``Region`` and
    ``Day`` are then passed to the ``imp`` one-hot helpers, and the frame is
    run through the ``pre_u`` helpers in the order listed below.

    Note that the column assignments are made on the frame passed in, so the
    caller's ``ds`` object receives the ``Date``/``Day``/``Month`` changes.

    Args:
        ds: DataFrame with at least ``Date`` (``dd/mm/YYYY`` strings) and
            ``Region`` columns; the ``pre_u`` helpers presumably need more
            columns (e.g. ``IsOpen``) -- confirm against those helpers.

    Returns:
        Whatever the last helper, ``pre_u.mean_sales_per_month_per_region``,
        returns for the processed frame.
    """
    parsed_dates = p.to_datetime(ds['Date'], format='%d/%m/%Y')

    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # is its replacement and yields the same English day names.
    ds['Day'] = parsed_dates.dt.day_name()
    ds['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    # Same value as the second "-"-separated field of the formatted date.
    ds['Month'] = parsed_dates.dt.strftime('%m')

    ds = imp.one_hot_numeric(ds, 'Month', 'Month_')
    ds = imp.one_hot_numeric(ds, 'Region', 'Region_')
    ds = imp.one_hot(ds, 'Day', header='Day_')

    # Order matters: each helper receives the frame returned by the previous.
    ds = pre_u.eliminate_IsOpen_zeros(ds)
    ds = pre_u.mean_std_sales_per_shop_per_day(ds)
    ds = pre_u.add_avg_per_shop(ds)
    ds = pre_u.add_std_per_shop(ds)
    ds = pre_u.add_max_per_shop(ds)
    ds = pre_u.add_min_per_shop(ds)
    ds = pre_u.mean_sales_per_month_per_region(ds)
    return ds
Beispiel #2
0
def __prepare_customers_train_ds(das, m1, a1, m2, a2):
    """Build the feature frame used to train the customers model.

    The ``Date`` column is parsed from ``dd/mm/YYYY`` text and rewritten as
    ``YYYY-mm-dd`` text; ``Day`` (English weekday name) and ``Month``
    (two-digit month string) are derived from it and one-hot encoded together
    with ``Region``.  A reference frame is then taken with
    ``utils.get_frame_out_of_range`` and handed to every ``preu`` statistic
    helper.

    Note that the column assignments are made on the frame passed in, so the
    caller's ``das`` object receives the ``Date``/``Day``/``Month`` changes.

    Args:
        das: DataFrame with at least ``Date`` (``dd/mm/YYYY`` strings) and
            ``Region`` columns.
        m1, a1, m2, a2: Forwarded unchanged to
            ``utils.get_frame_out_of_range``; presumably the month/year
            bounds of the range to leave out -- confirm against that helper.

    Returns:
        Whatever the last helper, ``preu.mean_cust_per_month_per_region``,
        returns for the processed frame.
    """
    parsed_dates = pandas.to_datetime(das['Date'], format='%d/%m/%Y')

    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # is its replacement and yields the same English day names.
    das['Day'] = parsed_dates.dt.day_name()
    das['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    # Same value as the second "-"-separated field of the formatted date.
    das['Month'] = parsed_dates.dt.strftime('%m')

    das = imp.one_hot(das, 'Day', header='Day_')
    das = imp.one_hot_numeric(das, 'Month', 'Month_')
    das = imp.one_hot_numeric(das, 'Region', 'Region_')

    # The reference frame is taken after one-hot encoding but before the
    # IsOpen filtering below; keep this ordering.
    dfrom = utils.get_frame_out_of_range(das, m1, a1, m2, a2)

    das = preu.eliminate_IsOpen_zeros(das)
    das = preu.mean_std_cust_per_shop_per_day(das, dfrom)
    das = preu.add_avg_cust_per_shop(das, dfrom)
    das = preu.add_std_cust_per_shop(das, dfrom)
    das = preu.add_max_cust_per_shop(das, dfrom)
    das = preu.add_min_cust_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_region(das, dfrom)
    return das
def __prepare_customers_test_ds(ds, dfrom):
    """Build the feature frame used to predict customers on test data.

    ``NumberOfSales`` and ``NumberOfCustomers`` are set to float zeros on
    every row.  The ``Date`` column is parsed from ``dd/mm/YYYY`` text and
    rewritten as ``YYYY-mm-dd`` text; ``Day`` (English weekday name) and
    ``Month`` (two-digit month string) are derived from it and one-hot
    encoded together with ``Region``.  The frame is then run through the
    ``pre_u`` statistic helpers, each of which also receives ``dfrom``.

    Note that the column assignments are made on the frame passed in, so the
    caller's ``ds`` object receives the new and rewritten columns.

    Args:
        ds: Test DataFrame with at least ``Date`` (``dd/mm/YYYY`` strings)
            and ``Region`` columns.
        dfrom: Reference frame forwarded to the ``pre_u`` helpers;
            presumably the training data the statistics are computed from
            -- confirm against those helpers.

    Returns:
        Whatever the last helper, ``pre_u.mean_cust_per_month_per_region``,
        returns for the processed frame.
    """
    # Zero-filled float columns, one value per row of ``ds``.
    ds['NumberOfSales'] = 0.0
    ds['NumberOfCustomers'] = 0.0

    parsed_dates = p.to_datetime(ds['Date'], format='%d/%m/%Y')

    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # is its replacement and yields the same English day names.
    ds['Day'] = parsed_dates.dt.day_name()
    ds['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    # Same value as the second "-"-separated field of the formatted date.
    ds['Month'] = parsed_dates.dt.strftime('%m')

    ds = imp.one_hot(ds, 'Day', header='Day_')
    ds = imp.one_hot_numeric(ds, 'Month', 'Month_')
    ds = imp.one_hot_numeric(ds, 'Region', 'Region_')

    # Order matters: each helper receives the frame returned by the previous.
    ds = pre_u.eliminate_IsOpen_zeros(ds)
    ds = pre_u.mean_std_cust_per_shop_per_day(ds, dfrom)
    ds = pre_u.add_avg_cust_per_shop(ds, dfrom)
    ds = pre_u.add_std_cust_per_shop(ds, dfrom)
    ds = pre_u.add_max_cust_per_shop(ds, dfrom)
    ds = pre_u.add_min_cust_per_shop(ds, dfrom)
    ds = pre_u.mean_cust_per_month_per_shop(ds, dfrom)
    ds = pre_u.mean_cust_per_month_per_region(ds, dfrom)
    return ds
Beispiel #4
0
    return linear_model.Lasso(alpha=5)


def regtree():
    """Return a new ``tree.DecisionTreeRegressor`` with ``max_depth=9``."""
    settings = {'max_depth': 9}
    regressor = tree.DecisionTreeRegressor(**settings)
    return regressor


def gradboostreg():
    """Return a new ``ensemble.GradientBoostingRegressor``.

    The estimator is configured with ``max_depth=8`` and ``n_estimators=5``.
    """
    settings = {
        'max_depth': 8,
        'n_estimators': 5,
    }
    booster = ensemble.GradientBoostingRegressor(**settings)
    return booster


if __name__ == '__main__':
    # Script entry point: load the customers dataset, derive month/region
    # features, build the learning set and pick the regressor to use.
    datas = ds.read_dataset("mean_var_on_customers_from_tain.csv")
    # Month is the second "-"-separated field of the Date string.
    datas['Month'] = datas['Date']
    datas['Month'] = datas['Month'].apply(lambda x: x.split("-")[1])
    datas = imp.one_hot_numeric(datas, 'Month', 'Month_')
    datas = imp.one_hot_numeric(datas, 'Region', 'Region_')
    # The reference frame is rebuilt for each helper call, so the second one
    # is taken from `datas` as returned by the first helper. Do not hoist it
    # into a shared variable without checking that the result is the same.
    # NOTE(review): the arguments 3, 2016, 12, 2017 look like start
    # month/year and end month/year -- confirm against utils.
    datas = preprocessing_utils.mean_cust_per_month_per_region(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    datas = preprocessing_utils.mean_cust_per_month_per_shop(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    # Build the learning set with NumberOfCustomers as target, excluding the
    # listed columns; `datas` is rebound to whatever build() returns.
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month', 'Max_Humidity', 'Max_Sea_Level_PressurehPa', 'Max_TemperatureC',
                 'Max_VisibilityKm', 'Max_Wind_SpeedKm_h', 'Mean_Humidity', 'Mean_Sea_Level_PressurehPa',
                 'Mean_VisibilityKm', 'Mean_Wind_SpeedKm_h', 'Min_Dew_PointC',
                 'Min_Humidity', 'Min_Sea_Level_PressurehPa', 'Min_TemperatureC', 'Min_VisibilitykM')\
        .build()
    # Candidate model constructors (callables, not instances). `ridge` and
    # `lasso` are defined outside this excerpt.
    model = [
        ridge, linear_model.LinearRegression, lasso, regtree, gradboostreg
    ]
    # Constructor selected for the final model.
    final = ridge