def Create_date_list(config):
    """Build the training periods, their date combinations, and the backtest periods."""
    dwps = util.create_list_period(config["train_start"], config["train_end"], False)
    dwp_test = util.create_list_period(config["backtest_start"], config["backtest_end"], False)
    dwp, dtp = util.get_all_combination_date(dwps, 12)
    return dwp_test, dwp, dtp
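# Usage sketch: Create_date_list expects a config dict with YYYYMM integers under
# the four keys read above; the values below are hypothetical, for illustration only.
# config = {"train_start": 201601, "train_end": 201812,
#           "backtest_start": 201901, "backtest_end": 201912}
# dwp_test, dwp, dtp = Create_date_list(config)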
def setUpClass(cls) -> None:
    # Generating data / params
    cls.list_period = utils.create_list_period(201601, 202012)
    cls.horizon = 6
    cls.years_to_add = 3
    cls.combination_date = utils.get_all_combination_date(
        cls.list_period, cls.horizon)

    # Reading test data for il
    cls.raw_master_il = pd.read_csv(
        os.path.join(DIR_TEST_DATA, 'raw_master_il.csv'), parse_dates=['date'])
    cls.all_sales_il = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_all_sales_il.pkl'))
    cls.forecast_il = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_extend_forecast_il.pkl'))
    cls.pre_forecast_correction_il = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_apply_forecast_correction_il.pkl'))
    cls.long_il = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_reformat_il.pkl'))

    # Reading test data for dc
    cls.raw_master_dc = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'raw_master_dc.pkl'))
    cls.all_sales_dc = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_all_sales_dc.pkl'))
    cls.forecast_dc = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_extend_forecast_dc.pkl'))
    cls.pre_forecast_correction_dc = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_apply_forecast_correction_dc.pkl'))
    cls.long_dc = pd.read_pickle(
        os.path.join(DIR_TEST_DATA, 'test_reformat_dc.pkl'))
def run_model(features, model_config): print(f"..runnning with config : {model_config}") table_all_features = features.copy() dwp_test = util.create_list_period(201702, 201902, False) res = mod.recreate_past_forecasts(table_all_features, dwp_test, model_config=model_config, horizon=8) res2 = res.groupby(['date_to_predict', 'sku_wo_pkg', 'horizon'])['prediction'].sum().reset_index() res_di = res2.copy() res_eib = res2.copy() res_il = res2.copy() res_di['label'] = 'di' res_eib['label'] = 'eib' res_il['label'] = 'il' res_final = pd.concat([res_il, res_eib, res_di]) res_final['ratio'] = 1 test = diagnostic.Diagnostic(cvr=res_final[(res_final.horizon == 6)], raw_master=raw_master, postprocess='indep') temp = test.run_test(plot=False, prediction_horizon=6) error = (temp.assign(abs_error=lambda x: abs(x["yhat_di_calib"] - x[ "target_di"])).abs_error.sum()) return error
def run_model(features, model_config): print(f"..runnning with config : {model_config}") table_all_features = features.copy() dwp_test = util.create_list_period(201710, 201902, False) res = mod.recreate_past_forecasts(table_all_features, dwp_test, model_config, horizon=8) test = diagnostic.Diagnostic(cvr=res[res.horizon == 6], raw_master=raw_master, postprocess='indep', di_eib_il_format=False) temp = test.run_test_dc(plot=False, horizon=6) error = (temp.assign(abs_error=lambda x: abs(x["prediction"] - x[ "target_dc"])).abs_error.sum()) return error
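# Example (sketch): scoring one candidate configuration with run_model. ModelConfig
# and run_model are defined in this codebase; the parameter values are hypothetical,
# mirroring the commented-out config further down. The helper name is illustrative.
def _example_scoring(table_all_features):
    candidate = ModelConfig(
        model_name="GradientBoostingRegressor",
        model_params={'loss': 'huber', 'learning_rate': 0.01},
    )
    # Returns the summed absolute error at horizon 6 for this candidate.
    return run_model(table_all_features, candidate)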
        feature_importance_df_final = pd.concat(
            [feature_importance_df_final, feature_importance_df])
        feature_importance_df_final.to_csv('./data/feature_importance_all_df.csv')
        return resfinal


if __name__ == '__main__':
    import src.forecaster.utilitaires as util
    import src.forecaster.diagnostic as diagnostic

    raw_master = pd.read_csv('./data/raw/raw_master_dc_20191126.csv')
    mod = Modeldc(raw_master)

    max_date_available = mod.all_sales.calendar_yearmonth.max()
    filter_date = min(201909, max_date_available)
    dwps = util.create_list_period(201701, filter_date, False)
    dwp, dtp = util.get_all_combination_date(dwps, 12)

    print("creating the main table")
    table_all_features = mod.create_all_features(dwp, dtp)
    # table_all_features = pd.read_csv("data/table_all_features_dc.csv")

    dwp_test = util.create_list_period(201804, 201909, False)
    #
    # model_config = ModelConfig(
    #     model_name="GradientBoostingRegressor",
    #     model_params={
    #         'standard_scaling': False,
    #         'pca': 0,
    #         'loss': 'huber',
    #         'learning_rate': 0.01,
        # rescale il di eib
        # res = self.correct_fc(res, month_to_correct=['CNY', 11], thrsh=0.05)
        resfinal = pd.concat([resfinal, res])
        return resfinal


if __name__ == '__main__':
    import pandas as pd
    import numpy as np
    import src.forecaster.utilitaires as util

    raw_master = pd.read_csv('data/raw/raw_master_il_0910.csv')
    mod = Modeldi(raw_master)

    dwp_test = util.create_list_period(201707, 201902, False)
    max_date_available = mod.all_sales.calendar_yearmonth.max()
    filter_date = min(201908, max_date_available)
    dwps = util.create_list_period(201601, filter_date, False)
    dwp, dtp = util.get_all_combination_date(dwps, 10)

    # 1. Read precalculated features
    table_all_features = pd.read_csv('data/table_all_features.csv')

    # 2. Remove negative targets
    msk = table_all_features.target >= 0
    print(f"removing {len(msk) - sum(msk)}/{len(msk)} rows because of a negative target")
    table_all_features = table_all_features[msk]
        for model_params in combinations:
            configs.append(
                ModelConfig(model_name=model_name,
                            model_params=deepcopy(model_params)))
    return configs


if __name__ == '__main__':
    # 0. create table all features
    raw_master = pd.read_csv('data/raw/raw_master_dc_20191016.csv')
    mod = Modeldc(raw_master)
    max_date_available = mod.all_sales.calendar_yearmonth.max()
    filter_date = min(201908, max_date_available)
    dwps = util.create_list_period(201701, filter_date, False)
    dwp, dtp = util.get_all_combination_date(dwps, 10)
    features = mod.create_all_features(dwp, dtp)

    # 1. read meta params config
    with open("src/exploration/models.yaml", "r") as stream:
        meta_config = OrderedDict(yaml.safe_load(stream))

    # 2. prepare configs
    model_configs = prepare_configs(meta_config=meta_config)
    random.shuffle(model_configs)
    print(f"Running model on {len(model_configs)} models")

    # 3. run the model for each config
    for (ix, model_config) in enumerate(model_configs):
        print(f"Running model {ix + 1}/{len(model_configs)}")