def update_history_for_level3_inv(level3_inv_data: Level3InvDataLoader,
                                  model_config: Bunch,
                                  db_config: Bunch,
                                  start_pred_year: int,
                                  start_pred_month: int,
                                  gap: int,
                                  need_unitize: bool = True) -> None:
    """Back-fill the level-3 inventory forecast of one month and store it.

    The function trains an ``LGBMSalesInfer`` model, predicts the month that
    lies ``gap`` months after the given start month (as computed by
    ``infer_month``), joins the prediction with the actual values, enriches
    the rows with customer / SKU attributes and accuracy metrics, keeps only
    white-listed customers and items, and finally replaces that month in the
    Kudu table named by ``db_config.table_name``.

    Args:
        level3_inv_data: Loader that supplies the training / testing sets,
            the actual values, and the ``customer_info`` / ``sku_info`` tables.
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Must expose ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        gap: Offset passed to ``infer_month`` and to the loader
            (presumably a number of months -- confirm with the loader).
        need_unitize: Forwarded to the loader as ``use_unitize``; also selects
            4 (True) or 0 (False) decimals when rounding amounts.

    Raises:
        Exception: If ``db_config.env`` is not one of the known environments.
    """
    # ---- Step 1: build the training and testing sets ---------------------
    train_end_year, train_end_month = infer_month(start_pred_year,
                                                  start_pred_month,
                                                  offset=-gap)
    months_for_training = get_pre_months(train_end_year,
                                         train_end_month,
                                         left_bound='2018-06')
    target_year, target_month = infer_month(start_pred_year,
                                            start_pred_month, gap)
    features_train, labels_train = level3_inv_data.prepare_training_set(
        months_for_training, gap=gap)
    features_test = level3_inv_data.prepare_testing_set(start_pred_year,
                                                        start_pred_month,
                                                        gap=gap)

    # ---- Step 2: fit the model and predict --------------------------------
    model = LGBMSalesInfer(model_config)
    model.fit(features_train, labels_train)
    raw_preds = model.predict(features_test)

    # ---- Step 3: shape the forecast into the output table -----------------
    actuals = level3_inv_data.get_true_data(target_year, target_month)
    forecasts = level3_inv_data.decorate_pred_result(raw_preds,
                                                     target_year,
                                                     target_month,
                                                     use_unitize=need_unitize)
    result = actuals.join(forecasts, how='left').reset_index()

    constant_columns = (
        ('bu_code', 'M111'),
        ('bu_name', '厨房热水器事业部'),
        ('comb_name', 'Default'),
    )
    for column, constant in constant_columns:
        result[column] = constant

    # Customer attributes: (output column, key in customer_info).
    # Only the region *names* are populated; no region code columns are set.
    customer_lookup = level3_inv_data.customer_info.to_dict()
    customer_columns = (
        ('customer_name', 'customer_name'),
        ('sales_cen_code', 'sales_cen_code'),
        ('sales_cen_name', 'sales_cen_name'),
        ('province_name', 'province'),
        ('city_name', 'city'),
        ('district_name', 'district'),
    )
    for column, source in customer_columns:
        result[column] = result['customer_code'].map(customer_lookup[source])

    # SKU attributes keep the same name in the output as in sku_info.
    sku_lookup = level3_inv_data.sku_info.to_dict()
    sku_columns = ('item_name', 'first_cate_code', 'second_cate_code',
                   'first_cate_name', 'second_cate_name', 'channel_name',
                   'item_price')
    for column in sku_columns:
        result[column] = result['item_code'].map(sku_lookup[column])

    amount_decimals = 4 if need_unitize else 0
    result['act_inv_amount'] = np.round(
        result['act_inv_qty'] * result['item_price'],
        decimals=amount_decimals)
    result['pred_inv_amount'] = np.round(
        result['pred_inv_qty'] * result['item_price'],
        decimals=amount_decimals)
    result['inv_pred_time'] = timestamp_to_time(time.time())
    add_accuracy(result, 'inv_acc', 'act_inv_qty', 'pred_inv_qty')
    result['inv_weighted_acc'] = (result['act_inv_qty'] *
                                  result['inv_acc']).astype(np.float32)

    # Keep white-listed customers first, then white-listed items.
    customer_list = CustomerList()
    item_list = ItemList(start_pred_year, start_pred_month)
    customer_mask = result['customer_code'].apply(
        customer_list.is_white_customer)
    result = result.loc[customer_mask]
    item_mask = result['item_code'].apply(item_list.is_white_items)
    result = result.loc[item_mask]

    print()
    mean_acc_pct = result['inv_acc'].mean() * 100
    print("[INFO] The average accuracy is: %.2f" % mean_acc_pct)
    weighted_acc_pct = (result['inv_weighted_acc'].sum() /
                        result['act_inv_qty'].sum() * 100)
    print("[INFO] The weighted accuracy is: %.2f" % weighted_acc_pct)

    # ---- Step 4: write into the database (Kudu) ---------------------------
    env = db_config.env
    if env == 'SIT':
        kudu_config = SIT_DB_CONFIG
    elif env == 'UAT':
        kudu_config = UAT_DB_CONFIG
    elif env == 'PROD':
        kudu_config = PROD_DB_CONFIG
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    writer = KuduResultWriter(Bunch(kudu_config))

    # NOTE(review): the month column is called 'order_date' even though this
    # is the inventory table -- confirm against the table schema.
    writer.clear_one_month(db_config.table_name, 'order_date', target_year,
                           target_month)
    writer.upsert(result, db_config.table_name, db_config.batch_size)
def update_history_for_level2_order(level2_data: Level2DataLoader,
                                    plan_data: PlanData,
                                    model_config: Bunch,
                                    db_config: Bunch,
                                    start_pred_year: int,
                                    start_pred_month: int,
                                    gap: int,
                                    use_unitize: bool = True) -> None:
    """Update order forecast result of level2 in specified month.

    Trains an ``LGBMSalesInfer`` model, predicts the month ``gap`` months
    after the start month (as computed by ``infer_month``), blends the model
    prediction 50/50 with the rule-based prediction produced by
    ``rule_func``, prints accuracy figures for the model, the rule and the
    blend, and replaces that month in the Kudu table
    ``db_config.table_name``.

    Args:
        level2_data: Loader providing training / testing sets, actual order
            data, historical order / distribution values and ``sku_info``.
        plan_data: Provides ``plan_sku_month_mean`` and ``get_one_month``.
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Must expose ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        gap: Offset passed to ``infer_month`` and to the loader
            (presumably a number of months -- confirm with the loader).
        use_unitize: Forwarded to ``decorate_pred_result``; also selects
            4 (True) or 0 (False) decimals when rounding amounts.

    Raises:
        Exception: If ``db_config.env`` is not one of the known environments.
    """
    # Step 1: Prepare training and testing set
    # ======================================================================
    last_train_year, last_train_month = infer_month(start_pred_year,
                                                    start_pred_month,
                                                    offset=-gap)
    train_months = get_pre_months(last_train_year,
                                  last_train_month,
                                  left_bound='2016-03')
    true_pred_year, true_pred_month = infer_month(start_pred_year,
                                                  start_pred_month, gap)

    X_train, y_train = level2_data.prepare_training_set(train_months, gap=gap)
    X_test = level2_data.prepare_testing_set(start_pred_year,
                                             start_pred_month,
                                             gap=gap)

    # Step 2: Training and predicting
    # ======================================================================
    level2_order_infer = LGBMSalesInfer(model_config)
    level2_order_infer.fit(X_train, y_train)
    preds_test = level2_order_infer.predict(X_test)

    # Step 3: Process forecast result
    # ======================================================================
    amount_decimals = 4 if use_unitize else 0

    df_test = level2_data.get_true_order_data(true_pred_year, true_pred_month)
    df_pred_test = level2_data.decorate_pred_result(preds_test,
                                                    true_pred_year,
                                                    true_pred_month,
                                                    use_unitize=use_unitize)
    result = df_test.join(df_pred_test, how='left').reset_index()
    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    sku_info_dict = level2_data.sku_info.to_dict()
    result['item_name'] = result.item_code.map(sku_info_dict['item_name'])
    result['first_cate_code'] = result.item_code.map(
        sku_info_dict['first_cate_code'])
    result['second_cate_code'] = result.item_code.map(
        sku_info_dict['second_cate_code'])
    result['first_cate_name'] = result.item_code.map(
        sku_info_dict['first_cate_name'])
    result['second_cate_name'] = result.item_code.map(
        sku_info_dict['second_cate_name'])
    result['channel_name'] = result.item_code.map(
        sku_info_dict['channel_name'])
    result['item_price'] = result.item_code.map(sku_info_dict['item_price'])
    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=amount_decimals)
    # Model-only amount; it is refreshed after the ensemble step below.
    result['pred_ord_amount'] = np.round(
        result.pred_ord_qty * result.item_price, decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())
    add_accuracy(result, 'ord_acc', 'act_ord_qty', 'pred_ord_qty')
    result['ord_weighted_acc'] = (result.act_ord_qty * result.ord_acc).astype(
        np.float32)

    item_list = ItemList(start_pred_year, start_pred_month)
    result = result.loc[result.item_code.apply(item_list.is_white_items)]

    print()
    print("[INFO] The average accuracy of model is: %.2f" %
          (result.ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of model is: %.2f" %
          (result.ord_weighted_acc.sum() / result.act_ord_qty.sum() * 100))

    # Step 4: Ensemble with rule
    # ======================================================================
    rule_res = result.copy()

    # Per-item history features; zeros are treated as missing before the
    # multi-month means are taken.
    order_sku_month_pre6_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 6,
        True).replace(0, np.nan).mean(axis=1)
    order_sku_month_pre1 = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 1, True).mean(axis=1)
    dis_sku_month_pre3_mean = level2_data.get_pre_dis_vals(
        start_pred_year, start_pred_month, 3,
        True).replace(0, np.nan).mean(axis=1)
    dis_sku_month_pre1 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 1,
                                                      True).mean(axis=1)
    plan_sku_month_mean = plan_data.plan_sku_month_mean

    rule_res['ord_sku_month_pre6_mean'] = rule_res.item_code.map(
        order_sku_month_pre6_mean)
    rule_res['ord_sku_month_pre1'] = rule_res.item_code.map(
        order_sku_month_pre1)
    rule_res['dis_sku_month_pre3_mean'] = rule_res.item_code.map(
        dis_sku_month_pre3_mean)
    rule_res['dis_sku_month_pre1'] = rule_res.item_code.map(dis_sku_month_pre1)
    rule_res['plan_sku_month_mean'] = rule_res.item_code.map(
        plan_sku_month_mean)

    # 0/1 flags marking which history features are missing.
    rule_res['is_aver_ord_na'] = (rule_res.ord_sku_month_pre6_mean.isna()) * 1
    rule_res['is_aver_dis_na'] = (rule_res.dis_sku_month_pre3_mean.isna()) * 1
    rule_res['is_aver_plan_na'] = (rule_res.plan_sku_month_mean.isna()) * 1
    rule_res['is_ord_pre1_na'] = (rule_res.ord_sku_month_pre1.isna()) * 1
    rule_res['is_dis_pre1_na'] = (rule_res.dis_sku_month_pre1.isna()) * 1

    rule_res['online_offline_flag'] = rule_res.item_code.map(
        sku_info_dict['sales_chan_name']).fillna('未知')
    rule_res['project_flag'] = rule_res.item_code.map(
        sku_info_dict['project_flag']).fillna('未知')

    # Items whose 24-value order mean is NaN (all values zero or missing).
    order_sku_month_pre24_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 24,
        True).replace(0, np.nan).mean(axis=1)
    curr_new_items = set(order_sku_month_pre24_mean.loc[
        order_sku_month_pre24_mean.isna()].index)

    # Items with exactly one positive distribution value among the fetched
    # columns, that value being in the third column.
    dis_sku_month_pre3 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 3,
                                                      True)
    dis_sku_month_pre3['num_not_null'] = ((dis_sku_month_pre3 > 0) *
                                          1).sum(axis=1)
    new_items_by_dis = set(
        dis_sku_month_pre3.loc[(dis_sku_month_pre3.num_not_null == 1) &
                               (dis_sku_month_pre3.iloc[:, 2] > 0)].index)

    demand = plan_data.get_one_month(true_pred_year, true_pred_month, True)
    rule_res['demand'] = rule_res.item_code.map(demand)
    rule_res['is_curr_new'] = rule_res.item_code.apply(
        lambda x: 1 if x in curr_new_items else 0)
    rule_res['is_new_by_dis'] = rule_res.item_code.apply(
        lambda x: 1 if x in new_items_by_dis else 0)
    rule_res['demand_dis_ratio'] = (rule_res.demand /
                                    rule_res.dis_sku_month_pre3_mean)

    rule_res['pred_ord_qty_rule'] = rule_res.apply(rule_func, axis=1)
    rule_res['pred_ord_qty_rule'] = rule_res.pred_ord_qty_rule.replace(
        np.nan, 0)
    # Where the rule yields nothing (NaN or 0) fall back to the model value.
    rule_res['pred_ord_qty_rule'] = rule_res.apply(
        lambda x: x.pred_ord_qty
        if x.pred_ord_qty_rule == 0 else x.pred_ord_qty_rule,
        axis=1)

    add_accuracy(rule_res, 'rule_ord_acc', 'act_ord_qty', 'pred_ord_qty_rule')
    rule_res['rule_ord_weighted_acc'] = (
        rule_res.act_ord_qty * rule_res.rule_ord_acc).astype(np.float32)

    print()
    print("[INFO] The average accuracy of rule is: %.2f" %
          (rule_res.rule_ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of rule is: %.2f" %
          (rule_res.rule_ord_weighted_acc.sum() / rule_res.act_ord_qty.sum() *
           100))

    # Equal-weight blend of model and rule predictions.
    result['pred_ord_qty'] = (result.pred_ord_qty * 0.5 +
                              rule_res.pred_ord_qty_rule * 0.5)
    # Keep the stored amount consistent with the blended quantity; without
    # this the written row would pair the blended quantity with the
    # model-only amount computed in step 3.
    result['pred_ord_amount'] = np.round(
        result.pred_ord_qty * result.item_price, decimals=amount_decimals)

    add_accuracy(result, 'ord_acc', 'act_ord_qty', 'pred_ord_qty')
    result['ord_weighted_acc'] = (result.act_ord_qty * result.ord_acc).astype(
        np.float32)

    print()
    print("[INFO] The average accuracy of ensemble is: %.2f" %
          (result.ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of ensemble is: %.2f" %
          (result.ord_weighted_acc.sum() / result.act_ord_qty.sum() * 100))

    # Step 5: Write into database (Kudu)
    # ======================================================================
    if db_config.env == 'SIT':
        level2_order_writer = KuduResultWriter(Bunch(SIT_DB_CONFIG))
    elif db_config.env == 'UAT':
        level2_order_writer = KuduResultWriter(Bunch(UAT_DB_CONFIG))
    elif db_config.env == 'PROD':
        level2_order_writer = KuduResultWriter(Bunch(PROD_DB_CONFIG))
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    level2_order_writer.clear_one_month(db_config.table_name, 'order_date',
                                        true_pred_year, true_pred_month)
    level2_order_writer.upsert(result, db_config.table_name,
                               db_config.batch_size)
def update_future_for_level2_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level2.

    Trains one ``LGBMSalesInfer`` model per horizon (``gap`` = 0 ..
    ``periods`` - 1) and produces two outputs:

    * a per-item detail table (``db_config.table2_name``) holding the
      predictions from ``preds_test[1:]`` with the first one blended 50/50
      with the ``rule_func`` prediction; this table is also pushed to ESB;
    * a long-format table (``db_config.table1_name``) with one row per item
      and month, extended by ``predict_by_history`` and joined with the
      actual order data.

    Args:
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Must expose ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table1_name``, ``table2_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        periods: Number of horizons to train and predict. The detail table
            reads columns ``pred_ord_qty_m1`` .. ``pred_ord_qty_m3``, which
            presumably requires the default of 4 -- confirm with
            ``add_index_v2``.
        categories: Forwarded to ``Level2DataLoader``.
        need_unitize: Forwarded to the loaders; also selects rounding
            precision, the replacement value for negative predictions, and
            whether detail-table values are multiplied by 10000.

    Raises:
        Exception: If ``db_config.env`` is not one of the known environments.
    """
    # Replacement for negative (detail table) / non-positive (long table)
    # predictions, and rounding precision, both depend on the unit in use.
    floor_qty = 0.0025 if need_unitize else 25
    amount_decimals = 4 if need_unitize else 0

    # Step 1: Read in data
    # ======================================================================
    level2_data = Level2DataLoader(start_pred_year,
                                   start_pred_month,
                                   categories=categories,
                                   need_unitize=need_unitize,
                                   label_data='order')
    plan_data = PlanData(start_pred_year,
                         start_pred_month,
                         need_unitize=need_unitize)
    item_list = ItemList(start_pred_year, start_pred_month)

    # Step 2: Training and predicting
    # ======================================================================
    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    preds_test = []
    for i in range(periods):
        X_train, y_train = level2_data.prepare_training_set(train_months,
                                                            gap=i)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = level2_data.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=i)
        predictor = LGBMSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result & write into the detail table
    # ======================================================================
    m1_year, m1_month = infer_month(start_pred_year, start_pred_month, 1)

    result = level2_data.add_index_v2(preds_test[1:])
    # | item_code (index) | pred_ord_qty_m1 | pred_ord_qty_m2 | pred_ord_qty_m3 |
    # Snapshot taken before clipping; reused for the long table in step 5.
    m1_to_m3_res = result.copy()
    for col in result.columns:
        result[col] = result[col].apply(lambda x: floor_qty if x < 0 else x)

    result = result.reset_index()
    # | item_code | pred_ord_qty_m1 | pred_ord_qty_m2 | pred_ord_qty_m3 |
    result['bu_code'] = '30015305'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'
    result['sales_type'] = "内销"
    result['forecast_type'] = "内销整机预测"
    # The detail table is dated with the last day of the first forecast month.
    result['order_date'] = "%d-%02d-%02d" % (
        m1_year, m1_month, get_days_of_month(m1_year, m1_month))

    sku_info_dict = level2_data.sku_info.to_dict()
    result['item_name'] = result.item_code.map(sku_info_dict['item_name'])
    result['first_cate_code'] = result.item_code.map(
        sku_info_dict['first_cate_code'])
    result['second_cate_code'] = result.item_code.map(
        sku_info_dict['second_cate_code'])
    result['first_cate_name'] = result.item_code.map(
        sku_info_dict['first_cate_name'])
    result['second_cate_name'] = result.item_code.map(
        sku_info_dict['second_cate_name'])
    result['item_price'] = result.item_code.map(sku_info_dict['item_price'])

    item_list_dict = item_list.items.copy().set_index('item_code').to_dict()
    result['manu_code'] = result.item_code.map(
        item_list_dict['manu_code']).fillna('')
    result['area_name'] = ''

    # --- Rule-based prediction for the first forecast month ---------------
    rule_res = result.copy()
    rule_res['pred_ord_qty'] = rule_res['pred_ord_qty_m1']

    # Per-item history features; zeros are treated as missing before the
    # multi-month means are taken.
    order_sku_month_pre6_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 6,
        True).replace(0, np.nan).mean(axis=1)
    order_sku_month_pre1 = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 1, True).mean(axis=1)
    dis_sku_month_pre3_mean = level2_data.get_pre_dis_vals(
        start_pred_year, start_pred_month, 3,
        True).replace(0, np.nan).mean(axis=1)
    dis_sku_month_pre1 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 1,
                                                      True).mean(axis=1)
    plan_sku_month_mean = plan_data.plan_sku_month_mean

    rule_res['ord_sku_month_pre6_mean'] = rule_res.item_code.map(
        order_sku_month_pre6_mean)
    rule_res['ord_sku_month_pre1'] = rule_res.item_code.map(
        order_sku_month_pre1)
    rule_res['dis_sku_month_pre3_mean'] = rule_res.item_code.map(
        dis_sku_month_pre3_mean)
    rule_res['dis_sku_month_pre1'] = rule_res.item_code.map(dis_sku_month_pre1)
    rule_res['plan_sku_month_mean'] = rule_res.item_code.map(
        plan_sku_month_mean)

    # 0/1 flags marking which history features are missing.
    rule_res['is_aver_ord_na'] = (rule_res.ord_sku_month_pre6_mean.isna()) * 1
    rule_res['is_aver_dis_na'] = (rule_res.dis_sku_month_pre3_mean.isna()) * 1
    rule_res['is_aver_plan_na'] = (rule_res.plan_sku_month_mean.isna()) * 1
    rule_res['is_ord_pre1_na'] = (rule_res.ord_sku_month_pre1.isna()) * 1
    rule_res['is_dis_pre1_na'] = (rule_res.dis_sku_month_pre1.isna()) * 1

    rule_res['online_offline_flag'] = rule_res.item_code.map(
        sku_info_dict['sales_chan_name']).fillna('未知')
    rule_res['project_flag'] = rule_res.item_code.map(
        sku_info_dict['project_flag']).fillna('未知')

    # Items whose 24-value order mean is NaN (all values zero or missing).
    order_sku_month_pre24_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 24,
        True).replace(0, np.nan).mean(axis=1)
    curr_new_items = set(order_sku_month_pre24_mean.loc[
        order_sku_month_pre24_mean.isna()].index)

    # Items with exactly one positive distribution value among the fetched
    # columns, that value being in the third column.
    dis_sku_month_pre3 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 3,
                                                      True)
    dis_sku_month_pre3['num_not_null'] = ((dis_sku_month_pre3 > 0) *
                                          1).sum(axis=1)
    new_items_by_dis = set(
        dis_sku_month_pre3.loc[(dis_sku_month_pre3.num_not_null == 1) &
                               (dis_sku_month_pre3.iloc[:, 2] > 0)].index)

    demand = plan_data.get_one_month(m1_year, m1_month, True)
    rule_res['demand'] = rule_res.item_code.map(demand)
    rule_res['is_curr_new'] = rule_res.item_code.apply(
        lambda x: 1 if x in curr_new_items else 0)
    rule_res['is_new_by_dis'] = rule_res.item_code.apply(
        lambda x: 1 if x in new_items_by_dis else 0)
    rule_res['demand_dis_ratio'] = (rule_res.demand /
                                    rule_res.dis_sku_month_pre3_mean)

    rule_res['pred_ord_qty_rule'] = rule_res.apply(rule_func, axis=1)
    rule_res['pred_ord_qty_rule'] = rule_res.pred_ord_qty_rule.replace(
        np.nan, 0)
    # Where the rule yields nothing (NaN or 0) fall back to the model value.
    rule_res['pred_ord_qty_rule'] = rule_res.apply(
        lambda x: x.pred_ord_qty
        if x.pred_ord_qty_rule == 0 else x.pred_ord_qty_rule,
        axis=1)

    # Equal-weight blend of model and rule for the first forecast month.
    result['pred_ord_qty_m1'] = (result.pred_ord_qty_m1 * 0.5 +
                                 rule_res.pred_ord_qty_rule * 0.5)
    result['avg_dis'] = rule_res['dis_sku_month_pre3_mean'].fillna(0.0)

    result['pred_ord_amount_m1'] = np.round(
        result.pred_ord_qty_m1 * result.item_price, decimals=amount_decimals)
    result['pred_ord_amount_m2'] = np.round(
        result.pred_ord_qty_m2 * result.item_price, decimals=amount_decimals)
    result['pred_ord_amount_m3'] = np.round(
        result.pred_ord_qty_m3 * result.item_price, decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    if need_unitize:
        # Scale unitized values by 10000 before they are written.
        result['avg_dis'] = np.round(result.avg_dis * 10000)
        result['pred_ord_qty_m1'] = np.round(result.pred_ord_qty_m1 * 10000)
        result['pred_ord_qty_m2'] = np.round(result.pred_ord_qty_m2 * 10000)
        result['pred_ord_qty_m3'] = np.round(result.pred_ord_qty_m3 * 10000)
        result['pred_ord_amount_m1'] = np.round(result.pred_ord_amount_m1 *
                                                10000)
        result['pred_ord_amount_m2'] = np.round(result.pred_ord_amount_m2 *
                                                10000)
        result['pred_ord_amount_m3'] = np.round(result.pred_ord_amount_m3 *
                                                10000)

    # Drop delisting items and items without a manufacturer code.
    result = result.loc[~result.item_code.apply(item_list.is_delisting_items)]
    result = result.loc[~(result.manu_code == '')]

    if db_config.env == 'SIT':
        kudu_config = SIT_DB_CONFIG
        # NOTE(review): SIT uses the UAT ESB endpoint -- presumably there is
        # no dedicated SIT endpoint; confirm.
        esb_url = UAT_ESB_URL
    elif db_config.env == 'UAT':
        kudu_config = UAT_DB_CONFIG
        esb_url = UAT_ESB_URL
    elif db_config.env == 'PROD':
        kudu_config = PROD_DB_CONFIG
        esb_url = PROD_ESB_URL
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_one_month(db_config.table2_name, 'order_date', m1_year,
                           m1_month)
    writer.upsert(result, db_config.table2_name, db_config.batch_size)

    # Step 4: Push to ESB
    # ======================================================================
    result['customer_code'] = ''
    result['attribute1'] = ''
    result['attribute2'] = ''
    result['attribute3'] = ''
    result['attribute4'] = ''
    result['attribute5'] = ''
    result.rename(columns={'manu_code': 'manu_name'}, inplace=True)
    result = result[[
        'bu_code', 'sales_type', 'manu_name', 'area_name', 'customer_code',
        'order_date', 'first_cate_name', 'second_cate_name', 'item_code',
        'forecast_type', 'avg_dis', 'item_price', 'pred_ord_qty_m1',
        'pred_ord_qty_m2', 'pred_ord_qty_m3', 'attribute1', 'attribute2',
        'attribute3', 'attribute4', 'attribute5'
    ]]
    push_to_esb(result, esb_url)

    del result
    gc.collect()

    # Step 5: Process forecast result & write into the long-format table
    # ======================================================================
    df_test = level2_data.get_true_order_data(start_pred_year,
                                              start_pred_month)
    df_preds_test = level2_data.add_index(preds_test, start_pred_year,
                                          start_pred_month)
    # The first forecast month uses the unclipped, unblended model value.
    df_preds_test['%d%02d' %
                  (m1_year, m1_month)] = m1_to_m3_res['pred_ord_qty_m1']
    df_pred_test_more = level2_data.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    df_preds_test = pd.concat([df_preds_test, df_pred_test_more],
                              axis=1).stack().to_frame('pred_ord_qty')
    df_preds_test.index.set_names(['item_code', 'order_date'], inplace=True)
    df_preds_test['pred_ord_qty'] = df_preds_test.pred_ord_qty.apply(
        lambda x: x if x > 0 else floor_qty)
    df_preds_test['pred_ord_qty'] = np.round(df_preds_test.pred_ord_qty,
                                             decimals=amount_decimals)

    result = df_preds_test.join(df_test, how='left').reset_index()
    # Assign back explicitly: an in-place fillna on the attribute-accessed
    # column is a chained update that does not reach the frame under pandas
    # Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)
    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    sku_info_dict = level2_data.sku_info.to_dict()
    result['item_name'] = result.item_code.map(sku_info_dict['item_name'])
    result['first_cate_code'] = result.item_code.map(
        sku_info_dict['first_cate_code'])
    result['second_cate_code'] = result.item_code.map(
        sku_info_dict['second_cate_code'])
    result['first_cate_name'] = result.item_code.map(
        sku_info_dict['first_cate_name'])
    result['second_cate_name'] = result.item_code.map(
        sku_info_dict['second_cate_name'])
    result['channel_name'] = result.item_code.map(
        sku_info_dict['channel_name'])
    result['item_price'] = result.item_code.map(sku_info_dict['item_price'])

    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=amount_decimals)
    result['pred_ord_amount'] = np.round(
        result.pred_ord_qty * result.item_price, decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    result = result.loc[result.item_code.apply(item_list.is_white_items)]

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_months_after(db_config.table1_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table1_name, db_config.batch_size)
def update_future_for_level3_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level3.

    Trains one ``LGBMSalesInfer`` model per horizon (``gap`` = 0 ..
    ``periods`` - 1), extends the predictions with ``predict_by_history``,
    reshapes them to one row per (customer, item, month), joins the actual
    order data, enriches rows with customer / SKU attributes, keeps only
    white-listed customers and items, and rewrites the months from the start
    month onwards in the Kudu table ``db_config.table_name``.

    Args:
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Must expose ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        periods: Number of horizons to train and predict.
        categories: Forwarded to ``Level3OrderDataLoader``.
        need_unitize: Forwarded to the loader; also selects 4 (True) or
            0 (False) decimals when rounding predicted quantities.

    Raises:
        Exception: If ``db_config.env`` is not one of the known environments.
    """
    # Step 1: Read in data
    # ======================================================================
    data_loader = Level3OrderDataLoader(start_pred_year,
                                        start_pred_month,
                                        categories=categories,
                                        need_unitize=need_unitize)

    # Step 2: Training and predicting
    # ======================================================================
    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    preds_test = []
    for i in range(periods):
        X_train, y_train = data_loader.prepare_training_set(train_months,
                                                            gap=i)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = data_loader.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=i)
        predictor = LGBMSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result
    # ======================================================================
    df_test = data_loader.get_true_data(start_pred_year, start_pred_month)
    df_pred_test = data_loader.add_index(preds_test, start_pred_year,
                                         start_pred_month)
    df_pred_test_more = data_loader.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    # Wide (one column per month) -> long (one row per month).
    df_pred_test = pd.concat([df_pred_test, df_pred_test_more],
                             axis=1).stack().to_frame('pred_ord_qty')
    df_pred_test.index.set_names(['customer_code', 'item_code', 'order_date'],
                                 inplace=True)
    # Non-positive (and NaN) predictions become 0.
    df_pred_test['pred_ord_qty'] = df_pred_test.pred_ord_qty.apply(
        lambda x: x if x > 0 else 0)
    df_pred_test['pred_ord_qty'] = np.round(
        df_pred_test.pred_ord_qty, decimals=4 if need_unitize else 0)

    result = df_pred_test.join(df_test, how='left').reset_index()
    # Assign back explicitly: an in-place fillna on the attribute-accessed
    # column is a chained update that does not reach the frame under pandas
    # Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)
    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    # Only the region *names* are populated; no region code columns are set.
    customer_info_dict = data_loader.customer_info.to_dict()
    result['customer_name'] = result.customer_code.map(
        customer_info_dict['customer_name'])
    result['sales_cen_code'] = result.customer_code.map(
        customer_info_dict['sales_cen_code'])
    result['sales_cen_name'] = result.customer_code.map(
        customer_info_dict['sales_cen_name'])
    result['province_name'] = result.customer_code.map(
        customer_info_dict['province'])
    result['city_name'] = result.customer_code.map(customer_info_dict['city'])
    result['district_name'] = result.customer_code.map(
        customer_info_dict['district'])

    sku_info_dict = data_loader.sku_info.to_dict()
    result['item_name'] = result.item_code.map(sku_info_dict['item_name'])
    result['first_cate_code'] = result.item_code.map(
        sku_info_dict['first_cate_code'])
    result['second_cate_code'] = result.item_code.map(
        sku_info_dict['second_cate_code'])
    result['first_cate_name'] = result.item_code.map(
        sku_info_dict['first_cate_name'])
    result['second_cate_name'] = result.item_code.map(
        sku_info_dict['second_cate_name'])
    result['channel_name'] = result.item_code.map(
        sku_info_dict['channel_name'])
    result['item_price'] = result.item_code.map(sku_info_dict['item_price'])

    # NOTE(review): amounts are always rounded to 4 decimals here, whereas
    # the quantity above uses ``4 if need_unitize else 0`` -- confirm whether
    # the fixed precision is intended.
    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=4)
    result['pred_ord_amount'] = np.round(
        result.pred_ord_qty * result.item_price, decimals=4)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    # Keep white-listed customers first, then white-listed items.
    customer_list = CustomerList()
    item_list = ItemList(start_pred_year, start_pred_month)
    result = result.loc[result.customer_code.apply(
        customer_list.is_white_customer)]
    result = result.loc[result.item_code.apply(item_list.is_white_items)]

    # Step 4: Write into database (Kudu)
    # ======================================================================
    if db_config.env == 'SIT':
        writer = KuduResultWriter(Bunch(SIT_DB_CONFIG))
    elif db_config.env == 'UAT':
        writer = KuduResultWriter(Bunch(UAT_DB_CONFIG))
    elif db_config.env == 'PROD':
        writer = KuduResultWriter(Bunch(PROD_DB_CONFIG))
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    writer.clear_months_after(db_config.table_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table_name, db_config.batch_size)
def update_future_for_level1_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level1.

    Trains one ``RFSalesInfer`` model per horizon (``gap`` = 0 ..
    ``periods`` - 1) and produces two outputs:

    * a long-format table (``db_config.table1_name``) with one row per first
      category and month, extended by ``predict_by_history`` and joined with
      the actual order data;
    * a per-category detail table (``db_config.table2_name``) holding the
      predictions from ``preds_test[1:]``.

    Args:
        model_config: Configuration handed to ``RFSalesInfer``.
        db_config: Must expose ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table1_name``, ``table2_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        periods: Number of horizons to train and predict. The detail table
            reads columns ``pred_ord_qty_m1`` .. ``pred_ord_qty_m3``, which
            presumably requires the default of 4 -- confirm with
            ``add_index_v2``.
        categories: Forwarded to ``Level1DataLoader``.
        need_unitize: Forwarded to the loader; also selects rounding
            precision, the replacement value for negative predictions, and
            whether detail-table values are multiplied by 10000.

    Raises:
        Exception: If ``db_config.env`` is not one of the known environments.
    """
    # Replacement for non-positive (long table) / negative (detail table)
    # predictions, and rounding precision, both depend on the unit in use.
    floor_qty = 0.0025 if need_unitize else 25
    amount_decimals = 4 if need_unitize else 0

    # Step 1: Read in data
    # ======================================================================
    level1_data = Level1DataLoader(start_pred_year,
                                   start_pred_month,
                                   categories=categories,
                                   need_unitize=need_unitize,
                                   label_data='order')

    # Step 2: Training and predicting
    # ======================================================================
    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    preds_test = []
    for i in range(periods):
        X_train, y_train = level1_data.prepare_training_set(train_months,
                                                            gap=i)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = level1_data.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=i)
        predictor = RFSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result & write into the long-format table
    # ======================================================================
    df_test = level1_data.get_true_order_data(start_pred_year,
                                              start_pred_month)
    df_pred_test = level1_data.add_index(preds_test, start_pred_year,
                                         start_pred_month)
    df_pred_test_more = level1_data.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    # Wide (one column per month) -> long (one row per month).
    df_pred_test = pd.concat([df_pred_test, df_pred_test_more],
                             axis=1).stack().to_frame('pred_ord_qty')
    df_pred_test.index.set_names(['first_cate_code', 'order_date'],
                                 inplace=True)
    df_pred_test['pred_ord_qty'] = df_pred_test.pred_ord_qty.apply(
        lambda x: x if x > 0 else floor_qty)
    df_pred_test['pred_ord_qty'] = np.round(df_pred_test.pred_ord_qty,
                                            decimals=amount_decimals)

    result = df_pred_test.join(df_test, how='left').reset_index()
    # Assign back explicitly: an in-place fillna on the attribute-accessed
    # column is a chained update that does not reach the frame under pandas
    # Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)
    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'
    result['first_cate_name'] = result.first_cate_code.map(
        CATE_CODE_2_CATE_NAME)

    cate_info_dict = level1_data.cate_info.to_dict()
    result['aver_price'] = result.first_cate_code.map(
        cate_info_dict['cate_aver_price'])
    result['act_ord_amount'] = np.round(result.act_ord_qty * result.aver_price,
                                        decimals=amount_decimals)
    result['pred_ord_amount'] = np.round(
        result.pred_ord_qty * result.aver_price, decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    if db_config.env == 'SIT':
        kudu_config = SIT_DB_CONFIG
    elif db_config.env == 'UAT':
        kudu_config = UAT_DB_CONFIG
    elif db_config.env == 'PROD':
        kudu_config = PROD_DB_CONFIG
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_months_after(db_config.table1_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table1_name, db_config.batch_size)

    del result
    gc.collect()

    # Step 4: Process forecast result & write into the detail table
    # ======================================================================
    result = level1_data.add_index_v2(preds_test[1:])
    for col in result.columns:
        result[col] = result[col].apply(lambda x: floor_qty if x < 0 else x)

    result = result.reset_index()
    result['bu_code'] = '30015305'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    m1_year, m1_month = infer_month(start_pred_year, start_pred_month, 1)
    result['order_date'] = "%d%02d" % (m1_year, m1_month)
    result['first_cate_name'] = result.first_cate_code.map(
        CATE_CODE_2_CATE_NAME)
    result['aver_price'] = result.first_cate_code.map(
        cate_info_dict['cate_aver_price'])
    result['pred_ord_amount_m1'] = np.round(
        result.pred_ord_qty_m1 * result.aver_price, decimals=amount_decimals)
    result['pred_ord_amount_m2'] = np.round(
        result.pred_ord_qty_m2 * result.aver_price, decimals=amount_decimals)
    result['pred_ord_amount_m3'] = np.round(
        result.pred_ord_qty_m3 * result.aver_price, decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    if need_unitize:
        # Scale unitized values by 10000 before they are written.
        result['pred_ord_qty_m1'] = np.round(result.pred_ord_qty_m1 * 10000)
        result['pred_ord_qty_m2'] = np.round(result.pred_ord_qty_m2 * 10000)
        result['pred_ord_qty_m3'] = np.round(result.pred_ord_qty_m3 * 10000)
        result['pred_ord_amount_m1'] = np.round(result.pred_ord_amount_m1 *
                                                10000)
        result['pred_ord_amount_m2'] = np.round(result.pred_ord_amount_m2 *
                                                10000)
        result['pred_ord_amount_m3'] = np.round(result.pred_ord_amount_m3 *
                                                10000)

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_one_month(db_config.table2_name, 'order_date', m1_year,
                           m1_month)
    writer.upsert(result, db_config.table2_name, db_config.batch_size)