def update_history_for_level3_inv(level3_inv_data: Level3InvDataLoader,
                                  model_config: Bunch,
                                  db_config: Bunch,
                                  start_pred_year: int,
                                  start_pred_month: int,
                                  gap: int,
                                  need_unitize: bool = True) -> None:
    """Back-fill the level-3 inventory forecast of one month and store it.

    The function trains an ``LGBMSalesInfer`` model on the training set
    built by ``level3_inv_data``, predicts the test set, joins the
    prediction onto the true data of the target month, enriches the rows
    with customer/SKU attributes, prints two accuracy figures and finally
    replaces the target month in the result table.

    Args:
        level3_inv_data: Loader supplying the training/testing sets, the
            true data, and the ``customer_info`` / ``sku_info`` frames.
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Provides ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        gap: Offset passed to ``infer_month`` (negated for the last
            training month, as-is for the target month) and to the loader.
        need_unitize: Forwarded to ``decorate_pred_result`` as
            ``use_unitize``; amounts are rounded to 4 decimals when true
            and to 0 decimals otherwise.

    Raises:
        Exception: If ``db_config.env`` is not 'SIT', 'UAT' or 'PROD'.
    """
    amount_decimals = 4 if need_unitize else 0

    # Step 1: Prepare training and testing set
    # ============================================================================================ #

    train_end_year, train_end_month = infer_month(start_pred_year,
                                                  start_pred_month,
                                                  offset=-gap)
    train_months = get_pre_months(train_end_year,
                                  train_end_month,
                                  left_bound='2018-06')
    target_year, target_month = infer_month(start_pred_year,
                                            start_pred_month, gap)

    X_train, y_train = level3_inv_data.prepare_training_set(train_months,
                                                            gap=gap)
    X_test = level3_inv_data.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=gap)

    # Step 2: Training and predicting
    # ============================================================================================ #

    model = LGBMSalesInfer(model_config)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Step 3: Process forecast result
    # ============================================================================================ #

    actual_df = level3_inv_data.get_true_data(target_year, target_month)
    predicted_df = level3_inv_data.decorate_pred_result(
        predictions, target_year, target_month, use_unitize=need_unitize)

    forecast = actual_df.join(predicted_df, how='left').reset_index()

    forecast['bu_code'] = 'M111'
    forecast['bu_name'] = '厨房热水器事业部'
    forecast['comb_name'] = 'Default'

    # (output column, key in customer_info) -- looked up by customer_code.
    # The province/city/district/channel *code* columns are not populated.
    customer_lookup = level3_inv_data.customer_info.to_dict()
    customer_columns = (
        ('customer_name', 'customer_name'),
        ('sales_cen_code', 'sales_cen_code'),
        ('sales_cen_name', 'sales_cen_name'),
        ('province_name', 'province'),
        ('city_name', 'city'),
        ('district_name', 'district'),
    )
    for target_col, source_key in customer_columns:
        forecast[target_col] = forecast.customer_code.map(
            customer_lookup[source_key])

    # SKU attributes keep their source name and are looked up by item_code.
    sku_lookup = level3_inv_data.sku_info.to_dict()
    sku_columns = (
        'item_name',
        'first_cate_code',
        'second_cate_code',
        'first_cate_name',
        'second_cate_name',
        'channel_name',
        'item_price',
    )
    for sku_col in sku_columns:
        forecast[sku_col] = forecast.item_code.map(sku_lookup[sku_col])

    forecast['act_inv_amount'] = np.round(
        forecast.act_inv_qty * forecast.item_price, decimals=amount_decimals)
    forecast['pred_inv_amount'] = np.round(
        forecast.pred_inv_qty * forecast.item_price, decimals=amount_decimals)
    forecast['inv_pred_time'] = timestamp_to_time(time.time())

    add_accuracy(forecast, 'inv_acc', 'act_inv_qty', 'pred_inv_qty')
    weighted = forecast.act_inv_qty * forecast.inv_acc
    forecast['inv_weighted_acc'] = weighted.astype(np.float32)

    # Keep only white-listed customers, then only white-listed items.
    customer_list = CustomerList()
    item_list = ItemList(start_pred_year, start_pred_month)
    customer_mask = forecast.customer_code.apply(
        customer_list.is_white_customer)
    forecast = forecast.loc[customer_mask]
    item_mask = forecast.item_code.apply(item_list.is_white_items)
    forecast = forecast.loc[item_mask]

    print()
    print("[INFO] The average accuracy is: %.2f" %
          (forecast.inv_acc.mean() * 100))
    print("[INFO] The weighted accuracy is: %.2f" %
          (forecast.inv_weighted_acc.sum() / forecast.act_inv_qty.sum() *
           100))

    # Step 4: Write into database (Kudu)
    # ============================================================================================ #

    env = db_config.env
    if env == 'SIT':
        kudu_config = SIT_DB_CONFIG
    elif env == 'UAT':
        kudu_config = UAT_DB_CONFIG
    elif env == 'PROD':
        kudu_config = PROD_DB_CONFIG
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )

    # Clear the target month first so the upsert replaces it completely.
    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_one_month(db_config.table_name, 'order_date', target_year,
                           target_month)
    writer.upsert(forecast, db_config.table_name, db_config.batch_size)


# Example no. 2
# 0


def update_history_for_level2_order(level2_data: Level2DataLoader,
                                    plan_data: PlanData,
                                    model_config: Bunch,
                                    db_config: Bunch,
                                    start_pred_year: int,
                                    start_pred_month: int,
                                    gap: int,
                                    use_unitize: bool = True) -> None:
    """Update order forecast result of level2 in specified month.

    Trains an ``LGBMSalesInfer`` model, predicts the target month, joins
    the prediction onto the true order data, blends the model prediction
    50/50 with the output of ``rule_func`` and writes the blended rows to
    the result table. Accuracy of the model, of the rule and of the blend
    is printed along the way.

    Args:
        level2_data: Loader supplying training/testing sets, true order
            data, historical order/distribution values and ``sku_info``.
        plan_data: Supplies ``plan_sku_month_mean`` and the per-month
            demand via ``get_one_month``.
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Provides ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        gap: Offset passed to ``infer_month`` (negated for the last
            training month, as-is for the target month) and to the loader.
        use_unitize: Forwarded to ``decorate_pred_result``; amounts are
            rounded to 4 decimals when true and to 0 decimals otherwise.

    Raises:
        Exception: If ``db_config.env`` is not 'SIT', 'UAT' or 'PROD'.
    """
    amount_decimals = 4 if use_unitize else 0

    # Step 1: Prepare training and testing set
    # ============================================================================================ #

    last_train_year, last_train_month = infer_month(start_pred_year,
                                                    start_pred_month,
                                                    offset=-gap)
    train_months = get_pre_months(last_train_year,
                                  last_train_month,
                                  left_bound='2016-03')
    true_pred_year, true_pred_month = infer_month(start_pred_year,
                                                  start_pred_month, gap)

    X_train, y_train = level2_data.prepare_training_set(train_months, gap=gap)
    X_test = level2_data.prepare_testing_set(start_pred_year,
                                             start_pred_month,
                                             gap=gap)

    # Step 2: Training and predicting
    # ============================================================================================ #

    level2_order_infer = LGBMSalesInfer(model_config)
    level2_order_infer.fit(X_train, y_train)
    preds_test = level2_order_infer.predict(X_test)

    # Step 3: Process forecast result
    # ============================================================================================ #

    df_test = level2_data.get_true_order_data(true_pred_year, true_pred_month)
    df_pred_test = level2_data.decorate_pred_result(preds_test,
                                                    true_pred_year,
                                                    true_pred_month,
                                                    use_unitize=use_unitize)

    result = df_test.join(df_pred_test, how='left').reset_index()

    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    # SKU attributes keep their source name and are looked up by item_code.
    sku_info_dict = level2_data.sku_info.to_dict()
    for sku_col in ('item_name', 'first_cate_code', 'second_cate_code',
                    'first_cate_name', 'second_cate_name', 'channel_name',
                    'item_price'):
        result[sku_col] = result.item_code.map(sku_info_dict[sku_col])

    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=amount_decimals)
    # Provisional value from the pure model prediction; refreshed after the
    # ensemble step so that amount and quantity stay consistent.
    result['pred_ord_amount'] = np.round(result.pred_ord_qty *
                                         result.item_price,
                                         decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    add_accuracy(result, 'ord_acc', 'act_ord_qty', 'pred_ord_qty')
    result['ord_weighted_acc'] = (result.act_ord_qty * result.ord_acc).astype(
        np.float32)

    # Keep white-listed items only. The explicit copy makes the later column
    # assignments operate on an independent frame (no SettingWithCopyWarning).
    item_list = ItemList(start_pred_year, start_pred_month)
    result = result.loc[result.item_code.apply(
        item_list.is_white_items)].copy()

    print()
    print("[INFO] The average accuracy of model is: %.2f" %
          (result.ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of model is: %.2f" %
          (result.ord_weighted_acc.sum() / result.act_ord_qty.sum() * 100))

    # Step 4: Ensemble with rule
    # ============================================================================================ #

    rule_res = result.copy()

    # Historical statistics per item; zeros are treated as missing for the
    # multi-month means so they do not drag the average down.
    order_sku_month_pre6_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 6,
        True).replace(0, np.nan).mean(axis=1)
    order_sku_month_pre1 = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 1, True).mean(axis=1)
    dis_sku_month_pre3_mean = level2_data.get_pre_dis_vals(
        start_pred_year, start_pred_month, 3,
        True).replace(0, np.nan).mean(axis=1)
    dis_sku_month_pre1 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 1,
                                                      True).mean(axis=1)
    plan_sku_month_mean = plan_data.plan_sku_month_mean

    rule_res['ord_sku_month_pre6_mean'] = rule_res.item_code.map(
        order_sku_month_pre6_mean)
    rule_res['ord_sku_month_pre1'] = rule_res.item_code.map(
        order_sku_month_pre1)
    rule_res['dis_sku_month_pre3_mean'] = rule_res.item_code.map(
        dis_sku_month_pre3_mean)
    rule_res['dis_sku_month_pre1'] = rule_res.item_code.map(dis_sku_month_pre1)
    rule_res['plan_sku_month_mean'] = rule_res.item_code.map(
        plan_sku_month_mean)

    # 0/1 indicators telling rule_func which statistics are missing.
    for flag_col, stat_col in (
        ('is_aver_ord_na', 'ord_sku_month_pre6_mean'),
        ('is_aver_dis_na', 'dis_sku_month_pre3_mean'),
        ('is_aver_plan_na', 'plan_sku_month_mean'),
        ('is_ord_pre1_na', 'ord_sku_month_pre1'),
        ('is_dis_pre1_na', 'dis_sku_month_pre1'),
    ):
        rule_res[flag_col] = rule_res[stat_col].isna() * 1

    rule_res['online_offline_flag'] = rule_res.item_code.map(
        sku_info_dict['sales_chan_name']).fillna('未知')
    rule_res['project_flag'] = rule_res.item_code.map(
        sku_info_dict['project_flag']).fillna('未知')

    # Items whose 24-value order history has no non-zero entry.
    order_sku_month_pre24_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 24,
        True).replace(0, np.nan).mean(axis=1)
    curr_new_items = set(order_sku_month_pre24_mean.loc[
        order_sku_month_pre24_mean.isna()].index)

    # Items with exactly one positive value among the distribution columns,
    # that value being in the third column. Computed on the side so the
    # frame returned by the loader is not modified.
    dis_sku_month_pre3 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 3,
                                                      True)
    num_positive = (dis_sku_month_pre3 > 0).sum(axis=1)
    is_new_by_dis = (num_positive == 1) & (dis_sku_month_pre3.iloc[:, 2] > 0)
    new_items_by_dis = set(dis_sku_month_pre3.loc[is_new_by_dis].index)

    demand = plan_data.get_one_month(true_pred_year, true_pred_month, True)
    rule_res['demand'] = rule_res.item_code.map(demand)
    rule_res['is_curr_new'] = rule_res.item_code.apply(
        lambda x: 1 if x in curr_new_items else 0)
    rule_res['is_new_by_dis'] = rule_res.item_code.apply(
        lambda x: 1 if x in new_items_by_dis else 0)
    rule_res[
        'demand_dis_ratio'] = rule_res.demand / rule_res.dis_sku_month_pre3_mean

    # Rule prediction; where the rule yields nothing (NaN or 0) fall back to
    # the model prediction so the blend below leaves that row unchanged.
    rule_res['pred_ord_qty_rule'] = rule_res.apply(rule_func, axis=1)
    rule_res['pred_ord_qty_rule'] = rule_res.pred_ord_qty_rule.replace(
        np.nan, 0)
    rule_res['pred_ord_qty_rule'] = rule_res.apply(
        lambda x: x.pred_ord_qty
        if x.pred_ord_qty_rule == 0 else x.pred_ord_qty_rule,
        axis=1)

    add_accuracy(rule_res, 'rule_ord_acc', 'act_ord_qty', 'pred_ord_qty_rule')
    rule_res['rule_ord_weighted_acc'] = (rule_res.act_ord_qty *
                                         rule_res.rule_ord_acc).astype(
                                             np.float32)

    print()
    print("[INFO] The average accuracy of rule is: %.2f" %
          (rule_res.rule_ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of rule is: %.2f" %
          (rule_res.rule_ord_weighted_acc.sum() / rule_res.act_ord_qty.sum() *
           100))

    # Equal-weight blend of model and rule (rule_res shares result's index).
    result[
        'pred_ord_qty'] = result.pred_ord_qty * 0.5 + rule_res.pred_ord_qty_rule * 0.5
    # The stored amount must describe the stored (blended) quantity.
    result['pred_ord_amount'] = np.round(result.pred_ord_qty *
                                         result.item_price,
                                         decimals=amount_decimals)

    add_accuracy(result, 'ord_acc', 'act_ord_qty', 'pred_ord_qty')
    result['ord_weighted_acc'] = (result.act_ord_qty * result.ord_acc).astype(
        np.float32)

    print()
    print("[INFO] The average accuracy of ensemble is: %.2f" %
          (result.ord_acc.mean() * 100))
    print("[INFO] The weighted accuracy of ensemble is: %.2f" %
          (result.ord_weighted_acc.sum() / result.act_ord_qty.sum() * 100))

    # Step 5: Write into database (Kudu)
    # ============================================================================================ #

    if db_config.env == 'SIT':
        level2_order_writer = KuduResultWriter(Bunch(SIT_DB_CONFIG))
    elif db_config.env == 'UAT':
        level2_order_writer = KuduResultWriter(Bunch(UAT_DB_CONFIG))
    elif db_config.env == 'PROD':
        level2_order_writer = KuduResultWriter(Bunch(PROD_DB_CONFIG))
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    # Clear the target month first so the upsert replaces it completely.
    level2_order_writer.clear_one_month(db_config.table_name, 'order_date',
                                        true_pred_year, true_pred_month)
    level2_order_writer.upsert(result, db_config.table_name,
                               db_config.batch_size)


# Example no. 3
# 0


def update_future_for_level2_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level2.

    Trains one ``LGBMSalesInfer`` model per gap in ``range(periods)`` and
    produces two outputs:

    * a per-item "detail table" row (``db_config.table2_name``) carrying
      the M1..M3 predictions, where M1 is blended 50/50 with the output of
      ``rule_func``; the same rows are pushed to the ESB endpoint;
    * a per-item/per-month long table (``db_config.table1_name``) built
      from all model predictions plus ``predict_by_history``.

    Args:
        model_config: Configuration handed to ``LGBMSalesInfer``.
        db_config: Provides ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table1_name``, ``table2_name`` and ``batch_size``.
        start_pred_year: Year of the month the prediction starts from.
        start_pred_month: Month the prediction starts from.
        periods: Number of gaps to train/predict. NOTE(review): the detail
            table reads ``pred_ord_qty_m1`` .. ``pred_ord_qty_m3`` from
            ``add_index_v2(preds_test[1:])``, which presumably requires
            ``periods == 4`` -- confirm before using another value.
        categories: Forwarded to ``Level2DataLoader``.
        need_unitize: Forwarded to the loaders. When true, quantities are
            floored at 0.0025, amounts rounded to 4 decimals and the
            detail-table figures multiplied by 10000; otherwise the floor
            is 25 and amounts are rounded to 0 decimals.

    Raises:
        Exception: If ``db_config.env`` is not 'SIT', 'UAT' or 'PROD'.
    """
    amount_decimals = 4 if need_unitize else 0
    # Replacement value for non-admissible predictions (0.0025 * 10000 == 25).
    floor_qty = 0.0025 if need_unitize else 25

    # Step 1: Read in data
    # ============================================================================================ #

    level2_data = Level2DataLoader(start_pred_year,
                                   start_pred_month,
                                   categories=categories,
                                   need_unitize=need_unitize,
                                   label_data='order')
    plan_data = PlanData(start_pred_year,
                         start_pred_month,
                         need_unitize=need_unitize)
    item_list = ItemList(start_pred_year, start_pred_month)

    # Step 2: Training and predicting
    # ============================================================================================ #

    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    # One independent model per gap; preds_test[i] is the prediction at gap i.
    preds_test = []
    for i in range(periods):
        X_train, y_train = level2_data.prepare_training_set(train_months,
                                                            gap=i)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = level2_data.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=i)
        predictor = LGBMSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result & write into the "detail table"
    # ============================================================================================ #

    m1_year, m1_month = infer_month(start_pred_year, start_pred_month, 1)

    # | item_code (index) | pred_ord_qty_m1 | pred_ord_qty_m2 | pred_ord_qty_m3 |
    result = level2_data.add_index_v2(preds_test[1:])
    # Untouched copy of the raw model output, reused in step 5.
    m1_to_m3_res = result.copy()
    # Negative predictions are replaced by the floor value.
    for col in result.columns:
        result[col] = result[col].apply(lambda x: floor_qty if x < 0 else x)
    # | item_code | pred_ord_qty_m1 | pred_ord_qty_m2 | pred_ord_qty_m3 |
    result = result.reset_index()

    result['bu_code'] = '30015305'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'
    result['sales_type'] = "内销"
    result['forecast_type'] = "内销整机预测"

    result['order_date'] = "%d-%02d-%02d" % (
        m1_year, m1_month, get_days_of_month(m1_year, m1_month))

    # SKU attributes keep their source name and are looked up by item_code.
    sku_info_dict = level2_data.sku_info.to_dict()
    for sku_col in ('item_name', 'first_cate_code', 'second_cate_code',
                    'first_cate_name', 'second_cate_name', 'item_price'):
        result[sku_col] = result.item_code.map(sku_info_dict[sku_col])

    item_list_dict = item_list.items.copy().set_index('item_code').to_dict()
    result['manu_code'] = result.item_code.map(
        item_list_dict['manu_code']).fillna('')
    result['area_name'] = ''

    # --- Rule-based prediction for M1 ------------------------------------ #

    rule_res = result.copy()
    rule_res['pred_ord_qty'] = rule_res['pred_ord_qty_m1']

    # Historical statistics per item; zeros are treated as missing for the
    # multi-month means so they do not drag the average down.
    order_sku_month_pre6_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 6,
        True).replace(0, np.nan).mean(axis=1)
    order_sku_month_pre1 = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 1, True).mean(axis=1)
    dis_sku_month_pre3_mean = level2_data.get_pre_dis_vals(
        start_pred_year, start_pred_month, 3,
        True).replace(0, np.nan).mean(axis=1)
    dis_sku_month_pre1 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 1,
                                                      True).mean(axis=1)
    plan_sku_month_mean = plan_data.plan_sku_month_mean

    rule_res['ord_sku_month_pre6_mean'] = rule_res.item_code.map(
        order_sku_month_pre6_mean)
    rule_res['ord_sku_month_pre1'] = rule_res.item_code.map(
        order_sku_month_pre1)
    rule_res['dis_sku_month_pre3_mean'] = rule_res.item_code.map(
        dis_sku_month_pre3_mean)
    rule_res['dis_sku_month_pre1'] = rule_res.item_code.map(dis_sku_month_pre1)
    rule_res['plan_sku_month_mean'] = rule_res.item_code.map(
        plan_sku_month_mean)

    # 0/1 indicators telling rule_func which statistics are missing.
    for flag_col, stat_col in (
        ('is_aver_ord_na', 'ord_sku_month_pre6_mean'),
        ('is_aver_dis_na', 'dis_sku_month_pre3_mean'),
        ('is_aver_plan_na', 'plan_sku_month_mean'),
        ('is_ord_pre1_na', 'ord_sku_month_pre1'),
        ('is_dis_pre1_na', 'dis_sku_month_pre1'),
    ):
        rule_res[flag_col] = rule_res[stat_col].isna() * 1

    rule_res['online_offline_flag'] = rule_res.item_code.map(
        sku_info_dict['sales_chan_name']).fillna('未知')
    rule_res['project_flag'] = rule_res.item_code.map(
        sku_info_dict['project_flag']).fillna('未知')

    # Items whose 24-value order history has no non-zero entry.
    order_sku_month_pre24_mean = level2_data.get_pre_order_vals(
        start_pred_year, start_pred_month, 24,
        True).replace(0, np.nan).mean(axis=1)
    curr_new_items = set(order_sku_month_pre24_mean.loc[
        order_sku_month_pre24_mean.isna()].index)

    # Items with exactly one positive value among the distribution columns,
    # that value being in the third column. Computed on the side so the
    # frame returned by the loader is not modified.
    dis_sku_month_pre3 = level2_data.get_pre_dis_vals(start_pred_year,
                                                      start_pred_month, 3,
                                                      True)
    num_positive = (dis_sku_month_pre3 > 0).sum(axis=1)
    is_new_by_dis = (num_positive == 1) & (dis_sku_month_pre3.iloc[:, 2] > 0)
    new_items_by_dis = set(dis_sku_month_pre3.loc[is_new_by_dis].index)

    demand = plan_data.get_one_month(m1_year, m1_month, True)
    rule_res['demand'] = rule_res.item_code.map(demand)
    rule_res['is_curr_new'] = rule_res.item_code.apply(
        lambda x: 1 if x in curr_new_items else 0)
    rule_res['is_new_by_dis'] = rule_res.item_code.apply(
        lambda x: 1 if x in new_items_by_dis else 0)
    rule_res[
        'demand_dis_ratio'] = rule_res.demand / rule_res.dis_sku_month_pre3_mean

    # Rule prediction; where the rule yields nothing (NaN or 0) fall back to
    # the model prediction so the blend below leaves that row unchanged.
    rule_res['pred_ord_qty_rule'] = rule_res.apply(rule_func, axis=1)
    rule_res['pred_ord_qty_rule'] = rule_res.pred_ord_qty_rule.replace(
        np.nan, 0)
    rule_res['pred_ord_qty_rule'] = rule_res.apply(
        lambda x: x.pred_ord_qty
        if x.pred_ord_qty_rule == 0 else x.pred_ord_qty_rule,
        axis=1)

    # Equal-weight blend of model and rule for M1 only.
    result[
        'pred_ord_qty_m1'] = result.pred_ord_qty_m1 * 0.5 + rule_res.pred_ord_qty_rule * 0.5
    result['avg_dis'] = rule_res['dis_sku_month_pre3_mean'].fillna(0.0)
    for amount_col, qty_col in (
        ('pred_ord_amount_m1', 'pred_ord_qty_m1'),
        ('pred_ord_amount_m2', 'pred_ord_qty_m2'),
        ('pred_ord_amount_m3', 'pred_ord_qty_m3'),
    ):
        result[amount_col] = np.round(result[qty_col] * result.item_price,
                                      decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    if need_unitize:
        # Scale the unitized figures by 10000 before storing them.
        for scaled_col in ('avg_dis', 'pred_ord_qty_m1', 'pred_ord_qty_m2',
                           'pred_ord_qty_m3', 'pred_ord_amount_m1',
                           'pred_ord_amount_m2', 'pred_ord_amount_m3'):
            result[scaled_col] = np.round(result[scaled_col] * 10000)

    # Drop delisting items and items without a manufacturer code. The copy
    # makes the column additions in step 4 act on an independent frame.
    result = result.loc[~result.item_code.apply(
        item_list.is_delisting_items)]
    result = result.loc[~(result.manu_code == '')].copy()

    if db_config.env == 'SIT':
        kudu_config = SIT_DB_CONFIG
        # NOTE(review): SIT pushes to the UAT ESB endpoint -- presumably
        # because no SIT endpoint exists; confirm.
        esb_url = UAT_ESB_URL
    elif db_config.env == 'UAT':
        kudu_config = UAT_DB_CONFIG
        esb_url = UAT_ESB_URL
    elif db_config.env == 'PROD':
        kudu_config = PROD_DB_CONFIG
        esb_url = PROD_ESB_URL
    else:
        raise Exception(
            "[INFO] The environment name of database to write result is illegal!!!"
        )

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_one_month(db_config.table2_name, 'order_date', m1_year,
                           m1_month)
    writer.upsert(result, db_config.table2_name, db_config.batch_size)

    # Step 4: Push to ESB
    # ============================================================================================ #

    # Placeholder columns that are sent as empty strings.
    for blank_col in ('customer_code', 'attribute1', 'attribute2',
                      'attribute3', 'attribute4', 'attribute5'):
        result[blank_col] = ''
    result = result.rename(columns={'manu_code': 'manu_name'})
    result = result[[
        'bu_code', 'sales_type', 'manu_name', 'area_name', 'customer_code',
        'order_date', 'first_cate_name', 'second_cate_name', 'item_code',
        'forecast_type', 'avg_dis', 'item_price', 'pred_ord_qty_m1',
        'pred_ord_qty_m2', 'pred_ord_qty_m3', 'attribute1', 'attribute2',
        'attribute3', 'attribute4', 'attribute5'
    ]]
    push_to_esb(result, esb_url)

    # Release the detail frame before building the next one.
    del result
    gc.collect()

    # Step 5: Process forecast result & write into the "crystal ball" table
    # ============================================================================================ #

    df_test = level2_data.get_true_order_data(start_pred_year,
                                              start_pred_month)
    df_preds_test = level2_data.add_index(preds_test, start_pred_year,
                                          start_pred_month)
    # NOTE: the M1 column uses the raw model prediction; the rule blend of
    # step 3 is applied to the detail table only.
    df_preds_test['%d%02d' %
                  (m1_year, m1_month)] = m1_to_m3_res['pred_ord_qty_m1']
    df_pred_test_more = level2_data.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    # Wide (one column per month) -> long (item_code, order_date) layout.
    df_preds_test = pd.concat([df_preds_test, df_pred_test_more],
                              axis=1).stack().to_frame('pred_ord_qty')
    df_preds_test.index.set_names(['item_code', 'order_date'], inplace=True)
    # Unlike step 3, every non-positive prediction (zero included) is
    # replaced by the floor value here.
    df_preds_test['pred_ord_qty'] = df_preds_test.pred_ord_qty.apply(
        lambda x: x if x > 0 else floor_qty)
    df_preds_test['pred_ord_qty'] = np.round(df_preds_test.pred_ord_qty,
                                             decimals=amount_decimals)

    result = df_preds_test.join(df_test, how='left').reset_index()
    # Months without actuals count as 0. Plain assignment is used because a
    # chained ``inplace`` fillna on an attribute-accessed column is
    # deprecated and has no effect under pandas Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)

    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    sku_info_dict = level2_data.sku_info.to_dict()
    for sku_col in ('item_name', 'first_cate_code', 'second_cate_code',
                    'first_cate_name', 'second_cate_name', 'channel_name',
                    'item_price'):
        result[sku_col] = result.item_code.map(sku_info_dict[sku_col])

    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=amount_decimals)
    result['pred_ord_amount'] = np.round(result.pred_ord_qty *
                                         result.item_price,
                                         decimals=amount_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    # Keep white-listed items only.
    result = result.loc[result.item_code.apply(item_list.is_white_items)]

    # Replace everything from the start month onwards in the long table.
    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_months_after(db_config.table1_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table1_name, db_config.batch_size)
def update_future_for_level3_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level3.

    Trains one ``LGBMSalesInfer`` model per forecast gap in
    ``range(periods)``, combines the predictions with the output of
    ``data_loader.predict_by_history``, decorates the rows with customer
    and item attributes, keeps only white-listed customers and items, and
    writes the result to ``db_config.table_name`` in Kudu.

    Args:
        model_config: Configuration passed to ``LGBMSalesInfer``.
        db_config: Must provide ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table_name`` and ``batch_size``.
        start_pred_year: Year of the first month to predict.
        start_pred_month: Month of the first month to predict.
        periods: Number of gaps (0 .. periods - 1) to train a model for.
        categories: Category filter forwarded to the data loader.
        need_unitize: Forwarded to the data loader; also selects the
            rounding precision of the predicted quantity (4 decimals when
            True, 0 otherwise).

    Raises:
        ValueError: If ``db_config.env`` is not a known environment. The
            check runs before any training so a bad configuration fails
            immediately instead of after the whole pipeline has run.
    """

    # Step 0: Resolve the target database up front (fail fast)
    # ============================================================================================ #

    kudu_configs = {
        'SIT': SIT_DB_CONFIG,
        'UAT': UAT_DB_CONFIG,
        'PROD': PROD_DB_CONFIG,
    }
    if db_config.env not in kudu_configs:
        raise ValueError(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    kudu_config = kudu_configs[db_config.env]

    qty_decimals = 4 if need_unitize else 0

    # Step 1: Read in data
    # ============================================================================================ #

    data_loader = Level3OrderDataLoader(start_pred_year,
                                        start_pred_month,
                                        categories=categories,
                                        need_unitize=need_unitize)

    # Step 2: Training and predicting
    # ============================================================================================ #

    # The last training month lies `periods` months before the first
    # predicted month.
    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    preds_test = []
    for gap in range(periods):
        X_train, y_train = data_loader.prepare_training_set(train_months,
                                                            gap=gap)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = data_loader.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=gap)
        predictor = LGBMSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result
    # ============================================================================================ #

    df_test = data_loader.get_true_data(start_pred_year, start_pred_month)
    df_pred_test = data_loader.add_index(preds_test, start_pred_year,
                                         start_pred_month)
    # NOTE(review): presumably history-based predictions for the months
    # beyond the modelled gaps -- confirm against predict_by_history.
    df_pred_test_more = data_loader.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    df_pred_test = pd.concat([df_pred_test, df_pred_test_more],
                             axis=1).stack().to_frame('pred_ord_qty')
    # Assign the renamed index explicitly instead of mutating it in place.
    df_pred_test.index = df_pred_test.index.set_names(
        ['customer_code', 'item_code', 'order_date'])

    # Anything that is not strictly positive (including NaN) becomes 0.
    pred_qty = df_pred_test['pred_ord_qty']
    df_pred_test['pred_ord_qty'] = np.round(pred_qty.where(pred_qty > 0, 0),
                                            decimals=qty_decimals)

    result = df_pred_test.join(df_test, how='left').reset_index()
    # Explicit assignment: a chained `result.act_ord_qty.fillna(...,
    # inplace=True)` does not update `result` under pandas Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)

    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    # (target column in result, source column in customer_info); the order
    # here is the order in which the columns are appended to `result`.
    # The province/city/district/channel *code* columns are not populated.
    customer_info_dict = data_loader.customer_info.to_dict()
    customer_columns = (
        ('customer_name', 'customer_name'),
        ('sales_cen_code', 'sales_cen_code'),
        ('sales_cen_name', 'sales_cen_name'),
        ('province_name', 'province'),
        ('city_name', 'city'),
        ('district_name', 'district'),
    )
    for target_col, source_col in customer_columns:
        result[target_col] = result.customer_code.map(
            customer_info_dict[source_col])

    sku_info_dict = data_loader.sku_info.to_dict()
    sku_columns = (
        'item_name',
        'first_cate_code',
        'second_cate_code',
        'first_cate_name',
        'second_cate_name',
        'channel_name',
        'item_price',
    )
    for sku_col in sku_columns:
        result[sku_col] = result.item_code.map(sku_info_dict[sku_col])

    # NOTE(review): amounts are always rounded to 4 decimals here, whereas
    # sibling update functions in this file use `4 if need_unitize else 0`.
    # Kept as-is to avoid changing what is written; confirm which is
    # intended.
    result['act_ord_amount'] = np.round(result.act_ord_qty * result.item_price,
                                        decimals=4)
    result['pred_ord_amount'] = np.round(result.pred_ord_qty *
                                         result.item_price,
                                         decimals=4)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    # Keep only white-listed customers, then only white-listed items.
    customer_list = CustomerList()
    item_list = ItemList(start_pred_year, start_pred_month)
    result = result.loc[result.customer_code.apply(
        customer_list.is_white_customer)]
    result = result.loc[result.item_code.apply(item_list.is_white_items)]

    # Step 4: Write into database (Kudu)
    # ============================================================================================ #

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_months_after(db_config.table_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table_name, db_config.batch_size)
# Example no. 5
# 0
def update_future_for_level1_order(model_config: Bunch,
                                   db_config: Bunch,
                                   start_pred_year: int,
                                   start_pred_month: int,
                                   periods: int = 4,
                                   categories: Union[str, List[str]] = 'all',
                                   need_unitize: bool = True) -> None:
    """Update order future result of level1.

    Trains one ``RFSalesInfer`` model per forecast gap in
    ``range(periods)`` and writes two result sets to Kudu:

    * ``db_config.table1_name``: one row per (first_cate_code, order_date)
      with predicted/actual quantity and amount. Months from the start
      month onwards are cleared before the upsert.
    * ``db_config.table2_name``: one row per first_cate_code with
      ``pred_ord_qty_m1..m3`` / ``pred_ord_amount_m1..m3`` columns, stamped
      with the month following the start month. Only that month is cleared
      before the upsert.

    Args:
        model_config: Configuration passed to ``RFSalesInfer``.
        db_config: Must provide ``env`` ('SIT', 'UAT' or 'PROD'),
            ``table1_name``, ``table2_name`` and ``batch_size``.
        start_pred_year: Year of the first month to predict.
        start_pred_month: Month of the first month to predict.
        periods: Number of gaps (0 .. periods - 1) to train a model for.
            NOTE(review): the second table hard-codes the m1..m3 columns
            built from ``preds_test[1:]``, which presumably requires
            ``periods == 4`` -- confirm against ``add_index_v2``.
        categories: Category filter forwarded to the data loader.
        need_unitize: Forwarded to the data loader; selects the rounding
            precision (4 decimals vs. 0) and the floor value (0.0025 vs.
            25) applied to predictions, and triggers the final x10000
            rescaling of the second table.

    Raises:
        ValueError: If ``db_config.env`` is not a known environment. The
            check runs before any training so a bad configuration fails
            immediately instead of after the whole pipeline has run.
    """

    # Step 0: Resolve the target database up front (fail fast)
    # ============================================================================================ #

    kudu_configs = {
        'SIT': SIT_DB_CONFIG,
        'UAT': UAT_DB_CONFIG,
        'PROD': PROD_DB_CONFIG,
    }
    if db_config.env not in kudu_configs:
        raise ValueError(
            "[INFO] The environment name of database to write result is illegal!!!"
        )
    kudu_config = kudu_configs[db_config.env]

    n_decimals = 4 if need_unitize else 0
    # Floor substituted for predictions that fall below the accepted range.
    floor_qty = 0.0025 if need_unitize else 25

    # Step 1: Read in data
    # ============================================================================================ #

    level1_data = Level1DataLoader(start_pred_year,
                                   start_pred_month,
                                   categories=categories,
                                   need_unitize=need_unitize,
                                   label_data='order')

    # Step 2: Training and predicting
    # ============================================================================================ #

    # The last training month lies `periods` months before the first
    # predicted month.
    year_upper_bound, month_upper_bound = infer_month(start_pred_year,
                                                      start_pred_month,
                                                      offset=-periods)
    train_months = get_pre_months(year_upper_bound,
                                  month_upper_bound,
                                  left_bound='2016-03')

    preds_test = []
    for gap in range(periods):
        X_train, y_train = level1_data.prepare_training_set(train_months,
                                                            gap=gap)
        X_train, y_train = modify_training_set(X_train, y_train)
        X_test = level1_data.prepare_testing_set(start_pred_year,
                                                 start_pred_month,
                                                 gap=gap)
        predictor = RFSalesInfer(model_config)
        predictor.fit(X_train, y_train)
        preds_test.append(predictor.predict(X_test))

    # Step 3: Process forecast result & write into the "Crystal Ball" table
    # ============================================================================================ #

    df_test = level1_data.get_true_order_data(start_pred_year,
                                              start_pred_month)
    df_pred_test = level1_data.add_index(preds_test, start_pred_year,
                                         start_pred_month)
    # NOTE(review): presumably history-based predictions for the months
    # beyond the modelled gaps -- confirm against predict_by_history.
    df_pred_test_more = level1_data.predict_by_history(start_pred_year,
                                                       start_pred_month,
                                                       gap=periods)
    df_pred_test = pd.concat([df_pred_test, df_pred_test_more],
                             axis=1).stack().to_frame('pred_ord_qty')
    # Assign the renamed index explicitly instead of mutating it in place.
    df_pred_test.index = df_pred_test.index.set_names(
        ['first_cate_code', 'order_date'])

    # Anything that is not strictly positive (including 0 and NaN) is
    # replaced by the floor value.
    # NOTE(review): Step 4 below only floors strictly negative values, so
    # the two tables treat 0/NaN differently; preserved as-is, confirm
    # which rule is intended.
    pred_qty = df_pred_test['pred_ord_qty']
    df_pred_test['pred_ord_qty'] = np.round(
        pred_qty.where(pred_qty > 0, floor_qty), decimals=n_decimals)

    result = df_pred_test.join(df_test, how='left').reset_index()
    # Explicit assignment: a chained `result.act_ord_qty.fillna(...,
    # inplace=True)` does not update `result` under pandas Copy-on-Write.
    result['act_ord_qty'] = result['act_ord_qty'].fillna(0)

    result['bu_code'] = 'M111'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    result['first_cate_name'] = result.first_cate_code.map(
        CATE_CODE_2_CATE_NAME)
    cate_info_dict = level1_data.cate_info.to_dict()
    result['aver_price'] = result.first_cate_code.map(
        cate_info_dict['cate_aver_price'])

    result['act_ord_amount'] = np.round(result.act_ord_qty * result.aver_price,
                                        decimals=n_decimals)
    result['pred_ord_amount'] = np.round(result.pred_ord_qty *
                                         result.aver_price,
                                         decimals=n_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_months_after(db_config.table1_name, 'order_date',
                              start_pred_year, start_pred_month)
    writer.upsert(result, db_config.table1_name, db_config.batch_size)

    # Release the first result frame before building the second one.
    del result
    gc.collect()

    # Step 4: Process forecast result & write into the "detail" table
    # ============================================================================================ #

    # The gap-0 prediction is dropped; only the following gaps are used.
    result = level1_data.add_index_v2(preds_test[1:])
    for col in result.columns:
        # Only strictly negative values are floored here (0 and NaN are
        # kept unchanged).
        result[col] = result[col].mask(result[col] < 0, floor_qty)
    result = result.reset_index()

    # NOTE(review): this table uses a different bu_code ('30015305') than
    # the first table ('M111'); kept as in the original.
    result['bu_code'] = '30015305'
    result['bu_name'] = '厨房热水器事业部'
    result['comb_name'] = 'Default'

    # Rows are stamped with the month following the start month (YYYYMM).
    m1_year, m1_month = infer_month(start_pred_year, start_pred_month, 1)
    result['order_date'] = "%d%02d" % (m1_year, m1_month)

    result['first_cate_name'] = result.first_cate_code.map(
        CATE_CODE_2_CATE_NAME)
    result['aver_price'] = result.first_cate_code.map(
        cate_info_dict['cate_aver_price'])

    horizons = ('m1', 'm2', 'm3')
    for horizon in horizons:
        result['pred_ord_amount_' + horizon] = np.round(
            result['pred_ord_qty_' + horizon] * result.aver_price,
            decimals=n_decimals)
    result['ord_pred_time'] = timestamp_to_time(time.time())

    if need_unitize:
        # Quantities and amounts are multiplied by 10000 and rounded to
        # whole numbers before being written to this table.
        for horizon in horizons:
            for prefix in ('pred_ord_qty_', 'pred_ord_amount_'):
                col = prefix + horizon
                result[col] = np.round(result[col] * 10000)

    # A fresh writer is created for the second table, as in the original
    # flow.
    writer = KuduResultWriter(Bunch(kudu_config))
    writer.clear_one_month(db_config.table2_name, 'order_date', m1_year,
                           m1_month)
    writer.upsert(result, db_config.table2_name, db_config.batch_size)