Ejemplo n.º 1
0
def interval_regression(df_feature_full, train_begin, test_day):
    """Run the sale and stock jobs for every store/goods combination.

    Every combination of the unique ``storeid`` and ``goodscode`` values in
    ``df_feature_full`` is visited.  For each one, the rows whose ``rundate``
    lies in ``[train_begin, test_day]`` are binned with
    ``f_binning.binner_engine``, passed to ``sale_job`` and ``stock_job`` for
    ``test_day``, and combined by ``gen_predicts``.  Combinations with no
    rows, or for which ``gen_predicts`` reports failure, are skipped.

    Args:
        df_feature_full: Frame with at least the columns ``result``,
            ``storeid``, ``goodscode`` and ``rundate``.
        train_begin: Inclusive lower bound compared against ``rundate``.
        test_day: Inclusive upper bound compared against ``rundate``; also
            the day handed to ``sale_job`` / ``stock_job``.

    Returns:
        The ``gen_predicts`` results concatenated with a fresh index, or an
        empty ``DataFrame`` when nothing was produced.

    Side effects:
        Casts ``df_feature_full['result']`` to ``int`` in the caller's frame
        and passes the accumulated correlation frame to ``write_corr``.
    """
    # Make the label an int. The cast is written back to the caller's frame,
    # exactly as before, so existing callers see the same side effect.
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    storeid_list = df_feature_full['storeid'].unique()
    goodsn_list = df_feature_full['goodscode'].unique()

    # The date window is identical for every combination, so apply it once
    # instead of rebuilding the mask inside the nested loops.
    df_window = df_feature_full[
        (df_feature_full['rundate'] >= train_begin)
        & (df_feature_full['rundate'] <= test_day)
    ]

    results = []
    df_corr = pd.DataFrame()
    for store_id in storeid_list:
        df_store = df_window[df_window['storeid'] == store_id]
        if len(df_store) == 0:
            continue

        for goodsn in goodsn_list:
            # Keyword `columns=` is required: the positional axis argument
            # of DataFrame.drop no longer exists in pandas 2.x.
            df_goods_full = df_store[df_store['goodscode'] == goodsn].drop(
                columns=['goodscode', 'storeid'])
            if len(df_goods_full) == 0:
                continue

            # binning
            df_goods_full = f_binning.binner_engine(df_goods_full)

            # feature and label
            df_feature = df_goods_full.drop(columns=['result'])
            df_label = df_goods_full[['result', 'rundate']]

            # No tuned parameters are used here; an empty dict is passed on.
            paras = {}

            # single process predict (sale job first, then stock job)
            res_sale_list = [sale_job(paras, test_day, df_feature, df_label)]
            res_stock_list = [stock_job(test_day, df_feature, df_label)]

            succ, result = gen_predicts(store_id, goodsn, res_sale_list,
                                        res_stock_list)
            if not succ:
                continue

            results.append(result)
            # feature corr output
            df_corr = corr_evaluation(df_corr, store_id, goodsn,
                                      df_goods_full)

    write_corr(df_corr)

    # Concatenate once at the end; growing a frame inside the loop re-copies
    # all previously collected rows on every iteration.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
Ejemplo n.º 2
0
def day_regression(store_id, goodsn_list, df_feature_full, date_max, gap=30):
    """Predict each goods code of one store for the ``gap`` days up to ``date_max``.

    For every goods code with rows for ``store_id``, the rows are binned with
    ``f_binning.binner_engine``, parameters are obtained from ``get_paras``
    for the first prediction date, and ``proba_job`` is run once per
    prediction date.  The results go through ``print_evaluation``,
    ``plt_draw`` and ``gen_predicts``; a goods code is skipped as soon as one
    of them reports failure.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``result``,
            ``storeid``, ``goodscode`` and ``rundate``.
        date_max: Last prediction date as a ``'%Y-%m-%d'`` string.
        gap: Number of consecutive days, ending at ``date_max``, to predict.

    Returns:
        The ``gen_predicts`` results concatenated with a fresh index, or an
        empty ``DataFrame`` when nothing was produced.

    Side effects:
        Casts ``df_feature_full['result']`` to ``int`` in the caller's frame
        and passes the accumulated correlation frame to ``write_corr``.
    """
    # Make the label an int (written back to the caller's frame, as before).
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    # Prediction dates and the plotting cutoff depend only on date_max and
    # gap, so they are computed once rather than once per goods code.
    pred_dts = [
        x.strftime('%Y-%m-%d')
        for x in pd.date_range(end=date_max, periods=gap)
    ]
    draw_cutoff = (datetime.strptime(date_max, "%Y-%m-%d")
                   - timedelta(days=gap + 20)).strftime('%Y-%m-%d')

    results = []
    df_corr = pd.DataFrame()
    for goodsn in goodsn_list:
        # Keyword `columns=` is required: the positional axis argument of
        # DataFrame.drop no longer exists in pandas 2.x.
        df_goods_full = df_feature_full[
            (df_feature_full['goodscode'] == goodsn)
            & (df_feature_full['storeid'] == store_id)
        ].drop(columns=['goodscode', 'storeid'])
        if len(df_goods_full) == 0:
            continue

        # binning
        df_goods_full = f_binning.binner_engine(df_goods_full)

        # feature and label
        df_feature = df_goods_full.drop(columns=['result'])
        df_label = df_goods_full[['result', 'rundate']]

        # Parameters are looked up for the earliest prediction date.
        paras = get_paras(pred_dts[0], df_feature, df_label)

        # single process predict
        res_list = [
            proba_job(paras, pred_dt, df_feature, df_label)
            for pred_dt in pred_dts
        ]

        # Plot data: rundate shifted back two days, limited to the recent
        # window. `assign` builds a new frame, so the feature frame handed
        # to proba_job above is not modified.
        df_draw = df_feature.assign(
            rundate=df_feature['rundate'] + timedelta(days=-2))
        df_draw = df_draw[df_draw['rundate'] > draw_cutoff]

        # exporter
        if not print_evaluation(goodsn, res_list):
            continue
        if not plt_draw(store_id, goodsn, df_draw, res_list):
            continue
        succ, result = gen_predicts(goodsn, res_list)
        if not succ:
            continue
        results.append(result)

        # feature corr output
        df_corr = corr_evaluation(df_corr, store_id, goodsn, df_goods_full)

    write_corr(df_corr)

    # Concatenate once at the end instead of re-copying inside the loop.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
Ejemplo n.º 3
0
def day_regression(store_id, goodsn_list, df_feature_full, date_max, gap=30):
    """Predict each goods code of one store for the ``gap`` days up to ``date_max``.

    For every goods code with rows for ``store_id``, the rows are binned with
    ``f_binning.binner_engine``.  Then, for each prediction date, only rows
    with ``rundate`` later than ``get_n_day_ago(pred_dt, 7)`` are kept and
    passed to ``proba_job``.  The results go through ``print_evaluation`` and
    ``gen_predicts``; a goods code is skipped if either reports failure.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``result``,
            ``storeid``, ``goodscode`` and ``rundate``.
        date_max: Last prediction date (anything ``pd.date_range`` accepts
            as ``end``).
        gap: Number of consecutive days, ending at ``date_max``, to predict.

    Returns:
        The ``gen_predicts`` results concatenated with a fresh index, or an
        empty ``DataFrame`` when nothing was produced.

    Side effects:
        Casts ``df_feature_full['result']`` to ``int`` in the caller's frame.
    """
    # Make the label an int (written back to the caller's frame, as before).
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    # The prediction dates do not depend on the goods code; build them once.
    pred_dts = [
        x.strftime('%Y-%m-%d')
        for x in pd.date_range(end=date_max, periods=gap)
    ]

    results = []
    for goodsn in goodsn_list:
        # Keyword `columns=` is required: the positional axis argument of
        # DataFrame.drop no longer exists in pandas 2.x.
        df_goods_full = df_feature_full[
            (df_feature_full['goodscode'] == goodsn)
            & (df_feature_full['storeid'] == store_id)
        ].drop(columns=['goodscode', 'storeid'])
        if len(df_goods_full) == 0:
            continue

        # binning
        df_goods_full = f_binning.binner_engine(df_goods_full)

        # single process predict
        res_list = []
        # The window is narrowed cumulatively from one prediction date to
        # the next, matching the previous behaviour; it lives under its own
        # name so the binned per-goods frame is no longer overwritten.
        df_window = df_goods_full
        for pred_dt in pred_dts:
            # presumably the date 7 days before pred_dt - confirm against
            # get_n_day_ago
            day_ago = get_n_day_ago(pred_dt, 7)
            df_window = df_window[df_window['rundate'] > day_ago]
            # feature and label
            df_feature = df_window.drop(columns=['result'])
            df_label = df_window[['result', 'rundate']]
            res_list.append(proba_job(pred_dt, df_feature, df_label))

        # exporter
        if not print_evaluation(goodsn, res_list):
            continue
        succ, result = gen_predicts(goodsn, res_list)
        if not succ:
            continue
        results.append(result)

    # Concatenate once at the end instead of re-copying inside the loop.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
Ejemplo n.º 4
0
def day_regression(store_id, goodsn_list, df_feature_full, gap=30):
    """Predict, report and plot each goods code of one store.

    For every goods code with rows for ``store_id``, the last ``gap`` days up
    to the latest ``rundate`` of that goods code are predicted with
    ``proba_job`` on features binned by ``f_binning.binner_engine``.  Summary
    statistics are printed, a chart is saved to
    ``../sources/<store_id>_<goodsn>.png``, and the per-day predictions are
    collected.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``result``,
            ``storeid``, ``goodscode``, ``rundate``, ``endqty`` and
            ``saleqty``; ``rundate`` must hold datetime values.
        gap: Number of consecutive days, ending at the latest ``rundate``,
            to predict.

    Returns:
        A frame with the columns ``rundate``, ``pred``, ``sales`` and
        ``goodsn``, one row per successful prediction.  It is empty (with
        those columns) when no goods code produced a prediction.

    Side effects:
        Casts ``df_feature_full['result']`` to ``int`` in the caller's frame,
        prints statistics and writes one PNG per reported goods code.
    """
    # Make the label an int (written back to the caller's frame, as before).
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    df_results = []
    for goodsn in goodsn_list:
        # Keyword `columns=` is required: the positional axis argument of
        # DataFrame.drop no longer exists in pandas 2.x.
        df_data = df_feature_full[
            (df_feature_full['goodscode'] == goodsn)
            & (df_feature_full['storeid'] == store_id)
        ].drop(columns=['goodscode', 'storeid'])
        df_simple = df_data.drop(columns=['result'])
        df_label = df_data[['result', 'rundate']]

        # get date range; an empty selection yields NaT/NaN as maximum
        rundate_max = df_simple['rundate'].max()
        if pd.isna(rundate_max):
            continue
        df_simple = f_binning.binner_engine(df_simple)
        df_max = rundate_max.strftime('%Y-%m-%d')
        pred_dts = [
            x.strftime('%Y-%m-%d')
            for x in pd.date_range(end=df_max, periods=gap)
        ]

        # single process
        res_list = [
            proba_job(pred_dt, df_simple, df_label) for pred_dt in pred_dts
        ]

        # Plot data: rundate shifted back two days, limited to the last
        # `gap` days. `assign` builds a new frame, so the binned feature
        # frame is left untouched.
        df_draw = df_simple.assign(
            rundate=df_simple['rundate'] + timedelta(days=-2))
        df_draw = df_draw[df_draw['rundate'] > (
            rundate_max - timedelta(days=gap)).strftime('%Y-%m-%d')]

        # Keep only the prediction dates for which a result came back.
        valid = [res for res in res_list if res]
        if not valid:
            continue
        y_tests = np.array([res['y_test'] for res in valid])
        y_preds = np.array([res['y_pred'] for res in valid])
        dts = [res['rundate'] for res in valid]

        print('goodsn: ' + str(goodsn))
        print('\tmean(stock): ' + str(df_draw['endqty'].mean()))
        print('\tmean(sales): ' + str(y_tests.mean()))
        print('\tmean(predict_stock): ' + str(y_preds.mean()))
        print('\tcount(stock<0): ' + str(len(df_draw[df_draw['endqty'] < 0])))
        print('\tpredict_stock<sales: ' + str(np.sum(y_preds < y_tests)))
        print('\tcount(sales): ' + str(len(y_tests)))

        # draw sales chart: actual sales in green, predictions in red
        plt.figure(1, figsize=(20, 7))
        plt.plot(df_draw['rundate'],
                 df_draw['saleqty'],
                 color='#6aa84f',
                 marker='o')
        plt.plot([datetime.strptime(d, '%Y-%m-%d').date() for d in dts],
                 y_preds,
                 color='red',
                 marker='o')
        plt.savefig('../sources/' + str(store_id) + '_' + str(goodsn) + '.png')
        plt.close('all')

        df_result = pd.DataFrame(dts, columns=['rundate'])
        df_result['pred'] = y_preds
        df_result['sales'] = y_tests
        df_result['goodsn'] = goodsn
        df_results.append(df_result)

    # pd.concat raises ValueError on an empty list, which happened whenever
    # every goods code was skipped; return an empty frame instead.
    if not df_results:
        return pd.DataFrame(columns=['rundate', 'pred', 'sales', 'goodsn'])
    return pd.concat(df_results, ignore_index=True)