def interval_regression(df_feature_full, train_begin, test_day):
    """Run the sale and stock jobs for every (store, goods) pair.

    For each ``storeid`` / ``goodscode`` combination found in
    ``df_feature_full``, the rows with ``train_begin <= rundate <= test_day``
    are binned with ``f_binning.binner_engine`` and handed to ``sale_job``
    and ``stock_job``.  ``gen_predicts`` turns the two job outputs into a
    result frame; the feature correlations of every successful pair are
    accumulated with ``corr_evaluation`` and passed to ``write_corr`` once
    at the end.

    Args:
        df_feature_full: Frame with at least the columns ``storeid``,
            ``goodscode``, ``rundate`` and ``result``.  Its ``result``
            column is cast to ``int`` in place (side effect on the
            caller's frame, kept for backward compatibility).
        train_begin: Inclusive lower bound for ``rundate``.
        test_day: Inclusive upper bound for ``rundate``; also forwarded to
            ``sale_job`` and ``stock_job``.

    Returns:
        The frames returned by ``gen_predicts`` concatenated with a fresh
        index, or an empty ``DataFrame`` when no pair produced a result.
    """
    # make label as int
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    storeid_list = df_feature_full['storeid'].unique()
    goodsn_list = df_feature_full['goodscode'].unique()

    # The date window does not depend on the pair, so apply it once instead
    # of re-scanning the full frame for every (store, goods) combination.
    in_window = ((df_feature_full['rundate'] >= train_begin)
                 & (df_feature_full['rundate'] <= test_day))
    df_window = df_feature_full[in_window]

    results = []
    df_corr = pd.DataFrame()

    for store_id in storeid_list:
        df_store = df_window[df_window['storeid'] == store_id]
        if df_store.empty:
            continue

        for goodsn in goodsn_list:
            # filter data; ``columns=`` is used because pandas 2.x no longer
            # accepts the axis as a positional argument of ``drop``.
            df_goods_full = df_store[df_store['goodscode'] == goodsn].drop(
                columns=['goodscode', 'storeid'])
            if df_goods_full.empty:
                continue

            # binning
            df_goods_full = f_binning.binner_engine(df_goods_full)

            # get feature and label
            df_feature = df_goods_full.drop(columns=['result'])
            df_label = df_goods_full[['result', 'rundate']]

            # No parameter search in this mode: the sale job receives an
            # empty parameter dict (a fresh one per pair).
            paras = {}

            # single process predict
            res_sale_list = [sale_job(paras, test_day, df_feature, df_label)]
            res_stock_list = [stock_job(test_day, df_feature, df_label)]

            succ, result = gen_predicts(store_id, goodsn, res_sale_list,
                                        res_stock_list)
            if not succ:
                continue

            # result
            results.append(result)

            # feature corr output
            df_corr = corr_evaluation(df_corr, store_id, goodsn,
                                      df_goods_full)

    write_corr(df_corr)

    # Concatenate once at the end; ``pd.concat`` rejects an empty list.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
def day_regression(store_id, goodsn_list, df_feature_full, date_max, gap=30):
    """Predict each goods of one store for the ``gap`` days ending at ``date_max``.

    For every goods code the matching rows are binned with
    ``f_binning.binner_engine``, parameters are obtained from ``get_paras``
    for the first prediction date, and ``proba_job`` is called once per
    prediction date.  The outcome is reported through ``print_evaluation``
    and ``plt_draw`` and converted by ``gen_predicts``; a goods is skipped
    as soon as one of those three reports failure.  Feature correlations of
    the remaining goods are accumulated with ``corr_evaluation`` and passed
    to ``write_corr`` once at the end.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``storeid``,
            ``goodscode``, ``rundate`` and ``result``.  Its ``result``
            column is cast to ``int`` in place (side effect on the
            caller's frame, kept for backward compatibility).
        date_max: Last prediction date as a ``'%Y-%m-%d'`` string.
        gap: Number of consecutive days to predict, ending at ``date_max``.

    Returns:
        The frames returned by ``gen_predicts`` concatenated with a fresh
        index, or an empty ``DataFrame`` when nothing was predicted.
    """
    # make label as int
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    # The store filter is identical for every goods, so apply it once.
    df_store = df_feature_full[df_feature_full['storeid'] == store_id]

    results = []
    df_corr = pd.DataFrame()

    for goodsn in goodsn_list:
        # ``columns=`` is used because pandas 2.x no longer accepts the axis
        # as a positional argument of ``drop``.
        df_goods_full = df_store[df_store['goodscode'] == goodsn].drop(
            columns=['goodscode', 'storeid'])
        if df_goods_full.empty:
            continue

        # binning
        df_goods_full = f_binning.binner_engine(df_goods_full)

        # feature and label
        df_feature = df_goods_full.drop(columns=['result'])
        df_label = df_goods_full[['result', 'rundate']]

        # get date range
        pred_dts = pd.date_range(
            end=date_max, periods=gap).strftime('%Y-%m-%d').tolist()

        # get best parameters (searched on the earliest prediction date)
        dt_min = pred_dts[0]
        paras = get_paras(dt_min, df_feature, df_label)

        # single process predict
        res_list = [
            proba_job(paras, pred_dt, df_feature, df_label)
            for pred_dt in pred_dts
        ]

        # generate plt draw data: shift the dates on a new frame instead of
        # mutating the feature frame through an alias.
        df_draw = df_feature.assign(
            rundate=df_feature['rundate'] + timedelta(days=-2))
        draw_begin = (datetime.strptime(date_max, "%Y-%m-%d")
                      - timedelta(days=gap + 20)).strftime('%Y-%m-%d')
        df_draw = df_draw[df_draw['rundate'] > draw_begin]

        # exporter
        if not print_evaluation(goodsn, res_list):
            continue
        if not plt_draw(store_id, goodsn, df_draw, res_list):
            continue
        succ, result = gen_predicts(goodsn, res_list)
        if not succ:
            continue
        results.append(result)

        # feature corr output
        df_corr = corr_evaluation(df_corr, store_id, goodsn, df_goods_full)

    write_corr(df_corr)

    # Concatenate once at the end; ``pd.concat`` rejects an empty list.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
def day_regression(store_id, goodsn_list, df_feature_full, date_max, gap=30):
    """Predict each goods of one store for the ``gap`` days ending at ``date_max``.

    For every goods code the matching rows are binned with
    ``f_binning.binner_engine``.  For each prediction date the data is
    restricted to rows whose ``rundate`` is later than
    ``get_n_day_ago(pred_dt, 7)`` and passed to ``proba_job``.  The outcome
    is reported through ``print_evaluation`` and converted by
    ``gen_predicts``; a goods is skipped when either reports failure.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``storeid``,
            ``goodscode``, ``rundate`` and ``result``.  Its ``result``
            column is cast to ``int`` in place (side effect on the
            caller's frame, kept for backward compatibility).
        date_max: Last prediction date, in a form accepted by
            ``pd.date_range(end=...)``.
        gap: Number of consecutive days to predict, ending at ``date_max``.

    Returns:
        The frames returned by ``gen_predicts`` concatenated with a fresh
        index, or an empty ``DataFrame`` when nothing was predicted.
    """
    # make label as int
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    # The store filter is identical for every goods, so apply it once.
    df_store = df_feature_full[df_feature_full['storeid'] == store_id]

    results = []

    for goodsn in goodsn_list:
        # ``columns=`` is used because pandas 2.x no longer accepts the axis
        # as a positional argument of ``drop``.
        df_goods_full = df_store[df_store['goodscode'] == goodsn].drop(
            columns=['goodscode', 'storeid'])
        if df_goods_full.empty:
            continue

        # binning
        df_goods_full = f_binning.binner_engine(df_goods_full)

        # get date range
        pred_dts = pd.date_range(
            end=date_max, periods=gap).strftime('%Y-%m-%d').tolist()

        # single process predict
        res_list = []
        for pred_dt in pred_dts:
            day_ago = get_n_day_ago(pred_dt, 7)
            # NOTE(review): the filter is applied to the frame left over from
            # the previous iteration, so the data only ever shrinks.  With
            # ascending prediction dates this presumably equals filtering the
            # binned frame afresh each time -- confirm against
            # get_n_day_ago before changing it.
            df_goods_full = df_goods_full[df_goods_full['rundate'] > day_ago]

            # feature and label
            df_feature = df_goods_full.drop(columns=['result'])
            df_label = df_goods_full[['result', 'rundate']]
            res_list.append(proba_job(pred_dt, df_feature, df_label))

        # exporter
        if not print_evaluation(goodsn, res_list):
            continue
        succ, result = gen_predicts(goodsn, res_list)
        if not succ:
            continue
        results.append(result)

    # Concatenate once at the end; ``pd.concat`` rejects an empty list.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)
def day_regression(store_id, goodsn_list, df_feature_full, gap=30):
    """Predict each goods of one store for its last ``gap`` days and plot it.

    For every goods code the matching rows are split into features and
    labels, the features are binned with ``f_binning.binner_engine`` and
    ``proba_job`` is called once per day of the ``gap``-day range ending at
    the latest ``rundate`` of that goods.  A short summary is printed, a
    chart is saved to ``../sources/<store_id>_<goodsn>.png`` and the
    per-day predictions are collected.

    Args:
        store_id: Value matched against the ``storeid`` column.
        goodsn_list: Iterable of values matched against ``goodscode``.
        df_feature_full: Frame with at least the columns ``storeid``,
            ``goodscode``, ``rundate`` and ``result``; the binned features
            must also expose ``endqty`` and ``saleqty``.  Its ``result``
            column is cast to ``int`` in place (side effect on the
            caller's frame, kept for backward compatibility).
        gap: Number of consecutive days to predict per goods.

    Returns:
        A frame with the columns ``rundate``, ``pred``, ``sales`` and
        ``goodsn`` holding one row per successful prediction; empty (with
        those columns) when nothing was predicted.
    """
    df_feature_full['result'] = df_feature_full['result'].astype(int)

    # The store filter is identical for every goods, so apply it once.
    df_store = df_feature_full[df_feature_full['storeid'] == store_id]

    df_results = []
    for goodsn in goodsn_list:
        # ``columns=`` is used because pandas 2.x no longer accepts the axis
        # as a positional argument of ``drop``.
        df_data = df_store[df_store['goodscode'] == goodsn].drop(
            columns=['goodscode', 'storeid'])

        df_simple = df_data.drop(columns=['result'])
        df_label = df_data[['result', 'rundate']]

        # get date range; a goods without any usable date is skipped.
        # ``pd.isna`` covers NaT as well as the float NaN an empty
        # object-dtype column yields; the literal 'nan' comparison is kept
        # from the previous guard.
        rundate_max = df_simple['rundate'].max()
        if pd.isna(rundate_max) or rundate_max == 'nan':
            continue

        df_simple = f_binning.binner_engine(df_simple)

        df_max = rundate_max.strftime('%Y-%m-%d')
        pred_dts = pd.date_range(
            end=df_max, periods=gap).strftime('%Y-%m-%d').tolist()

        # single process
        res_list = [
            proba_job(pred_dt, df_simple, df_label) for pred_dt in pred_dts
        ]

        # Arrangement data: shift the dates on a new frame instead of
        # mutating the feature frame through an alias.
        df_draw = df_simple.assign(
            rundate=df_simple['rundate'] + timedelta(days=-2))
        draw_begin = (rundate_max - timedelta(days=gap)).strftime('%Y-%m-%d')
        df_draw = df_draw[df_draw['rundate'] > draw_begin]

        # Falsy job results mark days without a prediction.
        valid = [res for res in res_list if res]
        if not valid:
            continue

        y_tests = np.array([res['y_test'] for res in valid])
        y_preds = np.array([res['y_pred'] for res in valid])
        dts = [res['rundate'] for res in valid]

        print('goodsn: ' + str(goodsn))
        print('\tmean(stock): ' + str(df_draw['endqty'].mean()))
        print('\tmean(sales): ' + str(y_tests.mean()))
        print('\tmean(predict_stock): ' + str(y_preds.mean()))
        print('\tcount(stock<0): ' + str(len(df_draw[df_draw['endqty'] < 0])))
        print('\tpredict_stock<sales: ' + str(np.sum(y_preds < y_tests)))
        print('\tcount(sales): ' + str(len(y_tests)))

        # draw sales chart: observed sales in green, predictions in red
        plt.figure(1, figsize=(20, 7))
        plt.plot(df_draw['rundate'], df_draw['saleqty'],
                 color='#6aa84f', marker='o')
        plt.plot([datetime.strptime(d, '%Y-%m-%d').date() for d in dts],
                 y_preds, color='red', marker='o')
        plt.savefig('../sources/' + str(store_id) + '_' + str(goodsn) + '.png')
        plt.close('all')

        df_result = pd.DataFrame(dts, columns=['rundate'])
        df_result['pred'] = y_preds
        df_result['sales'] = y_tests
        df_result['goodsn'] = goodsn
        df_results.append(df_result)

    # ``pd.concat`` raises ValueError on an empty list; return an empty
    # frame with the result columns instead.
    if not df_results:
        return pd.DataFrame(columns=['rundate', 'pred', 'sales', 'goodsn'])
    return pd.concat(df_results, ignore_index=True)