def get_year_ticket(yyyy, cutoff_date, ts_name):
    """Stack actuals (ISO-year start .. cutoff) with stored forecasts (.. ISO-year end) for one series.

    :param yyyy: target ISO year (passed to xl_ut.iso_dates to get the year bounds)
    :param cutoff_date: anything pd.to_datetime accepts; actuals end (inclusive) here
    :param ts_name: time-series key, also used to tag/rename the output columns
    :return: single DataFrame with actual rows followed by forecast rows
    """
    cutoff_date = pd.to_datetime(cutoff_date)
    start, end = xl_ut.iso_dates(yyyy)

    # actuals, restricted to the ISO year and up to (and including) the cutoff
    ts_cfg, _ = dp.ts_setup(ts_name, cutoff_date, pd.to_datetime('2016-01-01'), 'W')
    actuals = dp.ts_actuals(ts_name, ts_cfg,
                            ['language', 'business_unit', 'channel', 'service_tier'],
                            drop_cols=False)
    actuals['ts_name'] = ts_name
    actuals['ds'] = pd.to_datetime(actuals['ds'].values)
    in_window = (actuals['ds'] >= start) & (actuals['ds'] <= cutoff_date)
    actuals = actuals[in_window].copy()

    # stored forecasts for this series/cutoff; hive returns table-prefixed column names,
    # so strip everything before the first '.'
    fcast = ap.hive.query('select * from sup.cx_weekly_forecasts where ts_name = \'' + ts_name + '\' and cutoff =\'' + str(cutoff_date.date()) + '\';')
    fcast.columns = [c.split('.')[1] for c in fcast.columns]
    fcast['ds'] = pd.to_datetime(fcast['ds'].values)
    fcast = fcast[fcast['ds'] <= end].copy()
    fcast.rename(columns={'yhat': ts_name}, inplace=True)

    return pd.concat([actuals, fcast], axis=0)
def basic_perf(ts_name, cutoff_date, upr, lwr, init_date='2016-01-01', time_scale='W'): ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale) # actuals actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols) actuals_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True) actuals_df.drop_duplicates(inplace=True) # not sure why there are dups. Table problem? # forecasts f_df = get_lang_fcast(ts_cfg, cutoff_date, eq=True) f_df.drop_duplicates(inplace=True) # not sure why there are dups. Table problem? f_idx = fcast_idx(f_df) f_df = f_df.merge(f_idx, on='dim_cfg', how='left') f_df.drop('dim_cfg', axis=1, inplace=True) f_df.rename(columns={'index': 'dim_cfg'}, inplace=True) df = f_df.merge(actuals_df, on=['ds', 'language'], how='left') df.drop('cutoff', axis=1, inplace=True) df.drop_duplicates(inplace=True) # not sure why there are dups p_ut.save_df(df, '~/my_tmp/df_all') # perf cu_sun = cutoff_date - pd.to_timedelta(6, unit='D') sf = df[['ds', 'language', 'y']].drop_duplicates() sf.dropna(inplace=True) f_shift = sf.groupby('language').apply(get_shift, cu_sun=cu_sun, upr=upr, lwr=lwr).reset_index() # find best shift nf = df.merge(f_shift, on=['ds', 'language', 'y'], how='left') nf.set_index(['language', 'dim_cfg'], inplace=True) # avoids drop of nuisance cols nf.dropna(inplace=True) nf.drop_duplicates(inplace=True) zperf = nf.groupby(['language', 'dim_cfg']).apply(cfg_perf, upr=upr, lwr=lwr, cu_sun=cu_sun).reset_index(drop=True) # do not groupby df.index zperf.dropna(inplace=True) p_ut.save_df(zperf, '~/my_tmp/zperf_' + ts_name + '_' + str(cutoff_date.date())) nf.reset_index(inplace=True) p_ut.save_df(nf, '~/my_tmp/nf') fout = nf.merge(zperf, on=list(nf.columns), how='left') return actuals_df, fout
def main(argv):
    """CLI driver: score several ensemble regressors' forecasts against actuals.

    argv: [prog, ts_name [, cutoff_date [, to_table]]]; to_table is an int-like flag.
    Writes per-ensemble performance detail to disk and optionally to Hive.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    upr = 12
    lwr = 8
    evals = 50
    by_lang = False
    # ###########################
    print(argv)
    if len(argv[1:]) == 1:
        ts_name = argv[-1]
        cutoff_date = pd.to_datetime('today')
        to_table = False
    elif len(argv[1:]) == 2:
        ts_name, cutoff_date = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv[1:]) == 3:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    # actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)
    actuals_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)
    # forecasts
    f_df = fp.get_lang_fcast(ts_cfg, cutoff_date)
    fcast_date = cutoff_date + pd.to_timedelta(upr, unit=time_scale)
    perf_list = list()
    # score each candidate ensemble regressor independently
    for xens in [
            'XGBRegressor', 'AdaBoostRegressor', 'BaggingRegressor',
            'GradientBoostingRegressor', 'RandomForestRegressor',
            'ExtraTreesRegressor', 'lasso'
    ]:
        fcast_df = ep.make_fcast(ts_name, f_df, actuals_df, cutoff_date, fcast_date,
                                 xens, evals, by_lang, (lwr, upr), lwr=lwr, upr=upr)
        perf_df = perf.fcast_perf(fcast_df, actuals_df, cutoff_date, lwr, upr, time_scale, xens)
        if perf_df is None:
            s_ut.my_print('pid: ' + str(os.getpid()) + ' WARNING: forecast performance detail failed for ' + ts_name + ' ,cutoff date ' + str(cutoff_date.date()) + ' and ensemble: ' + str(xens))
        else:
            perf_df['ts_name'] = ts_name
            perf_list.append(perf_df)
    if len(perf_list) > 0:
        pf = pd.concat(perf_list, axis=0)
        p_ut.save_df(pf, '~/my_tmp/perf/fcast_perf_detail_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['language', 'y', 'yhat', 'err', 'lwr', 'upr', 'ens']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_name}
            ret = hql.to_tble(pf, tab_cols, 'sup.cx_language_forecast_performance_detail', partition)
            if ret != 0:  # to_tble returns non-zero on failure
                s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: forecast performance detail failed for ' + ts_name + ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no data for forecast performance detail failed for ' + ts_name + ' and cutoff date ' + str(cutoff_date.date()))
        sys.exit()
# NOTE(review): incomplete fragment — this span begins inside an argv-parsing
# `if`/`else` whose opening `if` lies outside the visible source, so the code is
# left byte-identical rather than restructured. Visible behavior: parse the
# cutoff date (or exit on bad args), load 'ticket_count' actuals from the start
# of (target_year - 1) for YoY comparison, load the current per-BU forecast past
# the cutoff, and produce a target-year summary via xl_ut.year_summary.
cutoff_date = pd.to_datetime(cutoff_date) else: print('invalid args: ' + str(sys.argv)) sys.exit() # ######################### actuals_weeks = 12 target_year = 2020 init_date = pd.to_datetime('2016-01-01') time_scale = 'W' ts_name = 'ticket_count' # ######################### # get actuals ts_cfg, _ = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale) adf = dp.ts_actuals(ts_name, ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False, use_cache=False) start, end = tm_ut.iso_dates(target_year - 1) adf = adf[adf['ds'] >= start] # use for YoY comparison adf.rename(columns={'ticket_count': 'y'}, inplace=True) p_ut.save_df(adf, '~/my_tmp/a_df') # current forecast s_ut.my_print('current forecast') cf_df = pd.concat([get_fcast(ts_name, ts_cfg, cutoff_date, bu, use_cache=False) for bu in ts_cfg['business_units']], axis=0) cf_df = cf_df[cf_df['ds'] > cutoff_date].copy() p_ut.save_df(cf_df, '~/my_tmp/cf_df') # target year summary xl_ut.year_summary(adf, cf_df, target_year, cutoff_date) # get 90 day old forecast (last Sat of a month)
def main(argv):
    """CLI driver: run per-language forecasts for a time series and persist them.

    argv: [prog, ts_name [, run_date [, to_table]]]; to_table is an int-like flag.
    NOTE(review): relies on module-level names not visible in this chunk:
    FCAST_DICT, is_test, CFG_COLS, fcast_lang, get_f_cfg, set_cfg.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    froot = '~/my_tmp/fbp/'
    # ###########################
    print(argv)
    if len(argv) == 2:
        ts_name = argv[-1]
        to_table = False
        run_date = pd.to_datetime('today')
    elif len(argv) == 3:
        ts_name, run_date = argv[-2:]
        try:
            run_date = pd.to_datetime(run_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, run_date, to_table = argv[1:]
        try:
            run_date = pd.to_datetime(run_date)
            to_table = bool(int(to_table))
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, run_date, to_table): ' + str(argv))
        sys.exit()
    # data cfg
    cutoff_date = tm_ut.get_last_sat(run_date)  # set to last saturday before run_date or the run_date if a saturday
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    FCAST_DICT['outlier_coef'] = ts_cfg.get('outlier_coef', [3.0])
    fcast_days = ts_cfg.get('fcast_days', None)
    if fcast_days is None:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR" fcast_days must be specified in data_cfg')
        sys.exit()
    else:
        fcast_date = cutoff_date + pd.to_timedelta(fcast_days, unit='D')
    # derive the forecast horizon in time-scale units
    if time_scale == 'W':
        fcast_date = fcast_date - pd.to_timedelta(1 + fcast_date.weekday(), unit='D')  # set to week starting Sunday
        cu = cutoff_date - pd.to_timedelta(1 + cutoff_date.weekday(), unit='D')  # set to week starting Sunday
        fcast_days = (fcast_date - cu).days  # multiple of 7
        upr_horizon = int(fcast_days / 7)  # in time scale units
    elif time_scale == 'D':
        upr_horizon = int(fcast_days)  # in time scale units
    else:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' invalid time scale: ' + str(time_scale))
        sys.exit()
    s_ut.my_print('pid: ' + str(os.getpid()) + ' ------------------------ start language forecast for ' + str(ts_name) + ' from cutoff date ' + str(cutoff_date.date()) + ' (excluded) to forecast date ' + str(fcast_date.date()) + ' (included) -----------------------')
    # get actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)  # may have data past cutoff for accuracy checking
    if actuals_df['ds'].max() < cutoff_date:
        s_ut.my_print('ERROR: no actuals available for forecast from cutoff date: ' + str(cutoff_date.date()))
        sys.exit()
    f_actuals_df = actuals_df[actuals_df['ds'] <= cutoff_date].copy()  # actuals for forecast: only use up to cutoff date
    # adjust FCAST_DICT
    if len(FCAST_DICT['do_res']) == 2:  # True, False
        FCAST_DICT['do_res'] = [True]  # MUST overwrite: the False care is always included and otherwise we double count.
    if len(ts_cfg.get('regressors', list())) == 0:
        FCAST_DICT['r_mode'] = [None]
        reg_dict = dict()
    else:
        reg_dict = regs.ens_fcast(ts_name, ts_cfg['regressors'], cutoff_date, time_scale,
                                  fcast_days, init_date, f_actuals_df)  # stored by cutoff date on last Sat of the month
    # update init_date: latest common start across actuals and all regressors
    init_date = max([f_actuals_df['ds'].min()] + [f['ds'].min() for f in reg_dict.values()])
    f_actuals_df = f_actuals_df[f_actuals_df['ds'] >= init_date].copy()
    reg_dict = {lx: f[f['ds'] >= init_date].copy() for lx, f in reg_dict.items()}
    ts_cfg['init_date'] = init_date
    # set the list of fcast cfgs
    tlist = get_f_cfg(FCAST_DICT, cutoff_date, init_date, time_scale)  # list of fcast cfg's
    fix_pars = [f_actuals_df, ts_name, reg_dict, fcast_date, cutoff_date, ts_cfg, time_scale, upr_horizon]
    arg_list = [fix_pars + [tlist[ix]] for ix in range(len(tlist))]  # 2 fcasts are done per input cfg (do_res = true and do_res = false)
    n_fcfg = 2 * len(arg_list)
    s_ut.my_print('pid: ' + str(os.getpid()) + ' ++++++++ there are ' + str(n_fcfg) + ' fcast configs per language **********')
    # ###############################################################################
    # run the forecast configs (serially when is_test, else multiprocessing)
    # ###############################################################################
    if is_test:
        df_list_ = s_ut.do_mp(fcast_lang, arg_list, is_mp=False, cpus=None, do_sigkill=True)
    else:
        df_list_ = s_ut.do_mp(fcast_lang, arg_list, is_mp=True, cpus=None, do_sigkill=True)
    # ###############################################################################
    # join all the fcasted data into a flat list
    df_list = [f for f in df_list_ if f is not None]
    if len(df_list) > 0:
        ylist, alist = list(), list()
        for fl in df_list:
            if fl is not None:
                fl = set_cfg(fl.copy(), CFG_COLS)
                ylist.append(fl[['ds', 'language', 'yhat', 'ts_name', 'cutoff', 'dim_cfg', 'fcast_date']].copy())
                alist.append(fl)
        # save basic fcast data
        fcast_df = pd.concat(ylist, axis=0)  # now all the list elements have the same columns
        fcast_df.reset_index(inplace=True, drop=True)
        ok_cfg = fcast_df['dim_cfg'].unique()
        s_ut.my_print('pid: ' + str(os.getpid()) + str(len(ok_cfg)) + ' forecasts cfgs available for ' + str(ts_name) + ' from cutoff date ' + str(cutoff_date.date()) + ' (excluded) to forecast date ' + str(fcast_date.date()) + ' (included) -----------------------')
        # fcast_df = fcast_df[fcast_df['dim_cfg'].isin(ok_cfg)].copy()
        fname = froot + 'lang_fcast_'
        p_ut.save_df(fcast_df, fname + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['ds', 'language', 'dim_cfg', 'yhat']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(fcast_df, tab_cols, 'sup.cx_language_forecast', partition)
            if ret != 0:  # non-zero return means the Hive load failed
                s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no forecasts loaded to table for ' + str(ts_cfg['ts_key']) + ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        # save all fcast data (y_upr, y_lwr, ...)
        all_df = pd.concat(alist, axis=0)  # now all the list elements have the same columns
        all_cols = list(set([c for c in all_df.columns if c not in CFG_COLS]))
        all_df.reset_index(inplace=True, drop=True)
        all_df = all_df[all_cols].copy()
        all_df = all_df[all_df['dim_cfg'].isin(ok_cfg)].copy()
        fname = froot + 'fcast_all_'
        p_ut.save_df(all_df, fname + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            # melt to long (key, value) form and drop empty/zero values before loading
            all_df.drop(['cutoff', 'ts_name'], axis=1, inplace=True)
            mf = pd.melt(all_df, id_vars=['ds', 'language', 'dim_cfg'], var_name='key', value_name='value')
            mf.dropna(subset=['value'], inplace=True)
            mf = mf[mf['value'] != 0.0].copy()
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(mf, list(mf.columns), 'sup.cx_language_forecast_detail', partition)
            if ret != 0:
                s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no forecasts loaded to table for ' + str(ts_cfg['ts_key']) + ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no forecasts available for ' + str(ts_cfg['ts_key']) + ' from cutoff date ' + str(cutoff_date.date()) + ' (excluded) to forecast date ' + str(fcast_date.date()) + ' (included) -----------------------')
def main(argv):
    """CLI driver: build channel/service-tier ratio forecasts from coherent language-level forecasts.

    argv: [prog, ts_name [, to_table]]; to_table is an int-like flag.
    Reads dates/windows from ratio_forecast_cfg.json, computes per-bottom-TS ratios,
    adjusts them for coherence, and optionally loads sup.cx_weekly_forecasts.
    """
    print(argv)
    if len(argv) == 2:
        ts_name = argv[-1]
        to_table = False
    elif len(argv) == 3:
        ts_name, to_table = argv[1:]
        try:
            to_table = bool(int(to_table))
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, to_table): ' + str(argv))
        sys.exit()
    # only top-level (non business-unit) series are allowed here
    if any([bu in ts_name for bu in ['Homes', 'Experiences', 'China']]):
        s_ut.my_print('ERROR: time series cannot be a BU time series: ' + str(ts_name))
        sys.exit()
    data_cfg = os.path.expanduser('~/my_repos/capacity_planning/forecast/config/ratio_forecast_cfg.json')
    if os.path.isfile(data_cfg):
        with open(data_cfg, 'r') as fptr:
            rf_dict = json.load(fptr)
    else:
        s_ut.my_print('ERROR: ' + data_cfg + ' file not found')
        sys.exit()
    d_date = rf_dict.get('data_date', None)
    if d_date is None:
        s_ut.my_print('ERROR: data_date cannot be null')
        sys.exit()
    data_date = pd.to_datetime(d_date)  # this is the cutoff date we get data from tables
    a_date = rf_dict.get('adjust_date', None)  # if None, nothing to adjust and adj_date = data_date
    adjust_date = data_date if a_date is None else pd.to_datetime(a_date)  # this is the actual cutoff date
    window = rf_dict.get('ratio_windows', dict())
    if len(window) == 0:  # not set: default to the 6 weeks ending at adjust_date
        window = {'default': {'start': adjust_date - pd.to_timedelta(6, unit='W'), 'end': adjust_date}}
    else:
        for k, v in window.items():
            for kk, vv in v.items():
                v[kk] = pd.to_datetime(vv)
    s_ut.my_print('************************* read table date: ' + str(data_date.date()) + ' ********************************************')
    s_ut.my_print('************************* write table date: ' + str(adjust_date.date()) + ' *******************************************')
    # ###############################
    time_scale = 'W'
    init_date = pd.to_datetime('2016-01-01')
    # ###############################
    df_tilde, bottom_ts = hts.main(ts_name, data_date, do_cov=True)  # coherent forecasts at language level + language level adjustments
    f_df = adj.main(df_tilde, 'language', bottom_ts, ts_name, adjust_date)  # must adjust at language level before service level ratios
    fr_list, fr_cols = list(), list()
    a_list = list()
    ts_list = bottom_ts  # ratios only on bottom_ts then aggregate to top TS
    for ts in ts_list:
        s_ut.my_print('============= starting ' + str(ts))
        ts_cfg, _ = dp.ts_setup(ts, data_date, init_date, time_scale)
        a_df = dp.ts_actuals(ts, ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False)
        b_df = filter_actuals(a_df, window)
        fr = ts_ratio(ts, b_df.copy(), f_df[['ds', 'language', ts + '_tilde']].copy(), window, data_date)
        fr_list.append(fr)
        fr_cols.append(fr.columns)
        a_list.append(b_df)
        check_ratios(ts, b_df, fr, True, 'service_tier')
        check_ratios(ts, b_df, fr, False, 'service_tier')
        check_ratios(ts, b_df, fr, True, 'channel')
        check_ratios(ts, b_df, fr, False, 'channel')
    # must adjust together to ensure coherence
    fr = reduce(lambda x, y: x.merge(y, on=['ds', 'language', 'channel', 'service_tier'], how='outer'), fr_list) if len(fr_list) > 0 else None
    fr.fillna(0, inplace=True)
    for k_col in ['channel', 'service_tier']:  # language adj must be done before ratios
        fr = adj.main(fr, k_col, bottom_ts, ts_name, adjust_date)
    # save data: split the merged frame back into one frame per bottom TS
    f_list = list()
    for idx in range(len(ts_list)):
        ts = ts_list[idx]
        fx = fr[fr_cols[idx]].copy()
        fx.rename(columns={ts + '_tilde': 'yhat'}, inplace=True)
        fx['yhat'] = np.round(fx['yhat'].values, 0)  # this makes input totals and output totals to be a bit off
        fx = fx[fx['yhat'] > 0]
        fx['ts_name'] = ts
        fx['cutoff'] = adjust_date
        f_list.append(fx)
    # get the aggregate series
    fall = pd.concat(f_list, axis=0)
    gall = fall.groupby(['ds', 'language', 'channel', 'service_tier']).sum(numeric_only=True).reset_index()
    gall['cutoff'] = adjust_date
    gall['ts_name'] = ts_name
    # align cols (ap gets confused otherwise?)
    tcols = ['ds', 'language', 'channel', 'service_tier', 'yhat', 'ts_name', 'cutoff']
    fall = fall[tcols].copy()
    gall = gall[tcols].copy()
    # final DF to save
    fout = pd.concat([gall, fall], axis=0)
    p_ut.save_df(fout, '~/my_tmp/fbp/ratios_fcast_' + ts_name + '_' + str(adjust_date.date()))
    ts_cfg, _ = dp.ts_setup('ticket_count', data_date, init_date, time_scale)
    a_df = dp.ts_actuals('ticket_count', ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False)
    p_ut.save_df(a_df, '~/my_tmp/a_df_ticket_count_' + str(adjust_date.date()))
    # data summary
    s_ut.my_print('**************** Data Summary *******************')
    for c in ['language', 'channel', 'service_tier']:
        s_ut.my_print('unique ' + c + ': ' + str(fout[c].unique()))
    # save to DB
    if to_table is True:
        tcols.remove('cutoff')
        tcols.remove('ts_name')
        tcols.insert(1, 'ds_week_starting')
        tcols.insert(2, 'fcst_date_inv_ending')  # ds_week_ending
        fout = fout[fout['ds'] > adjust_date.date()].copy()  # only save forecasted values
        fout['ds_week_starting'] = fout['ds']
        fout['fcst_date_inv_ending'] = fout['ds'] + pd.to_timedelta(6, unit='D')
        for ts in fout['ts_name'].unique():
            partition = {'cutoff': str(adjust_date.date()), 'ts_name': ts}
            tb_df = fout[fout['ts_name'] == ts].copy()
            ret = hql.to_tble(tb_df, tcols, 'sup.cx_weekly_forecasts', partition)
            if ret != 0:  # non-zero return means the Hive load failed
                s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: no forecasts loaded to table for ' + ts_name + ' and cutoff date ' + str(adjust_date.date()))
                sys.exit()
            else:
                s_ut.my_print('>>>>>>>>>>>>>>> SUCCESS: data saved to table <<<<<<<<<<<<<<<<<<<')
    else:
        s_ut.my_print('>>>>>>>>>>>>>>> WARNING: no data saved to table <<<<<<<<<<<<<<<<<<<')
def main(argv):
    """CLI driver: combine per-config language forecasts into a single ensemble forecast.

    argv: [prog, ts_name, cutoff_date [, to_table]]; to_table is an int-like flag.
    Saves the ensemble forecast to disk and optionally to sup.cx_ens_forecast.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    froot = os.path.expanduser('~/my_tmp/fbp/')
    evals = 250
    by_lang = False
    lwr, upr = 9, 12
    # ###########################
    print(argv)
    if len(argv) == 3:
        ts_name, cutoff_date = argv[-2:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()
    # data cfg
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    fcast_days = ts_cfg.get('fcast_days', None)
    if fcast_days is None:
        s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR" fcast_days must be specified in data_cfg')
        sys.exit()
    else:
        fcast_date = cutoff_date + pd.to_timedelta(fcast_days, unit='D')
    if time_scale == 'W' and fcast_date.weekday() != 6:  # set fcast date to week starting Sunday unless it is a Sunday already
        fcast_date = fcast_date - pd.to_timedelta(1 + fcast_date.weekday(), unit='D')
    s_ut.my_print('pid: ' + str(os.getpid()) + ' ------------------------ start ens forecast for ' + str(ts_name) + ' from cutoff date ' + str(cutoff_date.date()) + ' (excluded) to forecast date ' + str(fcast_date.date()) + ' (included) -----------------------')
    a_df = dp.ts_actuals(ts_name, ts_cfg, cols)  # get actuals
    a_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)
    fcast_df = fp.get_lang_fcast(ts_cfg, cutoff_date)  # get fcasts
    if fcast_df is not None and a_df is not None:
        s_ut.my_print(ts_name + ': combining ' + str(fcast_df['dim_cfg'].nunique()) + ' forecast configs')
        # pick the ensemble regressor for this series/cutoff
        xens_ = get_ens(ts_name, cutoff_date)  # ts_cfg['ens'].get(str(cutoff_date.month), ens_dict['default'])
        s_ut.my_print('aggregation for ' + ts_name + ' done with ' + xens_)
        ts_fcast = ep.make_fcast(ts_name, fcast_df, a_df, cutoff_date, fcast_date, xens_, evals, by_lang, (lwr, upr), lwr=lwr, upr=upr)
        ts_fcast['fcast_date'] = fcast_date
        p_ut.save_df(ts_fcast, froot + 'ens_fcast_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            cols = ['ds', 'language', 'ens', 'yhat']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_cfg['ts_key']}
            ret = hql.to_tble(ts_fcast, cols, 'sup.cx_ens_forecast', partition)
            if ret != 0:  # non-zero return means the Hive load failed
                s_ut.my_print('ERROR: DB write for ' + ts_name + ' ens forecast ' + ' at ' + str(cutoff_date.date()) + ' failed')
                sys.exit()
        print('DONE')
    else:
        s_ut.my_print('ERROR: no actuals or no data for errors of ' + ts_name + ' at ' + str(cutoff_date.date()))
def main(argv):
    """CLI driver: score the stored ensemble forecast against actuals.

    argv: [prog, ts_name [, cutoff_date [, to_table]]]; to_table is an int-like flag.
    Saves the performance frame to disk and optionally to
    sup.cx_language_forecast_performance.
    """
    # ###########################
    # parameters
    # ###########################
    time_scale = 'W'  # forecasting time scale reset for daily ticket data
    init_date = pd.to_datetime('2016-01-01')
    upr = 12
    lwr = 8
    # ###########################
    print(argv)
    if len(argv) == 2:
        # BUG FIX: was `ts_name = argv[-2:]`, which binds a one-element *list*;
        # downstream string concatenation ('fcast_perf_' + ts_name) would raise.
        ts_name = argv[-1]
        cutoff_date = pd.to_datetime('today')
        to_table = False
    elif len(argv) == 3:
        ts_name, cutoff_date = argv[-2:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = False
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    elif len(argv) == 4:
        ts_name, cutoff_date, to_table = argv[1:]
        try:
            cutoff_date = pd.to_datetime(cutoff_date)
            to_table = bool(int(to_table))
        except ValueError:
            s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
            sys.exit()
    else:
        s_ut.my_print('ERROR: invalid arguments (ts_name, cutoff_date, to_table): ' + str(argv))
        sys.exit()
    ts_cfg, cols = dp.ts_setup(ts_name, cutoff_date, init_date, time_scale)
    # actuals
    actuals_df = dp.ts_actuals(ts_name, ts_cfg, cols)
    actuals_df.rename(columns={ts_cfg['ycol']: 'y'}, inplace=True)
    # stored ensemble forecast for this cutoff
    fcast_df = fp.get_ens_fcast(ts_name, ts_cfg, cutoff_date)
    ens = fcast_df.loc[fcast_df.index[0], 'ens']
    f_df = ep.fcast_filter(fcast_df, actuals_df, ts_name,
                           cutoff_date + pd.to_timedelta(upr, unit=time_scale),
                           cutoff_date, time_scale)
    pf = fcast_perf(f_df, actuals_df, cutoff_date, lwr, upr, time_scale, ens)
    if pf is None:
        return
    else:
        pf['ts_name'] = ts_name
        p_ut.save_df(pf, '~/my_tmp/perf/fcast_perf_' + ts_name + '_' + str(cutoff_date.date()))
        if to_table is True:
            tab_cols = ['language', 'y', 'yhat', 'err', 'lwr', 'upr', 'ens']
            partition = {'cutoff': str(cutoff_date.date()), 'ts_name': ts_name}
            ret = hql.to_tble(pf, tab_cols, 'sup.cx_language_forecast_performance', partition)
            if ret != 0:  # non-zero return means the Hive load failed
                s_ut.my_print('pid: ' + str(os.getpid()) + ' ERROR: forecast performance failed for ' + ts_name + ' and cutoff date ' + str(cutoff_date.date()))
                sys.exit()
        print('DONE')