Example 1
    def _regr_opt(self,
                  evals=None):  # optimize on the last w weeks before cutoff
        if evals is None:
            evals = self.max_evals

        self.iter, self.valid_iter = 0, 0
        best_params = {'alpha': 1.0, 'l1_ratio': 0.5}
        for space_ in self.space_list:
            trials = Trials()
            self.l1_ratio = space_['l1_ratio']
            space = {'alpha': hp.uniform('alpha', space_['alpha_min'], space_['alpha_max'])}
            with s_ut.suppress_stdout_stderr():
                _ = fmin(fn=self.hyperpar_tuning,
                         space=space,
                         algo=tpe.suggest,
                         max_evals=int(evals),
                         trials=trials,
                         show_progressbar=False)
            self.string += '+++++++++++++++++++ iter: ' + str(self.iter) + ' valid iter: ' + str(self.valid_iter) + ' evals: ' + str(int(evals)) + '\n'
            if self.verbose:
                print(self.string)
                self.string = ''
            res = trials.results
            best_res = sorted(res, key=lambda x: x['loss'])[0]
            this_loss = best_res['loss']
            if this_loss <= self.min_loss:
                best_params = best_res['params']
                best_params['l1_ratio'] = self.l1_ratio
                self.min_loss = this_loss
        self.loss = self.min_loss
        self.params = best_params
        self.mdl = ElasticNet(**self.params, normalize=False)
        if np.isinf(self.min_loss):
            if self.do_test:
                s_ut.my_print(
                    'ERROR: &&&&&&&&&&&&&&&&&&&&&&& no valid iterations. Turn SW off'
                )
                self.do_test = False
                self._regr_opt()
            else:
                s_ut.my_print(
                    'ERROR: &&&&&&&&&&&&&&&&&&&&&&& FATAL: no valid iterations'
                )
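The objective passed to fmin above is not shown in the source. A minimal sketch of a compatible hyperpar_tuning, assuming a hypothetical cross-validation helper _cv_loss, returns a dict whose 'loss' and 'params' keys match what the loop reads back from trials.results:

from hyperopt import STATUS_OK
import numpy as np

def hyperpar_tuning(self, space):
    # hyperopt calls this with one sampled point from `space`
    self.iter += 1
    alpha = space['alpha']
    loss = self._cv_loss(alpha=alpha, l1_ratio=self.l1_ratio)  # hypothetical CV helper, not in the source
    if np.isfinite(loss):
        self.valid_iter += 1
    # the returned dict is stored verbatim in trials.results
    return {'loss': loss, 'status': STATUS_OK, 'params': {'alpha': alpha}}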
Example 2
 def _regr_opt(self):  # optimize on the last w weeks before cutoff
     trials = Trials()
     with s_ut.suppress_stdout_stderr():
         _ = fmin(fn=self.hyperpar_tuning,
                  space=self.space,
                  algo=tpe.suggest,
                  max_evals=self.max_evals,
                  trials=trials,
                  show_progressbar=False)
     results = trials.results
     best_res = sorted(results, key=lambda x: x['loss'])[0]
     self.params = best_res['params']
     self.params['n_estimators'] = int(self.params['n_estimators'])
     if self.name == 'XGBRegressor':
         self.params['max_depth'] = int(self.params['max_depth'])
         self.params['objective'] = 'reg:squarederror'
         self.params['booster'] = 'gbtree'
     self.loss = best_res['loss']
     self.mdl = self.rfunc(**self.params)  # optimized regressor obj instance
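The search space self.space is defined elsewhere. A minimal sketch of one consistent with the post-processing above (n_estimators and max_depth sampled as floats by hyperopt and cast back to int); learning_rate and subsample are assumed extras, not taken from the source:

import numpy as np
from hyperopt import hp

space = {
    'n_estimators': hp.quniform('n_estimators', 50, 500, 10),   # sampled as a float, cast to int after fmin
    'max_depth': hp.quniform('max_depth', 2, 10, 1),             # sampled as a float, cast to int for XGBRegressor
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.3)),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
}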
Example 3
 def _forecast(self):
     # this decorator assumes one fcast runs on the main thread.  If not, set use_signals=False.
     # see https://github.com/pnpnpn/timeout-decorator
     # timeout is in secs
     ret = self._data_prep()
     if ret is None:
         return None
     else:
         dfc, xreg_df, regs_cols = ret
     try:
         if self.verbose:
             self.prophet_obj.fit(dfc)
         else:
             with s_ut.suppress_stdout_stderr():
                 self.prophet_obj.fit(dfc)
         try:
             future_df = self.prophet_obj.make_future_dataframe(periods=self.horizon, freq=self.time_scale)  # day, week-starting sunday, end of month
             if xreg_df is not None:
                 future_df = future_df.merge(xreg_df, on='ds', how='left')
                 if future_df[regs_cols].isnull().sum().sum() > 0:
                     s_ut.my_print('pid: ' + str(os.getpid()) + ' WARNING: invalid regressors')
                     return None
             f_df = self.prophet_obj.predict(future_df)                            # forecast
         except Exception as e:
             s_ut.my_print('pid: ' + str(os.getpid()) + ' WARNING: prophet predict future failed with error: ' + str(e))
             return None
     except Exception as e:
         s_ut.my_print('pid: ' + str(os.getpid()) + ' WARNING: prophet fit failed with error: ' + str(e))
         if dfc is not None:
             print(dfc.head())
             print(dfc.tail())
         else:
             s_ut.my_print('ERROR: fit DF dfc is None')
         return None
     self.resi_fcast(f_df, dfc[['ds', 'y']].copy())  # fcast residuals and save fcast with no residuals
     return self.f_df_n_res, self.f_df_y_res
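The Prophet object itself is built before _forecast runs. A minimal sketch, assuming the regressor columns returned by _data_prep must be registered before fitting (the seasonality settings here are illustrative, not from the source):

from fbprophet import Prophet  # packaged as `prophet` in newer releases

def build_prophet(dfc, regs_cols):
    # dfc must hold 'ds', 'y' and the regressor columns; each regressor is declared before fit
    mdl = Prophet(weekly_seasonality=True, yearly_seasonality=True)
    for col in regs_cols:
        mdl.add_regressor(col)
    mdl.fit(dfc)
    return mdl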
Example 4
import itertools
import pandas as pd
import numpy as np
import json
import platform

from capacity_planning.utilities import pandas_utils as p_ut
from capacity_planning.utilities import sys_utils as s_ut
from capacity_planning.utilities import stats_utils as st_ut
# import sklearn.linear_model as l_mdl
from capacity_planning.data import hql_exec as hql
from capacity_planning.forecast.ticket_forecast import excel_utils as xl_ut
from capacity_planning.forecast.utilities.language import data_processing as dp

with s_ut.suppress_stdout_stderr():
    import airpy as ap
USE_CACHE = platform.system() == 'Darwin'
USE_CACHE = False  # overrides the line above: caching is forced off
RENEW = not USE_CACHE


def get_year_ticket(yyyy, cutoff_date, ts_name):
    start, end = xl_ut.iso_dates(yyyy)
    cutoff_date = pd.to_datetime(cutoff_date)
    ts_cfg, _ = dp.ts_setup(ts_name, cutoff_date, pd.to_datetime('2016-01-01'), 'W')
    a_df = dp.ts_actuals(ts_name, ts_cfg, ['language', 'business_unit', 'channel', 'service_tier'], drop_cols=False)
    a_df['ts_name'] = ts_name
    f_df = ap.hive.query('select * from sup.cx_weekly_forecasts where ts_name = \'' + ts_name + '\' and cutoff =\'' + str(cutoff_date.date()) + '\';')
    f_df.columns = [c.split('.')[1] for c in f_df.columns]
    a_df['ds'] = pd.to_datetime(a_df['ds'].values)
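As an aside, the concatenated query above can be built more readably with an f-string; this sketch is equivalent to the call in the source:

query = (f"select * from sup.cx_weekly_forecasts "
         f"where ts_name = '{ts_name}' and cutoff = '{cutoff_date.date()}';")
f_df = ap.hive.query(query)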
Example 5
def main(argv):
    print('usage: python lang_fcast.py <ts_name> <cutoff_date>')
    print(argv)
    ts_name, cutoff_date = argv
    this_file = os.path.basename(__file__)
    cfg_dir = '/'.join(FILE_PATH.split('/')[:-1])
    cfg_file = os.path.join(cfg_dir, 'config/' + this_file[:-3] + '_cfg.json')

    # validate the data, prepare regressors, holidays DF
    ts_obj, reg_dict, cfg_dict, train_days = dtp.initialize(
        cfg_file, cutoff_date, ts_name, True, init_date='2016-01-01')
    upr_horizon, lwr_horizon = cfg_dict['upr_horizon_days'], cfg_dict['lwr_horizon_days']
    if_exists = cfg_dict['if_exists']
    cutoff_date = ts_obj.cutoff_date

    out_list = list()
    cu = cutoff_date + pd.to_timedelta(upr_horizon, unit='D')  # actual cutoff date for training
    ds = str(cu.date())
    # ctr = 0
    # train_days = [25, 35]
    for l, t_df in ts_obj.df_dict.items():
        # if l != 'Mandarin':
        #     continue
        s_ut.my_print(
            '\n\n****************************** starting language: ' + str(l))
        lang_list = list()
        if t_df is not None:
            for tdays in train_days:
                tlist = dtp.get_f_cfgs(t_df,
                                       l,
                                       cutoff_date,
                                       tdays,
                                       upr_horizon,
                                       cfg_dict,
                                       is_train=True)
                if tlist is None:
                    s_ut.my_print('WARNING: language ' + str(l) +
                                  ' and training cutoff date ' +
                                  str(cutoff_date.date()) +
                                  ' and training days ' + str(tdays) +
                                  ' has NO fcast configs')
                    continue
                else:
                    arg_list = dtp.prophet_prep(ts_obj, l,
                                                reg_dict.get(l, None),
                                                cfg_dict, upr_horizon,
                                                lwr_horizon, tlist, True)
                    s_ut.my_print('pid: ' + str(os.getpid()) +
                                  ' ************* forecasts for ' + str(l) +
                                  ' with ' + str(tdays) + ' train days and ' +
                                  str(len(arg_list)) + ' configs')
                    f_list = s_ut.do_mp(dtp.tf,
                                        arg_list,
                                        is_mp=True,
                                        cpus=len(arg_list),
                                        do_sigkill=True)
                    if f_list is None:
                        s_ut.my_print('pid: ' + str(os.getpid()) +
                                      ': No results with ' + str(tdays) +
                                      ' training days')
                        f_list = list()
                    else:
                        s_ut.my_print('pid: ' + str(os.getpid()) + ': ' +
                                      str(len(f_list)) + ' results with ' +
                                      str(tdays) + ' training days')

                    # save the fcast configs
                    if len(f_list) > 0:
                        s_ut.my_print('pid: ' + str(os.getpid()) +
                                      ' concatenating ' + str(len(f_list)) +
                                      ' DFs for ' + str(l))
                        l_df = pd.concat([f for f in f_list], axis=0)
                        l_df['language'] = l
                        s_ut.my_print('pid: ' + str(os.getpid()) +
                                      ' Language ' + str(l) + ' has ' +
                                      str(len(l_df)) + ' fcast cfgs with ' +
                                      str(tdays) + ' training days')
                        l_df.reset_index(inplace=True, drop=True)
                        l_df['ds'] = ds  # here we only save cfg's not fcasts. Use ds for partition
                        l_df['ts_name'] = ts_name
                        l_df['cutoff'] = ds
                        lang_list.append(l_df)
                        out_list.append(l_df)
                    else:
                        s_ut.my_print('pid: ' + str(os.getpid()) +
                                      ' WARNING: no DF for ' + str(l))
        else:
            s_ut.my_print('pid: ' + str(os.getpid()) +
                          ' WARNING: no training DF for ' + str(l))
        # ctr += 1
        # if ctr >= 2:
        #     break
        if len(lang_list) > 0:  # save language level results
            fl = pd.concat(lang_list, axis=0)
            p_ut.save_df(
                fl, '~/my_tmp/fcast_cfg_v2_' + ds + '_' + ts_name + '_' + l)

    # all training done for this TS. Save data
    if len(out_list) > 0:
        s_ut.my_print('pid: ' + str(os.getpid()) +
                      ' *************** saving training data ***********')
        df_all = pd.concat(out_list, axis=0)
        df_all.drop_duplicates(inplace=True)
        df_all.reset_index(inplace=True, drop=True)
        df_all['ds'] = ds  # here we only save cfg's not fcasts
        df_all['ts_name'] = ts_name
        df_all['cutoff'] = ds
        p_ut.save_df(df_all,
                     '~/my_tmp/fcast_cfg/fcast_cfg_v2_' + ds + '_' + ts_name)
        df_all.drop(['ds', 'ts_name'], inplace=True, axis=1)  # not needed to push
        partition_ = {'ds': ds, 'ts_name': ts_name}
        table = 'sup.fct_cx_forecast_config_v3'
        try:  # only hive works with the partition argument
            with s_ut.suppress_stdout_stderr():
                import airpy as ap
            ap.hive.push(df_all,
                         table=table,
                         if_exists=if_exists,
                         partition=partition_,
                         table_props={
                             'abb_retention_days': '-1',
                             'abb_retention_days_reason': 'fact table. No pii'
                         })
            s_ut.my_print('data saved to table ' + table + ' for ' + ts_name +
                          ' and ds ' + ds)
            print('DONE')
        except Exception as e:
            s_ut.my_print('ERROR: could not save to table ' + table + ' for ' + ts_name + ': ' + str(e))
    else:
        s_ut.my_print('ERROR: no output')
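The entry point is not shown in the source; a minimal sketch matching the usage string printed at the top of main() would be:

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])  # python lang_fcast.py <ts_name> <cutoff_date>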