def fit(self, y, period, x=None, metric="smape", val_size=None, verbose=False):
    """
    Build the model using best-tuned hyperparameter values.

    Hyperparameters are first selected by ``self._tune`` (which populates
    ``self.params`` and ``self.period``); a pydlm model made of a trend, a
    seasonality, an optional auto-regression and one dynamic component per
    exogenous column is then assembled, tuned and fitted on the full series.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
    :param metric: Str, name of the metric used for model selection, e.g. "mse" (mean squared error) or "mae"
        (mean absolute error). Defaults to "smape". The value is forwarded unchanged to ``_tune``.
    :param val_size: Int, the number of most recent observations to use as validation set for tuning.
    :param verbose: Boolean, True for printing additional info while tuning.
    :return: None
    """
    # Local import so this method does not rely on a module-level alias.
    import numpy as np

    self.y = y
    self.name = "Bayesian Dynamic Linear Model"
    self.key = "bdlm"
    self._tune(y=y, period=period, x=x, metric=metric, val_size=val_size, verbose=verbose)

    self.model = pydlm.dlm(y)
    self.model = self.model + pydlm.trend(degree=self.params["trend"], discount=0.5)
    self.model = self.model + pydlm.seasonality(period=self.period, discount=0.99)
    if self.params["ar"] is not None:
        self.model = self.model + pydlm.autoReg(degree=self.params["ar"], discount=0.99)

    if x is not None:
        # Iterating ``DataFrame.T`` yields labels, not column values, so go
        # through a plain array: each row of the transpose is one predictor.
        # For ndarray input this is a no-op conversion.
        for variable_id, x_variable in enumerate(np.asarray(x).T):
            self.model = self.model + pydlm.dynamic(
                features=[[v] for v in x_variable],
                discount=0.99,
                name=str(variable_id))

    with SuppressStdoutStderr():
        self.model.tune()
        self.model.fit()
def dlm_exogenous_r3(y, s, k, a, t, e, r):
    """ One way to use dlm with exogenous inputs.

        On the first call (empty state ``s``) the state dict and the model
        (trend + seasonality + fixed auto-regression, plus a dynamic 'exog'
        component when exogenous values are present) are created.
        With an observation ``y`` the data is appended, a forward filter is
        run and the prediction helper's result is returned. With ``y=None``
        the model is (optionally tuned and) fully fitted.

        :returns: x, s', w   (``None, None, s`` when called with ``y=None``)
    """

    def _none_for_nan(values):
        # pydlm uses None for missing values; features are a list of rows
        return [[None if np.isnan(v) else v for v in values]]

    if not s:
        # Cold start: hyper-parameters first, then the model skeleton
        s = dict()
        s['dim'] = dimension(y)
        s = dlm_set_exog_hyperparams(s=s, r=r)
        y0, exog = split_exogenous(y=y)
        s['n_obs'] = 0
        model = quietDlm([], printInfo=False) + trend(s['trend_degree'], s['discount'])
        model = model + seasonality(s['period'], s['discount'])
        model = model + fixedAutoReg(degree=s['auto_degree'], name='ar', w=1.0)
        if exog:
            # Sets the first exog row as part of the component itself
            model = model + dynamic(features=_none_for_nan(exog), discount=0.99, name='exog')
        s['model'] = model

    if y is not None:
        y = wrap(y)
        assert dimension(y) == s['dim'], 'Cannot change dimension of data sent'
        s['n_obs'] += 1
        y0, exog = split_exogenous(y=y)
        s['model'].append([None if np.isnan(y0) else y0])
        if exog:
            exog_row = _none_for_nan(exog)
            if s['n_obs'] > 1:
                # The first exog row went in with the component; don't add it twice
                s['model'].append(data=exog_row, component='exog')

        num_obs = len(s['model'].data) if s.get('model') else 0
        refit_every = s['n_fit']
        if num_obs % refit_every == refit_every - 1:
            # Periodic full refit through the y=None path
            _, _, s = dlm_exogenous_r3(y=None, s=s, k=k, a=a, t=t, e=10, r=r)
        s['model'].fitForwardFilter()
        return _dlm_exog_prediction_helper(s=s, k=k, y=y)

    # Reaching this point means y is None: offline (re)fit request.
    # NOTE(review): dimension() is evaluated on y, which is None here - confirm intent.
    if dimension(y) == 1:
        # Don't tune if exogenous ... haven't got this to work
        s['model'].tune(maxit=20)
    s['model'].fit()
    return None, None, s
def SerBayes(sDay,nAhead,x0,hWeek):
    """Fit a pydlm model to ``sDay['y']`` and plot the fit and its coefficients.

    The series index is parsed as ``YYYY-MM-DD`` dates, converted to scaled
    epoch seconds (seconds / 1e6) and used both as the new index and as the
    single feature of the dynamic component 'b'. The model is
    trend 'a' + dynamic 'b' + auto-regression 'ar3'.

    :param sDay: mapping/DataFrame whose ``'y'`` entry is a series indexed by
        values whose string form starts with ``YYYY-MM-DD``.
    :param nAhead: not used in this function body.
    :param x0: not used in this function body.
    :param hWeek: not used in this function body.
    :return: None (the function only produces plots).
    """
    # ``pd.datetime`` was removed in pandas 2.0; it was an alias of the
    # standard-library class, so this is a drop-in replacement.
    from datetime import datetime

    # Work on a copy so the caller's series is not re-indexed in place.
    dta = sDay['y'].copy()
    dta.index = [datetime.strptime(str(x)[0:10], '%Y-%m-%d') for x in dta.index]
    t_line = [float(calendar.timegm(x.utctimetuple())) / 1000000 for x in dta.index]
    dta.index = t_line

    model = pydlm.dlm(dta)
    model = model + pydlm.trend(degree=1, discount=0.98, name='a', w=10.0)
    model = model + pydlm.dynamic(features=[[v] for v in t_line], discount=1, name='b', w=10.0)
    model = model + pydlm.autoReg(degree=3, data=dta.values, name='ar3', w=1.0)
    model.evolveMode('independent')
    model.noisePrior(2.0)
    model.fit()

    # Queried after fit() so that filtered results exist; the original asked
    # for them before fitting. The value is currently unused.
    allStates = model.getLatentState(filterType='forwardFilter')

    model.plot()
    model.turnOff('predict')
    model.plotCoef(name='a')
    model.plotCoef(name='b')
    model.plotCoef(name='ar3')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ import numpy as np import matplotlib.pyplot as plt import pydlm # Simple example (random walk) n = 100 a = 1.0 + np.random.normal(0, 5, n) # the intercept x = np.random.normal(0, 2, n) # the control variable b = 3.0 # the coefficient y = a + b * x dlm = pydlm.dlm(y) dlm = dlm + pydlm.trend(degree=0, discount=0.98, name='a', w=10.0) dlm = dlm + pydlm.dynamic( features=[[v] for v in x], discount=1, name='b', w=10.0) # randomly generate data data = [0] * 100 + [3] * 100 # creadte model dlm = pydlm.dlm(data) # add components dlm = dlm + pydlm.trend(1, name='lineTrend', w=1.0) # covariance=1 dlm = dlm + pydlm.seasonality(7, name='7day', w=1.0) dlm = dlm + pydlm.autoReg(degree=3, data=data, name='ar3', w=1.0) dlm.ls() # delete unwanted component dlm.delete('7day')
def ts_fit(self, suppress=False):
    """Fit DLM to the time series data.

    The model is assembled from whichever of the configured components are
    not ``None`` (trend, seasonality, dynamic features, auto-regression,
    long season) and fitted either with a rolling-window forward filter plus
    backward smoother, or with a plain ``fit()``.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    -------
    self on success; -1 if building or fitting the model raised.

    Side effects: sets ``model_fit``, ``residuals``, ``mse`` and
    ``fittedvalues`` and, when available, ``lower_conf_int`` and
    ``upper_conf_int``.
    """
    self._prepare_fit()
    self._model = None
    self.ts_split()

    ts_df = self._train_dt.copy()

    # Fit
    self._dlm_logger.info("Trying to fit the DLM model....")
    try:
        if not suppress:
            self._dlm_logger.info("...via using parameters\n")
            print_attributes(self)

        ts_df = ts_df.reset_index()
        ts_df.columns = self._ts_df_cols

        self._model = dlm(ts_df['y'])

        # trend
        if self._dlm_trend is not None:
            self._model = self._model + trend(
                degree=self._dlm_trend['degree'],
                discount=self._dlm_trend['discount'],
                name=self._dlm_trend['name'],
                w=self._dlm_trend['w'])
        # seasonality
        if self._dlm_seasonality is not None:
            self._model = self._model + seasonality(
                period=self._dlm_seasonality['period'],
                discount=self._dlm_seasonality['discount'],
                name=self._dlm_seasonality['name'],
                w=self._dlm_seasonality['w'])
        # dynamic
        if self._train_dlm_dynamic is not None:
            dynamic_specs = self._train_dlm_dynamic['features']
            # Index-based access is kept on purpose: the container type is
            # not visible here (list vs. int-keyed mapping).
            for i in range(len(dynamic_specs)):
                spec = dynamic_specs[i]
                self._model = self._model + dynamic(
                    features=spec['features'],
                    discount=spec['discount'],
                    name=spec['name'],
                    w=spec['w'])
        # auto_reg
        if self._dlm_auto_reg is not None:
            self._model = self._model + autoReg(
                degree=self._dlm_auto_reg['degree'],
                discount=self._dlm_auto_reg['discount'],
                name=self._dlm_auto_reg['name'],
                w=self._dlm_auto_reg['w'])
        # long_season
        if self._dlm_long_season is not None:
            ls = longSeason(period=self._dlm_long_season['period'],
                            stay=self._dlm_long_season['stay'],
                            data=ts_df,
                            name=self._dlm_long_season['name'],
                            w=self._dlm_long_season['w'])
            self._model = self._model + ls

        if not suppress:
            self._dlm_logger.info("The constructed DLM model components:")
            print(self._model.ls())

        # tic
        start = time()
        if self._use_rolling_window:
            self._model.fitForwardFilter(useRollingWindow=True,
                                         windowLength=self._window_size)
            self._model.fitBackwardSmoother()
        else:
            self._model.fit()
        self.model_fit = self._model
        # toc
        if not suppress:
            self._dlm_logger.info("Time elapsed: {} sec.".format(time() - start))
    except Exception as e:
        # ValueError is already an Exception subclass; one clause suffices.
        self._dlm_logger.exception("DLM error...{}".format(e))
        return -1

    self._dlm_logger.info("Model successfully fitted to the data!")
    self._dlm_logger.info("Computing fitted values and residuals...")

    # Residuals
    self.residuals = pd.Series(self.model_fit.getResidual(),
                               index=self._train_dt.index)
    try:
        # Compute the interval once and reuse both halves: element 1 is
        # stored as the lower bound and element 0 as the upper bound.
        interval = self.model_fit.getInterval()
        self.lower_conf_int = pd.Series(interval[1],
                                        index=self._train_dt.index)
        self.upper_conf_int = pd.Series(interval[0],
                                        index=self._train_dt.index)
    except ValueError as e:
        # Best effort: the bounds stay unset but the fit is still usable.
        self._dlm_logger.exception(
            "Something went wrong in getInterval...{}".format(e))

    self.mse = self.model_fit.getMSE()

    # Fitted values
    # this is not elegant, but found no other way
    # NOTE(review): if getResidual() is observation minus filtered value, the
    # fitted series would be y - residuals rather than y + residuals - confirm.
    self.fittedvalues = self._train_dt['y'] + self.residuals

    return self
# Fragment of a plotting walkthrough: `simple_dlm`, `dlm`, `time_series`,
# `features`, `linear_trend` and `seasonal52` are defined outside this fragment.
simple_dlm.turnOff('data points')
simple_dlm.plot()
# Plot each component (attribution)
simple_dlm.turnOff('predict plot')
simple_dlm.turnOff('filtered plot')
simple_dlm.plot('linear_trend')
simple_dlm.plot('seasonal52')
# Plot the prediction given the first 350 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=350)
# Plot the prediction given the first 250 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=250)

# Build a dynamic regression model
from pydlm import dynamic
regressor10 = dynamic(features=features, discount=1.0, name='regressor10', w=10)
drm = dlm(time_series) + linear_trend + seasonal52 + regressor10
drm.fit()
# Plot the fitted results
drm.turnOff('data points')
drm.plot()
# Plot each component (attribution)
drm.turnOff('predict plot')
drm.turnOff('filtered plot')
drm.plot('linear_trend')
drm.plot('seasonal52')
drm.plot('regressor10')
# Plot the prediction given the first 300 weeks and forecast the next 150 weeks.
drm.plotPredictN(N=150, date=300)
# Plot the fitted results simple_dlm.turnOff('data points') simple_dlm.plot() # Plot each component (attribution) simple_dlm.turnOff('predict plot') simple_dlm.turnOff('filtered plot') simple_dlm.plot('linear_trend') simple_dlm.plot('seasonal52') # Plot the prediction give the first 350 weeks and forcast the next 200 weeks. simple_dlm.plotPredictN(N=200, date=350) # Plot the prediction give the first 250 weeks and forcast the next 200 weeks. simple_dlm.plotPredictN(N=200, date=250) # Build a dynamic regression model from pydlm import dynamic regressor10 = dynamic(features=features, discount=1.0, name='regressor10', w=10) drm = dlm(time_series) + linear_trend + seasonal52 + regressor10 drm.fit() # Plot the fitted results drm.turnOff('data points') drm.plot() # Plot each component (attribution) drm.turnOff('predict plot') drm.turnOff('filtered plot') drm.plot('linear_trend') drm.plot('seasonal52') drm.plot('regressor10') # Plot the prediction give the first 300 weeks and forcast the next 150 weeks. drm.plotPredictN(N=150, date=300) # Plot the prediction give the first 250 weeks and forcast the next 200 weeks.
def estimate_and_predict_dlm_PR(calendar,
                                df_propor_PR_ts,
                                punched_df,
                                end_train_date,
                                start_test_date,
                                start_of_this_year,
                                enable_sales,
                                pred_weeks=8,
                                locality=10,
                                r=0.05,
                                missing_val=201735):
    '''
    Forecast per-club cost by blending two pydlm models.

    For every club found in the punched data, two models are fitted on the
    club's cost series: `drm` (degree-2 trend + period-26 seasonality +
    auto-regression + a 'macro' regressor built from the forecast mean cost)
    and `drm_year` (auto-regression + degree-1 trend + 'macro_year', plus
    sales-ratio regressors when `enable_sales` is true), the latter trained
    only on rows whose week number lies within `locality` of
    `end_train_date`. The two N-step predictions are mixed with weights
    p1 = exp(-r * |end_train_date - start_of_this_year - 52|) and p2 = 1 - p1.

    accept the forecasting sales_proportion data as one regressor
    df_propor_PR_test: []

    Parameters (shapes inferred from the column accesses below - confirm
    against callers):
        calendar: frame merged on `calendar_date`; presumably supplies `wm_yr_wk_nbr`.
        df_propor_PR_ts: frame with `club`, week number, `per_nbr_fc` and
            `total_sales_across` columns.
        punched_df: frame with `club_nbr`, `posting_date` and `cost`.
        end_train_date / start_test_date: week numbers bounding train and test rows.
        start_of_this_year: week number used in the blending weight.
        enable_sales: add the sales-ratio regressors to `drm_year` when true.
        pred_weeks: number of steps to predict.
        locality: auto-regression degree and width of the local training window.
        r: decay rate of the blending weight.
        missing_val: week number that is interpolated when absent.

    return type: DataFrame with prediction result
    return: columns = [wm_yr_wk_nbr,club,yhat]
    '''
    res = pd.DataFrame()
    punched = punched_df.groupby(['club_nbr', 'posting_date'])['cost'].sum()
    # NOTE(review): this assigns an attribute called `column` (singular); it
    # does not rename anything - confirm whether `.columns`/`.name` was meant.
    punched.column = ['total_punched_wg']
    punched = punched.reset_index()
    punched = pd.merge(left=punched,
                       right=calendar,
                       how='left',
                       left_on='posting_date',
                       right_on='calendar_date').drop('calendar_date', axis=1)
    # mean wage among all clubs
    punched = removehurricane('cost', punched, 201733, 201739, sales=False)
    punched_mean = punched.groupby(['wm_yr_wk_nbr', 'posting_date'])['cost'].mean()
    punched_mean = punched_mean.reset_index()
    punched_mean.columns = ['wm_yr_wk_nbr', 'posting_date', 'cost']
    punched_mean['club_nbr'] = pd.Series(np.ones([punched_mean.shape[0]]))
    ##########################
    # Interpolate the missing week: date = date of (week - 2) + 14 days,
    # cost = average of the (week - 2) and (week + 2) costs, club_nbr = 1.
    if missing_val not in punched_mean.wm_yr_wk_nbr.tolist():
        punched_mean.loc[-1] = [
            missing_val,
            punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 1] + timedelta(days=14),
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 2] +
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, 2)].iloc[0, 2], 1
        ]  # adding a row
        punched_mean.index = punched_mean.index + 1
    #########################
    punched_mean1 = punched_mean.copy(deep=True)
    # Weighted smoothing of the current value with its two predecessors
    # (0.5 / 0.25 / 0.25); the leading NaNs from shift() are mean-filled below.
    punched_mean1['cost'] = 0.5 * punched_mean1['cost'] + 0.25 * punched_mean1[
        'cost'].shift(1) + 0.25 * punched_mean1['cost'].shift(2)
    ty = punched_mean1['cost'].mean()
    punched_mean1[['cost']] = punched_mean1[['cost']].fillna(value=ty)
    punched_mean1 = estimate_and_predict_prophet_PR(
        calendar,
        punched_mean1,
        end_train_date,
        start_test_date,
        daily_view=False,
        pred_days=120)  #predict the mean wages.
    punched_mean1 = punched_mean1.drop('club', axis=1)
    punched_mean1.columns = ['posting_date', 'PR_cost']
    punched_mean1 = pd.merge(left=punched_mean1,
                             right=calendar,
                             how='left',
                             left_on='posting_date',
                             right_on='calendar_date').drop('calendar_date', axis=1)
    # Historical (training-period) mean cost, smoothed the same way
    tmp = punched.groupby(['wm_yr_wk_nbr', 'posting_date'])['cost'].mean()
    tmp = tmp.reset_index()
    tmp.columns = ['wm_yr_wk_nbr', 'posting_date', 'PR_cost']
    tmp = tmp.loc[tmp.wm_yr_wk_nbr <= end_train_date]
    tmp['PR_cost'] = 0.5 * tmp['PR_cost'] + 0.25 * tmp['PR_cost'].shift(
        1) + 0.25 * tmp['PR_cost'].shift(2)
    ty = tmp['PR_cost'].mean()
    tmp[['PR_cost']] = tmp[['PR_cost']].fillna(value=ty)
    # History followed by the forecast mean cost: the 'macro' regressor source
    punched_mean = pd.concat([tmp, punched_mean1], axis=0)
    if missing_val not in punched_mean.wm_yr_wk_nbr.tolist():
        # NOTE(review): the row is built positionally (iloc column 0, column 1,
        # then the week number) - verify this matches the column order that
        # results from the concat above.
        tu = [
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 0] +
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, 2)].iloc[0, 0]
        ]
        tu.append(punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
            missing_val, -2)].iloc[0, 1] + timedelta(days=14))
        tu.append(missing_val)
        punched_mean.loc[-1] = tu  # adding a row
        punched_mean.index = punched_mean.index + 1  # shifting index
    punched_mean = punched_mean.sort_values(
        by='wm_yr_wk_nbr').reset_index().drop('index', axis=1)
    punched = punched.drop('posting_date', axis=1)

    # Per-club cost series, prepared the same way as `punched` above
    punched_pro = punched_df.groupby(['club_nbr', 'posting_date'])['cost'].sum()
    # NOTE(review): same `column` (singular) attribute assignment as above.
    punched_pro.column = ['total_punched_wg']
    punched_pro = punched_pro.reset_index()
    punched_pro = pd.merge(left=punched_pro,
                           right=calendar,
                           how='left',
                           left_on='posting_date',
                           right_on='calendar_date').drop('calendar_date', axis=1)
    punched_pro = removehurricane('cost', punched_pro, 201733, 201739, sales=False)
    #201735 is Maria Hurricane Missing
    #201737 is the Irma Hurricane
    club_ls = punched.club_nbr.unique()
    for club in club_ls:
        pro_club = punched_pro[punched_pro.club_nbr.isin([club])]
        #########################################
        # adding missing value
        if missing_val not in pro_club.wm_yr_wk_nbr.tolist():
            pro_club.loc[-1] = [
                club,
                pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, -2)].iloc[0, 1] + timedelta(days=14),
                0.5 * pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, -2)].iloc[0, 2] +
                0.5 * pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, 2)].iloc[0, 2], missing_val
            ]  # adding a row
            pro_club.index = pro_club.index + 1  # shifting index
        ####################################################
        pro_club = pro_club.sort_values(by='posting_date').reset_index().drop(
            'index', axis=1)
        pro_sales = df_propor_PR_ts.loc[df_propor_PR_ts.club == club].drop(
            ['club'], axis=1)
        pro_club = pro_club.drop(['club_nbr', 'posting_date'], axis=1)
        pro_club.columns = ['cost', 'wm_yr_wk_nbr']
        pro_sales['total_sales'] = pro_sales['total_sales_across'] * pro_sales[
            'per_nbr_fc']
        # Append lag-1 and lag-2 copies of total_sales as extra columns
        pro_sales = pd.concat(
            [pro_sales] + [pro_sales.total_sales.shift(x) for x in range(1, 3)],
            axis=1)
        pro_sales.columns = [
            'wm_yr_wk_nbr', 'per_nbr_fc', 'total_sales_across', 'total_sales_0',
            'sr_1', 'sr_2'
        ]
        #########################################
        # adding missing value
        if missing_val not in pro_sales.wm_yr_wk_nbr.unique().tolist():
            # Every column (including the week number itself) is the average
            # of its (week - 2) and (week + 2) values.
            tu = []
            for k in range(len(pro_sales.columns)):
                tu.append(
                    0.5 * pro_sales.loc[pro_sales.wm_yr_wk_nbr == wm_nbr_add(
                        missing_val, -2)].iloc[0, k] +
                    0.5 * pro_sales.loc[pro_sales.wm_yr_wk_nbr == wm_nbr_add(
                        missing_val, 2)].iloc[0, k])
            tu[0] = int(tu[0])
            pro_sales.loc[-1] = tu  # adding a row
            pro_sales.index = pro_sales.index + 1  # shifting index
        pro_sales = pro_sales.sort_values(
            by='wm_yr_wk_nbr').reset_index().drop('index', axis=1)
        # Right join keeps every week of the macro series; validate='1:1'
        # makes pandas raise if week numbers are duplicated on either side.
        pro_sales = pd.merge(left=pro_sales,
                             right=punched_mean,
                             how='right',
                             left_on='wm_yr_wk_nbr',
                             right_on='wm_yr_wk_nbr',
                             validate='1:1')
        pro_sales = pro_sales.drop(['posting_date'], axis=1)
        # Column-wise mean imputation of whatever the join left missing
        pro_sales = pro_sales.apply(lambda x: x.fillna(x.mean()), axis=0)
        pro_sales_train = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr <= end_train_date]
        pro_sales_test = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr >= start_test_date]
        # trend
        linear_trend = trend(degree=2, discount=0.98, name='linear_trend', w=8)
        # seasonality
        seasonal26 = seasonality(period=26, discount=1, name='seasonal26', w=12)
        # control variable (one single-element feature row per observation)
        sales0 = pro_sales_train['total_sales_0'].values.tolist()
        s0 = [[x] for x in sales0]
        sales1 = pro_sales_train['sr_1'].values.tolist()
        s1 = [[x] for x in sales1]
        sales2 = pro_sales_train['sr_2'].values.tolist()
        s2 = [[x] for x in sales2]
        macro = pro_sales_train['PR_cost'].values.tolist()
        m1 = [[x] for x in macro]
        #####################################
        s0 = dynamic(features=s0, discount=0.99, name='sales0', w=8)
        s1 = dynamic(features=s1, discount=0.99, name='sales1', w=6)
        # use the actual sales and forecasting sales amount
        s2 = dynamic(features=s2, discount=0.95, name='sales2', w=6)
        m1 = dynamic(features=m1, discount=0.99, name='macro', w=12)
        #e1 = dynamic(features=e1,discount=0.95,name='eff',w=6)
        # Only the macro regressor is attached; s0/s1/s2 are built but unused.
        drm = dlm(pro_club['cost']) + linear_trend + seasonal26 + autoReg(
            degree=locality, name='ar2', w=6) + m1  #+s0+s1+s2+m1
        drm.fit()
        #testset
        pro_sales_test = pro_sales_test.head(pred_weeks)
        sales0test = pro_sales_test['total_sales_0'].head(
            pred_weeks).values.tolist()
        s0test = [[x] for x in sales0test]
        sales1test = pro_sales_test['sr_1'].head(pred_weeks).values.tolist()
        s1test = [[x] for x in sales1test]
        sales2test = pro_sales_test['sr_2'].head(pred_weeks).values.tolist()
        s2test = [[x] for x in sales2test]
        macrotest = pro_sales_test['PR_cost'].head(pred_weeks).values.tolist()
        m1test = [[x] for x in macrotest]
        #efftest = testset['eff'].head(pred_weeks).values.tolist()
        #e1test = [[x] for x in efftest]
        # NOTE(review): the dict carries 'sales0'..'sales2' although `drm` only
        # contains the 'macro' component - confirm predictN tolerates extra keys.
        features = {
            'sales0': s0test,
            'sales1': s1test,
            'sales2': s2test,
            'macro': m1test
        }  #,'eff':e1test}
        (predictMean, predictVar) = drm.predictN(N=pred_weeks,
                                                 date=drm.n - 1,
                                                 featureDict=features)
        #locality
        pro_sales = pro_sales.drop(['sr_1', 'sr_2'], axis=1)
        pro_sales['ratio'] = pro_sales['total_sales_0'] / pro_sales[
            'total_sales_across']
        pro_sales['ratio_1'] = pro_sales['ratio'].shift(1)
        pro_sales['ratio_2'] = pro_sales['ratio'].shift(2)
        # Local window: rows with end_train_date - locality <= week <= end_train_date
        trainset1_year = pro_club.loc[
            pro_club.wm_yr_wk_nbr <= end_train_date].loc[
                pro_club.wm_yr_wk_nbr >= end_train_date - locality]
        trainset_year = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr <= end_train_date].loc[
                pro_sales.wm_yr_wk_nbr >= end_train_date - locality]
        # NOTE(review): the result of this apply() is discarded, so
        # `trainset_year` is not actually imputed here - confirm intent.
        trainset_year.apply(lambda x: x.fillna(x.mean()), axis=0)
        linear_trend_year = trend(degree=1,
                                  discount=0.99,
                                  name='linear_trend_year',
                                  w=10)
        sales0_year = trainset_year['ratio'].values.tolist()
        s0_year = [[x] for x in sales0_year]
        # use the forecast of the ratio of each club among total in PR area
        # since this is a local model, the total amount in area can be assumed to be constant.
        sales1_year = trainset_year['ratio_1'].values.tolist()
        s1_year = [[x] for x in sales1_year]
        sales2_year = trainset_year['ratio_2'].values.tolist()
        s2_year = [[x] for x in sales2_year]
        macro_year = trainset_year['PR_cost'].values.tolist()
        m1_year = [[x] for x in macro_year]
        #####################################
        s0_year = dynamic(features=s0_year,
                          discount=0.99,
                          name='sales0_year',
                          w=10)
        s1_year = dynamic(features=s1_year,
                          discount=0.99,
                          name='sales1_year',
                          w=8)
        s2_year = dynamic(features=s2_year,
                          discount=0.95,
                          name='sales2_year',
                          w=6)
        m1_year = dynamic(features=m1_year,
                          discount=0.99,
                          name='macro_year',
                          w=10)
        #e1_year = dynamic(features=e1_year,discount=0.95,name='eff_year',w=6)
        if enable_sales:
            drm_year = dlm(trainset1_year['cost']) + autoReg(
                degree=locality, name='ar2', w=5
            ) + linear_trend_year + m1_year + s0_year + s1_year + s2_year
        else:
            drm_year = dlm(trainset1_year['cost']) + autoReg(
                degree=locality, name='ar2',
                w=5) + linear_trend_year + m1_year  #+s0_year+s1_year+s2_year
        drm_year.fit()
        testset_year = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr >= start_test_date].head(pred_weeks)
        sales0test = testset_year['ratio'].head(pred_weeks).values.tolist()
        s0test = [[x] for x in sales0test]
        sales1test = testset_year['ratio_1'].head(pred_weeks).values.tolist()
        s1test = [[x] for x in sales1test]
        sales2test = testset_year['ratio_2'].head(pred_weeks).values.tolist()
        s2test = [[x] for x in sales2test]
        # `m1test` is reused from the first model's test features
        features_year = {
            'sales0_year': s0test,
            'sales1_year': s1test,
            'sales2_year': s2test,
            'macro_year': m1test
        }
        (predictMean_year,
         predictVar_year) = drm_year.predictN(N=pred_weeks,
                                              date=drm_year.n - 1,
                                              featureDict=features_year)
        weeklist = []
        # Blending weights between the full-history and the local model
        p1 = np.exp(-r * (abs(end_train_date - start_of_this_year - 52)))
        p2 = 1 - p1
        for k in range(pred_weeks):
            # week labels advance by 2 per step via wm_nbr_add
            # NOTE(review): presumably a bi-weekly cadence - confirm
            weeklist.append(wm_nbr_add(start_test_date, 2 * k))
        if res.shape[0] == 0:
            # First club: fill the (still empty) result frame directly
            res['wm_yr_wk_nbr'] = weeklist
            res['club'] = pd.Series(club * np.ones(pred_weeks), index=res.index)
            res['yhat'] = pd.Series(p1 * np.asarray(predictMean) +
                                    p2 * np.asarray(predictMean_year),
                                    index=res.index)
        else:
            # Later clubs: build a frame of the same shape and append it
            tmp = pd.DataFrame()
            tmp['wm_yr_wk_nbr'] = weeklist
            tmp['club'] = pd.Series(club * np.ones(pred_weeks), index=tmp.index)
            tmp['yhat'] = pd.Series(p1 * np.asarray(predictMean) +
                                    p2 * np.asarray(predictMean_year),
                                    index=tmp.index)
            res = pd.concat([res, tmp], axis=0)
    return res
def _tune(self, y, period, x=None, metric="smape", val_size=None, verbose=False):
    """
    Tune hyperparameters of the model.

    Every combination in the parameter grid is fitted on the training part of
    ``y`` and scored on the held-out validation part; the best-scoring
    combination (lowest score) is merged into ``self.params`` and
    ``self.params["tuned"]`` is set to True. ``self.period`` is set as a side
    effect. A combination that raises while being built, fitted or scored is
    given a score of ``np.inf`` instead of aborting the search.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
    :param metric: Str, name of the metric used for model selection, e.g. "mse" (mean squared error) or "mae"
        (mean absolute error). Defaults to "smape". The name is resolved through ``get_metric``.
    :param val_size: Int, the number of most recent observations to use as validation set for tuning.
        Defaults to 10% of ``len(y)``.
    :param verbose: Boolean, True for printing additional info while tuning (currently not used in this method).
    :return: None
    """
    # isinstance also accepts str subclasses, unlike an exact type comparison.
    self.period = data_utils.period_to_int(period) if isinstance(period, str) else period
    val_size = int(len(y) * .1) if val_size is None else val_size
    y_train, y_val = model_utils.train_val_split(y, val_size=val_size)
    if x is not None:
        x_train, x_val = model_utils.train_val_split(x, val_size=val_size)
    metric_fun = get_metric(metric)

    params_grid = {
        "trend": [0, 1, 2, 3],
        "ar": [None],
        # "ar": [None, 1, 2, 3],
    }
    params_keys, params_values = zip(*params_grid.items())
    params_permutations = [
        dict(zip(params_keys, v)) for v in itertools.product(*params_values)
    ]

    scores = []
    for permutation in params_permutations:
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model = pydlm.dlm(y_train)
                model = model + pydlm.trend(degree=permutation["trend"], discount=0.5)
                model = model + pydlm.seasonality(period=self.period, discount=0.99)
                if permutation["ar"] is not None:
                    model = model + pydlm.autoReg(degree=permutation["ar"], discount=0.99)

                if x is not None:
                    # Iterating ``DataFrame.T`` yields labels, not column
                    # values; a plain array gives one predictor per row of
                    # the transpose (no-op for ndarray input).
                    for variable_id, x_variable in enumerate(np.asarray(x_train).T):
                        model = model + pydlm.dynamic(
                            features=[[v] for v in x_variable],
                            discount=0.99,
                            name=str(variable_id))

                with SuppressStdoutStderr():
                    model.tune()
                    model.fit()

                if x is not None:
                    # Feature rows for the forecast horizon, keyed by the
                    # component names used above.
                    x_val_dict = {
                        str(variable_id): [[v] for v in x_variable]
                        for variable_id, x_variable in enumerate(np.asarray(x_val).T)
                    }
                else:
                    x_val_dict = None

                y_pred = model.predictN(date=model.n - 1,
                                        N=len(y_val),
                                        featureDict=x_val_dict)[0]
                score = metric_fun(y_val, y_pred)
                scores.append(score)
        except Exception:
            # Deliberate best effort: a failing combination is simply ranked
            # last. ``Exception`` (instead of a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            scores.append(np.inf)

    best_params = params_permutations[np.nanargmin(scores)]
    self.params.update(best_params)
    self.params["tuned"] = True