def combshd(train, horizon, seasonality, init):
    """Average the forecasts of three exponential-smoothing models.

    Author: Sandeep Pawar
    Date: 8/30/2020
    version: 1.1

    ``train`` is Box-Cox transformed, three state-space
    ``ExponentialSmoothing`` models are fitted on the transformed data, each
    forecast is mapped back with the inverse Box-Cox transform, and the
    three back-transformed forecasts are averaged with equal weights.

    Args:
        train: Data handed to ``boxcox``; the call must return a
            ``(transformed, lambda)`` pair.
        horizon: Number of steps passed to each model's ``forecast``.
        seasonality: Value passed as ``seasonal`` to the second and third
            models (the first model is fitted with ``seasonal=None``).
        init: Value passed as ``initialization_method`` to every model.

    Returns:
        The element-wise mean of the three back-transformed forecasts.
    """
    transformed, lam = boxcox(train)

    # (seasonal, damped_trend) of each ensemble member, in fitting order.
    member_settings = (
        (None, False),
        (seasonality, False),
        (seasonality, True),
    )

    forecasts = []
    for seasonal, damped in member_settings:
        fitted = sm.tsa.statespace.ExponentialSmoothing(
            transformed,
            trend=True,
            seasonal=seasonal,
            initialization_method=init,
            damped_trend=damped,
        ).fit()
        forecasts.append(inv_boxcox(fitted.forecast(horizon), lam))

    first, second, third = forecasts
    return (first + second + third) / 3
def pellet_forecast(train, forecast_horizon):
    """Forecast with an equal-weight blend of three smoothing models.

    The training data is Box-Cox transformed, then three state-space
    ``ExponentialSmoothing`` models (all with ``trend=True`` and
    ``initialization_method='estimated'``) are fitted: one without a
    seasonal component, one with ``seasonal=12``, and one with
    ``seasonal=12`` plus a damped trend.  Every forecast is mapped back
    through the inverse Box-Cox transform before averaging.

    Args:
        train: Data handed to ``boxcox``; the call must return a
            ``(transformed, lambda)`` pair.
        forecast_horizon: Number of steps passed to ``forecast``.

    Returns:
        The element-wise mean of the three back-transformed forecasts.
    """
    train_bc, lmbda = boxcox(train)

    def _member_forecast(seasonal, damped):
        # Fit one ensemble member and return its forecast on the original scale.
        model = sm.tsa.statespace.ExponentialSmoothing(
            train_bc,
            trend=True,
            seasonal=seasonal,
            initialization_method='estimated',
            damped_trend=damped,
        )
        return inv_boxcox(model.fit().forecast(forecast_horizon), lmbda)

    trend_only = _member_forecast(None, False)
    seasonal_trend = _member_forecast(12, False)
    seasonal_damped = _member_forecast(12, True)
    return (trend_only + seasonal_trend + seasonal_damped) / 3
def build_model(train, test, bc_lambda):
    """Fit an OLS model on the Box-Cox target and report test R-squared.

    The regressors come from the module-level ``model_vars``.  The fitted
    summary and two R-squared values (transformed scale and original scale,
    both on ``test``) are printed.  ``build_evaluate_model`` is then called
    with the same data; its return value is discarded.

    Args:
        train: Frame with ``property_crime``, ``property_crime_bc`` and the
            ``model_vars`` columns; used for fitting.
        test: Frame with the same columns; used for evaluation.
        bc_lambda: Lambda passed to ``inv_boxcox`` to map predictions back.

    Returns:
        Tuple ``(lm1, pred_train, pred_test, resids_train, resids_test)``
        where predictions and residuals are on the original scale.
    """
    # Now let's see how well we can predict the Box-Cox transform of
    # property_crime.
    regressors = ' + '.join(model_vars)
    lm1 = smf.ols(formula="property_crime_bc ~ " + regressors, data=train).fit()
    print(lm1.summary())

    # Back-transform the fitted and predicted values to the original scale.
    pred_train = inv_boxcox(lm1.fittedvalues, bc_lambda)
    pred_test = inv_boxcox(lm1.predict(test), bc_lambda)
    resids_train = train["property_crime"] - pred_train
    resids_test = test["property_crime"] - pred_test

    r2_test_bc = r2_score(test["property_crime_bc"], lm1.predict(test))
    r2_test = r2_score(test["property_crime"], pred_test)
    print("R-squared (Property Crime Box-Cox): {}".format(r2_test_bc))
    print("R-squared (Property Crime): {}".format(r2_test))

    # Called for its printed report only; the returned objects are not used.
    build_evaluate_model(train, test, bc_lambda, model_vars)
    return lm1, pred_train, pred_test, resids_train, resids_test
def train_arima(dataframe, p, d, q, test, BOXCOX):
    """Fit an ARIMA(p, d, q) model and predict over the index of ``test``.

    When ``BOXCOX == "Yes"`` the ``closing_balance`` column is Box-Cox
    transformed before fitting, and both the fitted values and the
    predictions are mapped back with the inverse transform afterwards.
    The transform is applied to a copy, so the caller's ``dataframe`` is
    left untouched.

    Args:
        dataframe: Training data passed to ``ARIMA``; must contain a
            ``closing_balance`` column when the Box-Cox path is used.
        p, d, q: ARIMA order.
        test: Object whose ``index`` supplies the first/last prediction
            positions and the index of the returned predictions.
        BOXCOX: The string ``"Yes"`` enables the Box-Cox transform; any
            other value disables it.

    Returns:
        dict with keys ``'model'`` (fitted results), ``'predictions'``
        (DataFrame with a single column ``'y'``) and ``'lam'`` (the
        Box-Cox lambda, or ``None`` when no transform was applied).
    """
    print(dataframe)
    use_boxcox = BOXCOX == "Yes"
    if use_boxcox:
        # Copy first: assigning the transformed column in place would
        # silently overwrite the caller's data.
        dataframe = dataframe.copy()
        dataframe['closing_balance'], lam = boxcox(
            dataframe['closing_balance'])
    else:
        lam = None

    model = ARIMA(dataframe, order=(p, d, q))
    results_ARIMA = model.fit(disp=-1)
    ARIMA_predict = pd.DataFrame(
        results_ARIMA.predict(start=test.index[0], end=test.index[-1]),
        index=test.index)
    print(results_ARIMA.fittedvalues)

    if use_boxcox:
        # Report fitted values and predictions on the original scale.
        results_ARIMA.fittedvalues = inv_boxcox(results_ARIMA.fittedvalues,
                                                lam)
        ARIMA_predict = inv_boxcox(ARIMA_predict, lam)

    ARIMA_predict.columns = ['y']
    return {
        'model': results_ARIMA,
        'predictions': ARIMA_predict,
        'lam': lam,
    }
def inverse_transform(self, X):
    """Undo the standardisation and the Box-Cox transform applied to ``X``.

    ``X`` is first rescaled with ``self.xstd`` and ``self.xmean``.  A scalar
    ``self.lmbda`` is then applied to the whole array; otherwise
    ``self.lmbda`` is iterated and its j-th entry is applied to column
    ``j`` (which requires ``X`` to be two-dimensional).

    Any scalar lambda is accepted (Python ``int``/``float`` or a NumPy
    scalar), not only instances of ``float``.

    NOTE(review): the original carried a commented-out ``- self.shift`` on
    the return; no shift is removed here.

    Args:
        X: Array of standardised, Box-Cox transformed values.

    Returns:
        Array of the same shape as ``X`` on the original scale.
    """
    xtemp = X * self.xstd + self.xmean

    # ndim == 0 covers every scalar type; an isinstance(..., float) check
    # would push e.g. an int lambda into the per-column loop and fail.
    if numpy.ndim(self.lmbda) == 0:
        return inv_boxcox(xtemp, self.lmbda)

    xinv = numpy.zeros(shape=X.shape)
    for j, lmb in enumerate(self.lmbda):
        xinv[:, j] = inv_boxcox(xtemp[:, j], lmb)
    return xinv
def inverse_transform(self, X):
    """Map standardised Box-Cox values back to the original scale.

    The standardisation is reverted with ``self.xstd`` / ``self.xmean``
    and the inverse Box-Cox transform is applied afterwards: once for the
    whole array when ``self.lmbda`` is a scalar, or column by column when
    ``self.lmbda`` is a sequence (``X`` must then be two-dimensional).

    Scalars of any numeric type are recognised (``int``, ``float``, NumPy
    scalars), not only instances of ``float``.

    NOTE(review): the original carried a commented-out ``- self.shift`` on
    the return; no shift is removed here.

    Args:
        X: Array of standardised, Box-Cox transformed values.

    Returns:
        Array of the same shape as ``X`` on the original scale.
    """
    unscaled = X * self.xstd + self.xmean

    lmbda_is_scalar = numpy.ndim(self.lmbda) == 0
    if lmbda_is_scalar:
        return inv_boxcox(unscaled, self.lmbda)

    # One lambda per column.
    restored = numpy.zeros(shape=X.shape)
    for col, col_lmbda in enumerate(self.lmbda):
        restored[:, col] = inv_boxcox(unscaled[:, col], col_lmbda)
    return restored
def gbm_error_func(y_pred, y_true):
    """Evaluate ``error_func`` on the original target scale.

    The module-level ``target`` selects how both inputs are mapped back
    before scoring: ``'log_y'`` applies ``np.exp``, ``'boxcox_y'`` applies
    ``inv_boxcox`` with the module-level ``lba_boxcox``; any other value
    leaves the inputs unchanged.

    Args:
        y_pred: Predicted values on the (possibly transformed) scale.
        y_true: Reference values on the same scale.

    Returns:
        Tuple ``('error', value, False)`` where ``value`` is the result of
        ``error_func(y_pred, y_true)`` after back-transformation.
    """
    if target == 'log_y':
        y_pred, y_true = np.exp(y_pred), np.exp(y_true)
    elif target == 'boxcox_y':
        y_pred = inv_boxcox(y_pred, lba_boxcox)
        y_true = inv_boxcox(y_true, lba_boxcox)
    return 'error', error_func(y_pred, y_true), False
def renormalize(self):
    """Store the prediction on the original scale and return it as a frame.

    ``self.predict[0]`` is taken as the mean prediction and the two columns
    of ``self.predict[1]`` as the lower and upper bounds.  A
    ``self.lambda_boxcox`` equal to ``-999.0`` means the values are used as
    they are; otherwise all three are passed through ``inv_boxcox``.

    Sets ``self.predict_mean``, ``self.predict_down`` and
    ``self.predict_up``.

    Returns:
        DataFrame indexed by ``self.dates_prev`` with a single column
        ``'endo_value'`` holding the mean prediction.
    """
    mean = self.predict[0]
    lower = self.predict[1][:, 0]
    upper = self.predict[1][:, 1]

    if self.lambda_boxcox != float(-999):
        mean = inv_boxcox(mean, self.lambda_boxcox)
        lower = inv_boxcox(lower, self.lambda_boxcox)
        upper = inv_boxcox(upper, self.lambda_boxcox)

    self.predict_mean = mean
    self.predict_down = lower
    self.predict_up = upper

    # Join predict dates with values into a dataframe
    df_final = pd.DataFrame(self.predict_mean, self.dates_prev)
    df_final.columns = ['endo_value']
    return df_final
def predict(self, dataset):
    """Predict from ``dataset`` and return values on the original scale.

    The columns listed in ``self.columns`` are selected, passed through
    ``self.scaler.transform`` and ``self.pca.transform``, and fed to
    ``self.estimator.predict``.  The estimator output is mapped back with
    ``inv_boxcox`` using ``self.lmbda``.
    """
    features = dataset[self.columns]
    reduced = self.pca.transform(self.scaler.transform(features))
    return inv_boxcox(self.estimator.predict(reduced), self.lmbda)
def transform_test(df, column):
    """Print a Box-Cox / standardisation round trip for one column.

    The column values are Box-Cox transformed, standardised, and then both
    steps are reverted.  The mean and standard deviation after each stage
    are printed so the round trip can be checked by eye.  Nothing is
    returned and ``df`` is not modified.

    Args:
        df: Frame containing ``column``.
        column: Name of the column to run the round trip on.
    """
    values = df[column].values

    # 1. Boxcox transform
    boxcox_transformed, max_log = boxcox(values)

    # 2. Standardize
    mean_value = boxcox_transformed.mean()
    std_value = boxcox_transformed.std()
    standardized = (boxcox_transformed - mean_value) / std_value

    # 3. Inverse standardize
    inverse_standardized = (standardized * std_value) + mean_value

    # 4. Inverse Boxcox transform
    inversed_boxcox = inv_boxcox(inverse_standardized, max_log)

    print('Test Results:')
    print(f'max_log = {max_log}')
    print(f' * Intial Array: mean = {values.mean()}, std = {values.std()}')
    print(f' * Boxcox Transformed: mean = {boxcox_transformed.mean()}, std = {boxcox_transformed.std()}')
    print(f' * Standardized: mean = {standardized.mean()}, std = {standardized.std()}')
    print(f' * Inverse Standardized: mean = {inverse_standardized.mean()}, std = {inverse_standardized.std()}')
    print(f' * Inverse Boxcox: mean = {inversed_boxcox.mean()}, std = {inversed_boxcox.std()}')
def train_theta_boxcox(ts, seasonality, n):
    """Forecast ``n`` steps with a Theta model fitted on Box-Cox data.

    The series is shifted if it contains negative values, Box-Cox
    transformed, fitted with ``Theta(theta=0)`` without seasonality, and
    the forecast is mapped back, un-shifted, multiplied by ``seasonality``
    and finally has its negative values replaced by zero.

    Parameters
    ----------
    ts
        Training series; must expose ``univariate_values()``,
        ``time_index()`` and ``min()``.
    seasonality
        Series whose ``time_index()`` becomes the index of the forecast and
        by which the forecast is multiplied.
    n
        Number of steps passed to ``Theta.predict``.

    Returns
    -------
    The forecast series after all post-processing steps.
    """
    theta_bc = Theta(theta=0, season_mode=SeasonalityMode.NONE)
    shiftdata = 0
    # Offset the series before the transform when it has negative values.
    if (ts.univariate_values() < 0).any():
        shiftdata = -ts.min() + 100
        ts = ts + shiftdata
    new_values, lmbd = boxcox(ts.univariate_values())
    if lmbd < 0:
        # Negative estimated lambda: probe N=100 lambdas between lmbd - 1
        # and 0 with boxcox_normplot and possibly pick another one.
        lmbds, value = boxcox_normplot(ts.univariate_values(), lmbd - 1, 0, N=100)
        if np.isclose(value[0], 0):
            lmbd = lmbds[np.argmax(value)]
            new_values = boxcox(ts.univariate_values(), lmbd)
        # Transformed series is (numerically) constant: fall back to
        # lambda 0.
        if np.isclose(new_values, new_values[0]).all():
            lmbd = 0
            new_values = boxcox(ts.univariate_values(), lmbd)
    ts = TimeSeries.from_times_and_values(ts.time_index(), new_values)
    theta_bc.fit(ts)
    forecast = theta_bc.predict(n)
    # Back to the original scale, re-indexed on the seasonality series.
    new_values = inv_boxcox(forecast.univariate_values(), lmbd)
    forecast = TimeSeries.from_times_and_values(seasonality.time_index(), new_values)
    if shiftdata > 0:
        forecast = forecast - shiftdata
    forecast = forecast * seasonality
    # Replace negative forecast values by zero.
    if (forecast.univariate_values() < 0).any():
        indices = seasonality.time_index()[forecast < 0]
        # NOTE(review): relies on update(..., inplace=True) returning the
        # series; confirm against the TimeSeries API in use.
        forecast = forecast.update(indices, np.zeros(len(indices)), inplace=True)
    return forecast
def model_predict(model, X):
    """Predict with a model bundle, undoing its target transform if any.

    Args:
        model: Mapping with a ``'model'`` entry exposing ``predict``.  An
            optional ``'transform'`` entry is indexed at position 1 for the
            Box-Cox lambda and at position 2 for the value subtracted after
            the inverse transform.
        X: Input passed to ``model['model'].predict``.

    Returns:
        The raw prediction when no ``'transform'`` entry exists, otherwise
        ``inv_boxcox(prediction, lambda) - offset``.
    """
    prediction = model['model'].predict(X)
    if 'transform' not in model:
        return prediction
    transform = model['transform']
    return inv_boxcox(prediction, transform[1]) - transform[2]
def briquette_forecast(series_with_dates, forecast_horizon):
    """Blend the ``combshd`` ensemble with a SARIMA forecast (50/50).

    Side effect: ``series_with_dates.index.freq`` is set to ``'MS'``.

    Args:
        series_with_dates: Series whose ``values`` feed ``combshd`` and
            which is Box-Cox transformed for the SARIMA model.
        forecast_horizon: Number of steps to forecast.

    Returns:
        The ``values`` of a ``pd.Series`` holding the mean of the two
        forecasts.
    """
    series_with_dates.index.freq = 'MS'

    # ETS ensemble, fitted by combshd on the raw values.
    raw_values = series_with_dates.values
    transformed, lam = boxcox(series_with_dates)
    ets_fc = combshd(raw_values, horizon=forecast_horizon, seasonality=12,
                     init='concentrated')

    # SARIMA on the Box-Cox scale, mapped back after prediction.
    sarima_fit = SARIMAX(
        endog=transformed,
        order=(1, 1, 1),
        seasonal_order=(1, 0, 1, 12),
        trend='c',
        enforce_invertibility=False,
    ).fit()
    first_step = len(transformed)
    last_step = len(transformed) + forecast_horizon - 1
    sarima_fc = inv_boxcox(
        sarima_fit.predict(first_step, last_step, dynamic=False), lam)

    return pd.Series((ets_fc + sarima_fc) / 2).values
def predict(self, metrics, **kwargs):
    """
    Generate a prediction of the duration of a CSV recreate job.

    The caller passes a snapshot of the metrics of the cluster on which the
    job is going to run.  Only ``metrics.AvailableMB`` and
    ``metrics.AvailableVCores`` are read; they are fed to the model as the
    features ``YARN_AVAILABLE_MEMORY`` and ``YARN_AVAILABLE_VIRTUAL_CORES``.

    :param metrics: object exposing ``AvailableMB`` and ``AvailableVCores``
    :param kwargs: accepted but not used by this method
    :return: model prediction after the inverse Box-Cox transform with
        ``self._config['boxcoxMaxLog']`` (documented upstream as a duration
        in seconds)
    """
    # Feature selection
    feature_names = ['YARN_AVAILABLE_MEMORY', 'YARN_AVAILABLE_VIRTUAL_CORES']
    row = np.array([
        float(metrics.AvailableMB),
        float(metrics.AvailableVCores),
    ])
    data = xgboost.DMatrix(row.reshape(1, -1), feature_names=feature_names)

    # Generate the prediction and map it back from the Box-Cox scale.
    prediction = self._model.predict(data)
    return inv_boxcox(prediction, self._config['boxcoxMaxLog'])
def predict(self, steps):
    """Forecast ``steps`` periods ahead with every fitted model.

    For each entry of ``self.models`` the model's ``predict`` is called
    with Fourier terms as exogenous input and a confidence interval; the
    point forecast and the interval bounds are stored in
    ``self.fcst[item]`` and, when ``self.use_boxcox`` is set, mapped back
    with the item's lambda from ``self.lmbda_boxcox``.

    The forecast index is the ``steps`` daily dates following the training
    range (which starts at ``self.train_ds.min()`` and has
    ``len(self.train_ds)`` periods), so any horizon works, not only 365.

    Args:
        steps: Number of periods to forecast.

    Returns:
        DataFrame concatenating all per-item forecasts along the columns
        (``pd.concat(self.fcst, axis=1)``).
    """
    print("Forecasting...")
    progress_bar = ProgressBar(len(self.models))

    history_len = len(self.train_ds)
    # Keep exactly the `steps` dates after the training range so the index
    # length always matches the length of the predictions.
    self.fcst_ds = pd.date_range(
        start=self.train_ds.min(),
        freq="D",
        periods=history_len + steps)[history_len:]

    for item, model in self.models.items():
        pred = model.predict(
            exogenous=fourier(
                steps,
                seasonality=self.seasonality,
                n_terms=self.n_fourier_terms),
            n_periods=steps,
            return_conf_int=True,
            alpha=(1.0 - self.confidence_interval))
        self.fcst[item] = pd.DataFrame(
            {"yhat": pred[0],
             "yhat_lower": pred[1][:, 0],
             "yhat_upper": pred[1][:, 1]},
            index=self.fcst_ds)
        if self.use_boxcox:
            self.fcst[item] = inv_boxcox(
                self.fcst[item], self.lmbda_boxcox[item])
        progress_bar.update()

    progress_bar.finish()
    return pd.concat(self.fcst, axis=1)
def inverse_transform(self, x):
    """
    Scale back the data to the original representation.

    Each column of the input is paired with its stored shift and lambda.
    Columns whose lambda matches the ``np.nan`` / ``np.inf`` case are
    passed through unchanged; all others get ``inv_boxcox(col, lmd) - shift``.

    Parameters
    ----------
    x: DataFrame, Series, ndarray, list
        The data used to scale along the features axis.

    Returns
    -------
    ndarray
        Inverse transformed data; flattened with ``ravel()`` when the
        stored ``self._shape`` has length 1.
    """
    # Normalise the input type; the result must expose ``.T``.
    x = self._check_type(x)
    xs = []
    for col, shift, lmd in zip(x.T, self._shift, self._lmd):
        # NOTE(review): matching semantics come from the project's Switch
        # helper, including how it compares against np.nan; confirm there.
        for case in Switch(lmd):
            if case(np.nan, np.inf):
                _x = col
                break
            if case():
                # Default case: undo Box-Cox, then remove the shift.
                _x = inv_boxcox(col, lmd) - shift
        # Collect each column as an (n, 1) array for concatenation.
        xs.append(_x.reshape(-1, 1))
    xs = np.concatenate(xs, axis=1)
    if len(self._shape) == 1:
        return xs.ravel()
    return xs
def _inverse_boxcox(self, predicted, lambda_param):
    """Apply the inverse Box-Cox transformation to ``predicted``.

    For ``lambda_param == 0`` the result is ``np.exp(predicted)`` and is
    returned directly.  For any other lambda the output of ``inv_boxcox``
    is additionally passed through ``self._filling_gaps``.
    """
    if lambda_param == 0:
        return np.exp(predicted)
    return self._filling_gaps(inv_boxcox(predicted, lambda_param))
def inverse(self, ts_array):
    """Revert the transform selected by ``self.method``.

    ``None`` returns the input unchanged, ``'log'`` applies ``np.expm1``
    and any other value applies ``inv_boxcox`` with ``self.param``.
    """
    method = self.method
    if method is None:
        return ts_array
    if method == 'log':
        return np.expm1(ts_array)
    return inv_boxcox(ts_array, self.param)
def pd_invboxcox(Data, lambdas):
    """Apply the inverse Box-Cox transform column by column.

    Args:
        Data: DataFrame of Box-Cox transformed values.
        lambdas: Indexable of lambdas; ``lambdas[i]`` is used for the i-th
            column of ``Data``.

    Returns:
        New DataFrame with the same index and columns as ``Data`` holding
        the back-transformed values.
    """
    restored = np.empty(Data.shape)
    position = 0
    for name in Data.columns:
        restored[:, position] = inv_boxcox(Data[name], lambdas[position])
        position += 1
    return pd.DataFrame(restored, index=Data.index, columns=Data.columns)
def inverse_transform(boxcoxT, transform, y):
    """Revert the Box-Cox transform on every column of ``y`` but the first.

    Only ``transform == 'BoxCox'`` triggers any work; ``None`` and every
    other value return ``y`` untouched.  The frame is modified in place and
    also returned.

    Args:
        boxcoxT: Indexable of lambdas; entry ``k`` belongs to column
            ``k + 1`` of ``y``.
        transform: Name of the transform that was applied, or ``None``.
        y: DataFrame whose first column is left as is.

    Returns:
        The same ``y`` object.
    """
    if transform is None or transform != 'BoxCox':
        return y
    for pos in range(1, len(y.columns.tolist())):
        y.iloc[:, pos] = inv_boxcox(y.iloc[:, pos], boxcoxT[pos - 1])
    return y
def predict_epsilon(self, x, boxcox_lambda=0.2):
    """
    Predict epsilon.

    The private ``__predict`` helper is called with ``self.model_epsilon``
    and ``x``; its output is mapped back with the inverse Box-Cox
    transform.

    Args:
        x -- SMILES(str)
        boxcox_lambda -- int/float(preset to 0.2 for preloaded model)
    """
    transformed = self.__predict(self.model_epsilon, x)
    return inv_boxcox(transformed, boxcox_lambda)
def run(df, fold):
    """Train on all folds but ``fold`` and score on the held-out fold.

    The target ``Cost`` is Box-Cox transformed with a fixed lambda before
    fitting the ``'rf'`` model from ``model_dispatcher``; validation
    predictions are mapped back with the same lambda before scoring.

    Args:
        df: Frame with a ``kfold`` column, the ``Cost`` target, a
            ``Customer Id`` column and the feature columns.
        fold: Value of ``kfold`` used as the validation fold.

    Returns:
        The score returned by ``calc_metric.calc_score`` for the fold.
    """
    # Fixed Box-Cox lambda used for the target in both directions.
    cost_lambda = -0.398686

    drop_cols = [
        'scheduled_year', 'scheduled_weekofyear', 'scheduled_month',
        'scheduled_dayofweek', 'scheduled_weekend', 'delivery_year',
        'delivery_weekofyear', 'delivery_month', 'delivery_dayofweek',
        'delivery_weekend', "City", "Code",
    ]
    try:
        df = df.drop(drop_cols, axis=1)
    except KeyError:
        # Best effort: when any of the columns is missing the frame is
        # used as it is.  Other errors are no longer hidden.
        pass

    # note that folds are same as before
    # get training data using folds
    df_train = df[df.kfold != fold].reset_index(drop=True)

    # get validation data using folds
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    features = [
        f for f in df.columns if f not in ("kfold", "Cost", "Customer Id")
    ]

    x_train = df_train[features].values
    x_valid = df_valid[features].values

    # model registered under the key 'rf'
    model = model_dispatcher.models['rf']

    # fit model on training data, target on the Box-Cox scale
    model.fit(x_train, boxcox(df_train.Cost, cost_lambda))

    # predict on validation data and map back to the original scale
    valid_preds_bc = model.predict(x_valid)
    valid_preds = inv_boxcox(valid_preds_bc, cost_lambda)

    # get score
    score = calc_metric.calc_score(df_valid.Cost.values, valid_preds)

    # print score
    print(f"Fold = {fold}, Score = {score}")

    return score
def forecast(self):
    """Predict the next ``self.forecast_len`` periods and store the result.

    A future frame is built by ``self.model``, predicted, and cut down to
    its last ``self.forecast_len`` rows.  When ``self.boxcox_lambda`` is
    truthy, ``yhat`` is replaced by ``inv_boxcox(yhat, lambda) - 1``.  The
    outcome of ``self.format_output`` is stored in ``self.prediction``;
    nothing is returned.

    NOTE(review): a lambda of exactly 0 is falsy, so it is treated like
    "no transform" here; confirm that is intended.
    """
    horizon = self.forecast_len
    future = self.model.make_future_dataframe(periods=horizon, freq=self.freq)
    future_pred = self.model.predict(future)
    future_pred = future_pred[-horizon:]

    if self.boxcox_lambda:
        restored = inv_boxcox(future_pred["yhat"], self.boxcox_lambda)
        future_pred["yhat"] = (restored - 1)

    self.prediction = self.format_output(future_pred)
def invBoxCoxTransform(self, bcData):
    r"""
    Transform data using inverse Box-Cox transformation.

    Uses ``self.boxCoxLambda`` and returns a ``pd.Series`` carrying the
    index of ``bcData``.

    Raises ``RuntimeError`` when ``self.boxCoxLambda`` is ``None``.
    """
    lmbda = self.boxCoxLambda
    if lmbda is None:
        raise RuntimeError('Box-Cox lambda has not been learned or set.')
    return pd.Series(inv_boxcox(bcData, lmbda), index=bcData.index)
def test_step(self, model, batch):
    """
    Called inside the testing loop with the data from the testing
    dataloader passed in as `batch`.

    For every sample ``b`` and channel ``c`` the masked prediction is
    compared with the masked ground truth.  Optional steps controlled by
    ``self.hparams``: dropping points whose ground truth is not above
    ``round_frp_to_zero``, inverse Box-Cox of the prediction (``boxcox``),
    and restricting to predictions inside the open interval given by
    ``clip_output``.  Loss (MSE), accuracy (share of absolute errors below
    ``out_mad / 2``) and MAE are logged per channel; entries are keyed by
    channel only, so a later sample overwrites an earlier one.

    An empty dict is returned as soon as one (sample, channel) pair has no
    elements left after filtering.

    :param model: The chosen model; called on the inputs and read for
        ``model.data.mask``
    :type model: Model
    :param batch: Pair ``(x, y_pre)`` of input and ground truth variables
    :return: Loss and logs
    :rtype: dict
    """
    x, y_pre = batch
    y_hat_pre = model(x)
    mask = model.data.mask.expand_as(y_pre[0][0])
    tensorboard_logs = defaultdict(dict)

    for b in range(y_pre.shape[0]):
        for c in range(y_pre.shape[1]):
            y = y_pre[b][c][mask]
            y_hat = y_hat_pre[b][c][mask]

            if self.hparams.round_frp_to_zero:
                # One mask for both tensors: filtering them with different
                # thresholds would leave them misaligned.
                keep = y > self.hparams.round_frp_to_zero
                y_hat = y_hat[keep]
                y = y[keep]

            if y_hat.nelement() == 0:
                return {}

            if self.hparams.boxcox:
                y_hat = torch.from_numpy(
                    inv_boxcox(y_hat.cpu().numpy(),
                               self.hparams.boxcox)).to(y_hat.device)

            if self.hparams.clip_output:
                in_range = ((y_hat < self.hparams.clip_output[-1])
                            & (self.hparams.clip_output[0] < y_hat))
                y = y[in_range]
                y_hat = y_hat[in_range]

            pre_loss = (y_hat - y)**2
            loss = pre_loss.mean()
            # NaN is the only value that differs from itself.
            assert loss == loss

            # Accuracy for a threshold
            acc = ((y - y_hat).abs() < self.hparams.out_mad / 2).float().mean()
            mae = (y - y_hat).abs().float().mean()

            tensorboard_logs["test_loss"][str(c)] = loss
            tensorboard_logs["acc_test"][str(c)] = acc
            tensorboard_logs["mae_test"][str(c)] = mae

    test_loss = torch.stack(list(
        tensorboard_logs["test_loss"].values())).mean()
    tensorboard_logs["_test_loss"] = test_loss

    return {
        "test_loss": test_loss,
        "log": tensorboard_logs,
    }
def inv_bc_scale(number, lbd, mib, mia, maa):
    """Undo a min-max scaling followed by a Box-Cox transform.

    Args:
        number: Scaled value.
        lbd: Box-Cox lambda.
        mib: When ``mib <= 0``, ``mib - 1`` is added to the result.
        mia: Lower bound used to revert the scaling.
        maa: Upper bound used to revert the scaling.

    Returns:
        ``inv_boxcox(number * (maa - mia) + mia, lbd)``, plus ``mib - 1``
        when ``mib <= 0``.
    """
    unscaled = number * (maa - mia) + mia
    restored = inv_boxcox(unscaled, lbd)
    if mib > 0:
        return restored
    return restored + mib - 1
def inv_bc_scale_list(ls, lbd, mib, mia, maa):
    """Undo a min-max scaling and a Box-Cox transform for several values.

    Args:
        ls: Iterable of scaled values.
        lbd: Box-Cox lambda.
        mib: When ``mib <= 0``, ``mib - 1`` is added to every result.
        mia: Lower bound used to revert the scaling.
        maa: Upper bound used to revert the scaling.

    Returns:
        The output of ``inv_boxcox`` on the unscaled values when
        ``mib > 0``; otherwise a ``list`` with ``mib - 1`` added to each
        element.
    """
    unscaled = [value * (maa - mia) + mia for value in ls]
    restored = inv_boxcox(unscaled, lbd)
    if mib <= 0:
        restored = [value + mib - 1 for value in restored]
    return restored
def inv_transform(self, series):
    """Revert the standardisation and, if enabled, the Box-Cox transform.

    A copy of ``series`` is multiplied by the stored ``"std"`` and shifted
    by the stored ``"mean"`` from ``self.parameter_dict``.  When
    ``self.flag`` is truthy the result is passed through ``inv_boxcox``
    with the stored ``"lambda"`` and wrapped in a ``pd.Series`` named
    ``self.name``; otherwise the rescaled copy is returned as is.
    """
    params = self.parameter_dict
    center = params["mean"]
    scale = params["std"]
    rescaled = series.copy() * scale + center
    if not self.flag:
        return rescaled
    return pd.Series(inv_boxcox(rescaled, params["lambda"]), name=self.name)
def inverse_transform(self, series):
    """
    Inverse transform the passed series using the stored lambda and offset.

    :param series: Series to be reverse transformed
    :return: ``inv_boxcox(series, lambda) - offset`` as a ``pd.Series``
        carrying the index of ``series``
    """
    restored = special.inv_boxcox(series, self.__lambda) - self.__offset
    transformed_series = pd.Series(restored)
    transformed_series.index = series.index
    return transformed_series
def inverse_transform(self, X):
    """Apply the inverse Box-Cox transform to the fitted columns.

    A copy of ``X`` is made and every column listed in
    ``self.transform_cols`` is replaced by ``inv_boxcox(column, self.lmbda)``.
    ``X`` itself is left untouched.

    Raises:
        NotFittedError: when ``self.transform_cols`` is ``None``.

    Returns:
        The transformed copy of ``X``.
    """
    columns = self.transform_cols
    if columns is None:
        raise NotFittedError(
            f"This {self.__class__.__name__} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator."
        )

    restored = X.copy()
    for name in columns:
        restored[name] = inv_boxcox(restored[name], self.lmbda)
    return restored
def build_evaluate_model(train, test, bc_lambda, model_vars):
    """Fit a linear regression on the Box-Cox target and evaluate it.

    The model is fitted on ``train``.  Cross-validated predictions on
    ``test`` are mapped back with ``inv_boxcox`` and their R-squared
    against ``test["property_crime"]`` is printed.  ``evaluate_model`` is
    then called for the train and the test frame.

    Args:
        train: Frame with ``property_crime_bc`` and the ``model_vars``
            columns.
        test: Frame with ``property_crime``, ``property_crime_bc`` and the
            ``model_vars`` columns.
        bc_lambda: Lambda used for the inverse Box-Cox transform.
        model_vars: Names of the regressor columns.

    Returns:
        Tuple ``(regr, resids_train, resids_test)``.
    """
    regr = linear_model.LinearRegression()
    regr.fit(train[model_vars], train["property_crime_bc"])

    cv_pred_bc = cross_val_predict(regr, test[model_vars],
                                   test["property_crime_bc"])
    y_pred = inv_boxcox(cv_pred_bc, bc_lambda)
    r2 = r2_score(test["property_crime"], y_pred)
    print("R-squared (after reverse property crime Box-Cox): {:.4f}".format(r2))

    resids_train, resids_test = [
        evaluate_model(regr, frame, bc_lambda, label, model_vars)
        for frame, label in ((train, "Train"), (test, "Test"))
    ]
    return regr, resids_train, resids_test
def boxcox(x, y, y_label):
    """Fit ``regr`` on a Box-Cox transformed target and plot the result.

    ``y`` is shifted by ``abs(min(y)) + 1`` before the transform and the
    same shift is removed from the back-transformed predictions.  The
    ratio ``var(prediction) / var(y)`` is printed as "R squared" and a
    scatter plot of ``y`` against the predictions is shown.

    Relies on the module-level ``regr``, ``stats``, ``np`` and ``plt``.

    Args:
        x: Regressors passed to ``regr.fit`` / ``regr.predict``.
        y: Target values.
        y_label: Label of the x axis of the scatter plot.
    """
    # Computed once; used to shift before and un-shift after the transform.
    lowest = abs(min(y))

    box_cox, maxlog = stats.boxcox(y + lowest + 1)
    regr.fit(x, box_cox)
    box_cox_predict = regr.predict(x)
    y_predict = inv_boxcox(box_cox_predict, maxlog) - lowest - 1

    # print() with a single argument behaves the same on Python 2 and 3.
    print("R squared: " + str(np.var(y_predict) / np.var(y)))

    # Plot outputs
    plt.figure()
    plt.scatter(y, y_predict, color='blue')
    plt.xlabel(y_label)
    plt.ylabel('predicted')
    plt.show()
def test_inv_boxcox():
    """Round-trip check for ``boxcox``/``inv_boxcox`` and the ``1p`` pair.

    For lambdas 0, 1 and 2 the inverse applied to the forward transform
    must give back the input values.
    """
    for forward, backward in ((boxcox, inv_boxcox),
                              (boxcox1p, inv_boxcox1p)):
        x = np.array([0., 1., 2.])
        lam = np.array([0., 1., 2.])
        roundtrip = backward(forward(x, lam), lam)
        assert_almost_equal(x, roundtrip)
def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
             smoothing_seasonal=None, initial_level=None, initial_slope=None,
             damping_slope=None, initial_seasons=None, use_boxcox=None,
             lamda=None, remove_bias=None):
    """
    Helper prediction function

    Runs the smoothing recursions over the ``self.nobs`` observations,
    extends level, trend and season ``h + 1`` steps past the sample,
    computes SSE / AIC / AICc / BIC and wraps everything in a results
    object.  ``self.params`` is overwritten as a side effect.

    Parameters
    ----------
    h : int, optional
        The number of time steps to forecast ahead.  Despite the ``None``
        default it is used in arithmetic, so an integer must be given.
    smoothing_level, smoothing_slope, smoothing_seasonal : float, optional
        The alpha, beta and gamma smoothing parameters.
    initial_level, initial_slope, initial_seasons : optional
        Starting values ``l[0]``, ``b[0]`` and ``s[:m]``.
    damping_slope : float, optional
        The phi parameter; only used when the model is damped.
    use_boxcox : {'log', float, bool}, optional
        ``'log'`` uses lambda 0, a float is used as lambda, any other
        truthy value lets ``boxcox`` estimate lambda.
    lamda : float, optional
        Ignored on input; it is always recomputed from ``use_boxcox``.
    remove_bias : bool, optional
        When truthy the mean residual is added to the fitted values.

    Returns
    -------
    HoltWintersResultsWrapper
        Wrapper around the ``HoltWintersResults`` built from the fit.
    """
    # Variable renames to alpha,beta, etc as this helps with following the
    # mathematical notation in general
    alpha = smoothing_level
    beta = smoothing_slope
    gamma = smoothing_seasonal
    phi = damping_slope

    # Start in sample and out of sample predictions
    data = self.endog
    damped = self.damped
    seasoning = self.seasoning
    trending = self.trending
    trend = self.trend
    seasonal = self.seasonal
    m = self.seasonal_periods
    phi = phi if damped else 1.0
    if use_boxcox == 'log':
        lamda = 0.0
        y = boxcox(data, 0.0)
    elif isinstance(use_boxcox, float):
        lamda = use_boxcox
        y = boxcox(data, lamda)
    elif use_boxcox:
        y, lamda = boxcox(data)
    else:
        lamda = None
        y = data.squeeze()
        if np.ndim(y) != 1:
            raise NotImplementedError('Only 1 dimensional data supported')
    y_alpha = np.zeros((self.nobs,))
    y_gamma = np.zeros((self.nobs,))
    alphac = 1 - alpha
    y_alpha[:] = alpha * y
    if trending:
        betac = 1 - beta
    if seasoning:
        gammac = 1 - gamma
        y_gamma[:] = gamma * y
    l = np.zeros((self.nobs + h + 1,))
    b = np.zeros((self.nobs + h + 1,))
    s = np.zeros((self.nobs + h + m + 1,))
    l[0] = initial_level
    b[0] = initial_slope
    s[:m] = initial_seasons
    phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                      ) if damped else np.arange(1, h + 1 + 1)
    trended = {'mul': np.multiply,
               'add': np.add,
               None: lambda l, b: l
               }[trend]
    detrend = {'mul': np.divide,
               'add': np.subtract,
               None: lambda l, b: 0
               }[trend]
    dampen = {'mul': np.power,
              'add': np.multiply,
              None: lambda b, phi: 0
              }[trend]
    # Explicit name for the sample size instead of relying on the value the
    # loop variable happens to hold after each loop.
    nobs = self.nobs
    if seasonal == 'mul':
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] / s[i - 1] + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
            s[i + m - 1] = y_gamma[i - 1] / \
                trended(l[i - 1], dampen(b[i - 1], phi)) + \
                (gammac * s[i - 1])
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
        fitted = trend * s[:-m]
    elif seasonal == 'add':
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
            s[i + m - 1] = y_gamma[i - 1] - \
                (gamma * trended(l[i - 1], dampen(b[i - 1], phi))) + \
                (gammac * s[i - 1])
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
        fitted = trend + s[:-m]
    else:
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        fitted = trend
    level = l[1:nobs + 1].copy()
    if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
        # Report every component on the original scale.
        fitted = inv_boxcox(fitted, lamda)
        level = inv_boxcox(level, lamda)
        slope = detrend(trend[:nobs], level)
        if seasonal == 'add':
            season = (fitted - inv_boxcox(trend, lamda))[:nobs]
        elif seasonal == 'mul':
            season = (fitted / inv_boxcox(trend, lamda))[:nobs]
        else:
            pass
    sse = sqeuclidean(fitted[:-h - 1], data)
    # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
    k = m * seasoning + 2 * trending + 2 + 1 * damped
    aic = self.nobs * np.log(sse / self.nobs) + (k) * 2
    # The small-sample correction is undefined when nobs - k - 3 <= 0;
    # use an infinite penalty instead of dividing by zero.
    if self.nobs - k - 3 > 0:
        aicc_penalty = (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
    else:
        aicc_penalty = np.inf
    aicc = aic + aicc_penalty
    bic = self.nobs * np.log(sse / self.nobs) + (k) * np.log(self.nobs)
    resid = data - fitted[:-h - 1]
    if remove_bias:
        fitted += resid.mean()
    if not damped:
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        phi = np.nan
    self.params = {'smoothing_level': alpha,
                   'smoothing_slope': beta,
                   'smoothing_seasonal': gamma,
                   'damping_slope': phi,
                   'initial_level': l[0],
                   'initial_slope': b[0],
                   'initial_seasons': s[:m],
                   'use_boxcox': use_boxcox,
                   'lamda': lamda,
                   'remove_bias': remove_bias}
    hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted,
                               fittedvalues=fitted[:-h - 1],
                               fcastvalues=fitted[-h - 1:],
                               sse=sse, level=level, slope=slope,
                               season=season, aic=aic, bic=bic, aicc=aicc,
                               resid=resid, k=k)
    return HoltWintersResultsWrapper(hwfit)
def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
             smoothing_seasonal=None, initial_level=None, initial_slope=None,
             damping_slope=None, initial_seasons=None, use_boxcox=None,
             lamda=None, remove_bias=None, is_optimized=None):
    """
    Helper prediction function

    Runs the smoothing recursions over the ``self.nobs`` observations,
    extends level, trend and season ``h + 1`` steps past the sample,
    computes SSE / AIC / AICc / BIC, builds a table of the parameters and
    wraps everything in a results object.  ``self.params`` is overwritten
    as a side effect.

    Parameters
    ----------
    h : int, optional
        The number of time steps to forecast ahead.  Despite the ``None``
        default it is used in arithmetic, so an integer must be given.
    smoothing_level, smoothing_slope, smoothing_seasonal : float, optional
        The alpha, beta and gamma smoothing parameters.
    initial_level, initial_slope, initial_seasons : optional
        Starting values ``l[0]``, ``b[0]`` and ``s[:m]``.
    damping_slope : float, optional
        The phi parameter; only used when the model is damped.
    use_boxcox : {'log', float, bool}, optional
        ``'log'`` uses lambda 0, a float is used as lambda, any other
        truthy value lets ``boxcox`` estimate lambda.
    lamda : float, optional
        Ignored on input; it is always recomputed from ``use_boxcox``.
    remove_bias : bool, optional
        When truthy the mean residual is added to the fitted values.
    is_optimized : ndarray, optional
        Flags, one per parameter code, converted to bool for the
        ``optimized`` column; all ``False`` when omitted.

    Returns
    -------
    HoltWintersResultsWrapper
        Wrapper around the ``HoltWintersResults`` built from the fit.
    """
    # Variable renames to alpha, beta, etc as this helps with following the
    # mathematical notation in general
    alpha = smoothing_level
    beta = smoothing_slope
    gamma = smoothing_seasonal
    phi = damping_slope

    # Start in sample and out of sample predictions
    data = self.endog
    damped = self.damped
    seasoning = self.seasoning
    trending = self.trending
    trend = self.trend
    seasonal = self.seasonal
    m = self.seasonal_periods
    phi = phi if damped else 1.0
    if use_boxcox == 'log':
        lamda = 0.0
        y = boxcox(data, 0.0)
    elif isinstance(use_boxcox, float):
        lamda = use_boxcox
        y = boxcox(data, lamda)
    elif use_boxcox:
        y, lamda = boxcox(data)
    else:
        lamda = None
        y = data.squeeze()
        if np.ndim(y) != 1:
            raise NotImplementedError('Only 1 dimensional data supported')
    y_alpha = np.zeros((self.nobs,))
    y_gamma = np.zeros((self.nobs,))
    alphac = 1 - alpha
    y_alpha[:] = alpha * y
    if trending:
        betac = 1 - beta
    if seasoning:
        gammac = 1 - gamma
        y_gamma[:] = gamma * y
    l = np.zeros((self.nobs + h + 1,))
    b = np.zeros((self.nobs + h + 1,))
    s = np.zeros((self.nobs + h + m + 1,))
    l[0] = initial_level
    b[0] = initial_slope
    s[:m] = initial_seasons
    phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                      ) if damped else np.arange(1, h + 1 + 1)
    trended = {'mul': np.multiply,
               'add': np.add,
               None: lambda l, b: l
               }[trend]
    detrend = {'mul': np.divide,
               'add': np.subtract,
               None: lambda l, b: 0
               }[trend]
    dampen = {'mul': np.power,
              'add': np.multiply,
              None: lambda b, phi: 0
              }[trend]
    nobs = self.nobs
    if seasonal == 'mul':
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] / s[i - 1] + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
            s[i + m - 1] = y_gamma[i - 1] / trended(l[i - 1], dampen(b[i - 1], phi)) + \
                (gammac * s[i - 1])
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
        fitted = trend * s[:-m]
    elif seasonal == 'add':
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
            s[i + m - 1] = y_gamma[i - 1] - \
                (gamma * trended(l[i - 1], dampen(b[i - 1], phi))) + \
                (gammac * s[i - 1])
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
        fitted = trend + s[:-m]
    else:
        for i in range(1, nobs + 1):
            l[i] = y_alpha[i - 1] + \
                (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
            if trending:
                b[i] = (beta * detrend(l[i], l[i - 1])) + \
                    (betac * dampen(b[i - 1], phi))
        slope = b[1:nobs + 1].copy()
        season = s[m:nobs + m].copy()
        l[nobs:] = l[nobs]
        if trending:
            b[:nobs] = dampen(b[:nobs], phi)
            b[nobs:] = dampen(b[nobs], phi_h)
        trend = trended(l, b)
        fitted = trend
    level = l[1:nobs + 1].copy()
    if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
        # Report every component on the original scale.
        fitted = inv_boxcox(fitted, lamda)
        level = inv_boxcox(level, lamda)
        slope = detrend(trend[:nobs], level)
        if seasonal == 'add':
            season = (fitted - inv_boxcox(trend, lamda))[:nobs]
        else:  # seasonal == 'mul':
            season = (fitted / inv_boxcox(trend, lamda))[:nobs]
    sse = sqeuclidean(fitted[:-h - 1], data)
    # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
    k = m * seasoning + 2 * trending + 2 + 1 * damped
    aic = self.nobs * np.log(sse / self.nobs) + k * 2
    # The small-sample correction is undefined when nobs - k - 3 <= 0.
    if self.nobs - k - 3 > 0:
        aicc_penalty = (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
    else:
        aicc_penalty = np.inf
    aicc = aic + aicc_penalty
    bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
    resid = data - fitted[:-h - 1]
    if remove_bias:
        fitted += resid.mean()
    if not damped:
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        phi = np.nan
    self.params = {'smoothing_level': alpha,
                   'smoothing_slope': beta,
                   'smoothing_seasonal': gamma,
                   'damping_slope': phi,
                   'initial_level': l[0],
                   'initial_slope': b[0],
                   'initial_seasons': s[:m],
                   'use_boxcox': use_boxcox,
                   'lamda': lamda,
                   'remove_bias': remove_bias}

    # Format parameters into a DataFrame
    codes = ['alpha', 'beta', 'gamma', 'l.0', 'b.0', 'phi']
    codes += ['s.{0}'.format(i) for i in range(m)]
    idx = ['smoothing_level', 'smoothing_slope', 'smoothing_seasonal',
           'initial_level', 'initial_slope', 'damping_slope']
    idx += ['initial_seasons.{0}'.format(i) for i in range(m)]

    formatted = [alpha, beta, gamma, l[0], b[0], phi]
    formatted += s[:m].tolist()
    # Parameters that were not supplied show up as NaN in the table.
    formatted = [np.nan if v is None else v for v in formatted]
    formatted = np.array(formatted)
    # Builtin bool: the np.bool alias was removed in NumPy 1.24.
    if is_optimized is None:
        optimized = np.zeros(len(codes), dtype=bool)
    else:
        optimized = is_optimized.astype(bool)
    # Keep only the rows of the components this model actually has.
    included = [True, trending, seasoning, True, trending, damped]
    included += [True] * m
    formatted = pd.DataFrame([[c, f, o] for c, f, o in zip(codes, formatted, optimized)],
                             columns=['name', 'param', 'optimized'],
                             index=idx)
    formatted = formatted.loc[included]

    hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted,
                               fittedvalues=fitted[:-h - 1],
                               fcastvalues=fitted[-h - 1:],
                               sse=sse, level=level, slope=slope,
                               season=season, aic=aic, bic=bic, aicc=aicc,
                               resid=resid, k=k,
                               params_formatted=formatted,
                               optimized=optimized)
    return HoltWintersResultsWrapper(hwfit)