# Imports assumed by these tests (as in the statsmodels test suite);
# `oildata` is a module-level pandas Series of the oil dataset.
from numpy.testing import assert_allclose
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing


def test_concentrated_initialization():
    # Compare a model where initialization is concentrated out versus
    # numerical maximum likelihood estimation
    mod1 = ExponentialSmoothing(oildata, initialization_method='concentrated')
    mod2 = ExponentialSmoothing(oildata)

    # First, fix the other parameters at a particular value
    res1 = mod1.filter([0.1])
    res2 = mod2.fit_constrained({'smoothing_level': 0.1}, disp=0)

    assert_allclose(res1.llf, res2.llf)
    assert_allclose(res1.initial_state, res2.initial_state, rtol=1e-5)

    # Alternatively, estimate the remaining parameters
    res1 = mod1.fit(disp=0)
    res2 = mod2.fit(disp=0)

    assert_allclose(res1.llf, res2.llf)
    assert_allclose(res1.initial_state, res2.initial_state, rtol=1e-5)
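# A minimal, hedged sketch of the same comparison outside the test suite.
# The `oildata` series is stubbed here with a synthetic random walk; the
# statsmodels tests use the oil dataset instead.
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing

rng = np.random.default_rng(0)
oildata = pd.Series(
    100 + np.cumsum(rng.normal(size=40)),
    index=pd.period_range('1965', periods=40, freq='Y'),
)

mod_conc = ExponentialSmoothing(oildata, initialization_method='concentrated')
mod_est = ExponentialSmoothing(oildata)  # default: initialization estimated

res_conc = mod_conc.fit(disp=0)
res_est = mod_est.fit(disp=0)

# The two approaches should agree closely on likelihood and initial state.
print(res_conc.llf, res_est.llf)
print(res_conc.initial_state, res_est.initial_state)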
# `reset_randomstate` is a pytest fixture from the statsmodels test suite;
# this is the statespace ExponentialSmoothing (results expose bse and
# fixed_params).
import numpy as np
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing


def test_parameterless_model(reset_randomstate):
    # GH 6687
    x = np.cumsum(np.random.standard_normal(1000))
    ses = ExponentialSmoothing(x, initial_level=x[0],
                               initialization_method="known")
    with ses.fix_params({'smoothing_level': 0.5}):
        res = ses.fit()
    assert np.isnan(res.bse).all()
    assert res.fixed_params == ["smoothing_level"]
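# fix_params is part of the statespace model API; a short usage sketch
# (synthetic data, parameter values chosen arbitrarily) showing that a
# fixed parameter survives fit() unchanged while free ones are estimated.
import numpy as np
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing

rng = np.random.default_rng(0)
y = np.cumsum(rng.standard_normal(200))

model = ExponentialSmoothing(y)

# Inside the context manager only the remaining free parameters are
# estimated (here, the initial level).
with model.fix_params({'smoothing_level': 0.2}):
    res_fixed = model.fit(disp=0)

res_free = model.fit(disp=0)  # everything re-estimated outside the context

print(res_fixed.params)  # smoothing_level pinned at 0.2
print(res_free.params)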
# Assumed imports for this snippet; `price` is a monthly pandas Series.
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.exponential_smoothing import (
    ExponentialSmoothing as StatespaceExponentialSmoothing)


class Callback:
    """Report progress after each optimizer iteration."""

    def __init__(self):
        self.count = 0

    def __call__(self, point):
        print("Iteration {} is finished.".format(self.count))
        self.count += 1


es_state_model = StatespaceExponentialSmoothing(price, trend='add',
                                                seasonal=12)
es_state_fit = es_state_model.fit(method='lbfgs', maxiter=30, disp=True,
                                  callback=Callback(), gtol=1e-2, ftol=1e-2,
                                  xtol=1e-2)
es_state_fit.summary()

# Forecast 30 years (360 monthly periods) ahead via simulation.
PERIODS_AHEAD = 360
simulations = es_state_fit.simulate(PERIODS_AHEAD, repetitions=100,
                                    error='mul')
simulations.plot(style='-', alpha=0.05, color='grey', legend=False)
plt.title("Simulation Plot")
plt.show()
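# The 100 simulated paths above can also be summarized into empirical
# prediction bands rather than plotted individually. This assumes
# `simulations` is the wide DataFrame returned by simulate(), with one
# column per repetition.
lower = simulations.quantile(0.05, axis=1)
median = simulations.quantile(0.50, axis=1)
upper = simulations.quantile(0.95, axis=1)

median.plot(color='black', label='median path')
lower.plot(color='grey', linestyle='--', label='5% / 95% band')
upper.plot(color='grey', linestyle='--', label='_nolegend_')
plt.title("Empirical Prediction Band")
plt.legend()
plt.show()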
# Assumed imports for this function:
import numpy as np
import pandas as pd
from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX


def update_revenue_forecast(historicals, method, fcast_index, focus_scenario,
                            p, d, q, P, D, Q):
    # `focus` (scenario DataFrame) and `add_quarters` (adds Q1-Q4 dummy
    # columns) are module-level objects assumed by this function;
    # `historicals` is assumed to already carry Q1-Q4 dummies.
    historicals = pd.DataFrame(historicals)
    historicals['DT_FIM_EXERC'] = pd.to_datetime(historicals['DT_FIM_EXERC'])
    models = {}

    # Revenue time series model
    data = historicals.set_index('DT_FIM_EXERC').asfreq('Q')
    y = data['Revenue']

    # Deflate by the forecast index (if any), then work in logs
    if fcast_index != '':
        idx = data[fcast_index.upper()]
        y = y / idx * idx.iloc[-1]
    y = np.log(y)

    # Create forecast model
    if method == 'ets':
        rev_model = ExponentialSmoothing(y, trend=True, damped_trend=True,
                                         seasonal=4)
    elif method == 'arima':
        rev_model = SARIMAX(y, order=(p, d, q), seasonal_order=(P, D, Q, 4),
                            trend='c')
    else:
        return {}

    rev_results = rev_model.fit()
    models['revenue'] = {
        'Params': rev_results.params,
        'diag': {
            'In-sample RMSE': np.sqrt(rev_results.mse),
            'In-sample MAE': rev_results.mae,
            'Ljung-Box': rev_results.test_serial_correlation('ljungbox')[0, 0, -1],
            'log-Likelihood': rev_results.llf,
            'AICc': rev_results.aicc,
            'BIC': rev_results.bic
        }
    }

    # Cross validation (rolling origin; y is already in logs, so the
    # training and validation slices are used as-is)
    foldsize = 1
    nfolds = round(y.shape[0] / (4 * foldsize)) - 1
    cv_errors = []
    for fold in range(nfolds, 0, -1):
        train_subset = y.iloc[:-(fold + 2) * (4 * foldsize)]
        valid_subset = y.iloc[-(fold + 2) * (4 * foldsize):
                              -(fold + 1) * (4 * foldsize)]
        if train_subset.shape[0] < 16:
            continue
        fcasts = (rev_model.clone(train_subset).fit()
                  .forecast(valid_subset.shape[0]))
        cv_errors = np.append(cv_errors, fcasts - valid_subset)
    if len(cv_errors) > 4:
        models['revenue']['diag']['CV RMSE'] = np.sqrt(
            np.mean(np.array(cv_errors) ** 2))
        models['revenue']['diag']['CV MAE'] = np.mean(np.abs(cv_errors))

    # Generate simulated forecasts
    nsim = 100
    horiz = int(np.sum(focus['scenario'] == focus_scenario))

    forecasts = (pd.DataFrame({
        'y': rev_results.forecast(horiz),
        'group': 'forecast',
        'variable_1': ''
    }).reset_index())
    simulations = (rev_results.simulate(horiz, repetitions=nsim,
                                        anchor=data.shape[0])
                   .reset_index()
                   .melt('index', value_name='y')
                   .drop(columns='variable_0')
                   .assign(group='simulation'))
    simulations = (pd.concat([simulations, forecasts])
                   .reset_index(drop=True)
                   .rename(columns={'variable_1': 'iteration',
                                    'index': 'DT_FIM_EXERC'})
                   .pipe(add_quarters))
    simulations['Revenue'] = np.exp(simulations['y'])
    if fcast_index != '':
        simulations = simulations.merge(
            focus[['DT_FIM_EXERC', fcast_index.upper()]]
            [focus['scenario'] == focus_scenario],
            on="DT_FIM_EXERC", how="left")
        simulations['Revenue'] = simulations['Revenue'] \
            * simulations[fcast_index.upper()] \
            / data[fcast_index.upper()].iloc[-1]
    simulations['RevenueGrowth'] = 100 * (
        simulations['Revenue']
        / simulations.groupby('iteration')['Revenue'].shift(4) - 1)

    # The first 4 quarters of each path have no lag; compute their growth
    # against the last 4 historical quarters instead.
    simulations.loc[simulations['RevenueGrowth'].isna(), 'RevenueGrowth'] = \
        np.reshape(
            100 * (
                np.reshape(
                    simulations['Revenue'][simulations['RevenueGrowth'].isna()]
                    .values,
                    (nsim + 1, 4))
                / historicals['Revenue'].tail(4).values - 1
            ),
            ((nsim + 1) * 4)
        )

    # Expenses regression model (median regression of log Opex on
    # log Revenue plus quarter dummies)
    historicals['logRevenue'] = np.log(historicals['Revenue'])
    exog = historicals[['logRevenue', 'Q1', 'Q2', 'Q3', 'Q4']]
    opex_model = QuantReg(np.log(historicals['Opex']), exog)
    opex_results = opex_model.fit(q=0.5)
    opex_coefs = opex_results.params
    rmse = np.mean(opex_results.resid ** 2) ** .5
    models['opex'] = {
        'Params': opex_results.params,
        'diag': {
            'In-sample RMSE': rmse,
            'In-sample MAE': np.mean(np.abs(opex_results.resid)),
            #'Ljung-Box': opex_results.test_serial_correlation('ljungbox')[0, 0, -1],
            #'log-Likelihood': opex_results.llf,
            #'AICc': opex_results.aicc,
            #'BIC': opex_results.bic
        }
    }

    # Simulations: add residual noise only to the simulated paths, not to
    # the point forecast
    simulations['Opex'] = np.exp(
        opex_coefs.iloc[0] * np.log(simulations['Revenue'])
        + opex_coefs.iloc[1] * simulations['Q1']
        + opex_coefs.iloc[2] * simulations['Q2']
        + opex_coefs.iloc[3] * simulations['Q3']
        + opex_coefs.iloc[4] * simulations['Q4']
        + np.random.normal(0, rmse, simulations.shape[0])
        * (simulations['group'] == 'simulation'))
    simulations['EBIT'] = simulations['Revenue'] - simulations['Opex']
    simulations['EBITMargin'] = 100 * simulations['EBIT'] / simulations['Revenue']
    simulations['Taxes'] = simulations['EBIT'] * .34
    simulations['NOPAT'] = simulations['EBIT'] - simulations['Taxes']

    simulations = pd.concat(
        [historicals.assign(group='historicals', iteration=''), simulations])
    return simulations.to_dict('records'), models
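# The rolling-origin cross-validation embedded above can be factored out.
# A stripped-down sketch of the same clone()/fit()/forecast() pattern,
# assuming `model` is a statespace model (e.g. the SARIMAX above) and `y`
# the series it was built on; all names here are illustrative only.
import numpy as np


def rolling_origin_rmse(model, y, h=4, min_train=16):
    """Expanding-window CV: refit a clone of `model` on each training
    slice and score the next `h` observations."""
    errors = []
    for fold in range(y.shape[0] // h - 1, 0, -1):
        start = y.shape[0] - fold * h
        train, valid = y.iloc[:start], y.iloc[start:start + h]
        if train.shape[0] < min_train:
            continue
        fcast = model.clone(train).fit(disp=0).forecast(valid.shape[0])
        errors.append(np.asarray(fcast) - np.asarray(valid))
    if not errors:
        return np.nan
    errors = np.concatenate(errors)
    return np.sqrt(np.mean(errors ** 2))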