def test_invalid():
    # Every invalid model specification must be rejected at construction
    # time with the documented exception type and message.
    with pytest.raises(ValueError, match='Cannot have a seasonal period of 1.'):
        ExponentialSmoothing(aust, seasonal=1)

    seasonal_type_msg = (
        r'seasonal must be integer_like'
        r' \(int or np.integer, but not bool or timedelta64\) or None')
    with pytest.raises(TypeError, match=seasonal_type_msg):
        ExponentialSmoothing(aust, seasonal=True)

    with pytest.raises(ValueError,
                       match='Invalid initialization method "invalid".'):
        ExponentialSmoothing(aust, initialization_method='invalid')

    # "known" initialization requires the matching initial_* arguments.
    with pytest.raises(
            ValueError,
            match=('`initial_level` argument must be provided'
                   ' when initialization method is set to'
                   ' "known".')):
        ExponentialSmoothing(aust, initialization_method='known')

    with pytest.raises(
            ValueError,
            match=('`initial_trend` argument must be provided'
                   ' for models with a trend component when'
                   ' initialization method is set to "known".')):
        ExponentialSmoothing(aust, trend=True,
                             initialization_method='known',
                             initial_level=0)

    with pytest.raises(
            ValueError,
            match=('`initial_seasonal` argument must be provided'
                   ' for models with a seasonal component when'
                   ' initialization method is set to "known".')):
        ExponentialSmoothing(aust, seasonal=4,
                             initialization_method='known',
                             initial_level=0)

    # Conversely, "estimated" initialization forbids every initial_* arg.
    for name in ['initial_level', 'initial_trend', 'initial_seasonal']:
        msg = ('Cannot give `%s` argument when initialization is "estimated"'
               % name)
        with pytest.raises(ValueError, match=msg):
            ExponentialSmoothing(aust, **{name: 0})

    with pytest.raises(
            ValueError,
            match=('Invalid length of initial seasonal values. Must be'
                   ' one of s or s-1, where s is the number of seasonal'
                   ' periods.')):
        ExponentialSmoothing(aust, seasonal=4,
                             initialization_method='known',
                             initial_level=0, initial_seasonal=0)

    with pytest.raises(NotImplementedError,
                       match='ExponentialSmoothing does not support `exog`.'):
        mod = ExponentialSmoothing(aust)
        mod.clone(aust, exog=air)
def update_revenue_forecast(historicals, method, fcast_index, focus_scenario,
                            p, d, q, P, D, Q):
    """Fit revenue and opex models and return simulated forecast paths.

    Parameters
    ----------
    historicals : convertible to pandas.DataFrame
        Quarterly history with at least 'DT_FIM_EXERC', 'Revenue' and
        'Opex' columns; the opex regression also reads quarter dummies
        Q1..Q4 (presumably created upstream or by `add_quarters` —
        TODO confirm against caller).
    method : str
        'ets' for exponential smoothing, 'arima' for SARIMAX; any other
        value short-circuits with an empty dict.
    fcast_index : str
        Name of a deflator column in the data ('' disables deflation).
    focus_scenario : scalar
        Scenario label used to select rows of the module-level `focus`
        frame (defined elsewhere in this file).
    p, d, q, P, D, Q : int
        SARIMAX orders; the seasonal period is fixed at 4 (quarterly).

    Returns
    -------
    tuple(list[dict], dict)
        Simulation records (`DataFrame.to_dict('records')`) and a dict of
        per-model parameters and diagnostics.
        NOTE(review): the unknown-method branch returns a bare ``{}``
        instead of a 2-tuple; callers that unpack two values will fail
        there — preserved as-is, confirm intent.
    """
    historicals = pd.DataFrame(historicals)
    historicals['DT_FIM_EXERC'] = pd.to_datetime(historicals['DT_FIM_EXERC'])
    models = {}

    # --- Revenue time-series model -------------------------------------
    data = historicals.set_index('DT_FIM_EXERC').asfreq('Q')
    y = data['Revenue']
    if fcast_index != '':
        # Deflate by the chosen index, re-based to its latest observation.
        idx = data[fcast_index.upper()]
        y = y / idx * idx.iloc[-1]
    y = np.log(y)  # the revenue model works on the log scale throughout

    if method == 'ets':
        rev_model = ExponentialSmoothing(y, trend=True, damped_trend=True,
                                         seasonal=4)
    elif method == 'arima':
        rev_model = SARIMAX(y, order=(p, d, q),
                            seasonal_order=(P, D, Q, 4), trend='c')
    else:
        return {}

    rev_results = rev_model.fit()
    models['revenue'] = {
        'Params': rev_results.params,
        'diag': {
            'In-sample RMSE': np.sqrt(rev_results.mse),
            'In-sample MAE': rev_results.mae,
            'Ljung-Box':
                rev_results.test_serial_correlation('ljungbox')[0, 0, -1],
            'log-Likelihood': rev_results.llf,
            'AICc': rev_results.aicc,
            'BIC': rev_results.bic,
        }
    }

    # --- Rolling-origin cross validation (1-year folds) -----------------
    foldsize = 1
    nfolds = round(y.shape[0] / (4 * foldsize)) - 1
    cv_errors = []
    for fold in range(nfolds, 0, -1):
        train_subset = y.iloc[:-(fold + 2) * (4 * foldsize)]
        valid_subset = y.iloc[-(fold + 2) * (4 * foldsize):
                              -(fold + 1) * (4 * foldsize)]
        if train_subset.shape[0] < 16:
            # Require at least 4 years of training data.
            continue
        # BUGFIX: `y` is already log-transformed above; the previous code
        # applied np.log a second time to both the training and validation
        # slices, so CV errors were on a double-log scale and not
        # comparable with the in-sample diagnostics.
        fcasts = (rev_model.clone(train_subset).fit()
                  .forecast(valid_subset.shape[0]))
        cv_errors = np.append(cv_errors, fcasts - valid_subset)
    if len(cv_errors) > 4:
        models['revenue']['diag']['CV RMSE'] = np.sqrt(
            np.mean(np.array(cv_errors) ** 2))
        models['revenue']['diag']['CV MAE'] = np.mean(np.abs(cv_errors))

    # --- Simulated forecast paths ---------------------------------------
    nsim = 100
    # Forecast horizon = number of focus rows for the chosen scenario.
    horiz = int(np.sum(focus['scenario'] == focus_scenario))
    forecasts = (pd.DataFrame({
        'y': rev_results.forecast(horiz),
        'group': 'forecast',
        'variable_1': ''
    }).reset_index())
    simulations = (rev_results
                   .simulate(horiz, repetitions=nsim, anchor=data.shape[0])
                   .reset_index()
                   .melt('index', value_name='y')
                   .drop(columns='variable_0')
                   .assign(group='simulation'))
    simulations = (pd.concat([simulations, forecasts])
                   .reset_index(drop=True)
                   .rename(columns={'variable_1': 'iteration',
                                    'index': 'DT_FIM_EXERC'})
                   .pipe(add_quarters))
    simulations['Revenue'] = np.exp(simulations['y'])
    if fcast_index != '':
        # Re-inflate using the scenario's projected index values.
        simulations = simulations.merge(
            focus[['DT_FIM_EXERC', fcast_index.upper()]]
            [focus['scenario'] == focus_scenario],
            on="DT_FIM_EXERC", how="left")
        simulations['Revenue'] = simulations['Revenue'] \
            * simulations[fcast_index.upper()] \
            / data[fcast_index.upper()].iloc[-1]

    # Year-over-year growth per path; the first 4 simulated quarters have
    # no in-frame lag, so compute their growth against the last 4
    # historical quarters instead.
    simulations['RevenueGrowth'] = 100 * (
        simulations['Revenue']
        / simulations.groupby('iteration')['Revenue'].shift(4) - 1)
    na_mask = simulations['RevenueGrowth'].isna()
    simulations.loc[na_mask, 'RevenueGrowth'] = np.reshape(
        100 * (
            np.reshape(simulations['Revenue'][na_mask].values,
                       (nsim + 1, 4))
            / historicals['Revenue'].tail(4).values - 1
        ),
        ((nsim + 1) * 4)
    )

    # --- Opex median regression (quantile regression at q=0.5) ----------
    historicals['logRevenue'] = np.log(historicals['Revenue'])
    exog = historicals[['logRevenue', 'Q1', 'Q2', 'Q3', 'Q4']]
    opex_model = QuantReg(np.log(historicals['Opex']), exog)
    opex_results = opex_model.fit(q=0.5)
    opex_coefs = opex_results.params
    rmse = np.mean(opex_results.resid ** 2) ** .5
    models['opex'] = {
        'Params': opex_results.params,
        'diag': {
            # BUGFIX: was sqrt(mean(resid)**2) == |mean(resid)|, which is
            # not an RMSE; reuse the correctly computed value above.
            'In-sample RMSE': rmse,
            'In-sample MAE': np.mean(np.abs(opex_results.resid)),
        }
    }

    # Opex along each simulated path: fitted median relation plus Gaussian
    # noise (noise only on 'simulation' rows, not the point forecast).
    opex_coefs = opex_coefs.to_numpy()  # explicit positional access; Series[int] label lookup is deprecated
    simulations['Opex'] = np.exp(
        opex_coefs[0] * np.log(simulations['Revenue'])
        + opex_coefs[1] * simulations['Q1']
        + opex_coefs[2] * simulations['Q2']
        + opex_coefs[3] * simulations['Q3']
        + opex_coefs[4] * simulations['Q4']
        + np.random.normal(0, rmse, simulations.shape[0])
        * (simulations['group'] == 'simulation'))
    simulations['EBIT'] = simulations['Revenue'] - simulations['Opex']
    simulations['EBITMargin'] = (
        100 * simulations['EBIT'] / simulations['Revenue'])
    simulations['Taxes'] = simulations['EBIT'] * .34  # flat 34% tax rate
    simulations['NOPAT'] = simulations['EBIT'] - simulations['Taxes']

    # Prepend the historical rows so callers get one continuous frame.
    simulations = pd.concat(
        [historicals.assign(group='historicals', iteration=''), simulations])
    return simulations.to_dict('records'), models