def setup_class(cls):
    """Build the GAM and OLS fixtures shared by the tests of this class.

    Runs the parent ``setup_class`` (which, per the original note,
    initialises the data-generating process), adds seeded Gaussian noise to
    ``cls.y_true``, then fits an ``AdditiveModel`` on ``cls.x`` and an
    ``OLS`` benchmark on ``cls.exog`` against the same noisy response.

    Stored on the class:

    * ``cls.res1`` -- a ``Dummy`` holder carrying the additive-model
      outputs ``y_pred``, ``y_predshort`` and ``params``.
    * ``cls.res2`` -- the OLS results object itself, with an extra
      ``y_pred`` attribute attached to it.
    """
    super(TestAdditiveModel, cls).setup_class()

    design, regressors = cls.x, cls.exog

    # Fixed seed so the simulated noise is identical on every run.
    np.random.seed(8765993)
    noise_scale = 0.1
    noisy_y = cls.y_true + noise_scale * np.random.randn(cls.nobs)

    gam = AdditiveModel(design)
    gam.fit(noisy_y)
    # The fit results are read back off the model object
    # (original TODO: "currently attached to class").
    gam_results = gam.results

    ols_results = OLS(noisy_y, regressors).fit()

    # Naming is not fully consistent between the two holders: res1 is a
    # plain container for the GAM numbers, res2 reuses the OLS results
    # object and only gets additional attributes.
    gam_holder = Dummy()
    cls.res1 = gam_holder
    cls.res2 = ols_results

    gam_holder.y_pred = gam_results.predict(design)
    ols_results.y_pred = ols_results.model.predict(ols_results.params,
                                                   regressors)
    gam_holder.y_predshort = gam_results.predict(design[:10])

    # Every smoother contributes all of its parameters after the first one.
    slopes = []
    for smoother in gam.smoothers:
        slopes.extend(smoother.params[1:])

    # NOTE(review): params[1] is also the first element of each smoother's
    # slice collected in `slopes` above -- confirm index 1 (rather than 0)
    # is the intended term for the constant.
    const = gam_results.alpha + sum(
        smoother.params[1] for smoother in gam.smoothers)

    gam_holder.params = np.array([const] + slopes)
# NOTE(review): this is a fragment of a longer example script; x1, R, nobs,
# standardize, example, plt and AdditiveModel are defined outside this view.

# Both regressors are sorted (presumably NumPy arrays, so in place -- confirm).
x1.sort()
x2 = R.standard_normal(nobs)
x2.sort()
# Response starts as draws from R.standard_normal; the signal is added below.
y = R.standard_normal((nobs, ))
# Component functions of the two regressors used to build the signal z.
f1 = lambda x1: (x1 + x1**2 - 3 - 1 * x1**3 + 0.1 * np.exp(-x1 / 4.))
f2 = lambda x2: (x2 + x2**2 - 0.1 * np.exp(x2 / 4.))
# Sum of the two standardized components, standardized again and scaled by 2.
z = standardize(f1(x1)) + standardize(f2(x2))
z = standardize(z) * 2 # 0.1
y += z
# Regressors placed side by side: one column per regressor after the transpose.
d = np.array([x1, x2]).T
if example == 1:
    # Fit the additive model on (d, y) and plot the data, the signal z and
    # the model prediction in one figure.
    print("normal")
    m = AdditiveModel(d)
    m.fit(y)
    # NOTE(review): x is not used in the visible part of the script.
    x = np.linspace(-2, 2, 50)
    print(m)
    y_pred = m.results.predict(d)
    plt.figure()
    plt.plot(y, '.')
    plt.plot(z, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='AdditiveModel')
    plt.legend()
    plt.title('gam.AdditiveModel')
if example == 2:
    # Only the banner for this example is visible in this chunk.
    print("binomial")
def trend_seasonality_spike_strength(x, freq):
    """Measure strength of trend, seasonality and spikiness of a series.

    Parameters
    ----------
    x : pandas.Series
        Observations. Missing values are dropped (``x.dropna()``) before
        anything is computed.
    freq : int
        Seasonal period. With ``freq > 1`` the series is split with
        ``sm.tsa.seasonal_decompose``; otherwise a smooth trend is fitted
        with ``AdditiveModel`` on the time index and no seasonal component
        is estimated.

    Returns
    -------
    dict
        Keys ``trend``, ``spike``, ``peak``, ``trough``, ``linearity`` and
        ``curvature``; ``season`` is added only when ``freq > 1``.
        Every value is NaN when fewer than ``2 * freq`` non-missing
        observations are available. ``peak`` and ``trough`` stay NaN when
        no seasonal decomposition is run.
    """
    cont_x = x.dropna()
    length_cont_x = len(cont_x)

    # Seasonal quantities keep this NaN default unless a decomposition runs.
    season = peak = trough = np.nan

    if length_cont_x < (2 * freq):
        # Too few observations: report everything as missing.
        trend = linearity = curvature = spike = np.nan
    else:
        if freq > 1:
            # NOTE(review): newer statsmodels releases name this keyword
            # `period` instead of `freq` -- confirm against the pinned
            # statsmodels version before changing it.
            all_stl = sm.tsa.seasonal_decompose(cont_x, freq=freq)
            trend0 = all_stl.trend
            fits = trend0 + all_stl.seasonal
            adj_x = cont_x - fits
            v_adj = adj_x.var()
            detrend = cont_x - trend0
            deseason = cont_x - all_stl.seasonal
            peak = all_stl.seasonal.max()
            trough = all_stl.seasonal.min()
            remainder = all_stl.resid
            detrend_var = detrend.var()
            if detrend_var < 1e-10:
                season = 0
            else:
                season = max(0, min(1, 1 - v_adj / detrend_var))
        else:
            # No seasonal component: smooth trend over the time index.
            tt = np.array([range(length_cont_x)]).T
            _trend0_values = AdditiveModel(tt).fit(cont_x.values).mu
            trend0 = pd.Series(_trend0_values, index=cont_x.index)
            remainder = cont_x - trend0
            # Without a seasonal part the deseasonalised series is the
            # series itself, and the adjusted series is the remainder.
            # This keeps trend strength = 1 - Var(remainder) / Var(deseason),
            # the same ratio the seasonal branch above computes.
            deseason = cont_x
            v_adj = remainder.var()

        deseason_var = deseason.var()
        if deseason_var < 1e-10:
            trend = 0
        else:
            trend = max(0, min(1, 1 - v_adj / deseason_var))

        # Spikiness: variance of the leave-one-out variances of the remainder.
        n = len(remainder)
        v = remainder.var()
        d = (remainder - remainder.mean())**2
        varloo = (v * (n - 1) - d) / (n - 2)
        spike = varloo.var()

        # Regress the trend on a degree-2 polynomial basis of the time index.
        pl = Poly()
        pl.fit(range(length_cont_x), degree=2)
        result_pl = pl.predict(range(length_cont_x))  # [:, 2]
        X = sm.add_constant(result_pl, has_constant='add')
        ols_data = trend0.copy()
        ols_data = pd.concat(
            [ols_data.reset_index(drop=True), pd.DataFrame(X)],
            axis=1, ignore_index=True)
        ols_data.columns = ['Y', 'Intercept', 'X1', 'X2', 'X3']
        result_ols = ols('Y ~ X1 + X2 + X3', data=ols_data.dropna())
        trend_coef = result_ols.fit().params
        # Explicit positional access: integer keys on a label-indexed Series
        # rely on a positional fallback that pandas has deprecated.
        linearity = trend_coef.iloc[1]
        curvature = trend_coef.iloc[2]

    result = dict(trend=trend, spike=spike, peak=peak, trough=trough,
                  linearity=linearity, curvature=curvature)
    if freq > 1:
        result["season"] = season
    return result