def __init__(self):
        super(self.__class__, self).__init__() #initialize DGP

        nobs = self.nobs
        y_true, x, exog = self.y_true, self.x, self.exog

        np.random.seed(8765993)
        sigma_noise = 0.1
        y = y_true + sigma_noise * np.random.randn(nobs)

        m = AdditiveModel(x)
        m.fit(y)
        res_gam = m.results #TODO: currently attached to class

        res_ols = OLS(y, exog).fit()

        #Note: there still are some naming inconsistencies
        self.res1 = res1 = Dummy() #for gam model
        #res2 = Dummy() #for benchmark
        self.res2 = res2 = res_ols  #reuse existing ols results, will add additional

        res1.y_pred = res_gam.predict(x)
        res2.y_pred = res_ols.model.predict(res_ols.params, exog)
        res1.y_predshort = res_gam.predict(x[:10])

        slopes = [i for ss in m.smoothers for i in ss.params[1:]]

        const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
        #print const, slopes
        res1.params = np.array([const] + slopes)
    def setup_class(cls):
        super(TestAdditiveModel, cls).setup_class()  #initialize DGP

        nobs = cls.nobs
        y_true, x, exog = cls.y_true, cls.x, cls.exog

        np.random.seed(8765993)
        sigma_noise = 0.1
        y = y_true + sigma_noise * np.random.randn(nobs)

        m = AdditiveModel(x)
        m.fit(y)
        res_gam = m.results  #TODO: currently attached to class

        res_ols = OLS(y, exog).fit()

        #Note: there still are some naming inconsistencies
        cls.res1 = res1 = Dummy()  #for gam model
        #res2 = Dummy() #for benchmark
        cls.res2 = res2 = res_ols  #reuse existing ols results, will add additional

        res1.y_pred = res_gam.predict(x)
        res2.y_pred = res_ols.model.predict(res_ols.params, exog)
        res1.y_predshort = res_gam.predict(x[:10])

        slopes = [i for ss in m.smoothers for i in ss.params[1:]]

        const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
        #print const, slopes
        res1.params = np.array([const] + slopes)
x1.sort()
x2 = R.standard_normal(nobs)
x2.sort()
y = R.standard_normal((nobs, ))

f1 = lambda x1: (x1 + x1**2 - 3 - 1 * x1**3 + 0.1 * np.exp(-x1 / 4.))
f2 = lambda x2: (x2 + x2**2 - 0.1 * np.exp(x2 / 4.))
z = standardize(f1(x1)) + standardize(f2(x2))
z = standardize(z) * 2  # 0.1

y += z
d = np.array([x1, x2]).T

if example == 1:
    print("normal")
    m = AdditiveModel(d)
    m.fit(y)
    x = np.linspace(-2, 2, 50)

    print(m)

    y_pred = m.results.predict(d)
    plt.figure()
    plt.plot(y, '.')
    plt.plot(z, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='AdditiveModel')
    plt.legend()
    plt.title('gam.AdditiveModel')

if example == 2:
    print("binomial")
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*2, x2))
exog = (x[:,:,None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:,idx]  #remove duplicate constant
y_true = exog.sum(1) / 2.
z = y_true #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 1

if example == 1:
    m = AdditiveModel(d)
    m.fit(y)

    y_pred = m.results.predict(d)


for ss in m.smoothers:
    print(ss.params)

res_ols = OLS(y, exog_reduced).fit()
print(res_ols.params)

#assert_almost_equal(y_pred, res_ols.fittedvalues, 3)

if example > 0:
    import matplotlib.pyplot as plt
Exemple #5
0
x2 = R.standard_normal(nobs)
x2.sort()
y = R.standard_normal((nobs,))

f1 = lambda x1: (x1 + x1**2 - 3 - 1 * x1**3 + 0.1 * np.exp(-x1/4.))
f2 = lambda x2: (x2 + x2**2 - 0.1 * np.exp(x2/4.))
z = standardize(f1(x1)) + standardize(f2(x2))
z = standardize(z) * 2 # 0.1

y += z
d = np.array([x1,x2]).T


if example == 1:
    print "normal"
    m = AdditiveModel(d)
    m.fit(y)
    x = np.linspace(-2,2,50)

    print m

    y_pred = m.results.predict(d)
    plt.figure()
    plt.plot(y, '.')
    plt.plot(z, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='AdditiveModel')
    plt.legend()
    plt.title('gam.AdditiveModel')

import scipy.stats, time
Exemple #6
0
def trend_seasonality_spike_strength(x, freq):
    """Strength of trend and seasonality and spike"""
    cont_x = x.dropna()
    length_cont_x = len(cont_x)
    season = peak = trough = np.nan

    if length_cont_x < (2 * freq):
        trend = linearity = curvature = season = spike = peak = trough = np.nan
    else:

        if freq > 1:
            all_stl = sm.tsa.seasonal_decompose(cont_x, freq=freq)
            trend0 = all_stl.trend
            fits = trend0 + all_stl.seasonal
            adj_x = cont_x - fits
            v_adj = adj_x.var()
            detrend = cont_x - trend0
            deseason = cont_x - all_stl.seasonal
            peak = all_stl.seasonal.max()
            trough = all_stl.seasonal.min()
            remainder = all_stl.resid
            season = 0 if detrend.var() < 1e-10 else max(
                0, min(1, 1 - v_adj / detrend.var()))

        else:  # No seasonal component
            tt = np.array([range(length_cont_x)]).T

            _trend0_values = AdditiveModel(tt).fit(cont_x.values).mu
            trend0 = pd.Series(_trend0_values, index=cont_x.index)
            remainder = cont_x - trend0
            deseason = cont_x - trend0
            v_adj = trend0.var()

        trend = 0 if deseason.var() < 1e-10 else max(
            0, min(1, 1 - v_adj / deseason.var()))

        n = len(remainder)
        v = remainder.var()
        d = (remainder - remainder.mean())**2
        varloo = (v * (n - 1) - d) / (n - 2)
        spike = varloo.var()
        pl = Poly()
        pl.fit(range(length_cont_x), degree=2)
        result_pl = pl.predict(range(length_cont_x))  # [:, 2]

        X = sm.add_constant(result_pl, has_constant='add')
        ols_data = trend0.copy()
        ols_data = pd.concat(
            [ols_data.reset_index(drop=True),
             pd.DataFrame(X)],
            axis=1,
            ignore_index=True)
        ols_data.columns = ['Y', 'Intercept', 'X1', 'X2', 'X3']
        result_ols = ols('Y ~ X1 + X2 + X3', data=ols_data.dropna())

        trend_coef = result_ols.fit().params
        linearity = trend_coef[1]
        curvature = trend_coef[2]

    result = dict(trend=trend,
                  spike=spike,
                  peak=peak,
                  trough=trough,
                  linearity=linearity,
                  curvature=curvature)

    if freq > 1:
        result["season"] = season

    return result