Ejemplo n.º 1
0
    def setup_class(cls):
        """Fit a cyclic-cubic-spline GAM with ``alpha=0`` via BFGS.

        The smoother is built from ``data_mcycle['times']`` with ``df=[6]``,
        the response is ``data_mcycle['accel']``, and the fit result is
        stored on the class as ``res1``.
        """
        # Not referenced in this fixture: alpha is fixed at 0 below.
        s_scale = 0.0263073404164214  # noqa: F841

        times = data_mcycle['times'].values
        smoother = CyclicCubicSplines(times, df=[6])
        model = GLMGam(data_mcycle['accel'], smoother=smoother, alpha=0)
        cls.res1 = model.fit(method='bfgs')
Ejemplo n.º 2
0
 def setup_class(cls):
     """Fit a centered cyclic-cubic-spline GAM with a constant column via PIRLS.

     The smoother is built from ``data_mcycle['times']`` with ``df=[6]`` and
     ``constraints='center'``; a column of ones with one row per observation
     is passed as the second positional argument of ``GLMGam``.  The fit
     result is stored on the class as ``res1``.
     """
     s_scale = 0.0263073404164214
     times = data_mcycle['times']
     nobs = times.shape[0]

     smoother = CyclicCubicSplines(times.values, df=[6],
                                   constraints='center')
     ones_column = np.ones((nobs, 1))
     # Penalty weight derived from the scale constant: 1 / s_scale / 2.
     penalty_weight = 1 / s_scale / 2
     model = GLMGam(data_mcycle['accel'], ones_column,
                    smoother=smoother, alpha=penalty_weight)
     cls.res1 = model.fit(method='pirls')
Ejemplo n.º 3
0
    def _init(cls):
        """Build the constrained spline design, its penalty and restriction.

        A cyclic cubic spline basis is created from ``data_mcycle['times']``
        (``df=[6]``).  The mean of the basis over axis 0 is used as the
        constraint passed to ``transf_constraints``, and both the basis and
        the first penalty matrix are mapped through the resulting
        transformation.

        Returns
        -------
        tuple
            ``(exog, penalty_matrix, restriction)`` where ``restriction`` is
            ``matrix_sqrt(penalty_matrix)``.
        """
        # TODO: CyclicCubicSplines raises when using pandas
        times = np.asarray(data_mcycle['times'])
        smoother = CyclicCubicSplines(times, df=[6])

        basis_mean = smoother.basis.mean(0)
        transf = transf_constraints(np.atleast_2d(basis_mean))

        exog = smoother.basis.dot(transf)
        raw_penalty = smoother.penalty_matrices[0]
        penalty_matrix = transf.T.dot(raw_penalty).dot(transf)
        return exog, penalty_matrix, matrix_sqrt(penalty_matrix)
Ejemplo n.º 4
0
    def setup_class(cls):
        """Fit a centered cyclic-cubic-spline GAM via BFGS and set references.

        Stores the fit as ``res1``, the reference results
        ``results_pls.pls5`` as ``res2``, and the tolerance / correction
        attributes ``rtol_fitted`` and ``covp_corrfact`` on the class.
        """
        s_scale = 0.0263073404164214

        times = data_mcycle['times'].values
        accel = data_mcycle['accel']
        smoother = CyclicCubicSplines(times, df=[6], constraints='center')
        # Penalty weight derived from the scale constant: 1 / s_scale / 2.
        model = GLMGam(accel, smoother=smoother, alpha=1 / s_scale / 2)

        cls.res1 = model.fit(method='bfgs')
        cls.res2 = results_pls.pls5

        cls.rtol_fitted = 1e-5
        # The factor was 1.0025464444310588 without edf; edf is implemented,
        # so no correction is applied.
        cls.covp_corrfact = 1
Ejemplo n.º 5
0
    def setup_class(cls):
        """Fit the two-smoother autos GAM twice with different fit options.

        The smoother covers ``weight`` and ``hp`` (``df=[6, 5]``, centered);
        ``fuel + drive`` forms the patsy design matrix passed as ``exog``.
        ``res1a`` is fitted with the default ``fit()`` arguments and
        ``res1b`` with ``method='newton'``, each on its own model instance.
        """
        sp = np.array([6.46225497484073, 0.81532465890585])
        s_scale = np.array([2.95973613706629e-07, 0.000126203730141359])

        spline_columns = df_autos[['weight', 'hp']].values
        exog = patsy.dmatrix('fuel + drive', data=df_autos)
        smoother = CyclicCubicSplines(spline_columns, df=[6, 5],
                                      constraints='center')

        # TODO alpha needs to be list
        # Kept as an array here; each model receives its own fresh list.
        penalty_weights = 1 / s_scale * sp / 2

        fits = (('res1a', {}), ('res1b', {'method': 'newton'}))
        for attr_name, fit_kwargs in fits:
            model = GLMGam(df_autos['city_mpg'], exog=exog,
                           smoother=smoother,
                           alpha=penalty_weights.tolist())
            setattr(cls, attr_name, model.fit(**fit_kwargs))
Ejemplo n.º 6
0
def test_cyclic_cubic_splines():
    """Check fitted smooth terms of a two-variable GAM against mgcv output.

    Reads ``results/cubic_cyclic_splines_from_mgcv.csv`` located next to this
    file, fits a ``GLMGam`` with cyclic cubic splines on columns ``x0`` and
    ``x2`` using PIRLS, and compares each mean-centered smooth component
    with the ``s(x0)`` / ``s(x2)`` columns of the reference file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, "results",
                            "cubic_cyclic_splines_from_mgcv.csv")
    reference = pd.read_csv(csv_path)

    x = reference[['x0', 'x2']].values
    y = reference['y'].values
    y_est_mgcv = reference[['y_est']].values  # noqa: F841
    s_mgcv = reference[['s(x0)', 's(x2)']].values

    ccs = CyclicCubicSplines(x, df=[10, 10])
    # TODO: if alpha changes in pirls this should be updated
    penalty_weights = [0.05 / 2, 0.0005 / 2]

    gam_res = GLMGam(y, smoother=ccs, alpha=penalty_weights).fit(
        method='pirls')

    def centered_component(k):
        """Return the k-th smooth component with its mean subtracted."""
        columns = ccs.mask[k]
        component = np.dot(ccs.basis[:, columns], gam_res.params[columns])
        # TODO: Mean has to be removed
        # removing mean could be replaced by options for intercept handling
        component -= component.mean()
        return component

    s0 = centered_component(0)
    s1 = centered_component(1)

    assert_allclose(s0, s_mgcv[:, 0], atol=0.02)
    assert_allclose(s1, s_mgcv[:, 1], atol=0.33)
Ejemplo n.º 7
0
def decompose(x, transform=True):
    """Decompose a series into trend, seasonality and remainder.

    Parameters
    ----------
    x : pandas.Series
        Series to decompose; expected to carry a datetime index.
    transform : bool, optional
        Request a Box-Cox transform before decomposing.  The transform is
        applied only when the smallest non-missing value of ``x`` is >= 0;
        otherwise the series is used unchanged.

    Returns
    -------
    dict
        Keys ``'x'`` (the possibly transformed series), ``'trend'``,
        ``'seasonality'``, ``'remainder'``, ``'transform'`` (``False`` when
        no transform was applied) and ``'lambda'`` (the Box-Cox lambda, or
        NaN when no transform was applied).
    """
    # NOTE(review): if `boxcox` is scipy.stats.boxcox it rejects non-positive
    # data, so a minimum of exactly 0 passes this check and then fails --
    # confirm whether ">= 0" (mirroring the R code) is intended.
    if not (transform and min(x.dropna()) >= 0):
        x_transformed = x
        var_lambda = np.nan
        transform = False
    else:
        # lmbda=None lets boxcox pick the lambda maximizing the log
        # likelihood.  The R version offers that method as well as one that
        # minimizes the coefficient of variation ("guerrero").
        contiguous = na_contiguous(x)
        boxcox_values, var_lambda = boxcox(contiguous, lmbda=None)
        x_transformed = pd.Series(boxcox_values, index=contiguous.index)

    # Seasonal data
    # The R code finds the number of samples per unit time here.  An earlier
    # Python attempt derived it from inverse datetime-index differences
    # (https://stackoverflow.com/questions/36583859/compute-time-difference-of-datetimeindex)
    # but the R call is used instead.
    idx = x_transformed.index

    # This is supposed to be "> 1" but all data results in a frequency of 1.
    # All frequency results in R equal 4, meaning this code block gets
    # evaluated every time in R, so it should always be evaluated here too.
    frequency = int(rstats.frequency(x_transformed).item())
    if frequency == 1:
        # rstats.stl(na_contiguous(x_transformed), s_window='periodic') was
        # tried here but R reported:
        #   Error in (function (x, s.window, s.degree = 0, t.window = NULL,
        #   t.degree = 1, : series is not periodic or has less than two
        #   periods
        stl = sm.tsa.seasonal_decompose(na_contiguous(x_transformed))
        trend = stl.trend
        seasonality = stl.seasonal
        remainder = x_transformed - trend - seasonality
    else:
        # Nonseasonal data
        trend = pd.Series(np.nan, index=idx)
        n_obs = len(x_transformed)
        time_index = pd.Index(list(range(1, n_obs + 1)))

        # Python-specific attempt: these objects are built and the model is
        # fitted, but the results are not used for the returned trend.
        bs = BSplines(time_index, df=[12, 10], degree=[3, 3])
        cs = CyclicCubicSplines(time_index, df=[3, 3])
        alpha = np.array([218.338888])
        gam = GLMGam(x_transformed, smoother=cs, alpha=alpha).fit()

        # R code: fit x ~ s(tt) with mgcv and take the fitted values as the
        # trend on the non-missing positions.
        fmla = Formula('x ~ s(tt)')
        env = fmla.environment
        env['tt'] = time_index
        env['x'] = x_transformed
        fitted_trend = rstats.fitted(rmgcv.gam(fmla))
        trend.loc[~x_transformed.isnull()] = fitted_trend

        seasonality = pd.Series(np.nan, index=x_transformed.index)
        remainder = x_transformed - trend

    return {
        'x': x_transformed,
        'trend': trend,
        'seasonality': seasonality,
        'remainder': remainder,
        'transform': transform,
        'lambda': var_lambda,
    }