Example #1
def reg2(T):
    global i
    print(i)
    i += 1
    # guard against an all-NaN series
    if T.isnull().sum() != T.shape[0]:
        window = 50
        tscv = TimeSeriesSplit(n_splits=T.shape[0] - window + 1)
        new_dd = pd.Series(np.nan, index=T.index)
        for train_index, test_index in tscv.split(T):
            # print("TRAIN:", train_index[-window:], "TEST:", test_index)
            X, Y = T.iloc[train_index[-window:]], bench.iloc[train_index[-window:]]
            # guard against an all-NaN window
            if X.isnull().sum() != X.shape[0]:
                X = sm.add_constant(X)
                model = OLS(Y, X, missing='drop')
                results = model.fit()
                res = results.resid.iloc[-1]
                new_dd.iloc[train_index[-1]] = res
        # compute the final window
        X, Y = T.iloc[-window:], bench.iloc[-window:]
        # guard against an all-NaN window
        if X.isnull().sum() != X.shape[0]:
            X = sm.add_constant(X)
            model = OLS(Y, X, missing='drop')
            results = model.fit()
            res = results.resid.iloc[-1]
            new_dd.iloc[-1] = res
            return new_dd
        else:
            return T
    else:
        return T
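A note on Example #1: the expanding TimeSeriesSplit is only being used to emulate a fixed 50-observation rolling regression of bench on T. A more direct sketch of the same last-window residuals, assuming pandas Series inputs and statsmodels' RollingOLS:

import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

def rolling_resid(T, bench, window=50):
    X = sm.add_constant(T)                          # [const, T] design
    params = RollingOLS(bench, X, window=window, missing='drop').fit().params
    fitted = (params * X).sum(axis=1, min_count=1)  # NaN before the first full window
    return bench - fitted                           # residual at each window's end point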
Example #2
def research_pair_trading_opportunity(currency1, currency2):
    name1, name2 = currency1.name, currency2.name
    print(f"Researching Pair {name1} and {name2}")
    model = OLS(currency1, sm.add_constant(currency2))
    ols_results = model.fit()
    print("Prices OLS results:")
    print(
        f"const: {ols_results.params['const']} || {name2} {ols_results.params[name2]}"
    )

    coint_series = currency1 - currency2 * ols_results.params[name2]
    coint_series.plot()
    plt.show()

    dependent_var = coint_series.diff()[1:]
    independent_var = coint_series.shift(1)[1:]
    independent_var.name = "val_prev"
    model = OLS(dependent_var, sm.add_constant(independent_var))
    ols_results = model.fit()

    print("Diff of Cointegrating Series OLS Results:")
    print(
        f"const: {ols_results.params['const']} || {ols_results.params['val_prev']}"
    )
    print("Mean-Reverse Half-life:",
          -np.log(2) / ols_results.params["val_prev"])
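Background for the half-life printed above: regressing the differenced series on its lagged level, dy_t = c + lam * y_{t-1} + e_t, yields a mean-reversion half-life of -ln(2)/lam for lam < 0. A self-contained check on a synthetic AR(1) series:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
y = np.zeros(1000)
for t in range(1, 1000):                   # AR(1): y_t = 0.9 y_{t-1} + eps_t
    y[t] = 0.9 * y[t - 1] + rng.standard_normal()

dy = np.diff(y)                            # y_t - y_{t-1}
lagged = sm.add_constant(y[:-1])           # const and y_{t-1}
lam = sm.OLS(dy, lagged).fit().params[1]   # estimate of phi - 1 (negative)
print(-np.log(2) / lam)                    # ~ ln(2) / (1 - 0.9) ≈ 6.9 periods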
def test_permuted_ols_statsmodels_withcovar_multivariate(random_state=0):
    """Test permuted_ols with multiple tested variates and covariates.

    It is equivalent to fitting several models with only one tested variate.

    This test has a statsmodels dependence. There seems to be no simple
    alternative way to perform an F-test on a linear model including
    covariates.

    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)
    # design parameters
    n_samples = 50
    n_targets = 10
    n_covars = 2
    # create design
    target_vars = rng.randn(n_samples, n_targets)
    tested_var = rng.randn(n_samples, 1)
    confounding_vars = rng.randn(n_samples, n_covars)
    # statsmodels OLS
    fvals = np.empty((n_targets, 1))
    test_matrix = np.array([[1.] + [0.] * n_covars])
    for i in range(n_targets):
        ols = OLS(target_vars[:, i], np.hstack((tested_var, confounding_vars)))
        fvals[i] = ols.fit().f_test(test_matrix).fvalue[0][0]
    # permuted OLS
    _, orig_scores, _ = permuted_ols(tested_var,
                                     target_vars,
                                     confounding_vars,
                                     model_intercept=False,
                                     n_perm=0,
                                     random_state=random_state)
    assert_almost_equal(fvals, orig_scores, decimal=6)

    ### Adds intercept
    # permuted OLS
    _, orig_scores_addintercept, _ = permuted_ols(tested_var,
                                                  target_vars,
                                                  confounding_vars,
                                                  model_intercept=True,
                                                  n_perm=0,
                                                  random_state=random_state)
    # statsmodels OLS
    confounding_vars = np.hstack((confounding_vars, np.ones((n_samples, 1))))
    fvals_addintercept = np.empty((n_targets, 1))
    test_matrix = np.array([[1.] + [0.] * (n_covars + 1)])
    for i in range(n_targets):
        ols = OLS(target_vars[:, i], np.hstack((tested_var, confounding_vars)))
        fvals_addintercept[i] = ols.fit().f_test(test_matrix).fvalue[0][0]
    assert_array_almost_equal(fvals_addintercept,
                              orig_scores_addintercept,
                              decimal=6)
def test_qr_equiv(cov_info):
    cov_type, cov_kwds = cov_info
    rs = np.random.RandomState(123498)
    x = rs.standard_normal((500, 3))
    b = np.ones(3)
    y = x @ b + rs.standard_normal(500)
    mod = OLS(y, x)
    pinv_fit = mod.fit(cov_type=cov_type, cov_kwds=cov_kwds)
    qr_fit = mod.fit(cov_type=cov_type, cov_kwds=cov_kwds, method="qr")
    assert_allclose(pinv_fit.bse, qr_fit.bse)
    def setup_class(cls):

        cls.cov_type = 'HAC'

        kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)

        #for debugging
        cls.res3 = mod2.fit(cov_type='HAC', cov_kwds={'maxlags': 2})
def test_estimates():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test for start_params
    assert_equal(mod.start_params, 0)

    # Test the RLS coefficient estimates against those from R (quantreg)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
                    results_R.iloc[:8][['beta1', 'beta2']],
                    atol=1e-2,
                    rtol=1e-3)
    assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
                    results_R.iloc[7:18][['beta1', 'beta2']],
                    atol=1e-3,
                    rtol=1e-4)
    assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
                    results_R.iloc[17:][['beta1', 'beta2']],
                    atol=1e-4,
                    rtol=1e-4)

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
Example #9
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + merge_tab['ifu_absmag'][:, mlb_ix].to(
        u.dex(m.bandpass_sol_l_unit), totalmass.bandpass_flux_to_solarunits(absmag_sun_mlb))
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw
    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    mass_pca = merge_tab['mass_in_ifu'] + merge_tab['outer_mass_{}'.format(mltype)]
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd', 'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')

    olsfit = ols.fit()

    return olsfit
Example #10
    def setup(self):
        model = OLS(self.res1.model.endog, self.res1.model.exog)
        # res_ols = self.res1.model.fit(cov_type='cluster',
        res_ols = model.fit(
            cov_type="cluster",
            cov_kwds=dict(
                groups=self.groups,
                use_correction=False,
                use_t=False,
                df_correction=True,
            ),
        )
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = False
        self.res2 = res2.results_cluster_large

        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
Example #11
from numpy import c_
from pandas import DataFrame
from statsmodels.api import add_constant
from statsmodels.regression.linear_model import OLS

def fit_ols(regressors, x):
    X = c_[list(regressors.values())].T
    X1 = DataFrame(X, columns=regressors.keys())
    X1 = add_constant(X1)

    model = OLS(x, X1, missing='drop')
    return model.fit()
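A minimal usage sketch for fit_ols; the regressor names and data below are made up for illustration:

import numpy as np

rng = np.random.default_rng(1)
regressors = {'x1': rng.standard_normal(100), 'x2': rng.standard_normal(100)}
x = 2.0 * regressors['x1'] + rng.standard_normal(100)

results = fit_ols(regressors, x)
print(results.params)  # const, x1, x2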
Example #12
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + merge_tab[f'logsollum_in_ifu_{mlb}']
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw
    std_atten_mwtd = merge_tab['std_atten_mwtd']
    mean_atten_mwtd = merge_tab['mean_atten_mwtd']
    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    outer_mass = (merge_tab[f'outerml_{mltype}'] + \
                  merge_tab[f'logsollum_outer_{mlb}']).to(u.Msun)
    mass_pca = merge_tab['mass_in_ifu'].to(u.Msun) + outer_mass
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd', 'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')

    olsfit = ols.fit()

    return olsfit
Example #13
def port_ret_summary():
    output = {}
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:, 'carry'] = data['carry' + suffix]
            ret = ret.dropna()
            label = prefix + suffix
            t = ret.mean() / ret.std() * len(ret) ** 0.5  # t on mean return
            # regress on carry
            x = sm.add_constant(ret['carry'])
            ols_series = pd.Series(dtype=float)  # explicit dtype avoids a pandas warning
            for i in range(7):
                if i == 6:
                    olslabel = 'HML'
                    y = ret[5] - ret[0]
                else:
                    olslabel = str(i)
                    y = ret[i]
                model = OLS(y, x)
                results = model.fit()
                ols_series = ols_series.combine_first(
                    pd.Series({'alpha to carry': results.params['const'] * 12, 'beta to carry': results.params['carry'],
                               't(alpha to carry)': results.tvalues['const'], 't(beta to carry)': results.tvalues['carry']}
                              ).add_suffix('#' + olslabel))
            output[label] = (ret.mean().multiply(12)).add_prefix('mean return#').combine_first(
                t.add_prefix('t(mean return)#')).combine_first(
                pd.Series({'nobs': len(ret)})).combine_first(ols_series)
    output = pd.DataFrame(output)
    return output
def test_resid_recursive():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the recursive residuals against those from R (strucchange)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.resid_recursive[2:10].T,
                    results_R.iloc[:8]['rec_resid'],
                    atol=1e-2,
                    rtol=1e-3)
    assert_allclose(res.resid_recursive[9:20].T,
                    results_R.iloc[7:18]['rec_resid'],
                    atol=1e-3,
                    rtol=1e-4)
    assert_allclose(res.resid_recursive[19:].T,
                    results_R.iloc[17:]['rec_resid'],
                    atol=1e-4,
                    rtol=1e-4)

    # Test the RLS estimates against those from Stata (cusum6)
    assert_allclose(res.resid_recursive[3:],
                    results_stata.iloc[3:]['rr'],
                    atol=1e-3)

    # Test the RLS estimates against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
    assert_allclose(res.resid_recursive[2:],
                    desired_resid_recursive,
                    atol=1e-4,
                    rtol=1e-4)
Example #15
def regress_out_pupils(raw,
                       ocular_channels=['Fpz', 'Fp1', 'Fp2', 'AF7', 'AF8'],
                       method='PCA'):
    """
    raw: Continuous raw data in MNE format
    ocular_channels: can be labels of EOG channels or EEG channels close to the
        eyes if no EOG was recorded
    method: how to combine the ocular channels. Can be 'PCA', 'mean', or 'median'.
    """

    raw_data = raw.get_data(picks='eeg')
    ocular_data = raw.get_data(picks=ocular_channels)

    if method == 'PCA':
        pca = PCA()
        comps = pca.fit_transform(ocular_data.T)
        ocular_chan = comps[:, 0]
    elif method == 'mean':
        ocular_chan = np.mean(ocular_data, axis=0)
    elif method == 'median':
        ocular_chan = np.median(ocular_data, axis=0)

    for ch in range(raw_data.shape[0]):
        m = OLS(raw_data[ch, :], ocular_chan)
        raw_data[ch, :] -= m.fit().predict()
    raw._data[:raw_data.shape[0], :] = raw_data
    return raw
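A usage sketch for regress_out_pupils, assuming a preloaded MNE Raw recording (the file name is hypothetical):

import mne

raw = mne.io.read_raw_fif('subject01_raw.fif', preload=True)  # hypothetical file
cleaned = regress_out_pupils(raw, ocular_channels=['Fp1', 'Fp2'], method='mean')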
def rew_prev_behaviour(data):

    dm = data['DM'][0]
    results_array = []
    std_err = []

    for s, sess in enumerate(dm):

        DM = dm[s]

        choices = DM[:, 1]

        reward = DM[:, 2]

        reward_2_ago = reward[1:-2]
        reward_3_ago = reward[:-3]
        reward_prev = reward[2:-1]
        reward_current = reward[3:]

        choices_2_ago = 0.5 - choices[1:-2]
        choices_3_ago = 0.5 - choices[:-3]
        choices_prev = 0.5 - choices[2:-1]
        choices_current = 0.5 - choices[3:]

        choices_2_ago_rew = ((choices_2_ago) * (reward_2_ago - 0.5)) * 2
        choices_3_ago_rew = ((choices_3_ago) * (reward_3_ago - 0.5)) * 2
        choices_prev_rew = ((choices_prev) * (reward_prev - 0.5)) * 2

        ones = np.ones(len(choices_current))
        trials = len(choices_current)
        predictors_all = OrderedDict([
            ('1 ago Outcome', reward_prev),
            ('2 ago Outcome', reward_2_ago),
            ('3 ago Outcome', reward_3_ago),
            #      ('4 ago Outcome', reward_4_ago),
            ('1 ago Choice', choices_prev),
            ('2 ago Choice', choices_2_ago),
            ('3 ago Choice', choices_3_ago),
            #     ('4 ago Choice', choices_4_ago),
            ('1 ago Choice Rew', choices_prev_rew),
            ('2 ago Choice Rew', choices_2_ago_rew),
            ('3 ago Choice Rew', choices_3_ago_rew),
            #     ('4 ago Choice Rew', choices_4_ago_rew),
            ('ones', ones)
        ])

        X = np.vstack(predictors_all.values()).T[:trials, :].astype(float)
        #choices_current = choices_current.reshape(trials,1)
        rank = np.linalg.matrix_rank(X)
        n_predictors = X.shape[1]

        #model = sm.Logit(choices_current,X)
        model = OLS(choices_current, X)
        results = model.fit()
        results_array.append(results.params)
        cov = results.cov_params()
        std_err.append(np.sqrt(np.diag(cov)))

    average = np.sum((results_array), 0) / np.sqrt(np.sum(std_err, 0))
    def setup_class(cls):
        cls.cov_type = 'HC0'

        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='HC0')

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='HC0')
Example #18
def ols_autoreg_result(request):
    ar, seasonal, trend, exog, cov_type = request.param
    y, x, endog, exog = gen_ols_regressors(ar, seasonal, trend, exog)
    ar_mod = AutoReg(y, ar, seasonal=seasonal, trend=trend, exog=x)
    ar_res = ar_mod.fit(cov_type=cov_type)
    ols = OLS(endog, exog)
    ols_res = ols.fit(cov_type=cov_type, use_t=False)
    return ar_res, ols_res
    def setup_class(cls):
        cls.cov_type = 'cluster'

        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='cluster', cov_kwds=dict(groups=group))
    def stats(self, parent):
        from statsmodels.regression.linear_model import OLS

        model = OLS(parent.endog, parent.exog)
        result = model.fit()
        q = len(result.params) // 2
        stats = np.abs(result.params[0:q]) - np.abs(result.params[q:])
        return stats
Example #23
def bivariate_expression_plot(
    ax: plt.Axes,
    data: "tuple[np.ndarray, np.ndarray]",
    feature: str,
    feature_name: str = "Feature",
    cmap=plt.cm.magma,
    alpha: float = 0.05,
    distance_scale_factor: float = 1,
    **kwargs,
) -> np.ndarray:

    xs = data[0]
    ys = data[1]

    model_full = OLS(ys, xs, hasconst=True)

    model_x1 = OLS(ys, xs[:, [0, 1]])
    model_x2 = OLS(ys, xs[:, [0, 2]])
    model_0 = OLS(ys, xs[:, 0])

    results_full = model_full.fit()
    results_x1 = model_x1.fit()
    results_x2 = model_x2.fit()
    results_0 = model_0.fit()

    likelihood = np.array(
        [results_full.llf, results_x1.llf, results_x2.llf, results_0.llf])

    insig = np.any(results_full.pvalues > alpha)

    XY, Z, reshape_shape = expression_fields(xs, ys, results_full)

    XY = XY * distance_scale_factor

    plot_field(ax,
               Z.reshape(reshape_shape),
               XY,
               fontsize=kwargs.get("label_fontsize", 15),
               cmap=cmap)

    ax.set_title("{} : {}".format(feature_name, feature) +
                 ("(*)" if insig else ""),
                 fontsize=kwargs.get("title_fontsize", 25))

    return likelihood
def estimate_ols(x, y, constant=True):
    from statsmodels.regression.linear_model import OLS
    if constant:
        reg = OLS(y, add_constant(x))
    else:
        reg = OLS(y, x)  # no intercept
    result = reg.fit()
    betahat = result.params
    return np.array(betahat)
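A quick check of estimate_ols on synthetic data (assumes numpy and statsmodels' add_constant are importable at module level, as the function itself does):

import numpy as np
from statsmodels.api import add_constant

rng = np.random.default_rng(2)
x = rng.standard_normal((200, 2))
y = x @ np.array([1.5, -0.5]) + rng.standard_normal(200)

print(estimate_ols(x, y))                  # ~ [const, 1.5, -0.5]
print(estimate_ols(x, y, constant=False))  # ~ [1.5, -0.5]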
Example #25
def polynomial_regression(df, target_col, cutoff_date, y_max, min_license_year, max_license_year, degree=5, type=""):
    print(f"{'=' * 10}Polynomial Regression {degree} Method Results{'=' * 10}")

    df.reset_index(inplace=True)
    X = df[["date", "treatment"]].astype(int).values
    y = df[[target_col]].values

    polyfeatures = PolynomialFeatures(degree, include_bias=False).fit_transform(X[:, 0].reshape(-1, 1))

    X_c = np.concatenate([polyfeatures, X[:, 1].reshape(-1, 1)], axis=1)

    X_sm = sm.add_constant(X_c.copy())
    lr_stats = OLS(y, X_sm)
    results = lr_stats.fit(method='qr')

    polyreg = LinearRegression()
    polyreg.fit(X_c, y)

    effect = polyreg.coef_[0][-1]
    print(f"Treatment effect on {target_col} is {effect}")
    print(f"CI: {np.round(results.conf_int()[-1], 3)}, pvalue={round(results.pvalues[-1], 3)}")

    plt.scatter(X[:, 0], y, c="black")
    plt.xlim([min_license_year - 0.2, max_license_year + 0.2])  # 2005.8, 2018.2
    plt.ylim([0, y_max])
    X0 = X_c[X_c[:, -1] == 0]
    cutoff_date_polyfeatures = PolynomialFeatures(degree, include_bias=False).fit_transform(np.array([cutoff_date]).reshape(-1, 1))
    X0 = np.concatenate([X0, [list(cutoff_date_polyfeatures[0]) + [0]]])
    X1 = X_c[X_c[:, -1] == 1]
    X1 = np.concatenate([[list(cutoff_date_polyfeatures[0]) + [1]], X1])

    # 300 represents number of points to make between T.min and T.max
    xnew = np.linspace(X0[:, 0].min(), X0[:, 0].max(), 300)
    spl = make_interp_spline(X0[:, 0], polyreg.predict(X0), k=3)  # type: BSpline
    power_smooth = spl(xnew)
    plt.plot(xnew, power_smooth, label="old accompaniment program")
    xnew = np.linspace(X1[:, 0].min(), X1[:, 0].max(), 300)
    spl = make_interp_spline(X1[:, 0], polyreg.predict(X1), k=3)  # type: BSpline
    power_smooth = spl(xnew)
    plt.plot(xnew, power_smooth, label="new accompaniment program")

    # plt.plot(X0[:, 0], polyreg.predict(X0), c="blue", label="old accompaniment program")
    # plt.plot(X1[:, 0], polyreg.predict(X1), c="orange", label="new accompaniment program")
    plt.axvline(x=cutoff_date, linestyle='--', c="black", label="cut-off date")
    plt.xlabel("Year of issued license")
    if target_col == "normalized_number_of_drivers_in_accidents":
        ylabel = "number of drivers in accidents in 2019 per 10K drivers"
    else:
        ylabel = "number of drivers in accidents in 2019"
    plt.ylabel(ylabel)
    plt.title(f"RD by Polynomial Regression w/ degree {degree}")
    plt.legend()
    plt.savefig(f"results/PolynomialRegression_deg_{degree}_{target_col}_{type}.png")
    plt.show()

    return effect
def test_filter():
    # Basic test for filtering
    mod = RecursiveLS(endog, exog)
    res = mod.filter()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def test_endog():
    # Tests for numpy input
    mod = RecursiveLS(endog.values, exog.values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)

    # Tests for 1-dim exog
    mod = RecursiveLS(endog, dta['m1'].values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
Example #29
from patsy import dmatrices
from statsmodels.regression.linear_model import OLS

def fit_linear_model(formula, data_dict):
    """
    Creates a statsmodels OLS model for the R-style (patsy) formula given the
    variables in data_dict (created with make_dict_for_regression).
    Returns the statsmodels results object.
    """
    y, x = dmatrices(formula, data=data_dict, return_type='dataframe')
    model = OLS(y, x)
    return model.fit()
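A usage sketch with a hand-built data dict standing in for make_dict_for_regression (not shown here):

import numpy as np

data_dict = {'y': np.random.randn(50),
             'x1': np.random.randn(50),
             'x2': np.random.randn(50)}

results = fit_linear_model('y ~ x1 + x2', data_dict)
print(results.params)  # Intercept, x1, x2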
Example #31
def table_2_f_test():
    output = {}
    # f-test on whether all mean returns / alphas are the same
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:,'carry'] = data['carry' + suffix]
            ret = ret.dropna()

            y = ret.drop('carry', axis=1).unstack()

            if prefix == 'part1_carry_timed_dollar_port#':
                x = {}
                for i in range(6):
                    x['meandiff#' + str(i)] = y * 0
                    x['meandiff#' + str(i)].loc[i:] = 1
                x = pd.DataFrame(x)
                # test on alpha
                for i in range(6):
                    df = pd.DataFrame(0, index=ret.index, columns=range(6))
                    df[i] = ret['carry']
                    x.loc[:, 'carry#' + str(i)] = df.unstack()

                model = OLS(y, x)
                results = model.fit(cov_type='cluster', cov_kwds={'groups': y.index.get_level_values(1)})
                rmat = np.identity(len(results.params))[1:6, :]
                f_test = results.f_test(rmat)
                label = prefix + suffix + ' f-test on alpha to carry'
                output[label] = pd.Series({'f': f_test.fvalue[0][0], 'pvalue': f_test.pvalue, 'nobs': results.nobs})

            # test on returns
            x = {}
            for i in range(6):
                x['meandiff#' + str(i)] = y * 0
                x['meandiff#' + str(i)].loc[i:] = 1
            x = sm.add_constant(pd.DataFrame(x))
            model = OLS(y, x)
            results = model.fit(cov_type='cluster', cov_kwds={'groups': y.index.get_level_values(1)})
            rmat = np.identity(len(results.params))[1:,:]
            f_test = results.f_test(rmat)
            label = prefix + suffix + ' f-test'
            output[label] = pd.Series({'f': f_test.fvalue[0][0], 'pvalue': f_test.pvalue, 'nobs': results.nobs})
    output = pd.DataFrame(output).T
    return output
    def setup_class(cls):

        cls.cov_type = 'HAC'

        kwds = {'maxlags': 2}
        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)
Example #33
def model_m_L(data):
    # pandas removed .as_matrix(); .to_numpy() is the replacement
    log_m, log_L = np.log10(data).dropna().to_numpy().T
    X = add_constant(log_m)

    model = OLS(log_L, X)
    results = model.fit()

    print(results.summary())

    return results.params
Example #35
    def setup_class(cls):

        cls.cov_type = 'HAC'

        kwds={'kernel': sw.weights_uniform, 'maxlags': 2}
        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

        # check kernel as string
        mod2 = OLS(endog, exog)
        kwds2 = {'kernel': 'uniform', 'maxlags': 2}
        cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
Example #36
    def setup_class(cls):

        cls.cov_type = 'HAC'

        # check kernel specified as string
        kwds = {'kernel': 'bartlett', 'maxlags': 2}
        mod1 = GLM(endog, exog, family=families.Gaussian())
        cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        kwds2 = {'maxlags': 2}
        cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
def q1_lab4(X, Y):
    model = OLS(Y, X)  # linear regression on the design matrix X
    result = model.fit()  # fit the model

    # Question 1.2:
    # q_j = vector of length 8 - the probability of voting for each party in
    # Israel (if ***everybody would vote***)

    potential_per_party = (X * result.params).sum(axis=0)
    total_potential = potential_per_party.sum()  # sum of sums
    q_j_hat = potential_per_party / total_potential  # ratio as derived in class
    return q_j_hat
Example #38
def test_cusum():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the cusum statistics against those from R (strucchange)
    # These values are not even close to ours, to Stata's, or to the alternate
    # statsmodels values
    # assert_allclose(res.cusum, results_R['cusum'])

    # Test the cusum statistics against Stata (cusum6)
    # Note: cusum6 excludes the first 3 elements due to OLS initialization
    # whereas we exclude only the first 2. Also there are initialization
    # differences (as seen above in the recursive residuals).
    # Here we explicitly reverse engineer our cusum to match theirs to show
    # the equivalence
    d = res.nobs_diffuse
    cusum = res.cusum * np.std(res.resid_recursive[d:], ddof=1)
    cusum -= res.resid_recursive[d]
    cusum /= np.std(res.resid_recursive[d + 1:], ddof=1)
    cusum = cusum[1:]
    assert_allclose(cusum,
                    results_stata.iloc[3:]['cusum'],
                    atol=1e-6,
                    rtol=1e-5)

    # Test the cusum statistics against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_cusum = recursive_olsresiduals(res_ols)[-2][1:]
    assert_allclose(res.cusum, desired_cusum, rtol=1e-6)

    # Test the cusum bounds against Stata (cusum6)
    # Again note that cusum6 excludes the first 3 elements, so we need to
    # change the ddof and points.
    actual_bounds = res._cusum_significance_bounds(alpha=0.05,
                                                   ddof=1,
                                                   points=np.arange(
                                                       d + 1, res.nobs))
    desired_bounds = results_stata.iloc[3:][['lw', 'uw']].T
    assert_allclose(actual_bounds, desired_bounds, rtol=1e-6)

    # Test the cusum bounds against statsmodels
    actual_bounds = res._cusum_significance_bounds(alpha=0.05,
                                                   ddof=0,
                                                   points=np.arange(
                                                       d, res.nobs))
    desired_bounds = recursive_olsresiduals(res_ols)[-1]
    assert_allclose(actual_bounds, desired_bounds)

    # Test for invalid calls
    assert_raises(ValueError,
                  res._cusum_squares_significance_bounds,
                  alpha=0.123)
    def _get_start(self):

        # Use OLS to get starting values for mean structure parameters
        model = OLS(self.endog, self.exog)
        result = model.fit()

        m = self.exog_scale.shape[1] + self.exog_smooth.shape[1]

        if self._has_noise:
            m += self.exog_noise.shape[1]

        return np.concatenate((result.params, np.zeros(m)))
Example #40
    def setup_class(cls):
        cls.cov_type = 'hac-panel'
        # time index is just made up to have a test case
        groups = np.repeat(np.arange(5), 7)[:-1]
        mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
        kwds = dict(groups=pd.Series(groups),  # check for #3606
                    maxlags=2,
                    kernel=sw.weights_uniform,
                    use_correction='hac',
                    df_correction=False)
        cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
Example #41
    def setup_class(cls):
        cls.cov_type = 'hac-groupsum'
        # time index is just made up to have a test case
        time = np.tile(np.arange(7), 5)[:-1]
        mod1 = GLM(endog, exog, family=families.Gaussian())
        kwds = dict(time=pd.Series(time),  # check for #3606
                    maxlags=2,
                    use_correction='hac',
                    df_correction=False)
        cls.res1 = mod1.fit(cov_type='hac-groupsum', cov_kwds=kwds)
        cls.res1b = mod1.fit(cov_type='nw-groupsum', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='hac-groupsum', cov_kwds=kwds)
def test_regularized_refit():
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))
    # covariates 0 and 2 matter
    yvec = xmat[:, 0] + xmat[:, 2] + np.random.normal(size=n)
    model1 = OLS(yvec, xmat)
    result1 = model1.fit_regularized(alpha=2., L1_wt=0.5, refit=True)
    model2 = OLS(yvec, xmat[:, [0, 2]])
    result2 = model2.fit()
    ii = [0, 2]
    assert_allclose(result1.params[ii], result2.params)
    assert_allclose(result1.bse[ii], result2.bse)
Example #43
    def setup_class(cls):
        cls.cov_type = 'hac-panel'
        # time index is just made up to have a test case
        time = np.tile(np.arange(7), 5)[:-1]
        mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
        kwds = dict(time=time,
                    maxlags=2,
                    kernel=sw.weights_uniform,
                    use_correction='hac',
                    df_correction=False)
        cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)
        cls.res1b = mod1.fit(cov_type='nw-panel', cov_kwds=kwds)

        mod2 = OLS(endog, exog)
        cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
    def fit(self):
        """
        Fits the model and provides regression results.

        Returns
        -------
        Results : class
            Empirical likelihood regression class

        """
        exog_with = add_constant(self.exog, prepend=True)
        restricted_model = OLS(self.endog, exog_with)
        restricted_fit = restricted_model.fit()
        restricted_el = restricted_fit.el_test(
            np.array([0]), np.array([0]), ret_params=1)
        params = np.squeeze(restricted_el[3])
        beta_hat_llr = restricted_el[0]
        llf = np.sum(np.log(restricted_el[2]))
        return OriginResults(restricted_model, params, beta_hat_llr, llf)
    def structure(self):  # make the chart label which predictor was removed
        '''Reruns the regression by removing one of the predictor columns and
        then plots the residuals versus the target'''

        # The length of the transpose of the predictors array
        # gives the number of predictors in the model
        model_list = []
        for i in range(1, len(self.predictors_array.transpose())):
            temp_target = self.predictors_array[:, i].reshape([len(
                                self.predictors_array), 1])
            temp_model = OLS(temp_target,
                             np.delete(self.predictors_array, i, 1))
            temp_results = temp_model.fit()
            model_list.append(temp_results)
            del temp_target
            del temp_model

        for model in model_list:
            plt.scatter(model.fittedvalues, model.resid)
            plt.show()
Example #48
    def fit(self):
        """
        Fits the model and provides regression results.

        Returns
        -------
        Results: class
            Empirical likelihood regression class

        """
        exog_with = add_constant(self.exog, prepend=True)
        unrestricted_fit = OLS(self.endog, self.exog).fit()
        restricted_model = OLS(self.endog, exog_with)
        restricted_fit = restricted_model.fit()
        restricted_el = restricted_fit.el_test(
            np.array([0]), np.array([0]), ret_params=1)
        params = np.squeeze(restricted_el[3])
        beta_hat_llr = restricted_el[0]
        ls_params = np.hstack((0, unrestricted_fit.params))
        ls_llr = restricted_fit.el_test(ls_params, np.arange(self.nvar + 1, dtype=int))[0]
        return OriginResults(restricted_model, params, beta_hat_llr, ls_llr)
    def setupClass(cls):
        from .results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")

        model_qr = OLS(data.endog, data.exog)
        Q, R = np.linalg.qr(data.exog)
        model_qr.exog_Q, model_qr.exog_R = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = np_matrix_rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2
def test_resid_recursive():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the recursive residuals against those from R (strucchange)
    assert_allclose(res.resid_recursive[2:10].T,
                    results_R.iloc[:8]['rec_resid'])
    assert_allclose(res.resid_recursive[9:20].T,
                    results_R.iloc[7:18]['rec_resid'])
    assert_allclose(res.resid_recursive[19:].T,
                    results_R.iloc[17:]['rec_resid'])

    # Test the RLS estimates against those from Stata (cusum6)
    assert_allclose(res.resid_recursive[3:],
                    results_stata.iloc[3:]['rr'], atol=1e-5, rtol=1e-5)

    # Test the RLS estimates against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
    assert_allclose(res.resid_recursive[2:], desired_resid_recursive)
def test_estimates():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test for start_params
    assert_equal(mod.start_params, 0)


    # Test the RLS coefficient estimates against those from R (quantreg)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
                    results_R.iloc[:8][['beta1', 'beta2']], rtol=1e-5)
    assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
                    results_R.iloc[7:18][['beta1', 'beta2']])
    assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
                    results_R.iloc[17:][['beta1', 'beta2']])

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def test_single_partition():

    # tests that the results make sense if we have a single partition

    np.random.seed(435265)
    N = 200
    p = 10
    m = 1

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    # test regularized OLS v. naive
    db_mod = DistributedModel(m)
    fitOLSdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0})

    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0)  # alpha is a fit_regularized kwarg, not fit()

    assert_allclose(fitOLSdb.params, fitOLS.params)
    assert_allclose(fitOLSnv.params, fitOLS.params)

    # test regularized
    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.1})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0.1)

    assert_allclose(fitOLSnv.params, fitOLS.params)
Example #54
        #using GMM and IV2SLS classes
        #----------------------------

        mod = IVGMM(endog, exog, instrument, nmoms=instrument.shape[1])
        res = mod.fit()
        modgmmols = IVGMM(endog, exog, exog, nmoms=exog.shape[1])
        resgmmols = modgmmols.fit()
        #the next is the same as IV2SLS, (Z'Z)^{-1} as weighting matrix
        modgmmiv = IVGMM(endog, exog, instrument, nmoms=instrument.shape[1]) #same as mod
        resgmmiv = modgmmiv.fitgmm(np.ones(exog.shape[1], float),
                        weights=np.linalg.inv(np.dot(instrument.T, instrument)))
        modls = IV2SLS(endog, exog, instrument)
        resls = modls.fit()
        modols = OLS(endog, exog)
        resols = modols.fit()

        print('\nIV case')
        print('params')
        print('IV2SLS', resls.params)
        print('GMMIV ', resgmmiv)  # .params
        print('GMM   ', res.params)
        print('diff  ', res.params - resls.params)
        print('OLS   ', resols.params)
        print('GMMOLS', resgmmols.params)

        print('\nbse')
        print('IV2SLS', resls.bse)
        print('GMM   ', mod.bse)   # bse currently only attached to model, not results
        print('diff  ', mod.bse - resls.bse)
        print('%-diff', resls.bse / mod.bse * 100 - 100)
    nobs, k_vars = 200, 1
    x = np.random.uniform(-2, 2, size=(nobs, k_vars))
    x.sort()

    order = 3
    exog = x ** np.arange(order + 1)
    beta = np.array([1, 1, 0.1, 0.0])[: order + 1]  # 1. / np.arange(1, order + 2)
    y_true = np.dot(exog, beta)
    y = y_true + sig_e * np.random.normal(size=nobs)
    endog = y

    print "DGP"
    print "nobs=%d, beta=%r, sig_e=%3.1f" % (nobs, beta, sig_e)

    mod_ols = OLS(endog, exog[:, :2])
    res_ols = mod_ols.fit()
    #'cv_ls'[1000, 0.5][0.01, 0.45]
    tst = smke.TestFForm(
        endog,
        exog[:, :2],
        bw=[0.01, 0.45],
        var_type="cc",
        fform=lambda x, p: mod_ols.predict(p, x),
        estimator=lambda y, x: OLS(y, x).fit().params,
        nboot=1000,
    )

    print "bw", tst.bw
    print "tst.test_stat", tst.test_stat
    print tst.sig
    print "tst.boots_results mean, min, max", (
def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # the called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
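For reference, the concentrated Gaussian loglikelihood that OLS reports is llf = -(n/2) * (log(2*pi) + log(ssr/n) + 1). llf_recursive above corresponds to the non-concentrated version, where the normal density is evaluated at an externally estimated scale, which is why the test rebuilds scale_alternative with nobs in the denominator before comparing information criteria.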
Example #57
    def fit(self):
        """estimate the model and compute the Anova table

        Returns
        -------
        AnovaResults instance

        """
        y = self.data[self.depvar].values

        # Construct OLS endog and exog from string using patsy
        within = ['C(%s, Sum)' % i for i in self.within]
        subject = 'C(%s, Sum)' % self.subject
        factors = within + [subject]
        x = patsy.dmatrix('*'.join(factors), data=self.data)
        term_slices = x.design_info.term_name_slices
        for key in term_slices:
            ind = np.array([False]*x.shape[1])
            ind[term_slices[key]] = True
            term_slices[key] = np.array(ind)
        term_exclude = [':'.join(factors)]
        ind = _not_slice(term_slices, term_exclude, x.shape[1])
        x = x[:, ind]

        # Fit OLS
        model = OLS(y, x)
        results = model.fit()
        if model.rank < x.shape[1]:
            raise ValueError('Independent variables are collinear.')
        for i in term_exclude:
            term_slices.pop(i)
        for key in term_slices:
            term_slices[key] = term_slices[key][ind]
        params = results.params
        df_resid = results.df_resid
        ssr = results.ssr

        anova_table = pd.DataFrame(
            {'F Value': [], 'Num DF': [], 'Den DF': [], 'Pr > F': []})

        for key in term_slices:
            if self.subject not in key and key != 'Intercept':
                # Independent variables are orthogonal
                ssr1, df_resid1 = _ssr_reduced_model(
                    y, x, term_slices, params, [key])
                df1 = df_resid1 - df_resid
                msm = (ssr1 - ssr) / df1
                if (key == ':'.join(factors[:-1]) or
                        (key + ':' + subject not in term_slices)):
                    mse = ssr / df_resid
                    df2 = df_resid
                else:
                    ssr1, df_resid1 = _ssr_reduced_model(
                        y, x, term_slices, params,
                        [key + ':' + subject])
                    df2 = df_resid1 - df_resid
                    mse = (ssr1 - ssr) / df2
                F = msm / mse
                p = stats.f.sf(F, df1, df2)
                term = key.replace('C(', '').replace(', Sum)', '')
                anova_table.loc[term, 'F Value'] = F
                anova_table.loc[term, 'Num DF'] = df1
                anova_table.loc[term, 'Den DF'] = df2
                anova_table.loc[term, 'Pr > F'] = p

        return AnovaResults(anova_table.iloc[:, [1, 2, 0, 3]])
Example #58
# initialize the dataset
data = []

# query all contracts
cursor.execute("select distinct vari,deli from contract_daily where deli between '1401' and '1712'")
for vari, deli in cursor.fetchall():
    # query the contract's settlement prices
    cursor.execute("select settle from contract_daily where vari=%s and deli=%s order by day asc", (vari, deli))
    # min-max normalize
    scaler = MinMaxScaler()
    settle = scaler.fit_transform(cursor.fetchall())
    settle = [row[0] for row in settle]
    # estimate the coefficients of the first-order difference equation
    ols = OLS(settle[1:], [[1., x] for x in settle[:-1]])
    result = ols.fit()
    data.append([vari, deli, result.params[0], result.params[1], result.rsquared])

# build a DataFrame
df = pd.DataFrame(data, columns=['vari', 'deli', 'beta0', 'beta1', 'R2'])

# z-score beta0, beta1, R2

# write to file
df.to_csv('ols.csv', index=False)

# close the database connection
cursor.close()
conn.close()
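The "# z-score beta0, beta1, R2" step above is left unimplemented; a minimal sketch of what it might look like:

for col in ['beta0', 'beta1', 'R2']:
    df[col] = (df[col] - df[col].mean()) / df[col].std()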

print('\n\n')
tt = res_hac4.t_test(np.eye(len(res_hac4.params)))
print(tt.summary())
print('\n\n')
print(tt.summary_frame())

print(vars(res_hac4.f_test(np.eye(len(res_hac4.params))[:-1])))

print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=True)))
print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=False)))

# new cov_type can be set in fit method of model

mod_olsg = OLS(g_inv, exogg)
res_hac4b = mod_olsg.fit(cov_type='HAC',
                         cov_kwds=dict(maxlags=4, use_correction=True))
print(res_hac4b.summary())

res_hc1b = mod_olsg.fit(cov_type='HC1')
print(res_hc1b.summary())

# force t-distribution
res_hc1c = mod_olsg.fit(cov_type='HC1', cov_kwds={'use_t':True})
print(res_hc1c.summary())

# cluster-robust standard errors (using the t distribution)
decade = (d2['year'][1:] // 10).astype(int)  # just make up a group variable
res_clu = mod_olsg.fit(cov_type='cluster',
                       cov_kwds={'groups':decade, 'use_t':True})
print(res_clu.summary())
Example #60
def table_2_t_test():
    output = {}
    # t-test on return diffs for dollar ports, carry-timed dollar ports, and its alpha to carry
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:,'carry'] = data['carry' + suffix]
            ret = ret.dropna()
            for i in range(5):
                y = ret[i + 1] - ret[i]
                if prefix == 'part1_carry_timed_dollar_port#':
                    # t test on alpha diff
                    x = sm.add_constant(ret['carry'])
                    model = OLS(y, x)
                    results = model.fit()
                    label = 'part1_carry_timed_dollar_port_alpha_to_carry#' + suffix + str(i + 1) + '-' + str(i)
                    output[label] = pd.Series({'annualized ret diff': results.params['const'] * 12,
                                               't': results.tvalues['const'],
                                               'nobs': results.nobs})
                # t test on return diff
                x = np.ones(len(y))
                model = OLS(y, x)
                results = model.fit()
                label = prefix + suffix + str(i + 1) + '-' + str(i)
                output[label] = pd.Series({'annualized ret diff': results.params['const'] * 12,
                                           't': results.tvalues['const'],
                                           'nobs': results.nobs})

    output = pd.DataFrame(output).T
    return output