def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    #self.mod = PoissonGMLE(data_endog, data_exog)
    #res = self.mod.fit()
    offset = self.res_discrete.params[0] * data_exog[:, 0]  #1d ???
    #self.res = PoissonOffsetGMLE(data_endog, data_exog[:, 1:],
    #                             offset=offset).fit(start_params=np.ones(6)/2.,
    #                                                method='nm')
    modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
    self.res = modo.fit(start_params=0.9 * self.res_discrete.params[1:],
                        method='nm', disp=0)
def checkOLS(self, exog, endog, x, y):
    try:
        import scikits.statsmodels.api as sm
    except ImportError:
        import scikits.statsmodels as sm

    reference = sm.OLS(endog, sm.add_constant(exog)).fit()
    result = ols(y=y, x=x)

    assert_almost_equal(reference.params, result._beta_raw)
    assert_almost_equal(reference.df_model, result._df_model_raw)
    assert_almost_equal(reference.df_resid, result._df_resid_raw)
    assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
    assert_almost_equal(reference.pvalues, result._p_value_raw)
    assert_almost_equal(reference.rsquared, result._r2_raw)
    assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
    assert_almost_equal(reference.resid, result._resid_raw)
    assert_almost_equal(reference.bse, result._std_err_raw)
    assert_almost_equal(reference.t(), result._t_stat_raw)
    assert_almost_equal(reference.cov_params(), result._var_beta_raw)
    assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

    _check_non_raw_results(result)
def setupClass(cls):
    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    res2 = Spector()
    res2.probit()
    cls.res2 = res2
    cls.res1 = Probit(data.endog, data.exog).fit(method="ncg", disp=0,
                                                 avextol=1e-8)
def linmod(y, x, weights=None, sigma=None, add_const=True,
           filter_missing=True, **kwds):
    '''get linear model with extra options for entry

    dispatches to regular model class and does not wrap the output

    If several options are exclusive, for example sigma and weights, then
    the chosen class depends on the implementation sequence.
    '''
    if filter_missing:
        y, x = remove_nanrows(y, x)
        #do the same for masked arrays

    if add_const:
        x = sm.add_constant(x, prepend=True)

    if sigma is not None:
        return GLS(y, x, sigma=sigma, **kwds)
    elif weights is not None:
        return WLS(y, x, weights=weights, **kwds)
    else:
        return OLS(y, x, **kwds)
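# Usage sketch for linmod with hypothetical data; assumes the module-level
# imports of OLS/WLS/GLS and the remove_nanrows helper referenced above are
# available. Passing one of the exclusive options selects the model class.
def example_linmod():
    import numpy as np
    y = np.random.randn(50)
    x = np.random.randn(50, 2)
    res_ols = linmod(y, x).fit()                       # no options -> OLS
    res_wls = linmod(y, x, weights=np.ones(50)).fit()  # weights -> WLS
    print(res_ols.params)
    print(res_wls.params)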
def __init__(self):
    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    #mod = sm.Probit(data.endog, data.exog)
    self.mod = sm.Logit(data.endog, data.exog)
    #res = mod.fit(method="newton")
    self.params = [np.array([1, 0.25, 1.4, -7])]
def setupClass(cls):
    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    cls.res1 = Logit(data.endog, data.exog).fit(method="newton", disp=0)
    res2 = Spector()
    res2.logit()
    cls.res2 = res2
class TestRlm(CheckRlmResults):
    from scikits.statsmodels.datasets.stackloss import load
    data = load()   # class attributes for subclasses
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        # Test precisions
        self.decimal_standarderrors = DECIMAL_1
        self.decimal_scale = DECIMAL_3

        results = RLM(self.data.endog, self.data.exog,
                      M=sm.robust.norms.HuberT()).fit()   # default M
        h2 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H2").bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H3").bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        #    r.library('MASS')
        #    self.res2 = RModel(self.data.endog, self.data.exog,
        #                       r.rlm, psi="psi.huber")
        from results.results_rlm import Huber
        self.res2 = Huber()
def age_design(indices):
    tmp = np.hstack((sm.categorical(hrdat['sex'][indices])[:, 2:],
                     sm.categorical(hrdat['educ'][indices])[:, 2:],
                     sm.categorical(hrdat['PTFT'][indices])[:, 2:],
                     hrdat['age'].reshape(n, 1)[indices, :],
                     (hrdat['age']**2).reshape(n, 1)[indices, :]))
    return sm.add_constant(tmp, prepend=True)
def test_HC_use():
    np.random.seed(0)
    nsample = 100
    x = np.linspace(0, 10, 100)
    X = sm.add_constant(np.column_stack((x, x**2)), prepend=False)
    beta = np.array([1, 0.1, 10])
    y = np.dot(X, beta) + np.random.normal(size=nsample)

    results = sm.OLS(y, X).fit()

    #test cov_params
    idx = np.array([1, 2])
    #need to call HC0_se to have cov_HC0 available
    results.HC0_se
    cov12 = results.cov_params(column=[1, 2], cov_p=results.cov_HC0)
    assert_almost_equal(cov12, results.cov_HC0[idx[:, None], idx], decimal=15)

    #test t_test
    tvals = results.params / results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=results.cov_HC0)
    assert_almost_equal(ttest.tvalue, tvals, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    #test f_test
    ftest = results.f_test(np.eye(3)[:-1], cov_p=results.cov_HC0)
    slopes = results.params[:-1]
    idx = np.array([0, 1])
    cov_slopes = results.cov_HC0[idx[:, None], idx]
    fval = np.dot(slopes, np.linalg.inv(cov_slopes).dot(slopes)) / len(idx)
    assert_almost_equal(ftest.fvalue, fval, decimal=12)
def test_qqplot(self):
    #just test that it runs
    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    mod_fit = sm.OLS(data.endog, data.exog).fit()
    res = mod_fit.resid
    fig = sm.qqplot(res)
    plt.close(fig)
def setupClass(cls):
    from results.results_discrete import RandHIE
    data = sm.datasets.randhie.load()
    exog = sm.add_constant(data.exog.view((float, 9)))
    cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
    res2 = RandHIE()
    res2.poisson()
    cls.res2 = res2
def run_WLS():
    import scikits.statsmodels.api as sm
    res = sm.WLS(y, sm.add_constant(x, prepend=True),
                 weights=1. / sigma ** 2).fit()
    print('statsmodels.api.WLS')
    print('popt: {0}'.format(res.params))
    print('perr: {0}'.format(res.bse))
    return res
def setupClass(cls):
    if iswindows:   # does this work with classmethod?
        raise SkipTest("fmin_cg sometimes fails to converge on windows")
    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    res2 = Spector()
    res2.probit()
    cls.res2 = res2
    cls.res1 = Probit(data.endog, data.exog).fit(method="cg", disp=0,
                                                 maxiter=250)
def quadratic_term(list_of_mean, list_of_var):
    """Regress log(variance) on log(mean) and its square, and return the
    p-value of the quadratic term."""
    # Remove records with 0 variance
    log_var = [np.log(x) for x in list_of_var if x > 0]
    log_mean = [np.log(list_of_mean[i]) for i in range(len(list_of_mean))
                if list_of_var[i] > 0]
    log_mean_quad = [x ** 2 for x in log_mean]
    indep_var = np.column_stack((log_mean, log_mean_quad))
    indep_var = sm.add_constant(indep_var, prepend=True)
    quad_res = sm.OLS(log_var, indep_var).fit()
    return quad_res.pvalues[2]
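# Usage sketch for quadratic_term with hypothetical data: if the variance is
# a pure power of the mean (a straight line in log-log space), the quadratic
# term should be insignificant, so the returned p-value is typically large.
def example_quadratic_term():
    np.random.seed(42)
    means = np.random.uniform(1., 100., 50)
    variances = 2. * means ** 1.5 * np.exp(0.1 * np.random.randn(50))
    print(quadratic_term(list(means), list(variances)))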
def explain_rseq_by_rfreq_and_copy():
    r_rseqs = [motif_ic(getattr(Escherichia_coli, tf))
               for tf in Escherichia_coli.tfs if tf in copy_numbers]
    r_rfreqs = [log2(4.6 * 10**6 / len(getattr(Escherichia_coli, tf)))
                for tf in Escherichia_coli.tfs if tf in copy_numbers]
    copies = [copy_numbers[tf] for tf in Escherichia_coli.tfs
              if tf in copy_numbers]
    log_copies = map(log2, copies)
    X = sm.add_constant(np.column_stack((r_rfreqs, log_copies)), prepend=True)
    res = sm.OLS(r_rseqs, X).fit()
    print res.summary()
def setupClass(cls):
    from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    exog[:, 0] = np.log(exog[:, 0] + .1)
    exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
    exog = sm.add_constant(exog)
    cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
    res2 = Anes()
    res2.mnlogit_basezero()
    cls.res2 = res2
def cm_test(X):
    """
    Conditional moment test.  X is a flat numpy array.
    """
    betahat, alphahat, shat = ar1_functions.fit(X)
    n = len(X)
    xL = X[:(n - 1)]   # All but the last one
    xF = X[1:]         # All but the first one
    Z = (xF - betahat - alphahat * xL)**2
    XX = sm.add_constant(xL)
    out = sm.OLS(Z, XX).fit()
    return np.abs(out.tvalues[0]) > 1.96
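# Usage sketch for cm_test with simulated data; assumes the ar1_functions
# module used above is importable. Under a homoskedastic AR(1) null the
# squared residuals are unrelated to the lagged state, so the test should
# return True only at roughly the nominal 5% rate.
def example_cm_test():
    np.random.seed(1234)
    n = 500
    X = np.empty(n)
    X[0] = 0.
    for t in range(1, n):
        X[t] = 1. + 0.5 * X[t - 1] + np.random.randn()
    print(cm_test(X))   # usually False under the null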
def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, "..", "..", "genmod", "tests", "results")
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, "iris.csv"), delimiter=",",
                         skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = sm.add_constant(X, prepend=True)
    mod = Logit(y, X)
    assert_raises(PerfectSeparationError, mod.fit)
def setup(self):
    nsample = 100
    sig = 0.5
    x1 = np.linspace(0, 20, nsample)
    x2 = 5 + 3 * np.random.randn(nsample)
    X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5)**2, np.ones(nsample)]
    beta = [0.5, 0.5, 1, -0.04, 5.]
    y_true = np.dot(X, beta)
    y = y_true + sig * np.random.normal(size=nsample)
    exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)
    res = sm.OLS(y, exog0).fit()
    self.res = res
def __init__(self):
    #from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    exog[:, 0] = np.log(exog[:, 0] + .1)
    exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
    exog = sm.add_constant(exog)
    self.mod = sm.MNLogit(data.endog, exog)

    def loglikeflat(self, params):
        #reshapes flattened params
        return self.loglike(params.reshape(6, 6))
    #need instance method: bind explicitly, since a plain function assigned
    #to an instance attribute would not receive `self`
    import types
    self.mod.loglike = types.MethodType(loglikeflat, self.mod)
    self.params = [np.ones((6, 6))]
def setupClass(cls):
    #import scipy
    #major, minor, micro = scipy.__version__.split('.')[:3]
    #if int(minor) < 9:
    #    raise SkipTest
    #Skip this unconditionally for release 0.3.0
    #since there are still problems with scipy 0.9.0 on some machines
    #Ralf on mailing list 2011-03-26
    raise SkipTest
    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    res2 = Spector()
    res2.logit()
    cls.res2 = res2
    cls.res1 = Logit(data.endog, data.exog).fit(method="bfgs", disp=0)
def linear_fit_robust(x, y, return_coef=False):
    """
    Fit a straight line by robust regression (M-estimate).

    If `return_coef=True`, returns the slope (m) and intercept (c).
    """
    import scikits.statsmodels.api as sm
    ind, = np.where((~np.isnan(x)) & (~np.isnan(y)))
    x, y = x[ind], y[ind]
    X = sm.add_constant(x, prepend=False)
    y_model = sm.RLM(y, X, M=sm.robust.norms.HuberT())
    y_fit = y_model.fit()
    if return_coef:
        if len(y_fit.params) < 2:
            return (y_fit.params[0], 0.)
        else:
            return y_fit.params[:]
    else:
        return (x, y_fit.fittedvalues)
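# Usage sketch for linear_fit_robust with hypothetical contaminated data;
# assumes numpy is imported as np, as in the function above. The Huber
# M-estimate should recover the slope and intercept despite the outliers.
def example_linear_fit_robust():
    np.random.seed(0)
    x = np.linspace(0., 10., 100)
    y = 2.5 * x + 1. + 0.3 * np.random.randn(100)
    y[::10] += 25.   # inject gross outliers
    m, c = linear_fit_robust(x, y, return_coef=True)
    print(m, c)      # close to (2.5, 1.0)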
def regression_analysis(play_arr, dataFunction1, dataFunction2):
    totalBefore = []
    totalAfter = []
    for weekNum in range(10, 15):
        Before, After = regression_weekly(play_arr, weekNum, dataFunction1,
                                          dataFunction2)
        totalBefore = np.concatenate([totalBefore, Before])
        totalAfter = np.concatenate([totalAfter, After])
    slope, intercept, r_value, p_value, err = stats.linregress(totalBefore,
                                                               totalAfter)
    results = sm.OLS(totalAfter, sm.add_constant(totalBefore)).fit()
    print results.summary()
    plt.plot(totalBefore, totalAfter, '.')
    X_plot = np.linspace(0, 1, 100)
    plt.plot(X_plot, X_plot * results.params[0] + results.params[1])
    plt.show()
def _check_wls(self, x, y, weights):
    result = ols(y=y, x=x, weights=1 / weights)

    combined = x.copy()
    combined['__y__'] = y
    combined['__weights__'] = weights
    combined = combined.dropna()

    endog = combined.pop('__y__').values
    aweights = combined.pop('__weights__').values
    exog = sm.add_constant(combined.values, prepend=False)

    sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit()

    assert_almost_equal(sm_result.params, result._beta_raw)
    assert_almost_equal(sm_result.resid, result._resid_raw)

    self.checkMovingOLS('rolling', x, y, weights=weights)
    self.checkMovingOLS('expanding', x, y, weights=weights)
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 0.1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # estimate discretemod.Poisson as benchmark
    self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    self.res_glm = mod_glm.fit()

    # estimate generic MLE
    self.mod = PoissonGMLE(data_endog, data_exog)
    self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                            method='nm', disp=0)
def calc_factors(self, x=None, keepdim=0, addconst=True):
    '''get factor decomposition of exogenous variables

    This uses principal component analysis to obtain the factors. The
    number of factors kept is the maximum that will be considered in the
    regression.
    '''
    if x is None:
        x = self.exog
    else:
        x = np.asarray(x)
    xred, fact, evals, evecs = pca(x, keepdim=keepdim, normalize=1)
    self.exog_reduced = xred
    #self.factors = fact
    if addconst:
        self.factors = sm.add_constant(fact, prepend=True)
        self.hasconst = 1  #needs to be int
    else:
        self.factors = fact
        self.hasconst = 0  #needs to be int
    self.evals = evals
    self.evecs = evecs
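# Minimal numpy sketch of the decomposition calc_factors relies on. The pca()
# helper above is assumed to come from the statsmodels sandbox and may scale
# or normalize differently; here keepdim=0 is taken to mean "keep everything".
def pca_sketch(x, keepdim=0):
    import numpy as np
    xc = x - x.mean(0)                      # center the columns
    u, s, vt = np.linalg.svd(xc, full_matrices=False)
    if keepdim:
        u, s, vt = u[:, :keepdim], s[:keepdim], vt[:keepdim]
    fact = u * s                            # factor scores
    xred = np.dot(fact, vt) + x.mean(0)     # reduced-rank reconstruction
    evals = s**2 / (len(x) - 1.)            # eigenvalues of the covariance
    return xred, fact, evals, vt.T          # eigenvectors as columns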
class TestRlmHuber(CheckRlmResults):
    from scikits.statsmodels.datasets.stackloss import load
    data = load()
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        results = RLM(self.data.endog, self.data.exog,
                      M=sm.robust.norms.HuberT()).fit(
                          scale_est=sm.robust.scale.HuberScale())
        h2 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(
                     cov="H2",
                     scale_est=sm.robust.scale.HuberScale()).bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(
                     cov="H3",
                     scale_est=sm.robust.scale.HuberScale()).bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        from results.results_rlm import HuberHuber
        self.res2 = HuberHuber()
def checkOLS(self, exog, endog, x, y):
    reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
    result = ols(y=y, x=x)

    # check that sparse version is the same
    sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
    _compare_ols_results(result, sparse_result)

    assert_almost_equal(reference.params, result._beta_raw)
    assert_almost_equal(reference.df_model, result._df_model_raw)
    assert_almost_equal(reference.df_resid, result._df_resid_raw)
    assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
    assert_almost_equal(reference.pvalues, result._p_value_raw)
    assert_almost_equal(reference.rsquared, result._r2_raw)
    assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
    assert_almost_equal(reference.resid, result._resid_raw)
    assert_almost_equal(reference.bse, result._std_err_raw)
    assert_almost_equal(reference.tvalues, result._t_stat_raw)
    assert_almost_equal(reference.cov_params(), result._var_beta_raw)
    assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

    _check_non_raw_results(result)
def setupClass(cls):
    cls.decimal_resids = DECIMAL_1  # working resids off a bit in outlier
    cls.decimal_fittedvalues = DECIMAL_3  # ditto
    data = sm.datasets.wfs.load()
    offset = np.log(data.exog[:, -1])
    exog = data.exog[:, :-1]
    # convert dur to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # drop reference category
    # convert res to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # convert edu to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # drop reference categories and add intercept
    exog = sm.add_constant(exog[:, [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]])
    endog = np.round(data.endog)
    cls.res1 = GLM(endog, exog, family=sm.families.Poisson(),
                   offset=offset).fit(tol=1e-12, maxiter=250)
    from results.results_glm import Wfs
    cls.res2 = Wfs()
nes = nes[nes[:, 0] == 1992]   # get only the data for 1992

# keep rows where presvote < 3 or is NaN (NaNs are dropped below)
nes = nes[(nes[:, 2] < 3) | numpy.isnan(nes[:, 2])]
nes[:, 2] -= 1   # convert pres vals into 0 or 1 for republicans, like gelman

# drop NaNs
nes = nes[numpy.isnan(nes[:, 2]) == False]
nes = nes[numpy.isnan(nes[:, 1]) == False]
exog = nes[:, 1]
endog = nes[:, 2]
#endog, exog = sm.tools.drop_missing(endog, exog)
exog = sm.add_constant(exog)

print exog.shape
print endog.shape
print exog
print endog

logit_mod = sm.Logit(endog, exog)
logit_res = logit_mod.fit()
print logit_res.params
print logit_res.bse
print logit_res.prsquared
print logit_res.margeff()
print logit_res.conf_int()
print logit_res.df_resid
#choose example
#--------------
example = ['null', 'smalldiff', 'mediumdiff', 'largediff'][1]
example_size = [20, 100][1]
example_groups = ['2', '2-2'][1]
#'2-2': 4 groups,
#       groups 0 and 1 and groups 2 and 3 have identical parameters in DGP

#generate example
#----------------
#np.random.seed(87654589)
nobs = example_size
x1 = 0.1 + np.random.randn(nobs)
y1 = 10 + 15 * x1 + 2 * np.random.randn(nobs)

x1 = sm.add_constant(x1)  #, prepend=True)

#assert_almost_equal(x1, np.vander(x1[:,0],2), 16)
#res1 = sm.OLS(y1, x1).fit()
#print res1.params
#print np.polyfit(x1[:,0], y1, 1)
#assert_almost_equal(res1.params, np.polyfit(x1[:,0], y1, 1), 14)
#print res1.summary(xname=['x1','const1'])

#regression 2
x2 = 0.1 + np.random.randn(nobs)

if example == 'null':
    y2 = 10 + 15 * x2 + 2 * np.random.randn(nobs)  # if H0 is true
elif example == 'smalldiff':
    y2 = 11 + 16 * x2 + 2 * np.random.randn(nobs)
elif example == 'mediumdiff':
    y2 = 12 + 16 * x2 + 2 * np.random.randn(nobs)
print len(indf)
print len(indm)

# With each of these models, typically do some commands to look more at the
# models, like summary(), anova for the model on its own or between two models
# to see how much additional explanatory power you get with the added
# variables, and plots to look at residuals, qqplot, and hist of residuals.
# Currently can't do anova or lowess in python, and the qqplots are annoying
# to make.

# Initial model, only look at log(hrwage)~sex
X1 = hrdat["sex"] == 2
X1 = sm.add_constant(X1, prepend=True)
model1 = sm.WLS(np.log(hrdat["hrwage"]), X1, weights=hrdat["A_ERNLWT"])
results1 = model1.fit()
print results1.summary()

# Pre-defining model matrix components for more complicated models
# dat_mat is DATa model MATrices
n = len(hrdat)
dat_mat = {}
dat_names = {}
factor_vars = ["sex", "educ", "PTFT", "ind", "occ", "marstat", "GEDIV",
               "race", "hispanic", "disabled"]
for name in factor_vars:
    dat_mat[name], dat_names[name] = sm.categorical(hrdat[name],
                                                    dictnames=True)
    dat_mat[name] = dat_mat[name][:, 2:]
def anova_ols(y, x):
    X = sm.add_constant(data2dummy(x))
    res = sm.OLS(y, X).fit()
    return res.fvalue, res.f_pvalue, res.rsquared, np.sqrt(res.mse_resid)
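# Usage sketch for anova_ols. data2dummy is assumed to be this module's helper
# that expands group labels into dummy columns (dropping one reference level so
# the design stays full rank once a constant is added); a minimal stand-in and
# a hypothetical three-group example:
def data2dummy_sketch(x):
    groups = np.unique(x)
    return np.column_stack([(x == g).astype(float) for g in groups[:-1]])

def example_anova_ols():
    np.random.seed(3)
    x = np.repeat([0, 1, 2], 30)   # three groups of 30
    y = np.repeat([0., 0.5, 1.], 30) + np.random.randn(90)
    X = sm.add_constant(data2dummy_sketch(x))
    res = sm.OLS(y, X).fit()
    print(res.fvalue, res.f_pvalue)   # one-way ANOVA via OLS on dummies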
        pred = np.dot(self.wexog, self.coeffs)
        eps = np.diag((self.wendog - pred) ** 2)
        sigmaSq = np.sum(eps)
        pinvX = np.dot(self.rnorm_cov_params, self.wexog.T)
        self._wncp = np.dot(np.dot(pinvX, eps), pinvX.T) * df / sigmaSq
        return self._wncp

    _coeffs = None

    @property
    def coeffs(self):
        """Estimated parameters"""
        if self._coeffs is None:
            betaLambda = np.dot(self.inv_rwexog, self.rwendog)
            self._coeffs = betaLambda[:self.ncoeffs]
        return self._coeffs

    def fit(self):
        rncp = self.wrnorm_cov_params
        lfit = RegressionResults(self, self.coeffs,
                                 normalized_cov_params=rncp)
        return lfit


if __name__ == "__main__":
    import scikits.statsmodels.api as sm
    dta = np.genfromtxt('./rlsdata.txt', names=True)
    design = np.column_stack((dta['Y'], dta['Y']**2,
                              dta[['NE', 'NC', 'W',
                                   'S']].view(float).reshape(dta.shape[0], -1)))
    design = sm.add_constant(design, prepend=True)
    rls_mod = RLS(dta['G'], design, constr=[0, 0, 0, 1, 1, 1, 1])
    rls_fit = rls_mod.fit()
    print rls_fit.params
import numpy
from matplotlib import pyplot as plt
from matplotlib import rc
import scikits.statsmodels.api as sm
from scipy import stats

import sim

data = numpy.loadtxt("../doc/gelman/ARM_Data/arsenic/wells.dat",
                     usecols=(1, 2, 3, 4, 5), skiprows=1)
exog = data[:, 2]
endog = data[:, 0]
exog = sm.add_constant(exog, prepend=True)

logit_mod = sm.Logit(endog, exog)
logit_res = logit_mod.fit()

[beta, sigma] = sim.sim_glm(logit_res, 1000)
print numpy.mean(beta[:, 0])
print numpy.mean(beta[:, 1])

plt.plot(beta[:, 0], beta[:, 1], '.')
plt.xlabel('beta_0')
plt.ylabel('beta_1')
plt.show()
pl.vlines([pooled], -.75, len(n), linewidth=1, linestyle='dashed', color='k')
pl.axis([-.01, 1.05, -.75, .25 + .5 * len(n)])
pl.text(-2 * xmax / 50, -.5, 'Pooled Estimate', ha='right', va='center')
pl.title('North Africa/Middle East')
pl.savefig('vzv_forest.pdf')


### @export 'OLS'
pl.figure()

import scikits.statsmodels.api as sm

Y = df['Parameter Value'].__array__()
X = .5 * (df['Age Start'] + df['Age End']).__array__()
pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)

XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)

model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)
pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')

Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)
pl.plot(X_pred,
""" Examples: statsmodels.models.RLM Notes ----- The syntax for the arguments will be shortened to accept string arguments in the future. """ import scikits.statsmodels.api as sm ### Example for using Huber's T norm with the default ### median absolute deviation scaling data = sm.datasets.stackloss.load() data.exog = sm.add_constant(data.exog) huber_t = sm.RLM(data.endog, data.exog, M=sm.robust.norms.HuberT()) hub_results = huber_t.fit() print hub_results.params print hub_results.bse ### or with the 'H2' covariance matrix hub_results2 = huber_t.fit(cov="H2") print hub_results2.params print hub_results2.bse ### Example for using Andrew's Wave norm with ### Huber's Proposal 2 scaling and 'H3' covariance matrix andrew_mod = sm.RLM(data.endog, data.exog, M=sm.robust.norms.AndrewWave()) andrew_results = andrew_mod.fit(scale_est=sm.robust.scale.HuberScale(), cov="H3")
was created in a very ad hoc manner and due to the idiosyncrasies in R
it does not work for all types of R models.

There are also R scripts included with most of the datasets to run some
basic models for comparisons of results to statsmodels.
'''

from rpy import r
import numpy as np
import scikits.statsmodels.api as sm

examples = [1, 2]

if 1 in examples:
    data = sm.datasets.longley.load()
    y, x = data.endog, sm.add_constant(data.exog)
    des_cols = ['x.%d' % (i + 1) for i in range(x.shape[1])]
    formula = r('y~%s-1' % '+'.join(des_cols))
    frame = r.data_frame(y=y, x=x)
    results = r.lm(formula, data=frame)
    print results.keys()
    print results['coefficients']

if 2 in examples:
    data2 = sm.datasets.star98.load()
    y2, x2 = data2.endog, sm.add_constant(data2.exog)
    import rpy
    y2 = y2[:, 0] / y2.sum(axis=1)
    des_cols2 = ['x.%d' % (i + 1) for i in range(x2.shape[1])]
    formula2 = r('y~%s-1' % '+'.join(des_cols2))
    frame2 = r.data_frame(y=y2, x=x2)