Example #1
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog)
        xbeta = 1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()
        offset = self.res_discrete.params[0] * data_exog[:, 0]  # constant's coefficient times the constant column; offset must be 1-D
        #self.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
        self.res = modo.fit(start_params=0.9 * self.res_discrete.params[1:],
                            method='nm',
                            disp=0)
Example #3
    def checkOLS(self, exog, endog, x, y):

        try:
            import scikits.statsmodels.api as sm
        except ImportError:
            import scikits.statsmodels as sm

        reference = sm.OLS(endog, sm.add_constant(exog)).fit()

        result = ols(y=y, x=x)

        assert_almost_equal(reference.params, result._beta_raw)
        assert_almost_equal(reference.df_model, result._df_model_raw)
        assert_almost_equal(reference.df_resid, result._df_resid_raw)
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
        assert_almost_equal(reference.pvalues, result._p_value_raw)
        assert_almost_equal(reference.rsquared, result._r2_raw)
        assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
        assert_almost_equal(reference.resid, result._resid_raw)
        assert_almost_equal(reference.bse, result._std_err_raw)
        assert_almost_equal(reference.t(), result._t_stat_raw)
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

        _check_non_raw_results(result)
 def setupClass(cls):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     res2 = Spector()
     res2.probit()
     cls.res2 = res2
     cls.res1 = Probit(data.endog, data.exog).fit(method="ncg", disp=0, avextol=1e-8)
Example #6
def linmod(y,
           x,
           weights=None,
           sigma=None,
           add_const=True,
           filter_missing=True,
           **kwds):
    '''Get a linear model, with extra options for input handling.

    Dispatches to the regular model class and does not wrap the output.

    If several options are mutually exclusive, for example sigma and
    weights, then the chosen class depends on the order of the checks
    below.
    '''

    if filter_missing:
        y, x = remove_nanrows(y, x)
        #do the same for masked arrays

    if add_const:
        x = sm.add_constant(x, prepend=True)

    if sigma is not None:
        return GLS(y, x, sigma=sigma, **kwds)
    elif weights is not None:
        return WLS(y, x, weights=weights, **kwds)
    else:
        return OLS(y, x, **kwds)
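A minimal usage sketch (not part of the original snippet): the keyword chosen
decides the dispatch. Passing filter_missing=False sidesteps the
remove_nanrows helper, which is defined elsewhere in the same module, as are
OLS, WLS, and GLS.

import numpy as np

np.random.seed(12345)
x = np.random.randn(50, 2)
y = x.sum(1) + np.random.randn(50)

# no keyword -> OLS; weights= -> WLS; sigma= -> GLS
res_ols = linmod(y, x, filter_missing=False).fit()
res_wls = linmod(y, x, weights=np.ones(50), filter_missing=False).fit()
print res_ols.params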
Example #7
 def __init__(self):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     #mod = sm.Probit(data.endog, data.exog)
     self.mod = sm.Logit(data.endog, data.exog)
     #res = mod.fit(method="newton")
     self.params = [np.array([1,0.25,1.4,-7])]
Example #9
class TestRlm(CheckRlmResults):
    from scikits.statsmodels.datasets.stackloss import load
    data = load()  # class attributes for subclasses
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        # Test precisions
        self.decimal_standarderrors = DECIMAL_1
        self.decimal_scale = DECIMAL_3

        results = RLM(self.data.endog, self.data.exog,
                      M=sm.robust.norms.HuberT()).fit()   # default M
        h2 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H2").bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H3").bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        #        r.library('MASS')
        #        self.res2 = RModel(self.data.endog, self.data.exog,
        #                        r.rlm, psi="psi.huber")
        from results.results_rlm import Huber
        self.res2 = Huber()
Example #11
def test_HC_use():
    np.random.seed(0)
    nsample = 100
    x = np.linspace(0, 10, nsample)
    X = sm.add_constant(np.column_stack((x, x**2)), prepend=False)
    beta = np.array([1, 0.1, 10])
    y = np.dot(X, beta) + np.random.normal(size=nsample)

    results = sm.OLS(y, X).fit()

    #test cov_params
    idx = np.array([1,2])
    #need to call HC0_se to have cov_HC0 available
    results.HC0_se
    cov12 = results.cov_params(column=[1,2], cov_p=results.cov_HC0)
    assert_almost_equal(cov12, results.cov_HC0[idx[:,None], idx], decimal=15)

    #test t_test
    tvals = results.params/results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=results.cov_HC0)
    assert_almost_equal(ttest.tvalue, tvals, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    #test f_test
    ftest = results.f_test(np.eye(3)[:-1], cov_p=results.cov_HC0)
    slopes = results.params[:-1]
    idx = np.array([0,1])
    cov_slopes = results.cov_HC0[idx[:,None], idx]
    fval = np.dot(slopes, np.linalg.inv(cov_slopes).dot(slopes))/len(idx)
    assert_almost_equal(ftest.fvalue, fval, decimal=12)
Example #13
 def setupClass(cls):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     cls.res1 = Logit(data.endog, data.exog).fit(method="newton", disp=0)
     res2 = Spector()
     res2.logit()
     cls.res2 = res2
Example #16
 def setupClass(cls):
     from results.results_discrete import RandHIE
     data = sm.datasets.randhie.load()
     exog = sm.add_constant(data.exog.view((float, 9)))
     cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
     res2 = RandHIE()
     res2.poisson()
     cls.res2 = res2
Example #17
def run_WLS():
    import scikits.statsmodels.api as sm
    res = sm.WLS(y, sm.add_constant(x, prepend=True),
                 weights=1. / sigma ** 2).fit()
    print('statsmodels.api.WLS')
    print('popt: {0}'.format(res.params))
    print('perr: {0}'.format(res.bse))
    return res
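run_WLS reads y, x, and sigma from the enclosing scope; a hedged setup sketch
(these globals and their shapes are assumptions, not from the source):

import numpy as np

np.random.seed(0)
x = np.linspace(0, 10, 50)
sigma = 0.5 + 0.5 * x                    # per-point noise level
y = 1.0 + 2.0 * x + sigma * np.random.randn(50)

res = run_WLS()   # prints popt/perr and returns the WLS fit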
 def test_qqplot(self):
     #just test that it runs
     data = sm.datasets.longley.load()
     data.exog = sm.add_constant(data.exog)
     mod_fit = sm.OLS(data.endog, data.exog).fit()
     res = mod_fit.resid
     fig = sm.qqplot(res)
     plt.close(fig)
 def setupClass(cls):
     if iswindows:  # does this work with classmethod?
         raise SkipTest("fmin_cg sometimes fails to converge on windows")
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     res2 = Spector()
     res2.probit()
     cls.res2 = res2
     cls.res1 = Probit(data.endog, data.exog).fit(method="cg", disp=0, maxiter=250)
Example #22
def quadratic_term(list_of_mean, list_of_var):
    """Fit a quadratic term and return its p-value"""
    # Remove records with 0 variance
    log_var = [np.log(x) for x in list_of_var if x > 0]
    log_mean = [np.log(list_of_mean[i]) for i in range(len(list_of_mean))
                if list_of_var[i] > 0]
    log_mean_quad = [x ** 2 for x in log_mean]
    indep_var = np.column_stack((log_mean, log_mean_quad))
    indep_var = sm.add_constant(indep_var, prepend=True)
    quad_res = sm.OLS(log_var, indep_var).fit()
    return quad_res.pvalues[2]
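An illustrative call (made-up data, not from the source): mean-variance pairs
drawn from a clean power law plus noise should show no significant quadratic
term, so the returned p-value is typically large.

import numpy as np

np.random.seed(0)
means = list(np.exp(np.random.randn(30)))
variances = [m ** 2 * np.exp(0.1 * np.random.randn()) for m in means]

print quadratic_term(means, variances)   # usually well above 0.05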
Example #23
def explain_rseq_by_rfreq_and_copy():
    r_rseqs = [motif_ic(getattr(Escherichia_coli, tf))
               for tf in Escherichia_coli.tfs if tf in copy_numbers]
    r_rfreqs = [log2(4.6*10**6/len(getattr(Escherichia_coli, tf)))
                for tf in Escherichia_coli.tfs if tf in copy_numbers]
    copies = [copy_numbers[tf] for tf in Escherichia_coli.tfs if tf in copy_numbers]
    log_copies = map(log2, copies)
    X = sm.add_constant(np.column_stack((r_rfreqs, log_copies)), prepend=True)
    res = sm.OLS(r_rseqs, X).fit()
    print res.summary()
Example #24
def age_design(indices):
    tmp = np.hstack(
        (
            sm.categorical(hrdat["sex"][indices])[:, 2:],
            sm.categorical(hrdat["educ"][indices])[:, 2:],
            sm.categorical(hrdat["PTFT"][indices])[:, 2:],
            hrdat["age"].reshape(n, 1)[indices, :],
            (hrdat["age"] ** 2).reshape(n, 1)[indices, :],
        )
    )
    return sm.add_constant(tmp, prepend=True)
Example #25
 def setupClass(cls):
     from results.results_discrete import Anes
     data = sm.datasets.anes96.load()
     exog = data.exog
     exog[:, 0] = np.log(exog[:, 0] + .1)
     exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
     exog = sm.add_constant(exog)
     cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
     res2 = Anes()
     res2.mnlogit_basezero()
     cls.res2 = res2
Example #26
def cm_test(X):
    """
    Conditional moment test.  X is a flat numpy array.
    """
    betahat, alphahat, shat = ar1_functions.fit(X)
    n = len(X)
    xL = X[:(n-1)]  #  All but the last one
    xF = X[1:]      #  All but the first one
    Z = (xF - betahat - alphahat * xL)**2 
    XX = sm.add_constant(xL)
    out = sm.OLS(Z, XX).fit()
    return np.abs(out.tvalues[0]) > 1.96
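An illustrative call; ar1_functions is the author's own module, assumed to
return (beta, alpha, s) estimates for X[t+1] = beta + alpha*X[t] + e:

import numpy as np

np.random.seed(42)
n = 500
X = np.empty(n)
X[0] = 0.0
for t in range(n - 1):
    X[t + 1] = 0.5 + 0.8 * X[t] + np.random.randn()  # homoskedastic AR(1)

print cm_test(X)   # False expected: no conditional heteroskedasticity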
def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, "..", "..", "genmod", "tests", "results")
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, "iris.csv"), delimiter=",", skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = sm.add_constant(X, prepend=True)
    mod = Logit(y, X)
    assert_raises(PerfectSeparationError, mod.fit)
    def setup(self):
        nsample = 100
        sig = 0.5
        x1 = np.linspace(0, 20, nsample)
        x2 = 5 + 3 * np.random.randn(nsample)
        X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5)**2, np.ones(nsample)]
        beta = [0.5, 0.5, 1, -0.04, 5.]
        y_true = np.dot(X, beta)
        y = y_true + sig * np.random.normal(size=nsample)
        exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)
        res = sm.OLS(y, exog0).fit()

        self.res = res
Example #32
    def __init__(self):
        #from results.results_discrete import Anes
        data = sm.datasets.anes96.load()
        exog = data.exog
        exog[:,0] = np.log(exog[:,0] + .1)
        exog = np.column_stack((exog[:,0],exog[:,2],
            exog[:,5:8]))
        exog = sm.add_constant(exog)
        self.mod = sm.MNLogit(data.endog, exog)

        def loglikeflat(self, params):
            #reshapes flattened params
            return self.loglike(params.reshape(6,6))
        self.mod.loglike = loglikeflat  #need instance method
        self.params = [np.ones((6,6))]
Example #33
    def setupClass(cls):
        #        import scipy
        #        major, minor, micro = scipy.__version__.split('.')[:3]
        #        if int(minor) < 9:
        #            raise SkipTest
        #Skip this unconditionally for release 0.3.0
        #since there are still problems with scipy 0.9.0 on some machines
        #Ralf on mailing list 2011-03-26
        raise SkipTest

        data = sm.datasets.spector.load()
        data.exog = sm.add_constant(data.exog)
        res2 = Spector()
        res2.logit()
        cls.res2 = res2
        cls.res1 = Logit(data.endog, data.exog).fit(method="bfgs", disp=0)
Example #35
def linear_fit_robust(x, y, return_coef=False):
    """
    Fit a straight-line by robust regression (M-estimate).

    If `return_coef=True` returns the slope (m) and intercept (c).
    """
    import scikits.statsmodels.api as sm
    ind, = np.where((~np.isnan(x)) & (~np.isnan(y)))
    x, y = x[ind], y[ind]
    X = sm.add_constant(x, prepend=False)
    y_model = sm.RLM(y, X, M=sm.robust.norms.HuberT())
    y_fit = y_model.fit()
    if return_coef:
        if len(y_fit.params) < 2:
            return (y_fit.params[0], 0.)
        else:
            return y_fit.params[:]
    else:
        return (x, y_fit.fittedvalues)
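A quick check on synthetic data (not from the source); the Huber M-estimate
shrugs off an outlier that would drag an ordinary least-squares line:

import numpy as np

np.random.seed(3)
x = np.arange(20, dtype=float)
y = 2.0 * x + 1.0 + 0.01 * np.random.randn(20)
y[5] = 100.0    # one gross outlier
y[3] = np.nan   # NaNs are filtered out inside the function

m, c = linear_fit_robust(x, y, return_coef=True)
print m, c      # close to (2.0, 1.0) despite the outlier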
Example #38
    def _check_wls(self, x, y, weights):
        result = ols(y=y, x=x, weights=1/weights)

        combined = x.copy()
        combined['__y__'] = y
        combined['__weights__'] = weights
        combined = combined.dropna()

        endog = combined.pop('__y__').values
        aweights = combined.pop('__weights__').values
        exog = sm.add_constant(combined.values, prepend=False)

        sm_result = sm.WLS(endog, exog, weights=1/aweights).fit()

        assert_almost_equal(sm_result.params, result._beta_raw)
        assert_almost_equal(sm_result.resid, result._resid_raw)

        self.checkMovingOLS('rolling', x, y, weights=weights)
        self.checkMovingOLS('expanding', x, y, weights=weights)
Example #39
def regression_analysis(play_arr, dataFunction1, dataFunction2):

    totalBefore = []
    totalAfter = []
    for weekNum in range(10, 15):
        Before, After = regression_weekly(play_arr, weekNum, dataFunction1,
                                          dataFunction2)

        totalBefore = np.concatenate([totalBefore, Before])
        totalAfter = np.concatenate([totalAfter, After])

    slope, intercept, r_value, p_value, err = stats.linregress(
        totalBefore, totalAfter)
    results = sm.OLS(totalAfter, sm.add_constant(totalBefore)).fit()

    print results.summary()

    plt.plot(totalBefore, totalAfter, '.')
    X_plot = np.linspace(0, 1, 100)
    plt.plot(X_plot, X_plot * results.params[0] + results.params[1])
    plt.show()
Example #41
    def calc_factors(self, x=None, keepdim=0, addconst=True):
        '''get factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        '''
        if x is None:
            x = self.exog
        else:
            x = np.asarray(x)
        xred, fact, evals, evecs = pca(x, keepdim=keepdim, normalize=1)
        self.exog_reduced = xred
        #self.factors = fact
        if addconst:
            self.factors = sm.add_constant(fact, prepend=True)
            self.hasconst = 1  #needs to be int
        else:
            self.factors = fact
            self.hasconst = 0  #needs to be int

        self.evals = evals
        self.evecs = evecs
Example #42
class TestRlmHuber(CheckRlmResults):
    from scikits.statsmodels.datasets.stackloss import load
    data = load()
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        results = RLM(self.data.endog, self.data.exog,
                      M=sm.robust.norms.HuberT()).fit(
                          scale_est=sm.robust.scale.HuberScale())
        h2 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(
                     cov="H2",
                     scale_est=sm.robust.scale.HuberScale()).bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(
                     cov="H3",
                     scale_est=sm.robust.scale.HuberScale()).bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        from results.results_rlm import HuberHuber
        self.res2 = HuberHuber()
Example #43
    def checkOLS(self, exog, endog, x, y):
        reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
        result = ols(y=y, x=x)

        # check that sparse version is the same
        sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
        _compare_ols_results(result, sparse_result)

        assert_almost_equal(reference.params, result._beta_raw)
        assert_almost_equal(reference.df_model, result._df_model_raw)
        assert_almost_equal(reference.df_resid, result._df_resid_raw)
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
        assert_almost_equal(reference.pvalues, result._p_value_raw)
        assert_almost_equal(reference.rsquared, result._r2_raw)
        assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
        assert_almost_equal(reference.resid, result._resid_raw)
        assert_almost_equal(reference.bse, result._std_err_raw)
        assert_almost_equal(reference.tvalues, result._t_stat_raw)
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

        _check_non_raw_results(result)
Example #44
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog)
        xbeta = 0.1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        self.mod = PoissonGMLE(data_endog, data_exog)
        self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                                method='nm',
                                disp=0)
    def setupClass(cls):
        cls.decimal_resids = DECIMAL_1 # working resids off a bit in outlier
        cls.decimal_fittedvalues = DECIMAL_3 # ditto

        data = sm.datasets.wfs.load()
        offset = np.log(data.exog[:,-1])
        exog = data.exog[:,:-1]

        # convert dur to dummy
        exog = sm.tools.categorical(exog, col=0, drop=True)
        # drop reference category
        # convert res to dummy
        exog = sm.tools.categorical(exog, col=0, drop=True)
        # convert edu to dummy
        exog = sm.tools.categorical(exog, col=0, drop=True)
        # drop reference categories and add intercept
        exog = sm.add_constant(exog[:,[1,2,3,4,5,7,8,10,11,12]])

        endog = np.round(data.endog)

        cls.res1 = GLM(endog, exog, family=sm.families.Poisson(),
                            offset=offset).fit(tol=1e-12, maxiter=250)
        from results.results_glm import Wfs
        cls.res2 = Wfs()
Example #48
nes = nes[nes[:,0] == 1992] # get only the data for 1992

# keep only non-NaN data
nes = nes[(nes[:,2] < 3) | numpy.isnan(nes[:,2])] # keep rows where presvote < 3 or NaN
nes[:,2] -= 1 # recode presvote to 0/1 (1 = Republican), following Gelman

nes = nes[numpy.isnan(nes[:,2]) == False]
nes = nes[numpy.isnan(nes[:,1]) == False] # drop NaNs

exog = nes[:,1]
endog = nes[:,2]

#endog, exog = sm.tools.drop_missing(endog, exog)

exog = sm.add_constant(exog)

print exog.shape
print endog.shape
print exog
print endog

logit_mod = sm.Logit(endog, exog)
logit_res = logit_mod.fit()
print logit_res.params
print logit_res.bse
print logit_res.prsquared
print logit_res.margeff()
print logit_res.conf_int()
print logit_res.df_resid
Example #49
#choose example
#--------------
example = ['null', 'smalldiff', 'mediumdiff', 'largediff'][1]
example_size = [20, 100][1]
example_groups = ['2', '2-2'][1]
#'2-2': 4 groups,
#       groups 0 and 1 and groups 2 and 3 have identical parameters in DGP

#generate example
#----------------
#np.random.seed(87654589)
nobs = example_size
x1 = 0.1 + np.random.randn(nobs)
y1 = 10 + 15 * x1 + 2 * np.random.randn(nobs)

x1 = sm.add_constant(x1)  #, prepend=True)
#assert_almost_equal(x1, np.vander(x1[:,0],2), 16)
#res1 = sm.OLS(y1, x1).fit()
#print res1.params
#print np.polyfit(x1[:,0], y1, 1)
#assert_almost_equal(res1.params, np.polyfit(x1[:,0], y1, 1), 14)
#print res1.summary(xname=['x1','const1'])

#regression 2
x2 = 0.1 + np.random.randn(nobs)
if example == 'null':
    y2 = 10 + 15 * x2 + 2 * np.random.randn(nobs)  # if H0 is true
elif example == 'smalldiff':
    y2 = 11 + 16 * x2 + 2 * np.random.randn(nobs)
elif example == 'mediumdiff':
    y2 = 12 + 16 * x2 + 2 * np.random.randn(nobs)
Example #50
print len(indf)
print len(indm)

# With each of these models, one would typically run further commands
# to inspect them: summary(), anova (for the model on its own or
# between two models, to see how much additional explanatory power the
# added variables give), and plots of residuals, a qqplot, and a
# histogram of residuals.
# Currently can't do anova or lowess in python, and the qqplots are
# annoying to make.


# Initial model, only look at log(hrwage)~sex
X1 = hrdat["sex"] == 2
X1 = sm.add_constant(X1, prepend=True)
model1 = sm.WLS(np.log(hrdat["hrwage"]), X1, weights=hrdat["A_ERNLWT"])
results1 = model1.fit()

print results1.summary()


# Pre-defining model matrix components for more complicated models
# dat_mat is DATa model MATtrices
n = len(hrdat)
dat_mat = {}
dat_names = {}
factor_vars = ["sex", "educ", "PTFT", "ind", "occ", "marstat", "GEDIV", "race", "hispanic", "disabled"]
for name in factor_vars:
    dat_mat[name], dat_names[name] = sm.categorical(hrdat[name], dictnames=True)
    dat_mat[name] = dat_mat[name][:, 2:]
def anova_ols(y, x):
    X = sm.add_constant(data2dummy(x))
    res = sm.OLS(y, X).fit()
    return res.fvalue, res.f_pvalue, res.rsquared, np.sqrt(res.mse_resid)
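A sketch of calling anova_ols; data2dummy is a helper defined elsewhere in
the script (assumed to expand a categorical array into dummy columns), and
the data below are made up:

import numpy as np

np.random.seed(1)
groups = np.repeat([0, 1, 2], 30)    # three groups of 30 observations
y = np.repeat([0.0, 0.5, 1.0], 30) + np.random.randn(90)

fval, fpval, r2, rmse = anova_ols(y, groups)
print fval, fpval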
Example #52
            pred = np.dot(self.wexog, self.coeffs)
            eps = np.diag((self.wendog - pred) ** 2)
            sigmaSq = np.sum(eps)
            pinvX = np.dot(self.rnorm_cov_params, self.wexog.T)
            self._wncp = np.dot(np.dot(pinvX, eps), pinvX.T) * df / sigmaSq
        return self._wncp

    _coeffs = None
    @property
    def coeffs(self):
        """Estimated parameters"""
        if self._coeffs is None:
            betaLambda = np.dot(self.inv_rwexog, self.rwendog)
            self._coeffs = betaLambda[:self.ncoeffs]
        return self._coeffs

    def fit(self):
        rncp = self.wrnorm_cov_params
        lfit = RegressionResults(self, self.coeffs, normalized_cov_params=rncp)
        return lfit

if __name__ == "__main__":
    import scikits.statsmodels.api as sm
    dta = np.genfromtxt('./rlsdata.txt', names=True)
    design = np.column_stack((dta['Y'], dta['Y']**2,
                              dta[['NE','NC','W','S']].view(float).reshape(dta.shape[0], -1)))
    design = sm.add_constant(design, prepend=True)
    rls_mod = RLS(dta['G'], design, constr=[0,0,0,1,1,1,1])
    rls_fit = rls_mod.fit()
    print rls_fit.params

Example #53
import numpy
from matplotlib import pyplot as plt
from matplotlib import rc
import scikits.statsmodels.api as sm
from scipy import stats
import sim

data = numpy.loadtxt("../doc/gelman/ARM_Data/arsenic/wells.dat",
                     usecols=(1, 2, 3, 4, 5),
                     skiprows=1)

exog = data[:, 2]
endog = data[:, 0]

exog = sm.add_constant(exog, prepend=True)

logit_mod = sm.Logit(endog, exog)
logit_res = logit_mod.fit()

[beta, sigma] = sim.sim_glm(logit_res, 1000)

print numpy.mean(beta[:, 0])
print numpy.mean(beta[:, 1])

plt.plot(beta[:, 0], beta[:, 1], '.')
plt.xlabel('beta_0')
plt.ylabel('beta_1')
plt.show()
Example #54
pl.vlines([pooled], -.75, len(n), linewidth=1, linestyle='dashed', color='k')
pl.axis([-.01, 1.05, -.75, .25 + .5 * len(n)])
pl.text(-2 * xmax / 50, -.5, 'Pooled Estimate', ha='right', va='center')
pl.title('North Africa/Middle East')

pl.savefig('vzv_forest.pdf')

### @export 'OLS'
pl.figure()
import scikits.statsmodels.api as sm

Y = df['Parameter Value'].__array__()
X = .5 * (df['Age Start'] + df['Age End']).__array__()
pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)

XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)

model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)

pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')

Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)

pl.plot(X_pred,
Example #55
"""
Examples: statsmodels.models.RLM

Notes
-----
The syntax for the arguments will be shortened to accept string arguments
in the future.
"""

import scikits.statsmodels.api as sm

### Example for using Huber's T norm with the default
### median absolute deviation scaling

data = sm.datasets.stackloss.load()
data.exog = sm.add_constant(data.exog)
huber_t = sm.RLM(data.endog, data.exog, M=sm.robust.norms.HuberT())
hub_results = huber_t.fit()
print hub_results.params
print hub_results.bse

### or with the 'H2' covariance matrix
hub_results2 = huber_t.fit(cov="H2")
print hub_results2.params
print hub_results2.bse

### Example for using Andrew's Wave norm with
### Huber's Proposal 2 scaling and 'H3' covariance matrix
andrew_mod = sm.RLM(data.endog, data.exog, M=sm.robust.norms.AndrewWave())
andrew_results = andrew_mod.fit(scale_est=sm.robust.scale.HuberScale(),
                                cov="H3")
Example #56
was created in a very ad hoc manner and, due to the idiosyncrasies in R,
it does not work for all types of R models.

There are also R scripts included with most of the datasets to run
some basic models for comparisons of results to statsmodels.
'''

from rpy import r
import numpy as np
import scikits.statsmodels.api as sm

examples = [1, 2]

if 1 in examples:
    data = sm.datasets.longley.load()
    y, x = data.endog, sm.add_constant(data.exog)
    des_cols = ['x.%d' % (i + 1) for i in range(x.shape[1])]
    formula = r('y~%s-1' % '+'.join(des_cols))
    frame = r.data_frame(y=y, x=x)
    results = r.lm(formula, data=frame)
    print results.keys()
    print results['coefficients']

if 2 in examples:
    data2 = sm.datasets.star98.load()
    y2, x2 = data2.endog, sm.add_constant(data2.exog)
    import rpy
    y2 = y2[:, 0] / y2.sum(axis=1)
    des_cols2 = ['x.%d' % (i + 1) for i in range(x2.shape[1])]
    formula2 = r('y~%s-1' % '+'.join(des_cols2))
    frame2 = r.data_frame(y=y2, x=x2)