Ejemplo n.º 1
0
    def __init__(self,
                 y,
                 x,
                 intercept=True,
                 weights=None,
                 nw_lags=None,
                 nw_overlap=False):
        """Store the regression inputs, prepare the data and fit the model.

        The raw arguments are kept on the instance, ``self._prepare_data()``
        is called to obtain the cleaned regression inputs, and a statsmodels
        fit is stored on ``self.sm_ols``: ``WLS`` when prepared weights are
        available, plain ``OLS`` otherwise.

        Parameters
        ----------
        y, x :
            Dependent and explanatory data.  The prepared versions are used
            through ``.values`` / ``.mul`` -- presumably pandas objects;
            confirm against ``_prepare_data``.
        intercept : bool, default True
            Stored on the instance; not otherwise used in this method.
        weights : optional
            Observation weights.  When the prepared weights are not ``None``
            a weighted fit is performed.
        nw_lags, nw_overlap :
            Stored on the instance; not otherwise used in this method.
        """
        import scikits.statsmodels.api as sm

        # Configuration flags.
        self._intercept = intercept
        self._nw_lags = nw_lags
        self._nw_overlap = nw_overlap

        # Untouched copies of what the caller passed in.
        self._y_orig = y
        self._x_orig = x
        self._weights_orig = weights

        prepared = self._prepare_data()
        (self._y, self._x, self._weights, self._x_filtered, self._index,
         self._time_has_obs) = prepared

        if self._weights is None:
            # Unweighted case: the "transformed" data is the data itself.
            self._x_trans = self._x
            self._y_trans = self._y
            self.sm_ols = sm.OLS(self._y.values, self._x.values).fit()
        else:
            # Scale both sides by sqrt(weights); statsmodels receives the
            # unscaled data together with the weights themselves.
            root_weights = np.sqrt(self._weights)
            self._x_trans = self._x.mul(root_weights, axis=0)
            self._y_trans = self._y * root_weights
            self.sm_ols = sm.WLS(self._y.values,
                                 self._x.values,
                                 weights=self._weights.values).fit()
Ejemplo n.º 2
0
    def _check_wls(self, x, y, weights):
        """Check the weighted ``ols`` fit against a statsmodels WLS fit.

        ``ols`` is run with ``1/weights``.  The reference fit is built by
        joining ``x``, ``y`` and ``weights`` into one frame, dropping rows
        with missing values, appending a constant column and running
        ``sm.WLS`` with the reciprocal of the surviving weights.  Raw
        coefficients and residuals must agree; the rolling and expanding
        moving-window checks are then run with the same inputs.
        """
        result = ols(y=y, x=x, weights=1/weights)

        # Align everything in a single frame so dropna() removes a row from
        # the regressors, the response and the weights at the same time.
        frame = x.copy()
        frame['__y__'] = y
        frame['__weights__'] = weights
        frame = frame.dropna()

        response = frame.pop('__y__').values
        aweights = frame.pop('__weights__').values
        design = sm.add_constant(frame.values, prepend=False)

        reference = sm.WLS(response, design, weights=1/aweights).fit()

        assert_almost_equal(reference.params, result._beta_raw)
        assert_almost_equal(reference.resid, result._resid_raw)

        for window_type in ('rolling', 'expanding'):
            self.checkMovingOLS(window_type, x, y, weights=weights)
Ejemplo n.º 3
0
# Baseline: ordinary least squares on the full design matrix.
ols_fit = sm.OLS(data.endog, data.exog).fit()

# Perhaps the residuals from this fit depend on the square of income.
# NOTE(review): column 2 of data.exog is taken to be income squared (per the
# variable name) -- confirm against the dataset's column order.
incomesq = data.exog[:, 2]
plt.scatter(incomesq, ols_fit.resid)
plt.grid()

# If we think that the variance is proportional to income**2, we want to
# divide every observation by income.  WLS scales the data by the square
# root of `weights`, so passing weights = 1 / income**2 achieves that.
# Because income itself enters the equation, income / income becomes a
# constant term, so this regression is run without an explicit constant
# in the design (hence the last column of data.exog is dropped below --
# presumably that column is the constant; verify).

#data.exog = data.exog[:,:-1]
wls_fit = sm.WLS(data.endog, data.exog[:, :-1], weights=1 / incomesq).fit()

# This, however, leads to difficulties in interpreting the post-estimation
# statistics.  Statsmodels does not yet handle this elegantly, but the
# following hand-computed quantities may be more appropriate.

# explained sum of squares (relative to the uncentered total)
ess = wls_fit.uncentered_tss - wls_fit.ssr
# R-squared
rsquared = ess / wls_fit.uncentered_tss
# mean squared error of the model
mse_model = ess / (wls_fit.df_model + 1)  # add back the dof of the constant
# F statistic
fvalue = mse_model / wls_fit.mse_resid
# adjusted R-squared
rsquared_adj = 1 - (wls_fit.nobs) / (wls_fit.df_resid) * (1 - rsquared)
Ejemplo n.º 4
0
# Use a correction: heteroscedasticity-robust ("sandwich") estimators of the
# parameter covariance matrix, reported as standard errors of the betas.
# print(...) with a single argument produces the same output on Python 2
# and Python 3, whereas the bare print statement only parses on Python 2.
print('heteroscedasticity corrected standard error of beta estimates')
print(res2.HC0_se)
print(res2.HC1_se)
print(res2.HC2_se)
print(res2.HC3_se)

#print res.predict
#plt.plot(x1, res2.fittedvalues, '--')

# WLS knowing the true variance ratio of heteroscedasticity
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

print('\nWLS')
# Third positional argument of WLS is the weights: here the reciprocal of w.
res3 = sm.WLS(y2, X[:, [0, 2]], 1. / w).fit()
print('WLS beta estimates')
print(res3.params)
print('WLS stddev of beta')
print(res3.bse)
#print res.predict
#plt.plot(x1, res3.fittedvalues, '--.')

# Detour: write a function for prediction standard errors

# Prediction interval for OLS
# ---------------------------
covb = res2.cov_params()
# full covariance:
#predvar = res2.mse_resid + np.diag(np.dot(X2,np.dot(covb,X2.T)))
# prediction variance only
Ejemplo n.º 5
0
    # Generate a dataset: linear trend plus a quadratic term and a constant,
    # with noise that is three times larger on the last 40% of the sample.
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    X = np.c_[x1, (x1-5)**2, np.ones(nsample)]
    np.random.seed(0)  # fixed seed so the comparison below is reproducible
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(X, beta)
    w = np.ones(nsample)
    # Floor division keeps the slice index an integer on Python 3 as well
    # (plain `/` would yield 30.0 there and raise TypeError); the value is
    # the same index, 30, that Python 2 integer division produced.
    w[nsample*6//10:] = 3
    sig = 0.5
    y2 = y_true2 + sig*w* np.random.normal(size=nsample)
    # Regressors actually used in the fits: the linear term and the constant.
    X2 = X[:,[0,2]]

    # estimate OLS, WLS, (OLS not used in these tests)
    res2 = sm.OLS(y2, X2).fit()
    res3 = sm.WLS(y2, X2, 1./w).fit()

    # Direct calculation of the prediction standard deviation:
    # residual variance scaled by w, plus the row-wise quadratic form
    # x_i' * cov(beta) * x_i.
    covb = res3.cov_params()
    predvar = res3.mse_resid*w + (X2 * np.dot(covb,X2.T).T).sum(1)
    predstd = np.sqrt(predvar)


    prstd, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(predstd, prstd, 15)

    # Testing shapes of exog: a single row passed as 2-D and as 1-D.
    # NOTE(review): each assertion compares the freshly returned prstd with
    # its own last element, so it only constrains the shape of the result --
    # confirm this is the intent.
    prstd, iv_l, iv_u = wls_prediction_std(res3, X2[-1:,:], weights=3.)
    np.testing.assert_equal( prstd[-1], prstd)
    prstd, iv_l, iv_u = wls_prediction_std(res3, X2[-1,:], weights=3.)
    np.testing.assert_equal( prstd[-1], prstd)
Ejemplo n.º 6
0
def age_model(indices):
  """Return an unfitted WLS model of log wage on the age design.

  ``indices`` selects the observations: it indexes ``logwage`` and ``w``
  and is passed to ``age_design`` to build the regressors.  The caller is
  responsible for calling ``.fit()`` on the returned model.
  """
  response = logwage[indices]
  design = age_design(indices)
  obs_weights = w[indices]
  return sm.WLS(response, design, weights=obs_weights)
Ejemplo n.º 7
0
# print(...) with a single argument produces the same output on Python 2
# and Python 3, whereas the bare print statement only parses on Python 2.
print(len(indf))
print(len(indm))

# With each of these models, one typically runs some commands to look more
# closely at the fit: summary(), anova for the model on its own or between
# two models to see how much additional explanatory power the added
# variables give, and plots of the residuals (qqplot, histogram).
# Currently can't do anova or lowess in python, and the qqplots are annoying
# to make.


# Initial model, only look at log(hrwage)~sex
# Indicator regressor: True where sex == 2.
X1 = hrdat['sex']==2
X1 = sm.add_constant(X1, prepend=True)
# Weighted by the A_ERNLWT column of hrdat.
model1 = sm.WLS(np.log(hrdat['hrwage']), X1, weights = hrdat['A_ERNLWT'])
results1 = model1.fit()

print(results1.summary())


# More complicated model, log(hrwage)~sex+educ+age+PTFT
n = len(hrdat)
logwage = np.log(hrdat['hrwage'])
w = hrdat['A_ERNLWT']

X2 = np.hstack((sm.categorical(hrdat['sex'])[:,2:],
                sm.categorical(hrdat['educ'])[:,2:],
                hrdat['age'].reshape(n,1),