Example #1
    def fit(self, X, y, sample_weight=None):
        """
        Fit the ordinary least squares model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.
        y : array-like, shape (n_samples, 1) or (n_samples,)
            Target values.
        sample_weight : array-like, shape (n_samples,), optional
            Individual weights for each sample.

        Returns
        -------
        self
        """
        assert ndim(y) == 1 or (ndim(y) == 2 and shape(y)[1] == 1)
        y = reshape(y, (-1,))
        if self.fit_intercept:
            X = add_constant(X, has_constant='add')
        if sample_weight is not None:
            ols = WLS(y, X, weights=sample_weight, hasconst=self.fit_intercept)
        else:
            ols = WLS(y, X, hasconst=self.fit_intercept)
        self.results = ols.fit(**self.fit_args)
        return self
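A minimal usage sketch of the same pattern outside the class (synthetic data; all names below are made up, not from the original source):

import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.tools.tools import add_constant

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
y = 1.0 + X @ np.array([2.0, -1.0]) + rng.normal(size=100)
w = np.ones(100)

# same pattern as fit() above: append an intercept column, then fit WLS
X_c = add_constant(X, has_constant='add')
res = WLS(y, X_c, weights=w, hasconst=True).fit()
print(res.params)  # approximately [1.0, 2.0, -1.0]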
Example #2
import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.tools.tools import add_constant


def compute_WLS_stdevs(X, Y, weights, alpha):
    """Calculate standard deviations of weighted ridge regression coefficients.

    Args:
        X: dataset containing the explanatory variables
        Y: vector of the response variable
        weights: vector of weights (one for each row of the X dataset)
        alpha: regularization parameter

    Returns:
        stdevs_beta: list containing the standard deviations of the coefficients
    """

    # Build Weighted Regression (WLS) model
    X_enh = add_constant(X)
    wls_model = WLS(Y, X_enh, weights=weights)
    results = wls_model.fit()
    errors_wls_weighted = results.wresid

    # Estimate of the sigma squared quantity
    sigma2 = np.dot(errors_wls_weighted.T,
                    errors_wls_weighted) / (X.shape[0] - X.shape[1])
    weights_matr = np.diag(weights)  # reformulate weights as diagonal matrix

    # Standard deviations of the coefficients
    partial_ = np.linalg.inv(
        np.linalg.multi_dot([X.T, weights_matr, X])
        + alpha * np.eye(X.shape[1]))
    variances_beta_matrix = sigma2 * np.linalg.multi_dot(
        [partial_, X.T, weights_matr, X, partial_.T])
    variances_beta = np.diag(variances_beta_matrix)
    stdevs_beta = list(np.sqrt(variances_beta))

    return stdevs_beta
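A hypothetical call on synthetic data (using the imports added above; all values made up):

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(200, 3))
Y_demo = X_demo @ np.array([1.0, -2.0, 0.5]) + rng.normal(size=200)

stdevs = compute_WLS_stdevs(X_demo, Y_demo, weights=np.ones(200), alpha=0.1)
print(stdevs)  # one standard deviation per column of X_demo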
Example #3
    def _fit(self, polys=1, periods=None, discontinuities=None):
        A, parameters = self.getDesignMatrix(polys=polys,
                                             periods=periods,
                                             discontinuities=discontinuities)
        # NOTE: statsmodels WLS interprets `weights` as inverse variances
        # (1/sigma**2), so passing `dy` directly may not be intended.
        model = WLS(self.series['y'], A, weights=self.series['dy'])
        model.data.xnames = parameters
        result = model.fit()
        return result

        # NOTE: everything below is unreachable (dead code after the early
        # return); it re-solves the same problem via the normal equations.
        P = np.diag(1.0 / self.series['dy']**2)
        AP = np.dot(A.T, P)
        N = np.dot(AP, A)
        Q_xx = np.linalg.inv(N)
        W = np.dot(AP, self.series['y'])
        p = np.dot(Q_xx, W)
        pickle.dump(p, open('p.pkl', 'wb'))
        fit = np.dot(A, p)
        v = fit - self.series['y']
        result = {'parameters': parameters, 'p': p}
        result['v'] = v
        result['fit'] = fit
        result['aic'] = self._aic(v, A.shape[1])
        sigma2 = np.dot(np.dot(v.T, P), v) / (self.nepochs - A.shape[1])
        sigma_x = np.sqrt(np.diag(Q_xx * sigma2))
        result['psigma'] = sigma_x
        w = np.diag(P)
        chi_square = np.dot(v**2, w)
        result['chi_square'] = chi_square
        nrms = np.sqrt(chi_square / (self.nepochs - A.shape[1]))
        wrms = np.sqrt(
            (self.nepochs / (self.nepochs - A.shape[1])) * chi_square / sum(w))
        result['nrms'] = nrms
        result['wrms'] = wrms
        return result
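The unreachable block above solves the weighted normal equations by hand, p = (A^T P A)^{-1} A^T P y with P = diag(1/dy**2). A self-contained check (synthetic data, not from the source) that this matches statsmodels WLS when the weights are passed as inverse variances:

import numpy as np
from statsmodels.regression.linear_model import WLS

rng = np.random.default_rng(0)
A = np.column_stack([np.ones(100), np.linspace(0.0, 1.0, 100)])
y = A @ np.array([2.0, -1.0]) + rng.normal(scale=0.1, size=100)
dy = np.full(100, 0.1)                    # 1-sigma uncertainties

P = np.diag(1.0 / dy**2)                  # weight matrix
p_manual = np.linalg.solve(A.T @ P @ A, A.T @ P @ y)

res = WLS(y, A, weights=1.0 / dy**2).fit()
assert np.allclose(res.params, p_manual)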
Example #4
    def test_fixed_scale(self):
        cov_type = 'fixed_scale'
        kwds = {}
        res1 = self.res_ols.get_robustcov_results(cov_type, **kwds)
        res2 = self.res_wls.get_robustcov_results(cov_type, **kwds)
        assert_allclose(res1.params, res2.params, rtol=1e-13)
        assert_allclose(res1.cov_params(), res2.cov_params(), rtol=1e-13)
        assert_allclose(res1.bse, res2.bse, rtol=1e-13)
        assert_allclose(res1.pvalues, res2.pvalues, rtol=1e-12)

        tt = res2.t_test(np.eye(len(res2.params)),
                         cov_p=res2.normalized_cov_params)
        assert_allclose(res2.cov_params(), res2.normalized_cov_params,
                        rtol=1e-13)
        assert_allclose(res2.bse, tt.sd, rtol=1e-13)
        assert_allclose(res2.pvalues, tt.pvalue, rtol=1e-13)
        assert_allclose(res2.tvalues, tt.tvalue, rtol=1e-13)

        # using cov_type in fit
        mod = self.res_wls.model
        mod3 = WLS(mod.endog, mod.exog, weights=mod.weights)
        res3 = mod3.fit(cov_type=cov_type, cov_kwds=kwds)
        tt = res3.t_test(np.eye(len(res3.params)),
                         cov_p=res3.normalized_cov_params)
        assert_allclose(res3.cov_params(), res3.normalized_cov_params,
                        rtol=1e-13)
        assert_allclose(res3.bse, tt.sd, rtol=1e-13)
        assert_allclose(res3.pvalues, tt.pvalue, rtol=1e-13)
        assert_allclose(res3.tvalues, tt.tvalue, rtol=1e-13)
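A standalone sketch (made-up data) of the relationship this test checks: with cov_type="fixed_scale" the scale is held fixed (1 by default), so cov_params() coincides with normalized_cov_params:

import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.tools.tools import add_constant

rng = np.random.default_rng(0)
x = add_constant(rng.normal(size=(50, 2)))
y = x @ np.array([1.0, 2.0, -1.0]) + rng.normal(size=50)

res = WLS(y, x).fit(cov_type="fixed_scale")
assert np.allclose(res.cov_params(), res.normalized_cov_params)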
Example #5
 def discontinuitiesSignificanceTest(self, discontinuities, polys=1, periods=None):
     # while 1:
     #     result = self._fit(polys=polys, periods=periods, discontinuities=discontinuities)
     #     j = polys + 1 + 2 * len(periods)
     #     psigma = result['psigma'][j:]
     #     p = result['p'][j:]
     #     temp = []
     #     for i, discontinuity in enumerate(discontinuities):
     #         npars = discontinuity.nParameters()
     #         p_ = p[:npars]
     #         psigma_ = psigma[:npars]
     #         p = p[npars:]
     #         psigma = psigma[npars:]
     #         if np.all(np.abs(p_) > 2 * psigma_):
     #             temp.append(discontinuity)
     #         # else:
     #
     #     if temp == discontinuities:
     #         print('test significance end', temp)
     #         return temp
     #
     #     discontinuities = temp
     # while 1:
     A, params = self.getDesignMatrix(
         polys=polys, periods=periods, discontinuities=discontinuities)
     model = WLS(self.series['y'], A, weights=self.series['dy'])
     model.data.xnames = params
     result = model.fit()
     # print(result.summary2())
     j = polys + 1 + 2 * len(periods)
     ind = result.pvalues[j:] < 0.05
     discontinuities = list(compress(discontinuities, ind))
     return discontinuities
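How the p-value filter at the end works, in isolation (hypothetical values):

from itertools import compress

pvalues = [0.01, 0.20, 0.03]
discontinuities = ['jump_a', 'jump_b', 'jump_c']
keep = [p < 0.05 for p in pvalues]
print(list(compress(discontinuities, keep)))  # ['jump_a', 'jump_c']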
Example #6
 def fit_WLS(self,
             X,
             assignment,
             outcome,
             confounder_types,
             weight_name='weights',
             intercept=True):
     df = X[[assignment, outcome]].copy()
     regression_confounders = []
     for confounder, var_type in confounder_types.items():
         if var_type == 'o' or var_type == 'u':
             c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder)
             if len(c_dummies.columns) == 1:
                 df = pd.concat([df, c_dummies[c_dummies.columns]], axis=1)
                 regression_confounders.extend(c_dummies.columns)
             else:
                 df = pd.concat([df, c_dummies[c_dummies.columns[1:]]],
                                axis=1)
                 regression_confounders.extend(c_dummies.columns[1:])
         else:
             regression_confounders.append(confounder)
              df.loc[:, confounder] = X[confounder].copy()
     if intercept:
         df.loc[:, 'intercept'] = 1.
         regression_confounders.append('intercept')
     model = WLS(df[outcome],
                 df[[assignment] + regression_confounders],
                 weights=X[weight_name])
     result = model.fit()
     self.wls_model = result
     return result
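The dummy-coding convention above, in isolation (made-up data): when a categorical expands to more than one column, the first level is dropped to avoid collinearity with the intercept:

import pandas as pd

X = pd.DataFrame({'color': ['red', 'blue', 'red', 'green']})
c_dummies = pd.get_dummies(X[['color']], prefix='color')
print(c_dummies.columns[1:].tolist())  # first level dropped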
Example #7
    def do_egger_regression_single_variance_term(self):
        """
        Does egger regression based on single variance term estimates.

        :return: list of length two each with a tuple of floats: beta, se, wald_p_val of the estimate for intercept
        and slope respectively.
        """

        num_estimates = len(self.estimation_data)

        # runtime checks.
        if num_estimates < 3:
            raise ValueError(
                "Only {} estimates supplied, need at least three to estimate egger"
                .format(num_estimates))

        if len(self.exposure_tuples) != num_estimates:
            raise ValueError(
                "Exposure data does not match the number of estimates, "
                "cannot do Egger regression.")

        if len(self.outcome_tuples) != num_estimates:
            raise ValueError(
                "Outcome data does not match the number of estimates, "
                "cannot do Egger regression.")
        """
        Now turn exposure into positive values.
        """
        outcome_tuples = copy.deepcopy(self.outcome_tuples)
        exposure_tuples = copy.deepcopy(self.exposure_tuples)

        for i in range(num_estimates):
            if exposure_tuples[i][0] < 0:
                # flip the sign of the effect; keep the SE (index 1) intact
                exposure_tuples[i] = (-1.0 * exposure_tuples[i][0],
                                      exposure_tuples[i][1])
                outcome_tuples[i] = (-1.0 * outcome_tuples[i][0],
                                     outcome_tuples[i][1])

        x_dat = np.asarray([x[0] for x in exposure_tuples], dtype=float)
        x_dat = add_constant(x_dat)

        y_dat = np.asarray([x[0] for x in outcome_tuples], dtype=float)

        w_dat = np.zeros(len(self.estimation_data), dtype=float)

        for i in range(len(self.estimation_data)):
            w_dat[i] = 1 / (float(outcome_tuples[i][1])**2)

        wls_model = WLS(y_dat, x_dat, weights=w_dat)
        results = wls_model.fit()

        self.egger_intercept = (results.params[0], results.bse[0],
                                results.pvalues[0])
        self.egger_slope = (results.params[1], results.bse[1],
                            results.pvalues[1])

        self.egger_done = True

        return self.egger_intercept, self.egger_slope
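A minimal self-contained Egger regression on synthetic summary statistics (all inputs made up), mirroring the WLS call above:

import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.tools.tools import add_constant

rng = np.random.default_rng(1)
beta_exp = np.abs(rng.normal(size=20))     # exposure betas, made positive
se_out = np.full(20, 0.05)                 # outcome standard errors
beta_out = 0.5 * beta_exp + rng.normal(scale=se_out)

res = WLS(beta_out, add_constant(beta_exp), weights=1 / se_out**2).fit()
intercept, slope = res.params              # intercept tests for pleiotropy
print(intercept, slope)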
Example #8
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.linear_model import WLS


def wls(x):
    '''
    Perform weighted least squares on the numpy array x of shape (N, 3),
    whose columns are the independent variable, the dependent variable,
    and the dependent-variable errors. Returns the weighted log-likelihood
    term (up to a constant), the fitted slope, and the residuals.
    '''
    wls_model = WLS(x[:, 1], sm.add_constant(x[:, 0]), weights=1 / x[:, 2]**2)
    result = wls_model.fit()
    resid = x[:, 1] - wls_model.predict(result.params)
    return -np.sum(resid**2 / x[:, 2]**2), result.params[1], resid
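A hypothetical call with a synthetic (x, y, sigma) array (using the imports added above):

data = np.column_stack([np.arange(10.0),
                        2.0 * np.arange(10.0) + 1.0,
                        np.ones(10)])
loglike, slope, resid = wls(data)
print(slope)  # approximately 2.0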
Example #9
import numpy as np
from statsmodels.regression.linear_model import WLS


def blp(y, d, prop, b_hat, s_hat, print_table=True):
    """Return intercept and slope for Best Linear Predictor (BLP)

    Parameters
    ----------
    y : ndarray
        vector of outcomes
    d : ndarray
        treatment indicator
    prop : ndarray
        treatment propensity
    b_hat : ndarray
        baseline outcome estimates (b0)
    s_hat : ndarray
        treatment-effect (heterogeneity) estimates
    print_table : bool, optional
        Toggle results table, by default True

    Returns
    -------
    dict
        results for ATE and HET
    """

    # Calculate model matrix
    y_reg = y  # outcome
    w_reg = (prop * (1 - prop)) ** (-1)  # weights
    x_reg = np.column_stack(
        (
            np.repeat(1, repeats=len(y)),  # constant
            b_hat,  # baseline b0
            d - prop,  # average treatment effect ate
            (d - prop) * (s_hat - np.mean(s_hat)),  # heterogeneity het
        )
    )
    labels = ["const.", "b0", "ate", "het"]

    # Run weighted least squares
    wls = WLS(endog=y_reg, exog=x_reg, weights=w_reg)  # keyword is `weights`, not `w`
    wls = wls.fit()

    if print_table:
        print(wls.summary(xname=labels))

    return {
        "ate": wls.params[labels.index("ate")],
        "het": wls.params[labels.index("het")],
    }
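A hypothetical call on synthetic data (in practice b_hat and s_hat would come from first-stage baseline and treatment-effect estimators):

rng = np.random.default_rng(0)
n = 500
prop = np.full(n, 0.5)                 # known propensity
d = rng.binomial(1, prop)
b_hat = rng.normal(size=n)             # proxy baseline estimates
s_hat = rng.normal(size=n)             # proxy treatment-effect estimates
y = b_hat + d * s_hat + rng.normal(size=n)

print(blp(y, d, prop, b_hat, s_hat, print_table=False))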
Example #10
    def test_pm(self):
        res = results_meta.exk1_metafor
        eff, var_eff = self.eff, self.var_eff

        tau2, converged = _fit_tau_iterative(eff,
                                             var_eff,
                                             tau2_start=0.1,
                                             atol=1e-8)
        assert_equal(converged, True)
        assert_allclose(tau2, res.tau2, atol=1e-10)

        # compare with WLS, PM weights
        mod_wls = WLS(eff, np.ones(len(eff)), weights=1 / (var_eff + tau2))
        res_wls = mod_wls.fit(cov_type="fixed_scale")

        assert_allclose(res_wls.params, res.b, atol=1e-13)
        assert_allclose(res_wls.bse, res.se, atol=1e-10)
        ci_low, ci_upp = res_wls.conf_int()[0]
        assert_allclose(ci_low, res.ci_lb, atol=1e-10)
        assert_allclose(ci_upp, res.ci_ub, atol=1e-10)

        # need stricter atol to match metafor,
        # I also used higher precision in metafor
        res3 = combine_effects(eff, var_eff, method_re="pm", atol=1e-7)
        # TODO: asserts below are copy paste, DRY?
        assert_allclose(res3.tau2, res.tau2, atol=1e-10)
        assert_allclose(res3.mean_effect_re, res.b, atol=1e-13)
        assert_allclose(res3.sd_eff_w_re, res.se, atol=1e-10)

        ci = res3.conf_int(use_t=False)[1]
        assert_allclose(ci[0], res.ci_lb, atol=1e-10)
        assert_allclose(ci[1], res.ci_ub, atol=1e-10)

        assert_allclose(res3.q, res.QE, atol=1e-10)
        # the following does not pass yet
        # assert_allclose(res3.i2, res.I2 / 100, atol=1e-10)  # percent in R
        # assert_allclose(res3.h2, res.H2, atol=1e-10)
        th = res3.test_homogeneity()
        q, pv = th
        df = th.df
        assert_allclose(pv, res.QEp, atol=1e-10)
        assert_allclose(q, res.QE, atol=1e-10)
        assert_allclose(df, 9 - 1, atol=1e-10)
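A standalone sketch of the same Paule-Mandel flow on made-up effect sizes:

import numpy as np
from statsmodels.stats.meta_analysis import combine_effects

eff = np.array([0.10, 0.30, 0.20, 0.40, 0.15])
var_eff = np.array([0.010, 0.020, 0.015, 0.010, 0.012])

res = combine_effects(eff, var_eff, method_re="pm")
print(res.summary_frame())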
Example #11
 def _fit(self,
          polys=1,
          periods=None,
          discontinuities=None):
     A, parameters = self.getDesignMatrix(polys=polys, periods=periods, discontinuities=discontinuities)
     model = WLS(self.series['y'], A, weights=self.series['dy'])
     model.data.xnames = parameters
     result = model.fit()
     _fit = result.fittedvalues
     npar_without_discontinuities = polys + 2 * len(periods) + 1
     A[:, npar_without_discontinuities:] = 0
     continuous = np.dot(A, result.params.values)
     
     return result, _fit, continuous
     
     # NOTE: unreachable (dead code after the early return above).
     P = np.diag(1.0 / self.series['dy'] ** 2)
     AP = np.dot(A.T, P)
     N = np.dot(AP, A)
     Q_xx = np.linalg.inv(N)
     W = np.dot(AP, self.series['y'])
     p = np.dot(Q_xx, W)
     fit = np.dot(A, p)
     npar_without_discontinuities = polys + 2 * len(periods) + 1
     A[:, npar_without_discontinuities:] = 0
     continuous = np.dot(A, p)
     v = fit - self.series['y']
     result = {'parameters': parameters, 'p': p}
     result['v'] = v
     result['fit'] = fit
     result['continuous'] = continuous
     result['aic'] = self._aic(v, A.shape[1])
     sigma2 = np.dot(np.dot(v.T, P), v) / (self.nepochs - A.shape[1])
     sigma_x = np.sqrt(np.diag(Q_xx * sigma2))
     result['psigma'] = sigma_x
     w = np.diag(P)
     chi_square = np.dot(v**2, w)
     result['chi_square'] = chi_square
     nrms = np.sqrt(chi_square / (self.nepochs - A.shape[1]))
     wrms = np.sqrt(
         (self.nepochs / (self.nepochs - A.shape[1])) * chi_square / sum(w))
     result['nrms'] = nrms
     result['wrms'] = wrms
     return result
Example #12
    def setup_class(cls):

        # from example wls.py

        nsample = 50
        x = np.linspace(0, 20, nsample)
        X = np.column_stack((x, (x - 5)**2))
        from statsmodels.tools.tools import add_constant
        X = add_constant(X)
        beta = [5., 0.5, -0.01]
        sig = 0.5
        w = np.ones(nsample)
        w[int(nsample * 6. / 10):] = 3
        y_true = np.dot(X, beta)
        e = np.random.normal(size=nsample)
        y = y_true + sig * w * e
        X = X[:, [0, 1]]

        # ### WLS knowing the true variance ratio of heteroscedasticity

        mod_wls = WLS(y, X, weights=1. / w)
        cls.res_wls = mod_wls.fit()
Example #13
    def setup_class(cls):

        # from example wls.py

        nsample = 50
        x = np.linspace(0, 20, nsample)
        X = np.column_stack((x, (x - 5)**2))
        from statsmodels.tools.tools import add_constant
        X = add_constant(X)
        beta = [5., 0.5, -0.01]
        sig = 0.5
        w = np.ones(nsample)
        w[int(nsample * 6. / 10):] = 3
        y_true = np.dot(X, beta)
        e = np.random.normal(size=nsample)
        y = y_true + sig * w * e
        X = X[:, [0, 1]]


        # ### WLS knowing the true variance ratio of heteroscedasticity

        mod_wls = WLS(y, X, weights=1./w)
        cls.res_wls = mod_wls.fit()
Example #14
import pickle
import re
from copy import copy

import numpy as np
import pandas as pd
import patsy
from statsmodels.regression.linear_model import WLS

# NOTE: `linear_transform` is an external helper from the original module
# (presumably X @ betas); it is not defined in this snippet.


class LinearRegression(object):
    '''
	Patsy wrapper for linear estimation and prediction.

	Uses statsmodels WLS to allow weights.
	If no weights are provided, results are equivalent to OLS.
	'''
    def __init__(self, formula=None, data=None, **kwargs):

        # convert all variables raised to a power to float64
        # this prevents mis-specification of probabilities in cases of variable overflow
        # (if the original var was compressed to a smaller bit integer/float)
        if type(data) == pd.DataFrame:
            power_vars = list(set(re.findall(r'(?<=power\().+?(?=,)',
                                             formula)))
            for var in power_vars:
                data[var] = data[var].astype('float64')

        if formula:
            y, X = patsy.dmatrices(formula, data, 1)

            self._y_design_info = y.design_info
            self._X_design_info = X.design_info

            self._model = WLS(y, X, **kwargs)
            self._fit = self._model.fit()
            self._betas = self._fit.params
            self._std = np.std(data[self._model.data.ynames].values -
                               self.predict(data))
            self._r2 = self._fit.rsquared
            self._r2_adj = self._fit.rsquared_adj
        else:
            self._y_design_info = None
            self._X_design_info = None
            self._model = None
            self._fit = None
            self._betas = None
            self._std = None
            self._r2 = None
            self._r2_adj = None

    def __repr__(self):
        return str(self._fit.summary())

    def predict(self, data):
        '''
		Returns fitted values for the data provided.
		'''

        if len(data) == 0:
            return []

        # identifies exponential variables from the design matrix (via the 'power' flag) and converts to float64
        # this prevents mis-specification of probabilities in cases of variable overflow
        # (if the original var was compressed to a smaller bit integer/float)
        power_vars = list({
            re.search(r'(?<=power\().+?(?=,)', column).group()
            for column in self._X_design_info.column_names
            if 'power' in column
        })
        for var in power_vars:
            data[var] = data[var].astype('float64')

        (X, ) = patsy.build_design_matrices([self._X_design_info], data)

        return linear_transform(np.asarray(X), self._betas)

    def residuals(self, data):
        '''
		Returns residuals from fitting the model to the data provided.
		'''

        if len(data) == 0:
            return []

        return data[self._model.data.ynames].values - self.predict(data)

    def draw(self, data, rand_engine):
        '''
		Returns fitted values for the data provided plus a random draw
		from a normal distribution with the regression standard error.
		'''

        return self.predict(data) + rand_engine.normal(0, self._std, len(data))

    def Rsquared(self, adjusted=True):
        '''
		Returns the model's adjusted R squared.
		To return unadjusted R squared, pass adjusted=False.
		'''

        if adjusted:
            return self._r2_adj
        else:
            return self._r2

    def to_pickle(self, filename):
        '''
		Writes basic model information to a pickle file.
		'''

        pickle.dump((self._y_design_info, self._X_design_info, self._betas,
                     self._std, self._r2, self._r2_adj), open(filename, "wb"))

    @staticmethod
    def read_pickle(filename):
        '''
		Reads basic model information from a pickle file.

		Returns a LinearRegression object that does not include the model 
		summary or fit object but can execute all class functions.
		'''

        y_design_info, X_design_info, betas, std, r2, r2_adj = pickle.load(
            open(filename, "rb"))

        linear_regression = LinearRegression()
        linear_regression._y_design_info = y_design_info
        linear_regression._X_design_info = X_design_info
        linear_regression._betas = betas
        linear_regression._std = std
        linear_regression._r2 = r2
        linear_regression._r2_adj = r2_adj

        return linear_regression

    def __add__(self, other):
        ret = copy(self)
        ret._betas = self._betas + other._betas
        return ret

    def __sub__(self, other):
        ret = copy(self)
        ret._betas = self._betas - other._betas
        return ret

    def __mul__(self, other):
        ret = copy(self)
        ret._betas = ret._betas * other
        return ret
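Hypothetical usage (column names made up; assumes the module's linear_transform helper noted above is importable):

df = pd.DataFrame({'y': [1.0, 2.0, 3.0, 4.0],
                   'x': [0.0, 1.0, 2.0, 3.0]})
lr = LinearRegression(formula='y ~ x', data=df)
print(lr.predict(df))
print(lr.Rsquared(adjusted=False))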
Example #15
    def ipw_ra(self, return_results=True, effect_group="all", disp=False):
        """
        ATE and POM from inverse probability weighted regression adjustment.

        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """
        treat_mask = self.treat_mask
        endog = self.model_pool.endog
        exog = self.model_pool.exog
        prob = self.prob_select

        prob0 = prob[~treat_mask]
        prob1 = prob[treat_mask]
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            exogt = exog
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            exogt = exog[treat_mask]
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1
            exogt = exog[~treat_mask]
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        mod0 = WLS(endog[~treat_mask], exog[~treat_mask], weights=w0)
        result0 = mod0.fit(cov_type='HC1')
        # mean0_ipwra = (result0.predict(exog) * (prob / prob.mean())).mean()
        mean0_ipwra = result0.predict(exogt).mean()

        mod1 = WLS(endog[treat_mask], exog[treat_mask], weights=w1)
        result1 = mod1.fit(cov_type='HC1')
        # mean1_ipwra = (result1.predict(exog) * (prob / prob.mean())).mean()
        mean1_ipwra = result1.predict(exogt).mean()

        if not return_results:
            return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra

        # GMM
        mod_gmm = _IPWRAGMM(endog,
                            self.results_select,
                            None,
                            teff=self,
                            effect_group=effect_group)
        start_params = np.concatenate(
            ([mean1_ipwra - mean0_ipwra,
              mean0_ipwra], result0.params, result1.params,
             np.asarray(self.results_select.params)))
        res_gmm = mod_gmm.fit(start_params=start_params,
                              inv_weights=np.eye(len(start_params)),
                              optim_method='nm',
                              optim_args={
                                  "maxiter": 2000,
                                  "disp": disp
                              },
                              maxiter=1)

        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group=effect_group,
        )
        return res
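The core IPW-RA computation in isolation on synthetic data (the GMM standard-error machinery is omitted; every name below is made up):

import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.tools.tools import add_constant

rng = np.random.default_rng(0)
n = 2000
x = rng.normal(size=n)
prob = 1.0 / (1.0 + np.exp(-x))            # true propensity
treat = rng.random(n) < prob
y = 1.0 + 0.5 * x + 2.0 * treat + rng.normal(size=n)
exog = add_constant(x)

# effect_group="all": weight controls by 1/(1-p) and treated by 1/p
res0 = WLS(y[~treat], exog[~treat], weights=1 / (1 - prob[~treat])).fit()
res1 = WLS(y[treat], exog[treat], weights=1 / prob[treat]).fit()
ate = res1.predict(exog).mean() - res0.predict(exog).mean()
print(ate)  # approximately 2.0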
Example #16
    def aipw_wls(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting.

        This uses weighted outcome regression, while `aipw` uses unweighted
        outcome regression.
        Option for effect on treated or on untreated is not available.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """
        nobs = self.nobs
        prob = self.prob_select

        endog = self.model_pool.endog
        exog = self.model_pool.exog
        tind = self.treatment
        treat_mask = self.treat_mask

        ww1 = tind / prob * (tind / prob - 1)
        mod1 = WLS(endog[treat_mask],
                   exog[treat_mask],
                   weights=ww1[treat_mask])
        result1 = mod1.fit(cov_type='HC1')
        mean1_ipw2 = result1.predict(exog).mean()

        ww0 = (1 - tind) / (1 - prob) * ((1 - tind) / (1 - prob) - 1)
        mod0 = WLS(endog[~treat_mask],
                   exog[~treat_mask],
                   weights=ww0[~treat_mask])
        result0 = mod0.fit(cov_type='HC1')
        mean0_ipw2 = result0.predict(exog).mean()

        self.results_ipwwls0 = result0
        self.results_ipwwls1 = result1

        correct0 = (result0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (result1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = mean0_ipw2 + correct0
        tmean1 = mean1_ipw2 + correct1
        ate = tmean1 - tmean0

        if not return_results:
            return ate, tmean0, tmean1

        p2_aipw_wls = np.asarray([ate, tmean0]).squeeze()

        # GMM
        mod_gmm = _AIPWWLSGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate(
            (p2_aipw_wls, result0.params, result1.params,
             self.results_select.params))
        res_gmm = mod_gmm.fit(start_params=start_params,
                              inv_weights=np.eye(len(start_params)),
                              optim_method='nm',
                              optim_args={
                                  "maxiter": 5000,
                                  "disp": disp
                              },
                              maxiter=1)
        res = TreatmentEffectResults(
            self,
            res_gmm,
            "IPW",
            start_params=start_params,
            effect_group="all",
        )
        return res
Example #17
import numpy as np
from statsmodels.regression.linear_model import WLS

# NOTE: `quantile_grid` is a helper from the original module (returns bin
# indices, edges, and percentiles); it is not defined in this snippet.


def gates(y, d, prop, s_hat, q=10, print_table=True):
    """Calculate Group Average Treatment Effect

    Parameters
    ----------
    y : ndarray
        vector of outcomes
    d : ndarray
        treatment indicator
    prop : ndarray
        treatment propensity
    s_hat : ndarray
        estimated treatment effect
    q : int, optional
        number of groups, by default 10
    print_table : bool, optional
        toggle results table, by default True

    Returns
    -------
    dict
        results with baseline and treatment effect for each group
    """

    # Define groups
    bin_indices, bin_edges, bin_pct = quantile_grid(
        x=s_hat + 1e-16 * np.random.uniform(size=len(s_hat)), q=q  # Break ties
    )

    # Dummy coding
    s_onehot = np.zeros((len(s_hat), len(bin_edges)))
    s_onehot[np.arange(0, len(s_hat)), bin_indices] = 1

    # Calculate model matrix
    x_reg = np.column_stack(
        (s_onehot, s_onehot * np.reshape(d - prop, newshape=(-1, 1)))
    )
    w_reg = (prop * (1 - prop)) ** (-1)  # weights
    y_reg = y

    # Run weighted least squares
    labels_baseline = [
        f"Baseline: p={p / 100:.2f} ({x:.2f})"
        for p, x in zip(bin_pct.tolist(), bin_edges.tolist())
    ]
    labels_treatment = [
        f"Treatment: p={p / 100:.2f} ({x:.2f})"
        for p, x in zip(bin_pct.tolist(), bin_edges.tolist())
    ]
    labels = labels_baseline + labels_treatment

    wls = WLS(endog=y_reg, exog=x_reg, weights=w_reg)  # keyword is `weights`, not `w`
    wls = wls.fit()

    if print_table:
        print(wls.summary(xname=labels))

    return {
        "coef_baseline": wls.params[: len(labels_baseline)],
        "coef_treatment": wls.params[len(labels_baseline) :],
        "bin_values": bin_edges,
        "bin_count": np.sum(s_onehot, axis=0),
    }
Example #18
# imports and setup inferred from the calls below (not part of the original
# excerpt); the setup matches the wls.py example in Examples #12/#13 above
import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.regression._prediction import get_prediction
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.tools.tools import add_constant

nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5)**2))

X = add_constant(X)
beta = [5., 0.5, -0.01]
sig = 0.5
w = np.ones(nsample)
w[int(nsample * 6 / 10):] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
X = X[:, [0, 1]]

# ### WLS knowing the true variance ratio of heteroscedasticity

mod_wls = WLS(y, X, weights=1. / w)
res_wls = mod_wls.fit()

prstd, iv_l, iv_u = wls_prediction_std(res_wls)
pred_res = get_prediction(res_wls)
ci = pred_res.conf_int(obs=True)

from numpy.testing import assert_allclose

assert_allclose(pred_res.se_obs, prstd, rtol=1e-13)
assert_allclose(ci, np.column_stack((iv_l, iv_u)), rtol=1e-13)

print(pred_res.summary_frame().head())

pred_res2 = res_wls.get_prediction()
ci2 = pred_res2.conf_int(obs=True)
Example #19
    def do_egger_regression_two_variance_term(self):
        """
        Does egger regression based on two variance term estimates.

        :return: list of length two each with a tuple of floats: beta, se, wald_p_val of the estimate for intercept
        and slope respectively.
        """

        num_estimates = len(self.estimation_data)

        # runtime checks.

        if num_estimates < 3:
            raise ValueError(
                "Only {} estimates supplied, need at least three to estimate egger"
                .format(num_estimates))

        if len(self.exposure_tuples) != num_estimates:
            raise ValueError(
                "Exposure data does not match the number of estimates, "
                "cannot do Egger regression.")

        if len(self.outcome_tuples) != num_estimates:
            raise ValueError(
                "Outcome data does not match the number of estimates, "
                "cannot do Egger regression.")
        """
        Now turn exposure into positive values.
        """

        outcome_tuples = copy.deepcopy(self.outcome_tuples)
        exposure_tuples = copy.deepcopy(self.exposure_tuples)

        for i in range(num_estimates):
            if exposure_tuples[i][0] < 0:
                # flip the sign of the effect; keep the SE (index 1) intact
                exposure_tuples[i] = (-1 * exposure_tuples[i][0],
                                      exposure_tuples[i][1])
                outcome_tuples[i] = (-1 * outcome_tuples[i][0],
                                     outcome_tuples[i][1])

        x_dat = np.asarray([x[0] for x in exposure_tuples])
        x_dat = add_constant(x_dat)

        y_dat = np.asarray([x[0] for x in outcome_tuples])

        # If this value is zero, we add the smallest possible constant so it
        # can still be used as a weight. Checked against the 2015 paper
        # introducing MR-Egger; it works as expected.

        w_dat = np.zeros(len(self.estimation_data))
        for i in range(len(self.estimation_data)):
            w_dat[i] = outcome_tuples[i][0]**-2 / (
                (outcome_tuples[i][0]**-2 * outcome_tuples[i][1]**2)
                + (exposure_tuples[i][0]**-2 * exposure_tuples[i][1]**2))

        wls_model = WLS(y_dat, x_dat, weights=w_dat)
        results = wls_model.fit()

        self.egger_intercept = (results.params[0], results.bse[0],
                                results.pvalues[0])
        self.egger_slope = (results.params[1], results.bse[1],
                            results.pvalues[1])

        self.egger_done = True

        return self.egger_intercept, self.egger_slope
Example #20
# imports and setup inferred from the calls below (not part of the original
# excerpt); the setup matches the wls.py example in Examples #12/#13 above
import numpy as np
from statsmodels.regression.linear_model import WLS
from statsmodels.regression._prediction import get_prediction
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.tools.tools import add_constant

nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5)**2))

X = add_constant(X)
beta = [5., 0.5, -0.01]
sig = 0.5
w = np.ones(nsample)
w[int(nsample * 6 / 10):] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
X = X[:, [0, 1]]


# ### WLS knowing the true variance ratio of heteroscedasticity

mod_wls = WLS(y, X, weights=1./w)
res_wls = mod_wls.fit()



prstd, iv_l, iv_u = wls_prediction_std(res_wls)
pred_res = get_prediction(res_wls)
ci = pred_res.conf_int(obs=True)

from numpy.testing import assert_allclose
assert_allclose(pred_res.se_obs, prstd, rtol=1e-13)
assert_allclose(ci, np.column_stack((iv_l, iv_u)), rtol=1e-13)

print(pred_res.summary_frame().head())

pred_res2 = res_wls.get_prediction()
ci2 = pred_res2.conf_int(obs=True)
Example #21
import pickle
from copy import copy

import numpy
import patsy
from statsmodels.regression.linear_model import WLS

# NOTE: `linear_transform` is an external helper from the original module
# (presumably X @ betas); it is not defined in this snippet.


class LinearRegression(object):
    """Patsy wrapper for linear estimation and prediction.
    """

    def __init__(self, formula=None, data=None, **kwargs):

        if formula:
            y, X = patsy.dmatrices(formula, data, 1)

            self._y_design_info = y.design_info
            self._X_design_info = X.design_info

            self._model = WLS(y, X, **kwargs)
            self._fit = self._model.fit()
            self._betas = self._fit.params
            self._std = numpy.std(data[self._model.data.ynames].values - self.predict(data))
        else:
            self._y_design_info = None
            self._X_design_info = None
            self._model = None
            self._fit = None
            self._betas = None
            self._std = None

    def __repr__(self):
        return str(self._fit.summary())

    def predict(self, data):

        if len(data) == 0:
            return []

        (X, ) = patsy.build_design_matrices([self._X_design_info], data)

        return linear_transform(numpy.asarray(X), self._betas)

    def draw(self, data, rand_engine):

        return self.predict(data) + rand_engine.normal(0, self._std, len(data))

    def to_pickle(self, filename):

        pickle.dump((self._y_design_info, self._X_design_info, self._betas, self._std),
                    open(filename, "wb"))

    @staticmethod
    def read_pickle(filename):
        y_design_info, X_design_info, betas, std = pickle.load(open(filename, "rb"))

        linear_regression = LinearRegression()
        linear_regression._y_design_info = y_design_info
        linear_regression._X_design_info = X_design_info
        linear_regression._betas = betas
        linear_regression._std = std

        return linear_regression

    def __add__(self, other):
        ret = copy(self)
        ret._betas = self._betas + other._betas
        return ret

    def __sub__(self, other):
        ret = copy(self)
        ret._betas = self._betas - other._betas
        return ret

    def __mul__(self, other):
        ret = copy(self)
        ret._betas = ret._betas * other
        return ret