Example #1
    def fit(self, p, samples=100, bound=None):
        """Estimate the marginal outcome via IPTW for network data under weak interference at
        coverage `p`.

        Parameters
        ----------
        p : float
            Probability of exposure under the stochastic treatment plan of interest.
        samples : int, optional
            Number of samples pooled together to estimate the numerator weights. Default is 100.
        bound : float, list, optional
            Optional truncation of the estimated inverse probability weights. A single float
            applies symmetric truncation; a list of two floats applies asymmetric truncation.
            Default is None (no truncation).
        """
        # Denominator g(As | Ws) is estimated once and cached for subsequent fit() calls
        if not self._denominator_estimated_:
            self._denominator_ = self._estimate_g_(data=self.df.copy(), distribution=self._map_dist_)

        # Creating pooled sample to estimate weights
        pooled_df = self._generate_pooled_sample(p=p, samples=samples)

        # Generating numerator weights for treatment plan
        numerator = self._estimate_gstar_(pooled_data=pooled_df.copy(),
                                          data_to_predict=self.df.copy(), distribution=self._map_dist_)

        # Calculating H = g-star(As | Ws) / g(As | Ws)
        iptw = numerator / self._denominator_
        if bound is not None:
            # BUG FIX: bounding() returns the truncated weights (see exposure_model, which
            # reassigns its result); the original call discarded the return value, so the
            # `bound` argument silently had no effect on the estimate
            iptw = bounding(ipw=iptw, bound=bound)

        # Calculating marginal outcome as the IPTW-weighted mean of observed outcomes
        self.marginal_outcome = np.average(self.df[self.outcome], weights=iptw)

        # Estimating variance and the (1 - alpha) Wald-type confidence interval
        y_ = np.array(self.df[self.outcome])
        zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1)

        self.conditional_variance = self._est_variance_conditional_(iptw=iptw, obs_y=y_, psi=self.marginal_outcome)
        self.conditional_ci = [self.marginal_outcome - zalpha*np.sqrt(self.conditional_variance),
                               self.marginal_outcome + zalpha*np.sqrt(self.conditional_variance)]
    def exposure_model(self, model, custom_model=None, bound=None):
        """Estimate the exposure model Pr(A=1|W), which serves as the denominator of the
        inverse probability weights.

        Parameters
        ----------
        model : str
            Independent variables to predict the exposure. Example) 'var1 + var2 + var3'
        custom_model : optional
            Input for a custom model that is used in place of the logit model (default). The model must have the
            "fit()" and  "predict()" attributes. Both sklearn and supylearner are supported as custom models. In the
            background, TMLE will fit the custom model and generate the predicted probablities
        bound : float, list, optional
            Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
            Specifying this argument can improve finite sample performance for random positivity violations. However,
            truncating weights leads to additional confounding. Default is False, meaning no truncation of
            predicted probabilities occurs. Providing a single float assumes symmetric trunctation, where values below
            or above the threshold are set to the threshold value. Alternatively a list of floats can be provided for
            asymmetric trunctation, with the first value being the lower bound and the second being the upper bound
        """
        self._g_model = self.exposure + ' ~ ' + model

        if custom_model is not None:
            # User-supplied learner: build the design matrix directly and delegate prediction
            self._exp_model_custom = True
            design = np.asarray(patsy.dmatrix(model + ' - 1', self.df))
            probs = exposure_machine_learner(xdata=design,
                                             ydata=np.asarray(self.df[self.exposure]),
                                             ml_model=custom_model,
                                             pdata=design)
        else:
            # Default: parametric logistic regression of A on W
            logit_fit = propensity_score(self.df,
                                         self._g_model,
                                         print_results=self._verbose_)
            probs = logit_fit.predict(self.df)

        if bound is not None:
            truncated = bounding(ipw=probs, bound=bound)
            # Record how many predicted probabilities the truncation actually altered
            self._specified_bound_ = np.sum(np.where(truncated == probs, 0, 1))
            probs = truncated

        # Probability of the exposure actually received: Pr(A=1|W) if A=1, else Pr(A=0|W)
        self._denominator_ = np.where(self.df[self.exposure] == 1, probs, 1 - probs)
 def test_bound_below1(self):
     """Symmetric bound < 1 clips values below `bound` and above `1/bound`."""
     weights = np.array([0.1, 0.2, 0.5, 1.0, 40])
     result = bounding(weights, bound=0.3)
     npt.assert_allclose([0.3, 0.3, 0.5, 1.0, 1 / 0.3], result, atol=1e-5)
 def test_bound_above1(self):
     """Symmetric bound > 1 clips values below `1/bound` and above `bound`."""
     weights = np.array([0.2, 1.1, 2, 5, 10])
     result = bounding(weights, bound=3)
     npt.assert_allclose([1 / 3, 1.1, 2, 3, 3], result, atol=1e-5)
 def test_error_order(self):
     """A list bound given in (upper, lower) order must raise ValueError."""
     weights = np.array([0.1, 0.5, 1.3])
     with pytest.raises(ValueError):
         bounding(weights, bound=[5, 0.1])
 def test_error_string(self):
     """A non-numeric bound must raise ValueError."""
     weights = np.array([0.1, 0.5, 1.3])
     with pytest.raises(ValueError):
         bounding(weights, bound='three')
 def test_error_negative_bound(self):
     """A negative bound must raise ValueError."""
     weights = np.array([0.1, 0.5, 1.3])
     with pytest.raises(ValueError):
         bounding(weights, bound=-3)