def fit(self, p, samples=100, bound=None):
    """Estimate IPTW for network data under weak interference at coverage `p`.

    Estimates the marginal outcome under the treatment plan via inverse
    probability of treatment weights H = g-star(As | Ws) / g(As | Ws), then
    computes the conditional variance and corresponding confidence interval.

    Parameters
    ----------
    p : float
        Coverage (probability of treatment) for the stochastic treatment plan.
    samples : int, optional
        Number of pooled samples to generate for estimating the numerator model.
    bound : float, list, optional
        Value(s) to truncate the estimated weights at. Default is None, meaning
        no truncation of the weights occurs.
    """
    # Fit the denominator model g(A|W) only once; reuse on repeated fit() calls
    if not self._denominator_estimated_:
        self._denominator_ = self._estimate_g_(data=self.df.copy(),
                                               distribution=self._map_dist_)

    # Creating pooled sample to estimate weights
    pooled_df = self._generate_pooled_sample(p=p, samples=samples)

    # Generating numerator weights for treatment plan
    numerator = self._estimate_gstar_(pooled_data=pooled_df.copy(),
                                      data_to_predict=self.df.copy(),
                                      distribution=self._map_dist_)

    # Calculating H = g-star(As | Ws) / g(As | Ws)
    iptw = numerator / self._denominator_
    if bound is not None:
        # BUG FIX: previously the return value of bounding() was discarded,
        # so `bound` had no effect. bounding() returns a new truncated array
        # rather than mutating its input in place.
        iptw = bounding(ipw=iptw, bound=bound)

    # Calculating weighted marginal outcome
    self.marginal_outcome = np.average(self.df[self.outcome], weights=iptw)

    # Estimating variance and Wald-type confidence interval
    y_ = np.array(self.df[self.outcome])
    zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1)
    self.conditional_variance = self._est_variance_conditional_(iptw=iptw,
                                                                obs_y=y_,
                                                                psi=self.marginal_outcome)
    self.conditional_ci = [self.marginal_outcome - zalpha*np.sqrt(self.conditional_variance),
                           self.marginal_outcome + zalpha*np.sqrt(self.conditional_variance)]
def exposure_model(self, model, custom_model=None, bound=None):
    """Estimation of the exposure model, Pr(A=1|W).

    The predicted probabilities from this model form the denominator of the
    inverse probability weights.

    Parameters
    ----------
    model : str
        Independent variables to predict the exposure.
        Example) 'var1 + var2 + var3'
    custom_model : optional
        Input for a custom model that is used in place of the logit model
        (default). The model must have the "fit()" and "predict()" attributes.
        Both sklearn and supylearner are supported as custom models. In the
        background, TMLE will fit the custom model and generate the predicted
        probablities
    bound : float, list, optional
        Value between 0,1 to truncate predicted probabilities. Helps to avoid
        near positivity violations. Specifying this argument can improve finite
        sample performance for random positivity violations. However,
        truncating weights leads to additional confounding. Default is False,
        meaning no truncation of predicted probabilities occurs. Providing a
        single float assumes symmetric trunctation, where values below or above
        the threshold are set to the threshold value. Alternatively a list of
        floats can be provided for asymmetric trunctation, with the first value
        being the lower bound and the second being the upper bound
    """
    self._g_model = self.exposure + ' ~ ' + model

    if custom_model is not None:
        # User-specified prediction model fit through the ML wrapper
        self._exp_model_custom = True
        design = patsy.dmatrix(model + ' - 1', self.df)
        probs = exposure_machine_learner(xdata=np.asarray(design),
                                         ydata=np.asarray(self.df[self.exposure]),
                                         ml_model=custom_model,
                                         pdata=np.asarray(design))
    else:
        # Default: standard parametric logistic regression
        fitted = propensity_score(self.df, self._g_model, print_results=self._verbose_)
        probs = fitted.predict(self.df)

    if bound is not None:
        # Truncate predictions and record how many observations were affected
        truncated = bounding(ipw=probs, bound=bound)
        self._specified_bound_ = np.sum(np.where(truncated == probs, 0, 1))
        probs = truncated

    # Probability of the observed exposure: Pr(A=1|W) if A=1, else Pr(A=0|W)
    self._denominator_ = np.where(self.df[self.exposure] == 1, probs, 1 - probs)
def test_bound_below1(self):
    # Symmetric bound of 0.3: values below 0.3 raised to 0.3, above 1/0.3 capped
    bounded = bounding(np.array([0.1, 0.2, 0.5, 1.0, 40]), bound=0.3)
    expected = [0.3, 0.3, 0.5, 1.0, 1 / 0.3]
    npt.assert_allclose(expected, bounded, atol=1e-5)
def test_bound_above1(self):
    # Symmetric bound of 3: values above 3 capped at 3, below 1/3 raised to 1/3
    bounded = bounding(np.array([0.2, 1.1, 2, 5, 10]), bound=3)
    expected = [1 / 3, 1.1, 2, 3, 3]
    npt.assert_allclose(expected, bounded, atol=1e-5)
def test_error_order(self):
    # A bound list with lower > upper must be rejected
    weights = np.array([0.1, 0.5, 1.3])
    with pytest.raises(ValueError):
        bounding(weights, bound=[5, 0.1])
def test_error_string(self):
    # Non-numeric bound specifications must be rejected
    weights = np.array([0.1, 0.5, 1.3])
    with pytest.raises(ValueError):
        bounding(weights, bound='three')
def test_error_negative_bound(self):
    # Negative bounds are invalid for probability weights
    weights = np.array([0.1, 0.5, 1.3])
    with pytest.raises(ValueError):
        bounding(weights, bound=-3)