def test_conversions(self):
    """Check that the odds/probability converters are inverses of each other.

    Both round trips (probability -> odds -> probability and
    odds -> probability -> odds) must reproduce the input to within a
    relative tolerance of 1e-12.
    """
    # Round trip starting from probabilities
    probabilities = np.array([0.7, 0.5, 0.1, 0.01, 0.99])
    as_odds = probability_to_odds(probabilities)
    npt.assert_allclose(probabilities,
                        odds_to_probability(as_odds),
                        rtol=1e-12)

    # Round trip starting from odds
    odds = np.array([2, 1, 0.1, 9, 12, 0.3])
    as_probabilities = odds_to_probability(odds)
    npt.assert_allclose(odds,
                        probability_to_odds(as_probabilities),
                        rtol=1e-12)
def targeting_step(y, q_init, iptw, verbose):
    """Estimate the single-step TMLE fluctuation parameter (epsilon).

    Fits an intercept-only binomial GLM of the outcome, using the log-odds
    of the initial outcome predictions as an offset and the supplied weights
    as frequency weights. The fitted intercept is the epsilon term.

    Parameters
    ----------
    y : array-like
        Outcome (dependent variable). Must expose ``.shape``.
    q_init : array-like
        Initial outcome predictions; converted to log-odds for the offset.
    iptw : array-like
        Weights handed to the GLM as ``freq_weights``.
    verbose : bool
        When true, print the summary of the fitted targeting model.

    Returns
    -------
    float
        The single estimated coefficient (intercept) of the targeting model.
    """
    intercept_only = np.repeat(1, y.shape[0])  # Intercept-only design
    offset = np.log(probability_to_odds(q_init))  # logit of the initial predictions

    targeting_model = sm.GLM(y,
                             intercept_only,
                             offset=offset,
                             freq_weights=iptw,
                             family=sm.families.family.Binomial()).fit()

    if verbose:  # Optional argument to print each intermediary result
        print(
            '=============================================================================='
        )
        print('Targeting Model')
        print(targeting_model.summary())

    # When `y` is a pandas object the fitted params come back as a
    # label-indexed Series; `params[0]` would then rely on deprecated
    # integer-key positional indexing. Going through an ndarray reads the
    # first coefficient positionally regardless of the container type.
    return np.asarray(targeting_model.params)[0]
# Script fragment: selects the grid of treatment-plan values, computes the
# corresponding true values, and prints a run banner.
# NOTE(review): `shift`, `G`, `network`, `exposure`, `restrict` and
# `script_name` are defined outside this excerpt.

# Coerce the flag through int() first, then to a real boolean.
# NOTE(review): presumably `shift` arrives as a "0"/"1"-style value - confirm.
shift = bool(int(shift))
if shift:
    # Grid used in shift mode. The values fall outside [0, 1], so they are
    # not probabilities - presumably shifts applied on the log-odds scale
    # computed below; verify against the code that consumes `prop_treated`.
    prop_treated = [-2.5, -2.0, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5]

    # Generating probabilities (true) to assign
    data = network_to_df(G)
    adj_matrix = nx.adjacency_matrix(G, weight=None)
    # Summaries of 'O' and 'G' over the adjacency matrix (measure='mean').
    data['O_mean'] = fast_exp_map(adj_matrix, np.array(data['O']), measure='mean')
    data['G_mean'] = fast_exp_map(adj_matrix, np.array(data['G']), measure='mean')
    # Probability from a logistic CDF applied to a linear predictor in
    # 'P', the 'P'*'G' interaction, and the two mapped means.
    prob = logistic.cdf(-1.3 - 1.5 * data['P'] + 1.5 * data['P'] * data['G'] + 0.95 * data['O_mean'] + 0.95 * data['G_mean'])
    # Same quantity expressed as log-odds.
    log_odds = np.log(probability_to_odds(prob))
else:
    # Grid used otherwise: proportions from 0.05 to 0.95 in steps of 0.05.
    prop_treated = [
        0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6,
        0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95
    ]

# True values for the chosen configuration; computed in both modes.
truth = truth_values(network=network, dgm=exposure, restricted_degree=restrict, shift=shift)

# Run banner
print("#############################################")
print("Sim Script:", script_name)
print("=============================================")
def fit(self, p, conditional=None, samples=100, seed=None):
    """Calculate the mean from the predicted exposure probabilities and predicted outcome values using the TMLE
    procedure. Confidence intervals are calculated using influence curves.

    Parameters
    ----------
    p : float, list, tuple
        Proportion that correspond to the number of persons treated (all values must be between 0.0 and 1.0). If
        conditional is specified, p must be a list/tuple of floats of the same length
    conditional : None, list, tuple, optional
        Sequence of condition strings, one per entry of `p`. Each string is evaluated with ``eval`` inside this
        method, where the local name ``df`` is a copy of the data set, and is used as a row mask: rows where
        ``conditional[i]`` is true are assigned treatment probability ``p[i]``. Default is None, which applies the
        single probability `p` to every row
    samples : int, optional
        Number of samples to use for the Monte Carlo integration procedure
    seed : None, int, optional
        Seed for the Monte Carlo integration procedure. When given, it is passed to ``np.random.seed`` (global state)

    Note
    ----
    Exposure and outcome models must be specified prior to `fit()`

    Raises
    ------
    ValueError
        If the exposure or outcome model has not been specified, if any value of `p` is outside [0, 1], or if `p`
        and `conditional` do not have the same length

    Returns
    -------
    `StochasticTMLE` gains `epsilon`, `marginals_vector` and `marginal_outcome` along with `marginal_se`,
    `marginal_ci`, `conditional_se` and `conditional_ci`
    """
    if self._denominator_ is None:
        raise ValueError(
            "The exposure_model() function must be specified before the fit() function"
        )
    if self._Qinit_ is None:
        raise ValueError(
            "The outcome_model() function must be specified before the fit() function"
        )

    if seed is not None:
        np.random.seed(seed)

    p = np.array(p)
    if np.any(p > 1) or np.any(p < 0):
        raise ValueError(
            "All specified treatment probabilities must be between 0 and 1"
        )
    if conditional is not None:
        # A scalar `p` has no length; report it as a length mismatch rather than an opaque TypeError from len()
        if p.ndim == 0 or len(p) != len(conditional):
            raise ValueError(
                "'p' and 'conditional' must be the same length")

    # Step 1) Calculating clever covariate (HAW)
    strata = None
    if conditional is None:
        numerator = np.where(self.df[self.exposure] == 1, p, 1 - p)
    else:
        # The condition strings are evaluated with eval() in this frame, so the local name `df` must exist here
        # and the eval() calls must stay in this function body (not in a helper or comprehension).
        # SECURITY NOTE: eval() executes arbitrary code; `conditional` must never come from untrusted input.
        df = self.df.copy()
        stochastic_check_conditional(df=self.df, conditional=conditional)
        strata = []
        for c in conditional:
            strata.append(eval(c))

        # Start from an all-NaN vector (one entry per row) and fill each stratum in turn
        numerator = np.full(self.df.shape[0], np.nan)
        for in_stratum, prop in zip(strata, p):
            numerator = np.where(
                in_stratum,
                np.where(df[self.exposure] == 1, prop, 1 - prop),
                numerator)

    haw = np.array(numerator / self._denominator_).astype(float)

    # Step 2) Estimate from Q-model
    # process completed in outcome_model() function and stored in self._Qinit_

    # Step 3) Target parameter TMLE
    self.epsilon = self.targeting_step(y=self.df[self.outcome],
                                       q_init=self._Qinit_,
                                       iptw=haw,
                                       verbose=self._verbose_)

    # Step 4) Monte-Carlo Integration procedure
    q_star_list = []
    q_i_star_list = []
    self._resamples_ = samples
    for _ in range(samples):
        # Applying treatment plan
        df = self.df.copy()
        if conditional is None:
            df[self.exposure] = np.random.binomial(n=1,
                                                   p=p,
                                                   size=df.shape[0])
        else:
            # One full-length draw per stratum (same RNG consumption as before), but each draw is only kept for
            # the rows inside its stratum instead of overwriting the whole column.
            assigned = np.full(df.shape[0], np.nan)
            for in_stratum, prop in zip(strata, p):
                draws = np.random.binomial(n=1,
                                           p=prop,
                                           size=df.shape[0])
                assigned = np.where(in_stratum, draws, assigned)
            if not np.isnan(assigned).any():
                # Every row was covered: keep the integer dtype the unconditional branch produces
                assigned = assigned.astype(int)
            df[self.exposure] = assigned

        # Outcome model under treatment plan
        if self._out_model_custom:
            # Design matrix must come from the treatment-plan data, not the observed data
            _, data_star = patsy.dmatrices(self._q_model + ' - 1', df)
            y_star = stochastic_outcome_predict(
                xdata=data_star,
                fit_ml_model=self._outcome_model,
                continuous=self._continuous_outcome)
        else:
            y_star = self._outcome_model.predict(df)

        if self._continuous_outcome:  # Ensures all predicted values are bounded
            y_star = np.where(y_star < self._q_min_bound,
                              self._q_min_bound, y_star)
            y_star = np.where(y_star > self._q_max_bound,
                              self._q_max_bound, y_star)

        # Targeted Estimate
        logit_qstar = np.log(
            probability_to_odds(y_star)) + self.epsilon  # logit(Y^*) + e
        q_star = odds_to_probability(np.exp(logit_qstar))  # Y^*
        q_i_star_list.append(q_star)  # Saving Y_i^* for marginal variance
        q_star_list.append(np.mean(q_star))  # Saving E[Y^*]

    if self._continuous_outcome:
        # Results are passed through _tmle_unit_unbound_ with the stored min/max before being reported
        self.marginals_vector = _tmle_unit_unbound_(
            np.array(q_star_list),
            mini=self._continuous_min,
            maxi=self._continuous_max)
        y_ = np.array(
            _tmle_unit_unbound_(self.df[self.outcome],
                                mini=self._continuous_min,
                                maxi=self._continuous_max))
        yq0_ = _tmle_unit_unbound_(self._Qinit_,
                                   mini=self._continuous_min,
                                   maxi=self._continuous_max)
        yqstar_ = _tmle_unit_unbound_(np.array(q_i_star_list),
                                      mini=self._continuous_min,
                                      maxi=self._continuous_max)
    else:
        self.marginals_vector = q_star_list
        y_ = np.array(self.df[self.outcome])
        yq0_ = self._Qinit_
        yqstar_ = np.array(q_i_star_list)

    self.marginal_outcome = np.mean(self.marginals_vector)

    # Step 5) Estimating Var(psi)
    zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1)
    n_obs = self.df.shape[0]

    # Marginal variance estimator
    variance_marginal = self.est_marginal_variance(
        haw=haw,
        y_obs=y_,
        y_pred=yq0_,
        y_pred_targeted=np.mean(yqstar_, axis=0),  # Average over the Monte Carlo resamples
        psi=self.marginal_outcome)
    self.marginal_se = np.sqrt(variance_marginal) / np.sqrt(n_obs)
    self.marginal_ci = [
        self.marginal_outcome - zalpha * self.marginal_se,
        self.marginal_outcome + zalpha * self.marginal_se
    ]

    # Conditional on W variance estimator (not generally recommended but I need it for other work)
    variance_conditional = self.est_conditional_variance(haw=haw,
                                                         y_obs=y_,
                                                         y_pred=yq0_)
    self.conditional_se = np.sqrt(variance_conditional) / np.sqrt(n_obs)
    self.conditional_ci = [
        self.marginal_outcome - zalpha * self.conditional_se,
        self.marginal_outcome + zalpha * self.conditional_se
    ]