def _estimate_effect(self):
    """Estimate the effect by chaining two backdoor sub-estimators.

    Stage one fits ``self.first_stage_model`` for the effect of the
    treatment on the intermediate variables (the frontdoor variables when
    the identifier method is ``"frontdoor"``, the mediators when it is
    ``"mediation"``), using ``mediation_first_stage_confounders`` as the
    backdoor set. Stage two fits ``self.second_stage_model`` for the effect
    of those intermediate variables on the outcome, using
    ``mediation_second_stage_confounders``.

    The reported value is the product of the two stage values. When the
    target estimand type is ``CausalIdentifier.NONPARAMETRIC_NIE``, a third
    estimator is fitted for treatment -> outcome and the reported value
    becomes that total estimate minus the product.

    Side effect: ``self.symbolic_estimator`` is (re)assigned.

    Returns:
        CausalEstimate carrying the value, the original target estimand and
        the symbolic estimator expression.
    """
    target = self._target_estimand
    method = target.identifier_method

    # Options forwarded unchanged to every sub-estimator built below.
    shared_options = dict(
        control_value=self._control_value,
        treatment_value=self._treatment_value,
        test_significance=self._significance_test,
        evaluate_effect_strength=self._effect_strength_eval,
        confidence_intervals=self._confidence_intervals,
        target_units=self._target_units,
        effect_modifiers=self._effect_modifier_names,
        params=self.method_params,
    )

    # Stage one: treatment -> intermediate variables. The estimand is deep
    # copied so the caller's target estimand is never modified.
    stage_one_estimand = copy.deepcopy(target)
    stage_one_estimand.identifier_method = "backdoor"
    stage_one_estimand.backdoor_variables = target.mediation_first_stage_confounders
    if method == "frontdoor":
        stage_one_estimand.outcome_variable = parse_state(
            self._frontdoor_variables_names)
    elif method == "mediation":
        stage_one_estimand.outcome_variable = parse_state(self._mediators_names)
    stage_one_estimator = self.first_stage_model(
        self._data,
        stage_one_estimand,
        self._treatment_name,
        parse_state(stage_one_estimand.outcome_variable),
        **shared_options)
    first_stage_estimate = stage_one_estimator._estimate_effect()

    # Stage two: intermediate variables -> outcome.
    stage_two_estimand = copy.deepcopy(target)
    stage_two_estimand.identifier_method = "backdoor"
    stage_two_estimand.backdoor_variables = target.mediation_second_stage_confounders
    if method == "frontdoor":
        stage_two_estimand.treatment_variable = parse_state(
            self._frontdoor_variables_names)
    elif method == "mediation":
        stage_two_estimand.treatment_variable = parse_state(self._mediators_names)
    stage_two_estimator = self.second_stage_model(
        self._data,
        stage_two_estimand,
        parse_state(stage_two_estimand.treatment_variable),
        self._outcome_name,
        **shared_options)
    second_stage_estimate = stage_two_estimator._estimate_effect()

    # Combine the two stages.
    # NOTE(review): this product is labelled NONPARAMETRIC_NDE here, and
    # "total - product" is returned for NONPARAMETRIC_NIE below. In the usual
    # product-of-coefficients formulation the product is the *indirect*
    # effect, so the labels look swapped. Confirm against
    # construct_symbolic_estimator before changing anything.
    stage_product = first_stage_estimate.value * second_stage_estimate.value
    estimate_value = stage_product
    self.symbolic_estimator = self.construct_symbolic_estimator(
        first_stage_estimate.realized_estimand_expr,
        second_stage_estimate.realized_estimand_expr,
        estimand_type=CausalIdentifier.NONPARAMETRIC_NDE)

    if target.estimand_type == CausalIdentifier.NONPARAMETRIC_NIE:
        # Total effect of the treatment on the outcome, estimated with the
        # second-stage model class and the estimand's own backdoor set.
        total_estimand = copy.deepcopy(target)
        total_estimand.identifier_method = "backdoor"
        total_estimator = self.second_stage_model(
            self._data,
            total_estimand,
            self._treatment_name,
            self._outcome_name,
            **shared_options)
        total_effect_estimate = total_estimator._estimate_effect()

        estimate_value = total_effect_estimate.value - stage_product
        self.symbolic_estimator = self.construct_symbolic_estimator(
            first_stage_estimate.realized_estimand_expr,
            second_stage_estimate.realized_estimand_expr,
            total_effect_estimate.realized_estimand_expr,
            estimand_type=target.estimand_type)

    return CausalEstimate(
        estimate=estimate_value,
        target_estimand=target,
        realized_estimand_expr=self.symbolic_estimator)
def _estimate_effect(self, recalculate_propensity_score=False):
    """Estimate the effect by propensity-score weighting of the outcome.

    A logistic-regression propensity model is fitted on the observed common
    causes when no model is cached or when a refit is requested. The scores
    are stored in ``self._data['ps']`` and trimmed to
    ``[self.min_ps_score, self.max_ps_score]``. Nine weight columns are then
    written to ``self._data``: plain, normalized and stabilized variants for
    the whole population (``ips_*``), the treated (``tips_*``) and the
    controls (``cips_*``). The column chosen by ``self.weighting_scheme`` and
    ``self._target_units`` yields the estimate as the weighted outcome sum
    over treated rows minus the weighted outcome sum over control rows.

    Assumes ``self._data`` is a pandas DataFrame and the first treatment
    column is 0/1 (or boolean) valued - TODO confirm against the caller.

    Args:
        recalculate_propensity_score: When truthy, refit the propensity
            model even if one is already cached on the instance.

    Returns:
        CausalEstimate with the estimate, the target estimand, the symbolic
        estimator expression and the trimmed propensity scores.

    Raises:
        ValueError: If ``self._target_units`` is not "ate", "att" or "atc".
            The weight columns have already been written at that point.
    """
    if self._propensity_score_model is None or recalculate_propensity_score:
        self._propensity_score_model = linear_model.LogisticRegression()
        self._propensity_score_model.fit(
            self._observed_common_causes, self._treatment)
        self._data['ps'] = self._propensity_score_model.predict_proba(
            self._observed_common_causes)[:, 1]

    # Trim the propensity scores. Deliberately kept as two steps (upper
    # bound first) rather than np.clip: the two differ when
    # min_ps_score > max_ps_score.
    self._data['ps'] = np.minimum(self.max_ps_score, self._data['ps'])
    self._data['ps'] = np.maximum(self.min_ps_score, self._data['ps'])

    # Hoist the columns every weight formula needs. The arithmetic below
    # keeps the original operand order.
    treated = self._data[self._treatment_name[0]]
    control = 1 - treated
    ps = self._data['ps']
    one_minus_ps = 1 - ps

    # Vectorized column sums (pandas skips NaN by default), consistent with
    # the .sum() calls used for the final estimate.
    num_units = len(treated)
    num_treatment_units = treated.sum()
    num_control_units = num_units - num_treatment_units

    # Vanilla IPS weights: T/ps + (1-T)/(1-ps), scaled by 1/N. The att/atc
    # variants reweight controls/treated by the odds and scale by the size
    # of the reference group.
    self._data['ips_weight'] = (1 / num_units) * (
        treated / ps + control / one_minus_ps)
    self._data['tips_weight'] = (1 / num_treatment_units) * (
        treated + control * ps / one_minus_ps)
    self._data['cips_weight'] = (1 / num_control_units) * (
        treated * one_minus_ps / ps + control)

    # Self-normalized weights (also known as the Hajek estimator): each
    # arm's weights are divided by that arm's weight total.
    ipst_sum = (treated / ps).sum()
    ipsc_sum = (control / one_minus_ps).sum()
    self._data['ips_normalized_weight'] = (
        treated / ps / ipst_sum + control / one_minus_ps / ipsc_sum)

    ipst_for_att_sum = num_treatment_units
    ipsc_for_att_sum = (control / one_minus_ps * ps).sum()
    self._data['tips_normalized_weight'] = (
        treated / ipst_for_att_sum
        + control * ps / one_minus_ps / ipsc_for_att_sum)

    ipst_for_atc_sum = (treated / ps * one_minus_ps).sum()
    ipsc_for_atc_sum = control.sum()
    self._data['cips_normalized_weight'] = (
        treated * one_minus_ps / ps / ipst_for_atc_sum
        + control / ipsc_for_atc_sum)

    # Stabilized weights: each arm is additionally scaled by the marginal
    # probability of that arm.
    # NOTE(review): these weights are not renormalized before the difference
    # of weighted sums below - confirm that is the intended estimator.
    p_treatment = num_treatment_units / num_units
    self._data['ips_stabilized_weight'] = (1 / num_units) * (
        treated / ps * p_treatment
        + control / one_minus_ps * (1 - p_treatment))
    self._data['tips_stabilized_weight'] = (1 / num_treatment_units) * (
        treated * p_treatment
        + control * ps / one_minus_ps * (1 - p_treatment))
    self._data['cips_stabilized_weight'] = (1 / num_control_units) * (
        treated * one_minus_ps / ps * p_treatment
        + control * (1 - p_treatment))

    # The target population selects the column-name prefix.
    if self._target_units == "ate":
        weighting_scheme_name = self.weighting_scheme
    elif self._target_units == "att":
        weighting_scheme_name = "t" + self.weighting_scheme
    elif self._target_units == "atc":
        weighting_scheme_name = "c" + self.weighting_scheme
    else:
        raise ValueError("Target units string value not supported")

    # Weighted outcome contributions of treated and control rows.
    weights = self._data[weighting_scheme_name]
    outcome = self._data[self._outcome_name]
    self._data['d_y'] = weights * treated * outcome
    self._data['dbar_y'] = weights * control * outcome
    est = self._data['d_y'].sum() - self._data['dbar_y'].sum()

    # TODO - how can we add additional information into the returned estimate?
    estimate = CausalEstimate(
        estimate=est,
        target_estimand=self._target_estimand,
        realized_estimand_expr=self.symbolic_estimator,
        propensity_scores=self._data["ps"])
    return estimate