def null_refutation_test(self, data=None, dataset="linear", beta=10, num_common_causes=1, num_instruments=1,
                         num_samples=100000, treatment_is_binary=True):
    """Check that a refuter with zero effect strength leaves the estimate unchanged.

    A causal model is built from ``data`` (or from a freshly generated linear
    dataset when ``data`` is None), the effect is estimated with
    ``self.estimator_method`` and then refuted twice with
    ``self.refuter_method``: once with the configured effect strengths (only to
    surface exceptions) and once with both strengths set to zero.  The method
    asserts that the zero-strength refutation stays within
    ``self._error_tolerance`` (relative to the estimate) of the original value.

    ``dataset`` is accepted but not used by this method.
    """
    # A caller may hand in a ready-made dataset dictionary; otherwise generate one.
    if data is None:
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=num_common_causes,
            num_instruments=num_instruments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary,
        )

    model = CausalModel(
        data=data['df'],
        treatment=data["treatment_name"],
        outcome=data["outcome_name"],
        graph=data["gml_graph"],
        proceed_when_unidentifiable=True,
        test_significance=None,
    )
    target_estimand = model.identify_effect()
    ate_estimate = model.estimate_effect(
        identified_estimand=target_estimand,
        method_name=self.estimator_method,
        test_significance=None,
    )
    self.logger.debug(data["ate"])

    # Keyword arguments common to both refutation runs.
    shared_kwargs = {
        "method_name": self.refuter_method,
        "confounders_effect_on_treatment": self.confounders_effect_on_t,
        "confounders_effect_on_outcome": self.confounders_effect_on_y,
    }

    # First run: only checks that the refuter executes without raising.
    smoke_run = model.refute_estimate(
        target_estimand,
        ate_estimate,
        effect_strength_on_treatment=self.effect_strength_on_t,
        effect_strength_on_outcome=self.effect_strength_on_y,
        **shared_kwargs
    )
    self.logger.debug(smoke_run.new_effect)

    # Second run: with zero effect strengths the estimate should be unchanged.
    null_run = model.refute_estimate(
        target_estimand,
        ate_estimate,
        effect_strength_on_treatment=0,
        effect_strength_on_outcome=0,
        **shared_kwargs
    )
    error = abs(null_run.new_effect - ate_estimate.value)
    message = "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}".format(
        error, self._error_tolerance * 100, ate_estimate.value, null_run.new_effect)
    print(message)

    allowed_error = abs(ate_estimate.value) * self._error_tolerance
    assert error < allowed_error
def predict(self, dataset: DatasetInterface):
    """Estimate and refute the causal effect of a constant treatment on the dataset's outcome.

    Parameters
    ----------
    dataset : DatasetInterface
        Supplies the data frame (``get_data``), the outcome column name
        (``get_outcome``) and the common-cause column names (``get_causes``).

    Returns
    -------
    tuple
        ``(estimated_effect, new_effect)`` taken from the result of the
        ``random_common_cause`` refutation.
    """
    # Temporarily add the treatment column: work on a copy so the column does
    # not leak into the frame owned by the dataset object.
    data = dataset.get_data().copy()
    treatment = 'treatment'
    data[treatment] = True

    outcome = dataset.get_outcome()
    common_causes = dataset.get_causes()

    model = CausalModel(data, treatment, outcome,
                        common_causes=common_causes,
                        proceed_when_unidentifiable=True)

    # Identify the causal effect
    relation = model.identify_effect()

    # Estimate the causal effect
    estimate = model.estimate_effect(
        relation,
        method_name="backdoor.linear_regression",
        test_significance=True)

    # Refute the obtained estimate
    result = model.refute_estimate(relation, estimate,
                                   method_name="random_common_cause")
    return result.estimated_effect, result.new_effect
def predict_tutorial(self, data: pd.DataFrame):
    """Run the IHDP DoWhy tutorial end to end and print every intermediate result.

    Source of the walkthrough:
    https://towardsdatascience.com/implementing-causal-inference-a-key-step-towards-agi-de2cde8ea599

    NOTE: the ``data`` argument is ignored; it is replaced by the IHDP CSV
    downloaded below, exactly as in the tutorial.  The method prints its
    results and returns None.
    """
    data = pd.read_csv(
        'https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv',
        header=None)

    # The 25 covariates are named x1 .. x25.
    covariates = ['x' + str(i) for i in range(1, 26)]
    data.columns = ['treatment', 'y_factual', 'y_cfactual', 'mu0', 'mu1'] + covariates
    data = data.astype({'treatment': 'bool'}, copy=False)

    # Create a causal model from the data and given common causes.
    # The covariate list is passed directly: building "x1+...+x25+" and
    # splitting on '+' would append an empty-string common cause.
    model = CausalModel(data=data,
                        treatment='treatment',
                        outcome='y_factual',
                        common_causes=list(covariates))

    # Identify the causal effect
    identified_estimand = model.identify_effect()
    print(identified_estimand)

    # Estimate the causal effect and compare it with Average Treatment Effect
    estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.linear_regression",
        test_significance=True)
    print(estimate)
    print("Causal Estimate is " + str(estimate.value))

    refute_results = model.refute_estimate(
        identified_estimand, estimate, method_name="random_common_cause")
    print(refute_results)
def predict_example(self, data: pd.DataFrame):
    """Identify, estimate and refute the effect of columns E1 and E2 on E3.

    References:
    https://github.com/Microsoft/dowhy
    https://ntanmayee.github.io/articles/2018/11/16/tools-for-causality.html

    Parameters
    ----------
    data : pd.DataFrame
        Frame passed to ``CausalModel``; the code names the columns ``E1``,
        ``E2`` (treatments) and ``E3`` (outcome).

    Returns
    -------
    The object returned by ``model.refute_estimate`` for the
    ``random_common_cause`` refuter.  (Previously the result was computed and
    discarded, so the method returned None.)
    """
    outcome = 'E3'
    causes = ['E1', 'E2']

    model = CausalModel(data=data,
                        treatment=causes,
                        outcome=outcome,
                        proceed_when_unidentifiable=True)

    # Identify causal effect and return target estimands
    identified_estimand = model.identify_effect()

    # Estimate the target estimand using a statistical method.
    estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.propensity_score_matching")

    # Refute the obtained estimate using multiple robustness checks.
    return model.refute_estimate(
        identified_estimand, estimate, method_name="random_common_cause")
def null_refutation_test(self, data=None, dataset="linear", beta=10, num_common_causes=1, num_instruments=1,
                         num_samples=100000, treatment_is_binary=True):
    """Run ``self.refuter_method`` on a linear dataset and assert its expected null behaviour.

    A causal model is built from ``data`` (or from a generated linear dataset
    when ``data`` is None) and the effect is estimated with
    ``self.estimator_method``.  The check then depends on ``self.refuter_method``:

    - ``add_unobserved_common_cause``: with zero effect strengths the refuted
      estimate must stay within ``self._error_tolerance`` (relative) of the
      original estimate.
    - ``placebo_treatment_refuter`` / ``dummy_outcome_refuter``: the refuted
      effect must be within ``self._error_tolerance`` (absolute) of zero.
    - ``data_subset_refuter`` / ``bootstrap_refuter``: the refuted estimate must
      stay within ``self._error_tolerance`` (relative) of the original estimate.

    Any other refuter name performs no check.  ``dataset`` is accepted but not
    used by this method.
    """
    # Supports user-provided dataset object
    if data is None:
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=num_common_causes,
            num_instruments=num_instruments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary)

    print(data['df'])
    print("")

    model = CausalModel(data=data['df'],
                        treatment=data["treatment_name"],
                        outcome=data["outcome_name"],
                        graph=data["gml_graph"],
                        proceed_when_unidentifiable=True,
                        test_significance=None)
    target_estimand = model.identify_effect()
    ate_estimate = model.estimate_effect(
        identified_estimand=target_estimand,
        method_name=self.estimator_method,
        test_significance=None)
    true_ate = data["ate"]
    self.logger.debug(true_ate)

    if self.refuter_method == "add_unobserved_common_cause":
        # To test if there are any exceptions
        ref = model.refute_estimate(
            target_estimand,
            ate_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y)
        self.logger.debug(ref.new_effect)

        # To test if the estimate is identical if refutation parameters are zero
        refute = model.refute_estimate(
            target_estimand,
            ate_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0)
        error = abs(refute.new_effect - ate_estimate.value)
        print(
            "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
            .format(error, self._error_tolerance * 100, ate_estimate.value, refute.new_effect))
        assert error < abs(ate_estimate.value) * self._error_tolerance

    elif self.refuter_method == "placebo_treatment_refuter":
        if treatment_is_binary is True:
            ref = model.refute_estimate(target_estimand,
                                        ate_estimate,
                                        method_name=self.refuter_method,
                                        num_simulations=10)
        else:
            ref = model.refute_estimate(target_estimand,
                                        ate_estimate,
                                        method_name=self.refuter_method)

        # This value is hardcoded to be zero as we are running this on a linear dataset.
        # Ordinarily, we should expect this value to be zero.
        EXPECTED_PLACEBO_VALUE = 0
        error = abs(ref.new_effect - EXPECTED_PLACEBO_VALUE)
        print(
            "Error in the refuted estimate = {0} with tolerence {1}%. Expected Value={2}, After Refutation={3}"
            .format(error, self._error_tolerance * 100, EXPECTED_PLACEBO_VALUE, ref.new_effect))
        print(ref)
        assert error < self._error_tolerance

    elif self.refuter_method in ("data_subset_refuter", "bootstrap_refuter"):
        # Both resampling refuters are checked identically: the refuted
        # estimate should stay close to the original one.
        if treatment_is_binary is True:
            ref = model.refute_estimate(target_estimand,
                                        ate_estimate,
                                        method_name=self.refuter_method,
                                        num_simulations=5)
        else:
            ref = model.refute_estimate(target_estimand,
                                        ate_estimate,
                                        method_name=self.refuter_method)

        error = abs(ref.new_effect - ate_estimate.value)
        print(
            "Error in the refuted estimate = {0} with tolerence {1}%. Estimated={2}, After Refutation={3}"
            .format(error, self._error_tolerance * 100, ate_estimate.value, ref.new_effect))
        print(ref)
        assert error < abs(ate_estimate.value) * self._error_tolerance

    elif self.refuter_method == "dummy_outcome_refuter":
        if self.transformations is None:
            ref = model.refute_estimate(target_estimand,
                                        ate_estimate,
                                        method_name=self.refuter_method,
                                        num_simulations=2)
        else:
            ref = model.refute_estimate(
                target_estimand,
                ate_estimate,
                method_name=self.refuter_method,
                transformations=self.transformations,
                params=self.params,
                num_simulations=2)

        # This value is hardcoded to be zero as we are running this on a linear dataset.
        # Ordinarily, we should expect this value to be zero.
        EXPECTED_DUMMY_OUTCOME_VALUE = 0
        error = abs(ref.new_effect - EXPECTED_DUMMY_OUTCOME_VALUE)
        print(
            "Error in the refuted estimate = {0} with tolerence {1}%. Expected Value={2}, After Refutation={3}"
            .format(error, self._error_tolerance * 100, EXPECTED_DUMMY_OUTCOME_VALUE, ref.new_effect))
        print(ref)
        # Check the computed error (same absolute tolerance as the placebo
        # branch); asserting on the result object itself can never fail.
        assert error < self._error_tolerance
class DoWhyWrapper:
    """
    A wrapper class to allow user call other methods from dowhy package through EconML.
    (e.g. causal graph, refutation test, etc.)

    Parameters
    ----------
    cate_estimator: instance
        An instance of any CATE estimator we currently support
    """

    # Attributes stored on the wrapper itself; every other attribute access is
    # proxied to the fitted estimator or to the dowhy model.
    _OWN_ATTRIBUTES = ('_cate_estimator', 'dowhy_', 'identified_estimand_', 'estimate_')

    def __init__(self, cate_estimator):
        self._cate_estimator = cate_estimator

    def _get_params(self):
        """Return the sorted constructor parameter names of the wrapped estimator.

        Raises
        ------
        RuntimeError
            If the estimator's ``__init__`` declares ``*args`` or ``**kwargs``.
        """
        init = self._cate_estimator.__init__
        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        parameters = init_signature.parameters.values()
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL or p.kind == p.VAR_KEYWORD:
                raise RuntimeError("cate estimators should always specify their parameters in the signature "
                                   "of their __init__ (no varargs, no varkwargs). "
                                   f"{self._cate_estimator} with constructor {init_signature} doesn't "
                                   "follow this convention.")
        # Extract and sort argument names ('self' is already excluded because
        # the signature is taken from the bound method)
        return sorted(p.name for p in parameters)

    def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None, feature_names=None,
            confounder_names=None, instrument_names=None, graph=None, estimand_type="nonparametric-ate",
            proceed_when_unidentifiable=True, missing_nodes_as_confounders=False,
            control_value=0, treatment_value=1, target_units="ate", **kwargs):
        """
        Estimate the counterfactual model from data through dowhy package.

        Parameters
        ----------
        Y: vector of length n
            Outcomes for each sample
        T: vector of length n
            Treatments for each sample
        X: optional (n, d_x) matrix (Default=None)
            Features for each sample
        W: optional (n, d_w) matrix (Default=None)
            Controls for each sample
        Z: optional (n, d_z) matrix (Default=None)
            Instruments for each sample
        outcome_names: optional list (Default=None)
            Name of the outcome
        treatment_names: optional list (Default=None)
            Name of the treatment
        feature_names: optional list (Default=None)
            Name of the features
        confounder_names: optional list (Default=None)
            Name of the confounders
        instrument_names: optional list (Default=None)
            Name of the instruments
        graph: optional
            Path to DOT file containing a DAG or a string containing a DAG specification in DOT format
        estimand_type: optional string
            Type of estimand requested (currently only "nonparametric-ate" is supported).
            In the future, may support other specific parametric forms of identification
        proceed_when_unidentifiable: optional bool (Default=True)
            Whether the identification should proceed by ignoring potential unobserved confounders
        missing_nodes_as_confounders: optional bool (Default=False)
            Whether variables in the dataframe that are not included in the causal graph should be
            automatically included as confounder nodes
        control_value: optional scalar (Default=0)
            Value of the treatment in the control group, for effect estimation
        treatment_value: optional scalar (Default=1)
            Value of the treatment in the treated group, for effect estimation
        target_units: optional (Default="ate")
            The units for which the treatment effect should be estimated.
            This can be of three types:

            1. A string for common specifications of target units (namely, "ate", "att" and "atc"),
            2. A lambda function that can be used as an index for the data (pandas DataFrame),
            3. A new DataFrame that contains values of the effect_modifiers and effect will be estimated
               only for this new data
        kwargs: optional
            Other keyword arguments from fit method for CATE estimator

        Returns
        -------
        self
        """
        Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)

        # create dataframe
        n_obs = Y.shape[0]
        Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)

        # currently dowhy only support single outcome and single treatment
        assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
        assert T.shape[1] == 1, "Can only accept single dimensional treatment."

        # column names
        if outcome_names is None:
            outcome_names = [f"Y{i}" for i in range(Y.shape[1])]
        if treatment_names is None:
            treatment_names = [f"T{i}" for i in range(T.shape[1])]
        if feature_names is None:
            feature_names = [f"X{i}" for i in range(X.shape[1])]
        if confounder_names is None:
            confounder_names = [f"W{i}" for i in range(W.shape[1])]
        if instrument_names is None:
            instrument_names = [f"Z{i}" for i in range(Z.shape[1])]
        column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
        df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)

        has_features = X.shape[1] > 0
        has_confounders = W.shape[1] > 0
        has_instruments = Z.shape[1] > 0

        self.dowhy_ = CausalModel(
            data=df,
            treatment=treatment_names,
            outcome=outcome_names,
            graph=graph,
            common_causes=(feature_names + confounder_names) if (has_features or has_confounders) else None,
            instruments=instrument_names if has_instruments else None,
            effect_modifiers=feature_names if has_features else None,
            estimand_type=estimand_type,
            # The keyword must be spelled exactly like this for the user's
            # choice to reach the model under its documented name.
            proceed_when_unidentifiable=proceed_when_unidentifiable,
            missing_nodes_as_confounders=missing_nodes_as_confounders
        )
        # Honour the caller's choice here as well instead of always forcing True.
        self.identified_estimand_ = self.dowhy_.identify_effect(
            proceed_when_unidentifiable=proceed_when_unidentifiable)

        method_name = ("backdoor." + self._cate_estimator.__module__ + "." +
                       self._cate_estimator.__class__.__name__)
        # Rebuild the estimator's constructor arguments from its current attributes
        init_params = {p: getattr(self._cate_estimator, p) for p in self._get_params()}
        self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
                                                     method_name=method_name,
                                                     control_value=control_value,
                                                     treatment_value=treatment_value,
                                                     target_units=target_units,
                                                     method_params={
                                                         "init_params": init_params,
                                                         "fit_params": kwargs,
                                                     },
                                                     )
        return self

    def refute_estimate(self, *, method_name, **kwargs):
        """
        Refute an estimated causal effect.

        If method_name is provided, uses the provided method. In the future, we may support automatic
        selection of suitable refutation tests.
        Following refutation methods are supported:

        - Adding a randomly-generated confounder: "random_common_cause"
        - Adding a confounder that is associated with both treatment and outcome: "add_unobserved_common_cause"
        - Replacing the treatment with a placebo (random) variable: "placebo_treatment_refuter"
        - Removing a random subset of the data: "data_subset_refuter"

        For more details, see docs :mod:`dowhy.causal_refuters`

        Parameters
        ----------
        method_name: string
            Name of the refutation method
        kwargs: optional
            Additional arguments that are passed directly to the refutation method.
            Can specify a random seed here to ensure reproducible results ('random_seed' parameter).
            For method-specific parameters, consult the documentation for the specific method.
            All refutation methods are in the causal_refuters subpackage.

        Returns
        -------
        RefuteResult: an instance of the RefuteResult class
        """
        return self.dowhy_.refute_estimate(
            self.identified_estimand_, self.estimate_, method_name=method_name, **kwargs
        )

    # We don't allow user to call refit_final from this class, since internally dowhy effect estimate will only update
    # cate estimator but not the effect.
    def refit_final(self, inference=None):
        raise AttributeError(
            "Method refit_final is not allowed through a dowhy object; please perform a full fit instead.")

    def __getattr__(self, attr):
        # don't proxy special methods
        if attr.startswith('__'):
            raise AttributeError(attr)
        elif attr in self._OWN_ATTRIBUTES:
            # __getattr__ only runs after normal lookup failed, so the wrapper's
            # own attribute has not been set yet (e.g. fit was not called).
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
        elif attr.startswith('dowhy__'):
            return getattr(self.dowhy_, attr[len('dowhy__'):])
        elif hasattr(self.estimate_._estimator_object, attr):
            if hasattr(self.dowhy_, attr):
                warnings.warn("This call is ambiguous, "
                              "we're defaulting to CATE estimator's attribute. "
                              "Please add 'dowhy__' as prefix if you want to get dowhy attribute.", UserWarning)
            return getattr(self.estimate_._estimator_object, attr)
        else:
            return getattr(self.dowhy_, attr)

    def __setattr__(self, attr, value):
        if attr in self._OWN_ATTRIBUTES:
            super().__setattr__(attr, value)
        elif attr.startswith('dowhy__'):
            setattr(self.dowhy_, attr[len('dowhy__'):], value)
        elif hasattr(self.estimate_._estimator_object, attr):
            if hasattr(self.dowhy_, attr):
                warnings.warn("This call is ambiguous, "
                              "we're defaulting to CATE estimator's attribute. "
                              "Please add 'dowhy__' as prefix if you want to set dowhy attribute.", UserWarning)
            setattr(self.estimate_._estimator_object, attr, value)
        else:
            setattr(self.dowhy_, attr, value)
data=data, treatment='treatment', outcome='y_factual', common_causes=xs.split('+'), ) #save the model as a png model.view_model() display(Image(filename="causal_model.png")) #Identify the causal effect identified_estimand = model.identify_effect() print(identified_estimand) # Estimate the causal effect and compare it with Average Treatment Effect estimate = model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression", test_significance=True) print(estimate) print("Causal Estimate is " + str(estimate.value)) data_1 = data[data["treatment"] == 1] data_0 = data[data["treatment"] == 0] print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"])) refute_results = model.refute_estimate(identified_estimand, estimate, method_name="random_common_cause") print(refute_results)