def plot(self, *args, **kwargs):
    """Run a causal estimation on the wrapped object, then plot it.

    Keyword arguments consumed here (they are NOT forwarded to the
    underlying ``self._obj.plot`` call):

    * ``method_name`` -- estimator passed to ``estimate_effect``; falls back
      to ``"backdoor.propensity_score_matching"`` when missing or empty.
    * ``common_causes`` -- explicit common causes; takes precedence over
      ``dot_graph`` when both are supplied.
    * ``dot_graph`` -- graph passed to ``CausalModel`` as ``graph``.
    * ``treatment_name`` / ``outcome_name`` -- keys used to index
      ``self._obj`` for the treatment and outcome.

    ``kind`` is inspected (only ``'bar'``, ``'line'`` or no kind at all are
    accepted) and is forwarded to ``self._obj.plot`` together with every
    remaining positional and keyword argument.

    Side effect: sets ``self.use_graph``.

    Raises:
        Exception: if neither ``common_causes`` nor ``dot_graph`` is given,
            or if ``kind`` is not a supported plot type.
        KeyError: if ``treatment_name`` or ``outcome_name`` is missing.
    """
    causal_only_keys = (
        'method_name',
        'common_causes',
        'dot_graph',
        'treatment_name',
        'outcome_name',
    )

    method_name = kwargs.get('method_name') or "backdoor.propensity_score_matching"
    logging.info("Using {} for estimation.".format(method_name))

    if kwargs.get('common_causes'):
        self.use_graph = False
    elif kwargs.get('dot_graph'):
        self.use_graph = True
    else:
        raise Exception("You must specify a method for determining a backdoor set.")

    # Reject unsupported plot kinds before doing any modelling work.
    # A missing/empty kind is accepted and handled like 'line'.
    kind = kwargs.get('kind')
    if kind and kind not in ('bar', 'line'):
        raise Exception("Plot type {} not supported for causal plots!".format(kind))

    frame = self._obj
    # NOTE(review): treatment/outcome are passed as the indexed objects, not
    # as the names -- confirm this is what CausalModel expects here.
    model_kwargs = {
        'data': frame,
        'treatment': frame[kwargs["treatment_name"]],
        'outcome': frame[kwargs["outcome_name"]],
    }
    # These options arrive as keywords; ``args`` is a tuple and cannot be
    # indexed by name.
    if self.use_graph:
        model_kwargs['graph'] = kwargs["dot_graph"]
    else:
        model_kwargs['common_causes'] = kwargs["common_causes"]
    model = CausalModel(**model_kwargs)

    identified_estimand = model.identify_effect()
    # NOTE(review): the estimate is computed but not yet used by the plot.
    model.estimate_effect(identified_estimand, method_name=method_name)

    # Presumably self._obj is a pandas object whose plot() hands unknown
    # keywords to the plotting backend, which rejects them -- so strip the
    # causal-only options first.
    plot_kwargs = {
        key: value for key, value in kwargs.items()
        if key not in causal_only_keys
    }
    self._obj.plot(*args, **plot_kwargs)
def null_refutation_test(self, data=None, dataset="linear", beta=10,
                         num_common_causes=1, num_instruments=1,
                         num_samples=100000, treatment_is_binary=True):
    """Check that a refuter with zero effect strength leaves the estimate unchanged.

    When ``data`` is None a dataset is generated with
    ``dowhy.datasets.linear_dataset`` from the remaining arguments;
    otherwise the caller-supplied dataset object is used as-is. The
    ``dataset`` argument is not read by this method.

    The refuter is run twice: once with the configured effect strengths
    (only to surface exceptions) and once with both strengths set to zero.
    The assertion passes when the absolute difference between the zero-
    strength refuted effect and the original estimate is below
    ``abs(estimate) * self._error_tolerance``.

    Raises:
        AssertionError: if the refuted effect deviates beyond the tolerance.
    """
    if data is None:
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=num_common_causes,
            num_instruments=num_instruments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary,
        )

    causal_model = CausalModel(
        data=data['df'],
        treatment=data["treatment_name"],
        outcome=data["outcome_name"],
        graph=data["gml_graph"],
        proceed_when_unidentifiable=True,
        test_significance=None,
    )
    estimand = causal_model.identify_effect()
    estimate = causal_model.estimate_effect(
        identified_estimand=estimand,
        method_name=self.estimator_method,
        test_significance=None,
    )
    # Looked up but not compared against anything in this test.
    true_ate = data["ate"]

    # First run: configured strengths, executed only to catch exceptions.
    ref = causal_model.refute_estimate(
        estimand,
        estimate,
        method_name=self.refuter_method,
        confounders_effect_on_treatment=self.confounders_effect_on_t,
        confounders_effect_on_outcome=self.confounders_effect_on_y,
        effect_strength_on_treatment=self.effect_strength_on_t,
        effect_strength_on_outcome=self.effect_strength_on_y,
    )

    # Second run: zero strengths, so the refuted effect should match.
    refute = causal_model.refute_estimate(
        estimand,
        estimate,
        method_name=self.refuter_method,
        confounders_effect_on_treatment=self.confounders_effect_on_t,
        confounders_effect_on_outcome=self.confounders_effect_on_y,
        effect_strength_on_treatment=0,
        effect_strength_on_outcome=0,
    )

    original_value = estimate.value
    refuted_value = refute.new_effect
    error = abs(refuted_value - original_value)
    report = "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
    print(report.format(error, self._error_tolerance * 100,
                        original_value, refuted_value))

    assert error < abs(original_value) * self._error_tolerance
def CalDoWhy(dat):
    """Identify and estimate a causal effect with backdoor linear regression.

    Args:
        dat: Mapping providing ``"df"``, ``"treatment_name"``,
            ``"outcome_name"`` and ``"gml_graph"``, which are passed to
            ``CausalModel`` as data, treatment, outcome and graph.

    Returns:
        Whatever ``CausalModel.estimate_effect`` returns for the
        ``"backdoor.linear_regression"`` method.
    """
    causal_model = CausalModel(
        data=dat["df"],
        treatment=dat["treatment_name"],
        outcome=dat["outcome_name"],
        graph=dat["gml_graph"],
    )
    # Identification first, then estimation on the identified estimand.
    estimand = causal_model.identify_effect()
    return causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
    )
def register_graph():
    """Estimate a causal effect for the dataset and graph named in the request.

    Reads ``digraph``, ``dataset``, ``treatment`` and ``outcome`` from
    ``request.args``, loads the named dataset as a dataframe through
    ``dataiku.Dataset``, and runs a backdoor linear-regression estimate with
    significance testing enabled.

    Returns:
        A JSON string of the form ``{"results": "<str of the estimate>"}``.
    """
    query = request.args
    digraph = query.get('digraph')
    dataset = query.get('dataset')
    treatment_name = query.get('treatment')
    outcome_name = query.get('outcome')

    frame = dataiku.Dataset(dataset).get_dataframe()
    causal_model = CausalModel(
        data=frame,
        treatment=treatment_name,
        outcome=outcome_name,
        graph=digraph,
    )

    estimand = causal_model.identify_effect()
    regression_estimate = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
        test_significance=True,
    )
    return json.dumps({'results': str(regression_estimate)})
# Generate a synthetic linear dataset with a binary treatment.
data = dowhy.datasets.linear_dataset(
    beta=10,
    num_common_causes=5,
    num_instruments=2,
    num_samples=10000,
    treatment_is_binary=True,
)

# Build the causal model from the generated frame and its DOT graph.
model = CausalModel(
    data=data["df"],
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["dot_graph"],
)

# Identify the estimand, then estimate it with backdoor linear regression.
identified_estimand = model.identify_effect()
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
)
print(f"Causal Estimate is {estimate.value!s}")

# Refutation 1: add a random common cause variable.
res_random = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
print(res_random)

# Refutation 2: replace the treatment with a permuted (placebo) variable.
res_placebo = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)
print(res_placebo)
data=data, treatment='treatment', outcome='y_factual', common_causes=xs.split('+') ) # Identify # Identify the causal effect identified_estimand = model.identify_effect() # Estimate (using different methods) # 3.1 Using Linear Regression # Estimate the causal effect and compare it with Average Treatment Effect estimate = model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression", test_significance=True ) print(estimate) print("Causal Estimate is " + str(estimate.value)) data_1 = data[data["treatment"] == 1] data_0 = data[data["treatment"] == 0] print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"])) # 3.2 Using Propensity Score Matching estimate = model.estimate_effect(identified_estimand, method_name="backdoor.propensity_score_matching" )
from dowhy.do_why import CausalModel
import dowhy.datasets
# Module names are case-sensitive: the package is ``pygraphviz``.
import pygraphviz

# Load some sample data.
data = dowhy.datasets.linear_dataset(
    beta=10,
    num_common_causes=5,
    num_instruments=2,
    num_samples=10000,
    treatment_is_binary=True,
)

# Create a causal model from the data and given graph.
model = CausalModel(
    data=data["df"],
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["dot_graph"],
)

# Identify causal effect and return target estimands.
identified_estimand = model.identify_effect()

# Estimate the target estimand using a statistical method.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
)

# Refute the obtained estimate using a robustness check.
refute_results = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
print(data["gml_graph"])

# Build the model from the GML graph and render it.
model = CausalModel(
    data=df,
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["gml_graph"],
)
model.view_model()

from IPython.display import Image, display
display(Image(filename="causal_model_simple_example.png"))

# DoWhy philosophy: keep identification and estimation separate.
# Identification can be achieved without access to the data, accessing only
# the graph. It yields an expression, which the estimation step then
# evaluates using the available data. The two steps are orthogonal.
identified_estimand = model.identify_effect()
print(identified_estimand)

causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
)
print(causal_estimate)
print(f"Causal Estimate is {causal_estimate.value!s}")

# Refuting the estimate follows in the Jupyter Notebook
class DoWhyExample:
    """Worked DoWhy example with lazily built, cached pipeline stages.

    The treatment is ``TOJ`` and the outcome is ``IntCur``; the graph is a
    GML string assembled at class-definition time. ``model``, ``identify``
    and ``estimate`` each cache their result (``t_model``, ``t_identify``,
    ``t_estimate``) and rebuild it only when it is missing or when
    ``force_again`` is true. Any stage may be called first: each one builds
    the stages it depends on.
    """

    # Synthetic dataset generated at class-definition time. The methods
    # below do not read it; they use ``data``.
    data_old = ds.linear_dataset(beta=10, num_common_causes=5,
                                 num_instruments=5, num_samples=10000,
                                 treatment_is_binary=True)

    gml_graph = ('graph[directed 1'
                 'node[ id "TOJ" label "TOJ"]'
                 'node[ id "IntCur" label "IntCur"]'
                 'node[ id "U" label "Unobserved Confounders"]'
                 'edge[source "TOJ" target "IntCur"]'
                 'edge[source "U" target "TOJ"]'
                 'edge[source "U" target "IntCur"]')
    gml_graph = add_node(gml_graph, "YeshivaAdults", "IntCur")
    gml_graph = add_node(gml_graph, "Sex", "IntCur")
    gml_graph = add_node(gml_graph, "Age", "IntCur")
    gml_graph = connect_node(gml_graph, "Age", "TOJ")
    gml_graph = connect_node(gml_graph, "Age", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "Sex", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "TOJ", "YeshivaAdults")
    gml_graph = gml_graph + ']'

    # table
    # ID Age Sex TOJ (Orthodox)? (Treatment?) Yeshiva? Intell. Curios? (Outcome)
    data = pd.DataFrame(
        np.array([[30.0, 1.0, 1.0, 1.0, 0.0],
                  [40.0, 1.0, 0.0, 0.0, 1.0]]),
        columns=['Age', 'Sex', 'TOJ', 'YeshivaAdults', 'IntCur'])

    # Cached pipeline stages; None until first built.
    t_model = None
    t_identify = None
    t_estimate = None

    def model(self, force_again=False):
        """Return the cached ``CausalModel``, building it if needed.

        Args:
            force_again: Rebuild the model even if one is cached.
        """
        if self.t_model is None or force_again:
            self.t_model = CausalModel(data=self.data,
                                       treatment='TOJ',
                                       outcome='IntCur',
                                       graph=self.gml_graph)
        return self.t_model

    def identify(self, force_again=False):
        """Return the cached identified estimand, computing it if needed.

        Args:
            force_again: Rebuild the model and re-run identification.
        """
        if self.t_identify is None or force_again:
            causal_model = self.model(force_again=force_again)
            self.t_identify = causal_model.identify_effect()
        return self.t_identify

    def estimate(self, method_name="backdoor.propensity_score_matching",
                 force_again=False):
        """Return the cached estimate, computing it if needed.

        A cached estimate is returned regardless of ``method_name``; pass
        ``force_again=True`` to re-estimate with a different method.

        Args:
            method_name: Estimation method passed to ``estimate_effect``.
            force_again: Rebuild the model, estimand and estimate.
        """
        if self.t_estimate is None or force_again:
            # Resolve the estimand first: this guarantees the model exists
            # before its estimate_effect attribute is looked up.
            estimand = self.identify(force_again)
            self.t_estimate = self.model().estimate_effect(
                estimand, method_name=method_name)
        return self.t_estimate

    def refute(self, method_name="random_common_cause", force_again=False):
        """Run a refuter against the (possibly rebuilt) estimate.

        Args:
            method_name: Refutation method passed to ``refute_estimate``.
            force_again: Rebuild model, estimand and estimate once before
                refuting.

        Returns:
            Whatever ``CausalModel.refute_estimate`` returns.
        """
        # One forced pass through estimate() refreshes every stage, so the
        # model, estimand and estimate used below all belong together.
        current_estimate = self.estimate(force_again=force_again)
        return self.model().refute_estimate(
            self.identify(),
            current_estimate,
            method_name=method_name)