Esempio n. 1
0
    def plot(self, *args, **kwargs):
        """Estimate a causal effect on the wrapped data, then plot the data.

        Keyword arguments read here (every keyword is also forwarded,
        unchanged, to ``self._obj.plot``):

        - ``method_name``: estimator name passed to ``estimate_effect``;
          falls back to ``"backdoor.propensity_score_matching"`` when falsy.
        - ``common_causes`` or ``dot_graph``: how the backdoor set is
          determined; ``common_causes`` takes precedence when both are truthy.
        - ``treatment_name`` / ``outcome_name``: keys used to index
          ``self._obj``.
        - ``kind``: ``'bar'`` or ``'line'``; a missing or falsy value is
          handled like ``'line'``.

        Side effect: sets ``self.use_graph``.

        Raises:
            Exception: if neither ``common_causes`` nor ``dot_graph`` is
                supplied, or if ``kind`` is not a supported plot type.
            KeyError: if ``treatment_name`` or ``outcome_name`` is missing.
        """
        method_name = (kwargs.get('method_name')
                       or "backdoor.propensity_score_matching")
        logging.info("Using {} for estimation.".format(method_name))

        if kwargs.get('common_causes'):
            self.use_graph = False
        elif kwargs.get('dot_graph'):
            self.use_graph = True
        else:
            raise Exception("You must specify a method for determining a backdoor set.")

        # NOTE(review): treatment/outcome are passed as selections from
        # self._obj rather than as column names -- confirm that this is what
        # CausalModel expects.
        treatment = self._obj[kwargs["treatment_name"]]
        outcome = self._obj[kwargs["outcome_name"]]

        # The graph / common causes arrive as keyword arguments; ``args`` is
        # the positional tuple and cannot be indexed by name.
        if self.use_graph:
            model = CausalModel(data=self._obj,
                                treatment=treatment,
                                outcome=outcome,
                                graph=kwargs["dot_graph"])
        else:
            model = CausalModel(data=self._obj,
                                treatment=treatment,
                                outcome=outcome,
                                common_causes=kwargs["common_causes"])

        # ``kind`` is optional, so read it with .get() instead of indexing.
        kind = kwargs.get('kind')
        if kind in ('bar', 'line') or not kind:
            identified_estimand = model.identify_effect()
            # The estimate itself is not consumed by the plotting call below.
            model.estimate_effect(identified_estimand,
                                  method_name=method_name)
        else:
            raise Exception("Plot type {} not supported for causal plots!".format(kind))
        self._obj.plot(*args, **kwargs)
Esempio n. 2
0
    def null_refutation_test(self,
                             data=None,
                             dataset="linear",
                             beta=10,
                             num_common_causes=1,
                             num_instruments=1,
                             num_samples=100000,
                             treatment_is_binary=True):
        """Assert that a refutation with zero effect strengths keeps the estimate.

        When ``data`` is None a linear dataset is generated from ``beta``,
        ``num_common_causes``, ``num_instruments``, ``num_samples`` and
        ``treatment_is_binary``; otherwise the caller's dataset object is
        used as is. ``dataset`` is accepted but not read in this method.

        The refuter is run twice: once with the strengths configured on
        ``self`` (only to surface exceptions) and once with both strengths
        set to zero. The absolute difference between the zero-strength
        effect and the original estimate must be below
        ``abs(estimate) * self._error_tolerance``.

        Raises:
            AssertionError: if the difference is not within tolerance.
        """
        if data is None:
            # No dataset supplied by the caller: generate a linear one.
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary,
            )

        causal_model = CausalModel(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None,
        )
        estimand = causal_model.identify_effect()
        estimate = causal_model.estimate_effect(
            identified_estimand=estimand,
            method_name=self.estimator_method,
            test_significance=None,
        )
        # Looked up but not used in the comparison below.
        true_ate = data["ate"]

        # First run: configured strengths; the result is discarded, the call
        # only has to complete without raising.
        causal_model.refute_estimate(
            estimand,
            estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y,
        )

        # Second run: both effect strengths forced to zero.
        zero_strength = causal_model.refute_estimate(
            estimand,
            estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0,
        )

        error = abs(zero_strength.new_effect - estimate.value)
        template = "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
        print(template.format(error,
                              self._error_tolerance * 100,
                              estimate.value,
                              zero_strength.new_effect))
        assert error < abs(estimate.value) * self._error_tolerance
Esempio n. 3
0
def CalDoWhy(dat):
    """Estimate a causal effect from ``dat`` via backdoor linear regression.

    ``dat`` is indexed with the keys ``"df"``, ``"treatment_name"``,
    ``"outcome_name"`` and ``"gml_graph"`` to build the ``CausalModel``.

    Returns:
        The object returned by ``CausalModel.estimate_effect``.
    """
    causal_model = CausalModel(
        data=dat["df"],
        treatment=dat["treatment_name"],
        outcome=dat["outcome_name"],
        graph=dat["gml_graph"],
    )

    # Identification first, then estimation on the identified estimand.
    estimand = causal_model.identify_effect()
    return causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
    )
Esempio n. 4
0
def register_graph():
    """Run a linear-regression causal estimate described by the request.

    Reads ``digraph``, ``dataset``, ``treatment`` and ``outcome`` from
    ``request.args``, loads the named dataset as a dataframe through
    ``dataiku.Dataset``, and estimates the effect with
    ``backdoor.linear_regression`` (significance testing enabled).

    Returns:
        A JSON string of the form ``{"results": "<str(estimate)>"}``.
    """
    params = request.args
    digraph = params.get('digraph')
    dataset = params.get('dataset')
    treatment_name = params.get('treatment')
    outcome_name = params.get('outcome')
    df = dataiku.Dataset(dataset).get_dataframe()

    causal_model = CausalModel(data=df,
                               treatment=treatment_name,
                               outcome=outcome_name,
                               graph=digraph)

    estimand = causal_model.identify_effect()
    regression_estimate = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
        test_significance=True,
    )

    # The estimate is serialised through its string representation.
    return json.dumps({'results': str(regression_estimate)})
    # NOTE(review): these statements sit at function-body indentation directly
    # after a `return`, so they never execute as written. Presumably this is
    # the body of a separate example whose header was lost -- confirm before
    # relying on it.
    #
    # Generate a linear dataset (beta=10, 5 common causes, 2 instruments,
    # 10000 samples, binary treatment).
    data = dowhy.datasets.linear_dataset(beta=10,
                                         num_common_causes=5,
                                         num_instruments=2,
                                         num_samples=10000,
                                         treatment_is_binary=True)
    # Create a causal model from the data and the dataset's DOT graph.
    model = CausalModel(
        data=data["df"],
        treatment=data["treatment_name"],
        outcome=data["outcome_name"],
        graph=data["dot_graph"])

    # Identify the estimand, then estimate it with backdoor linear regression.
    identified_estimand = model.identify_effect()

    estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.linear_regression")
    print("Causal Estimate is " + str(estimate.value))

    # Refutation 1: the "random_common_cause" refuter; its result is printed.
    res_random = model.refute_estimate(
        identified_estimand, estimate,
        method_name="random_common_cause")
    print(res_random)

    # Refutation 2: the "placebo_treatment_refuter" refuter with
    # placebo_type="permute"; its result is printed.
    res_placebo = model.refute_estimate(
        identified_estimand,
        estimate,
        method_name="placebo_treatment_refuter", placebo_type="permute")
    print(res_placebo)
Esempio n. 6
0
    data=data,
    treatment='treatment',
    outcome='y_factual',
    common_causes=xs.split('+')
)

# Step 2: identify the causal effect on the model built above.
identified_estimand = model.identify_effect()

# Step 3: estimate the effect with two different estimators.

# 3.1 Linear regression, with significance testing enabled.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    test_significance=True,
)
print(estimate)
print("Causal Estimate is", estimate.value)

# For comparison: difference between the mean factual outcome of the rows
# with treatment == 1 and of the rows with treatment == 0.
data_1 = data[data["treatment"] == 1]
data_0 = data[data["treatment"] == 0]
mean_outcome_gap = np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"])
print("ATE", mean_outcome_gap)

# 3.2 Propensity score matching; this rebinds ``estimate``.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
)
Esempio n. 7
0
from dowhy.do_why import CausalModel
import dowhy.datasets
import Pygraphviz

# Generate a synthetic linear dataset to work with.
data = dowhy.datasets.linear_dataset(
    beta=10,
    num_common_causes=5,
    num_instruments=2,
    num_samples=10000,
    treatment_is_binary=True,
)

# Build the causal model from the generated frame and its DOT graph.
model = CausalModel(data=data["df"],
                    treatment=data["treatment_name"],
                    outcome=data["outcome_name"],
                    graph=data["dot_graph"])

# Step 1: identify the causal effect (yields the target estimand).
identified_estimand = model.identify_effect()

# Step 2: estimate the target estimand with propensity score matching.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
)

# Step 3: refute the estimate with the "random_common_cause" refuter.
refute_results = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
# Show the GML description of the graph stored in the dataset.
print(data["gml_graph"])

# With graph: build the model on the dataset's own frame and its GML graph.
model = CausalModel(data=data["df"],
                    treatment=data["treatment_name"],
                    outcome=data["outcome_name"],
                    graph=data["gml_graph"])

model.view_model()

from IPython.display import Image, display

# NOTE(review): assumes view_model() wrote a PNG under this file name --
# confirm.
display(Image(filename="causal_model_simple_example.png"))

# DoWhy philosophy: keep identification and estimation separate.
# Identification can be achieved without access to the data, accessing only
# the graph. This results in an expression to be computed.
# This expression can then be evaluated using the available data in the
# estimation step.
# It is important to understand that these are orthogonal steps.

identified_estimand = model.identify_effect()
print(identified_estimand)

causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification")
print(causal_estimate)
print("Causal Estimate is " + str(causal_estimate.value))

# Refuting the estimate follows in the Jupyter Notebook
# Refuting the estimate follows in the Jupyter Notebook
Esempio n. 9
0
class DoWhyExample:
    """Lazily evaluated DoWhy pipeline: model -> identify -> estimate -> refute.

    Each stage stores its result in ``t_model`` / ``t_identify`` /
    ``t_estimate`` and reuses it on later calls; pass ``force_again=True``
    to recompute a stage together with the stages it depends on.
    """

    # Synthetic linear dataset, generated when the class body runs. Nothing
    # else in this class reads it.
    data_old = ds.linear_dataset(beta=10,
                                 num_common_causes=5,
                                 num_instruments=5,
                                 num_samples=10000,
                                 treatment_is_binary=True)

    # GML text of the causal graph: TOJ -> IntCur, with "U" (labelled
    # "Unobserved Confounders") pointing at both. The closing bracket is
    # appended after the helper calls below.
    gml_graph = ('graph[directed 1'
                 'node[ id "TOJ" label "TOJ"]'
                 'node[ id "IntCur" label "IntCur"]'
                 'node[ id "U" label "Unobserved Confounders"]'
                 'edge[source "TOJ" target "IntCur"]'
                 'edge[source "U" target "TOJ"]'
                 'edge[source "U" target "IntCur"]')

    # add_node / connect_node are defined outside this class; presumably they
    # append node and edge entries to the GML text -- verify against their
    # definitions.
    gml_graph = add_node(gml_graph, "YeshivaAdults", "IntCur")
    gml_graph = add_node(gml_graph, "Sex", "IntCur")
    gml_graph = add_node(gml_graph, "Age", "IntCur")
    gml_graph = connect_node(gml_graph, "Age", "TOJ")
    gml_graph = connect_node(gml_graph, "Age", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "Sex", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "TOJ", "YeshivaAdults")
    gml_graph = gml_graph + ']'

    # Two-row table used as model input. Original column notes:
    # ID    Age     Sex     TOJ (Orthodox)? (Treatment?)     Yeshiva?    Intell. Curios? (Outcome)
    data = pd.DataFrame(
        np.array([[30.0, 1.0, 1.0, 1.0, 0.0], [40.0, 1.0, 0.0, 0.0, 1.0]]),
        columns=['Age', 'Sex', 'TOJ', 'YeshivaAdults', 'IntCur'])

    # Per-stage caches; None means "not computed yet".
    t_model = None
    t_identify = None
    t_estimate = None

    def model(self, force_again=False):
        """Return the cached ``CausalModel``, building it when needed.

        The model uses ``self.data`` with treatment ``'TOJ'``, outcome
        ``'IntCur'`` and ``self.gml_graph``. It is rebuilt when
        ``force_again`` is true.
        """
        if self.t_model is None or force_again:
            self.t_model = CausalModel(data=self.data,
                                       treatment='TOJ',
                                       outcome='IntCur',
                                       graph=self.gml_graph)
        return self.t_model

    def identify(self, force_again=False):
        """Return the cached identified estimand, computing it when needed.

        With ``force_again`` true the model is rebuilt first and the
        estimand is identified again on the new model.
        """
        if self.t_identify is None or force_again:
            # model() builds lazily and rebuilds when forced.
            self.t_identify = self.model(
                force_again=force_again).identify_effect()
        return self.t_identify

    def estimate(self,
                 method_name="backdoor.propensity_score_matching",
                 force_again=False):
        """Return the cached causal estimate, computing it when needed.

        ``method_name`` only takes effect when the estimate is actually
        (re)computed; a cached estimate is returned unchanged unless
        ``force_again`` is true.
        """
        if self.t_estimate is None or force_again:
            # Resolve the estimand before touching the model: identify()
            # builds the model when it does not exist yet, so estimate() is
            # safe to call on a fresh instance.
            estimand = self.identify(force_again)
            self.t_estimate = self.model().estimate_effect(
                estimand, method_name=method_name)
        return self.t_estimate

    def refute(self, method_name="random_common_cause", force_again=False):
        """Refute the estimate with the refuter named ``method_name``.

        With ``force_again`` true every cached stage is discarded first, so
        model, estimand and estimate are rebuilt exactly once and all stem
        from the same ``CausalModel`` instance.

        Returns:
            The object returned by ``CausalModel.refute_estimate``.
        """
        if force_again:
            self.t_model = None
            self.t_identify = None
            self.t_estimate = None
        return self.model().refute_estimate(
            self.identify(),
            self.estimate(),
            method_name=method_name)