コード例 #1
0
    def plot(self, *args, **kwargs):
        """Run a causal effect estimation for the wrapped frame, then plot it.

        Causal options are read from ``kwargs``:

        * ``method_name`` -- estimator name handed to ``estimate_effect``;
          defaults to ``"backdoor.propensity_score_matching"``.
        * ``common_causes`` or ``dot_graph`` -- how the backdoor set is
          determined. At least one is required; ``common_causes`` takes
          precedence when both are given.
        * ``treatment_name`` / ``outcome_name`` -- names of the treatment and
          outcome columns.
        * ``kind`` -- ``'bar'``, ``'line'``, or omitted.

        Positional arguments and all remaining keyword arguments (including
        ``kind``) are forwarded to ``self._obj.plot``.

        :raises Exception: if neither ``common_causes`` nor ``dot_graph`` is
            given, or if ``kind`` is not a supported plot type.
        :raises KeyError: if ``treatment_name`` or ``outcome_name`` is missing.
        """
        # Options consumed here; they must not leak into the pandas plot call,
        # which would pass unknown keywords on to the plotting backend.
        causal_only = ('method_name', 'common_causes', 'dot_graph',
                       'treatment_name', 'outcome_name')

        method_name = kwargs.get('method_name') or "backdoor.propensity_score_matching"
        logging.info("Using {} for estimation.".format(method_name))

        if kwargs.get('common_causes'):
            self.use_graph = False
        elif kwargs.get('dot_graph'):
            self.use_graph = True
        else:
            raise Exception("You must specify a method for determining a backdoor set.")

        # Validate the plot type before doing any (potentially slow) estimation.
        # A missing or empty ``kind`` is accepted, like 'line'.
        kind = kwargs.get('kind')
        if kind and kind not in ('bar', 'line'):
            raise Exception("Plot type {} not supported for causal plots!".format(kind))

        # The backdoor specification lives in kwargs; ``args`` is a tuple and
        # cannot be indexed by name.
        if self.use_graph:
            backdoor_spec = {'graph': kwargs['dot_graph']}
        else:
            backdoor_spec = {'common_causes': kwargs['common_causes']}

        # NOTE(review): the previous code passed the column Series themselves as
        # treatment/outcome; other CausalModel call sites pass column names, so
        # names are passed here -- confirm against the CausalModel signature.
        model = CausalModel(data=self._obj,
                            treatment=kwargs["treatment_name"],
                            outcome=kwargs["outcome_name"],
                            **backdoor_spec)
        identified_estimand = model.identify_effect()
        # The estimate is computed but not yet used by the plot itself.
        model.estimate_effect(identified_estimand, method_name=method_name)

        plot_kwargs = {key: value for key, value in kwargs.items()
                       if key not in causal_only}
        self._obj.plot(*args, **plot_kwargs)
コード例 #2
0
ファイル: base.py プロジェクト: youtang1993/dowhy
    def average_treatment_effect_test(self,
                                      dataset="linear",
                                      beta=10,
                                      num_common_causes=1,
                                      num_instruments=1,
                                      num_samples=10000,
                                      treatment_is_binary=True):
        """Assert that the estimator recovers the ATE of a generated linear dataset.

        A dataset is generated with ``dowhy.datasets.linear_dataset``, the
        effect is identified with ``CausalModel`` and estimated with
        ``self._Estimator``. The test passes when the absolute estimation error
        is below ``self._error_tolerance`` times the magnitude of the true ATE.

        :param dataset: accepted for interface compatibility; not read by this
            method (a linear dataset is always generated).
        :param beta: forwarded to ``linear_dataset``.
        :param num_common_causes: forwarded to ``linear_dataset``.
        :param num_instruments: forwarded to ``linear_dataset``.
        :param num_samples: forwarded to ``linear_dataset``.
        :param treatment_is_binary: forwarded to ``linear_dataset``.
        :raises AssertionError: if the estimate is outside the tolerance band.
        """
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=num_common_causes,
            num_instruments=num_instruments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary)

        model = CausalModel(data=data['df'],
                            treatment=data["treatment_name"],
                            outcome=data["outcome_name"],
                            graph=data["gml_graph"],
                            proceed_when_unidentifiable=True,
                            test_significance=None)
        target_estimand = model.identify_effect()
        estimator_ate = self._Estimator(data['df'],
                                        identified_estimand=target_estimand,
                                        treatment=data["treatment_name"],
                                        outcome=data["outcome_name"],
                                        test_significance=None)
        true_ate = data["ate"]
        ate_estimate = estimator_ate.estimate_effect()
        error = ate_estimate.value - true_ate
        print(
            "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}"
            .format(error, self._error_tolerance * 100, ate_estimate.value,
                    true_ate))
        # Compare magnitudes: a signed comparison would accept any
        # underestimate and could never pass for a negative true ATE.
        assert abs(error) < abs(true_ate) * self._error_tolerance
コード例 #3
0
ファイル: base.py プロジェクト: while519/dowhy
    def null_refutation_test(self,
                             data=None,
                             dataset="linear",
                             beta=10,
                             num_common_causes=1,
                             num_instruments=1,
                             num_samples=100000,
                             treatment_is_binary=True):
        """Assert that a zero-strength refutation leaves the estimate unchanged.

        The effect is estimated with ``self.estimator_method`` and refuted
        twice with ``self.refuter_method``: once with the configured effect
        strengths (only to surface exceptions) and once with both strengths set
        to zero. The second run's ``new_effect`` must be within
        ``self._error_tolerance`` (relative) of the original estimate.

        :param data: optional dataset dict with the keys ``'df'``,
            ``'treatment_name'``, ``'outcome_name'``, ``'gml_graph'`` and
            ``'ate'``; generated with ``linear_dataset`` when ``None``.
        :param dataset: not read by this method.
        :param beta: forwarded to ``linear_dataset`` when data is generated.
        :param num_common_causes: forwarded to ``linear_dataset``.
        :param num_instruments: forwarded to ``linear_dataset``.
        :param num_samples: forwarded to ``linear_dataset``.
        :param treatment_is_binary: forwarded to ``linear_dataset``.
        :raises AssertionError: if the refuted effect drifts beyond tolerance.
        """
        # Generate a dataset only when the caller did not provide one.
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary)

        causal_model = CausalModel(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None)
        estimand = causal_model.identify_effect()
        estimate = causal_model.estimate_effect(
            identified_estimand=estimand,
            method_name=self.estimator_method,
            test_significance=None)
        # Looked up (so a dataset without 'ate' still fails here) but not used
        # in the checks below.
        true_ate = data["ate"]

        # Smoke run with the configured strengths: only exceptions matter.
        causal_model.refute_estimate(
            estimand,
            estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y)

        # Zero-strength run: the refuted effect should match the estimate.
        zero_strength = causal_model.refute_estimate(
            estimand,
            estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0)

        error = abs(zero_strength.new_effect - estimate.value)
        print(
            "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
            .format(error, self._error_tolerance * 100, estimate.value,
                    zero_strength.new_effect))
        within_tolerance = error < abs(estimate.value) * self._error_tolerance
        assert within_tolerance
コード例 #4
0
class CausalAccessor(object):
    """Holds a pandas object and exposes a sampling-based ``do`` operation on it."""

    def __init__(self, pandas_obj):
        """Store the wrapped pandas object and start with no cached state.

        :param pandas_obj: the object the causal operations are run against.
        """
        self._obj = pandas_obj
        self._causal_model = None
        self._sampler = None
        self._identified_estimand = None
        self._method = None

    def reset(self):
        """Drop the cached causal model, estimand, sampler and method name."""
        self._causal_model = None
        self._identified_estimand = None
        self._sampler = None
        self._method = None

    def do(self, x, method='weighting', num_cores=1, variable_types=None, outcome=None, params=None, dot_graph=None,
           common_causes=None, instruments=None, estimand_type='ate', proceed_when_unidentifiable=False,
           stateful=False):
        """Sample from the intervention described by ``x``.

        :param x: str, list or dict: the variable(s) to intervene on; see
            ``parse_x`` for how each form is interpreted.
        :param method: name of the sampler; the class is looked up as
            ``method + "_sampler"`` through ``do_samplers.get_class_object``.
        :param num_cores: forwarded to the sampler.
        :param variable_types: dict forwarded to the sampler; ``None`` (the
            default) is treated as an empty dict.
        :param outcome: outcome passed to ``CausalModel``.
        :param params: forwarded to the sampler.
        :param dot_graph: passed to ``CausalModel`` as ``graph``.
        :param common_causes: passed to ``CausalModel``.
        :param instruments: passed to ``CausalModel``.
        :param estimand_type: passed to ``CausalModel``.
        :param proceed_when_unidentifiable: passed to ``CausalModel``.
        :param stateful: when true, the model and sampler are kept for later
            calls that use the same ``method``; otherwise state is reset before
            and after the call.
        :return: whatever the sampler's ``do_sample(x)`` returns.
        """
        # A fresh dict per call: a shared mutable default would leak state
        # between unrelated calls if any sampler mutated it.
        if variable_types is None:
            variable_types = {}
        x, keep_original_treatment = self.parse_x(x)
        # Cached components are only reusable for the same sampling method.
        if not stateful or method != self._method:
            self.reset()
        if not self._causal_model:
            # NOTE(review): only the first key of ``x`` is used as the
            # treatment -- confirm this is intended for multi-variable input.
            self._causal_model = CausalModel(self._obj,
                                             list(x)[0],
                                             outcome,
                                             graph=dot_graph,
                                             common_causes=common_causes,
                                             instruments=instruments,
                                             estimand_type=estimand_type,
                                             proceed_when_unidentifiable=proceed_when_unidentifiable)
        self._identified_estimand = self._causal_model.identify_effect()
        if not self._sampler:
            self._method = method
            do_sampler_class = do_samplers.get_class_object(method + "_sampler")
            self._sampler = do_sampler_class(self._obj,
                                             self._identified_estimand,
                                             self._causal_model._treatment,
                                             self._causal_model._outcome,
                                             params=params,
                                             variable_types=variable_types,
                                             num_cores=num_cores,
                                             causal_model=self._causal_model,
                                             keep_original_treatment=keep_original_treatment)
        result = self._sampler.do_sample(x)
        if not stateful:
            self.reset()
        return result

    def parse_x(self, x):
        """Normalise ``x`` into ``(dict, keep_original_treatment)``.

        A str or list names variables without values, giving a dict of
        ``None`` values and ``True``. A dict is returned as-is with ``False``.
        Subclasses of these types are accepted.

        :raises Exception: if ``x`` is not a str, list or dict.
        """
        if isinstance(x, str):
            return {x: None}, True
        if isinstance(x, list):
            return {xi: None for xi in x}, True
        if isinstance(x, dict):
            return x, False
        raise Exception('x format not recognized: {}'.format(type(x)))
コード例 #5
0
def CalDoWhy(dat):
    """Identify and estimate the causal effect described by ``dat``.

    :param dat: mapping with the keys ``'df'``, ``'treatment_name'``,
        ``'outcome_name'`` and ``'gml_graph'``, which are passed to
        ``CausalModel``.
    :return: the result of ``estimate_effect`` using the
        ``"backdoor.linear_regression"`` method.
    """
    causal_model = CausalModel(
        data=dat["df"],
        treatment=dat["treatment_name"],
        outcome=dat["outcome_name"],
        graph=dat["gml_graph"],
    )

    # Identification first, then estimation on the identified estimand.
    estimand = causal_model.identify_effect()
    return causal_model.estimate_effect(
        estimand, method_name="backdoor.linear_regression")
コード例 #6
0
ファイル: base.py プロジェクト: youtang1993/dowhy
 def custom_data_average_treatment_effect_test(self, data):
     """Assert that the estimator recovers the ATE of a caller-supplied dataset.

     The effect is identified with ``CausalModel`` and estimated with
     ``self._Estimator``. The test passes when the absolute estimation error
     is below ``self._error_tolerance`` times the magnitude of the true ATE.

     :param data: mapping with the keys ``'df'``, ``'treatment_name'``,
         ``'outcome_name'``, ``'gml_graph'`` and ``'ate'``.
     :raises AssertionError: if the estimate is outside the tolerance band.
     """
     model = CausalModel(data=data['df'],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         graph=data["gml_graph"],
                         proceed_when_unidentifiable=True,
                         test_significance=None)
     target_estimand = model.identify_effect()
     estimator_ate = self._Estimator(data['df'],
                                     identified_estimand=target_estimand,
                                     treatment=data["treatment_name"],
                                     outcome=data["outcome_name"],
                                     test_significance=None)
     true_ate = data["ate"]
     ate_estimate = estimator_ate.estimate_effect()
     error = ate_estimate.value - true_ate
     print(
         "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}"
         .format(error, self._error_tolerance * 100, ate_estimate.value,
                 true_ate))
     # Compare magnitudes: a signed comparison would accept any
     # underestimate and could never pass for a negative true ATE.
     assert abs(error) < abs(true_ate) * self._error_tolerance
コード例 #7
0
ファイル: app.py プロジェクト: waimunThales/dataiku-contrib
def register_graph():
    """Estimate a causal effect from request parameters and return it as JSON.

    Reads ``digraph``, ``dataset``, ``treatment`` and ``outcome`` from
    ``request.args``, loads the named dataset as a dataframe through
    ``dataiku.Dataset``, and runs a ``"backdoor.linear_regression"`` estimate
    with significance testing enabled.

    :return: a JSON string of the form ``{"results": "<str of the estimate>"}``.
    """
    query = request.args
    graph_spec = query.get('digraph')
    dataset_name = query.get('dataset')
    treatment = query.get('treatment')
    outcome = query.get('outcome')
    frame = dataiku.Dataset(dataset_name).get_dataframe()

    causal_model = CausalModel(data=frame,
                               treatment=treatment,
                               outcome=outcome,
                               graph=graph_spec)

    estimand = causal_model.identify_effect()
    regression_estimate = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
        test_significance=True,
    )

    # The estimate object is serialised through its string representation.
    return json.dumps({'results': str(regression_estimate)})
コード例 #8
0
        method_name="iv.regression_discontinuity",
        method_params={
            'rd_variable_name': 'Z1',
            'rd_threshold_value': 0.5,
            'rd_bandwidth': 0.1
        })

    print(causal_estimate_regdist)
    print("Causal Estimate is " + str(causal_estimate_regdist.value))


if __name__ == "__main__":
    # Script entry point: generate a linear dataset, build the causal model
    # from its graph, then identify the effect and run the regression demo.
    data = dowhy.datasets.linear_dataset(
        beta=10,
        num_common_causes=5,
        num_instruments=2,
        num_samples=10000,
        treatment_is_binary=True,
    )

    # With graph
    model = CausalModel(
        data=data['df'],
        treatment=data["treatment_name"],
        outcome=data["outcome_name"],
        graph=data["dot_graph"],
        instruments=data["instrument_names"],
        logging_level=logging.INFO,
    )

    model.view_model()

    identified_estimand = model.identify_effect()
    print(identified_estimand)

    regression(model, identified_estimand)
コード例 #9
0
ファイル: causal_data_frame.py プロジェクト: RaulPL/dowhy
class CausalAccessor(object):
    def __init__(self, pandas_obj):
        """
        An accessor for the pandas.DataFrame under the `causal` namespace.

        :param pandas_obj: the pandas object the accessor operates on.
        """
        self._obj = pandas_obj
        self._causal_model = None
        self._sampler = None
        self._identified_estimand = None
        self._method = None

    def reset(self):
        """
        If a `causal` namespace method (especially `do`) was run statefully, this resets the namespace.

        :return: None
        """
        self._causal_model = None
        self._identified_estimand = None
        self._sampler = None
        self._method = None

    def do(self,
           x,
           method='weighting',
           num_cores=1,
           variable_types=None,
           outcome=None,
           params=None,
           dot_graph=None,
           common_causes=None,
           estimand_type='ate',
           proceed_when_unidentifiable=False,
           stateful=False):
        """
        The do-operation implemented with sampling. This will return a pandas.DataFrame with the outcome
        variable(s) replaced with samples from P(Y|do(X=x)).

        If the value of `x` is left unspecified (e.g. as a string or list), then the original values of `x` are left in
        the DataFrame, and Y is sampled from its respective P(Y|do(x)). If the value of `x` is specified (passed with a
        `dict`, where variable names are keys, and values are specified) then the new `DataFrame` will contain the
        specified values of `x`.

        For some methods, the `variable_types` field must be specified. It should be a `dict`, where the keys are
        variable names, and values are 'o' for ordered discrete, 'u' for un-ordered discrete, 'd' for discrete, or 'c'
        for continuous.

        Inference requires a set of control variables. These can be provided explicitly using `common_causes`, which
        contains a list of variable names to control for. These can be provided implicitly by specifying a causal graph
        with `dot_graph`, from which they will be chosen using the default identification method.

        When the set of control variables can't be identified with the provided assumptions, a prompt will raise to the
        user asking whether to proceed. To automatically over-ride the prompt, you can set the flag
        `proceed_when_unidentifiable` to `True`.

        Some methods build components during inference which are expensive. To retain those components for later
        inference (e.g. successive calls to `do` with different values of `x`), you can set the `stateful` flag to `True`.
        Be cautious about using the `do` operation statefully. State is set on the namespace, rather than the method, so
        can behave unpredictably. To reset the namespace and run statelessly again, you can call the `reset` method.

        :param x: str, list, dict: The causal state on which to intervene, and (optional) its interventional value(s).
        :param method: The inference method to use with the sampler. Currently, `'mcmc'`, `'weighting'`, and
        `'kernel_density'` are supported.
        :param num_cores: int: if the inference method only supports sampling a point at a time, this will parallelize
        sampling.
        :param variable_types: dict: The dictionary containing the variable types. Must contain the union of the causal
        state, control variables, and the outcome. `None` (the default) is treated as an empty dict.
        :param outcome: str: The outcome variable.
        :param params: dict: extra parameters to set as attributes on the sampler object
        :param dot_graph: str: A string specifying the causal graph.
        :param common_causes: list: A list of strings containing the variable names to control for.
        :param estimand_type: str: 'ate' is the only one currently supported. Others may be added later, to allow for
        CATE estimation.
        :param proceed_when_unidentifiable: bool: A flag to over-ride user prompts to proceed when effects aren't
        identifiable with the assumptions provided.
        :param stateful: bool: Whether to retain state. By default, the do operation is stateless.
        :return: pandas.DataFrame: A DataFrame containing the sampled outcome
        """
        # A fresh dict per call: a shared mutable default would leak state
        # between unrelated calls if any sampler mutated it.
        if variable_types is None:
            variable_types = {}
        x, keep_original_treatment = self.parse_x(x)
        # Cached components are only reusable for the same sampling method.
        if not stateful or method != self._method:
            self.reset()
        if not self._causal_model:
            # NOTE(review): only the first key of `x` is used as the
            # treatment -- confirm this is intended for multi-variable input.
            self._causal_model = CausalModel(
                self._obj, list(x)[0],
                outcome,
                graph=dot_graph,
                common_causes=common_causes,
                instruments=None,
                estimand_type=estimand_type,
                proceed_when_unidentifiable=proceed_when_unidentifiable)
        self._identified_estimand = self._causal_model.identify_effect()
        if not self._sampler:
            self._method = method
            do_sampler_class = do_samplers.get_class_object(method +
                                                            "_sampler")
            self._sampler = do_sampler_class(
                self._obj,
                self._identified_estimand,
                self._causal_model._treatment,
                self._causal_model._outcome,
                params=params,
                variable_types=variable_types,
                num_cores=num_cores,
                causal_model=self._causal_model,
                keep_original_treatment=keep_original_treatment)
        result = self._sampler.do_sample(x)
        if not stateful:
            self.reset()
        return result

    def parse_x(self, x):
        """
        Normalise `x` into a `(dict, keep_original_treatment)` pair.

        A str or list names variables without values, giving a dict of `None` values and `True`. A dict is returned
        as-is with `False`. Subclasses of these types are accepted.

        :param x: str, list, dict: the causal state as passed to `do`.
        :return: tuple: the normalised dict and the keep-original-treatment flag.
        :raises Exception: if `x` is not a str, list or dict.
        """
        if isinstance(x, str):
            return {x: None}, True
        if isinstance(x, list):
            return {xi: None for xi in x}, True
        if isinstance(x, dict):
            return x, False
        raise Exception('x format not recognized: {}'.format(type(x)))
コード例 #10
0
class DoWhyExample:
    """Small causal-inference pipeline over a hand-built graph and dataset.

    The model, identified estimand and estimate are built lazily and cached in
    ``t_model``, ``t_identify`` and ``t_estimate``. Each step accepts
    ``force_again`` to rebuild from the model down; rebuilding an earlier step
    always invalidates the later cached steps so they never mix results from
    different model instances.
    """

    # Generated dataset; not referenced by the methods of this class.
    data_old = ds.linear_dataset(beta=10,
                                 num_common_causes=5,
                                 num_instruments=5,
                                 num_samples=10000,
                                 treatment_is_binary=True)

    # GML graph text, assembled from adjacent string literals and extended by
    # the add_node / connect_node helpers before the closing bracket is added.
    gml_graph = ('graph[directed 1'
                 'node[ id "TOJ" label "TOJ"]'
                 'node[ id "IntCur" label "IntCur"]'
                 'node[ id "U" label "Unobserved Confounders"]'
                 'edge[source "TOJ" target "IntCur"]'
                 'edge[source "U" target "TOJ"]'
                 'edge[source "U" target "IntCur"]')

    gml_graph = add_node(gml_graph, "YeshivaAdults", "IntCur")
    gml_graph = add_node(gml_graph, "Sex", "IntCur")
    gml_graph = add_node(gml_graph, "Age", "IntCur")
    gml_graph = connect_node(gml_graph, "Age", "TOJ")
    gml_graph = connect_node(gml_graph, "Age", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "Sex", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "TOJ", "YeshivaAdults")
    gml_graph = gml_graph + ']'
    # table
    # ID    Age     Sex     TOJ (Orthodox)? (Treatment?)     Yeshiva?    Intell. Curios? (Outcome)

    data = pd.DataFrame(
        np.array([[30.0, 1.0, 1.0, 1.0, 0.0], [40.0, 1.0, 0.0, 0.0, 1.0]]),
        columns=['Age', 'Sex', 'TOJ', 'YeshivaAdults', 'IntCur'])

    # Lazily filled caches for the three pipeline stages.
    t_model = None
    t_identify = None
    t_estimate = None

    def model(self, force_again=False):
        """Return the cached ``CausalModel``, building it when needed.

        :param force_again: rebuild the model even if one is cached.
        :return: the ``CausalModel`` for ``data`` with treatment ``'TOJ'`` and
            outcome ``'IntCur'``.
        """
        if self.t_model is None or force_again:
            self.t_model = CausalModel(data=self.data,
                                       treatment='TOJ',
                                       outcome='IntCur',
                                       graph=self.gml_graph)
            # Anything derived from the previous model is now stale.
            self.t_identify = None
            self.t_estimate = None
        return self.t_model

    def identify(self, force_again=False):
        """Return the cached identified estimand, computing it when needed.

        :param force_again: rebuild the model and re-identify.
        :return: the result of ``identify_effect`` on the current model.
        """
        if force_again:
            self.model(force_again=True)
        if self.t_identify is None:
            self.t_identify = self.model().identify_effect()
            # An estimate based on an older estimand is no longer valid.
            self.t_estimate = None
        return self.t_identify

    def estimate(self,
                 method_name="backdoor.propensity_score_matching",
                 force_again=False):
        """Return the cached effect estimate, computing it when needed.

        A cached estimate is returned regardless of ``method_name``; pass
        ``force_again=True`` to re-estimate with a different method.

        :param method_name: estimator name passed to ``estimate_effect``.
        :param force_again: rebuild the model, estimand and estimate.
        :return: the result of ``estimate_effect``.
        """
        # Resolve the estimand first: it builds the model if necessary, so the
        # model is never dereferenced while still ``None`` and the estimate is
        # always taken from the same model that produced the estimand.
        estimand = self.identify(force_again)
        if self.t_estimate is None:
            self.t_estimate = self.model().estimate_effect(
                estimand, method_name)
        return self.t_estimate

    def refute(self, method_name="random_common_cause", force_again=False):
        """Run a refutation of the current estimate.

        :param method_name: refuter name passed to ``refute_estimate``.
        :param force_again: rebuild the whole pipeline (once) before refuting.
        :return: the result of ``refute_estimate``.
        """
        # One forced pass through estimate() rebuilds everything exactly once;
        # the cached model and estimand are then reused for the refutation.
        estimate = self.estimate(force_again=force_again)
        return self.model().refute_estimate(
            self.identify(),
            estimate,
            method_name=method_name)