예제 #1
0
    def identify_effect(self):
        """Identify backdoor and instrumental-variable estimands.

        Fetches the common causes of treatment and outcome from the causal
        graph, builds a backdoor estimand over the observed ones, and adds an
        instrumental-variable estimand when the graph reports instruments.

        :returns: an IdentifiedEstimand whose ``estimands`` dict has the keys
            "backdoor" and "iv" ("iv" is None when there are no instruments)
        """
        estimands_dict = {}
        common_causes = self._graph.get_common_causes(self.treatment_name,
                                                      self.outcome_name)
        self.logger.info("Common causes of treatment and outcome:" +
                         str(common_causes))
        # Report the real state of the graph: the proceed flag only suppresses
        # the interactive prompt, it does not make the confounders observed.
        if self._graph.all_observed(common_causes):
            self.logger.info(
                "All common causes are observed. Causal effect can be identified."
            )
        else:
            self.logger.warning(
                "There are unobserved common causes. Causal effect cannot be identified."
            )
            if self._proceed_when_unidentifiable:
                self.logger.info(
                    "Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True."
                )
            else:
                # NOTE(review): the answer is not acted upon here;
                # identification continues regardless of the reply.
                cli.query_yes_no(
                    "WARN: Do you want to continue by ignoring these unobserved confounders?",
                    default=None)
        observed_common_causes = list(
            self._graph.filter_unobserved_variables(common_causes))

        backdoor_estimand_expr = self.construct_backdoor_estimand(
            self.estimand_type, self._graph.treatment_name,
            self._graph.outcome_name, observed_common_causes)

        self.logger.debug("Identified expression = " +
                          str(backdoor_estimand_expr))
        estimands_dict["backdoor"] = backdoor_estimand_expr

        # Now checking if there is also a valid iv estimand
        instrument_names = self._graph.get_instruments(self.treatment_name,
                                                       self.outcome_name)
        self.logger.info("Instrumental variables for treatment and outcome:" +
                         str(instrument_names))
        if len(instrument_names) > 0:
            iv_estimand_expr = self.construct_iv_estimand(
                self.estimand_type, self._graph.treatment_name,
                self._graph.outcome_name, instrument_names)
            self.logger.debug("Identified expression = " +
                              str(iv_estimand_expr))
            estimands_dict["iv"] = iv_estimand_expr
        else:
            estimands_dict["iv"] = None

        estimand = IdentifiedEstimand(
            treatment_variable=self._graph.treatment_name,
            outcome_variable=self._graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=observed_common_causes,
            instrumental_variables=instrument_names)
        return estimand
예제 #2
0
    def build_backdoor_estimands_dict(self,
                                      treatment_name,
                                      outcome_name,
                                      backdoor_sets,
                                      estimands_dict,
                                      proceed_when_unidentifiable=None):
        """Build one backdoor estimand per usable adjustment set.

        :param treatment_name: treatment variable name(s), forwarded to
            ``construct_backdoor_estimand``
        :param outcome_name: outcome variable name(s), forwarded to
            ``construct_backdoor_estimand``
        :param backdoor_sets: iterable of dicts, each with the keys
            "backdoor_set" and "num_paths_blocked_by_observed_nodes"
        :param estimands_dict: dict that receives the new entries under the
            keys "backdoor1", "backdoor2", ...; it is updated in place
        :param proceed_when_unidentifiable: when None, falls back to
            ``self._proceed_when_unidentifiable``; when truthy, unobserved
            variables are dropped without prompting the user
        :returns: tuple ``(estimands_dict, backdoor_variables_dict)`` where
            the second dict maps the same "backdoorN" keys to the adjustment
            set (a list) used for each estimand
        """
        backdoor_variables_dict = {}
        if proceed_when_unidentifiable is None:
            proceed_when_unidentifiable = self._proceed_when_unidentifiable

        # Start from "nothing identified" so the name is bound on every path,
        # including a prompt reply that is neither True nor False.
        backdoor_sets_arr = []
        is_identified = [
            self._graph.all_observed(bset["backdoor_set"])
            for bset in backdoor_sets
        ]

        if all(is_identified):
            self.logger.info(
                "All common causes are observed. Causal effect can be identified."
            )
            backdoor_sets_arr = [
                list(bset["backdoor_set"]) for bset in backdoor_sets
            ]
        else:  # there is unobserved confounding
            self.logger.warning(
                "If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly."
            )
            if proceed_when_unidentifiable:
                self.logger.info(
                    "Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True."
                )
                proceed = True
            else:
                response = cli.query_yes_no(
                    "WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt)",
                    default=None)
                proceed = response is True
                if response is False:
                    self.logger.warning(
                        "Identification failed due to unobserved variables.")
            if proceed:
                # Keep only the sets whose observed members block the most
                # paths, with their unobserved members filtered out.
                max_paths_blocked = max(
                    bset['num_paths_blocked_by_observed_nodes']
                    for bset in backdoor_sets)
                backdoor_sets_arr = [
                    list(
                        self._graph.filter_unobserved_variables(
                            bset["backdoor_set"])) for bset in backdoor_sets
                    if bset["num_paths_blocked_by_observed_nodes"] ==
                    max_paths_blocked
                ]

        for position, backdoor_set in enumerate(backdoor_sets_arr, start=1):
            key = "backdoor" + str(position)
            backdoor_estimand_expr = self.construct_backdoor_estimand(
                self.estimand_type, treatment_name, outcome_name,
                backdoor_set)
            self.logger.debug("Identified expression = " +
                              str(backdoor_estimand_expr))
            estimands_dict[key] = backdoor_estimand_expr
            backdoor_variables_dict[key] = backdoor_set
        return estimands_dict, backdoor_variables_dict
예제 #3
0
    def __init__(self,
                 data,
                 treatment,
                 outcome,
                 graph=None,
                 common_causes=None,
                 instruments=None,
                 effect_modifiers=None,
                 estimand_type="nonparametric-ate",
                 proceed_when_unidentifiable=False,
                 missing_nodes_as_confounders=False,
                 **kwargs):
        """Store the inputs and build the causal graph for the model.

        Records the treatment and outcome variables and determines the common
        causes and instruments, either from the explicit arguments (when no
        graph is given) or from the supplied graph.

        At least one of graph, common_causes or instruments must be provided.

        :param data: a pandas dataframe containing treatment, outcome and
            other variables
        :param treatment: name of the treatment variable
        :param outcome: name of the outcome variable
        :param graph: path to a DOT file containing a DAG, or a string with a
            DAG specification in DOT format
        :param common_causes: names of common causes of treatment and
            outcome. Only used when graph is None.
        :param instruments: names of instrumental variables for the effect of
            treatment on outcome. Only used when graph is None.
        :param effect_modifiers: names of variables that can modify the
            treatment effect. When empty or not given and a graph is
            supplied, the graph's effect modifiers are used instead.
        :param estimand_type: the type of estimand requested (currently only
            "nonparametric-ate" is supported)
        :param proceed_when_unidentifiable: binary flag; whether
            identification proceeds by ignoring potential unobserved
            confounders
        :param missing_nodes_as_confounders: binary flag; whether dataframe
            variables absent from the causal graph are automatically added as
            confounder nodes
        :returns: an instance of CausalModel class

        """
        self._data = data
        self._treatment = parse_state(treatment)
        self._outcome = parse_state(outcome)
        self._effect_modifiers = parse_state(effect_modifiers)
        self._estimand_type = estimand_type
        self._proceed_when_unidentifiable = proceed_when_unidentifiable
        self._missing_nodes_as_confounders = missing_nodes_as_confounders
        self.logger = logging.getLogger(__name__)

        if graph is not None:
            self._graph = CausalGraph(
                self._treatment,
                self._outcome,
                graph,
                effect_modifier_names=self._effect_modifiers,
                observed_node_names=self._data.columns.tolist(),
                missing_nodes_as_confounders=self._missing_nodes_as_confounders
            )
            self._common_causes = self._graph.get_common_causes(
                self._treatment, self._outcome)
            self._instruments = self._graph.get_instruments(
                self._treatment, self._outcome)
            # User-provided effect modifiers take precedence (they may differ
            # from the graph's, e.g. when a modifier is also a common cause);
            # the graph is consulted only when none were provided.
            if not self._effect_modifiers:
                self._effect_modifiers = self._graph.get_effect_modifiers(
                    self._treatment, self._outcome)
        else:
            self.logger.warning(
                "Causal Graph not provided. DoWhy will construct a graph based on data inputs."
            )
            self._common_causes = parse_state(common_causes)
            self._instruments = parse_state(instruments)

            # Forward only the roles the caller actually supplied.
            role_kwargs = {}
            if common_causes is not None:
                role_kwargs["common_cause_names"] = self._common_causes
            if instruments is not None:
                role_kwargs["instrument_names"] = self._instruments

            if role_kwargs:
                self._graph = CausalGraph(
                    self._treatment,
                    self._outcome,
                    effect_modifier_names=self._effect_modifiers,
                    observed_node_names=self._data.columns.tolist(),
                    **role_kwargs)
            else:
                cli.query_yes_no(
                    "WARN: Are you sure that there are no common causes of treatment and outcome?",
                    default=None)

        self._other_variables = kwargs
        self.summary()
예제 #4
0
    def __init__(self, data, treatment, outcome, graph=None,
                 common_causes=None, instruments=None, estimand_type="ate",
                 proceed_when_unidentifiable=False,
                 **kwargs):
        """Store the inputs and build the causal graph for the model.

        Records the treatment and outcome variables and determines the common
        causes and instruments, either from the explicit arguments (when no
        graph is given) or from the supplied graph.

        At least one of graph, common_causes or instruments must be provided.

        :param data: a pandas dataframe containing treatment, outcome and
            other variables
        :param treatment: name of the treatment variable
        :param outcome: name of the outcome variable
        :param graph: path to a DOT file containing a DAG, or a string with a
            DAG specification in DOT format
        :param common_causes: names of common causes of treatment and outcome
        :param instruments: names of instrumental variables for the effect of
            treatment on outcome
        :returns: an instance of CausalModel class

        """
        self._data = data
        self._treatment = parse_state(treatment)
        self._outcome = parse_state(outcome)
        self._estimand_type = estimand_type
        self._proceed_when_unidentifiable = proceed_when_unidentifiable

        # An optional 'logging_level' keyword overrides the INFO default.
        # TODO: move the logging level argument to a json file.
        logging.basicConfig(level=kwargs.get('logging_level', logging.INFO))
        self.logger = logging.getLogger(__name__)

        if graph is not None:
            self._graph = CausalGraph(
                self._treatment,
                self._outcome,
                graph,
                observed_node_names=self._data.columns.tolist()
            )
            self._common_causes = self._graph.get_common_causes(
                self._treatment, self._outcome)
            self._instruments = self._graph.get_instruments(
                self._treatment, self._outcome)
        else:
            self.logger.warning("Causal Graph not provided. DoWhy will construct a graph based on data inputs.")
            self._common_causes = parse_state(common_causes)
            self._instruments = parse_state(instruments)

            # Forward only the roles the caller actually supplied.
            role_kwargs = {}
            if common_causes is not None:
                role_kwargs["common_cause_names"] = self._common_causes
            if instruments is not None:
                role_kwargs["instrument_names"] = self._instruments

            if role_kwargs:
                self._graph = CausalGraph(
                    self._treatment,
                    self._outcome,
                    observed_node_names=self._data.columns.tolist(),
                    **role_kwargs
                )
            else:
                cli.query_yes_no(
                    "WARN: Are you sure that there are no common causes of treatment and outcome?",
                    default=None
                )

        self._other_variables = kwargs
        self.summary()
예제 #5
0
    def __init__(self,
                 data,
                 treatment,
                 outcome,
                 graph=None,
                 common_causes=None,
                 instruments=None,
                 effect_modifiers=None,
                 mediator=None,
                 estimand_type="nonparametric-ate",
                 proceed_when_unidentifiable=False,
                 missing_nodes_as_confounders=False,
                 **kwargs):
        """Store the inputs and build the causal graph for the model.

        Records the treatment and outcome variables and determines the common
        causes, instruments, effect modifiers and mediators, either from the
        explicit arguments (when no graph is given) or from the supplied
        graph.

        At least one of graph, common_causes or instruments must be provided.

        :param data: a pandas dataframe containing treatment, outcome and
            other variables
        :param treatment: name of the treatment variable
        :param outcome: name of the outcome variable
        :param graph: path to a DOT file containing a DAG, or a string with a
            DAG specification in DOT format
        :param common_causes: names of common causes of treatment and outcome
        :param instruments: names of instrumental variables for the effect of
            treatment on outcome
        :param effect_modifiers: names of variables that can modify the
            treatment effect (useful for heterogeneous treatment effect
            estimation)
        :param mediator: names of mediator between treatment and outcome
        :param estimand_type: the type of estimand requested (currently only
            "nonparametric-ate" is supported)
        :param proceed_when_unidentifiable: binary flag; whether
            identification proceeds by ignoring potential unobserved
            confounders
        :param missing_nodes_as_confounders: binary flag; whether dataframe
            variables absent from the causal graph are automatically added as
            confounder nodes
        :returns: an instance of CausalModel class

        """
        self._data = data
        self._treatment = parse_state(treatment)
        self._outcome = parse_state(outcome)
        self._estimand_type = estimand_type
        self._proceed_when_unidentifiable = proceed_when_unidentifiable
        self._missing_nodes_as_confounders = missing_nodes_as_confounders

        # An optional 'logging_level' keyword overrides the INFO default.
        # TODO: move the logging level argument to a json file.
        logging.basicConfig(level=kwargs.get('logging_level', logging.INFO))
        self.logger = logging.getLogger(__name__)

        if graph is not None:
            # With a graph, every variable role is read from the graph itself.
            self._graph = CausalGraph(
                self._treatment,
                self._outcome,
                graph,
                observed_node_names=self._data.columns.tolist(),
                missing_nodes_as_confounders=self._missing_nodes_as_confounders
            )
            self._common_causes = self._graph.get_common_causes(
                self._treatment, self._outcome)
            self._instruments = self._graph.get_instruments(
                self._treatment, self._outcome)
            self._effect_modifiers = self._graph.get_effect_modifiers(
                self._treatment, self._outcome)
            self._mediator = self._graph.get_mediators(
                self._treatment, self._outcome)
        else:
            self.logger.warning(
                "Causal Graph not provided. DoWhy will construct a graph based on data inputs."
            )
            self._common_causes = parse_state(common_causes)
            self._instruments = parse_state(instruments)
            self._effect_modifiers = parse_state(effect_modifiers)
            self._mediator = parse_state(mediator)

            # Forward only the roles the caller actually supplied.
            role_kwargs = {}
            if common_causes is not None:
                role_kwargs["common_cause_names"] = self._common_causes
            if instruments is not None:
                role_kwargs["instrument_names"] = self._instruments

            if role_kwargs:
                # The mediator is forwarded as received, not the parsed copy.
                self._graph = CausalGraph(
                    self._treatment,
                    self._outcome,
                    effect_modifier_names=self._effect_modifiers,
                    mediator_name=mediator,
                    observed_node_names=self._data.columns.tolist(),
                    **role_kwargs)
            else:
                cli.query_yes_no(
                    "WARN: Are you sure that there are no common causes of treatment and outcome?",
                    default=None)

        self._other_variables = kwargs
        self.summary()
예제 #6
0
    def identify_effect(self):
        """Main method that returns an identified estimand (if one exists).

        Checks, in order, backdoor, instrumental variable and frontdoor
        identification based on the causal graph, and collects every estimand
        expression that could be constructed.

        :returns: target estimand, an instance of the IdentifiedEstimand class
        """

        estimands_dict = {}
        ### 1. BACKDOOR IDENTIFICATION
        # First, checking if there are any valid backdoor adjustment sets
        backdoor_variables_dict = {}
        backdoor_sets = self.identify_backdoor()
        # Start from "nothing identified" so the name is bound on every path,
        # including a prompt reply that is neither True nor False.
        backdoor_sets_arr = []
        is_identified = [
            self._graph.all_observed(bset["backdoor_set"])
            for bset in backdoor_sets
        ]

        if all(is_identified):
            self.logger.info(
                "All common causes are observed. Causal effect can be identified."
            )
            backdoor_sets_arr = [
                list(bset["backdoor_set"]) for bset in backdoor_sets
            ]
        else:
            self.logger.warning(
                "If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly."
            )
            if self._proceed_when_unidentifiable:
                self.logger.info(
                    "Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True."
                )
                proceed = True
            else:
                response = cli.query_yes_no(
                    "WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt)",
                    default=None)
                proceed = response is True
                if response is False:
                    self.logger.warning(
                        "Identification failed due to unobserved variables.")
            if proceed:
                # Keep only the sets whose observed members block the most
                # paths, with their unobserved members filtered out.
                max_paths_blocked = max(
                    bset['num_paths_blocked_by_observed_nodes']
                    for bset in backdoor_sets)
                backdoor_sets_arr = [
                    list(
                        self._graph.filter_unobserved_variables(
                            bset["backdoor_set"])) for bset in backdoor_sets
                    if bset["num_paths_blocked_by_observed_nodes"] ==
                    max_paths_blocked
                ]

        for position, backdoor_set in enumerate(backdoor_sets_arr, start=1):
            key = "backdoor" + str(position)
            backdoor_estimand_expr = self.construct_backdoor_estimand(
                self.estimand_type, self._graph.treatment_name,
                self._graph.outcome_name, backdoor_set)

            self.logger.debug("Identified expression = " +
                              str(backdoor_estimand_expr))
            estimands_dict[key] = backdoor_estimand_expr
            backdoor_variables_dict[key] = backdoor_set
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(
            backdoor_variables_dict)
        estimands_dict["backdoor"] = estimands_dict.get(
            str(default_backdoor_id), None)
        backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(
            str(default_backdoor_id), None)

        ### 2. INSTRUMENTAL VARIABLE IDENTIFICATION
        # Now checking if there is also a valid iv estimand
        instrument_names = self._graph.get_instruments(self.treatment_name,
                                                       self.outcome_name)
        self.logger.info("Instrumental variables for treatment and outcome:" +
                         str(instrument_names))
        if len(instrument_names) > 0:
            iv_estimand_expr = self.construct_iv_estimand(
                self.estimand_type, self._graph.treatment_name,
                self._graph.outcome_name, instrument_names)
            self.logger.debug("Identified expression = " +
                              str(iv_estimand_expr))
            estimands_dict["iv"] = iv_estimand_expr
        else:
            estimands_dict["iv"] = None

        ### 3. FRONTDOOR IDENTIFICATION
        # Now checking if there is a valid frontdoor variable
        frontdoor_variables_names = self.identify_frontdoor()
        self.logger.info("Frontdoor variables for treatment and outcome:" +
                         str(frontdoor_variables_names))
        if len(frontdoor_variables_names) > 0:
            frontdoor_estimand_expr = self.construct_frontdoor_estimand(
                self.estimand_type, self._graph.treatment_name,
                self._graph.outcome_name, frontdoor_variables_names)
            self.logger.debug("Identified expression = " +
                              str(frontdoor_estimand_expr))
            estimands_dict["frontdoor"] = frontdoor_estimand_expr
        else:
            estimands_dict["frontdoor"] = None

        # Finally returning the estimand object
        estimand = IdentifiedEstimand(
            self,
            treatment_variable=self._graph.treatment_name,
            outcome_variable=self._graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=backdoor_variables_dict,
            instrumental_variables=instrument_names,
            frontdoor_variables=frontdoor_variables_names,
            default_backdoor_id=default_backdoor_id)
        return estimand
예제 #7
0
    def identify_effect(self):
        """Return the identified estimand for the treatment's effect.

        Derives the common causes of treatment and outcome from the causal
        graph to build a backdoor estimand, then checks the graph for
        instruments to build an instrumental-variable estimand.

        :returns: target estimand, an instance of the IdentifiedEstimand class
        """
        graph = self._graph

        # Common causes: causes of the treatment that are also causes of the
        # outcome once the treatment -> outcome edges are removed.
        treatment_causes = graph.get_causes(self.treatment_name)
        outcome_causes = graph.get_causes(
            self.outcome_name,
            remove_edges={
                'sources': self.treatment_name,
                'targets': self.outcome_name,
            })
        common_causes = list(treatment_causes.intersection(outcome_causes))
        self.logger.info("Common causes of treatment and outcome:" + str(common_causes))

        if not graph.all_observed(common_causes):
            self.logger.warning("If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.")
            if not self._proceed_when_unidentifiable:
                # The reply is not inspected; identification continues.
                cli.query_yes_no(
                    "WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt)",
                    default=None
                )
            else:
                self.logger.info(
                    "Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True."
                )
        else:
            self.logger.info("All common causes are observed. Causal effect can be identified.")

        observed_common_causes = list(
            graph.filter_unobserved_variables(common_causes))

        backdoor_expr = self.construct_backdoor_estimand(
            self.estimand_type,
            graph.treatment_name,
            graph.outcome_name,
            observed_common_causes,
        )
        self.logger.debug("Identified expression = " + str(backdoor_expr))
        estimands_dict = {"backdoor": backdoor_expr}

        # Instrumental variables: "iv" stays None unless the graph has some.
        instrument_names = graph.get_instruments(self.treatment_name,
                                                 self.outcome_name)
        self.logger.info("Instrumental variables for treatment and outcome:" +
                         str(instrument_names))
        iv_expr = None
        if len(instrument_names) > 0:
            iv_expr = self.construct_iv_estimand(
                self.estimand_type,
                graph.treatment_name,
                graph.outcome_name,
                instrument_names,
            )
            self.logger.debug("Identified expression = " + str(iv_expr))
        estimands_dict["iv"] = iv_expr

        return IdentifiedEstimand(
            treatment_variable=graph.treatment_name,
            outcome_variable=graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=observed_common_causes,
            instrumental_variables=instrument_names,
        )