Example #1
0
def init_data():
    """Generate a small synthetic linear dataset with a binary treatment.

    Returns the dict produced by ``linear_dataset`` (data frame, variable
    names, and GML graph).
    """
    # NOTE(review): called as bare ``linear_dataset`` here while sibling
    # code uses ``datasets.linear_dataset`` — confirm the module import.
    config = {
        "beta": 10,
        "num_common_causes": 4,
        "num_instruments": 2,
        "num_effect_modifiers": 2,
        "num_treatments": 1,
        "num_samples": 1000,
        "treatment_is_binary": True,
    }
    return linear_dataset(**config)
 def test_iv_estimators_deepiv(self):
     """Smoke-test the econml instrumental-variable estimators.

     Fits a DeepIV estimator (keras treatment/response networks) on a
     continuous-treatment dataset, then a LinearIntentToTreatDRIV
     estimator on a binary-treatment dataset with a discrete instrument.

     NOTE(review): originally named ``test_iv_estimators``; a later
     method of the same name shadowed this one, so it was never collected
     by pytest. Renamed so the test actually runs.
     """
     # Skip the whole test when keras is not installed.
     keras = pytest.importorskip("keras")
     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     df = data['df']
     model = CausalModel(data=data["df"],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)
     # Test DeepIV
     # Input widths for the two stage networks: instruments + effect
     # modifiers feed the treatment net; treatments + effect modifiers
     # feed the response net.
     dims_zx = len(model._instruments) + len(model._effect_modifiers)
     dims_tx = len(model._treatment) + len(model._effect_modifiers)
     treatment_model = keras.Sequential([
         keras.layers.Dense(
             128, activation='relu',
             input_shape=(dims_zx, )),  # sum of dims of Z and X
         keras.layers.Dropout(0.17),
         keras.layers.Dense(64, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(32, activation='relu'),
         keras.layers.Dropout(0.17)
     ])
     response_model = keras.Sequential([
         keras.layers.Dense(
             128, activation='relu',
             input_shape=(dims_tx, )),  # sum of dims of T and X
         keras.layers.Dropout(0.17),
         keras.layers.Dense(64, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(32, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(1)
     ])
     deepiv_estimate = model.estimate_effect(
         identified_estimand,
         method_name="iv.econml.deepiv.DeepIVEstimator",
         target_units=lambda df: df["X0"] > -1,
         confidence_intervals=False,
         method_params={
             "init_params": {
                 'n_components':
                 10,  # Number of gaussians in the mixture density networks
                 # Treatment model,
                 'm':
                 lambda z, x: treatment_model(
                     keras.layers.concatenate([z, x])),
                 # Response model
                 "h":
                 lambda t, x: response_model(
                     keras.layers.concatenate([t, x])),
                 'n_samples':
                 1,  # Number of samples used to estimate the response
                 'first_stage_options': {
                     'epochs': 25
                 },
                 'second_stage_options': {
                     'epochs': 25
                 }
             },
             "fit_params": {}
         })
     # Test IntentToTreatDRIV
     # Binary treatment with a single discrete instrument, as required by
     # the intent-to-treat estimator.
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=1,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=True,
                                    num_discrete_instruments=1)
     df = data['df']
     model = CausalModel(data=data["df"],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)
     driv_estimate = model.estimate_effect(
         identified_estimand,
         method_name="iv.econml.ortho_iv.LinearIntentToTreatDRIV",
         target_units=lambda df: df["X0"] > 1,
         confidence_intervals=False,
         method_params={
             "init_params": {
                 'model_T_XZ': GradientBoostingClassifier(),
                 'model_Y_X': GradientBoostingRegressor(),
                 'flexible_model_effect': GradientBoostingRegressor(),
                 # degree=1 keeps the featurizer linear (no interactions)
                 'featurizer': PolynomialFeatures(degree=1,
                                                  include_bias=False)
             },
             "fit_params": {}
         })
 def test_backdoor_estimators(self):
     """Smoke-test the econml backdoor estimators.

     Fits LinearDML and ContinuousTreatmentOrthoForest on a continuous
     treatment/outcome dataset, then LinearDRLearner on a binary
     treatment/outcome dataset.
     """
     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     df = data['df']
     model = CausalModel(data=data["df"],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)
     # Test LinearDML
     dml_estimate = model.estimate_effect(
         identified_estimand,
         method_name="backdoor.econml.dml.LinearDML",
         control_value=0,
         treatment_value=1,
         target_units=lambda df: df["X0"] > 1,  # condition used for CATE
         method_params={
             "init_params": {
                 'model_y': GradientBoostingRegressor(),
                 'model_t': GradientBoostingRegressor(),
                 'featurizer': PolynomialFeatures(degree=1,
                                                  include_bias=True)
             },
             "fit_params": {}
         })
     # Test ContinuousTreatmentOrthoForest
     orthoforest_estimate = model.estimate_effect(
         identified_estimand,
         method_name=
         "backdoor.econml.ortho_forest.ContinuousTreatmentOrthoForest",
         target_units=lambda df: df["X0"] > 2,
         method_params={
             "init_params": {
                 'n_trees': 10  # small forest keeps the test fast
             },
             "fit_params": {}
         })
     # Test LinearDRLearner
     data_binary = datasets.linear_dataset(10,
                                           num_common_causes=4,
                                           num_samples=10000,
                                           num_instruments=2,
                                           num_effect_modifiers=2,
                                           treatment_is_binary=True,
                                           outcome_is_binary=True)
     model_binary = CausalModel(
         data=data_binary["df"],
         treatment=data_binary["treatment_name"],
         outcome=data_binary["outcome_name"],
         # BUG FIX: previously read the effect modifiers from the earlier
         # continuous dataset (``data``) instead of ``data_binary``.
         effect_modifiers=data_binary["effect_modifier_names"],
         graph=data_binary["gml_graph"])
     identified_estimand_binary = model_binary.identify_effect(
         proceed_when_unidentifiable=True)
     drlearner_estimate = model_binary.estimate_effect(
         identified_estimand_binary,
         method_name="backdoor.econml.drlearner.LinearDRLearner",
         target_units=lambda df: df["X0"] > 1,
         confidence_intervals=False,
         method_params={
             "init_params": {
                 'model_propensity':
                 LogisticRegressionCV(cv=3,
                                      solver='lbfgs',
                                      multi_class='auto')
             },
             "fit_params": {}
         })
 def test_iv_estimators(self):
     """Smoke-test the DeepIV instrumental-variable estimator.

     Builds keras treatment and response networks and fits econml's
     DeepIV through the DoWhy ``estimate_effect`` API.
     """
     # Guard the keras dependency, consistent with the other IV test in
     # this file; the original body referenced ``keras`` without any
     # visible import, which would raise NameError if keras is not
     # imported at module level.
     keras = pytest.importorskip("keras")
     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     df = data['df']
     model = CausalModel(data=data["df"],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)
     # Test DeepIV
     # Input widths: instruments + effect modifiers for the treatment
     # net, treatments + effect modifiers for the response net.
     dims_zx = len(model._instruments) + len(model._effect_modifiers)
     dims_tx = len(model._treatment) + len(model._effect_modifiers)
     treatment_model = keras.Sequential([
         keras.layers.Dense(
             128, activation='relu',
             input_shape=(dims_zx, )),  # sum of dims of Z and X
         keras.layers.Dropout(0.17),
         keras.layers.Dense(64, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(32, activation='relu'),
         keras.layers.Dropout(0.17)
     ])
     response_model = keras.Sequential([
         keras.layers.Dense(
             128, activation='relu',
             input_shape=(dims_tx, )),  # sum of dims of T and X
         keras.layers.Dropout(0.17),
         keras.layers.Dense(64, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(32, activation='relu'),
         keras.layers.Dropout(0.17),
         keras.layers.Dense(1)
     ])
     deepiv_estimate = model.estimate_effect(
         identified_estimand,
         method_name="iv.econml.deepiv.DeepIVEstimator",
         target_units=lambda df: df["X0"] > -1,
         confidence_intervals=False,
         method_params={
             "init_params": {
                 'n_components':
                 10,  # Number of gaussians in the mixture density networks
                 'm':
                 lambda z, x: treatment_model(
                     keras.layers.concatenate([z, x])),  # Treatment model,
                 "h":
                 lambda t, x: response_model(
                     keras.layers.concatenate([t, x])),  # Response model
                 'n_samples':
                 1,  # Number of samples used to estimate the response
                 'first_stage_options': {
                     'epochs': 25
                 },
                 'second_stage_options': {
                     'epochs': 25
                 }
             },
             "fit_params": {}
         })
class DoWhyExample:
    """Toy end-to-end DoWhy workflow: model -> identify -> estimate -> refute.

    Class-level statements run once at class-creation time: they build a
    synthetic dataset, assemble a GML causal graph, and define a tiny
    hand-written data frame that the pipeline methods operate on. Results
    are cached on the instance (``t_model``/``t_identify``/``t_estimate``)
    and recomputed only when ``force_again=True``.
    """

    # Synthetic dataset kept for reference; the pipeline below uses the
    # hand-written ``data`` frame instead.
    data_old = ds.linear_dataset(beta=10,
                                 num_common_causes=5,
                                 num_instruments=5,
                                 num_samples=10000,
                                 treatment_is_binary=True)

    # Base graph: treatment TOJ -> outcome IntCur, both confounded by U.
    gml_graph = ('graph[directed 1'
                 'node[ id "TOJ" label "TOJ"]'
                 'node[ id "IntCur" label "IntCur"]'
                 'node[ id "U" label "Unobserved Confounders"]'
                 'edge[source "TOJ" target "IntCur"]'
                 'edge[source "U" target "TOJ"]'
                 'edge[source "U" target "IntCur"]')

    # Extend the graph with observed covariates. NOTE(review): add_node /
    # connect_node are helpers not visible in this file — presumably they
    # append GML node/edge fragments; confirm their exact semantics.
    gml_graph = add_node(gml_graph, "YeshivaAdults", "IntCur")
    gml_graph = add_node(gml_graph, "Sex", "IntCur")
    gml_graph = add_node(gml_graph, "Age", "IntCur")
    gml_graph = connect_node(gml_graph, "Age", "TOJ")
    gml_graph = connect_node(gml_graph, "Age", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "Sex", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "TOJ", "YeshivaAdults")
    gml_graph = gml_graph + ']'  # close the GML document
    # table
    # ID    Age     Sex     TOJ (Orthodox)? (Treatment?)     Yeshiva?    Intell. Curios? (Outcome)

    data = pd.DataFrame(
        np.array([[30.0, 1.0, 1.0, 1.0, 0.0], [40.0, 1.0, 0.0, 0.0, 1.0]]),
        columns=['Age', 'Sex', 'TOJ', 'YeshivaAdults', 'IntCur'])

    # Cached pipeline results (instance attributes once assigned).
    t_model = None
    t_identify = None
    t_estimate = None

    def model(self, force_again=False):
        """Build (or rebuild) the CausalModel; cached after the first call."""
        if self.t_model is None or force_again:
            self.t_model = CausalModel(data=self.data,
                                       treatment='TOJ',
                                       outcome='IntCur',
                                       graph=self.gml_graph)
        return self.t_model

    def identify(self, force_again=False):
        """Identify the causal effect, building the model first if needed."""
        if self.t_identify is None or force_again:
            if self.t_model is None or force_again:
                self.model(force_again=force_again)
            self.t_identify = self.t_model.identify_effect()
        return self.t_identify

    def estimate(self,
                 method_name="backdoor.propensity_score_matching",
                 force_again=False):
        """Estimate the identified effect with the given method."""
        if self.t_estimate is None or force_again:
            # BUG FIX: resolve the estimand FIRST. identify() builds the
            # causal model as a side effect, so t_model is guaranteed to
            # exist before we dereference it. Previously
            # ``self.t_model.estimate_effect`` was evaluated before
            # identify() ran, raising AttributeError on a fresh instance.
            estimand = self.identify(force_again)
            self.t_estimate = self.t_model.estimate_effect(
                estimand, method_name)
        return self.t_estimate

    def refute(self, method_name="random_common_cause", force_again=False):
        """Refute the current estimate with the given refutation method."""
        return self.model(force_again=force_again).refute_estimate(
            self.identify(force_again),
            self.estimate(force_again=force_again),
            method_name=method_name)