Beispiel #1
0
    def test_summary(self):
        """Tests the inference results summary for continuous treatment estimators."""
        # Test inference results when `cate_feature_names` doesn not exist

        for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                                 featurizer=PolynomialFeatures(degree=2,
                                                               include_bias=False)
                                 )
            cate_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            summary_results = cate_est.summary()
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            default_names = get_input_columns(TestInference.X)
            fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
                TestInference.X).get_feature_names(default_names)
            np.testing.assert_array_equal(coef_rows, fnames)
            intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
            np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])

            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(),
                                 featurizer=PolynomialFeatures(degree=2,
                                                               include_bias=False)
                                 )
            cate_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = cate_est.summary(feature_names=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
                TestInference.X).get_feature_names(input_features=fnames)
            np.testing.assert_array_equal(coef_rows, fnames)
            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
            cate_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            summary_results = cate_est.summary()
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
            cate_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = cate_est.summary(feature_names=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, fnames)

            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
            wrapped_est = self._NoFeatNamesEst(cate_est)
            wrapped_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            summary_results = wrapped_est.summary()
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

            cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
            wrapped_est = self._NoFeatNamesEst(cate_est)
            wrapped_est.fit(
                TestInference.Y,
                TestInference.T,
                TestInference.X,
                TestInference.W,
                inference=inference
            )
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = wrapped_est.summary(feature_names=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, fnames)
Beispiel #2
0
    def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None, feature_names=None,
            confounder_names=None, instrument_names=None, graph=None, estimand_type="nonparametric-ate",
            proceed_when_unidentifiable=True, missing_nodes_as_confounders=False,
            control_value=0, treatment_value=1, target_units="ate", **kwargs):
        """
        Estimate the counterfactual model from data through dowhy package.

        Parameters
        ----------
        Y: vector of length n
            Outcomes for each sample
        T: vector of length n
            Treatments for each sample
        X: optional (n, d_x) matrix (Default=None)
            Features for each sample
        W: optional (n, d_w) matrix (Default=None)
            Controls for each sample
        Z: optional (n, d_z) matrix (Default=None)
            Instruments for each sample
        outcome_names: optional list (Default=None)
            Name of the outcome
        treatment_names: optional list (Default=None)
            Name of the treatment
        feature_names: optional list (Default=None)
            Name of the features
        confounder_names: optional list (Default=None)
            Name of the confounders
        instrument_names: optional list (Default=None)
            Name of the instruments
        graph: optional
            Path to DOT file containing a DAG or a string containing a DAG specification in DOT format
        estimand_type: optional string
            Type of estimand requested (currently only "nonparametric-ate" is supported).
            In the future, may support other specific parametric forms of identification
        proceed_when_unidentifiable: optional bool (Default=True)
            Whether the identification should proceed by ignoring potential unobserved confounders
        missing_nodes_as_confounders: optional bool (Default=False)
            Whether variables in the dataframe that are not included in the causal graph should be automatically
            included as confounder nodes
        control_value: optional scalar (Default=0)
            Value of the treatment in the control group, for effect estimation
        treatment_value: optional scalar (Default=1)
            Value of the treatment in the treated group, for effect estimation
        target_units: optional (Default="ate")
            The units for which the treatment effect should be estimated.
            This can be of three types:

            1. A string for common specifications of target units (namely, "ate", "att" and "atc"),
            2. A lambda function that can be used as an index for the data (pandas DataFrame),
            3. A new DataFrame that contains values of the effect_modifiers and effect will be estimated
               only for this new data

        kwargs: optional
            Other keyword arguments from fit method for CATE estimator

        Returns
        -------
        self
        """

        Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)

        # create dataframe
        n_obs = Y.shape[0]
        Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)

        # currently dowhy only support single outcome and single treatment
        assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
        assert T.shape[1] == 1, "Can only accept single dimensional treatment."

        # column names
        if outcome_names is None:
            outcome_names = get_input_columns(Y, prefix="Y")
        if treatment_names is None:
            treatment_names = get_input_columns(T, prefix="T")
        if feature_names is None:
            feature_names = get_input_columns(X, prefix="X")
        if confounder_names is None:
            confounder_names = get_input_columns(W, prefix="W")
        if instrument_names is None:
            instrument_names = get_input_columns(Z, prefix="Z")
        column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
        df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)
        self.dowhy_ = CausalModel(
            data=df,
            treatment=treatment_names,
            outcome=outcome_names,
            graph=graph,
            common_causes=feature_names + confounder_names if X.shape[1] > 0 or W.shape[1] > 0 else None,
            instruments=instrument_names if Z.shape[1] > 0 else None,
            effect_modifiers=feature_names if X.shape[1] > 0 else None,
            estimand_type=estimand_type,
            proceed_when_unidetifiable=proceed_when_unidentifiable,
            missing_nodes_as_confounders=missing_nodes_as_confounders
        )
        self.identified_estimand_ = self.dowhy_.identify_effect(proceed_when_unidentifiable=True)
        method_name = "backdoor." + self._cate_estimator.__module__ + "." + self._cate_estimator.__class__.__name__
        init_params = {}
        for p in self._get_params():
            init_params[p] = getattr(self._cate_estimator, p)
        self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
                                                     method_name=method_name,
                                                     control_value=control_value,
                                                     treatment_value=treatment_value,
                                                     target_units=target_units,
                                                     method_params={
                                                         "init_params": init_params,
                                                         "fit_params": kwargs,
                                                     },
                                                     )
        return self