def test_summary(self):
    """Tests the row labels of the summary tables for continuous treatment estimators.

    For each inference method, a ``LinearDML`` estimator is fitted on the shared
    ``TestInference`` data and the first column of its summary tables is compared
    against the expected feature names: default names or user supplied names,
    with and without a polynomial featurizer, and with the estimator wrapped
    in ``self._NoFeatNamesEst``.
    """

    def fit_estimator(featurizer, inference, wrap=False):
        # Build a fresh estimator for every scenario so that no state is shared.
        estimator = LinearDML(model_t=LinearRegression(),
                              model_y=LinearRegression(),
                              featurizer=featurizer)
        if wrap:
            # NOTE(review): presumably this wrapper hides `cate_feature_names`
            # from the inference object -- confirm against its definition.
            estimator = self._NoFeatNamesEst(estimator)
        estimator.fit(
            TestInference.Y,
            TestInference.T,
            TestInference.X,
            TestInference.W,
            inference=inference
        )
        return estimator

    def row_labels(summary, table_index):
        # Skip the header row and keep only the label column of the table.
        return np.asarray(summary.tables[table_index].data)[1:, 0]

    def polynomial_names(base_names):
        # Names a degree 2 polynomial featurizer derives from `base_names`.
        reference = PolynomialFeatures(degree=2, include_bias=False).fit(TestInference.X)
        return reference.get_feature_names(input_features=base_names)

    def prefixed_names(prefix):
        return [prefix + str(i) for i in range(TestInference.d_x)]

    for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
        # Polynomial featurizer, default input names.
        est = fit_estimator(PolynomialFeatures(degree=2, include_bias=False), inference)
        summary = est.summary()
        np.testing.assert_array_equal(
            row_labels(summary, 0),
            polynomial_names(get_input_columns(TestInference.X)))
        np.testing.assert_array_equal(row_labels(summary, 1), ['cate_intercept'])

        # Polynomial featurizer, user supplied input names.
        est = fit_estimator(PolynomialFeatures(degree=2, include_bias=False), inference)
        custom = prefixed_names('Q')
        summary = est.summary(feature_names=custom)
        np.testing.assert_array_equal(row_labels(summary, 0), polynomial_names(custom))

        # No featurizer, default input names.
        est = fit_estimator(None, inference)
        summary = est.summary()
        np.testing.assert_array_equal(row_labels(summary, 0), prefixed_names('X'))

        # No featurizer, user supplied input names.
        est = fit_estimator(None, inference)
        custom = prefixed_names('Q')
        summary = est.summary(feature_names=custom)
        np.testing.assert_array_equal(row_labels(summary, 0), custom)

        # Wrapped estimator, default input names.
        wrapped = fit_estimator(None, inference, wrap=True)
        summary = wrapped.summary()
        np.testing.assert_array_equal(row_labels(summary, 0), prefixed_names('X'))

        # Wrapped estimator, user supplied input names.
        wrapped = fit_estimator(None, inference, wrap=True)
        custom = prefixed_names('Q')
        summary = wrapped.summary(feature_names=custom)
        np.testing.assert_array_equal(row_labels(summary, 0), custom)
def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None, feature_names=None,
        confounder_names=None, instrument_names=None, graph=None, estimand_type="nonparametric-ate",
        proceed_when_unidentifiable=True, missing_nodes_as_confounders=False,
        control_value=0, treatment_value=1, target_units="ate", **kwargs):
    """
    Estimate the counterfactual model from data through the dowhy package.

    Parameters
    ----------
    Y: vector of length n
        Outcomes for each sample
    T: vector of length n
        Treatments for each sample
    X: optional (n, d_x) matrix (Default=None)
        Features for each sample
    W: optional (n, d_w) matrix (Default=None)
        Controls for each sample
    Z: optional (n, d_z) matrix (Default=None)
        Instruments for each sample
    outcome_names: optional list (Default=None)
        Name of the outcome
    treatment_names: optional list (Default=None)
        Name of the treatment
    feature_names: optional list (Default=None)
        Name of the features
    confounder_names: optional list (Default=None)
        Name of the confounders
    instrument_names: optional list (Default=None)
        Name of the instruments
    graph: optional
        Path to DOT file containing a DAG or a string containing a DAG specification in DOT format
    estimand_type: optional string
        Type of estimand requested (currently only "nonparametric-ate" is supported).
        In the future, may support other specific parametric forms of identification
    proceed_when_unidentifiable: optional bool (Default=True)
        Whether the identification should proceed by ignoring potential unobserved confounders.
        The value is forwarded both to the causal model and to the identification step.
    missing_nodes_as_confounders: optional bool (Default=False)
        Whether variables in the dataframe that are not included in the causal graph should be
        automatically included as confounder nodes
    control_value: optional scalar (Default=0)
        Value of the treatment in the control group, for effect estimation
    treatment_value: optional scalar (Default=1)
        Value of the treatment in the treated group, for effect estimation
    target_units: optional (Default="ate")
        The units for which the treatment effect should be estimated.
        This can be of three types:

        1. A string for common specifications of target units (namely, "ate", "att" and "atc"),
        2. A lambda function that can be used as an index for the data (pandas DataFrame),
        3. A new DataFrame that contains values of the effect_modifiers and effect will be estimated
           only for this new data
    kwargs: optional
        Other keyword arguments from fit method for CATE estimator

    Returns
    -------
    self
    """
    Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)

    # create dataframe
    n_obs = Y.shape[0]
    # NOTE(review): the code below relies on every array being 2-dimensional after this
    # call, with omitted inputs presumably turned into zero-column arrays -- confirm
    # against reshape_arrays_2dim.
    Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)

    # currently dowhy only supports single outcome and single treatment
    assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
    assert T.shape[1] == 1, "Can only accept single dimensional treatment."

    # column names: fall back to generated names for anything the caller did not name
    if outcome_names is None:
        outcome_names = get_input_columns(Y, prefix="Y")
    if treatment_names is None:
        treatment_names = get_input_columns(T, prefix="T")
    if feature_names is None:
        feature_names = get_input_columns(X, prefix="X")
    if confounder_names is None:
        confounder_names = get_input_columns(W, prefix="W")
    if instrument_names is None:
        instrument_names = get_input_columns(Z, prefix="Z")
    column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
    df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)

    # A role is only declared to dowhy when the matching array actually has columns.
    has_features = X.shape[1] > 0
    has_confounders = W.shape[1] > 0
    has_instruments = Z.shape[1] > 0
    self.dowhy_ = CausalModel(
        data=df,
        treatment=treatment_names,
        outcome=outcome_names,
        graph=graph,
        common_causes=feature_names + confounder_names if has_features or has_confounders else None,
        instruments=instrument_names if has_instruments else None,
        effect_modifiers=feature_names if has_features else None,
        estimand_type=estimand_type,
        # The keyword was previously misspelled ("unidetifiable"), so the caller's
        # choice was not passed under the name of the parameter it is meant for.
        proceed_when_unidentifiable=proceed_when_unidentifiable,
        missing_nodes_as_confounders=missing_nodes_as_confounders
    )
    # Honour the caller's choice instead of always forcing identification to proceed.
    self.identified_estimand_ = self.dowhy_.identify_effect(
        proceed_when_unidentifiable=proceed_when_unidentifiable)

    # dowhy locates the wrapped estimator through its fully qualified class name.
    method_name = "backdoor." + self._cate_estimator.__module__ + "." + self._cate_estimator.__class__.__name__
    # Snapshot the wrapped estimator's constructor arguments to hand over to dowhy.
    init_params = {p: getattr(self._cate_estimator, p) for p in self._get_params()}
    self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
                                                 method_name=method_name,
                                                 control_value=control_value,
                                                 treatment_value=treatment_value,
                                                 target_units=target_units,
                                                 method_params={
                                                     "init_params": init_params,
                                                     "fit_params": kwargs,
                                                 },
                                                 )
    return self