def test_individualized_policy(self):
    """individualized_policy should return one row per sample (or n_rows) and
    add four columns (policy, effect, lower bound, upper bound) to the input,
    for both regression and classification and for Series / 2-D array targets."""
    outcome = np.random.choice([0, 1], size=(500, ))
    features = pd.DataFrame({
        'a': np.random.normal(size=500),
        'b': np.random.normal(size=500),
        'c': np.random.choice([0, 1], size=500),
        'd': np.random.choice(['a', 'b', 'c'], size=500)
    })
    feature_inds = ['a', 'b', 'c', 'd']
    categoricals = ['c', 'd']
    het_inds = ['a', 'd']

    def check_shape(policy_df, expected_rows):
        # new cols for policy, effect, upper and lower bounds
        self.assertEqual(policy_df.shape[0], expected_rows)
        self.assertEqual(policy_df.shape[1], 4 + features.shape[1])

    for target in (pd.Series(outcome), outcome.reshape(-1, 1)):
        for classification in (True, False):
            analysis = CausalAnalysis(feature_inds, categoricals, het_inds,
                                      heterogeneity_model='linear',
                                      classification=classification)
            analysis.fit(features, target)
            # all rows included by default
            check_shape(analysis.individualized_policy(features, 'a'), 500)
            check_shape(
                analysis.individualized_policy(features, 'b', n_rows=5), 5)
            # verify that we can use a scalar treatment cost
            check_shape(
                analysis.individualized_policy(features, 'c',
                                               treatment_costs=100), 500)
            # verify that we can specify per-treatment costs for each sample
            check_shape(
                analysis.individualized_policy(
                    features, 'd', alpha=0.05,
                    treatment_costs=np.random.normal(size=(500, 2))), 500)
            dictionary = analysis._individualized_policy_dict(features, 'a')
def test_policy_with_index(self):
    """A shuffled, non-contiguous DataFrame index must not leak NaNs into
    the individualized policy output (regression test for index alignment)."""
    shuffled_index = np.arange(1000)
    np.random.shuffle(shuffled_index)
    frame = pd.DataFrame(np.random.normal(0, 1, size=(1000, 2)),
                         columns=['A', 'B'],
                         index=shuffled_index)
    target = np.random.normal(0, 1, size=1000)
    analysis = CausalAnalysis(feature_inds=['A'], categorical=[])
    analysis.fit(frame, target)
    policy = analysis.individualized_policy(frame[:50], feature_index='A')
    self.assertFalse(policy.isnull().values.any())
def compute(self):
    """Computes the causal insights by running the causal configuration.

    For each not-yet-computed config: validates the nuisance model choice,
    fits a CausalAnalysis on train+test, and stores global effects, local
    effects, and per-treatment-feature policies on the config.

    Raises:
        UserConfigValidationException: if a config's nuisance_model is not
            one of the supported CausalConstants values.
    """
    # Data preparation does not depend on the individual config, so do it
    # once instead of re-concatenating train/test for every config (and the
    # original concatenated twice per config: once for X, once for y).
    is_classification = self._task_type == ModelTask.CLASSIFICATION
    full_data = pd.concat([self._train, self._test], ignore_index=True)
    X = full_data.drop([self._target_column], axis=1)
    y = full_data[self._target_column].values.ravel()
    categoricals = self._categorical_features
    if categoricals is None:
        categoricals = []
    X_test = self._test.drop([self._target_column], axis=1)

    for config in self._causal_config_list:
        if config.is_computed:
            continue

        # Validate BEFORE marking the config as computed; otherwise an
        # invalid config that raises here would be flagged computed and
        # silently skipped on a retry after the user fixes it.
        if config.nuisance_model not in [CausalConstants.AUTOML,
                                         CausalConstants.LINEAR]:
            message = (f"nuisance_model should be one of "
                       f"['{CausalConstants.AUTOML}', "
                       f"'{CausalConstants.LINEAR}'], "
                       f"got {config.nuisance_model}")
            raise UserConfigValidationException(message)

        config.is_computed = True

        analysis = CausalAnalysis(
            config.treatment_features,
            categoricals,
            heterogeneity_inds=config.heterogeneity_features,
            classification=is_classification,
            nuisance_models=config.nuisance_model,
            upper_bound_on_cat_expansion=config.max_cat_expansion,
            skip_cat_limit_checks=config.skip_cat_limit_checks,
            n_jobs=-1)
        analysis.fit(X, y)
        config.causal_analysis = analysis

        # Effects are reported on the held-out test portion only.
        config.global_effects = analysis.global_causal_effect(
            alpha=config.alpha, keep_all_levels=True)
        config.local_effects = analysis.local_causal_effect(
            X_test, alpha=config.alpha, keep_all_levels=True)

        config.policies = []
        for treatment_feature in config.treatment_features:
            local_policies = analysis.individualized_policy(
                X_test, treatment_feature,
                treatment_costs=config.treatment_cost,
                alpha=config.alpha)

            tree = analysis._policy_tree_output(
                X_test, treatment_feature,
                treatment_costs=config.treatment_cost,
                max_depth=config.max_tree_depth,
                min_samples_leaf=config.min_tree_leaf_samples,
                alpha=config.alpha)

            policy = {
                self.TREATMENT_FEATURE: treatment_feature,
                self.CONTROL_TREATMENT: tree.control_name,
                self.LOCAL_POLICIES: local_policies,
                self.POLICY_GAINS: {
                    self.RECOMMENDED_POLICY_GAINS: tree.policy_value,
                    self.TREATMENT_GAINS: tree.always_treat,
                },
                self.POLICY_TREE: tree.tree_dictionary
            }
            config.policies.append(policy)