Esempio n. 1
0
 def test_empty_hinds(self):
     """Smoke-test ``CausalAnalysis`` with empty heterogeneity index lists.

     For each heterogeneity model ('linear', 'forest') and each
     ``classification`` setting, fits on 500 random rows and then calls the
     global/local effect methods and the per-feature policy helpers. No
     return value is inspected; the test passes when none of the calls
     raise.
     """
     for h_model in ['linear', 'forest']:
         for classification in [True, False]:
             # Columns 0-4 are continuous, column 5 is binary and column 6
             # has three levels; 5 and 6 are declared categorical below.
             X1 = np.random.normal(0, 1, size=(500, 5))
             X2 = np.random.choice([0, 1], size=(500, 1))
             X3 = np.random.choice([0, 1, 2], size=(500, 1))
             X = np.hstack((X1, X2, X3))
             X_df = pd.DataFrame(X, columns=[f"x{i} " for i in range(7)])
             y = np.random.choice([0, 1], size=(500, ))
             # model: three empty heterogeneity lists, matching the three
             # entries of feat_inds
             hetero_inds = [[], [], []]
             feat_inds = [1, 3, 5]
             categorical = [5, 6]
             ca = CausalAnalysis(feat_inds,
                                 categorical,
                                 heterogeneity_inds=hetero_inds,
                                 classification=classification,
                                 nuisance_models='linear',
                                 heterogeneity_model=h_model,
                                 n_jobs=-1)
             ca.fit(X_df, y)
             # Results are deliberately discarded: only successful
             # completion of each call is being checked.
             ca.global_causal_effect(alpha=0.05)
             ca.local_causal_effect(X_df, alpha=0.05)
             for ind in feat_inds:
                 ca._policy_tree_output(X_df, ind)
                 ca._individualized_policy_dict(X_df, ind)
Esempio n. 2
0
    def test_individualized_policy(self):
        """Check the shape of ``individualized_policy`` results.

        A ``CausalAnalysis`` is fitted on 500 random rows for every
        combination of outcome container (Series / column vector) and
        ``classification`` flag. Each feature is then queried with a
        different option (defaults, ``n_rows``, scalar cost, per-sample
        per-treatment costs) and the row/column counts are verified.
        """
        n_samples = 500
        outcome = np.random.choice([0, 1], size=(n_samples, ))
        X = pd.DataFrame({
            'a': np.random.normal(size=n_samples),
            'b': np.random.normal(size=n_samples),
            'c': np.random.choice([0, 1], size=n_samples),
            'd': np.random.choice(['a', 'b', 'c'], size=n_samples)
        })
        feature_names = ['a', 'b', 'c', 'd']
        categorical_names = ['c', 'd']
        heterogeneity_names = ['a', 'd']

        def check_shape(table, expected_rows):
            # The result carries every input column plus new cols for
            # policy, effect, upper and lower bounds.
            self.assertEqual(table.shape[0], expected_rows)
            self.assertEqual(table.shape[1], 4 + X.shape[1])

        outcome_variants = (pd.Series(outcome), outcome.reshape(-1, 1))
        for y in outcome_variants:
            for classification in (True, False):
                ca = CausalAnalysis(feature_names,
                                    categorical_names,
                                    heterogeneity_names,
                                    heterogeneity_model='linear',
                                    classification=classification)
                ca.fit(X, y)

                # all rows included by default
                check_shape(ca.individualized_policy(X, 'a'), n_samples)

                # an explicit n_rows limits the number of returned rows
                check_shape(ca.individualized_policy(X, 'b', n_rows=5), 5)

                # verify that we can use a scalar treatment cost
                check_shape(
                    ca.individualized_policy(X, 'c', treatment_costs=100),
                    n_samples)

                # verify that we can specify per-treatment costs for each sample
                per_sample_costs = np.random.normal(size=(n_samples, 2))
                check_shape(
                    ca.individualized_policy(X,
                                             'd',
                                             alpha=0.05,
                                             treatment_costs=per_sample_costs),
                    n_samples)

                dictionary = ca._individualized_policy_dict(X, 'a')