def test_causal_manager_global_cohort_effects(self, housing_data):
        """Check the global cohort effects returned for a causal result.

        Runs a regression causal analysis with 'AveRooms' as the only
        treatment feature, requests global cohort effects for the test
        features (target column removed) and verifies that the expected
        effect fields are present. Finally checks that an unknown result
        ID is rejected with a ValueError.
        """
        train_df, test_df, target_feature = housing_data

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, None)
        manager.add(['AveRooms'])
        manager.compute()

        result_id = manager.get()[0].id
        features = test_df.drop(columns=target_feature)
        causal_data = manager.request_global_cohort_effects(
            result_id, features)

        self.verify_common_causal_data_attributes(causal_data)
        assert hasattr(causal_data, 'global_effects')

        expected_fields = (
            'point',
            'stderr',
            'zstat',
            'ci_lower',
            'ci_upper',
            'p_value',
        )
        first_effect = causal_data.global_effects[0]
        for field in expected_fields:
            assert field in first_effect

        # An ID that matches no computed result must be rejected.
        unknown_id = "incorrect_query_id"
        features = test_df.drop(columns=target_feature)
        not_found = ("Failed to find causal result with ID: "
                     "incorrect_query_id")
        with pytest.raises(ValueError, match=not_found):
            manager.request_global_cohort_effects(unknown_id, features)
Exemple #2
0
    def _initialize_managers(self):
        """Create the analysis managers and register them.

        Instantiates the causal, counterfactual, error analysis and
        explainer managers from this object's model, train/test data,
        target column, task type, classes and categorical features, and
        stores all four (in that order) in ``self._managers``.
        """
        self._causal_manager = CausalManager(
            self.train,
            self.test,
            self.target_column,
            self.task_type,
            self.categorical_features)

        self._counterfactual_manager = CounterfactualManager(
            model=self.model,
            train=self.train,
            test=self.test,
            target_column=self.target_column,
            task_type=self.task_type,
            categorical_features=self.categorical_features)

        # Error analysis is built from the test data only.
        self._error_analysis_manager = ErrorAnalysisManager(
            self.model,
            self.test,
            self.target_column,
            self._classes,
            self.categorical_features)

        self._explainer_manager = ExplainerManager(
            self.model,
            self.train,
            self.test,
            self.target_column,
            self._classes,
            categorical_features=self.categorical_features)

        self._managers = [
            self._causal_manager,
            self._counterfactual_manager,
            self._error_analysis_manager,
            self._explainer_manager,
        ]
Exemple #3
0
    def test_causal_train_test_categories(self, parks_data):
        """Check that test-only categories are rejected when adding.

        Appends two rows with the state 'indiana' to a copy of the test
        data and expects ``add`` to raise UserConfigValidationException
        reporting 'indiana' as a category missing from the train data.
        """
        train_df, test_df, target_feature = parks_data

        # Work on a copy so the fixture data is not modified.
        test_df = test_df.copy()
        extra_rows = (
            ['indiana', 301, 'trees', 78],
            ['indiana', 222, 'trees', 81],
        )
        for row in extra_rows:
            # Append at the next positional label.
            test_df.loc[len(test_df.index)] = row

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, ['state', 'attraction'])

        # The square brackets are escaped because pytest treats `match`
        # as a regular expression.
        expected_error = (
            "Causal analysis requires that every category of "
            "categorical features present in the test data be "
            "also present in the train data. "
            "Categories missing from train data: "
            "{'state': \\['indiana'\\]}")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            manager.add(['state'],
                        skip_cat_limit_checks=True,
                        upper_bound_on_cat_expansion=50)
def causal_result(parks_data):
    """Return the first computed causal result for the parks data.

    Builds a regression CausalManager with 'state' and 'attraction' as
    categorical features, adds 'attraction' as the treatment feature,
    runs the computation and returns the first entry of ``get()``.
    """
    train_df, test_df, target_feature = parks_data
    categorical_features = ['state', 'attraction']
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, categorical_features)

    manager.add(
        ['attraction'],
        skip_cat_limit_checks=True,
        upper_bound_on_cat_expansion=50,
    )
    manager.compute()

    results = manager.get()
    return results[0]
Exemple #5
0
    def test_categorical_policy(self, housing_data_categorical):
        """Check policy trees built from categorical features only.

        Restricts train and test data to the object-dtype columns plus
        the target, runs a regression causal analysis with
        'HouseAge_CAT' and 'Population_CAT' as treatments, and verifies
        that every policy tree in the dashboard data has a non-leaf root
        that splits on a categorical feature with an 'eq' comparison
        against either 'very-old' or 'high'.
        """
        train_df, test_df, target_feature = housing_data_categorical
        categoricals = train_df.select_dtypes(include=[object]).columns

        # Just use categoricals to force categorical policy tree
        new_features = list(categoricals) + [target_feature]
        train_df = train_df[new_features]
        test_df = test_df[new_features]

        # Sample data for easier debug
        test_df = test_df[:20]

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, categoricals)

        # The return value of add() is not used: the computed result is
        # read back through get() once compute() has run.
        manager.add(['HouseAge_CAT', 'Population_CAT'],
                    random_state=42)
        manager.compute()
        result = manager.get()[0]
        dashboard_data = result._get_dashboard_data()

        policies = dashboard_data['policies']
        assert len(policies) > 0
        for policy in policies:
            tree = policy['policy_tree']
            assert not tree['leaf']
            assert tree['feature'] in categoricals
            assert tree['right_comparison'] == 'eq'
            assert tree['comparison_value'] in ('very-old', 'high')
Exemple #6
0
    def test_causal_no_categoricals(self, housing_data):
        """Check a causal run with no categorical features configured.

        Uses 'AveRooms' as the single treatment feature and verifies
        that the first result holds exactly one policy and exactly that
        one treatment feature in its config.
        """
        train_df, test_df, target_feature = housing_data

        manager = CausalManager(
            train_df,
            test_df,
            target_feature,
            ModelTask.REGRESSION,
            None,
        )
        manager.add(['AveRooms'])
        manager.compute()
        first_result = manager.get()[0]

        assert len(first_result.policies) == 1
        treatments = first_result.config.treatment_features
        assert len(treatments) == 1
        assert treatments[0] == 'AveRooms'
    def test_causal_manager_global_cohort_policy(self, housing_data):
        """Check the global cohort policy returned for a causal result.

        Runs a regression causal analysis with 'AveRooms' as the only
        treatment feature, requests the global cohort policy for the
        first five test rows (target column removed) and verifies that
        one local policy is returned per row for the 'AveRooms'
        treatment. Finally checks that an unknown result ID is rejected
        with a ValueError by the policy request itself.
        """
        train_df, test_df, target_feature = housing_data

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, None)
        manager.add(['AveRooms'])
        manager.compute()

        result_id = manager.get()[0].id
        X_test = test_df.head(5).drop(target_feature, axis=1)
        causal_data = manager.request_global_cohort_policy(result_id, X_test)

        self.verify_common_causal_data_attributes(causal_data)
        assert hasattr(causal_data, 'policies')
        assert len(causal_data.policies[0].local_policies) == X_test.shape[0]
        assert causal_data.policies[0].treatment_feature == "AveRooms"

        incorrect_query_id = "incorrect_query_id"
        X_test = test_df.drop(target_feature, axis=1)
        # Call the policy endpoint here (not the effects endpoint) so that
        # this test covers the error path of the method it is named after.
        with pytest.raises(ValueError,
                           match="Failed to find causal result with ID: "
                           "incorrect_query_id"):
            manager.request_global_cohort_policy(incorrect_query_id, X_test)
Exemple #8
0
    def test_causal_cat_expansion(self, parks_data):
        """Check that the default category-expansion limit is enforced.

        Adds 'state' as a treatment with default settings and expects a
        ValueError whose message contains "Increase the value 50".
        """
        train_df, test_df, target_feature = parks_data
        categorical_features = ['state', 'attraction']

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, categorical_features)

        # Both calls stay inside the context manager: the error may come
        # from either add() or compute().
        with pytest.raises(ValueError, match="Increase the value 50"):
            manager.add(['state'])
            manager.compute()
Exemple #9
0
def cost_manager(housing_data):
    """Return a regression CausalManager over a 7-row slice of test data.

    The train data and target feature come from ``housing_data``
    unchanged; no categorical features are configured.
    """
    train_df, full_test_df, target_feature = housing_data

    # Keep only the first seven test rows.
    small_test_df = full_test_df[:7]
    manager = CausalManager(train_df, small_test_df, target_feature,
                            ModelTask.REGRESSION, None)
    return manager
    def test_causal_manager_local_instance_effects(self, housing_data):
        """Check local instance effects and the request's input validation.

        Runs a regression causal analysis with 'AveRooms' as the only
        treatment feature, requests local effects for a single test row
        and verifies the expected fields are present. Then checks three
        rejected inputs: an unknown result ID (ValueError), a numpy
        array instead of a DataFrame, and more than one row of data
        (both UserConfigValidationException).
        """
        train_df, test_df, target_feature = housing_data

        manager = CausalManager(train_df, test_df, target_feature,
                                ModelTask.REGRESSION, None)
        manager.add(['AveRooms'])
        manager.compute()

        result_id = manager.get()[0].id
        single_row = test_df.head(1).drop(columns=target_feature)
        causal_data = manager.request_local_instance_effects(
            result_id, single_row)

        self.verify_common_causal_data_attributes(causal_data)
        assert hasattr(causal_data, 'local_effects')

        expected_fields = (
            'sample',
            'outcome',
            'feature',
            'feature_value',
            'point',
            'stderr',
            'zstat',
            'ci_lower',
            'ci_upper',
            'p_value',
        )
        first_effect = causal_data.local_effects[0][0]
        for field in expected_fields:
            assert field in first_effect

        # Unknown result ID.
        unknown_id = "incorrect_query_id"
        all_rows = test_df.drop(columns=target_feature)
        not_found = ("Failed to find causal result with ID: "
                     "incorrect_query_id")
        with pytest.raises(ValueError, match=not_found):
            manager.request_local_instance_effects(unknown_id, all_rows)

        # Raw numpy array instead of a pandas DataFrame.
        as_array = test_df.head(1).drop(columns=target_feature).values
        wrong_type = ("Data is of type <class 'numpy.ndarray'>"
                      " but it must be a pandas DataFrame.")
        with pytest.raises(UserConfigValidationException, match=wrong_type):
            manager.request_local_instance_effects(result_id, as_array)

        # More than one row of data.
        five_rows = test_df.head(5).drop(columns=target_feature)
        too_many_rows = ("Only one row of data is allowed for "
                         "local causal effects.")
        with pytest.raises(UserConfigValidationException,
                           match=too_many_rows):
            manager.request_local_instance_effects(result_id, five_rows)