Ejemplo n.º 1
0
    def _save(self, path):
        """Persist every error analysis config/report pair under ``path``.

        For each pair a new DirectoryManager is created for ``path``; the
        config is written to ``config.json`` in its config directory and
        the report to ``report.json`` in its data directory.

        :param path: The directory path to save the ErrorAnalysisManager to.
        :type path: str
        :raises ConfigAndResultMismatchException: If the number of stored
            configs differs from the number of stored reports.
        """
        Path(path).mkdir(parents=True, exist_ok=True)

        config_count = len(self._ea_config_list)
        report_count = len(self._ea_report_list)
        if config_count != report_count:
            raise ConfigAndResultMismatchException(
                "The number of error analysis configs {0} doesn't match the "
                "number of results {1}".format(config_count, report_count))

        # Both lists have the same length at this point, so zip pairs each
        # config with the report stored at the same position.
        for ea_config, ea_report in zip(self._ea_config_list,
                                        self._ea_report_list):
            directory_manager = DirectoryManager(parent_directory_path=path)

            # save the config
            config_path = (
                directory_manager.create_config_directory() / 'config.json')
            with open(config_path, 'w') as config_file:
                json.dump(ea_config, config_file,
                          default=config_json_converter)

            # save the report
            report_path = (
                directory_manager.create_data_directory() / 'report.json')
            with open(report_path, 'w') as report_file:
                json.dump(ea_report, report_file,
                          default=report_json_converter)
Ejemplo n.º 2
0
def validate_state_directory(path, manager_type):
    """Assert that a saved manager directory has the expected layout.

    ``manager_type`` must be one of the entries of ``path``. Every entry
    below ``path / manager_type`` is passed to the ``UUID`` constructor,
    and the presence of its config, data and generators directories is
    compared with what is expected for the given manager type. Manager
    types without a listed expectation only get the UUID check.

    :param path: The directory holding the saved state; it is combined
        with ``manager_type`` using the ``/`` operator.
    :param manager_type: The name of the manager directory to validate.
    :raises AssertionError: If the layout differs from the expectation.
    """
    # Expected presence of the (config, data, generators) directories.
    expected_layouts = {
        ManagerNames.EXPLAINER: (False, True, False),
        ManagerNames.COUNTERFACTUAL: (True, True, True),
        ManagerNames.ERROR_ANALYSIS: (True, True, False),
        ManagerNames.CAUSAL: (False, True, False),
    }

    assert manager_type in os.listdir(path)

    manager_path = path / manager_type
    for component_name in os.listdir(manager_path):
        # Test if the component directory has UUID structure
        UUID(component_name, version=4)

        dm = DirectoryManager(manager_path, component_name)
        config_path = dm.get_config_directory()
        data_path = dm.get_data_directory()
        generators_path = dm.get_generators_directory()

        layout = expected_layouts.get(manager_type)
        if layout is None:
            continue

        config_expected, data_expected, generators_expected = layout
        assert config_path.exists() == config_expected
        assert data_path.exists() == data_expected
        assert generators_path.exists() == generators_expected
    def _load(path, rai_insights):
        """Rebuild a CausalManager from the state saved under ``path``.

        :param path: The directory path to load the CausalManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The CausalManager manager after loading.
        :rtype: CausalManager
        """
        # Allocate the manager without running __init__; its attributes
        # are written straight into the instance dictionary below.
        inst = CausalManager.__new__(CausalManager)
        state = inst.__dict__

        # Rehydrate results: one CausalResult per saved sub-directory
        state['_results'] = [
            CausalResult.load(
                DirectoryManager(
                    parent_directory_path=path,
                    sub_directory_name=causal_dir).get_data_directory())
            for causal_dir in DirectoryManager.list_sub_directories(path)
        ]

        # Rehydrate model analysis data from the parent RAIInsights
        state.update({
            '_train': rai_insights.train,
            '_test': rai_insights.test,
            '_target_column': rai_insights.target_column,
            '_task_type': rai_insights.task_type,
            '_categorical_features': rai_insights.categorical_features,
        })

        return inst
    def _save(self, path):
        """
        Write the DataBalanceManager state below the given path.

        The top directory is always created. The manager description,
        the computed measures (when present) and the data frame are only
        written when the manager has been added.

        :param path: The directory path to save the DataBalanceManager to.
        :type path: str
        """
        Path(path).mkdir(parents=True, exist_ok=True)

        # A manager that was never added to RAIInsights saves no configs
        if not self._is_added:
            return

        directory_manager = DirectoryManager(parent_directory_path=path)
        config_directory = directory_manager.create_config_directory()

        with open(config_directory / MANAGER_JSON, "w") as manager_file:
            json.dump(self.list(), manager_file)

        # Measures are only saved once they have been computed
        if self._data_balance_measures:
            with open(config_directory / MEASURES_JSON, "w") as measures_file:
                json.dump(self._data_balance_measures, measures_file)

        data_file_path = directory_manager.create_data_directory() / DATA_JSON
        self._df.to_json(data_file_path, orient="split")
    def test_counterfactual_manager_save_load(self, tmpdir):
        """Round-trip two counterfactual configs through save and load.

        After the first reload the saved ``explainer.pkl`` files are
        deleted, and a second load must still yield configs that carry an
        explainer.

        :param tmpdir: Temporary directory in which the save directory is
            created.
        """
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train['target'] = y_train
        X_test['target'] = y_test

        rai_insights = RAIInsights(model=model,
                                   train=X_train,
                                   test=X_test.iloc[0:10],
                                   target_column='target',
                                   task_type='classification')

        # Two configs that differ only in the desired class
        for desired_class in (2, 1):
            rai_insights.counterfactual.add(
                total_CFs=10,
                desired_class=desired_class,
                features_to_vary=[feature_names[0]],
                permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.compute()

        assert len(rai_insights.counterfactual.get()) == 2
        assert rai_insights.counterfactual.get()[0] is not None

        save_dir = tmpdir.mkdir('save-dir')
        rai_insights.save(save_dir)
        rai_insights_copy = RAIInsights.load(save_dir)

        assert len(rai_insights_copy.counterfactual.get()) == 2
        assert rai_insights_copy.counterfactual.get()[0] is not None

        # Delete the pickled dice-ml explainers so that the next load has
        # to re-train them rather than read them from disk
        counterfactual_path = save_dir / "counterfactual"
        for cf_dir in DirectoryManager.list_sub_directories(
                counterfactual_path):
            dm = DirectoryManager(parent_directory_path=counterfactual_path,
                                  sub_directory_name=cf_dir)
            os.remove(dm.get_generators_directory() / "explainer.pkl")

        rai_insights_copy_new = RAIInsights.load(save_dir)
        reloaded_configs = \
            rai_insights_copy_new.counterfactual._counterfactual_config_list
        assert len(reloaded_configs) == 2
        for reloaded_config in reloaded_configs:
            assert reloaded_config.explainer is not None
Ejemplo n.º 6
0
    def _load(path, rai_insights):
        """Rebuild an ErrorAnalysisManager from the state under ``path``.

        Every sub-directory contributes one config (``config.json``) and
        one report (``report.json``); each report is validated against the
        error analysis schema before it is kept.

        :param path: The directory path to load the ErrorAnalysisManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The ErrorAnalysisManager manager after loading.
        :rtype: ErrorAnalysisManager
        """
        # Allocate the manager without running __init__, similar to pickle;
        # its attributes are written into the instance dictionary below.
        inst = ErrorAnalysisManager.__new__(ErrorAnalysisManager)

        loaded_configs = []
        loaded_reports = []
        for ea_dir in DirectoryManager.list_sub_directories(path):
            dm = DirectoryManager(parent_directory_path=path,
                                  sub_directory_name=ea_dir)

            config_file_path = dm.get_config_directory() / 'config.json'
            with open(config_file_path, 'r') as config_file:
                loaded_configs.append(
                    json.load(config_file, object_hook=as_error_config))

            report_file_path = dm.get_data_directory() / 'report.json'
            with open(report_file_path, 'r') as report_file:
                report = json.load(report_file, object_hook=as_error_report)
            # Validate the serialized output against schema
            schema = ErrorAnalysisManager._get_error_analysis_schema()
            jsonschema.validate(json.loads(report.to_json()), schema)
            loaded_reports.append(report)

        # Split the test data into the labels and the feature frame
        categorical_features = rai_insights.categorical_features
        target_column = rai_insights.target_column
        true_y = rai_insights.test[target_column]
        dataset = rai_insights.test.drop(columns=[target_column])
        feature_names = list(dataset.columns)
        analyzer = ModelAnalyzer(rai_insights.model, dataset, true_y,
                                 feature_names, categorical_features)

        inst.__dict__.update({
            '_ea_report_list': loaded_reports,
            '_ea_config_list': loaded_configs,
            '_categorical_features': categorical_features,
            '_dataset': dataset,
            '_true_y': true_y,
            '_feature_names': feature_names,
            '_analyzer': analyzer,
        })
        return inst
    def _load(path, rai_insights):
        """Rebuild an ExplainerManager from the state saved under ``path``.

        Only the first sub-directory found under ``path`` is read. When
        there is none, the manager is restored as neither added nor run
        and without an explanation.

        :param path: The directory path to load the ExplainerManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The ExplainerManager manager after loading.
        :rtype: ExplainerManager
        """
        # Allocate the manager without running __init__, similar to pickle
        inst = ExplainerManager.__new__(ExplainerManager)
        state = inst.__dict__

        # Values used when nothing has been saved
        is_run = False
        is_added = False
        explanation = None

        saved_dirs = DirectoryManager.list_sub_directories(path)
        if len(saved_dirs) != 0:
            data_directory = DirectoryManager(
                parent_directory_path=path,
                sub_directory_name=saved_dirs[0]).get_data_directory()

            with open(data_directory / META_JSON, 'r') as meta_file:
                meta = json.loads(meta_file.read())
            is_run = meta[IS_RUN]
            is_added = meta[IS_ADDED]

            # The explanation is optional in the saved state
            explanation_path = data_directory / ManagerNames.EXPLAINER
            if explanation_path.exists():
                explanation = load_explanation(explanation_path)

        state['_' + IS_RUN] = is_run
        state['_' + IS_ADDED] = is_added
        state[EXPLANATION] = explanation

        state['_' + MODEL] = rai_insights.model
        state['_' + CLASSES] = rai_insights._classes
        state['_' + CATEGORICAL_FEATURES] = rai_insights.categorical_features

        # The example frames are the train/test data without the target
        target_column = rai_insights.target_column
        train_features = rai_insights.train.drop(columns=[target_column])
        test_features = rai_insights.test.drop(columns=[target_column])
        state[U_INITIALIZATION_EXAMPLES] = train_features
        state[U_EVALUATION_EXAMPLES] = test_features
        state['_' + FEATURES] = list(train_features.columns)

        # reset the surrogate model
        inst._initialize_surrogate_model()

        return inst
    def _save(self, path):
        """Write every causal result below the given path.

        :param path: The directory path to save the CausalManager to.
        :type path: str
        """
        Path(path).mkdir(parents=True, exist_ok=True)

        # Save results to disk; each result gets its own DirectoryManager
        # and is stored in that manager's data directory.
        for causal_result in self._results:
            result_dm = DirectoryManager(parent_directory_path=path)
            causal_result.save(result_dm.create_data_directory())
Ejemplo n.º 9
0
    def test_causal_save_and_load(self, housing_data, tmpdir):
        """Save and reload causal results, with and without the model file.

        The first load must restore every part of the result. After
        ``causal_analysis.pkl`` is deleted, loading must warn and restore
        everything except the causal analysis itself.

        :param housing_data: Unpacked into the train frame, the test frame
            and the target feature.
        :param tmpdir: Temporary directory in which the save directory is
            created.
        """
        train_df, test_df, target_feature = housing_data

        save_dir = tmpdir.mkdir('save-dir')

        insights = RAIInsights(None, train_df, test_df, target_feature,
                               ModelTask.REGRESSION)

        insights.causal.add(['AveRooms'])
        insights.compute()

        pre_result = insights.causal.get()[0]

        insights.causal._save(save_dir)
        manager = insights.causal._load(save_dir, insights)
        post_result = manager.get()[0]
        assert post_result.id == pre_result.id
        for attribute in ('causal_analysis', 'global_effects',
                          'local_effects', 'policies'):
            assert getattr(post_result, attribute) is not None

        # Remove the causal analysis models to test the loading of
        # causal models in case there is error in loading of the causal
        # models.
        for causal_dir in DirectoryManager.list_sub_directories(save_dir):
            dm = DirectoryManager(parent_directory_path=save_dir,
                                  sub_directory_name=causal_dir)
            os.remove(dm.get_data_directory() / "causal_analysis.pkl")

        model_load_err = ('ERROR-LOADING-EXPLAINER: '
                          'There was an error loading the explainer. '
                          'Some of RAI dashboard features may not work.')
        with pytest.warns(UserWarning, match=model_load_err):
            manager = insights.causal._load(save_dir, insights)
        post_result = manager.get()[0]
        assert post_result.id == pre_result.id
        assert post_result.causal_analysis is None
        for attribute in ('global_effects', 'local_effects', 'policies'):
            assert getattr(post_result, attribute) is not None
Ejemplo n.º 10
0
    def _save(self, path):
        """Write every counterfactual config below the given path.

        For each config a new DirectoryManager is created for ``path``;
        the config, its result and its explainer are saved to that
        manager's config, data and generators directories respectively.

        :param path: The directory path to save the CounterfactualManager to.
        :type path: str
        """
        Path(path).mkdir(parents=True, exist_ok=True)

        for cf_config in self._counterfactual_config_list:
            dm = DirectoryManager(parent_directory_path=path)

            config_directory = dm.create_config_directory()
            cf_config.save_config(config_directory)

            data_directory = dm.create_data_directory()
            cf_config.save_result(data_directory)

            generators_directory = dm.create_generators_directory()
            cf_config.save_explainer(generators_directory)
    def _save(self, path):
        """Write the ExplainerManager state below the given path.

        The top directory is always created; the explanation (when there
        is one) and the run/added flags are only written when the manager
        has been added.

        :param path: The directory path to save the ExplainerManager to.
        :type path: str
        """
        Path(path).mkdir(parents=True, exist_ok=True)
        if not self._is_added:
            return

        dm = DirectoryManager(parent_directory_path=path)
        data_directory = dm.create_data_directory()

        # save the explanation
        if self._explanation:
            explanation_path = data_directory / ManagerNames.EXPLAINER
            save_explanation(self._explanation, explanation_path)

        # save the flags that _load reads back from the meta file
        meta = {IS_RUN: self._is_run, IS_ADDED: self._is_added}
        with open(data_directory / META_JSON, 'w') as meta_file:
            json.dump(meta, meta_file)
Ejemplo n.º 12
0
    def test_directory_manager(self, tmpdir, create_parent_directory):
        """Exercise DirectoryManager with a named and an unnamed sub-dir.

        :param tmpdir: Temporary directory that holds the parent directory.
        :param create_parent_directory: Whether the parent directory is
            created before the first DirectoryManager is constructed.
        """
        parent_directory = (tmpdir.mkdir('parent_directory')
                            if create_parent_directory
                            else tmpdir / 'parent_directory')

        # Manager with an explicit sub-directory name
        known_dm = DirectoryManager(parent_directory_path=parent_directory,
                                    sub_directory_name='known')

        assert known_dm.parent_directory_path.exists()
        assert known_dm.sub_directory_name == 'known'
        known_path = (
            known_dm.parent_directory_path / known_dm.sub_directory_name)
        assert known_path.exists()

        self._verify_directory_manager_operations(known_dm)

        sub_directories = DirectoryManager.list_sub_directories(
            parent_directory)
        assert isinstance(sub_directories, list)
        assert len(sub_directories) == 1
        assert 'known' in sub_directories

        # Manager that has to pick its own sub-directory name
        generated_dm = DirectoryManager(
            parent_directory_path=parent_directory)

        assert generated_dm.parent_directory_path.exists()
        assert generated_dm.sub_directory_name is not None
        generated_path = (generated_dm.parent_directory_path /
                          generated_dm.sub_directory_name)
        assert generated_path.exists()

        self._verify_directory_manager_operations(generated_dm)

        sub_directories = DirectoryManager.list_sub_directories(
            parent_directory)
        assert isinstance(sub_directories, list)
        assert len(sub_directories) == 2
        assert generated_dm.sub_directory_name in sub_directories
Ejemplo n.º 13
0
    def _load(path, rai_insights):
        """Rebuild a CounterfactualManager from the state under ``path``.

        Each sub-directory yields one counterfactual config together with
        its result and explainer. A config whose explainer is ``None``
        after loading triggers a warning and gets a newly created
        explainer. A loaded counterfactual object is validated against
        the schema for its metadata version.

        :param path: The directory path to load the CounterfactualManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The CounterfactualManager manager after loading.
        :rtype: CounterfactualManager
        """
        # Allocate the manager without running __init__
        inst = CounterfactualManager.__new__(CounterfactualManager)
        state = inst.__dict__

        # Rehydrate model analysis data
        state[CounterfactualManager._MODEL] = rai_insights.model
        state[CounterfactualManager._TRAIN] = rai_insights.train
        state[CounterfactualManager._TEST] = rai_insights.test
        state[CounterfactualManager._TARGET_COLUMN] = \
            rai_insights.target_column
        state[CounterfactualManager._TASK_TYPE] = rai_insights.task_type
        state[CounterfactualManager._CATEGORICAL_FEATURES] = \
            rai_insights.categorical_features

        # The list is attached to the instance before the loop, so it is
        # already in place when the instance is asked for a new explainer.
        loaded_configs = []
        state[CounterfactualManager._COUNTERFACTUAL_CONFIG_LIST] = \
            loaded_configs

        for cf_dir in DirectoryManager.list_sub_directories(path):
            dm = DirectoryManager(parent_directory_path=path,
                                  sub_directory_name=cf_dir)

            cf_config = CounterfactualConfig.load_config(
                dm.get_config_directory())
            cf_config.load_result(dm.get_data_directory())
            cf_config.load_explainer(dm.get_generators_directory())

            if cf_config.explainer is None:
                warnings.warn(
                    'ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                    'There was an error loading the '
                    'counterfactual explainer model. '
                    'Retraining the counterfactual '
                    'explainer.')
                cf_config.explainer = inst._create_diceml_explainer(
                    cf_config.method, cf_config.continuous_features)

            cf_obj = cf_config.counterfactual_obj
            if cf_obj is not None:
                # Validate the serialized output against schema
                schema = CounterfactualManager._get_counterfactual_schema(
                    version=cf_obj.metadata['version'])
                jsonschema.validate(json.loads(cf_obj.to_json()), schema)

            loaded_configs.append(cf_config)

        return inst
    def test_counterfactual_manager_save_load(self, tmpdir):
        """Round-trip two counterfactual configs through save and load.

        The config ids must survive the round trip. After the saved
        ``explainer.pkl`` files are deleted, loading must emit the
        explainer warning and still yield configs that carry an explainer.

        :param tmpdir: Temporary directory in which the save directory is
            created.
        """
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train['target'] = y_train
        X_test['target'] = y_test

        rai_insights = RAIInsights(
            model=model,
            train=X_train,
            test=X_test.iloc[0:10],
            target_column='target',
            task_type='classification')

        # Two configs that differ only in the desired class
        for desired_class in (2, 1):
            rai_insights.counterfactual.add(
                total_CFs=10, desired_class=desired_class,
                features_to_vary=[feature_names[0]],
                permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.compute()

        configs_before = \
            rai_insights.counterfactual._counterfactual_config_list
        assert len(configs_before) == 2
        assert len(rai_insights.counterfactual.get()) == 2
        assert rai_insights.counterfactual.get()[0] is not None
        assert rai_insights.counterfactual.get()[1] is not None

        save_dir = tmpdir.mkdir('save-dir')
        rai_insights.save(save_dir)
        rai_insights_copy = RAIInsights.load(save_dir)

        configs_after = \
            rai_insights_copy.counterfactual._counterfactual_config_list
        assert len(rai_insights_copy.counterfactual.get()) == 2
        assert rai_insights_copy.counterfactual.get()[0] is not None
        assert rai_insights_copy.counterfactual.get()[1] is not None

        # The load order is not pinned down, so each id only has to show
        # up at one of the two positions.
        assert configs_before[0].id in \
            [configs_after[0].id, configs_after[1].id]
        assert configs_before[1].id in \
            [configs_after[0].id, configs_after[1].id]

        # Delete the pickled dice-ml explainers so that the next load has
        # to re-train them rather than read them from disk
        counterfactual_path = save_dir / "counterfactual"
        for cf_dir in DirectoryManager.list_sub_directories(
                counterfactual_path):
            dm = DirectoryManager(parent_directory_path=counterfactual_path,
                                  sub_directory_name=cf_dir)
            os.remove(dm.get_generators_directory() / "explainer.pkl")

        expected_warning = ('ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                            'There was an error loading the '
                            'counterfactual explainer model. '
                            'Retraining the counterfactual '
                            'explainer.')
        with pytest.warns(UserWarning, match=expected_warning):
            rai_insights_copy_new = RAIInsights.load(save_dir)

        reloaded_configs = \
            rai_insights_copy_new.counterfactual._counterfactual_config_list
        assert len(reloaded_configs) == 2
        for reloaded_config in reloaded_configs:
            assert reloaded_config.explainer is not None
    def _load(path, rai_insights):
        """
        Load the DataBalanceManager from the given path.

        Only the first sub-directory found under ``path`` is read. Values
        that were not saved fall back to what the parent RAIInsights
        provides; in particular the data frame falls back to the
        concatenation of the train and test data, which is only built
        when no saved data frame was read.

        :param path: The directory path to load the DataBalanceManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The DataBalanceManager after loading.
        :rtype: DataBalanceManager
        """
        # create the DataBalanceManager without any properties using the
        # __new__ function, similar to pickle
        inst = DataBalanceManager.__new__(DataBalanceManager)

        inst.__dict__["_train"] = rai_insights.train
        inst.__dict__["_test"] = rai_insights.test

        # Defaults used when nothing has been saved for this manager
        is_added = False
        cols_of_interest = None
        task_type = rai_insights.task_type
        target_column = rai_insights.target_column
        classes = (list(map(str, rai_insights._classes))
                   if rai_insights._classes is not None else [])
        df = None
        data_balance_measures = None

        all_db_dirs = DirectoryManager.list_sub_directories(path)
        if all_db_dirs:
            dir_manager = DirectoryManager(parent_directory_path=path,
                                           sub_directory_name=all_db_dirs[0])
            config_dir = dir_manager.get_config_directory()

            # Load manager
            with open(config_dir / MANAGER_JSON, "r") as f:
                manager_info = json.load(f)
            is_added = manager_info[Keys.IS_ADDED]
            task_type = manager_info[Keys.TASK_TYPE]
            cols_of_interest = manager_info[Keys.COLS_OF_INTEREST]
            target_column = manager_info[Keys.TARGET_COLUMN]
            classes = manager_info[Keys.CLASSES]

            # Load from data json
            data_path = dir_manager.get_data_directory() / DATA_JSON
            if data_path.exists():
                df = pd.read_json(data_path, orient="split")

            # Load measures
            measures_path = config_dir / MEASURES_JSON
            if measures_path.exists():
                with open(measures_path, "r") as f:
                    data_balance_measures = json.load(f)

        # Build the fallback frame lazily: concatenating train and test is
        # wasted work whenever a saved data frame has been read above.
        if df is None:
            df = pd.concat([rai_insights.train, rai_insights.test])

        inst.__dict__["_is_added"] = is_added
        inst.__dict__["_task_type"] = task_type
        inst.__dict__["_cols_of_interest"] = cols_of_interest
        inst.__dict__["_target_column"] = target_column
        inst.__dict__["_classes"] = classes
        inst.__dict__["_df"] = df
        inst.__dict__["_data_balance_measures"] = data_balance_measures

        return inst