def test_sets_data_balance_measures(
    self,
    adult_data_feature_balance_measures,
    adult_data_distribution_balance_measures,
    adult_data_aggregate_balance_measures,
):
    """Setting measures populates a dict keyed by each measure type."""
    manager = DataBalanceManager(
        train=None,
        test=None,
        target_column=None,
        classes=None,
        task_type=None,
    )
    assert manager._data_balance_measures is None

    manager._set_data_balance_measures(
        adult_data_feature_balance_measures,
        adult_data_distribution_balance_measures,
        adult_data_aggregate_balance_measures,
    )
    assert manager._data_balance_measures is not None

    measures = manager._data_balance_measures
    assert isinstance(measures, dict)
    # Every measure family must be present in the stored dict.
    for key in (
        FEATURE_BALANCE_MEASURES_KEY,
        DISTRIBUTION_BALANCE_MEASURES_KEY,
        AGGREGATE_BALANCE_MEASURES_KEY,
    ):
        assert key in measures
def test_validate_with_valid_input(self, adult_data):
    """A fully configured manager passes _validate without raising."""
    train_df, test_df, categorical_cols, target_col, classes = adult_data
    manager = DataBalanceManager(
        target_column=target_col,
        classes=classes,
        train=train_df,
        test=test_df,
        task_type=TaskType.CLASSIFICATION,
    )
    manager._cols_of_interest = categorical_cols
    # should not raise any exceptions
    manager._validate()
def test_validate_with_invalid_classes(self, adult_data, classes):
    """_validate raises ValueError for class labels that do not match the data."""
    train_df, test_df, _, target_col, _ = adult_data
    with pytest.raises(ValueError):
        bad_manager = DataBalanceManager(
            target_column=target_col,
            classes=classes,
            train=train_df,
            test=test_df,
            task_type=TaskType.CLASSIFICATION,
        )
        bad_manager._validate()
def test_add_errors_on_invalid_input_basic(
    self, adult_data, target_col, cols_of_interest
):
    """add() raises ValueError for invalid target/cols combinations."""
    train_df, test_df, _, _, classes = adult_data
    with pytest.raises(ValueError):
        bad_manager = DataBalanceManager(
            target_column=target_col,
            classes=classes,
            train=train_df,
            test=test_df,
            task_type=TaskType.CLASSIFICATION,
        )
        bad_manager.add(cols_of_interest=cols_of_interest)
def test_save_and_load_basic(self, tmpdir, adult_data):
    """Round-trip a manager that was never add()-ed through _save/_load.

    Verifies that shared state (target, classes, task type, frames)
    survives, while manager-specific state stays unset.
    """
    train_df, test_df, categorical_cols, target_col, classes = adult_data
    task_type = TaskType.CLASSIFICATION
    saved = DataBalanceManager(
        train=train_df,
        test=test_df,
        target_column=target_col,
        classes=classes,
        task_type=task_type,
    )
    assert saved._is_added is False

    save_dir = tmpdir.mkdir("save-dir")
    saved._save(save_dir)  # should not create any configs

    rai_insights = RAIInsights(
        model=None,
        train=train_df,
        test=test_df,
        target_column=target_col,
        task_type="classification",
        categorical_features=categorical_cols,
    )
    loaded = saved._load(save_dir, rai_insights)

    assert saved._target_column == loaded._target_column == target_col
    # Classes are stringified on the manager, hence the str() mapping.
    assert saved._classes == loaded._classes == list(map(str, classes))
    assert saved._task_type == loaded._task_type == task_type
    assert_frame_equal(saved._train, loaded._train)
    assert_frame_equal(saved._test, loaded._test)
    assert saved._is_added is loaded._is_added is False

    # All the instance variables from RAIInsights get set.
    assert_frame_equal(loaded._train, saved._train)
    assert_frame_equal(loaded._test, saved._test)
    assert loaded._target_column == saved._target_column

    # Also, df gets created using rai_insights.train and rai_insights.test
    assert_frame_equal(loaded._df, saved._df)

    # All the instance variables specific to the manager, such as
    # _cols_of_interest, don't get set.
    assert loaded._is_added is False
    assert loaded._cols_of_interest is None
    assert loaded._data_balance_measures is None
def _initialize_managers(self):
    """Initializes the managers.

    Initializes the causal, counterfactual, data balance,
    error analysis and explainer managers.
    """
    self._causal_manager = CausalManager(
        self.train, self.test, self.target_column,
        self.task_type, self.categorical_features)

    self._counterfactual_manager = CounterfactualManager(
        model=self.model, train=self.train, test=self.test,
        target_column=self.target_column, task_type=self.task_type,
        categorical_features=self.categorical_features)

    self._data_balance_manager = DataBalanceManager(
        train=self.train, test=self.test,
        target_column=self.target_column, classes=self._classes,
        task_type=self.task_type)

    # Error analysis needs only the test split plus the model.
    self._error_analysis_manager = ErrorAnalysisManager(
        self.model, self.test, self.target_column,
        self._classes,
        self.categorical_features)

    self._explainer_manager = ExplainerManager(
        self.model, self.train, self.test,
        self.target_column,
        self._classes,
        categorical_features=self.categorical_features)

    # Collected in a fixed order; presumably iterated by the base
    # class for compute/save/load dispatch — confirm against caller.
    self._managers = [self._causal_manager,
                      self._counterfactual_manager,
                      self._data_balance_manager,
                      self._error_analysis_manager,
                      self._explainer_manager]
def test_init_with_invalid_input(self):
    """Constructing with all-None input leaves every attribute unset."""
    manager = DataBalanceManager(
        train=None,
        test=None,
        target_column=None,
        classes=None,
        task_type=None,
    )
    # Every stored attribute should simply mirror the None inputs.
    for attr in (
        "_target_column",
        "_classes",
        "_task_type",
        "_train",
        "_test",
        "_df",
    ):
        assert getattr(manager, attr) is None
def test_save_and_load_with_add_and_compute(self, tmpdir, adult_data):
    """Round-trip a manager after add()+compute().

    All state, including the computed data balance measures and the
    columns of interest, must survive _save/_load.
    """
    train_df, test_df, categorical_cols, target_col, classes = adult_data
    task_type = TaskType.CLASSIFICATION
    saved = DataBalanceManager(
        train=train_df,
        test=test_df,
        target_column=target_col,
        classes=classes,
        task_type=task_type,
    )
    saved.add(cols_of_interest=categorical_cols)
    saved.compute()

    save_dir = tmpdir.mkdir("save-dir")
    saved._save(save_dir)

    rai_insights = RAIInsights(
        model=None,
        train=train_df,
        test=test_df,
        target_column=target_col,
        task_type="classification",
        categorical_features=categorical_cols,
    )
    loaded = saved._load(save_dir, rai_insights)

    assert saved._target_column == loaded._target_column == target_col
    # Classes are stringified on the manager, hence the str() mapping.
    assert saved._classes == loaded._classes == list(map(str, classes))
    assert saved._task_type == loaded._task_type == task_type
    assert_frame_equal(saved._train, loaded._train)
    assert_frame_equal(saved._test, loaded._test)
    assert saved._is_added is loaded._is_added is True
    assert_frame_equal(saved._df, loaded._df)
    assert saved._cols_of_interest == loaded._cols_of_interest
    assert saved._data_balance_measures == loaded._data_balance_measures
def test_compute_transforms_and_sets_data_balance_measures(
        self, adult_data):
    """compute() materializes the measures; add() alone does not."""
    train_df, test_df, categorical_cols, target_col, classes = adult_data
    manager = DataBalanceManager(
        target_column=target_col,
        classes=classes,
        train=train_df,
        test=test_df,
        task_type=TaskType.CLASSIFICATION,
    )
    manager.add(cols_of_interest=categorical_cols)

    # Measures only get set by compute(), not by add().
    assert manager._data_balance_measures is None
    manager.compute()
    assert manager._data_balance_measures is not None
def test_validate_with_invalid_input_advanced(self, adult_data):
    """_validate rejects missing data and non-classification tasks."""
    train, test, _, target_col, classes = adult_data
    invalid_configs = [
        # train and test not specified
        (None, None, TaskType.CLASSIFICATION),
        # task_type is not classification
        (train, test, TaskType.REGRESSION),
    ]
    for bad_train, bad_test, bad_task in invalid_configs:
        with pytest.raises(ValueError):
            manager = DataBalanceManager(
                train=bad_train,
                test=bad_test,
                target_column=target_col,
                classes=classes,
                task_type=bad_task,
            )
            manager._validate()
def test_add_errors_on_invalid_input_advanced(self, adult_data):
    """add() raises when data is missing or the task is not classification."""
    train_df, test_df, categorical_cols, target_col, classes = adult_data
    invalid_configs = [
        # train and test not specified
        (None, None, TaskType.CLASSIFICATION),
        # task_type is not classification
        (train_df, test_df, TaskType.REGRESSION),
    ]
    for bad_train, bad_test, bad_task in invalid_configs:
        with pytest.raises(ValueError):
            manager = DataBalanceManager(
                train=bad_train,
                test=bad_test,
                target_column=target_col,
                classes=classes,
                task_type=bad_task,
            )
            manager.add(cols_of_interest=categorical_cols)
def test_init_with_valid_input(self, adult_data):
    """The manager stores train/test as-is and concatenates them into _df."""
    train_df, test_df, _, target_col, classes = adult_data

    # Try a combination of train only, test only, and both.
    # Since fixtures are being used, easier not to use pytest.parametrize
    for train, test in (
        (train_df, None),
        (None, test_df),
        (train_df, test_df),
    ):
        combined = pd.concat([train, test])
        manager = DataBalanceManager(
            target_column=target_col,
            classes=classes,
            train=train,
            test=test,
            task_type=TaskType.CLASSIFICATION,
        )
        assert manager._target_column == target_col
        # The frames are stored by reference, not copied.
        assert manager._train is train
        assert manager._test is test
        assert_frame_equal(
            manager._df.reset_index(drop=True),
            combined.reset_index(drop=True),
        )
class RAIInsights(RAIBaseInsights):
    """Defines the top-level Model Analysis API.

    Use RAIInsights to analyze errors, explain the most important
    features, compute counterfactuals and run causal analysis in a
    single API.
    """

    def __init__(self, model: Optional[Any], train: pd.DataFrame,
                 test: pd.DataFrame, target_column: str, task_type: str,
                 categorical_features: Optional[List[str]] = None,
                 classes: Optional[np.ndarray] = None,
                 serializer: Optional[Any] = None,
                 maximum_rows_for_test: int = 5000,
                 feature_metadata: Optional[FeatureMetadata] = None):
        """Creates an RAIInsights object.

        :param model: The model to compute RAI insights for.
            A model that implements sklearn.predict or
            sklearn.predict_proba or function that accepts a 2d
            ndarray.
        :type model: object
        :param train: The training dataset including the label column.
        :type train: pandas.DataFrame
        :param test: The test dataset including the label column.
        :type test: pandas.DataFrame
        :param target_column: The name of the label column.
        :type target_column: str
        :param task_type: The task to run, can be `classification` or
            `regression`.
        :type task_type: str
        :param categorical_features: The categorical feature names.
        :type categorical_features: list[str]
        :param classes: The class labels in the training dataset
        :type classes: numpy.ndarray
        :param serializer: Picklable custom serializer with save and
            load methods for custom model serialization.
            The save method writes the model to file given a parent
            directory. The load method returns the deserialized model
            from the same parent directory.
        :type serializer: object
        :param maximum_rows_for_test: Limit on size of test data
            (for performance reasons)
        :type maximum_rows_for_test: int
        :param feature_metadata: Feature metadata for the train/test
            dataset to identify different kinds of features in the
            dataset.
        :type feature_metadata: FeatureMetadata
        """
        categorical_features = categorical_features or []
        # Validate everything up-front so managers never see bad input.
        self._validate_rai_insights_input_parameters(
            model=model, train=train, test=test,
            target_column=target_column, task_type=task_type,
            categorical_features=categorical_features,
            classes=classes,
            serializer=serializer,
            maximum_rows_for_test=maximum_rows_for_test,
            feature_metadata=feature_metadata)
        self._classes = RAIInsights._get_classes(
            task_type=task_type,
            train=train,
            target_column=target_column,
            classes=classes)
        # Feature columns are everything except the label column.
        self._feature_columns = \
            test.drop(columns=[target_column]).columns.tolist()
        self._feature_ranges = RAIInsights._get_feature_ranges(
            test=test, categorical_features=categorical_features,
            feature_columns=self._feature_columns)
        self._feature_metadata = feature_metadata

        self.categorical_features = categorical_features
        self._categories, self._categorical_indexes, \
            self._category_dictionary, self._string_ind_data = \
            process_categoricals(
                all_feature_names=self._feature_columns,
                categorical_features=self.categorical_features,
                dataset=test.drop(columns=[target_column]))

        # The base class constructor calls _initialize_managers(), so
        # the managers exist before _try_add_data_balance() runs.
        super(RAIInsights, self).__init__(
            model, train, test, target_column, task_type, serializer)

        self._try_add_data_balance()

    def _initialize_managers(self):
        """Initializes the managers.

        Initializes the causal, counterfactual, data balance,
        error analysis and explainer managers.
        """
        self._causal_manager = CausalManager(
            self.train, self.test, self.target_column,
            self.task_type, self.categorical_features)

        self._counterfactual_manager = CounterfactualManager(
            model=self.model, train=self.train, test=self.test,
            target_column=self.target_column, task_type=self.task_type,
            categorical_features=self.categorical_features)

        self._data_balance_manager = DataBalanceManager(
            train=self.train, test=self.test,
            target_column=self.target_column, classes=self._classes,
            task_type=self.task_type)

        self._error_analysis_manager = ErrorAnalysisManager(
            self.model, self.test, self.target_column,
            self._classes,
            self.categorical_features)

        self._explainer_manager = ExplainerManager(
            self.model, self.train, self.test,
            self.target_column,
            self._classes,
            categorical_features=self.categorical_features)

        self._managers = [self._causal_manager,
                          self._counterfactual_manager,
                          self._data_balance_manager,
                          self._error_analysis_manager,
                          self._explainer_manager]

    @staticmethod
    def _get_classes(task_type, train, target_column, classes):
        """Return the class labels for classification, else None.

        Derives labels from the train target column when the caller
        did not supply them explicitly.
        """
        if task_type == ModelTask.CLASSIFICATION:
            if classes is None:
                classes = train[target_column].unique()
                # sort the classes after calling unique in numeric case
                classes.sort()
                return classes
            else:
                return classes
        else:
            return None

    def _try_add_data_balance(self):
        """
        Add data balance measures to be computed on categorical
        features if it is a classification task.
        """
        if self.task_type == ModelTask.CLASSIFICATION and \
                len(self.categorical_features) > 0 and \
                self._classes is not None:
            self._data_balance_manager.add(
                cols_of_interest=self.categorical_features)

    def _validate_rai_insights_input_parameters(
            self, model: Any, train: pd.DataFrame, test: pd.DataFrame,
            target_column: str, task_type: str,
            categorical_features: List[str], classes: np.ndarray,
            serializer,
            maximum_rows_for_test: int,
            feature_metadata: Optional[FeatureMetadata] = None):
        """Validate the inputs for the RAIInsights constructor.

        :param model: The model to compute RAI insights for.
            A model that implements sklearn.predict or
            sklearn.predict_proba or function that accepts a 2d
            ndarray.
        :type model: object
        :param train: The training dataset including the label column.
        :type train: pandas.DataFrame
        :param test: The test dataset including the label column.
        :type test: pandas.DataFrame
        :param target_column: The name of the label column.
        :type target_column: str
        :param task_type: The task to run, can be `classification` or
            `regression`.
        :type task_type: str
        :param categorical_features: The categorical feature names.
        :type categorical_features: list[str]
        :param classes: The class labels in the training dataset
        :type classes: numpy.ndarray
        :param serializer: Picklable custom serializer with save and
            load methods defined for model that is not serializable.
            The save method returns a dictionary state and load method
            returns the model.
        :type serializer: object
        :param maximum_rows_for_test: Limit on size of test data
            (for performance reasons)
        :type maximum_rows_for_test: int
        :param feature_metadata: Feature metadata for the train/test
            dataset to identify different kinds of features in the
            dataset.
        :type feature_metadata: FeatureMetadata
        """
        valid_tasks = [
            ModelTask.CLASSIFICATION.value,
            ModelTask.REGRESSION.value
        ]

        if task_type not in valid_tasks:
            message = (f"Unsupported task type '{task_type}'. "
                       f"Should be one of {valid_tasks}")
            raise UserConfigValidationException(message)

        if model is None:
            # A missing model is allowed (data-only analysis), but a
            # serializer without a model makes no sense.
            warnings.warn(
                'INVALID-MODEL-WARNING: No valid model is supplied. '
                'The explanations, error analysis and counterfactuals '
                'may not work')
            if serializer is not None:
                raise UserConfigValidationException(
                    'No valid model is specified but model '
                    'serializer provided.')

        if serializer is not None:
            if not hasattr(serializer, 'save'):
                raise UserConfigValidationException(
                    'The serializer does not implement save()')

            if not hasattr(serializer, 'load'):
                raise UserConfigValidationException(
                    'The serializer does not implement load()')

            try:
                # The serializer itself must be picklable so the whole
                # insights object can be saved.
                pickle.dumps(serializer)
            except Exception:
                raise UserConfigValidationException(
                    'The serializer should be serializable via pickle')

        if isinstance(train, pd.DataFrame) and \
                isinstance(test, pd.DataFrame):
            if test.shape[0] > maximum_rows_for_test:
                msg_fmt = 'The test data has {0} rows, ' +\
                    'but limit is set to {1} rows. ' +\
                    'Please resample the test data or ' +\
                    'adjust maximum_rows_for_test'
                raise UserConfigValidationException(
                    msg_fmt.format(
                        test.shape[0], maximum_rows_for_test)
                )

            # Symmetric difference check: both column sets must match.
            if len(set(train.columns) - set(test.columns)) != 0 or \
                    len(set(test.columns) - set(train.columns)):
                raise UserConfigValidationException(
                    'The features in train and test data do not match')

            if target_column not in list(train.columns) or \
                    target_column not in list(test.columns):
                raise UserConfigValidationException(
                    'Target name {0} not present in train/test data'
                    .format(target_column)
                )

            if categorical_features is not None and \
                    len(categorical_features) > 0:
                if target_column in categorical_features:
                    raise UserConfigValidationException(
                        'Found target name {0} in '
                        'categorical feature list'.format(
                            target_column)
                    )

                difference_set = \
                    set(categorical_features) - set(train.columns)
                if len(difference_set) > 0:
                    message = ("Feature names in categorical_features "
                               "do not exist in train data: "
                               f"{list(difference_set)}")
                    raise UserConfigValidationException(message)

                for column in categorical_features:
                    # np.unique fails on mixed/unhashable values;
                    # that is the condition being probed here.
                    try:
                        np.unique(train[column])
                    except Exception:
                        raise UserConfigValidationException(
                            "Error finding unique values in column {0}. "
                            "Please check your train data.".format(
                                column)
                        )

                    try:
                        np.unique(test[column])
                    except Exception:
                        raise UserConfigValidationException(
                            "Error finding unique values in column {0}. "
                            "Please check your test data.".format(
                                column)
                        )

            if classes is not None and task_type == \
                    ModelTask.CLASSIFICATION:
                if len(set(train[target_column].unique()) -
                       set(classes)) != 0 or \
                        len(set(classes) -
                            set(train[target_column].unique())) != 0:
                    raise UserConfigValidationException(
                        'The train labels and distinct values in '
                        'target (train data) do not match')

                if len(set(test[target_column].unique()) -
                       set(classes)) != 0 or \
                        len(set(classes) -
                            set(test[target_column].unique())) != 0:
                    raise UserConfigValidationException(
                        'The train labels and distinct values in '
                        'target (test data) do not match')

            if model is not None:
                # Pick one row from train and test data
                small_train_data = train.iloc[0:1].drop(
                    [target_column], axis=1)
                small_test_data = test.iloc[0:1].drop(
                    [target_column], axis=1)
                small_train_features_before = \
                    list(small_train_data.columns)

                # Run predict() of the model
                try:
                    model.predict(small_train_data)
                    model.predict(small_test_data)
                except Exception:
                    raise UserConfigValidationException(
                        'The model passed cannot be used for'
                        ' getting predictions via predict()'
                    )
                self._validate_features_same(
                    small_train_features_before,
                    small_train_data,
                    SKLearn.PREDICT)

                # Run predict_proba() of the model
                if task_type == ModelTask.CLASSIFICATION:
                    try:
                        model.predict_proba(small_train_data)
                        model.predict_proba(small_test_data)
                    except Exception:
                        raise UserConfigValidationException(
                            'The model passed cannot be used for'
                            ' getting predictions via predict_proba()'
                        )
                    self._validate_features_same(
                        small_train_features_before,
                        small_train_data,
                        SKLearn.PREDICT_PROBA)

                if task_type == ModelTask.REGRESSION:
                    if hasattr(model, SKLearn.PREDICT_PROBA):
                        # NOTE(review): adjacent string literals below
                        # are missing a space between 'model' and
                        # 'provided' — the message renders as
                        # "...modelprovided...". Flagged, not fixed.
                        raise UserConfigValidationException(
                            'The regression model'
                            'provided has a predict_proba function. '
                            'Please check the task_type.')
        else:
            raise UserConfigValidationException(
                "Unsupported data type for either train or test. "
                "Expecting pandas DataFrame for train and test.")

        if feature_metadata is not None:
            if not isinstance(feature_metadata, FeatureMetadata):
                raise UserConfigValidationException(
                    "Expecting type FeatureMetadata but got {0}".format(
                        type(feature_metadata)))

            feature_metadata.validate_feature_metadata_with_user_features(
                list(train.columns))

    def _validate_features_same(self, small_train_features_before,
                                small_train_data, function):
        """
        Validate the features are unmodified on the DataFrame.

        :param small_train_features_before: The features saved before
            an operation was performed.
        :type small_train_features_before: list[str]
        :param small_train_data: The DataFrame after the operation.
        :type small_train_data: pandas.DataFrame
        :param function: The name of the operation performed.
        :type function: str
        """
        small_train_features_after = list(small_train_data.columns)
        if small_train_features_before != small_train_features_after:
            raise UserConfigValidationException(
                ('Calling model {} function modifies '
                 'input dataset features. Please check if '
                 'predict function is defined correctly.').format(
                    function)
            )

    @property
    def causal(self) -> CausalManager:
        """Get the causal manager.
        :return: The causal manager.
        :rtype: CausalManager
        """
        return self._causal_manager

    @property
    def counterfactual(self) -> CounterfactualManager:
        """Get the counterfactual manager.
        :return: The counterfactual manager.
        :rtype: CounterfactualManager
        """
        return self._counterfactual_manager

    @property
    def error_analysis(self) -> ErrorAnalysisManager:
        """Get the error analysis manager.
        :return: The error analysis manager.
        :rtype: ErrorAnalysisManager
        """
        return self._error_analysis_manager

    @property
    def explainer(self) -> ExplainerManager:
        """Get the explainer manager.
        :return: The explainer manager.
        :rtype: ExplainerManager
        """
        return self._explainer_manager

    def get_filtered_test_data(self, filters, composite_filters,
                               include_original_columns_only=False):
        """Get the filtered test data based on cohort filters.

        :param filters: The filters to apply.
        :type filters: list[Filter]
        :param composite_filters: The composite filters to apply.
        :type composite_filters: list[CompositeFilter]
        :param include_original_columns_only: Whether to return the
            original data columns.
        :type include_original_columns_only: bool
        :return: The filtered test data.
        :rtype: pandas.DataFrame
        """
        # NOTE(review): self.model.predict is called unconditionally;
        # this will raise if the insights were built with model=None.
        pred_y = self.model.predict(
            self.test.drop(columns=[self.target_column]))
        filter_data_with_cohort = FilterDataWithCohortFilters(
            model=self.model,
            dataset=self.test.drop(columns=[self.target_column]),
            features=self.test.drop(
                columns=[self.target_column]).columns,
            categorical_features=self.categorical_features,
            categories=self._categories,
            true_y=self.test[self.target_column],
            pred_y=pred_y,
            model_task=self.task_type)

        return filter_data_with_cohort.filter_data_from_cohort(
            filters=filters,
            composite_filters=composite_filters,
            include_original_columns_only=include_original_columns_only)

    def get_data(self):
        """Get all data as RAIInsightsData object

        :return: Model Analysis Data
        :rtype: RAIInsightsData
        """
        data = RAIInsightsData()
        data.dataset = self._get_dataset()
        data.modelExplanationData = self.explainer.get_data()
        data.errorAnalysisData = self.error_analysis.get_data()
        data.causalAnalysisData = self.causal.get_data()
        data.counterfactualData = self.counterfactual.get_data()
        return data

    def _get_dataset(self):
        """Build the Dataset payload consumed by the dashboard.

        Collects the test features, true/predicted labels (mapped to
        class indices for classification), probabilities and data
        balance measures into a Dataset object.
        """
        dashboard_dataset = Dataset()
        dashboard_dataset.task_type = self.task_type
        dashboard_dataset.categorical_features = \
            self.categorical_features
        dashboard_dataset.class_names = convert_to_list(
            self._classes)
        if self._feature_metadata is not None:
            dashboard_dataset.feature_metadata = \
                self._feature_metadata.to_dict()
        else:
            dashboard_dataset.feature_metadata = None
        dashboard_dataset.data_balance_measures = \
            self._data_balance_manager.get_data()

        predicted_y = None
        feature_length = None

        dataset: pd.DataFrame = \
            self.test.drop([self.target_column], axis=1)

        if isinstance(dataset, pd.DataFrame) and \
                hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
        try:
            list_dataset = convert_to_list(dataset)
        except Exception as ex:
            raise ValueError("Unsupported dataset type") from ex
        if dataset is not None and self.model is not None:
            try:
                predicted_y = self.model.predict(dataset)
            except Exception as ex:
                # NOTE(review): the adjacent literals below are missing
                # a separating space — the message renders as
                # "...for givendataset type". Flagged, not fixed.
                msg = "Model does not support predict method for given" \
                    "dataset type"
                raise ValueError(msg) from ex
            try:
                predicted_y = convert_to_list(predicted_y)
            except Exception as ex:
                raise ValueError(
                    "Model prediction output of unsupported type,") \
                    from ex
        if predicted_y is not None:
            if (self.task_type == "classification" and
                    dashboard_dataset.class_names is not None):
                # Dashboard expects class indices, not raw labels.
                predicted_y = [dashboard_dataset.class_names.index(
                    y) for y in predicted_y]
            dashboard_dataset.predicted_y = predicted_y
        row_length = 0

        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if row_length > 100000:
                # NOTE(review): missing space between the adjacent
                # literals — renders as "...rowsfor visualization...".
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
            if feature_length > 1000:
                raise ValueError(
                    "Exceeds maximum number of features for"
                    " visualization (1000). Please regenerate the"
                    " explanation using fewer features or"
                    " initialize the dashboard without passing a"
                    " dataset.")
            dashboard_dataset.features = list_dataset

        true_y = self.test[self.target_column]

        if true_y is not None and len(true_y) == row_length:
            if (self.task_type == "classification" and
                    dashboard_dataset.class_names is not None):
                true_y = [dashboard_dataset.class_names.index(
                    y) for y in true_y]
            dashboard_dataset.true_y = convert_to_list(true_y)

        features = dataset.columns

        if features is not None:
            features = convert_to_list(features)
            if feature_length is not None and \
                    len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            dashboard_dataset.feature_names = features
        dashboard_dataset.target_column = self.target_column
        if is_classifier(self.model) and dataset is not None:
            try:
                probability_y = self.model.predict_proba(dataset)
            except Exception as ex:
                raise ValueError(
                    "Model does not support predict_proba method"
                    " for given dataset type,") from ex
            try:
                probability_y = convert_to_list(probability_y)
            except Exception as ex:
                raise ValueError(
                    "Model predict_proba output of unsupported type,") \
                    from ex
            dashboard_dataset.probability_y = probability_y

        return dashboard_dataset

    def _save_predictions(self, path):
        """Save the predict() and predict_proba() output.

        :param path: The directory path to save the RAIInsights to.
        :type path: str
        """
        prediction_output_path = Path(path) / _PREDICTIONS
        prediction_output_path.mkdir(parents=True, exist_ok=True)

        # Nothing to persist when no model was supplied.
        if self.model is None:
            return

        test_without_target_column = self.test.drop(
            [self.target_column], axis=1)

        predict_output = self.model.predict(
            test_without_target_column)
        self._write_to_file(
            prediction_output_path / (_PREDICT + _JSON_EXTENSION),
            json.dumps(predict_output.tolist()))

        if hasattr(self.model, SKLearn.PREDICT_PROBA):
            predict_proba_output = self.model.predict_proba(
                test_without_target_column)
            self._write_to_file(
                prediction_output_path / (
                    _PREDICT_PROBA + _JSON_EXTENSION),
                json.dumps(predict_proba_output.tolist()))

    def _save_metadata(self, path):
        """Save the metadata like target column, categorical features,
        task type and the classes (if any).

        :param path: The directory path to save the RAIInsights to.
        :type path: str
        """
        top_dir = Path(path)
        classes = convert_to_list(self._classes)
        feature_metadata_dict = None
        if self._feature_metadata is not None:
            feature_metadata_dict = self._feature_metadata.to_dict()
        meta = {
            _TARGET_COLUMN: self.target_column,
            _TASK_TYPE: self.task_type,
            _CATEGORICAL_FEATURES: self.categorical_features,
            _CLASSES: classes,
            _FEATURE_COLUMNS: self._feature_columns,
            _FEATURE_RANGES: self._feature_ranges,
            _FEATURE_METADATA: feature_metadata_dict
        }
        with open(top_dir / _META_JSON, 'w') as file:
            json.dump(meta, file)

    @staticmethod
    def _get_feature_ranges(test, categorical_features,
                            feature_columns):
        """Get feature ranges like min, max and unique values
        for all columns"""
        result = []
        for col in feature_columns:
            res_object = {}
            if (col in categorical_features):
                unique_value = test[col].unique()
                res_object[_COLUMN_NAME] = col
                res_object[_RANGE_TYPE] = "categorical"
                res_object[_UNIQUE_VALUES] = unique_value.tolist()
            else:
                # NOTE(review): non-categorical columns are always
                # labeled "integer" even when the values are floats —
                # confirm this is what downstream consumers expect.
                min_value = float(test[col].min())
                max_value = float(test[col].max())
                res_object[_COLUMN_NAME] = col
                res_object[_RANGE_TYPE] = "integer"
                res_object[_MIN_VALUE] = min_value
                res_object[_MAX_VALUE] = max_value
            result.append(res_object)
        return result

    @staticmethod
    def _load_metadata(inst, path):
        """Load the metadata.

        :param inst: RAIInsights object instance.
        :type inst: RAIInsights
        :param path: The directory path to metadata location.
        :type path: str
        """
        top_dir = Path(path)
        with open(top_dir / _META_JSON, 'r') as meta_file:
            meta = meta_file.read()
        meta = json.loads(meta)
        inst.__dict__[_TARGET_COLUMN] = meta[_TARGET_COLUMN]
        inst.__dict__[_TASK_TYPE] = meta[_TASK_TYPE]
        inst.__dict__[_CATEGORICAL_FEATURES] = \
            meta[_CATEGORICAL_FEATURES]
        classes = None
        # _TRAIN_LABELS is the legacy key for class labels; fall back
        # to the current _CLASSES key otherwise.
        if _TRAIN_LABELS in meta:
            classes = meta[_TRAIN_LABELS]
        else:
            classes = meta[_CLASSES]
        inst.__dict__['_' + _CLASSES] = RAIInsights._get_classes(
            task_type=meta[_TASK_TYPE],
            train=inst.__dict__[_TRAIN],
            target_column=meta[_TARGET_COLUMN],
            classes=classes)
        inst.__dict__['_' + _FEATURE_COLUMNS] = \
            meta[_FEATURE_COLUMNS]
        inst.__dict__['_' + _FEATURE_RANGES] = meta[_FEATURE_RANGES]
        if meta[_FEATURE_METADATA] is None:
            inst.__dict__['_' + _FEATURE_METADATA] = None
        else:
            inst.__dict__['_' + _FEATURE_METADATA] = FeatureMetadata(
                identity_feature_name=meta[_FEATURE_METADATA][
                    'identity_feature_name'],
                datetime_features=meta[_FEATURE_METADATA][
                    'datetime_features'],
                categorical_features=meta[_FEATURE_METADATA][
                    'categorical_features'],
                dropped_features=meta[_FEATURE_METADATA][
                    'dropped_features'],
            )

        inst.__dict__['_' + _CATEGORIES], \
            inst.__dict__['_' + _CATEGORICAL_INDEXES], \
            inst.__dict__['_' + _CATEGORY_DICTIONARY], \
            inst.__dict__['_' + _STRING_IND_DATA] = \
            process_categoricals(
                all_feature_names=inst.__dict__[
                    '_' + _FEATURE_COLUMNS],
                categorical_features=inst.__dict__[
                    _CATEGORICAL_FEATURES],
                dataset=inst.__dict__[_TEST].drop(columns=[
                    inst.__dict__[_TARGET_COLUMN]]))

    @staticmethod
    def load(path):
        """Load the RAIInsights from the given path.

        :param path: The directory path to load the RAIInsights from.
        :type path: str
        :return: The RAIInsights object after loading.
        :rtype: RAIInsights
        """
        # create the RAIInsights without any properties using the
        # __new__ function, similar to pickle
        inst = RAIInsights.__new__(RAIInsights)

        manager_map = {
            ManagerNames.CAUSAL: CausalManager,
            ManagerNames.COUNTERFACTUAL: CounterfactualManager,
            ManagerNames.DATA_BALANCE: DataBalanceManager,
            ManagerNames.ERROR_ANALYSIS: ErrorAnalysisManager,
            ManagerNames.EXPLAINER: ExplainerManager,
        }

        # load current state
        RAIBaseInsights._load(path, inst, manager_map,
                              RAIInsights._load_metadata)

        return inst