def test_feature_metadata_with_dropped_features(self):
    """Check a FeatureMetadata built only from dropped_features.

    Construction is expected to emit a UserWarning; the remaining
    attributes stay None and to_dict() mirrors the attribute values.
    """
    warning_pattern = 'dropped_features are not in use currently.'
    with pytest.warns(UserWarning, match=warning_pattern):
        feature_metadata = FeatureMetadata(dropped_features=['d1', 'd2'])

    # Everything that was not supplied must remain unset.
    unset_attributes = (
        'identity_feature_name',
        'datetime_features',
        'categorical_features',
    )
    for attribute_name in unset_attributes:
        assert getattr(feature_metadata, attribute_name) is None
    assert feature_metadata.dropped_features == ['d1', 'd2']

    expected_feature_metadata_dict = {
        'identity_feature_name': None,
        'datetime_features': None,
        'categorical_features': None,
        'dropped_features': ['d1', 'd2'],
    }
    assert feature_metadata.to_dict() == expected_feature_metadata_dict
def test_feature_metadata_with_other_feature_metadata(self):
    """Check equality and inequality between FeatureMetadata instances."""

    def make_metadata(dropped_features):
        # Only dropped_features varies between the compared instances.
        return FeatureMetadata(
            identity_feature_name='id',
            datetime_features=['d1', 'd2'],
            categorical_features=['c1', 'c2'],
            dropped_features=dropped_features)

    reference = make_metadata(['d3', 'd4'])
    identical = make_metadata(['d3', 'd4'])
    different = make_metadata(['d3', 'd5'])

    assert reference == identical
    assert reference != different
def test_feature_metadata_with_identity_feature(self):
    """Check a FeatureMetadata built only from identity_feature_name.

    Covers the stored attributes, the validation error raised when the
    identity feature is missing from the user features, and to_dict().
    """
    feature_metadata = FeatureMetadata(identity_feature_name='id')

    assert feature_metadata.identity_feature_name == 'id'
    for attribute_name in ('datetime_features',
                           'categorical_features',
                           'dropped_features'):
        assert getattr(feature_metadata, attribute_name) is None

    # 'id' is deliberately absent from the user features passed below.
    expected_error = ('The given identity feature name id is not present'
                      ' in user features.')
    with pytest.raises(UserConfigValidationException,
                       match=expected_error):
        feature_metadata.validate_feature_metadata_with_user_features(
            user_features=['id1', 's1', 's2'])

    expected_feature_metadata_dict = {
        'identity_feature_name': 'id',
        'datetime_features': None,
        'categorical_features': None,
        'dropped_features': None,
    }
    assert feature_metadata.to_dict() == expected_feature_metadata_dict
def _load_metadata(inst, path):
    """Load the metadata.

    Reads the metadata JSON file found under ``path`` and writes the
    target column, task type, categorical features, classes, feature
    columns, feature ranges, feature metadata and the values returned by
    ``process_categoricals`` directly into ``inst.__dict__``.

    The train and test datasets are read from ``inst.__dict__`` here, so
    they must already be populated before this function is called.

    :param inst: RAIInsights object instance.
    :type inst: RAIInsights
    :param path: The directory path to metadata location.
    :type path: str
    """
    top_dir = Path(path)
    with open(top_dir / _META_JSON, 'r') as meta_file:
        meta = json.load(meta_file)

    inst.__dict__[_TARGET_COLUMN] = meta[_TARGET_COLUMN]
    inst.__dict__[_TASK_TYPE] = meta[_TASK_TYPE]
    inst.__dict__[_CATEGORICAL_FEATURES] = meta[_CATEGORICAL_FEATURES]

    # The class labels are taken from the _TRAIN_LABELS entry when the
    # file has one, otherwise from the _CLASSES entry.
    if _TRAIN_LABELS in meta:
        classes = meta[_TRAIN_LABELS]
    else:
        classes = meta[_CLASSES]

    inst.__dict__['_' + _CLASSES] = RAIInsights._get_classes(
        task_type=meta[_TASK_TYPE],
        train=inst.__dict__[_TRAIN],
        target_column=meta[_TARGET_COLUMN],
        classes=classes)

    inst.__dict__['_' + _FEATURE_COLUMNS] = meta[_FEATURE_COLUMNS]
    inst.__dict__['_' + _FEATURE_RANGES] = meta[_FEATURE_RANGES]

    # A metadata file without a feature metadata entry is treated the
    # same as one that stores an explicit null, instead of failing with
    # a KeyError.
    saved_feature_metadata = meta.get(_FEATURE_METADATA)
    if saved_feature_metadata is None:
        inst.__dict__['_' + _FEATURE_METADATA] = None
    else:
        inst.__dict__['_' + _FEATURE_METADATA] = FeatureMetadata(
            identity_feature_name=saved_feature_metadata[
                'identity_feature_name'],
            datetime_features=saved_feature_metadata['datetime_features'],
            categorical_features=saved_feature_metadata[
                'categorical_features'],
            dropped_features=saved_feature_metadata['dropped_features'],
        )

    # The categorical structures are derived from the test dataset with
    # the target column removed.
    test_features = inst.__dict__[_TEST].drop(
        columns=[inst.__dict__[_TARGET_COLUMN]])
    categories, categorical_indexes, category_dictionary, \
        string_ind_data = process_categoricals(
            all_feature_names=inst.__dict__['_' + _FEATURE_COLUMNS],
            categorical_features=inst.__dict__[_CATEGORICAL_FEATURES],
            dataset=test_features)
    inst.__dict__['_' + _CATEGORIES] = categories
    inst.__dict__['_' + _CATEGORICAL_INDEXES] = categorical_indexes
    inst.__dict__['_' + _CATEGORY_DICTIONARY] = category_dictionary
    inst.__dict__['_' + _STRING_IND_DATA] = string_ind_data
def test_feature_metadata(self):
    """Check that RAIInsights rejects an identity feature absent from data.

    The FeatureMetadata names 'id' as identity feature, and constructing
    RAIInsights with it is expected to raise
    UserConfigValidationException.
    """
    from responsibleai.feature_metadata import FeatureMetadata

    X_train, X_test, y_train, y_test, _, _ = create_cancer_data()
    model = create_lightgbm_classifier(X_train, y_train)

    # RAIInsights expects the target column inside both datasets.
    X_train[TARGET] = y_train
    X_test[TARGET] = y_test

    expected_error = ('The given identity feature name id is not present'
                      ' in user features.')
    insights_kwargs = dict(
        model=model,
        train=X_train,
        test=X_test,
        target_column=TARGET,
        task_type='classification',
        feature_metadata=FeatureMetadata(identity_feature_name='id'))
    with pytest.raises(UserConfigValidationException,
                       match=expected_error):
        RAIInsights(**insights_kwargs)
def test_rai_insights_save_load_add_save(self, manager_type):
    """Save, reload, add one manager, compute, then save RAIInsights again.

    :param manager_type: Name of the single manager to add after the
        first reload; compared against the ManagerNames members.
    """
    (data_train, data_test, y_train, _y_test, categorical_features,
     continuous_features, target_name, _classes,
     feature_columns, feature_range_keys) = create_adult_income_dataset()

    X_train = data_train.drop([target_name], axis=1)
    model = create_complex_classification_pipeline(
        X_train, y_train, continuous_features, categorical_features)

    # Cut down size for counterfactuals, in the interests of speed
    if manager_type == ManagerNames.COUNTERFACTUAL:
        data_test = data_test[0:1]

    rai_insights = RAIInsights(
        model, data_train, data_test, target_name,
        categorical_features=categorical_features,
        task_type=ModelTask.CLASSIFICATION,
        feature_metadata=FeatureMetadata(identity_feature_name="age"))

    with TemporaryDirectory() as tmpdir:
        tmp_root = Path(tmpdir)
        first_save_dir = tmp_root / "first_save"
        second_save_dir = tmp_root / "second_save"

        # Round-trip the freshly built insights through disk.
        rai_insights.save(first_save_dir)
        reloaded = RAIInsights.load(first_save_dir)

        # Add exactly one manager to the reloaded instance.
        if manager_type == ManagerNames.CAUSAL:
            reloaded.causal.add(
                treatment_features=['age', 'hours_per_week'])
        elif manager_type == ManagerNames.COUNTERFACTUAL:
            reloaded.counterfactual.add(
                total_CFs=10,
                desired_class='opposite',
                feature_importance=False)
        elif manager_type == ManagerNames.DATA_BALANCE:
            reloaded._data_balance_manager.add(
                cols_of_interest=categorical_features)
        elif manager_type == ManagerNames.ERROR_ANALYSIS:
            reloaded.error_analysis.add()
        elif manager_type == ManagerNames.EXPLAINER:
            reloaded.explainer.add()
        else:
            raise ValueError(
                "Bad manager_type: {0}".format(manager_type))

        reloaded.compute()

        # Validate, but this isn't the main check
        expected_feature_metadata = FeatureMetadata(
            identity_feature_name="age")
        validate_rai_insights(
            reloaded, data_train, data_test,
            target_name, ModelTask.CLASSIFICATION,
            categorical_features=categorical_features,
            feature_range_keys=feature_range_keys,
            feature_columns=feature_columns,
            feature_metadata=expected_feature_metadata)

        # Save again (this is where Issue #1046 manifested)
        reloaded.save(second_save_dir)