def test_feature_metadata_with_dropped_features(self):
        """Check FeatureMetadata built with only ``dropped_features``.

        Construction is expected to emit a UserWarning saying that dropped
        features are not in use; the remaining attributes stay ``None`` and
        ``to_dict`` returns all four fields.
        """
        warning_pattern = 'dropped_features are not in use currently.'
        with pytest.warns(UserWarning, match=warning_pattern):
            metadata = FeatureMetadata(dropped_features=['d1', 'd2'])

        # Every field that was not supplied must be left unset.
        for unset_attr in ('identity_feature_name',
                           'datetime_features',
                           'categorical_features'):
            assert getattr(metadata, unset_attr) is None
        assert metadata.dropped_features == ['d1', 'd2']

        # The dictionary form must expose the same values.
        assert metadata.to_dict() == {
            'identity_feature_name': None,
            'datetime_features': None,
            'categorical_features': None,
            'dropped_features': ['d1', 'd2']
        }
    def test_feature_metadata_with_other_feature_metadata(self):
        """Check equality and inequality between FeatureMetadata objects.

        Two instances built from identical arguments must compare equal,
        while an instance differing only in ``dropped_features`` must
        compare unequal.
        """
        def build(dropped_features):
            # Fresh list literals on every call so no instance shares
            # list objects with another one.
            return FeatureMetadata(identity_feature_name='id',
                                   datetime_features=['d1', 'd2'],
                                   categorical_features=['c1', 'c2'],
                                   dropped_features=dropped_features)

        reference = build(['d3', 'd4'])
        identical = build(['d3', 'd4'])
        different = build(['d3', 'd5'])

        assert reference == identical
        assert reference != different
    def test_feature_metadata_with_identity_feature(self):
        """Check FeatureMetadata built with only ``identity_feature_name``.

        Verifies the attribute values, that validation against a feature
        list lacking the identity feature raises
        UserConfigValidationException, and the ``to_dict`` output.
        """
        metadata = FeatureMetadata(identity_feature_name='id')

        assert metadata.identity_feature_name == 'id'
        # Every field that was not supplied must be left unset.
        for unset_attr in ('datetime_features',
                           'categorical_features',
                           'dropped_features'):
            assert getattr(metadata, unset_attr) is None

        # 'id' is absent from the user features below, so validation fails.
        expected_error = ('The given identity feature name id is not present'
                          ' in user features.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            metadata.validate_feature_metadata_with_user_features(
                user_features=['id1', 's1', 's2'])

        assert metadata.to_dict() == {
            'identity_feature_name': 'id',
            'datetime_features': None,
            'categorical_features': None,
            'dropped_features': None
        }
    def _load_metadata(inst, path):
        """Load the metadata.

        Reads the metadata JSON file located in ``path`` and restores the
        target column, task type, categorical features, classes, feature
        columns, feature ranges and feature metadata into
        ``inst.__dict__``. It then recomputes the categorical encodings via
        ``process_categoricals``.

        The train and test datasets must already be present in
        ``inst.__dict__`` (under ``_TRAIN`` and ``_TEST``) because they are
        read here to derive the classes and the categorical encodings.

        :param inst: RAIInsights object instance.
        :type inst: RAIInsights
        :param path: The directory path to metadata location.
        :type path: str
        """
        top_dir = Path(path)
        with open(top_dir / _META_JSON, 'r') as meta_file:
            meta = json.load(meta_file)

        state = inst.__dict__
        state[_TARGET_COLUMN] = meta[_TARGET_COLUMN]
        state[_TASK_TYPE] = meta[_TASK_TYPE]
        state[_CATEGORICAL_FEATURES] = meta[_CATEGORICAL_FEATURES]

        # The class labels may be stored under either key; prefer
        # _TRAIN_LABELS when it is present.
        if _TRAIN_LABELS in meta:
            classes = meta[_TRAIN_LABELS]
        else:
            classes = meta[_CLASSES]

        state['_' + _CLASSES] = RAIInsights._get_classes(
            task_type=meta[_TASK_TYPE],
            train=state[_TRAIN],
            target_column=meta[_TARGET_COLUMN],
            classes=classes)

        state['_' + _FEATURE_COLUMNS] = meta[_FEATURE_COLUMNS]
        state['_' + _FEATURE_RANGES] = meta[_FEATURE_RANGES]

        # A metadata file without the feature metadata key is treated the
        # same as one that stores an explicit null, instead of failing
        # with a KeyError.
        saved_feature_metadata = meta.get(_FEATURE_METADATA)
        if saved_feature_metadata is None:
            state['_' + _FEATURE_METADATA] = None
        else:
            state['_' + _FEATURE_METADATA] = FeatureMetadata(
                identity_feature_name=saved_feature_metadata[
                    'identity_feature_name'],
                datetime_features=saved_feature_metadata[
                    'datetime_features'],
                categorical_features=saved_feature_metadata[
                    'categorical_features'],
                dropped_features=saved_feature_metadata[
                    'dropped_features'],
            )

        # Recompute the categorical encodings from the test data with the
        # target column removed.
        test_features = state[_TEST].drop(columns=[state[_TARGET_COLUMN]])
        categories, categorical_indexes, category_dictionary, \
            string_ind_data = process_categoricals(
                all_feature_names=state['_' + _FEATURE_COLUMNS],
                categorical_features=state[_CATEGORICAL_FEATURES],
                dataset=test_features)
        state['_' + _CATEGORIES] = categories
        state['_' + _CATEGORICAL_INDEXES] = categorical_indexes
        state['_' + _CATEGORY_DICTIONARY] = category_dictionary
        state['_' + _STRING_IND_DATA] = string_ind_data
    def test_feature_metadata(self):
        """Check RAIInsights rejects an unknown identity feature name.

        Builds a classifier on the cancer data and passes a FeatureMetadata
        whose identity feature 'id' is expected to be rejected with
        UserConfigValidationException when RAIInsights is constructed.
        """
        from responsibleai.feature_metadata import FeatureMetadata

        X_train, X_test, y_train, y_test, _, _ = create_cancer_data()
        # Fit the model before the target column is appended to the frames.
        model = create_lightgbm_classifier(X_train, y_train)
        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        insights_kwargs = dict(
            model=model,
            train=X_train,
            test=X_test,
            target_column=TARGET,
            task_type='classification',
            feature_metadata=FeatureMetadata(identity_feature_name='id'))

        expected_error = ('The given identity feature name id is not present'
                          ' in user features.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(**insights_kwargs)
    def test_rai_insights_save_load_add_save(self, manager_type):
        """Save, reload, add one manager, compute, and save again.

        Builds an RAIInsights object on the adult income data, saves it and
        loads it back. A single manager chosen by ``manager_type`` is then
        configured on the loaded copy, which is computed, validated and
        saved a second time. The second save is the main check (see
        Issue #1046).

        :param manager_type: One of the ManagerNames values selecting the
            manager to add on the reloaded object.
        """
        (train_df, test_df, y_train, _y_test, categorical_features,
         continuous_features, target_name, _classes,
         feature_columns, feature_range_keys) = create_adult_income_dataset()
        train_features = train_df.drop(columns=[target_name])

        pipeline = create_complex_classification_pipeline(
            train_features, y_train, continuous_features,
            categorical_features)

        # Counterfactuals are slow, so keep just one test row for them.
        if manager_type == ManagerNames.COUNTERFACTUAL:
            test_df = test_df[:1]

        original_insights = RAIInsights(
            pipeline, train_df, test_df,
            target_name,
            categorical_features=categorical_features,
            task_type=ModelTask.CLASSIFICATION,
            feature_metadata=FeatureMetadata(identity_feature_name="age"))

        with TemporaryDirectory() as tmpdir:
            tmp_root = Path(tmpdir)
            first_save_dir = tmp_root / "first_save"
            second_save_dir = tmp_root / "second_save"

            # Round-trip through disk before touching any manager.
            original_insights.save(first_save_dir)
            reloaded = RAIInsights.load(first_save_dir)

            # Configure exactly one manager on the reloaded object.
            if manager_type == ManagerNames.CAUSAL:
                reloaded.causal.add(
                    treatment_features=['age', 'hours_per_week'])
            elif manager_type == ManagerNames.COUNTERFACTUAL:
                reloaded.counterfactual.add(
                    total_CFs=10,
                    desired_class='opposite',
                    feature_importance=False)
            elif manager_type == ManagerNames.DATA_BALANCE:
                reloaded._data_balance_manager.add(
                    cols_of_interest=categorical_features)
            elif manager_type == ManagerNames.ERROR_ANALYSIS:
                reloaded.error_analysis.add()
            elif manager_type == ManagerNames.EXPLAINER:
                reloaded.explainer.add()
            else:
                raise ValueError(
                    "Bad manager_type: {0}".format(manager_type))

            reloaded.compute()

            # Sanity validation only; the second save below is the real test.
            validate_rai_insights(
                reloaded, train_df, test_df,
                target_name, ModelTask.CLASSIFICATION,
                categorical_features=categorical_features,
                feature_range_keys=feature_range_keys,
                feature_columns=feature_columns,
                feature_metadata=FeatureMetadata(identity_feature_name="age"))

            # Saving a loaded-then-modified object is where Issue #1046
            # manifested.
            reloaded.save(second_save_dir)