Beispiel #1
0
    def test_check_preprocessing_options_1(self):
        """
        Unit test 1 for check_preprocessing_options.

        A fitted ColumnTransformer built with remainder='drop' is expected
        to make check_preprocessing_options raise ValueError, while the same
        transformer built with remainder='passthrough' must be accepted.
        """
        target = pd.DataFrame(data=[0, 1], columns=['y'])
        features = pd.DataFrame({
            'num1': [0, 1],
            'num2': [0, 2],
            'other': ['A', 'B']
        })

        def fitted_transformer(remainder):
            # Build a fresh transformer on the two numeric columns and fit it;
            # only the handling of the remaining column ('other') varies.
            steps = [
                ('power', skp.QuantileTransformer(n_quantiles=2),
                 ['num1', 'num2'])
            ]
            return ColumnTransformer(transformers=steps,
                                     remainder=remainder).fit(features, target)

        dropping = fitted_transformer('drop')
        with self.assertRaises(ValueError):
            check_preprocessing_options(dropping)

        # No assertion needed here: the call simply must not raise.
        passing_through = fitted_transformer('passthrough')
        check_preprocessing_options(passing_through)
Beispiel #2
0
    def test_check_preprocessing_options_1(self):
        """
        Unit test check_preprocessing_options 1.

        With a ColumnTransformer fitted with remainder='drop' the function is
        expected to return a dict describing the dropped features and the
        reduced features/columns dicts; with remainder='passthrough' it is
        expected to return None.
        """
        categories = ["S", "M", "S", "D", "M"]

        # Build the frame step by step; the two random draws stay in the
        # same order (x1 first, then x3).
        frame = pd.DataFrame(range(5), columns=['id'])
        n_rows = len(frame)
        frame['y'] = frame['id'].apply(lambda value: int(value < 2))
        frame['x1'] = np.random.randint(1, 123, n_rows)
        frame = frame.set_index('id')
        frame['x2'] = categories
        frame['x3'] = np.random.randint(1, 123, n_rows)
        frame['x4'] = categories

        features_dict = {
            "x1": "age",
            "x2": "weight",
            "x3": 'test',
            'x4': "test2"
        }
        columns_dict = dict(enumerate(["x1", "x2", "x3", "x4"]))

        def fit_encoder(remainder):
            # One-hot encode x2 and x4; x1 and x3 are handled per `remainder`.
            transformer = ColumnTransformer(
                transformers=[
                    ('onehot_ce_1', ce.OneHotEncoder(), ['x2']),
                    ('onehot_ce_2', ce.OneHotEncoder(), ['x4'])
                ],
                remainder=remainder)
            return transformer.fit(frame[["x1", "x2", "x3", 'x4']])

        dropping_encoder = fit_encoder('drop')
        passthrough_encoder = fit_encoder('passthrough')

        expected_when_dropping = {
            'features_to_drop': ['x1', 'x3'],
            'features_dict_op': {'x2': 'weight', 'x4': 'test2'},
            'columns_dict_op': {0: 'x2', 1: 'x4'}
        }
        expected_when_passing = None

        result_dropping = check_preprocessing_options(
            columns_dict, features_dict, dropping_encoder,
            [dropping_encoder])
        result_passing = check_preprocessing_options(
            columns_dict, features_dict, passthrough_encoder,
            [passthrough_encoder])

        assert result_dropping == expected_when_dropping
        assert result_passing == expected_when_passing
Beispiel #3
0
    def __init__(
        self,
        features_dict,
        model,
        columns_dict,
        explainer,
        features_types,
        label_dict=None,
        preprocessing=None,
        postprocessing=None,
        mask_params=None):
        """
        Validate the inputs, store them on the instance and run the
        consistency checks.

        Parameters
        ----------
        features_dict : dict
            Must be a dict (or None); stored as ``self.features_dict``.
        model : object
            Stored as ``self.model`` and inspected by ``self.check_model()``.
        columns_dict : dict
            Must be a dict (or None); stored as ``self.columns_dict``.
        explainer : object
            Passed through ``self.check_explainer`` before being stored.
        features_types : dict
            Must be a dict (or None); stored as ``self.features_types``.
        label_dict : dict, optional
            Must be a dict when given; checked by ``self.check_label_dict()``.
        preprocessing : object, optional
            Checked by ``check_preprocessing_options`` and
            ``self.check_preprocessing()``.
        postprocessing : dict, optional
            Must be a dict when given; stored as ``self.postprocessing``.
        mask_params : dict, optional
            Checked by ``self.check_mask_params()``. When omitted (or None),
            a fresh dict with the keys ``features_to_hide``, ``threshold``,
            ``positive`` and ``max_contrib`` all set to None is created for
            this instance.

        Raises
        ------
        ValueError
            If features_dict, features_types, label_dict, columns_dict or
            postprocessing is neither None nor a dict. The check helpers
            called afterwards may raise as well.
        """
        dict_params = [
            features_dict, features_types, label_dict, columns_dict,
            postprocessing
        ]

        for params in dict_params:
            if params is not None and not isinstance(params, dict):
                raise ValueError("""
                    {0} must be a dict.
                    """.format(str(params)))

        # The default is built here, per instance, instead of in the
        # signature: a dict literal used as a default argument is created once
        # and would be shared (and mutable) across every instance relying on it.
        if mask_params is None:
            mask_params = {
                "features_to_hide": None,
                "threshold": None,
                "positive": None,
                "max_contrib": None
            }

        self.model = model
        self._case, self._classes = self.check_model()
        self.explainer = self.check_explainer(explainer)
        check_preprocessing_options(preprocessing)
        self.preprocessing = preprocessing
        self.check_preprocessing()
        self.features_dict = features_dict
        self.features_types = features_types
        self.label_dict = label_dict
        self.check_label_dict()
        self.columns_dict = columns_dict
        self.mask_params = mask_params
        self.check_mask_params()
        self.postprocessing = postprocessing
        check_consistency_model_features(self.features_dict, self.model,
                                         self.columns_dict,
                                         self.features_types, self.mask_params,
                                         self.preprocessing,
                                         self.postprocessing)
        check_consistency_model_label(self.columns_dict, self.label_dict)