def test_check_preprocessing_options_1(self):
    """
    Unit test 1 for check_preprocessing_options

    Fits the same QuantileTransformer-based ColumnTransformer twice on a
    small frame: once with remainder='drop', where the check is expected
    to raise ValueError, and once with remainder='passthrough', where the
    check is expected to complete without raising.
    """
    target = pd.DataFrame(data=[0, 1], columns=['y'])
    features = pd.DataFrame({
        'num1': [0, 1],
        'num2': [0, 2],
        'other': ['A', 'B'],
    })

    def fitted_transformer(remainder):
        # Only the `remainder` policy differs between the two scenarios.
        transformer = ColumnTransformer(
            transformers=[
                ('power', skp.QuantileTransformer(n_quantiles=2), ['num1', 'num2']),
            ],
            remainder=remainder,
        )
        transformer.fit(features, target)
        return transformer

    # The 'other' column is left out of the transformers and dropped.
    dropping = fitted_transformer('drop')
    self.assertRaises(ValueError, check_preprocessing_options, dropping)

    # Same transformers, but the 'other' column is passed through.
    keeping = fitted_transformer('passthrough')
    check_preprocessing_options(keeping)
def test_check_preprocessing_options_1(self):
    """
    Unit test check_preprocessing_options 1

    Builds a five-row frame with two random integer columns (x1, x3) and
    two categorical columns (x2, x4), then fits a ColumnTransformer that
    one-hot encodes only x2 and x4. With remainder='drop' the function is
    expected to return the dropped features together with the reduced
    features/columns dictionaries; with remainder='passthrough' it is
    expected to return None.
    """
    frame = pd.DataFrame(range(5), columns=['id'])
    frame['y'] = frame['id'].apply(lambda value: 1 if value < 2 else 0)
    frame['x1'] = np.random.randint(1, 123, frame.shape[0])
    frame = frame.set_index('id')
    frame['x2'] = ["S", "M", "S", "D", "M"]
    frame['x3'] = np.random.randint(1, 123, frame.shape[0])
    frame['x4'] = ["S", "M", "S", "D", "M"]

    features_dict = {"x1": "age", "x2": "weight", "x3": 'test', 'x4': "test2"}
    columns_dict = {0: "x1", 1: "x2", 2: "x3", 3: "x4"}

    def fit_encoder(remainder):
        # Both scenarios share the transformers; only `remainder` changes.
        transformer = ColumnTransformer(
            transformers=[
                ('onehot_ce_1', ce.OneHotEncoder(), ['x2']),
                ('onehot_ce_2', ce.OneHotEncoder(), ['x4']),
            ],
            remainder=remainder,
        )
        return transformer.fit(frame[["x1", "x2", "x3", 'x4']])

    fitted_drop = fit_encoder('drop')
    fitted_passthrough = fit_encoder('passthrough')

    expected_with_drop = {
        'features_to_drop': ['x1', 'x3'],
        'features_dict_op': {'x2': 'weight', 'x4': 'test2'},
        'columns_dict_op': {0: 'x2', 1: 'x4'},
    }
    expected_with_passthrough = None

    # Run both checks before asserting, as the scenarios are independent.
    result_drop = check_preprocessing_options(
        columns_dict, features_dict, fitted_drop, [fitted_drop]
    )
    result_passthrough = check_preprocessing_options(
        columns_dict, features_dict, fitted_passthrough, [fitted_passthrough]
    )

    assert result_drop == expected_with_drop
    assert result_passthrough == expected_with_passthrough
def __init__(
    self,
    features_dict,
    model,
    columns_dict,
    explainer,
    features_types,
    label_dict=None,
    preprocessing=None,
    postprocessing=None,
    mask_params=None,
):
    """
    Validate the constructor arguments and store them on the instance.

    Parameters
    ----------
    features_dict : dict or None
        Must be a dict when provided.
    model : object
        Stored as ``self.model`` and then inspected by ``self.check_model()``.
    columns_dict : dict or None
        Must be a dict when provided.
    explainer : object
        Passed through ``self.check_explainer`` before being stored.
    features_types : dict or None
        Must be a dict when provided.
    label_dict : dict, optional
        Must be a dict when provided.
    preprocessing : object, optional
        Passed to ``check_preprocessing_options`` before being stored, then
        checked by ``self.check_preprocessing()``.
    postprocessing : dict, optional
        Must be a dict when provided.
    mask_params : dict, optional
        When omitted (or None), a fresh dict with the keys
        ``features_to_hide``, ``threshold``, ``positive`` and
        ``max_contrib`` all set to None is used.

    Raises
    ------
    ValueError
        If features_dict, features_types, label_dict, columns_dict or
        postprocessing is neither None nor a dict. The checks called
        afterwards may raise as well.
    """
    must_be_dicts = [
        features_dict,
        features_types,
        label_dict,
        columns_dict,
        postprocessing,
    ]
    for params in must_be_dicts:
        if params is not None and not isinstance(params, dict):
            raise ValueError(""" {0} must be a dict. """.format(str(params)))

    self.model = model
    self._case, self._classes = self.check_model()
    self.explainer = self.check_explainer(explainer)

    # Options are checked before the preprocessing is stored on the instance.
    check_preprocessing_options(preprocessing)
    self.preprocessing = preprocessing
    self.check_preprocessing()

    self.features_dict = features_dict
    self.features_types = features_types
    self.label_dict = label_dict
    self.check_label_dict()
    self.columns_dict = columns_dict

    # Build the default here rather than in the signature: a dict default
    # is created once at definition time and would be shared (and mutated)
    # across every instance that relies on it.
    if mask_params is None:
        mask_params = {
            "features_to_hide": None,
            "threshold": None,
            "positive": None,
            "max_contrib": None,
        }
    self.mask_params = mask_params
    self.check_mask_params()

    self.postprocessing = postprocessing

    check_consistency_model_features(
        self.features_dict,
        self.model,
        self.columns_dict,
        self.features_types,
        self.mask_params,
        self.preprocessing,
        self.postprocessing,
    )
    check_consistency_model_label(self.columns_dict, self.label_dict)