def test_understand(self):
    """Gpx.understand must reject an unsupported metric ('loss') with a
    ValueError and report > 0.9 local fidelity under the 'accuracy' metric.
    """
    features, labels = make_moons(n_samples=1500, noise=.4, random_state=17)
    clf = MLPClassifier()
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, train_size=.8, test_size=.2, random_state=17)
    clf.fit(x_train, y_train)
    # The explainer is built over the full dataset (features/labels),
    # while the black box was fitted on the train split only.
    gpx = Gpx(clf.predict_proba, x_train=features, y_train=labels,
              feature_names=['x', 'y'])
    gpx.explaining(x_test[30, :])
    proba = clf.predict_proba(x_test)
    gpx.logger.info(gpx.proba_transform(proba))
    try:
        score = gpx.understand(metric='loss')
    except ValueError as err:
        # 'loss' is not a valid metric name; the exception is expected.
        gpx.logger.exception(err)
    score = gpx.understand(metric='accuracy')
    gpx.logger.info('test_understand accuracy {}'.format(score))
    self.assertGreater(score, .9, 'test_understand accuracy {}'.format(score))
def test_gpx_classify(self):
    """The GP surrogate's prediction for one instance must agree with the
    black-box prediction, and local accuracy must exceed 0.9.
    """
    instance_idx = 33
    x_varied, y_varied = make_moons(n_samples=500, random_state=170)
    model = MLPClassifier()
    model.fit(x_varied, y_varied)
    my_predict = model.predict_proba
    gpx = Gpx(my_predict, x_train=x_varied, y_train=y_varied,
              random_state=42, num_samples=250)
    # Explain a single instance; the black box needs a (1, n_features) row.
    y_hat_gpx = gpx.explaining(x_varied[instance_idx, :])
    y_hat_bb = my_predict(x_varied[instance_idx, :].reshape(1, -1))
    acc = gpx.understand(metric='accuracy')
    gpx.logger.info('{} / y_hat_gpx: {} / y_hat_bb: {}'.format(
        self.test_gpx_classify.__name__, type(y_hat_gpx), type(y_hat_bb)))
    self.assertEqual(np.sum(y_hat_gpx), np.sum(y_hat_bb),
                     "gpx fail in predict the black-box prediction")
    gpx.logger.info('test accuracy: {}'.format(acc))
    self.assertGreater(
        acc, 0.9,
        'Accuracy decreasing in understand() method of GPX class!!')
def test_gpx_regression(self):
    """Local surrogate MSE around one instance must be lower than the
    black box's global MSE on the test split.
    """
    instance_idx: int = 13
    reg = RandomForestRegressor()
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, train_size=.8, test_size=.2, random_state=42)
    reg.fit(x_train, y_train)
    gpx = Gpx(predict=reg.predict, x_train=x_train, y_train=y_train,
              problem='regression', random_state=42)
    gpx.explaining(x_test[instance_idx, :])
    # Global error of the black box over the whole test split.
    global_mse = mean_squared_error(y_test, reg.predict(x_test))
    d = gpx.features_distribution()
    # self.assertEqual(max(list(d.values())), d['x_2'])
    self.assertLess(
        gpx.understand(metric='mse'),
        global_mse,
        '{} mse greater than understand (local mse)'.format(
            self.test_gpx_regression.__name__))
def test_multi_class_wine(self):
    """Multi-class explanation on the wine dataset.

    Fits a RandomForest on normalized features, explains one test
    instance with GPX, and logs the surrogate-vs-black-box agreement
    (F1 via understand(), plus raw accuracy over the sampled
    neighborhood x_around).
    """
    INSTANCE: int = 17
    # NOTE: the dataset is wine, not iris — renamed accordingly; only
    # `.feature_names` is read from the bunch object.
    wine = load_wine()
    X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    scaler = Normalizer()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)
    gp_hyper_parameters = {
        'population_size': 50,
        'generations': 50,
        'stopping_criteria': 0.0001,
        'p_crossover': 0.7,
        'p_subtree_mutation': 0.1,
        'p_hoist_mutation': 0.05,
        'p_point_mutation': 0.1,
        'const_range': (-1, 1),
        'parsimony_coefficient': 0.0005,
        'init_depth': (3, 6),
        'n_jobs': -1,
        'function_set': ('add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs',
                         'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan')
    }
    gpx = Gpx(clf.predict_proba,
              gp_hyper_parameters=gp_hyper_parameters,
              x_train=X_train, y_train=y_train,
              feature_names=wine.feature_names,
              num_samples=1000, k_neighbor=5)
    # BUG FIX: reshape(1, -1) builds the (1, n_features) single-sample row
    # that Normalizer.transform and Gpx.explaining expect.  The previous
    # reshape(-1, 1) produced an (n_features, 1) column, so the scaler
    # "normalized" each scalar feature as its own one-element sample.
    gpx.explaining(scaler.transform(X_test[INSTANCE, :].reshape(1, -1)))
    x_around = gpx.x_around
    gpx_y = gpx.gp_prediction(x_around)
    bb_y = clf.predict(x_around)
    gpx.logger.info('Multiclass: gpx_understand {}'.format(
        gpx.understand(metric='f1')))
    gpx.logger.info('Multiclass gpx_y:{} / bb_y {}'.format(gpx_y, bb_y))
    gpx.logger.info('test_understand mult-class accuracy {}'.format(
        accuracy_score(gpx_y, bb_y)))