Example #1
0
    def test_understand(self):
        """Exercise Gpx.understand() with the 'loss' metric and, when that
        metric is rejected with ValueError, fall back to 'accuracy' and
        assert the local accuracy exceeds .9.
        """
        x, y = make_moons(n_samples=1500, noise=.4, random_state=17)
        clf = MLPClassifier()
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            train_size=.8,
                                                            test_size=.2,
                                                            random_state=17)
        clf.fit(x_train, y_train)

        # NOTE(review): Gpx is trained on the FULL data set (x, y) rather
        # than the train split used for the classifier — confirm intentional.
        gpx = Gpx(clf.predict_proba,
                  x_train=x,
                  y_train=y,
                  feature_names=['x', 'y'])
        gpx.explaining(x_test[30, :])

        # Renamed from `y` so the training labels above are not shadowed.
        y_proba = clf.predict_proba(x_test)
        gpx.logger.info(gpx.proba_transform(y_proba))

        try:
            u = gpx.understand(metric='loss')
        except ValueError as e:
            gpx.logger.exception(e)
            # Fallback: 'loss' unsupported, measure local accuracy instead.
            # NOTE(review): the assertion runs ONLY on this fallback path;
            # if 'loss' succeeds the test asserts nothing — confirm intended.
            u = gpx.understand(metric='accuracy')
            gpx.logger.info('test_understand accuracy {}'.format(u))
            self.assertGreater(u, .9, 'test_understand accuracy {}'.format(u))
Example #2
0
    def test_gpx_classify(self):
        """Check that Gpx reproduces the black-box prediction for one moons
        instance and that its local accuracy stays above 0.9.
        """
        instance_idx = 33
        features, labels = make_moons(n_samples=500, random_state=170)

        black_box = MLPClassifier()
        black_box.fit(features, labels)
        predict_fn = black_box.predict_proba

        explainer = Gpx(predict_fn,
                        x_train=features,
                        y_train=labels,
                        random_state=42,
                        num_samples=250)

        sample = features[instance_idx, :]
        y_hat_gpx = explainer.explaining(sample)
        y_hat_bb = predict_fn(sample.reshape(1, -1))

        local_accuracy = explainer.understand(metric='accuracy')

        explainer.logger.info(
            f'{self.test_gpx_classify.__name__} / y_hat_gpx: '
            f'{type(y_hat_gpx)} / y_hat_bb: {type(y_hat_bb)}')

        # Both predictions must agree on the explained instance.
        self.assertEqual(np.sum(y_hat_gpx), np.sum(y_hat_bb),
                         "gpx fail in predict the black-box prediction")

        explainer.logger.info(f'test accuracy: {local_accuracy}')
        self.assertGreater(
            local_accuracy, 0.9,
            'Accuracy decreasing in understand()  method of GPX class!!')
Example #3
0
    def test_gpx_regression(self):
        """Regression mode: the local (Gpx) MSE around one explained instance
        must be lower than the global RandomForest MSE on the test split.
        """
        INSTANCE: int = 13
        reg = RandomForestRegressor()
        # NOTE(review): load_boston was removed in scikit-learn 1.2; this
        # test needs an older scikit-learn (or a replacement data set).
        x, y = load_boston(return_X_y=True)
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            train_size=.8,
                                                            test_size=.2,
                                                            random_state=42)
        reg.fit(x_train, y_train)

        gpx = Gpx(predict=reg.predict,
                  x_train=x_train,
                  y_train=y_train,
                  problem='regression',
                  random_state=42)
        gpx.explaining(x_test[INSTANCE, :])
        y_hat = reg.predict(x_test)
        mse = mean_squared_error(y_test, y_hat)

        # Call retained in case it has side effects (e.g. logging); the
        # returned distribution is no longer asserted on, so the previous
        # unused binding and commented-out assertion were removed.
        gpx.features_distribution()

        self.assertLess(
            gpx.understand(metric='mse'), mse,
            '{} mse greater than understand (local mse)'.format(
                self.test_gpx_regression.__name__))
Example #4
0
    def test_multi_class_wine(self):
        """Multi-class explanation on the wine data set.

        Fits a RandomForest on Normalizer-scaled features, explains one test
        instance with Gpx, and logs local fidelity metrics.
        NOTE(review): this test only logs — it contains no assertions.
        """
        INSTANCE: int = 17
        # Load once; previously the data set was loaded twice and the Bunch
        # was misleadingly named `iris`.
        wine = load_wine()
        X, y = wine.data, wine.target

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

        scaler = Normalizer()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)

        clf = RandomForestClassifier()
        clf.fit(X_train, y_train)

        gp_hyper_parameters = {
            'population_size': 50,
            'generations': 50,
            'stopping_criteria': 0.0001,
            'p_crossover': 0.7,
            'p_subtree_mutation': 0.1,
            'p_hoist_mutation': 0.05,
            'p_point_mutation': 0.1,
            'const_range': (-1, 1),
            'parsimony_coefficient': 0.0005,
            'init_depth': (3, 6),
            'n_jobs': -1,
            'function_set': ('add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs',
                             'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan')
        }

        gpx = Gpx(clf.predict_proba,
                  gp_hyper_parameters=gp_hyper_parameters,
                  x_train=X_train,
                  y_train=y_train,
                  feature_names=wine.feature_names,
                  num_samples=1000,
                  k_neighbor=5)

        # BUG FIX: the instance must be a single ROW (1, n_features).
        # reshape(-1, 1) produced a (13, 1) column vector, so Normalizer
        # scaled 13 one-feature "samples" instead of one 13-feature sample.
        gpx.explaining(
            scaler.transform(X_test[INSTANCE, :].reshape(1, -1)))

        x_around = gpx.x_around

        gpx_y = gpx.gp_prediction(x_around)
        bb_y = clf.predict(x_around)

        gpx.logger.info('Multiclass: gpx_understand {}'.format(
            gpx.understand(metric='f1')))
        gpx.logger.info('Multiclass gpx_y:{} / bb_y {}'.format(gpx_y, bb_y))
        gpx.logger.info('test_understand mult-class accuracy {}'.format(
            accuracy_score(gpx_y, bb_y)))