Python CDLRandom.tosparse 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: secml.data.loader

클래스/타입: CDLRandom

메소드/함수: tosparse

hotexamples.com에서의 예제들: 9

Python CDLRandom.tosparse 예제 9개를 찾았습니다. 오픈소스 프로젝트에서 추출한, Python secml.data.loader.CDLRandom.tosparse의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

CDLRandom(30)

tosparse(9)

todense(7)

get_bounds(4)

load(3)

X(2)

get_labels_onehot(1)

예제 #1

파일 보기

파일: test_train_test_split.py 프로젝트: eweroliveira/Poisoning-Attacks-on-Algorithmic-Fairness

    def test_train_test_split(self):
        """Check `CTrainTestSplit` on a dense and on a sparse random dataset.

        With `shuffle=False`, a 0.5 split of 10 samples is expected to pick
        indices 0-4 for training and 5-9 for testing, and `split` must
        return the `CDataset` slices addressed by those indices. A 0.25
        split of the sparse dataset is expected to produce 2 training and
        8 test samples, both still sparse.
        """
        dense_ds = CDLRandom(n_samples=10, random_state=0).load()
        splitter = CTrainTestSplit(
            train_size=0.5, random_state=0, shuffle=False)

        tr_idx, ts_idx = splitter.compute_indices(dense_ds)
        self.logger.info("TR IDX:\n{:}".format(tr_idx))
        self.logger.info("TS IDX:\n{:}".format(ts_idx))

        wanted_idx = (CArray([0, 1, 2, 3, 4]), CArray([5, 6, 7, 8, 9]))

        # Types first, then contents (same assertion order as before)
        for idx in (tr_idx, ts_idx):
            self.assertIsInstance(idx, CArray)
        for idx, wanted in zip((tr_idx, ts_idx), wanted_idx):
            self.assertFalse((idx != wanted).any())

        tr, ts = splitter.split(dense_ds)

        # Reference subsets obtained by indexing the dataset directly
        tr_ref = dense_ds[tr_idx, :]
        ts_ref = dense_ds[ts_idx, :]

        for part in (tr, ts):
            self.assertIsInstance(part, CDataset)
        for part, ref in ((tr, tr_ref), (ts, ts_ref)):
            self.assertFalse((part.X != ref.X).any())
            self.assertFalse((part.Y != ref.Y).any())

        self.logger.info("Testing splitting of sparse dataset")
        sparse_ds = CDLRandom(n_samples=10, random_state=0).load().tosparse()

        sparse_splitter = CTrainTestSplit(
            train_size=0.25, random_state=0, shuffle=False)
        tr, ts = sparse_splitter.split(sparse_ds)

        self.assertEqual(2, tr.num_samples)
        self.assertEqual(8, ts.num_samples)

        for part in (tr, ts):
            self.assertTrue(part.issparse)

예제 #2

파일 보기

파일: test_c_classifier_nearest_centroid.py 프로젝트: eweroliveira/Poisoning-Attacks-on-Algorithmic-Fairness

class TestCClassifierNearestCentroid(CClassifierTestCases):
    """Unit test for CClassifierNearestCentroid."""

    def setUp(self):
        """Create the min-max normalized dataset and the classifier."""
        dataset = CDLRandom(
            n_features=2,
            n_redundant=0,
            n_informative=1,
            n_clusters_per_class=1,
        ).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

        self.dataset = dataset
        self.nc = CClassifierNearestCentroid()

    def test_plot(self):
        """Plot the classifier on a 3-blob dataset and save the figure."""
        blobs = CDLRandomBlobs(
            n_samples=100, centers=3, n_features=2, random_state=1).load()
        figure = self._test_plot(self.nc, blobs, [-10])

        target = fm.join(
            fm.abspath(__file__),
            'figs',
            'test_c_classifier_nearest_centroid.pdf',
        )
        figure.savefig(target)

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        # Dense first, then sparse; the generator keeps the original
        # convert-then-test ordering for each representation.
        dense_scores, sparse_scores = (
            self._test_fun(self.nc, convert())
            for convert in (self.dataset.todense, self.dataset.tosparse)
        )

        self.assert_array_almost_equal(dense_scores, sparse_scores)

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        data = CDLRandom().load()

        chains = (
            # All linear transformations
            (['min-max', 'mean-std'], [{'feature_range': (-1, 1)}, {}]),
            # Mixed linear/nonlinear transformations
            (['pca', 'unit-norm'], [{}, {}]),
        )
        for names, params in chains:
            self._test_preprocess(data, self.nc, names, params)

예제 #3

파일 보기

class TestCClassifierRidge(CClassifierTestCases):
    """Unit test for Ridge Classifier."""

    def setUp(self):
        """Build the normalized dataset and fit one ridge per kernel type."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100, n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        # A `None` entry yields a ridge classifier with `preprocess=None`
        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.ridges = [CClassifierRidge(
            preprocess=kernel() if kernel is not None else None)
            for kernel in kernel_types]
        self.logger.info(
            "Testing RIDGE with kernel functions: %s", str(kernel_types))

        for ridge in self.ridges:
            ridge.verbose = 2  # Enabling debug output for each classifier
            ridge.fit(self.dataset.X, self.dataset.Y)

    @staticmethod
    def _make_reference_svm(ridge):
        """Return an unfitted CClassifierSVM mirroring the ridge's kernel.

        The kernel is always passed through the `kernel` keyword (never
        positionally) and is deep-copied, so that fitting the SVM works on
        its own kernel object rather than on the one held by `ridge`.

        Parameters
        ----------
        ridge : CClassifierRidge
            Classifier whose `preprocess` attribute (a kernel or None) is
            mirrored in the SVM.

        """
        kernel = ridge.preprocess
        if kernel is not None:
            kernel = kernel.deepcopy()
        return CClassifierSVM(kernel=kernel)

    def test_time(self):
        """ Compare execution time of ridge and SVM"""
        self.logger.info("Testing training speed of ridge compared to SVM ")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            svm = self._make_reference_svm(ridge)

            with self.timer() as t_svm:
                svm.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of SVM: {:}".format(t_svm.interval))
            with self.timer() as t_ridge:
                ridge.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of ridge: {:}".format(t_ridge.interval))

    def test_plot(self):
        """ Compare the classifiers graphically"""
        ds = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                       n_clusters_per_class=1, random_state=0).load()
        ds.X = CNormalizerMinMax().fit_transform(ds.X)
        # Only the first ridge (the one built with `preprocess=None`)
        # is plotted
        fig = self._test_plot(self.ridges[0], ds)
        fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                            'test_c_classifier_ridge.pdf'))

    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            svm = self._make_reference_svm(ridge)
            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(
                self.dataset.X, return_decision_function=True)
            label_ridge, y_ridge = ridge.predict(
                self.dataset.X, return_decision_function=True)

            # NOTE: the score computed here is the 'f1' metric, although
            # the variables and log messages call it "accuracy"
            acc_svm = CMetric.create('f1').performance_score(
                self.dataset.Y, label_svm)
            acc_ridge = CMetric.create('f1').performance_score(
                self.dataset.Y, label_ridge)

            self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
            self.assertGreater(acc_svm, 0.90,
                               "Accuracy of SVM: {:}".format(acc_svm))
            self.logger.info("Accuracy of ridge: {:}".format(acc_ridge))
            self.assertGreater(acc_ridge, 0.90,
                               "Accuracy of ridge: {:}".format(acc_ridge))

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            scores_d = self._test_fun(ridge, self.dataset.todense())
            scores_s = self._test_fun(ridge, self.dataset.tosparse())

            # FIXME: WHY THIS TEST IS CRASHING? RANDOM_STATE MAYBE?
            # The dense/sparse comparison below stays disabled until the
            # crash is understood; `scores_d`/`scores_s` are kept for it.
            # self.assert_array_almost_equal(scores_d, scores_s)

    def test_gradient(self):
        """Unittests for gradient_f_x."""
        self.logger.info("Testing Ridge.gradient_f_x() method")

        i = 5  # IDX of the point to test
        pattern = self.dataset.X[i, :]
        self.logger.info("P {:}: {:}".format(i, pattern))

        for ridge in self.ridges:

            self.logger.info(
                "Checking grad. for Ridge with kernel: %s", ridge.preprocess)

            # set gamma for poly and rbf
            if hasattr(ridge.preprocess, 'gamma'):
                ridge.set('gamma', 1e-5)
            if hasattr(ridge.preprocess, 'degree'):  # set degree for poly
                ridge.set('degree', 3)

            self.logger.info("Testing dense data...")
            ds = self.dataset.todense()
            ridge.fit(ds.X, ds.Y)

            # Run the comparison with numerical gradient
            # (all classes will be tested)
            grads_d = self._test_gradient_numerical(ridge, pattern.todense())

            self.logger.info("Testing sparse data...")
            ds = self.dataset.tosparse()
            ridge.fit(ds.X, ds.Y)

            # Run the comparison with numerical gradient
            # (all classes will be tested)
            grads_s = self._test_gradient_numerical(ridge, pattern.tosparse())

            # FIXME: WHY THIS TEST IS CRASHING? RANDOM_STATE MAYBE?
            # The dense/sparse comparison below stays disabled until the
            # crash is understood; `grads_d`/`grads_s` are kept for it.
            # Compare dense gradients with sparse gradients
            # for grad_i, grad in enumerate(grads_d):
            #     self.assert_array_almost_equal(
            #         grad.atleast_2d(), grads_s[grad_i])

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ds = CDLRandom().load()
        clf = CClassifierRidge()

        # All linear transformations with gradient implemented
        self._test_preprocess(ds, clf,
                              ['min-max', 'mean-std'],
                              [{'feature_range': (-1, 1)}, {}])
        self._test_preprocess_grad(ds, clf,
                                   ['min-max', 'mean-std'],
                                   [{'feature_range': (-1, 1)}, {}])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}])

예제 #4

파일 보기

파일: test_c_classifier_svm.py 프로젝트: pralab/secml

class TestCClassifierSVM(CClassifierTestCases):
    """Unit tests for CClassifierSVM."""

    def setUp(self):
        """Create dense/sparse datasets and one SVM per kernel type."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=2,
                                 n_redundant=0,
                                 n_informative=1,
                                 n_clusters_per_class=1,
                                 random_state=1).load()

        self.dataset_sparse = self.dataset.tosparse()

        # NOTE: tests that pick a classifier by position depend on this
        # order: index 0 is the SVM built with `kernel=None`, index 1 the
        # one built with `CKernelLinear`.
        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.svms = [
            CClassifierSVM(kernel=kernel() if kernel is not None else None)
            for kernel in kernel_types
        ]
        self.logger.info("Testing SVM with kernel functions: %s",
                         str(kernel_types))

        for svm in self.svms:  # Enabling debug output for each classifier
            svm.verbose = 2

        self.logger.info("." * 50)
        self.logger.info("Number of Patterns: %s",
                         str(self.dataset.num_samples))
        self.logger.info("Features: %s", str(self.dataset.num_features))

    def test_attributes(self):
        """Performs test on SVM attributes setting."""
        self.logger.info("Testing SVM attributes setting")

        for svm in self.svms:
            svm.set('C', 10)
            self.assertEqual(svm.C, 10)
            svm.set('class_weight', {-1: 1, 1: 50})
            # set gamma for poly and rbf and check if it is set properly
            if hasattr(svm.kernel, 'gamma'):
                svm.set('gamma', 100)
                self.assertEqual(svm.kernel.gamma, 100)

    def test_linear_svm(self):
        """Performs tests on linear SVM."""
        self.logger.info("Testing SVM linear variants (kernel and not)")

        # Instancing a linear SVM and an SVM with linear kernel.
        # `self.svms[0]` is built with `kernel=None` (see setUp), so the
        # linear-kernel classifier is the one at index 1.
        linear_svm = CClassifierSVM(kernel=None)
        kernel_linear_svm = self.svms[1]

        self.logger.info("SVM w/ linear kernel in the primal")
        self.assertIsNone(linear_svm.kernel)
        # Guard against comparing two kernel-less SVMs with each other
        self.assertIsNotNone(kernel_linear_svm.kernel)

        self.logger.info("Training both classifiers on dense data")
        linear_svm.fit(self.dataset.X, self.dataset.Y)
        kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset.X, return_decision_function=True)
        kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

        self.logger.info("Training both classifiers on sparse data")
        linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
        kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

        self.assertTrue(
            linear_svm.w.issparse, "Weights vector is not sparse even "
            "if training data is sparse")

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)
        kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

    def test_predict(self):
        """Performs tests on SVM prediction capabilities."""
        self.logger.info("Testing SVM predict accuracy")

        for svm in self.svms:
            self.logger.info("SVM with kernel: %s", svm.kernel.__class__)

            # Training and predicting using our SVM
            svm.fit(self.dataset.X, self.dataset.Y)

            pred_y, pred_score = svm.predict(self.dataset.X,
                                             return_decision_function=True)

            # Training and predicting an SKlearn SVC
            k = svm.kernel.class_type if svm.kernel is not None else 'linear'
            sklearn_svm = SVC(kernel=k)

            # Setting similarity function parameters into SVC too
            # Exclude params not settable in sklearn_svm
            if svm.kernel is not None:
                p_dict = {}
                for p in svm.kernel.get_params():
                    if p in sklearn_svm.get_params():
                        p_dict[p] = svm.kernel.get_params()[p]
                sklearn_svm.set_params(**p_dict)

            sklearn_svm.fit(self.dataset.X.get_data(),
                            np.ravel(self.dataset.Y.get_data()))
            sklearn_pred_y = sklearn_svm.predict(self.dataset.X.get_data())
            sklearn_score = sklearn_svm.decision_function(
                self.dataset.X.get_data())

            # Test if sklearn pred_y are equal to our predicted labels
            self.assert_array_equal(pred_y, sklearn_pred_y)
            # Test if sklearn computed distance from separating hyperplane
            # is the same of own. This is a fix for some architectures that
            # exhibit floating point problems
            self.assert_allclose(pred_score[:, 1].ravel(), sklearn_score)

            # EVALUATE PERFORMANCE
            accuracy = skm.accuracy_score(self.dataset.Y.get_data(),
                                          sklearn_pred_y)
            self.logger.info("Prediction accuracy for kernel %s is %f ",
                             svm.kernel.__class__, accuracy)

    def test_shape(self):
        """Test shape of SVM parameters, scores etc."""
        import random

        self.logger.info("Testing SVM related vector shape")

        def _check_flattness(array):
            # self.assertEqual(len(array.shape) == 1, True)
            self.assertTrue(array.is_vector_like)

        for svm in self.svms:

            self.logger.info("SVM with similarity function: %s",
                             svm.kernel.__class__)

            # Training and predicting using our SVM
            svm.fit(self.dataset.X, self.dataset.Y)
            pred_y, pred_score = svm.predict(self.dataset.X,
                                             return_decision_function=True)
            # chose random one pattern
            pattern = CArray(random.choice(self.dataset.X.get_data()))
            gradient = svm.grad_f_x(pattern, y=1)

            # Check whichever of `w` / `alpha` the classifier exposes
            if svm.w is not None:
                _check_flattness(svm.w)
            else:
                _check_flattness(svm.alpha)

            _check_flattness(pred_y)
            _check_flattness(gradient)

    def test_sparse(self):
        """Performs tests on sparse dataset."""
        self.logger.info("Testing SVM on sparse data")

        def _check_sparsedata(y, score, y_sparse, score_sparse):
            self.assertFalse((y != y_sparse).any(),
                             "Predicted labels on sparse data are different.")
            # Rounding scores to prevent false positives in assert
            score_rounded = score[:, 1].ravel().round(3)
            score_sparse_rounded = score_sparse[:, 1].ravel().round(3)
            self.assertFalse((score_rounded != score_sparse_rounded).any(),
                             "Predicted Scores on sparse data are different.")

        for svm in self.svms:
            self.logger.info("SVM with similarity function: %s",
                             svm.kernel.__class__)

            # Training and predicting on dense data for reference
            svm.fit(self.dataset.X, self.dataset.Y)
            pred_y, pred_score = svm.predict(self.dataset.X,
                                             return_decision_function=True)

            # Training and predicting on sparse data
            svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
            pred_y_sparse, pred_score_sparse = svm.predict(
                self.dataset_sparse.X, return_decision_function=True)

            _check_sparsedata(pred_y, pred_score, pred_y_sparse,
                              pred_score_sparse)

            # Training on sparse and predicting on dense
            svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
            pred_y_sparse, pred_score_sparse = svm.predict(
                self.dataset.X, return_decision_function=True)

            _check_sparsedata(pred_y, pred_score, pred_y_sparse,
                              pred_score_sparse)

            # Training on dense and predicting on sparse
            svm.fit(self.dataset.X, self.dataset.Y)
            pred_y_sparse, pred_score_sparse = svm.predict(
                self.dataset_sparse.X, return_decision_function=True)

            _check_sparsedata(pred_y, pred_score, pred_y_sparse,
                              pred_score_sparse)

    def test_margin(self):
        """Plot the decision lines of an unweighted and a weighted SVM."""
        self.logger.info("Testing margin separation of SVM...")

        import numpy as np

        # Two unbalanced 2-D point clouds: 1000 points labelled 0 and
        # 100 points labelled 1, the latter shifted by [2, 2]
        rng = np.random.RandomState(0)
        n_samples_1 = 1000
        n_samples_2 = 100
        X = np.r_[1.5 * rng.randn(n_samples_1, 2),
                  0.5 * rng.randn(n_samples_2, 2) + [2, 2]]
        y = [0] * (n_samples_1) + [1] * (n_samples_2)

        dataset = CDataset(X, y)

        # fit the model
        clf = CClassifierSVM()
        clf.fit(dataset.X, dataset.Y)

        # Line w[0] * x + w[1] * y + b = 0 rewritten as y = a * x - b / w[1]
        w = clf.w
        a = -w[0] / w[1]

        xx = CArray.linspace(-5, 5)
        yy = a * xx - clf.b / w[1]

        # Same model, but class 1 weighted 10 times more than class 0
        wclf = CClassifierSVM(class_weight={0: 1, 1: 10})
        wclf.fit(dataset.X, dataset.Y)

        ww = wclf.w
        wa = -ww[0] / ww[1]
        wyy = wa * xx - wclf.b / ww[1]

        fig = CFigure(linewidth=1)
        fig.sp.plot(xx, yy.ravel(), 'k-', label='no weights')
        fig.sp.plot(xx, wyy.ravel(), 'k--', label='with weights')
        fig.sp.scatter(X[:, 0].ravel(), X[:, 1].ravel(), c=y)
        fig.sp.legend()

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_svm.pdf'))

    def test_store_dual_vars(self):
        """Test of parameters that control storing of dual space variables."""
        self.logger.info("Checking CClassifierSVM.store_dual_vars...")

        self.logger.info("Linear SVM in primal space")
        svm = CClassifierSVM()
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNone(svm.alpha)

        self.logger.info("Linear SVM in dual space")
        svm = CClassifierSVM(kernel='linear')
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.alpha)

        self.logger.info("Nonlinear SVM in dual space")
        svm = CClassifierSVM(kernel='rbf')
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.alpha)

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        for clf in self.svms:
            self.logger.info("SVM kernel: {:}".format(clf.kernel))

            scores_d = self._test_fun(clf, self.dataset.todense())
            scores_s = self._test_fun(clf, self.dataset.tosparse())

            self.assert_array_almost_equal(scores_d, scores_s)

    def test_gradient(self):
        """Performs tests on gradient."""
        self.logger.info("Testing SVM.gradient() method")

        import random
        for svm in self.svms:

            self.logger.info("Computing gradient for SVM with kernel: %s",
                             svm.kernel)

            if hasattr(svm.kernel, 'gamma'):  # set gamma for poly and rbf
                svm.set('gamma', 1e-5)
            if hasattr(svm.kernel, 'degree'):  # set degree for poly
                svm.set('degree', 3)

            # The same 5 sample indices are used for dense and sparse data,
            # so the two gradient lists can be compared element-wise below
            samps = random.sample(range(self.dataset.num_samples), 5)

            self.logger.info("Testing dense data...")
            ds = self.dataset.todense()
            svm.fit(ds.X, ds.Y)

            grads_d = []
            for i in samps:
                # Randomly extract a pattern to test
                pattern = ds.X[i, :]
                self.logger.info("P {:}: {:}".format(i, pattern))
                # Run the comparison with numerical gradient
                # (all classes will be tested)
                grads_d += self._test_gradient_numerical(svm, pattern)

            self.logger.info("Testing sparse data...")
            ds = self.dataset.tosparse()
            svm.fit(ds.X, ds.Y)

            grads_s = []
            for i in samps:
                # Randomly extract a pattern to test
                pattern = ds.X[i, :]
                self.logger.info("P {:}: {:}".format(i, pattern))
                # Run the comparison with numerical gradient
                # (all classes will be tested)
                grads_s += self._test_gradient_numerical(svm, pattern)

            # Compare dense gradients with sparse gradients
            for grad_i, grad in enumerate(grads_d):
                self.assert_array_almost_equal(grad.atleast_2d(),
                                               grads_s[grad_i])

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ds = CDLRandom().load()
        clf = CClassifierSVM()

        # All linear transformations with gradient implemented
        self._test_preprocess(ds, clf, ['min-max', 'mean-std'], [{
            'feature_range': (-1, 1)
        }, {}])
        self._test_preprocess_grad(ds, clf, ['min-max', 'mean-std'], [{
            'feature_range': (-1, 1)
        }, {}])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}])

    def test_multiclass(self):
        """Test multiclass SVM on MNIST digits."""

        self.logger.info("Testing multiclass SVM.")

        digits = tuple(range(0, 10))
        n_tr = 100  # Number of training set samples
        n_ts = 200  # Number of test set samples

        loader = CDataLoaderMNIST()
        tr = loader.load('training', digits=digits, num_samples=n_tr)
        ts = loader.load('testing', digits=digits, num_samples=n_ts)

        # Normalize the features in `[0, 1]`
        tr.X /= 255
        ts.X /= 255

        svm_params = {
            'kernel': CKernelRBF(gamma=0.1),
            'C': 10,
            'class_weight': {
                0: 1,
                1: 1
            },
            'n_jobs': 2
        }
        # The same parameters are given to the explicit one-vs-all wrapper
        # and to a plain CClassifierSVM; both must agree (asserted below)
        classifiers = [
            CClassifierMulticlassOVA(CClassifierSVM, **svm_params),
            CClassifierSVM(**svm_params),
        ]

        grads = []
        acc = []
        for clf in classifiers:
            clf.verbose = 1
            # We can now fit the classifier
            clf.fit(tr.X, tr.Y)
            # Compute predictions on a test set
            y_pred, scores = clf.predict(ts.X, return_decision_function=True)
            # Evaluate the accuracy of the classifier
            metric = CMetricAccuracy()
            acc.append(metric.performance_score(y_true=ts.Y, y_pred=y_pred))
            grads.append(clf.grad_f_x(ts.X[1, :], 1))

        self.assertAlmostEqual(acc[0], acc[1])
        self.assert_array_almost_equal(grads[0], grads[1])

예제 #5

파일 보기

파일: test_c_classifier_multi_ovo.py 프로젝트: pralab/secml

class TestCClassifierMultiOVO(CClassifierTestCases):
    """Unittests for CClassifierMultiOVO."""

    def setUp(self):
        """Load the synthetic 4-class dataset shared by the tests."""
        # generate synthetic data
        self.dataset = CDLRandom(n_classes=4, n_clusters_per_class=1).load()

    def test_predict_withsvm(self):
        """Compare training-set accuracy with sklearn's OneVsOneClassifier."""
        svc = SVC(kernel='linear', class_weight='balanced')
        multiclass_sklearn = OneVsOneClassifier(svc)
        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced',
                                              n_jobs=2)
        multiclass.verbose = 2

        multiclass.fit(self.dataset.X, self.dataset.Y)
        class_pred, score_pred = multiclass.predict(
            self.dataset.X, return_decision_function=True)

        self.logger.info("Predicted: \n{:}".format(class_pred))
        self.logger.info("Real: \n{:}".format(self.dataset.Y))

        acc = CMetric.create('accuracy').performance_score(
            self.dataset.Y, class_pred)
        self.logger.info("Accuracy: {:}".format(acc))

        multiclass_sklearn.fit(self.dataset.X.get_data(),
                               self.dataset.Y.tondarray())
        y_sklearn = multiclass_sklearn.predict(self.dataset.X.get_data())

        acc_sklearn = CMetric.create('accuracy').performance_score(
            self.dataset.Y, CArray(y_sklearn))
        self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn))

        # The two implementations only need to agree loosely
        self.assertLess(abs(acc - acc_sklearn), 0.21)

    def test_set(self):
        """Test parameter setting before and after training."""
        from secml.ml.kernels import CKernelRBF
        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              C=1,
                                              kernel=CKernelRBF())
        # Test set before training
        multiclass.set_params({'C': 100, 'kernel.gamma': 20})
        for clf in multiclass._binary_classifiers:
            self.assertEqual(clf.C, 100.0)
            self.assertEqual(clf.kernel.gamma, 20.0)

        # Restoring kernel
        multiclass.set('kernel.gamma', 50)

        # Setting different parameter in single trained_classifiers
        multiclass.prepare(num_classes=6)
        different_c = (10, 20, 30, 40, 50, 60)
        multiclass.set('C', different_c)
        different_gamma = (70, 80, 90, 100, 110, 120)
        multiclass.set('kernel.gamma', different_gamma)

        # Fit multiclass classifier then test set after training
        multiclass.fit(self.dataset.X, self.dataset.Y)

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, different_c[clf_idx])
            self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx])

        # Test set after training
        multiclass.set_params({'C': 30, 'kernel.gamma': 200})
        for clf in multiclass._binary_classifiers:
            self.assertEqual(clf.C, 30.0)
            self.assertEqual(clf.kernel.gamma, 200.0)

        # Setting parameter in single trained_classifiers
        multiclass._binary_classifiers[0].kernel.gamma = 300
        for i in range(1, multiclass.num_classifiers):
            self.assertNotEqual(multiclass._binary_classifiers[i].kernel.gamma,
                                300.0)

        # Setting different parameter in single trained_classifiers
        different_c = (100, 200, 300)

        # ValueError is raised as not enough binary classifiers are available
        with self.assertRaises(ValueError):
            multiclass.set('C', different_c)

        multiclass.prepare(num_classes=3)
        multiclass.set('C', different_c)
        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, different_c[clf_idx])

    def test_apply_method(self):
        """Test that apply_method sets `C` on every binary classifier."""
        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced')
        multiclass.fit(self.dataset.X, self.dataset.Y)
        multiclass.apply_method(CClassifierSVM.set,
                                param_name='C',
                                param_value=150)

        for i in range(multiclass.num_classifiers):
            self.assertEqual(multiclass._binary_classifiers[i].C, 150)

    def test_normalization(self):
        """Test data normalization inside CClassifierMulticlassOVO."""
        from secml.ml.features.normalization import CNormalizerMinMax

        ds_norm_x = CNormalizerMinMax().fit_transform(self.dataset.X)

        # Classifier without preprocess, fed with externally normalized data
        multi_nonorm = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                                class_weight='balanced')
        multi_nonorm.fit(ds_norm_x, self.dataset.Y)
        pred_y_nonorm = multi_nonorm.predict(ds_norm_x)

        # Classifier normalizing internally, fed with the raw data
        multi = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                         class_weight='balanced',
                                         preprocess='min-max')
        multi.fit(self.dataset.X, self.dataset.Y)
        pred_y = multi.predict(self.dataset.X)

        self.logger.info("Predictions with internal norm:\n{:}".format(pred_y))
        self.logger.info(
            "Predictions with external norm:\n{:}".format(pred_y_nonorm))

        self.assertFalse((pred_y_nonorm != pred_y).any())

    def test_plot_decision_function(self):
        """Test plot of multiclass classifier decision function."""
        # generate synthetic data
        ds = CDLRandom(n_classes=3,
                       n_features=2,
                       n_redundant=0,
                       n_clusters_per_class=1,
                       class_sep=1,
                       random_state=0).load()

        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced',
                                              preprocess='min-max')

        # Training and classification
        multiclass.fit(ds.X, ds.Y)
        y_pred, score_pred = multiclass.predict(ds.X,
                                                return_decision_function=True)

        def plot_hyperplane(img, clf, min_v, max_v, linestyle, label):
            """Plot the hyperplane associated to the OVO clf."""
            xx = CArray.linspace(min_v - 5, max_v +
                                 5)  # make sure the line is long enough
            # get the separating hyperplane
            yy = -(clf.w[0] * xx + clf.b) / clf.w[1]
            img.sp.plot(xx, yy.ravel(), linestyle, label=label)

        fig = CFigure(height=7, width=8)
        fig.sp.title('{:} ({:})'.format(multiclass.__class__.__name__,
                                        multiclass.classifier.__name__))

        x_bounds, y_bounds = ds.get_bounds()

        styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.']

        for c_idx, c in enumerate(ds.classes):
            # Plot boundary and predicted label for each OVO classifier

            plot_hyperplane(fig, multiclass._binary_classifiers[c_idx],
                            x_bounds[0], x_bounds[1], styles[c_idx],
                            'Boundary\nfor class {:}'.format(c))

            # True labels: small filled markers
            fig.sp.scatter(ds.X[ds.Y == c, 0],
                           ds.X[ds.Y == c, 1],
                           s=40,
                           c=styles[c_idx][0])
            # Predicted labels: large hollow markers
            fig.sp.scatter(ds.X[y_pred == c, 0],
                           ds.X[y_pred == c, 1],
                           s=160,
                           edgecolors=styles[c_idx][0],
                           facecolors='none',
                           linewidths=2)

        # Plotting multiclass decision function
        fig.sp.plot_decision_regions(multiclass,
                                     n_grid_points=100,
                                     grid_limits=ds.get_bounds(offset=5))

        fig.sp.xlim(x_bounds[0] - .5 * x_bounds[1],
                    x_bounds[1] + .5 * x_bounds[1])
        fig.sp.ylim(y_bounds[0] - .5 * y_bounds[1],
                    y_bounds[1] + .5 * y_bounds[1])

        fig.sp.legend(loc=4)  # lower, right

        fig.show()

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        self.logger.info("Test for decision_function() and predict() methods.")

        mc = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                      class_weight='balanced')

        scores_d = self._test_fun(mc, self.dataset.todense())
        scores_s = self._test_fun(mc, self.dataset.tosparse())

        self.assert_array_almost_equal(scores_d, scores_s)

    def test_gradient(self):
        """Unittests for gradient() function.

        The gradient of each class is first rebuilt by hand from the binary
        classifiers and compared with `gradient()`. The numerical gradient
        check is then run on a dense-trained and on a sparse-trained model,
        and the two sets of gradients are compared.
        """
        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced')

        i = 5  # Sample to test

        self.logger.info("Testing with dense data...")
        ds = self.dataset.todense()
        multiclass.fit(ds.X, ds.Y)

        pattern = ds.X[i, :]

        # Check the gradient of each class. A separate loop name keeps the
        # sample index `i` intact for the sparse part of the test.
        for c in range(multiclass.n_classes):

            # Compute the gradient for class c
            ovo_grad_pos = CArray.zeros(shape=pattern.shape,
                                        dtype=pattern.dtype,
                                        sparse=pattern.issparse)
            ovo_grad_neg = CArray.zeros(shape=pattern.shape,
                                        dtype=pattern.dtype,
                                        sparse=pattern.issparse)
            for j in range(multiclass.num_classifiers):
                idx_pos = multiclass._clf_pair_idx[j][0]
                idx_neg = multiclass._clf_pair_idx[j][1]

                if idx_pos == c:
                    w_bin = CArray([1, 0])
                    grad_pos = \
                        multiclass._binary_classifiers[j].gradient(pattern, w_bin)
                    ovo_grad_pos += grad_pos
                if idx_neg == c:
                    w_bin = CArray([0, 1])
                    grad_neg = \
                        multiclass._binary_classifiers[j].gradient(pattern, w_bin)
                    ovo_grad_neg += grad_neg

            # NOTE(review): 3 presumably is n_classes - 1 for the 4-class
            # dataset built in setUp -- confirm before changing the dataset.
            ovo_grad = (ovo_grad_pos + ovo_grad_neg) / 3

            w = CArray.zeros(shape=multiclass.n_classes)
            w[c] = 1  # one-hot encoding of y
            gradient = multiclass.gradient(pattern, w)
            self.logger.info("Gradient of {:}^th sub-clf is:\n{:}".format(
                c, gradient))

            self.assert_array_almost_equal(gradient.atleast_2d(), -ovo_grad)

        # Numerical check on the dense-trained model (dense pattern)
        grads_d = self._test_gradient_numerical(multiclass, pattern)

        self.logger.info("Testing with sparse data...")
        ds = self.dataset.tosparse()
        multiclass.fit(ds.X, ds.Y)

        pattern = ds.X[i, :]

        # Numerical check on the sparse-trained model (sparse pattern)
        grads_s = self._test_gradient_numerical(multiclass, pattern)

        # Compare dense gradients with sparse gradients
        for grad_i, grad in enumerate(grads_d):
            self.assert_array_almost_equal(grad.atleast_2d(), grads_s[grad_i])

        # Test error raise
        # TODO: Change grad_f_x with gradient after checking clf_idx in gradient(x,w)
        with self.assertRaises(ValueError):
            multiclass.grad_f_x(pattern, y=-1)
        with self.assertRaises(ValueError):
            multiclass.grad_f_x(pattern, y=100)

    def test_multiclass_gradient(self):
        """Test if gradient is correct when requesting for all classes with w"""

        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced')
        multiclass.fit(self.dataset.X, self.dataset.Y)
        div = CArray.rand(shape=multiclass.n_classes, random_state=0)

        def f_x(z):
            """Mean of the decision scores, each divided by `div`."""
            z = multiclass.predict(z, return_decision_function=True)[1]
            return CArray((z / div).mean())

        def grad_f_x(p):
            """Gradient of `f_x`, obtained by weighting the class gradients."""
            w = CArray.ones(shape=multiclass.n_classes) / \
                (div * multiclass.n_classes)
            return multiclass.gradient(p, w=w)

        i = 5  # Sample to test
        x = self.dataset.X[i, :]

        from secml.optim.function import CFunction
        check_grad_val = CFunction(f_x, grad_f_x).check_grad(x, epsilon=1e-1)
        self.logger.info("norm(grad - num_grad): %s", str(check_grad_val))
        self.assertLess(check_grad_val, 1e-3)

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                              class_weight='balanced')

        # All linear transformations with gradient implemented
        self._test_preprocess(self.dataset, multiclass,
                              ['min-max', 'mean-std'], [{
                                  'feature_range': (-1, 1)
                              }, {}])
        self._test_preprocess_grad(self.dataset, multiclass,
                                   ['min-max', 'mean-std'], [{
                                       'feature_range': (-1, 1)
                                   }, {}])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(self.dataset, multiclass, ['pca', 'unit-norm'],
                              [{}, {}])

예제 #6

파일 보기

파일: test_c_classifier_logistic.py 프로젝트: eweroliveira/Poisoning-Attacks-on-Algorithmic-Fairness

class TestCClassifierLogistic(CClassifierTestCases):
    """Unit tests for the logistic classifier wrapper."""

    def setUp(self):
        """Create the min-max normalized dataset and the classifier."""
        # Synthetic 2-feature data with a fixed seed
        loader = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                           n_clusters_per_class=1, random_state=99)
        self.dataset = loader.load()

        normalizer = CNormalizerMinMax()
        self.dataset.X = normalizer.fit_transform(self.dataset.X)

        self.logger.info("Testing classifier creation ")

        self.log = CClassifierLogistic(random_state=99)

    def test_plot(self):
        """Plot the classifier on the dataset and save the figure as PDF."""
        figure = self._test_plot(self.log, self.dataset)
        pdf_path = fm.join(fm.abspath(__file__), 'figs',
                           'test_c_classifier_logistic.pdf')
        figure.savefig(pdf_path)

    def test_fun(self):
        """Check decision_function()/predict() on dense and sparse data."""
        # Dense first, then sparse; each dataset is built right before use
        dense_scores, sparse_scores = [
            self._test_fun(self.log, convert())
            for convert in (self.dataset.todense, self.dataset.tosparse)
        ]

        self.assert_array_almost_equal(dense_scores, sparse_scores)

    def test_gradient(self):
        """Check gradients numerically for dense and sparse training data."""
        self.logger.info("Testing log.gradient_f_x() method")

        sample_idx = 5  # index of the point under test
        sample = self.dataset.X[sample_idx, :]
        self.logger.info("P {:}: {:}".format(sample_idx, sample))

        # Dense pass: fit, then compare with the numerical gradient
        # (the helper tests all classes)
        self.logger.info("Testing dense data...")
        dense_ds = self.dataset.todense()
        self.log.fit(dense_ds)
        dense_grads = self._test_gradient_numerical(
            self.log, sample.todense())

        # Sparse pass: same steps on the sparse version of the data
        self.logger.info("Testing sparse data...")
        sparse_ds = self.dataset.tosparse()
        self.log.fit(sparse_ds)
        sparse_grads = self._test_gradient_numerical(
            self.log, sample.tosparse())

        # Dense and sparse gradients must match position by position
        for position, dense_grad in enumerate(dense_grads):
            expected = sparse_grads[position]
            self.assert_array_almost_equal(dense_grad.atleast_2d(), expected)

    def test_sparse(self):
        """Run the shared sparse-data checks for linear classifiers."""
        sparse_ds = self.dataset.tosparse()
        self._test_sparse_linear(sparse_ds, self.log)

    def test_preprocess(self):
        """Check the classifier with preprocessor chains attached."""
        data = CDLRandom().load()

        def linear_chain():
            # Fresh lists on every call, as each helper got its own literals
            return ['min-max', 'mean-std'], [{'feature_range': (-1, 1)}, {}]

        # Linear transformations only: gradient is implemented
        self._test_preprocess(data, self.log, *linear_chain())
        self._test_preprocess_grad(data, self.log, *linear_chain())

        self.logger.info("The following case will skip the gradient test")
        # Mixed linear/nonlinear transformations: no gradient test
        mixed_names = ['pca', 'unit-norm']
        mixed_params = [{} for _ in mixed_names]
        self._test_preprocess(data, self.log, mixed_names, mixed_params)

예제 #7

파일 보기

파일: test_c_classifier_sklearn.py 프로젝트: eweroliveira/Poisoning-Attacks-on-Algorithmic-Fairness

class TestCClassifierSkLearn(CClassifierTestCases):
    """Unit test for SkLearn classifiers."""

    def setUp(self):
        """Build the dataset and wrap each sklearn model under test."""
        # QuadraticDiscriminantAnalysis will raise a warning
        self.logger.filterwarnings("ignore",
                                   message="Variables are collinear",
                                   category=UserWarning)

        multiclass = True

        n_classes = 3 if multiclass else 2
        self.dataset = CDLRandom(n_features=25,
                                 n_redundant=10,
                                 n_informative=5,
                                 n_classes=n_classes,
                                 n_samples=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.skclfs = [
            KNeighborsClassifier(3),
            SVC(kernel="linear",
                C=0.025,
                random_state=0,
                decision_function_shape='ovr'),
            SVC(kernel="rbf", gamma=2, C=1, random_state=0),
            DecisionTreeClassifier(max_depth=5, random_state=0),
            RandomForestClassifier(max_depth=5, n_estimators=5,
                                   random_state=0),
            MLPClassifier(alpha=1, max_iter=1000, random_state=0),
            AdaBoostClassifier(random_state=0),
            OneVsRestClassifier(SVC(kernel='linear')),
            # These clf below only work on dense data!
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            GaussianNB(),
            QuadraticDiscriminantAnalysis()
        ]

        self.classifiers = [
            CClassifierSkLearn(sklearn_model=model) for model in self.skclfs
        ]

    @staticmethod
    def _make_decision_function(sk_model, clf):
        """Return a stand-in `_decision_function` bound to one model.

        Passing `sk_model` and `clf` as factory arguments binds them at
        creation time, so the returned function does not depend on loop
        variables of the caller.

        Parameters
        ----------
        sk_model : sklearn estimator queried for the scores.
        clf : wrapping classifier; only `clf.n_classes` is read, at call time.

        """
        def _decision_function(x, y=None):
            x = x.atleast_2d()
            try:
                scores = CArray(sk_model.decision_function(x.get_data()))
                probs = False
            except AttributeError:
                # No decision_function available: fall back to probabilities
                scores = CArray(sk_model.predict_proba(x.get_data()))
                probs = True

            # two-class classifiers outputting only scores for class 1
            if len(scores.shape) == 1:  # duplicate column for class 0
                outputs = CArray.ones(shape=(x.shape[0], clf.n_classes))
                scores = scores.T
                outputs[:, 1] = scores
                outputs[:, 0] = 1 - scores if probs else -scores
                scores = outputs
            # Keep the result: a bare `scores.atleast_2d()` is discarded
            scores = scores.atleast_2d()
            if y is not None:
                return scores[:, y].ravel()
            return scores

        return _decision_function

    def test_fun(self):
        """Test for decision_function() and predict() methods."""

        for sk_model, clf in zip(self.skclfs, self.classifiers):

            self.logger.info("Classifier:\n - " + str(clf._sklearn_model))

            # create a fake private _decision_function to run tests
            # but this is basically the same in CClassifierSkLearn
            # - we need to think better tests!
            clf._decision_function = \
                self._make_decision_function(sk_model, clf)

            # execute tests
            self._test_fun(clf, self.dataset.todense())
            try:
                self._test_fun(clf, self.dataset.tosparse())
            except TypeError:
                self.logger.info(
                    "This sklearn model does not support sparse data!")

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        # All linear transformations
        for clf in self.classifiers:
            self._test_preprocess(self.dataset, clf, ['min-max', 'mean-std'],
                                  [{
                                      'feature_range': (-1, 1)
                                  }, {}])

            # Mixed linear/nonlinear transformations
            self._test_preprocess(self.dataset, clf, ['pca', 'unit-norm'],
                                  [{}, {}])

    def test_pretrained(self):
        """Test wrapping of pretrained models."""
        from sklearn import datasets, svm

        iris = datasets.load_iris()
        X = iris.data
        y = iris.target

        clf = svm.SVC(kernel='linear')

        # Predicting through a wrapper of an unfitted model must fail
        from secml.core.exceptions import NotFittedError
        with self.assertRaises(NotFittedError):
            secmlclf = CClassifierSkLearn(clf)
            secmlclf.predict(CArray(X))

        # Reference predictions from the model fitted directly with sklearn
        clf.fit(X, y)

        y_pred = clf.predict(X)

        # Fresh model fitted through the wrapper
        clf = svm.SVC(kernel='linear')
        secmlclf = CClassifierSkLearn(clf)
        secmlclf.fit(CDataset(X, y))

        y_pred_secml = secmlclf.predict(CArray(X))

        self.logger.info(
            "Predicted labels by pretrained model:\n{:}".format(y_pred))
        self.logger.info(
            "Predicted labels by our fit:\n{:}".format(y_pred_secml))

        self.assert_array_equal(y_pred, y_pred_secml)

    def test_set_get_state(self):
        """Test for set_state and get_state."""

        pre = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
        clf = CClassifierSkLearn(sklearn_model=SVC(kernel="rbf",
                                                   gamma=2,
                                                   C=1,
                                                   random_state=0),
                                 preprocess=pre)

        clf.fit(self.dataset)
        pred_y = clf.predict(self.dataset.X)
        self.logger.info(
            "Predictions before restoring state:\n{:}".format(pred_y))

        state = clf.get_state()
        self.logger.info("State of multiclass:\n{:}".format(state))

        # Generate a temp file to test. The path gets its own name so the
        # `tempfile` module is not shadowed.
        import os
        import tempfile
        from secml.utils import fm
        state_path = fm.join(tempfile.gettempdir(), 'secml_testgetsetstate')

        # Test save state to disk (save_state returns the path to load from)
        state_path = clf.save_state(state_path)

        try:
            # Create an entirely new clf
            pre_post = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
            clf_post = CClassifierSkLearn(sklearn_model=SVC(kernel="rbf",
                                                            gamma=2,
                                                            C=1,
                                                            random_state=0),
                                          preprocess=pre_post)

            # Restore state from disk
            clf_post.load_state(state_path)
        finally:
            # Do not leave the fixed-name state file in the shared temp dir
            if os.path.isfile(state_path):
                os.remove(state_path)

        pred_y_post = clf_post.predict(self.dataset.X)
        self.logger.info(
            "Predictions after restoring state:\n{:}".format(pred_y_post))

        self.assert_array_equal(pred_y, pred_y_post)

예제 #8

파일 보기

파일: test_c_classifier_sgd.py 프로젝트: pralab/secml

class TestCClassifierSGD(CClassifierTestCases):
    """Unit test for SGD Classifier."""

    def setUp(self):
        """Build the dataset and fit one SGD classifier per kernel type."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100,
                                 n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.logger.info("Testing classifier creation ")
        self.sgd = CClassifierSGD(regularizer=CRegularizerL2(),
                                  loss=CLossHinge(),
                                  random_state=0)

        # this is equivalent to C=1 for SGD
        alpha = 1 / self.dataset.num_samples

        # `None` means no kernel preprocessing
        kernel_types = \
            (None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3))
        self.sgds = [
            CClassifierSGD(regularizer=CRegularizerL2(),
                           loss=CLossHinge(),
                           max_iter=1000,
                           random_state=0,
                           alpha=alpha,
                           preprocess=kernel)
            for kernel in kernel_types
        ]
        self.logger.info("Testing SGD with kernel functions: %s",
                         str(kernel_types))

        for sgd in self.sgds:
            # NOTE(review): 0 presumably silences per-classifier output;
            # raise it to get debug logs -- confirm against the logger docs.
            sgd.verbose = 0
            sgd.fit(self.dataset.X, self.dataset.Y)

    def test_draw(self):
        """ Compare the classifiers graphically"""
        self.logger.info("Testing classifiers graphically")

        # generate 2D synthetic data
        dataset = CDLRandom(n_features=2,
                            n_redundant=1,
                            n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

        # Refit the kernel-less SGD on the 2D data
        self.sgds[0].fit(dataset.X, dataset.Y)

        svm = CClassifierSVM()
        svm.fit(dataset.X, dataset.Y)

        fig = CFigure(width=10, markersize=8)
        fig.subplot(2, 1, 1)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(svm.decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SVM')

        fig.subplot(2, 1, 2)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(self.sgds[0].decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SGD Classifier')

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs',
                    'test_c_classifier_sgd1.pdf'))

    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        # NOTE(review): the scores below are F1 although the log and assert
        # messages say "Accuracy" -- confirm which one is intended.
        metric = CMetric.create('f1')

        for sgd in self.sgds:

            self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

            # Train an SVM with a copy of the same kernel, if any
            if sgd.preprocess is not None:
                k = sgd.preprocess.deepcopy()
            else:
                k = None
            svm = CClassifierSVM(kernel=k)

            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(self.dataset.X,
                                           return_decision_function=True)
            label_sgd, y_sgd = sgd.predict(self.dataset.X,
                                           return_decision_function=True)

            acc_svm = metric.performance_score(self.dataset.Y, label_svm)
            acc_sgd = metric.performance_score(self.dataset.Y, label_sgd)

            self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
            self.assertGreater(acc_svm, 0.90,
                               "Accuracy of SVM: {:}".format(acc_svm))
            self.logger.info("Accuracy of SGD: {:}".format(acc_sgd))
            self.assertGreater(acc_sgd, 0.90,
                               "Accuracy of SGD: {:}".format(acc_sgd))

    def test_margin(self):
        """Plot the SGD decision boundary and margins on separable blobs."""

        self.logger.info("Testing margin separation of SGD...")

        # we create 50 separable points
        dataset = CDLRandomBlobs(n_samples=50,
                                 centers=2,
                                 random_state=0,
                                 cluster_std=0.60).load()

        # fit the model
        clf = CClassifierSGD(loss=CLossHinge(),
                             regularizer=CRegularizerL2(),
                             alpha=0.01,
                             max_iter=200,
                             random_state=0)
        clf.fit(dataset.X, dataset.Y)

        # plot the line, the points, and the nearest vectors to the plane
        xx = CArray.linspace(-1, 5, 10)
        yy = CArray.linspace(-1, 5, 10)

        # Evaluate the class-1 decision function on every grid point
        X1, X2 = np.meshgrid(xx.tondarray(), yy.tondarray())
        Z = CArray.empty(X1.shape)
        for (i, j), val in np.ndenumerate(X1):
            x1 = val
            x2 = X2[i, j]
            Z[i, j] = clf.decision_function(CArray([x1, x2]), y=1)
        levels = [-1.0, 0.0, 1.0]
        linestyles = ['dashed', 'solid', 'dashed']
        colors = 'k'
        fig = CFigure(linewidth=1)
        fig.sp.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
        fig.sp.scatter(dataset.X[:, 0].ravel(),
                       dataset.X[:, 1].ravel(),
                       c=dataset.Y,
                       s=40)

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs',
                    'test_c_classifier_sgd2.pdf'))

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        for clf in self.sgds:
            self.logger.info("SGD kernel: {:}".format(clf.preprocess))

            # The returned scores are not kept: see the TODO below
            self._test_fun(clf, self.dataset.todense())
            self._test_fun(clf, self.dataset.tosparse())

            # TODO: compare the dense and sparse scores returned by
            # _test_fun with assert_array_almost_equal. The comparison was
            # disabled because it was crashing (random_state suspected).

    def test_gradient(self):
        """Unittests for gradient_f_x."""
        self.logger.info("Testing SGD.gradient_f_x() method")

        i = 5  # IDX of the point to test
        pattern = self.dataset.X[i, :]
        self.logger.info("P {:}: {:}".format(i, pattern))

        for sgd in self.sgds:

            self.logger.info("Checking gradient for SGD with kernel: %s",
                             sgd.preprocess)

            if hasattr(sgd.preprocess, 'gamma'):  # set gamma for poly and rbf
                sgd.set('gamma', 1e-5)
            if hasattr(sgd.preprocess, 'degree'):  # set degree for poly
                sgd.set('degree', 3)

            self.logger.info("Testing dense data...")
            ds = self.dataset.todense()
            sgd.fit(ds.X, ds.Y)

            # Run the comparison with numerical gradient
            # (all classes will be tested)
            self._test_gradient_numerical(sgd, pattern.todense())

            self.logger.info("Testing sparse data...")
            ds = self.dataset.tosparse()
            sgd.fit(ds.X, ds.Y)

            # Run the comparison with numerical gradient
            # (all classes will be tested)
            self._test_gradient_numerical(sgd, pattern.tosparse())

            # TODO: compare the dense gradients with the sparse gradients
            # returned by the two calls above (element by element, after
            # atleast_2d). The comparison was disabled because it was
            # crashing (random_state suspected).

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ds = CDLRandom().load()
        clf = CClassifierSGD(regularizer=CRegularizerL2(),
                             loss=CLossHinge(),
                             random_state=0)

        # All linear transformations with gradient implemented
        self._test_preprocess(ds, clf, ['min-max', 'mean-std'], [{
            'feature_range': (-1, 1)
        }, {}])
        self._test_preprocess_grad(ds, clf, ['min-max', 'mean-std'], [{
            'feature_range': (-1, 1)
        }, {}])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(ds, clf, ['pca', 'unit-norm'], [{}, {}])

예제 #9

파일 보기

파일: c_kernel_testcases.py 프로젝트: eweroliveira/Poisoning-Attacks-on-Algorithmic-Fairness

class CCKernelTestCases(CUnitTest):
    def _set_up(self, kernel_name):
        """Prepare the dense/sparse fixtures and create the kernel under test.

        A random dataset (10 samples, 5 features, fixed random state) is
        loaded; its first two rows are stored as reference points, taken
        both from the dense dataset and from its sparse conversion.

        Parameters
        ----------
        kernel_name
            Value forwarded to `CKernel.create` to build `self.kernel`.
        """
        loader = CDLRandom(
            n_samples=10,
            n_features=5,
            n_redundant=0,
            n_informative=3,
            n_clusters_per_class=1,
            random_state=100,
        )

        # Dense dataset and its first two rows.
        self.d_dense = loader.load()
        self.p1_dense, self.p2_dense = (
            self.d_dense.X[0, :], self.d_dense.X[1, :])

        # Sparse conversion of the same dataset and the matching rows.
        self.d_sparse = self.d_dense.tosparse()
        self.p1_sparse, self.p2_sparse = (
            self.d_sparse.X[0, :], self.d_sparse.X[1, :])

        self.kernel = CKernel.create(kernel_name)

    def _has_gradient(self):
        """Return True if the kernel can compute a gradient, False otherwise.

        The probe sets `self.p1_dense` as the kernel's `rv` and asks for the
        gradient at `self.p2_dense`; a `NotImplementedError` means the
        gradient is not available. Note that the kernel's `rv` is left set
        to `self.p1_dense` afterwards.
        """
        try:
            self.kernel.rv = self.p1_dense
            self.kernel.gradient(self.p2_dense)
        except NotImplementedError:
            supported = False
        else:
            supported = True
        return supported

    def _cmp_kernel(self, k_fun, a1, a2):
        """Check the shape (or scalar-ness) of `k_fun(a1, a2)`.

        If the result is a `CArray`, its shape must be
        (rows of 2-D `a1`, rows of 2-D `a2`); otherwise the result must
        satisfy `is_scalar`.

        Parameters
        ----------
        k_fun : callable
            Function called as `k_fun(a1, a2)`.
        a1, a2
            Inputs forwarded to `k_fun`; they must expose `.shape` when the
            result is a `CArray`.
        """
        result = k_fun(a1, a2)

        if not isinstance(result, CArray):
            self.assertTrue(is_scalar(result))
            return

        self.logger.info(
            "k shape with inputs {:} {:} is: {:}".format(
                a1.shape, a2.shape, result.shape))

        # One output row per row of a1, one output column per row of a2.
        n_rows = CArray(a1).atleast_2d().shape[0]
        n_cols = CArray(a2).atleast_2d().shape[0]
        self.assertEqual(result.shape, (n_rows, n_cols))

    def _test_similarity_shape(self):
        """Check the kernel output shape for dense inputs of several shapes.

        Each pair of operands is delegated to `_cmp_kernel` together with
        `self.kernel.k`.
        """
        banner = "Testing shape of " + self.kernel.class_type
        self.logger.info(banner + " kernel output.")

        # Random operands: ravelled (1, 10) array, (10, 10) matrix,
        # (10, 1) column and (1, 1) array.
        flat = CArray.rand(shape=(1, 10)).ravel()
        square = CArray.rand(shape=(10, 10))
        column = CArray.rand(shape=(10, 1))
        single = CArray.rand(shape=(1, 1))

        operand_pairs = (
            (flat, flat),
            (square, flat),
            (flat, square),
            (square, square),
            (column, column),
            (column, single),
            (single, column),
            (single, single),
        )
        for lhs, rhs in operand_pairs:
            self._cmp_kernel(self.kernel.k, lhs, rhs)

    def _test_similarity_shape_sparse(self):
        """Check the kernel output shape for sparse inputs of several shapes.

        Same pairings as the dense variant, but every operand is converted
        with `tosparse()` before being passed to `_cmp_kernel`.
        """
        banner = "Testing shape of " + self.kernel.class_type
        self.logger.info(banner + " kernel output.")

        # Random operands, each converted to sparse format.
        flat = CArray.rand(shape=(1, 10)).ravel().tosparse()
        square = CArray.rand(shape=(10, 10)).tosparse()
        column = CArray.rand(shape=(10, 1)).tosparse()
        single = CArray.rand(shape=(1, 1)).tosparse()

        operand_pairs = (
            (flat, flat),
            (square, flat),
            (flat, square),
            (square, square),
            (column, column),
            (column, single),
            (single, column),
            (single, single),
        )
        for lhs, rhs in operand_pairs:
            self._cmp_kernel(self.kernel.k, lhs, rhs)

    def _test_gradient(self):
        """Compare the analytical kernel gradient with a numerical estimate.

        The gradient is evaluated at `self.p2_dense`, using each sample of
        `self.d_dense` in turn as the kernel's `rv`. Nothing is checked when
        `_has_gradient` returns False, and samples whose analytical gradient
        norm is not >= 1e-10 are skipped.
        """
        if not self._has_gradient():
            skip_msg = ("Gradient is not implemented for %s. "
                        "Skipping gradient dense tests.")
            self.logger.info(skip_msg, self.kernel.class_type)
            return

        # The kernel gradient is computed wrt the second point, but
        # check_grad needs that point as first input: hence the wrappers
        # below take the patterns in inverted order.
        def swapped_kernel(p2, p1, kernel_func):
            return kernel_func.k(p2, p1)

        def swapped_gradient(p2, p1, kernel_func):
            kernel_func.rv = p1
            return kernel_func.gradient(p2)

        self.logger.info("Testing gradient with dense data.")
        self.logger.info("Kernel type: %s", self.kernel.class_type)

        for idx in range(self.d_dense.num_samples):
            ref_point = self.d_dense.X[idx, :]
            self.logger.info("x point: " + str(self.p2_dense))
            self.logger.info("y point: " + str(ref_point))

            # TODO: implement centered numerical differences.
            # When the analytical gradient is zero the one-sided numerical
            # estimate does not work, so such samples are skipped; centered
            # differences would give the precision needed to test them.
            self.kernel.rv = ref_point
            analytical = self.kernel.gradient(self.p2_dense)
            if not (analytical.norm() >= 1e-10):
                continue

            checker = CFunction(swapped_kernel, swapped_gradient)
            approx_error = checker.check_grad(
                self.p2_dense, 1e-8, ref_point, self.kernel)
            self.logger.info(
                "Gradient approx. error: {:}".format(approx_error))
            self.assertTrue(approx_error < 1e-4)

    def _test_gradient_sparse(self):
        """Check the storage format of kernel gradients with sparse inputs.

        Three combinations of `rv` / gradient point are exercised:
        sparse/dense must give a dense gradient, dense/sparse and
        sparse/sparse must give a sparse gradient. Nothing is checked when
        `_has_gradient` returns False.
        """
        if not self._has_gradient():
            skip_msg = ("Gradient is not implemented for %s. "
                        "Skipping gradient sparse tests.")
            self.logger.info(skip_msg, self.kernel.class_type)
            return

        self.logger.info("Testing gradient with sparse data.")
        self.logger.info("Kernel type: %s", self.kernel.class_type)

        def check_format(rv, point, template, flag_name):
            # Set the reference data, compute the gradient at `point`, then
            # log and assert the boolean format flag named `flag_name`.
            self.kernel.rv = rv
            k_grad = self.kernel.gradient(point)
            self.logger.info(template.format(getattr(k_grad, flag_name)))
            self.assertTrue(getattr(k_grad, flag_name))

        check_format(self.d_sparse.X, self.p2_dense,
                     "sparse/dense ->.isdense: {:}", "isdense")
        check_format(self.d_dense.X, self.p2_sparse,
                     "dense/sparse ->.issparse: {:}", "issparse")
        check_format(self.d_sparse.X, self.p2_sparse,
                     "sparse/sparse ->.issparse: {:}", "issparse")

    def _test_gradient_multiple_points(self):
        """Check batched kernel gradients against one-point-at-a-time ones.

        The gradient at `self.p2_dense` computed with several dense rows as
        `rv` must match (norm of the difference < 1e-4) the gradients
        computed with each row as `rv` separately. Nothing is checked when
        `_has_gradient` returns False.
        """
        if not self._has_gradient():
            skip_msg = ("Gradient is not implemented for %s. "
                        "Skipping multiple-point tests.")
            self.logger.info(skip_msg, self.kernel.class_type)
            return

        def compare(points):
            # Gradient with all the points set as rv at once...
            self.kernel.rv = points
            batch_grad = self.kernel.gradient(self.p2_dense)
            # ...versus the gradients obtained one rv row at a time.
            single_grads = CArray.zeros(shape=batch_grad.shape)
            for row in range(single_grads.shape[0]):
                self.kernel.rv = points[row, :]
                single_grads[row, :] = self.kernel.gradient(self.p2_dense)
            self.assertTrue(
                (batch_grad - single_grads).ravel().norm() < 1e-4)

        # Same number of points and features (first 5 rows).
        compare(self.d_dense.X[0:5, :])
        # Different number of points and features (whole dataset).
        compare(self.d_dense.X)

    def _test_gradient_multiple_points_sparse(self):
        """Check batched kernel gradients against single ones, sparse `rv`.

        Same comparison as the dense variant, but the rows used as `rv` come
        from `self.d_sparse`; the gradient point is still `self.p2_dense`.
        Nothing is checked when `_has_gradient` returns False.
        """
        if not self._has_gradient():
            skip_msg = ("Gradient is not implemented for %s. "
                        "Skipping multiple-point tests.")
            self.logger.info(skip_msg, self.kernel.class_type)
            return

        def compare(points):
            # Gradient with all the points set as rv at once...
            self.kernel.rv = points
            batch_grad = self.kernel.gradient(self.p2_dense)
            # ...versus the gradients obtained one rv row at a time.
            single_grads = CArray.zeros(shape=batch_grad.shape)
            for row in range(single_grads.shape[0]):
                self.kernel.rv = points[row, :]
                single_grads[row, :] = self.kernel.gradient(self.p2_dense)
            self.assertTrue(
                (batch_grad - single_grads).ravel().norm() < 1e-4)

        # Same number of points and features (first 5 rows).
        compare(self.d_sparse.X[0:5, :])
        # Different number of points and features (whole dataset).
        compare(self.d_sparse.X)

    def _test_gradient_w(self):
        """Test for backward passing of w in kernel gradients.

        Checks that `gradient(x, w=w)` equals the gradient computed without
        `w` and then multiplied by `w`, first with a single point as `rv`
        and then with five points. Nothing is checked when `_has_gradient`
        returns False.
        """

        if not self._has_gradient():
            # NOTE(review): the message below says "multiple-point tests"
            # although this is the w test; it looks copy-pasted from the
            # multiple-point tests -- confirm before changing it.
            self.logger.info(
                "Gradient is not implemented for %s. "
                "Skipping multiple-point tests.", self.kernel.class_type)
            return

        # check if the gradient computed when passing w is the same as the
        # gradient computed with w=None and pre-multiplied with w

        # test on single point
        w = CArray.rand(shape=(1, ), random_state=0)
        self.kernel.rv = self.p2_dense
        grad_1 = self.kernel.gradient(self.p1_dense, w=w)
        # reference value: gradient without w, scaled by w afterwards
        grad_2 = w * (self.kernel.gradient(self.p1_dense))
        grad_2 = grad_2.ravel()
        self.assertTrue(grad_1.is_vector_like)
        self.assertTrue(grad_2.is_vector_like)
        self.assert_array_almost_equal(grad_1, grad_2, decimal=10)

        # test on multiple points
        w = CArray.rand(shape=(5, ), random_state=0)
        # first 5 samples of the dense dataset, one entry of w per sample
        self.kernel.rv = self.d_dense[:5, :].X
        grad_1 = self.kernel.gradient(self.p1_dense, w=w)
        # reference value: w dotted with the gradient computed without w
        grad_2 = w.dot(self.kernel.gradient(self.p1_dense)).ravel()
        self.assertTrue(grad_1.is_vector_like)
        self.assertTrue(grad_2.is_vector_like)
        self.assert_array_almost_equal(grad_1, grad_2, decimal=10)