Example #1
    def setUp(self):

        import numpy as np
        np.random.seed(12345678)

        # generate synthetic data
        self.ds = CDLRandom(n_classes=3,
                            n_features=2,
                            n_redundant=0,
                            n_clusters_per_class=1,
                            class_sep=1,
                            random_state=0).load()

        # Add a new class modifying one of the existing clusters
        self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes

        # self.kernel = None
        self.kernel = CKernelRBF(gamma=10)

        # Data normalization
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                                   class_weight='balanced',
                                                   preprocess=None,
                                                   kernel=self.kernel)
        self.multiclass.verbose = 0

        # Training and classification
        self.multiclass.fit(self.ds.X, self.ds.Y)

        self.y_pred, self.score_pred = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
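A minimal sketch of how these fixtures might be exercised in a test of the same class; the method name `test_accuracy` and the use of `self.logger` are assumptions, while `CMetricAccuracy` is secml's standard accuracy metric:

    def test_accuracy(self):
        """Sanity check on the predictions computed in `setUp`."""
        from secml.ml.peval.metrics import CMetricAccuracy

        # Fraction of training samples correctly classified by the OVA SVM
        acc = CMetricAccuracy().performance_score(
            y_true=self.ds.Y, y_pred=self.y_pred)
        self.logger.info("Training accuracy: {:.2%}".format(acc))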
Example #2
def plot_loss_after_attack(evasAttack):
    """
    This function plots the evolution of the loss function of the surrogate
    classifier after an attack is performed.
    The loss function is normalized between 0 and 1.
    It helps to assess whether the parameters given to the attack algorithm
    are well tuned or not; the loss should be as small as possible.
    The script is inspired by https://secml.gitlab.io/tutorials/11-ImageNet_advanced.html#Visualize-and-check-the-attack-optimization
    """
    n_iter = evasAttack.x_seq.shape[0]
    itrs = CArray.arange(n_iter)

    # create a plot that shows the loss during the attack iterations
    # note that the loss is not available for all attacks
    fig = CFigure(width=10, height=4, fontsize=14)

    # apply a linear scaling to have the loss in [0,1]
    loss = evasAttack.f_seq
    if loss is not None:
        loss = CNormalizerMinMax().fit_transform(CArray(loss).T).ravel()
        fig.subplot(1, 2, 1)
        fig.sp.xlabel('iteration')
        fig.sp.ylabel('loss')
        fig.sp.plot(itrs, loss, c='black')

    fig.tight_layout()
    fig.show()
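A usage sketch for the helper above, assuming `attack` is an already-configured secml evasion attack (e.g. a CAttackEvasionPGDLS instance) and `x0`, `y0` are placeholders for the initial sample and its true label; `x_seq` and `f_seq` are only populated after `run` completes:

# hypothetical names: `attack`, `x0` and `y0` must be defined elsewhere
y_adv, scores, adv_ds, f_opt = attack.run(x0, y0)
plot_loss_after_attack(attack)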
Example #3
    def setUp(self):
        self.clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, kernel='rbf')
        self.dataset = CDLRandomBlobs(
            random_state=3, n_features=2, centers=4).load()
        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
        self.clf.fit(self.dataset.X, self.dataset.Y)
Example #4
    def test_grad_tr_params_linear(self):
        """Test `grad_tr_params` on a linear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierSVM(store_dual_vars=True, preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(clf)
Example #5
    def test_grad_tr_params_nonlinear(self):
        """Test `grad_tr_params` on a nonlinear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierSVM(kernel='rbf', preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(clf)
Example #6
    def setUp(self):
        """Test for init and fit methods."""

        # TODO: remove this filter when `kernel` parameter is removed from Ridge Classifier
        self.logger.filterwarnings("ignore",
                                   message="`kernel` parameter.*",
                                   category=DeprecationWarning)
        # generate synthetic data
        self.dataset = CDLRandom(n_features=100,
                                 n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.ridges = [
            CClassifierRidge(kernel=kernel() if kernel is not None else None)
            for kernel in kernel_types
        ]
        self.logger.info("Testing RIDGE with kernel unctions: %s",
                         str(kernel_types))

        for ridge in self.ridges:
            ridge.verbose = 2  # Enabling debug output for each classifier
            ridge.fit(self.dataset.X, self.dataset.Y)
Example #7
        def sklearn_comp(array):

            self.logger.info("Original array is:\n{:}".format(array))

            # Sklearn normalizer (requires float dtype input)
            array_sk = array.astype(float).tondarray()
            sk_norm = MinMaxScaler().fit(array_sk)

            target = CArray(sk_norm.transform(array_sk))

            # Our normalizer
            our_norm = CNormalizerMinMax().fit(array)
            result = our_norm.transform(array)

            self.logger.info("Correct result is:\n{:}".format(target))
            self.logger.info("Our result is:\n{:}".format(result))

            self.assert_array_almost_equal(target, result)

            # Testing out of range normalization

            self.logger.info("Testing out of range normalization")

            # Sklearn normalizer (requires float dtype input)
            target = CArray(sk_norm.transform(array_sk * 2))

            # Our normalizer
            result = our_norm.transform(array * 2)

            self.logger.info("Correct result is:\n{:}".format(target))
            self.logger.info("Our result is:\n{:}".format(result))

            self.assert_array_almost_equal(target, result)
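For reference, both normalizers implement the usual min-max scaling x' = (x - min) / (max - min) per feature, so values outside the training range (as in the `array * 2` check above) simply map outside [0, 1]. A standalone sketch of the same comparison, assuming only that secml and scikit-learn are installed:

from sklearn.preprocessing import MinMaxScaler
from secml.array import CArray
from secml.ml.features.normalization import CNormalizerMinMax

x = CArray([[1., 0.], [3., 4.]])

# Each column is scaled to [0, 1] using its own min and max
print(CNormalizerMinMax().fit_transform(x))  # [[0, 0], [1, 1]]
print(CArray(MinMaxScaler().fit_transform(x.tondarray())))  # same values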
Example #8
    def test_grad_tr_params_linear(self):
        """Test `grad_tr_params` on a linear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierRidge(preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(clf)
Example #9
    def _prepare_linear_svm(self, sparse, seed):
        """Preparare the data required for attacking a LINEAR SVM.

        - load a blob 2D dataset
        - create a SVM (C=1) and a minmax preprocessor

        Parameters
        ----------
        sparse : bool
        seed : int or None

        Returns
        -------
        ds : CDataset
        clf : CClassifierSVM

        """
        ds = self._load_blobs(
            n_feats=2,  # Number of dataset features
            n_clusters=2,  # Number of dataset clusters
            sparse=sparse,
            seed=seed)

        normalizer = CNormalizerMinMax(feature_range=(-1, 1))
        clf = CClassifierSVM(C=1.0, preprocess=normalizer)

        return ds, clf
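A sketch of how this helper might be consumed; the test name and the final size check are assumptions, only `_prepare_linear_svm` comes from the snippet above:

    def test_prepare_linear_svm(self):
        ds, clf = self._prepare_linear_svm(sparse=False, seed=0)
        # The minmax preprocessor is fitted together with the SVM
        clf.fit(ds.X, ds.Y)
        y_pred = clf.predict(ds.X)
        self.assertEqual(ds.Y.size, y_pred.size)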
Example #10
    def test_plot(self):
        """Compare the classifiers graphically."""
        ds = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                       n_clusters_per_class=1, random_state=0).load()
        ds.X = CNormalizerMinMax().fit_transform(ds.X)
        fig = self._test_plot(self.ridges[0], ds)
        fig.savefig(fm.join(fm.abspath(__file__), 'figs',
                            'test_c_classifier_ridge.pdf'))
Example #11
    def test_grad_tr_params_linear(self):
        """Test `grad_tr_params` on a linear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierLogistic(preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self.logger.info('w: ' + str(clf.w) + ', b: ' + str(clf.b))
            self._test_grad_tr_params(clf)
Example #12
    def setUp(self):
        self.clf = CClassifierSVM()
        self.dataset = CDLRandom(n_features=2,
                                 n_redundant=0,
                                 n_informative=1,
                                 n_clusters_per_class=1).load()
        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
        self.clf.fit(self.dataset.X, self.dataset.Y)
Example #13
    def setUp(self):
        """Test for init and fit methods."""

        self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                                 n_clusters_per_class=1).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.nc = CClassifierNearestCentroid()
Example #14
    def test_poisoning_with_normalization_inside(self):
        """Test the CAttackPoisoning object when the classifier contains a
        normalizer.
        """
        normalizer = CNormalizerMinMax(feature_range=(-10, 10))

        self._test_clf_accuracy(normalizer)

        # test if the attack is effective and eventually show 2D plots
        self._test_attack_effectiveness(normalizer)
Example #15
    def setUp(self):
        """Test for init and fit methods."""
        # generate synthetic data
        self.dataset = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                                 n_clusters_per_class=1, random_state=99).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.logger.info("Testing classifier creation ")
        
        self.log = CClassifierLogistic(random_state=99)
Example #16
    def _dataset_creation(self):
        # generate synthetic data
        self.ds = CDLRandom(n_samples=100, n_classes=3, n_features=2,
                            n_redundant=0, n_clusters_per_class=1,
                            class_sep=1, random_state=0).load()

        # Add a new class modifying one of the existing clusters
        self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes

        self.lb = 0
        self.ub = 1

        # Data normalization (set self.normalizer to None to disable it)
        self.normalizer = CNormalizerMinMax(
            feature_range=(self.lb, self.ub))
        # self.normalizer = None
        if self.normalizer is not None:
            self.ds.X = self.normalizer.fit_transform(self.ds.X)
Example #17
    def test_transform(self):
        """Test for `.transform()` method."""
        self._sklearn_comp(
            self.array_dense, MinMaxScaler(), CNormalizerMinMax())
        self._sklearn_comp(
            self.array_sparse, MinMaxScaler(), CNormalizerMinMax())
        self._sklearn_comp(
            self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax())
        self._sklearn_comp(
            self.row_sparse, MinMaxScaler(), CNormalizerMinMax())
        self._sklearn_comp(
            self.column_dense, MinMaxScaler(), CNormalizerMinMax())
        self._sklearn_comp(
            self.column_sparse, MinMaxScaler(), CNormalizerMinMax())
Example #18
    def setUp(self):
        """Test for init and fit methods."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100, n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
        self.ridges = [CClassifierRidge(
            preprocess=kernel() if kernel is not None else None)
            for kernel in kernel_types]
        self.logger.info(
            "Testing RIDGE with kernel functions: %s", str(kernel_types))

        for ridge in self.ridges:
            ridge.verbose = 2  # Enabling debug output for each classifier
            ridge.fit(self.dataset.X, self.dataset.Y)
Example #19
    def test_normalization(self):
        """Test data normalization inside CClassifierMulticlassOVO."""
        from secml.ml.features.normalization import CNormalizerMinMax

        ds_norm_x = CNormalizerMinMax().fit_transform(self.dataset.X)

        multi_nonorm = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                                class_weight='balanced')
        multi_nonorm.fit(ds_norm_x, self.dataset.Y)
        pred_y_nonorm = multi_nonorm.predict(ds_norm_x)

        multi = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                         class_weight='balanced',
                                         preprocess='min-max')
        multi.fit(self.dataset.X, self.dataset.Y)
        pred_y = multi.predict(self.dataset.X)

        self.logger.info("Predictions with internal norm:\n{:}".format(pred_y))
        self.logger.info(
            "Predictions with external norm:\n{:}".format(pred_y_nonorm))

        self.assertFalse((pred_y_nonorm != pred_y).any())
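The test above relies on `preprocess` accepting either a preprocessor instance or a string alias that secml resolves internally, with 'min-max' mapping to CNormalizerMinMax. A minimal sketch of the two equivalent spellings, under that assumption:

from secml.ml.classifiers import CClassifierSVM
from secml.ml.features.normalization import CNormalizerMinMax

clf_alias = CClassifierSVM(preprocess='min-max')  # string alias
clf_explicit = CClassifierSVM(preprocess=CNormalizerMinMax())  # instance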
Example #20
    def setUp(self):
        """Test for init and fit methods."""

        # TODO: remove this filter when `kernel` parameter is removed from SGD Classifier
        self.logger.filterwarnings("ignore",
                                   message="`kernel` parameter.*",
                                   category=DeprecationWarning)

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100,
                                 n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.logger.info("Testing classifier creation ")
        self.sgd = CClassifierSGD(regularizer=CRegularizerL2(),
                                  loss=CLossHinge(),
                                  random_state=0)

        kernel_types = \
            (None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3))
        self.sgds = [
            CClassifierSGD(regularizer=CRegularizerL2(),
                           loss=CLossHinge(),
                           max_iter=500,
                           random_state=0,
                           kernel=kernel)
            for kernel in kernel_types
        ]
        self.logger.info("Testing SGD with kernel functions: %s",
                         str(kernel_types))

        for sgd in self.sgds:
            sgd.verbose = 2  # Enabling debug output for each classifier
            sgd.fit(self.dataset.X, self.dataset.Y)
Example #21
    def test_draw(self):
        """ Compare the classifiers graphically"""
        self.logger.info("Testing classifiers graphically")

        # generate 2D synthetic data
        dataset = CDLRandom(n_features=2,
                            n_redundant=1,
                            n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

        self.sgds[0].fit(dataset.X, dataset.Y)

        svm = CClassifierSVM()
        svm.fit(dataset.X, dataset.Y)

        fig = CFigure(width=10, markersize=8)
        fig.subplot(2, 1, 1)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(svm.decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SVM')

        fig.subplot(2, 1, 2)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(self.sgds[0].decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SGD Classifier')

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs',
                    'test_c_classifier_sgd1.pdf'))
Example #22
    def setUp(self):
        """Test for init and fit methods."""

        # generate synthetic data
        self.dataset = CDLRandom(n_features=100,
                                 n_redundant=20,
                                 n_informative=25,
                                 n_clusters_per_class=2,
                                 random_state=0).load()

        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

        self.logger.info("Testing classifier creation ")
        self.sgd = CClassifierSGD(regularizer=CRegularizerL2(),
                                  loss=CLossHinge(),
                                  random_state=0)

        # this is equivalent to C=1 for SGD
        alpha = 1 / self.dataset.num_samples

        kernel_types = \
            (None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3))
        self.sgds = [
            CClassifierSGD(regularizer=CRegularizerL2(),
                           loss=CLossHinge(),
                           max_iter=1000,
                           random_state=0,
                           alpha=alpha,
                           preprocess=kernel)
            for kernel in kernel_types
        ]
        self.logger.info("Testing SGD with kernel functions: %s",
                         str(kernel_types))

        for sgd in self.sgds:
            sgd.verbose = 0  # Disable debug output for each classifier
            sgd.fit(self.dataset.X, self.dataset.Y)
Example #23
    def _dataset_creation(self):
        """Creates a blob dataset."""
        self.n_features = 2  # Number of dataset features

        self.seed = 42

        self.n_tr = 50
        self.n_ts = 100
        self.n_classes = 2

        loader = CDLRandomBlobs(n_samples=self.n_tr + self.n_ts,
                                n_features=self.n_features,
                                centers=[(-1, -1), (+1, +1)],
                                center_box=(-2, 2),
                                cluster_std=0.8,
                                random_state=self.seed)

        self.logger.info("Loading `random_blobs` with seed: {:}".format(
            self.seed))

        dataset = loader.load()
        splitter = CDataSplitterShuffle(num_folds=1,
                                        train_size=self.n_tr,
                                        random_state=3)
        splitter.compute_indices(dataset)
        self.tr = dataset[splitter.tr_idx[0], :]
        self.ts = dataset[splitter.ts_idx[0], :]

        normalizer = CNormalizerMinMax(feature_range=(-1, 1))
        self.tr.X = normalizer.fit_transform(self.tr.X)
        self.ts.X = normalizer.transform(self.ts.X)

        self.lb = -1
        self.ub = 1

        self.grid_limits = [(self.lb - 0.1, self.ub + 0.1),
                            (self.lb - 0.1, self.ub + 0.1)]
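The padded `grid_limits` computed above are intended for the plotting helpers already used in Example #21; a hedged sketch reusing them, where the helper name `_plot_decision_surface` and the fitted classifier `clf` are assumptions:

    def _plot_decision_surface(self, clf):
        """Plot the training set and the decision function on the padded grid."""
        fig = CFigure(width=6, height=5)
        fig.sp.plot_ds(self.tr)
        fig.sp.plot_fun(clf.decision_function,
                        grid_limits=self.grid_limits, y=1)
        fig.show()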