def test_alignment(self):

        ds = CDLRandom(n_samples=100,
                       n_features=500,
                       n_redundant=0,
                       n_informative=10,
                       n_clusters_per_class=1,
                       random_state=0).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        self._compute_alignment(ds, sec_svm, svm)

        svm_pred = svm.predict(ds.X)
        secsvm_pred = sec_svm.predict(ds.X)

        self.logger.info("SVM pred:\n{:}".format(svm_pred))
        self.logger.info("Sec-SVM pred:\n{:}".format(secsvm_pred))

        self.assert_array_almost_equal(secsvm_pred, svm_pred)
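
    # `_compute_alignment` is a helper defined elsewhere in this test class.
    # A minimal sketch of what it might compute, assuming both classifiers
    # expose a linear weight vector `w` (illustrative, not the original code):
    def _compute_alignment(self, ds, sec_svm, svm):
        w1 = svm.w.ravel()
        w2 = sec_svm.w.ravel()
        # cosine similarity between the two weight vectors
        cos_sim = (w1 * w2).sum() / (w1.norm() * w2.norm())
        self.logger.info("Weight alignment (cosine): {:}".format(cos_sim))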
Example #2
    def test_grad_tr_params_linear(self):
        """Test `grad_tr_params` on a linear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierSVM(store_dual_vars=True, preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(clf)
Example #3
class TestCPlot(CUnitTest):
    """Unit test for TestCPlot."""
    def setUp(self):
        self.clf = CClassifierSVM()
        self.dataset = CDLRandom(n_features=2,
                                 n_redundant=0,
                                 n_informative=1,
                                 n_clusters_per_class=1).load()
        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for CPlotFunction.plot_fun method."""
        fig = CFigure()
        fig.sp.plot_ds(self.dataset)

        fig.sp.plot_fun(self.clf.decision_function, y=1)
        fig.show()

    def test_fgrads(self):
        """Test for CPlotFunction.plot_fgrads method."""
        fig = CFigure()
        fig.sp.plot_ds(self.dataset)

        fig.sp.plot_fun(self.clf.decision_function, y=1)
        fig.sp.plot_fgrads(lambda x: self.clf.grad_f_x(x, y=1))
        fig.show()
Example #4
    def test_grad_tr_params_nonlinear(self):
        """Test `grad_tr_params` on a nonlinear classifier."""

        for n in (None, CNormalizerMinMax((-10, 10))):
            clf = CClassifierSVM(kernel='rbf', preprocess=n)
            clf.fit(self.ds.X, self.ds.Y)
            self._test_grad_tr_params(clf)
Example #5
    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for sgd in self.sgds:

            self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

            if sgd.preprocess is not None:
                k = sgd.preprocess.deepcopy()
            else:
                k = None
            svm = CClassifierSVM(kernel=k)

            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(self.dataset.X,
                                           return_decision_function=True)
            label_sgd, y_sgd = sgd.predict(self.dataset.X,
                                           return_decision_function=True)

            f1_svm = CMetric.create('f1').performance_score(
                self.dataset.Y, label_svm)
            f1_sgd = CMetric.create('f1').performance_score(
                self.dataset.Y, label_sgd)

            self.logger.info("F1 score of SVM: {:}".format(f1_svm))
            self.assertGreater(f1_svm, 0.90,
                               "F1 score of SVM: {:}".format(f1_svm))
            self.logger.info("F1 score of SGD: {:}".format(f1_sgd))
            self.assertGreater(f1_sgd, 0.90,
                               "F1 score of SGD: {:}".format(f1_sgd))
Example #6
    def test_performance(self):
        """ Compare the classifiers performance"""
        self.logger.info("Testing error performance of the "
                         "classifiers on the training set")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            if ridge.preprocess is not None:
                svm_kernel = ridge.preprocess.deepcopy()
            else:
                svm_kernel = None

            svm = CClassifierSVM(kernel=svm_kernel)
            svm.fit(self.dataset.X, self.dataset.Y)

            label_svm, y_svm = svm.predict(
                self.dataset.X, return_decision_function=True)
            label_ridge, y_ridge = ridge.predict(
                self.dataset.X, return_decision_function=True)

            f1_svm = CMetric.create('f1').performance_score(
                self.dataset.Y, label_svm)
            f1_ridge = CMetric.create('f1').performance_score(
                self.dataset.Y, label_ridge)

            self.logger.info("F1 score of SVM: {:}".format(f1_svm))
            self.assertGreater(f1_svm, 0.90,
                               "F1 score of SVM: {:}".format(f1_svm))
            self.logger.info("F1 score of ridge: {:}".format(f1_ridge))
            self.assertGreater(f1_ridge, 0.90,
                               "F1 score of ridge: {:}".format(f1_ridge))
    def test_store_dual_vars(self):
        """Test of parameters that control storing of dual space variables."""
        self.logger.info("Checking CClassifierSVM.store_dual_vars...")

        self.logger.info("Instancing a linear SVM")
        svm = CClassifierSVM(kernel=None)

        self.assertIsNone(svm.store_dual_vars)
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNone(svm.sv)

        self.logger.info("Changing store_dual_vars to True")
        svm.store_dual_vars = True

        self.assertTrue(svm.store_dual_vars)
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.sv)

        self.logger.info("Changing store_dual_vars to False")
        svm.store_dual_vars = False

        self.assertFalse(svm.store_dual_vars)
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNone(svm.sv)

        self.logger.info("Changing kernel to nonlinear when "
                         "store_dual_vars is False should raise ValueError")
        with self.assertRaises(ValueError):
            svm.kernel = CKernelRBF()

        self.logger.info("Instancing a nonlinear SVM")
        svm = CClassifierSVM(kernel='rbf')

        self.assertIsNone(svm.store_dual_vars)
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.sv)

        self.logger.info("Changing store_dual_vars to True")
        svm.store_dual_vars = True

        self.assertTrue(svm.store_dual_vars)
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.sv)

        self.logger.info(
            "Changing store_dual_vars to False should raise ValueError")
        with self.assertRaises(ValueError):
            svm.store_dual_vars = False
Example #8
    def test_time(self):
        """ Compare execution time of ridge and SVM"""
        self.logger.info("Testing training speed of ridge compared to SVM ")

        for ridge in self.ridges:
            self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

            svm = CClassifierSVM(kernel=ridge.preprocess)

            with self.timer() as t_svm:
                svm.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of SVM: {:}".format(t_svm.interval))
            with self.timer() as t_ridge:
                ridge.fit(self.dataset.X, self.dataset.Y)
            self.logger.info(
                "Execution time of ridge: {:}".format(t_ridge.interval))
    def test_plot(self):

        ds = CDLRandom(n_samples=100,
                       n_features=2,
                       n_redundant=0,
                       random_state=100).load()

        self.logger.info("Train Sec SVM")
        sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
        sec_svm.verbose = 2
        sec_svm.fit(ds.X, ds.Y)

        self.logger.info("Train SVM")
        svm = CClassifierSVM(C=1)
        svm.fit(ds.X, ds.Y)

        self._compute_alignment(ds, sec_svm, svm)

        fig = CFigure(height=5, width=8)
        fig.subplot(1, 2, 1)
        # Plot dataset points
        fig.sp.plot_ds(ds)
        # Plot objective function
        fig.sp.plot_fun(svm.predict,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=ds.get_bounds())
        fig.sp.title("SVM")

        fig.subplot(1, 2, 2)
        # Plot dataset points
        fig.sp.plot_ds(ds)
        # Plot objective function
        fig.sp.plot_fun(sec_svm.predict,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=ds.get_bounds())
        fig.sp.title("Sec-SVM")

        fig.show()
Example #10
class TestCFigure(CUnitTest):
    """Unittest for CFigure."""
    def test_svm(self):

        self.X = CArray([[1, 2], [3, 4], [5, 6], [7, 8]])
        self.Y = CArray([[0], [1], [1], [0]]).ravel()
        self.dataset = CDataset(self.X, self.Y)

        self.classifier = CClassifierSVM(kernel=CKernelRBF())
        self.classifier.fit(self.dataset)

        self.x_min, self.x_max = (self.X[:, [0]].min() - 1,
                                  self.X[:, [0]].max() + 1)
        self.y_min, self.y_max = (self.X[:, [1]].min() - 1,
                                  self.X[:, [1]].max() + 1)

        self.fig = CFigure(height=7,
                           width=10,
                           linewidth=5,
                           fontsize=24,
                           markersize=20)
        self.fig.sp.title("Svm Test")

        self.logger.info("Test plot dataset method...")

        self.fig.sp.plot_ds(self.dataset)

        self.logger.info("Test plot path method...")
        path = CArray([[1, 2], [1, 3], [1.5, 5]])
        self.fig.sp.plot_path(path)

        self.logger.info("Test plot function method...")
        bounds = [(self.x_min, self.x_max), (self.y_min, self.y_max)]
        self.fig.sp.plot_fun(self.classifier.decision_function,
                             plot_levels=False,
                             grid_limits=bounds,
                             y=1)

        self.fig.sp.xlim(self.x_min, self.x_max)
        self.fig.sp.ylim(self.y_min, self.y_max)

        self.fig.show()
Example #11
class TestCExplainerGradient(CUnitTest):
    """Unittests for CExplainerGradient"""
    def setUp(self):

        self.clf = CClassifierSVM()
        # Digits dataset, 2 classes (0 and 1), 8x8 images (64 features)
        self.ds = CDLDigits(class_list=[0, 1], zero_one=True).load()

        # Training classifier
        self.clf.fit(self.ds)

        self.explainer = CExplainerGradient(self.clf)

    def test_explain(self):
        """Unittest for explain method."""
        i = 67
        x = self.ds.X[i, :]

        attr = self.explainer.explain(x, y=1)

        self.logger.info("Attributions:\n{:}".format(attr.tolist()))

        self.assertIsInstance(attr, CArray)
        self.assertEqual(x.shape, attr.shape)

        fig = CFigure(height=3, width=6)

        # Plotting original image
        fig.subplot(1, 2, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')

        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        fig.subplot(1, 2, 2)
        fig.sp.imshow(attr.reshape((8, 8)),
                      cmap='seismic',
                      vmin=-1 * th,
                      vmax=th)

        fig.show()
Example #12
    def test_margin(self):
        self.logger.info("Testing margin separation of SVM...")

        import numpy as np

        # create two separable blobs: 1000 + 100 points (imbalanced classes)
        rng = np.random.RandomState(0)
        n_samples_1 = 1000
        n_samples_2 = 100
        X = np.r_[1.5 * rng.randn(n_samples_1, 2),
                  0.5 * rng.randn(n_samples_2, 2) + [2, 2]]
        y = [0] * (n_samples_1) + [1] * (n_samples_2)

        dataset = CDataset(X, y)

        # fit the model
        clf = CClassifierSVM()
        clf.fit(dataset.X, dataset.Y)

        w = clf.w
        a = -w[0] / w[1]

        xx = CArray.linspace(-5, 5)
        yy = a * xx - clf.b / w[1]

        wclf = CClassifierSVM(class_weight={0: 1, 1: 10})
        wclf.fit(dataset.X, dataset.Y)

        ww = wclf.w
        wa = -ww[0] / ww[1]
        wyy = wa * xx - wclf.b / ww[1]

        fig = CFigure(linewidth=1)
        fig.sp.plot(xx, yy.ravel(), 'k-', label='no weights')
        fig.sp.plot(xx, wyy.ravel(), 'k--', label='with weights')
        fig.sp.scatter(X[:, 0].ravel(), X[:, 1].ravel(), c=y)
        fig.sp.legend()

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_svm.pdf'))
Example #13
    def test_linear_svm(self):
        """Performs tests on linear SVM."""
        self.logger.info("Testing SVM linear variants (kernel and not)")

        # Instantiating a linear SVM and an SVM with linear kernel
        linear_svm = CClassifierSVM(kernel=None)
        kernel_linear_svm = self.svms[0]

        self.logger.info("SVM w/ linear kernel in the primal")
        self.assertIsNone(linear_svm.kernel)

        self.logger.info("Training both classifiers on dense data")
        linear_svm.fit(self.dataset.X, self.dataset.Y)
        kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset.X, return_decision_function=True)
        kernel_linear_svm_pred_y, kernel_linear_svm_pred_score = \
            kernel_linear_svm.predict(
                self.dataset.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

        self.logger.info("Training both classifiers on sparse data")
        linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
        kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

        self.assertTrue(
            linear_svm.w.issparse, "Weights vector is not sparse even "
            "if training data is sparse")

        linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)
        kernel_linear_svm_pred_y, kernel_linear_svm_pred_score = \
            kernel_linear_svm.predict(
                self.dataset_sparse.X, return_decision_function=True)

        # check prediction
        self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)
Example #14
    def test_draw(self):
        """ Compare the classifiers graphically"""
        self.logger.info("Testing classifiers graphically")

        # generate 2D synthetic data
        dataset = CDLRandom(n_features=2,
                            n_redundant=1,
                            n_informative=1,
                            n_clusters_per_class=1).load()
        dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

        self.sgds[0].fit(dataset.X, dataset.Y)

        svm = CClassifierSVM()
        svm.fit(dataset.X, dataset.Y)

        fig = CFigure(width=10, markersize=8)
        fig.subplot(2, 1, 1)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(svm.decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SVM')

        fig.subplot(2, 1, 2)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(self.sgds[0].decision_function,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title('SGD Classifier')

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs',
                    'test_c_classifier_sgd1.pdf'))
Example #15
    def test_store_dual_vars(self):
        """Test of parameters that control storing of dual space variables."""
        self.logger.info("Checking CClassifierSVM.store_dual_vars...")

        self.logger.info("Linear SVM in primal space")
        svm = CClassifierSVM()
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNone(svm.alpha)

        self.logger.info("Linear SVM in dual space")
        svm = CClassifierSVM(kernel='linear')
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.alpha)

        self.logger.info("Nonlinear SVM in dual space")
        svm = CClassifierSVM(kernel='rbf')
        svm.fit(self.dataset.X, self.dataset.Y)
        self.assertIsNotNone(svm.alpha)
Example #16
)

# Select and set the best training parameters for the RBF classifier
#print("Estimating the best training parameters for RBF kernel...")
#best_rbf_params = clf_rbf.estimate_parameters(
#    dataset=tr,
#    parameters=xval_rbf_params,
#    splitter=xval_splitter,
#    metric='accuracy',
#    perf_evaluator='xval'
#)
print(best_lin_params)

# train classifier
print("start training")
clf_lin.fit(data_smp_encoded_secML)
#print("linear training ended, beginning rbf")
#clf_rbf.fit(tr)
#print("start linear classif")
#clf_l.fit(data_smp_encoded_secML)

print("Classifiers trained")


# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()


# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
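
# Sketch of the evaluation step the metric above is presumably for; assumes
# `raw_data_encoded_secML` also carries ground-truth labels in `.Y`
# (illustrative, not part of the original snippet).
acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y,
                                   y_pred=y_lin_pred)
print("Linear SVM accuracy: {:.2%}".format(acc_lin))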
Example #17
class TestCSecEval(CUnitTest):
    """Unittests for CSecEval (evasion attack)."""
    def setUp(self):

        self.classifier = CClassifierSVM(kernel='linear', C=1.0)

        self.lb = -2
        self.ub = +2

        n_tr = 20
        n_ts = 10
        n_features = 2

        n_reps = 1

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):
            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            loader = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                    n_features=n_features,
                                    centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                    center_box=(-0.5, 0.5),
                                    cluster_std=0.5,
                                    random_state=rep_i * 100 + 10)
            ds = loader.load()

            self.tr = ds[:n_tr, :]
            self.ts = ds[n_tr:, :]

            self.classifier.fit(self.tr.X, self.tr.Y)

            # only manipulate positive samples, targeting negative ones
            self.y_target = None
            self.attack_classes = CArray([1])

            for create_fn in (self._attack_pgd_ls, self._attack_cleverhans):
                self.attack_ds.append(self.ts)
                attack, param_name, param_values = create_fn()
                # set sec eval object
                self.sec_eval.append(
                    CSecEval(
                        attack=attack,
                        param_name=param_name,
                        param_values=param_values,
                    ))

    def _attack_pgd_ls(self):
        params = {
            "classifier": self.classifier,
            "double_init_ds": self.tr,
            "distance": 'l1',
            "lb": self.lb,
            "ub": self.ub,
            "y_target": self.y_target,
            "attack_classes": self.attack_classes,
            "solver_params": {
                'eta': 0.5,
                'eps': 1e-2
            }
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        # sec eval params
        param_name = 'dmax'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(start=0,
                                     step=dmax_step,
                                     stop=dmax + dmax_step)

        return attack, param_name, param_values

    def _attack_cleverhans(self):
        attack_params = {
            'eps': 0.1,
            'clip_max': self.ub,
            'clip_min': self.lb,
            'ord': 1
        }

        attack = CAttackEvasionCleverhans(classifier=self.classifier,
                                          surrogate_data=self.tr,
                                          y_target=self.y_target,
                                          clvh_attack_class=FastGradientMethod,
                                          **attack_params)

        param_name = 'attack_params.eps'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(start=0,
                                     step=dmax_step,
                                     stop=dmax + dmax_step)

        return attack, param_name, param_values

    def _plot_sec_eval(self):
        # figure creation
        figure = CFigure(height=5, width=5)

        for sec_eval in self.sec_eval:
            sec_eval_data = sec_eval.sec_eval_data
            # plot security evaluation
            figure.sp.plot_sec_eval(sec_eval_data,
                                    label='SVM',
                                    marker='o',
                                    show_average=True,
                                    mean=True)

        figure.subplots_adjust()
        figure.show()

    def test_sec_eval(self):
        # evaluate classifier security
        for sec_eval_i, sec_eval in enumerate(self.sec_eval):
            sec_eval.run_sec_eval(self.attack_ds[sec_eval_i])

        self._plot_sec_eval()

if __name__ == '__main__':
    CUnitTest.main()
Example #18
class TestCLossClassification(CUnitTest):
    """Unittests for CLossClassification and subclasses."""
    def setUp(self):

        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_one_at_zero(self):
        """Testing that classification loss return 1 for input 0."""

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            self.assertEqual(CArray([1.0]),
                             loss_class.loss(CArray([1]), CArray([0])))

    def test_in_out(self):
        """Unittest for input and output to loss classes"""
        def _check_loss(l, n_samples):

            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertEqual(l.dtype, float)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss = loss_class.loss(self.ds.Y, self.scores)
            loss_mean = loss.mean()
            self.logger.info("{:}.loss(y_true, scores).mean():\n{:}".format(
                loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_pos)

            loss = loss_class.loss(self.ds.Y, self.scores, pos_label=0)
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true, scores, pos_label=0).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_neg = loss_class.loss(self.ds.Y, self.scores[:, 0].ravel())
            loss_mean_neg = loss_neg.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 0].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_neg))
            _check_loss(loss_neg, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_neg)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, :])
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by: https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        fig.sp.plot(x,
                    CArray([1 if i <= 0 else 0 for i in x]),
                    label='0-1 indicator')

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()

    def test_grad(self):
        """Compare analytical gradients with its numerical approximation."""
        def _loss_wrapper(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        def _dloss_wrapper(scores, loss, true_labels):
            return loss.dloss(true_labels, scores)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            n_elems = 1
            y_true = CArray.randint(0, 2, n_elems).todense()
            score = CArray.randn((n_elems, ))

            check_grad_val = CFunction(
                _loss_wrapper, _dloss_wrapper).check_grad(score,
                                                          1e-8,
                                                          loss=loss_class,
                                                          true_labels=y_true)
            self.logger.info(
                "Gradient difference between analytical svm "
                "gradient and numerical gradient: %s", str(check_grad_val))
            self.assertLess(
                check_grad_val, 1e-4,
                "the gradient is wrong {:} for {:} loss".format(
                    check_grad_val, loss_id))
Example #19
                                            train_size=n_tr,
                                            random_state=random_state)

training_data = CDataset(x_train, y)
validation_data = CDataset(x_val, y_val)
test_data = CDataset(xtt, ytt)

del xtr
del ytr

metric = CMetricAccuracy()

clf = CClassifierSVM(kernel=CKernelRBF(gamma=GAMMA), C=C)

# We can now fit the classifier
clf.fit(training_data.X, training_data.Y)
print("Training of classifier complete!")
# Compute predictions on a test set
y_pred = clf.predict(test_data.X)

# Bounds of the attack space; can be set to `None` for unbounded
lb, ub = validation_data.X.min(), validation_data.X.max()
# Number of poisoning points to generate
n_poisoning_points = int(n_tr * poison_percentage)

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
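    # ('eps': 1e-6 closes this dict in the identical solver_params shown in a
    # later example on this page)
    'eps': 1e-6
}

# Sketch (assumption): in the secml poisoning tutorial these parameters feed
# a CAttackPoisoningSVM; the call below is illustrative, not the original code.
from secml.adv.attacks import CAttackPoisoningSVM

pois_attack = CAttackPoisoningSVM(classifier=clf,
                                  training_data=training_data,
                                  val=validation_data,
                                  lb=lb, ub=ub,
                                  solver_params=solver_params)
pois_attack.n_points = n_poisoning_points  # poisoning points to generate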
Example #20
class TestCLossRegression(CUnitTest):
    """Unittests for CLossRegression and subclasses."""
    def setUp(self):

        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_in_out(self):
        """Unittest for input and output to loss classes"""
        def _check_loss(l, n_samples):

            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        for loss_id in ('e-insensitive', 'e-insensitive-squared', 'quadratic'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, 1].ravel())
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

            with self.assertRaises(ValueError):
                loss_class.loss(self.ds.Y, self.scores[:, 1])

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by: https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        for loss_id in ('e-insensitive', 'e-insensitive-squared', 'quadratic'):

            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()
Example #21
    def setUp(self):
        
        classifier = CClassifierSVM(
            kernel='linear', C=1.0, grad_sampling=1.0)

        # data parameters
        discrete = False

        lb = -2
        ub = +2

        n_tr = 20
        n_ts = 10
        n_features = 2
        
        n_reps = 1

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):

            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            loader = CDLRandomBlobs(
                n_samples=n_tr + n_ts,
                n_features=n_features,
                centers=[(-0.5, -0.5), (+0.5, +0.5)],
                center_box=(-0.5, 0.5),
                cluster_std=0.5,
                random_state=rep_i * 100 + 10)
            ds = loader.load()

            tr = ds[:n_tr, :]
            ts = ds[n_tr:, :]
            
            classifier.fit(tr)
            
            self.attack_ds.append(ts)

            # only manipulate positive samples, targeting negative ones
            self.y_target = None
            attack_classes = CArray([1])
        
            params = {
                "classifier": classifier,
                "surrogate_classifier": classifier,
                "surrogate_data": tr,
                "distance": 'l1',
                "lb": lb,
                "ub": ub,
                "discrete": discrete,
                "y_target": self.y_target,
                "attack_classes": attack_classes,
                "solver_params": {'eta': 0.5, 'eps': 1e-2}
            }
            attack = CAttackEvasionPGDLS(**params)
            attack.verbose = 1
        
            # sec eval params
            param_name = 'dmax'
            dmax = 2
            dmax_step = 0.5
            param_values = CArray.arange(
                start=0, step=dmax_step,
                stop=dmax + dmax_step)

            # set sec eval object
            self.sec_eval.append(
                CSecEval(
                    attack=attack,
                    param_name=param_name,
                    param_values=param_values,
                    )
            )
Example #22
# Let's create a 3-Fold data splitter
from secml.data.splitter import CDataSplitterKFold
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(dataset=tr,
                                      parameters=xval_params,
                                      splitter=xval_splitter,
                                      metric='accuracy',
                                      perf_evaluator='xval')
print("The best training parameters are: ", best_params)

# We can now fit the classifier
clf.fit(tr)

# Compute predictions on a test set
y_pred = clf.predict(ts.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

x0, y0 = ts[5, :].X, ts[5, :].Y  # Initial sample; add randomness?
print(x0.dtype)
print(y0.dtype)

noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
dmax = 0.4  # Maximum perturbation
Example #23
# preds = secml_sklearn_clf.predict(ds_te_secml.X)
# metric = CMetricAccuracy()
# acc = metric.performance_score(y_true = ds_te_secml.Y, y_pred = preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)       #Doesn't work
#
# #sklearn here isn't supported for performing adversarial attacks, only the native SVM of secml supports adversarial attacks
# ###############################################################
#
# =============================================================================
# This won't work if we want to specify the target class for each example
x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

#secml_clf = CClassifierMulticlassOVA(CClassifierSVM, kernel = CKernelRBF(gamma = 10), C = 1)
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
secml_clf.fit(ds_tr_secml)
preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

# Performing the attack
noise_type = 'l2'
dmax = 0.4
lb, ub = None, None  # with 0, 1 it goes out of bounds
y_target = None  # y_target can be set to a class index, indicating which class is expected for the adversarial example

#solver_params = {
#    'eta': 0.3,
#    'max_iter': 100,
#    'eps': 1e-4
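#}

# Sketch (assumption): with the setup above, the PGD-LS evasion attack from
# the secml tutorials would be built and run roughly as follows; the solver
# values mirror the commented-out dict above.
from secml.adv.attacks.evasion import CAttackEvasionPGDLS

attack = CAttackEvasionPGDLS(classifier=secml_clf,
                             double_init_ds=ds_tr_secml,
                             distance=noise_type,
                             dmax=dmax,
                             lb=lb, ub=ub,
                             y_target=y_target,
                             solver_params={'eta': 0.3,
                                            'max_iter': 100,
                                            'eps': 1e-4})
y_adv_pred, _, adv_ds, _ = attack.run(x, y)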
Example #24
# The problem seems linearly separable -> try a logistic regression classifier without any parameter estimation
from secml.ml.classifiers import CClassifierLogistic
#clf_l= CClassifierLogistic()

xval_lin_params = {'C': [0.01, 0.1, 1, 10, 100]}

# Select and set the best training parameters for the linear classifier
print("Estimating the best training parameters for linear kernel...")
best_lin_params = clf_lin.estimate_parameters(dataset=tr_set,
                                              parameters=xval_lin_params,
                                              splitter=xval_splitter,
                                              metric='accuracy',
                                              perf_evaluator='xval')

clf_lin.fit(tr_set)

## Select and set the best training parameters for the linear classifier
#print("Estimating the best training parameters for linear kernel...")
#best_lin_params = clf_l.estimate_parameters(
#    dataset=tr_set,
#    parameters=xval_lin_params,
#    splitter=xval_splitter,
#    metric='accuracy',
#    perf_evaluator='xval'
#)

#clf_l.fit(tr_set)

# Compute predictions on a test set
y_pred = clf_lin.predict(ts_set.X)
Example #25
from secml.ml.classifiers import CClassifierLogistic
clf_l = CClassifierLogistic()

xval_lin_params = {'C': [0.01, 0.1, 1, 10, 100]}

# Select and set the best training parameters for the linear classifier
print("Estimating the best training parameters for linear kernel...")
best_lin_params = clf_lin.estimate_parameters(
    dataset=tr_set,
    parameters=xval_lin_params,
    splitter=xval_splitter,
    metric='accuracy',
    perf_evaluator='xval'
)

clf_lin.fit(tr_set)

# Select and set the best training parameters for the linear classifier
#print("Estimating the best training parameters for linear kernel...")
#best_lin_params = clf_l.estimate_parameters(
#    dataset=tr_set,
#    parameters=xval_lin_params,
#    splitter=xval_splitter,
#    metric='accuracy',
#    perf_evaluator='xval'
#)

#clf_l.fit(tr_set)

import random
from secml.adv.attacks.evasion import CAttackEvasionPGDLS
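
# Sketch (assumption): the two imports above suggest attacking a randomly
# chosen test point next; `ts_set` and the call below are illustrative.
idx = random.randint(0, ts_set.num_samples - 1)
x0, y0 = ts_set[idx, :].X, ts_set[idx, :].Y  # initial sample to perturb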
Example #26
class TestCPerfEvaluator(CUnitTest):
    """Unit test for CKernel."""
    def setUp(self):

        # Create dummy dataset (we want a test different from train)
        loader = CDLRandom(random_state=50000)
        self.training_dataset = loader.load()
        self.test_dataset = loader.load()

        # CREATE CLASSIFIERS
        kernel = CKernel.create('rbf')
        self.svm = CClassifierSVM(kernel=kernel)
        self.svm.verbose = 1

        self.logger.info("Using kernel {:}".format(self.svm.kernel.class_type))

    def test_parameters_setting(self):

        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Do xval to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        # Set the best parameters inside the classifier
        self.svm.estimate_parameters(self.training_dataset, xval_parameters,
                                     xval_splitter, 'accuracy')

        self.logger.info("SVM has now the following parameters: {:}".format(
            self.svm.get_params()))

        self.assertEqual(self.svm.get_params()['C'], 1)
        self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter,
                                       CMetric.create('accuracy'))
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters)

        for param in xval_parameters:
            self.logger.info("Best '{:}' is: {:}".format(
                param, best_params[param]))
            self.assertEqual(best_params[param], self.svm.get_params()[param])

        self.svm.verbose = 0

        parameters_combination = [[1, 1], [1, 50], [10, 1], [10, 50], [100, 1],
                                  [100, 50]]
        par_comb_score = CArray.zeros(len(parameters_combination))
        for comb in range(len(parameters_combination)):

            this_fold_score = []
            num_xval_fold = len(xval_splitter.tr_idx)

            for f in range(num_xval_fold):
                self.svm.set("C", parameters_combination[comb][0])
                self.svm.kernel.gamma = parameters_combination[comb][1]

                self.svm.fit(
                    self.training_dataset[xval_splitter.tr_idx[f], :].X,
                    self.training_dataset[xval_splitter.tr_idx[f], :].Y)

                this_fold_predicted = self.svm.predict(
                    self.training_dataset[xval_splitter.ts_idx[f], :].X)

                this_fold_accuracy = skm.accuracy_score(
                    self.training_dataset[
                        xval_splitter.ts_idx[f], :].Y.get_data(),
                    this_fold_predicted.get_data())
                this_fold_score.append(this_fold_accuracy)

            par_comb_score[comb] = (np.mean(this_fold_score))
            self.logger.info("this fold mean: {:}".format(
                par_comb_score[comb]))

        max_combination_score = par_comb_score.max()
        better_param_comb = parameters_combination[par_comb_score.argmax()]
        self.logger.info("max combination score founded here: {:}".format(
            max_combination_score))
        self.logger.info(
            "max comb score founded during xval {:}".format(best_score))

        self.assertEqual(max_combination_score, best_score)

        # set parameters found by xval and check if they are the same chosen here
        self.logger.info("the parameters selected by our own xval are:")
        self.svm.set_params(best_params)
        self.logger.info("C: {:}".format(self.svm.C))
        self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))
        # check c
        self.assertEqual(better_param_comb[0], self.svm.C)
        # check gamma
        self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)

    def test_nan_metric_value(self):

        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Do xval to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        self.logger.info("Testing metric with some nan")

        some_nan_metric = CMetricFirstNan()

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric)
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters, pick='last')

        self.logger.info("best score : {:}".format(best_score))

        # The xval should select the only actual value (the others are nan)
        self.assertEqual(best_score, 1.)

        self.logger.info("Testing metric with all nan")

        # This test case involves an all-nan slice
        self.logger.filterwarnings(action="ignore",
                                   message="All-NaN slice encountered",
                                   category=RuntimeWarning)

        all_nan_metric = CMetricAllNan()

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric)
        perf_eval.verbose = 1

        with self.assertRaises(ValueError):
            perf_eval.evaluate_params(self.svm,
                                      self.training_dataset,
                                      xval_parameters,
                                      pick='last')

    def _run_multiclass(self, tr, multiclass, xval_params, expected_best):

        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=3,
                                             random_state=50000)

        # Set the best parameters inside the classifier
        best_params = multiclass.estimate_parameters(tr, xval_params,
                                                     xval_splitter, 'accuracy')

        self.logger.info(
            "Multiclass SVM has now the following parameters: {:}".format(
                multiclass.get_params()))

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, expected_best['C'])
            self.assertEqual(clf.kernel.gamma, expected_best['kernel.gamma'])

        # Final test: fit using best parameters
        multiclass.fit(tr.X, tr.Y)

        for clf in multiclass._binary_classifiers:
            for param in best_params:
                self.assertEqual(clf.get_params()[param], best_params[param])

    def test_params_multiclass(self):
        """Parameter estimation for multiclass classifiers."""
        # Create dummy dataset (we want a test different from train)
        tr = CDLRandom(n_classes=4, n_clusters_per_class=1,
                       random_state=50000).load()

        kernel = CKernel.create('rbf')
        multiclass = CClassifierMulticlassOVA(CClassifierSVM,
                                              C=1,
                                              kernel=kernel)
        multiclass.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, multiclass, xval_parameters, expected)

        self.logger.info("Testing with preprocessor")

        kernel = CKernel.create('rbf')
        multiclass = CClassifierMulticlassOVA(CClassifierSVM,
                                              C=1,
                                              kernel=kernel,
                                              preprocess='min-max')
        multiclass.verbose = 1

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

        expected = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, multiclass, xval_parameters, expected)
Example #27
                                              perf_evaluator='xval')

# Select and set the best training parameters for the RBF classifier
#print("Estimating the best training parameters for RBF kernel...")
#best_rbf_params = clf_rbf.estimate_parameters(
#    dataset=tr,
#    parameters=xval_rbf_params,
#    splitter=xval_splitter,
#    metric='accuracy',
#    perf_evaluator='xval'
#)
print(best_lin_params)

# train classifier
print("start training")
clf_lin.fit(data_smp_encoded_secML)
#print("linear training ended, beginning rbf")
#clf_rbf.fit(tr)
#print("start linear classif")
#clf_l.fit(data_smp_encoded_secML)

print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
#y_rbf_pred = clf_rbf.predict(ts.X)
#y_l_pred = clf_l.predict(raw_data_encoded_secML.X)
Example #28
class TestCSecEval(CAttackEvasionTestCases):
    """Unittests for CSecEval (evasion attack)."""

    def setUp(self):

        self.clf = CClassifierSVM(C=1.0)

        self.n_tr = 40
        self.n_features = 10
        self.seed = 0

        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(self.seed))
        self.ds = self._load_blobs(
            self.n_features, 2, sparse=False, seed=self.seed)

        self.tr = self.ds[:self.n_tr, :]
        self.ts = self.ds[self.n_tr:, :]

        self.clf.fit(self.tr.X, self.tr.Y)

    def test_attack_pgd_ls(self):
        """Test SecEval using CAttackEvasionPGDLS."""
        params = {
            "classifier": self.clf,
            "double_init_ds": self.tr,
            "distance": 'l2',
            "lb": -2,
            "ub": 2,
            "y_target": None,
            "solver_params": {'eta': 0.1, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        param_name = 'dmax'

        self._set_and_run(attack, param_name)

    def test_attack_pgd_ls_discrete(self):
        """Test SecEval using CAttackEvasionPGDLS on a problematic
        discrete case with L1 constraint.
        We alter the classifier so that many weights have the same value.
        The optimizer should be able to evade the classifier anyway,
        by changing one feature each iteration. Otherwise, by changing
        all the feature with the same value at once, the evasion will always
        fail because the L1 constraint will be violated.
        """
        self.ds = self._discretize_data(self.ds, eta=1)
        self.ds.X[self.ds.X > 1] = 1
        self.ds.X[self.ds.X < -1] = -1

        self.tr = self.ds[:self.n_tr, :]
        self.ts = self.ds[self.n_tr:, :]

        self.clf.fit(self.tr.X, self.tr.Y)

        # Set few features to the same max value
        w_new = self.clf.w.deepcopy()
        w_new[CArray.randint(
            self.clf.w.size, shape=5, random_state=0)] = self.clf.w.max()
        self.clf._w = w_new

        params = {
            "classifier": self.clf,
            "double_init": False,
            "distance": 'l1',
            "lb": -1,
            "ub": 1,
            "y_target": None,
            "solver_params": {'eta': 1, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        param_name = 'dmax'

        self._set_and_run(attack, param_name, dmax_step=1)

    def test_attack_cleverhans(self):
        """Test SecEval using CAttackEvasionCleverhans+FastGradientMethod."""
        try:
            import cleverhans
        except ImportError as e:
            import unittest
            raise unittest.SkipTest(e)

        from cleverhans.attacks import FastGradientMethod
        from secml.adv.attacks import CAttackEvasionCleverhans
        params = {
            "classifier": self.clf,
            "surrogate_data": self.tr,
            "y_target": None,
            "clvh_attack_class": FastGradientMethod,
            'eps': 0.1,
            'clip_max': 2,
            'clip_min': -2,
            'ord': 2
        }
        attack = CAttackEvasionCleverhans(**params)

        param_name = 'attack_params.eps'

        self._set_and_run(attack, param_name)

    def _set_and_run(self, attack, param_name, dmax=2, dmax_step=0.5):
        """Create the SecEval and run it on test set."""
        param_values = CArray.arange(
            start=0, step=dmax_step,
            stop=dmax + dmax_step)

        sec_eval = CSecEval(
            attack=attack,
            param_name=param_name,
            param_values=param_values,
        )

        sec_eval.run_sec_eval(self.ts)

        self._plot_sec_eval(sec_eval)

        # At the end of the seceval we expect 0% accuracy
        self.assertFalse(
            CArray(sec_eval.sec_eval_data.Y_pred[-1] == self.ts.Y).any())

    @staticmethod
    def _plot_sec_eval(sec_eval):

        figure = CFigure(height=5, width=5)

        figure.sp.plot_sec_eval(sec_eval.sec_eval_data,
                                label='SVM', marker='o',
                                show_average=True, mean=True)

        figure.sp.title(sec_eval.attack.__class__.__name__)
        figure.subplots_adjust()
        figure.show()

if __name__ == '__main__':
    CAttackEvasionTestCases.main()
Example #29
training, validation = splitter.split(dataset)

# Normalize the data
normalizer = CNormalizerMinMax()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the multiclass classifier
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
predictionY = classifier.predict(test.X)

# Bounds of the attack space. Can be set to `None` for unbounded
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6