Example 1
0
    def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None):
        """Load a random-blobs dataset.

        Fixed loader settings:
        - n_samples = 50
        - center_box = (-0.5, 0.5)
        - cluster_std = 0.5

        Parameters
        ----------
        n_feats : int
            Number of features of each sample.
        n_clusters : int
            Number of blob centers to generate.
        sparse : bool, optional (default False)
            If True, the loaded dataset is converted to sparse format.
        seed : int or None, optional (default None)
            Random state for reproducible generation.

        """
        blobs_loader = CDLRandomBlobs(n_samples=50,
                                      n_features=n_feats,
                                      centers=n_clusters,
                                      center_box=(-0.5, 0.5),
                                      cluster_std=0.5,
                                      random_state=seed)

        self.logger.info("Loading `random_blobs` with seed: {:}".format(seed))
        blobs = blobs_loader.load()

        # Convert to sparse representation only on explicit request
        return blobs.tosparse() if sparse is True else blobs
    def setUp(self):
        """Prepare classifier, datasets and security-evaluation objects.

        Builds a linear SVM, generates one random-blobs dataset per
        repetition, fits the classifier and creates one CSecEval per
        attack factory.
        """
        self.classifier = CClassifierSVM(kernel='linear', C=1.0)

        # Box constraint for the attacks
        self.lb = -2
        self.ub = +2

        n_tr = 20  # number of training samples
        n_ts = 10  # number of test samples
        n_features = 2

        n_reps = 1  # number of dataset repetitions

        # FIX: the availability check was inlined in the attacks loop below,
        # re-running the import on every iteration; it is loop-invariant, so
        # it is hoisted here. Behavior is unchanged: as in the original, if
        # cleverhans is unavailable NO sec eval is created (the PGD-LS one
        # included).
        try:
            import cleverhans  # noqa: F401
            has_cleverhans = True
        except ImportError:
            has_cleverhans = False

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):
            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            loader = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                    n_features=n_features,
                                    centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                    center_box=(-0.5, 0.5),
                                    cluster_std=0.5,
                                    random_state=rep_i * 100 + 10)
            ds = loader.load()

            self.tr = ds[:n_tr, :]
            self.ts = ds[n_tr:, :]

            self.classifier.fit(self.tr.X, self.tr.Y)

            # only manipulate positive samples, targeting negative ones
            self.y_target = None
            self.attack_classes = CArray([1])

            for create_fn in (self._attack_pgd_ls, self._attack_cleverhans):
                # TODO: REFACTOR THESE UNITTESTS REMOVING THE FOR LOOP

                if not has_cleverhans:
                    continue

                self.attack_ds.append(self.ts)
                attack, param_name, param_values = create_fn()
                # set sec eval object
                self.sec_eval.append(
                    CSecEval(
                        attack=attack,
                        param_name=param_name,
                        param_values=param_values,
                    ))
    def setUp(self):
        """Test for init and fit methods."""
        # Synthetic two-class blob data used by all the tests
        self.dataset = CDLRandomBlobs(
            n_features=2, n_samples=100, centers=2,
            cluster_std=2.0, random_state=0).load()

        self.logger.info("Testing classifier creation ")
        # Inner classifier without rejection capabilities
        self.clf_norej = CClassifierSGD(
            loss=CLossHinge(), regularizer=CRegularizerL2(), random_state=0)

        # Wrap the inner classifier with a reject-by-threshold mechanism
        self.clf = CClassifierRejectThreshold(self.clf_norej, threshold=0.6)
        self.clf.verbose = 2  # Enabling debug output for each classifier
        self.clf.fit(self.dataset.X, self.dataset.Y)
 def test_plot(self):
     """Compare the classifiers graphically."""
     blobs = CDLRandomBlobs(n_samples=100, n_features=2, centers=3,
                            random_state=1).load()
     figure = self._test_plot(self.nc, blobs, [-10])
     out_path = fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_nearest_centroid.pdf')
     figure.savefig(out_path)
Example 5
0
 def setUp(self):
     """Create a multiclass OVA classifier and a 4-blob dataset."""
     self.clf = CClassifierMulticlassOVA(
         classifier=CClassifierSVM, kernel='rbf')
     blobs = CDLRandomBlobs(
         n_features=2, centers=4, random_state=3).load()
     # Rescale features before fitting
     blobs.X = CNormalizerMinMax().fit_transform(blobs.X)
     self.dataset = blobs
     self.clf.fit(self.dataset.X, self.dataset.Y)
Example 6
0
 def setUpClass(cls):
     """Load a two-blob dataset shared by all the tests of the class."""
     cls.plots = False  # set True to enable plotting in the tests
     loader = CDLRandomBlobs(n_features=2,
                             centers=[[-1, 1], [1, 1]],
                             cluster_std=(0.4, 0.4),
                             random_state=0)
     cls.dataset = loader.load()
     CUnitTest.setUpClass()  # call superclass constructor
 def test_plot(self):
     """Compare the classifiers graphically."""
     blobs = CDLRandomBlobs(n_samples=100, n_features=2, centers=3,
                            random_state=1).load()
     figure = self._test_plot(self.rnd_forest, blobs, levels=[0.5])
     out_path = fm.join(fm.abspath(__file__), 'figs',
                        'test_c_classifier_random_forest.pdf')
     figure.savefig(out_path)
Example 8
0
 def test_plot(self):
     """Plot the KNN classifier decision regions on a 3-blob dataset."""
     blobs = CDLRandomBlobs(n_samples=100,
                            n_features=2,
                            centers=3,
                            random_state=1).load()
     figure = self._test_plot(self.knn, blobs, levels=[0.5])
     figure.savefig(
         fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_knn.pdf'))
    def _dataset_creation(self):
        """Create a blob dataset split into normalized train/test sets."""
        self.n_features = 2  # Number of dataset features

        self.seed = 42  # random state for reproducibility

        self.n_tr = 50   # number of training samples
        self.n_ts = 100  # number of test samples
        self.n_classes = 2

        self.logger.info("Loading `random_blobs` with seed: {:}".format(
            self.seed))

        dataset = CDLRandomBlobs(n_samples=self.n_tr + self.n_ts,
                                 n_features=self.n_features,
                                 centers=[(-1, -1), (+1, +1)],
                                 center_box=(-2, 2),
                                 cluster_std=0.8,
                                 random_state=self.seed).load()

        # Shuffle-split into training and test portions
        splitter = CDataSplitterShuffle(num_folds=1,
                                        train_size=self.n_tr,
                                        random_state=3)
        splitter.compute_indices(dataset)
        self.tr = dataset[splitter.tr_idx[0], :]
        self.ts = dataset[splitter.ts_idx[0], :]

        # Scale features to [-1, 1]; fit the scaler on training data only
        normalizer = CNormalizerMinMax(feature_range=(-1, 1))
        self.tr.X = normalizer.fit_transform(self.tr.X)
        self.ts.X = normalizer.transform(self.ts.X)

        self.lb = -1
        self.ub = 1

        # Plotting grid slightly larger than the feature bounds
        self.grid_limits = [(self.lb - 0.1, self.ub + 0.1),
                            (self.lb - 0.1, self.ub + 0.1)]
Example 10
0
    def test_margin(self):
        """Visually check the margin learned by SGD on separable blobs."""
        self.logger.info("Testing margin separation of SGD...")

        # 50 well-separated points in two clusters
        blobs = CDLRandomBlobs(n_samples=50,
                               centers=2,
                               cluster_std=0.60,
                               random_state=0).load()

        # fit the model
        sgd_clf = CClassifierSGD(loss=CLossHinge(),
                                 regularizer=CRegularizerL2(),
                                 alpha=0.01,
                                 max_iter=200,
                                 random_state=0)
        sgd_clf.fit(blobs.X, blobs.Y)

        # Evaluate the decision function on a 10x10 grid of points
        grid_x = CArray.linspace(-1, 5, 10)
        grid_y = CArray.linspace(-1, 5, 10)
        X1, X2 = np.meshgrid(grid_x.tondarray(), grid_y.tondarray())
        Z = CArray.empty(X1.shape)
        for (i, j), x1 in np.ndenumerate(X1):
            point = CArray([x1, X2[i, j]])
            Z[i, j] = sgd_clf.decision_function(point, y=1)

        # Draw the margins (dashed), the decision boundary (solid)
        # and the dataset points
        fig = CFigure(linewidth=1)
        fig.sp.contour(X1, X2, Z,
                       [-1.0, 0.0, 1.0],
                       colors='k',
                       linestyles=['dashed', 'solid', 'dashed'])
        fig.sp.scatter(blobs.X[:, 0].ravel(),
                       blobs.X[:, 1].ravel(),
                       c=blobs.Y,
                       s=40)

        fig.savefig(
            fm.join(fm.abspath(__file__), 'figs',
                    'test_c_classifier_sgd2.pdf'))
    def setUpClass(cls):
        """Fit a multiclass SVM on three tight blobs and pick a test point."""
        CAttackEvasionCleverhansTestCases.setUpClass()

        cls.seed = 0

        cls.y_target = None  # untargeted attack

        # OVA multiclass SVM with RBF kernel; inputs scaled via min-max
        cls.clf = CClassifierMulticlassOVA(CClassifierSVM,
                                           kernel=CKernelRBF(gamma=10),
                                           C=0.1,
                                           preprocess=CNormalizerMinMax())

        # FIX: n_features was 0, inconsistent with the 2-D centers below.
        # With an explicit centers array the number of features is taken
        # from the centers themselves, so declare it as 2 to match.
        cls.ds = CDLRandomBlobs(n_features=2,
                                centers=[[0.1, 0.1], [0.5, 0], [0.8, 0.8]],
                                cluster_std=0.01,
                                n_samples=100,
                                random_state=cls.seed).load()

        cls.clf.fit(cls.ds.X, cls.ds.Y)

        # Initial 2-feature point to perturb and its predicted label
        cls.x0 = CArray([0.6, 0.2])
        cls.y0 = CArray(cls.clf.predict(cls.x0))
# Random seed shared by data generation and the train/test split
random_state = 999

n_features = 2  # Number of features
n_samples = 1100  # Number of samples
centers = [[-2, 0], [2, -2], [2, 2]]  # Centers of the clusters
cluster_std = 0.8  # Standard deviation of the clusters

# Generate a synthetic 3-cluster dataset
from secml.data.loader import CDLRandomBlobs
dataset = CDLRandomBlobs(n_features=n_features,
                         centers=centers,
                         cluster_std=cluster_std,
                         n_samples=n_samples,
                         random_state=random_state).load()

n_tr = 1000  # Number of training set samples
n_ts = 100  # Number of test set samples

# Split in training and test
from secml.data.splitter import CTrainTestSplit
splitter = CTrainTestSplit(
    train_size=n_tr, test_size=n_ts, random_state=random_state)
tr, ts = splitter.split(dataset)

# Normalize the data
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
# Fit the scaler on training data only, then apply it to both sets
tr.X = nmz.fit_transform(tr.X)
ts.X = nmz.transform(ts.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
Example 13
0
from secml.figure.c_figure import CFigure
from secml.data.selection import CPrototypesSelector
from secml.data.loader import CDLRandomBlobs

# Two-blob dataset used to compare the prototype-selection rules
dataset = CDLRandomBlobs(n_samples=30,
                         n_features=2,
                         centers=[[-0.5, 0], [0.5, 1]],
                         cluster_std=(0.8, 0.8),
                         random_state=7545).load()

fig = CFigure(width=6, height=2, markersize=8, fontsize=11)

rules = ['center', 'border', 'spanning', 'k-medians']
for idx, rule in enumerate(rules):

    # Select 5 prototypes from the dataset with the current rule
    selector = CPrototypesSelector.create(rule)
    selector.verbose = 2
    reduced = selector.select(dataset, n_prototypes=5)

    # One subplot per selection rule
    fig.subplot(1, len(rules), idx + 1)

    # Plot dataset points
    fig.sp.scatter(dataset.X[:, 0], dataset.X[:, 1], linewidths=0, s=30)
    # Highlight the selected prototypes in red
    fig.sp.plot(reduced.X[:, 0],
                reduced.X[:, 1],
                linestyle='None',
                markeredgewidth=2,
                marker='o',
                mfc='red')
    fig.sp.title('{:}'.format(rule))
class TestCClassifierRejectThreshold(CClassifierRejectTestCases):
    """Unit test for CClassifierRejectThreshold."""

    def setUp(self):
        """Test for init and fit methods."""
        # generate synthetic data
        self.dataset = CDLRandomBlobs(n_features=2, n_samples=100, centers=2,
                                      cluster_std=2.0, random_state=0).load()

        self.logger.info("Testing classifier creation ")
        # Inner classifier without rejection capabilities
        self.clf_norej = CClassifierSGD(regularizer=CRegularizerL2(),
                                        loss=CLossHinge(), random_state=0)

        # Wrap the inner classifier with a reject-by-threshold mechanism
        self.clf = CClassifierRejectThreshold(self.clf_norej, threshold=0.6)
        self.clf.verbose = 2  # Enabling debug output for each classifier
        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        self.logger.info(
            "Test for decision_function() and predict() methods.")

        # Run the shared checks on both dense and sparse data
        scores_d = self._test_fun(self.clf, self.dataset.todense())
        scores_s = self._test_fun(self.clf, self.dataset.tosparse())

        # FIXME: WHY THIS TEST IS CRASHING? RANDOM_STATE MAYBE?
        # self.assert_array_almost_equal(scores_d, scores_s)

    def test_reject(self):
        """Check that rejection does not decrease prediction accuracy."""
        clf = self.clf_norej.deepcopy()
        clf_reject = self.clf.deepcopy()

        # Training the classifiers
        clf_reject.fit(self.dataset.X, self.dataset.Y)
        clf.fit(self.dataset.X, self.dataset.Y)

        # Classification of another dataset
        # (rejected samples are labeled -1 by clf_reject)
        y_pred_reject, score_pred_reject = clf_reject.predict(
            self.dataset.X, n_jobs=_NoValue, return_decision_function=True)
        y_pred, score_pred = clf.predict(self.dataset.X,
                                         return_decision_function=True)

        # Compute the number of rejected samples
        n_rej = (y_pred_reject == -1).sum()
        self.logger.info("Rejected samples: {:}".format(n_rej))

        self.logger.info("Real: \n{:}".format(self.dataset.Y))
        self.logger.info("Predicted: \n{:}".format(y_pred))
        self.logger.info(
            "Predicted with reject: \n{:}".format(y_pred_reject))

        acc = CMetric.create('accuracy').performance_score(
            y_pred, self.dataset.Y)
        self.logger.info("Accuracy no rejection: {:}".format(acc))

        # Accuracy with rejection is computed on the kept samples only
        rej_acc = CMetric.create('accuracy').performance_score(
            y_pred_reject[y_pred_reject != -1],
            self.dataset.Y[y_pred_reject != -1])
        self.logger.info("Accuracy with rejection: {:}".format(rej_acc))

        # check that the accuracy using reject is higher that the one
        # without rejects
        self.assertGreaterEqual(
            rej_acc, acc, "The accuracy of the classifier that is allowed "
                          "to reject is lower than the one of the "
                          "classifier that is not allowed to reject")

    def test_gradient(self):
        """Unittest for gradient_f_x method."""

        i = 5  # Sample to test

        self.logger.info("Testing with dense data...")
        ds = self.dataset.todense()
        clf = self.clf.fit(ds.X, ds.Y)

        # -1 is the reject class: its gradient must also be checked
        grads_d = self._test_gradient_numerical(
            clf, ds.X[i, :], extra_classes=[-1])

        self.logger.info("Testing with sparse data...")
        ds = self.dataset.tosparse()
        clf = self.clf.fit(ds.X, ds.Y)

        grads_s = self._test_gradient_numerical(
            clf, ds.X[i, :], extra_classes=[-1])

        # FIXME: WHY THIS TEST IS CRASHING? RANDOM_STATE MAYBE?
        # Compare dense gradients with sparse gradients
        # for grad_i, grad in enumerate(grads_d):
        #     self.assert_array_almost_equal(
        #         grad.atleast_2d(), grads_s[grad_i])

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        # All linear transformations with gradient implemented
        self._test_preprocess(self.dataset, self.clf,
                              ['min-max', 'mean-std'],
                              [{'feature_range': (-1, 1)}, {}])
        self._test_preprocess_grad(self.dataset, self.clf,
                                   ['min-max', 'mean-std'],
                                   [{'feature_range': (-1, 1)}, {}],
                                   extra_classes=[-1])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(
            self.dataset, self.clf, ['pca', 'unit-norm'], [{}, {}])

    def test_draw(self):
        """ Compare the classifiers graphically"""
        self.logger.info("Testing classifiers graphically")

        fig = CFigure(width=10, markersize=8)
        # Plot dataset points

        # mark the rejected samples
        y = self.clf.predict(self.dataset.X)
        fig.sp.plot_ds(
            self.dataset[y == -1, :], colors=['k', 'k'], markersize=12)

        # plot the dataset
        fig.sp.plot_ds(self.dataset)

        # Plot objective function
        fig.sp.plot_fun(self.clf.decision_function,
                        grid_limits=self.dataset.get_bounds(),
                        levels=[0], y=1)
        fig.sp.title('Classifier with reject threshold')

        fig.show()
Example 15
0
    def setUp(self):
        """Build classifier, data and PGD-LS security-evaluation objects."""
        classifier = CClassifierSVM(
            kernel='linear', C=1.0, grad_sampling=1.0)

        # data parameters
        discrete = False

        # box constraint for the attack
        lb = -2
        ub = +2

        n_tr = 20
        n_ts = 10
        n_features = 2

        n_reps = 1  # number of dataset repetitions

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):

            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            ds = CDLRandomBlobs(
                n_samples=n_tr + n_ts,
                n_features=n_features,
                centers=[(-0.5, -0.5), (+0.5, +0.5)],
                center_box=(-0.5, 0.5),
                cluster_std=0.5,
                random_state=rep_i * 100 + 10).load()

            tr = ds[:n_tr, :]
            ts = ds[n_tr:, :]

            # NOTE(review): other setups in this file call fit(X, Y); this
            # one passes the whole dataset -- presumably an older secml API.
            # Kept as-is; confirm against the secml version in use.
            classifier.fit(tr)

            self.attack_ds.append(ts)

            # only manipulate positive samples, targeting negative ones
            self.y_target = None
            attack_classes = CArray([1])

            attack = CAttackEvasionPGDLS(
                classifier=classifier,
                surrogate_classifier=classifier,
                surrogate_data=tr,
                distance='l1',
                lb=lb,
                ub=ub,
                discrete=discrete,
                y_target=self.y_target,
                attack_classes=attack_classes,
                solver_params={'eta': 0.5, 'eps': 1e-2})
            attack.verbose = 1

            # security evaluation over increasing perturbation size `dmax`
            dmax = 2
            dmax_step = 0.5
            param_values = CArray.arange(
                start=0, step=dmax_step, stop=dmax + dmax_step)

            # set sec eval object
            self.sec_eval.append(
                CSecEval(attack=attack,
                         param_name='dmax',
                         param_values=param_values))