def _load_blobs(self, n_feats, n_clusters, sparse=False, seed=None):
    """Load Random Blobs dataset.

    - n_samples = 50
    - center_box = (-0.5, 0.5)
    - cluster_std = 0.5

    Parameters
    ----------
    n_feats : int
    n_clusters : int
    sparse : bool, optional (default False)
    seed : int or None, optional (default None)

    """
    loader = CDLRandomBlobs(n_samples=50,
                            n_features=n_feats,
                            centers=n_clusters,
                            center_box=(-0.5, 0.5),
                            cluster_std=0.5,
                            random_state=seed)
    self.logger.info("Loading `random_blobs` with seed: {:}".format(seed))
    ds = loader.load()
    if sparse is True:
        ds = ds.tosparse()
    return ds
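# A minimal standalone sketch of what `_load_blobs` does, so it can be
# tried outside the test class (the parameter values mirror the helper
# above; nothing here is new API):
from secml.data.loader import CDLRandomBlobs

ds = CDLRandomBlobs(n_samples=50, n_features=2, centers=3,
                    center_box=(-0.5, 0.5), cluster_std=0.5,
                    random_state=0).load()
ds_sparse = ds.tosparse()  # same conversion `_load_blobs` applies when sparse=True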
def setUp(self):
    self.classifier = CClassifierSVM(kernel='linear', C=1.0)

    self.lb = -2
    self.ub = +2

    n_tr = 20
    n_ts = 10
    n_features = 2
    n_reps = 1

    self.sec_eval = []
    self.attack_ds = []
    for rep_i in range(n_reps):
        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(rep_i))
        loader = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                n_features=n_features,
                                centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                center_box=(-0.5, 0.5),
                                cluster_std=0.5,
                                random_state=rep_i * 100 + 10)
        ds = loader.load()

        self.tr = ds[:n_tr, :]
        self.ts = ds[n_tr:, :]

        self.classifier.fit(self.tr.X, self.tr.Y)

        # only manipulate positive samples, targeting negative ones
        self.y_target = None
        self.attack_classes = CArray([1])

        for create_fn in (self._attack_pgd_ls, self._attack_cleverhans):
            # TODO: refactor these unit tests, removing the for loop
            # Skip only the CleverHans-based attack when the optional
            # `cleverhans` dependency is missing; PGD-LS does not need it.
            if create_fn is self._attack_cleverhans:
                try:
                    import cleverhans
                except ImportError:
                    continue

            self.attack_ds.append(self.ts)
            attack, param_name, param_values = create_fn()

            # set sec eval object
            self.sec_eval.append(
                CSecEval(attack=attack,
                         param_name=param_name,
                         param_values=param_values))
def setUp(self): """Test for init and fit methods.""" # generate synthetic data self.dataset = CDLRandomBlobs(n_features=2, n_samples=100, centers=2, cluster_std=2.0, random_state=0).load() self.logger.info("Testing classifier creation ") self.clf_norej = CClassifierSGD(regularizer=CRegularizerL2(), loss=CLossHinge(), random_state=0) self.clf = CClassifierRejectThreshold(self.clf_norej, threshold=0.6) self.clf.verbose = 2 # Enabling debug output for each classifier self.clf.fit(self.dataset.X, self.dataset.Y)
def test_plot(self): """ Compare the classifiers graphically""" ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, random_state=1).load() fig = self._test_plot(self.nc, ds, [-10]) fig.savefig(fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_nearest_centroid.pdf'))
def setUp(self):
    self.clf = CClassifierMulticlassOVA(
        classifier=CClassifierSVM, kernel='rbf')
    self.dataset = CDLRandomBlobs(
        random_state=3, n_features=2, centers=4).load()
    self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
    self.clf.fit(self.dataset.X, self.dataset.Y)
@classmethod
def setUpClass(cls):
    cls.plots = False
    cls.dataset = CDLRandomBlobs(n_features=2,
                                 centers=[[-1, 1], [1, 1]],
                                 cluster_std=(0.4, 0.4),
                                 random_state=0).load()
    CUnitTest.setUpClass()  # call superclass setUpClass
def test_plot(self): """ Compare the classifiers graphically""" ds = CDLRandomBlobs(n_samples=100, centers=3, n_features=2, random_state=1).load() fig = self._test_plot(self.rnd_forest, ds, levels=[0.5]) fig.savefig(fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_random_forest.pdf'))
def test_plot(self):
    """Compare the classifiers graphically."""
    ds = CDLRandomBlobs(n_samples=100, centers=3,
                        n_features=2, random_state=1).load()
    fig = self._test_plot(self.knn, ds, levels=[0.5])
    fig.savefig(
        fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_knn.pdf'))
def _dataset_creation(self):
    """Create a blob dataset."""
    self.n_features = 2  # number of dataset features
    self.seed = 42

    self.n_tr = 50
    self.n_ts = 100
    self.n_classes = 2

    loader = CDLRandomBlobs(n_samples=self.n_tr + self.n_ts,
                            n_features=self.n_features,
                            centers=[(-1, -1), (+1, +1)],
                            center_box=(-2, 2),
                            cluster_std=0.8,
                            random_state=self.seed)
    self.logger.info(
        "Loading `random_blobs` with seed: {:}".format(self.seed))

    dataset = loader.load()

    splitter = CDataSplitterShuffle(num_folds=1,
                                    train_size=self.n_tr,
                                    random_state=3)
    splitter.compute_indices(dataset)
    self.tr = dataset[splitter.tr_idx[0], :]
    self.ts = dataset[splitter.ts_idx[0], :]

    normalizer = CNormalizerMinMax(feature_range=(-1, 1))
    self.tr.X = normalizer.fit_transform(self.tr.X)
    self.ts.X = normalizer.transform(self.ts.X)

    self.lb = -1
    self.ub = 1

    self.grid_limits = [(self.lb - 0.1, self.ub + 0.1),
                        (self.lb - 0.1, self.ub + 0.1)]
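# A sketch of how `grid_limits` might be consumed afterwards (an
# assumption: `clf` is a classifier already fitted on `self.tr`; the
# `plot_fun` call mirrors the `test_draw` method later in this section):
# fig = CFigure()
# fig.sp.plot_ds(self.ts)
# fig.sp.plot_fun(clf.decision_function,
#                 grid_limits=self.grid_limits, levels=[0], y=1)
# fig.show()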
def test_margin(self):
    self.logger.info("Testing margin separation of SGD...")

    # we create 50 separable points
    dataset = CDLRandomBlobs(n_samples=50, centers=2, random_state=0,
                             cluster_std=0.60).load()

    # fit the model
    clf = CClassifierSGD(loss=CLossHinge(),
                         regularizer=CRegularizerL2(),
                         alpha=0.01, max_iter=200, random_state=0)
    clf.fit(dataset.X, dataset.Y)

    # plot the line, the points, and the nearest vectors to the plane
    xx = CArray.linspace(-1, 5, 10)
    yy = CArray.linspace(-1, 5, 10)

    X1, X2 = np.meshgrid(xx.tondarray(), yy.tondarray())
    Z = CArray.empty(X1.shape)
    for (i, j), val in np.ndenumerate(X1):
        x1 = val
        x2 = X2[i, j]
        Z[i, j] = clf.decision_function(CArray([x1, x2]), y=1)

    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'

    fig = CFigure(linewidth=1)
    fig.sp.contour(X1, X2, Z, levels,
                   colors=colors, linestyles=linestyles)
    fig.sp.scatter(dataset.X[:, 0].ravel(),
                   dataset.X[:, 1].ravel(),
                   c=dataset.Y, s=40)

    fig.savefig(
        fm.join(fm.abspath(__file__), 'figs', 'test_c_classifier_sgd2.pdf'))
@classmethod
def setUpClass(cls):
    CAttackEvasionCleverhansTestCases.setUpClass()

    cls.seed = 0
    cls.y_target = None

    cls.clf = CClassifierMulticlassOVA(CClassifierSVM,
                                       kernel=CKernelRBF(gamma=10),
                                       C=0.1,
                                       preprocess=CNormalizerMinMax())

    cls.ds = CDLRandomBlobs(n_features=2,  # centers below are 2-dimensional
                            centers=[[0.1, 0.1], [0.5, 0], [0.8, 0.8]],
                            cluster_std=0.01,
                            n_samples=100,
                            random_state=cls.seed).load()

    cls.clf.fit(cls.ds.X, cls.ds.Y)

    cls.x0 = CArray([0.6, 0.2])
    cls.y0 = CArray(cls.clf.predict(cls.x0))
random_state = 999

n_features = 2  # Number of features
n_samples = 1100  # Number of samples
centers = [[-2, 0], [2, -2], [2, 2]]  # Centers of the clusters
cluster_std = 0.8  # Standard deviation of the clusters

from secml.data.loader import CDLRandomBlobs
dataset = CDLRandomBlobs(n_features=n_features,
                         centers=centers,
                         cluster_std=cluster_std,
                         n_samples=n_samples,
                         random_state=random_state).load()

n_tr = 1000  # Number of training set samples
n_ts = 100  # Number of test set samples

# Split in training and test
from secml.data.splitter import CTrainTestSplit
splitter = CTrainTestSplit(
    train_size=n_tr, test_size=n_ts, random_state=random_state)
tr, ts = splitter.split(dataset)

# Normalize the data
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
tr.X = nmz.fit_transform(tr.X)
ts.X = nmz.transform(ts.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
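# A minimal sketch of how this snippet could continue, putting the
# dangling CMetricAccuracy import to use (the choice of a linear SVM is
# an assumption; any secml classifier would fit here):
from secml.ml.classifiers import CClassifierSVM

clf = CClassifierSVM()
clf.fit(tr.X, tr.Y)  # train on the normalized training set

y_pred = clf.predict(ts.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)
print("Accuracy on test set: {:.2%}".format(acc))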
from secml.figure.c_figure import CFigure
from secml.data.selection import CPrototypesSelector
from secml.data.loader import CDLRandomBlobs

dataset = CDLRandomBlobs(n_features=2, n_samples=30,
                         centers=[[-0.5, 0], [0.5, 1]],
                         cluster_std=(0.8, 0.8),
                         random_state=7545).load()

fig = CFigure(width=6, height=2, markersize=8, fontsize=11)

rules = ['center', 'border', 'spanning', 'k-medians']
for rule_id, rule in enumerate(rules):

    ps = CPrototypesSelector.create(rule)
    ps.verbose = 2
    ds_reduced = ps.select(dataset, n_prototypes=5)

    fig.subplot(1, len(rules), rule_id + 1)

    # Plot dataset points
    fig.sp.scatter(dataset.X[:, 0], dataset.X[:, 1], linewidths=0, s=30)
    # Highlight the selected prototypes
    fig.sp.plot(ds_reduced.X[:, 0], ds_reduced.X[:, 1],
                linestyle='None', markeredgewidth=2, marker='o', mfc='red')

    fig.sp.title('{:}'.format(rule))
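# The snippet above builds the figure but never renders it; a likely
# closing step (an assumption, mirroring the plotting code elsewhere in
# this section) is simply:
fig.show()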
class TestCClassifierRejectThreshold(CClassifierRejectTestCases):
    """Unit test for CClassifierRejectThreshold."""

    def setUp(self):
        """Test for init and fit methods."""
        # generate synthetic data
        self.dataset = CDLRandomBlobs(n_features=2, n_samples=100,
                                      centers=2, cluster_std=2.0,
                                      random_state=0).load()

        self.logger.info("Testing classifier creation")
        self.clf_norej = CClassifierSGD(regularizer=CRegularizerL2(),
                                        loss=CLossHinge(),
                                        random_state=0)

        self.clf = CClassifierRejectThreshold(self.clf_norej, threshold=0.6)
        self.clf.verbose = 2  # enable debug output for each classifier
        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        self.logger.info(
            "Test for decision_function() and predict() methods.")

        scores_d = self._test_fun(self.clf, self.dataset.todense())
        scores_s = self._test_fun(self.clf, self.dataset.tosparse())

        # FIXME: why is this test crashing? random_state maybe?
        # self.assert_array_almost_equal(scores_d, scores_s)

    def test_reject(self):
        clf = self.clf_norej.deepcopy()
        clf_reject = self.clf.deepcopy()

        # Training the classifiers
        clf_reject.fit(self.dataset.X, self.dataset.Y)
        clf.fit(self.dataset.X, self.dataset.Y)

        # Classification of another dataset
        y_pred_reject, score_pred_reject = clf_reject.predict(
            self.dataset.X, n_jobs=_NoValue, return_decision_function=True)
        y_pred, score_pred = clf.predict(self.dataset.X,
                                         return_decision_function=True)

        # Compute the number of rejected samples
        n_rej = (y_pred_reject == -1).sum()
        self.logger.info("Rejected samples: {:}".format(n_rej))

        self.logger.info("Real: \n{:}".format(self.dataset.Y))
        self.logger.info("Predicted: \n{:}".format(y_pred))
        self.logger.info(
            "Predicted with reject: \n{:}".format(y_pred_reject))

        acc = CMetric.create('accuracy').performance_score(
            y_pred, self.dataset.Y)
        self.logger.info("Accuracy no rejection: {:}".format(acc))

        rej_acc = CMetric.create('accuracy').performance_score(
            y_pred_reject[y_pred_reject != -1],
            self.dataset.Y[y_pred_reject != -1])
        self.logger.info("Accuracy with rejection: {:}".format(rej_acc))

        # check that the accuracy using reject is higher than the one
        # without rejection
        self.assertGreaterEqual(
            rej_acc, acc,
            "The accuracy of the classifier that is allowed to reject "
            "is lower than the one of the classifier that is not "
            "allowed to reject")

    def test_gradient(self):
        """Unittest for gradient_f_x method."""
        i = 5  # sample to test

        self.logger.info("Testing with dense data...")
        ds = self.dataset.todense()
        clf = self.clf.fit(ds.X, ds.Y)
        grads_d = self._test_gradient_numerical(
            clf, ds.X[i, :], extra_classes=[-1])

        self.logger.info("Testing with sparse data...")
        ds = self.dataset.tosparse()
        clf = self.clf.fit(ds.X, ds.Y)
        grads_s = self._test_gradient_numerical(
            clf, ds.X[i, :], extra_classes=[-1])

        # FIXME: why is this test crashing? random_state maybe?
        # Compare dense gradients with sparse gradients
        # for grad_i, grad in enumerate(grads_d):
        #     self.assert_array_almost_equal(
        #         grad.atleast_2d(), grads_s[grad_i])

    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        # All linear transformations with gradient implemented
        self._test_preprocess(self.dataset, self.clf,
                              ['min-max', 'mean-std'],
                              [{'feature_range': (-1, 1)}, {}])
        self._test_preprocess_grad(self.dataset, self.clf,
                                   ['min-max', 'mean-std'],
                                   [{'feature_range': (-1, 1)}, {}],
                                   extra_classes=[-1])

        # Mixed linear/nonlinear transformations without gradient
        self._test_preprocess(
            self.dataset, self.clf, ['pca', 'unit-norm'], [{}, {}])

    def test_draw(self):
        """Compare the classifiers graphically."""
        self.logger.info("Testing classifiers graphically")

        fig = CFigure(width=10, markersize=8)

        # Plot dataset points, marking the rejected samples
        y = self.clf.predict(self.dataset.X)
        fig.sp.plot_ds(
            self.dataset[y == -1, :], colors=['k', 'k'], markersize=12)

        # plot the dataset
        fig.sp.plot_ds(self.dataset)

        # Plot objective function
        fig.sp.plot_fun(self.clf.decision_function,
                        grid_limits=self.dataset.get_bounds(),
                        levels=[0], y=1)
        fig.sp.title('Classifier with reject threshold')

        fig.show()
def setUp(self):
    classifier = CClassifierSVM(
        kernel='linear', C=1.0, grad_sampling=1.0)

    # data parameters
    discrete = False
    lb = -2
    ub = +2

    n_tr = 20
    n_ts = 10
    n_features = 2
    n_reps = 1

    self.sec_eval = []
    self.attack_ds = []
    for rep_i in range(n_reps):
        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(rep_i))
        loader = CDLRandomBlobs(
            n_samples=n_tr + n_ts,
            n_features=n_features,
            centers=[(-0.5, -0.5), (+0.5, +0.5)],
            center_box=(-0.5, 0.5),
            cluster_std=0.5,
            random_state=rep_i * 100 + 10)
        ds = loader.load()

        tr = ds[:n_tr, :]
        ts = ds[n_tr:, :]

        classifier.fit(tr.X, tr.Y)
        self.attack_ds.append(ts)

        # only manipulate positive samples, targeting negative ones
        self.y_target = None
        attack_classes = CArray([1])

        params = {
            "classifier": classifier,
            "surrogate_classifier": classifier,
            "surrogate_data": tr,
            "distance": 'l1',
            "lb": lb,
            "ub": ub,
            "discrete": discrete,
            "y_target": self.y_target,
            "attack_classes": attack_classes,
            "solver_params": {'eta': 0.5, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        # sec eval params
        param_name = 'dmax'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(
            start=0, step=dmax_step, stop=dmax + dmax_step)

        # set sec eval object
        self.sec_eval.append(
            CSecEval(attack=attack,
                     param_name=param_name,
                     param_values=param_values))
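# A sketch of how the security evaluation objects built above might be
# exercised in a test body (an assumption based on the CSecEval interface,
# not part of the original setUp):
# for sec_eval, ds in zip(self.sec_eval, self.attack_ds):
#     sec_eval.run_sec_eval(ds)  # run the attack for each `dmax` value
#     results = sec_eval.sec_eval_data  # collected per-dmax performance data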