def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")
    for sgd in self.sgds:
        self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

        if sgd.preprocess is not None:
            k = sgd.preprocess.deepcopy()
        else:
            k = None

        svm = CClassifierSVM(kernel=k)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_sgd, y_sgd = sgd.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_sgd = CMetric.create('f1').performance_score(
            self.dataset.Y, label_sgd)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of SGD: {:}".format(acc_sgd))
        self.assertGreater(acc_sgd, 0.90,
                           "Accuracy of SGD: {:}".format(acc_sgd))
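# For intuition, a hedged sklearn-only analogue of the comparison above
# (illustrative data, not taken from this test suite): a hinge-loss
# SGDClassifier approximates a linear SVM, so their F1 scores on the same
# data should be close.
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)
X = rng.randn(200, 4)
y = (X[:, 0] - X[:, 2] > 0).astype(int)

svm_pred = LinearSVC().fit(X, y).predict(X)
sgd_pred = SGDClassifier(loss='hinge', random_state=0).fit(X, y).predict(X)
print(f1_score(y, svm_pred), f1_score(y, sgd_pred))  # both close to 1.0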
def test_auc(self):
    self.logger.info("Testing auc score...")
    peval = CMetric.create('auc')
    true = CArray([0, 0, 1, 1])
    pred = CArray([0.1, 0.4, 0.35, 0.8])
    res = peval.performance_score(y_true=true, score=pred)
    self.assertEqual(0.75, res)
    self.assertTrue(is_float(res))

    self.logger.info("Testing auc_wmw score...")
    peval = CMetric.create('auc-wmw')
    true = CArray([0, 0, 1, 1])
    pred = CArray([0.1, 0.4, 0.35, 0.8])
    res = peval.performance_score(y_true=true, score=pred)
    self.assertEqual(0.75, res)
    self.assertTrue(is_float(res))

    self.logger.info("Testing pauc score...")
    peval = CMetric.create('pauc', fpr=1.0, n_points=500)
    true = CArray([0, 0, 1, 1])
    pred = CArray([0.1, 0.4, 0.35, 0.8])
    res = peval.performance_score(y_true=true, score=pred)
    self.assertEqual(0.75, res)
    self.assertTrue(is_float(res))
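# A minimal cross-check (numpy + scikit-learn only) of why the expected AUC
# above is 0.75: AUC equals the Wilcoxon-Mann-Whitney statistic, i.e. the
# fraction of (positive, negative) score pairs that are correctly ranked.
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
scores = np.array([0.1, 0.4, 0.35, 0.8])

# Pairwise WMW count: 3 of the 4 (pos, neg) pairs are ranked correctly.
pos, neg = scores[y_true == 1], scores[y_true == 0]
wmw = np.mean([p > n for p in pos for n in neg])  # -> 0.75

assert wmw == roc_auc_score(y_true, scores) == 0.75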
def test_predict_withsvm(self):
    svc = SVC(kernel='linear', class_weight='balanced')
    multiclass_sklearn = OneVsOneClassifier(svc)
    multiclass = CClassifierMulticlassOVO(classifier=CClassifierSVM,
                                          class_weight='balanced',
                                          n_jobs=2)
    multiclass.verbose = 2

    multiclass.fit(self.dataset.X, self.dataset.Y)
    class_pred, score_pred = multiclass.predict(
        self.dataset.X, return_decision_function=True)

    self.logger.info("Predicted: \n{:}".format(class_pred))
    self.logger.info("Real: \n{:}".format(self.dataset.Y))

    acc = CMetric.create('accuracy').performance_score(
        self.dataset.Y, class_pred)
    self.logger.info("Accuracy: {:}".format(acc))

    multiclass_sklearn.fit(self.dataset.X.get_data(),
                           self.dataset.Y.tondarray())
    y_sklearn = multiclass_sklearn.predict(self.dataset.X.get_data())

    acc_sklearn = CMetric.create('accuracy').performance_score(
        self.dataset.Y, CArray(y_sklearn))
    self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn))

    self.assertLess(abs(acc - acc_sklearn), 0.21)
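# Background for the sklearn reference used above (illustrative 3-class
# data): OneVsOneClassifier trains one binary classifier per class pair,
# i.e. n_classes * (n_classes - 1) / 2 estimators.
import numpy as np
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(90, 3)
y = np.repeat([0, 1, 2], 30)

ovo = OneVsOneClassifier(SVC(kernel='linear')).fit(X, y)
print(len(ovo.estimators_))  # 3 classes -> 3 pairwise classifiers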
def test_reject(self):
    y_pred, score_pred = self.clf.predict(
        self.ts.X, return_decision_function=True)

    # Set the threshold to have a 10% rejection rate
    threshold = self.clf.compute_threshold(0.1, self.ts)
    self.clf.threshold = threshold

    y_pred_reject, score_pred_reject = self.clf.predict(
        self.ts.X, return_decision_function=True)

    # Compute the number of rejected samples
    n_rej = (y_pred_reject == -1).sum()
    self.logger.info("Rejected samples: {:}".format(n_rej))

    self.logger.info("Real: \n{:}".format(self.ts.Y))
    self.logger.info("Predicted: \n{:}".format(y_pred))
    self.logger.info(
        "Predicted with reject: \n{:}".format(y_pred_reject))

    acc = CMetric.create('accuracy').performance_score(
        y_true=self.ts.Y, y_pred=y_pred)
    self.logger.info("Accuracy no rejection: {:}".format(acc))

    rej_acc = CMetric.create('accuracy').performance_score(
        y_true=self.ts.Y[y_pred_reject != -1],
        y_pred=y_pred_reject[y_pred_reject != -1])
    self.logger.info("Accuracy with rejection: {:}".format(rej_acc))

    # Check that the accuracy using reject is higher than the one
    # without rejects
    self.assertGreaterEqual(
        rej_acc, acc,
        "The accuracy of the classifier that is allowed to reject "
        "is lower than the one of the classifier that is not "
        "allowed to reject")
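# A minimal sketch (plain numpy, hypothetical helper; the exact semantics of
# compute_threshold above may differ) of how a rejection-rate threshold can
# be obtained: pick the score quantile under which the desired fraction of
# samples falls.
import numpy as np

def threshold_for_reject_rate(max_scores, reject_rate):
    """Return the threshold under which `reject_rate` of samples fall.

    `max_scores` holds, per sample, the highest class score; samples whose
    top score is below the threshold are rejected (label -1).
    """
    return np.quantile(max_scores, reject_rate)

scores = np.array([0.9, 0.2, 0.7, 0.4, 0.95, 0.1, 0.8, 0.6, 0.5, 0.3])
th = threshold_for_reject_rate(scores, 0.1)
print((scores < th).mean())  # ~0.1 of samples rejected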
def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")
    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        if ridge.preprocess is not None:
            svm_kernel = ridge.preprocess.deepcopy()
        else:
            svm_kernel = None

        svm = CClassifierSVM(kernel=svm_kernel)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_ridge, y_ridge = ridge.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_ridge = CMetric.create('f1').performance_score(
            self.dataset.Y, label_ridge)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of ridge: {:}".format(acc_ridge))
        self.assertGreater(acc_ridge, 0.90,
                           "Accuracy of ridge: {:}".format(acc_ridge))
def _clf_poisoning(self):
    """Compute a poisoning point considering as source
    the sample {xc, yc}.
    """
    xc = self.poisoning._run(self.xc, self.yc)

    self.logger.info("Starting score: " + str(self.poisoning.f_seq[0]))
    self.logger.info("Final score: " + str(self.poisoning.f_seq[-1]))
    self.logger.info("x*: " + str(xc))
    self.logger.info("Point sequence: " + str(self.poisoning.x_seq))
    self.logger.info("Score sequence: " + str(self.poisoning.f_seq))
    self.logger.info("Fun Eval: " + str(self.poisoning.f_eval))
    self.logger.info("Grad Eval: " + str(self.poisoning.grad_eval))

    metric = CMetric.create('accuracy')
    y_pred, scores = self.classifier.predict(
        self.ts.X, return_decision_function=True)
    orig_acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred)
    self.logger.info("Error on testing data: " + str(1 - orig_acc))

    tr = self.tr.append(CDataset(xc, self.yc))
    pois_clf = self.classifier.deepcopy()
    pois_clf.fit(tr.X, tr.Y)

    y_pred, scores = pois_clf.predict(
        self.ts.X, return_decision_function=True)
    pois_acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred)
    self.logger.info(
        "Error on testing data (poisoned): " + str(1 - pois_acc))

    return pois_clf, xc
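# A minimal, framework-free sketch (sklearn-style estimators, names are
# illustrative) of the evaluation pattern used above: append a candidate
# poisoning point to the training set, retrain a fresh copy of the
# classifier, and compare test error before and after.
import numpy as np

def poisoned_error(clf_factory, X_tr, y_tr, X_ts, y_ts, xc, yc):
    """Return (clean_error, poisoned_error) for one candidate point (xc, yc)."""
    clean = clf_factory().fit(X_tr, y_tr)
    clean_err = 1 - clean.score(X_ts, y_ts)

    # Train a second, independent copy on the poisoned training set
    X_pois = np.vstack([X_tr, np.asarray(xc).reshape(1, -1)])
    y_pois = np.append(y_tr, yc)
    pois = clf_factory().fit(X_pois, y_pois)
    pois_err = 1 - pois.score(X_ts, y_ts)

    return clean_err, pois_err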
def test_fnratth(self):
    self.logger.info("Testing FNR @ TH metric...")
    metric = CMetric.create('fnr-at-th', th=0.76)
    res = self._test_roc_metric(metric)
    self.assertAlmostEqual(0.67, res, places=2)
def test_thatfpr(self):
    self.logger.info("Testing TH @ FPR metric...")
    metric = CMetric.create('th-at-fpr', fpr=0.1)
    res = self._test_roc_metric(metric)
    self.assertEqual(0.645, res)
def test_fnratfpr(self):
    self.logger.info("Testing FNR @ FPR metric...")
    metric = CMetric.create('fnr-at-fpr', fpr=0.1)
    res = self._test_roc_metric(metric)
    self.assertAlmostEqual(0.33, res, places=2)
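# A minimal sketch (numpy + scikit-learn, illustrative data) of the ROC
# bookkeeping behind the three threshold metrics tested above: find the
# operating point whose FPR does not exceed the target, then read off the
# threshold (TH @ FPR) and the miss rate 1 - TPR (FNR @ FPR) at that point.
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
scores = np.array([0.1, 0.2, 0.3, 0.6, 0.7, 0.4, 0.5, 0.8, 0.9, 0.95])

fpr, tpr, th = roc_curve(y_true, scores, drop_intermediate=False)

# TH @ FPR: smallest threshold whose FPR does not exceed the target
target_fpr = 0.2
idx = np.where(fpr <= target_fpr)[0][-1]
th_at_fpr = th[idx]

# FNR @ FPR: miss rate at that same operating point
fnr_at_fpr = 1 - tpr[idx]
print(th_at_fpr, fnr_at_fpr)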
def test_train_net(self):
    self._create_ds()
    self._create_net()
    self.clf.fit(self.tr)
    label_torch, y_torch = self.clf.predict(
        self.ts.X, return_decision_function=True)
    acc_torch = CMetric.create('accuracy').performance_score(
        self.ts.Y, label_torch)
    logging.info("Accuracy: {:.3f}".format(acc_torch))
    self.assertGreater(acc_torch, 0)
def test_tpratfpr(self):
    self.logger.info("Testing tpr_at_fpr score...")
    peval = CMetric.create('tpr-at-fpr', fpr=0.1)
    true = CArray([0, 0, 1, 1])
    pred = CArray([0.1, 0.4, 0.35, 0.8])
    res = peval.performance_score(y_true=true, score=pred)
    self.assertEqual(0.5, res)
    self.assertTrue(is_float(res))
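# Why 0.5 is the expected value above, as a plain-numpy walk-through
# (assumption: the metric reads the ROC at the requested FPR): with
# negatives scored {0.1, 0.4} and positives {0.35, 0.8}, keeping FPR at or
# below 0.1 forces the threshold above 0.4, so only the positive scored 0.8
# is detected.
import numpy as np

neg, pos = np.array([0.1, 0.4]), np.array([0.35, 0.8])
th = 0.41  # any threshold in (0.4, 0.8] keeps FPR = 0 <= 0.1
print((neg >= th).mean(), (pos >= th).mean())  # FPR 0.0, TPR 0.5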
def test_mse(self):
    self.logger.info("Testing mse score...")
    peval = CMetric.create('mse')
    true = CArray([3, -0.5, 2, 7])
    pred = CArray([2.5, 0.0, 2, 8])
    res = peval.performance_score(y_true=true, score=pred)
    self.assertEqual(0.375, res)
    self.assertTrue(is_float(res))
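# The expected value above, worked out by hand (plain numpy cross-check):
# the squared errors are (3-2.5)^2 = 0.25, (-0.5-0)^2 = 0.25, (2-2)^2 = 0
# and (7-8)^2 = 1, and their mean is 1.5 / 4 = 0.375.
import numpy as np
print(np.mean((np.array([3, -0.5, 2, 7]) - np.array([2.5, 0.0, 2, 8])) ** 2))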
def test_recall(self):
    self.logger.info("Testing recall score...")
    peval = CMetric.create('recall')
    true = CArray([0, 0, 0, 0, 1, 1, 1, 1])
    pred = CArray([1, 0, 0, 0, 1, 1, 0, 0])
    res = peval.performance_score(y_true=true, y_pred=pred)
    # tpr: 0.5, fnr: 0.5 -> 0.5 / (0.5 + 0.5) = 0.5
    self.assertEqual(0.5, res)
    self.assertTrue(is_float(res))
def test_precision(self):
    self.logger.info("Testing precision score...")
    peval = CMetric.create('precision')
    true = CArray([0, 0, 0, 0, 1, 1, 1, 1])
    pred = CArray([1, 0, 0, 0, 1, 1, 0, 0])
    res = peval.performance_score(y_true=true, y_pred=pred)
    # tp: 2, fp: 1 -> 2 / (2 + 1) = 0.666...
    self.assertAlmostEqual(res, 0.67, 2)
    self.assertTrue(is_float(res))
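# A quick confusion-matrix cross-check (scikit-learn) for the two tests
# above: with y_true = [0,0,0,0,1,1,1,1] and y_pred = [1,0,0,0,1,1,0,0]
# there are tp = 2, fp = 1, fn = 2, so precision = 2/3 and recall = 1/2.
from sklearn.metrics import precision_score, recall_score

y_true = [0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [1, 0, 0, 0, 1, 1, 0, 0]
print(precision_score(y_true, y_pred))  # 0.666...
print(recall_score(y_true, y_pred))     # 0.5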
def test_auc(self):
    self.logger.info("Testing AUC metric...")
    metric = CMetric.create('auc')
    res = self._test_roc_metric(metric)
    self.assertAlmostEqual(0.89, res, places=2)

    self.logger.info("Testing AUC-WMW metric...")
    metric = CMetric.create('auc-wmw')
    res = self._test_roc_metric(metric)
    self.assertAlmostEqual(0.89, res, places=2)

    self.logger.info("Testing pAUC metric...")
    metric = CMetric.create('pauc', fpr=1.0, n_points=500)
    res = self._test_roc_metric(metric)
    self.assertAlmostEqual(0.89, res, places=2)
def test_f1(self):
    self.logger.info("Testing f1 score...")
    peval = CMetric.create('f1')
    true = CArray([0, 0, 0, 0, 1, 1, 1, 1])
    pred = CArray([1, 0, 0, 0, 1, 1, 0, 0])
    res = peval.performance_score(y_true=true, y_pred=pred)
    # precision: 0.67, recall: 0.5
    # 2 * (prec * rec) / (prec + rec) -> 2 * 0.335 / 1.17 = 0.57
    self.assertAlmostEqual(res, 0.57, 2)
    self.assertTrue(is_float(res))
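# And the F1 value above follows as the harmonic mean of the precision and
# recall computed earlier (plain-python cross-check):
# 2 * (2/3 * 1/2) / (2/3 + 1/2) = 4/7 = 0.5714...
prec, rec = 2 / 3, 1 / 2
print(2 * prec * rec / (prec + rec))  # 0.5714..., rounds to 0.57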
def test_reject(self):
    clf = self.clf_norej.deepcopy()
    clf_reject = self.clf.deepcopy()

    # Train the classifiers
    clf_reject.fit(self.dataset.X, self.dataset.Y)
    clf.fit(self.dataset.X, self.dataset.Y)

    # Classification of another dataset
    y_pred_reject, score_pred_reject = clf_reject.predict(
        self.dataset.X, n_jobs=_NoValue, return_decision_function=True)
    y_pred, score_pred = clf.predict(self.dataset.X,
                                     return_decision_function=True)

    # Compute the number of rejected samples
    n_rej = (y_pred_reject == -1).sum()
    self.logger.info("Rejected samples: {:}".format(n_rej))

    self.logger.info("Real: \n{:}".format(self.dataset.Y))
    self.logger.info("Predicted: \n{:}".format(y_pred))
    self.logger.info(
        "Predicted with reject: \n{:}".format(y_pred_reject))

    acc = CMetric.create('accuracy').performance_score(
        y_true=self.dataset.Y, y_pred=y_pred)
    self.logger.info("Accuracy no rejection: {:}".format(acc))

    rej_acc = CMetric.create('accuracy').performance_score(
        y_true=self.dataset.Y[y_pred_reject != -1],
        y_pred=y_pred_reject[y_pred_reject != -1])
    self.logger.info("Accuracy with rejection: {:}".format(rej_acc))

    # Check that the accuracy using reject is higher than the one
    # without rejects
    self.assertGreaterEqual(
        rej_acc, acc,
        "The accuracy of the classifier that is allowed to reject "
        "is lower than the one of the classifier that is not "
        "allowed to reject")
def _test_clf_accuracy(self, normalizer):
    """Check the accuracy of the classifier considered in the test."""
    self._test_init(normalizer)

    metric = CMetric.create('accuracy')
    y_pred, scores = self.classifier.predict(
        self.ts.X, return_decision_function=True)
    acc = metric.performance_score(y_true=self.ts.Y, y_pred=y_pred)
    self.logger.info("Error on testing data: " + str(1 - acc))
    self.assertGreater(
        acc, 0.70,
        "The trained classifier has an accuracy that is too low to "
        "evaluate whether the poisoning against this classifier works")
def test_accuracy(self):
    self.logger.info("Testing accuracy score...")
    peval = CMetric.create('accuracy')

    y_true = CArray([0, 1, 2, 3])
    y_pred = CArray([0, 2, 1, 3])
    res = peval.performance_score(y_true=y_true, y_pred=y_pred)
    self.assertEqual(0.5, res)

    y_true = CArray([0, 1, 0, 0])
    y_pred = CArray([0, 0, 0, 0])
    res = peval.performance_score(y_true=y_true, y_pred=y_pred)
    self.assertEqual(0.75, res)
    self.assertTrue(is_float(res))
def __init__(self, splitter, metric):
    self.splitter = CDataSplitter.create(splitter)
    self.metric = CMetric.create(metric)
def test_parameters_setting(self):
    # Change default parameters to be sure they are not used
    self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})
    xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

    # Run xval to choose the best parameters
    xval_splitter = CDataSplitter.create(
        'kfold', num_folds=5, random_state=50000)

    # Set the best parameters inside the classifier
    self.svm.estimate_parameters(self.training_dataset, xval_parameters,
                                 xval_splitter, 'accuracy')
    self.logger.info("SVM has now the following parameters: {:}".format(
        self.svm.get_params()))

    self.assertEqual(self.svm.get_params()['C'], 1)
    self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

    # Now we compare the parameters chosen before with a new evaluator
    perf_eval = CPerfEvaluatorXVal(
        xval_splitter, CMetric.create('accuracy'))
    perf_eval.verbose = 1

    best_params, best_score = perf_eval.evaluate_params(
        self.svm, self.training_dataset, xval_parameters)

    for param in xval_parameters:
        self.logger.info("Best '{:}' is: {:}".format(
            param, best_params[param]))
        self.assertEqual(best_params[param],
                         self.svm.get_params()[param])

    self.svm.verbose = 0

    parameters_combination = [[1, 1], [1, 50], [10, 1],
                              [10, 50], [100, 1], [100, 50]]
    par_comb_score = CArray.zeros(len(parameters_combination))
    for comb in range(len(parameters_combination)):
        this_fold_score = []
        num_xval_fold = len(xval_splitter.tr_idx)

        for f in range(num_xval_fold):
            self.svm.set("C", parameters_combination[comb][0])
            self.svm.kernel.gamma = parameters_combination[comb][1]

            self.svm.fit(
                self.training_dataset[xval_splitter.tr_idx[f], :].X,
                self.training_dataset[xval_splitter.tr_idx[f], :].Y)

            this_fold_predicted = self.svm.predict(
                self.training_dataset[xval_splitter.ts_idx[f], :].X)

            this_fold_accuracy = skm.accuracy_score(
                self.training_dataset[
                    xval_splitter.ts_idx[f], :].Y.get_data(),
                this_fold_predicted.get_data())
            this_fold_score.append(this_fold_accuracy)

        par_comb_score[comb] = np.mean(this_fold_score)
        self.logger.info("this fold mean: {:}".format(
            par_comb_score[comb]))

    max_combination_score = par_comb_score.max()
    better_param_comb = parameters_combination[par_comb_score.argmax()]
    self.logger.info("max combination score found here: {:}".format(
        max_combination_score))
    self.logger.info(
        "max comb score found during xval: {:}".format(best_score))

    self.assertEqual(max_combination_score, best_score)

    # Set the parameters found by xval and check that they match
    # those chosen here
    self.logger.info("the parameters selected by our own xval are:")
    self.svm.set_params(best_params)
    self.logger.info("C: {:}".format(self.svm.C))
    self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))

    # Check C
    self.assertEqual(better_param_comb[0], self.svm.C)
    # Check gamma
    self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)
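# For reference, a minimal scikit-learn equivalent (assuming an RBF SVC on
# illustrative arrays X, y) of the manual grid search re-implemented in the
# test above: GridSearchCV performs the same fold-wise fit/score/average
# loop internally.
import numpy as np
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

grid = GridSearchCV(SVC(kernel='rbf'),
                    {'C': [1, 10, 100], 'gamma': [1, 50]},
                    scoring='accuracy',
                    cv=KFold(n_splits=5, shuffle=True, random_state=0))
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)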
def plot_sec_eval(self, sec_eval_data, metric='accuracy', mean=False,
                  percentage=False, show_average=False, label=None,
                  linestyle='-', color=None, marker=None, metric_args=()):
    """Plot the Security Evaluation Curve using the desired metric.

    Parameters
    ----------
    sec_eval_data : CSecEvalData or list
        A single CSecEvalData object or a list with multiple repetitions.
    metric : str or CMetric, optional
        Metric to be evaluated. Default 'accuracy'.
    mean : bool, optional
        If True, the mean of all sec eval repetitions will be computed.
        Default False.
    percentage : bool, optional
        If True, values will be displayed in percentage. Default False.
    show_average : bool, optional
        If True, the average along the sec eval parameters will be
        shown in legend. Default False.
    label : str, optional
        Label of the sec eval curve. Default None.
    linestyle : str, optional
        Style of the curve. Default '-'.
    color : str or None, optional
        Color of the curve. If None (default) the plot engine
        will choose.
    marker : str or None, optional
        Style of the markers. Default None.
    metric_args
        Any other argument for the metric.

    """
    metric = CMetric.create(metric, *metric_args)

    if not isinstance(sec_eval_data, list):
        sec_eval_data = [sec_eval_data]

    n_sec_eval = len(sec_eval_data)
    n_param_val = sec_eval_data[0].param_values.size

    perf = CArray.zeros((n_sec_eval, n_param_val))
    for i in range(n_sec_eval):
        if sec_eval_data[i].param_values.size != n_param_val:
            raise ValueError(
                "the number of sec eval parameters changed!")
        perf[i, :] = _cmpt_sec_eval_curve(sec_eval_data[i], metric)

    if mean is True:
        perf_std = perf.std(axis=0, keepdims=False)
        perf = perf.mean(axis=0, keepdims=False)
    else:
        if len(sec_eval_data) > 1:
            raise ValueError("if `mean` is False, only one "
                             "sec eval data should be passed")

    perf = perf.ravel()

    if percentage is True:
        perf *= 100
        if mean is True:
            perf_std *= 100

    if show_average is True:
        auc_val = perf.mean()
        if label is None:
            label = "err: {:.2f}".format(auc_val)
        else:
            label += ", err: {:.2f}".format(auc_val)

    # This is done here to make 'markevery' work correctly
    self.xticks(sec_eval_data[0].param_values)

    self.plot(sec_eval_data[0].param_values, perf, label=label,
              linestyle=linestyle, color=color, marker=marker,
              markevery=self.get_xticks_idx(
                  sec_eval_data[0].param_values))

    if mean is True:
        std_up = perf + perf_std
        std_down = perf - perf_std
        std_down[std_down < 0.0] = 0.0
        if percentage is True:
            std_up[std_up > 100] = 100
        else:
            std_up[std_up > 1.0] = 1.0
        self.fill_between(sec_eval_data[0].param_values,
                          std_up, std_down, interpolate=False,
                          alpha=0.2, facecolor=color, linestyle='None')

    if self._xlabel is None:
        self.xlabel(sec_eval_data[0].param_name)
    if self._ylabel is None:
        self.ylabel(metric.class_type.capitalize())

    self.legend(loc='best', labelspacing=0.4,
                handletextpad=0.3, edgecolor='k')
    self.title("Security Evaluation Curve")

    self.apply_params_sec_eval()
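# A minimal numpy sketch (illustrative data) of the aggregation performed
# above when `mean=True`: stack the per-repetition curves, average across
# repetitions, and keep the standard deviation for the shaded band, clipped
# to the valid metric range.
import numpy as np

param_values = np.array([0.0, 0.1, 0.2, 0.3])   # e.g. attack strength
reps = np.array([[1.0, 0.8, 0.6, 0.4],          # accuracy, repetition 1
                 [1.0, 0.7, 0.5, 0.5]])         # accuracy, repetition 2

mean_curve = reps.mean(axis=0)
std_curve = reps.std(axis=0)
band_up = np.clip(mean_curve + std_curve, None, 1.0)
band_down = np.clip(mean_curve - std_curve, 0.0, None)
print(mean_curve, band_up, band_down)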
def run(self, x, y, ds_init=None, max_iter=1):
    """Run poisoning on multiple points.

    It reads n_points (previously set), initializes xc, yc at random,
    and then optimizes the poisoning points xc.

    Parameters
    ----------
    x : CArray
        Validation set for evaluating classifier performance.
        Note that this is not the validation data used by the attacker,
        which should be passed instead to `CAttackPoisoning` init.
    y : CArray
        Corresponding true labels for samples in `x`.
    ds_init : CDataset or None, optional
        Dataset for warm start.
    max_iter : int, optional
        Number of iterations to re-optimize poisoning data. Default 1.

    Returns
    -------
    y_pred : predicted labels for all val samples by targeted classifier
    scores : scores for all val samples by targeted classifier
    adv_xc : manipulated poisoning points xc (for subsequent warm starts)
    f_opt : final value of the objective function

    """
    if self._n_points is None or self._n_points == 0:
        # evaluate performance on x, y
        y_pred, scores = self._classifier.predict(
            x, return_decision_function=True)
        return y_pred, scores, ds_init, 0

    # n_points > 0
    if self.init_type == 'random':
        # randomly sample xc and yc
        xc, yc = self._rnd_init_poisoning_points()
    elif self.init_type == 'loss_based':
        xc, yc = self._loss_based_init_poisoning_points()
    else:
        raise NotImplementedError(
            "Unknown poisoning point initialization strategy.")

    # re-set previously-optimized points if passed as input
    if ds_init is not None:
        xc[0:ds_init.num_samples, :] = ds_init.X
        yc[0:ds_init.num_samples] = ds_init.Y

    delta = 1.0
    k = 0

    # max_iter ignored for single-point attacks
    if self.n_points == 1:
        max_iter = 1

    metric = CMetric.create('accuracy')

    while delta > 0 and k < max_iter:
        self.logger.info(
            "Iter on all the poisoning samples: {:}".format(k))

        xc_prv = xc.deepcopy()
        for i in range(self._n_points):
            # optimize the last points first
            # (and then re-optimize the first ones)
            idx = self.n_points - i - 1
            xc[idx, :] = self._run(xc, yc, idx=idx)
            self.logger.info(
                "poisoning point {:} optim fopt: {:}".format(
                    i, self._f_opt))

            y_pred, scores = self._poisoned_clf.predict(
                x, return_decision_function=True)
            acc = metric.performance_score(y_true=y, y_pred=y_pred)
            self.logger.info("Poisoned classifier accuracy "
                             "on test data {:}".format(acc))

        delta = (xc_prv - xc).norm_2d()
        self.logger.info(
            "Optimization with n points: " + str(self._n_points) +
            " iter: " + str(k) + ", delta: " +
            str(delta) + ", fopt: " + str(self._f_opt))
        k += 1

    # re-train the targeted classifier (copied) on poisoned data
    # to evaluate attack effectiveness on targeted classifier
    clf, tr = self._update_poisoned_clf(clf=self._classifier,
                                        tr=self._training_data,
                                        train_normalizer=False)
    # fixme: rechange train_normalizer=True

    y_pred, scores = clf.predict(x, return_decision_function=True)
    acc = metric.performance_score(y_true=y, y_pred=y_pred)
    self.logger.info(
        "Original classifier accuracy on test data {:}".format(acc))

    return y_pred, scores, CDataset(xc, yc), self._f_opt
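# A minimal, framework-free sketch (plain numpy; `optimize_one` is a
# hypothetical callable standing in for the per-point solver) of the outer
# loop above: re-optimize each poisoning point in turn, last points first,
# and stop once a full pass no longer moves any point or max_iter is hit.
import numpy as np

def optimize_all(xc, optimize_one, max_iter=10):
    """Block-coordinate refinement of poisoning points `xc` (n_points x d)."""
    for k in range(max_iter):
        xc_prev = xc.copy()
        for idx in reversed(range(xc.shape[0])):  # last points first
            xc[idx] = optimize_one(xc, idx)
        if np.linalg.norm(xc - xc_prev) == 0:     # no point moved: converged
            break
    return xc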