def test_performance_diff(self):
    """
    Check `performance_diff` for built-in metric names and for a custom callable.

    Two linear SVCs are compared on Iris data: one fitted on the full training set and one fitted on only the first
    ten training samples. Expected metric values are compared with `assertAlmostEqual` because they are computed
    floating-point quantities; exact equality would make the test sensitive to harmless rounding differences.
    """
    from art.estimators.classification.scikitlearn import SklearnClassifier
    from sklearn.svm import SVC

    (x_train, y_train), (x_test, y_test), min_, max_ = load_iris()

    full_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
    full_model.fit(x_train, y_train)

    limited_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
    limited_model.fit(x_train[:10], y_train[:10])

    # Explicit accuracy metric.
    self.assertAlmostEqual(
        performance_diff(full_model, limited_model, x_test[:20], y_test[:20], perf_function="accuracy"), 0.35
    )
    # Without `perf_function` the test expects the same value as for "accuracy".
    self.assertAlmostEqual(performance_diff(full_model, limited_model, x_test[:20], y_test[:20]), 0.35)

    # Extra keyword arguments (here `average`) are supplied alongside the "f1" metric name.
    diff = performance_diff(
        full_model, limited_model, x_test[:20], y_test[:20], perf_function="f1", average="weighted"
    )
    self.assertGreater(diff, 0.43)
    self.assertLess(diff, 0.44)

    def first_class(true_labels, model_labels, idx=0):
        # Fraction of samples whose predicted class equals `idx`; `true_labels` is accepted but not used.
        return np.average(np.argmax(model_labels, axis=1) == idx)

    # Custom callable metric, first with its default `idx` and then with `idx` passed as a keyword argument.
    self.assertAlmostEqual(
        performance_diff(full_model, limited_model, x_test, y_test, perf_function=first_class), 1.0 / 3
    )
    self.assertAlmostEqual(
        performance_diff(full_model, limited_model, x_test, y_test, perf_function=first_class, idx=1), -1.0 / 3
    )
def get_calibration_info(
    self, before_classifier: "CLASSIFIER_TYPE"
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Summarise the performance shifts produced by the individual calibration points.

    For each calibration sample, a copy of `before_classifier` is refitted on the validation data with that single
    sample appended, and its performance difference to `before_classifier` on the quiz data is recorded.

    :param before_classifier: The classifier trained without suspicious point.
    :return: A tuple consisting of `(median, std_dev)`.
    """

    def _shift_for(x_point, y_point):
        # Refit a private copy so that `before_classifier` itself is never modified.
        candidate = deepcopy(before_classifier)
        candidate.fit(
            x=np.vstack([self.x_val, x_point]),
            y=np.vstack([self.y_val, y_point]),
        )
        return performance_diff(
            before_classifier,
            candidate,
            self.x_quiz,
            self.y_quiz,
            perf_function=self.perf_func,
        )

    shifts = [_shift_for(x_point, y_point) for x_point, y_point in zip(self.x_cal, self.y_cal)]
    return np.median(shifts), np.std(shifts)
def detect_poison_partially_trusted(self, **kwargs) -> Dict[int, float]:
    """
    Detect poison given trusted validation data.

    Devices are examined one after another: a model fitted on the current working data is compared, on the trusted
    validation data, with a model fitted on the same data minus the device's segment. A device is reported when the
    performance difference exceeds `self.eps`; its segment is then dropped from the working data for later devices.

    :param kwargs: Parameters forwarded to `set_params` before detection starts.
    :return: Dictionary where keys are suspected poisonous device indices and values are performance differences.
    :raises ValueError: If `x_val` or `y_val` is `None`.
    """
    self.set_params(**kwargs)

    trusted_missing = self.x_val is None or self.y_val is None
    if trusted_missing:
        raise ValueError("Trusted data unavailable.")

    flagged = {}
    current_data = np.copy(self.x_train)
    current_labels = np.copy(self.y_train)

    # NOTE(review): segments are computed once from the full training set, while the working data shrinks whenever
    # a device is flagged - confirm `filter_input` copes with that.
    device_segments = segment_by_class(self.x_train, self.p_train, self.num_devices)

    for device_idx, segment in enumerate(device_segments):
        reduced_data, reduced_labels = self.filter_input(current_data, current_labels, segment)

        model_with_device = deepcopy(self.classifier)
        model_without_device = deepcopy(self.classifier)
        model_with_device.fit(current_data, current_labels)
        model_without_device.fit(reduced_data, reduced_labels)

        var_w = performance_diff(
            model_without_device,
            model_with_device,
            self.x_val,
            self.y_val,
            perf_function=self.perf_func,
        )

        # Not above the threshold: keep the device's data and move on.
        if not self.eps < var_w:
            continue

        flagged[device_idx] = var_w
        current_data = reduced_data
        current_labels = reduced_labels

    return flagged
def detect_poison_untrusted(self, **kwargs) -> Dict[int, float]:
    """
    Detect poison given no trusted validation data.

    The training data (with its provenance information) is split into a training part and a validation part. For
    each device, a model fitted on the current training part is compared with a model fitted without that device's
    segment; both are scored on the validation part with the same device's segment removed. A device is reported
    when the performance difference exceeds `self.eps`, and its data is then dropped from both parts.

    :param kwargs: Parameters forwarded to `set_params` before detection starts.
    :return: Dictionary where keys are suspected poisonous device indices and values are performance differences.
    """
    self.set_params(**kwargs)

    flagged = {}

    split = train_test_split(self.x_train, self.y_train, self.p_train, test_size=self.pp_valid)
    fit_data, held_out_data, fit_labels, held_out_labels, fit_prov, held_out_prov = split

    fit_segments = segment_by_class(fit_data, fit_prov, self.num_devices)
    held_out_segments = segment_by_class(held_out_data, held_out_prov, self.num_devices)

    # NOTE(review): segments are computed once from the initial split, while both parts shrink whenever a device
    # is flagged - confirm `filter_input` copes with that.
    for device_idx, (fit_segment, held_out_segment) in enumerate(zip(fit_segments, held_out_segments)):
        reduced_data, reduced_labels = self.filter_input(fit_data, fit_labels, fit_segment)

        model_with_device = deepcopy(self.classifier)
        model_without_device = deepcopy(self.classifier)
        model_with_device.fit(fit_data, fit_labels)
        model_without_device.fit(reduced_data, reduced_labels)

        # Score both models only on held-out samples that remain after filtering out this device's segment.
        valid_non_device_data, valid_non_device_labels = self.filter_input(
            held_out_data, held_out_labels, held_out_segment
        )
        var_w = performance_diff(
            model_without_device,
            model_with_device,
            valid_non_device_data,
            valid_non_device_labels,
            perf_function=self.perf_func,
        )

        # Not above the threshold: keep the device's data and move on.
        if not self.eps < var_w:
            continue

        flagged[device_idx] = var_w
        fit_data, fit_labels = reduced_data, reduced_labels
        held_out_data, held_out_labels = valid_non_device_data, valid_non_device_labels

    return flagged
def detect_poison(self, **kwargs) -> Tuple[dict, List[int]]:
    """
    Returns poison detected and a report.

    Training points are visited in a random order. For each point, a copy of the current reference classifier is
    refitted on the trusted data plus that point, and the performance difference on the quiz data is measured.
    Points judged suspicious by `is_suspicious` are reported; all other points are added to the trusted data and
    the refitted classifier becomes the new reference.

    :param kwargs: A dictionary of detection-specific parameters.
    :return: (report, is_clean_lst):
             where a report is a dict object mapping the index of each flagged training point to its measured
             performance shift,
             where is_clean is a list, where is_clean_lst[i]=1 means that x_train[i]
             there is clean and is_clean_lst[i]=0, means that x_train[i] was classified as poison.
    """
    self.set_params(**kwargs)

    x_suspect, y_suspect = self.x_train, self.y_train
    x_trusted, y_trusted = self.x_val, self.y_val

    # Every point starts out marked as clean.
    self.is_clean_lst = [1] * len(x_suspect)
    report = {}

    # NOTE(review): the initial reference model is fitted on the suspect data, not on the trusted data - confirm
    # this is intended.
    reference = deepcopy(self.classifier)
    reference.fit(x_suspect, y_suspect)

    for idx in np.random.permutation(len(x_suspect)):
        x_i, y_i = x_suspect[idx], y_suspect[idx]

        candidate = deepcopy(reference)
        candidate.fit(x=np.vstack([x_trusted, x_i]), y=np.vstack([y_trusted, y_i]))

        acc_shift = performance_diff(
            reference,
            candidate,
            self.x_quiz,
            self.y_quiz,
            perf_function=self.perf_func,
        )

        if self.is_suspicious(reference, acc_shift):
            self.is_clean_lst[idx] = 0
            report[idx] = acc_shift
            continue

        # Accepted point: it joins the trusted data and the refitted model becomes the reference.
        reference = candidate
        x_trusted = np.vstack([x_trusted, x_i])
        y_trusted = np.vstack([y_trusted, y_i])

    return report, self.is_clean_lst
def get_calibration_info(self, before_classifier):
    """
    Compute the median and standard deviation of the performance shifts caused by the calibration points.

    Each calibration point is appended, on its own, to the validation data; a copy of `before_classifier` is
    refitted on that data and its performance difference to `before_classifier` on the quiz data is recorded.

    :param before_classifier: The classifier trained without suspicious point
    :type before_classifier: `art.classifiers.classifier.Classifier`
    :return: a tuple consisting of (`median`, `std_dev`)
    :rtype: (`float`, `float`)
    """
    shifts = []
    for cal_point, cal_label in zip(self.x_cal, self.y_cal):
        augmented_x = np.vstack([self.x_val, cal_point])
        augmented_y = np.vstack([self.y_val, cal_label])

        # Refit a copy so that `before_classifier` stays untouched.
        retrained = deepcopy(before_classifier)
        retrained.fit(x=augmented_x, y=augmented_y)

        shift = performance_diff(
            before_classifier, retrained, self.x_quiz, self.y_quiz, perf_function=self.perf_func
        )
        shifts.append(shift)

    return np.median(shifts), np.std(shifts)