Example #1
    def test_performance_diff(self):
        from art.estimators.classification.scikitlearn import SklearnClassifier
        from sklearn.svm import SVC

        (x_train, y_train), (x_test, y_test), min_, max_ = load_iris()

        full_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
        full_model.fit(x_train, y_train)

        limited_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
        limited_model.fit(x_train[:10], y_train[:10])

        self.assertEqual(
            performance_diff(full_model, limited_model, x_test[:20], y_test[:20], perf_function="accuracy"), 0.35
        )
        self.assertEqual(performance_diff(full_model, limited_model, x_test[:20], y_test[:20]), 0.35)
        diff = performance_diff(
            full_model, limited_model, x_test[:20], y_test[:20], perf_function="f1", average="weighted"
        )
        self.assertGreater(diff, 0.43)
        self.assertLess(diff, 0.44)

        def first_class(true_labels, model_labels, idx=0):
            return np.average(np.argmax(model_labels, axis=1) == idx)

        self.assertEqual(
            performance_diff(full_model, limited_model, x_test, y_test, perf_function=first_class), 1.0 / 3
        )
        self.assertEqual(
            performance_diff(full_model, limited_model, x_test, y_test, perf_function=first_class, idx=1), -1.0 / 3
        )
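
The test above relies on module-level imports (numpy as np, performance_diff, and an Iris loader) that the snippet does not show. A minimal standalone sketch of the same comparison is given below; the art.utils import paths are assumptions, and the exact accuracy difference depends on the data split.

import numpy as np
from sklearn.svm import SVC

from art.estimators.classification.scikitlearn import SklearnClassifier
from art.utils import load_dataset, performance_diff  # locations of these helpers are assumed

# Train one SVC on the full Iris training set and one on only ten points.
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset("iris")

full_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
full_model.fit(x_train, y_train)

limited_model = SklearnClassifier(model=SVC(kernel="linear", gamma="auto"), clip_values=(min_, max_))
limited_model.fit(x_train[:10], y_train[:10])

# A positive value means the fully trained model scores higher on the held-out points.
print(performance_diff(full_model, limited_model, x_test[:20], y_test[:20], perf_function="accuracy"))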
Example #2
    def get_calibration_info(
            self, before_classifier: "CLASSIFIER_TYPE"
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculate the median and standard deviation of the accuracy shifts caused
        by the calibration set.

        :param before_classifier: The classifier trained without the suspicious point.
        :return: A tuple consisting of `(median, std_dev)`.
        """
        accs = []

        for x_c, y_c in zip(self.x_cal, self.y_cal):
            after_classifier = deepcopy(before_classifier)
            after_classifier.fit(x=np.vstack([self.x_val, x_c]),
                                 y=np.vstack([self.y_val, y_c]))
            accs.append(
                performance_diff(
                    before_classifier,
                    after_classifier,
                    self.x_quiz,
                    self.y_quiz,
                    perf_function=self.perf_func,
                ))

        return np.median(accs), np.std(accs)
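
The `(median, std_dev)` pair returned above is presumably consumed by a per-point suspicion test on the observed accuracy shift. Below is a hedged sketch of such a check; the `calibrated` and `eps` attributes and the three-standard-deviation threshold are illustrative assumptions, not taken from the listing.

    def is_suspicious(self, before_classifier, perf_shift) -> bool:
        """
        Sketch only: decide whether adding a point degraded quiz performance
        by more than calibration noise would explain.
        """
        if self.calibrated:
            median, std_dev = self.get_calibration_info(before_classifier)
            # Shifts far below the calibration median are flagged; the factor
            # of 3 is an illustrative choice, not taken from the source.
            return perf_shift < median - 3 * std_dev
        return perf_shift < -self.eps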
Example #3
    def detect_poison_partially_trusted(self, **kwargs) -> Dict[int, float]:
        """
        Detect poison given trusted validation data

        :return: dictionary where keys are suspected poisonous device indices and values are performance differences
        """
        self.set_params(**kwargs)

        if self.x_val is None or self.y_val is None:
            raise ValueError("Trusted data unavailable.")

        suspected = {}
        unfiltered_data = np.copy(self.x_train)
        unfiltered_labels = np.copy(self.y_train)

        segments = segment_by_class(self.x_train, self.p_train, self.num_devices)
        for device_idx, segment in enumerate(segments):
            filtered_data, filtered_labels = self.filter_input(unfiltered_data, unfiltered_labels, segment)

            unfiltered_model = deepcopy(self.classifier)
            filtered_model = deepcopy(self.classifier)

            unfiltered_model.fit(unfiltered_data, unfiltered_labels)
            filtered_model.fit(filtered_data, filtered_labels)

            var_w = performance_diff(
                filtered_model, unfiltered_model, self.x_val, self.y_val, perf_function=self.perf_func,
            )
            if self.eps < var_w:
                suspected[device_idx] = var_w
                unfiltered_data = filtered_data
                unfiltered_labels = filtered_labels

        return suspected
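
A hedged usage sketch for the partially trusted workflow follows; the ProvenanceDefense import path, constructor arguments, and the eps value are assumptions, and classifier, x_train, y_train, p_train, x_trusted, y_trusted stand for the caller's own objects.

from art.defences.detector.poison import ProvenanceDefense  # import path assumed

defence = ProvenanceDefense(
    classifier,            # ART classifier wrapping the model that gets retrained per device
    x_train, y_train,      # possibly poisoned training data
    p_train,               # one-hot provenance (device) labels, one row per training point
    x_val=x_trusted,       # trusted validation features
    y_val=y_trusted,       # trusted validation labels
    eps=0.1,               # minimum performance gain required to flag a device
)
suspected = defence.detect_poison_partially_trusted()
# e.g. {2: 0.3} -> dropping device 2's data raised validation performance by 0.3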
Example #4
    def detect_poison_untrusted(self, **kwargs) -> Dict[int, float]:
        """
        Detect poison given no trusted validation data

        :return: dictionary where keys are suspected poisonous device indices and values are performance differences
        """
        self.set_params(**kwargs)

        suspected = {}
        (
            train_data,
            valid_data,
            train_labels,
            valid_labels,
            train_prov,
            valid_prov,
        ) = train_test_split(self.x_train,
                             self.y_train,
                             self.p_train,
                             test_size=self.pp_valid)

        train_segments = segment_by_class(train_data, train_prov,
                                          self.num_devices)
        valid_segments = segment_by_class(valid_data, valid_prov,
                                          self.num_devices)

        for device_idx, (train_segment, valid_segment) in enumerate(
                zip(train_segments, valid_segments)):
            filtered_data, filtered_labels = self.filter_input(
                train_data, train_labels, train_segment)

            unfiltered_model = deepcopy(self.classifier)
            filtered_model = deepcopy(self.classifier)

            unfiltered_model.fit(train_data, train_labels)
            filtered_model.fit(filtered_data, filtered_labels)

            valid_non_device_data, valid_non_device_labels = self.filter_input(
                valid_data, valid_labels, valid_segment)
            var_w = performance_diff(
                filtered_model,
                unfiltered_model,
                valid_non_device_data,
                valid_non_device_labels,
                perf_function=self.perf_func,
            )

            if self.eps < var_w:
                suspected[device_idx] = var_w
                train_data = filtered_data
                train_labels = filtered_labels
                valid_data = valid_non_device_data
                valid_labels = valid_non_device_labels

        return suspected
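
For orientation, here are hedged sketches of the two helpers this variant leans on; they are reconstructions inferred from how the helpers are called above, not the library's implementations.

import numpy as np

def segment_by_class(data, classes, num_classes):
    # Sketch: group rows of `data` by the argmax of their one-hot class
    # (here: provenance/device) labels and return one array per class.
    segments = [[] for _ in range(num_classes)]
    for row, cls in zip(data, np.argmax(classes, axis=1)):
        segments[cls].append(row)
    return [np.asarray(seg) for seg in segments]

def filter_input(data, labels, segment):
    # Sketch: drop every row of `data` that also occurs in `segment`,
    # i.e. strip one device's contribution before retraining.
    keep = np.array([not any(np.array_equal(row, s) for s in segment) for row in data])
    return data[keep], labels[keep]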
Example #5
    def detect_poison(self, **kwargs) -> Tuple[dict, List[int]]:
        """
        Detect poison and return a report together with a per-point clean/poison list.

        :param kwargs: A dictionary of detection-specific parameters.
        :return: (report, is_clean_lst):
                where `report` is a dict that contains information specified by the detection method,
                and `is_clean_lst` is a list in which is_clean_lst[i]=1 means that x_train[i] is clean
                and is_clean_lst[i]=0 means that x_train[i] was classified as poison.
        """
        self.set_params(**kwargs)

        x_suspect = self.x_train
        y_suspect = self.y_train
        x_trusted = self.x_val
        y_trusted = self.y_val

        self.is_clean_lst = [1 for _ in range(len(x_suspect))]
        report = {}

        before_classifier = deepcopy(self.classifier)
        before_classifier.fit(x_suspect, y_suspect)

        for idx in np.random.permutation(len(x_suspect)):
            x_i = x_suspect[idx]
            y_i = y_suspect[idx]

            after_classifier = deepcopy(before_classifier)
            after_classifier.fit(x=np.vstack([x_trusted, x_i]),
                                 y=np.vstack([y_trusted, y_i]))
            acc_shift = performance_diff(
                before_classifier,
                after_classifier,
                self.x_quiz,
                self.y_quiz,
                perf_function=self.perf_func,
            )
            # print(acc_shift, median, std_dev)
            if self.is_suspicious(before_classifier, acc_shift):
                self.is_clean_lst[idx] = 0
                report[idx] = acc_shift
            else:
                before_classifier = after_classifier
                x_trusted = np.vstack([x_trusted, x_i])
                y_trusted = np.vstack([y_trusted, y_i])

        return report, self.is_clean_lst
    def get_calibration_info(self, before_classifier):
        """
        Calculate the median and standard deviation of the accuracy shifts caused
        by the calibration set.

        :param before_classifier: The classifier trained without the suspicious point
        :type before_classifier: `art.classifiers.classifier.Classifier`
        :return: a tuple consisting of (`median`, `std_dev`)
        :rtype: (`float`, `float`)
        """
        accs = []

        for x_c, y_c in zip(self.x_cal, self.y_cal):
            after_classifier = deepcopy(before_classifier)
            after_classifier.fit(x=np.vstack([self.x_val, x_c]), y=np.vstack([self.y_val, y_c]))
            accs.append(
                performance_diff(
                    before_classifier, after_classifier, self.x_quiz, self.y_quiz, perf_function=self.perf_func
                )
            )

        return np.median(accs), np.std(accs)
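
A hedged usage sketch for this RONI-style detector (Reject On Negative Impact); the RONIDefense import path, argument order, and keyword names are assumptions.

from art.defences.detector.poison import RONIDefense  # import path assumed

defence = RONIDefense(
    classifier,              # ART classifier retrained for each candidate point
    x_suspect, y_suspect,    # training data to screen
    x_trusted, y_trusted,    # trusted base set, grown as points are accepted
    calibrated=True,         # use get_calibration_info() to set the rejection threshold
)
report, is_clean = defence.detect_poison()
poison_indices = [i for i, flag in enumerate(is_clean) if flag == 0]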