def __init__(self, classifier, step, eps, x_train, y_train, x_val, y_val, max_iter=100, **kwargs):
    """
    Set up a poisoning attack against a support vector machine.

    A classifier wrapping a `LinearSVC` is replaced by an equivalent `SVC` with a linear kernel (same `C`,
    same clip values) which is then fitted on `x_train` / `y_train`.

    :param classifier: A trained ScikitlearnSVC classifier
    :type classifier: `art.classifiers.scikitlearn.ScikitlearnSVC`
    :param step: The step size of the classifier
    :type step: `float`
    :param eps: The minimum difference in loss before convergence of the classifier
    :type eps: `float`
    :param x_train: The training data used for classification
    :type x_train: `np.ndarray`
    :param y_train: The training labels used for classification
    :type y_train: `np.ndarray`
    :param x_val: The validation data used to test the attack
    :type x_val: `np.ndarray`
    :param y_val: The validation labels used to test the attack
    :type y_val: `np.ndarray`
    :param max_iter: The maximum number of iterations for the attack
    :type max_iter: `int`
    :param kwargs: Extra optional keyword arguments, forwarded to `set_params`
    :raises TypeError: if `classifier` is not a `ScikitlearnSVC`
    :raises NotImplementedError: if the wrapped model is neither a `LinearSVC` nor an `SVC`
    """
    # pylint: disable=W0212
    from sklearn.svm import LinearSVC, SVC

    super(PoisoningAttackSVM, self).__init__(classifier)

    if not isinstance(classifier, ScikitlearnSVC):
        raise TypeError("Classifier must be a SVC")

    wrapped_model = self.classifier._model
    if isinstance(wrapped_model, LinearSVC):
        # Swap the LinearSVC for a linear-kernel SVC and retrain it on the supplied training set.
        linear_kernel_svc = SVC(C=wrapped_model.C, kernel="linear")
        self.classifier = ScikitlearnSVC(model=linear_kernel_svc, clip_values=self.classifier.clip_values)
        self.classifier.fit(x_train, y_train)
    elif not isinstance(wrapped_model, SVC):
        raise NotImplementedError("Model type '{}' not yet supported".format(type(wrapped_model)))

    self.step, self.eps = step, eps
    self.x_train, self.y_train = x_train, y_train
    self.x_val, self.y_val = x_val, y_val
    self.max_iter = max_iter

    # Remaining keyword arguments go through the regular parameter handling.
    self.set_params(**kwargs)
def setUpClass(cls):
    """Seed the random generators, then wrap a `LinearSVC` and fit it on the class-level training arrays."""
    master_seed(seed=1234)
    super().setUpClass()

    linear_svc = LinearSVC()
    cls.sklearn_model = linear_svc

    # The wrapper is stored on the class before fitting so that tests can reach it as `cls.classifier`.
    cls.classifier = ScikitlearnSVC(model=linear_svc)
    cls.classifier.fit(x=cls.x_train_iris, y=cls.y_train_iris)
def setUpClass(cls):
    """Seed NumPy, wrap a `LinearSVC`, check the wrapper type against the generic factory and fit it."""
    np.random.seed(seed=1234)

    sklearn_model = LinearSVC()
    cls.classifier = ScikitlearnSVC(model=sklearn_model)

    # The generic `SklearnClassifier` factory has to produce the same wrapper type as the explicit class.
    explicit_type = type(cls.classifier)
    factory_type = type(SklearnClassifier(model=sklearn_model))
    assert explicit_type == factory_type

    cls.classifier.fit(x=x_train, y=y_train)
class PoisoningAttackSVM(Attack):
    """
    Close implementation of poisoning attack on Support Vector Machines (SVM) by Biggio et al.

    | Paper link: https://arxiv.org/pdf/1206.6389.pdf
    """
    # 'max_iter' is listed because `set_params` below validates it.
    # NOTE(review): presumably the base `Attack.set_params` only accepts names listed here - confirm.
    attack_params = ['classifier', 'step', 'eps', 'x_train', 'y_train', 'x_val', 'y_val', 'max_iter']

    def __init__(self, classifier, step, eps, x_train, y_train, x_val, y_val, max_iter=10, **kwargs):
        """
        Initialize an SVM poisoning attack

        A classifier wrapping a `LinearSVC` is replaced by an `SVC` with a linear kernel (same `C`, same clip
        values) that is fitted on `x_train` / `y_train`.

        :param classifier: A trained ScikitlearnSVC classifier
        :type classifier: `art.classifiers.scikitlearn.ScikitlearnSVC`
        :param step: The step size of the classifier
        :type step: `float`
        :param eps: The minimum difference in loss before convergence of the classifier
        :type eps: `float`
        :param x_train: The training data used for classification
        :type x_train: `np.ndarray`
        :param y_train: The training labels used for classification
        :type y_train: `np.ndarray`
        :param x_val: The validation data used to test the attack
        :type x_val: `np.ndarray`
        :param y_val: The validation labels used to test the attack
        :type y_val: `np.ndarray`
        :param max_iter: The maximum number of iterations for the attack
        :type max_iter: `int`
        :param kwargs: Extra optional keyword arguments
        :raises TypeError: if `classifier` is not a `ScikitlearnSVC`
        :raises NotImplementedError: if the wrapped model is neither a `LinearSVC` nor an `SVC`
        """
        # pylint: disable=W0212
        from sklearn.svm import LinearSVC, SVC
        super(PoisoningAttackSVM, self).__init__(classifier)

        if not isinstance(classifier, ScikitlearnSVC):
            raise TypeError('Classifier must be a SVC')
        if isinstance(self.classifier._model, LinearSVC):
            self.classifier = ScikitlearnSVC(model=SVC(C=self.classifier._model.C, kernel='linear'),
                                             clip_values=self.classifier.clip_values)
            self.classifier.fit(x_train, y_train)
        elif not isinstance(self.classifier._model, SVC):
            raise NotImplementedError("Model type '{}' not yet supported".format(type(self.classifier._model)))

        self.step = step
        self.eps = eps
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val
        self.max_iter = max_iter
        self.set_params(**kwargs)

    def generate(self, x, y=None, **kwargs):
        """
        Iteratively finds optimal attack points starting at values at x

        :param x: An array with the points that initialize attack points.
        :type x: `np.ndarray`
        :param y: The target labels for the attack points. If `None`, the classifier's predictions for `x` are used.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        :raises ValueError: if `x` contains no points
        """
        # Reject empty input before asking the classifier to predict on it.
        num_poison = len(x)
        if num_poison == 0:
            raise ValueError("Must input at least one poison point")

        if y is None:
            y_attack = self.classifier.predict(x)
        else:
            y_attack = np.copy(y)

        num_features = len(x[0])

        # Every poison point is optimised independently, starting from the corresponding row of `x`.
        all_poison = [self.generate_attack_point(attack_point, attack_label)
                      for attack_point, attack_label in zip(x, y_attack)]

        return np.array(all_poison).reshape((num_poison, num_features))

    def set_params(self, **kwargs):
        """
        Take in a dictionary of parameters and apply attack-specific checks before saving them as attributes.

        :param kwargs: a dictionary of attack-specific parameters
        :type kwargs: `dict`
        :return: `True` when parsing was successful
        :raises ValueError: if `step` or `eps` is not strictly positive, or `max_iter` is smaller than 1
        """
        super(PoisoningAttackSVM, self).set_params(**kwargs)
        if self.step <= 0:
            raise ValueError("Step size must be strictly positive")
        if self.eps <= 0:
            raise ValueError("Value of eps must be strictly positive")
        # A single iteration is a valid, strictly positive budget.
        if self.max_iter < 1:
            raise ValueError("Value of max_iter must be strictly positive")

        return True

    def generate_attack_point(self, x_attack, y_attack):
        """
        Generate a single poison attack the model, using `x_val` and `y_val` as validation points.
        The attack begins at the point `x_attack` and is labelled with the class given by `y_attack`.

        The model wrapped by the classifier is refitted in place several times; when this method returns it
        is fitted on the training data plus the final attack point.

        :param x_attack: the initial attack point; it is not modified
        :type x_attack: `np.ndarray`
        :param y_attack: the initial attack label
        :type y_attack: `np.ndarray`
        :return: the final attack point, with a leading axis of length one
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        from sklearn.preprocessing import normalize

        poisoned_model = self.classifier._model
        y_t = np.argmax(self.y_train, axis=1)
        poisoned_model.fit(self.x_train, y_t)
        y_a = np.argmax(y_attack)

        # `np.expand_dims` returns a view; copy it so the in-place update below cannot leak into the caller's data.
        attack_point = np.expand_dims(x_attack, axis=0).copy()
        clip_values = self.classifier.clip_values

        var_g = poisoned_model.decision_function(self.x_val)
        k_values = np.where(-var_g > 0)
        new_p = np.sum(var_g[k_values])
        old_p = np.copy(new_p)
        i = 0

        while new_p - old_p < self.eps and i < self.max_iter:
            old_p = new_p
            poisoned_input = np.vstack([self.x_train, attack_point])
            poisoned_labels = np.append(y_t, y_a)
            poisoned_model.fit(poisoned_input, poisoned_labels)

            # Move along the unit-length attack gradient, then project back into the valid input range.
            unit_grad = normalize(self.attack_gradient(attack_point))
            attack_point += self.step * unit_grad
            if clip_values is not None:
                lower, upper = clip_values
                attack_point = np.clip(attack_point, lower, upper)

            # The objective is evaluated with the model fitted at the start of this iteration.
            new_g = poisoned_model.decision_function(self.x_val)
            k_values = np.where(-new_g > 0)
            new_p = np.sum(new_g[k_values])
            i += 1

        # Leave the wrapped model fitted on the data poisoned with the final attack point.
        poisoned_input = np.vstack([self.x_train, attack_point])
        poisoned_labels = np.append(y_t, y_a)
        poisoned_model.fit(poisoned_input, poisoned_labels)

        return attack_point

    def predict_sign(self, vec):
        """
        Predicts the inputs by binary classifier and outputs -1 and 1 instead of 0 and 1

        :param vec: an input array
        :type vec: `np.ndarray`
        :return: an array of -1/1 predictions
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        preds = self.classifier._model.predict(vec)

        # Predictions other than 0 or 1 keep the initial value 0.
        signs = np.zeros(preds.shape[0], )
        signs[preds == 1] = 1
        signs[preds == 0] = -1

        return signs

    def attack_gradient(self, attack_point):
        """
        Calculates the attack gradient, or ∂P for this attack.
        See equation 8 in Biggio et al. Ch. 14

        :param attack_point: the current attack point
        :type attack_point: `np.ndarray`
        :return: The attack gradient; all zeros when the attack point is not found among the support vectors
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        art_model = self.classifier
        model = self.classifier._model
        grad = np.zeros((1, self.x_val.shape[1]))
        support_vectors = model.support_vectors_
        num_support = len(support_vectors)
        support_labels = np.expand_dims(self.predict_sign(support_vectors), axis=1)

        # Locate the attack point among the support vectors.
        # NOTE(review): `np.isin` tests element-wise membership, not whole-row equality - confirm this is intended.
        c_idx = np.isin(support_vectors, attack_point).all(axis=1)
        if not c_idx.any():
            return grad

        c_idx = np.where(c_idx)[0][0]
        alpha_c = model.dual_coef_[0, c_idx]

        assert support_labels.shape == (num_support, 1)
        qss = art_model.q_submatrix(support_vectors, support_vectors)
        # Random noise is added to `qss` before it is inverted.
        qss_inv = np.linalg.inv(qss + np.random.uniform(0, 0.01 * np.min(qss), (num_support, num_support)))
        nu_k = np.matmul(qss_inv, support_labels)
        zeta = np.matmul(support_labels.T, nu_k)

        for x_k, y_k in zip(self.x_val, self.y_val):
            label_k = np.expand_dims(np.argmax(y_k), axis=0)

            q_ks = art_model.q_submatrix(np.array([x_k]), support_vectors)
            m_k = (1.0 / zeta) * np.matmul(q_ks, zeta * qss_inv - np.matmul(nu_k, nu_k.T)) + \
                np.matmul(label_k, nu_k.T)
            # NOTE(review): `d_q_sc` does not depend on `x_k`; it looks hoistable if the helper is pure.
            d_q_sc = np.fromfunction(lambda i: art_model._get_kernel_gradient_sv(i, attack_point),
                                     (len(support_vectors),), dtype=int)
            d_q_kc = art_model._kernel_grad(x_k, attack_point)
            grad += (np.matmul(m_k, d_q_sc) + d_q_kc) * alpha_c

        return grad
def setUpClass(cls):
    """
    Build a poisoned two-class MNIST data set (digits 0 and 4) and the provenance defences under test.

    The fixture trains an undefended SVC, derives poison points from its support vectors with
    `PoisoningAttackSVM`, attributes every poison point to the last device, and stores the resulting data,
    classifier and two `ProvenanceDefense` instances (with and without trusted data) on the class.
    """
    master_seed(301)
    (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

    def _zero_vs_four(x_data, y_data):
        """Keep only digits 0 and 4, flatten the images and one-hot encode 0 -> [1, 0], 4 -> [0, 1]."""
        digits = np.argmax(y_data, axis=1)
        zero_or_four = np.logical_or(digits == 4, digits == 0)
        x_data = x_data[zero_or_four]
        digits = digits[zero_or_four]

        labels = np.zeros((digits.shape[0], 2))
        labels[digits == 0] = np.array([1, 0])
        labels[digits == 4] = np.array([0, 1])

        n_samples = x_data.shape[0]
        n_features = x_data.shape[1] * x_data.shape[2] * x_data.shape[3]
        return x_data.reshape(n_samples, n_features), labels

    x_train, y_train = _zero_vs_four(x_train, y_train)
    x_test, y_test = _zero_vs_four(x_test, y_test)

    x_train = x_train[:NB_TRAIN]
    y_train = y_train[:NB_TRAIN]

    # Carve the trusted and validation sets off the front of the test data; the rest stays as test data.
    trusted_data = x_test[:NB_TRUSTED]
    trusted_labels = y_test[:NB_TRUSTED]
    x_test = x_test[NB_TRUSTED:]
    y_test = y_test[NB_TRUSTED:]
    valid_data = x_test[:NB_VALID]
    valid_labels = y_test[:NB_VALID]
    x_test = x_test[NB_VALID:]
    y_test = y_test[NB_VALID:]

    # Clean points are spread over all devices except the last one, which is reserved for the poison.
    clean_prov = np.random.randint(NB_DEVICES - 1, size=x_train.shape[0])
    p_train = np.eye(NB_DEVICES)[clean_prov]

    no_defense = ScikitlearnSVC(model=SVC(kernel=kernel), clip_values=(min_, max_))
    no_defense.fit(x=x_train, y=y_train)

    # Start the attack from randomly chosen support vectors, each labelled with the opposite class.
    poison_points = np.random.randint(no_defense._model.support_vectors_.shape[0], size=NB_POISON)
    all_poison_init = np.copy(no_defense._model.support_vectors_[poison_points])
    poison_labels = np.array([1, 1]) - no_defense.predict(all_poison_init)

    # The constructor parameter is `max_iter`; a misspelt keyword would only end up in `**kwargs`.
    svm_attack = PoisoningAttackSVM(classifier=no_defense, x_train=x_train, y_train=y_train, step=0.1, eps=1.0,
                                    x_val=valid_data, y_val=valid_labels, max_iter=200)

    poisoned_data = svm_attack.generate(all_poison_init, y=poison_labels)

    # Stack on poison to data and add provenance of bad actor
    all_data = np.vstack([x_train, poisoned_data])
    all_labels = np.vstack([y_train, poison_labels])
    poison_prov = np.zeros((NB_POISON, NB_DEVICES))
    poison_prov[:, NB_DEVICES - 1] = 1
    all_p = np.vstack([p_train, poison_prov])

    model = SVC(kernel=kernel)
    cls.mnist = (all_data, all_labels, all_p), (x_test, y_test), (trusted_data, trusted_labels), \
                (valid_data, valid_labels), (min_, max_)
    cls.classifier = SklearnClassifier(model=model, clip_values=(min_, max_))

    cls.classifier.fit(all_data, all_labels)
    cls.defence_trust = ProvenanceDefense(cls.classifier, all_data, all_labels, all_p,
                                          x_val=trusted_data, y_val=trusted_labels, eps=0.1)
    cls.defence_no_trust = ProvenanceDefense(cls.classifier, all_data, all_labels, all_p, eps=0.1)
def setUpClass(cls):
    """Seed NumPy, then wrap a `LinearSVC` and fit it on the module-level training arrays."""
    np.random.seed(seed=1234)

    linear_svc = LinearSVC()
    cls.sklearn_model = linear_svc

    # The wrapper is stored on the class before fitting so that tests can reach it as `cls.classifier`.
    cls.classifier = ScikitlearnSVC(model=linear_svc)
    cls.classifier.fit(x=x_train, y=y_train)
tf.keras.layers.Dense(128, activation='relu'), tf.keras.layers.Dropout(0.2), tf.keras.layers.Dense(10, activation='softmax') ]) # model.compile(optimizer='adam', # loss='sparse_categorical_crossentropy', # metrics=['accuracy']) # # model.fit(x_train, y_train, epochs=3) # # loss_test, accuracy_test = model.evaluate(x_test, y_test) # print('Accuracy on test data: {:4.2f}%'.format(accuracy_test * 100)) # # classifier = KerasClassifier(model=model, clip_values=(0, 1)) classsifierSVC = ScikitlearnSVC(model=model_svc, clip_values=(0, 1)) # attack_PGD = ProjectedGradientDescent(classifier=classifier) # attack_PSVM = PoisoningAttackSVM(classifier=classsifierSVC, eps=.3, step=.1, x_train=x_train, y_train=y_train, x_val=x_test[101:200], y_val=None) attack_PSVM = PoisoningAttackSVM(classifier=classsifierSVC, eps=.3, step=.1, x_train=x_train_svc, y_train=y_train, x_val=x_test[101:200], y_val=None) # # attack_SMM = SaliencyMapMethod(classifier=classifier) # attack_STran = SpatialTransformation(classifier=classifier) # test_PGD = attack_PGD.generate(x_test) # test_PSVM = attack_PSVM.generate(x_test) test_PSVM = attack_PSVM.generate(x_test_svc) # test_SMM = attack_SMM.generate(x_test) # test_STran = attack_STran.generate(x_test)