def explain(self, request: Dict) -> Dict:
    """Generate an adversarial example for the image carried in ``request``.

    ``request["instances"]`` is read positionally: element 0 is the image and
    element 1 is the label handed to the attack as ``y``.

    :param request: Payload whose ``"instances"`` entry is ``[image, label]``.
    :return: ``{"explanations": {...}}`` holding the adversarial example, the
        L2 norm of ``x_adv[0] - inputs``, and the arg-max class indices
        predicted before and after the attack.
    :raises Exception: If the image cannot be turned into a NumPy array, if
        ``self.adversary_type`` is not a supported attack, or if the attack
        itself fails. The underlying error is chained as ``__cause__``.
    """
    image = request["instances"][0]
    label = request["instances"][1]
    try:
        inputs = np.array(image)
        logging.info("Calling explain on image of shape %s", (inputs.shape,))
    except Exception as e:
        raise Exception(
            "Failed to initialize NumPy array from inputs: %s, %s" % (e, request["instances"])
        ) from e
    try:
        if str.lower(self.adversary_type) != "squareattack":
            # Previously this case fell through and returned None, which
            # contradicts the declared ``Dict`` return type.
            raise ValueError("Unsupported adversary type: %s" % self.adversary_type)

        classifier = BlackBoxClassifier(
            self._predict,
            inputs.shape,
            self.nb_classes,
            clip_values=(-np.inf, np.inf),
        )
        preds = np.argmax(classifier.predict(inputs, batch_size=1))
        # Set after the first prediction, exactly as the original code did.
        classifier.channels_first = False
        attack = SquareAttack(estimator=classifier, max_iter=self.max_iter)
        x_adv = attack.generate(x=inputs, y=label)

        adv_preds = np.argmax(classifier.predict(x_adv))
        l2_error = np.linalg.norm(np.reshape(x_adv[0] - inputs, [-1]))

        return {
            "explanations": {
                "adversarial_example": x_adv.tolist(),
                "L2 error": l2_error.tolist(),
                "adversarial_prediction": adv_preds.tolist(),
                "prediction": preds.tolist(),
            }
        }
    except Exception as e:
        raise Exception("Failed to explain %s" % e) from e
def attack(predictWrapper, x_train, x_test, y_train, y_test, input_shape, datapoint):
    """Run an untargeted, L2-norm HopSkipJump attack on ``datapoint``.

    The model is queried only through ``predictWrapper.predict_one_hot``,
    wrapped in a two-class ``BlackBoxClassifier`` whose clip range is the
    min/max of ``x_train``.

    :param predictWrapper: Object exposing ``predict_one_hot``.
    :param x_train: Training inputs; used for the clip range and a shape printout.
    :param x_test: Test inputs; only its shape is printed.
    :param y_train: Training labels; only its shape is printed.
    :param y_test: Test labels; only its shape is printed.
    :param input_shape: Flattened feature count of one sample.
    :param datapoint: Sample(s) to attack; reshaped to ``(-1, input_shape)``.
    :return: Whatever ``HopSkipJump.generate`` returns for the reshaped input.
    """
    pixel_floor = x_train.min()
    pixel_ceiling = x_train.max()
    print('min_pixel_value ', pixel_floor)
    print('max_pixel_value ', pixel_ceiling)
    for caption, array in (
        ('xtrain shape: ', x_train),
        ('xtest shape: ', x_test),
        ('y_train shape: ', y_train),
        ('ytest shape: ', y_test),
    ):
        print(caption, array.shape)

    # Black-box view of the model: the attack only sees one-hot predictions.
    black_box = BlackBoxClassifier(
        predict=predictWrapper.predict_one_hot,
        input_shape=(input_shape, ),
        nb_classes=2,
        clip_values=(pixel_floor, pixel_ceiling),
    )

    print('----- generate adv data by HopSkipJump attack -----')
    started = time.time()
    hop_skip_jump = HopSkipJump(
        classifier=black_box,
        targeted=False,
        norm=2,
        max_iter=100,
        max_eval=10000,
        init_eval=100,
        init_size=100,
    )

    # The attack expects 2D input: one flattened sample per row.
    flat_points = datapoint.reshape((-1, input_shape))
    adversarial = hop_skip_jump.generate(x=flat_points)
    distortion(flat_points, adversarial)
    print('Generate test adv cost time: ', time.time() - started)
    return adversarial
def attack():
    """Attack one CIFAR-10 test sample of a BNN ensemble with BoundaryAttack.

    Loads the data via ``load_data('cifar10', 2)``, builds a ``BNN`` from 100
    checkpoint files, writes predictions and labels to the text files
    ``pred_y`` and ``y_test``, prints the accuracy, then runs an untargeted
    ``BoundaryAttack`` on test sample 288 and prints the L2 and L-inf
    distances reported by ``utils``.

    :return: ``None``; all results are printed or written to disk.
    """
    x_train, x_test, y_train, y_test = load_data('cifar10', 2)
    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)

    s = time.time()

    model = BNN([
        '../binary/checkpoints/cifar10_mlpbnn_approx_ep004_%d.h5' % (i)
        for i in range(100)
    ])

    pred_y = model.predict(x_test)
    print('pred_y: ', pred_y)
    np.savetxt('pred_y', pred_y)
    np.savetxt('y_test', y_test)
    # The last item used to be ``y[-1]``, but no ``y`` is bound in this
    # function (NameError). The prediction is used to match the other items.
    print('pred_y[0], pred_y[288], pred_y[888], pred_y[1990], pred_y[-1]',
          pred_y[0], pred_y[288], pred_y[888], pred_y[1990], pred_y[-1])
    print('Accuracy: ', accuracy_score(y_true=y_test, y_pred=pred_y))

    # Create a model wrapper so the attack only sees one-hot predictions.
    predictWrapper = modelWrapper(model)

    classifier = BlackBoxClassifier(
        predict=predictWrapper.predict_one_hot,
        input_shape=(32 * 32 * 3, ),
        nb_classes=2,
        clip_values=(min_pixel_value, max_pixel_value),
    )

    print('----- generate adv data -----')
    # Named ``boundary_attack`` so it does not shadow this function's name.
    boundary_attack = BoundaryAttack(
        estimator=classifier,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=100,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    x_test = x_test[288]
    # Input data shape should be 2D
    x_test = x_test.reshape((-1, 32 * 32 * 3))
    x_test_adv = boundary_attack.generate(x=x_test)

    print('x_test ', x_test)
    print('x_test_adv ', x_test_adv)

    dist2 = utils.computeDist2(x_test, x_test_adv)
    print('test data dist2: ', dist2)

    distInf = utils.computeDistInf(x_test, x_test_adv)
    print('test data distInf: ', distInf)

    print('Cost time: ', time.time() - s)
def get_classifier_bb(defences=None):
    """
    Build the standard black-box classifier used by the unit tests.

    Predictions are not computed: they are read from ``data/mnist/api_output.txt``
    (resolved two directory levels above this file) each time the classifier is
    queried, truncated to the batch length and one-hot encoded over 10 classes.

    :param defences: Forwarded as ``preprocessing_defences``.
    :return: BlackBoxClassifier
    """
    from art.classifiers import BlackBoxClassifier
    from art.utils import to_categorical

    package_root = os.path.dirname(os.path.dirname(__file__))
    api_output_path = os.path.join(package_root, "data/mnist", "api_output.txt")

    def predict(x):
        # The file is re-read on every call, so a missing file only fails
        # when the classifier is actually queried.
        with open(api_output_path) as handle:
            recorded = json.load(handle)
        labels = recorded["values"][: len(x)]
        return to_categorical(labels, nb_classes=10)

    return BlackBoxClassifier(
        predict,
        (28, 28, 1),
        10,
        clip_values=(0, 255),
        preprocessing_defences=defences,
    )
# Script fragment: wrap the model `scd` as a black-box classifier and set up a
# HopSkipJump attacker. `train`, `test`, `test_label`, `scd` and `args` are
# defined outside this excerpt.

# Shape of `train` without its first axis (assumes axis 0 indexes samples — TODO confirm).
input_shape = train.shape[1:]
yp = scd.predict(test)
# Indices along the first axis where the prediction equals `test_label`.
correct_index = np.nonzero((yp == test_label).astype(np.int8))[0]
predictWrapper = modelWrapper(scd)
# The clip range for the classifier is taken from the training data.
min_pixel_value = train.min()
max_pixel_value = train.max()
print('min_pixel_value ', min_pixel_value)
print('max_pixel_value ', max_pixel_value)
# Create classifier: the attack only sees the wrapper's one-hot predictions.
classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot, input_shape=input_shape, nb_classes=args.n_classes, clip_values=(min_pixel_value, max_pixel_value))
print('----- generate adv data by HopSkipJump attack -----')
# Generate adversarial test examples (untargeted, norm=2).
attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=40, max_eval=10000, init_eval=100, init_size=100)
# Alternative configuration with max_iter=2, left disabled:
# attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100)
def attack():
    """Attack test sample 288 of a pickled LeNet model with BoundaryAttack.

    Loads ``cifar10_binary`` data, converts it to channels-first float32
    arrays, unpickles the model checkpoint, runs an untargeted
    ``BoundaryAttack`` through a two-class ``BlackBoxClassifier``, saves the
    clean and adversarial sample to ``x.npy`` / ``adv_x.npy`` and prints the
    L2 and L-inf distances reported by ``utils``.

    :return: ``None``; all results are printed or written to disk.
    """
    flat_dim = 3 * 32 * 32

    def _to_channels_first(images):
        # (N, 32, 32, 3) -> (N, 3, 32, 32), as float32.
        return images.reshape((-1, 32, 32, 3)).transpose(
            (0, 3, 1, 2)).astype(np.float32)

    x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
    x_train = _to_channels_first(x_train)
    x_test = _to_channels_first(x_test)

    pixel_floor = x_train.min()
    pixel_ceiling = x_train.max()
    print('min_pixel_value ', pixel_floor)
    print('max_pixel_value ', pixel_ceiling)

    started = time.time()

    checkpoint = '../binary/checkpoints/cifar10_binary_lenet_100.pkl'
    # NOTE: unpickling runs code stored in the file; only load trusted checkpoints.
    with open(checkpoint, 'rb') as handle:
        model = pickle.load(handle)

    # LeNet and simpleNet take inputs shaped (-1, 3, 32, 32); the other nets
    # take flat vectors, and Mlp01 additionally needs cuda=False.
    print('xtest shape1', x_test.shape)
    pred_y = model.predict(x_test)
    print('xtest shape1', x_test.shape)
    print('pred_y: ', pred_y)

    # The attack only sees the wrapper's one-hot predictions.
    wrapper = modelWrapper(model)
    black_box = BlackBoxClassifier(
        predict=wrapper.predict_one_hot,
        input_shape=(flat_dim, ),
        nb_classes=2,
        clip_values=(pixel_floor, pixel_ceiling),
    )

    print('----- generate adv data -----')
    boundary = BoundaryAttack(
        estimator=black_box,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=100,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    # The attack expects 2D input: one flattened sample per row.
    sample = x_test[288].reshape((-1, flat_dim))
    sample_adv = boundary.generate(x=sample)

    np.save('x', sample)
    np.save('adv_x', sample_adv)

    print('x_test ', sample)
    print('x_test_adv ', sample_adv)

    l2_distance = utils.computeDist2(sample, sample_adv)
    print('test data dist2: ', l2_distance)

    linf_distance = utils.computeDistInf(sample, sample_adv)
    print('test data distInf: ', linf_distance)

    print('Cost time: ', time.time() - started)
def attack():
    """Attack the last test sample of a pickled scd01mlp model with BoundaryAttack.

    Loads ``cifar10_binary`` data, unpickles the model checkpoint, predicts
    on the test set with ``cuda=False``, runs an untargeted
    ``BoundaryAttack`` through a two-class ``BlackBoxClassifier``, saves the
    clean and adversarial sample to ``x.npy`` / ``adv_x.npy`` and prints the
    L2 and L-inf distances reported by ``utils``.

    :return: ``None``; all results are printed or written to disk.
    """
    flat_dim = 3 * 32 * 32

    x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)

    pixel_floor = x_train.min()
    pixel_ceiling = x_train.max()
    print('min_pixel_value ', pixel_floor)
    print('max_pixel_value ', pixel_ceiling)

    started = time.time()

    checkpoint = '../binary/checkpoints/cifar10_binary_scd01mlp_100_br02_h500_nr075_ni25000_i1.pkl'
    # NOTE: unpickling runs code stored in the file; only load trusted checkpoints.
    with open(checkpoint, 'rb') as handle:
        model = pickle.load(handle)

    pred_y = model.predict(x_test, cuda=False)
    print('pred_y: ', pred_y)

    # The attack only sees the wrapper's one-hot predictions.
    wrapper = modelWrapper(model)
    black_box = BlackBoxClassifier(
        predict=wrapper.predict_one_hot,
        input_shape=(flat_dim, ),
        nb_classes=2,
        clip_values=(pixel_floor, pixel_ceiling),
    )

    print('----- generate adv data -----')
    boundary = BoundaryAttack(
        estimator=black_box,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=100,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    # The attack expects 2D input: one flattened sample per row.
    sample = x_test[-1].reshape((-1, flat_dim))
    sample_adv = boundary.generate(x=sample)

    np.save('x', sample)
    np.save('adv_x', sample_adv)

    print('x_test ', sample)
    print('x_test_adv ', sample_adv)

    l2_distance = utils.computeDist2(sample, sample_adv)
    print('test data dist2: ', l2_distance)

    linf_distance = utils.computeDistInf(sample, sample_adv)
    print('test data distInf: ', linf_distance)

    print('Cost time: ', time.time() - started)
def extract(self, x, y=None, delta_0=0.05, fraction_true=0.3, rel_diff_slope=0.00001, rel_diff_value=0.000001,
            delta_init_value=0.1, delta_value_max=50, d2_min=0.0004, d_step=0.01, delta_sign=0.02,
            unit_vector_scale=10000, **kwargs):
    """
    Extract the targeted model and return it wrapped as a black-box classifier.

    The extraction runs four stages in order: critical point search, weight
    recovery, sign recovery and last-layer extraction. The recovered
    parameters (`w_0`, `b_0`, `w_1`, `b_1`) are then exposed through a
    prediction function computing a ReLU hidden layer followed by a linear layer.

    :param x: Samples of input data of shape (num_samples, num_features).
    :type x: `np.ndarray`
    :param y: Correct labels or target labels for `x`. Not read by this method.
    :type y: `np.ndarray`
    :param delta_0: Initial step size of binary search
    :type delta_0: `float`
    :param fraction_true: Fraction of output predictions that have to fulfill criteria for critical point
    :type fraction_true: `float`
    :param rel_diff_slope: Relative slope difference at critical points
    :type rel_diff_slope: `float`
    :param rel_diff_value: Relative value difference at critical points
    :type rel_diff_value: `float`
    :param delta_init_value: Initial delta of weight value search
    :type delta_init_value: `float`
    :param delta_value_max: Maximum delta of weight value search
    :type delta_value_max: `float`
    :param d2_min: Minimum acceptable value of sum of absolute second derivatives
    :type d2_min: `float`
    :param d_step: Step size of delta increase
    :type d_step: `float`
    :param delta_sign: Delta of weight sign search
    :type delta_sign: `float`
    :param unit_vector_scale: Multiplicative scale of the unit vector e_j.
    :type unit_vector_scale: `int`
    :param kwargs: Accepted for interface compatibility; not read by this method.
    :return: ART BlackBoxClassifier of the extracted model.
    :rtype: :class:`.BlackBoxClassifier`
    """
    search_options = dict(delta_0=delta_0, fraction_true=fraction_true, rel_diff_slope=rel_diff_slope,
                          rel_diff_value=rel_diff_value)
    recovery_options = dict(delta_init_value=delta_init_value, delta_value_max=delta_value_max, d2_min=d2_min,
                            d_step=d_step, delta_sign=delta_sign)

    self._critical_point_search(**search_options)
    self._weight_recovery(**recovery_options)
    self._sign_recovery(unit_vector_scale=unit_vector_scale)
    self._last_layer_extraction(x)

    def predict(x):
        """
        Predict with the extracted model.

        :param x: Samples of input data of shape (num_samples, num_features)
        :type x: `np.ndarray`
        :return: Predictions with the extracted model of shape (num_samples, num_classes)
        :rtype: `np.ndarray`
        """
        # Hidden layer with ReLU, computed in (features, samples) orientation.
        hidden = np.maximum(np.matmul(self.w_0.T, x.T) + self.b_0, 0.0)
        # Linear output layer, transposed back to one row per sample.
        return (np.matmul(self.w_1.T, hidden) + self.b_1).T

    # The wrapper copies the victim classifier's shape, class count, clipping
    # and preprocessing settings.
    victim = self.classifier
    return BlackBoxClassifier(
        predict,
        input_shape=victim.input_shape,
        nb_classes=victim.nb_classes(),
        clip_values=victim.clip_values,
        defences=victim.defences,
        preprocessing=victim.preprocessing,
    )
def attack():
    """Attack the first test sample of a pickled scd01mlp model with BoundaryAttack.

    Loads ``cifar10`` data, unpickles the model checkpoint, runs an
    untargeted ``BoundaryAttack`` (500 iterations) through a two-class
    ``BlackBoxClassifier``, saves the clean and adversarial sample to
    ``x.npy`` / ``adv_x.npy`` and prints the L2 and L-inf distances reported
    by ``utils``.

    :return: ``None``; all results are printed or written to disk.
    """
    flat_dim = 32 * 32 * 3

    x_train, x_test, y_train, y_test = load_data('cifar10', 2)

    pixel_floor = x_train.min()
    pixel_ceiling = x_train.max()
    print('min_pixel_value ', pixel_floor)
    print('max_pixel_value ', pixel_ceiling)

    started = time.time()

    checkpoint = '../binary/checkpoints/cifar10_scd01mlp_100_br02_nr075_ni1000_i1_ep2.pkl'
    # NOTE: unpickling runs code stored in the file; only load trusted checkpoints.
    with open(checkpoint, 'rb') as handle:
        model = pickle.load(handle)

    pred_y = model.predict(x_test)
    print('pred_y: ', pred_y)

    # The attack only sees the wrapper's one-hot predictions.
    wrapper = modelWrapper(model)
    black_box = BlackBoxClassifier(
        predict=wrapper.predict_one_hot,
        input_shape=(flat_dim, ),
        nb_classes=2,
        clip_values=(pixel_floor, pixel_ceiling),
    )

    print('----- generate adv data -----')
    boundary = BoundaryAttack(
        estimator=black_box,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=500,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    # The attack expects 2D input: one flattened sample per row.
    sample = x_test[0].reshape((-1, flat_dim))
    sample_adv = boundary.generate(x=sample)

    np.save('x', sample)
    np.save('adv_x', sample_adv)

    print('x_test ', sample)
    print('x_test_adv ', sample_adv)

    l2_distance = utils.computeDist2(sample, sample_adv)
    print('test data dist2: ', l2_distance)

    linf_distance = utils.computeDistInf(sample, sample_adv)
    print('test data distInf: ', linf_distance)

    print('Cost time: ', time.time() - started)
# NOTE(review): this excerpt starts inside a function whose header is not
# visible here (presumably the `predict` callable used below — TODO confirm).
# The nesting of the indented statements is reconstructed; the original
# indentation is not recoverable from this excerpt.
    # Read the text output from "out.txt".
    # NOTE(review): the handle is not closed within the visible code.
    file = open("out.txt","r+")
    test = file.read()
    out_string = test.strip()
    # convert to categorical: 'dissent' -> 0, 'assent' -> 1, anything else -> 2
    if out_string == 'dissent':
        out_label.append(0)
    elif out_string == 'assent':
        out_label.append(1)
    else:
        out_label.append(2)
    return to_categorical(out_label, 3)

# 1.3 initialize the black-box object
classifier = BlackBoxClassifier(predict, image_target.shape, 3, clip_values=(0, 255))
# Maps a class index back to its text label.
label_dict = {0: 'dissent', 1: 'assent', 2: 'other'}
# 1.4 this is the image we want to target
plt.imshow(image_target)
plt.show()
# The image is wrapped in a one-element batch before prediction.
print('Tesseract output is: ' + label_dict[np.argmax(classifier.predict(np.array([image_target])))])
# Bare string literal with no runtime effect (looks like pasted output of the print above).
""" Tesseract output is: dissent """
# 1.5 this is the label we want to perturb to
plt.imshow(image_init)
plt.show()
print('Tesseract output is: ' + label_dict[np.argmax(classifier.predict(np.array([image_init])))])