Ejemplo n.º 1
0
    def explain(self, request: Dict) -> Dict:
        """Generate an adversarial example for the image carried in ``request``.

        ``request["instances"]`` is read positionally: element 0 is the image and
        element 1 is the label handed to the attack as ``y``. Only the
        ``"squareattack"`` adversary type (compared case-insensitively) is
        handled; any other value is reported as an error instead of silently
        returning ``None``.

        :param request: Mapping with an ``"instances"`` entry holding
            ``[image, label]``.
        :return: ``{"explanations": {...}}`` with the keys
            ``"adversarial_example"``, ``"L2 error"``,
            ``"adversarial_prediction"`` and ``"prediction"``.
        :raises Exception: If the inputs cannot be converted to NumPy arrays,
            if the adversary type is unsupported, or if the attack itself
            fails. The original error is chained as ``__cause__``.
        """
        instances = request["instances"]
        image = instances[0]
        label = instances[1]
        try:
            inputs = np.array(image)
            # ``generate`` is documented to take an ndarray for ``y``; the
            # label arrives here as plain (JSON-decoded) Python data.
            label = np.array(label)
            logging.info("Calling explain on image of shape %s", (inputs.shape,))
        except Exception as e:
            raise Exception(
                "Failed to initialize NumPy array from inputs: %s, %s" % (e, request["instances"])) from e
        try:
            if str.lower(self.adversary_type) != "squareattack":
                # Previously this path fell through and returned None.
                raise ValueError("Unsupported adversary type: %s" % self.adversary_type)

            classifier = BlackBoxClassifier(self._predict, inputs.shape, self.nb_classes,
                                            clip_values=(-np.inf, np.inf))
            # Prediction on the unmodified input, taken before the attack runs.
            preds = np.argmax(classifier.predict(inputs, batch_size=1))
            classifier.channels_first = False
            attack = SquareAttack(estimator=classifier, max_iter=self.max_iter)

            x_adv = attack.generate(x=inputs, y=label)

            adv_preds = np.argmax(classifier.predict(x_adv))
            # Euclidean norm of the flattened difference between the first
            # adversarial sample and the input.
            l2_error = np.linalg.norm(np.reshape(x_adv[0] - inputs, [-1]))

            return {"explanations": {"adversarial_example": x_adv.tolist(), "L2 error": l2_error.tolist(),
                                     "adversarial_prediction": adv_preds.tolist(), "prediction": preds.tolist()}}
        except Exception as e:
            raise Exception("Failed to explain %s" % e) from e
Ejemplo n.º 2
0
def attack(predictWrapper, x_train, x_test, y_train, y_test, input_shape, datapoint):
    """Run an untargeted HopSkipJump attack (norm=2) on ``datapoint``.

    The model is reached only through ``predictWrapper.predict_one_hot``,
    wrapped in a two-class ``BlackBoxClassifier`` whose clip range is the
    min/max of ``x_train``.

    :param predictWrapper: Object exposing ``predict_one_hot``.
    :param x_train: Training data; supplies the clip range and a shape printout.
    :param x_test: Only its shape is printed.
    :param y_train: Only its shape is printed.
    :param y_test: Only its shape is printed.
    :param input_shape: Integer used as the single dimension of the
        classifier input shape and as the row width when reshaping ``datapoint``.
    :param datapoint: Array reshaped to ``(-1, input_shape)`` before the attack.
    :return: Whatever ``HopSkipJump.generate`` returns for the reshaped input.
    """
    lower_bound = x_train.min()
    upper_bound = x_train.max()
    print('min_pixel_value ', lower_bound)
    print('max_pixel_value ', upper_bound)

    shape_report = (
        ('xtrain shape: ', x_train),
        ('xtest shape: ', x_test),
        ('y_train shape: ', y_train),
        ('ytest shape: ', y_test),
    )
    for caption, array in shape_report:
        print(caption, array.shape)

    # Black-box view of the model: only the one-hot prediction function is exposed.
    black_box = BlackBoxClassifier(
        predict=predictWrapper.predict_one_hot,
        input_shape=(input_shape, ),
        nb_classes=2,
        clip_values=(lower_bound, upper_bound),
    )

    print('----- generate adv data by HopSkipJump attack -----')
    started = time.time()

    hop_skip_jump_settings = dict(
        targeted=False,
        norm=2,
        max_iter=100,
        max_eval=10000,
        init_eval=100,
        init_size=100,
    )
    attacker = HopSkipJump(classifier=black_box, **hop_skip_jump_settings)

    # The attack is fed 2D input: one row per sample.
    flat_point = datapoint.reshape((-1, input_shape))
    adv_data = attacker.generate(x=flat_point)

    distortion(flat_point, adv_data)
    print('Generate test adv cost time: ', time.time() - started)

    return adv_data
Ejemplo n.º 3
0
def attack():
    """Attack one CIFAR-10 test sample with a BoundaryAttack against a ``BNN`` model.

    Steps, in order: load the data via ``load_data('cifar10', 2)``, build a
    ``BNN`` from 100 checkpoint paths, predict on the whole test set (saving
    predictions and labels to the text files ``pred_y`` and ``y_test``), print
    the accuracy, wrap the model as a two-class ``BlackBoxClassifier``, and run
    an untargeted ``BoundaryAttack`` on test sample 288. The two distances
    returned by ``utils.computeDist2`` / ``utils.computeDistInf`` and the
    elapsed time are printed. Nothing is returned.
    """
    x_train, x_test, y_train, y_test = load_data('cifar10', 2)
    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)

    s = time.time()

    model = BNN([
        '../binary/checkpoints/cifar10_mlpbnn_approx_ep004_%d.h5' % (i)
        for i in range(100)
    ])

    pred_y = model.predict(x_test)
    print('pred_y: ', pred_y)
    np.savetxt('pred_y', pred_y)
    np.savetxt('y_test', y_test)
    # The original referenced an undefined name ``y`` here (NameError).
    # NOTE(review): ``y_test`` is the only label array in scope, so the last
    # true label is printed - confirm this is the value that was intended.
    print('pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y[-1]',
          pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y_test[-1])
    print('Accuracy: ', accuracy_score(y_true=y_test, y_pred=pred_y))

    # Create a model wrapper
    predictWrapper = modelWrapper(model)

    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=(32 * 32 * 3, ),
                                    nb_classes=2,
                                    clip_values=(min_pixel_value,
                                                 max_pixel_value))

    print('----- generate adv data -----')
    # Named ``attacker`` so the local does not shadow this function's own name.
    attacker = BoundaryAttack(estimator=classifier,
                              targeted=False,
                              delta=0.01,
                              epsilon=0.01,
                              max_iter=100,
                              num_trial=100,
                              sample_size=100,
                              init_size=100)

    print('----- generate adv test data -----')
    x_test = x_test[288]
    # Input data shape should be 2D
    x_test = x_test.reshape((-1, 32 * 32 * 3))
    x_test_adv = attacker.generate(x=x_test)

    print('x_test ', x_test)
    print('x_test_adv ', x_test_adv)

    dist2 = utils.computeDist2(x_test, x_test_adv)
    print('test data dist2: ', dist2)

    distInf = utils.computeDistInf(x_test, x_test_adv)
    print('test data distInf: ', distInf)

    print('Cost time: ', time.time() - s)
Ejemplo n.º 4
0
def get_classifier_bb(defences=None):
    """
    Build the standard BlackBox classifier used for unit testing.

    The wrapped prediction function does not evaluate a model: on every call it
    loads ``data/mnist/api_output.txt`` (resolved two directory levels above
    this file) as JSON, keeps the first ``len(x)`` entries of its ``"values"``
    list and one-hot encodes them over 10 classes.

    :param defences: Forwarded to ``BlackBoxClassifier`` as ``preprocessing_defences``.
    :return: BlackBoxClassifier
    """
    from art.classifiers import BlackBoxClassifier
    from art.utils import to_categorical

    def predict(x):
        # Resolve the canned-output file relative to this module on each call.
        base_dir = os.path.dirname(os.path.dirname(__file__))
        api_output_path = os.path.join(base_dir, "data/mnist", "api_output.txt")
        with open(api_output_path) as json_file:
            recorded = json.load(json_file)
        labels = recorded["values"][: len(x)]
        return to_categorical(labels, nb_classes=10)

    return BlackBoxClassifier(
        predict,
        (28, 28, 1),
        10,
        clip_values=(0, 255),
        preprocessing_defences=defences,
    )
Ejemplo n.º 5
0
    input_shape = train.shape[1:]
    yp = scd.predict(test)

    correct_index = np.nonzero((yp == test_label).astype(np.int8))[0]

    predictWrapper = modelWrapper(scd)

    min_pixel_value = train.min()
    max_pixel_value = train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)

    # Create classifier
    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=input_shape,
                                    nb_classes=args.n_classes,
                                    clip_values=(min_pixel_value,
                                                 max_pixel_value))

    print('----- generate adv data by HopSkipJump attack -----')
    # Generate adversarial test examples

    attacker = HopSkipJump(classifier=classifier,
                           targeted=False,
                           norm=2,
                           max_iter=40,
                           max_eval=10000,
                           init_eval=100,
                           init_size=100)
    # attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100)
Ejemplo n.º 6
0
def attack():
    """Attack test sample 288 of ``cifar10_binary`` with a BoundaryAttack.

    The data is reshaped to ``(-1, 3, 32, 32)`` float32 before use. The model is
    unpickled from a local checkpoint, wrapped as a two-class
    ``BlackBoxClassifier`` over flattened inputs, and attacked (untargeted).
    The original and adversarial samples are saved with ``np.save`` to ``x``
    and ``adv_x``; the results of ``utils.computeDist2`` /
    ``utils.computeDistInf`` and the elapsed time are printed. Returns nothing.
    """

    def _to_model_layout(flat):
        # (-1, 32, 32, 3) with the size-3 axis moved to position 1, as float32.
        return flat.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)

    x_train, x_test, _y_train, _y_test = load_data('cifar10_binary', 2)
    x_train = _to_model_layout(x_train)
    x_test = _to_model_layout(x_test)

    pixel_min = x_train.min()
    pixel_max = x_train.max()
    print('min_pixel_value ', pixel_min)
    print('max_pixel_value ', pixel_max)

    started = time.time()

    # NOTE: pickle.load executes arbitrary code; only load trusted checkpoints.
    checkpoint = '../binary/checkpoints/cifar10_binary_lenet_100.pkl'
    with open(checkpoint, 'rb') as handle:
        model = pickle.load(handle)

    # The shape is printed before and after predict (same label both times).
    print('xtest shape1', x_test.shape)
    pred_y = model.predict(x_test)
    print('xtest shape1', x_test.shape)
    print('pred_y: ', pred_y)

    wrapper = modelWrapper(model)
    flat_dim = 3 * 32 * 32

    classifier = BlackBoxClassifier(
        predict=wrapper.predict_one_hot,
        input_shape=(flat_dim, ),
        nb_classes=2,
        clip_values=(pixel_min, pixel_max),
    )

    print('----- generate adv data -----')
    boundary = BoundaryAttack(
        estimator=classifier,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=100,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    # The attack is fed 2D input: a single flattened row for the chosen sample.
    sample = x_test[288].reshape((-1, flat_dim))
    adversarial = boundary.generate(x=sample)

    np.save('x', sample)
    np.save('adv_x', adversarial)

    print('x_test ', sample)
    print('x_test_adv ', adversarial)

    print('test data dist2: ', utils.computeDist2(sample, adversarial))
    print('test data distInf: ', utils.computeDistInf(sample, adversarial))

    print('Cost time: ', time.time() - started)
Ejemplo n.º 7
0
def attack():
    """Attack the last ``cifar10_binary`` test sample with a BoundaryAttack.

    A model is unpickled from a local checkpoint and asked for predictions with
    ``cuda=False``. It is then wrapped as a two-class ``BlackBoxClassifier``
    over flattened ``3 * 32 * 32`` inputs and attacked (untargeted). The clean
    and adversarial samples are written with ``np.save`` to ``x`` and
    ``adv_x``; the results of ``utils.computeDist2`` /
    ``utils.computeDistInf`` and the elapsed time are printed. Returns nothing.
    """
    feature_count = 3 * 32 * 32
    checkpoint_path = '../binary/checkpoints/cifar10_binary_scd01mlp_100_br02_h500_nr075_ni25000_i1.pkl'

    x_train, x_test, _y_train, _y_test = load_data('cifar10_binary', 2)
    clip_low, clip_high = x_train.min(), x_train.max()
    print('min_pixel_value ', clip_low)
    print('max_pixel_value ', clip_high)

    t_start = time.time()

    # NOTE: pickle.load executes arbitrary code; only load trusted checkpoints.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        model = pickle.load(checkpoint_file)

    predictions = model.predict(x_test, cuda=False)
    print('pred_y: ', predictions)

    # Expose the model to ART only through its one-hot prediction function.
    black_box = BlackBoxClassifier(
        predict=modelWrapper(model).predict_one_hot,
        input_shape=(feature_count, ),
        nb_classes=2,
        clip_values=(clip_low, clip_high),
    )

    print('----- generate adv data -----')
    attack_settings = {
        'targeted': False,
        'delta': 0.01,
        'epsilon': 0.01,
        'max_iter': 100,
        'num_trial': 100,
        'sample_size': 100,
        'init_size': 100,
    }
    boundary_attack = BoundaryAttack(estimator=black_box, **attack_settings)

    print('----- generate adv test data -----')
    # Take the last test sample and present it as a single 2D row.
    target = x_test[-1].reshape((-1, feature_count))
    target_adv = boundary_attack.generate(x=target)

    np.save('x', target)
    np.save('adv_x', target_adv)

    print('x_test ', target)
    print('x_test_adv ', target_adv)

    l2_like = utils.computeDist2(target, target_adv)
    print('test data dist2: ', l2_like)

    linf_like = utils.computeDistInf(target, target_adv)
    print('test data distInf: ', linf_like)

    print('Cost time: ', time.time() - t_start)
    def extract(
        self,
        x,
        y=None,
        delta_0=0.05,
        fraction_true=0.3,
        rel_diff_slope=0.00001,
        rel_diff_value=0.000001,
        delta_init_value=0.1,
        delta_value_max=50,
        d2_min=0.0004,
        d_step=0.01,
        delta_sign=0.02,
        unit_vector_scale=10000,
        **kwargs
    ):
        """
        Extract the targeted model.

        Runs, in order, the critical point search, the weight recovery, the sign recovery and
        the last layer extraction, then wraps the recovered parameters (``w_0``, ``b_0``,
        ``w_1``, ``b_1``) in a black-box classifier. ``y`` and ``kwargs`` are accepted but
        not used by this method.

        :param x: Samples of input data of shape (num_samples, num_features).
        :type x: `np.ndarray`
        :param y: Correct labels or target labels for `x`, depending if the attack is targeted
               or not. This parameter is only used by some of the attacks.
        :type y: `np.ndarray`
        :param delta_0: Initial step size of binary search
        :type delta_0: `float`
        :param fraction_true: Fraction of output predictions that have to fulfill criteria for critical point
        :type fraction_true: `float`
        :param rel_diff_slope: Relative slope difference at critical points
        :type rel_diff_slope: `float`
        :param rel_diff_value: Relative value difference at critical points
        :type rel_diff_value: `float`
        :param delta_init_value: Initial delta of weight value search
        :type delta_init_value: `float`
        :param delta_value_max: Maximum delta of weight value search
        :type delta_value_max: `float`
        :param d2_min: Minimum acceptable value of sum of absolute second derivatives
        :type d2_min: `float`
        :param d_step: Step size of delta increase
        :type d_step: `float`
        :param delta_sign: Delta of weight sign search
        :type delta_sign: `float`
        :param unit_vector_scale: Multiplicative scale of the unit vector e_j.
        :type unit_vector_scale: `int`

        :return: ART BlackBoxClassifier of the extracted model.
        :rtype: :class:`.BlackBoxClassifier`
        """
        # The four stages run in this fixed order.
        self._critical_point_search(
            delta_0=delta_0,
            fraction_true=fraction_true,
            rel_diff_slope=rel_diff_slope,
            rel_diff_value=rel_diff_value,
        )
        self._weight_recovery(
            delta_init_value=delta_init_value,
            delta_value_max=delta_value_max,
            d2_min=d2_min,
            d_step=d_step,
            delta_sign=delta_sign,
        )
        self._sign_recovery(unit_vector_scale=unit_vector_scale)
        self._last_layer_extraction(x)

        def predict(x):
            """
            Predict with the extracted model.

            :param x: Samples of input data of shape (num_samples, num_features)
            :type x: `np.ndarray`

            :return: Predictions with the extracted model of shape (num_samples, num_classes)
            :rtype: `np.ndarray`
            """
            # First layer: affine map followed by clamping negatives to zero.
            pre_activation = np.matmul(self.w_0.T, x.T) + self.b_0
            hidden = np.maximum(pre_activation, 0.0)
            # Second layer: affine map only; transpose back to one row per sample.
            output = np.matmul(self.w_1.T, hidden) + self.b_1
            return output.T

        victim = self.classifier
        return BlackBoxClassifier(
            predict,
            input_shape=victim.input_shape,
            nb_classes=victim.nb_classes(),
            clip_values=victim.clip_values,
            defences=victim.defences,
            preprocessing=victim.preprocessing,
        )
Ejemplo n.º 9
0
def attack():
    """Attack the first ``cifar10`` test sample with a 500-iteration BoundaryAttack.

    A model is unpickled from a local checkpoint, wrapped as a two-class
    ``BlackBoxClassifier`` over flattened ``32 * 32 * 3`` inputs, and attacked
    (untargeted). The clean and adversarial samples are written with
    ``np.save`` to ``x`` and ``adv_x``; the results of ``utils.computeDist2`` /
    ``utils.computeDistInf`` and the elapsed time are printed. Returns nothing.
    """
    n_features = 32 * 32 * 3

    x_train, x_test, _y_train, _y_test = load_data('cifar10', 2)
    value_floor = x_train.min()
    value_ceiling = x_train.max()
    print('min_pixel_value ', value_floor)
    print('max_pixel_value ', value_ceiling)

    clock_start = time.time()

    # NOTE: pickle.load executes arbitrary code; only load trusted checkpoints.
    model_file = '../binary/checkpoints/cifar10_scd01mlp_100_br02_nr075_ni1000_i1_ep2.pkl'
    with open(model_file, 'rb') as stream:
        model = pickle.load(stream)

    test_predictions = model.predict(x_test)
    print('pred_y: ', test_predictions)

    # Black-box view of the model via its one-hot prediction function.
    one_hot_predict = modelWrapper(model).predict_one_hot
    victim = BlackBoxClassifier(
        predict=one_hot_predict,
        input_shape=(n_features, ),
        nb_classes=2,
        clip_values=(value_floor, value_ceiling),
    )

    print('----- generate adv data -----')
    boundary = BoundaryAttack(
        estimator=victim,
        targeted=False,
        delta=0.01,
        epsilon=0.01,
        max_iter=500,
        num_trial=100,
        sample_size=100,
        init_size=100,
    )

    print('----- generate adv test data -----')
    # First test sample, presented as a single 2D row.
    clean = x_test[0].reshape((-1, n_features))
    perturbed = boundary.generate(x=clean)

    for file_name, payload in (('x', clean), ('adv_x', perturbed)):
        np.save(file_name, payload)

    print('x_test ', clean)
    print('x_test_adv ', perturbed)

    print('test data dist2: ', utils.computeDist2(clean, perturbed))
    print('test data distInf: ', utils.computeDistInf(clean, perturbed))

    print('Cost time: ', time.time() - clock_start)
Ejemplo n.º 10
0
        file = open("out.txt","r+") 
        test = file.read()
        out_string = test.strip()

        # convert to categorical
        if out_string == 'dissent':
            out_label.append(0)
        elif out_string == 'assent':
            out_label.append(1)
        else: 
            out_label.append(2)
    
    return to_categorical(out_label, 3)

# 1.3 Initialise the black-box classifier object: it wraps `predict`, takes
# inputs shaped like `image_target`, and has 3 classes with values clipped to [0, 255].
classifier = BlackBoxClassifier(predict, image_target.shape, 3, clip_values=(0, 255))

# Maps the predicted class index back to a readable label.
label_dict = {0: 'dissent', 1: 'assent', 2: 'other'}

# 1.4 this is the image we want to target
plt.imshow(image_target)
plt.show()
# The image is wrapped in a one-element batch before being classified.
print('Tesseract output is: ' + label_dict[np.argmax(classifier.predict(np.array([image_target])))])
"""
Tesseract output is: dissent
"""

# 1.5 this is the label we want to perturb to
plt.imshow(image_init)
plt.show()
# Same one-element-batch classification, this time for the initial image.
print('Tesseract output is: ' + label_dict[np.argmax(classifier.predict(np.array([image_init])))])