def predict(x):
    """
    Classify one flattened image by querying the Google Cloud Vision label-detection endpoint.

    The input is reshaped to a 400x400 RGB image, encoded as PNG in memory and sent through the module-level
    `client`. The returned label annotations are searched for the descriptions "Cat" and "Wolf". Each call
    increments the module-level `num_queries` counter and prints it.

    :param x: Array-like holding 400 * 400 * 3 pixel values; values are cast to `uint8`.
    :return: One-hot prediction over three classes as produced by `to_categorical`: index 0 when the "Cat"
             score is strictly higher than the "Wolf" score, index 2 when both scores are zero, index 1
             otherwise.
    """
    global num_queries
    num_queries += 1
    print(f"classifier query {num_queries}")

    # Translate array to image
    pixels = np.reshape(np.array(x), (400, 400, 3)).astype(np.uint8)
    im = Image.fromarray(pixels)

    # Get the image in bytes
    img_byte_arr = io.BytesIO()
    im.save(img_byte_arr, format='PNG')
    bs = img_byte_arr.getvalue()

    # Querying google cloud service
    image = vision.Image(content=bs)
    resp = client.label_detection(image=image)

    # Collect the scores of the two labels of interest; a missing label keeps score 0
    cat_score = wolf_score = 0
    for label in resp.label_annotations:
        if label.description == "Cat":
            cat_score = label.score
        elif label.description == "Wolf":
            wolf_score = label.score

    # Give label predictions
    if cat_score == 0 and wolf_score == 0:
        return to_categorical([2], nb_classes=3)
    if cat_score > wolf_score:
        return to_categorical([0], nb_classes=3)
    return to_categorical([1], nb_classes=3)
Beispiel #2
0
def predict(x):
    """
    Classify one flattened image by querying the Google Cloud Vision label-detection endpoint.

    The input is reshaped to the module-level `SHAPE`, encoded as PNG in memory and sent through the
    module-level `client`, requesting up to 20 labels. The returned label annotations are searched for the
    descriptions "Human" and "Machine". Each call increments the module-level `num_queries` counter and
    prints it.

    :param x: Array-like holding the pixel values of one image; values are cast to `uint8`.
    :return: One-hot prediction over three classes as produced by `to_categorical`: index 0 when the "Human"
             score is strictly higher than the "Machine" score, index 2 when both scores are zero, index 1
             otherwise.
    """
    global num_queries
    num_queries += 1
    print(f"classifier query {num_queries}")

    # Translate array to image
    pixels = np.reshape(np.array(x), SHAPE).astype(np.uint8)
    im = Image.fromarray(pixels)

    # Get the image in bytes
    img_byte_arr = io.BytesIO()
    im.save(img_byte_arr, format='PNG')
    bs = img_byte_arr.getvalue()

    # Querying google cloud service. Exactly one request is issued per call: no client introspection is
    # printed and no additional endpoint is queried, so `num_queries` matches the number of requests made.
    image = vision.Image(content=bs)
    resp = client.label_detection(image=image, max_results=20)

    # Collect the scores of the two labels of interest; a missing label keeps score 0
    human_score = machine_score = 0
    for label in resp.label_annotations:
        if label.description == "Human":
            human_score = label.score
        elif label.description == "Machine":
            machine_score = label.score

    # Give label predictions
    if human_score == 0 and machine_score == 0:
        return to_categorical([2], nb_classes=3)
    if human_score > machine_score:
        return to_categorical([0], nb_classes=3)
    return to_categorical([1], nb_classes=3)
    def test_to_categorical(self):
        """Check the one-hot encoding of index labels without and with an explicit number of classes."""
        labels = np.array([3, 1, 4, 1, 5, 9])

        # No class count given: the expected width is 10 for a largest label of 9
        encoded = to_categorical(labels)
        self.assertEqual(encoded.shape, (6, 10))
        recovered = encoded.argmax(axis=1)
        self.assertTrue(np.all(recovered == labels))
        only_zeros_and_ones = np.logical_or(encoded == 0.0, encoded == 1.0)
        self.assertTrue(np.all(only_zeros_and_ones))

        # Explicit class count
        encoded_wide = to_categorical(labels, 20)
        self.assertEqual(encoded_wide.shape, (6, 20))
    def outlier_detection(self, x_val: np.ndarray, y_val: np.ndarray) -> List[Tuple[int, np.ndarray, np.ndarray]]:
        """
        Return the classes suspected of being backdoor targets together with their mask and pattern.

        A backdoor (mask, pattern) pair is generated for every class. A class is flagged when the L1 norm of
        its mask is not above the median of all norms and lies more than two scaled median absolute deviations
        away from that median.

        :param x_val: Validation samples handed to `generate_backdoor`.
        :param y_val: Validation labels handed to `generate_backdoor`.
        :return: A list of tuples containing the class index, mask, and pattern for each suspected label.
        """
        num_classes = self.nb_classes
        l1_norms, masks, patterns = [], [], []

        # Assuming classes are indexed 0 .. num_classes - 1
        for class_idx in range(num_classes):
            one_hot_target = to_categorical([class_idx], num_classes).flatten()
            mask, pattern = self.generate_backdoor(x_val, y_val, one_hot_target)
            l1_norms.append(np.sum(np.abs(mask)))
            masks.append(mask)
            patterns.append(pattern)

        # Scale factor for the median absolute deviation, assuming the L1 norms are normally distributed
        consistency_constant = 1.4826
        median = np.median(l1_norms)
        mad = consistency_constant * np.median(np.abs(l1_norms - median))

        flagged_labels = []
        for class_idx, norm in enumerate(l1_norms):
            anomaly_index = np.abs(norm - median) / mad
            # Points with anomaly_index > 2 have 95% probability of being an outlier
            # Backdoor outliers show up as masks with small l1 norms
            if norm <= median and anomaly_index > 2:
                logger.warning("Detected potential backdoor in class: %s", str(class_idx))
                flagged_labels.append(class_idx)

        suspects = []
        for label in flagged_labels:
            suspects.append((label, masks[label], patterns[label]))
        return suspects
    def predict(self, x, **kwargs):
        """
        Perform prediction for a batch of inputs.

        :param x: Test set.
        :type x: `np.ndarray`
        :return: Array of predictions of shape `(nb_inputs, nb_classes)`.
        :rtype: `np.ndarray`
        :raises TypeError: If the wrapped model is neither an `xgboost.Booster` nor an
                           `xgboost.XGBClassifier`.
        """
        from xgboost import Booster, XGBClassifier
        from art.utils import to_categorical

        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)

        if isinstance(self._model, Booster):
            from xgboost import DMatrix

            data = DMatrix(x_preprocessed, label=None)
            y_prediction = np.asarray(self._model.predict(data))
            # A one-dimensional output is treated as class labels and one-hot encoded
            if len(y_prediction.shape) == 1:
                y_prediction = to_categorical(labels=y_prediction,
                                              nb_classes=self.nb_classes())
        elif isinstance(self._model, XGBClassifier):
            y_prediction = self._model.predict_proba(x_preprocessed)
        else:
            # Fail with a clear message instead of an unbound `y_prediction` further down
            raise TypeError('Unsupported model type for prediction: %s. Expected `xgboost.Booster` or '
                            '`xgboost.XGBClassifier`.' % type(self._model))

        # Apply postprocessing
        y_prediction = self._apply_postprocessing(preds=y_prediction,
                                                  fit=False)

        return y_prediction
Beispiel #6
0
 def _predict_rf(x):
     """Query the black-box random forest and return its arg-max labels one-hot encoded."""
     class_probabilities = model_kdf.rf_model.predict_proba(x)
     hard_labels = np.argmax(class_probabilities, axis=1)
     # The width of the encoding is the number of distinct labels in the selected training subset
     n_train_classes = len(np.unique(y[indx_to_take_train]))
     return to_categorical(hard_labels, nb_classes=n_train_classes)
    def poison_dataset(x_clean, y_clean, poison_func):
        """
        Append backdoor-poisoned samples to a copy of a clean, one-hot labelled 10-class dataset.

        For every source class `i` in 0..9 the target class is `(i + 1) % 10`. The number of poisoned samples
        is `int(PP_POISON * n_target / (1 - PP_POISON))`, where `n_target` is the number of clean samples of
        the target class. Source images are drawn with `np.random.choice`, poisoned with `poison_func` through
        `PoisoningAttackBackdoor` and appended with the target label.

        :param x_clean: Clean samples.
        :param y_clean: One-hot labels of the clean samples.
        :param poison_func: Perturbation function handed to `PoisoningAttackBackdoor`.
        :return: Tuple `(is_poison, x_poison, y_poison)`; `is_poison` is a boolean array marking the appended
                 samples.
        """
        clean_labels = np.argmax(y_clean, axis=1)

        x_poison = np.copy(x_clean)
        y_poison = np.copy(y_clean)
        is_poison = np.zeros(np.shape(y_poison)[0])

        for src in range(10):
            tgt = (src + 1) % 10
            n_points_in_tgt = np.round(np.sum(clean_labels == tgt))
            num_poison = int((PP_POISON * n_points_in_tgt) / (1 - PP_POISON))
            if not num_poison:
                continue

            src_imgs = np.copy(x_clean[clean_labels == src])
            chosen = np.random.choice(np.shape(src_imgs)[0], num_poison)

            attack = PoisoningAttackBackdoor(poison_func)
            target_labels = to_categorical(np.ones(num_poison) * tgt, 10)
            poison_images, poison_labels = attack.poison(src_imgs[chosen], y=target_labels)

            x_poison = np.append(x_poison, poison_images, axis=0)
            y_poison = np.append(y_poison, poison_labels, axis=0)
            is_poison = np.append(is_poison, np.ones(num_poison))

        return is_poison != 0, x_poison, y_poison
Beispiel #8
0
    def test_keras_mnist(self):
        """
        Run the targeted and the untargeted saliency map attack against the Keras MNIST classifier and check
        that the adversarial samples differ from the inputs, are not all zero, change at least one prediction
        and leave the test data unmodified.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Keras classifier
        classifier = get_classifier_kr()

        # Log the accuracy on both splits
        for message, features, labels in (
                ('[Keras, MNIST] Accuracy on training set: %.2f%%', x_train, y_train),
                ('[Keras, MNIST] Accuracy on test set: %.2f%%', x_test, y_test)):
            scores = classifier._model.evaluate(features, labels)
            logger.info(message, (scores[1] * 100))

        def check_adversarial(x_adv):
            """Shared assertions and accuracy logging for one set of adversarial examples."""
            self.assertFalse((x_test == x_adv).all())
            self.assertFalse((0. == x_adv).all())

            y_pred = get_labels_np_array(classifier.predict(x_adv))
            self.assertFalse((y_test == y_pred).all())

            n_matching = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))
            accuracy = n_matching / y_test.shape[0]
            logger.info('Accuracy on adversarial examples: %.2f%%',
                        (accuracy * 100))

        # targeted

        # Draw random target classes until none coincides with the true class
        nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
        while True:
            targets = np.random.randint(nb_classes, size=NB_TEST)
            if not (targets == np.argmax(y_test, axis=1)).any():
                break

        # Perform attack
        targeted_attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        check_adversarial(targeted_attack.generate(x_test, y=to_categorical(targets, nb_classes)))

        # untargeted
        untargeted_attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        check_adversarial(untargeted_attack.generate(x_test))

        # Check that x_test has not been modified by attack and classifier
        max_deviation = float(np.max(np.abs(x_test_original - x_test)))
        self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
Beispiel #9
0
    def _test_mnist_targeted(self, classifier):
        """
        Run the targeted saliency map attack on the first `NB_TEST` MNIST test samples and check that the
        adversarial samples differ from the inputs, are not all zero and change at least one prediction.

        :param classifier: Classifier under attack.
        """
        import numpy as np

        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist
        x_test = x_test[:NB_TEST]
        y_test = y_test[:NB_TEST]

        # Draw random target classes until none coincides with the true class
        nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
        while True:
            targets = np.random.randint(nb_classes, size=NB_TEST)
            if not (targets == np.argmax(y_test, axis=1)).any():
                break

        # Perform attack
        attack = SaliencyMapMethod(classifier, theta=1)
        one_hot_targets = to_categorical(targets, nb_classes)
        x_test_adv = attack.generate(x_test, y=one_hot_targets)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        n_matching = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))
        acc = n_matching / y_test.shape[0]
        print('\nAccuracy on adversarial examples: %.2f%%' % (acc * 100))
Beispiel #10
0
    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Perform prediction for a batch of inputs.

        :param x: Input samples.
        :return: Array of predictions of shape `(nb_inputs, nb_classes)`.
        :raises TypeError: If the wrapped model is neither an `xgboost.Booster` nor an
                           `xgboost.XGBClassifier`.
        """
        import xgboost  # lgtm [py/repeated-import] lgtm [py/import-and-import-from]

        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)

        if isinstance(self._model, xgboost.Booster):
            data = xgboost.DMatrix(x_preprocessed, label=None)
            y_prediction = self._model.predict(data)
            # A one-dimensional output is treated as class labels and one-hot encoded
            if len(y_prediction.shape) == 1:
                y_prediction = to_categorical(labels=y_prediction,
                                              nb_classes=self.nb_classes)
        elif isinstance(self._model, xgboost.XGBClassifier):
            y_prediction = self._model.predict_proba(x_preprocessed)
        else:
            # Fail with a clear message instead of an unbound `y_prediction` further down
            raise TypeError(
                "Unsupported model type for prediction: {}. Expected `xgboost.Booster` or "
                "`xgboost.XGBClassifier`.".format(type(self._model))
            )

        # Apply postprocessing
        y_prediction = self._apply_postprocessing(preds=y_prediction,
                                                  fit=False)

        return y_prediction
Beispiel #11
0
def predict(x):
    """
    Classify a batch of images by running the `tesseract` command-line tool on each of them.

    Every image is written to `tmp.png` in the current working directory, `tesseract tmp.png out` is
    executed, and the stripped content of `out.txt` is mapped to a class index.

    :param x: Iterable of images accepted by `imageio.imsave`.
    :return: One-hot predictions over three classes as produced by `to_categorical`: index 0 for the text
             'dissent', index 1 for 'assent', index 2 for anything else.
    :raises Exception: If the tesseract command exits with a non-zero status.
    """
    label_by_text = {'dissent': 0, 'assent': 1}
    out_label = []

    for x_i in x:
        # save image as intermediate png
        imageio.imsave('tmp.png', x_i)

        # run tesseract
        status = os.system("tesseract tmp.png out")
        if status != 0:
            raise Exception('Tesseract failed to run.')

        # read text; the context manager closes the file handle on every iteration
        with open("out.txt", "r") as out_file:
            out_string = out_file.read().strip()

        # convert to categorical; unrecognised text falls into the third class
        out_label.append(label_by_text.get(out_string, 2))

    return to_categorical(out_label, 3)
    def test_keras_mnist(self):
        """
        Run a targeted and an untargeted ElasticNet attack (2 iterations each) against the Keras classifier.

        The test checks one row of the targeted adversarial output against pinned values, that adversarial
        samples stay within [0, 1], the pinned predictions of the untargeted run, and that `x_test` is left
        unmodified.
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Build KerasClassifier
        krc = get_classifier_kr()

        # First attack: targeted, with fixed target classes
        ead = ElasticNet(classifier=krc, targeted=True, max_iter=2)
        y_target = to_categorical(np.asarray([6, 6, 7, 4, 9, 7, 9, 0, 1, 0]), nb_classes=10)
        x_test_adv = ead.generate(x_test, y=y_target)
        # Pinned values for sample 2, row 14, channel 0 of the adversarial output
        expected_x_test_adv = np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                          0.0, 0.0, 0.0, 0.0, 0.00183569, 0.0,
                                          0.0, 0.49765405, 1., 0.6467149, 0.0033755, 0.0052456,
                                          0.0, 0.01104407, 0.00495547, 0.02747423, 0.0, 0.0,
                                          0.0, 0.0, 0.0, 0.0])
        np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv, decimal=6)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(y_target, axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('EAD target: %s', target)
        logger.debug('EAD actual: %s', y_pred_adv)
        logger.info('EAD success rate: %.2f%%', (100 * sum(target == y_pred_adv) / float(len(target))))
        # At least one sample must reach its target class
        self.assertTrue((target == y_pred_adv).any())

        # Second attack: untargeted, `y_target` is passed as the labels
        ead = ElasticNet(classifier=krc, targeted=False, max_iter=2)
        y_target = to_categorical(np.asarray([9, 5, 6, 7, 1, 6, 1, 5, 8, 5]), nb_classes=10)
        x_test_adv = ead.generate(x_test, y=y_target)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        # NOTE(review): the one-hot `y_target` is logged here, whereas the first attack logs class indices
        logger.debug('EAD target: %s', y_target)
        logger.debug('EAD actual: %s', y_pred_adv)
        # NOTE(review): `target` still holds the class indices of the FIRST attack; it is not recomputed from
        # the second `y_target`, so the rate and the assertion below compare against the first targets -
        # confirm this is intended
        logger.info('EAD success rate: %.2f', (100 * sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
        # Pinned predictions for the untargeted adversarial samples
        np.testing.assert_array_equal(y_pred_adv, np.asarray([7, 1, 1, 4, 4, 1, 4, 4, 4, 4]))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)

        # Only reached when every assertion above passed
        k.clear_session()
Beispiel #13
0
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Required when the attack is targeted.
        :type y: `np.ndarray`
        :param x_adv_init: Initial array to act as initial adversarial examples. Same shape as `x`. Read from
                           `kwargs`.
        :type x_adv_init: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        :raises ValueError: If the attack is targeted and no `y` is given.
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        # Clip range: taken from the classifier when it defines one, otherwise inferred from the data
        clip_values = getattr(self.classifier, 'clip_values', None)
        if clip_values is not None:
            clip_min, clip_max = clip_values
        else:
            clip_min, clip_max = np.min(x), np.max(x)

        # Prediction from the original images
        preds = np.argmax(self.classifier.predict(x, batch_size=self.batch_size), axis=1)

        # Prediction from the initial adversarial examples, or per-sample placeholders when none are given
        x_adv_init = kwargs.get('x_adv_init')
        if x_adv_init is None:
            init_preds = [None] * len(x)
            x_adv_init = [None] * len(x)
        else:
            init_preds = np.argmax(self.classifier.predict(x_adv_init, batch_size=self.batch_size), axis=1)

        # A targeted attack cannot run without target labels
        if self.targeted and y is None:
            raise ValueError('Target labels `y` need to be provided for a targeted attack.')

        # Work on a copy of the inputs and on class indices instead of one-hot labels
        x_adv = x.astype(NUMPY_DTYPE)
        if y is not None:
            y = np.argmax(y, axis=1)

        # Generate the adversarial samples one by one; -1 stands in for the label of an untargeted attack
        for ind, val in enumerate(x_adv):
            label = y[ind] if self.targeted else -1
            x_adv[ind] = self._perturb(x=val, y=label, y_p=preds[ind], init_pred=init_preds[ind],
                                       adv_init=x_adv_init[ind], clip_min=clip_min, clip_max=clip_max)

        # Back to one-hot labels for the success computation
        if y is not None:
            y = to_categorical(y, self.classifier.nb_classes())

        success = compute_success(self.classifier, x, y, x_adv, self.targeted, batch_size=self.batch_size)
        logger.info('Success rate of HopSkipJump attack: %.2f%%', 100 * success)

        return x_adv
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in a Numpy array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Not read by this method; the attack works from the classifier's own predictions on `x`.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        x_adv = x.astype(NUMPY_DTYPE)

        # Initialize variables
        y_pred = self.classifier.predict(x, logits=False, batch_size=self.batch_size)
        pred_class = np.argmax(y_pred, axis=1)

        # Compute perturbation with implicit batching
        for batch_id in range(int(np.ceil(x_adv.shape[0] / float(self.batch_size)))):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            # Basic slicing: `batch` is a view, so the in-place updates below write into `x_adv`
            batch = x_adv[batch_index_1:batch_index_2]

            # Main algorithm for each batch
            norm_batch = np.linalg.norm(np.reshape(batch, (batch.shape[0], -1)), axis=1)
            l_batch = pred_class[batch_index_1:batch_index_2]
            # Boolean mask selecting, per sample, the score of the originally predicted class
            l_b = to_categorical(l_batch, self.classifier.nb_classes).astype(bool)

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                # Compute score
                score = self.classifier.predict(batch, logits=False)[l_b]

                # Compute the gradients and norm
                grads = self.classifier.class_gradient(batch, label=l_batch, logits=False)
                grads = np.squeeze(grads, axis=1)
                norm_grad = np.linalg.norm(np.reshape(grads, (batch.shape[0], -1)), axis=1)

                # Theta
                theta = self._compute_theta(norm_batch, score, norm_grad)

                # Perturbation
                di_batch = self._compute_pert(theta, grads, norm_grad)

                # Update xi and perturbation
                batch += di_batch

            # Apply clip
            if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
                clip_min, clip_max = self.classifier.clip_values
                x_adv[batch_index_1:batch_index_2] = np.clip(batch, clip_min, clip_max)
            else:
                x_adv[batch_index_1:batch_index_2] = batch

        # Fraction of samples whose predicted class changed; scaled by 100 because the message reports percent
        original_classes = np.argmax(self.classifier.predict(x, batch_size=self.batch_size), axis=1)
        adversarial_classes = np.argmax(self.classifier.predict(x_adv, batch_size=self.batch_size), axis=1)
        success_rate = np.sum(original_classes != adversarial_classes) / x.shape[0]
        logger.info('Success rate of NewtonFool attack: %.2f%%', 100 * success_rate)

        return x_adv
Beispiel #15
0
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in a Numpy array.

        :param x: An array with the original inputs to be attacked.
        :param y: Labels; only forwarded to `compute_success` for the logged success rate.
        :return: An array holding the adversarial examples.
        """
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Classes predicted for the unperturbed inputs
        pred_class = np.argmax(self.estimator.predict(x, batch_size=self.batch_size), axis=1)

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches, desc="NewtonFool", disable=not self.verbose):
            start = batch_id * self.batch_size
            stop = (batch_id + 1) * self.batch_size
            # Basic slicing: `batch` is a view, so the in-place updates below write into `x_adv`
            batch = x_adv[start:stop]

            # Per-batch quantities that stay fixed during the iterations
            norm_batch = np.linalg.norm(np.reshape(batch, (batch.shape[0], -1)), axis=1)
            l_batch = pred_class[start:stop]
            # Boolean mask selecting, per sample, the score of the originally predicted class
            l_b = to_categorical(l_batch, self.estimator.nb_classes).astype(bool)

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                # Score of the originally predicted class
                score = self.estimator.predict(batch)[l_b]

                # Gradients of that class and their norm
                grads = self.estimator.class_gradient(batch, label=l_batch)
                if grads.shape[1] == 1:
                    grads = np.squeeze(grads, axis=1)
                norm_grad = np.linalg.norm(np.reshape(grads, (batch.shape[0], -1)), axis=1)

                # Step size and resulting perturbation
                theta = self._compute_theta(norm_batch, score, norm_grad)
                di_batch = self._compute_pert(theta, grads, norm_grad)

                # Update the batch in place
                batch += di_batch

            # Write the batch back, clipped when the estimator defines clip values
            clip_values = self.estimator.clip_values
            if clip_values is None:
                x_adv[start:stop] = batch
            else:
                clip_min, clip_max = clip_values
                x_adv[start:stop] = np.clip(batch, clip_min, clip_max)

        success = compute_success(self.estimator, x, y, x_adv, batch_size=self.batch_size)
        logger.info("Success rate of NewtonFool attack: %.2f%%", 100 * success)
        return x_adv
    def test_random_targets(self):
        """Random targets must differ from the true class for index labels and for one-hot labels."""
        y = np.array([3, 1, 4, 1, 5, 9])

        # First the index labels, then their one-hot encoding
        for labels in (y, to_categorical(y)):
            random_y = random_targets(labels, 10)
            drawn_classes = random_y.argmax(axis=1)
            self.assertTrue(np.all(y != drawn_classes))
def test_failure_modes(art_warning, get_default_mnist_subset, image_dl_estimator, params):
    """Constructing the clean-label backdoor attack with the given `params` must raise `ValueError`."""
    try:
        (_x_train, _y_train), (_, _) = get_default_mnist_subset
        classifier, _ = image_dl_estimator()

        backdoor = PoisoningAttackBackdoor(add_pattern_bd)
        # One-hot target for class 9 out of 10 classes
        target = to_categorical([9], 10)[0]

        with pytest.raises(ValueError):
            PoisoningAttackCleanLabelBackdoor(backdoor, classifier, target, **params)
    except ARTTestException as exc:
        art_warning(exc)
    def test_get_labels_np_array(self):
        """Labels recovered from noisy softmax probabilities must equal the one-hot labels they came from."""
        y = np.array([3, 1, 4, 1, 5, 9])
        y_ = to_categorical(y)

        # Noisy logits centred on the one-hot labels, normalised row-wise into probabilities
        logits = np.random.normal(1 * y_, scale=0.1)
        exp_logits = np.exp(logits)
        ps = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

        labels = get_labels_np_array(ps)
        self.assertEqual(labels.shape, y_.shape)
        self.assertTrue(np.all(labels == y_))
    def generate(self, x, y=None):
        """
        Train the adversarial patch and return it together with its circular mask.

        The patch is initialised from a scaled standard normal distribution and updated for `max_iter` steps
        by gradient descent on the classifier loss with respect to `self.target`.

        :param x: An array with the original inputs. `x` is expected to have spatial dimensions.
        :type x: `np.ndarray`
        :param y: An array with the original labels to be predicted. Not read by this method.
        :type y: `np.ndarray`
        :return: A tuple holding the adversarial patch and the result of `_get_circular_patch_mask()`.
        :rtype: `tuple`
        :raises ValueError: If `x` has exactly two dimensions (feature vectors).
        """
        logger.info('Creating adversarial patch.')

        if len(x.shape) == 2:
            raise ValueError(
                'Feature vectors detected. The adversarial patch can only be applied to data with spatial '
                'dimensions.')

        # Random initialisation of the patch
        self.patch = (np.random.standard_normal(size=self.patch_shape)) * 20.0

        for i_step in range(self.max_iter):
            # Log the first step and every 100th step
            if i_step == 0 or (i_step + 1) % 100 == 0:
                logger.info('Training Step: %i', i_step + 1)

            # Clip each channel (last axis of the patch) to its own (min, max) range before using the patch
            if self.clip_patch is not None:
                for i_channel, (a_min, a_max) in enumerate(self.clip_patch):
                    self.patch[:, :,
                               i_channel] = np.clip(self.patch[:, :,
                                                               i_channel],
                                                    a_min=a_min,
                                                    a_max=a_max)

            patched_images, patch_mask_transformed, transforms = self._augment_images_with_random_patch(
                x, self.patch)
            # Loss gradients with respect to the patched images, with `self.target` repeated for every sample
            # of `x` as the label
            gradients = self.classifier.loss_gradient(
                patched_images,
                to_categorical(
                    np.broadcast_to(np.array(self.target), x.shape[0]),
                    self.classifier.nb_classes))
            patch_gradients = np.zeros_like(self.patch)

            # NOTE(review): this loop indexes the first `self.batch_size` entries of `gradients`,
            # `patch_mask_transformed` and `transforms`, while the labels above have `x.shape[0]` rows -
            # presumably the augmentation helper returns `batch_size` samples; TODO confirm
            for i_batch in range(self.batch_size):
                patch_gradients_i = self._reverse_transformation(
                    gradients[i_batch, :, :, :],
                    patch_mask_transformed[i_batch, :, :, :],
                    transforms[i_batch])
                patch_gradients += patch_gradients_i

            # Average the accumulated gradients and take one descent step
            patch_gradients = patch_gradients / self.batch_size
            self.patch -= patch_gradients * self.learning_rate

        return self.patch, self._get_circular_patch_mask()
Beispiel #20
0
    def _query_label(self, x: np.ndarray) -> np.ndarray:
        """
        Query the victim classifier and reduce its output to hard labels.

        :param x: An array with the source input to the victim classifier.
        :return: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)`.
        """
        victim_output = self.estimator.predict(x=x, batch_size=self.batch_size_query)
        # Keep only the arg-max class of every prediction and re-encode it as one-hot
        hard_labels = np.argmax(victim_output, axis=1)
        return to_categorical(labels=hard_labels, nb_classes=self.estimator.nb_classes)
def test_poison(art_warning, get_default_mnist_subset, image_dl_estimator):
    """Poisoning with the clean-label backdoor attack must preserve the shapes of the data and the labels."""
    try:
        (x_train, y_train), (_, _) = get_default_mnist_subset
        classifier, _ = image_dl_estimator()

        # One-hot target for class 9 out of 10 classes
        target = to_categorical([9], 10)[0]
        attack = PoisoningAttackCleanLabelBackdoor(PoisoningAttackBackdoor(add_pattern_bd), classifier, target)
        poison_data, poison_labels = attack.poison(x_train, y_train)

        for produced, reference in ((poison_data, x_train), (poison_labels, y_train)):
            np.testing.assert_equal(produced.shape, reference.shape)
    except ARTTestException as exc:
        art_warning(exc)
    def test_get_label_conf(self):
        """Confidences and labels extracted from sharply peaked probabilities must match the true labels."""
        y = np.array([3, 1, 4, 1, 5, 9])
        y_ = to_categorical(y)

        # Strongly separated noisy logits, normalised row-wise into probabilities
        logits = np.random.normal(10 * y_, scale=0.1)
        exp_logits = np.exp(logits)
        ps = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
        confidences, labels = get_label_conf(ps)

        for returned in (confidences, labels):
            self.assertEqual(returned.shape, y.shape)

        self.assertTrue(np.all(labels == y))
        self.assertTrue(np.allclose(confidences, 0.99, atol=1e-2))
Beispiel #23
0
    def test_segment_by_class(self):
        """Rows must be grouped by class, also when more classes are requested than occur in the labels."""
        data = np.array([[3, 2], [9, 2], [4, 0], [9, 0]])
        classes = to_categorical(np.array([2, 1, 0, 1]))

        # Three classes: every class owns at least one row
        segments = segment_by_class(data, classes, 3)
        self.assertEqual(len(segments), 3)
        self.assertEqual(len(segments[1]), 2)
        # Row indices of `data` expected in each segment
        for class_idx, rows in enumerate(([2], [1, 3], [0])):
            expected = np.array([data[row] for row in rows])
            self.assertTrue(np.all(np.equal(segments[class_idx], expected)))

        # Four classes: only the number of segments is checked
        segments = segment_by_class(data, classes, 4)
        self.assertEqual(len(segments), 4)
Beispiel #24
0
def poison_loader_clbd(**kwargs):
    """
    Build a clean-label backdoor poisoning attack from keyword configuration.

    The keys `backdoor_kwargs`, `target` and `n_classes` are consumed here;
    every remaining keyword is forwarded to `PoisoningAttackCleanLabelBackdoor`.

    :return: Tuple `(attack, backdoor)`.
    """
    backdoor_config = kwargs.pop("backdoor_kwargs")
    backdoor = poison_loader_GTSRB(**backdoor_config)

    # The attack expects a one-hot target, so expand the sparse class index.
    target_index = kwargs.pop("target")
    class_count = kwargs.pop("n_classes")
    one_hot_target = to_categorical([target_index], class_count)[0]

    attack = PoisoningAttackCleanLabelBackdoor(backdoor=backdoor, target=one_hot_target, **kwargs)
    return attack, backdoor
Beispiel #25
0
    def _perchannel(self, x: np.ndarray) -> np.ndarray:
        """
        Apply thermometer encoding to one channel.

        :param x: Values to encode; each is bucketed by how many of the thresholds `i / num_space`
                  (`i = 1 .. num_space - 1`) it strictly exceeds.
        :return: Flattened 1-D array holding `num_space` encoded entries per element of `x`.
        """
        n_levels = self.num_space

        # Bucket index of each value = number of thresholds it strictly exceeds.
        bucket = np.zeros(shape=x.shape)
        for level in range(1, n_levels):
            bucket[x > float(level) / n_levels] += 1

        one_hot = to_categorical(bucket.reshape(-1), n_levels)

        # Column i becomes the sum of one-hot columns i..end (a suffix sum), so every
        # position up to and including the bucket index is set.
        thermometer = np.cumsum(one_hot[:, ::-1], axis=1, dtype=one_hot.dtype)[:, ::-1]

        return thermometer.flatten()
Beispiel #26
0
    def _test_mnist_targeted(self, classifier):
        """
        Run a targeted `SaliencyMapMethod` attack on the first `NB_TEST` MNIST test samples.

        Random target classes are drawn so that no target equals the true class.
        The adversarial samples must differ from the inputs, must not be all zero,
        and the predictions on them must not all match the true labels.

        :param classifier: Classifier to attack.
        """
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
        true_labels = np.argmax(y_test, axis=1)

        # Generate random target classes; redraw until none coincides with the true class
        nb_classes = np.unique(true_labels).shape[0]
        while True:
            targets = np.random.randint(nb_classes, size=NB_TEST)
            if not (targets == true_labels).any():
                break

        # Perform attack
        attack = SaliencyMapMethod(classifier, theta=1)
        one_hot_targets = to_categorical(targets, nb_classes)
        x_test_adv = attack.generate(x_test, y=one_hot_targets, batch_size=100)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        correct = np.argmax(y_pred, axis=1) == true_labels
        acc = np.sum(correct) / y_test.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%', (acc * 100))
    def test_keras_mnist_L2(self):
        """
        Second test with the KerasClassifier.

        Runs a targeted and then an untargeted `CarliniL2Method` attack, checks
        that adversarial samples stay within [0, 1], and verifies that the
        original test data is left unmodified.
        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Build KerasClassifier
        krc = get_classifier_kr(from_logits=True)

        # First attack (targeted)
        targeted_attack = CarliniL2Method(classifier=krc, targeted=True, max_iter=10)
        y_target = [6, 6, 7, 4, 9, 7, 9, 0, 1, 0]
        one_hot_target = to_categorical(y_target, nb_classes=10)
        x_test_adv = targeted_attack.generate(x_test, y=one_hot_target)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW2 Target: %s', y_target)
        logger.debug('CW2 Actual: %s', y_pred_adv)
        hit_target = y_target == y_pred_adv
        logger.info('CW2 Success Rate: %.2f', (np.sum(hit_target) / float(len(y_target))))
        self.assertTrue(hit_target.any())

        # Second attack (untargeted)
        untargeted_attack = CarliniL2Method(classifier=krc, targeted=False, max_iter=10)
        x_test_adv = untargeted_attack.generate(x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW2 Target: %s', y_target)
        logger.debug('CW2 Actual: %s', y_pred_adv)
        missed_target = y_target != y_pred_adv
        logger.info('CW2 Success Rate: %.2f', (np.sum(missed_target) / float(len(y_target))))
        self.assertTrue(missed_target.any())

        # Check that x_test has not been modified by attack and classifier
        max_drift = float(np.max(np.abs(x_test_original - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)

        # Clean-up
        k.clear_session()
Beispiel #28
0
    def generate(self, x, **kwargs):
        """
        Train an adversarial patch on the given inputs and return it with its mask.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :return: Tuple of the trained patch and the result of `_get_circular_patch_mask()`.
        :rtype: `tuple`
        """
        logger.info('Creating adversarial patch.')
        self.set_params(**kwargs)
        self.patch = np.random.standard_normal(size=self.patch_shape) * 20.0

        for step in range(1, self.max_iter + 1):
            # Log the first step and then every 100th step.
            if step == 1 or step % 100 == 0:
                logger.info('Training Step: %i', step)

            # Keep each patch channel inside its configured value range.
            if self.clip_patch is not None:
                for channel, (lower, upper) in enumerate(self.clip_patch):
                    channel_values = self.patch[:, :, channel]
                    self.patch[:, :, channel] = np.clip(channel_values, a_min=lower, a_max=upper)

            patched_images, patch_mask_transformed, transforms = self._augment_images_with_random_patch(
                x, self.patch)

            # One-hot target labels, one row per input sample.
            target_labels = to_categorical(
                np.broadcast_to(np.array(self.target), x.shape[0]),
                self.classifier.nb_classes)
            gradients = self.classifier.loss_gradient(patched_images, target_labels)

            # NOTE(review): the loop runs over `self.batch_size` although the gradients are
            # computed for `x.shape[0]` samples -- confirm these are meant to be equal.
            accumulated = np.zeros_like(self.patch)
            for i_batch in range(self.batch_size):
                accumulated += self._reverse_transformation(
                    gradients[i_batch, :, :, :],
                    patch_mask_transformed[i_batch, :, :, :],
                    transforms[i_batch])

            mean_gradient = accumulated / self.batch_size
            self.patch -= mean_gradient * self.learning_rate

        return self.patch, self._get_circular_patch_mask()
Beispiel #29
0
    def predict(self, x, **kwargs):
        """
        Perform prediction for a batch of inputs.

        Uses the wrapped model's `predict_proba` when it is callable; otherwise
        falls back to `predict` and one-hot encodes its output.

        :param x: Test set.
        :type x: `np.ndarray`
        :return: Array of predictions of shape `(nb_inputs, nb_classes)`.
        :rtype: `np.ndarray`
        :raises ValueError: If the model has neither a callable `predict_proba` nor `predict`.
        """
        # Apply defences
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)

        model = self._model

        if callable(getattr(model, 'predict_proba', None)):
            return model.predict_proba(x_preprocessed)

        if callable(getattr(model, 'predict', None)):
            # Hard labels only: expand them to one-hot rows over the model's classes.
            return to_categorical(model.predict(x_preprocessed), nb_classes=model.classes_.shape[0])

        raise ValueError('The provided model does not have methods `predict_proba` or `predict`.')
Beispiel #30
0
    def _perchannel(self, x):
        """
        Apply thermometer encoding to one channel.

        :param x: Sample to encode with shape `(batch_size, width, height)`.
        :type x: `np.ndarray`
        :return: Encoded sample with the shape of `x` plus a trailing axis of length `num_space`.
        :rtype: `np.ndarray`
        """
        n_levels = self.num_space

        # Bucket index of each value = number of thresholds i / num_space it strictly exceeds.
        bucket = np.zeros(shape=x.shape)
        for level in range(1, n_levels):
            bucket[x > float(level) / n_levels] += 1

        one_hot = to_categorical(bucket.reshape(-1), n_levels)

        # Column i becomes the sum of one-hot columns 0..i (a prefix sum), so every
        # position from the bucket index onwards is set.
        thermometer = np.cumsum(one_hot, axis=1, dtype=one_hot.dtype)

        return thermometer.reshape(list(x.shape) + [n_levels])