Exemple #1
0
    def explain(self, request: Dict) -> Dict:
        """Build an adversarial example for the image carried in ``request``.

        ``request["instances"]`` is read positionally: element 0 is the image
        and element 1 is the label that is handed to the attack as ``y``.
        Only the ``"squareattack"`` adversary type (compared
        case-insensitively) is supported.

        Args:
            request: Payload with an ``"instances"`` sequence of at least two
                elements (image, label).

        Returns:
            ``{"explanations": {...}}`` whose inner dict has the keys
            ``"adversarial_example"``, ``"L2 error"``,
            ``"adversarial_prediction"`` and ``"prediction"``.

        Raises:
            Exception: If the inputs cannot be turned into NumPy arrays, if
                ``self.adversary_type`` is not supported, or if running the
                attack fails. The underlying error is chained as the cause.
        """
        image = request["instances"][0]
        label = request["instances"][1]
        try:
            inputs = np.array(image)
            # The attack is given NumPy arrays for both x and y; a decoded
            # request payload would otherwise pass the label through as-is.
            label = np.array(label)
            logging.info("Calling explain on image of shape %s", (inputs.shape,))
        except Exception as e:
            raise Exception(
                "Failed to initialize NumPy array from inputs: %s, %s" % (e, request["instances"])) from e
        try:
            # Fail loudly instead of falling off the end and returning None
            # when an unsupported adversary type was configured.
            if self.adversary_type.lower() != "squareattack":
                raise Exception("Invalid adversary type: %s" % self.adversary_type)

            # Infinite clip bounds: no value range is imposed on the inputs.
            classifier = BlackBoxClassifier(self._predict, inputs.shape, self.nb_classes,
                                            clip_values=(-np.inf, np.inf))
            # Prediction on the unmodified input, taken before the attack runs.
            preds = np.argmax(classifier.predict(inputs, batch_size=1))
            classifier.channels_first = False
            attack = SquareAttack(estimator=classifier, max_iter=self.max_iter)

            x_adv = attack.generate(x=inputs, y=label)

            adv_preds = np.argmax(classifier.predict(x_adv))
            # L2 norm of the flattened difference between the first
            # adversarial sample and the original inputs.
            l2_error = np.linalg.norm(np.reshape(x_adv[0] - inputs, [-1]))

            return {"explanations": {"adversarial_example": x_adv.tolist(), "L2 error": l2_error.tolist(),
                                     "adversarial_prediction": adv_preds.tolist(), "prediction": preds.tolist()}}
        except Exception as e:
            raise Exception("Failed to explain %s" % e) from e
Exemple #2
0
        elif out_string == 'assent':
            out_label.append(1)
        else: 
            out_label.append(2)
    
    return to_categorical(out_label, 3)

# 1.3 Initialise the black-box classifier object
classifier = BlackBoxClassifier(
    predict,
    image_target.shape,
    3,
    clip_values=(0, 255),
)

# Maps a predicted class index to its human-readable label.
label_dict = dict(enumerate(('dissent', 'assent', 'other')))

# 1.4 Display the image we want to target and print what the classifier
# reports for it.
plt.imshow(image_target)
plt.show()
target_batch = np.array([image_target])
target_class = np.argmax(classifier.predict(target_batch))
print('Tesseract output is: ' + label_dict[target_class])
"""
Tesseract output is: dissent
"""

# 1.5 Display the image carrying the label we want to perturb to and print
# what the classifier reports for it.
plt.imshow(image_init)
plt.show()
init_batch = np.array([image_init])
init_class = np.argmax(classifier.predict(init_batch))
print('Tesseract output is: ' + label_dict[init_class])
"""
Tesseract output is: assent
"""

# 2. Attack using HopSkipJump

# NOTE(review): max_iter=0 at construction time -- presumably the iteration
# budget is raised by the code that follows; confirm before changing it.
attack = HopSkipJump(
    classifier=classifier,
    targeted=True,
    norm=2,
    max_iter=0,
    max_eval=1000,
    init_eval=10,
)