def test_attack_eval():
    """
    Compare AttackEvaluate metrics with reference values on fixed data.

    Three random "clean" samples (seeded, so reproducible) and copies of them
    shifted by a constant 0.001 are passed to AttackEvaluate together with
    hand-written confidence vectors. The seven resulting metrics must match
    the hard-coded reference list within rtol = atol = 1e-4.
    """
    # Fixed seed: the reference values below are only valid for this data.
    np.random.seed(1024)
    sample_shape = (3, 512, 512, 3)
    clean_x = np.random.normal(size=sample_shape)
    clean_y = np.array([[0.1, 0.1, 0.2, 0.6],
                        [0.1, 0.7, 0.0, 0.2],
                        [0.8, 0.1, 0.0, 0.1]])
    # Adversarial inputs: every element is offset by the same small constant.
    perturbed_x = clean_x + np.full(sample_shape, 0.001)
    perturbed_y = np.array([[0.1, 0.1, 0.2, 0.6],
                            [0.1, 0.0, 0.8, 0.1],
                            [0.0, 0.9, 0.1, 0.0]])

    evaluator = AttackEvaluate(clean_x, clean_y, perturbed_x, perturbed_y)

    # Gather every metric in the order used by the reference list.
    mis_rate = evaluator.mis_classification_rate()
    adv_conf = evaluator.avg_conf_adv_class()
    dist_l0, dist_l2, dist_linf = evaluator.avg_lp_distance()
    ssim = evaluator.avg_ssim()
    noise_tolerance = evaluator.nte()
    actual = [mis_rate, adv_conf, dist_l0, dist_l2, dist_linf, ssim,
              noise_tolerance]

    reference = [0.6666, 0.8500, 1.0, 0.0009, 0.0001, 0.9999, 0.75]
    assert np.allclose(actual, reference, rtol=0.0001, atol=0.0001)
Example #2
0
def test_lbfgs_attack():
    """
    LBFGS-Attack test for CPU device.

    Loads a trained LeNet5 checkpoint, collects ``batch_num`` batches from the
    MNIST test directory, runs a targeted LBFGS attack on them and logs the
    accuracy before/after the attack together with the AttackEvaluate
    metrics. Nothing is returned or asserted; all results go to LOGGER.
    """
    # upload trained network
    ckpt_path = '../../../common/networks/lenet5/trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "../../../common/dataset/MNIST/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size)

    # prediction accuracy before attack
    model = Model(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    batches = ds.create_tuple_iterator(output_numpy=True)
    for batch_idx, data in enumerate(batches, start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(),
                                axis=1)
        predict_labels.append(pred_labels)
        if batch_idx >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)

    # attacking
    is_targeted = True
    if is_targeted:
        targeted_labels = np.random.randint(0, 10, size=len(true_labels)).astype(np.int32)
        # A target equal to the true class is no attack at all: move such
        # targets to the next class (wrapping 9 -> 0).
        same_as_true = targeted_labels == true_labels
        targeted_labels[same_as_true] = (targeted_labels[same_as_true] + 1) % 10
    else:
        targeted_labels = true_labels.astype(np.int32)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    attack = LBFGS(net, is_targeted=is_targeted, loss_fn=loss)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    adv_data = attack.batch_generate(np.concatenate(test_images),
                                     targeted_labels,
                                     batch_size=batch_size)
    stop_time = time.perf_counter()
    pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy()
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)

    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %s",
                accuracy_adv)
    # Both image arrays are transposed with axes (0, 2, 3, 1) before
    # evaluation; presumably this converts NCHW to NHWC -- TODO confirm.
    attack_evaluate = AttackEvaluate(np.concatenate(test_images).transpose(0, 2, 3, 1),
                                     np.eye(10)[true_labels],
                                     adv_data.transpose(0, 2, 3, 1),
                                     pred_logits_adv,
                                     targeted=is_targeted,
                                     target_label=targeted_labels)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
                     'samples and adversarial samples are: %s',
                attack_evaluate.avg_lp_distance())
    LOGGER.info(TAG, 'The average structural similarity between original '
                     'samples and adversarial samples are: %s',
                attack_evaluate.avg_ssim())
    # Elapsed attack time divided by the number of attacked samples.
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time)/(batch_num*batch_size))
Example #3
0
def test_pso_attack_on_mnist():
    """
    PSO-Attack test

    Loads a trained LeNet5 checkpoint, collects ``batch_num`` batches from the
    MNIST test directory, runs PSOAttack on them and logs the accuracy
    before/after the attack, the query statistics and the AttackEvaluate
    metrics. Nothing is returned or asserted; all results go to LOGGER.
    """
    # upload trained network
    ckpt_path = '../../../common/networks/lenet5/trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "../../../common/dataset/MNIST/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size)

    # prediction accuracy before attack
    model = ModelToBeAttacked(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    batches = ds.create_tuple_iterator(output_numpy=True)
    for batch_idx, data in enumerate(batches, start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(images), axis=1)
        predict_labels.append(pred_labels)
        if batch_idx >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)

    # attacking
    attack = PSOAttack(model, bounds=(0.0, 1.0), pm=0.5, sparse=True)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    success_list, adv_data, query_list = attack.generate(
        np.concatenate(test_images), np.concatenate(test_labels))
    stop_time = time.perf_counter()
    LOGGER.info(TAG, 'success_list: %s', success_list)
    LOGGER.info(TAG, 'average of query times is : %s', np.mean(query_list))
    pred_logits_adv = model.predict(adv_data)
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %s",
                accuracy_adv)
    # One-hot encode the ground-truth labels (10 classes) for the evaluator.
    test_labels_onehot = np.eye(10)[true_labels]
    attack_evaluate = AttackEvaluate(np.concatenate(test_images),
                                     test_labels_onehot, adv_data,
                                     pred_logits_adv)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())
    LOGGER.info(
        TAG, 'The average structural similarity between original '
        'samples and adversarial samples are: %s', attack_evaluate.avg_ssim())
    # Elapsed attack time divided by the number of attacked samples.
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time) / (batch_num * batch_size))
Example #4
0
def test_salt_and_pepper_attack_on_mnist():
    """
    Salt-and-Pepper-Attack test

    Loads a trained LeNet5 checkpoint, collects ``batch_num`` batches from the
    MNIST test directory, runs an untargeted SaltAndPepperNoiseAttack on them
    and logs the accuracy before/after the attack, the query statistics and
    the AttackEvaluate metrics. Nothing is returned or asserted; all results
    go to LOGGER.
    """
    # upload trained network
    ckpt_path = '../../../common/networks/lenet5/trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "../../../common/dataset/MNIST/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size)

    # prediction accuracy before attack
    model = ModelToBeAttacked(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    batches = ds.create_tuple_iterator(output_numpy=True)
    for batch_idx, data in enumerate(batches, start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(images), axis=1)
        predict_labels.append(pred_labels)
        if batch_idx >= batch_num:
            break
    LOGGER.debug(
        TAG,
        'model input image shape is: {}'.format(np.array(test_images).shape))
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %g", accuracy)

    # attacking
    is_target = False
    attack = SaltAndPepperNoiseAttack(model=model,
                                      is_targeted=is_target,
                                      sparse=True)
    if is_target:
        targeted_labels = np.random.randint(0, 10, size=len(true_labels))
        # A target equal to the true class is no attack at all: move such
        # targets to the next class (wrapping 9 -> 0).
        same_as_true = targeted_labels == true_labels
        targeted_labels[same_as_true] = (targeted_labels[same_as_true] + 1) % 10
    else:
        targeted_labels = true_labels
    LOGGER.debug(
        TAG, 'input shape is: {}'.format(np.concatenate(test_images).shape))
    success_list, adv_data, query_list = attack.generate(
        np.concatenate(test_images), targeted_labels)
    # Turn the boolean success mask into the indices of successful samples.
    success_list = np.arange(success_list.shape[0])[success_list]
    LOGGER.info(TAG, 'success_list: %s', success_list)
    LOGGER.info(TAG, 'average of query times is : %s', np.mean(query_list))
    adv_preds = []
    for ite_data in adv_data:
        pred_logits_adv = model.predict(ite_data)
        # rescale predict confidences into (0, 1).
        pred_logits_adv = softmax(pred_logits_adv, axis=1)
        adv_preds.extend(pred_logits_adv)
    # The predicted label is the index of the highest confidence (argmax).
    # Comparing the confidence value itself (np.max) with integer labels
    # would report a meaningless accuracy.
    accuracy_adv = np.mean(np.equal(np.argmax(adv_preds, axis=1), true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %g",
                accuracy_adv)
    # One-hot encode the ground-truth labels (10 classes) for the evaluator.
    test_labels_onehot = np.eye(10)[true_labels]
    attack_evaluate = AttackEvaluate(np.concatenate(test_images),
                                     test_labels_onehot,
                                     adv_data,
                                     adv_preds,
                                     targeted=is_target,
                                     target_label=targeted_labels)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())