Esempio n. 1
0
def main(src_model_name):
    """Evaluate a saved MNIST model's accuracy on the test split.

    Args:
        src_model_name: path to the serialized Keras model to evaluate.
    """
    # Fixed seeds so the reported accuracy is reproducible across runs.
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    # Symbolic placeholders for input images and one-hot labels.
    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    # Only the test split is needed here.
    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)
    prediction = src_model(x)
    eval_params = {'batch_size': FLAGS.BATCH_SIZE}

    acc = model_eval(K.get_session(),
                     x,
                     y,
                     prediction,
                     X_test,
                     Y_test,
                     args=eval_params)
    # BUG FIX: converted the Python 2 print statement to a print() call so
    # this block runs under Python 3 like the rest of the file.
    print('{}: {:.3f}'.format(basename(src_model_name), acc))
Esempio n. 2
0
def _predict_double_digits(tens, ones):
    """Predict a two-digit number from a tens-digit and a ones-digit image."""
    # Flatten each image batch to (N, 784) and scale pixels into [0, 1].
    tens_flat = tens.reshape(-1, 28 * 28) / 255.0
    ones_flat = ones.reshape(-1, 28 * 28) / 255.0

    # Load the model, then predict each digit separately.
    model = load_model()
    tens_digit = np.argmax(model.predict(tens_flat))
    ones_digit = np.argmax(model.predict(ones_flat))

    # Concatenate the two digits into a string such as "42".
    return str(tens_digit) + str(ones_digit)
def main(model_name, model_type):
    """Train a substitute model against a black-box MNIST classifier and
    report its accuracy on held-out legitimate samples.

    Args:
        model_name: path prefix under which the trained substitute is saved.
        model_type: architecture selector forwarded to train_sub().
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_mnist_flags()

    # BUG FIX: an epoch count is an integer, not a boolean; DEFINE_bool was
    # the wrong flag type for args.epochs.
    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:300]
    Y_sub = np.argmax(Y_test[:300], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[300:]
    Y_test = Y_test[300:]

    x = K.placeholder((None,
                       FLAGS.IMAGE_ROWS,
                       FLAGS.IMAGE_COLS,
                       FLAGS.NUM_CHANNELS
                       ))

    y = K.placeholder(shape=(None, FLAGS.NUM_CLASSES))

    # Load Black-Box model.
    # NOTE(review): `blackbox_name` is not defined in this function; it is
    # presumably a module-level global -- confirm before running.
    model = load_model(blackbox_name)
    prediction = model(x)

    # Train the substitute via Jacobian-based dataset augmentation.
    train_sub_out = train_sub(K.get_session(), x, y, prediction, X_sub, Y_sub,
                              nb_classes=FLAGS.NUM_CLASSES,
                              nb_epochs_s=args.epochs,
                              batch_size=FLAGS.BATCH_SIZE,
                              learning_rate=0.001, data_aug=6, lmbda=0.1,
                              model_type=model_type)
    model_sub, preds_sub = train_sub_out
    eval_params = {
        'batch_size': FLAGS.BATCH_SIZE
    }

    # Finally print the result!
    accuracy = model_eval(K.get_session(), x, y, preds_sub, X_test, Y_test,
                          args=eval_params)
    print('Test accuracy of substitute on legitimate samples: %.3f%%' % accuracy)

    # Persist both weights and architecture.
    save_model(model_sub, model_name)
    json_string = model_sub.to_json()
    # BUG FIX: 'wr' is not a valid open() mode and raises ValueError;
    # 'w' (write text) is the intended mode.
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
Esempio n. 4
0
def main():
    """Evaluate a saved model on MNIST or on one or more .npz drawing sets."""
    x = K.placeholder((None, 28, 28, 1))

    model = load_model(args.model)
    logits = model(x)
    prediction = K.softmax(logits)

    if args.mnist:
        # Built-in MNIST test split.
        _, _, X, Y = data_mnist(one_hot=False)
        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)

    elif args.n is None:
        # A single dataset file.
        with np.load(args.dataset) as data:
            X = data['drawings'] / 255
            Y = data['Y'].reshape(-1, )

        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)

    else:
        # A numbered series of dataset files: args.dataset is a % template.
        result = []

        for file_idx in tqdm(range(1, args.n + 1)):
            with np.load(args.dataset % file_idx) as data:
                X = data['drawings'] / 255
                Y = data['Y'].reshape(-1, )

            session = K.get_session()
            predictions = session.run(
                [prediction], feed_dict={x: X, K.learning_phase(): 0})[0]
            argmax = np.argmax(predictions, axis=1)

            # For an untargeted attack, success means a mismatch with the
            # true label; otherwise count plain matches.
            if args.attack and not args.targeted:
                equal = argmax != Y
            else:
                equal = argmax == Y

            result.append(np.mean(equal))

        print(result)
def main(attack, src_model_name, target_model_name):
    """Craft adversarial MNIST examples on a source model and measure the
    white-box error plus (optionally) black-box transfer to a target model.

    Args:
        attack: "test" (just report errors), "fgs"/"rand_fgs" (one-shot), or
            "ifgs" (iterative).
        src_model_name: path of the model used to generate the attack.
        target_model_name: path of the black-box model, or None.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    # Flattened image dimensionality (28x28 grayscale).
    dim = 28 * 28 * 1

    x = K.placeholder((None,
                       28,
                       28,
                       1))

    y = K.placeholder((None, 10))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(target_model_name), err))

        return

    # Targeted mode: replace Y_test with random target classes that differ
    # from each sample's true class.
    if args.targeted_flag == 1:
        targets = []
        allowed_targets = list(range(10))
        for i in range(len(Y_test)):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(10))
        targets = np.array(targets)
        print(targets)
        targets_cat = np_utils.to_categorical(targets, 10).astype(np.float32)
        Y_test = targets_cat

    logits = src_model(x)
    print('logits', logits)

    # NOTE(review): `grad` is only bound for loss_type in {'xent', 'cw'};
    # any other value would leave it undefined below -- confirm CLI choices.
    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
        assert grad is not None
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    # Targeted attacks descend toward the target class, so flip the gradient.
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    # NOTE(review): `eps_list` is not defined in this function -- it is
    # presumably a module-level global; verify before running.
    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            assert grad is not None
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            # Restrict to the first 1000 samples to keep iteration tractable.
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = src_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                # Take a small step, then project back into the eps-ball
                # around the original input.
                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            # Keep pixel values in the valid [0, 1] range.
            adv_x = K.clip(adv_x, 0, 1)

        print('Generating adversarial samples')
        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

        avg_l2_perturb = np.mean(np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
        # For targeted attacks, success rate is the complement of the error.
        if args.targeted_flag == 1:
            err = 100.0 - err
        print('{}->{}: {:.1f}'.format(src_model_name, src_model_name, err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(src_model_name,
                                                     basename(target_model_name), err,
                                                     avg_l2_perturb, eps, attack))
Esempio n. 6
0
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples on a source model and evaluate their
    transfer to one or more target models.

    Args:
        attack: "test", "fgs", "rand_fgs", "ifgs", or "CW".
        src_model_name: path of the model used to craft the attack.
        target_model_names: paths of the black-box models to evaluate.

    BUG FIX: the Python 2 print statements were converted to print() calls
    so this block runs under Python 3 like the rest of the file.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        # Project the CW perturbation back into the eps linf-ball.
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err))

        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      err))
Esempio n. 7
0
 def select_model(self,event):
     """Tkinter event callback: prompt the user for a model file and load it.

     Rebinds the module-level ``model`` used by the rest of the GUI.
     """
     global model
     # Blocking file-open dialog; returns '' if the user cancels.
     filename = tkinter.filedialog.askopenfilename()
     # NOTE(review): no guard for a cancelled dialog, so load_model('') may
     # be called -- confirm desired behavior.
     model = mnist.load_model(filename)
Esempio n. 8
0
def init_axes(ax, title):
    ax.cla()
    ax.set_xlim(0, 28)
    ax.set_ylim(28, 0)
    ax.set_title(title)
    return ax

# Top-level GUI setup: two matplotlib panels plus a hidden Tk root.
fig = plt.figure()
# Left panel: the user's drawing canvas.
ax = plt.subplot(221)
init_axes(ax, "write a digit")

# Right panel: the preprocessed 28x28 view fed to the model.
ax2 = plt.subplot(222)
init_axes(ax2, "preprocessed image")

# Trained MNIST model used for live predictions.
model = mnist.load_model()
# Hidden Tk root so file dialogs can be shown without a Tk main window.
tkroot = tkinter.Tk()
tkroot.withdraw()

# Mouse-trace coordinates, the 28x28 raster, and preprocessed samples.
x,y = [], []
a = np.zeros((28,28))
prepared = []
# create empty plot
points, = ax.plot([], [], 'o')
# ax.grid(True)

# cache the background
background = fig.canvas.copy_from_bbox(ax.bbox)

# Tracks whether the mouse button is currently up (drawing paused).
is_released = True
def main(target_model_name, target=None):
    """Run white-box and gradient-estimation attacks against a target model.

    Args:
        target_model_name: path to the serialized model under attack.
        target: fixed target class used when RANDOM is False; otherwise
            targets are derived per args.method / RANDOM.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder((None, IMAGE_ROWS, IMAGE_COLS, NUM_CHANNELS))

    y = K.placeholder((None, NUM_CLASSES))

    dim = int(IMAGE_ROWS * IMAGE_COLS)

    _, _, X_test_ini, Y_test = data_mnist()
    print('Loaded data')

    Y_test_uncat = np.argmax(Y_test, axis=1)

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    # Choose attack targets: true labels for untargeted methods, a fixed
    # class, or a random class different from each sample's true class.
    if '_un' in args.method:
        targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
    elif RANDOM is False:
        targets = np.array([target] * (BATCH_SIZE * BATCH_EVAL_NUM))
    elif RANDOM is True:
        targets = []
        allowed_targets = list(range(NUM_CLASSES))
        for i in range(BATCH_SIZE * BATCH_EVAL_NUM):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(NUM_CLASSES))
        targets = np.array(targets)
        # BUG FIX: Python 2 print statement converted to a print() call.
        print(targets)
    targets_cat = np_utils.to_categorical(targets,
                                          NUM_CLASSES).astype(np.float32)

    if args.norm == 'linf':
        eps_list = [0.3]
        if "_iter" in args.method:
            eps_list = [0.3]
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 2.0, 5))
        eps_list.extend(np.linspace(2.5, 9.0, 14))
    print(eps_list)

    random_perturb = np.random.randn(*X_test_ini.shape)

    # Initial random step of size args.alpha, clipped to valid pixel range.
    if args.norm == 'linf':
        random_perturb_signed = np.sign(random_perturb)
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_signed,
                         CLIP_MIN, CLIP_MAX)
    elif args.norm == 'l2':
        # BUG FIX: the original referenced `curr_len`, which is undefined in
        # this function (NameError on the l2 path); the intended value is the
        # number of test samples being perturbed.
        num_samples = len(X_test_ini)
        random_perturb_unit = random_perturb / np.linalg.norm(
            random_perturb.reshape(num_samples, dim),
            axis=1)[:, None, None, None]
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_unit,
                         CLIP_MIN, CLIP_MAX)

    for eps in eps_list:
        if '_iter' in args.method:
            white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test,
                                X_test_ini, Y_test_uncat, targets, targets_cat,
                                eps, dim, args.beta)
            estimated_grad_attack_iter(X_test, X_test_ini, x, targets,
                                       prediction, logits, eps, dim, args.beta)
        else:
            white_box_fgsm(prediction, target_model, x, logits, y, X_test,
                           X_test_ini, Y_test_uncat, targets, targets_cat, eps,
                           dim)
Esempio n. 10
0
def main(target_model_name):
    """Run the class-mean baseline attack over an eps sweep and report, for
    each eps, the adversarial success rate and average L2 perturbation.

    Args:
        target_model_name: path to the serialized model under attack.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    # Per-class pixel means and distance statistics between class means.
    means, class_frac = class_means(X_test, Y_test_uncat)

    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    if args.norm == 'linf':
        eps_list = list(np.linspace(0.0, 0.1, 5))
        eps_list.extend(np.linspace(0.2, 0.5, 7))
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 9.0, 28))

    for eps in eps_list:
        eps_orig = eps
        # Split the budget: `alpha` is spent on a random step, the remainder
        # on the deterministic class-mean step.
        if args.alpha > eps:
            alpha = eps
            eps = 0
        elif eps >= args.alpha:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0
        # Process each true class i as one batch.
        for i in range(FLAGS.NUM_CLASSES):
            curr_indices = np.where(Y_test_uncat == i)
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)
            if args.targeted_flag == 1:
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
                allowed_targets.remove(i)

            random_perturb = np.random.randn(*X_test_ini.shape)

            # Random step of size alpha (sign direction for linf, unit L2
            # direction for l2), clipped to the valid pixel range.
            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(
                    X_test_ini + alpha * random_perturb_signed, CLIP_MIN,
                    CLIP_MAX)
            elif args.norm == 'l2':
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim), axis=1)[:, None,
                                                                   None, None]
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_unit,
                                      CLIP_MIN, CLIP_MAX)

            # Direction of the deterministic step: toward the closest class
            # mean (untargeted) or toward a random other class (targeted).
            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros(
                    (curr_len, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                     FLAGS.NUM_CHANNELS))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            # NOTE(review): when targeted_flag == 1 and norm == 'l2',
            # `mean_diff_vec` is never assigned, so `perturb` would be
            # undefined -- confirm that combination is never used.
            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(
                    mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            # Getting the norm of the perturbation
            perturb_norm = np.linalg.norm(
                (X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = K.get_session().run([prediction],
                                                  feed_dict={
                                                      x: X_adv,
                                                      K.learning_phase(): 0
                                                  })[0]

            # Untargeted success = any misclassification; targeted success =
            # prediction equals the sampled target class.
            if args.targeted_flag == 0:
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) != Y_test_curr)
            elif args.targeted_flag == 1:
                print(targets)
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) == np.array(targets))

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / FLAGS.NUM_CLASSES

        print('{}, {}, {}'.format(eps, alpha, err))
        print('{}'.format(avg_l2_perturb))
# Module-level attack setup: seeds, symbolic placeholders, and target model.
np.random.seed(0)
tf.set_random_seed(0)

x = K.placeholder(
    (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

y = K.placeholder((None, FLAGS.NUM_CLASSES))

dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS)

_, _, X_test, Y_test = data_mnist()
print('Loaded data')

# target model for crafting adversarial examples
target_model = load_model(args.target_model)
# BUG FIX: the original read `target_model_name` before it was ever assigned
# (NameError); derive it from the same CLI argument used to load the model.
target_model_name = basename(args.target_model)

logits = target_model(x)
prediction = K.softmax(logits)

sess = tf.Session()
print('Creating session')

# Use the true labels of the first BATCH_SIZE * BATCH_EVAL_NUM samples as
# the (untargeted) attack targets.
targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
targets_cat = np_utils.to_categorical(targets,
                                      FLAGS.NUM_CLASSES).astype(np.float32)
def main(measures, src_model_names):
    """Compute an ensemble-diversity measure over a set of substitute models.

    Args:
        measures: which statistic to compute --
            "Q"    average pairwise Q statistic,
            "p"    average pairwise correlation coefficient,
            "Ent"  entropy measure,
            "KW"   Kohavi-Wolpert variance,
            "test" per-model counts of correctly recognized samples.
        src_model_names: paths of the substitute models to compare.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    X_train, Y_train, X_test, Y_test = data_mnist()

    # source models whose diversity is being measured
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    def _pair_counts(model_i, model_j, N):
        """Return (a, b, c, d) over N samples: a = both models correct,
        b = only i correct, c = only j correct, d = both wrong."""
        a = b = c = d = 0.0
        for n in range(N):
            Ci = tf_compute_C(model_i, x, y, X_test[n:n + 1], Y_test[n:n + 1])
            Cj = tf_compute_C(model_j, x, y, X_test[n:n + 1], Y_test[n:n + 1])
            # BUG FIX: the original used '&', which binds tighter than '=='
            # and produced a chained comparison ('Ci[0] == (1 & Cj[0]) == 1');
            # logical 'and' is the intended semantics.
            if Ci[0] == 1 and Cj[0] == 1:
                a += 1
            elif Ci[0] == 0 and Cj[0] == 0:
                d += 1
            elif Ci[0] == 0 and Cj[0] == 1:
                c += 1
            elif Ci[0] == 1 and Cj[0] == 0:
                b += 1
        print(a, b, c, d)
        return a, b, c, d

    if measures == "Q":
        X_test = X_test[0:100]
        Y_test = Y_test[0:100]
        N = len(X_test)
        k = len(src_model_names)
        Qij = [([None] * k) for p in range(k)]
        for i in range(k - 1):
            for j in range(i + 1, k):
                a, b, c, d = _pair_counts(src_models[i], src_models[j], N)
                Qij[i][j] = (a * d - b * c) / (a * d + b * c)

        # Average the Q statistic over all unordered model pairs.
        Qij_SUM = 0.0
        for i in range(k - 1):
            for j in range(i + 1, k):
                Qij_SUM += Qij[i][j]
        QAV = (2.0 / (k * (k - 1))) * Qij_SUM
        print('The value of the Q statistic: %.4f' % (QAV))
        return

    if measures == "p":
        X_test = X_test[0:100]
        Y_test = Y_test[0:100]
        N = len(X_test)
        k = len(src_model_names)
        Pij = [([None] * k) for p in range(k)]
        for i in range(k - 1):
            for j in range(i + 1, k):
                a, b, c, d = _pair_counts(src_models[i], src_models[j], N)
                Pij[i][j] = (a * d - b * c) / math.sqrt(
                    (a + b) * (a + c) * (b + d) * (d + c))

        # Average the correlation coefficient over all unordered pairs.
        Pij_SUM = 0.0
        for i in range(k - 1):
            for j in range(i + 1, k):
                Pij_SUM += Pij[i][j]
        PAV = (2.0 / (k * (k - 1))) * Pij_SUM
        print('The value of the correlation coefficient: %.4f' % (PAV))
        return

    if measures == "Ent":
        X_test = X_test[0:100]
        Y_test = Y_test[0:100]
        k = len(src_model_names)
        N = len(X_test)
        num = 0
        for i in range(N):
            lxt = 0
            print(i)
            for (name, src_model) in zip(src_model_names, src_models):
                C = tf_compute_C(src_model, x, y, X_test[i:i + 1],
                                 Y_test[i:i + 1])
                # lxt denotes the number of substitutes that accurately
                # recognize sample x.
                lxt += C[0]  # lxt = 0, 1, 2, 3, ...
            m = min(lxt, k - lxt)
            num += ((1.0 / (k - math.ceil(k / 2.0))) * m)
        Ent = (1.0 / N) * num
        print('The value of the entropy measure: %.4f' % (Ent))
        return

    if measures == "KW":
        X_test = X_test[0:100]
        Y_test = Y_test[0:100]
        k = len(src_model_names)
        N = len(X_test)
        num = 0
        for i in range(N):
            lxt = 0
            print(i)
            for (name, src_model) in zip(src_model_names, src_models):
                C = tf_compute_C(src_model, x, y, X_test[i:i + 1],
                                 Y_test[i:i + 1])
                # lxt denotes the number of substitutes that accurately
                # recognize sample x.
                lxt += C[0]  # lxt = 0, 1, 2, 3, ...
            num += (lxt * (k - lxt))
        KW = (1.0 / (N * math.pow(k, 2))) * num
        print('The value of the Kohavi-Wolpert variance: %.4f' % (KW))
        return

    if measures == "test":
        X_test = X_test[0:5]
        Y_test = Y_test[0:5]
        for j in range(1):
            for (name, src_model) in zip(src_model_names, src_models):
                # the number of substitutes from D that correctly
                # recognize X_test[j]
                num = tf_test_acc_num(src_model, x, y, X_test, Y_test)
                print(num)

        return
Esempio n. 13
0
def main(target_model_name):
    """Run the class-mean baseline attack on an MNIST model and save the
    resulting adversarial samples (plus predictions) to an .npz file.

    Args:
        target_model_name: path to the serialized model under attack.

    BUG FIX: the original assert messages used f-strings with newlines
    inside the {...} replacement fields, which is a SyntaxError on Python
    versions before 3.12; they are reformatted onto single lines.
    """
    redraw = False
    save_adv_samples_only = True

    np.random.seed(0)
    tf.set_random_seed(0)

    # Symbolic placeholders for images and one-hot labels.
    x = K.placeholder((None, 28, 28, 1))

    y = K.placeholder((None, 10))

    dim = int(28 * 28 * 1)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    # Per-class pixel means and distance statistics between class means.
    means, class_frac = class_means(X_test, Y_test_uncat)

    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    eps_list = [args.eps]

    adv_images = np.empty((10 * 28, len(eps_list) * 28))

    # Flat buffers that accumulate each class's adversarial batch in order.
    total_X_adv = np.empty(X_test.shape)
    total_Y = np.empty(Y_test_uncat.shape)
    total_adv_pred = np.empty(Y_test_uncat.shape)
    total_adv_prob = np.empty(Y_test_uncat.shape)

    for eps_idx, eps in tqdm(enumerate(eps_list)):
        eps_orig = eps
        # Split the budget: `alpha` on the random step, rest on the
        # class-mean step.
        if args.alpha > eps:
            alpha = eps
            eps = 0
        elif eps >= args.alpha:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0
        NUM_SAVED = 0
        for i in tqdm(range(10)):
            curr_indices = np.where(Y_test_uncat == i)
            NUM_SAMPLES = len(curr_indices[0])
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)
            if args.targeted_flag == 1:
                allowed_targets = list(range(10))
                allowed_targets.remove(i)

            random_perturb = np.random.randn(*X_test_ini.shape)

            # Random step of size alpha, clipped to the valid pixel range.
            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(
                    X_test_ini + alpha * random_perturb_signed, CLIP_MIN,
                    CLIP_MAX)
            elif args.norm == 'l2':
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim),
                    axis=1)[:, None, None, None]
                X_test_curr = np.clip(
                    X_test_ini + alpha * random_perturb_unit, CLIP_MIN,
                    CLIP_MAX)

            # Direction of the deterministic step: toward the closest class
            # mean (untargeted) or toward a random other class (targeted).
            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros((curr_len, 28, 28, 1))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            # NOTE(review): for targeted_flag == 1 with norm == 'l2',
            # `mean_diff_vec` (hence `perturb`) is never set -- confirm that
            # combination is never used.
            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(
                    mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            assert X_adv.shape[1:] == total_X_adv.shape[1:], (
                f'X_adv.shape[1:] = {X_adv.shape[1:]}, '
                f'total_X_adv.shape[1:] = {total_X_adv.shape[1:]}')
            assert X_adv.shape[0] == NUM_SAMPLES, (
                f'X_adv.shape[0] = {X_adv.shape[0]}')
            total_X_adv[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = X_adv

            if redraw:
                # NOTE(review): `sample_x_adv` is only produced by code that
                # was commented out upstream, so enabling `redraw` would
                # raise NameError -- confirm before use.
                sample_x_adv = draw(
                    images=sample_x_adv.reshape(
                        (1, sample_x_adv.shape[0], sample_x_adv.shape[1],
                         sample_x_adv.shape[2])),
                    n=10,
                    alpha=0.8,
                    background='000000'
                )[0]

            # Getting the norm of the perturbation
            perturb_norm = np.linalg.norm(
                (X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = K.get_session().run(
                [prediction], feed_dict={x: X_adv, K.learning_phase(): 0})[0]

            total_adv_pred[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = np.argmax(
                predictions_adv, axis=1)
            total_adv_prob[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = np.max(
                predictions_adv, axis=1)

            # Untargeted success = misclassification; targeted success =
            # prediction equals the sampled target class.
            if args.targeted_flag == 0:
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) != Y_test_curr)
                assert Y_test_curr.shape[1:] == total_Y.shape[1:], (
                    f'Y_test_curr.shape[1:] = {Y_test_curr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert Y_test_curr.shape[0] == NUM_SAMPLES, (
                    f'Y_test_curr.shape[0] = {Y_test_curr.shape[0]}')
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = Y_test_curr
            elif args.targeted_flag == 1:
                targets_arr = np.array(targets)
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) == targets_arr)
                assert targets_arr.shape[1:] == total_Y.shape[1:], (
                    f'targets_arr.shape[1:] = {targets_arr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert targets_arr.shape[0] == NUM_SAMPLES, (
                    f'targets_arr.shape[0] = {targets_arr.shape[0]}')
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = targets_arr

            NUM_SAVED += NUM_SAMPLES

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / 10

        print(f'eps = {eps}, alpha = {alpha}, adv success = {err}')
        print(f'avg l2 pertub = {avg_l2_perturb}')

    if redraw:
        scipy.misc.imsave('baseline_attacks_redrawned.png', adv_images)
    else:
        file = os.path.join(
            ADVERSARIAL_DATA_PATH,
            f'baseline-norm-{args.norm}-alpha-{args.alpha}'
            f'-targeted-{args.targeted_flag}-adv-samples')
        np.savez(file=file, X=total_X_adv, Y=total_Y, pred=total_adv_pred,
                 prob=total_adv_prob)
def main(attack, src_model_names, target_model_name):
    """Craft adversarial MNIST examples on an ensemble of source models and
    report white-box and black-box (transfer) error rates.

    Args:
        attack: attack name -- "test" (just report clean test error), "fgs",
            "rand_fgs", "ifgs" or "CW_ens".
        src_model_names: paths of the source models whose gradients are
            ensembled to craft the adversarial examples.
        target_model_name: path of a held-out target model, or None.

    Relies on the module-level `args`, `eps_list` and `SAVE_FLAG`, and on
    the project helpers (data_mnist, load_model, gen_grad_ens, ...) imported
    at the top of the file.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    # Flattened image dimensionality, used for L2-perturbation reporting.
    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # Source models for crafting adversarial examples.
    src_models = [load_model(name) for name in src_model_names]

    # Concatenated basenames, used to tag pickle files and log lines.
    src_model_name_joint = ''.join(basename(name) for name in src_model_names)

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), err))
        # Fix: this print used to sit outside the guard below, crashing with
        # basename(None) whenever no target model was supplied.
        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(target_model_name), err))

        return

    if args.targeted_flag == 1:
        pickle_name = attack + '_' + src_model_name_joint + '_' + '_' + args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            # Draw, for each test sample, a random target class different
            # from its true label.
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            targets = np.array(targets)
            print(targets)
            targets_cat = np_utils.to_categorical(
                targets, FLAGS.NUM_CLASSES).astype(np.float32)
            Y_test = targets_cat
            if SAVE_FLAG:
                pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        # NOTE(review): the original `eps -= args.alpha` here raised a
        # NameError (`eps` is only bound by the `for eps in eps_list` loop
        # below) and its result would have been discarded by that loop
        # anyway; if the budget should shrink by alpha, eps_list itself
        # should be adjusted -- TODO confirm intent.

    # Symbolic logits of every source model on the clean input.
    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        logits[i] = src_models[i](x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        # Targeted attacks descend (rather than ascend) the loss surface.
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # Iteratively apply the FGSM with a small step size.  The inner
            # loop index is renamed so it no longer shadows the outer one.
            for _ in range(args.num_iter):
                adv_logits = [None] * len(src_model_names)
                for j in range(len(src_model_names)):
                    adv_logits[j] = src_models[j](adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                # Project the accumulated perturbation back into the
                # eps-ball around the clean input.
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        if attack == "CW_ens":
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + str(
                args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]
            if os.path.exists(pickle_name) and attack == "CW_ens":
                print('Loading adversarial samples')
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(
                        src_model, x, X_adv, Y_test)
                    print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err))

                preds_adv, _, err = tf_test_error_rate(target_model, x, X_adv,
                                                       Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name),
                                              err))

                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(),
                               src_models,
                               targeted=False,
                               confidence=args.kappa,
                               eps=eps)

            X_adv = cli.attack(X_test, Y_test)

            # Clip the attack's perturbation to the eps-ball.
            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            for (name, src_model) in zip(src_model_names, src_models):
                print('Carrying out white-box attack')
                pres, _, err = tf_test_error_rate(src_model, x, X_adv, Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err))
            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x, X_adv,
                                                      Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name),
                                              err))

            return

        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
            eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
                eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print('Loading adversarial samples')
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print('Generating adversarial samples')
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        # Mean L2 norm of the crafted perturbations.
        avg_l2_perturb = np.mean(
            np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv,
                                                      Y_test[0:l])
            if args.targeted_flag == 1:
                # For targeted attacks, success rate = 100 - error rate.
                err = 100.0 - err
            print('{}->{}: {:.1f}'.format(basename(name), basename(name), err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack))
def main(attack, src_model_name, target_model_names):
    """Craft adversarial MNIST examples on a single source model and report
    white-box error plus black-box transfer to each target model.

    Args:
        attack: "test", "fgs", "rand_fgs", "ifgs", "CW", "cascade_ensemble"
            or "parallel_ensemble".
        src_model_name: path of the model used to craft the examples.
        target_model_names: list of paths of the black-box target models.

    Relies on the module-level `args` and, for the ensemble attacks, the
    module-level `sub_model_*` paths.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [load_model(name) for name in target_model_names]

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.3f}'.format(basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.3f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        # Restrict to the first 1000 samples to keep the attack tractable.
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        # Clip the attack's perturbation to the eps-ball.
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        return

    if attack == "cascade_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        # NOTE(review): eps is reduced here but the FGSM steps below use
        # args.eps, not eps -- confirm which budget is intended.
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3, sub_model_4,
                         sub_model_5, sub_model_6, sub_model_7)
        sub_models = [load_model(m) for m in sub_model_ens]

        # One small FGSM step per model, cycling through the ensemble
        # (source model last) for args.steps rounds.
        adv_x = x
        for j in range(args.steps):
            for m in sub_models + [src_model]:
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "parallel_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [load_model(m) for m in sub_model_ens]

        # x_advs[j][i] holds the perturbed tensor after step j on sub-model i.
        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print(x_advs)

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        print(x_advs)
        # Average all intermediate adversarial tensors, then take one final
        # FGSM step on the source model from that mean point.
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))
        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err))