Code example #1
def generate_carlini_l2_examples(sess, model, x, y, X, Y, attack_params,
                                 verbose, attack_log_fpath):
    model_wrapper = wrap_to_tohinz_model(model, X, Y)

    accepted_params = [
        'batch_size', 'confidence', 'targeted', 'learning_rate',
        'binary_search_steps', 'max_iterations', 'abort_early', 'initial_const'
    ]
    for k in attack_params:
        if k not in accepted_params:
            raise NotImplementedError("Unsuporrted params in Carlini L2: %s" %
                                      k)

    # Clamp batch_size so it never exceeds the number of examples.
    if 'batch_size' in attack_params and attack_params['batch_size'] > len(X):
        attack_params['batch_size'] = len(X)

    if 'binary_search_steps' in attack_params:
        attack_params['binary_search_steps'] = int(
            attack_params['binary_search_steps'])

    attack = CarliniL2(sess, model_wrapper, **attack_params)

    if not verbose:
        disablePrint(attack_log_fpath)
    # The input range is [0, 1], convert to [-0.5, 0.5] by subtracting 0.5.
    # The return range is [-0.5, 0.5]. Convert back to [0,1] by adding 0.5.
    X_adv = attack.attack(X - 0.5, Y) + 0.5
    if not verbose:
        enablePrint()

    return X_adv
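
For context, a minimal sketch of how this wrapper might be invoked. The session, model, and test arrays (sess, model, X_test, Y_test) are assumptions standing in for whatever the surrounding harness provides; only the parameter names come from accepted_params above.

# Hypothetical usage sketch; sess, model, X_test, Y_test are assumed to exist.
attack_params = {
    'batch_size': 100,
    'confidence': 0,
    'targeted': False,
    'max_iterations': 1000,
    'binary_search_steps': 9,
    'initial_const': 1e-2,
}
X_adv = generate_carlini_l2_examples(sess, model, x=None, y=None,
                                     X=X_test[:100], Y=Y_test[:100],
                                     attack_params=attack_params,
                                     verbose=False,
                                     attack_log_fpath='/tmp/carlini_l2.log')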
Code example #2
def run_pca(Data, num_components=10, invert=False):
    data = Data()

    sess = K.get_session()

    K.set_learning_phase(False)

    shape = (-1, 784)
    
    pca = sklearn.decomposition.PCA(n_components=num_components)

    pca.fit(data.train_data.reshape(shape)) # [:10000]

    if invert:
        model = MNISTModel("models/mnist-pca-cnn-top-"+str(num_components))
    else:
        model = make_model(num_components)
        model.load_weights("models/mnist-pca-top-"+str(num_components))
        model = Wrap(model, pca)

    tf_mean = tf.constant(pca.mean_, dtype=tf.float32)
    tf_components = tf.constant(pca.components_.T, dtype=tf.float32)

    def new_predict(xs):
        # map to PCA space
        xs = tf.reshape(xs, (-1, 784))
        xs -= tf_mean
        xs = tf.matmul(xs, tf_components)

        # map back
        xs = tf.matmul(xs, tf.transpose(tf_components))
        xs += tf_mean
        xs = tf.reshape(xs, (-1, 28, 28, 1))
        return model.model(xs)

    if invert:
        model.predict = new_predict

    attack = CarliniL2(sess, model, batch_size=100, max_iterations=3000, 
                       binary_search_steps=6, targeted=False,
                       initial_const=1)

    N = 100

    test_adv = attack.attack(data.test_data[:N], data.test_labels[:N])

    acc = np.mean(
        np.argmax(sess.run(model.predict(tf.constant(data.test_data, dtype=np.float32))), axis=1)
        == np.argmax(data.test_labels, axis=1))
    print('accuracy', acc)

    print(list(test_adv[0].flatten()))

    print('dist',
          np.mean(np.sum((test_adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5))

    it = np.argmax(sess.run(model.predict(tf.constant(test_adv))), axis=1)
    print('success', np.mean(it == np.argmax(data.test_labels, axis=1)[:N]))
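
The new_predict graph above is the standard PCA project-and-reconstruct round trip. For clarity, the same transformation as a plain NumPy sketch (illustrative, assuming a fitted sklearn PCA object; not part of the original code):

import numpy as np

def pca_roundtrip(xs, pca):
    # Centre, project onto the top components, re-expand, un-centre:
    # exactly the sequence of matmuls performed in new_predict above.
    flat = xs.reshape(-1, 784)
    coords = (flat - pca.mean_) @ pca.components_.T   # map to PCA space
    recon = coords @ pca.components_ + pca.mean_      # map back
    return recon.reshape(-1, 28, 28, 1)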
Code example #3
def run(Data, Model, path):
    sess = K.get_session()
    K.set_learning_phase(False)
    data, model = Data(), Model(path)

    if Data == MNIST:
        attack = CarliniL2(sess,
                           model,
                           batch_size=100,
                           max_iterations=2000,
                           binary_search_steps=5,
                           initial_const=1.,
                           learning_rate=1e-1,
                           targeted=False)
    else:
        attack = CarliniL2(sess,
                           model,
                           batch_size=100,
                           max_iterations=200,
                           binary_search_steps=3,
                           initial_const=.01,
                           learning_rate=1e-2,
                           targeted=True,
                           confidence=2)

    now = time.time()

    for name, X, y in [["test", data.test_data, data.test_labels]]:
        print("OKAY", name)
        for k in range(0, len(y), 5000):
            #if os.path.exists("tmp/"+path.split("/")[1]+"."+name+".adv.X."+str(k)+".npy"):
            #    print('skip',k)
            #    continue
            now = time.time()
            adv = attack.attack(X[k:k + 100], y[k:k + 100])
            #print('time',time.time()-now)
            #print('accuracy',np.mean(np.argmax(model.model.predict(adv),axis=1)==np.argmax(y[k:k+5000],axis=1)))
            #print('mean distortion',np.mean(np.sum((adv-X[k:k+5000])**2,axis=(1,2,3))**.5))
            np.save(
                "/tmp/" + path.split("/")[1] + "." + name + ".adv.X." + str(k),
                adv)
Code example #4
def compare_baseline():
    data = MNIST()
    model = MNISTModel("models/mnist")
    sess = K.get_session()

    attack = CarliniL2(sess, model, batch_size=100, max_iterations=3000, 
                       binary_search_steps=4, targeted=False,
                       initial_const=10)

    N = 100
    test_adv = attack.attack(data.test_data[:N], data.test_labels[:N])
    print('dist',
          np.mean(np.sum((test_adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5))
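
The dist line computes the mean per-image L2 distortion, a pattern these examples repeat throughout. Pulled out as a small standalone helper (a sketch, not part of the original code):

import numpy as np

def mean_l2_distortion(adv, orig):
    # Euclidean distance per image over (height, width, channel), then averaged.
    return np.mean(np.sqrt(np.sum((adv - orig)**2, axis=(1, 2, 3))))

# e.g. mean_l2_distortion(test_adv, data.test_data[:N])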
Code example #5
def run_pca(Data, Model, path=None):
    sess = K.get_session()
    K.set_learning_phase(False)

    data = Data()
    model = Model(path)

    shape = (-1, model.num_channels * model.image_size**2)

    pca = sklearn.decomposition.PCA(n_components=shape[1])

    pca.fit(data.train_data.reshape(shape))

    print(pca.explained_variance_ratio_)

    r_test = pca.transform(data.test_data.reshape(shape))

    #attack = FGS(sess, model, eps=.3)
    attack = CarliniL2(sess,
                       model,
                       batch_size=100,
                       max_iterations=1000,
                       binary_search_steps=2,
                       targeted=False,
                       initial_const=10)

    N = 10000

    #test_adv = attack.attack(data.test_data[:N], data.test_labels[:N])
    test_adv = np.load("tmp/outlieradvtest.npy")

    r_test_adv = pca.transform(test_adv[:N].reshape(shape))

    fig = plt.figure(figsize=(4, 3))
    fig.subplots_adjust(bottom=0.17, left=.19)

    plt.xlabel('Component Number')
    plt.ylabel('Mean Absolute Value (log scale)')

    plt.semilogy(range(r_test.shape[1]),
                 np.mean(np.abs(r_test), axis=0),
                 label='Valid')
    plt.semilogy(range(r_test_adv.shape[1]),
                 np.mean(np.abs(r_test_adv), axis=0),
                 label='Adversarial')

    plt.legend()

    pp = PdfPages('/tmp/a.pdf')
    plt.savefig(pp, format='pdf')
    pp.close()
    plt.show()
Code example #6
def run_filter(Data, Model, path):
    K.set_learning_phase(False)
    data = Data()
    model = Model(path)
    model2 = Model(path)

    def new_predict(xs):
        print(xs.get_shape())
        # 3x3 mean filter. Note the CIFAR kernel also sums across the three
        # input channels, not just spatially (see the sketch after this example).
        if 'mnist' in path:
            xs = tf.nn.conv2d(xs, tf.constant(np.ones((3, 3, 1, 1)) / 9, dtype=tf.float32),
                              [1, 1, 1, 1], "SAME")
        else:
            xs = tf.nn.conv2d(xs, tf.constant(np.ones((3, 3, 3, 3)) / 9, dtype=tf.float32),
                              [1, 1, 1, 1], "SAME")
        return model2.model(xs)
    model2.predict = new_predict

    sess = K.get_session()
    #dist 1.45976

    attack = CarliniL2(sess, model2, batch_size=100, max_iterations=3000,
                       binary_search_steps=4, targeted=False, confidence=0,
                       initial_const=10)

    N = 100

    test_adv = attack.attack(data.test_data[:N], data.test_labels[:N])

    print('accuracy of original model',
          np.mean(np.argmax(sess.run(model.predict(tf.constant(data.test_data, dtype=np.float32))), axis=1)
                  == np.argmax(data.test_labels, axis=1)))
    print('accuracy of blurred model',
          np.mean(np.argmax(sess.run(model2.predict(tf.constant(data.test_data, dtype=np.float32))), axis=1)
                  == np.argmax(data.test_labels, axis=1)))

    print('dist',
          np.mean(np.sum((test_adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5))

    #it = np.argmax(sess.run(model.predict(tf.constant(test_adv))),axis=1)
    #print('success of unblurred',np.mean(it==np.argmax(data.test_labels,axis=1)[:N]))
    it = np.argmax(sess.run(model2.predict(tf.constant(test_adv))), axis=1)
    print('success of blurred', np.mean(it == np.argmax(data.test_labels, axis=1)[:N]))
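
As the comment in new_predict points out, the CIFAR kernel np.ones((3,3,3,3))/9 mixes the three input channels. If a purely spatial, per-channel mean blur is intended, a depthwise convolution keeps channels separate; a minimal sketch under that assumption (not the original code):

import numpy as np
import tensorflow as tf

def mean_blur(xs, channels):
    # 3x3 per-channel mean filter: kernel shape (3, 3, in_channels, 1),
    # applied depthwise so channels are never mixed.
    kernel = tf.constant(np.ones((3, 3, channels, 1)) / 9.0, dtype=tf.float32)
    return tf.nn.depthwise_conv2d(xs, kernel, strides=[1, 1, 1, 1], padding="SAME")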
Code example #7
def run_test(Data, Model, path):
    sess = K.get_session()
    K.set_learning_phase(False)
    data = Data()
    model = Model(path)

    N = 1000
    X = data.train_data[np.random.choice(np.arange(len(data.train_data)),
                                         N,
                                         replace=False)].reshape((N, -1))
    #Y = data.train_data[np.random.choice(np.arange(len(data.train_data)), N, replace=False)].reshape((N,-1))
    # Placeholder draw from the test set; Y is overwritten with adversarial
    # examples below.
    Y = data.test_data[np.random.choice(np.arange(len(data.test_data)),
                                        N,
                                        replace=False)].reshape((N, -1))

    #attack = FGS(sess, model, N, .275)
    attack = CarliniL2(sess,
                       model,
                       batch_size=100,
                       binary_search_steps=2,
                       initial_const=1,
                       targeted=False,
                       max_iterations=500)

    idx = np.random.choice(np.arange(len(data.test_data)), N, replace=False)
    Y = attack.attack(data.test_data[idx], data.test_labels[idx]).reshape(
        (N, -1))

    iterations = 1000

    sigma2 = 100
    mmd2u, mmd2u_null, p_value = kernel_two_sample_test(X,
                                                        Y,
                                                        iterations=iterations,
                                                        kernel_function='rbf',
                                                        gamma=1.0 / sigma2,
                                                        verbose=True)
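
kernel_two_sample_test is an external helper; the statistic it is built on is the squared maximum mean discrepancy (MMD) under an RBF kernel with the given gamma. A minimal sketch of the biased MMD^2 estimate (illustrative, not the original implementation):

import numpy as np
from scipy.spatial.distance import cdist

def mmd2_rbf(X, Y, gamma):
    # Biased estimate of MMD^2 with k(a, b) = exp(-gamma * ||a - b||^2).
    Kxx = np.exp(-gamma * cdist(X, X, 'sqeuclidean'))
    Kyy = np.exp(-gamma * cdist(Y, Y, 'sqeuclidean'))
    Kxy = np.exp(-gamma * cdist(X, Y, 'sqeuclidean'))
    return Kxx.mean() + Kyy.mean() - 2 * Kxy.mean()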
Code example #8
def run_evaluation(Data, Model, path, num_epochs, name):
    data = Data()

    #train(Model, data, 10, path, num_epochs=num_epochs)

    sess = K.get_session()
    K.set_learning_phase(False)

    model = Model(path)
    #attack = FGS(sess, model)
    attack = CarliniL2(sess,
                       model,
                       batch_size=100,
                       max_iterations=3000,
                       binary_search_steps=3,
                       targeted=True,
                       initial_const=10,
                       learning_rate=1e-2)
    """ # uncomment to run the training phase
    
    train_adv = attack.attack(data.train_data, data.train_labels)
    np.save("tmp/"+name+"outlieradvtrain",train_adv)
    train_adv = np.load("tmp/"+name+"outlieradvtrain.npy")
    data.train_data = np.concatenate((data.train_data, train_adv))
    data.train_labels = np.concatenate((data.train_labels, np.zeros(data.train_labels.shape, dtype=np.float32)))
    data.train_labels = np.pad(data.train_labels, [[0, 0], [0, 1]], mode='constant')
    data.train_labels[data.train_labels.shape[0]//2:,10] = 1

    validation_adv = attack.attack(data.validation_data, data.validation_labels)
    np.save("tmp/"+name+"outlieradvvalidation",validation_adv)
    validation_adv = np.load("tmp/"+name+"outlieradvvalidation.npy")
    data.validation_data = np.concatenate((data.validation_data, validation_adv))
    data.validation_labels = np.concatenate((data.validation_labels, np.zeros(data.validation_labels.shape, dtype=np.float32)))
    data.validation_labels = np.pad(data.validation_labels, [[0, 0], [0, 1]], mode='constant')
    data.validation_labels[data.validation_labels.shape[0]//2:,10] = 1

    test_adv = attack.attack(data.test_data, data.test_labels)
    np.save("tmp/"+name+"outlieradvtest",test_adv)
    test_adv = np.load("tmp/"+name+"outlieradvtest.npy")
    data.test_data = np.concatenate((data.test_data, test_adv))
    data.test_labels = np.concatenate((data.test_labels, np.zeros(data.test_labels.shape, dtype=np.float32)))
    data.test_labels = np.pad(data.test_labels, [[0, 0], [0, 1]], mode='constant')
    data.test_labels[data.test_labels.shape[0]//2:,10] = 1

    train(Model, data, 11, path+"_advtraining", num_epochs=num_epochs)

    data1 = Data() # just need a reference, this is a bit ugly to do
    data2 = Data() # just need a reference, this is a bit ugly to do

    idxs = list(range(len(data.train_data)))
    random.shuffle(idxs)

    data1.train_data = data.train_data[idxs[:len(idxs)//2]]
    data2.train_data = data.train_data[idxs[len(idxs)//2:]]
    data1.train_labels = data.train_labels[idxs[:len(idxs)//2],:]
    data2.train_labels = data.train_labels[idxs[len(idxs)//2:],:]

    idxs = list(range(len(data.validation_data)))
    random.shuffle(idxs)
    data1.validation_data = data.validation_data[idxs[:len(idxs)//2]]
    data2.validation_data = data.validation_data[idxs[len(idxs)//2:]]
    data1.validation_labels = data.validation_labels[idxs[:len(idxs)//2]]
    data2.validation_labels = data.validation_labels[idxs[len(idxs)//2:]]

    idxs = list(range(len(data.test_data)))
    random.shuffle(idxs)
    data1.test_data = data.test_data[idxs[:len(idxs)//2]]
    data2.test_data = data.test_data[idxs[len(idxs)//2:]]
    data1.test_labels = data.test_labels[idxs[:len(idxs)//2]]
    data2.test_labels = data.test_labels[idxs[len(idxs)//2:]]

    train(Model, data1, 11, path+"_advtraining-left", num_epochs=num_epochs)
    train(Model, data2, 11, path+"_advtraining-right", num_epochs=num_epochs)
    #"""

    K.set_learning_phase(False)

    rmodel = Model(num_labels=11).model
    rmodel.load_weights(path + "_advtraining")
    if name == "cifar":
        rmodel = Wrap(rmodel, 32, 3, 11)
    else:
        rmodel = Wrap(rmodel, 28, 1, 11)

    rmodel1 = Model(num_labels=11).model
    rmodel1.load_weights(path + "_advtraining-left")
    if name == "cifar":
        rmodel1 = Wrap(rmodel1, 32, 3, 11)
    else:
        rmodel1 = Wrap(rmodel1, 28, 1, 11)

    rmodel2 = Model(num_labels=11).model
    rmodel2.load_weights(path + "_advtraining-right")
    if name == "cifar":
        rmodel2 = Wrap(rmodel2, 32, 3, 11)
    else:
        rmodel2 = Wrap(rmodel2, 28, 1, 11)

    rmodel2.model.summary()

    attack2 = CarliniL2(sess,
                        rmodel,
                        batch_size=100,
                        max_iterations=2000,
                        confidence=.1,
                        binary_search_steps=3,
                        targeted=True,
                        initial_const=10,
                        learning_rate=1e-2)

    #test_adv = np.load("tmp/outlieradvtest.npy")
    #print('qq',np.mean(rmodel.model.predict_classes(test_adv)==10))

    N = 100
    targets = utils.get_labs(data.test_labels[:100])
    #"""
    test_adv = attack.attack(data.test_data[:N], targets)
    print(
        'mean distortion',
        np.mean(
            np.sum((test_adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5))
    print('model predict', np.argmax(model.model.predict(test_adv), axis=1))
    print('rmodel predict', np.argmax(rmodel.model.predict(test_adv), axis=1))
    #"""

    targets2 = np.zeros((N, 11))
    targets2[:, :10] = targets
    test_adv = attack2.attack(data.test_data[:N], targets2)
    print(list(test_adv[0].flatten()))
    print(
        'mean distortion',
        np.mean(
            np.sum((test_adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5))

    a = (np.argmax(model.model.predict(test_adv), axis=1))
    #print(a)
    print('summary', np.mean(a == np.argmax(targets, axis=1)),
          np.mean(a == 10))

    a = (np.argmax(rmodel.model.predict(test_adv), axis=1))
    #print(a)
    print('summary', np.mean(a == np.argmax(targets, axis=1)),
          np.mean(a == 10))

    a = (np.argmax(rmodel1.model.predict(test_adv), axis=1))
    #print(a)
    print('summary', np.mean(a == np.argmax(targets, axis=1)),
          np.mean(a == 10))

    a = (np.argmax(rmodel2.model.predict(test_adv), axis=1))
    #print(a)
    print('summary', np.mean(a == np.argmax(targets, axis=1)),
          np.mean(a == 10))
Code example #9
# Partial snippet: assumes `model = Sequential()` and its earlier layers are
# defined above, with Conv2D, Flatten and Dense imported from keras.layers.
model.add(Conv2D(32, (2, 2), activation="relu", padding="same"))
model.add(Conv2D(128, (2, 2), activation="relu", padding="same"))
model.add(Conv2D(128, (1, 1), activation="relu", padding="same"))
model.add(Flatten())
model.add(Dense(10, activation="softmax"))
model.summary()

#model = model_mnist(input_image=Input(shape=(28, 28, 1)))
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["acc"])
model.load_weights('./cifar100/mnist_cnn_G08.hdf5')

with tf.Session() as sess:
    data, model1 = MNIST_data(), MNISTModel()
    attack = CarliniL2(sess, model1, batch_size=100, max_iterations=1000,
                       confidence=0, boxmin=0, boxmax=1)

    N = 100  # assumed sample count; N is not defined in this snippet
    inputs, targets = generate_data(data, samples=N, targeted=True,
                                    start=0, inception=False)
    print(targets)
    adv = attack.attack(inputs, targets)

# Display the MNIST images
W = 10  # number of images per row
H = 10  # number of images per column
fig = plt.figure(figsize=(H, W))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1.0, hspace=0.05, wspace=0.05)
for i in range(W*H):
    ax1 = fig.add_subplot(H, W, i + 1, xticks=[], yticks=[])
    # x_test (the raw MNIST test images) is assumed to be defined earlier.
    ax1.imshow(x_test[i].reshape((28, 28)), cmap='gray')
plt.savefig('./cifar100/mnist_x_test100_G08.jpg')
Code example #10
def run_kde(Data, Model, path):
    global DECONST
    sess = K.get_session()
    K.set_learning_phase(False)
    data, model = Data(), Model(path)

    model2 = Model(path)

    # TODO: hidden_layer -> selected layer
    layer_name = "activation_7"
    hidden_layer = pop_layer(model2.model, layer_name)
    #hidden_layer = pop(model2.model) # once to remove dense(10)
    #hidden_layer = pop(hidden_layer) # once to remove ReLU

    #compute_optimal_sigma(sess, model, hidden_layer, data)
    #MNIST SIGMA: 20

    removed_cols = []
    for i in range(10):
        removed_cols.extend(
            get_removed_cols(
                hidden_layer,
                data.train_data[np.argmax(data.train_labels, axis=1) == i]))
    removed_cols = list(set(removed_cols))

    de = [
        DensityEstimate(
            sess,
            hidden_layer,
            data.train_data[np.argmax(data.train_labels, axis=1) == i],
            model.image_size,
            model.num_channels,
            removed_cols,
            sigma=0.864) for i in range(10)
    ]
    #de2 = [DensityEstimate(sess, hidden_layer, data.train_data[np.argmax(data.train_labels,axis=1)==i], model.image_size, model.num_channels, sigma=0.864) for i in range(10)]
    de2 = de

    p = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))

    #print(np.log(de[0].predict(data.test_data[:10])))
    #print(sess.run(rmodel.predict(p)[1], {p: data.test_data[:10]}))
    #exit(0)

    N = 9
    #print(model.model.predict(data.train_data[:N]))
    #print(hidden_layer.predict(data.train_data[:N]))

    # Pick one test image from each class except TARGET_CLASS; indices into
    # adv_labels shift down by one after the skipped class.
    adv_candid = []
    jumped = False
    adv_labels = np.zeros((9, 10))
    for i in range(0, 10):
        if i == TARGET_CLASS:
            jumped = True
            continue
        adv_candid.extend(
            data.test_data[np.argmax(data.test_labels, axis=1) == i][:1])
        if jumped:
            adv_labels[i - 1][TARGET_CLASS] = 1
        else:
            adv_labels[i][TARGET_CLASS] = 1

    adv_candid = np.array(adv_candid)

    #for i in range(10):
    #    for j in range(N):
    #        print(de[i].predict(data.train_data[j:j+1])) # N

    #start_density = estimate_density_full(model, de, data.test_data[M:M+N])+1e-30
    start_density = estimate_density_full(model, de, adv_candid) + 1e-30
    print("starting density", -np.log(start_density))
    #print("starting density", -start_density)

    DECONST = -np.log(start_density)
    #DECONST = -start_density
    DECONST = np.median(DECONST)
    #DECONST = 0

    print("DECONST", DECONST)
    #DECONST = -1

    # Random non-ground-truth target labels (replaced by adv_labels below).
    l = np.zeros((N, 10))
    #l[np.arange(N),np.random.random_integers(0,9,N)] = 1
    for i in range(N):
        r = np.random.random_integers(0, 9)
        while r == np.argmax(data.test_labels[i]):
            r = np.random.random_integers(0, 9)
        l[i, r] = 1

    l = adv_labels
    print(l)
    attack1 = CarliniL2(sess,
                        model,
                        batch_size=1,
                        max_iterations=3000,
                        binary_search_steps=3,
                        initial_const=1.0,
                        learning_rate=1e-1,
                        targeted=True)
    attack2 = CarliniL2New(sess,
                           model,
                           batch_size=1,
                           max_iterations=60000,
                           binary_search_steps=5,
                           initial_const=1.0,
                           learning_rate=1e-2,
                           targeted=True,
                           extra_loss=extra_loss(de2, TARGET_CLASS),
                           debug_extra_loss=debug_extra_loss(
                               de2, TARGET_CLASS),
                           de=de2)
    #l = data.test_labels[:N]
    #l = np.zeros((N,10))
    #l[np.arange(N),1] = 1
    print("RUN PHASE 1")
    #adv = attack1.attack(data.test_data[M:M+N], l)
    adv = attack1.attack(adv_candid, l)
    #print('mean distortion',np.mean(np.sum((adv-data.test_data[M:M+N])**2,axis=(1,2,3))**.5))
    print('mean distortion',
          np.mean(np.sum((adv - adv_candid)**2, axis=(1, 2, 3))**.5))

    print("RUN PHASE 2")
    #adv = attack2.attack(data.test_data[M:M+N], adv, l)
    adv = attack2.attack(adv_candid, adv, l)

    #np.save("/tmp/q"+str(M),adv)
    np.save("./adv/adv_mnist_cnw_target_{}".format(TARGET_CLASS), adv)
    #adv = np.load("/tmp/qq.npy")

    #print('labels',np.mean(np.argmax(sess.run(model.predict(p), {p: adv}),axis=1)==l))
    print('labels')
    print(np.argmax(l, axis=1))
    print(np.argmax(sess.run(model.predict(p), {p: adv}), axis=1))
    print(np.argmax(model.model.predict(adv), axis=1))

    #print('mean distortion',np.mean(np.sum((adv-data.test_data[M:M+N])**2,axis=(1,2,3))**.5))
    print('mean distortion',
          np.mean(np.sum((adv - adv_candid)**2, axis=(1, 2, 3))**.5))

    #a = estimate_density_full(model, de, data.test_data[M:M+N])+1e-30
    a = estimate_density_full(model, de, adv_candid) + 1e-30
    b = estimate_density_full(model, de, adv) + 1e-30

    #print(data.test_data.shape)
    #print(adv.shape)

    show(adv)

    print('de of test', np.mean(-np.log(a)))
    print('de of adv', np.mean(-np.log(b)))

    print('better ratio', np.mean(np.array(a) > np.array(b)))
    exit(0)

    #density = gaussian_kde(np.array(np.log(a))-np.array(np.log(b)))
    #density_a = gaussian_kde(np.log(a))
    #density_b = gaussian_kde(np.log(b))

    xs = np.linspace(-25, 25, 200)

    fig = plt.figure(figsize=(4, 3))
    fig.subplots_adjust(bottom=0.17, left=.15, right=.85)

    plt.xlabel('log(KDE(valid))-log(KDE(adversarial))')
    plt.ylabel('Occurrences')

    #plt.hist(np.log(a),100)
    #plt.hist(np.log(b),100)
    plt.hist(np.log(a) - np.log(b), 100)
    #plt.hist(np.array(np.log(a))-np.array(np.log(b)),100)
    #a = plt.plot(xs,density_a(xs), 'r--',color='blue', label='Valid')
    #b = plt.plot(xs,density_b(xs), color='red', label='Adversarial')
    #plt.plot(xs,density(xs))

    #plt.legend(handles=[a[0], b[0]])

    pp = PdfPages('/tmp/a.pdf')
    plt.savefig(pp, format='pdf')
    pp.close()
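
DensityEstimate here is an external class that performs kernel density estimation on hidden-layer features, one estimator per class. The core idea can be sketched with scikit-learn's KernelDensity (illustrative; fit_class_kdes and its inputs are assumptions, not the original API):

import numpy as np
from sklearn.neighbors import KernelDensity

def fit_class_kdes(features_by_class, sigma):
    # One Gaussian KDE per class over hidden-layer feature vectors.
    return [KernelDensity(kernel='gaussian', bandwidth=sigma).fit(f)
            for f in features_by_class]

def log_density(kdes, features, labels):
    # Log-density of each feature vector under the KDE of its assigned class;
    # low values flag likely adversarial inputs.
    return np.array([kdes[c].score_samples(f[None, :])[0]
                     for f, c in zip(features, labels)])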
Code example #11
def run_nn_detection(Data, path):
    
    data = Data()
    sess = K.get_session()
    K.set_learning_phase(False)
    
    model_with_detector = ResnetBuilder.build_resnet_32((3, 32, 32), 10,
                                                        with_detector=2, activation=False)
    model_with_detector.save_weights("/tmp/q")

    model_with_detector.load_weights("models/cifar-layerdetect-37-0")

    N = 10  # len(data.test_data)//100
    """ # uncomment to generate adversarial testing data
    model = ResnetBuilder.build_resnet_32((3, 32, 32), 10, activation=False)
    model.load_weights("models/cifar-resnet")
    model = Wrap(model)

    #attack = FGS(sess, model)
    attack = CarliniL2(sess, model, batch_size=100, binary_search_steps=3,
                       initial_const=0.1, max_iterations=3000, learning_rate=0.005,
                       confidence=0, targeted=False)

    for i in range(0,N,1000):
        test_adv = attack.attack(data.test_data[i:i+100], data.test_labels[i:i+100])
        np.save("tmp/testadv"+path.split("/")[1]+str(i), test_adv)
    #"""

    test_adv = []
    for i in range(0,N,1000):
        test_adv.extend(np.load("tmp/testadv"+path.split("/")[1]+str(i)+".npy"))
    test_adv = np.array(test_adv)

    print('Accuracy of model on test set',
          np.mean(np.argmax(model_with_detector.predict(data.test_data)[0], axis=1)
                  == np.argmax(data.test_labels, axis=1)))
    print('Accuracy of model on adversarial data',
          np.mean(np.argmax(model_with_detector.predict(test_adv)[0], axis=1)
                  == np.argmax(data.test_labels, axis=1)))

    print('Probability detector detects valid data as valid',
          np.mean(model_with_detector.predict(data.test_data)[1] <= 0))
    print('Probability detector detects adversarial data as adversarial',
          np.mean(model_with_detector.predict(test_adv)[1] > 0))

    xs = tf.placeholder(tf.float32, [None, 32, 32, 3])
    rmodel = RobustModel(model_with_detector)
    preds = rmodel.predict(xs)

    y1 = np.argmax(sess.run(preds, {xs: data.test_data[:N]}), axis=1)
    print('Robust model accuracy on test data',
          np.mean(y1 == np.argmax(data.test_labels[:N], axis=1)))
    print('Probability robust model detects valid data as adversarial',
          np.mean(y1 == 10))

    y2 = np.argmax(sess.run(preds, {xs: test_adv}), axis=1)
    print('Probability robust model detects adversarial data as adversarial',
          np.mean(y2 == 10))

    attack = CarliniL2(sess, rmodel, batch_size=10, binary_search_steps=3,
                       initial_const=0.1, max_iterations=300, learning_rate=0.01,
                       confidence=0, targeted=True)

    targets = np.argmax(model_with_detector.predict(test_adv[:N])[0],axis=1)
    realtargets = np.zeros((N, 11))
    realtargets[np.arange(N),targets] = 1

    np.save("tmp/adaptiveattack",attack.attack(data.test_data[:N], realtargets))
    adv = np.load("tmp/adaptiveattack.npy")

    print('Accuracy on adversarial data',
          np.mean(np.argmax(model_with_detector.predict(adv)[0], axis=1)
                  == np.argmax(data.test_labels, axis=1)))

    print('Probability detector detects adversarial data as adversarial',
          np.mean(model_with_detector.predict(adv)[1] > 0))

    d = np.sum((adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5
    print("mean distortion attacking robust model", np.mean(d))

    d = np.sum((test_adv[:N] - data.test_data[:N])**2, axis=(1, 2, 3))**.5
    print("mean distortion attacking unsecured model", np.mean(d))
    

    model_with_detector_2 = ResnetBuilder.build_resnet_32((3, 32, 32), 10,
                                                          with_detector=2, activation=False)
    model_with_detector_2.load_weights("models/cifar-layerdetect-42-0")

    print('Accuracy on adversarial data',
          np.mean(np.argmax(model_with_detector_2.predict(adv)[0], axis=1)
                  == np.argmax(data.test_labels, axis=1)))

    print('Probability detector detects adversarial data as adversarial',
          np.mean(model_with_detector_2.predict(adv)[1] > 0))
Code example #12
        generated_img)  # preprocess image
    enc_gen, enc_gen_layers = stn.encoder.encode(generated_img)

    if data_set == "cifar10":
        classifier = Model("eval", raw_cifar.train_images)
        classifier._build_model(adv_img, label, reuse=False, conf=0.1)
        adv_loss = - classifier.target_loss
        adv_acc = classifier.accuracy
        adv_acc_y = tf.cast(classifier.correct_prediction, tf.float32)
        classifier._build_model(content, label, reuse=True)
        normal_loss = - classifier.target_loss
        norm_acc = classifier.accuracy
        logits = classifier.pre_softmax
        pgd_attack = LinfPGDAttack(classifier.xent, content, label, epsilon=0.25 *
                                   255, num_steps=200, step_size=0.05*255, random_start=True)
        CarliniL2.pgd_attack()
    elif data_set == "imagenet":
 
        classifier = build_imagenet_model(
            adv_img_bgr, label, conf=0.1, shrink_class=shrink_class)
        adv_loss = - classifier.target_loss
        adv_acc = classifier.accuracy
        adv_acc_y = classifier.acc_y
        adv_acc_y_5 = classifier.acc_y_5
        #logits = classifier.logits

        content_bgr = tf.reverse(content, axis=[-1])  # switch RGB to BGR
        classifier = build_imagenet_model(
            content_bgr, label, reuse=True, shrink_class=shrink_class)
Code example #13
def test(Model, data, path):
    keras.backend.set_learning_phase(False)
    model = make_model(Model, dropout=False)
    model.load_weights(path)

    modeld = make_model(Model, dropout=True)
    modeld.load_weights(path)

    guess = model.predict(data.test_data)
    print(guess[:10])
    print(
        'Accuracy without dropout',
        np.mean(
            np.argmax(guess, axis=1) == np.argmax(data.test_labels, axis=1)))

    guess = modeld.predict(data.test_data)
    print(
        'Accuracy with dropout',
        np.mean(
            np.argmax(guess, axis=1) == np.argmax(data.test_labels, axis=1)))

    sess = keras.backend.get_session()

    N = 10
    labs = get_labs(data.test_data[:N])
    print(labs)
    print('good?', np.sum(labs * data.test_labels[:N]))

    attack = CarliniL2(sess,
                       Wrap(model),
                       batch_size=N,
                       max_iterations=1000,
                       binary_search_steps=3,
                       learning_rate=1e-1,
                       initial_const=1,
                       targeted=True,
                       confidence=0)
    adv = attack.attack(data.test_data[:N], labs)
    guess = model.predict(adv)
    print('average distortion',
          np.mean(np.sum((data.test_data[:N] - adv)**2, axis=(1, 2, 3))**.5))
    print(guess[:10])

    print("Test data")
    valid_u = compute_u(sess, modeld, data.test_data[:N])
    print("Adversarial examples")
    valid_u = compute_u(sess, modeld, adv)

    # The below attack may not even be necessary for CIFAR:
    # the adversarial examples generated with (3,1000,1e-1) have a lower mean
    # uncertainty than the test images, but again with a 3x increase in distortion.

    if ISMNIST:
        p = tf.placeholder(tf.float32, (None, 28, 28, 1))
    else:
        p = tf.placeholder(tf.float32, (None, 32, 32, 3))
    r = differentable_u(modeld, p, 100)

    models = []
    for _ in range(20):
        m = make_model(Model, dropout=True, fixed=True)
        m.load_weights(path)
        models.append(m)
    #r2 = differentable_u_multiple(models, p)

    #print('uncertainty on test data', np.mean((sess.run(r, {p: data.test_data[:N]}))))
    #print('uncertainty on test data (multiple models)', np.mean((sess.run(r2, {p: data.test_data[:N]}))))
    #print('labels on robust model', np.argmax(sess.run(robustmodel.predict(p), {p: data.test_data[:100]}),axis=1))

    attack = CarliniL2Multiple(sess, [Wrap(m) for m in models],
                               batch_size=10,
                               binary_search_steps=4,
                               initial_const=1,
                               max_iterations=1000,
                               confidence=1,
                               targeted=True,
                               abort_early=False,
                               learning_rate=1e-1)

    # Random target labels; used by the success-rate print at the end.
    z = np.zeros((N, 10))
    z[np.arange(N), np.random.random_integers(0, 9, N)] = 1
    #z[np.arange(N),(9, 3, 0, 8, 7, 3, 4, 1, 6, 4)] = 1
    print(z)

    #qq = (3, 2, 1, 18, 4, 8, 11, 0, 61, 7)
    #np.save("images/mnist_dropout", attack.attack(data.test_data[qq,:,:,:],
    #                                               np.pad(np.roll(data.test_labels[qq,:],1,axis=1), [(0, 0), (0, 0)], 'constant')))
    #exit(0)

    adv = attack.attack(data.test_data[:N], labs)
    #adv = attack.attack(data.test_data[:N], data.test_labels[:N])

    np.save("/tmp/dropout_adv_" + str(ISMNIST), adv)
    #adv = np.load("/tmp/qq.npy")

    guess = model.predict(adv)

    print('normal predictions', guess)

    print('average distortion',
          np.mean(np.sum((data.test_data[:N] - adv)**2, axis=(1, 2, 3))**.5))

    print('normal label predictions', np.argmax(guess, axis=1))

    for m in models:
        print('model preds', np.argmax(m.predict(adv), axis=1))

    print(
        'Model accuracy on adversarial examples',
        np.mean(
            np.argmax(guess, axis=1) == np.argmax(data.test_labels[:N],
                                                  axis=1)))

    adv_u = compute_u(sess, modeld, adv)
    #print('differentiable uncertainty', np.mean((sess.run(r, {p: adv}))))

    print('Targeted adversarial examples success rate',
          np.mean(np.argmax(guess, axis=1) == np.argmax(z, axis=1)))

    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages
    """
    fig = plt.figure(figsize=(4,3))
    fig.subplots_adjust(bottom=0.15,left=.15)
    a=plt.hist(adv_u, 100, log=True, label="Adversarial (FGS)")
    b=plt.hist(valid_u, 100, log=True, label="Valid")
    plt.xlabel('Uncertainty')
    plt.ylabel('Occurrences (log scaled)')
    plt.legend()
    """
    fig = plt.figure(figsize=(4, 3))
    fig.subplots_adjust(bottom=0.15, left=.15)
    b = plt.hist(valid_u - adv_u, 100, label="Valid")
    plt.xlabel('U(valid)-U(adversarial)')
    plt.ylabel('Occurrences')

    pp = PdfPages('/tmp/a.pdf')
    plt.savefig(pp, format='pdf')
    pp.close()
    plt.show()
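
compute_u above measures dropout uncertainty. The standard Monte-Carlo dropout recipe is several stochastic forward passes through the dropout-enabled model, summarized by the variance of the predictions; a framework-agnostic sketch (predict_stochastic is an assumed stand-in for modeld run in training mode):

import numpy as np

def mc_dropout_uncertainty(predict_stochastic, xs, T=100):
    # T stochastic passes -> array of shape (T, n_inputs, n_classes);
    # per-input uncertainty is the predictive variance summed over classes
    # (one common choice of summary).
    probs = np.stack([predict_stochastic(xs) for _ in range(T)])
    return probs.var(axis=0).sum(axis=-1)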
Code example #14
def run_kde(Data, Model, path):
    global DECONST
    sess = K.get_session()
    K.set_learning_phase(False)
    data, model = Data(), Model(path)

    model2 = Model(path)

    hidden_layer = pop(model2.model)  # once to remove dense(10)
    hidden_layer = pop(hidden_layer)  # once to remove ReLU

    #compute_optimal_sigma(sess, model, hidden_layer, data)
    #MNIST SIGMA: 20

    de = [
        DensityEstimate(
            sess,
            hidden_layer,
            data.train_data[np.argmax(data.train_labels, axis=1) == i],
            model.image_size,
            model.num_channels,
            sigma=20) for i in range(10)
    ]
    de2 = [
        DensityEstimate(
            sess,
            hidden_layer,
            data.train_data[np.argmax(data.train_labels, axis=1) == i][:100],
            model.image_size,
            model.num_channels,
            sigma=20) for i in range(10)
    ]

    p = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))

    #print(np.log(de[0].predict(data.test_data[:10])))
    #print(sess.run(rmodel.predict(p)[1], {p: data.test_data[:10]}))
    #exit(0)

    N = 1
    print(model.model.predict(data.train_data[:N]))
    print(hidden_layer.predict(data.train_data[:N]))

    for i in range(10):
        print(de[i].predict(data.train_data[:N]))

    start_density = estimate_density_full(model, de,
                                          data.test_data[M:M + N]) + 1e-30
    print("starting density", np.log(start_density))

    DECONST = -np.log(start_density)

    l = np.zeros((N, 10))
    #l[np.arange(N),np.random.random_integers(0,9,N)] = 1
    for i in range(N):
        r = np.random.random_integers(0, 9)
        while r == np.argmax(data.test_labels[i]):
            r = np.random.random_integers(0, 9)
        l[i, r] = 1

    attack1 = CarliniL2(sess,
                        model,
                        batch_size=1,
                        max_iterations=3000,
                        binary_search_steps=3,
                        initial_const=1.0,
                        learning_rate=1e-1,
                        targeted=True)
    attack2 = CarliniL2New(sess,
                           model,
                           batch_size=1,
                           max_iterations=10000,
                           binary_search_steps=5,
                           initial_const=1.0,
                           learning_rate=1e-2,
                           targeted=True,
                           extra_loss=extra_loss(de2, np.argmax(l)))
    #l = data.test_labels[:N]
    #l = np.zeros((N,10))
    #l[np.arange(N),1] = 1
    print("RUN PHASE 1")
    adv = attack1.attack(data.test_data[M:M + N], l)
    print(
        'mean distortion',
        np.mean(
            np.sum((adv - data.test_data[M:M + N])**2, axis=(1, 2, 3))**.5))

    print("RUN PHASE 2")
    adv = attack2.attack(data.test_data[M:M + N], adv, l)

    np.save("/tmp/q" + str(M), adv)
    #adv = np.load("/tmp/qq.npy")

    print(
        'labels',
        np.mean(np.argmax(sess.run(model.predict(p), {p: adv}), axis=1) == l))

    print(
        'mean distortion',
        np.mean(
            np.sum((adv - data.test_data[M:M + N])**2, axis=(1, 2, 3))**.5))

    a = estimate_density_full(model, de, data.test_data[M:M + N]) + 1e-30
    b = estimate_density_full(model, de, adv) + 1e-30

    show(adv)

    print('de of test', np.mean(np.log(a)))
    print('de of adv', np.mean(np.log(b)))

    print('better ratio', np.mean(np.array(a) > np.array(b)))
    exit(0)

    #density = gaussian_kde(np.array(np.log(a))-np.array(np.log(b)))
    #density_a = gaussian_kde(np.log(a))
    #density_b = gaussian_kde(np.log(b))

    xs = np.linspace(-25, 25, 200)

    fig = plt.figure(figsize=(4, 3))
    fig.subplots_adjust(bottom=0.17, left=.15, right=.85)

    plt.xlabel('log(KDE(valid))-log(KDE(adversarial))')
    plt.ylabel('Occurrences')

    #plt.hist(np.log(a),100)
    #plt.hist(np.log(b),100)
    plt.hist(np.log(a) - np.log(b), 100)
    #plt.hist(np.array(np.log(a))-np.array(np.log(b)),100)
    #a = plt.plot(xs,density_a(xs), 'r--',color='blue', label='Valid')
    #b = plt.plot(xs,density_b(xs), color='red', label='Adversarial')
    #plt.plot(xs,density(xs))

    #plt.legend(handles=[a[0], b[0]])

    pp = PdfPages('/tmp/a.pdf')
    plt.savefig(pp, format='pdf')
    pp.close()
    plt.show()