Ejemplo n.º 1
0
def test_acc(model_indicator, split, dataset):
    """Evaluate and print the test accuracy of an HRS model.

    The model is queried with one test sample per ``predict`` call rather than
    with a single batch: per the original author's note, the model is
    randomized, so per-sample queries avoid scoring only one model realization.

    Args:
        model_indicator: Model identifier passed to ``construct_hrs_model`` and
            echoed in the printed report.
        split: Split identifier passed to ``get_split``.
        dataset: Dataset name passed to ``get_split``, ``construct_hrs_model``
            and ``get_data``.

    Returns:
        The mean of the per-sample 0/1 correctness indicators.
    """
    block_defs = get_split(split, dataset)

    # Learning phase 0 is selected before the model is constructed.
    keras.backend.set_learning_phase(0)
    hrs_model = construct_hrs_model(
        dataset=dataset,
        model_indicator=model_indicator,
        blocks_definition=block_defs,
    )

    # Only the test portion of the returned data is used below.
    _, X_test, _, Y_test = get_data(
        dataset=dataset,
        scale1=True,
        one_hot=False,
        percentage=0.01,
    )

    def _hit(idx):
        # Slice (rather than index) the input so the batch axis is kept.
        sample = X_test[idx:idx + 1]
        label = Y_test[idx]
        predicted = np.argmax(hrs_model.predict(sample)[0])
        return 1 if np.array_equal(label, predicted) else 0

    hits = [_hit(idx) for idx in range(X_test.shape[0])]
    acc = np.array(hits).mean()

    print('Test Acc. of Model: %s is %.2f' % (model_indicator, acc))
    return acc
Ejemplo n.º 2
0
def defend_adversarial_attack(dataset, split, model_indicator, attack, epsilon, test_samples, num_steps, step_size,
                              attack_setting, gradient_samples):
    """Run a targeted adversarial attack against an HRS model and print the results.

    For each of the first ``test_samples`` test inputs, one adversarial example
    is generated for every target class in ``range(10)`` other than the input's
    own label. The attack success rate (fraction of adversarial examples that
    the model classifies as the requested target) and the mean L_inf distortion
    are printed.

    Args:
        dataset: Dataset name passed to ``get_split``, ``construct_hrs_model``,
            ``get_data`` and the attack constructor.
        split: Split identifier passed to ``get_split``.
        model_indicator: Model identifier passed to ``construct_hrs_model`` and
            echoed in the printed report.
        attack: Attack name; one of ``'FGSM'``, ``'PGD'`` or ``'CWPGD'``.
        epsilon: Passed to the attack constructor.
        test_samples: Number of leading test samples to attack.
        num_steps: Passed to the ``PGD`` / ``CW_PGD`` constructors.
        step_size: Passed to the ``PGD`` / ``CW_PGD`` constructors.
        attack_setting: ``'normal'`` calls ``perturb``; ``'EOT'`` calls
            ``perturb_gm`` with ``gradient_samples``.
        gradient_samples: Passed to ``perturb_gm`` in the ``'EOT'`` setting.

    Raises:
        ValueError: If ``attack`` or ``attack_setting`` is not a supported name.
    """
    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # get block definitions
    blocks_definition = get_split(split, dataset)

    # construct model (learning phase 0 is selected before construction)
    keras.backend.set_learning_phase(0)
    model = construct_hrs_model(dataset=dataset, model_indicator=model_indicator, blocks_definition=blocks_definition)

    # get data; only the test portion is used
    _, X_test, _, Y_test = get_data(dataset=dataset, scale1=True, one_hot=False, percentage=0.01)

    # Keep the attack *name* separate from the attack *object* so the final
    # report prints the name rather than the object's repr.
    attack_name = attack
    if attack_name == 'FGSM':
        from attack_utils import FGSM
        attacker = FGSM(model=model, epsilon=epsilon, dataset=dataset)
    elif attack_name == 'PGD':
        from attack_utils import PGD
        attacker = PGD(model=model, num_steps=num_steps, step_size=step_size, epsilon=epsilon, dataset=dataset)
    elif attack_name == 'CWPGD':
        from attack_utils import CW_PGD
        attacker = CW_PGD(model=model, num_steps=num_steps, step_size=step_size, epsilon=epsilon, dataset=dataset)
    else:
        raise ValueError('%s is not a valid attack name!' % attack_name)

    # The setting does not change between iterations, so validate it once
    # up front instead of on every generated sample.
    if attack_setting not in ('normal', 'EOT'):
        raise ValueError('%s is not a valid attack setting!' % attack_setting)

    # perform attack
    result = []
    distortion = []

    for test_sample_idx in range(test_samples):
        print('generating adv sample for test sample ' + str(test_sample_idx))
        # Slices keep the leading batch axis on both the input and its label.
        image = X_test[test_sample_idx:test_sample_idx + 1]
        label = Y_test[test_sample_idx:test_sample_idx + 1]

        # NOTE: the number of candidate target classes is hard-coded to 10.
        for target in range(10):
            # Skip the true class: only misclassification targets are attacked.
            if target == label:
                continue

            target_input = np.array([target])
            if attack_setting == 'normal':
                adversarial = attacker.perturb(image, target_input, sess)
            else:  # 'EOT'
                adversarial = attacker.perturb_gm(image, target_input, sess, gradient_samples=gradient_samples)

            output = model.predict(adversarial)
            adv_pred = np.argmax(output[0])
            result.append(int(adv_pred == target))

            # L_inf distortion is the largest *absolute* change; without the
            # absolute value, negative perturbations would be ignored.
            l_inf = np.amax(np.abs(adversarial - image))
            distortion.append(l_inf)

    # compute attack success rate (ASR) and average distortion (L_inf)
    succ_rate = np.array(result).mean()
    mean_distortion = np.array(distortion).mean()

    print('Perform %s attack to model %s' % (attack_name, model_indicator))
    print('Attack succ rate (ASR) = %.4f' % succ_rate)
    print('Average distortion = %.2f' % mean_distortion)
def defend_adversarial_reprogramming(model_indicator, split, epochs):
    """Try to reprogram a frozen CIFAR HRS model to classify MNIST inputs.

    A small trainable "input transfer" network is placed in front of the
    frozen target model and trained on MNIST data. The Keras training history
    is saved to ``./Adversarial_Reprogramming/<model_indicator>/hist.npy`` and
    the final train / validation accuracies are printed.

    Args:
        model_indicator: Model identifier passed to ``construct_hrs_model``;
            also used as the name of the output sub-directory.
        split: Split identifier passed to ``get_split``.
        epochs: Number of training epochs.

    Raises:
        OSError: If the output directory cannot be created.
    """
    # Use the function argument (not a module-level ``args`` object) so the
    # function also works when it is called outside the command-line script.
    save_dir = './Adversarial_Reprogramming/' + model_indicator + '/'
    try:
        os.makedirs(save_dir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # invalid path, ...) would only fail later when saving, so re-raise.
        if not os.path.isdir(save_dir):
            raise

    # get MNIST data
    # NOTE(review): labels are requested with one_hot=False, while the loss
    # below is a softmax cross-entropy that is documented to expect a
    # per-class distribution as labels -- confirm what get_data returns.
    [X_train, X_test, Y_train, Y_test] = get_data(dataset='MNIST',
                                                  scale1=True,
                                                  one_hot=False,
                                                  percentage=0.01)

    # input transfer model: zero-pad the 28x28x1 input by 3 on each side, then
    # apply a 3x3 locally connected layer with 3 filters, followed by tanh.
    input_transfer = Sequential()
    input_transfer.add(ZeroPadding2D(padding=3, input_shape=(28, 28, 1)))
    input_transfer.add(LocallyConnected2D(3, (3, 3), activation='relu'))
    input_transfer.add(Activation('tanh'))

    # get block definitions
    blocks_definition = get_split(split, 'CIFAR')

    # target model to reprogram (learning phase 0 is selected before construction)
    keras.backend.set_learning_phase(0)
    model = construct_hrs_model(dataset='CIFAR',
                                model_indicator=model_indicator,
                                blocks_definition=blocks_definition)
    # freeze the target model: only the input transfer layers are trained
    for layer in model.layers:
        layer.trainable = False

    # overall model: input transfer network feeding the frozen target model
    output = model(input_transfer.output)
    adv_model = Model(input_transfer.input, output)

    # optimizer and loss
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted)

    # compile
    adv_model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    # training the input transfer
    # ``epochs`` is the Keras 2 keyword; ``nb_epoch`` is its deprecated alias.
    hist = adv_model.fit(X_train,
                         Y_train,
                         batch_size=128,
                         validation_data=(X_test, Y_test),
                         epochs=epochs,
                         shuffle=True)

    # save training history
    # Depending on the Keras version the accuracy metric is recorded as
    # 'acc' or 'accuracy'; accept either.
    history = hist.history
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    train_acc = history[acc_key]
    test_acc = history['val_' + acc_key]

    np.save(save_dir + 'hist.npy', np.array(hist.history))
    print('Perform adversarial reprogramming to model %s' % model_indicator)
    print('Reprogramming Train Acc. after %d epochs of training is %.4f' %
          (epochs, train_acc[-1]))
    print('Reprogramming Test Acc. after %d epochs of training is %.4f' %
          (epochs, test_acc[-1]))