Example #1
def calculate_lid(datasets, model_path, sample_path, attack, k_nearest,
                  batch_size):
    """
    Load multiple characteristics for one dataset and one attack.
    :param dataset: 
    :param attack: 
    :param characteristics: 
    :return: 
    """
    # Load the model
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)

    [X_test_adv_train, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack + "/train")
    [X_test_adv_test, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack + "/test")
    train_num = len(X_test_adv_train)
    test_num = len(X_test_adv_test)
    X_test_adv = preprocess_image_1(
        np.concatenate((np.asarray(X_test_adv_train),
                        np.asarray(X_test_adv_test))).astype('float32'))
    if len(X_test_adv.shape) < 4:
        X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_train, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/train")
    [X_test_test, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/test")
    X_test_train = np.asarray(X_test_train)[np.random.choice(len(X_test_train),
                                                             train_num,
                                                             replace=False)]
    X_test_test = np.asarray(X_test_test)[np.random.choice(len(X_test_test),
                                                           test_num,
                                                           replace=False)]
    X_test = preprocess_image_1(
        np.concatenate((np.asarray(X_test_train),
                        np.asarray(X_test_test))).astype('float32'))
    if len(X_test.shape) < 4:
        X_test = np.expand_dims(X_test, axis=3)

    file_name = os.path.join('../detection/lid/',
                             "%s_%s.npy" % (datasets, attack))
    if not os.path.exists(file_name):
        # extract local intrinsic dimensionality
        characteristics, labels = get_lid(sess, x, model, feed_dict, X_test,
                                          X_test_adv, k_nearest, batch_size,
                                          datasets)
        data = np.concatenate((characteristics, labels), axis=1)
        np.save(file_name, data)
    return train_num
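
A minimal usage sketch, assuming an MNIST checkpoint and the hardcoded ../datasets/experiment/mnist/fgsm/{train,test} directories already exist; every value below is an illustrative placeholder, not taken from the project.

train_num = calculate_lid(datasets='mnist',             # hypothetical dataset name
                          model_path='../models/mnist',  # hypothetical checkpoint path
                          sample_path='../datasets/adversary/fgsm/mnist',  # ignored by the function body
                          attack='fgsm',                 # hypothetical attack name
                          k_nearest=20,                  # illustrative neighbourhood size
                          batch_size=100)                # illustrative batch size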
Example #2
def mutation_tutorial(datasets,
                      attack,
                      sample_path,
                      store_path,
                      model_path,
                      level=1,
                      test_num=100,
                      mutation_number=1000,
                      mutated=False):
    sess, preds, x, y, model, feed_dict = model_load(datasets,
                                                     model_path + datasets)

    sample_path = sample_path + attack + '/' + datasets
    # sample_path = '../mt_result/mnist_jsma/adv_jsma'
    [image_list, image_files, real_labels,
     predicted_labels] = utils.get_data_mutation_test(sample_path)
    count = 0
    for i in range(len(image_list)):
        ori_img = preprocess_image_1(image_list[i].astype('float64'))
        ori_img = np.expand_dims(ori_img.copy(), 0)
        p = model_argmax(sess, x, preds, ori_img, feed=feed_dict)
        if p != predicted_labels[i]:
            count = count + 1
            image_file = image_files[i]
            os.remove("../datasets/adversary/" + attack + '/' + datasets +
                      '/' + image_file)
            # os.remove(sample_path + '/' + image_file)

    print('Removed %d samples whose prediction no longer matches the recorded label.' % count)

    # Close TF session
    sess.close()
    print('Finish.')
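
A hedged example call; the attack name and directory roots are placeholders, and the function expects sample_path + attack + '/' + datasets and model_path + datasets to resolve to existing directories.

mutation_tutorial(datasets='mnist',                # hypothetical dataset name
                  attack='jsma',                   # hypothetical attack name
                  sample_path='../mt_result/',     # placeholder; attack and dataset are appended
                  store_path='../mt_result/mutation/',
                  model_path='../models/')         # placeholder; dataset name is appended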
Example #3
def nte(datasets, model, samples_path, epoch=49):
    """
    :param datasets
    :param model
    :param samples_path
    :return:
    """
    tf.reset_default_graph()
    # Object used to keep track of (and return) key accuracies
    sess, preds, x, y, model, feed_dict = model_load(datasets, model, epoch=epoch)

    [image_list, image_files, real_labels, predicted_labels] = get_data_file(samples_path)

    # NOTE: the preprocessed copy below is immediately overridden; the raw
    # images are fed to the model as loaded.
    # samples = np.asarray([preprocess_image_1(image.astype('float64')) for image in image_list])
    samples = np.asarray(image_list)
    pbs = []
    n_batches = int(np.ceil(samples.shape[0] / 256))
    for i in range(n_batches):
        start = i * 256
        end = np.minimum(len(samples), (i + 1) * 256)
        feed = {x: samples[start:end]}
        if feed_dict is not None:
            feed.update(feed_dict)
        probabilities = sess.run(preds, feed)
        #print(probabilities[1])
        for j in range(len(probabilities)):
            pro_adv_max = probabilities[j][predicted_labels[start + j]]
            temp = np.delete(probabilities[j], predicted_labels[start + j], axis=0)
            pro_adv_top2 = np.max(temp)
            pbs.append(pro_adv_max - pro_adv_top2)
    result = sum(pbs) / len(pbs)
    print('Noise Tolerance Estimation: %.4f' % result)

    # Close TF session
    sess.close()

    return result
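
A hedged example call; the model name, sample directory and epoch below are assumptions used only for illustration.

nte_score = nte(datasets='mnist',                     # hypothetical dataset name
                model='lenet1',                       # hypothetical model name
                samples_path='../adv_result/mnist/fgsm',  # placeholder sample directory
                epoch=49)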
Example #4
def detect_adv_samples(datasets, model_path, sample_path, store_path,
                       attack_type):
    print('Loading the data and model...')
    # Load the model
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)

    # # Load the dataset
    if 'mnist' == datasets:
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
    elif 'cifar10' == datasets:
        preprocess_image = preprocess_image_1
        train_start = 0
        train_end = 50000
        test_start = 0
        test_end = 10000

        # Get CIFAR10 test data
        X_train, Y_train, fn_train, X_test, Y_test, fn_test = data_cifar10(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end,
            preprocess=preprocess_image)

    # # Refine the normal, noisy and adversarial sets to only include samples for
    # # which the original version was correctly classified by the model
    # preds_test = model_argmax(sess, x, preds, X_test, feed=feed_dict)
    # inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    # X_test = X_test[inds_correct]
    # X_test = X_test[np.random.choice(len(X_test), 500)]#500
    #
    # # Check attack type, select adversarial and noisy samples accordingly
    # print('Loading adversarial samples...')
    # # Load adversarial samplesx
    # [X_test_adv, adv_image_files, real_labels, predicted_labels] = utils.get_data_mutation_test(sample_path)
    # X_test_adv = preprocess_image_1(np.asarray(X_test_adv).astype('float32'))
    # if len(X_test_adv.shape) < 4:
    #     X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_adv_train, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack_type + "/train")
    [X_test_adv_test, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack_type + "/test")
    train_num = len(X_test_adv_train)
    test_num = len(X_test_adv_test)
    X_test_adv = preprocess_image_1(
        np.concatenate((np.asarray(X_test_adv_train),
                        np.asarray(X_test_adv_test))).astype('float32'))
    if len(X_test_adv.shape) < 4:
        X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_train, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/train")
    [X_test_test, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/test")
    X_test_train = np.asarray(X_test_train)[np.random.choice(len(X_test_train),
                                                             train_num,
                                                             replace=False)]
    X_test_test = np.asarray(X_test_test)[np.random.choice(len(X_test_test),
                                                           test_num,
                                                           replace=False)]
    X_test = preprocess_image_1(
        np.concatenate((np.asarray(X_test_train),
                        np.asarray(X_test_test))).astype('float32'))
    if len(X_test.shape) < 4:
        X_test = np.expand_dims(X_test, axis=3)

    ## Get Bayesian uncertainty scores
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = get_mc_predictions(sess, x, preds,
                                        X_test).var(axis=0).mean(axis=1)
    uncerts_adv = get_mc_predictions(sess, x, preds,
                                     X_test_adv).var(axis=0).mean(axis=1)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    X_train_features = get_deep_representations(sess, x, X_train, model,
                                                feed_dict)
    X_test_normal_features = get_deep_representations(sess, x, X_test, model,
                                                      feed_dict)
    X_test_adv_features = get_deep_representations(sess, x, X_test_adv, model,
                                                   feed_dict)

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(Y_train.shape[1]):
        class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
    kdes = {}
    warnings.warn(
        "Using pre-set kernel bandwidths that were determined "
        "optimal for the specific CNN models of the paper. If you've "
        "changed your model, you'll need to re-optimize the "
        "bandwidth.")
    for i in range(Y_train.shape[1]):
        kdes[i] = KernelDensity(kernel='gaussian',
                                bandwidth=BANDWIDTHS[datasets]) \
            .fit(X_train_features[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    preds_test_normal = model_argmax(sess, x, preds, X_test, feed=feed_dict)
    preds_test_adv = model_argmax(sess, x, preds, X_test_adv, feed=feed_dict)

    # Get density estimates
    print('Computing densities...')
    densities_normal = score_samples(kdes, X_test_normal_features,
                                     preds_test_normal)
    densities_adv = score_samples(kdes, X_test_adv_features, preds_test_adv)

    uncerts_pos = uncerts_adv[:]
    uncerts_neg = uncerts_normal[:]
    characteristics, labels = merge_and_generate_labels(
        uncerts_pos, uncerts_neg)
    file_name = os.path.join('../detection/bu/',
                             "%s_%s.npy" % (datasets, attack_type))
    data = np.concatenate((characteristics, labels), axis=1)
    np.save(file_name, data)

    densities_pos = densities_adv[:]
    densities_neg = densities_normal[:]
    characteristics, labels = merge_and_generate_labels(
        densities_pos, densities_neg)
    file_name = os.path.join(
        '../detection/de/',
        "%s_%s_%.4f.npy" % (datasets, attack_type, BANDWIDTHS[datasets]))
    data = np.concatenate((characteristics, labels), axis=1)
    np.save(file_name, data)

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z = normalize(uncerts_normal, uncerts_adv)
    densities_normal_z, densities_adv_z = normalize(densities_normal,
                                                    densities_adv)

    ## Build detector
    values, labels = features(densities_pos=densities_adv_z,
                              densities_neg=densities_normal_z,
                              uncerts_pos=uncerts_adv_z,
                              uncerts_neg=uncerts_normal_z)
    X_tr, Y_tr, X_te, Y_te = block_split(values, labels, train_num)

    lr = train_lr(X_tr, Y_tr)

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs = lr.predict_proba(X_te)[:, 1]
    preds = lr.predict(X_te)
    # Compute AUC
    n_samples = int(len(X_te) / 2)
    # The first half of 'probs' is the negative class (normal samples),
    # and the second half is the positive class (adversarial samples).
    _, _, auc_score = compute_roc(probs_neg=probs[:n_samples],
                                  probs_pos=probs[n_samples:])

    precision = precision_score(Y_te, preds)
    recall = recall_score(Y_te, preds)

    y_label_pred = lr.predict(X_te)
    acc = accuracy_score(Y_te, y_label_pred)

    print(
        'Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, recall: %.4f'
        % (auc_score, acc, precision, recall))
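
A hedged invocation sketch for the detector above; the dataset, checkpoint path and attack name are placeholders, and the ../detection/bu/ and ../detection/de/ output directories are assumed to exist.

detect_adv_samples(datasets='mnist',
                   model_path='../models/mnist',   # hypothetical checkpoint path
                   sample_path='',                 # not used by this function body
                   store_path='',                  # not used by this function body
                   attack_type='jsma')             # hypothetical attack name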
Example #5
def batch_attack(datasets, attack, model_path, store_path, nb_classes):
    if 'mnist' == datasets:
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
    elif 'cifar10' == datasets:
        preprocess_image = preprocess_image_1
        train_start = 0
        train_end = 50000
        test_start = 0
        test_end = 10000

        # Get CIFAR10 test data
        X_train, Y_train, fn_train, X_test, Y_test, fn_test = data_cifar10(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end,
            preprocess=preprocess_image)
    elif 'svhn' == datasets:
        # choose the method of preprocess image
        preprocess_image = preprocess_image_1

        train_start = 0
        train_end = 73257
        test_start = 0
        test_end = 26032

        # Get SVHN test data
        X_train, Y_train, X_test, Y_test = data_svhn(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end,
            preprocess=preprocess_image)

    store_path = store_path + attack + '/' + datasets
    sample_path = '../datasets/integration/batch_attack/' + datasets + '/'
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)
    if not os.listdir(sample_path):
        for i in range(len(X_test)):
            sample = X_test[i:i + 1]
            path = sample_path + str(i) + '.png'
            imsave(path, deprocess_image_1(sample))
            current_img = ndimage.imread(path)
            img = np.expand_dims(
                preprocess_image_1(current_img.astype('float64')), 0)
            p = model_argmax(sess, x, preds, img, feed=feed_dict)
            if p != Y_test[i].argmax(axis=0):
                os.remove(path)
        # for i in range(len(X_test)):
        #     sample = X_test[i:i+1]
        #     if model_argmax(sess, x, preds, sample, feed=feed_dict) == Y_test[i].argmax(axis=0):
        #         path = sample_path + str(i) + '.png'
        #         imsave(path, deprocess_image_1(sample))

    sess.close()
    samples = os.listdir(sample_path)
    for sample in samples:
        tf.reset_default_graph()
        if 'blackbox' == attack:
            blackbox(datasets=datasets,
                     sample_path=sample_path + sample,
                     model_path=model_path,
                     store_path=store_path,
                     nb_classes=nb_classes)
        elif 'fgsm' == attack:
            fgsm(datasets=datasets,
                 sample_path=sample_path + sample,
                 model_path=model_path,
                 store_path=store_path,
                 nb_classes=nb_classes)
        else:
            i = int(sample.split('.')[-2])
            for j in range(nb_classes):
                tf.reset_default_graph()
                if Y_test[i][j] == 0:
                    if 'jsma' == attack:
                        jsma(datasets=datasets,
                             sample_path=sample_path + sample,
                             target=j,
                             model_path=model_path,
                             store_path=store_path,
                             nb_classes=nb_classes)
                    if 'cw' == attack:
                        cw(datasets=datasets,
                           sample_path=sample_path + sample,
                           target=j,
                           model_path=model_path,
                           store_path=store_path,
                           nb_classes=nb_classes)
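
A hedged driver call; the paths, attack name and class count are illustrative assumptions.

batch_attack(datasets='mnist',                     # hypothetical dataset name
             attack='jsma',                        # one of 'blackbox', 'fgsm', 'jsma', 'cw'
             model_path='../models/mnist',         # hypothetical checkpoint path
             store_path='../adv_result/',          # placeholder; attack and dataset are appended
             nb_classes=10)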
Example #6
def directory_detect(datasets, dir_path, normal, store_path, ad, sess, preds, x, feed_dict):

    print('--- Extracting images from: ', dir_path)
    if normal:
        [adv_image_list, adv_image_files, real_labels, predicted_labels] = utils.get_normal_data_mutation_test(dir_path)
    else:
        [adv_image_list, adv_image_files, real_labels, predicted_labels] = utils.get_data_mutation_test(dir_path)
    adv_count = 0
    not_decided_images = 0
    total_mutation_counts = []
    label_change_mutation_counts = []
    suc_total_mutation_counts = []
    suc_label_change_mutation_counts = []

    print('--- Evaluating inputs ---')

    if not os.path.exists(store_path):
        os.makedirs(store_path)
    detector_results = []
    summary_results = []
    for i in range(len(adv_image_list)):
        # # print('- Running image ', i)
        ori_img = preprocess_image_1(adv_image_list[i].astype('float32'))

        orig_label = predicted_labels[i]
        [result, decided, total_mutation_count, label_change_mutation_count] = ad.detect(ori_img, orig_label, sess, x,
                                                                                         preds, feed_dict)

        detector_results.append(adv_image_files[i] + ',' + str(result) + ',' + str(decided) + ',' + str(total_mutation_count) + ',' + str(label_change_mutation_count))

        if result:
            adv_count += 1
            if not normal: # Record the counts for adversaries
                suc_total_mutation_counts.append(total_mutation_count)
                suc_label_change_mutation_counts.append(label_change_mutation_count)

        if normal and not result: # Record the counts for normals
            suc_total_mutation_counts.append(total_mutation_count)
            suc_label_change_mutation_counts.append(label_change_mutation_count)

        if not decided:
            not_decided_images += 1

        total_mutation_counts.append(total_mutation_count)
        label_change_mutation_counts.append(label_change_mutation_count)

    with open(store_path + "/detection_result.csv", "w") as f:
        for item in detector_results:
            f.write("%s\n" % item)

    summary_results.append('adv_num,' + str(len(adv_image_list)))
    summary_results.append('identified_num,' + str(adv_count))
    summary_results.append('undecided_num,' + str(not_decided_images))

    if normal:
        summary_results.append('accuracy,' + str(1 - float(adv_count)/len(total_mutation_counts)))
    else:
        summary_results.append('accuracy,' + str(float(adv_count)/len(total_mutation_counts)))

    if len(suc_label_change_mutation_counts) > 0:
        summary_results.append(
            'avg_mutation_num,' + str(sum(suc_total_mutation_counts) / len(suc_total_mutation_counts)))
        summary_results.append(
            'avg_lc_num,' + str(float(sum(suc_label_change_mutation_counts)) / len(suc_label_change_mutation_counts)))

    summary_results.append(total_mutation_counts)
    summary_results.append(label_change_mutation_counts)

    with open(store_path + "/detection_summary_result.csv", "w") as f:
        for item in summary_results:
            f.write("%s\n" % item)

    print('- Total adversary images evaluated: ', len(adv_image_list))
    print('- Identified adversaries: ', adv_count)
    print('- Not decided images: ', not_decided_images)
    if len(suc_label_change_mutation_counts) > 0:
        print('- Average mutation needed: ', sum(suc_total_mutation_counts) / len(suc_total_mutation_counts))
        print('- Average label change mutations: ',
              float(sum(suc_label_change_mutation_counts)) / len(suc_label_change_mutation_counts))
    else:
        # The summary file has already been written, so report the fall-back
        # averages over all evaluated inputs on stdout instead of appending
        # them to summary_results (where they would be lost).
        print('- Average mutation needed: ', sum(total_mutation_counts) / len(total_mutation_counts))
        print('- Average label change mutations: ',
              float(sum(label_change_mutation_counts)) / len(label_change_mutation_counts))
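
A hedged usage sketch; it assumes model_load is available as in the other examples and that ad is a pre-constructed detector object exposing the detect(img, label, sess, x, preds, feed_dict) method called above. All paths are placeholders.

sess, preds, x, y, model, feed_dict = model_load('mnist', '../models/mnist')  # placeholder path
directory_detect(datasets='mnist',
                 dir_path='../datasets/experiment/mnist/jsma/test',      # placeholder image directory
                 normal=False,                    # True when the directory holds normal samples
                 store_path='../detection/mutation/mnist_jsma',          # results are written here
                 ad=ad,                           # hypothetical detector object
                 sess=sess, preds=preds, x=x, feed_dict=feed_dict)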
Example #7
def jsma(datasets,
         sample_path,
         model_name,
         target,
         store_path='../mt_result/integration/jsma/mnist'):
    """
    the Jacobian-based saliency map approach (JSMA)
    :param datasets
    :param sample: inputs to attack
    :param target: the class want to generate
    :param nb_classes: number of output classes
    :return:
    """
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name)

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    if 'mnist' == datasets:
        sample = np.asarray(
            [np.asarray(imread(sample_path)).reshape(28, 28,
                                                     1)]).astype('float32')
        sample = preprocess_image_1(sample)
    elif 'cifar10' == datasets:
        sample = np.asarray(
            [np.asarray(imread(sample_path)).reshape(32, 32,
                                                     3)]).astype('float32')
        sample = preprocess_image_1(sample)
    elif 'svhn' == datasets:
        sample = np.asarray(
            [np.asarray(imread(sample_path)).reshape(32, 32,
                                                     3)]).astype('float32')
        sample = preprocess_image_1(sample)

    input_shape, nb_classes = get_shape(datasets)

    current_class = model_argmax(sess, x, preds, sample, feed=feed_dict)

    if not os.path.exists(store_path):
        os.makedirs(store_path)

    if target == current_class:
        return 'The target is equal to its original class'
    elif target >= nb_classes or target < 0:
        return 'The target is out of range'

    print('Start generating adv. example for target class %i' % target)
    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # This call runs the Jacobian-based saliency map approach
    one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
    one_hot_target[0, target] = 1
    jsma_params['y_target'] = one_hot_target
    adv_x = jsma.generate_np(sample, **jsma_params)

    # Check if success was achieved
    new_class_label = model_argmax(
        sess, x, preds, adv_x,
        feed=feed_dict)  # Predicted class of the generated adversary
    res = int(new_class_label == target)

    # Close TF session
    sess.close()
    if res == 1:
        adv_img_deprocessed = deprocess_image_1(adv_x)
        i = sample_path.split('/')[-1].split('.')[-2]
        path = store_path + '/adv_' + str(
            time.time() * 1000) + '_' + i + '_' + str(
                current_class) + '_' + str(new_class_label) + '_.png'
        imsave(path, adv_img_deprocessed)
        print('$$$adv_img{' + path + '}')

    print('$$$ori_img{' + sample_path + '}')
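
A hedged example call; the image path, model name and target class below are placeholders chosen for illustration.

jsma(datasets='mnist',
     sample_path='../datasets/integration/batch_attack/mnist/42.png',  # placeholder input image
     model_name='lenet1',                                              # hypothetical model name
     target=3,                                                         # hypothetical target class
     store_path='../mt_result/integration/jsma/mnist')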