Example #1
import multiprocessing
import random
import sys

# common and mimicry_wrap come from the surrounding project (not shown here);
# the stdlib imports above are the ones this function uses directly.

def fig11(tr_data, tr_labels, te_data, te_labels, tr_files):
    """
    Tests the vaccination defense against the Benign Random Noise (BRN)
    attack seeded by the results of our mimicry attack, evaluating the
    resulting classifier on both the mimicry samples themselves and the
    original, unmodified test data. Performs 5 trials per subsampling rate.
    """
    mal_tr_ind = [i for i, l in enumerate(tr_labels) if l == 1]
    ben_tr_ind = [i for i, l in enumerate(tr_labels) if l == 0]
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5

    print "\n{:>6}{:>15}{:>15}".format("%", "ORIGINAL", "OUR MIMICRY")
    pool = multiprocessing.Pool(processes=None)
    scores = []
    for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
        acc = [0.0, 0.0]
        sys.stdout.write("{:>6.2f}".format(subset * 100))
        sys.stdout.flush()
        for _ in range(TRIALS):
            tr_mod = tr_data.copy()
            # Subsample malicious training files for attack
            wolf_ind = random.sample(mal_tr_ind, int(round(subset * len(mal_tr_ind))))

            # Mimic random benign files using the sampled files
            pargs = [(tr_data[random.choice(ben_tr_ind)], tr_files[w_id], w_id)
                     for w_id in wolf_ind]
            for mimic, w_id in pool.imap(mimicry_wrap, pargs):
                tr_mod[w_id] = mimic

            # Evaluate the classifier on both clean test data and mimicry data
            res = common.evaluate_classifier(tr_mod, tr_labels,
                                             [te_data, mim_data],
                                             [te_labels, mim_labels])
            acc = [old + new for old, new in zip(acc, res)]
        acc = [acc[0] / TRIALS, acc[1] / TRIALS]
        print "{:>15.3f}{:>15.3f}".format(acc[0], acc[1])
        scores.append(tuple(acc))
    pool.close()
    pool.join()
    return scores
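
A minimal driver sketch for the function above. Everything here is illustrative: the arrays stand in for the project's real feature matrices and labels, and the call only works where the project's common module is importable and mimicry_wrap (which, per the pool.imap call, maps a (benign_sample, wolf_file, wolf_id) tuple to a (mimic, wolf_id) pair) is defined.

# Illustrative invocation of fig11; shapes, counts, and file names are made up.
import numpy

tr_data = numpy.random.rand(200, 100)           # 200 samples, 100 features
tr_labels = numpy.array([1] * 100 + [0] * 100)  # 1 = malicious, 0 = benign
te_data = numpy.random.rand(50, 100)
te_labels = numpy.array([1] * 25 + [0] * 25)
tr_files = ['sample_%03d.pdf' % i for i in range(200)]  # raw training files

rates = (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1)
scores = fig11(tr_data, tr_labels, te_data, te_labels, tr_files)
# One (original_acc, mimicry_acc) pair per vaccination rate, averaged over 5 trials
for rate, (orig_acc, mim_acc) in zip(rates, scores):
    print '{:.2f}% vaccinated: original={:.3f}, mimicry={:.3f}'.format(
        rate * 100, orig_acc, mim_acc)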
Example #2
import numpy
from sklearn.cross_validation import KFold  # legacy scikit-learn API, matching the call below

# common and perturbate are defined in the surrounding project.

def perturbate_CV(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV):
    """
    Runs TRIALS trials of nCV-fold cross-validation, training a
    RandomForest on a perturbed subset of the data and testing on (1)
    the original, clean data, (2) 100% perturbed data, and (3) mimicry
    attack samples. Returns a list of classification accuracies, one
    per test set, summed across all folds of all trials, along with
    the subset rate.
    """
    accs = [0.0, 0.0, 0.0]
    for _ in range(TRIALS):
        # Shuffle input data
        shuf_indices = numpy.arange(len(data))
        numpy.random.shuffle(shuf_indices)
        trial_data = data[shuf_indices]
        trial_labels = labels[shuf_indices]

        # Run nCV-fold cross-validation
        kf = KFold(len(trial_data), n_folds=nCV, indices=True)
        for tr, te in kf:
            test_data = [
                trial_data[te],
                perturbate(trial_data[te], trial_labels[te], 1.0, ben_means, ben_devs),
                mim_data,
            ]
            test_labels = [trial_labels[te], trial_labels[te], mim_labels]
            acc = common.evaluate_classifier(
                perturbate(trial_data[tr], trial_labels[tr], subset, ben_means, ben_devs),
                trial_labels[tr],
                test_data,
                test_labels,
            )
            accs = [old + new for old, new in zip(accs, acc)]
    return accs, subset
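
Because perturbate_CV accumulates one accuracy per fold of every trial, the returned sums are scaled by TRIALS * nCV. A short sketch of recovering per-test-set means; the inputs below are synthetic placeholders, and ben_means/ben_devs are assumed to be per-feature statistics of the benign class (real values come from the project's loaders).

# Synthetic placeholder inputs; real data comes from the surrounding project.
import numpy

data = numpy.random.rand(120, 20)
labels = numpy.array([1, 0] * 60)
mim_data = numpy.random.rand(30, 20)
mim_labels = numpy.ones(30, dtype=int)
ben_means = data[labels == 0].mean(axis=0)
ben_devs = data[labels == 0].std(axis=0)

# accs holds accuracy sums over TRIALS * nCV evaluations; divide to get means.
TRIALS, nCV = 5, 10
accs, rate = perturbate_CV(data, labels, mim_data, mim_labels,
                           ben_means, ben_devs, 0.1, TRIALS, nCV)
clean_acc, pert_acc, mim_acc = [a / (TRIALS * nCV) for a in accs]
print 'subset={:.4f}: clean={:.3f}, perturbed={:.3f}, mimicry={:.3f}'.format(
    rate, clean_acc, pert_acc, mim_acc)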