def fig11(tr_data, tr_labels, te_data, te_labels, tr_files):
    """
    Tests the vaccination defense against the Benign Random Noise (BRN)
    attack seeded by results of our mimicry attack against itself and
    original, unmodified data. Performs 5 trials.

    Returns a list of (clean_accuracy, mimicry_accuracy) tuples, one per
    attacked-subset fraction, averaged over the trials.
    """
    mal_tr_ind = [i for i, l in enumerate(tr_labels) if l == 1]
    ben_tr_ind = [i for i, l in enumerate(tr_labels) if l == 0]
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5

    print("\n{:>6}{:>15}{:>15}".format("%", "ORIGINAL", "OUR MIMICRY"))
    pool = multiprocessing.Pool(processes=None)
    scores = []
    try:
        for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
            acc = [0.0, 0.0]
            sys.stdout.write("{:>6.2f}".format(subset * 100))
            for _ in range(TRIALS):
                tr_mod = tr_data.copy()
                # Subsample malicious training files for attack
                wolf_ind = random.sample(mal_tr_ind,
                                         int(round(subset * len(mal_tr_ind))))
                # Mimic random benign files using the sampled files
                pargs = [(tr_data[random.choice(ben_tr_ind)],
                          tr_files[w_id], w_id) for w_id in wolf_ind]
                for mimic, w_id in pool.imap(mimicry_wrap, pargs):
                    tr_mod[w_id] = mimic
                # Evaluate on both clean test data and mimicry data
                res = common.evaluate_classifier(tr_mod, tr_labels,
                                                 [te_data, mim_data],
                                                 [te_labels, mim_labels])
                acc = [old + new for old, new in zip(acc, res)]
            acc = [acc[0] / TRIALS, acc[1] / TRIALS]
            print("{:>15.3f}{:>15.3f}".format(acc[0], acc[1]))
            scores.append(tuple(acc))
    finally:
        # BUG FIX: the worker pool was never shut down, leaking worker
        # processes on every call. Ensure shutdown even on error.
        pool.close()
        pool.join()
    return scores
def perturbate_CV(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV):
    """
    Runs TRIALS trials of nCV-fold cross-validation, training RandomForest
    on a perturbated subset of data and testing on (1) original, clean
    data, (2) 100% perturbated data, and (3) mimicry attack samples.

    Returns a list of classification accuracy values, one per test set,
    summed across all trials, paired with the subset fraction used.
    """
    totals = [0.0, 0.0, 0.0]
    for _ in range(TRIALS):
        # Shuffle data and labels in lockstep via a shared permutation
        order = numpy.arange(len(data))
        numpy.random.shuffle(order)
        shuf_data = data[order, ]
        shuf_labels = labels[order]
        # nCV-fold cross-validation over the shuffled samples
        folds = KFold(len(shuf_data), n_folds=nCV, indices=True)
        for train_idx, test_idx in folds:
            # NOTE: test-set perturbation happens before the training-set
            # perturbation below, matching the RNG consumption order.
            eval_sets = [
                shuf_data[test_idx],
                perturbate(shuf_data[test_idx], shuf_labels[test_idx],
                           1.0, ben_means, ben_devs),
                mim_data,
            ]
            eval_labels = [shuf_labels[test_idx],
                           shuf_labels[test_idx],
                           mim_labels]
            fold_acc = common.evaluate_classifier(
                perturbate(shuf_data[train_idx], shuf_labels[train_idx],
                           subset, ben_means, ben_devs),
                shuf_labels[train_idx],
                eval_sets,
                eval_labels,
            )
            totals = [running + new for running, new in zip(totals, fold_acc)]
    return totals, subset
def perturbate_CV(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV):
    '''
    Runs TRIALS trials of nCV-fold cross-validation, training RandomForest
    on a perturbated subset of data and testing on (1) original, clean
    data, (2) 100% perturbated data, and (3) mimicry attack samples.

    Returns a list of classification accuracy values, one per test set,
    summed across all trials, together with the subset fraction used.
    '''
    running_acc = [0., 0., 0.]
    for _trial in range(TRIALS):
        # Draw a random permutation and apply it to samples and labels alike
        perm = numpy.arange(len(data))
        numpy.random.shuffle(perm)
        d = data[perm, ]
        y = labels[perm]
        # Cross-validate on the shuffled copies
        for tr_ix, te_ix in KFold(len(d), n_folds=nCV, indices=True):
            # Fully perturbated test set is built first, then the partially
            # perturbated training set — same RNG order as before.
            fully_perturbed = perturbate(d[te_ix], y[te_ix],
                                         1.0, ben_means, ben_devs)
            test_sets = [d[te_ix], fully_perturbed, mim_data]
            test_label_sets = [y[te_ix], y[te_ix], mim_labels]
            poisoned_train = perturbate(d[tr_ix], y[tr_ix],
                                        subset, ben_means, ben_devs)
            res = common.evaluate_classifier(poisoned_train, y[tr_ix],
                                             test_sets, test_label_sets)
            running_acc = [s + r for s, r in zip(running_acc, res)]
    return running_acc, subset
def fig11(tr_data, tr_labels, te_data, te_labels, tr_files):
    '''
    Tests the vaccination defense against the Benign Random Noise (BRN)
    attack seeded by results of our mimicry attack against itself and
    original, unmodified data. Performs 5 trials.

    Returns a list of (clean_accuracy, mimicry_accuracy) tuples, one per
    attacked-subset fraction, averaged over the trials.
    '''
    mal_tr_ind = [i for i, l in enumerate(tr_labels) if l == 1]
    ben_tr_ind = [i for i, l in enumerate(tr_labels) if l == 0]
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5

    print('\n{:>6}{:>15}{:>15}'.format('%', 'ORIGINAL', 'OUR MIMICRY'))
    pool = multiprocessing.Pool(processes=None)
    scores = []
    try:
        for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
            acc = [0.0, 0.0]
            sys.stdout.write('{:>6.2f}'.format(subset * 100))
            for _ in range(TRIALS):
                tr_mod = tr_data.copy()
                # Subsample malicious training files for attack
                wolf_ind = random.sample(mal_tr_ind,
                                         int(round(subset * len(mal_tr_ind))))
                # Mimic random benign files using the sampled files
                pargs = [(tr_data[random.choice(ben_tr_ind)],
                          tr_files[w_id], w_id) for w_id in wolf_ind]
                for mimic, w_id in pool.imap(mimicry_wrap, pargs):
                    tr_mod[w_id] = mimic
                # Evaluate on both clean test data and mimicry data
                res = common.evaluate_classifier(tr_mod, tr_labels,
                                                 [te_data, mim_data],
                                                 [te_labels, mim_labels])
                acc = [old + new for old, new in zip(acc, res)]
            acc = [acc[0] / TRIALS, acc[1] / TRIALS]
            print('{:>15.3f}{:>15.3f}'.format(acc[0], acc[1]))
            scores.append(tuple(acc))
    finally:
        # BUG FIX: the worker pool was never shut down, leaking worker
        # processes on every call. Ensure shutdown even on error.
        pool.close()
        pool.join()
    return scores