def fig11(tr_data, tr_labels, te_data, te_labels, tr_files): """ Tests the vaccination defense against the Benign Random Noise (BRN) attack seeded by results of our mimicry attack against itself and original, unmodified data. Performs 5 trials. """ mal_tr_ind = [i for i, l in enumerate(tr_labels) if l == 1] ben_tr_ind = [i for i, l in enumerate(tr_labels) if l == 0] mim_data, mim_labels = common.get_FTC_mimicry() TRIALS = 5 print "\n{:>6}{:>15}{:>15}".format("%", "ORIGINAL", "OUR MIMICRY") pool = multiprocessing.Pool(processes=None) scores = [] for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1): acc = [0.0, 0.0] sys.stdout.write("{:>6.2f}".format(subset * 100)) for _ in range(TRIALS): tr_mod = tr_data.copy() # Subsample malicious training files for attack wolf_ind = random.sample(mal_tr_ind, int(round(subset * len(mal_tr_ind)))) # Mimic random benign files using the sampled files pargs = [(tr_data[random.choice(ben_tr_ind)], tr_files[w_id], w_id) for w_id in wolf_ind] for mimic, w_id in pool.imap(mimicry_wrap, pargs): tr_mod[w_id] = mimic # Evaluate the classifier on both clean test data and mimicry data res = common.evaluate_classifier(tr_mod, tr_labels, [te_data, mim_data], [te_labels, mim_labels]) acc = [old + new for old, new in zip(acc, res)] acc = [acc[0] / TRIALS, acc[1] / TRIALS] print "{:>15.3f}{:>15.3f}".format(acc[0], acc[1]) scores.append(tuple(acc)) return scores
def fig10(data, labels): ''' Reproduction of results published in Table 12 of "Malicious PDF Detection Using Metadata and Structural Features" by Charles Smutz and Angelos Stavrou, ACSAC 2012. ''' ben_means, ben_devs = common.get_benign_mean_stddev(data, labels) mim_data, mim_labels = common.get_FTC_mimicry() TRIALS = 5 nCV = 10 subsets = [0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1] pool = multiprocessing.Pool(processes=None) pool_args = [(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV) for subset in subsets] print '\n % {:>15}{:>15}{:>15}'.format('ORIGINAL', 'MIMICRY', 'OUR MIMICRY'), norm = TRIALS * nCV res = [] for accs, subset in pool.imap(perturbate_CV_parallel, pool_args): print '\n{:>6.2f}'.format(subset * 100), for acc in accs: sys.stdout.write('{:>15.3f}'.format(acc / norm)) res.append(tuple([acc / norm for acc in accs])) return res
def fig11(tr_data, tr_labels, te_data, te_labels, tr_files): ''' Tests the vaccination defense against the Benign Random Noise (BRN) attack seeded by results of our mimicry attack against itself and original, unmodified data. Performs 5 trials. ''' mal_tr_ind = [i for i, l in enumerate(tr_labels) if l == 1] ben_tr_ind = [i for i, l in enumerate(tr_labels) if l == 0] mim_data, mim_labels = common.get_FTC_mimicry() TRIALS = 5 print '\n{:>6}{:>15}{:>15}'.format('%', 'ORIGINAL', 'OUR MIMICRY') pool = multiprocessing.Pool(processes=None) scores = [] for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1): acc = [0.0, 0.0] sys.stdout.write('{:>6.2f}'.format(subset * 100)) for _ in range(TRIALS): tr_mod = tr_data.copy() # Subsample malicious training files for attack wolf_ind = random.sample(mal_tr_ind, int(round(subset * len(mal_tr_ind)))) # Mimic random benign files using the sampled files pargs = [(tr_data[random.choice(ben_tr_ind)], tr_files[w_id], w_id) for w_id in wolf_ind] for mimic, w_id in pool.imap(mimicry_wrap, pargs): tr_mod[w_id] = mimic # Evaluate the classifier on both clean test data and mimicry data res = common.evaluate_classifier(tr_mod, tr_labels, [te_data, mim_data], [te_labels, mim_labels]) acc = [old + new for old, new in zip(acc, res)] acc = [acc[0] / TRIALS, acc[1] / TRIALS] print '{:>15.3f}{:>15.3f}'.format(acc[0], acc[1]) scores.append(tuple(acc)) return scores
def fig10(data, labels): """ Reproduction of results published in Table 12 of "Malicious PDF Detection Using Metadata and Structural Features" by Charles Smutz and Angelos Stavrou, ACSAC 2012. """ ben_means, ben_devs = common.get_benign_mean_stddev(data, labels) mim_data, mim_labels = common.get_FTC_mimicry() TRIALS = 5 nCV = 10 subsets = [0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1] pool = multiprocessing.Pool(processes=None) pool_args = [(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV) for subset in subsets] print "\n % {:>15}{:>15}{:>15}".format("ORIGINAL", "MIMICRY", "OUR MIMICRY"), norm = TRIALS * nCV res = [] for accs, subset in pool.imap(perturbate_CV_parallel, pool_args): print "\n{:>6.2f}".format(subset * 100), for acc in accs: sys.stdout.write("{:>15.3f}".format(acc / norm)) res.append(tuple([acc / norm for acc in accs])) return res