Example #1
0
def fig11(tr_data, tr_labels, te_data, te_labels, tr_files):
    """
    Tests the vaccination defense against the Benign Random Noise (BRN)
    attack seeded by results of our mimicry attack against itself and
    original, unmodified data. Performs 5 trials.

    For every attack fraction, that share of the label-1 training
    samples is replaced by the output of ``mimicry_wrap`` for a randomly
    chosen label-0 training sample. The modified training set is then
    handed to ``common.evaluate_classifier`` together with two test
    sets: ``te_data`` and the data from ``common.get_FTC_mimicry()``.
    The trial-averaged results are printed as one table row.

    Args:
        tr_data: training data; must provide ``copy()`` and support
            indexing and item assignment by sample index.
        tr_labels: training labels; samples labelled 1 are the ones
            that may be replaced, samples labelled 0 are mimicry targets.
        te_data: test data forwarded to the evaluation.
        te_labels: labels belonging to ``te_data``.
        tr_files: indexable by training-sample index; the entry is
            forwarded to ``mimicry_wrap`` (presumably the sample's file
            path -- confirm against ``mimicry_wrap``).

    Returns:
        A list holding one ``(original, mimicry)`` tuple of averaged
        scores per attack fraction, in the order the fractions are run.
    """
    mal_tr_ind = [i for i, label in enumerate(tr_labels) if label == 1]
    ben_tr_ind = [i for i, label in enumerate(tr_labels) if label == 0]
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5

    # A parenthesised single-argument print emits the same bytes under the
    # Python 2 print statement and the Python 3 print function.
    print("\n{:>6}{:>15}{:>15}".format("%", "ORIGINAL", "OUR MIMICRY"))
    pool = multiprocessing.Pool(processes=None)
    scores = []
    try:
        for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
            acc = [0.0, 0.0]
            sys.stdout.write("{:>6.2f}".format(subset * 100))
            for _ in range(TRIALS):
                tr_mod = tr_data.copy()
                # Subsample malicious training files for attack
                n_wolves = int(round(subset * len(mal_tr_ind)))
                wolf_ind = random.sample(mal_tr_ind, n_wolves)

                # Mimic random benign files using the sampled files
                pargs = [(tr_data[random.choice(ben_tr_ind)], tr_files[w_id], w_id)
                         for w_id in wolf_ind]
                for mimic, w_id in pool.imap(mimicry_wrap, pargs):
                    tr_mod[w_id] = mimic

                # Evaluate the classifier on both clean test data and mimicry data
                res = common.evaluate_classifier(tr_mod, tr_labels,
                                                 [te_data, mim_data],
                                                 [te_labels, mim_labels])
                acc = [old + new for old, new in zip(acc, res)]
            acc = [acc[0] / TRIALS, acc[1] / TRIALS]
            print("{:>15.3f}{:>15.3f}".format(acc[0], acc[1]))
            scores.append(tuple(acc))
    finally:
        # Always release the worker processes, also when a trial raises.
        # Every imap() result has been consumed by the time a normal run
        # gets here, so terminate() cannot discard pending work.
        pool.terminate()
        pool.join()
    return scores
Example #2
0
def fig10(data, labels):
    '''
    Reproduction of results published in Table 12 of "Malicious PDF 
    Detection Using Metadata and Structural Features" by Charles Smutz 
    and Angelos Stavrou, ACSAC 2012.
    '''
    ben_means, ben_devs = common.get_benign_mean_stddev(data, labels)
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5
    nCV = 10
    subsets = [0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

    pool = multiprocessing.Pool(processes=None)
    pool_args = [(data, labels, mim_data, mim_labels, ben_means, ben_devs,
                  subset, TRIALS, nCV) for subset in subsets]
    print '\n     % {:>15}{:>15}{:>15}'.format('ORIGINAL', 'MIMICRY',
                                               'OUR MIMICRY'),
    norm = TRIALS * nCV
    res = []
    for accs, subset in pool.imap(perturbate_CV_parallel, pool_args):
        print '\n{:>6.2f}'.format(subset * 100),
        for acc in accs:
            sys.stdout.write('{:>15.3f}'.format(acc / norm))
        res.append(tuple([acc / norm for acc in accs]))
    return res
Example #3
0
def fig11(tr_data, tr_labels, te_data, te_labels, tr_files):
    '''
    Tests the vaccination defense against the Benign Random Noise (BRN)
    attack seeded by results of our mimicry attack against itself and
    original, unmodified data. Performs 5 trials.

    For every attack fraction, that share of the label-1 training
    samples is replaced by the output of ``mimicry_wrap`` for a randomly
    chosen label-0 training sample. The modified training set is then
    handed to ``common.evaluate_classifier`` together with two test
    sets: ``te_data`` and the data from ``common.get_FTC_mimicry()``.
    The trial-averaged results are printed as one table row.

    Args:
        tr_data: training data; must provide ``copy()`` and support
            indexing and item assignment by sample index.
        tr_labels: training labels; samples labelled 1 are the ones
            that may be replaced, samples labelled 0 are mimicry targets.
        te_data: test data forwarded to the evaluation.
        te_labels: labels belonging to ``te_data``.
        tr_files: indexable by training-sample index; the entry is
            forwarded to ``mimicry_wrap`` (presumably the sample's file
            path -- confirm against ``mimicry_wrap``).

    Returns:
        A list holding one ``(original, mimicry)`` tuple of averaged
        scores per attack fraction, in the order the fractions are run.
    '''
    mal_tr_ind = [i for i, label in enumerate(tr_labels) if label == 1]
    ben_tr_ind = [i for i, label in enumerate(tr_labels) if label == 0]
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5

    # A parenthesised single-argument print emits the same bytes under the
    # Python 2 print statement and the Python 3 print function.
    print('\n{:>6}{:>15}{:>15}'.format('%', 'ORIGINAL', 'OUR MIMICRY'))
    pool = multiprocessing.Pool(processes=None)
    scores = []
    try:
        for subset in (0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
            acc = [0.0, 0.0]
            sys.stdout.write('{:>6.2f}'.format(subset * 100))
            for _ in range(TRIALS):
                tr_mod = tr_data.copy()
                # Subsample malicious training files for attack
                wolf_ind = random.sample(mal_tr_ind,
                                         int(round(subset * len(mal_tr_ind))))

                # Mimic random benign files using the sampled files
                pargs = [(tr_data[random.choice(ben_tr_ind)], tr_files[w_id], w_id)
                         for w_id in wolf_ind]
                for mimic, w_id in pool.imap(mimicry_wrap, pargs):
                    tr_mod[w_id] = mimic

                # Evaluate the classifier on both clean test data and mimicry data
                res = common.evaluate_classifier(tr_mod, tr_labels,
                                                 [te_data, mim_data],
                                                 [te_labels, mim_labels])
                acc = [old + new for old, new in zip(acc, res)]
            acc = [acc[0] / TRIALS, acc[1] / TRIALS]
            print('{:>15.3f}{:>15.3f}'.format(acc[0], acc[1]))
            scores.append(tuple(acc))
    finally:
        # Always release the worker processes, also when a trial raises.
        # Every imap() result has been consumed by the time a normal run
        # gets here, so terminate() cannot discard pending work.
        pool.terminate()
        pool.join()
    return scores
Example #4
0
def fig10(data, labels):
    """
    Reproduction of results published in Table 12 of "Malicious PDF 
    Detection Using Metadata and Structural Features" by Charles Smutz 
    and Angelos Stavrou, ACSAC 2012.
    """
    ben_means, ben_devs = common.get_benign_mean_stddev(data, labels)
    mim_data, mim_labels = common.get_FTC_mimicry()
    TRIALS = 5
    nCV = 10
    subsets = [0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

    pool = multiprocessing.Pool(processes=None)
    pool_args = [(data, labels, mim_data, mim_labels, ben_means, ben_devs, subset, TRIALS, nCV) for subset in subsets]
    print "\n     % {:>15}{:>15}{:>15}".format("ORIGINAL", "MIMICRY", "OUR MIMICRY"),
    norm = TRIALS * nCV
    res = []
    for accs, subset in pool.imap(perturbate_CV_parallel, pool_args):
        print "\n{:>6.2f}".format(subset * 100),
        for acc in accs:
            sys.stdout.write("{:>15.3f}".format(acc / norm))
        res.append(tuple([acc / norm for acc in accs]))
    return res