Beispiel #1
0
def plot_apiImportNum():
    """Plot the ``api_import_nb`` feature via ``plot_feature_dist_packers``.

    The wildlab dataframe is reduced to the label columns plus the feature
    itself (columns missing from the dataframe are skipped) before it is
    handed to the plotting helper with ``cdf=-1`` and ``x_max=80``.
    """
    feature = 'api_import_nb'
    wanted = util.LABELS + [feature]
    frame = util.load_wildlab_df()
    # Keep the order of ``wanted`` while dropping columns the frame lacks.
    present = [name for name in wanted if name in frame.columns]
    plot_feature_dist_packers(frame[present], feature, cdf=-1, x_max=80)
Beispiel #2
0
def adv_onlyOnePacker(modelpath, confspath, featurespath):
    """Run ``attack`` on every sample labelled and predicted as class 1.

    Args:
        modelpath: Path handed to ``joblib.load`` as given.
        confspath: Path of the JSON results file, resolved relative to the
            directory of this source file. The per-sample confidences are
            read from its ``['0.5']['1.0']['confidence']`` entry, which is
            itself a JSON-encoded string.
        featurespath: Path of a JSON file with parallel ``weights`` and
            ``features`` lists, resolved relative to the directory of this
            source file.

    Side effects:
        Rebinds the module globals ``clf``, ``benign_feature_names``,
        ``features_df``, ``df``, ``dfb`` and ``dfm``, prints a progress
        line, and writes ``malconfs-adv.json`` into the results directory.
    """
    # presumably these globals are what ``attack`` reads inside the pool
    # workers -- confirm against ``attack``
    global clf, benign_feature_names, features_df, df, dfb, dfm

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    confspath = os.path.abspath(os.path.join(cur_dir, confspath))
    conf_results = util.read_json(confspath)
    featurespath = os.path.abspath(os.path.join(cur_dir, featurespath))
    features = util.read_json(featurespath)

    # Order the [weight, name] pairs by descending weight (ties fall back to
    # the name, also descending, because whole pairs are compared).
    features_sorted = sorted(
        ([w, f] for w, f in zip(features['weights'], features['features'])),
        reverse=True)
    feature_names_sorted = [f for _, f in features_sorted]
    feature_weights_sorted = [w for w, _ in features_sorted]

    clf = joblib.load(modelpath)
    assert conf_results and clf
    confs = json.loads(conf_results['0.5']['1.0']['confidence'])
    # feature_names = json.loads(json.loads(res['1.0']['1.0']['features']))

    # Samples whose label and prediction are both 1 (presumably correctly
    # detected malware -- confirm the label encoding).
    malconfs = {
        sample_id: val
        for sample_id, val in confs.items()
        if val['label'] == 1 and val['predict'] == 1
    }

    df = util.load_wildlab_df()
    known_features = set(feature_names_sorted)
    features_df = [f for f in df.columns if f in known_features]
    # df = df[df.packer_name == packer]
    dfb = df[df.benign]
    dfm = df[df.malicious]
    benign_features = get_benign_features(feature_names_sorted,
                                          feature_weights_sorted, dfb, dfm)
    benign_feature_names = [f for _, f, _, _ in benign_features]

    # One [sample_id, initial confidence] work item per selected sample.
    data = [[sample_id, val['conf']] for sample_id, val in malconfs.items()]
    print("generating adv. samples for {} samples".format(len(data)))
    with multiprocessing.Pool() as pool:
        outcomes = pool.map(attack, data)

    # ``attack`` yields 6-tuples; index them by sample id for the JSON dump.
    summary = {
        sample_id: {
            'log': log,
            'initConf': init_conf,
            'finalConf': final_conf,
            'minChanges': min_changes,
            'maxChanges': max_changes
        }
        for log, init_conf, final_conf, sample_id, min_changes, max_changes
        in outcomes
    }

    # NOTE(review): unlike the input paths above, this directory is relative
    # to the current working directory, not to this file -- confirm intended.
    resdir = '../../../results/paper/experiments/exp-adversarial'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(resdir, exist_ok=True)
    with open(os.path.join(resdir, 'malconfs-adv.json'), 'w') as f:
        json.dump(summary, f)
Beispiel #3
0
def load_data(respath):
    """Build a 70/30 train/test split with ``packer_name`` as the target.

    The wildlab dataframe is balanced with ``balance_per_packer``, its
    ``packer_name`` values are replaced by integer codes (1..N in sorted
    order, also stored in the module global ``packer_codes``), and the frame
    is passed through ``exp_util.label_encode``. 70% of the rows are sampled
    with ``random_state=SEED`` for training; rows whose index is not in the
    training sample form the test set.

    Args:
        respath: Forwarded unchanged to ``exp_util.label_encode``.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where the ``*_y`` parts are
        the ``packer_name`` columns and the ``*_x`` parts have
        ``DROP_COLUMNS`` removed (missing columns are ignored).
    """
    global packer_codes

    frame = balance_per_packer(util.load_wildlab_df())

    # Number the packers starting at 1, following their sorted order.
    packer_codes = {
        name: code
        for code, name in enumerate(sorted(frame.packer_name.unique()),
                                    start=1)
    }
    frame['packer_name'] = [
        packer_codes[name] for name in frame['packer_name']
    ]
    frame = exp_util.label_encode(frame, respath)

    n_train = int(len(frame) * 0.7)
    train_part = frame.sample(n_train, random_state=SEED)
    # Everything that was not drawn for training becomes test data.
    test_part = frame[~frame.index.isin(train_part.index)]

    train_y = train_part['packer_name']
    test_y = test_part['packer_name']
    train_x = train_part.drop(columns=DROP_COLUMNS, axis=1, errors='ignore')
    test_x = test_part.drop(columns=DROP_COLUMNS, axis=1, errors='ignore')

    return train_x, train_y, test_x, test_y
Beispiel #4
0
def _latex_count_cell(count, total):
    """Format ``count`` and its percentage of ``total`` as a bold LaTeX cell."""
    # An empty group has no meaningful share; report 0.0 rather than letting
    # the division raise ZeroDivisionError and abort the whole table.
    share = round((count * 100.0) / total, 2) if total else 0.0
    return '\\textbf{' + str(count) + ' (' + str(share) + '\\%)}'


def get_common_features():
    """Print LaTeX table rows for the features most often reported per packer.

    For every packer in ``util.PACKERS`` except a fixed skip list, the
    features returned by ``get_top_features(respath, 50)`` are tallied. The
    11 most frequent ones, minus ``api_import_nb``, are kept. For each kept
    feature one row is printed that lists, per packer, how many benign and
    how many malicious rows have the feature set, with the percentage of the
    respective group. The list of packers is printed before the rows.
    """
    from collections import Counter

    skipped = ('none', 'dolphin-dropper-3', 'themida-v2', 'telock', 'kkrunchy')
    packers = [p for p in util.PACKERS if p not in skipped]

    tally = Counter()
    for p in packers:
        respath = '../../results/paper/experiments/exp-singlePacker/rf/lab-v3/{}/import/exp.db.json'.format(
            p)
        for name in get_top_features(respath, 50):
            tally[name] += 1
    features = [f for f, _ in tally.most_common(11) if f != 'api_import_nb']

    df = util.load_wildlab_df()
    labels = util.LABELS
    cols = [c for c in df.columns if c.startswith('imp_') or c in labels]
    df = df[cols]
    print(packers)

    # Split the frame once per packer instead of once per (feature, packer).
    groups = []
    for p in packers:
        dp = df[df.packer_name == p]
        groups.append((dp[dp.benign], dp[dp.malicious]))

    for f in features:
        cells = [str(f)]
        for db, dm in groups:
            # Boolean-mask on the feature column to count rows that have it.
            cells.append(_latex_count_cell(len(db[db[f]]), len(db)))
            cells.append(_latex_count_cell(len(dm[dm[f]]), len(dm)))
        print(' & '.join(cells) + " \\\\")
Beispiel #5
0
import itertools
import numpy as np
import exp_util

import sys
sys.path.append('../')
import util

# Ratio sweep: 0.0 to 1.0 inclusive, in increments of ``ratio_step`` percent.
ratio_step = 10
rounds = 5
ratios = [pct / 100 for pct in range(0, 100 + ratio_step, ratio_step)]
# for the main program
# (ratio, 1.0, round) triples with the round index varying fastest; only the
# first 11 triples are kept.
# NOTE(review): with 5 rounds the first 11 triples cover ratios 0.0 and 0.1
# completely plus one round of 0.2, not one triple per ratio -- confirm that
# this cut-off is intended.
iterations = list(
    itertools.islice(itertools.product(ratios, [1.0], range(rounds)), 11))
model_name = 'nn'

# Only the label columns that the loaded frame actually contains are kept.
dataframe = util.load_wildlab_df()
columns = [name for name in util.LABELS if name in dataframe.columns]
dataframe = dataframe[columns]
res_dir = '{}/exp-labDiffPackedBenign/{}'.format(exp_util.RES_ROOT, model_name)

# The result directory is created at import time; the database lives in it.
util.make_dir(res_dir)
database = '{}/exp.db'.format(res_dir)

n_workers = 1
cores_per_worker = -1

sizes = {
    'training_ratio': 0.7,
    'testing_packed_benign_ratio': 0.5,
    'testing_packed_malicious_ratio': 1,
}

Beispiel #6
0
import exp_util

import sys
sys.path.append('../')
import util

rounds = 5
# for the main program
# NOTE(review): ``itertools`` is used here but is not imported in the visible
# part of this file -- confirm that it is imported elsewhere.
iterations = list(itertools.product([0.5], [0.75], range(rounds)))
# Command line: argv[2] is the model name, argv[3:] the feature categories.
model_name = sys.argv[2]
features = exp_util.get_features_ctgs(sys.argv[3:])

# String features are requested only when the 'strings' category is present;
# all other loader flags are fixed.
dataframe = util.load_wildlab_df(nocorrupted=False,
                                 noduplicate=True,
                                 vtagree=True,
                                 dpiagree=False,
                                 strings='strings' in features)

# Results go to ".../all" for the catch-all selection, otherwise to a
# directory named after the sorted, dash-joined feature categories.
res_dir = '{}/exp-wild/{}/{}'.format(
    exp_util.RES_ROOT, model_name,
    'all' if features == 'all' or ['all'] == features
    else '-'.join(sorted(features)))
util.make_dir(res_dir)