Example #1
0
def time_feature_extraction(rep=1):
    """Time ``feature_extraction.extract_all_features`` on one mixed DGA set.

    Loads a single pickled set into a fresh workspace, collects the
    ``domain`` attribute of every entry in the set's ``full`` collection
    and prints the duration reported by the timer.

    :param rep: passed to the timer as ``number``, i.e. how often the
        extraction is executed
    :return: None, the measured time is printed
    """
    set_name = 'mixed_dga_grouped_family_50000_59_0.pkl'

    workspace = Workspace(days=1, empty=True)
    workspace.load(set_name, settings.SetTypes.mixed_dga_grouped_family.value)

    loaded_set = workspace.data_sets_loaded[set_name]
    domains = []
    for entry in loaded_set.full:
        domains.append(entry.domain)

    def _extract():
        # Only the extraction itself is timed; loading happened above.
        return feature_extraction.extract_all_features(domains)

    elapsed = Timer(_extract).timeit(number=rep)
    print(elapsed)
Example #2
0
def train_mixed_dga(clf_types=('svm', 'rf'), n_jobs=-1):
    """Train classifiers on every loaded mixed, family-grouped DGA set.

    One ``_training(clf_type, data_set)`` task is scheduled for each
    combination of loaded data set and classifier type, and all tasks are
    handed to a ``Parallel`` runner.

    :param clf_types: iterable of classifier type names to train. The
        default is an immutable tuple so it cannot be altered between calls.
    :param n_jobs: forwarded to ``Parallel``
    :return: None
    """
    w = Workspace(days=1, empty=True)
    w.load_all(settings.SetTypes.mixed_dga_grouped_family.value)

    # The types are iterated once per data set below; materialise them so a
    # one-shot iterable (e.g. a generator) is not exhausted after the first set.
    clf_types = tuple(clf_types)

    parallel = Parallel(n_jobs=n_jobs, verbose=1)
    parallel(
        delayed(_training)(clf_type, s)
        for s in w.data_sets_loaded.values()
        for clf_type in clf_types
    )
Example #3
0
def start_single_grouped_day_logo(n_jobs=-1, clf_type=None):
    """Run ``eval_train_test.logo_cv`` on all day-grouped single DGA sets.

    :param n_jobs: forwarded to ``logo_cv``
    :param clf_type: 'svm', 'rf' or a falsy value. A falsy value runs both
        types ('svm' first, then 'rf'); any other value runs nothing.
    :return: None
    """
    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.single_dga_grouped_day.value)

    # Decide which classifier types to evaluate, then run them in order.
    if not clf_type:
        selected = ('svm', 'rf')
    elif clf_type == 'rf':
        selected = ('rf',)
    elif clf_type == 'svm':
        selected = ('svm',)
    else:
        selected = ()

    for name in selected:
        eval_train_test.logo_cv(name, workspace.data_sets_loaded.values(), n_jobs=n_jobs)
Example #4
0
def start_mix_dga_kfold(repetitions=5, n_jobs=-1, clf_type=None):
    """Run ``eval_train_test.kfold_cv`` on all mixed, family-grouped sets.

    :param repetitions: forwarded to ``kfold_cv``
    :param n_jobs: forwarded to ``kfold_cv``
    :param clf_type: 'svm', 'rf' or a falsy value. A falsy value runs
        'svm_mix' followed by 'rf_mix'; any other value runs nothing.
    :return: None
    """
    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.mixed_dga_grouped_family.value)

    # Map the requested type onto the mix classifier names used by kfold_cv.
    if not clf_type:
        mix_names = ('svm_mix', 'rf_mix')
    elif clf_type == 'rf':
        mix_names = ('rf_mix',)
    elif clf_type == 'svm':
        mix_names = ('svm_mix',)
    else:
        mix_names = ()

    for mix_name in mix_names:
        eval_train_test.kfold_cv(
            mix_name,
            workspace.data_sets_loaded.values(),
            repetitions=repetitions,
            n_jobs=n_jobs,
        )
Example #5
0
def predict_exact_or_threshold(clf_type=None, nomix=True, threshold=1, exact=1):
    """Predict one fixed mixed set and collect the outcome in a ``Statistic``.

    The set is expanded into domains and labels, passed to the ensemble's
    ``predict_exactly_one_or_threshold`` and the returned labels and
    predictions are recorded as a single run.

    :param clf_type: forwarded to the ensemble as ``only_type``
    :param nomix: forwarded to the ensemble as ``nomix``
    :param threshold: forwarded to ``predict_exactly_one_or_threshold``
    :param exact: forwarded to ``predict_exactly_one_or_threshold``
    :return: the ``Statistic`` object holding the recorded run
    """
    set_id = 'mixed_dga_grouped_family_50000_59_2.pkl'

    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.mixed_dga_grouped_family.value)

    ensemble = classifiers.ClassificationEnsemble(nomix=nomix, only_type=clf_type, only_mix=False)

    # The third element returned by expand() is not needed here.
    domains, labels, _ = workspace.data_sets_loaded[set_id].expand()

    labels, predicted = ensemble.predict_exactly_one_or_threshold(
        domains, labels, threshold=threshold, exact=exact
    )

    result = Statistic(set_id=set_id, id='threshold_test')
    result.add_run(labels, predicted, domains)
    return result
Example #6
0
def train_all_available_dga(clf_type=None):
    """
    Train classifiers on every loaded single-DGA data set.
    :param clf_type: 'svm', 'rf' or None. None (or any falsy value) trains
        'svm' on all sets first and then 'rf'; any other value trains nothing.
    :return: None
    """
    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.single_dga.value)

    # Resolve the classifier types up front so a single loop does the work.
    if not clf_type:
        selected = ['svm', 'rf']
    elif clf_type == 'rf' or clf_type == 'svm':
        selected = [clf_type]
    else:
        selected = []

    for current in selected:
        for data_set in workspace.data_sets_loaded.values():
            _training(current, data_set)
Example #7
0
def predict_all_mixed_sets_on_x(n_jobs=8):
    """Predict one fixed mixed set with every non-mix classifier and store the results.

    Classifiers whose ``dga_type`` contains 'mix' are skipped. The
    predictions are computed in parallel and serialized, together with the
    expanded domains, to 'x_vs_all_results.pkl' in the analysis folder.

    :param n_jobs: forwarded to ``Parallel``
    :return: None
    """
    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.mixed_dga_grouped_family.value)

    # Keep only classifiers that were not trained on a mix.
    single_dga_clfs = []
    for candidate in classifiers.ClassificationEnsemble().clfs:
        if 'mix' in candidate.dga_type:
            continue
        single_dga_clfs.append(candidate)

    runner = Parallel(n_jobs=n_jobs, verbose=1)

    data_set = workspace.data_sets_loaded['mixed_dga_grouped_family_50000_59_2.pkl']
    domains, labels, groups = data_set.expand()

    # Per the original author's note, the results are a list of tuples:
    # clf.clf_type, clf.dga_type, lbls, pred_lbl
    predict_task = delayed(eval_train_test.predict_all_on_x)
    results = runner(
        predict_task(clf, data_set, domains, labels, groups)
        for clf in single_dga_clfs
    )

    target = settings.ANALYSIS_FOLDER + '/x_vs_all_results.pkl'
    data.serialize_keep_copy(target, (results, domains))
Example #8
0
def start_x_trained_y_test(n_jobs=-1, clf_type=None, x='siemens'):
    """Test mix classifiers stored for source ``x`` against the loaded mixed sets.

    The classifiers are read from a fixed directory selected by ``x`` and
    passed, together with all loaded mixed family-grouped sets, to
    ``eval_train_test.trained_clfs_vs_sets``.

    :param n_jobs: forwarded to ``trained_clfs_vs_sets``
    :param clf_type: 'svm', 'rf' or a falsy value. A falsy value applies no
        type filter to the ensemble; any other value loads the sets but
        evaluates nothing.
    :param x: which stored classifiers to load, 'rwth' or 'siemens'
    :raises ValueError: if ``x`` is neither 'rwth' nor 'siemens'
    :return: None
    """
    if x == 'rwth':
        load_clfs_from = '/work/ss930620/dga_detection_workspace/clfs/'
    elif x == 'siemens':
        load_clfs_from = '/work/ss930620/dga_detection_workspace_siemens/clfs/'
    else:
        # Fail fast: an unknown source used to leave load_clfs_from unbound,
        # which only surfaced as UnboundLocalError after all sets were loaded.
        raise ValueError("x must be 'rwth' or 'siemens', got {!r}".format(x))

    w = Workspace(days=1, empty=True)
    w.load_all(settings.SetTypes.mixed_dga_grouped_family.value)
    test_data_sets = w.data_sets_loaded.values()

    ensemble_kwargs = {'only_mix': True, 'clfs_from_path': load_clfs_from}
    if clf_type:
        if clf_type not in ('rf', 'svm'):
            # Unknown classifier types are ignored, as before.
            return
        ensemble_kwargs['only_type'] = clf_type

    eval_train_test.trained_clfs_vs_sets(
        classifiers.ClassificationEnsemble(**ensemble_kwargs).clfs,
        test_data_sets,
        n_jobs=n_jobs,
    )
Example #9
0
def time_training(rep=1):
    """Time training and prediction of the mix SVM and RF classifiers.

    Both classifiers are trained on the expanded
    'mixed_dga_grouped_family_50000_59_0.pkl' set and afterwards predict the
    expanded 'mixed_dga_grouped_family_50000_59_1.pkl' set. Each of the four
    measurements is printed with its own label.

    :param rep: passed to each timer as ``number``
    :return: None, the measured times are printed
    """
    set_type = settings.SetTypes.mixed_dga_grouped_family.value

    def _report(label, action):
        # Time `action` and print the result behind its label.
        print(label + str(Timer(action).timeit(number=rep)))

    workspace = Workspace(days=1, empty=True)

    train_set = 'mixed_dga_grouped_family_50000_59_0.pkl'
    workspace.load(train_set, set_type)
    train_dom, train_lab, _ = workspace.data_sets_loaded[train_set].expand()

    svm = SVMClassifier(dga='mix')
    _report('SVM Training: ', lambda: svm.training(train_dom, train_lab))

    rf = RFClassifier(dga='mix')
    _report('RF Training: ', lambda: rf.training(train_dom, train_lab))

    test_set = 'mixed_dga_grouped_family_50000_59_1.pkl'
    workspace.load(test_set, set_type)
    test_dom, test_lab, _ = workspace.data_sets_loaded[test_set].expand()

    _report('SVM Classify: ', lambda: svm.predict(test_dom, test_lab))
    _report('RF Classify: ', lambda: rf.predict(test_dom, test_lab))