예제 #1
0
def test_all_chains():
    """
    Run every chaining prediction variant over its parameter grid and persist
    the collected results.

    Results are stored in a dict keyed by a tuple whose first element names the
    variant, followed by the parameters of that run:

    * ``('threshold', clf_type, nomix, threshold)``
    * ``('duo_threshold', None, nomix, threshold)``
    * ``('duo', None, nomix)``
    * ``('exact_threshold', clf_type, nomix, threshold, exact)``
    * ``('onlymix_threshold', threshold)``

    The dict is handed to ``data.serialize_keep_copy`` with the target
    ``settings.ANALYSIS_FOLDER + '/chaining.pkl'``.

    :return: dict mapping the key tuples above to whatever the respective
             predict function returned
    """
    nomix = [True, False]
    clf_type = ['svm', 'rf', None]
    exact = [1, 2, 3, 4]
    threshold = [1, 2, 3, 4, 5, 6]
    # Separate, smaller grid for the only-mix run (previously done by
    # rebinding ``threshold`` halfway through the function).
    onlymix_threshold = [1, 2]

    # The duo predictors are called without a clf_type. Their keys used to be
    # built from the loop variable left over from iterating ``clf_type``, which
    # happened to be its last element, None. The slot is kept (and kept as
    # None) so the key layout of the stored results does not change, but it no
    # longer depends on the order of ``clf_type``.
    duo_clf_type = None

    results = dict()

    for c in clf_type:
        for m in nomix:
            for t in threshold:
                results[('threshold', c, m, t)] = predict_threshold(clf_type=c, nomix=m, threshold=t)

    for m in nomix:
        for t in threshold:
            results[('duo_threshold', duo_clf_type, m, t)] = predict_duo_threshold(nomix=m, threshold=t)

    for m in nomix:
        results[('duo', duo_clf_type, m)] = predict_duo(nomix=m)

    for c in clf_type:
        for m in nomix:
            for t in threshold:
                for e in exact:
                    results[('exact_threshold', c, m, t, e)] = predict_exact_or_threshold(
                        nomix=m, clf_type=c, threshold=t, exact=e)

    for t in onlymix_threshold:
        results[('onlymix_threshold', t)] = predict_threshold(nomix=False, only_mix=True, threshold=t)

    data.serialize_keep_copy(settings.ANALYSIS_FOLDER + '/chaining.pkl', results)
    return results
예제 #2
0
def _serialize_cv_results(stats_list, persist, where):
    """
    Group stats objects by data set id and optionally write the grouping out.

    :param stats_list: iterable of ``(stats, data_set_id)`` pairs
    :param persist: when truthy, the grouped mapping is passed to
                    ``data.serialize_keep_copy`` with ``keep_copy=True``
    :param where: target handed to ``data.serialize_keep_copy``; unused when
                  ``persist`` is falsy
    :return: ``defaultdict(list)`` mapping each data set id to its stats, in
             the order they appeared in ``stats_list``
    """
    grouped = defaultdict(list)
    for stats, set_id in stats_list:
        grouped[set_id].append(stats)

    if not persist:
        return grouped

    data.serialize_keep_copy(where, grouped, keep_copy=True)
    return grouped
예제 #3
0
    def serialize(self, where=None, keep_copy=True):
        """
        Store this object through ``data.serialize_keep_copy``.

        The target is ``self.id`` appended directly (plain concatenation) to
        ``where`` or, when ``where`` is falsy, to ``settings.EVAL_FOLDER``.

        :param where: path prefix; leave falsy to use ``settings.EVAL_FOLDER``
        :param keep_copy: forwarded unchanged to ``data.serialize_keep_copy``
        """
        prefix = where or settings.EVAL_FOLDER
        data.serialize_keep_copy(prefix + self.id, self, keep_copy=keep_copy)
예제 #4
0
 def serialize(self, to=settings.CLF_FOLDER, id=''):
     """
     Save this classifier through ``data.serialize_keep_copy``.

     The file name is built from ``self.dga``, ``self.clf_type`` and ``id``
     and appended to the folder as ``/<dga>_<clf_type>_<id>.pkl``.

     :param to: folder where to save; the default ``settings.CLF_FOLDER`` is
                read once, when the method is defined
     :param id: extra token used as the last part of the file name
     :return: None
     """
     file_name = '/{!s}_{!s}_{!s}.pkl'.format(self.dga, self.clf_type, id)
     data.serialize_keep_copy(to + file_name, self)
예제 #5
0
def test_rf_mix_chains():
    """
    Evaluate threshold chaining on random subsets of the 'rf' only-mix
    classifiers and persist the results.

    For every subset size from 3 to 18 and every threshold in ``[1, 2]`` a
    fresh random sample of that size is drawn from the full classifier list,
    wrapped in a ``ClassificationEnsemble`` and passed to
    ``predict_threshold``. Results are keyed by ``('mix_rf', threshold, size)``
    and handed to ``data.serialize_keep_copy`` with the target
    ``settings.ANALYSIS_FOLDER + '/chaining.pkl'``.

    :return: dict mapping ``('mix_rf', threshold, size)`` to the value returned
             by ``predict_threshold``
    """
    thresholds = [1, 2]
    pool = classifiers.ClassificationEnsemble(nomix=False, only_type='rf', only_mix=True).clfs

    outcome = {}
    for size in range(3, 19):
        for thr in thresholds:
            # Sampling happens per (size, threshold) pair, so the two
            # thresholds of one size are evaluated on different subsets.
            subset = random.sample(pool, size)
            ensemble = classifiers.ClassificationEnsemble(
                nomix=False, only_type='rf', only_mix=True, clf_list=subset)
            outcome[('mix_rf', thr, size)] = predict_threshold(
                nomix=False, only_mix=True, threshold=thr, clfs=ensemble)

    data.serialize_keep_copy(settings.ANALYSIS_FOLDER + '/chaining.pkl', outcome)
    return outcome
예제 #6
0
def predict_all_mixed_sets_on_x(n_jobs=8):
    """
    Run every non-mix classifier on one fixed mixed data set in parallel and
    store the predictions.

    The data set ``'mixed_dga_grouped_family_50000_59_2.pkl'`` is taken from a
    freshly loaded workspace and expanded once; each classifier whose
    ``dga_type`` does not contain ``'mix'`` is then passed to
    ``eval_train_test.predict_all_on_x`` as a separate job. The tuple
    ``(res, dmns)`` is handed to ``data.serialize_keep_copy`` with the target
    ``settings.ANALYSIS_FOLDER + '/x_vs_all_results.pkl'``.

    :param n_jobs: number of jobs given to ``Parallel``
    """
    workspace = Workspace(days=1, empty=True)
    workspace.load_all(settings.SetTypes.mixed_dga_grouped_family.value)

    # Only classifiers whose dga_type does not mention 'mix' take part.
    non_mix_clfs = [
        clf
        for clf in classifiers.ClassificationEnsemble().clfs
        if 'mix' not in clf.dga_type
    ]

    runner = Parallel(n_jobs=n_jobs, verbose=1)

    data_set = workspace.data_sets_loaded['mixed_dga_grouped_family_50000_59_2.pkl']
    dmns, lbls, groups = data_set.expand()

    # Per the original author's note, res is a list of tuples:
    # clf.clf_type, clf.dga_type, lbls, pred_lbl
    jobs = (
        delayed(eval_train_test.predict_all_on_x)(clf, data_set, dmns, lbls, groups)
        for clf in non_mix_clfs
    )
    res = runner(jobs)

    target = settings.ANALYSIS_FOLDER + '/x_vs_all_results.pkl'
    data.serialize_keep_copy(target, (res, dmns))