Ejemplo n.º 1
0
    def handle_app(app_id, ids_entries, experiment):
        """ Fit and score each classifier on the entries of a single app.

        The entries are verified, split into a training and a scoring part,
        and every classifier is fitted on the training part. The predictions
        for the scoring part are handed to the experiment under the "SPEC"
        label.
        """

        verify_ids_entries(ids_entries, app_id, experiment.storer_printer)

        train_part, score_part = ids_tools.ids_entries_to_train_test(
            ids_entries)

        # Only the X part of the training data is needed for fitting.
        X_train, _ = IdsConverter().ids_entries_to_X_y(train_part)
        X_test, y_true = IdsConverter().ids_entries_to_X_y(score_part)

        for model in [sk_svm.OneClassSVM(), sk_ens.IsolationForest()]:
            model.fit(X_train)
            predictions = model.predict(X_test)
            experiment.visualise_store(
                "SPEC", app_id, model, y_true, predictions)
Ejemplo n.º 2
0
def _read_log_lines_then_yield(yielder, first_line):
    """ Parse the first line and all lines from the yielder, convert them
    to IDS entries and yield these one by one.

    All lines are read and converted before the first entry is yielded.
    """

    parsed = [LogEntry.from_log_string(first_line)]
    parsed.extend(LogEntry.from_log_string(raw) for raw in yielder)

    entries_dict = IdsConverter().log_entries_to_ids_entries_dict(parsed)

    # The dict keys are not needed here, only the entry collections.
    for grouped_entries in entries_dict.values():
        for entry in grouped_entries:
            yield entry
Ejemplo n.º 3
0
    def run(experiment):
        """ Time the mapping converter against the one-hot converter.

        Log lines are obtained from Dir.yield_lines (called with the
        experiment's file path and ITEM_LIMIT), parsed and stored in
        experiment.entries. Both runs use the same parsed entries; the
        wall-clock duration of each is written to the result file
        "time_map_vs_onehot".
        """

        entries = [
            LogEntry.from_log_string(raw)
            for raw in Dir.yield_lines(experiment.file_path, ITEM_LIMIT)
        ]
        experiment.entries = entries

        # Exp 1: map (the converter is created inside the timed section)
        started = time.time()
        OneHotVsMapping.handle_log_entries(
            "MAP", OneHotVsMappingConverter(), entries, experiment)
        # Exp 2: one-hot
        switched = time.time()
        OneHotVsMapping.handle_log_entries(
            "OHOT", IdsConverter(), entries, experiment)
        finished = time.time()

        map_text = util.fmtr.format_time_passed(switched - started)
        one_hot_text = util.fmtr.format_time_passed(finished - switched)

        headline = (
            "Benchmark result | %s entries processed | OneClassSVM classifier"
            % len(entries))

        experiment.add_result_file(
            "time_map_vs_onehot",
            [
                headline,
                "",
                "Mapping: %s" % map_text,
                "One-hot: %s" % one_hot_text,
            ])
Ejemplo n.º 4
0
def _yield_idse_lines(yielder):
    """ Turn each line from the given yielder into an IdsEntry.

    Every line is handed to _process_idse_line together with one shared
    IdsConverter; the resulting triple is wrapped in an IdsEntry and yielded.
    """

    shared_converter = IdsConverter()

    for raw_line in yielder:
        parsed = _process_idse_line(raw_line, shared_converter)
        app_id, vector, vclass = parsed
        yield IdsEntry(app_id, vector, vclass)
Ejemplo n.º 5
0
    def read_convert(self, file_path, limit=5000000):
        """ Read IDS entries from the given file and convert the result.

        The entries are stored in self.entries (replacing any earlier
        content) and then passed to IdsConverter.ids_entries_to_dict.

        file_path -- Passed on to idse_dao.yield_entries.
        limit -- Maximum number of entries to keep. Reading stops with a
            warning as soon as this many entries are stored. None disables
            the limit. Defaults to the previously hard-coded 5000000.

        returns: The result of ids_entries_to_dict for the stored entries.
        """

        converter = IdsConverter()

        self.entries = []

        for entry in idse_dao.yield_entries(file_path):
            self.entries.append(entry)

            # Checked after appending, so the warning is issued once the
            # limit is hit - even if the file holds no further entries.
            if limit is not None and len(self.entries) >= limit:
                warnings.warn(
                    "Skipping remaining entries - limit of {} reached!"
                    .format(limit))
                break

        return converter.ids_entries_to_dict(self.entries)
Ejemplo n.º 6
0
    def run_cycle_for_app(ids_entries, app_id, percentage_intruded_training,
                          experiment):
        """ Fit and score a OneClassSVM for one app and one percentage.

        The entries are verified and split by custom_train_test_split using
        the given percentage. The classifier is fitted on the training part
        and its predictions for the scoring part are handed to the
        experiment under the name returned by get_name.
        """

        verify_ids_entries(ids_entries, app_id, experiment.storer_printer)

        split = CleanTrainingVsDistorted.custom_train_test_split(
            ids_entries, percentage_intruded_training)
        train_part, score_part = split

        # Only the X part of the training data is needed for fitting.
        X_train, _ = IdsConverter().ids_entries_to_X_y(train_part)
        X_test, y_true = IdsConverter().ids_entries_to_X_y(score_part)

        svm = sk_svm.OneClassSVM()
        label = CleanTrainingVsDistorted.get_name(percentage_intruded_training)

        svm.fit(X_train)
        predictions = svm.predict(X_test)
        experiment.visualise_store(label, app_id, svm, y_true, predictions)
Ejemplo n.º 7
0
    def handle_all(experiment):
        """ Fit the classifiers on all apps at once, then score per app.

        Log lines are parsed and converted with the temporary converter
        (binary=True). The classifiers are fitted once on the training part
        of all entries; the scoring part is grouped by app id and each
        group's predictions are handed to the experiment under the "ALL"
        label.
        """

        from ids.TEMP_IDS_CONVERTER import IdsConverter as TEMPCONVERTER
        temp_converter = TEMPCONVERTER()

        parsed = [
            LogEntry.from_log_string(raw)
            for raw in Dir.yield_lines(experiment.file_path, ITEM_LIMIT)
        ]

        ids_entries = temp_converter.LOG_ENTRIES_TO_IDS_ENTRIES(
            parsed, binary=True)

        train_part, score_part = ids_tools.ids_entries_to_train_test(
            ids_entries)
        X_train, _ = IdsConverter().ids_entries_to_X_y(train_part)

        # Group the scoring entries by their app id.
        per_app = {}
        for entry in score_part:
            per_app.setdefault(entry.app_id, []).append(entry)

        # Fit every model once, on the training entries of all apps.
        models = [sk_svm.OneClassSVM(), sk_ens.IsolationForest()]
        for model in models:
            model.fit(X_train)

        # Score app by app, in the order given by yield_items_in_key_order.
        ordered_items = util.seqr.yield_items_in_key_order(per_app)
        for app_id, app_entries in ordered_items:
            X_test, y_true = IdsConverter().ids_entries_to_X_y(app_entries)
            # All predictions are computed before any result is stored.
            predictions = [model.predict(X_test) for model in models]
            for model, y_pred in zip(models, predictions):
                experiment.visualise_store(
                    "ALL", app_id, model, y_true, y_pred)
Ejemplo n.º 8
0
def _score_pr(file_path):
    """ Fit one OneClassSVM per app id and print its scores.

    The log entries read from the given file are converted to prepared
    tuples (binary=True), split into a training and a test part and grouped
    per app id. For every app id in converter.app_ids a classifier is
    fitted on the training vectors; accuracy, precision, recall and the
    confusion matrix of its test predictions are printed.

    file_path -- Passed on to _read_file_flow.
    """

    printer = util.prtr.Printer()
    squelcher = util.prtr.Printer(squelch=True)
    converter = IdsConverter()

    log_entries = _read_file_flow(file_path)

    scores_acc = _empty_app_id_dict()
    scores_prec = _empty_app_id_dict()
    scores_rec = _empty_app_id_dict()

    printer.prt("Preparing... ", newline=False)

    # converted_entries: [(app_id, vector, class)]
    converted_entries = [
        converter.log_entry_to_prepared_tuple(log_entry, binary=True)
        for log_entry in log_entries
    ]

    printer.prt("Filtering... ", newline=False)
    train_entries, test_entries = ids_tools.converted_entries_to_train_test(
        converted_entries)

    printer.prt("Splitting... ", newline=False)
    train_dict = converter.prepared_tuples_to_train_dict(
        train_entries, squelcher)
    test_dict = converter.prepared_tuples_to_train_dict(
        test_entries, squelcher)

    result_table = [["App id", "Actual (+)", "Actual (-)"]]
    printer.prt("Scoring... ")

    # Loop-invariant, so the filter is installed once instead of once per
    # app id. Note that it stays active process-wide after this function.
    warnings.filterwarnings(
        "ignore", category=sklearn.exceptions.UndefinedMetricWarning)

    for app_id in converter.app_ids:

        # The second element of the training pair is not used for fitting.
        X_train, _ = train_dict[app_id]
        X_test, y_test = test_dict[app_id]

        # random_state is intentionally not passed: scikit-learn deprecated
        # it for OneClassSVM in 0.20 (the implementation is not random) and
        # removed it in 0.22, where passing it raises a TypeError.
        clf = sklearn.svm.OneClassSVM()
        clf.fit(X_train)

        result = clf.predict(X_test)

        # TODO MOAR (original note; presumably more metrics are planned)
        scores_acc[app_id].append(sk_met.accuracy_score(y_test, result))
        scores_prec[app_id].append(sk_met.precision_score(y_test, result))
        scores_rec[app_id].append(sk_met.recall_score(y_test, result))

        # NOTE(review): unpacking four values assumes a 2x2 matrix, i.e. two
        # distinct labels across y_test and result - confirm this always
        # holds for every app id.
        tn, fp, fn, tp = sk_met.confusion_matrix(y_test, result).ravel()
        result_table.append(["{} (+)".format(app_id), tp, fp])
        # Blank out the app id so the second row lines up with the first.
        result_table.append(["{} (-)".format(" " * len(app_id)), fn, tn])
        dash = "-" * 10
        result_table.append([dash, dash, dash])

    _print_scores(scores_acc, printer, headline="Accuracy")
    _print_scores(scores_prec, printer, headline="Precision")
    _print_scores(scores_rec, printer, headline="Recall")
    util.outp.print_table(result_table,
                          headline="Confusion matrix",
                          printer=printer)