Example #1
    def execution(self):
        process = gatherer.capture_nfcapd(util.paths['nfcapd'], 60)
        dataset = Dataset.query.get(self.model.dataset_id)
        logger.info(f'process pid: {process.pid}')
        logger.info(f'dataset file: {dataset.file}')

        try:
            while not self.event.is_set():
                nfcapd_files = util.directory_content(util.paths['nfcapd'])[1]

                try:
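                    # the last entry appears to be the file nfcapd is still
                    # writing, so only the completed captures are processed.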
                    if 'current' not in nfcapd_files[0]:
                        logger.info(f'nfcapd files: {nfcapd_files[:-1]}')

                        # gathering flows.
                        flows = self.gathering(nfcapd_files[:-1])

                        # cleaning remaining files.
                        util.clean_directory(util.paths['nfcapd'],
                                             'nfcapd.20*')
                        util.clean_directory(f'{util.paths["csv"]}tmp/', '*')

                        if len(flows[0]) < 18:
                            raise ValueError('No matched flows')
                        logger.info(f'flow: {flows[0]}')

                        # preprocessing flows.
                        formatter = Formatter()
                        flows = formatter.format_flows(flows)
                        logger.info(f'formatted flow: {flows[0]}')

                        modifier = Modifier(2, dataset.aggregation)
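                        # feature ids are offset by 7 to skip the first
                        # unused features (cf. Examples #3 and #7).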
                        extractor = Extractor([
                            feature.id + 7 for feature in self.model.features
                        ])

                        while flows:
                            flow, flows = modifier.aggregate(flows)
                            features, _ = extractor.extract(flow)
                            # detecting intrusions.
                            pred, _, _ = self.detector.test([features])

                            if pred[0]:
                                # mitigating intrusions.
                                self.mitigating(flow)
                    time.sleep(2)
                except IndexError:
                    time.sleep(2)
                    continue
                except ValueError as error:
                    logger.error(error)
                    util.clean_directory(util.paths['nfcapd'], 'nfcapd.20*')
                    util.clean_directory(f'{util.paths["csv"]}tmp/', '*')
                    continue
        finally:
            logger.info('thread status: false')
            process.kill()
Example #2
    def setUpClass(cls):
        """Initiates the parameters to feed the test functions."""

        # gathering flows
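        # directory_content's second element holds the file names
        # (cf. nfcapd_files in Example #1); [0] takes the first CSV.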
        raw_csv_file = util.directory_content(formatter_path)[1][0]
        header, flows = gatherer.open_csv(formatter_path, raw_csv_file)

        # preprocessing flows
        formatter = Formatter()
        cls.header = formatter.format_header(header)
        cls.flows = formatter.format_flows(flows)
Example #3
def model():
    if request.method == 'POST':
        # creating a temporary directory and keeping its absolute path.
        tmp_directory = mkdtemp()
        model = Model.query.get(request.form['model_pk'])
        dataset = Dataset.query.get(model.dataset_id)
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)
        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))
        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # gathering flows.
        header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                          dataset.file)
        session['last_models'].remove(model.id)
        logger.info(f'raw flow: {flows[0]}')

        # removing unselected models.
        for model_pk in session['last_models']:
            db.session.delete(Model.query.get(model_pk))
        db.session.commit()

        # preprocessing flows.
        formatter = Formatter(gather=False, train=True)
        flows = formatter.format_flows(flows)
        logger.info(f'final flow: {flows[0]}')

        # extracting features.
        # adding an offset of 7 to skip the first unused features.
        extractor = Extractor([feature.id + 7 for feature in model.features])
        features, labels = extractor.extract_features_labels(flows)
        logger.info(f'feature: {features[0]}, label: {labels[0]}')

        # tuning and retraining.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, tmp_directory)
        detector.retrain(features, labels)

        # model persistence.
        with open(f'{util.paths["models"]}{model.file}', 'wb') as fd:
            pickle.dump(detector, fd)
        logger.info(f'model file: {model.file}')
        # removing the temporary directory used by the Pipeline object.
        rmtree(tmp_directory)

    return redirect(url_for('setting.load'))
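A detector persisted this way can be restored with the matching pickle.load call; a minimal sketch mirroring the dump above (the path expression is the same assumption used there, not a documented helper):

    with open(f'{util.paths["models"]}{model.file}', 'rb') as fd:
        detector = pickle.load(fd)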
Example #4
    def test_aggregate_flows(self):
        """Tests if the features were correctly aggregated."""

        # gathering flows
        expt_csv = util.directory_content(modifier_path)[1][0]
        expt_header, expt_flows = gatherer.open_csv(modifier_path, expt_csv)

        # preprocessing flows
        formatter = Formatter(gather=False, train=True)
        expt_flows = formatter.format_flows(expt_flows)

        self.assertListEqual(self.header, expt_header,
                             'aggregation performed incorrectly in header')
        self.assertListEqual(self.flows, expt_flows,
                             'aggregation performed incorrectly in flows')
Example #5
    def setUpClass(cls):
        """Initiates the parameters to feed the test functions."""

        # gathering flows
        raw_csv_file = util.directory_content(modifier_path)[1][-1]
        header, flows = gatherer.open_csv(modifier_path, raw_csv_file)

        # preprocessing flows
        formatter = Formatter()
        header = formatter.format_header(header)
        flows = formatter.format_flows(flows)

        # threshold defined according to the expected result in the test dataset.
        modifier = Modifier(label=0, threshold=5)
        cls.header = modifier.extend_header(header)
        cls.flows = modifier.aggregate_flows(flows)
Example #6
def preprocessing_flows(directory):
    form = PreprocessingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root'] +
                session['paths_hist'][directory])
        logger.info(f'path: {path}')
        logger.info(f'sample: {form.sample.data}, '
                    f'threshold: {form.threshold.data}, '
                    f'label: {form.label.data}')

        for file in session['files']:
            # gathering flows.
            header, flows = gatherer.open_csv(path, file, form.sample.data)
            logger.info(f'flow: {flows[0]}')

            # preprocessing flows.
            formatter = Formatter()
            header = formatter.format_header(header)
            flows = formatter.format_flows(flows)
            logger.info(f'formatted flow: {flows[0]}')

            modifier = Modifier(label=form.label.data,
                                threshold=form.threshold.data)
            header = modifier.extend_header(header)
            flows = modifier.aggregate_flows(flows)
            logger.info(f'modified flow: {flows[0]}')

            # exporting flows.
            name = file.split('.csv')[0]
            exporter.flows_csv(header, flows, f'{util.paths["csv"]}flows/',
                               f'{name}_s{len(flows)}.csv')

        return redirect(
            url_for('creation.content',
                    function='preprocessing_flows',
                    directory=directory))
    return render_template('creation/preprocessing_flows.html',
                           form=form,
                           directory=directory)
Example #7
def result():
    models = [Model.query.get(model_pk) for model_pk in session['last_models']]
    dataset = Dataset.query.get(models[-1].dataset_id)

    # gathering flows.
    header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                      dataset.file)
    logger.info(f'raw flow: {flows[0]}')

    # preprocessing flows.
    formatter = Formatter(gather=False, train=True)
    flows = formatter.format_flows(flows)
    logger.info(f'final flow: {flows[0]}')

    # extracting features.
    # adding an offset of 7 to skip the first unused features.
    extractor = Extractor([feature.id + 7 for feature in models[-1].features])
    features, labels = extractor.extract_features_labels(flows)
    logger.info(f'feature: {features[0]}, label: {labels[0]}')

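    # dataset.split is a percentage; stratify preserves the label
    # distribution across the train and test sets.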
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=dataset.split / 100, stratify=labels)
    logger.info(f'x_train: {len(x_train)}')
    logger.info(f'x_test: {len(x_test)}')
    logger.info(f'y_train: {len(y_train)}')
    logger.info(f'y_test: {len(y_test)}')

    for model in models:
        # creating a temporary directory and keeping its absolute path.
        cachedir = mkdtemp()
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)
        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))
        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # tuning, training and testing.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, cachedir)

        hparam, train_date, train_dur = detector.train(x_train, y_train)
        pred, test_date, test_dur = detector.test(x_test)

        # results.
        outcome = evaluator.metrics(y_test, pred)
        result = Result(train_date=train_date,
                        test_date=test_date,
                        train_duration=train_dur,
                        test_duration=test_dur,
                        accuracy=outcome['accuracy'],
                        precision=outcome['precision'],
                        recall=outcome['recall'],
                        f1_score=outcome['f1_score'],
                        true_negative=outcome['tn'],
                        false_positive=outcome['fp'],
                        false_negative=outcome['fn'],
                        true_positive=outcome['tp'],
                        hyperparameters=str(hparam),
                        model_id=model.id)
        db.session.add(result)
        db.session.commit()
        # removing the temporary directory used by the Pipeline object.
        rmtree(cachedir)
    columns = Model.__table__.columns

    return render_template('setting/result.html',
                           columns=columns,
                           models=models)
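The examples above share one pipeline: gather, format, modify, extract, detect. A minimal sketch of that flow under the project's module names (the path, label, threshold and selected_ids are placeholders, not values from the source):

    # gathering flows.
    header, flows = gatherer.open_csv('/path/to/csv/', 'flows.csv')

    # preprocessing flows.
    formatter = Formatter()
    header = formatter.format_header(header)
    flows = formatter.format_flows(flows)

    modifier = Modifier(label=0, threshold=5)
    header = modifier.extend_header(header)
    flows = modifier.aggregate_flows(flows)

    # extracting features; ids offset by 7 to skip the first unused features.
    extractor = Extractor([feature_id + 7 for feature_id in selected_ids])
    features, labels = extractor.extract_features_labels(flows)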