def execution(self):
    process = gatherer.capture_nfcapd(util.paths['nfcapd'], 60)
    dataset = Dataset.query.get(self.model.dataset_id)

    logger.info(f'process pid: {process.pid}')
    logger.info(f'dataset file: {dataset.file}')

    try:
        while not self.event.is_set():
            nfcapd_files = util.directory_content(util.paths['nfcapd'])[1]

            try:
                # skips the file still being written by the nfcapd process.
                if 'current' not in nfcapd_files[0]:
                    logger.info(f'nfcapd files: {nfcapd_files[:-1]}')

                    # gathering flows.
                    flows = self.gathering(nfcapd_files[:-1])

                    # cleaning remaining files.
                    util.clean_directory(util.paths['nfcapd'], 'nfcapd.20*')
                    util.clean_directory(f'{util.paths["csv"]}tmp/', '*')

                    if len(flows[0]) < 18:
                        raise ValueError('No matched flows')
                    logger.info(f'flow: {flows[0]}')

                    # preprocessing flows.
                    formatter = Formatter()
                    flows = formatter.format_flows(flows)
                    logger.info(f'formatted flow: {flows[0]}')

                    modifier = Modifier(2, dataset.aggregation)
                    extractor = Extractor([feature.id + 7
                                           for feature in self.model.features])

                    while flows:
                        flow, flows = modifier.aggregate(flows)
                        features, _ = extractor.extract(flow)

                        # detecting intrusions.
                        pred, _, _ = self.detector.test([features])

                        if pred[0]:
                            # mitigating intrusions.
                            self.mitigating(flow)
                time.sleep(2)
            except IndexError:
                time.sleep(2)
                continue
            except ValueError as error:
                logger.error(error)
                util.clean_directory(util.paths['nfcapd'], 'nfcapd.20*')
                util.clean_directory(f'{util.paths["csv"]}tmp/', '*')
                continue
    finally:
        logger.info('thread status: false')
        process.kill()
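# A minimal sketch (not from this codebase) of the stop-event wiring that
# `execution` relies on: the method runs on a worker thread and leaves its
# loop once `self.event` is set from another thread. The names `Worker` and
# `stop` below are hypothetical, shown only to illustrate the pattern.
import threading
import time


class Worker:
    def __init__(self):
        self.event = threading.Event()

    def execution(self):
        # loops until another thread signals the event.
        while not self.event.is_set():
            time.sleep(2)

    def stop(self):
        # signals `execution` to leave its loop.
        self.event.set()


worker = Worker()
thread = threading.Thread(target=worker.execution)
thread.start()
worker.stop()
thread.join()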
def setUpClass(cls):
    """Initializes the parameters used by the test functions."""
    # gathering flows
    raw_csv_file = util.directory_content(formatter_path)[1][0]
    header, flows = gatherer.open_csv(formatter_path, raw_csv_file)

    # preprocessing flows
    formatter = Formatter()
    cls.header = formatter.format_header(header)
    cls.flows = formatter.format_flows(flows)
def model():
    if request.method == 'POST':
        # creating an absolute path of a temporary directory.
        tmp_directory = mkdtemp()

        model = Model.query.get(request.form['model_pk'])
        dataset = Dataset.query.get(model.dataset_id)
        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)

        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))

        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # gathering flows.
        header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                          dataset.file)
        session['last_models'].remove(model.id)
        logger.info(f'raw flow: {flows[0]}')

        # removing unselected models.
        for model_pk in session['last_models']:
            db.session.delete(Model.query.get(model_pk))
        db.session.commit()

        # preprocessing flows.
        formatter = Formatter(gather=False, train=True)
        flows = formatter.format_flows(flows)
        logger.info(f'final flow: {flows[0]}')

        # extracting features.
        # adding an extra value to skip the first unused features.
        extractor = Extractor([feature.id + 7 for feature in model.features])
        features, labels = extractor.extract_features_labels(flows)
        logger.info(f'feature: {features[0]}, label: {labels[0]}')

        # tuning and retraining.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, tmp_directory)
        detector.retrain(features, labels)

        # model persistence.
        with open(f'{util.paths["models"]}{model.file}', 'wb') as file:
            pickle.dump(detector, file)
        logger.info(f'model file: {model.file}')

        # removing the temporary directory used by the Pipeline object.
        rmtree(tmp_directory)

    return redirect(url_for('setting.load'))
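# `define_tuning` is opaque here; a plausible shape for it, assuming the
# Detector wraps scikit-learn (suggested by the Pipeline cache directory and
# the k-fold count), is a cached Pipeline tuned with GridSearchCV. Everything
# below — `param_grid`, the step names, `define_tuning_sketch` — is an
# assumption for illustration, not the project's actual implementation.
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline


def define_tuning_sketch(classifier, preprocessing, kfolds, cachedir,
                         param_grid):
    # memory=cachedir caches fitted transformers across CV candidates;
    # this is why the temporary directory is removed once training is done.
    pipeline = Pipeline([('preprocessing', preprocessing),
                         ('classifier', classifier)],
                        memory=cachedir)
    return GridSearchCV(pipeline, param_grid, cv=kfolds)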
def test_aggregate_flows(self):
    """Tests if the flows were correctly aggregated."""
    # gathering flows
    expt_csv = util.directory_content(modifier_path)[1][0]
    expt_header, expt_flows = gatherer.open_csv(modifier_path, expt_csv)

    # preprocessing flows
    formatter = Formatter(gather=False, train=True)
    expt_flows = formatter.format_flows(expt_flows)

    self.assertListEqual(self.header, expt_header,
                         'aggregation performed incorrectly in header')
    self.assertListEqual(self.flows, expt_flows,
                         'aggregation performed incorrectly in flows')
def setUpClass(cls):
    """Initializes the parameters used by the test functions."""
    # gathering flows
    raw_csv_file = util.directory_content(modifier_path)[1][-1]
    header, flows = gatherer.open_csv(modifier_path, raw_csv_file)

    # preprocessing flows
    formatter = Formatter()
    header = formatter.format_header(header)
    flows = formatter.format_flows(flows)

    # threshold defined according to the expected result in the test dataset
    modifier = Modifier(label=0, threshold=5)
    cls.header = modifier.extend_header(header)
    cls.flows = modifier.aggregate_flows(flows)
def preprocessing_flows(directory):
    form = PreprocessingFlowsForm()

    if request.method == 'POST' and form.validate_on_submit():
        path = (session['paths_hist']['root']
                + session['paths_hist'][directory])

        logger.info(f'path: {path}')
        logger.info(f'sample: {form.sample.data}, '
                    f'threshold: {form.threshold.data}, '
                    f'label: {form.label.data}')

        for file in session['files']:
            # gathering flows.
            header, flows = gatherer.open_csv(path, file, form.sample.data)
            logger.info(f'flow: {flows[0]}')

            # preprocessing flows.
            formatter = Formatter()
            header = formatter.format_header(header)
            flows = formatter.format_flows(flows)
            logger.info(f'formatted flow: {flows[0]}')

            modifier = Modifier(label=form.label.data,
                                threshold=form.threshold.data)
            header = modifier.extend_header(header)
            flows = modifier.aggregate_flows(flows)
            logger.info(f'modified flow: {flows[0]}')

            # exporting flows.
            name = file.split('.csv')[0]
            exporter.flows_csv(header, flows,
                               f'{util.paths["csv"]}flows/',
                               f'{name}_s{len(flows)}.csv')

        return redirect(url_for('creation.content',
                                function='preprocessing_flows',
                                directory=directory))

    return render_template('creation/preprocessing_flows.html',
                           form=form, directory=directory)
def result():
    models = [Model.query.get(model_pk)
              for model_pk in session['last_models']]
    dataset = Dataset.query.get(models[-1].dataset_id)

    # gathering flows.
    header, flows = gatherer.open_csv(f'{util.paths["csv"]}datasets/',
                                      dataset.file)
    logger.info(f'raw flow: {flows[0]}')

    # preprocessing flows.
    formatter = Formatter(gather=False, train=True)
    flows = formatter.format_flows(flows)
    logger.info(f'final flow: {flows[0]}')

    # extracting features.
    # adding an extra value to skip the first unused features.
    extractor = Extractor([feature.id + 7
                           for feature in models[-1].features])
    features, labels = extractor.extract_features_labels(flows)
    logger.info(f'feature: {features[0]}, label: {labels[0]}')

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels,
        test_size=dataset.split / 100,
        stratify=labels)

    logger.info(f'x_train: {len(x_train)}')
    logger.info(f'x_test: {len(x_test)}')
    logger.info(f'y_train: {len(y_train)}')
    logger.info(f'y_test: {len(y_test)}')

    for model in models:
        # creating an absolute path of a temporary directory.
        cachedir = mkdtemp()

        preprocessing = Preprocessing.query.get(model.preprocessing_id)
        classifier = Classifier.query.get(model.classifier_id)

        prep_key = '_'.join(preprocessing.name.lower().split(' '))
        clf_key = '_'.join(classifier.name.lower().split(' '))

        logger.info(f'classifier: {classifier.name}')
        logger.info(f'preprocessing: {preprocessing.name}')

        # tuning, training and testing.
        detector = Detector(copy.deepcopy(classifiers_obj[clf_key]))
        detector.define_tuning(copy.deepcopy(preprocessing_obj[prep_key]),
                               dataset.kfolds, cachedir)
        hparam, train_date, train_dur = detector.train(x_train, y_train)
        pred, test_date, test_dur = detector.test(x_test)

        # results.
        outcome = evaluator.metrics(y_test, pred)
        result = Result(train_date=train_date,
                        test_date=test_date,
                        train_duration=train_dur,
                        test_duration=test_dur,
                        accuracy=outcome['accuracy'],
                        precision=outcome['precision'],
                        recall=outcome['recall'],
                        f1_score=outcome['f1_score'],
                        true_negative=outcome['tn'],
                        false_positive=outcome['fp'],
                        false_negative=outcome['fn'],
                        true_positive=outcome['tp'],
                        hyperparameters=str(hparam),
                        model_id=model.id)

        db.session.add(result)
        db.session.commit()

        # removing the temporary directory used by the Pipeline object.
        rmtree(cachedir)

    columns = Model.__table__.columns

    return render_template('setting/result.html',
                           columns=columns, models=models)
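# `evaluator.metrics` is not shown here; below is a minimal sketch of the
# equivalent computation with scikit-learn, assuming binary labels where 1
# marks an intrusion. The name `metrics_sketch` is hypothetical, and the
# returned keys simply mirror the fields consumed by `Result` above.
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             precision_score, recall_score)


def metrics_sketch(y_true, y_pred):
    # ravel() flattens the 2x2 confusion matrix into tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return {'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred),
            'recall': recall_score(y_true, y_pred),
            'f1_score': f1_score(y_true, y_pred),
            'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}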