def _get_trained_classifier(self, train_exp_id):
    """Rebuild the classifier produced by the experiment `train_exp_id`.

    The training experiment is re-created through the experiment factory,
    reusing this experiment's SecuML configuration and session. A model is
    then instantiated from the training experiment's core configuration and
    its state is loaded from the `model.out` file stored in the training
    experiment's output directory.

    Returns the classifier once `load_model` has been called on it.
    """
    factory = experiment.get_factory()
    train_exp = factory.from_exp_id(train_exp_id,
                                    self.exp_conf.secuml_conf,
                                    self.session)
    core_conf = train_exp.exp_conf.core_conf
    classifier = core_conf.model_class(core_conf)
    model_file = path.join(train_exp.output_dir(), 'model.out')
    classifier.load_model(model_file)
    return classifier
def _get_datasets(self, alerts_ids):
    """Return `(train_instances, alerts_instances)` for the given alerts.

    `alerts_instances` are the test instances whose ids are in
    `alerts_ids`. `train_instances` are all the instances of the single
    child experiment of type 'train' attached to the parent of the test
    experiment.
    """
    test_exp = self.test_exp
    # Alerts are selected among the instances of the test experiment.
    alerts_instances = test_exp.test_instances.get_from_ids(alerts_ids)
    # The train instances are loaded from the Train experiment, which is
    # looked up among the children of the test experiment's parent.
    diadem_id = test_exp.exp_conf.parent
    relations = test_exp.session.query(ExpRelationshipsAlchemy).filter(
        ExpRelationshipsAlchemy.parent_id == diadem_id)
    children = [relation.child.diadem_exp for relation in relations.all()]
    train_exps = list(filter(lambda child: child.type == 'train', children))
    # Exactly one Train experiment is expected under the parent.
    assert len(train_exps) == 1
    train_exp_id = train_exps[0].exp_id
    train_exp = experiment.get_factory().from_exp_id(
        train_exp_id, test_exp.exp_conf.secuml_conf, test_exp.session)
    return train_exp.get_instances(), alerts_instances
def _get_datasets(self, alerts_ids):
    """Return `(train_instances, alerts_instances)` for the given alerts.

    `alerts_instances` are the test instances whose ids are in
    `alerts_ids`. `train_instances` are the instances of the Train
    experiment that are annotated with the `MALICIOUS` label.

    The Train experiment is found with a single query: the entry of type
    'train' whose experiment is of kind 'Detection' and is related to the
    parent of the test experiment. `query.one()` is used, so the lookup
    presumably fails unless exactly one row matches (SQLAlchemy semantics,
    to be confirmed against the session type).
    """
    test_exp = self.test_exp
    # Alerts are selected among the instances of the test experiment.
    alerts_instances = test_exp.test_instances.get_from_ids(alerts_ids)
    # The train instances are loaded from the Train experiment.
    diadem_id = test_exp.exp_conf.parent
    train_query = (
        test_exp.session.query(DiademExpAlchemy)
        .join(DiademExpAlchemy.exp)
        .join(ExpAlchemy.parents)
        .filter(DiademExpAlchemy.type == 'train')
        .filter(ExpAlchemy.kind == 'Detection')
        .filter(ExpRelationshipsAlchemy.parent_id == diadem_id)
    )
    train_exp_id = train_query.one().exp_id
    train_exp = experiment.get_factory().from_exp_id(
        train_exp_id, test_exp.exp_conf.secuml_conf, test_exp.session)
    all_train_instances = train_exp.get_instances()
    # Only the malicious annotated instances are kept for training.
    malicious_instances = all_train_instances.get_annotated_instances(
        label=MALICIOUS)
    return malicious_instances, alerts_instances
# NOTE(review): the statements up to the first `def` are the tail of a
# definition that starts before this excerpt; they are reproduced unchanged
# and their original nesting cannot be recovered from here.
annotations_file = self.exp_conf.annotations_conf.annotations_filename
stats_exp = FeaturesAnalysisExpAlchemy(
    id=self.exp_id, features_set_id=features_set_id,
    annotations_filename=annotations_file)
self.session.add(stats_exp)
self.session.flush()


def run(self):
    """Run the features analysis and write its outputs.

    Loads the experiment's instances, builds a `FeaturesAnalysis` on them
    and generates the plots and the scoring in the experiment's output
    directory. Density plots are requested only when there are fewer than
    150000 instances; otherwise a warning is logged.
    """
    Experiment.run(self)
    instances = self.get_instances()
    # Density plots are disabled from 150000 instances upwards.
    with_density = instances.num_instances() < 150000
    if not with_density:
        self.exp_conf.logger.warning('There are more than 150.000, so '
                                     'the density plots are not '
                                     'displayed. ')
    stats = FeaturesAnalysis(instances, self.exp_conf.multiclass,
                             self.exp_conf.logger,
                             with_density=with_density)
    stats.gen_plots(self.output_dir())
    stats.gen_scoring(self.output_dir())


def web_template(self):
    """Return the path of the template used to display this experiment."""
    return 'features_analysis/main.html'


# Make the experiment available through the factory under the
# 'FeaturesAnalysis' name.
experiment.get_factory().register('FeaturesAnalysis',
                                  FeaturesAnalysisExperiment,
                                  FeaturesAnalysisConf)
self.conf.logger.warning('There are too few class labels.' 'The instances are not projected ' 'before building the clustering.') return instances def run(self, instances=None, drop_annotated_instances=False, quick=False): Experiment.run(self) instances = self.get_instances() core_conf = self.exp_conf.core_conf clustering = core_conf.algo(instances, core_conf) clustering.fit() clustering.generate(drop_annotated_instances=drop_annotated_instances) clustering.export(self.output_dir(), quick=quick) def set_clusters(self, instances, assigned_clusters, centroids, drop_annotated_instances, cluster_labels): Experiment.run(self) clustering = Clusters(instances, assigned_clusters) clustering.generate(centroids, drop_annotated_instances=drop_annotated_instances, cluster_labels=cluster_labels) clustering.export(self.output_dir(), drop_annotated_instances=drop_annotated_instances) def web_template(self): return 'clustering/main.html' experiment.get_factory().register('Clustering', ClusteringExperiment, ClusteringConf)
def update_curr_exp(exp_id):
    """Return the experiment identified by `exp_id`.

    The experiment is built by the experiment factory from the
    module-level `secuml_conf` and `session`.
    """
    factory = experiment.get_factory()
    return factory.from_exp_id(exp_id, secuml_conf, session)
# SecuML
# Copyright (C) 2016-2018 ANSSI
#
# SecuML is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SecuML is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with SecuML. If not, see <http://www.gnu.org/licenses/>.

from secuml.exp import experiment

from .active_learning import ActiveLearningExperiment
from .rcd_conf import RcdConf


class RcdExperiment(ActiveLearningExperiment):
    """Active learning experiment registered under the 'Rcd' name.

    The class adds nothing to `ActiveLearningExperiment`; it exists so
    that the factory can associate the 'Rcd' name with `RcdConf`.
    """


experiment.get_factory().register('Rcd', RcdExperiment, RcdConf)
# NOTE(review): the statement below is the tail of a definition that starts
# before this excerpt; it is reproduced unchanged.
self._export()


def add_to_db(self):
    """Record the experiment in the database, including its DIADEM entry.

    Calls the generic `Experiment.add_to_db`, then `add_diadem_exp_to_db`
    with the experiment id, fold id, kind, alerts configuration and
    classifier configuration.
    """
    Experiment.add_to_db(self)
    # NOTE(review): imported locally rather than at module level,
    # presumably to avoid a circular import; confirm.
    from secuml.exp.diadem import add_diadem_exp_to_db
    add_diadem_exp_to_db(self.session, self.exp_conf.exp_id,
                         self.exp_conf.fold_id, self.kind,
                         alerts_conf=self.alerts_conf,
                         classifier_conf=self.exp_conf.core_conf)


def _test(self):
    """Apply the classifier to the test instances and monitor the results.

    Stores the predictions in `self.predictions` and a `TestMonitoring`
    fed with these predictions in `self.monitoring`.
    """
    # Performance monitoring is flagged in the database only when the
    # test instances have a ground truth.
    if self.test_instances.has_ground_truth():
        diadem_set_perf_monitoring(self.session, self.exp_conf.exp_id)
    self.predictions, exec_time = self.classifier.testing(
        self.test_instances)
    self.monitoring = TestMonitoring(self, self.classifier.conf,
                                     exec_time,
                                     alerts_conf=self.alerts_conf)
    self.monitoring.init(self.test_instances)
    self.monitoring.add_predictions(self.predictions)


def _export(self):
    """Display the monitoring results in the output directory."""
    self.monitoring.display(self.output_dir())


# Make both experiments available through the factory.
experiment.get_factory().register('Train', TrainExp, TrainConf)
experiment.get_factory().register('Test', TestExp, TestConf)
# NOTE(review): everything up to the first `def` is the tail of a definition
# that starts before this excerpt (it begins in the middle of a call); it is
# reproduced unchanged and its original nesting cannot be recovered from here.
self, self.test_conf.num_folds,
self.exp_conf.core_conf.classifier_conf)
global_cv_monitoring.init(cv_datasets.get_features_ids())
for fold_id, datasets in enumerate(cv_datasets._datasets):
    classifier, test_predictions = self._run_one_fold(
        datasets, cv_monitoring, fold_id)
    global_cv_monitoring.add_fold(fold_id, test_predictions,
                                  classifier.pipeline)
    classifiers[fold_id] = classifier
global_cv_monitoring.display(self.output_dir())
return classifiers


def _gen_datasets(self, instances):
    """Generate the datasets from `instances`.

    When `instances` is None, the experiment's own instances are loaded
    with `self.get_instances()`. The generation itself is delegated to
    `self.test_conf.gen_datasets`, which also receives the classifier
    configuration.
    """
    if instances is None:
        instances = self.get_instances()
    classifier_conf = self.exp_conf.core_conf.classifier_conf
    return self.test_conf.gen_datasets(classifier_conf, instances)


def _set_train_test_exp(self, train_test, exp, fold_id):
    """Store `exp` in `self._train_test_exps` under the key `train_test`.

    Without a fold (`fold_id` is None) the experiment is stored at the top
    level. Otherwise it is stored in
    `self._train_test_exps['folds'][fold_id]`, and the intermediate
    dictionaries are created on first use.
    """
    if fold_id is None:
        self._train_test_exps[train_test] = exp
    else:
        if 'folds' not in self._train_test_exps:
            self._train_test_exps['folds'] = {}
        if fold_id not in self._train_test_exps['folds']:
            self._train_test_exps['folds'][fold_id] = {}
        self._train_test_exps['folds'][fold_id][train_test] = exp


# Make the experiment available through the factory under the 'Diadem' name.
experiment.get_factory().register('Diadem', DiademExp, DiademConf)
# NOTE(review): the two lines below are the tail of a definition that starts
# before this excerpt (it begins in the middle of a call); they are
# reproduced unchanged.
fold_id=self.exp_conf.fold_id, type=self.kind))
self.session.flush()


def _test(self, classifier):
    """Apply `classifier` to the test instances and record the predictions.

    Stores the predictions and the prediction time on the experiment and
    adds them to `self.monitoring`.
    """
    # Performance monitoring is flagged in the database only when the
    # test instances have a ground truth.
    if self.test_instances.has_ground_truth():
        diadem_set_perf_monitoring(self.session, self.exp_conf.exp_id)
    self.predictions, self.prediction_time = classifier.testing(
        self.test_instances)
    self.monitoring.add_predictions(self.predictions,
                                    self.prediction_time)


def _export(self):
    """Display the monitoring results, then update the database entry."""
    self.monitoring.display(self.output_dir())
    self._set_diadem_conf()


def _set_diadem_conf(self):
    """Copy the classifier's characteristics to the experiment's database row.

    The `DiademExpAlchemy` row whose `exp_id` matches this experiment is
    fetched with `query.one()` and its flags are filled in from the
    classifier configuration before the session is flushed.
    """
    classif_conf = self.classifier.conf
    query = self.session.query(DiademExpAlchemy)
    query = query.filter(DiademExpAlchemy.exp_id == self.exp_conf.exp_id)
    diadem_exp = query.one()
    diadem_exp.multiclass = classif_conf.multiclass
    diadem_exp.proba = classif_conf.is_probabilist()
    diadem_exp.with_scoring = classif_conf.scoring_function() is not None
    # NOTE(review): `alerts` is set from `core_conf` being defined; confirm
    # that this is the intended source for the flag.
    diadem_exp.alerts = self.exp_conf.core_conf is not None
    diadem_exp.model_interp = classif_conf.is_interpretable()
    diadem_exp.pred_interp = classif_conf.interpretable_predictions()
    self.session.flush()


# Make the experiment available through the factory under the 'Detection'
# name.
experiment.get_factory().register('Detection', DetectionExp, DetectionConf)
from secuml.exp import experiment
from secuml.exp.experiment import Experiment

from .conf import ProjectionConf


class ProjectionExperiment(Experiment):
    """Experiment that fits a projection and applies it to instances."""

    def run(self, instances=None, export=True):
        """Fit the configured projection and return the projected instances.

        `instances` are the instances to project; when it is None, the
        experiment's own instances are loaded with `self.get_instances()`.
        When `export` is true, the results of the fit and of the
        transformation are exported to the experiment's output directory.
        """
        Experiment.run(self)
        # Instances supplied by the caller must not be discarded: the
        # experiment's own instances are loaded only when none are given.
        if instances is None:
            instances = self.get_instances()
        core_conf = self.exp_conf.core_conf
        dimension_reduction = core_conf.algo(core_conf)
        # Fit
        dimension_reduction.fit(instances)
        if export:
            dimension_reduction.export_fit(self.output_dir(), instances)
        # Transformation
        projected_instances = dimension_reduction.transform(instances)
        if export:
            dimension_reduction.export_transform(self.output_dir(),
                                                 instances,
                                                 projected_instances)
        return projected_instances

    def web_template(self):
        """Return the path of the template used to display this experiment."""
        return 'projection/main.html'


# Make the experiment available through the factory under the 'Projection'
# name.
experiment.get_factory().register('Projection', ProjectionExperiment,
                                  ProjectionConf)
# NOTE(review): the two statements below are the tail of a definition that
# starts before this excerpt; they are reproduced unchanged.
exp_db = query.one()
exp_db.finished = True


def run_next_iter(self, output_dir=None):
    """Run one iteration and tell whether the process must stop.

    Returns True when the iteration raised `NoAnnotationAdded` or
    `NoUnlabeledDataLeft` (the exception is logged at info level), and
    False when the iteration completed. On completion the session is
    committed, the iteration counter is incremented and the iteration
    becomes `self.prev_iter`.

    `output_dir` is accepted but not used by this method.
    """
    self.curr_iter = Iteration(self.exp, self.iter_num,
                               datasets=self.datasets,
                               prev_iter=self.prev_iter,
                               budget=self.current_budget)
    try:
        # The value returned by the iteration is the budget passed to the
        # next one.
        self.current_budget = self.curr_iter.run()
    except (NoAnnotationAdded, NoUnlabeledDataLeft) as e:
        self.exp.exp_conf.logger.info(e)
        return True
    else:
        self.exp.session.commit()
        self.iter_num += 1
        # Drop the link to the previous iteration before this one becomes
        # the previous iteration itself (presumably so that older
        # iterations can be freed; confirm).
        self.curr_iter.prev_iter = None
        self.prev_iter = self.curr_iter
        return False


# Same behaviour as ActiveLearningExp; registered below under the 'Rcd' name
# with its own configuration class.
class RcdExp(ActiveLearningExp):
    pass


# Make both experiments available through the factory.
experiment.get_factory().register('ActiveLearning', ActiveLearningExp,
                                  ActiveLearningConf)
experiment.get_factory().register('Rcd', RcdExp, RcdConf)
# NOTE(review): the statements up to the first `def` are the tail of a
# definition that starts before this excerpt; they are reproduced unchanged.
stop = False
while not stop:
    stop = self.run_next_iter(output_dir)
# Update the database. The active learning experiment is finished.
query = self.exp.session.query(ActiveLearningExpAlchemy)
query = query.filter(ActiveLearningExpAlchemy.id == self.exp.exp_id)
exp_db = query.one()
exp_db.finished = True


def run_next_iter(self, output_dir=None):
    """Run one iteration and tell whether the process must stop.

    Returns True when the iteration raised `NoAnnotationAdded` or
    `NoUnlabeledDataLeft` (the exception is logged at info level), and
    False when the iteration completed. On completion the session is
    committed, the iteration counter is incremented and the iteration
    becomes `self.prev_iter`.

    `output_dir` is accepted but not used by this method.
    """
    self.curr_iter = Iteration(self.exp, self.iter_num,
                               datasets=self.datasets,
                               prev_iter=self.prev_iter,
                               budget=self.current_budget)
    try:
        # The value returned by the iteration is the budget passed to the
        # next one.
        self.current_budget = self.curr_iter.run()
    except (NoAnnotationAdded, NoUnlabeledDataLeft) as e:
        self.exp.exp_conf.logger.info(e)
        return True
    else:
        self.exp.session.commit()
        self.iter_num += 1
        # Drop the link to the previous iteration before this one becomes
        # the previous iteration itself (presumably so that older
        # iterations can be freed; confirm).
        self.curr_iter.prev_iter = None
        self.prev_iter = self.curr_iter
        return False


# Make the experiment available through the factory under the
# 'ActiveLearning' name.
experiment.get_factory().register('ActiveLearning',
                                  ActiveLearningExperiment,
                                  ActiveLearningConf)