Esempio n. 1
0
 def _get_trained_classifier(self, train_exp_id):
     """Rebuild the classifier trained by a previous experiment.

     The experiment ``train_exp_id`` is reloaded through the experiment
     factory, a classifier is instantiated from its core configuration,
     and the model stored as ``model.out`` in that experiment's output
     directory is loaded into it.

     :param train_exp_id: identifier of the experiment holding the model.
     :return: the classifier, after ``load_model`` has been called on it.
     """
     factory = experiment.get_factory()
     source_exp = factory.from_exp_id(train_exp_id,
                                      self.exp_conf.secuml_conf,
                                      self.session)
     core_conf = source_exp.exp_conf.core_conf
     classifier = core_conf.model_class(core_conf)
     model_file = path.join(source_exp.output_dir(), 'model.out')
     classifier.load_model(model_file)
     return classifier
Esempio n. 2
0
 def _get_datasets(self, alerts_ids):
     """Return the training instances and the instances raising alerts.

     The alert instances are selected from the test experiment's instances.
     The training instances come from the single child experiment of type
     ``'train'`` attached to the parent of the test experiment.

     :param alerts_ids: identifiers of the instances that raised alerts.
     :return: ``(train_instances, alerts_instances)``.
     :raises AssertionError: if the parent does not have exactly one child
         experiment of type ``'train'``.
     """
     # alerts_instances
     test_instances = self.test_exp.test_instances
     alerts_instances = test_instances.get_from_ids(alerts_ids)
     # train_instances loaded from the Train experiment.
     diadem_id = self.test_exp.exp_conf.parent
     query = self.test_exp.session.query(ExpRelationshipsAlchemy)
     query = query.filter(ExpRelationshipsAlchemy.parent_id == diadem_id)
     children = [r.child.diadem_exp for r in query.all()]
     train_exps = [c for c in children if c.type == 'train']
     # Explicit check rather than an `assert` statement: asserts are removed
     # when Python runs with -O, which would let an empty or ambiguous
     # result slip through to the indexing below.
     if len(train_exps) != 1:
         raise AssertionError(
             'Expected exactly one train experiment for parent %s, '
             'found %d.' % (diadem_id, len(train_exps)))
     train_exp_id = train_exps[0].exp_id
     exp = experiment.get_factory().from_exp_id(
         train_exp_id,
         self.test_exp.exp_conf.secuml_conf,
         self.test_exp.session)
     train_instances = exp.get_instances()
     return train_instances, alerts_instances
Esempio n. 3
0
 def _get_datasets(self, alerts_ids):
     """Return the malicious training instances and the alert instances.

     The alert instances are picked from the test experiment's instances.
     The training experiment is looked up in the database: it is the
     ``DiademExpAlchemy`` row of type ``'train'`` and kind ``'Detection'``
     whose parent is the parent of the test experiment. The lookup uses
     ``Query.one()``, so exactly one matching row is expected.

     :param alerts_ids: identifiers of the instances that raised alerts.
     :return: ``(train_instances, alerts_instances)`` where the training
         instances are restricted to those annotated as ``MALICIOUS``.
     """
     test_exp = self.test_exp
     # Instances that raised an alert.
     alerts_instances = test_exp.test_instances.get_from_ids(alerts_ids)
     # Locate the Train experiment sharing the same parent.
     parent_id = test_exp.exp_conf.parent
     train_row = (test_exp.session.query(DiademExpAlchemy)
                  .join(DiademExpAlchemy.exp)
                  .join(ExpAlchemy.parents)
                  .filter(DiademExpAlchemy.type == 'train')
                  .filter(ExpAlchemy.kind == 'Detection')
                  .filter(ExpRelationshipsAlchemy.parent_id == parent_id)
                  .one())
     train_exp = experiment.get_factory().from_exp_id(
         train_row.exp_id, test_exp.exp_conf.secuml_conf, test_exp.session)
     all_train = train_exp.get_instances()
     train_instances = all_train.get_annotated_instances(label=MALICIOUS)
     return train_instances, alerts_instances
Esempio n. 4
0
        annotations_file = self.exp_conf.annotations_conf.annotations_filename
        stats_exp = FeaturesAnalysisExpAlchemy(
            id=self.exp_id,
            features_set_id=features_set_id,
            annotations_filename=annotations_file)
        self.session.add(stats_exp)
        self.session.flush()

    def run(self):
        """Run the features analysis and export its plots and scoring.

        Density plots are requested only when there are fewer than 150,000
        instances; otherwise a warning is logged and they are skipped.
        """
        Experiment.run(self)
        instances = self.get_instances()
        # Threshold from which the density plots are disabled.
        max_density_instances = 150000
        with_density = instances.num_instances() < max_density_instances
        if not with_density:
            self.exp_conf.logger.warning('There are 150,000 instances or '
                                         'more, so the density plots are '
                                         'not displayed.')
        stats = FeaturesAnalysis(instances,
                                 self.exp_conf.multiclass,
                                 self.exp_conf.logger,
                                 with_density=with_density)
        output_dir = self.output_dir()
        stats.gen_plots(output_dir)
        stats.gen_scoring(output_dir)

    def web_template(self):
        """Return the template path ``'features_analysis/main.html'``."""
        template = 'features_analysis/main.html'
        return template


# Register FeaturesAnalysisExperiment and FeaturesAnalysisConf with the
# experiment factory under the name 'FeaturesAnalysis'. Runs at import time.
experiment.get_factory().register('FeaturesAnalysis',
                                  FeaturesAnalysisExperiment,
                                  FeaturesAnalysisConf)
Esempio n. 5
0
                self.conf.logger.warning('There are too few class labels.'
                                         'The instances are not projected '
                                         'before building the clustering.')
        return instances

    def run(self, instances=None, drop_annotated_instances=False, quick=False):
        """Fit the configured clustering algorithm and export the result.

        NOTE(review): the ``instances`` argument is overwritten before it is
        read; the data always comes from ``self.get_instances()``.

        :param instances: currently ignored (see the note above).
        :param drop_annotated_instances: forwarded to ``generate``.
        :param quick: forwarded to ``export``.
        """
        Experiment.run(self)
        data = self.get_instances()
        conf = self.exp_conf.core_conf
        model = conf.algo(data, conf)
        model.fit()
        model.generate(drop_annotated_instances=drop_annotated_instances)
        out_dir = self.output_dir()
        model.export(out_dir, quick=quick)

    def set_clusters(self, instances, assigned_clusters, centroids,
                     drop_annotated_instances, cluster_labels):
        """Build a clustering from precomputed assignments and export it.

        :param instances: instances given to ``Clusters``.
        :param assigned_clusters: cluster assignment given to ``Clusters``.
        :param centroids: forwarded to ``generate``.
        :param drop_annotated_instances: forwarded to both ``generate`` and
            ``export``.
        :param cluster_labels: forwarded to ``generate``.
        """
        Experiment.run(self)
        clusters = Clusters(instances, assigned_clusters)
        generate_opts = {
            'drop_annotated_instances': drop_annotated_instances,
            'cluster_labels': cluster_labels,
        }
        clusters.generate(centroids, **generate_opts)
        out_dir = self.output_dir()
        clusters.export(out_dir,
                        drop_annotated_instances=drop_annotated_instances)

    def web_template(self):
        """Return the template path ``'clustering/main.html'``."""
        template = 'clustering/main.html'
        return template


# Register ClusteringExperiment and ClusteringConf with the experiment
# factory under the name 'Clustering'. Runs at import time.
experiment.get_factory().register('Clustering', ClusteringExperiment,
                                  ClusteringConf)
Esempio n. 6
0
def update_curr_exp(exp_id):
    """Load the experiment ``exp_id`` through the experiment factory.

    Uses the ``secuml_conf`` and ``session`` names from the enclosing module
    scope.

    :param exp_id: identifier of the experiment to load.
    :return: whatever the factory's ``from_exp_id`` returns.
    """
    factory = experiment.get_factory()
    return factory.from_exp_id(exp_id, secuml_conf, session)
Esempio n. 7
0
# SecuML
# Copyright (C) 2016-2018  ANSSI
#
# SecuML is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SecuML is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with SecuML. If not, see <http://www.gnu.org/licenses/>.

from secuml.exp import experiment

from .active_learning import ActiveLearningExperiment
from .rcd_conf import RcdConf


class RcdExperiment(ActiveLearningExperiment):
    """Active learning experiment that adds no behaviour of its own.

    Everything is inherited from ``ActiveLearningExperiment``; presumably the
    subclass exists so it can be registered under a distinct name.
    """


# Register RcdExperiment and RcdConf with the experiment factory under the
# name 'Rcd'. Runs at import time.
experiment.get_factory().register('Rcd', RcdExperiment, RcdConf)
Esempio n. 8
0
        self._export()

    def add_to_db(self):
        """Add the experiment to the database.

        Calls ``Experiment.add_to_db`` first, then ``add_diadem_exp_to_db``
        with the session, the experiment and fold identifiers, the kind, and
        the alerts and classifier configurations.
        """
        Experiment.add_to_db(self)
        # Function-level import kept as in the original; presumably it avoids
        # a circular import -- confirm before moving it to module level.
        from secuml.exp.diadem import add_diadem_exp_to_db
        conf = self.exp_conf
        add_diadem_exp_to_db(self.session, conf.exp_id, conf.fold_id,
                             self.kind,
                             alerts_conf=self.alerts_conf,
                             classifier_conf=conf.core_conf)

    def _test(self):
        """Apply the classifier to the test instances and monitor the result.

        When the test instances have ground truth,
        ``diadem_set_perf_monitoring`` is called for this experiment first.
        The predictions are stored in ``self.predictions`` and a
        ``TestMonitoring`` object, initialised with the test instances and
        fed with the predictions, is stored in ``self.monitoring``.
        """
        instances = self.test_instances
        if instances.has_ground_truth():
            diadem_set_perf_monitoring(self.session, self.exp_conf.exp_id)
        outcome = self.classifier.testing(instances)
        self.predictions, exec_time = outcome
        monitoring = TestMonitoring(self, self.classifier.conf, exec_time,
                                    alerts_conf=self.alerts_conf)
        self.monitoring = monitoring
        monitoring.init(instances)
        monitoring.add_predictions(self.predictions)

    def _export(self):
        self.monitoring.display(self.output_dir())


# Register the 'Train' and 'Test' names with the experiment factory, each
# paired with its experiment class and configuration class. Runs at import
# time.
experiment.get_factory().register('Train', TrainExp, TrainConf)
experiment.get_factory().register('Test', TestExp, TestConf)
Esempio n. 9
0
            self, self.test_conf.num_folds,
            self.exp_conf.core_conf.classifier_conf)
        global_cv_monitoring.init(cv_datasets.get_features_ids())
        for fold_id, datasets in enumerate(cv_datasets._datasets):
            classifier, test_predictions = self._run_one_fold(
                datasets, cv_monitoring, fold_id)
            global_cv_monitoring.add_fold(fold_id, test_predictions,
                                          classifier.pipeline)
            classifiers[fold_id] = classifier
        global_cv_monitoring.display(self.output_dir())
        return classifiers

    def _gen_datasets(self, instances):
        if instances is None:
            instances = self.get_instances()
        classifier_conf = self.exp_conf.core_conf.classifier_conf
        return self.test_conf.gen_datasets(classifier_conf, instances)

    def _set_train_test_exp(self, train_test, exp, fold_id):
        if fold_id is None:
            self._train_test_exps[train_test] = exp
        else:
            if 'folds' not in self._train_test_exps:
                self._train_test_exps['folds'] = {}
            if fold_id not in self._train_test_exps['folds']:
                self._train_test_exps['folds'][fold_id] = {}
            self._train_test_exps['folds'][fold_id][train_test] = exp


# Register DiademExp and DiademConf with the experiment factory under the
# name 'Diadem'. Runs at import time.
experiment.get_factory().register('Diadem', DiademExp, DiademConf)
Esempio n. 10
0
                                          fold_id=self.exp_conf.fold_id,
                                          type=self.kind))
        self.session.flush()

    def _test(self, classifier):
        if self.test_instances.has_ground_truth():
            diadem_set_perf_monitoring(self.session, self.exp_conf.exp_id)
        self.predictions, self.prediction_time = classifier.testing(
                                                           self.test_instances)
        self.monitoring.add_predictions(self.predictions, self.prediction_time)

    def _export(self):
        self.monitoring.display(self.output_dir())
        self._set_diadem_conf()

    def _set_diadem_conf(self):
        """Copy classifier properties onto this experiment's database row.

        The ``DiademExpAlchemy`` row whose ``exp_id`` matches this experiment
        is fetched with ``Query.one()``, its flags are updated from the
        classifier configuration, and the session is flushed.
        """
        conf = self.classifier.conf
        row = (self.session.query(DiademExpAlchemy)
               .filter(DiademExpAlchemy.exp_id == self.exp_conf.exp_id)
               .one())
        row.multiclass = conf.multiclass
        row.proba = conf.is_probabilist()
        has_scoring = conf.scoring_function() is not None
        row.with_scoring = has_scoring
        # Alerts are flagged as soon as a core configuration is present.
        has_core_conf = self.exp_conf.core_conf is not None
        row.alerts = has_core_conf
        row.model_interp = conf.is_interpretable()
        row.pred_interp = conf.interpretable_predictions()
        self.session.flush()


# Register DetectionExp and DetectionConf with the experiment factory under
# the name 'Detection'. Runs at import time.
experiment.get_factory().register('Detection', DetectionExp, DetectionConf)
Esempio n. 11
0
from secuml.exp import experiment
from secuml.exp.experiment import Experiment
from .conf import ProjectionConf


class ProjectionExperiment(Experiment):
    """Experiment that fits a projection algorithm and applies it."""

    def run(self, instances=None, export=True):
        """Fit the configured algorithm and project the instances.

        NOTE(review): the ``instances`` argument is overwritten before it is
        read; the data always comes from ``self.get_instances()``.

        :param instances: currently ignored (see the note above).
        :param export: when true, the fit and the transformation are both
            exported to the output directory.
        :return: the projected instances returned by ``transform``.
        """
        Experiment.run(self)
        data = self.get_instances()
        conf = self.exp_conf.core_conf
        projector = conf.algo(conf)
        # Fitting step.
        projector.fit(data)
        if export:
            projector.export_fit(self.output_dir(), data)
        # Projection step.
        projected = projector.transform(data)
        if export:
            projector.export_transform(self.output_dir(), data, projected)
        return projected

    def web_template(self):
        """Return the template path ``'projection/main.html'``."""
        template = 'projection/main.html'
        return template


# Register ProjectionExperiment and ProjectionConf with the experiment
# factory under the name 'Projection'. Runs at import time.
experiment.get_factory().register('Projection', ProjectionExperiment,
                                  ProjectionConf)
Esempio n. 12
0
        exp_db = query.one()
        exp_db.finished = True

    def run_next_iter(self, output_dir=None):
        """Run one iteration and report whether the iterations must stop.

        :param output_dir: not used by this method.
        :return: ``True`` when the iteration raised ``NoAnnotationAdded`` or
            ``NoUnlabeledDataLeft`` (the exception is logged at info level),
            ``False`` when it completed and the session was committed.
        """
        iteration = Iteration(self.exp,
                              self.iter_num,
                              datasets=self.datasets,
                              prev_iter=self.prev_iter,
                              budget=self.current_budget)
        self.curr_iter = iteration
        try:
            self.current_budget = iteration.run()
        except (NoAnnotationAdded, NoUnlabeledDataLeft) as stop_reason:
            self.exp.exp_conf.logger.info(stop_reason)
            return True
        self.exp.session.commit()
        self.iter_num += 1
        # Cut the link to the older iteration before keeping this one as the
        # previous iteration -- presumably so that finished iterations are
        # not all kept alive through the chain.
        iteration.prev_iter = None
        self.prev_iter = iteration
        return False


class RcdExp(ActiveLearningExp):
    """Active learning experiment that adds no behaviour of its own.

    Everything is inherited from ``ActiveLearningExp``; presumably the
    subclass exists so it can be registered under a distinct name.
    """


# Register ActiveLearningExp and ActiveLearningConf with the experiment
# factory under the name 'ActiveLearning'. Runs at import time.
experiment.get_factory().register('ActiveLearning', ActiveLearningExp,
                                  ActiveLearningConf)

# Same registration for the 'Rcd' name, with RcdExp and RcdConf.
experiment.get_factory().register('Rcd', RcdExp, RcdConf)
Esempio n. 13
0
        stop = False
        while not stop:
            stop = self.run_next_iter(output_dir)
        # Update the database. The active learning experiment is finished.
        query = self.exp.session.query(ActiveLearningExpAlchemy)
        query = query.filter(ActiveLearningExpAlchemy.id == self.exp.exp_id)
        exp_db = query.one()
        exp_db.finished = True

    def run_next_iter(self, output_dir=None):
        """Run the next iteration; return ``True`` if the loop must stop.

        :param output_dir: not used by this method.
        :return: ``True`` when ``NoAnnotationAdded`` or
            ``NoUnlabeledDataLeft`` was raised by the iteration (the
            exception is logged at info level); ``False`` once the iteration
            has completed and the session has been committed.
        """
        new_iter = Iteration(self.exp, self.iter_num,
                             datasets=self.datasets,
                             prev_iter=self.prev_iter,
                             budget=self.current_budget)
        self.curr_iter = new_iter
        try:
            self.current_budget = new_iter.run()
        except (NoAnnotationAdded, NoUnlabeledDataLeft) as stop_reason:
            self.exp.exp_conf.logger.info(stop_reason)
            return True
        # Successful iteration: persist it and move on to the next one.
        self.exp.session.commit()
        self.iter_num += 1
        # The reference to the older iteration is dropped before this one
        # becomes the previous iteration.
        new_iter.prev_iter = None
        self.prev_iter = new_iter
        return False


# Register ActiveLearningExperiment and ActiveLearningConf with the
# experiment factory under the name 'ActiveLearning'. Runs at import time.
experiment.get_factory().register('ActiveLearning',
                                  ActiveLearningExperiment,
                                  ActiveLearningConf)