def execute(self):
    """Run the main model experiment of the current iteration.

    Creates a child ``DiademExp`` named ``AL<exp_id>-Iter<iter_num>-main``,
    runs it on the iteration's instances, calls ``self._set_exec_time()``
    and stores the trained classifier in ``self.classifier``. The experiment
    itself is kept in ``self.model_exp``.
    """
    parent_exp = self.exp
    exp_name = 'AL%d-Iter%d-main' % (parent_exp.exp_id,
                                     self.iteration.iter_num)
    parent_conf = parent_exp.exp_conf
    src_features = parent_conf.features_conf
    # New FeaturesConf built from the parent's feature settings.
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    diadem_conf = DiademConf(parent_conf.secuml_conf,
                             parent_conf.dataset_conf,
                             features_conf,
                             parent_conf.annotations_conf,
                             self.model_conf,
                             None,
                             name=exp_name,
                             parent=parent_exp.exp_id)
    self.model_exp = DiademExp(diadem_conf, session=parent_exp.session)
    conf_class = self.model_conf.classifier_conf.__class__
    # Cross-validation monitoring is requested for supervised models only.
    is_supervised = (get_classifier_type(conf_class)
                     == ClassifierType.supervised)
    previous = self.iteration.prev_iter
    # Start from the previous iteration's classifier when there is one.
    init_classifier = (None if previous is None
                       else previous.update_model.classifier)
    self.model_exp.run(instances=self.iteration.datasets.instances,
                       cv_monitoring=is_supervised,
                       init_classifier=init_classifier)
    self._set_exec_time()
    self.classifier = self.model_exp.get_train_exp().classifier
def get_naive_bayes_conf(self):
    """Return the default ``GaussianNaiveBayes`` classifier configuration.

    The number of folds and jobs come from the hyper-parameter optimisation
    settings of the experiment's core classifier configuration, and the
    model is requested as multiclass. A child ``DiademExp`` named
    ``AL<exp_id>-Iter<iter_num>-all-NaiveBayes`` is instantiated with this
    configuration; the instance is not kept.
    """
    owner = self.exp
    exp_name = 'AL%d-Iter%d-all-NaiveBayes' % (owner.exp_id,
                                               self.iteration.iter_num)
    owner_conf = owner.exp_conf
    optim_conf = (owner_conf.core_conf.classifier_conf
                  .hyperparam_conf.optim_conf)
    naive_bayes_conf = classifiers.get_factory().get_default(
        'GaussianNaiveBayes',
        optim_conf.num_folds,
        optim_conf.n_jobs,
        True,  # multiclass
        owner.logger)
    classification_conf = ClassificationConf(
        naive_bayes_conf,
        UnlabeledLabeledConf(owner.logger),
        owner.logger)
    src_features = owner_conf.features_conf
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    diadem_conf = DiademConf(owner_conf.secuml_conf,
                             owner_conf.dataset_conf,
                             features_conf,
                             owner_conf.annotations_conf,
                             classification_conf,
                             None,
                             name=exp_name,
                             parent=owner.exp_id)
    # Instantiated for its side effects only; the object is discarded.
    DiademExp(diadem_conf, session=owner.session)
    return naive_bayes_conf
def _run_logistic_regression(self):
    """Run the multiclass model experiment and keep its test predictions.

    Creates and runs a child ``DiademExp`` named
    ``AL<exp_id>-Iter<iter_num>-all-LogisticRegression`` (without
    cross-validation monitoring), then stores on ``self``:

    * ``lr_predicted_proba`` / ``lr_predicted_labels``: the probabilities
      and values of the 'test' detection experiment's predictions;
    * ``lr_class_labels``: the class labels of the trained classifier;
    * ``lr_time``: total training time plus the test prediction time.
    """
    owner = self.exp
    exp_name = 'AL%d-Iter%d-all-LogisticRegression' % (
        owner.exp_id, self.iteration.iter_num)
    owner_conf = owner.exp_conf
    src_features = owner_conf.features_conf
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    diadem_conf = DiademConf(owner_conf.secuml_conf,
                             owner_conf.dataset_conf,
                             features_conf,
                             owner_conf.annotations_conf,
                             owner_conf.core_conf.multiclass_model,
                             None,
                             name=exp_name,
                             parent=owner.exp_id)
    lr_exp = DiademExp(diadem_conf, session=owner.session)
    lr_exp.run(instances=self.iteration.datasets.instances,
               cv_monitoring=False)
    training = lr_exp.get_train_exp()
    testing = lr_exp.get_detection_exp('test')
    predictions = testing.predictions
    self.lr_predicted_proba = predictions.all_probas
    self.lr_predicted_labels = predictions.values
    self.lr_class_labels = training.classifier.class_labels
    # Training time first, then the prediction time is added on top.
    self.lr_time = training.monitoring.exec_times.total()
    self.lr_time += testing.monitoring.exec_time.predictions
def from_args(args):
    """Build an ``RcdConf`` from an argument namespace.

    Reads ``args.annotations_file`` and ``args.exp_name`` directly; the
    remaining sub-configurations are delegated to the ``from_args`` helpers
    of the respective classes. The core configuration is always requested
    from the strategies factory under the name ``'Rcd'``.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    return RcdConf(
        secuml_conf,
        DatasetConf.from_args(args, log),
        FeaturesConf.from_args(args, log),
        AnnotationsConf(args.annotations_file, None, log),
        strategies_conf.get_factory().from_args('Rcd', args, log),
        name=args.exp_name)
def from_args(args):
    """Build a ``ProjectionConf`` from an argument namespace.

    The core configuration is obtained from the projection factory for the
    algorithm named by ``args.algo``; the experiment name comes from
    ``args.exp_name``.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    factory = projection_conf.get_factory()
    core = factory.from_args(args.algo, args, log)
    return ProjectionConf(secuml_conf, dataset, features, annotations, core,
                          name=args.exp_name)
def from_args(args):
    """Build an ``ActiveLearningConf`` from an argument namespace.

    The core configuration is produced by the strategies factory for the
    strategy named by ``args.strategy``; the experiment name comes from
    ``args.exp_name``.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    return ActiveLearningConf(
        secuml_conf,
        DatasetConf.from_args(args, log),
        FeaturesConf.from_args(args, log),
        AnnotationsConf(args.annotations_file, None, log),
        strategies_conf.get_factory().from_args(args.strategy, args, log),
        name=args.exp_name)
def from_args(args):
    """Build a ``FeaturesAnalysisConf`` from an argument namespace.

    No core configuration is supplied (``None`` is passed in its place);
    the experiment name comes from ``args.exp_name``.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    return FeaturesAnalysisConf(secuml_conf, dataset, features, annotations,
                                None, name=args.exp_name)
def from_json(conf_json, secuml_conf):
    """Rebuild an ``RcdConf`` from its JSON dictionary.

    ``conf_json`` must provide the keys ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent`` and
    ``exp_id``. The experiment id is set on the returned configuration after
    construction.
    """
    log = secuml_conf.logger
    rcd_conf = RcdConf(
        secuml_conf,
        DatasetConf.from_json(conf_json['dataset_conf'], log),
        FeaturesConf.from_json(conf_json['features_conf'], log),
        AnnotationsConf.from_json(conf_json['annotations_conf'], log),
        strategies_conf.get_factory().from_json(conf_json['core_conf'], log),
        name=conf_json['name'],
        parent=conf_json['parent'])
    rcd_conf.exp_id = conf_json['exp_id']
    return rcd_conf
def from_args(args):
    """Build a ``DiademConf`` from an argument namespace.

    When ``args.model_class`` is ``'AlreadyTrained'``, no annotations file
    is used and ``args.model_exp_id`` is forwarded as ``already_trained``;
    otherwise annotations come from ``args.annotations_file`` and
    ``already_trained`` is ``None``.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    core_conf = ClassificationConf.from_args(args, log)
    if args.model_class == 'AlreadyTrained':
        already_trained = args.model_exp_id
        annotations_conf = AnnotationsConf(None, None, log)
    else:
        already_trained = None
        annotations_conf = AnnotationsConf(args.annotations_file, None, log)
    dataset_conf = DatasetConf.from_args(args, log)
    features_conf = FeaturesConf.from_args(args, log)
    return DiademConf(secuml_conf,
                      dataset_conf,
                      features_conf,
                      annotations_conf,
                      core_conf,
                      name=args.exp_name,
                      already_trained=already_trained)
def from_json(conf_json, secuml_conf):
    """Rebuild a ``FeaturesAnalysisConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``name``, ``parent`` and ``exp_id``. No core
    configuration is supplied (``None`` is passed in its place).
    """
    log = secuml_conf.logger
    dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
    features = FeaturesConf.from_json(conf_json['features_conf'], log)
    annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                            log)
    analysis_conf = FeaturesAnalysisConf(secuml_conf, dataset, features,
                                         annotations, None,
                                         name=conf_json['name'],
                                         parent=conf_json['parent'])
    analysis_conf.exp_id = conf_json['exp_id']
    return analysis_conf
def from_args(args):
    """Build a ``ClusteringConf`` from an argument namespace.

    The core configuration is produced by the clustering factory for the
    algorithm named by ``args.algo``. ``args.exp_name`` and ``args.label``
    are forwarded as ``name`` and ``label``.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    return ClusteringConf(
        secuml_conf,
        DatasetConf.from_args(args, log),
        FeaturesConf.from_args(args, log),
        AnnotationsConf(args.annotations_file, None, log),
        clustering_conf.get_factory().from_args(args.algo, args, log),
        name=args.exp_name,
        label=args.label)
def from_args(args):
    """Build a ``DiademConf`` from an argument namespace, validating it.

    Annotations are only taken from ``args.annotations_file`` for supervised
    and semi-supervised model classes. ``args.model_exp_id`` is forwarded as
    ``already_trained`` when ``args.model_class`` is ``'AlreadyTrained'``.
    For multiclass models without alerts analysis, the alerts configuration
    is dropped (``None``).

    Raises:
        InvalidInputArguments: if an alerts classifier is requested for an
            unsupervised model class, if alerts analysis is requested for a
            multiclass model or in streaming mode, or if sparse features are
            requested for a classifier that does not accept them.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    logger = secuml_conf.logger
    classif_conf = ClassificationConf.from_args(args, logger)
    model_class = classifiers.get_factory().get_class(args.model_class)
    classifier_type = get_classifier_type(model_class)
    if classifier_type in (ClassifierType.supervised,
                           ClassifierType.semisupervised):
        annotations_conf = AnnotationsConf(args.annotations_file, None,
                                           logger)
    else:
        annotations_conf = AnnotationsConf(None, None, logger)
    already_trained = None
    if args.model_class == 'AlreadyTrained':
        already_trained = args.model_exp_id
    alerts_conf = AlertsConf.from_args(args, logger)
    if (classifier_type == ClassifierType.unsupervised and
            alerts_conf.classifier_conf is not None):
        raise InvalidInputArguments('Supervised classification of the '
                                    'alerts is not supported for '
                                    'unsupervised model classes. ')
    if classif_conf.classifier_conf.multiclass:
        if alerts_conf.with_analysis():
            raise InvalidInputArguments('Alerts analysis is not supported '
                                        'for multiclass models. ')
        else:
            alerts_conf = None
    # alerts_conf may have been reset to None just above; guard against it
    # so that this check cannot fail with an AttributeError.
    if (classif_conf.test_conf.method == 'dataset' and
            classif_conf.test_conf.streaming and
            alerts_conf is not None and
            alerts_conf.with_analysis()):
        raise InvalidInputArguments('Alerts analysis is not supported '
                                    'in streaming mode. ')
    dataset_conf = DatasetConf.from_args(args, logger)
    features_conf = FeaturesConf.from_args(args, logger)
    if (features_conf.sparse and
            not classif_conf.classifier_conf.accept_sparse):
        raise InvalidInputArguments('%s does not support sparse '
                                    'features. ' % args.model_class)
    return DiademConf(secuml_conf, dataset_conf, features_conf,
                      annotations_conf, classif_conf, alerts_conf,
                      name=args.exp_name, already_trained=already_trained,
                      no_training_detection=args.no_training_detection)
def from_json(conf_json, secuml_conf):
    """Rebuild a ``DiademConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent``,
    ``already_trained`` and ``exp_id``. The experiment id is set on the
    returned configuration after construction.
    """
    log = secuml_conf.logger
    diadem_conf = DiademConf(
        secuml_conf,
        DatasetConf.from_json(conf_json['dataset_conf'], log),
        FeaturesConf.from_json(conf_json['features_conf'], log),
        AnnotationsConf.from_json(conf_json['annotations_conf'], log),
        ClassificationConf.from_json(conf_json['core_conf'], log),
        name=conf_json['name'],
        parent=conf_json['parent'],
        already_trained=conf_json['already_trained'])
    diadem_conf.exp_id = conf_json['exp_id']
    return diadem_conf
def from_json(conf_json, secuml_conf):
    """Rebuild a ``ProjectionConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent`` and
    ``exp_id``. The core configuration is decoded by the projection factory.
    """
    log = secuml_conf.logger
    dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
    features = FeaturesConf.from_json(conf_json['features_conf'], log)
    annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                            log)
    factory = projection_conf.get_factory()
    core = factory.from_json(conf_json['core_conf'], log)
    proj_conf = ProjectionConf(secuml_conf, dataset, features, annotations,
                               core,
                               name=conf_json['name'],
                               parent=conf_json['parent'])
    proj_conf.exp_id = conf_json['exp_id']
    return proj_conf
def from_json(conf_json, secuml_conf):
    """Rebuild a ``TestConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent``, ``fold_id``,
    ``kind`` and ``exp_id``. The core configuration is decoded by the
    classifiers factory.
    """
    log = secuml_conf.logger
    test_conf = TestConf(
        secuml_conf,
        DatasetConf.from_json(conf_json['dataset_conf'], log),
        FeaturesConf.from_json(conf_json['features_conf'], log),
        AnnotationsConf.from_json(conf_json['annotations_conf'], log),
        classifiers.get_factory().from_json(conf_json['core_conf'], log),
        name=conf_json['name'],
        parent=conf_json['parent'],
        fold_id=conf_json['fold_id'],
        kind=conf_json['kind'])
    test_conf.exp_id = conf_json['exp_id']
    return test_conf
def from_args(args):
    """Build a ``FeaturesAnalysisConf`` from an argument namespace.

    ``args.multiclass`` is forwarded as the core configuration argument.

    Raises:
        InvalidInputArguments: if ``args.multiclass`` is set while
            ``args.annotations_file`` is ``None``.
    """
    no_annotations = args.annotations_file is None
    if no_annotations and args.multiclass:
        raise InvalidInputArguments('--annotations <file> is required. '
                                    'An annotation file must be specified '
                                    'to group the instances according to '
                                    'their families.')
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    return FeaturesAnalysisConf(
        secuml_conf,
        DatasetConf.from_args(args, log),
        FeaturesConf.from_args(args, log),
        AnnotationsConf(args.annotations_file, None, log),
        args.multiclass,
        name=args.exp_name)
def _create_clustering_exp(self, core_clustering_conf):
    """Create the ``AlertsClusteringExp`` attached to the test experiment.

    The clustering configuration reuses the test experiment's SecuML,
    dataset and annotations configurations, is named ``Alerts_<exp_id>``
    and has the test experiment as parent.

    Args:
        core_clustering_conf: core configuration passed to
            ``ClusteringConf``.

    Returns:
        The newly created ``AlertsClusteringExp``.
    """
    test_conf = self.test_exp.exp_conf
    src_features = test_conf.features_conf
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    test_exp_id = test_conf.exp_id
    clustering_conf = ClusteringConf(test_conf.secuml_conf,
                                     test_conf.dataset_conf,
                                     features_conf,
                                     test_conf.annotations_conf,
                                     core_clustering_conf,
                                     name='Alerts_%i' % test_exp_id,
                                     parent=test_exp_id)
    return AlertsClusteringExp(clustering_conf,
                               self.test_exp.exp_conf.parent,
                               create=True,
                               session=self.test_exp.session)
def from_args(args):
    """Build a ``DiademConf`` from an argument namespace.

    The annotations file is only used for supervised and semi-supervised
    model classes. ``args.model_exp_id`` is forwarded as ``already_trained``
    when ``args.model_class`` is ``'AlreadyTrained'``, ``None`` otherwise.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    classif_conf = ClassificationConf.from_args(args, log)
    model_class = classifiers.get_factory().get_class(args.model_class)
    needs_annotations = get_classifier_type(model_class) in [
        ClassifierType.supervised, ClassifierType.semisupervised]
    annotations_file = args.annotations_file if needs_annotations else None
    annotations_conf = AnnotationsConf(annotations_file, None, log)
    already_trained = (args.model_exp_id
                       if args.model_class == 'AlreadyTrained' else None)
    dataset_conf = DatasetConf.from_args(args, log)
    features_conf = FeaturesConf.from_args(args, log)
    return DiademConf(secuml_conf,
                      dataset_conf,
                      features_conf,
                      annotations_conf,
                      classif_conf,
                      name=args.exp_name,
                      already_trained=already_trained)
def _get_multiclass_conf(self):
    """Return the classification configuration and create its experiment.

    A child ``DiademExp`` named
    ``AL<exp_id>-Iter<iter_num>-<label>-analysis`` is created from
    ``self.rcd_conf.classification_conf`` and stored in
    ``self.multiclass_exp``.

    Returns:
        ``self.rcd_conf.classification_conf``.
    """
    classification_conf = self.rcd_conf.classification_conf
    owner = self.exp
    name_parts = ['AL%d' % owner.exp_id,
                  'Iter%d' % self.iteration.iter_num,
                  self.label,
                  'analysis']
    exp_name = '-'.join(name_parts)
    owner_conf = owner.exp_conf
    src_features = owner_conf.features_conf
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    diadem_conf = DiademConf(owner_conf.secuml_conf,
                             owner_conf.dataset_conf,
                             features_conf,
                             owner_conf.annotations_conf,
                             classification_conf,
                             None,
                             name=exp_name,
                             parent=owner.exp_id)
    self.multiclass_exp = DiademExp(diadem_conf, session=owner.session)
    return classification_conf
def from_json(conf_json, secuml_conf):
    """Rebuild a ``DetectionConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent``, ``fold_id``,
    ``kind`` and ``exp_id``. A ``core_conf`` of ``None`` yields no alerts
    configuration; otherwise it is decoded with ``AlertsConf.from_json``.
    """
    log = secuml_conf.logger
    dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
    features = FeaturesConf.from_json(conf_json['features_conf'], log)
    annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                            log)
    core_json = conf_json['core_conf']
    alerts = (None if core_json is None
              else AlertsConf.from_json(core_json, log))
    detection_conf = DetectionConf(secuml_conf, dataset, features,
                                   annotations, alerts,
                                   name=conf_json['name'],
                                   parent=conf_json['parent'],
                                   fold_id=conf_json['fold_id'],
                                   kind=conf_json['kind'])
    detection_conf.exp_id = conf_json['exp_id']
    return detection_conf
def from_json(conf_json, secuml_conf):
    """Rebuild a ``ClusteringConf`` from its JSON dictionary.

    ``conf_json`` must provide ``dataset_conf``, ``features_conf``,
    ``annotations_conf``, ``core_conf``, ``name``, ``parent``, ``label`` and
    ``exp_id``. A ``core_conf`` of ``None`` is kept as ``None``; otherwise
    it is decoded by the clustering factory.
    """
    log = secuml_conf.logger
    dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
    features = FeaturesConf.from_json(conf_json['features_conf'], log)
    annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                            log)
    core_json = conf_json['core_conf']
    if core_json is None:
        core = None
    else:
        core = clustering_conf.get_factory().from_json(core_json, log)
    result = ClusteringConf(secuml_conf, dataset, features, annotations,
                            core,
                            name=conf_json['name'],
                            parent=conf_json['parent'],
                            label=conf_json['label'])
    result.exp_id = conf_json['exp_id']
    return result
def _create_clustering_exp(self):
    """Create the clustering experiment for the current label.

    The core configuration is a ``CoreClusteringConf`` built from the
    experiment's logger and ``self.categories.num_categories``. The
    experiment is named ``AL<exp_id>-Iter<iter_num>-<label>-clustering``
    and has ``self.exp`` as parent.

    Returns:
        The newly created ``ClusteringExperiment``.
    """
    owner = self.exp
    core_conf = CoreClusteringConf(owner.exp_conf.logger,
                                   self.categories.num_categories)
    name_parts = ['AL%d' % owner.exp_id,
                  'Iter%d' % self.iteration.iter_num,
                  self.label,
                  'clustering']
    exp_name = '-'.join(name_parts)
    owner_conf = owner.exp_conf
    src_features = owner_conf.features_conf
    features_conf = FeaturesConf(src_features.input_features,
                                 src_features.sparse,
                                 src_features.logger,
                                 filter_in_f=src_features.filter_in_f,
                                 filter_out_f=src_features.filter_out_f)
    clustering_conf = ClusteringConf(owner_conf.secuml_conf,
                                     owner_conf.dataset_conf,
                                     features_conf,
                                     owner_conf.annotations_conf,
                                     core_conf,
                                     name=exp_name,
                                     parent=owner.exp_id)
    return ClusteringExperiment(clustering_conf, session=owner.session)
def create_exp(self):
    """Create the experiment and, when configured, its projection experiment.

    ``self.projection_exp`` is always reset to ``None`` first. It is only
    replaced by a ``ProjectionExperiment`` (named ``<name>-proj``, with this
    experiment as parent) when the core configuration exists and carries a
    non-``None`` ``projection_conf``.
    """
    Experiment.create_exp(self)
    self.projection_exp = None
    own_conf = self.exp_conf
    if own_conf.core_conf is None:
        return
    projection_core_conf = own_conf.core_conf.projection_conf
    if projection_core_conf is None:
        return
    # NOTE(review): FeaturesConf receives (input_features, logger) here,
    # whereas other callers in this file pass (input_features, sparse,
    # logger, ...) -- confirm this matches FeaturesConf's signature.
    features_conf = FeaturesConf(own_conf.features_conf.input_features,
                                 own_conf.secuml_conf.logger)
    proj_name = '-'.join([own_conf.name, 'proj'])
    proj_conf = ProjectionConf(own_conf.secuml_conf,
                               own_conf.dataset_conf,
                               features_conf,
                               own_conf.annotations_conf,
                               projection_core_conf,
                               name=proj_name,
                               parent=self.exp_id)
    self.projection_exp = ProjectionExperiment(proj_conf,
                                               session=self.session)