Example #1
0
 def gen_parser():
     """Build the command-line parser used to learn a detection model.

     Experiment-wide and classification options are registered on the
     top-level parser. Each method reported by the classifier factory
     (except ``AlreadyTrained``) gets its own sub-command, selected through
     the mandatory ``model_class`` destination. Sub-commands of supervised
     and semi-supervised models additionally receive the annotations
     options, registered with ``required=False``. The ``AlreadyTrained``
     sub-command is added last and only receives the factory options.

     Returns:
         The configured ``argparse.ArgumentParser``.
     """
     parser = argparse.ArgumentParser(
         description='Learn a detection model. The ground-truth must be '
                     'stored in annotations/ground_truth.csv.')
     ExpConf.gen_parser(parser)
     ClassificationConf.gen_parser(parser)

     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is handled separately after the loop.
     model_names.remove('AlreadyTrained')

     subparsers = parser.add_subparsers(dest='model_class')
     subparsers.required = True
     for model_name in model_names:
         sub = subparsers.add_parser(model_name)
         factory.gen_parser(model_name, sub)
         kind = get_classifier_type(factory.get_class(model_name))
         if kind not in (ClassifierType.supervised,
                         ClassifierType.semisupervised):
             continue
         AnnotationsConf.gen_parser(
             sub, required=False,
             message='CSV file containing the annotations of some or all '
                     'the instances.')

     # Sub-command for a model that has already been trained.
     trained = subparsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained)
     return parser
Example #2
0
 def get_naive_bayes_conf(self):
     """Create a Gaussian naive Bayes experiment for the current iteration.

     The default ``GaussianNaiveBayes`` configuration is requested from the
     classifier factory, reusing the fold count and job count of the parent
     experiment's hyper-parameter optimisation settings and forcing
     multiclass mode. A ``DiademExp`` is then instantiated as a child of the
     parent experiment (same dataset, annotations and a copy of the features
     settings) on the parent's session.

     Returns:
         The naive Bayes classifier configuration.
     """
     exp = self.exp
     parent_conf = exp.exp_conf
     logger = exp.logger
     name = 'AL%d-Iter%d-all-NaiveBayes' % (exp.exp_id,
                                            self.iteration.iter_num)

     parent_classifier = parent_conf.core_conf.classifier_conf
     optim_conf = parent_classifier.hyperparam_conf.optim_conf
     naive_bayes_conf = classifiers.get_factory().get_default(
         'GaussianNaiveBayes', optim_conf.num_folds, optim_conf.n_jobs,
         True, logger)
     classification_conf = ClassificationConf(
         naive_bayes_conf, UnlabeledLabeledConf(logger), logger)

     # Rebuild the features configuration from the parent's settings.
     parent_features = parent_conf.features_conf
     features_conf = FeaturesConf(parent_features.input_features,
                                  parent_features.sparse,
                                  parent_features.logger,
                                  filter_in_f=parent_features.filter_in_f,
                                  filter_out_f=parent_features.filter_out_f)

     child_conf = DiademConf(parent_conf.secuml_conf,
                             parent_conf.dataset_conf,
                             features_conf,
                             parent_conf.annotations_conf,
                             classification_conf,
                             None,
                             name=name,
                             parent=exp.exp_id)
     # The experiment object itself is not kept by this method.
     DiademExp(child_conf, session=exp.session)
     return naive_bayes_conf
Example #3
0
 def get_naive_bayes_conf(self):
     """Create and register a Gaussian naive Bayes child experiment.

     Default hyper-parameters are derived from the parent experiment's
     optimisation settings (fold count, job count) in multiclass mode. The
     resulting ``DiademExp`` shares the parent's dataset, features and
     annotations configurations and is created through ``create_exp()``.

     Returns:
         The ``GaussianNaiveBayesConf`` used by the child experiment.
     """
     exp = self.exp
     parent_conf = exp.exp_conf
     logger = exp.logger
     name = 'AL%d-Iter%d-all-NaiveBayes' % (exp.exp_id,
                                            self.iteration.iter_num)

     parent_classifier = parent_conf.core_conf.classifier_conf
     optim_conf = parent_classifier.hyperparam_conf.optim_conf
     hyperparam_conf = HyperparamConf.get_default(
         optim_conf.num_folds, optim_conf.n_jobs, True,
         GaussianNaiveBayesConf._get_hyper_desc(), logger)
     naive_bayes_conf = GaussianNaiveBayesConf(True, hyperparam_conf, logger)

     classification_conf = ClassificationConf(
         naive_bayes_conf, UnlabeledLabeledConf(logger, None), logger)
     child_conf = DiademConf(parent_conf.secuml_conf,
                             parent_conf.dataset_conf,
                             parent_conf.features_conf,
                             parent_conf.annotations_conf,
                             classification_conf,
                             name=name,
                             parent=exp.exp_id)
     DiademExp(child_conf, session=exp.session).create_exp()
     return naive_bayes_conf
Example #4
0
 def _create_naive_bayes_conf(self):
     """Return the default Gaussian naive Bayes configuration.

     The fold count and job count come from the optimisation settings of
     the parent experiment's multiclass model; multiclass mode is forced.
     A ``DiademConf`` describing the child experiment is also instantiated
     but its value is not kept.
     NOTE(review): presumably the ``DiademConf`` constructor is called for
     a side effect - confirm.

     Returns:
         The naive Bayes classifier configuration.
     """
     exp = self.exp
     parent_conf = exp.exp_conf
     logger = exp.logger
     name = 'AL%d-Iter%d-all-NaiveBayes' % (exp.exp_id,
                                            self.iteration.iter_num)

     parent_classifier = parent_conf.core_conf.multiclass_model.classifier_conf
     optim_conf = parent_classifier.hyperparam_conf.optim_conf
     naive_bayes_conf = classifiers.get_factory().get_default(
         'GaussianNaiveBayes', optim_conf.num_folds, optim_conf.n_jobs,
         True, logger)
     classif_conf = ClassificationConf(
         naive_bayes_conf, UnlabeledLabeledConf(logger), logger)

     # Result intentionally discarded, as in the surrounding code base.
     DiademConf(parent_conf.secuml_conf,
                parent_conf.dataset_conf,
                parent_conf.features_conf,
                parent_conf.annotations_conf,
                classif_conf,
                None,
                name=name,
                parent=exp.exp_id)
     return naive_bayes_conf
Example #5
0
def _rcd_conf(args, logger):
    """Build an ``RcdStrategyConf`` from parsed arguments.

    The strategy relies on the factory's default multiclass
    ``LogisticRegression`` configuration, combined with an
    ``UnlabeledLabeledConf`` test configuration. The cluster strategy and
    the number of annotations are read from ``args``; the last strategy
    argument is fixed to ``'uniform'``.
    """
    logistic_conf = classifiers.get_factory().get_default(
        'LogisticRegression', None, None, True, logger)
    test_conf = UnlabeledLabeledConf(logger)
    classif_conf = ClassificationConf(logistic_conf, test_conf, logger)
    return RcdStrategyConf(classif_conf,
                           args.cluster_strategy,
                           args.num_annotations,
                           'uniform',
                           logger)
Example #6
0
 def _get_main_model_conf(self, validation_conf, logger):
     """Return the classification configuration of the main Sssvdd model.

     Args:
         validation_conf: Validation configuration forwarded unchanged to
             ``ClassificationConf``.
         logger: Logger handed to every configuration object created here.
     """
     default_hyperparams = HyperparamConf.get_default(
         None, None, False, None, logger)
     sssvdd_conf = SssvddConf(default_hyperparams, logger)
     test_conf = UnlabeledLabeledConf(logger)
     return ClassificationConf(sssvdd_conf, test_conf, logger,
                               validation_conf=validation_conf)
Example #7
0
 def _get_lr_conf(self, validation_conf, logger, multiclass=False):
     """Return a classification configuration based on logistic regression.

     Args:
         validation_conf: Validation configuration forwarded unchanged to
             ``ClassificationConf``.
         logger: Logger handed to every configuration object created here.
         multiclass: Forwarded to the factory when requesting the default
             ``LogisticRegression`` configuration.
     """
     logistic_conf = classifiers.get_factory().get_default(
         'LogisticRegression', None, None, multiclass, logger)
     test_conf = UnlabeledLabeledConf(logger)
     return ClassificationConf(logistic_conf, test_conf, logger,
                               validation_conf=validation_conf)
Example #8
0
 def from_json(self, obj, logger):
     """Rebuild a configuration object from its JSON representation.

     The concrete class is looked up in ``self.methods`` using
     ``obj['__type__']`` and its own ``from_json`` does the final
     construction.

     Args:
         obj: Deserialised JSON mapping. Must provide the ``'__type__'``,
             ``'main_model_conf'`` and ``'validation_conf'`` keys.
         logger: Logger handed to the configuration objects created here.

     Returns:
         Whatever the selected class's ``from_json`` returns. The validation
         configuration passed to it is ``None`` when
         ``obj['validation_conf']`` is ``None``.
     """
     class_name = obj['__type__']
     main_model = ClassificationConf.from_json(obj['main_model_conf'],
                                               logger)
     # The validation dataset is optional: a missing one must not prevent
     # the configuration from being rebuilt (previously the whole method
     # returned None in that case).
     validation_conf = None
     if obj['validation_conf'] is not None:
         validation_conf = TestDatasetConf(logger, None,
                                           obj['validation_conf'])
     return self.methods[class_name].from_json(obj, main_model,
                                               validation_conf, logger)
Example #9
0
 def _get_lr_conf(self, validation_conf, logger, multiclass=False):
     """Return a classification configuration based on logistic regression.

     The logistic regression configuration is built explicitly with default
     hyper-parameters and ``'liblinear'`` as its second constructor
     argument.

     Args:
         validation_conf: Validation configuration forwarded unchanged to
             ``ClassificationConf``.
         logger: Logger handed to every configuration object created here.
         multiclass: Forwarded to both the hyper-parameter defaults and the
             ``LogisticRegressionConf`` constructor.
     """
     hyper_desc = LogisticRegressionConf._get_hyper_desc()
     default_hyperparams = HyperparamConf.get_default(
         None, None, multiclass, hyper_desc, logger)
     logistic_conf = LogisticRegressionConf(
         multiclass, 'liblinear', default_hyperparams, logger)
     test_conf = UnlabeledLabeledConf(logger, None)
     return ClassificationConf(logistic_conf, test_conf, logger,
                               validation_conf=validation_conf)
Example #10
0
 def gen_parser():
     """Build the command-line parser used to train and evaluate a model.

     The top-level parser receives the experiment options (with
     ``sparse=True``) and the ``--no-training-detection`` flag. Each method
     reported by the classifier factory (except ``AlreadyTrained``) gets a
     sub-command, selected through the mandatory ``model_class``
     destination, with classification and alerts options. Supervised and
     semi-supervised models also get annotations options; for supervised
     models these default to ``GROUND_TRUTH`` and are therefore optional.
     The ``AlreadyTrained`` sub-command is added last, without annotations
     options.

     Returns:
         The configured ``argparse.ArgumentParser``.
     """
     parser = argparse.ArgumentParser(
         description='Train and evaluate a detection model. ')
     ExpConf.gen_parser(parser, sparse=True)
     parser.add_argument('--no-training-detection',
                         action='store_true',
                         default=False,
                         help='''When specified, the detection model is
                                 not applied to the training instances. ''')

     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is handled separately after the loop.
     model_names.remove('AlreadyTrained')

     subparsers = parser.add_subparsers(dest='model_class')
     subparsers.required = True
     for model_name in model_names:
         sub = subparsers.add_parser(model_name)
         factory.gen_parser(model_name, sub)
         classifier_type = get_classifier_type(factory.get_class(model_name))
         if classifier_type in (ClassifierType.supervised,
                                ClassifierType.semisupervised):
             message = '''CSV file containing the annotations of some
                          instances, or GROUND_TRUTH to use the ground
                          truth annotations stored in idents.csv. '''
             if classifier_type == ClassifierType.supervised:
                 # Supervised models fall back to the ground truth.
                 default = 'GROUND_TRUTH'
                 message = '%s Default: GROUND_TRUTH.' % message
             else:
                 default = None
             AnnotationsConf.gen_parser(sub,
                                        required=default is None,
                                        default=default,
                                        message=message)
         ClassificationConf.gen_parser(sub)
         AlertsConf.gen_parser(sub)

     # Sub-command for a model that has already been trained.
     trained = subparsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained)
     ClassificationConf.gen_parser(trained)
     AlertsConf.gen_parser(trained)
     return parser
Example #11
0
def _rcd_conf(args, logger):
    """Build an ``RcdStrategyConf`` from parsed arguments.

    A multiclass ``LogisticRegressionConf`` with default hyper-parameters
    and ``'liblinear'`` as its second constructor argument backs the
    strategy. The cluster strategy and the number of annotations are read
    from ``args``; the last strategy argument is fixed to ``'uniform'``.
    """
    hyper_desc = LogisticRegressionConf._get_hyper_desc()
    default_hyperparams = HyperparamConf.get_default(
        None, None, True, hyper_desc, logger)
    logistic_conf = LogisticRegressionConf(
        True, 'liblinear', default_hyperparams, logger)
    test_conf = UnlabeledLabeledConf(logger, None)
    classif_conf = ClassificationConf(logistic_conf, test_conf, logger)
    return RcdStrategyConf(classif_conf,
                           args.cluster_strategy,
                           args.num_annotations,
                           'uniform',
                           logger)
Example #12
0
 def from_args(args):
     """Build a ``DiademConf`` from parsed command-line arguments.

     When ``args.model_class`` is ``'AlreadyTrained'`` no annotations file
     is used and ``args.model_exp_id`` is recorded as the already trained
     experiment; otherwise annotations come from ``args.annotations_file``
     and ``already_trained`` stays ``None``.
     """
     secuml_conf = ExpConf.common_from_args(args)
     logger = secuml_conf.logger
     core_conf = ClassificationConf.from_args(args, logger)

     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
         annotations_file = None
     else:
         already_trained = None
         annotations_file = args.annotations_file
     annotations_conf = AnnotationsConf(annotations_file, None, logger)

     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       core_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Example #13
0
 def from_args(args):
     """Build a ``DiademConf`` from parsed command-line arguments.

     Annotations are read from ``args.annotations_file`` only for supervised
     and semi-supervised model classes. ``args.model_exp_id`` is recorded as
     the already trained experiment when ``args.model_class`` is
     ``'AlreadyTrained'``. For multiclass models without alerts analysis the
     alerts configuration is dropped (``None``).

     Raises:
         InvalidInputArguments: if alerts classification is requested for an
             unsupervised model class, if alerts analysis is requested for a
             multiclass model or in streaming mode, or if sparse features
             are requested for a model that does not accept them.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     classif_conf = ClassificationConf.from_args(args, secuml_conf.logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     if classifier_type in [
             ClassifierType.supervised, ClassifierType.semisupervised
     ]:
         annotations_conf = AnnotationsConf(args.annotations_file, None,
                                            secuml_conf.logger)
     else:
         annotations_conf = AnnotationsConf(None, None, secuml_conf.logger)
     already_trained = None
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     alerts_conf = AlertsConf.from_args(args, secuml_conf.logger)
     if (classifier_type == ClassifierType.unsupervised
             and alerts_conf.classifier_conf is not None):
         raise InvalidInputArguments('Supervised classification of the '
                                     'alerts is not supported for '
                                     'unsupervised model classes. ')
     if classif_conf.classifier_conf.multiclass:
         if alerts_conf.with_analysis():
             raise InvalidInputArguments('Alerts analysis is not supported '
                                         'for multiclass models. ')
         else:
             alerts_conf = None
     # alerts_conf may have been reset to None just above (multiclass model
     # without alerts analysis); guard the call so that a streaming test
     # dataset does not trigger an AttributeError on None.
     if (classif_conf.test_conf.method == 'dataset'
             and classif_conf.test_conf.streaming
             and alerts_conf is not None
             and alerts_conf.with_analysis()):
         raise InvalidInputArguments('Alerts analysis is not supported '
                                     'in streaming mode. ')
     dataset_conf = DatasetConf.from_args(args, secuml_conf.logger)
     features_conf = FeaturesConf.from_args(args, secuml_conf.logger)
     if (features_conf.sparse
             and not classif_conf.classifier_conf.accept_sparse):
         raise InvalidInputArguments('%s does not support sparse '
                                     'features. ' % args.model_class)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       alerts_conf,
                       name=args.exp_name,
                       already_trained=already_trained,
                       no_training_detection=args.no_training_detection)
Example #14
0
 def from_args(self, method, args, logger):
     """Build the configuration of ``method`` from parsed arguments.

     A validation configuration is created only when
     ``args.validation_datasets`` is set. When the class registered for
     ``method`` declares a main model type, ``args.multiclass`` is
     overwritten accordingly and a ``ClassificationConf`` is built for
     ``args.model_class``; otherwise the main model configuration is
     ``None``. Construction is then delegated to the class's own
     ``from_args``.
     """
     if args.validation_datasets is None:
         validation_conf = None
     else:
         validation_conf = ValidationDatasetsConf.from_args(args, logger)

     conf_class = self.get_class(method)
     main_model_type = conf_class.main_model_type()
     if main_model_type is None:
         main_model_conf = None
     else:
         factory = classifiers.get_factory()
         # The strategy, not the user, decides whether the model is
         # multiclass.
         args.multiclass = main_model_type == 'multiclass'
         classifier_conf = factory.from_args(args.model_class, args, logger)
         main_model_conf = ClassificationConf(
             classifier_conf, UnlabeledLabeledConf(logger), logger,
             validation_conf=validation_conf)
     return conf_class.from_args(args, main_model_conf, validation_conf,
                                 logger)
Example #15
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a ``DiademConf`` from its JSON representation.

     The dataset, features, annotations and core configurations are each
     restored from their own entry of ``conf_json``; the experiment name,
     parent and already-trained identifier are copied over, and the
     experiment identifier is set on the resulting object.
     """
     logger = secuml_conf.logger
     dataset_conf = DatasetConf.from_json(conf_json['dataset_conf'], logger)
     features_conf = FeaturesConf.from_json(conf_json['features_conf'],
                                            logger)
     annotations_conf = AnnotationsConf.from_json(
         conf_json['annotations_conf'], logger)
     core_conf = ClassificationConf.from_json(conf_json['core_conf'], logger)

     restored = DiademConf(secuml_conf,
                           dataset_conf,
                           features_conf,
                           annotations_conf,
                           core_conf,
                           name=conf_json['name'],
                           parent=conf_json['parent'],
                           already_trained=conf_json['already_trained'])
     restored.exp_id = conf_json['exp_id']
     return restored
Example #16
0
 def from_args(args):
     """Build a ``DiademConf`` from parsed command-line arguments.

     Annotations are read from ``args.annotations_file`` only for supervised
     and semi-supervised model classes. ``args.model_exp_id`` is recorded as
     the already trained experiment when ``args.model_class`` is
     ``'AlreadyTrained'``.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     logger = secuml_conf.logger
     classif_conf = ClassificationConf.from_args(args, logger)

     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     uses_annotations = classifier_type in (ClassifierType.supervised,
                                            ClassifierType.semisupervised)
     annotations_file = args.annotations_file if uses_annotations else None
     annotations_conf = AnnotationsConf(annotations_file, None, logger)

     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     else:
         already_trained = None

     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Example #17
0
 def from_json(obj, logger):
     """Rebuild an ``RcdStrategyConf`` from its JSON representation.

     The classification configuration is restored from
     ``obj['classification_conf']``; the cluster strategy, number of
     annotations and cluster weights are read directly from ``obj``.
     """
     classif_conf = ClassificationConf.from_json(
         obj['classification_conf'], logger)
     return RcdStrategyConf(classif_conf,
                            obj['cluster_strategy'],
                            obj['num_annotations'],
                            obj['cluster_weights'],
                            logger)