def gen_parser():
    """Build the command-line parser used to learn a detection model.

    One sub-command is registered per classifier exposed by the classifier
    factory. Sub-commands whose classifier type is supervised or
    semi-supervised also receive the (optional) annotations options.
    'AlreadyTrained' is registered last, without annotations options.

    Returns:
        The configured argparse.ArgumentParser.
    """
    arg_parser = argparse.ArgumentParser(
        description='Learn a detection model. '
                    'The ground-truth must be stored in '
                    'annotations/ground_truth.csv.')
    ExpConf.gen_parser(arg_parser)
    ClassificationConf.gen_parser(arg_parser)

    factory = classifiers.get_factory()
    model_names = factory.get_methods()
    # 'AlreadyTrained' gets its own sub-command after the loop.
    model_names.remove('AlreadyTrained')

    sub_commands = arg_parser.add_subparsers(dest='model_class')
    sub_commands.required = True

    needs_annotations = (ClassifierType.supervised,
                         ClassifierType.semisupervised)
    for model_name in model_names:
        sub_parser = sub_commands.add_parser(model_name)
        factory.gen_parser(model_name, sub_parser)
        kind = get_classifier_type(factory.get_class(model_name))
        if kind not in needs_annotations:
            continue
        AnnotationsConf.gen_parser(
            sub_parser,
            required=False,
            message='CSV file containing the annotations of '
                    'some or all the instances.')

    # Add subparser for already trained model
    trained_parser = sub_commands.add_parser('AlreadyTrained')
    factory.gen_parser('AlreadyTrained', trained_parser)
    return arg_parser
def get_naive_bayes_conf(self):
    """Return a default 'GaussianNaiveBayes' classifier configuration.

    The fold and job counts are copied from the optimisation settings of
    the current experiment's classifier. As a side effect, a DiademExp is
    instantiated (and not kept) for a DiademConf named after the current
    experiment id and iteration number, with this experiment as parent.

    Returns:
        The classifier configuration produced by the classifier factory.
    """
    exp = self.exp
    name_parts = (
        'AL%d' % exp.exp_id,
        'Iter%d' % self.iteration.iter_num,
        'all',
        'NaiveBayes',
    )
    name = '-'.join(name_parts)

    optim_conf = (
        exp.exp_conf.core_conf.classifier_conf.hyperparam_conf.optim_conf)
    multiclass = True
    naive_bayes_conf = classifiers.get_factory().get_default(
        'GaussianNaiveBayes',
        optim_conf.num_folds,
        optim_conf.n_jobs,
        multiclass,
        exp.logger)

    test_conf = UnlabeledLabeledConf(exp.logger)
    classification_conf = ClassificationConf(naive_bayes_conf, test_conf,
                                             exp.logger)

    # Rebuild a features configuration from the current experiment's one.
    src_features = exp.exp_conf.features_conf
    features_conf = FeaturesConf(
        src_features.input_features,
        src_features.sparse,
        src_features.logger,
        filter_in_f=src_features.filter_in_f,
        filter_out_f=src_features.filter_out_f)

    exp_conf = DiademConf(
        exp.exp_conf.secuml_conf,
        exp.exp_conf.dataset_conf,
        features_conf,
        exp.exp_conf.annotations_conf,
        classification_conf,
        None,
        name=name,
        parent=exp.exp_id)
    # The experiment object is only instantiated; it is not stored.
    DiademExp(exp_conf, session=exp.session)
    return naive_bayes_conf
def get_naive_bayes_conf(self):
    """Return a GaussianNaiveBayesConf with default hyper-parameters.

    The fold and job counts are copied from the optimisation settings of
    the current experiment's classifier. As a side effect, a DiademExp is
    built for a DiademConf named after the current experiment id and
    iteration number, and its create_exp() method is called.

    Returns:
        The GaussianNaiveBayesConf instance.
    """
    exp = self.exp
    name_parts = (
        'AL%d' % exp.exp_id,
        'Iter%d' % self.iteration.iter_num,
        'all',
        'NaiveBayes',
    )
    name = '-'.join(name_parts)

    optim_conf = (
        exp.exp_conf.core_conf.classifier_conf.hyperparam_conf.optim_conf)
    multiclass = True
    hyperparam_conf = HyperparamConf.get_default(
        optim_conf.num_folds,
        optim_conf.n_jobs,
        multiclass,
        GaussianNaiveBayesConf._get_hyper_desc(),
        exp.logger)
    naive_bayes_conf = GaussianNaiveBayesConf(multiclass, hyperparam_conf,
                                              exp.logger)

    test_conf = UnlabeledLabeledConf(exp.logger, None)
    classification_conf = ClassificationConf(naive_bayes_conf, test_conf,
                                             exp.logger)

    parent_conf = exp.exp_conf
    exp_conf = DiademConf(
        parent_conf.secuml_conf,
        parent_conf.dataset_conf,
        parent_conf.features_conf,
        parent_conf.annotations_conf,
        classification_conf,
        name=name,
        parent=exp.exp_id)
    DiademExp(exp_conf, session=exp.session).create_exp()
    return naive_bayes_conf
def _create_naive_bayes_conf(self):
    """Return a default 'GaussianNaiveBayes' classifier configuration.

    The fold and job counts are copied from the optimisation settings of
    the experiment's multiclass model. A DiademConf named after the current
    experiment id and iteration number is instantiated as well, but the
    resulting object is not kept.

    Returns:
        The classifier configuration produced by the classifier factory.
    """
    exp = self.exp
    name_parts = (
        'AL%d' % (exp.exp_id),
        'Iter%d' % (self.iteration.iter_num),
        'all',
        'NaiveBayes',
    )
    name = '-'.join(name_parts)

    multiclass_model = exp.exp_conf.core_conf.multiclass_model
    optim_conf = multiclass_model.classifier_conf.hyperparam_conf.optim_conf
    multiclass = True
    naive_bayes_conf = classifiers.get_factory().get_default(
        'GaussianNaiveBayes',
        optim_conf.num_folds,
        optim_conf.n_jobs,
        multiclass,
        exp.logger)

    classif_conf = ClassificationConf(naive_bayes_conf,
                                      UnlabeledLabeledConf(exp.logger),
                                      exp.logger)
    # Instantiated only; the configuration object itself is discarded.
    DiademConf(
        exp.exp_conf.secuml_conf,
        exp.exp_conf.dataset_conf,
        exp.exp_conf.features_conf,
        exp.exp_conf.annotations_conf,
        classif_conf,
        None,
        name=name,
        parent=exp.exp_id)
    return naive_bayes_conf
def _rcd_conf(args, logger):
    """Build an RcdStrategyConf from the parsed command-line arguments.

    The classification model is the factory default for
    'LogisticRegression' (requested with arguments None, None, True),
    tested with an UnlabeledLabeledConf. The cluster strategy and the
    number of annotations come from `args`; the last strategy setting is
    fixed to 'uniform'.
    """
    lr_conf = classifiers.get_factory().get_default(
        'LogisticRegression', None, None, True, logger)
    test_conf = UnlabeledLabeledConf(logger)
    classification = ClassificationConf(lr_conf, test_conf, logger)
    return RcdStrategyConf(
        classification,
        args.cluster_strategy,
        args.num_annotations,
        'uniform',
        logger)
def _get_main_model_conf(self, validation_conf, logger):
    """Return the ClassificationConf of the main model (an SssvddConf).

    Args:
        validation_conf: forwarded unchanged to ClassificationConf.
        logger: logger handed to every configuration object created here.
    """
    default_hyperparams = HyperparamConf.get_default(
        None, None, False, None, logger)
    sssvdd_conf = SssvddConf(default_hyperparams, logger)
    test_conf = UnlabeledLabeledConf(logger)
    return ClassificationConf(
        sssvdd_conf,
        test_conf,
        logger,
        validation_conf=validation_conf)
def _get_lr_conf(self, validation_conf, logger, multiclass=False):
    """Return a ClassificationConf for a default 'LogisticRegression'.

    Args:
        validation_conf: forwarded unchanged to ClassificationConf.
        logger: logger handed to every configuration object created here.
        multiclass: forwarded to the classifier factory's get_default().
    """
    lr_conf = classifiers.get_factory().get_default(
        'LogisticRegression', None, None, multiclass, logger)
    test_conf = UnlabeledLabeledConf(logger)
    return ClassificationConf(
        lr_conf,
        test_conf,
        logger,
        validation_conf=validation_conf)
def from_json(self, obj, logger):
    """Rebuild a configuration object from its JSON description.

    The concrete class is looked up in `self.methods` with the value stored
    under the '__type__' key, and its own `from_json` does the final
    construction.

    Args:
        obj: mapping with at least the keys '__type__', 'main_model_conf'
            and 'validation_conf'.
        logger: logger handed to every configuration object created here.

    Returns:
        Whatever the selected class' `from_json` returns. A missing
        validation configuration (``obj['validation_conf'] is None``) is
        passed on as ``None``.
    """
    class_name = obj['__type__']
    main_model = ClassificationConf.from_json(obj['main_model_conf'], logger)
    # The validation configuration is optional. Previously a None entry made
    # this method return None outright, dropping the whole configuration;
    # now it is simply forwarded as None.
    validation_conf = None
    if obj['validation_conf'] is not None:
        validation_conf = TestDatasetConf(logger, None,
                                          obj['validation_conf'])
    return self.methods[class_name].from_json(obj, main_model,
                                              validation_conf, logger)
def _get_lr_conf(self, validation_conf, logger, multiclass=False):
    """Return a ClassificationConf wrapping a LogisticRegressionConf.

    The logistic regression is configured with the 'liblinear' setting and
    default hyper-parameters derived from its own hyper-parameter
    description.

    Args:
        validation_conf: forwarded unchanged to ClassificationConf.
        logger: logger handed to every configuration object created here.
        multiclass: forwarded to both HyperparamConf.get_default() and
            LogisticRegressionConf.
    """
    hyper_desc = LogisticRegressionConf._get_hyper_desc()
    default_hyperparams = HyperparamConf.get_default(
        None, None, multiclass, hyper_desc, logger)
    lr_conf = LogisticRegressionConf(multiclass, 'liblinear',
                                     default_hyperparams, logger)
    test_conf = UnlabeledLabeledConf(logger, None)
    return ClassificationConf(
        lr_conf,
        test_conf,
        logger,
        validation_conf=validation_conf)
def gen_parser():
    """Build the command-line parser used to train and evaluate a model.

    One sub-command is registered per classifier exposed by the classifier
    factory. Supervised and semi-supervised classifiers additionally get
    the annotations options: for supervised ones the default is
    'GROUND_TRUTH' (option not required), for semi-supervised ones the
    option is required. Every sub-command, including 'AlreadyTrained',
    receives the ClassificationConf and AlertsConf options.

    Returns:
        The configured argparse.ArgumentParser.
    """
    arg_parser = argparse.ArgumentParser(
        description='Train and evaluate a detection '
                    'model. ')
    ExpConf.gen_parser(arg_parser, sparse=True)
    arg_parser.add_argument(
        '--no-training-detection',
        action='store_true',
        default=False,
        help='When specified, the detection model is not applied to '
             'the training instances. ')

    factory = classifiers.get_factory()
    model_names = factory.get_methods()
    # 'AlreadyTrained' gets its own sub-command after the loop.
    model_names.remove('AlreadyTrained')

    sub_commands = arg_parser.add_subparsers(dest='model_class')
    sub_commands.required = True

    base_help = ('CSV file containing the annotations of some instances, '
                 'or GROUND_TRUTH to use the ground truth annotations '
                 'stored in idents.csv. ')
    with_annotations = (ClassifierType.supervised,
                        ClassifierType.semisupervised)

    for model_name in model_names:
        sub_parser = sub_commands.add_parser(model_name)
        factory.gen_parser(model_name, sub_parser)
        kind = get_classifier_type(factory.get_class(model_name))
        if kind in with_annotations:
            if kind == ClassifierType.supervised:
                # Supervised models fall back on the ground truth.
                default = 'GROUND_TRUTH'
                message = '%s Default: GROUND_TRUTH.' % base_help
            else:
                default = None
                message = base_help
            AnnotationsConf.gen_parser(sub_parser,
                                       required=default is None,
                                       default=default,
                                       message=message)
        ClassificationConf.gen_parser(sub_parser)
        AlertsConf.gen_parser(sub_parser)

    # Add subparser for already trained model
    trained_parser = sub_commands.add_parser('AlreadyTrained')
    factory.gen_parser('AlreadyTrained', trained_parser)
    ClassificationConf.gen_parser(trained_parser)
    AlertsConf.gen_parser(trained_parser)
    return arg_parser
def _rcd_conf(args, logger):
    """Build an RcdStrategyConf from the parsed command-line arguments.

    The classification model is a LogisticRegressionConf created with
    True as first argument, the 'liblinear' setting and default
    hyper-parameters. The cluster strategy and the number of annotations
    come from `args`; the last strategy setting is fixed to 'uniform'.
    """
    hyper_desc = LogisticRegressionConf._get_hyper_desc()
    default_hyperparams = HyperparamConf.get_default(
        None, None, True, hyper_desc, logger)
    lr_conf = LogisticRegressionConf(True, 'liblinear', default_hyperparams,
                                     logger)
    test_conf = UnlabeledLabeledConf(logger, None)
    classification = ClassificationConf(lr_conf, test_conf, logger)
    return RcdStrategyConf(
        classification,
        args.cluster_strategy,
        args.num_annotations,
        'uniform',
        logger)
def from_args(args):
    """Build a DiademConf from the parsed command-line arguments.

    For the 'AlreadyTrained' model class, the id of the trained experiment
    is recorded and no annotations file is used; for every other model
    class the annotations file given on the command line is used.
    """
    secuml_conf = ExpConf.common_from_args(args)
    logger = secuml_conf.logger
    core_conf = ClassificationConf.from_args(args, logger)

    reuse_trained = args.model_class == 'AlreadyTrained'
    already_trained = args.model_exp_id if reuse_trained else None
    annotations_file = None if reuse_trained else args.annotations_file
    annotations_conf = AnnotationsConf(annotations_file, None, logger)

    dataset_conf = DatasetConf.from_args(args, logger)
    features_conf = FeaturesConf.from_args(args, logger)
    return DiademConf(
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=args.exp_name,
        already_trained=already_trained)
def from_args(args):
    """Build a DiademConf from the parsed command-line arguments.

    The annotations file is only taken into account for supervised and
    semi-supervised model classes. For multiclass models the alerts
    configuration is dropped (set to None) once it has been checked that
    no alerts analysis was requested.

    Args:
        args: namespace produced by the command-line parser.

    Returns:
        The DiademConf describing the experiment.

    Raises:
        InvalidInputArguments: if the alerts are to be classified with an
            unsupervised model class, if an alerts analysis is requested
            for a multiclass model or in streaming mode, or if sparse
            features are given to a classifier that does not accept them.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    logger = secuml_conf.logger
    classif_conf = ClassificationConf.from_args(args, logger)
    model_class = classifiers.get_factory().get_class(args.model_class)
    classifier_type = get_classifier_type(model_class)
    if classifier_type in [ClassifierType.supervised,
                           ClassifierType.semisupervised]:
        annotations_conf = AnnotationsConf(args.annotations_file, None,
                                           logger)
    else:
        annotations_conf = AnnotationsConf(None, None, logger)
    already_trained = None
    if args.model_class == 'AlreadyTrained':
        already_trained = args.model_exp_id
    alerts_conf = AlertsConf.from_args(args, logger)
    if (classifier_type == ClassifierType.unsupervised
            and alerts_conf.classifier_conf is not None):
        raise InvalidInputArguments('Supervised classification of the '
                                    'alerts is not supported for '
                                    'unsupervised model classes. ')
    if classif_conf.classifier_conf.multiclass:
        if alerts_conf.with_analysis():
            raise InvalidInputArguments('Alerts analysis is not supported '
                                        'for multiclass models. ')
        else:
            alerts_conf = None
    # alerts_conf is None for multiclass models: without the explicit None
    # check, a multiclass model tested on a streaming dataset crashed with
    # an AttributeError instead of building the configuration.
    if (classif_conf.test_conf.method == 'dataset'
            and classif_conf.test_conf.streaming
            and alerts_conf is not None
            and alerts_conf.with_analysis()):
        raise InvalidInputArguments('Alerts analysis is not supported '
                                    'in streaming mode. ')
    dataset_conf = DatasetConf.from_args(args, logger)
    features_conf = FeaturesConf.from_args(args, logger)
    if (features_conf.sparse
            and not classif_conf.classifier_conf.accept_sparse):
        raise InvalidInputArguments('%s does not support sparse '
                                    'features. ' % args.model_class)
    return DiademConf(secuml_conf, dataset_conf, features_conf,
                      annotations_conf, classif_conf, alerts_conf,
                      name=args.exp_name,
                      already_trained=already_trained,
                      no_training_detection=args.no_training_detection)
def from_args(self, method, args, logger):
    """Build the configuration of `method` from command-line arguments.

    When validation datasets are given, a ValidationDatasetsConf is built
    from `args`. When the class registered for `method` declares a main
    model type, `args.multiclass` is overwritten accordingly and a
    ClassificationConf is built for that main model.

    Args:
        method: name used to look up the class with `self.get_class`.
        args: parsed arguments; `args.multiclass` may be set as a side
            effect.
        logger: logger handed to every configuration object created here.

    Returns:
        Whatever the selected class' `from_args` returns.
    """
    if args.validation_datasets is None:
        validation_conf = None
    else:
        validation_conf = ValidationDatasetsConf.from_args(args, logger)

    conf_class = self.get_class(method)
    model_type = conf_class.main_model_type()

    main_model_conf = None
    if model_type is not None:
        classifier_factory = classifiers.get_factory()
        # The main model type decides whether a multiclass model is built.
        args.multiclass = model_type == 'multiclass'
        classifier_conf = classifier_factory.from_args(args.model_class,
                                                       args, logger)
        main_model_conf = ClassificationConf(
            classifier_conf,
            UnlabeledLabeledConf(logger),
            logger,
            validation_conf=validation_conf)

    return conf_class.from_args(args, main_model_conf, validation_conf,
                                logger)
def from_json(conf_json, secuml_conf):
    """Rebuild a DiademConf from its JSON description.

    Args:
        conf_json: mapping with the keys 'dataset_conf', 'features_conf',
            'annotations_conf', 'core_conf', 'name', 'parent',
            'already_trained' and 'exp_id'.
        secuml_conf: global configuration; its logger is handed to every
            sub-configuration.

    Returns:
        The DiademConf, with `exp_id` restored from the JSON description.
    """
    logger = secuml_conf.logger
    dataset_conf = DatasetConf.from_json(conf_json['dataset_conf'], logger)
    features_conf = FeaturesConf.from_json(conf_json['features_conf'], logger)
    annotations_conf = AnnotationsConf.from_json(
        conf_json['annotations_conf'], logger)
    core_conf = ClassificationConf.from_json(conf_json['core_conf'], logger)

    diadem_conf = DiademConf(
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=conf_json['name'],
        parent=conf_json['parent'],
        already_trained=conf_json['already_trained'])
    # The experiment id is not a constructor argument; restore it afterwards.
    diadem_conf.exp_id = conf_json['exp_id']
    return diadem_conf
def from_args(args):
    """Build a DiademConf from the parsed command-line arguments.

    The annotations file is only taken into account for supervised and
    semi-supervised model classes. For the 'AlreadyTrained' model class,
    the id of the trained experiment is recorded.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    logger = secuml_conf.logger
    classif_conf = ClassificationConf.from_args(args, logger)

    model_class = classifiers.get_factory().get_class(args.model_class)
    uses_annotations = get_classifier_type(model_class) in (
        ClassifierType.supervised, ClassifierType.semisupervised)
    annotations_file = args.annotations_file if uses_annotations else None
    annotations_conf = AnnotationsConf(annotations_file, None, logger)

    if args.model_class == 'AlreadyTrained':
        already_trained = args.model_exp_id
    else:
        already_trained = None

    dataset_conf = DatasetConf.from_args(args, logger)
    features_conf = FeaturesConf.from_args(args, logger)
    return DiademConf(
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        classif_conf,
        name=args.exp_name,
        already_trained=already_trained)
def from_json(obj, logger):
    """Rebuild an RcdStrategyConf from its JSON description.

    Args:
        obj: mapping with the keys 'classification_conf',
            'cluster_strategy', 'num_annotations' and 'cluster_weights'.
        logger: logger handed to the configuration objects created here.
    """
    classification = ClassificationConf.from_json(
        obj['classification_conf'], logger)
    return RcdStrategyConf(
        classification,
        obj['cluster_strategy'],
        obj['num_annotations'],
        obj['cluster_weights'],
        logger)