def gen_parser():
    """Build the command-line parser of the clustering experiment.

    ExpConf and AnnotationsConf register their own options on the parser,
    then the ``--label`` option is added, followed by one required
    sub-command per method returned by the clustering factory.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Clustering of the data for data exploration.')
    ExpConf.gen_parser(cli)

    annotations_help = (
        'CSV file containing the annotations of some instances, or '
        'GROUND_TRUTH to use the ground truth annotations stored in '
        'idents.csv. These annotations are used for semi-supervised '
        'projections.')
    AnnotationsConf.gen_parser(cli, message=annotations_help)

    label_help = (
        'The clustering is built from all the instances in the dataset, '
        'or only from the benign or malicious ones. By default, the '
        'clustering is built from all the instances. The malicious and '
        'benign instances are selected according to the ground-truth '
        'stored in idents.csv.')
    cli.add_argument('--label',
                     choices=['all', 'malicious', 'benign'],
                     default='all',
                     help=label_help)

    # One sub-command per clustering algorithm; choosing one is mandatory.
    algo_parsers = cli.add_subparsers(dest='algo')
    algo_parsers.required = True
    factory = clustering_conf.get_factory()
    for name in factory.get_methods():
        factory.gen_parser(name, algo_parsers.add_parser(name))
    return cli
def gen_parser():
    """Build the command-line parser used to learn a detection model.

    Every model class returned by the classifiers factory (except
    'AlreadyTrained') gets its own sub-command; supervised and
    semi-supervised model classes additionally receive the optional
    annotations argument. 'AlreadyTrained' is registered last, without any
    annotations argument.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description=('Learn a detection model. '
                     'The ground-truth must be stored in '
                     'annotations/ground_truth.csv.'))
    ExpConf.gen_parser(cli)
    ClassificationConf.gen_parser(cli)

    factory = classifiers.get_factory()
    model_names = factory.get_methods()
    # 'AlreadyTrained' is handled separately, after the regular models.
    model_names.remove('AlreadyTrained')

    model_parsers = cli.add_subparsers(dest='model_class')
    model_parsers.required = True
    annotations_help = ('CSV file containing the annotations of '
                        'some or all the instances.')
    for name in model_names:
        sub = model_parsers.add_parser(name)
        factory.gen_parser(name, sub)
        kind = get_classifier_type(factory.get_class(name))
        if kind in [ClassifierType.supervised,
                    ClassifierType.semisupervised]:
            AnnotationsConf.gen_parser(sub, required=False,
                                       message=annotations_help)

    # Add subparser for already trained model
    trained_parser = model_parsers.add_parser('AlreadyTrained')
    factory.gen_parser('AlreadyTrained', trained_parser)
    return cli
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, classifier_conf, name=None, parent=None,
             fold_id=None, kind='test'):
    """Initialise the configuration and record ``fold_id`` and ``kind``.

    All configuration objects, together with ``name`` and ``parent``, are
    forwarded unchanged to ``ExpConf.__init__``.

    Args:
        fold_id: stored as ``self.fold_id`` (defaults to None).
        kind: stored as ``self.kind`` (defaults to 'test').
    """
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        classifier_conf,
        name=name,
        parent=parent,
    )
    self.fold_id = fold_id
    self.kind = kind
def gen_parser():
    """Build the command-line parser of the features analysis experiment.

    ExpConf registers its options with ``filters=False`` and the
    annotations argument is registered as required.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(description='Features Analysis')
    ExpConf.gen_parser(cli, filters=False)
    annotations_help = ('CSV file containing the annotations of some or all'
                        ' the instances.')
    AnnotationsConf.gen_parser(cli, required=True, message=annotations_help)
    return cli
def gen_parser():
    """Build the command-line parser of the rare category detection experiment.

    The annotations argument is optional and defaults to
    'init_annotations.csv'. The options of the 'Rcd' strategy are added
    directly to the main parser (no sub-command).

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Rare Category Detection',
        formatter_class=argparse.RawTextHelpFormatter)
    ExpConf.gen_parser(cli, filters=True, sparse=True)

    annotations_help = ('CSV file containing the initial annotations '
                        'used to learn the first supervised detection '
                        'model.')
    AnnotationsConf.gen_parser(cli,
                               default='init_annotations.csv',
                               required=False,
                               message=annotations_help)

    rcd_factory = strategies_conf.get_factory()
    rcd_factory.gen_parser('Rcd', cli)
    return cli
def gen_parser():
    """Build the command-line parser of the projection experiment.

    After the options registered by ExpConf and AnnotationsConf, one
    required sub-command is created per method returned by the projection
    factory.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Projection of the data for data visualization.')
    ExpConf.gen_parser(cli)

    annotations_help = ('CSV file containing the annotations of some'
                        ' instances. These annotations are used for '
                        'semi-supervised projections.')
    AnnotationsConf.gen_parser(cli, message=annotations_help)

    # Choosing a projection algorithm is mandatory.
    algo_parsers = cli.add_subparsers(dest='algo')
    algo_parsers.required = True
    for name in projection_conf.get_factory().get_methods():
        sub = algo_parsers.add_parser(name)
        projection_conf.get_factory().gen_parser(name, sub)
    return cli
def gen_parser():
    """Build the command-line parser of the features analysis experiment.

    ExpConf registers its options with ``filters=False`` and
    ``sparse=True``, the annotations argument is optional, and a
    ``--multiclass`` flag is added.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(description='Features Analysis')
    ExpConf.gen_parser(cli, filters=False, sparse=True)

    annotations_help = ('CSV file containing the annotations of some or '
                        'all the instances.')
    AnnotationsConf.gen_parser(cli, required=False, message=annotations_help)

    multiclass_help = ('The instances are grouped according to '
                       'their families instead of their binary '
                       'labels.')
    cli.add_argument('--multiclass',
                     action='store_true',
                     default=False,
                     help=multiclass_help)
    return cli
def gen_parser():
    """Build the command-line parser of the active learning experiment.

    The annotations argument is optional with no default. One required
    sub-command is created per method returned by the strategies factory.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Active Learning',
        formatter_class=argparse.RawTextHelpFormatter)
    ExpConf.gen_parser(cli, filters=True, sparse=True)

    annotations_help = ('CSV file containing the initial annotations '
                        'used to learn the first detection model.')
    AnnotationsConf.gen_parser(cli,
                               default=None,
                               required=False,
                               message=annotations_help)

    # Choosing an annotation strategy is mandatory.
    strategy_parsers = cli.add_subparsers(dest='strategy')
    strategy_parsers.required = True
    for name in strategies_conf.get_factory().get_methods():
        sub = strategy_parsers.add_parser(name)
        strategies_conf.get_factory().gen_parser(name, sub)
    return cli
def gen_parser():
    """Build the command-line parser of the projection experiment.

    After the options registered by ExpConf and AnnotationsConf, one
    required sub-command is created per method returned by the projection
    factory.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Projection of the data for data visualization.')
    ExpConf.gen_parser(cli)

    annotations_help = (
        'CSV file containing the annotations of some instances, or '
        'GROUND_TRUTH to use the ground truth annotations stored in '
        'idents.csv. These annotations are used for semi-supervised '
        'projections and are displayed in the GUI.')
    AnnotationsConf.gen_parser(cli, message=annotations_help)

    # Choosing a projection algorithm is mandatory.
    algo_parsers = cli.add_subparsers(dest='algo')
    algo_parsers.required = True
    for name in projection_conf.get_factory().get_methods():
        sub = algo_parsers.add_parser(name)
        projection_conf.get_factory().gen_parser(name, sub)
    return cli
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, multiclass, name=None, parent=None):
    """Initialise the configuration and record the ``multiclass`` flag.

    ``ExpConf.__init__`` is called with ``None`` as its fifth positional
    configuration argument; the other arguments are forwarded unchanged.

    Args:
        multiclass: stored as ``self.multiclass``.
    """
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        None,
        name=name,
        parent=parent,
    )
    self.multiclass = multiclass
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, core_conf, name=None, parent=None,
             already_trained=None):
    """Initialise the configuration and record ``already_trained``.

    Args:
        already_trained: stored as ``self.already_trained``
            (defaults to None).
    """
    # Assigned before the base initialiser runs.
    # NOTE(review): presumably ExpConf.__init__ relies on this attribute;
    # confirm before changing the order.
    self.already_trained = already_trained
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=name,
        parent=parent,
    )
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, core_conf, name=None, parent=None,
             fold_id=None):
    """Initialise the configuration and record ``fold_id``.

    Args:
        fold_id: stored as ``self.fold_id`` (defaults to None).
    """
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=name,
        parent=parent,
    )
    self.fold_id = fold_id
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, core_conf, name=None, parent=None,
             label='all'):
    """Initialise the configuration and record ``label``.

    Args:
        label: stored as ``self.label`` (defaults to 'all').
    """
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=name,
        parent=parent,
    )
    self.label = label
def from_args(args):
    """Create an RcdConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file`` and ``exp_name`` are
            read here, the rest is consumed by the delegated ``from_args``
            helpers.

    Returns:
        RcdConf: the experiment configuration.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    # The strategy is always 'Rcd' for this experiment.
    rcd_core = strategies_conf.get_factory().from_args('Rcd', args, log)
    return RcdConf(secuml_conf, dataset, features, annotations, rcd_core,
                   name=args.exp_name)
def from_args(args):
    """Create a ProjectionConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file``, ``algo`` and
            ``exp_name`` are read here, the rest is consumed by the
            delegated ``from_args`` helpers.

    Returns:
        ProjectionConf: the experiment configuration.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    projection = projection_conf.get_factory().from_args(args.algo, args,
                                                         log)
    return ProjectionConf(secuml_conf, dataset, features, annotations,
                          projection, name=args.exp_name)
def gen_parser():
    """Build the command-line parser used to train and evaluate a model.

    Every model class returned by the classifiers factory (except
    'AlreadyTrained') gets its own sub-command. Supervised and
    semi-supervised model classes receive the annotations argument: it
    defaults to 'GROUND_TRUTH' for supervised models and is required for
    semi-supervised ones. Every sub-command, including 'AlreadyTrained',
    also receives the ClassificationConf and AlertsConf options.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        description='Train and evaluate a detection '
                    'model. ')
    ExpConf.gen_parser(cli, sparse=True)
    cli.add_argument(
        '--no-training-detection',
        action='store_true',
        default=False,
        help=('When specified, the detection model is not applied to the '
              'training instances. '))

    factory = classifiers.get_factory()
    model_names = factory.get_methods()
    # 'AlreadyTrained' is handled separately, after the regular models.
    model_names.remove('AlreadyTrained')

    model_parsers = cli.add_subparsers(dest='model_class')
    model_parsers.required = True
    base_help = ('CSV file containing the annotations of some instances, '
                 'or GROUND_TRUTH to use the ground truth annotations '
                 'stored in idents.csv. ')
    for name in model_names:
        sub = model_parsers.add_parser(name)
        factory.gen_parser(name, sub)
        kind = get_classifier_type(factory.get_class(name))
        if kind in [ClassifierType.supervised,
                    ClassifierType.semisupervised]:
            if kind == ClassifierType.supervised:
                # Supervised models fall back to the ground truth.
                default_file = 'GROUND_TRUTH'
                help_text = base_help + ' Default: GROUND_TRUTH.'
            else:
                default_file = None
                help_text = base_help
            AnnotationsConf.gen_parser(sub,
                                       required=default_file is None,
                                       default=default_file,
                                       message=help_text)
        ClassificationConf.gen_parser(sub)
        AlertsConf.gen_parser(sub)

    # Add subparser for already trained model
    trained_parser = model_parsers.add_parser('AlreadyTrained')
    factory.gen_parser('AlreadyTrained', trained_parser)
    ClassificationConf.gen_parser(trained_parser)
    AlertsConf.gen_parser(trained_parser)
    return cli
def from_args(args):
    """Create a FeaturesAnalysisConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file`` and ``exp_name`` are
            read here, the rest is consumed by the delegated ``from_args``
            helpers.

    Returns:
        FeaturesAnalysisConf: the experiment configuration, built with
        ``None`` as its fifth positional argument.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    return FeaturesAnalysisConf(secuml_conf, dataset, features, annotations,
                                None, name=args.exp_name)
def from_args(args):
    """Create an ActiveLearningConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file``, ``strategy`` and
            ``exp_name`` are read here, the rest is consumed by the
            delegated ``from_args`` helpers.

    Returns:
        ActiveLearningConf: the experiment configuration.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    strategy = strategies_conf.get_factory().from_args(args.strategy, args,
                                                       log)
    return ActiveLearningConf(secuml_conf, dataset, features, annotations,
                              strategy, name=args.exp_name)
def __init__(self, secuml_conf, dataset_conf, features_conf,
             annotations_conf, core_conf, alerts_conf, name=None,
             parent=None, already_trained=None,
             no_training_detection=False):
    """Initialise the configuration with its alerts settings.

    Args:
        alerts_conf: stored as ``self.alerts_conf``.
        already_trained: stored as ``self.already_trained``
            (defaults to None).
        no_training_detection: stored as ``self.no_training_detection``
            (defaults to False).
    """
    # Assigned before the base initialiser runs.
    # NOTE(review): presumably ExpConf.__init__ relies on this attribute;
    # confirm before changing the order.
    self.already_trained = already_trained
    ExpConf.__init__(
        self,
        secuml_conf,
        dataset_conf,
        features_conf,
        annotations_conf,
        core_conf,
        name=name,
        parent=parent,
    )
    self.alerts_conf = alerts_conf
    self.no_training_detection = no_training_detection
def from_args(args):
    """Create a DiademConf from parsed command-line arguments.

    When the selected model class is 'AlreadyTrained', the annotations
    configuration is built without an annotations file and
    ``args.model_exp_id`` is passed on as ``already_trained``.

    Args:
        args: parsed arguments; ``model_class``, ``exp_name`` and,
            depending on the model class, ``annotations_file`` or
            ``model_exp_id`` are read here.

    Returns:
        DiademConf: the experiment configuration.
    """
    secuml_conf = ExpConf.common_from_args(args)
    log = secuml_conf.logger
    classification = ClassificationConf.from_args(args, log)

    if args.model_class == 'AlreadyTrained':
        trained_exp = args.model_exp_id
        annotations_file = None
    else:
        trained_exp = None
        annotations_file = args.annotations_file
    annotations = AnnotationsConf(annotations_file, None, log)

    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    return DiademConf(secuml_conf, dataset, features, annotations,
                      classification, name=args.exp_name,
                      already_trained=trained_exp)
def from_args(args):
    """Create a ClusteringConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file``, ``algo``, ``label``
            and ``exp_name`` are read here, the rest is consumed by the
            delegated ``from_args`` helpers.

    Returns:
        ClusteringConf: the experiment configuration.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    clustering = clustering_conf.get_factory().from_args(args.algo, args,
                                                         log)
    return ClusteringConf(secuml_conf, dataset, features, annotations,
                          clustering, name=args.exp_name, label=args.label)
def from_args(args):
    """Create a DiademConf from parsed command-line arguments.

    The annotations file is only used for supervised and semi-supervised
    model classes. The alerts configuration is dropped (set to None) for
    multiclass models that do not request an alerts analysis.

    Args:
        args: parsed arguments; ``model_class``, ``exp_name``,
            ``no_training_detection`` and, depending on the model class,
            ``annotations_file`` or ``model_exp_id`` are read here.

    Returns:
        DiademConf: the experiment configuration.

    Raises:
        InvalidInputArguments: if alerts classification is requested for
            an unsupervised model class, if alerts analysis is requested
            for a multiclass model or in streaming mode, or if sparse
            features are given to a model that does not accept them.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    logger = secuml_conf.logger
    classif_conf = ClassificationConf.from_args(args, logger)
    model_class = classifiers.get_factory().get_class(args.model_class)
    classifier_type = get_classifier_type(model_class)
    if classifier_type in [ClassifierType.supervised,
                           ClassifierType.semisupervised]:
        annotations_conf = AnnotationsConf(args.annotations_file, None,
                                           logger)
    else:
        annotations_conf = AnnotationsConf(None, None, logger)
    already_trained = None
    if args.model_class == 'AlreadyTrained':
        already_trained = args.model_exp_id
    alerts_conf = AlertsConf.from_args(args, logger)
    if (classifier_type == ClassifierType.unsupervised
            and alerts_conf.classifier_conf is not None):
        raise InvalidInputArguments('Supervised classification of the '
                                    'alerts is not supported for '
                                    'unsupervised model classes. ')
    if classif_conf.classifier_conf.multiclass:
        if alerts_conf.with_analysis():
            raise InvalidInputArguments('Alerts analysis is not supported '
                                        'for multiclass models. ')
        # No alerts handling at all for multiclass models.
        alerts_conf = None
    # alerts_conf may have been reset to None just above: check it before
    # calling with_analysis(), otherwise a multiclass model tested in
    # streaming mode fails with an AttributeError.
    if (classif_conf.test_conf.method == 'dataset'
            and classif_conf.test_conf.streaming
            and alerts_conf is not None
            and alerts_conf.with_analysis()):
        raise InvalidInputArguments('Alerts analysis is not supported '
                                    'in streaming mode. ')
    dataset_conf = DatasetConf.from_args(args, logger)
    features_conf = FeaturesConf.from_args(args, logger)
    if (features_conf.sparse
            and not classif_conf.classifier_conf.accept_sparse):
        raise InvalidInputArguments('%s does not support sparse '
                                    'features. ' % args.model_class)
    return DiademConf(secuml_conf, dataset_conf, features_conf,
                      annotations_conf, classif_conf, alerts_conf,
                      name=args.exp_name, already_trained=already_trained,
                      no_training_detection=args.no_training_detection)
def from_args(args):
    """Create a FeaturesAnalysisConf from parsed command-line arguments.

    Args:
        args: parsed arguments; ``annotations_file``, ``multiclass`` and
            ``exp_name`` are read here, the rest is consumed by the
            delegated ``from_args`` helpers.

    Returns:
        FeaturesAnalysisConf: the experiment configuration.

    Raises:
        InvalidInputArguments: if ``--multiclass`` is set while no
            annotations file is given.
    """
    no_annotations = args.annotations_file is None
    if no_annotations and args.multiclass:
        raise InvalidInputArguments('--annotations <file> is required. '
                                    'An annotation file must be specified '
                                    'to group the instances according to '
                                    'their families.')
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    annotations = AnnotationsConf(args.annotations_file, None, log)
    return FeaturesAnalysisConf(secuml_conf, dataset, features, annotations,
                                args.multiclass, name=args.exp_name)
def from_args(args):
    """Create a DiademConf from parsed command-line arguments.

    The annotations file is only used for supervised and semi-supervised
    model classes; ``args.model_exp_id`` is only read when the model class
    is 'AlreadyTrained'.

    Args:
        args: parsed arguments; ``model_class``, ``exp_name`` and,
            depending on the model class, ``annotations_file`` or
            ``model_exp_id`` are read here.

    Returns:
        DiademConf: the experiment configuration.
    """
    secuml_conf = ExpConf.secuml_conf_from_args(args)
    log = secuml_conf.logger
    classification = ClassificationConf.from_args(args, log)

    model_cls = classifiers.get_factory().get_class(args.model_class)
    kind = get_classifier_type(model_cls)
    uses_annotations = kind in [ClassifierType.supervised,
                                ClassifierType.semisupervised]
    annotations_file = args.annotations_file if uses_annotations else None
    annotations = AnnotationsConf(annotations_file, None, log)

    is_trained = args.model_class == 'AlreadyTrained'
    trained_exp = args.model_exp_id if is_trained else None

    dataset = DatasetConf.from_args(args, log)
    features = FeaturesConf.from_args(args, log)
    return DiademConf(secuml_conf, dataset, features, annotations,
                      classification, name=args.exp_name,
                      already_trained=trained_exp)
def fields_to_export(self):
    """Return the ExpConf export fields plus ``already_trained``.

    Returns:
        The collection returned by ``ExpConf.fields_to_export`` with the
        ('already_trained', primitive) entry added at the end.
    """
    exported = ExpConf.fields_to_export(self)
    exported.append(('already_trained', exportFieldMethod.primitive))
    return exported
def fields_to_export(self):
    """Return the ExpConf export fields plus ``multiclass``.

    Returns:
        The collection returned by ``ExpConf.fields_to_export`` with the
        ('multiclass', primitive) entry added at the end.
    """
    exported = ExpConf.fields_to_export(self)
    multiclass_field = ('multiclass', exportFieldMethod.primitive)
    exported.append(multiclass_field)
    return exported
def fields_to_export(self):
    """Return the ExpConf export fields plus ``label``.

    Returns:
        The collection returned by ``ExpConf.fields_to_export`` with the
        ('label', primitive) entry added at the end.
    """
    exported = ExpConf.fields_to_export(self)
    exported.append(('label', exportFieldMethod.primitive))
    return exported
def fields_to_export(self):
    """Return the ExpConf export fields plus ``fold_id`` and ``kind``.

    Returns:
        The collection returned by ``ExpConf.fields_to_export`` with the
        ('fold_id', primitive) and ('kind', primitive) entries added at
        the end, in that order.
    """
    exported = ExpConf.fields_to_export(self)
    exported.extend([
        ('fold_id', exportFieldMethod.primitive),
        ('kind', exportFieldMethod.primitive),
    ])
    return exported
def fields_to_export(self):
    """Return the ExpConf export fields plus the Diadem-specific ones.

    Returns:
        The collection returned by ``ExpConf.fields_to_export`` with
        ``already_trained`` (primitive), ``alerts_conf`` (obj) and
        ``no_training_detection`` (primitive) added at the end, in that
        order.
    """
    exported = ExpConf.fields_to_export(self)
    exported.append(('already_trained', exportFieldMethod.primitive))
    exported.append(('alerts_conf', exportFieldMethod.obj))
    exported.append(('no_training_detection', exportFieldMethod.primitive))
    return exported