Ejemplo n.º 1
0
 def gen_parser():
     """Build the command-line parser of the clustering experiment.

     ``ExpConf`` and ``AnnotationsConf`` add their own arguments first, then
     the ``--label`` option is declared, and finally one sub-command is
     created for every method listed by the clustering factory. Choosing a
     sub-command is mandatory; its name is stored in ``args.algo``.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = '''CSV file containing the annotations of some
                            instances, or GROUND_TRUTH to use the ground
                            truth annotations stored in idents.csv.
                            These annotations are used for semi-supervised
                            projections.'''
     label_help = '''The clustering is built from all the instances in the
                      dataset, or only from the benign or malicious ones.
                      By default, the clustering is built from all the
                      instances. The malicious and benign instances are
                      selected according to the ground-truth stored in
                      idents.csv.'''
     parser = argparse.ArgumentParser(
         description='Clustering of the data for data exploration.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     parser.add_argument('--label',
                         choices=['all', 'malicious', 'benign'],
                         default='all',
                         help=label_help)
     # One sub-command per clustering method; one of them must be selected.
     method_parsers = parser.add_subparsers(dest='algo')
     method_parsers.required = True
     factory = clustering_conf.get_factory()
     for method in factory.get_methods():
         method_parser = method_parsers.add_parser(method)
         factory.gen_parser(method, method_parser)
     return parser
Ejemplo n.º 2
0
 def gen_parser():
     """Build the command-line parser used to learn a detection model.

     A mandatory sub-command (stored in ``args.model_class``) is created for
     each model class listed by the classifiers factory. Supervised and
     semi-supervised model classes additionally receive an optional
     annotations argument. ``AlreadyTrained`` is registered last, without
     the annotations argument.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     parser = argparse.ArgumentParser(
         description='Learn a detection model. The ground-truth must be '
                     'stored in annotations/ground_truth.csv.')
     ExpConf.gen_parser(parser)
     ClassificationConf.gen_parser(parser)
     factory = classifiers.get_factory()
     # 'AlreadyTrained' is handled separately after the loop.
     trainable = factory.get_methods()
     trainable.remove('AlreadyTrained')
     model_parsers = parser.add_subparsers(dest='model_class')
     model_parsers.required = True
     for name in trainable:
         sub_parser = model_parsers.add_parser(name)
         factory.gen_parser(name, sub_parser)
         kind = get_classifier_type(factory.get_class(name))
         if kind not in (ClassifierType.supervised,
                         ClassifierType.semisupervised):
             continue
         AnnotationsConf.gen_parser(
             sub_parser,
             required=False,
             message='CSV file containing the annotations of some or all '
                     'the instances.')
     # Sub-command for a model that has already been trained.
     trained_parser = model_parsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     return parser
Ejemplo n.º 3
0
 def __init__(self,
              secuml_conf,
              dataset_conf,
              features_conf,
              annotations_conf,
              classifier_conf,
              name=None,
              parent=None,
              fold_id=None,
              kind='test'):
     """Initialise the configuration and record the fold information.

     The five configuration objects, ``name`` and ``parent`` are forwarded
     unchanged to ``ExpConf.__init__``. ``fold_id`` (default ``None``) and
     ``kind`` (default ``'test'``) are stored as attributes afterwards.
     """
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         classifier_conf, name=name, parent=parent)
     self.fold_id = fold_id
     self.kind = kind
Ejemplo n.º 4
0
 def gen_parser():
     """Build the command-line parser of the features analysis experiment.

     ``ExpConf`` adds its arguments with ``filters=False`` and the
     annotations argument is declared as required.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = ('CSV file containing the annotations of some or '
                         'all the instances.')
     analysis_parser = argparse.ArgumentParser(
         description='Features Analysis')
     ExpConf.gen_parser(analysis_parser, filters=False)
     AnnotationsConf.gen_parser(analysis_parser,
                                required=True,
                                message=annotations_help)
     return analysis_parser
Ejemplo n.º 5
0
 def gen_parser():
     """Build the command-line parser of the rare category detection.

     Help texts are rendered with ``argparse.RawTextHelpFormatter``. The
     annotations argument is optional and defaults to
     ``init_annotations.csv``; the ``Rcd`` strategy adds its own arguments
     directly to the main parser (no sub-command).

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = ('CSV file containing the initial annotations used '
                         'to learn the first supervised detection model.')
     rcd_parser = argparse.ArgumentParser(
         description='Rare Category Detection',
         formatter_class=argparse.RawTextHelpFormatter)
     ExpConf.gen_parser(rcd_parser, filters=True, sparse=True)
     AnnotationsConf.gen_parser(rcd_parser,
                                default='init_annotations.csv',
                                required=False,
                                message=annotations_help)
     strategies_conf.get_factory().gen_parser('Rcd', rcd_parser)
     return rcd_parser
Ejemplo n.º 6
0
 def gen_parser():
     """Build the command-line parser of the projection experiment.

     After the ``ExpConf`` and ``AnnotationsConf`` arguments, one mandatory
     sub-command (stored in ``args.algo``) is created for each method listed
     by the projection factory.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = ('CSV file containing the annotations of some '
                         'instances. These annotations are used for '
                         'semi-supervised projections.')
     parser = argparse.ArgumentParser(
         description='Projection of the data for data visualization.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     method_parsers = parser.add_subparsers(dest='algo')
     method_parsers.required = True
     for method in projection_conf.get_factory().get_methods():
         method_parser = method_parsers.add_parser(method)
         projection_conf.get_factory().gen_parser(method, method_parser)
     return parser
Ejemplo n.º 7
0
 def gen_parser():
     """Build the command-line parser of the features analysis experiment.

     ``ExpConf`` adds its arguments with ``filters=False`` and
     ``sparse=True``; the annotations argument is optional, and a
     ``--multiclass`` flag (off by default) is declared.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = ('CSV file containing the annotations of some or '
                         'all the instances.')
     multiclass_help = ('The instances are grouped according to their '
                        'families instead of their binary labels.')
     analysis_parser = argparse.ArgumentParser(
         description='Features Analysis')
     ExpConf.gen_parser(analysis_parser, filters=False, sparse=True)
     AnnotationsConf.gen_parser(analysis_parser,
                                required=False,
                                message=annotations_help)
     analysis_parser.add_argument(
         '--multiclass',
         default=False,
         action='store_true',
         help=multiclass_help)
     return analysis_parser
Ejemplo n.º 8
0
 def gen_parser():
     """Build the command-line parser of the active learning experiment.

     Help texts are rendered with ``argparse.RawTextHelpFormatter``. The
     annotations argument is optional with no default, and one mandatory
     sub-command (stored in ``args.strategy``) is created for each method
     listed by the strategies factory.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = ('CSV file containing the initial annotations used '
                         'to learn the first detection model.')
     parser = argparse.ArgumentParser(
         description='Active Learning',
         formatter_class=argparse.RawTextHelpFormatter)
     ExpConf.gen_parser(parser, filters=True, sparse=True)
     AnnotationsConf.gen_parser(parser,
                                default=None,
                                required=False,
                                message=annotations_help)
     strategy_parsers = parser.add_subparsers(dest='strategy')
     strategy_parsers.required = True
     for name in strategies_conf.get_factory().get_methods():
         sub_parser = strategy_parsers.add_parser(name)
         strategies_conf.get_factory().gen_parser(name, sub_parser)
     return parser
Ejemplo n.º 9
0
 def gen_parser():
     """Build the command-line parser of the projection experiment.

     After the ``ExpConf`` and ``AnnotationsConf`` arguments, one mandatory
     sub-command (stored in ``args.algo``) is created for each method listed
     by the projection factory.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     annotations_help = '''CSV file containing the annotations of some
                         instances, or GROUND_TRUTH to use the ground
                         truth annotations stored in idents.csv.
                         These annotations are used for semi-supervised
                         projections and are displayed in the GUI.'''
     parser = argparse.ArgumentParser(
         description='Projection of the data for data visualization.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     method_parsers = parser.add_subparsers(dest='algo')
     method_parsers.required = True
     for method in projection_conf.get_factory().get_methods():
         method_parser = method_parsers.add_parser(method)
         projection_conf.get_factory().gen_parser(method, method_parser)
     return parser
Ejemplo n.º 10
0
 def __init__(self, secuml_conf, dataset_conf, features_conf,
              annotations_conf, multiclass, name=None, parent=None):
     """Initialise the configuration and remember the multiclass flag.

     The configuration objects, ``name`` and ``parent`` are forwarded to
     ``ExpConf.__init__``; ``multiclass`` is stored as an attribute once the
     base initialisation is done.
     """
     # No fifth configuration object is supplied to ExpConf: None is passed
     # in that position.
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         None, name=name, parent=parent)
     self.multiclass = multiclass
Ejemplo n.º 11
0
 def __init__(self, secuml_conf, dataset_conf, features_conf,
              annotations_conf, core_conf, name=None, parent=None,
              already_trained=None):
     """Initialise the configuration, keeping the already-trained value.

     ``already_trained`` (default ``None``) is stored as an attribute; all
     the other arguments are forwarded to ``ExpConf.__init__``.
     """
     # Assigned before delegating to ExpConf.__init__.
     # NOTE(review): presumably the base initializer relies on this
     # attribute - confirm before reordering.
     self.already_trained = already_trained
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         core_conf, name=name, parent=parent)
Ejemplo n.º 12
0
 def __init__(self, secuml_conf, dataset_conf, features_conf,
              annotations_conf, core_conf, name=None, parent=None,
              fold_id=None):
     """Initialise the configuration and record the fold identifier.

     Every argument except ``fold_id`` is forwarded to ``ExpConf.__init__``;
     ``fold_id`` (default ``None``) is stored as an attribute afterwards.
     """
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         core_conf, name=name, parent=parent)
     self.fold_id = fold_id
Ejemplo n.º 13
0
 def __init__(self, secuml_conf, dataset_conf, features_conf,
              annotations_conf, core_conf, name=None, parent=None,
              label='all'):
     """Initialise the configuration and record the label selection.

     Every argument except ``label`` is forwarded to ``ExpConf.__init__``;
     ``label`` (default ``'all'``) is stored as an attribute afterwards.
     """
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         core_conf, name=name, parent=parent)
     self.label = label
Ejemplo n.º 14
0
 def from_args(args):
     """Create a ``RcdConf`` from the parsed command-line arguments.

     Args:
         args: parsed arguments; ``annotations_file`` and ``exp_name`` are
             read here, the rest is consumed by the ``from_args`` helpers.

     Returns:
         The ``RcdConf`` built from the individual configurations.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     # The strategy is always 'Rcd' for this experiment.
     rcd_factory = strategies_conf.get_factory()
     core_conf = rcd_factory.from_args('Rcd', args, log)
     return RcdConf(secuml_conf,
                    dataset_conf,
                    features_conf,
                    annotations_conf,
                    core_conf,
                    name=args.exp_name)
Ejemplo n.º 15
0
 def from_args(args):
     """Create a ``ProjectionConf`` from the parsed command-line arguments.

     Args:
         args: parsed arguments; ``annotations_file``, ``algo`` and
             ``exp_name`` are read here, the rest is consumed by the
             ``from_args`` helpers.

     Returns:
         The ``ProjectionConf`` built from the individual configurations.
     """
     secuml_conf = ExpConf.common_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     factory = projection_conf.get_factory()
     core_conf = factory.from_args(args.algo, args, log)
     return ProjectionConf(secuml_conf,
                           dataset_conf,
                           features_conf,
                           annotations_conf,
                           core_conf,
                           name=args.exp_name)
Ejemplo n.º 16
0
 def gen_parser():
     """Build the command-line parser to train and evaluate a model.

     A mandatory sub-command (stored in ``args.model_class``) is created for
     each model class listed by the classifiers factory. Supervised and
     semi-supervised model classes receive an annotations argument: it
     defaults to ``GROUND_TRUTH`` for supervised classes and is required
     otherwise. Every sub-command, including ``AlreadyTrained``, also gets
     the ``ClassificationConf`` and ``AlertsConf`` arguments.

     Returns:
         argparse.ArgumentParser: the configured parser.
     """
     no_training_help = '''When specified, the detection model is
                                 not applied to the training instances. '''
     parser = argparse.ArgumentParser(
         description='Train and evaluate a detection model. ')
     ExpConf.gen_parser(parser, sparse=True)
     parser.add_argument('--no-training-detection',
                         action='store_true',
                         default=False,
                         help=no_training_help)
     factory = classifiers.get_factory()
     # 'AlreadyTrained' is handled separately after the loop.
     trainable = factory.get_methods()
     trainable.remove('AlreadyTrained')
     model_parsers = parser.add_subparsers(dest='model_class')
     model_parsers.required = True
     for name in trainable:
         sub_parser = model_parsers.add_parser(name)
         factory.gen_parser(name, sub_parser)
         kind = get_classifier_type(factory.get_class(name))
         if kind in (ClassifierType.supervised,
                     ClassifierType.semisupervised):
             message = '''CSV file containing the annotations of some
                          instances, or GROUND_TRUTH to use the ground
                          truth annotations stored in idents.csv. '''
             # Only supervised classes fall back to the ground truth.
             supervised = kind == ClassifierType.supervised
             if supervised:
                 message = message + ' Default: GROUND_TRUTH.'
             AnnotationsConf.gen_parser(
                 sub_parser,
                 required=not supervised,
                 default='GROUND_TRUTH' if supervised else None,
                 message=message)
         ClassificationConf.gen_parser(sub_parser)
         AlertsConf.gen_parser(sub_parser)
     # Sub-command for a model that has already been trained.
     trained_parser = model_parsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     ClassificationConf.gen_parser(trained_parser)
     AlertsConf.gen_parser(trained_parser)
     return parser
Ejemplo n.º 17
0
 def from_args(args):
     """Create a ``FeaturesAnalysisConf`` from the parsed arguments.

     Args:
         args: parsed arguments; ``annotations_file`` and ``exp_name`` are
             read here, the rest is consumed by the ``from_args`` helpers.

     Returns:
         The ``FeaturesAnalysisConf``, built with ``None`` as its fifth
         positional argument.
     """
     secuml_conf = ExpConf.common_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     return FeaturesAnalysisConf(
         secuml_conf, dataset_conf, features_conf, annotations_conf, None,
         name=args.exp_name)
Ejemplo n.º 18
0
 def from_args(args):
     """Create an ``ActiveLearningConf`` from the parsed arguments.

     Args:
         args: parsed arguments; ``annotations_file``, ``strategy`` and
             ``exp_name`` are read here, the rest is consumed by the
             ``from_args`` helpers.

     Returns:
         The ``ActiveLearningConf`` built from the individual
         configurations.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     factory = strategies_conf.get_factory()
     core_conf = factory.from_args(args.strategy, args, log)
     return ActiveLearningConf(secuml_conf,
                               dataset_conf,
                               features_conf,
                               annotations_conf,
                               core_conf,
                               name=args.exp_name)
Ejemplo n.º 19
0
 def __init__(self, secuml_conf, dataset_conf, features_conf,
              annotations_conf, core_conf, alerts_conf, name=None,
              parent=None, already_trained=None,
              no_training_detection=False):
     """Initialise the configuration with its alerts-related settings.

     ``already_trained``, ``alerts_conf`` and ``no_training_detection`` are
     stored as attributes; the remaining arguments are forwarded to
     ``ExpConf.__init__``.
     """
     # Assigned before delegating to ExpConf.__init__.
     # NOTE(review): presumably the base initializer relies on this
     # attribute - confirm before reordering.
     self.already_trained = already_trained
     ExpConf.__init__(
         self, secuml_conf, dataset_conf, features_conf, annotations_conf,
         core_conf, name=name, parent=parent)
     self.alerts_conf = alerts_conf
     self.no_training_detection = no_training_detection
Ejemplo n.º 20
0
 def from_args(args):
     """Create a ``DiademConf`` from the parsed command-line arguments.

     For the ``AlreadyTrained`` model class the annotations configuration is
     built without a file and ``args.model_exp_id`` is recorded as the
     already-trained value; for any other model class
     ``args.annotations_file`` is used and the already-trained value stays
     ``None``.

     Args:
         args: parsed arguments; ``model_class`` and ``exp_name`` are always
             read, ``annotations_file`` or ``model_exp_id`` depending on the
             model class.

     Returns:
         The ``DiademConf`` built from the individual configurations.
     """
     secuml_conf = ExpConf.common_from_args(args)
     log = secuml_conf.logger
     core_conf = ClassificationConf.from_args(args, log)
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
         annotations_file = None
     else:
         already_trained = None
         annotations_file = args.annotations_file
     annotations_conf = AnnotationsConf(annotations_file, None, log)
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       core_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Ejemplo n.º 21
0
 def from_args(args):
     """Create a ``ClusteringConf`` from the parsed command-line arguments.

     Args:
         args: parsed arguments; ``annotations_file``, ``algo``,
             ``exp_name`` and ``label`` are read here, the rest is consumed
             by the ``from_args`` helpers.

     Returns:
         The ``ClusteringConf`` built from the individual configurations.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     factory = clustering_conf.get_factory()
     core_conf = factory.from_args(args.algo, args, log)
     return ClusteringConf(
         secuml_conf, dataset_conf, features_conf, annotations_conf,
         core_conf, name=args.exp_name, label=args.label)
Ejemplo n.º 22
0
 def from_args(args):
     """Create a ``DiademConf`` from the parsed command-line arguments.

     Args:
         args: parsed arguments; ``model_class``, ``exp_name`` and
             ``no_training_detection`` are always read, ``annotations_file``
             only for supervised / semi-supervised model classes and
             ``model_exp_id`` only for ``AlreadyTrained``. The remaining
             attributes are consumed by the ``from_args`` helpers.

     Returns:
         The ``DiademConf`` built from the individual configurations. Its
         alerts configuration is ``None`` for multiclass models.

     Raises:
         InvalidInputArguments: when the alerts options are combined with an
             unsupervised model class, a multiclass model or the streaming
             mode in an unsupported way, or when sparse features are
             requested for a model class that does not accept them.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     classif_conf = ClassificationConf.from_args(args, secuml_conf.logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     if classifier_type in [
             ClassifierType.supervised, ClassifierType.semisupervised
     ]:
         annotations_conf = AnnotationsConf(args.annotations_file, None,
                                            secuml_conf.logger)
     else:
         annotations_conf = AnnotationsConf(None, None, secuml_conf.logger)
     already_trained = None
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     alerts_conf = AlertsConf.from_args(args, secuml_conf.logger)
     if (classifier_type == ClassifierType.unsupervised
             and alerts_conf.classifier_conf is not None):
         raise InvalidInputArguments('Supervised classification of the '
                                     'alerts is not supported for '
                                     'unsupervised model classes. ')
     if classif_conf.classifier_conf.multiclass:
         if alerts_conf.with_analysis():
             raise InvalidInputArguments('Alerts analysis is not supported '
                                         'for multiclass models. ')
         else:
             # Multiclass models carry no alerts configuration at all.
             alerts_conf = None
     # alerts_conf is None for multiclass models (see above): guard the
     # call so that streaming + multiclass does not fail with an
     # AttributeError on None.
     if (classif_conf.test_conf.method == 'dataset'
             and classif_conf.test_conf.streaming
             and alerts_conf is not None
             and alerts_conf.with_analysis()):
         raise InvalidInputArguments('Alerts analysis is not supported '
                                     'in streaming mode. ')
     dataset_conf = DatasetConf.from_args(args, secuml_conf.logger)
     features_conf = FeaturesConf.from_args(args, secuml_conf.logger)
     if (features_conf.sparse
             and not classif_conf.classifier_conf.accept_sparse):
         raise InvalidInputArguments('%s does not support sparse '
                                     'features. ' % args.model_class)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       alerts_conf,
                       name=args.exp_name,
                       already_trained=already_trained,
                       no_training_detection=args.no_training_detection)
Ejemplo n.º 23
0
 def from_args(args):
     """Create a ``FeaturesAnalysisConf`` from the parsed arguments.

     Args:
         args: parsed arguments; ``annotations_file``, ``multiclass`` and
             ``exp_name`` are read here, the rest is consumed by the
             ``from_args`` helpers.

     Returns:
         The ``FeaturesAnalysisConf`` built from the individual
         configurations.

     Raises:
         InvalidInputArguments: when ``multiclass`` is set but no
             annotations file was given.
     """
     # Validate the arguments first, before any configuration is built.
     if args.annotations_file is None and args.multiclass:
         raise InvalidInputArguments(
             '--annotations <file> is required. An annotation file must be '
             'specified to group the instances according to their '
             'families.')
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     annotations_conf = AnnotationsConf(args.annotations_file, None, log)
     return FeaturesAnalysisConf(
         secuml_conf, dataset_conf, features_conf, annotations_conf,
         args.multiclass, name=args.exp_name)
Ejemplo n.º 24
0
 def from_args(args):
     """Create a ``DiademConf`` from the parsed command-line arguments.

     The annotations file is only taken into account for supervised and
     semi-supervised model classes; for the others the annotations
     configuration is built without a file. ``args.model_exp_id`` is
     recorded as the already-trained value for the ``AlreadyTrained`` model
     class only.

     Args:
         args: parsed arguments; ``model_class`` and ``exp_name`` are always
             read, ``annotations_file`` and ``model_exp_id`` only when
             needed.

     Returns:
         The ``DiademConf`` built from the individual configurations.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     classif_conf = ClassificationConf.from_args(args, log)
     classifier_type = get_classifier_type(
         classifiers.get_factory().get_class(args.model_class))
     uses_annotations = classifier_type in (ClassifierType.supervised,
                                            ClassifierType.semisupervised)
     annotations_file = args.annotations_file if uses_annotations else None
     annotations_conf = AnnotationsConf(annotations_file, None, log)
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     else:
         already_trained = None
     dataset_conf = DatasetConf.from_args(args, log)
     features_conf = FeaturesConf.from_args(args, log)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Ejemplo n.º 25
0
 def fields_to_export(self):
     """Return the ``ExpConf`` export fields plus ``already_trained``.

     The extra field is exported with ``exportFieldMethod.primitive``.
     """
     exported = ExpConf.fields_to_export(self)
     exported.append(('already_trained', exportFieldMethod.primitive))
     return exported
Ejemplo n.º 26
0
 def fields_to_export(self):
     """Return the ``ExpConf`` export fields plus ``multiclass``.

     The extra field is exported with ``exportFieldMethod.primitive``.
     """
     exported = ExpConf.fields_to_export(self)
     exported.extend([('multiclass', exportFieldMethod.primitive)])
     return exported
Ejemplo n.º 27
0
 def fields_to_export(self):
     """Return the ``ExpConf`` export fields plus ``label``.

     The extra field is exported with ``exportFieldMethod.primitive``.
     """
     exported = ExpConf.fields_to_export(self)
     exported.append(('label', exportFieldMethod.primitive))
     return exported
Ejemplo n.º 28
0
 def fields_to_export(self):
     """Return the ``ExpConf`` export fields plus ``fold_id`` and ``kind``.

     Both extra fields are exported with ``exportFieldMethod.primitive``.
     """
     exported = ExpConf.fields_to_export(self)
     exported.extend([
         ('fold_id', exportFieldMethod.primitive),
         ('kind', exportFieldMethod.primitive),
     ])
     return exported
Ejemplo n.º 29
0
 def fields_to_export(self):
     """Return the ``ExpConf`` export fields plus the alerts settings.

     ``already_trained`` and ``no_training_detection`` are exported with
     ``exportFieldMethod.primitive``, ``alerts_conf`` with
     ``exportFieldMethod.obj``.
     """
     exported = ExpConf.fields_to_export(self)
     exported.append(('already_trained', exportFieldMethod.primitive))
     exported.append(('alerts_conf', exportFieldMethod.obj))
     exported.append(('no_training_detection', exportFieldMethod.primitive))
     return exported