def generateParser(parser, binary=True):
    """Register the arguments of this experiment on ``parser``.

    Three helpers are invoked, in this order:

    1. ``Experiment.projectDatasetFeturesParser`` with ``parser``;
    2. ``ActiveLearningConfiguration.generateActiveLearningArguments``
       with ``parser``;
    3. ``ActiveLearningConfiguration.generateSupervisedLearningArguments``
       with ``parser`` and the ``binary`` flag forwarded as a keyword.

    Args:
        parser: Parser object handed to each helper (presumably an
            ``argparse.ArgumentParser`` -- confirm against the callers).
        binary: Forwarded unchanged to
            ``generateSupervisedLearningArguments``.

    Returns:
        Whatever ``generateActiveLearningArguments`` returned.
    """
    Experiment.projectDatasetFeturesParser(parser)
    active_learning_group = (
        ActiveLearningConfiguration.generateActiveLearningArguments(parser))
    ActiveLearningConfiguration.generateSupervisedLearningArguments(
        parser, binary=binary)
    return active_learning_group
Exemple #2
0
    def generateParser(parser):
        """Register the classification and alert arguments on ``parser``.

        ``Experiment.projectDatasetFeturesParser`` is called first, then the
        following options are added:

        * ``--num-folds`` (int, default 4)
        * ``--multilabel`` (flag)
        * ``--sample-weight`` (flag)
        * validation group: ``--test-size`` (float, default 0.1) and
          ``--validation-dataset`` (default ``None``)
        * alerts group: ``--top-n-alerts`` (int, default 100),
          ``--detection-threshold`` (float, default 0.8),
          ``--clustering-algo`` (``Kmeans`` or ``GaussianMixture``) and
          ``--num-clusters`` (int, default 4)

        Args:
            parser: The ``argparse`` parser (or compatible object) on which
                the arguments and argument groups are created.
        """
        Experiment.projectDatasetFeturesParser(parser)

        parser.add_argument('--num-folds', type=int, default=4)
        parser.add_argument('--multilabel', action='store_true', default=False)
        sample_weight_help = (
            'When set to True, the detection model is learned with '
            'sample weights inverse to the proportion of the family '
            'in the dataset. Useless if the families are not specified.')
        parser.add_argument('--sample-weight',
                            action='store_true',
                            default=False,
                            help=sample_weight_help)

        ## Validation parameters
        # The long description is used as the title of the argument group.
        validation_help = (
            'Validation parameters: \n '
            'The detection model is validated with a proportion of '
            'the instances in the input dataset, or with a separate validation'
            ' dataset. By default 10% of the instances are used for validation')
        validation_group = parser.add_argument_group(validation_help)
        validation_group.add_argument('--test-size', type=float, default=0.1)
        validation_group.add_argument('--validation-dataset', default=None)

        ## Alerts
        alerts_group = parser.add_argument_group('Alerts parameters')
        # type=int keeps a user-supplied value consistent with the integer
        # default; without it argparse would hand back a string.
        alerts_group.add_argument(
            '--top-n-alerts',
            type=int,
            default=100,
            help='Number of most confident alerts displayed.')
        alerts_group.add_argument(
            '--detection-threshold',
            type=float,
            default=0.8,
            help=('An alert is raised if the predicted probability of '
                  'maliciousness is above this threshold.'))
        alerts_group.add_argument(
            '--clustering-algo',
            default='Kmeans',
            choices=['Kmeans', 'GaussianMixture'],
            help='Clustering algorithm to analyse the alerts.')
        alerts_group.add_argument(
            '--num-clusters',
            type=int,
            default=4,
            help='Number of clusters built from the alerts.')
Exemple #3
0
    def generateParser(parser):
        """Register the projection arguments on ``parser``.

        ``Experiment.projectDatasetFeturesParser`` is called first, then
        ``--families-supervision`` (flag), ``--labels`` / ``-l`` (stored as
        ``labels_file``, default ``None``) and ``--num-components`` (int,
        default ``None``) are added.

        Args:
            parser: The ``argparse`` parser (or compatible object) that
                receives the arguments.
        """
        families_help = (
            'When set to True, the semi-supervision is based on the families '
            'instead of the binary labels. Useless if an unsupervised '
            'projection method is used.')
        labels_help = (
            'CSV file containing the labels of some instances. '
            'These labels are used for semi-supervised projections.')

        Experiment.projectDatasetFeturesParser(parser)

        parser.add_argument('--families-supervision',
                            action='store_true',
                            default=False,
                            help=families_help)
        parser.add_argument('--labels', '-l',
                            dest='labels_file',
                            default=None,
                            help=labels_help)
        parser.add_argument('--num-components', type=int, default=None)
Exemple #4
0
 def generateParser(parser):
     """Register the clustering and projection arguments on ``parser``.

     ``Experiment.projectDatasetFeturesParser`` is called first. The
     clustering options ``--num-clusters`` and ``--label`` are then added
     directly on ``parser``, and ``--projection-algo``,
     ``--families-supervision`` and ``--labels`` / ``-l`` are added to a
     'Projection parameters' argument group.

     Args:
         parser: The ``argparse`` parser (or compatible object) that
             receives the arguments.
     """
     # Help texts, gathered up front so the registrations below stay short.
     label_help = ''.join([
         'The clustering is built from all the instances in the dataset, ',
         'or only from the benign or malicious ones. ',
         'By default, the clustering is built from all the instances. ',
         'The malicious and benign instances are selected according to ',
         'the ground truth labels stored in labels/true_labels.csv.',
     ])
     projection_algo_help = (
         'Projection performed before building the clustering. '
         'By default the instances are not projected.')
     families_help = (
         'When set to True, the semi-supervision is based on the families '
         'instead of the binary labels. Useless if an unsupervised '
         'projection method is used.')
     labels_file_help = (
         'CSV file containing the labels of some instances. '
         'These labels are used for semi-supervised projections.')

     # Generic arguments
     Experiment.projectDatasetFeturesParser(parser)

     # Clustering arguments
     parser.add_argument('--num-clusters', type=int, default=4)
     parser.add_argument('--label',
                         choices=['all', 'malicious', 'benign'],
                         default='all',
                         help=label_help)

     # Projection arguments
     projection_group = parser.add_argument_group('Projection parameters')
     projection_group.add_argument(
         '--projection-algo',
         choices=['Pca', 'Rca', 'Lda', 'Lmnn', 'Nca', 'Itml', None],
         default=None,
         help=projection_algo_help)
     projection_group.add_argument('--families-supervision',
                                   action='store_true',
                                   default=False,
                                   help=families_help)
     projection_group.add_argument('--labels', '-l',
                                   dest='labels_file',
                                   default=None,
                                   help=labels_file_help)