Beispiel #1
0
class UpdateModel(object):
    """Train and test every model configured for an iteration.

    The trained models are stored in ``self.models`` and the sum of their
    training and testing execution times in ``self.times``. Both dicts use
    the keys of ``iteration.conf.models_conf``.
    """

    def __init__(self, iteration):
        """Keep a reference to ``iteration`` and create empty result dicts."""
        self.iteration = iteration
        self.models = {}
        self.times = {}

    def run(self):
        """Call ``runModel`` once for each entry of ``models_conf``."""
        models_conf = self.iteration.conf.models_conf
        # items() is available on Python 2 and Python 3 mappings alike,
        # whereas iteritems() only exists on Python 2.
        for kind, conf in models_conf.items():
            self.runModel(kind, conf)

    def runModel(self, kind, conf):
        """Train and test the model described by ``conf``.

        The model is also validated when the datasets hold validation
        instances. The model and its execution time are stored under
        ``kind``. Nothing is returned.
        """
        self.setDatasets(conf)

        model = conf.model_class(conf, self.datasets, cv_monitoring=False)
        model.training()
        model.testing()
        if self.datasets.validation_instances is not None:
            model.validation()
        self.models[kind] = model

        # Execution time monitoring (validation time is not included).
        self.times[kind] = (model.training_execution_time +
                            model.testing_execution_time)

    def setDatasets(self, conf):
        """Rebuild ``self.datasets`` for ``conf`` from the iteration datasets."""
        al_datasets = self.iteration.datasets
        self.datasets = ClassifierDatasets(conf)
        self.datasets.setDatasets(al_datasets.getTrainInstances(conf),
                                  al_datasets.getTestInstances())
        self.datasets.setValidationInstances(al_datasets.validation_instances)
Beispiel #2
0
 def run(self):
     """Build the datasets of this experiment and run its classifier.

     A separate test set is loaded only when the test configuration
     method is 'test_dataset'; otherwise None is handed to
     ``generateDatasets``.
     """
     train_data = InstancesFromExperiment(self).getInstances()
     conf = self.classification_conf
     test_conf = conf.test_conf
     if test_conf.method == 'test_dataset':
         held_out = InstancesFromExperiment(test_conf.test_exp).getInstances()
     else:
         held_out = None
     classifier_datasets = ClassifierDatasets(conf)
     classifier_datasets.generateDatasets(train_data, held_out)
     classifier = conf.model_class(conf, classifier_datasets)
     classifier.run(self.getOutputDirectory(), self)
Beispiel #3
0
class TrainTestValidation(object):
    """Run every configured model of an iteration as its own experiment.

    For each entry of ``iteration.experiment.conf.models_conf`` a child
    ``ClassificationExperiment`` is created and the model is run in it.
    Models and execution times are stored in ``self.models`` and
    ``self.times``, keyed like ``models_conf``.
    """

    def __init__(self, iteration):
        """Keep a reference to ``iteration`` and create empty result dicts."""
        self.iteration = iteration
        self.models = {}
        self.times = {}

    def run(self):
        """Run all the configured models, then export their experiment ids."""
        models_conf = self.iteration.experiment.conf.models_conf
        self.models_exp = {}
        # items() is available on Python 2 and Python 3 mappings alike,
        # whereas iteritems() only exists on Python 2.
        for kind, conf in models_conf.items():
            self.models_exp[kind] = self.runModel(kind, conf)
        self.exportModelsExperiments()

    def exportModelsExperiments(self):
        """Write the experiment id of each model to 'models_experiments.json'.

        The file name is appended as is to ``iteration.output_directory``,
        so that directory is expected to end with a path separator.
        """
        export_models = {kind: exp.experiment_id
                         for kind, exp in self.models_exp.items()}
        output_file = self.iteration.output_directory
        output_file += 'models_experiments.json'
        with open(output_file, 'w') as f:
            json.dump(export_models, f, indent=2)

    def runModel(self, kind, conf):
        """Create a child experiment for ``conf`` and run its model in it.

        Returns the created ``ClassificationExperiment``. The model and
        the sum of its training and testing execution times are stored
        under ``kind``.
        """
        self.setDatasets(conf)
        # Create the experiment
        exp = self.iteration.experiment
        name = '-'.join(['AL' + str(exp.experiment_id),
                         'Iter' + str(self.iteration.iteration_number),
                         kind])
        model_exp = ClassificationExperiment(exp.project,
                                             exp.dataset,
                                             exp.session,
                                             experiment_name=name,
                                             labels_id=exp.labels_id,
                                             parent=exp.experiment_id)
        model_exp.setFeaturesFilenames(exp.features_filenames)
        model_exp.setClassifierConf(conf)
        model_exp.createExperiment()
        model_exp.export()
        # Build the model
        model = conf.model_class(model_exp.classification_conf,
                                 self.datasets,
                                 cv_monitoring=True)
        model.run(model_exp.getOutputDirectory(), model_exp)
        self.models[kind] = model
        # Execution time monitoring
        self.times[kind] = (model.training_execution_time +
                            model.testing_execution_time)
        return model_exp

    def setDatasets(self, conf):
        """Rebuild ``self.datasets`` for ``conf`` from the iteration datasets."""
        al_datasets = self.iteration.datasets
        self.datasets = ClassifierDatasets(conf)
        self.datasets.setDatasets(al_datasets.getTrainInstances(conf),
                                  al_datasets.getTestInstances())
        self.datasets.setValidationInstances(al_datasets.validation_instances)
 def runNaiveBayes(self):
     """Train a Gaussian naive Bayes model on all the data and predict.

     The test instances temporarily receive ``self.lr_predicted_labels``
     as families and are merged into a copy of the train instances to
     form the training set. Once the model is trained, the families of
     the test instances are reset to None and the test instances are
     predicted (empty lists are stored when there is no test instance).
     ``self.nb_time`` holds the training time plus the time spent in
     ``predict``.
     """
     nb_conf = self.createNaiveBayesConf()
     # Update training data - the naive Bayes classifier is trained on all the data
     self.datasets.test_instances.families = list(self.lr_predicted_labels)
     merged_datasets = ClassifierDatasets(nb_conf)
     merged_train = copy.deepcopy(self.datasets.train_instances)
     merged_train.union(self.datasets.test_instances)
     merged_datasets.train_instances = merged_train
     merged_datasets.test_instances = None
     merged_datasets.setSampleWeights()
     self.evalClusteringPerf(merged_datasets.train_instances)
     # Train the naive Bayes detection model and predict
     self.naive_bayes = GaussianNaiveBayes(nb_conf, merged_datasets)
     self.naive_bayes.training()
     self.nb_time = self.naive_bayes.training_execution_time
     test_count = self.datasets.test_instances.numInstances()
     self.datasets.test_instances.families = [None] * test_count
     has_test_instances = test_count != 0
     if has_test_instances:
         test_features = self.datasets.test_instances.getFeatures()
         self.nb_predicted_log_proba = self.naive_bayes.pipeline.predict_log_proba(
             test_features)
     else:
         self.nb_predicted_log_proba = []
     # Only the label prediction below is added to the execution time.
     prediction_start = time.time()
     if has_test_instances:
         test_features = self.datasets.test_instances.getFeatures()
         self.nb_predicted_labels = self.naive_bayes.pipeline.predict(
             test_features)
     else:
         self.nb_predicted_labels = []
     self.nb_time += time.time() - prediction_start
     self.nb_class_labels = self.naive_bayes.class_labels
Beispiel #5
0
 def buildMulticlassClassifier(self):
     """Build and run the multiclass model unless it already exists.

     The model is trained on ``self.annotated_instances`` and tested on
     the instances whose ids are in ``self.predicted_ids``.
     """
     if self.multiclass_model is not None:
         # The model has already been built: nothing to do.
         return
     experiment = self.createMulticlassExperiment()
     to_predict = self.iteration.datasets.getInstancesFromIds(
         self.predicted_ids)
     conf = experiment.classification_conf
     model_datasets = ClassifierDatasets(experiment, conf)
     model_datasets.train_instances = self.annotated_instances
     model_datasets.test_instances = to_predict
     model_datasets.setSampleWeights()
     self.multiclass_model = conf.model_class(experiment,
                                              model_datasets,
                                              cv_monitoring=True)
     self.multiclass_model.run()
Beispiel #6
0
 def buildMulticlassClassifier(self, alerts_ids):
     """Run a multiclass model on the alerts and return it.

     The model is trained on the train instances whose ids are returned
     by ``getMaliciousIds`` and tested on the test instances whose ids
     are in ``alerts_ids``.
     """
     experiment = self.createMulticlassExperiment()
     conf = experiment.classification_conf
     model_datasets = ClassifierDatasets(experiment, conf)
     train_pool = self.datasets.train_instances
     model_datasets.train_instances = train_pool.getInstancesFromIds(
         train_pool.getMaliciousIds())
     test_pool = self.datasets.test_instances
     model_datasets.test_instances = test_pool.getInstancesFromIds(alerts_ids)
     model_datasets.setSampleWeights()
     classifier = conf.model_class(experiment,
                                   model_datasets,
                                   cv_monitoring=False)
     classifier.run()
     return classifier
Beispiel #7
0
 def trainNaiveBayes(self):
     """Train a Gaussian naive Bayes model on the assigned categories.

     ``self.instances.families`` is temporarily replaced by
     ``self.assigned_categories`` for the training, and restored
     afterwards, even when the training raises.

     Returns the trained ``GaussianNaiveBayes`` model.
     """
     naive_bayes_conf = self.getNaiveBayesConf()
     datasets = ClassifierDatasets(naive_bayes_conf)
     current_families = copy.deepcopy(self.instances.families)
     # families are altered
     self.instances.families = self.assigned_categories
     try:
         datasets.train_instances = self.instances
         datasets.test_instances = None
         datasets.setSampleWeights()
         naive_bayes = GaussianNaiveBayes(naive_bayes_conf, datasets)
         naive_bayes.training()
     finally:
         # families are restored, so that a failed training does not leave
         # the instances with the assigned categories as families
         self.instances.families = current_families
     return naive_bayes
 def runNaiveBayes(self):
     """Train a Gaussian naive Bayes model on all the data and predict.

     A child experiment is created for the model. The test instances
     temporarily receive ``self.lr_predicted_labels`` as families and are
     merged with the train instances to form the training set. Once the
     model is trained, the families of the test instances are reset to
     None and the test instances are predicted. ``self.nb_time`` holds
     the training time plus the time spent in ``predict``.

     When there is no test instance, the pipeline is not called and empty
     lists are stored as predictions.
     """
     # Create an experiment for the naive Bayes model
     exp = self.iteration.experiment
     name = '-'.join([
         'AL' + str(exp.experiment_id),
         'Iter' + str(self.iteration.iteration_number), 'all', 'NaiveBayes'
     ])
     naive_bayes_exp = ClassificationExperiment(
         exp.project,
         exp.dataset,
         exp.db,
         exp.cursor,
         experiment_name=name,
         experiment_label=exp.experiment_label,
         parent=exp.experiment_id)
     naive_bayes_exp.setFeaturesFilenames(exp.features_filenames)
     test_conf = TestConfiguration()
     test_conf.setUnlabeled(labels_annotations='annotations')
     naive_bayes_conf = GaussianNaiveBayesConfiguration(
         exp.conf.models_conf['multiclass'].num_folds, False, True,
         test_conf)
     naive_bayes_exp.setClassifierConf(naive_bayes_conf)
     naive_bayes_exp.createExperiment()
     naive_bayes_exp.export()
     # Update training data - the naive Bayes classifier is trained on all the data
     self.datasets.test_instances.families = list(self.lr_predicted_labels)
     all_datasets = ClassifierDatasets(naive_bayes_exp,
                                       naive_bayes_exp.classification_conf)
     train_instances = Instances()
     train_instances.union(self.datasets.train_instances,
                           self.datasets.test_instances)
     all_datasets.train_instances = train_instances
     all_datasets.test_instances = None
     all_datasets.setSampleWeights()
     self.evalClusteringPerf(all_datasets.train_instances)
     # Train the naive Bayes detection model and predict
     self.naive_bayes = GaussianNaiveBayes(naive_bayes_exp, all_datasets)
     self.naive_bayes.training()
     self.nb_time = self.naive_bayes.training_execution_time
     num_test_instances = self.datasets.test_instances.numInstances()
     self.datasets.test_instances.families = [None] * num_test_instances
     # NOTE(review): the pipeline is presumably a scikit-learn one, which
     # rejects inputs without any sample - hence the explicit handling of
     # an empty test set below. Confirm against the pipeline definition.
     if num_test_instances == 0:
         self.nb_predicted_log_proba = []
     else:
         self.nb_predicted_log_proba = self.naive_bayes.pipeline.predict_log_proba(
             self.datasets.test_instances.getFeatures())
     # Only the label prediction below is added to the execution time.
     start_time = time.time()
     if num_test_instances == 0:
         self.nb_predicted_labels = []
     else:
         self.nb_predicted_labels = self.naive_bayes.pipeline.predict(
             self.datasets.test_instances.getFeatures())
     self.nb_time += time.time() - start_time
     self.nb_class_labels = self.naive_bayes.class_labels
Beispiel #9
0
 def trainNaiveBayes(self, iteration_number):
     """Train a Gaussian naive Bayes model on the assigned categories.

     The model is trained in the experiment returned by
     ``createNaiveBayesExperiment(iteration_number)``.
     ``self.instances.families`` is temporarily replaced by
     ``self.assigned_categories`` for the training, and restored
     afterwards, even when the training raises.

     Returns the trained ``GaussianNaiveBayes`` model.
     """
     naive_bayes_exp = self.createNaiveBayesExperiment(iteration_number)
     # Train the naive Bayes detection model and predict
     datasets = ClassifierDatasets(naive_bayes_exp, naive_bayes_exp.classification_conf)
     current_families = copy.deepcopy(self.instances.families)
     # families are altered
     self.instances.families = self.assigned_categories
     try:
         datasets.train_instances = self.instances
         datasets.test_instances = None
         datasets.setSampleWeights()
         naive_bayes = GaussianNaiveBayes(naive_bayes_exp, datasets)
         naive_bayes.training()
     finally:
         # families are restored, so that a failed training does not leave
         # the instances with the assigned categories as families
         self.instances.families = current_families
     return naive_bayes
 def buildMulticlassClassifier(self):
     """Train and test the multiclass model unless it already exists.

     The model is trained on ``self.annotated_instances`` and tested on
     the instances whose ids are in ``self.predicted_ids``. It is also
     validated when its datasets hold validation instances.
     """
     if self.multiclass_model is not None:
         # The model has already been built: nothing to do.
         return
     conf = self.getMulticlassConf()
     to_predict = self.iteration.datasets.getInstancesFromIds(
         self.predicted_ids)
     model_datasets = ClassifierDatasets(conf)
     model_datasets.train_instances = self.annotated_instances
     model_datasets.test_instances = to_predict
     model_datasets.setSampleWeights()
     classifier = conf.model_class(conf, model_datasets, cv_monitoring=True)
     # The attribute is set before training, as the model must be
     # registered even if one of the following steps fails.
     self.multiclass_model = classifier
     classifier.training()
     classifier.testing()
     if model_datasets.validation_instances is not None:
         classifier.validation()
Beispiel #11
0
 def setDatasets(self, conf):
     """Rebuild ``self.datasets`` for ``conf`` from the iteration datasets.

     Train, test and validation instances all come from
     ``self.iteration.datasets``.
     """
     source = self.iteration.datasets
     classifier_datasets = ClassifierDatasets(conf)
     self.datasets = classifier_datasets
     train = source.getTrainInstances(conf)
     test = source.getTestInstances()
     classifier_datasets.setDatasets(train, test)
     classifier_datasets.setValidationInstances(source.validation_instances)