Ejemplo n.º 1
0
class UpdateModel(object):
    """Train one model per entry of the iteration's model configuration.

    Attributes:
        iteration: object exposing ``conf.models_conf`` and ``datasets``.
        models: maps each model kind to the model built for it.
        times: maps each model kind to its training + testing time.
    """

    def __init__(self, iteration):
        self.iteration = iteration
        self.models = {}
        self.times = {}

    def run(self):
        """Call ``runModel`` for every ``(kind, conf)`` in ``models_conf``."""
        configurations = self.iteration.conf.models_conf
        for kind, model_conf in configurations.items():
            self.runModel(kind, model_conf)

    def runModel(self, kind, conf):
        """Build the model described by ``conf`` and record it under ``kind``.

        The datasets are rebuilt for ``conf`` first; the model is created
        with ``cv_monitoring=False`` and run through ``trainTestValidation``.
        """
        self.setDatasets(conf)
        trained = conf.model_class(conf, cv_monitoring=False)
        trained.trainTestValidation(self.datasets)
        self.models[kind] = trained

        # Execution time monitoring: training and testing are summed.
        self.times[kind] = (trained.training_execution_time
                            + trained.testing_execution_time)

    def setDatasets(self, conf):
        """Rebuild ``self.datasets`` from the iteration's datasets.

        Train instances are requested for ``conf``; test and validation
        instances are taken from the iteration's datasets as they are.
        """
        source = self.iteration.datasets
        self.datasets = ClassifierDatasets(None, conf.sample_weight)
        train_instances = source.getTrainInstances(conf)
        test_instances = source.getTestInstances()
        self.datasets.setDatasets(train_instances, test_instances)
        self.datasets.setValidationInstances(source.validation_instances)
Ejemplo n.º 2
0
 def runNaiveBayes(self):
     """Train a Gaussian naive Bayes model on all the data, then predict.

     The test instances temporarily take ``self.lr_predicted_labels`` as
     families so that they can be merged into a copy of the training
     instances. After training, their families are reset to ``None`` and
     the model's pipeline is applied to the test features.

     Sets ``naive_bayes``, ``nb_time``, ``nb_predicted_log_proba``,
     ``nb_predicted_labels`` and ``nb_class_labels`` on ``self``.
     """
     nb_conf = self.createNaiveBayesConf()
     # The naive Bayes classifier is trained on all the data, so the test
     # instances are given the previously predicted labels as families.
     predicted_families = list(self.lr_predicted_labels)
     self.datasets.test_instances.annotations.setFamilies(predicted_families)
     full_datasets = ClassifierDatasets(None, nb_conf.sample_weight)
     merged_instances = copy.deepcopy(self.datasets.train_instances)
     merged_instances.union(self.datasets.test_instances)
     full_datasets.setDatasets(merged_instances, None)
     self.evalClusteringPerf(full_datasets.train_instances)

     # Train the naive Bayes detection model.
     classifier = GaussianNaiveBayes(nb_conf)
     self.naive_bayes = classifier
     classifier.training(full_datasets)
     self.nb_time = classifier.training_execution_time

     # Clear the families borrowed for training before predicting.
     test_set = self.datasets.test_instances
     test_count = test_set.numInstances()
     test_set.annotations.setFamilies([None] * test_count)

     # The timer starts after the log-probabilities are computed, so only
     # the label prediction is added to nb_time.
     if test_count == 0:
         self.nb_predicted_log_proba = []
         predict_start = time.time()
         self.nb_predicted_labels = []
     else:
         pipeline = classifier.pipeline
         self.nb_predicted_log_proba = pipeline.predict_log_proba(
             test_set.features.getValues())
         predict_start = time.time()
         self.nb_predicted_labels = pipeline.predict(
             test_set.features.getValues())
     self.nb_time += time.time() - predict_start
     self.nb_class_labels = classifier.class_labels
Ejemplo n.º 3
0
 def getMulticlassDatasets(self, model, alerts_ids):
     """Return datasets built from malicious train data and given alerts.

     The train part holds the instances of ``self.datasets.train_instances``
     annotated with ``labels_tools.MALICIOUS``; the test part holds the
     instances of ``self.datasets.test_instances`` selected by
     ``alerts_ids``. The sample weight comes from ``model.conf``.
     """
     multiclass_datasets = ClassifierDatasets(None,
                                              model.conf.sample_weight)
     all_train = self.datasets.train_instances
     malicious_train = all_train.getAnnotatedInstances(
         label=labels_tools.MALICIOUS)
     all_test = self.datasets.test_instances
     alert_instances = all_test.getInstancesFromIds(alerts_ids)
     multiclass_datasets.setDatasets(malicious_train, alert_instances)
     return multiclass_datasets
Ejemplo n.º 4
0
 def trainNaiveBayes(self):
     """Train a Gaussian naive Bayes model on the assigned categories.

     The families of ``self.instances`` are temporarily replaced by
     ``self.assigned_categories`` while the model is trained, then put
     back to their previous values.

     Returns:
         The trained ``GaussianNaiveBayes`` model.

     Raises:
         Whatever dataset construction or training raises; the original
         families are restored before the exception propagates.
     """
     naive_bayes_conf = self.getNaiveBayesConf()
     datasets = ClassifierDatasets(None, naive_bayes_conf.sample_weight)
     current_families = copy.deepcopy(
         self.instances.annotations.getFamilies())
     # The families are altered only for the duration of the training.
     self.instances.annotations.setFamilies(self.assigned_categories)
     try:
         datasets.setDatasets(self.instances, None)
         naive_bayes = GaussianNaiveBayes(naive_bayes_conf)
         naive_bayes.training(datasets)
     finally:
         # Restore the families even when training fails, so that
         # self.instances is never left with the altered families.
         self.instances.annotations.setFamilies(current_families)
     return naive_bayes
Ejemplo n.º 5
0
 def buildMulticlassClassifier(self):
     """Build the multiclass datasets and, if needed, the multiclass model.

     The datasets pair ``self.annotated_instances`` (train) with the
     instances of the iteration's datasets selected by
     ``self.predicted_ids`` (test). A model is created, trained, tested
     and optionally validated only when ``self.multiclass_model`` is
     still ``None``.

     Returns:
         The ``(train_instances, test_instances)`` pair of the datasets.
     """
     conf = self.getMulticlassConf()
     iteration_datasets = self.iteration.datasets
     to_predict = iteration_datasets.getInstancesFromIds(self.predicted_ids)
     multiclass_datasets = ClassifierDatasets(None, conf.sample_weight)
     multiclass_datasets.setDatasets(self.annotated_instances, to_predict)
     if self.multiclass_model is None:
         model = conf.model_class(conf, cv_monitoring=True)
         self.multiclass_model = model
         model.training(multiclass_datasets)
         model.testing(multiclass_datasets)
         has_validation = multiclass_datasets.validation_instances is not None
         if has_validation:
             model.validation(multiclass_datasets)
     return (multiclass_datasets.train_instances,
             multiclass_datasets.test_instances)
 def generateDatasets(self):
     """Load the instances and generate the datasets for this experiment.

     Test instances are loaded from ``self.test_exp`` only when the test
     method is ``'dataset'``. Cross-validation style methods use
     ``CvClassifierDatasets``; every other method uses
     ``ClassifierDatasets``.

     Returns:
         The datasets object after ``generateDatasets`` has been called.
     """
     instances = InstancesFromExperiment(self).getInstances()
     test_conf = self.conf.test_conf
     if test_conf.method == 'dataset':
         test_instances = InstancesFromExperiment(
             self.test_exp).getInstances()
     else:
         test_instances = None
     cv_methods = ('cv', 'temporal_cv', 'sliding_window')
     if test_conf.method not in cv_methods:
         datasets = ClassifierDatasets(test_conf, self.conf.sample_weight)
     else:
         datasets = CvClassifierDatasets(test_conf,
                                         self.conf.families_supervision,
                                         self.conf.sample_weight)
     datasets.generateDatasets(instances, test_instances)
     return datasets
Ejemplo n.º 7
0
 def generateDatasets(self, classification_conf, instances, test_instances):
     """Create ``ClassifierDatasets`` for ``self`` and populate them.

     ``self`` is passed as the first constructor argument and the sample
     weight is read from ``classification_conf``.

     Returns:
         The datasets after ``generateDatasets(instances, test_instances)``.
     """
     sample_weight = classification_conf.sample_weight
     generated = ClassifierDatasets(self, sample_weight)
     generated.generateDatasets(instances, test_instances)
     return generated
Ejemplo n.º 8
0
 def setDatasets(self, conf):
     """Rebuild ``self.datasets`` from the iteration's datasets.

     Train instances are requested for ``conf``; test and validation
     instances are taken from the iteration's datasets as they are.
     """
     source = self.iteration.datasets
     self.datasets = ClassifierDatasets(None, conf.sample_weight)
     train_instances = source.getTrainInstances(conf)
     test_instances = source.getTestInstances()
     self.datasets.setDatasets(train_instances, test_instances)
     self.datasets.setValidationInstances(source.validation_instances)