def runNaiveBayes(self):
    """Fit a Gaussian naive Bayes model on train + test data, then predict the test set.

    The test instances temporarily receive ``self.lr_predicted_labels`` as
    families so that they can be merged with a copy of the training
    instances and used for fitting. Their families are then reset to
    ``None`` before prediction.

    Attributes written: ``naive_bayes``, ``nb_time``,
    ``nb_predicted_log_proba``, ``nb_predicted_labels`` and
    ``nb_class_labels``.
    """
    conf = self.createNaiveBayesConf()
    test_set = self.datasets.test_instances

    # The naive Bayes classifier is trained on all the data: the test
    # instances are labelled with the previously predicted families and
    # merged into a deep copy of the training instances.
    test_set.annotations.setFamilies(list(self.lr_predicted_labels))
    all_datasets = ClassifierDatasets(None, conf.sample_weight)
    merged_instances = copy.deepcopy(self.datasets.train_instances)
    merged_instances.union(test_set)
    all_datasets.setDatasets(merged_instances, None)
    self.evalClusteringPerf(all_datasets.train_instances)

    # Train the naive Bayes detection model.
    self.naive_bayes = GaussianNaiveBayes(conf)
    self.naive_bayes.training(all_datasets)
    self.nb_time = self.naive_bayes.training_execution_time

    # Clear the temporary families on the test instances.
    num_test_instances = test_set.numInstances()
    test_set.annotations.setFamilies([None] * num_test_instances)

    # Log-probabilities (this call is not included in nb_time).
    if num_test_instances != 0:
        features = test_set.features.getValues()
        self.nb_predicted_log_proba = (
            self.naive_bayes.pipeline.predict_log_proba(features))
    else:
        self.nb_predicted_log_proba = []

    # Label prediction; its duration is added to the training time.
    predict_start = time.time()
    if num_test_instances != 0:
        features = test_set.features.getValues()
        self.nb_predicted_labels = self.naive_bayes.pipeline.predict(features)
    else:
        self.nb_predicted_labels = []
    self.nb_time += time.time() - predict_start

    self.nb_class_labels = self.naive_bayes.class_labels
class UpdateModel(object):
    """Train and evaluate every model configured for one iteration.

    After ``run()``, ``models`` maps each configured kind to its trained
    model and ``times`` maps each kind to the sum of that model's training
    and testing execution times.
    """

    def __init__(self, iteration):
        self.iteration = iteration
        # kind -> trained model
        self.models = {}
        # kind -> training + testing execution time
        self.times = {}

    def run(self):
        """Run ``runModel`` for each entry of the iteration's ``models_conf``."""
        for kind, model_conf in self.iteration.conf.models_conf.items():
            self.runModel(kind, model_conf)

    def runModel(self, kind, conf):
        """Build, train, test and validate the model described by ``conf``.

        The model and its execution time are stored under ``kind``.
        """
        self.setDatasets(conf)
        trained_model = conf.model_class(conf, cv_monitoring=False)
        trained_model.trainTestValidation(self.datasets)
        self.models[kind] = trained_model
        # Execution time monitoring
        self.times[kind] = (trained_model.training_execution_time
                            + trained_model.testing_execution_time)

    def setDatasets(self, conf):
        """Populate ``self.datasets`` from the iteration's datasets for ``conf``."""
        source = self.iteration.datasets
        self.datasets = ClassifierDatasets(None, conf.sample_weight)
        self.datasets.setDatasets(source.getTrainInstances(conf),
                                  source.getTestInstances())
        self.datasets.setValidationInstances(source.validation_instances)
def getMulticlassDatasets(self, model, alerts_ids):
    """Build the datasets used by a multiclass model on a set of alerts.

    Training data: the instances of ``self.datasets.train_instances``
    annotated with the ``labels_tools.MALICIOUS`` label.
    Test data: the instances of ``self.datasets.test_instances`` whose ids
    are in ``alerts_ids``.

    Returns a ``ClassifierDatasets`` configured with
    ``model.conf.sample_weight``.
    """
    multiclass_datasets = ClassifierDatasets(None, model.conf.sample_weight)
    malicious_train = self.datasets.train_instances.getAnnotatedInstances(
        label=labels_tools.MALICIOUS)
    alert_instances = self.datasets.test_instances.getInstancesFromIds(
        alerts_ids)
    multiclass_datasets.setDatasets(malicious_train, alert_instances)
    return multiclass_datasets
def trainNaiveBayes(self):
    """Train a Gaussian naive Bayes model on the assigned categories.

    The families of ``self.instances`` are temporarily replaced with
    ``self.assigned_categories`` so the model is fitted on them, and the
    original families are put back afterwards.

    The restore step runs in a ``finally`` clause: if building or training
    the model raises, ``self.instances`` still gets its original families
    back before the exception propagates.

    Returns the trained ``GaussianNaiveBayes`` model.
    """
    naive_bayes_conf = self.getNaiveBayesConf()
    datasets = ClassifierDatasets(None, naive_bayes_conf.sample_weight)
    # Deep copy so the saved families cannot be affected by the overwrite.
    current_families = copy.deepcopy(
        self.instances.annotations.getFamilies())
    try:
        # families are altered
        self.instances.annotations.setFamilies(self.assigned_categories)
        datasets.setDatasets(self.instances, None)
        naive_bayes = GaussianNaiveBayes(naive_bayes_conf)
        naive_bayes.training(datasets)
    finally:
        # families are restored, even when training failed
        self.instances.annotations.setFamilies(current_families)
    return naive_bayes
def buildMulticlassClassifier(self):
    """Prepare the multiclass datasets and, if needed, build the model.

    The datasets use ``self.annotated_instances`` for training and the
    instances identified by ``self.predicted_ids`` for testing. When
    ``self.multiclass_model`` is ``None`` a new model is created with
    cross-validation monitoring enabled, trained, tested, and validated
    if validation instances are available.

    Returns a ``(train_instances, test_instances)`` tuple taken from the
    multiclass datasets.
    """
    conf = self.getMulticlassConf()
    iteration_datasets = self.iteration.datasets
    predicted = iteration_datasets.getInstancesFromIds(self.predicted_ids)
    multiclass_datasets = ClassifierDatasets(None, conf.sample_weight)
    multiclass_datasets.setDatasets(self.annotated_instances, predicted)

    # NOTE(review): the nesting below was reconstructed from source whose
    # indentation was lost; it assumes an already-provided model is reused
    # as-is (not retrained) - confirm against the original layout.
    if self.multiclass_model is None:
        model = conf.model_class(conf, cv_monitoring=True)
        self.multiclass_model = model
        model.training(multiclass_datasets)
        model.testing(multiclass_datasets)
        if multiclass_datasets.validation_instances is not None:
            model.validation(multiclass_datasets)

    return (multiclass_datasets.train_instances,
            multiclass_datasets.test_instances)