class CrossValidation(object): dts = None #metodo utilizado para classifacao classifier = None #conjunto de dados de teste teste_sub_data_set = None #conjunto de dados de treinamento training_sub_data_set = None evaluate = None #numero de folds k = 1 file_path = "" class_name = "" #caminho da pasta onde serao salvos os resultados result_path = "" preprocessor = None def __init__(self): print("init") self.evaluate = EvaluateModule() def run(self): self.classifier.setResultPath(self.result_path) self.classifier.setClass_name(self.class_name) self.foldExecution() def foldExecution(self): i = self.iteration for self.iteration in range(i, (self.k + 1)): tempo_inicio = time.time() self.loadTrainingData() self.loadTestData() #executa funcoes para transformacao de dados categoricos if self.preprocessor: self.preprocessor.setDataSet(self.training_sub_data_set) self.preprocessor.setTestDataSet(self.teste_sub_data_set) self.training_sub_data_set, self.teste_sub_data_set = self.preprocessor.transformCategory( ) #seta dados de treinamento e teste no classificador self.classifier.setDataSet(self.training_sub_data_set) self.classifier.setTestDataSet(self.teste_sub_data_set) #seta iteracao do cross no classficador self.classifier.setIteration(self.iteration) #executa o processo de treino e teste do classificador self.classifier.run() del (self.training_sub_data_set) self.loadTestData() #seta conjunto de dados original de teste e iteracao atual do cross-validation na classe de avaliacao self.evaluate.setTestDataSet(self.teste_sub_data_set) self.evaluate.setIteration(self.iteration) self.evaluate.setClass_name(self.class_name) ''' #verifica quel o metodo de classificacao utilziado if(isinstance(self.classifier, RnaClassifier)): print("rna") self.evaluate.setResultPath( self.result_path) elif(isinstance(self.classifier, KnnClassifier)): print("knn") self.evaluate.setResultPath(self.result_path) elif(isinstance(self.classifier, SvmClassifier)): print("svm") self.evaluate.setResultPath(self.result_path) elif(isinstance(self.classifier, RfClassifier)): print("rf") self.evaluate.setResultPath(self.result_path) elif(isinstance(self.classifier, ClusteredKnnClassifier)): print("clustered knn") #self.evaluate.setResultPath(self.result_path) elif(isinstance(self.classifier, ClusteredDensityKnnClassifier)): print("clustered density knn") #self.evaluate.setResultPath(self.result_path) ''' if (isinstance(self.classifier, HybridClassifier)): print("hybrid") self.evaluate.setResultPath(self.result_path + "final_method_classification/") else: self.evaluate.setResultPath(self.result_path) tempo_execucao = time.time() - tempo_inicio self.evaluate.setTempoExecucao(tempo_execucao) self.evaluate.setTrainingTime(self.classifier.getTrainingTime()) self.evaluate.setTestTime(self.classifier.getTestTime()) #executa metodo de avaliacao self.evaluate.run() #carrega conjunto de treinamento de acordo coma iteracao atual do cross valiadation def loadTrainingData(self): for i in range(1, (self.k + 1)): if (((self.k + 1) - i) != self.iteration): new_sub_data_set = DataSet.loadSubDataSet(self.file_path + "sub_data_set_" + str(i) + ".csv") if (i == 1): self.training_sub_data_set = new_sub_data_set else: self.training_sub_data_set = DataSet.concatSubDataSet( self.training_sub_data_set, new_sub_data_set) del (new_sub_data_set) #self.training_sub_data_set = self.training_sub_data_set.reset_index() print(self.training_sub_data_set) #carrega conjunto de teste de acordo coma iteracao atual do cross valiadation def loadTestData(self): self.teste_sub_data_set = DataSet.loadSubDataSet(self.file_path + "sub_data_set_" + str((self.k + 1) - self.iteration) + ".csv") print(self.teste_sub_data_set) #self.teste_sub_data_set = self.teste_sub_data_set.reset_index() #print(self.teste_sub_data_set) #exit() def setIteration(self, iteration): self.iteration = iteration def setClassifier(self, classifier): self.classifier = classifier def getClassifier(self): return classifier def setPreprocessor(self, preprocessor): self.preprocessor = preprocessor def getPreprocessor(self): return preprocessor def setEvaluateModule(self, evaluate): self.evaluate = evaluate def getEvaluateModule(self): return evaluate def setFilePath(self, file_path): self.file_path = file_path def setResultPath(self, result_path): self.result_path = result_path def setK(self, k): self.k = k def setClass_name(self, class_name): self.class_name = class_name
class CrossValidation(object): dts = None #metodo utilizado para classifacao classifier = None #conjunto de dados de teste testData = None #conjunto de dados de treinamento trainingData = None evaluate = None #numero de folds numberOfFolds = 10 file_path = "" #caminho da pasta onde serao salvos os resultados result_path = "" preprocessor = None def __init__(self): # print("Cross Validation constructor") self.evaluate = EvaluateModule() def run(self): self.classifier.setResultPath(self.result_path) self.foldExecution() def foldExecution(self): i = self.iteration for self.iteration in range(i, (self.numberOfFolds + 1)): tempo_inicio = time.time() self.loadTrainingData() self.loadTestData() #executa funcoes para transformacao de dados categoricos if self.preprocessor: self.preprocessor.setDataSet(self.trainingData) self.preprocessor.setTestDataSet(self.testData) self.trainingData, self.testData = self.preprocessor.transformCategory( ) #seta dados de treinamento e teste no classificador self.classifier.setDataSet(self.trainingData) self.classifier.setTestDataSet(self.testData) #seta iteracao do cross no classficador self.classifier.setIteration(self.iteration) #executa o processo de treino e teste do classificador self.classifier.run() del (self.trainingData) # self.loadTestData() #seta conjunto de dados original de teste e iteracao atual do cross-validation na classe de avaliacao self.evaluate.setTestDataSet(self.testData) self.evaluate.setIteration(self.iteration) #verifica quel o metodo de classificacao utilziado if (isinstance(self.classifier, RnaClassifier)): print("rna") self.evaluate.setResultPath(self.result_path) elif (isinstance(self.classifier, KnnClassifier)): print("knn") # self.evaluate.setResultPath(self.result_path) elif (isinstance(self.classifier, ClusteredKnnClassifier)): print("clustered knn") #self.evaluate.setResultPath(self.result_path) elif (isinstance(self.classifier, ClusteredDensityKnnClassifier)): print("clustered density knn") #self.evaluate.setResultPath(self.result_path) elif (isinstance(self.classifier, HybridClassifier)): print("hybrid") annModel = self.classifier.getRna().getModel() self.saveModelToFile(annModel, 'ann') self.evaluate.setResultPath(self.result_path + "final_method_classification/") tempo_execucao = time.time() - tempo_inicio self.evaluate.setTempoExecucao(tempo_execucao) self.evaluate.setTrainingTime(self.classifier.getTrainingTime()) self.evaluate.setTestTime(self.classifier.getTestTime()) #executa metodo de avaliacao self.evaluate.run() #carrega conjunto de treinamento de acordo coma iteracao atual do cross valiadation def loadTrainingData(self): #exclude current cross validation iteration corresponding fold trainFolds = glob.glob(self.file_path + 'fold_[!' + str(self.iteration) + ']*.csv') self.trainingData = pd.concat( (pd.read_csv(fold) for fold in trainFolds)) #carrega conjunto de teste de acordo com a iteracao atual do cross validation def loadTestData(self): self.testData = DataSet.loadSubDataSet(self.file_path + "fold_" + str(self.iteration) + ".csv") def setIteration(self, iteration): self.iteration = iteration def setClassifier(self, classifier): self.classifier = classifier def getClassifier(self): return classifier def setPreprocessor(self, preprocessor): self.preprocessor = preprocessor def getPreprocessor(self): return preprocessor def setEvaluateModule(self, evaluate): self.evaluate = evaluate def getEvaluateModule(self): return evaluate def setFilePath(self, file_path): self.file_path = file_path def setResultPath(self, result_path): directory = os.path.dirname(result_path) if not os.path.exists(directory): os.makedirs(directory) self.result_path = result_path def setK(self, k): self.numberOfFolds = k def saveModelToFile(self, model, prefix): directory = os.path.dirname(self.file_path + 'models/') if not os.path.exists(directory): os.makedirs(directory) fileName = directory + prefix + '_' + str(self.iteration - 1) pickle.dump(model, open(fileName, 'wb')) print('[' + str(datetime.datetime.now()).split('.')[0] + '] ' + fileName + ' saved [')