def classify(runID, trainingSet, testingSet):
    """Write ARFF train/test files and classify with multinomial naive Bayes."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayesMultinomial", [])
def classify(runID, trainingSet, testingSet):
    """Write ARFF files, optionally run a deep-learning pre-step, then LibSVM.

    Fix: the original kept a large commented-out wekaAPI.execute call as a bare
    triple-quoted string statement (dead code evaluated at runtime); removed.
    """
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Deep-learning pre-processing step (selected by config).
    if config.DEEP_LEARNING_METHOD != -1:
        if config.DEEP_LEARNING_METHOD == 1:
            # NOTE(review): result discarded here, unlike method 2 — confirm intended.
            logistic_sgd_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 2:
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 3:
            mlp_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 4:
            SdA_2.runDL([trainingFile, testingFile])

    if config.NUM_MONITORED_SITES != -1:
        # Open-world dataset generation only; skip classification.
        accuracy, debugInfo = 'NA', []
        return [accuracy, debugInfo]

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(
            trainingFile, testingFile,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-K', '2',                        # RBF kernel
             '-G', '0.0000019073486328125',    # Gamma
             '-Z',                             # normalization
             '-C', '131072'])                  # Cost
    else:
        # Join and shuffle both splits into one file for k-fold CV.
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)
        return wekaAPI.executeCrossValidation(
            file,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K', '2',                           # RBF kernel
             '-G', '0.0000019073486328125',       # Gamma
             '-Z',                                # normalization
             '-C', '131072'])                     # Cost
def classify(runID, trainingSet, testingSet):
    """Write ARFF train/test files and classify with an RBF-kernel LibSVM."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    svmOptions = ['-K', '2',                      # RBF kernel
                  '-G', '0.0000019073486328125',  # Gamma
                  '-C', '131072']                 # Cost
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.Run weka.classifiers.functions.LibSVM", svmOptions)
def classify(runID, trainingSet, testingSet):
    """Classify with naive Bayes, optionally autoencoding the features first."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    if config.AE != -1:
        # Replace both ARFF files with their autoencoder-transformed versions.
        trainingFile, testingFile = dA_2.calcAE([trainingFile, testingFile])
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    """Naive Bayes classification: train/test split, or k-fold cross-validation."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
    # Join and shuffle both splits into one file for cross-validation.
    file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)
    return wekaAPI.executeCrossValidation(
        file,
        "weka.classifiers.bayes.NaiveBayes",
        ['-x', str(config.CROSS_VALIDATION),  # number of folds
         '-K'])
def classify(runID, trainingSet, testingSet):
    """Optionally reduce dimensionality (PCA/LDA/QDA), then run naive Bayes."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    if config.n_components_PCA != 0:
        trainingFile, testingFile = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        trainingFile, testingFile = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        trainingFile, testingFile = Utils.calcQDA([trainingFile, testingFile])
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    """Optionally reduce dimensionality (PCA/LDA/QDA), then run LibSVM."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    if config.n_components_PCA != 0:
        trainingFile, testingFile = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        trainingFile, testingFile = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        trainingFile, testingFile = Utils.calcQDA([trainingFile, testingFile])
    svmOptions = ['-K', '2',                      # RBF kernel
                  '-G', '0.0000019073486328125',  # Gamma
                  '-Z',                           # normalization
                  '-C', '131072']                 # Cost
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.Run weka.classifiers.functions.LibSVM", svmOptions)
def classify(runID, trainingSet, testingSet):
    """Optional deep-learning pre-step, then naive Bayes (split or k-fold CV)."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.DEEP_LEARNING_METHOD != -1:
        method = config.DEEP_LEARNING_METHOD
        if method == 1:
            logistic_sgd_2.runDL([trainingFile, testingFile])  # result discarded
        elif method == 2:
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
        elif method == 3:
            mlp_2.runDL([trainingFile, testingFile])           # result discarded
        elif method == 4:
            SdA_2.runDL([trainingFile, testingFile])           # result discarded

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
    # Join and shuffle both splits into one file for cross-validation.
    file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)
    return wekaAPI.executeCrossValidation(
        file,
        "weka.classifiers.bayes.NaiveBayes",
        ['-x', str(config.CROSS_VALIDATION),  # number of folds
         '-K'])
def classify(runID, trainingSet, testingSet):
    """Deep-learning transform or classification; fall through to naive Bayes.

    Methods 1-2 transform the ARFF files; methods 3-7 are DL classifiers and
    return their own result directly.
    """
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.DEEP_LEARNING_METHOD != -1:
        method = config.DEEP_LEARNING_METHOD
        if method == 1:
            trainingFile, testingFile = logistic_sgd_2.runDL([trainingFile, testingFile])
        elif method == 2:
            # Two stacked dA passes (two layers).
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
        elif method == 3:
            return mlp_2.runDL([trainingFile, testingFile])
        elif method == 4:
            return SdA_2.runDL([trainingFile, testingFile])
        elif method == 5:
            return mlp_3.runDL([trainingFile, testingFile])
        elif method == 6:
            return SdA_3.runDL([trainingFile, testingFile])
        elif method == 7:
            return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    """Write ARFF train/test files and classify with naive Bayes ('-K' option)."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    """Full pipeline: optional dimensionality reduction, lasso/logistic step and
    deep learning, then multi-class or one-class LibSVM (split or k-fold CV)."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    trainingFileOrig, testingFileOrig = trainingFile, testingFile  # kept for parity with original

    if config.NUM_MONITORED_SITES != -1:
        # Open-world dataset generation only; no classification (see arffWriter call above).
        accuracy, debugInfo = 'NA', []
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        trainingFile, testingFile = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        trainingFile, testingFile = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        trainingFile, testingFile = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        # NOTE(review): return value discarded — files are not replaced here; confirm intended.
        Utils.calcLogisticRegression([trainingFile, testingFile])

    # Deep learning: methods 1-2 transform the files, 3-7 classify directly.
    if config.DEEP_LEARNING_METHOD != -1:
        method = config.DEEP_LEARNING_METHOD
        if method == 1:
            trainingFile, testingFile = logistic_sgd_2.runDL([trainingFile, testingFile])
        elif method == 2:
            # Two stacked dA passes (two layers).
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
            trainingFile, testingFile = dA_2.runDL([trainingFile, testingFile])
        elif method == 3:
            return mlp_2.runDL([trainingFile, testingFile])
        elif method == 4:
            return SdA_2.runDL([trainingFile, testingFile])
        elif method == 5:
            return mlp_3.runDL([trainingFile, testingFile])
        elif method == 6:
            return SdA_3.runDL([trainingFile, testingFile])
        elif method == 7:
            return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

    if config.OC_SVM == 0:
        # Multi-class SVM.
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', '2',                        # RBF kernel
                 '-G', '0.0000019073486328125',    # Gamma
                 '-Z',                             # normalization
                 '-C', '131072',                   # Cost
                 '-B'])                            # confidence
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # Gamma
                 '-Z',                                # normalization
                 '-C', '131072',                      # Cost
                 '-B'])                               # confidence
    else:
        # One-class SVM.
        if config.CROSS_VALIDATION == 0:
            print(str(config.SVM_KERNEL))
            print(str(config.OC_SVM_Nu))
            return wekaAPI.executeOneClassSVM(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', str(config.SVM_KERNEL),     # kernel from config
                 '-Z',                             # normalization
                 '-N', str(config.OC_SVM_Nu),      # nu
                 '-S', '2'])                       # one-class svm
        else:
            # NOTE(review): CV branch uses hard-coded RBF options rather than
            # config.SVM_KERNEL/OC_SVM_Nu — preserved from original; confirm intended.
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # Gamma
                 '-Z',                                # normalization
                 '-C', '131072',                      # Cost
                 '-B'])                               # confidence
def classify(runID, trainingSet, testingSet):
    """Optional dimensionality reduction and lasso step, then multi-class LibSVM
    (train/test split or k-fold cross-validation).

    Fix: removed a dead triple-quoted label-noise block that was kept as a bare
    string statement (evaluated and discarded at runtime).
    """
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.NUM_MONITORED_SITES != -1:
        # Open-world dataset generation only; no classification (see arffWriter call above).
        accuracy, debugInfo = 'NA', []
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        trainingFile, testingFile = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        trainingFile, testingFile = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        trainingFile, testingFile = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        # NOTE(review): return value discarded — files are not replaced here; confirm intended.
        Utils.calcLogisticRegression([trainingFile, testingFile])

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(
            trainingFile, testingFile,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-K', '2',                        # RBF kernel
             '-G', '0.0000019073486328125',    # Gamma
             '-Z',                             # normalization
             '-C', '131072',                   # Cost
             '-B'])                            # confidence
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeCrossValidation(
            file,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K', '2',                           # RBF kernel
             '-G', '0.0000019073486328125',       # Gamma
             '-Z',                                # normalization
             '-C', '131072',                      # Cost
             '-B'])                               # confidence
def classify(runID, trainingSet, testingSet):
    """Optional dimensionality reduction, lasso step and tree-based feature
    selection, then multi-class or one-class LibSVM (split or k-fold CV)."""
    trainingFile, testingFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.NUM_MONITORED_SITES != -1:
        # Open-world dataset generation only; no classification (see arffWriter call above).
        accuracy, debugInfo = 'NA', []
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        trainingFile, testingFile = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        trainingFile, testingFile = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        trainingFile, testingFile = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        # NOTE(review): return value discarded — files are not replaced here; confirm intended.
        Utils.calcLogisticRegression([trainingFile, testingFile])
    if config.NUM_FEATURES_RF != 0:
        # Tree-based (random forest) feature selection.
        trainingFile, testingFile = Utils.calcTreeBaseRF([trainingFile, testingFile],
                                                         config.NUM_FEATURES_RF)

    if config.OC_SVM == 0:
        # Multi-class SVM.
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', '2',                        # RBF kernel
                 '-G', '0.0000019073486328125',    # Gamma
                 '-Z',                             # normalization
                 '-C', '131072',                   # Cost
                 '-B'])                            # confidence
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # Gamma
                 '-Z',                                # normalization
                 '-C', '131072',                      # Cost
                 '-B'])                               # confidence
    else:
        # One-class SVM.
        if config.CROSS_VALIDATION == 0:
            print(str(config.SVM_KERNEL))
            print(str(config.OC_SVM_Nu))
            return wekaAPI.executeOneClassSVM(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', str(config.SVM_KERNEL),     # kernel from config
                 '-Z',                             # normalization
                 '-N', str(config.OC_SVM_Nu),      # nu
                 '-S', '2'])                       # one-class svm
        else:
            # NOTE(review): CV branch uses hard-coded RBF options rather than
            # config.SVM_KERNEL/OC_SVM_Nu — preserved from original; confirm intended.
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # Gamma
                 '-Z',                                # normalization
                 '-C', '131072',                      # Cost
                 '-B'])                               # confidence
def __selectFeatureColumns(lines, featuresList):
    """Keep only the selected attribute lines / data columns of one ARFF file.

    Returns [newLines, numAllFeatures], where numAllFeatures is the column
    count minus the class column of the last data row seen (-1 if the file
    has no data rows).
    NOTE(review): featuresList indices are matched against (line index - 1),
    i.e. attribute line i maps to feature i-1 — confirm against the ARFF writer.
    """
    newLines = []
    numAllFeatures = -1
    for i in range(len(lines)):
        line = lines[i]
        if line.startswith('@'):
            if line.startswith('@ATTRIBUTE'):
                if (i - 1) in featuresList:
                    newLines.append(line)
            else:
                # @RELATION / @DATA and other headers are kept verbatim.
                newLines.append(line)
            if line.startswith('@ATTRIBUTE class'):
                # Class attribute is always kept (may duplicate if its index is
                # also in featuresList — behavior preserved from the original).
                newLines.append(line)
        else:
            instanceSplit = line.split(",")
            # Take the columns at the selected (0-based) feature indices.
            newInstance = [instanceSplit[j] for j in featuresList]
            newInstance.append(instanceSplit[-1])  # class label
            newLines.append(",".join(newInstance))
            numAllFeatures = len(instanceSplit) - 1
    return [newLines, numAllFeatures]


def __applyFeatureSelection(trainingFilename, testingFilename, outputFoldername, featuresFilename):
    """Re-run LibSVM after restricting the ARFF files to a selected feature subset.

    Classifies with the full feature set, rewrites the training/testing ARFF
    files keeping only the attribute indices listed in featuresFilename,
    classifies again, and appends both accuracies to the results file.

    Fix: the reduced ARFF files were written under outputFoldername but the
    second wekaAPI.execute call was given the bare filenames; both now use the
    joined path (os.path.join is a no-op when the filename is absolute).
    """
    svmOptions = ['-K', '2',                      # RBF kernel
                  '-G', '0.0000019073486328125',  # Gamma
                  '-Z',                           # normalization
                  '-C', '131072']                 # Cost

    # Baseline accuracy with every feature.
    [accuracy, debugInfo] = wekaAPI.execute(
        trainingFilename, testingFilename,
        "weka.Run weka.classifiers.functions.LibSVM", svmOptions)
    print(outputFoldername)
    print('accuracy before feature selection ' + str(accuracy))
    AccAllFeatures = str(accuracy)

    trainList = Utils.readFile(trainingFilename)
    testList = Utils.readFile(testingFilename)

    featuresList = Utils.readFile(featuresFilename)
    if len(featuresList) > 0:
        # Expected format: a "{i,j,k}" set on the first line.
        featuresList = featuresList[0].split("{")[1].split("}")[0].split(",")
    else:
        featuresList = [0, 1, 2]  # dummy features
    featuresList = sorted(int(i) for i in featuresList)
    NumSelectedFeatures = len(featuresList)

    [newTrainList, NumAllFeatures] = __selectFeatureColumns(trainList, featuresList)
    [newTestList, _] = __selectFeatureColumns(testList, featuresList)

    # Write the reduced ARFF files under outputFoldername.
    newTrainPath = os.path.join(outputFoldername, trainingFilename[:-5] + '_Features' + '.arff')
    fnewTrain = open(newTrainPath, 'w')
    for item in newTrainList:
        fnewTrain.write(item + '\n')
    fnewTrain.close()

    newTestPath = os.path.join(outputFoldername, testingFilename[:-5] + '_Features' + '.arff')
    fnewTest = open(newTestPath, 'w')
    for item in newTestList:
        fnewTest.write(item + '\n')
    fnewTest.close()

    # Accuracy with only the selected features (same paths as written above).
    [accuracy, debugInfo] = wekaAPI.execute(
        newTrainPath, newTestPath,
        "weka.Run weka.classifiers.functions.LibSVM", svmOptions)
    print('accuracy after feature selection ' + str(accuracy))
    AccSelectedFeatures = str(accuracy)

    output = [NumAllFeatures,
              NumSelectedFeatures,
              AccAllFeatures,
              AccSelectedFeatures,
              config.RUN_ID]
    writeOutputResultsFile(trainingFilename, output)
    print('')