def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional deep-learning step.
    if config.DEEP_LEARNING_METHOD != -1:
        if config.DEEP_LEARNING_METHOD == 1:
            logistic_sgd_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 2:
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 3:
            mlp_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 4:
            SdA_2.runDL([trainingFile, testingFile])

    # Open-world dataset generation only; no classification needed.
    if config.NUM_MONITORED_SITES != -1:
        [accuracy, debugInfo] = ['NA', []]
        return [accuracy, debugInfo]

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(
            trainingFile, testingFile,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-K', '2',                      # RBF kernel
             '-G', '0.0000019073486328125',  # gamma = 2^-19
             '-Z',                           # normalization 18 May 2015
             '-C', '131072'])                # cost = 2^17
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeCrossValidation(
            file,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K', '2',                           # RBF kernel
             '-G', '0.0000019073486328125',       # gamma = 2^-19
             '-Z',                                # normalization 18 May 2015
             '-C', '131072'])                     # cost = 2^17
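# Illustrative sketch (not part of the original pipeline): the LibSVM flags
# above encode gamma = 2**-19 (0.0000019073486328125) and cost C = 2**17
# (131072). Assuming plain numeric arrays instead of the ARFF files used
# here, a roughly equivalent scikit-learn call would be:
def _svm_rbf_sketch(X_train, y_train, X_test):
    from sklearn.svm import SVC
    clf = SVC(kernel='rbf', gamma=2 ** -19, C=2 ** 17)
    clf.fit(X_train, y_train)
    return clf.predict(X_test)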
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional dimensionality reduction.
    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])

    classifier = "svm"
    kwargs = {}
    kwargs['C'] = 2 ** 11
    kwargs['kernel'] = 'rbf'
    kwargs['gamma'] = 2

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.executeSklearn(trainingFile, testingFile, classifier, **kwargs)
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeSklearnCrossValidationScaleWithRange(
            file, classifier, config.CROSS_VALIDATION, (-1, 1), **kwargs)  # CV with features scaled to [-1, 1]
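# Illustrative sketch: executeSklearnCrossValidationScaleWithRange is assumed
# to scale features into the given range before cross-validating. With plain
# arrays, the same idea in scikit-learn:
def _scaled_svm_cv_sketch(X, y, folds):
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.svm import SVC
    from sklearn.model_selection import cross_val_score
    pipe = make_pipeline(MinMaxScaler(feature_range=(-1, 1)),
                         SVC(C=2 ** 11, kernel='rbf', gamma=2))
    return cross_val_score(pipe, X, y, cv=folds)  # one accuracy score per fold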
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayesMultinomial", [])
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.CLUSTERING_METHOD == 1:
        clusteringAPI.calcKmeans([trainingFile, testingFile], "Description goes here!")
    elif config.CLUSTERING_METHOD == 2:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
        clusteringAPI.calcKmeans([trainingFile, testingFile], "Description goes here!")
    elif config.CLUSTERING_METHOD == 3:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
        clusteringAPI.calcKmeansCvxHullDelaunay(
            [trainingFile, testingFile], "Description goes here!")
    elif config.CLUSTERING_METHOD == 4:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
        clusteringAPI.calcKmeansCvxHullDelaunay_Mixed(
            [trainingFile, testingFile], "Description goes here!")
    elif config.CLUSTERING_METHOD == 5:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
        clusteringAPI.calcKmeansCvxHullDelaunay_Mixed_KNN(
            [trainingFile, testingFile], "Description goes here!", threshold=3)

    # Clustering runs report no accuracy.
    return ['NA', []]
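# Illustrative sketch: the PCA-then-KMeans combination behind clustering
# methods 2-5 above, written directly against scikit-learn with plain arrays
# instead of ARFF files (calcPCA2 and the clusteringAPI internals are assumed):
def _pca_kmeans_sketch(X, n_components, n_clusters):
    from sklearn.decomposition import PCA
    from sklearn.cluster import KMeans
    X_reduced = PCA(n_components=n_components).fit_transform(X)
    return KMeans(n_clusters=n_clusters).fit_predict(X_reduced)  # cluster id per instance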
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    return wekaAPI.execute(
        trainingFile, testingFile,
        "weka.Run weka.classifiers.functions.LibSVM",
        ['-K', '2',                      # RBF kernel
         '-G', '0.0000019073486328125',  # gamma = 2^-19
         '-C', '131072'])                # cost = 2^17
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional autoencoder feature transformation.
    if config.AE != -1:
        [trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile])

    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes",
                           ['-K'])  # kernel density estimation instead of a Gaussian
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional dimensionality reduction.
    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])

    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes",
                           ['-K'])  # kernel density estimation
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional dimensionality reduction.
    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])

    return wekaAPI.execute(
        trainingFile, testingFile,
        "weka.Run weka.classifiers.functions.LibSVM",
        ['-K', '2',                      # RBF kernel
         '-G', '0.0000019073486328125',  # gamma = 2^-19
         '-Z',                           # normalization 18 May 2015
         '-C', '131072'])                # cost = 2^17
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeCrossValidation(
            file,
            "weka.classifiers.bayes.NaiveBayes",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K'])
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Optional deep-learning step.
    if config.DEEP_LEARNING_METHOD != -1:
        if config.DEEP_LEARNING_METHOD == 1:
            logistic_sgd_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 2:
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 3:
            mlp_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 4:
            SdA_2.runDL([trainingFile, testingFile])

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeCrossValidation(
            file,
            "weka.classifiers.bayes.NaiveBayes",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K'])
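# Illustrative sketch: the if/elif ladders that map config.DEEP_LEARNING_METHOD
# codes to runDL calls recur across these classify variants; a dispatch table
# expresses the same mapping once (the pass-through behavior for runners that
# do not return files is an assumption about their contracts):
_DL_RUNNERS = {1: logistic_sgd_2.runDL, 2: dA_2.runDL, 3: mlp_2.runDL, 4: SdA_2.runDL}

def _run_deep_learning_sketch(trainingFile, testingFile):
    runner = _DL_RUNNERS.get(config.DEEP_LEARNING_METHOD)
    if runner is None:
        return [trainingFile, testingFile]  # unknown code: leave files untouched
    result = runner([trainingFile, testingFile])
    return result if result is not None else [trainingFile, testingFile]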
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # Deep learning: methods 1-2 transform the feature files and fall through to
    # Naive Bayes; methods 3-7 are classifiers and return their results directly.
    if config.DEEP_LEARNING_METHOD != -1:
        if config.DEEP_LEARNING_METHOD == 1:
            [trainingFile, testingFile] = logistic_sgd_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 2:
            # Two stacked denoising-autoencoder layers.
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 3:
            return mlp_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 4:
            return SdA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 5:
            return mlp_3.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 6:
            return SdA_3.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 7:
            return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # No need to classify; this run only generates open-world datasets
    # (see the arffWriter call above).
    if config.NUM_MONITORED_SITES != -1:
        [accuracy, debugInfo] = ['NA', []]
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        Utils.calcLogisticRegression([trainingFile, testingFile])

    # Optional label-noise injection, currently disabled:
    # if config.DATA_SOURCE in (62, 63, 64, 65) and config.LABEL_NOISE_RATIO != 0:
    #     [trainingFile, testingFile] = Utils.makeLabelNoise(
    #         [trainingFile, testingFile], config.LABEL_NOISE_RATIO)

    if config.CROSS_VALIDATION == 0:
        return wekaAPI.execute(
            trainingFile, testingFile,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-K', '2',                      # RBF kernel
             '-G', '0.0000019073486328125',  # gamma = 2^-19
             '-Z',                           # normalization 18 May 2015
             '-C', '131072',                 # cost = 2^17
             '-B'])                          # output confidence estimates
    else:
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
        return wekaAPI.executeCrossValidation(
            file,
            "weka.Run weka.classifiers.functions.LibSVM",
            ['-x', str(config.CROSS_VALIDATION),  # number of folds
             '-K', '2',                           # RBF kernel
             '-G', '0.0000019073486328125',       # gamma = 2^-19
             '-Z',                                # normalization 18 May 2015
             '-C', '131072',                      # cost = 2^17
             '-B'])                               # output confidence estimates
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    return wekaAPI.execute(trainingFile, testingFile,
                           "weka.classifiers.bayes.NaiveBayes", ['-K'])
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA6([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])

    classifier = "RF"
    kwargs = {}
    kwargs['n_estimators'] = 500  # number of trees / length of the leaf fingerprint
    kwargs['criterion'] = "gini"
    kwargs['oob_score'] = True
    kwargs['n_jobs'] = 3

    if config.NUM_MONITORED_SITES == -1 and config.NUM_NON_MONITORED_SITES == -1:
        # Closed world: hand the files to the sklearn wrapper.
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.executeSklearn(trainingFile, testingFile, classifier, **kwargs)
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeSklearnCrossValidation(
                file, classifier, config.CROSS_VALIDATION, **kwargs)
    else:
        # Open world: k-NN over random-forest leaf fingerprints.
        trainList = wekaAPI.readFile(trainingFile)
        testList = wekaAPI.readFile(testingFile)
        trainInstancesList = []
        testInstancesList = []
        classes = ""
        yTrain = []
        yTest = []

        for line in trainList:
            if line[0] == '@':
                if line.lower().startswith("@attribute class"):
                    classes = line.split(" ")[2]
            else:
                trainInstancesList.append([float(i) for i in line.split(",")[:-1]])
                yTrain.append(line.split(",")[-1])

        for line in testList:
            if line[0] != '@':
                testInstancesList.append([float(i) for i in line.split(",")[:-1]])
                yTest.append(line.split(",")[-1])

        XTr = numpy.array(trainInstancesList)
        yTr = numpy.array(yTrain)
        XTe = numpy.array(testInstancesList)
        yTe = numpy.array(yTest)

        clf = RandomForestClassifier(**kwargs)
        clf.fit(XTr, yTr)  # the forest must be fitted before apply() can be called

        print('Generating Leaves...')
        # apply() returns, per instance, the index of the leaf it lands in for
        # each of the 500 trees: a "leaf fingerprint" of the instance.
        training_leaves = clf.apply(XTr)
        test_leaves = clf.apply(XTe)
        training_leaves = [numpy.array(training_leaf, dtype=int)
                           for training_leaf in training_leaves]
        test_leaves = [numpy.array(test_leaf, dtype=int) for test_leaf in test_leaves]

        true_positive = 0
        false_positive = 0
        knn = 3  # k value
        debugInfo = []

        print('Calculating Distances...')
        for test_leaf_idx in range(len(test_leaves)):
            test_leaf = test_leaves[test_leaf_idx]  # array of leaf indices
            true_label = yTe[test_leaf_idx]
            dist_predicted_labels = []  # list of (distance, predicted_label) pairs
            for training_leaf_idx in range(len(training_leaves)):
                training_leaf = training_leaves[training_leaf_idx]  # array of leaf indices
                predicted_label = yTr[training_leaf_idx]
                # Hamming distance: fraction of trees in which the two instances
                # fall into different leaves.
                distance = numpy.sum(training_leaf != test_leaf) / float(training_leaf.size)
                if distance == 1.0:
                    continue
                dist_predicted_labels.append((distance, predicted_label))

            closest_distances_labels = sorted(dist_predicted_labels)[:knn]

            # Vote: classify as a monitored page only if all k neighbors agree;
            # otherwise fall back to the non-monitored label.
            labels = [label for _, label in closest_distances_labels]
            if len(set(labels)) == 1:
                classified_label = labels[0]
            else:
                classified_label = config.binaryLabels[1]  # webpageNonMon

            debugInfo.append([true_label, classified_label])  # for debug purposes

            if true_label != config.binaryLabels[1] and true_label == classified_label:
                true_positive += 1
            if true_label == config.binaryLabels[1] and true_label != classified_label:
                false_positive += 1

        # yTe is a numpy array, which has no count(); convert to a list first.
        num_unmonitored_test_instances = list(yTe).count(config.binaryLabels[1])
        num_monitored_test_instances = len(yTe) - num_unmonitored_test_instances

        true_positive_rate = true_positive / float(num_monitored_test_instances)
        false_positive_rate = false_positive / float(num_unmonitored_test_instances)

        print("True Positive Count = %d / %d" % (true_positive, num_monitored_test_instances))
        print("False Positive Count = %d / %d" % (false_positive, num_unmonitored_test_instances))
        print("True Positive Rate: %s" % true_positive_rate)
        print("False Positive Rate: %s" % false_positive_rate)

        result = [true_positive_rate, false_positive_rate]
        return [result, debugInfo]
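# Illustrative, self-contained sketch of the leaf-fingerprint k-NN above:
# two instances are "close" when most trees in the forest route them to the
# same leaf. All data below is synthetic.
def _leaf_knn_demo():
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    X, y = make_classification(n_samples=60, n_features=8, random_state=0)
    forest = RandomForestClassifier(n_estimators=50, random_state=0).fit(X[:50], y[:50])
    train_leaves = forest.apply(X[:50])  # shape (50, 50): one leaf index per tree
    test_leaves = forest.apply(X[50:])   # shape (10, 50)
    for fingerprint, true_label in zip(test_leaves, y[50:]):
        dists = (train_leaves != fingerprint).mean(axis=1)  # Hamming distance
        neighbors = np.argsort(dists)[:3]                   # k = 3
        print(true_label, y[:50][neighbors])  # true label vs labels of 3 nearest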
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
    [trainingFileOrig, testingFileOrig] = [trainingFile, testingFile]

    # No need to classify; this run only generates open-world datasets
    # (see the arffWriter call above).
    if config.NUM_MONITORED_SITES != -1:
        [accuracy, debugInfo] = ['NA', []]
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        Utils.calcLogisticRegression([trainingFile, testingFile])

    # Deep learning: methods 1-2 transform the feature files and fall through to
    # the SVM branches below; methods 3-7 classify and return directly.
    if config.DEEP_LEARNING_METHOD != -1:
        if config.DEEP_LEARNING_METHOD == 1:
            [trainingFile, testingFile] = logistic_sgd_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 2:
            # Two stacked denoising-autoencoder layers.
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
            [trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 3:
            return mlp_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 4:
            return SdA_2.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 5:
            return mlp_3.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 6:
            return SdA_3.runDL([trainingFile, testingFile])
        elif config.DEEP_LEARNING_METHOD == 7:
            return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

    if config.OC_SVM == 0:
        # Multi-class SVM.
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', '2',                      # RBF kernel
                 '-G', '0.0000019073486328125',  # gamma = 2^-19
                 '-Z',                           # normalization 18 May 2015
                 '-C', '131072',                 # cost = 2^17
                 '-B'])                          # output confidence estimates
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # gamma = 2^-19
                 '-Z',                                # normalization 18 May 2015
                 '-C', '131072',                      # cost = 2^17
                 '-B'])                               # output confidence estimates
    else:
        # One-class SVM.
        if config.CROSS_VALIDATION == 0:
            print str(config.SVM_KERNEL)
            print str(config.OC_SVM_Nu)
            return wekaAPI.executeOneClassSVM(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', str(config.SVM_KERNEL),  # kernel type
                 '-Z',                          # normalization 18 May 2015
                 '-N', str(config.OC_SVM_Nu),   # nu
                 '-S', '2'])                    # one-class SVM
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # gamma = 2^-19
                 '-Z',                                # normalization 18 May 2015
                 '-C', '131072',                      # cost = 2^17
                 '-B'])                               # output confidence estimates
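# Illustrative sketch: the one-class branch above trains on monitored traffic
# only and flags everything dissimilar as non-monitored. The same idea with
# scikit-learn's OneClassSVM, assuming plain arrays:
def _one_class_svm_sketch(X_monitored_train, X_test, nu):
    from sklearn.svm import OneClassSVM
    oc = OneClassSVM(kernel='rbf', nu=nu)
    oc.fit(X_monitored_train)  # fit on the monitored class only
    return oc.predict(X_test)  # +1 = inlier (monitored-like), -1 = outlier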
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    if config.CROSS_VALIDATION == 0:
        # Parse the training and testing ARFF files separately.
        trainList = wekaAPI.readFile(trainingFile)
        testList = wekaAPI.readFile(testingFile)
        trainInstancesList = []
        testInstancesList = []
        classes = ""
        yTrain = []
        yTest = []

        for line in trainList:
            if line[0] == '@':
                if line.lower().startswith("@attribute class"):
                    classes = line.split(" ")[2]
            else:
                trainInstancesList.append([float(i) for i in line.split(",")[:-1]])
                yTrain.append(line.split(",")[-1])

        for line in testList:
            if line[0] != '@':
                testInstancesList.append([float(i) for i in line.split(",")[:-1]])
                yTest.append(line.split(",")[-1])
    else:
        # Join, shuffle, and re-split with a fixed-size test set.
        file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)
        fileList = wekaAPI.readFile(file)
        fileInstancesList = []
        y = []
        for line in fileList:
            if line[0] == '@':
                if line.lower().startswith("@attribute class"):
                    classes = line.split(" ")[2]
            else:
                fileInstancesList.append([float(i) for i in line.split(",")[:-1]])
                y.append(line.split(",")[-1])
        trainInstancesList, testInstancesList, yTrain, yTest = train_test_split(
            fileInstancesList, y, test_size=6000, random_state=42)

    XTr = numpy.array(trainInstancesList)
    yTr = numpy.array(yTrain)
    XTe = numpy.array(testInstancesList)
    yTe = numpy.array(yTest)

    # print "Scaling data..."
    # scaler = StandardScaler()
    # XTr = scaler.fit_transform(XTr)
    # XTe = scaler.fit_transform(XTe)

    if config.NUM_MONITORED_SITES == -1 and config.NUM_NON_MONITORED_SITES == -1:
        print "Closed-world"
    else:
        print "Open-world"

    print "Classification..."
    classifier = kNN()
    classifier.fit(XTr, yTr)
    prediction = classifier.predict(XTe)

    totalPredictions = 0
    totalCorrectPredictions = 0
    debugInfo = []
    for i in range(0, len(yTe)):
        actualClass = yTe[i]
        predictedClass = prediction[i]
        probEstimate = 'NA'  # kNN here provides no probability estimate
        debugInfo.append([actualClass, predictedClass, probEstimate])
        totalPredictions += 1.0
        if actualClass == predictedClass:
            totalCorrectPredictions += 1.0
    accuracy = totalCorrectPredictions / totalPredictions * 100.0
    print "Accuracy =", accuracy

    positive = [config.binaryLabels[0]]  # monitored ('webpageMon')
    negative = [config.binaryLabels[1]]  # non-monitored ('webpageNonMon')

    tp = tn = fp = fn = 0
    for entry in debugInfo:
        if entry[0] in positive:      # actual is positive
            if entry[1] in positive:  # predicted is positive too
                tp += 1
            else:                     # predicted is negative
                fn += 1
        elif entry[0] in negative:    # actual is negative
            if entry[1] in positive:  # predicted is positive
                fp += 1
            else:                     # predicted is negative too
                tn += 1

    tpr = str("%.4f" % (float(tp) / float(tp + fn)))
    fpr = str("%.4f" % (float(fp) / float(fp + tn)))
    Acc = str("%.4f" % (float(tp + tn) / float(tp + tn + fp + fn)))
    F1 = str("%.4f" % (float(2 * tp) / float((2 * tp) + fn + fp)))
    F2 = str("%.4f" % (float(5 * tp) / float((5 * tp) + (4 * fn) + fp)))  # beta = 2
    print "TPR, FPR, ACC, tp, tn, fp, fn, F1, F2"
    print tpr, fpr, Acc, tp, tn, fp, fn, F1, F2

    return [accuracy, debugInfo]
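# Illustrative sketch: the hand-rolled TPR/FPR/F-score bookkeeping above can
# be cross-checked against scikit-learn's metrics (pos_label/neg_label would
# be config.binaryLabels[0] and config.binaryLabels[1]):
def _binary_metrics_sketch(y_true, y_pred, pos_label, neg_label):
    from sklearn.metrics import confusion_matrix, fbeta_score
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred,
                                      labels=[neg_label, pos_label]).ravel()
    tpr = tp / float(tp + fn)
    fpr = fp / float(fp + tn)
    f2 = fbeta_score(y_true, y_pred, beta=2, pos_label=pos_label)
    return tpr, fpr, f2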
def classify(runID, trainingSet, testingSet):
    [trainingFile, testingFile] = arffWriter.writeArffFiles(runID, trainingSet, testingSet)

    # No need to classify; this run only generates open-world datasets
    # (see the arffWriter call above).
    if config.NUM_MONITORED_SITES != -1:
        [accuracy, debugInfo] = ['NA', []]
        return [accuracy, debugInfo]

    if config.n_components_PCA != 0:
        [trainingFile, testingFile] = Utils.calcPCA2([trainingFile, testingFile])
    if config.n_components_LDA != 0:
        [trainingFile, testingFile] = Utils.calcLDA4([trainingFile, testingFile])
    if config.n_components_QDA != 0:
        [trainingFile, testingFile] = Utils.calcQDA([trainingFile, testingFile])
    if config.lasso != 0:
        Utils.calcLogisticRegression([trainingFile, testingFile])

    # Optional tree-based feature selection.
    if config.NUM_FEATURES_RF != 0:
        [trainingFile, testingFile] = Utils.calcTreeBaseRF(
            [trainingFile, testingFile], config.NUM_FEATURES_RF)

    if config.OC_SVM == 0:
        # Multi-class SVM.
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', '2',                      # RBF kernel
                 '-G', '0.0000019073486328125',  # gamma = 2^-19
                 '-Z',                           # normalization 18 May 2015
                 '-C', '131072',                 # cost = 2^17
                 '-B'])                          # output confidence estimates
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # gamma = 2^-19
                 '-Z',                                # normalization 18 May 2015
                 '-C', '131072',                      # cost = 2^17
                 '-B'])                               # output confidence estimates
    else:
        # One-class SVM.
        if config.CROSS_VALIDATION == 0:
            print str(config.SVM_KERNEL)
            print str(config.OC_SVM_Nu)
            return wekaAPI.executeOneClassSVM(
                trainingFile, testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-K', str(config.SVM_KERNEL),  # kernel type
                 '-Z',                          # normalization 18 May 2015
                 '-N', str(config.OC_SVM_Nu),   # nu
                 '-S', '2'])                    # one-class SVM
        else:
            file = Utils.joinTrainingTestingFiles(trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                ['-x', str(config.CROSS_VALIDATION),  # number of folds
                 '-K', '2',                           # RBF kernel
                 '-G', '0.0000019073486328125',       # gamma = 2^-19
                 '-Z',                                # normalization 18 May 2015
                 '-C', '131072',                      # cost = 2^17
                 '-B'])                               # output confidence estimates
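# Illustrative sketch: calcTreeBaseRF above is assumed to keep the
# config.NUM_FEATURES_RF features that a random forest ranks most important;
# with plain numpy arrays the selection looks like this:
def _rf_feature_selection_sketch(X_train, y_train, X_test, num_features):
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    forest = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
    top = np.argsort(forest.feature_importances_)[::-1][:num_features]
    return X_train[:, top], X_test[:, top]  # keep only the top-ranked columns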