def printResults():
    """Print the ground-truth labels next to the Naive Bayes predictions.

    The ground truth is loaded from the file named by ``testGroundTruthFileName``
    and the predictions are taken from ``predictionResult``; both names (and
    ``classifier``) are defined outside this function.
    """
    # File-backed data source that allocates its own numeric table and
    # builds the data dictionary from the file context.
    labelSource = FileDataSource(
        testGroundTruthFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    labelSource.loadDataBlock()

    groundTruthTable = labelSource.getNumericTable()
    predictedTable = predictionResult.get(classifier.prediction.prediction)

    printNumericTables(
        groundTruthTable, predictedTable,
        "Ground truth", "Classification results",
        "NaiveBayes classification results (first 20 observations):",
        20, interval=15, flt64=False
    )
def printResultsM():
    """Print multi-class SVM classification results and their quality metrics.

    Reads ``groundTruthLabels``, ``predictedLabels`` and
    ``qualityMetricSetResult``, which are defined outside this function.
    Prints the label comparison, the confusion matrix, and then one line per
    multi-class quality metric formatted to three decimals.
    """
    # Print the classification results
    printNumericTables(
        groundTruthLabels, predictedLabels,
        "Ground truth", "Classification results",
        "SVM classification results (first 20 observations):",
        20, interval=15, flt64=False
    )

    # Print the quality metrics
    qualityMetricResult = qualityMetricSetResult.getResult(
        multi_class_classifier.quality_metric_set.confusionMatrix
    )
    printNumericTable(
        qualityMetricResult.get(multiclass_confusion_matrix.confusionMatrix),
        "Confusion matrix:"
    )

    # (format template, index into the flattened metrics row), in print order.
    # Built before the block is acquired so a lookup failure cannot leak it.
    metricLines = (
        ("Average accuracy: {0:.3f}", multiclass_confusion_matrix.averageAccuracy),
        ("Error rate: {0:.3f}", multiclass_confusion_matrix.errorRate),
        ("Micro precision: {0:.3f}", multiclass_confusion_matrix.microPrecision),
        ("Micro recall: {0:.3f}", multiclass_confusion_matrix.microRecall),
        ("Micro F-score: {0:.3f}", multiclass_confusion_matrix.microFscore),
        ("Macro precision: {0:.3f}", multiclass_confusion_matrix.macroPrecision),
        ("Macro recall: {0:.3f}", multiclass_confusion_matrix.macroRecall),
        ("Macro F-score: {0:.3f}", multiclass_confusion_matrix.macroFscore),
    )

    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(
        multiclass_confusion_matrix.multiClassMetrics
    )
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    try:
        qualityMetricsData = block.getArray().flatten()
        for template, metricIndex in metricLines:
            print(template.format(qualityMetricsData[metricIndex]))
    finally:
        # Always hand the block back, even if reading or printing fails.
        qualityMetricsTable.releaseBlockOfRows(block)
def printResultsB():
    """Print binary SVM classification results and their quality metrics.

    Reads ``groundTruthLabels``, ``predictedLabels`` and
    ``qualityMetricSetResult``, which are defined outside this function.
    Prints the label comparison, the confusion matrix, and then one line per
    binary quality metric formatted to three decimals.
    """
    # Print the classification results
    printNumericTables(
        groundTruthLabels, predictedLabels,
        "Ground truth", "Classification results",
        "SVM classification results (first 20 observations):",
        20, interval=15, flt64=False
    )

    # Print the quality metrics
    qualityMetricResult = qualityMetricSetResult.getResult(
        svm.quality_metric_set.confusionMatrix
    )
    printNumericTable(
        qualityMetricResult.get(binary_confusion_matrix.confusionMatrix),
        "Confusion matrix:"
    )

    # (format template, index into the flattened metrics row), in print order.
    # Built before the block is acquired so a lookup failure cannot leak it.
    metricLines = (
        ("Accuracy: {0:.3f}", binary_confusion_matrix.accuracy),
        ("Precision: {0:.3f}", binary_confusion_matrix.precision),
        ("Recall: {0:.3f}", binary_confusion_matrix.recall),
        ("F-score: {0:.3f}", binary_confusion_matrix.fscore),
        ("Specificity: {0:.3f}", binary_confusion_matrix.specificity),
        ("AUC: {0:.3f}", binary_confusion_matrix.AUC),
    )

    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(
        binary_confusion_matrix.binaryMetrics
    )
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    try:
        qualityMetricsData = block.getArray().flatten()
        for template, metricIndex in metricLines:
            print(template.format(qualityMetricsData[metricIndex]))
    finally:
        # Always hand the block back, even if reading or printing fails.
        qualityMetricsTable.releaseBlockOfRows(block)
# Input locations for the dense Naive Bayes Spark sample.
trainDataFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_train_?.csv"
trainDataLabelsFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_train_labels_?.csv"
testDataFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_test_1.csv"
testDataLabelsFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_test_labels_1.csv"

# Read the training data and labels from a specified path
trainDataAndLabelsRDD = getMergedDataAndLabelsRDD(trainDataFilesPath, trainDataLabelsFilesPath, sc)

# Read the test data and labels from a specified path
testDataAndLabelsRDD = getMergedDataAndLabelsRDD(testDataFilesPath, testDataLabelsFilesPath, sc)

# Compute the results of the Naive Bayes algorithm for dataRDD
result = runNaiveBayes(trainDataAndLabelsRDD, testDataAndLabelsRDD)

# Print the results
parts_List = testDataAndLabelsRDD.collect()
# NOTE(review): only the labels of the last collected element survive this
# loop; presumably the test set is a single partition -- confirm.
for _, (_, t2) in parts_List:
    expected = deserializeNumericTable(t2)

# Redirect stdout to a file for correctness verification. The file is closed
# and stdout is restored even if printing raises.
stdout = sys.stdout
with open('NaiveBayesDense.out', 'w') as outFile:
    sys.stdout = outFile
    try:
        printNumericTables(expected, result, "Ground truth", "Classification results",
                           "NaiveBayes classification results (first 20 observations):", 20, flt64=False)
    finally:
        # Restore stdout
        sys.stdout = stdout

sc.stop()
# Print accuracy. The format string needs a placeholder; without one the
# accuracy value is silently dropped and only the word "Accuracy" is printed.
print("Accuracy: {}".format(qualityMet.get('accuracy')))
# Print confusion matrix
printNumericTable(qualityMet.get('confusionMatrix'),"Confusion Matrix")
# Print all metrics
print("All available metrics")
daal_DF.printAllQualityMetrics(qualityMet)
# Serialize and save the training result
daal_DF.serialize(trainingResult, fileName='DF', useCompression=True)
# Deserialize it back from the saved file
dese_trainingRes = daal_DF.deserialize(fileName='DF.npy', useCompression=True)
# Print predicted responses and actual responses
printNumericTables (
    testGroundTruth, predictResults,
    "Ground truth", "Classification results",
    "Decision Forest classification results (first 20 observations):", 20, flt64=False
)
#*****Multi-classifier
print("**** Multi-Classifier****")
data = load_digits()
x = data.data
y = data.target
# Hold out 40% of the samples for testing; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.40, random_state=42)
trainData = HomogenNumericTable(x_train)
testData=HomogenNumericTable(x_test)
# Add a trailing axis to each label vector before wrapping it in a numeric table.
nD_y_train= y_train[:,np.newaxis]
trainDependentVariables= HomogenNumericTable(nD_y_train)
nD_y_test = y_test[:,np.newaxis]
testGroundTruth = HomogenNumericTable(nD_y_test)
# Wrap every label in its own single-element row before building the tables.
z = [[x] for x in y_train]
trainDependentVariables = HomogenNumericTable(z)
z = [[x] for x in y_test]
testData = HomogenNumericTable(x_test)
testGroundTruth = HomogenNumericTable(z)

# Instantiate SVM object
# MultiSVM(nClasses, method="boser", C = 1, tolerence = 0.001, tau = 0.000001,
#          maxIterations = 1000000, cacheSize = 8000000, doShrinking = True,
#          kernel = 'linear', sigma = 0,k=1, b=0,dtype=float64)
daal_svm = MultiSVM(10, cacheSize=600000000)
# Training
trainingResult = daal_svm.training(trainData, trainDependentVariables)
# Predict
predictResponses = daal_svm.predict(trainingResult, testData)
# Serialize
daal_svm.serialize(trainingResult, fileName='svm')
# Deserialize
dese_trainingRes = daal_svm.deserialize(fileName='svm.npy')
# Or predict with quality metrics, using the deserialized training result
predictResponses, metrics = daal_svm.predictWithQualityMetrics(
    dese_trainingRes, testData, testGroundTruth
)
daal_svm.printAllQualityMetrics(metrics)
# Title corrected from "SVN" to "SVM" -- this is SVM output.
printNumericTables(
    testGroundTruth, predictResponses,
    "Ground truth\t", "Classification results",
    "SVM classification results (first 20 observations):", 20, flt64=False
)