Example #1
def printResults():

    # Load the ground-truth labels for the test set
    testGroundTruth = FileDataSource(testGroundTruthFileName,
                                     DataSourceIface.doAllocateNumericTable,
                                     DataSourceIface.doDictionaryFromContext)
    testGroundTruth.loadDataBlock()

    # Print the ground truth next to the predicted labels
    printNumericTables(testGroundTruth.getNumericTable(),
                       predictionResult.get(classifier.prediction.prediction),
                       "Ground truth",
                       "Classification results",
                       "NaiveBayes classification results (first 20 observations):",
                       20,
                       interval=15,
                       flt64=False)
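The function above reads predictionResult, classifier, and testGroundTruthFileName from module scope; they are not shown in this excerpt. A minimal sketch of the prediction step that would populate predictionResult with pydaal's multinomial Naive Bayes, assuming trainingResult, testDatasetFileName, and nClasses are already defined (module paths may differ by DAAL version):

import daal.algorithms.multinomial_naive_bayes.prediction as prediction
from daal.algorithms import classifier
from daal.data_management import FileDataSource, DataSourceIface

def testModel():
    global predictionResult

    # Load the test feature vectors (testDatasetFileName is assumed to be defined)
    testDataSource = FileDataSource(testDatasetFileName,
                                    DataSourceIface.doAllocateNumericTable,
                                    DataSourceIface.doDictionaryFromContext)
    testDataSource.loadDataBlock()

    # Configure the prediction algorithm with the previously trained model
    algorithm = prediction.Batch(nClasses)
    algorithm.input.setTable(classifier.prediction.data, testDataSource.getNumericTable())
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))

    # printResults() later queries this result with classifier.prediction.prediction
    predictionResult = algorithm.compute()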
Example #2
def printResultsM():

    # Print the classification results
    printNumericTables(groundTruthLabels,
                       predictedLabels,
                       "Ground truth",
                       "Classification results",
                       "SVM classification results (first 20 observations):",
                       20,
                       interval=15,
                       flt64=False)
    # Print the quality metrics
    qualityMetricResult = qualityMetricSetResult.getResult(
        multi_class_classifier.quality_metric_set.confusionMatrix)
    printNumericTable(
        qualityMetricResult.get(multiclass_confusion_matrix.confusionMatrix),
        "Confusion matrix:")

    # Read the row of computed metrics into a flat array
    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(
        multiclass_confusion_matrix.multiClassMetrics)
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    qualityMetricsData = block.getArray().flatten()
    print("Average accuracy: {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.averageAccuracy]))
    print("Error rate:       {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.errorRate]))
    print("Micro precision:  {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.microPrecision]))
    print("Micro recall:     {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.microRecall]))
    print("Micro F-score:    {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.microFscore]))
    print("Macro precision:  {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.macroPrecision]))
    print("Macro recall:     {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.macroRecall]))
    print("Macro F-score:    {0:.3f}".format(
        qualityMetricsData[multiclass_confusion_matrix.macroFscore]))
    qualityMetricsTable.releaseBlockOfRows(block)
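printResultsM() assumes qualityMetricSetResult, groundTruthLabels, and predictedLabels were computed earlier. A sketch of how the multi-class quality metric set is typically evaluated in pydaal, assuming nClasses and the two label tables are already available (exact module paths may vary between DAAL releases):

from daal.algorithms.multi_class_classifier import quality_metric_set
from daal.algorithms.classifier.quality_metric import multiclass_confusion_matrix

# Feed ground-truth and predicted labels into the confusion-matrix metric
qualityMetricSet = quality_metric_set.Batch(nClasses)
qmInput = qualityMetricSet.getInputDataCollection().getInput(
    quality_metric_set.confusionMatrix)
qmInput.set(multiclass_confusion_matrix.predictedLabels, predictedLabels)
qmInput.set(multiclass_confusion_matrix.groundTruthLabels, groundTruthLabels)

# Compute the metrics and keep the result collection used by printResultsM()
qualityMetricSet.compute()
qualityMetricSetResult = qualityMetricSet.getResultCollection()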
Example #3
def printResultsB():

    # Print the classification results
    printNumericTables(groundTruthLabels,
                       predictedLabels,
                       "Ground truth",
                       "Classification results",
                       "SVM classification results (first 20 observations):",
                       20,
                       interval=15,
                       flt64=False)

    # Print the quality metrics
    qualityMetricResult = qualityMetricSetResult.getResult(
        svm.quality_metric_set.confusionMatrix)
    printNumericTable(
        qualityMetricResult.get(binary_confusion_matrix.confusionMatrix),
        "Confusion matrix:")

    # Read the row of computed metrics into a flat array
    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(
        binary_confusion_matrix.binaryMetrics)
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    qualityMetricsData = block.getArray().flatten()
    print("Accuracy:      {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.accuracy]))
    print("Precision:     {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.precision]))
    print("Recall:        {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.recall]))
    print("F-score:       {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.fscore]))
    print("Specificity:   {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.specificity]))
    print("AUC:           {0:.3f}".format(
        qualityMetricsData[binary_confusion_matrix.AUC]))
    qualityMetricsTable.releaseBlockOfRows(block)
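The two-class variant follows the same pattern; a sketch, assuming groundTruthLabels and predictedLabels are available (module paths may vary by DAAL version):

from daal.algorithms.svm import quality_metric_set
from daal.algorithms.classifier.quality_metric import binary_confusion_matrix

# Binary confusion-matrix metric for the two-class SVM
qualityMetricSet = quality_metric_set.Batch()
qmInput = qualityMetricSet.getInputDataCollection().getInput(
    quality_metric_set.confusionMatrix)
qmInput.set(binary_confusion_matrix.predictedLabels, predictedLabels)
qmInput.set(binary_confusion_matrix.groundTruthLabels, groundTruthLabels)
qualityMetricSet.compute()
qualityMetricSetResult = qualityMetricSet.getResultCollection()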
Example #4
    trainDataFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_train_?.csv"
    trainDataLabelsFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_train_labels_?.csv"
    testDataFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_test_1.csv"
    testDataLabelsFilesPath = "/Spark/NaiveBayesDense/data/NaiveBayesDense_test_labels_1.csv"

    # Read the training data and labels from a specified path
    trainDataAndLabelsRDD = getMergedDataAndLabelsRDD(trainDataFilesPath, trainDataLabelsFilesPath, sc)

    # Read the test data and labels from a specified path
    testDataAndLabelsRDD = getMergedDataAndLabelsRDD(testDataFilesPath, testDataLabelsFilesPath, sc)

    # Compute the results of the Naive Bayes algorithm for dataRDD
    result = runNaiveBayes(trainDataAndLabelsRDD, testDataAndLabelsRDD)

    # Collect the ground-truth labels from the test RDD; with a single test
    # partition, the loop leaves the deserialized labels table in 'expected'
    parts_List = testDataAndLabelsRDD.collect()
    for _, (t1, t2) in parts_List:
        expected = deserializeNumericTable(t2)

    # Redirect stdout to a file for correctness verification
    stdout = sys.stdout
    sys.stdout = open('NaiveBayesDense.out', 'w')

    printNumericTables(expected, result, "Ground truth", "Classification results",
                       "NaiveBayes classification results (first 20 observations):", 20, flt64=False)

    # Restore stdout
    sys.stdout = stdout

    sc.stop()
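The deserializeNumericTable helper used above comes from the DAAL Spark samples and is not shown here. A sketch of the usual serialize/deserialize pair, assuming numeric tables are shipped between Spark workers as NumPy byte buffers:

import numpy as np
from daal.data_management import HomogenNumericTable, InputDataArchive, OutputDataArchive

def serializeNumericTable(dataTable):
    # Pack a numeric table into a byte buffer
    dataArch = InputDataArchive()
    dataTable.serialize(dataArch)
    buf = np.zeros(dataArch.getSizeOfArchive(), dtype=np.ubyte)
    dataArch.copyArchiveToArray(buf)
    return buf

def deserializeNumericTable(buf):
    # Restore a numeric table from a buffer produced by serializeNumericTable()
    dataArch = OutputDataArchive(buf)
    dataTable = HomogenNumericTable()
    dataTable.deserialize(dataArch)
    return dataTable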
# Print the accuracy
print("Accuracy: {0:.3f}".format(qualityMet.get('accuracy')))
# Print the confusion matrix
printNumericTable(qualityMet.get('confusionMatrix'), "Confusion Matrix")
# Print all available quality metrics
print("All available metrics")
daal_DF.printAllQualityMetrics(qualityMet)
# Serialize the training result and save it to disk (compressed)
daal_DF.serialize(trainingResult, fileName='DF', useCompression=True)
# Deserialize it again
dese_trainingRes = daal_DF.deserialize(fileName='DF.npy', useCompression=True)

# Print predicted responses and actual responses
printNumericTables(
    testGroundTruth, predictResults,
    "Ground truth", "Classification results",
    "Decision Forest classification results (first 20 observations):", 20, flt64=False
)
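The steps that produce trainingResult, predictResults, and qualityMet above are not part of this excerpt. By analogy with the MultiSVM helper used in the next snippet, and assuming daal_DF is an instance of the corresponding pydaal-tutorials decision forest helper class with the same interface, they would presumably look like:

# Hypothetical earlier steps; daal_DF, trainData, trainDependentVariables,
# testData and testGroundTruth are assumed to exist already
trainingResult = daal_DF.training(trainData, trainDependentVariables)
predictResults = daal_DF.predict(trainingResult, testData)
predictResults, qualityMet = daal_DF.predictWithQualityMetrics(
    trainingResult, testData, testGroundTruth)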

# ***** Multi-class classifier *****
print("**** Multi-Classifier ****")
data = load_digits()
x = data.data
y = data.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.40, random_state=42)

# Convert the NumPy arrays to DAAL numeric tables; labels must be 2-D column vectors
trainData = HomogenNumericTable(x_train)
testData = HomogenNumericTable(x_test)
trainDependentVariables = HomogenNumericTable(y_train[:, np.newaxis])
testGroundTruth = HomogenNumericTable(y_test[:, np.newaxis])
'''
Instantiate the SVM object:
MultiSVM(nClasses, method="boser", C=1, tolerence=0.001, tau=0.000001,
         maxIterations=1000000, cacheSize=8000000, doShrinking=True,
         kernel='linear', sigma=0, k=1, b=0, dtype=float64)
'''
daal_svm = MultiSVM(10, cacheSize=600000000)
# Train the model
trainingResult = daal_svm.training(trainData, trainDependentVariables)
# Predict labels for the test data
predictResponses = daal_svm.predict(trainingResult, testData)
# Serialize the training result to disk
daal_svm.serialize(trainingResult, fileName='svm')
# Deserialize it again
dese_trainingRes = daal_svm.deserialize(fileName='svm.npy')
# Or predict and compute quality metrics in one call
predictResponses, metrics = daal_svm.predictWithQualityMetrics(
    dese_trainingRes, testData, testGroundTruth)
daal_svm.printAllQualityMetrics(metrics)

printNumericTables(testGroundTruth,
                   predictResponses,
                   "Ground truth",
                   "Classification results",
                   "SVM classification results (first 20 observations):",
                   20,
                   flt64=False)