from pyspark.mllib.evaluation import MulticlassMetrics
from IPython.display import display, HTML

# Global toggle for the per-model confusion tables; set before calling runMetrics
displayConfusionTable = False

def runMetrics(labeledDataRDD, *models):
    # the code computes the weighted F1 measure, so the column is labeled
    # "F1 Score" rather than "Accuracy"
    html = '<table width="100%"><tr><th>Model</th><th>F1 Score</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for model in models:
        label = model.__class__.__name__
        # score every feature vector, then zip the predictions back with the true labels
        predictionAndLabels = model.predict(labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label))
                               .map(lambda t: (float(t[0]), float(t[1])))
        )
        html += '<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'.format(
            label,
            metrics.weightedFMeasure(beta=1.0) * 100,
            metrics.weightedPrecision * 100,
            metrics.weightedRecall * 100
        )
        if displayConfusionTable:
            confusionMatrixArray = metrics.confusionMatrix().toArray()
            confusionHtml += "<p>" + label + "</p>"
            confusionHtml += "<table>"
            for row in confusionMatrixArray:
                confusionHtml += "<tr>"
                for cell in row:
                    confusionHtml += "<td>" + str(cell) + "</td>"
                confusionHtml += "</tr>"
            confusionHtml += "</table>"
    html += '</table>'
    if displayConfusionTable:
        html += confusionHtml
    display(HTML(html))
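# For reference, a minimal usage sketch. The training calls and model choices
# below are illustrative assumptions, not part of the original listing; any
# pyspark.mllib model exposing predict(featuresRDD) will work.
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.tree import DecisionTree

# labeledDataRDD is assumed to be an RDD[LabeledPoint] with binary labels
logRegModel = LogisticRegressionWithLBFGS.train(labeledDataRDD, iterations=100, numClasses=2)
treeModel = DecisionTree.trainClassifier(labeledDataRDD, numClasses=2, categoricalFeaturesInfo={})

displayConfusionTable = True
runMetrics(labeledDataRDD, logRegModel, treeModel)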
# Enhanced version of runMetrics: the confusion tables now carry human-readable
# class labels (resolved through the training handler) and bold the diagonal
def runMetrics(labeledDataRDD, *models):
    html = '<table width="100%"><tr><th>Model</th><th>F1 Score</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for model in models:
        label = model.__class__.__name__
        predictionAndLabels = model.predict(labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label))
                               .map(lambda t: (float(t[0]), float(t[1])))
        )
        html += '<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'.format(
            label,
            metrics.weightedFMeasure(beta=1.0) * 100,
            metrics.weightedPrecision * 100,
            metrics.weightedRecall * 100
        )
        if displayConfusionTable:
            # resolve readable class labels from the RDD; sort the numeric labels
            # first so the rows line up with the confusion matrix, whose labels
            # are ordered ascending
            handler = getTrainingHandler()
            classLabels = [handler.getClassLabel(l) for l in
                           sorted(labeledDataRDD.map(lambda t: t.label).distinct().collect())]
            confusionMatrixArray = metrics.confusionMatrix().toArray()
            confusionHtml += "<p>" + label + "</p>"
            confusionHtml += "<table>"
            # header row listing the class labels
            confusionHtml += "<tr><td></td>"
            for classLabel in classLabels:
                confusionHtml += "<td>" + str(classLabel) + "</td>"
            confusionHtml += "</tr>"
            for i, row in enumerate(confusionMatrixArray):
                confusionHtml += "<tr><td>" + str(classLabels[i]) + "</td>"
                for j, cell in enumerate(row):
                    # bold the diagonal, i.e. the correctly classified counts
                    confusionHtml += ("<td style='text-align:center'>"
                                      + ("<b>" if i == j else "") + str(cell)
                                      + ("</b>" if i == j else "") + "</td>")
                confusionHtml += "</tr>"
            confusionHtml += "</table>"
    html += '</table>'
    if displayConfusionTable:
        html += confusionHtml
    display(HTML(html))
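# getTrainingHandler()/getClassLabel() come from the notebook's accompanying
# training module. A hypothetical minimal stand-in, purely to make the listing
# self-contained (the class name and label mapping are illustrative):
class SimpleTrainingHandler(object):
    def __init__(self, labelMap):
        # maps numeric labels to display names, e.g. {0.0: "negative", 1.0: "positive"}
        self.labelMap = labelMap

    def getClassLabel(self, label):
        return self.labelMap.get(float(label), str(label))

def getTrainingHandler():
    return SimpleTrainingHandler({0.0: "negative", 1.0: "positive"})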
# PixieDust display-plugin variant: same metrics logic, but rendered from a
# doRender handler, with the models and labeled data supplied by Configuration
def doRender(self, handlerId):
    html = '<div class="pd_save"><table width="100%"><tr><th>Model</th><th>F1 Score</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    displayConfusionTable = True
    for modelName, model in Configuration.getModels():
        label = model.__class__.__name__
        labeledDataRDD, sqlTableName = Configuration.getLabeledData(self.entity)
        predictionAndLabels = model.predict(labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label))
                               .map(lambda t: (float(t[0]), float(t[1])))
        )
        html += '<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'.format(
            label,
            metrics.weightedFMeasure(beta=1.0) * 100,
            metrics.weightedPrecision * 100,
            metrics.weightedRecall * 100
        )
        if displayConfusionTable:
            # resolve readable class labels, sorted to match the confusion
            # matrix's ascending label order
            handler = training.getTrainingHandler()
            classLabels = [handler.getClassLabel(l) for l in
                           sorted(labeledDataRDD.map(lambda t: t.label).distinct().collect())]
            confusionMatrixArray = metrics.confusionMatrix().toArray()
            confusionHtml += "<p>" + label + "</p>"
            confusionHtml += "<table>"
            confusionHtml += "<tr><td></td>"
            for classLabel in classLabels:
                confusionHtml += "<td>" + str(classLabel) + "</td>"
            confusionHtml += "</tr>"
            for i, row in enumerate(confusionMatrixArray):
                confusionHtml += "<tr><td>" + str(classLabels[i]) + "</td>"
                for j, cell in enumerate(row):
                    confusionHtml += ("<td style='text-align:center'>"
                                      + ("<b>" if i == j else "") + str(cell)
                                      + ("</b>" if i == j else "") + "</td>")
                confusionHtml += "</tr>"
            confusionHtml += "</table>"
    html += '</table></div>'
    if displayConfusionTable:
        html += confusionHtml
    self._addHTML(html)
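# In context, doRender is a method of a PixieDust custom display plugin; a
# skeletal frame is sketched below, assuming the pixiedust Display base class
# (the plugin class name is illustrative, not from the original code):
from pixiedust.display.display import Display

class ModelMetricsDisplay(Display):
    # doRender(handlerId) builds the HTML shown above and hands it to the
    # notebook through self._addHTML, both inherited plugin hooks
    def doRender(self, handlerId):
        self._addHTML("<div>model metrics go here</div>")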
# cvModel uses the best model found by the cross-validation.
# Evaluate the best model with the weighted F1 metric.
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

f1 = MulticlassClassificationEvaluator(labelCol='label', predictionCol='prediction', metricName='f1')
print('f1:', f1.evaluate(rfpredictions))

# Confusion matrix: rows are actual labels, columns are predicted labels,
# both ordered by label ascending
import pandas as pd
from pyspark.mllib.evaluation import MulticlassMetrics

predictionAndLabels = rfpredictions.select('label', 'prediction')
metrics = MulticlassMetrics(predictionAndLabels.rdd.map(lambda x: tuple(map(float, x))))
confusion_matrix = metrics.confusionMatrix().toArray()
labels = [int(l) for l in metrics.call('labels')]
confusion_matrix = pd.DataFrame(confusion_matrix, index=labels, columns=labels)
confusion_matrix

# Model metrics by class
#
# **Precision** is the ratio of correctly predicted positive observations to the
# total predicted positive observations: Precision = TP / (TP + FP)
#
# **Recall (Sensitivity)** is the ratio of correctly predicted positive observations
# to all observations in the actual class: Recall = TP / (TP + FN)
#
# **F1 score** is the harmonic mean of Precision and Recall, so it accounts for
# both false positives and false negatives:
# F1 = 2 * (Recall * Precision) / (Recall + Precision)
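# To realize these per-class definitions, MulticlassMetrics can be queried per
# label; a sketch reusing the `metrics` and `labels` objects above (the
# per-label precision/recall/fMeasure calls are part of the pyspark.mllib API):
class_metrics = pd.DataFrame({
    'precision': [metrics.precision(float(l)) for l in labels],
    'recall': [metrics.recall(float(l)) for l in labels],
    'f1': [metrics.fMeasure(float(l), beta=1.0) for l in labels],
}, index=labels)
class_metrics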