def evaluate(predictions):
    """ Evaluation Metrics """
    # label to indexedLabel mappings
    # out = sorted(set([(i[0], i[1]) for i in predictions.select(predictions.label, predictions.indexedLabel).collect()]), key=lambda x: x[0])
    print("Predictions")
    predictions.select("prediction", "indexedLabel", "features").show(5)

    # Select (prediction, true label) and evaluate model
    predictionAndLabels = predictions.select("prediction", "indexedLabel").rdd
    metrics = MulticlassMetrics(predictionAndLabels)

    # Overall statistics
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    print("Summary Stats")
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)

    # Statistics by class
    labels = predictions.rdd.map(lambda lp: lp.label).distinct().collect()
    for label in sorted(labels):
        print("Class %s precision = %s" % (label, metrics.precision(label)))
        print("Class %s recall = %s" % (label, metrics.recall(label)))
        print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

    # Weighted stats
    print("Weighted recall = %s" % metrics.weightedRecall)
    print("Weighted precision = %s" % metrics.weightedPrecision)
    print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
    print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
    print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)

    treeModel = model.stages[2]
    print(treeModel)  # summary only
def evaluate(labelsAndPredictions, data, labels):
    """ Evaluation Metrics """
    # Instantiate metrics object
    metrics = MulticlassMetrics(labelsAndPredictions)

    # Overall statistics
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    print("Summary Stats")
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)

    # Statistics by class
    for label in sorted(labels):
        print("Class %s precision = %s" % (label, metrics.precision(label)))
        print("Class %s recall = %s" % (label, metrics.recall(label)))
        print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

    # Weighted stats
    print("Weighted recall = %s" % metrics.weightedRecall)
    print("Weighted precision = %s" % metrics.weightedPrecision)
    print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
    print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
    print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
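# A minimal usage sketch (not from the original source): it assumes a fitted pipeline
# `model` and a test DataFrame `test_df` with a numeric "label" column, and shows one
# way the (prediction, label) RDD and the label list could be prepared before calling
# the evaluate() helper above.
predictions = model.transform(test_df)
# MulticlassMetrics expects an RDD of (prediction, label) pairs of floats
labelsAndPredictions = predictions.select("prediction", "label") \
    .rdd.map(lambda row: (float(row["prediction"]), float(row["label"])))
labels = [row["label"] for row in test_df.select("label").distinct().collect()]
evaluate(labelsAndPredictions, test_df, labels)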
def NaiveBayesEvaluation(TransformedDataset): nb = NaiveBayes() nb.setLabelCol("LabelIndex") nb.setPredictionCol("Label_Prediction") training, test = TransformedDataset.randomSplit([0.8, 0.2], seed=11) nvModel = nb.fit(training) prediction = nvModel.transform(test) # selected = prediction.select("body", "LabelIndex", "label", "Label_Prediction") # for row in selected.collect(): # print(row) from pyspark.mllib.evaluation import MulticlassMetrics predictionAndLabels = prediction.select( "Label_Prediction", "LabelIndex").rdd.map(lambda r: (float(r[0]), float(r[1]))) # predictionAndLabels = test.rdd.map(lambda lp: (float(nvModel.predict(lp.features)), lp.label)) metrics = MulticlassMetrics(predictionAndLabels) precision = metrics.precision() recall = metrics.recall() f1Score = metrics.fMeasure() print("Summary Stats") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) # Statistics by class labels = prediction.rdd.map(lambda lp: lp.label).distinct().collect() labelIndices = prediction.rdd.map( lambda lp: lp.LabelIndex).distinct().collect() labelIndicesPairs = prediction.rdd.map( lambda lp: (lp.label, lp.LabelIndex)).distinct().collect() print("Labels", labels) print("Label Indices", labelIndices) print("Label Indice Pairs", labelIndicesPairs) for label, labelIndex in sorted(labelIndicesPairs): print("\n Class %s precision = %s" % (label, metrics.precision(labelIndex))) print("Class %s recall = %s" % (label, metrics.recall(labelIndex))) print( "Class %s F1 Measure = %s" % (label, metrics.fMeasure(labelIndex, beta=1.0)), "\n") # Weighted stats print("Weighted recall = %s" % metrics.weightedRecall) print("Weighted precision = %s" % metrics.weightedPrecision) print("Weighted F(1) Score = %s" % metrics.weightedFMeasure()) print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5)) print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
def evaluate_predictions(predictions, show=True): from pyspark.ml.evaluation import BinaryClassificationEvaluator from pyspark.mllib.evaluation import BinaryClassificationMetrics, MulticlassMetrics log = {} evaluator = BinaryClassificationEvaluator(metricName='areaUnderROC') log['auroc'] = evaluator.evaluate(predictions) # Show Validation Score (AUPR) evaluator = BinaryClassificationEvaluator(metricName='areaUnderPR') log['aupr'] = evaluator.evaluate(predictions) # Metrics predictionRDD = predictions.select( ['label', 'prediction']).rdd.map(lambda line: (line[1], line[0])) metrics = MulticlassMetrics(predictionRDD) # Overall statistics log['precision'] = metrics.precision() log['recall'] = metrics.recall() log['F1 Measure'] = metrics.fMeasure() # Statistics by class distinctPredictions = collect_tuple( predictions.select('prediction').distinct()) for x in sorted(distinctPredictions): log[x] = {} log[x]['precision'] = metrics.precision(x) log[x]['recall'] = metrics.recall(x) log[x]['F1 Measure'] = metrics.fMeasure(x, beta=1.0) # Confusion Matrix log['cm'] = metrics.confusionMatrix().toArray() log['cmpercent'] = cm_percent(log['cm'], predictions.count(), show) if show: show_predictions(predictions) print('Confusion Matrix') print(' TP', 'FN\n', 'FP', 'TN') print(log['cm']) print(' PC', 'FN\n', 'FP', 'PW') print(log['cmpercent']) print('') print("Area under ROC = {}".format(log['auroc'])) print("Area under AUPR = {}".format(log['aupr'])) print('\nOverall\ntprecision = {}\nrecall = {}\nF1 Measure = {}\n'. format(log['precision'], log['recall'], log['F1 Measure'])) for x in sorted(distinctPredictions): print('Label {}\ntprecision = {}\nrecall = {}\nF1 Measure = {}\n'. format(x, log[x]['precision'], log[x]['recall'], log[x]['F1 Measure'])) return log
def printMetrics(predictions_and_labels, output_file):
    metrics = MulticlassMetrics(predictions_and_labels)
    output_file.write('Precision of True  ' + str(metrics.precision(1)) + '\n')
    output_file.write('Precision of False ' + str(metrics.precision(0)) + '\n')
    output_file.write('Recall of True  ' + str(metrics.recall(1)) + '\n')
    output_file.write('Recall of False ' + str(metrics.recall(0)) + '\n')
    output_file.write('F-1 Score ' + str(metrics.fMeasure()) + '\n')
    output_file.write('Confusion Matrix\n' + str(metrics.confusionMatrix().toArray()) + '\n')
    print('Precision of True  ' + str(metrics.precision(1)))
    print('Precision of False ' + str(metrics.precision(0)))
    print('Recall of True  ' + str(metrics.recall(1)))
    print('Recall of False ' + str(metrics.recall(0)))
    print('F-1 Score ' + str(metrics.fMeasure()))
    print('Confusion Matrix\n' + str(metrics.confusionMatrix().toArray()))
def main(spark, model_file, data_file): '''Main routine for supervised evaluation Parameters ---------- spark : SparkSession object model_file : string, path to store the serialized model file data_file : string, path to the parquet file to load ''' ### # TODO: YOUR CODE GOES HERE #load best lr model model = PipelineModel.load(model_file) # Load the test dataframe test = spark.read.parquet(data_file) predictions = model.transform(test) predictionAndLabels = predictions.rdd.map(lambda lp: (lp.prediction, lp.label)) metrics = MulticlassMetrics(predictionAndLabels) # Overall statistics precision = metrics.precision() recall = metrics.recall() f1Score = metrics.fMeasure() print("Overall Stats:") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) # Weighted stats print("Weighted precision = %s" % metrics.weightedPrecision) print("Weighted recall = %s" % metrics.weightedRecall) print("Weighted F1 Score = %s" % metrics.weightedFMeasure()) # Statistics by class print("Stats by class") for (genre, label) in predictions.select('genre', 'label').distinct().collect(): print("Class %s precision = %s" % (genre, metrics.precision(label))) print("Class %s recall = %s" % (genre, metrics.recall(label))) print("Class %s F1 Score = %s" % (genre, metrics.fMeasure(label, beta=1.0)))
def multi_clf_performance(name, method, train, test): model = method.fit(train) prediction = model.transform(test) print(f"-----------Performance of {name} on testing set-----------") # Compute raw scores on the test set predictionAndLabels = prediction.select('prediction', 'label') # Instantiate metrics object metrics = MulticlassMetrics(predictionAndLabels.rdd) # Overall statistics print("----------Summary Stats----------------------") print(f"Weighted precision: {multi_evaluator.evaluate(prediction, {multi_evaluator.metricName: 'weightedPrecision'})}") print(f"Weighted recall: {multi_evaluator.evaluate(prediction, {multi_evaluator.metricName: 'weightedRecall'})}") print(f"F1 Score: {multi_evaluator.evaluate(prediction, {multi_evaluator.metricName: 'f1'})}") print(f"Accuracy: {multi_evaluator.evaluate(prediction, {multi_evaluator.metricName: 'accuracy'})}") # Statistics by class print("--------Stats by class----------------------") labels = [row.asDict()['label'] for row in test.select('label').distinct().collect()] for label in sorted(labels): print("Class %s precision = %s" % (label, metrics.precision(label))) print("Class %s recall = %s" % (label, metrics.recall(label))) print("Class %s F1 Score = %s" % (label, metrics.fMeasure(label, beta=1.0))) # Weighted stats #print("--------Weighted Stats----------------------") #print("Weighted precision = %s" % metrics.weightedPrecision) #print("Weighted recall = %s" % metrics.weightedRecall) #print("Weighted F1 Score = %s" % metrics.weightedFMeasure()) #print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate) print("-----------------------------------------------------------")
def calculate_metrics(self, df):
    """
    Define your own metrics to evaluate cross validation.

    :params: df: dataframe containing {prediction} and {label} columns
    :returns: dict of metrics for the positive class (label 1.0)
    """
    # cast the ground truth to float so the (prediction, label) pairs are numeric
    preds_and_labels = df.select('prediction', f.col('label').cast(t.FloatType()))
    metrics = MulticlassMetrics(preds_and_labels.rdd.map(tuple))

    # unweighted, per-class measures for the positive class
    metrics_dict = dict(
        tpr=metrics.truePositiveRate(label=1.0),
        fpr=metrics.falsePositiveRate(label=1.0),
        precision=metrics.precision(label=1.0),
        recall=metrics.recall(label=1.0),
        fMeasure=metrics.fMeasure(label=1.0))
    metrics_dict = {
        k: round(v, 3) if k != "confusion" else v
        for k, v in metrics_dict.items()
    }
    return metrics_dict
def evaluate(df, labelCols, gettopX=-1, getfirstX=-1):
    labelCols2 = [i + "_pred" for i in labelCols]
    df.cache()
    r_list = {
        i: np.zeros((len(labelCols)))
        for i in ['accuracy', 'precision', 'recall', 'fmeasure']
    }
    for i in range(len(labelCols)):
        predandlabels = df.select(labelCols2[i], labelCols[i]).rdd \
            .map(lambda x: (float(x[labelCols2[i]]), float(x[labelCols[i]])))
        metrics = MulticlassMetrics(predandlabels)
        # print(metrics.confusionMatrix())
        r_list['accuracy'][i] = metrics.accuracy
        r_list['precision'][i] = metrics.precision(1.0)
        r_list['recall'][i] = metrics.recall(1.0)
        r_list['fmeasure'][i] = metrics.fMeasure(label=1.0)
    results = {}
    for m, rs in r_list.items():
        results[m] = np.mean(rs)
    for code, num in [('top', gettopX), ('first', getfirstX)]:
        if num <= 0:
            continue
        if code == 'top':
            idx = np.argsort(np.nan_to_num(r_list['fmeasure']))[-num:]
        elif code == 'first':
            idx = np.arange(num)
        for m, rs in r_list.items():
            results['{0}_{1}'.format(m, code)] = np.mean(rs[idx])
    return results
def print_performance_metrics(predictions): # Evaluate model evaluator = BinaryClassificationEvaluator(rawPredictionCol="rawPrediction") auc = evaluator.evaluate(predictions, {evaluator.metricName: "areaUnderROC"}) aupr = evaluator.evaluate(predictions, {evaluator.metricName: "areaUnderPR"}) print("auc = {}".format(auc)) print("aupr = {}".format(aupr)) # Get RDD of predictions and labels for eval metrics predictionAndLabels = predictions.select("prediction", "label").rdd # Instantiate metrics objects binary_metrics = BinaryClassificationMetrics(predictionAndLabels) multi_metrics = MulticlassMetrics(predictionAndLabels) # Area under precision-recall curve print("Area under PR = {}".format(binary_metrics.areaUnderPR)) # Area under ROC curve print("Area under ROC = {}".format(binary_metrics.areaUnderROC)) # Accuracy print("Accuracy = {}".format(multi_metrics.accuracy)) # Confusion Matrix print(multi_metrics.confusionMatrix()) # F1 print("F1 = {}".format(multi_metrics.fMeasure(1.0))) # Precision print("Precision = {}".format(multi_metrics.precision(1.0))) # Recall print("Recall = {}".format(multi_metrics.recall(1.0))) # FPR print("FPR = {}".format(multi_metrics.falsePositiveRate(1.0))) # TPR print("TPR = {}".format(multi_metrics.truePositiveRate(1.0)))
def getF1Score(model, test_df):
    pred = model.transform(test_df)
    pl = pred.select("label", "prediction").rdd.cache()
    metrics = MulticlassMetrics(pl)
    f1score = metrics.fMeasure()
    print("the F1-score of the model is : {}".format(f1score))
    return f1score
def printMeasurementMetrics(predictions_and_labels):
    metrics = MulticlassMetrics(predictions_and_labels)
    print('Precision Result of setosa: ', metrics.precision(1))
    print('Precision Result of versicolor:', metrics.precision(2))
    print('Precision Result of virginica:', metrics.precision(3))
    print('F-1 Score: ', metrics.fMeasure())
    print('Confusion Matrix\n', metrics.confusionMatrix().toArray())
def evaluate(model, word_column="words", vectorizer="w2v"): doc2vecs_df = featurize(word_column, vectorizer) if type(model) == LinearSVC: paramGrid = ParamGridBuilder() \ .addGrid(model.regParam, [0.1]) \ .build() elif type(model) == GBTClassifier: paramGrid = ParamGridBuilder() \ .addGrid(model.maxIter, [50]) \ .build() elif type(model) == RandomForestClassifier: paramGrid = ParamGridBuilder() \ .addGrid(model.maxBins, [100]) \ .build() elif type(model) == MultilayerPerceptronClassifier: paramGrid = ParamGridBuilder() \ .addGrid(model.layers, [[122, 50, 2]]) \ .build() # .addGrid(model.layers, [[120, 2], [120, 50, 2], [120, 75, 50, 2]]) \ elif type(model) == FMClassifier: paramGrid = ParamGridBuilder() \ .addGrid(model.stepSize, [.01, .001]) \ .build() print('Evaluating...') w2v_train_df, w2v_test_df = doc2vecs_df.randomSplit([0.8, 0.2]) si = StringIndexer(inputCol="LABEL", outputCol="label") model_evaluator = MulticlassClassificationEvaluator( labelCol="label", predictionCol="prediction", metricName="f1") classifier_pipeline = Pipeline(stages=[si, model]) crossval = CrossValidator(estimator=classifier_pipeline, estimatorParamMaps=paramGrid, evaluator=model_evaluator, numFolds=5) fit_model = crossval.fit(doc2vecs_df) predictions = fit_model.transform(w2v_test_df) # predictions.toPandas().to_csv('predictions.csv') # predictions.groupBy('prediction', 'label', 'PRODUCT_CATEGORY') # predictions.describe() summarizer = Summarizer.metrics("mean", "count") predictions.select( summarizer.summary(predictions.filter( predictions.label == 1).pos)).show(truncate=False) preds_and_labels = predictions.select(['prediction', 'label']) metrics = MulticlassMetrics(preds_and_labels.rdd.map(tuple)) print('Confusion Matrix') print(metrics.confusionMatrix().toArray()) # Overall statistics precision = metrics.precision(1.0) recall = metrics.recall(1.0) f1Score = metrics.fMeasure(1.0) print("Summary Stats") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) accuracy = model_evaluator.evaluate(predictions) trainingSummary = fit_model.bestModel.stages[-1].extractParamMap() print(trainingSummary) return accuracy
def printMetrics(predictions_and_labels):
    metrics = MulticlassMetrics(predictions_and_labels)
    print('Precision of True ', metrics.precision(1))
    print('Precision of False', metrics.precision(0))
    print('Recall of True ', metrics.recall(1))
    print('Recall of False ', metrics.recall(0))
    print('F-1 Score ', metrics.fMeasure())
    print('Confusion Matrix\n', metrics.confusionMatrix().toArray())
def printMetrics(result):
    metrics = MulticlassMetrics(result)
    print("\nPrecision of True\n", metrics.precision(1))
    print("\nPrecision of False\n", metrics.precision(0))
    print("\nRecall of True\n", metrics.recall(1))
    print("\nRecall of False\n", metrics.recall(0))
    print("\nF1 score\n", metrics.fMeasure())
    print("\nConfusion Matrix\n", metrics.confusionMatrix().toArray())
def evaluate(df_prediction):
    evaluator = BinaryClassificationEvaluator()
    roc = evaluator.evaluate(df_prediction, {evaluator.metricName: "areaUnderROC"})
    pr = evaluator.evaluate(df_prediction, {evaluator.metricName: "areaUnderPR"})
    predictionRDD = df_prediction.select(['label', 'prediction']) \
        .rdd.map(lambda line: (line[1], line[0]))
    metrics = MulticlassMetrics(predictionRDD)
    f1 = metrics.fMeasure()
    return [roc, pr, f1]
def displayMetrics(pred):
    ev = MulticlassMetrics(pred.select(["label", "prediction"]).rdd)
    # Overall statistics
    print("Accuracy = %s" % ev.accuracy)
    print("Precision = %s" % ev.precision())
    print("Recall = %s" % ev.recall())
    print("F1 Score = %s" % ev.fMeasure())
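# A hedged alternative to displayMetrics() above for newer PySpark releases, where the
# no-argument precision()/recall()/fMeasure() calls have been deprecated (they all reduce
# to accuracy) in favour of `accuracy` and the weighted aggregates. Sketch only, not from
# the original source; the "prediction"/"label" column names are assumed.
from pyspark.mllib.evaluation import MulticlassMetrics

def displayMetricsV2(pred):
    ev = MulticlassMetrics(pred.select(["prediction", "label"]).rdd.map(tuple))
    print("Accuracy = %s" % ev.accuracy)
    print("Weighted precision = %s" % ev.weightedPrecision)
    print("Weighted recall = %s" % ev.weightedRecall)
    print("Weighted F1 Score = %s" % ev.weightedFMeasure())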
def main(spark, model_file, data_file): '''Main routine for supervised evaluation Parameters ---------- spark : SparkSession object model_file : string, path to store the serialized model file data_file : string, path to the parquet file to load ''' # Load data. dataset = spark.read.parquet(data_file) # Load model. model = PipelineModel.load(model_file) prediction = model.transform(dataset) predictionAndLabels = prediction.select(["prediction", "label"]).rdd # Instantiate metrics object metrics = MulticlassMetrics(predictionAndLabels) # Overall statistics precision = metrics.precision() recall = metrics.recall() f1Score = metrics.fMeasure() print("Summary Stats") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) print("\n") # Weighted stats print("Weighted recall = %s" % metrics.weightedRecall) print("Weighted precision = %s" % metrics.weightedPrecision) print("Weighted F(1) Score = %s" % metrics.weightedFMeasure()) print("\n") labels = predictionAndLabels.map(lambda lp: lp.label).distinct().collect() for label in sorted(labels): print("Class %s precision = %s" % (label, metrics.precision(label))) print("Class %s recall = %s" % (label, metrics.recall(label))) print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
def evaluate(predictionsAndLabels):
    # input: RDD of (prediction, label) pairs
    testErr = predictionsAndLabels.filter(
        lambda lp: lp[0] != lp[1]).count() / float(predictionsAndLabels.count())
    metrics = MulticlassMetrics(predictionsAndLabels)
    # Overall statistics
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    return testErr, precision, recall, f1Score
def metrics_basic(data):
    metrics = MulticlassMetrics(data)
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    print("Summary Stats")
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)
def printMetrics(self, preds, prediction="prediction", indexedLabel="indexedLabel"):
    # MulticlassMetrics expects (prediction, label) pairs, so select in that order
    metrics = MulticlassMetrics(preds.select(prediction, indexedLabel).rdd)
    labels = [0, 1]
    for label in sorted(labels):
        try:
            print("Class %s precision = %s" % (label, metrics.precision(label)))
            print("Class %s recall = %s" % (label, metrics.recall(label)))
            print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
        except Exception:
            print("No malicious predictions")
def classification_report(actual_data, prediction_data):
    # Build an RDD of (prediction, label) pairs
    prediction_and_labels = prepare_data(actual_data, prediction_data)

    # Calculate class-level metrics
    metrics = MulticlassMetrics(prediction_and_labels)
    classes = set(actual_data.rdd.map(lambda x: x.test_label[0]).collect())
    print('Class\tPrecision\tRecall\tF-Score')
    for c in sorted(classes):
        print('{}\t{}\t{}\t{}'.format(c,
                                      round(metrics.precision(c), 3),
                                      round(metrics.recall(c), 3),
                                      round(metrics.fMeasure(c), 3)))
def evaluate(predictionAndLabels):
    log = {}

    # Show Validation Score (AUROC)
    evaluator = BinaryClassificationEvaluator(metricName='areaUnderROC')
    log['AUROC'] = "%f" % evaluator.evaluate(predictionAndLabels)
    print("Area under ROC = {}".format(log['AUROC']))

    # Show Validation Score (AUPR)
    evaluator = BinaryClassificationEvaluator(metricName='areaUnderPR')
    log['AUPR'] = "%f" % evaluator.evaluate(predictionAndLabels)
    print("Area under PR = {}".format(log['AUPR']))

    # Metrics
    predictionRDD = predictionAndLabels.select(['label', 'prediction']) \
        .rdd.map(lambda line: (line[1], line[0]))
    metrics = MulticlassMetrics(predictionRDD)

    # Confusion Matrix
    print(metrics.confusionMatrix().toArray())

    # Overall statistics
    log['precision'] = "%s" % metrics.precision()
    log['recall'] = "%s" % metrics.recall()
    log['F1 Measure'] = "%s" % metrics.fMeasure()
    print("[Overall]\tprecision = %s | recall = %s | F1 Measure = %s" %
          (log['precision'], log['recall'], log['F1 Measure']))

    # Statistics by class
    labels = [0.0, 1.0]
    for label in sorted(labels):
        log[label] = {}
        log[label]['precision'] = "%s" % metrics.precision(label)
        log[label]['recall'] = "%s" % metrics.recall(label)
        log[label]['F1 Measure'] = "%s" % metrics.fMeasure(label, beta=1.0)
        print("[Class %s]\tprecision = %s | recall = %s | F1 Measure = %s"
              % (label, log[label]['precision'], log[label]['recall'],
                 log[label]['F1 Measure']))

    return log
def evaluateClassification(self, predictionAndLabels):
    metrics = MulticlassMetrics(predictionAndLabels)
    cm = metrics.confusionMatrix()

    result = {}
    result['Matrix'] = cm.toArray().tolist()
    result['Precision'] = metrics.precision()
    result['Recall'] = metrics.recall()
    result['F1 Score'] = metrics.fMeasure()
    return result
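# A hedged usage sketch (not from the original source): `clf_evaluator` is a hypothetical
# instance of the class that defines evaluateClassification(), and `predictionAndLabels`
# is assumed to be an RDD of (prediction, label) float pairs. The returned dict is plain
# Python, so it serializes directly to JSON.
import json

result = clf_evaluator.evaluateClassification(predictionAndLabels)
print(json.dumps(result, indent=2))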
def overall_report(actual_data, prediction_data):
    # Build an RDD of (prediction, label) pairs
    prediction_and_labels = prepare_data(actual_data, prediction_data)
    metrics = MulticlassMetrics(prediction_and_labels)

    # Calculate overall-level metrics
    # print('Precision:', metrics.precision(), type(metrics.precision()))
    return sc.parallelize([
        (Vectors.dense(metrics.precision()),
         Vectors.dense(metrics.recall()),
         Vectors.dense(metrics.fMeasure()))
    ]).toDF(['Precision', 'Recall', 'F-Score'])
def main(): sc = SparkContext(appName="BayesClassifer") htf = HashingTF(50000) data = sc.textFile('/home/varshav/work/PycharmProjects/Sentiment/cleaned_bayes_labels.csv') data_cleaned = data.map(lambda line : line.split(",")) # Create an RDD of LabeledPoints using category labels as labels and tokenized, hashed text as feature vectors data_hashed = data_cleaned.map(lambda (label, text): LabeledPoint(label, htf.transform(text))) data_hashed.persist() # data = sc.textFile('/home/admin/work/spark-1.4.1-bin-hadoop2.4/data/mllib/sample_naive_bayes_data.txt').map(parseLine) #print data # Split data aproximately into training (60%) and test (40%) training, test = data_hashed.randomSplit([0.70, 0.30], seed=0) sameModel = NaiveBayesModel.load(sc, "/home/varshav/work/PycharmProjects/StockAnalysis/myModel") print "----------" print sameModel.predict(htf.transform("posts jump in net profit")) predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label)) predictionAndLabel1 = training.map(lambda p: (sameModel.predict(p.features), p.label)) prediction = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count() prediction1 = 1.0 * predictionAndLabel1.filter(lambda (x, v): x == v).count() / training.count() buy_buy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == 1 and v ==1).count() # Instantiate metrics object # Instantiate metrics object metrics = MulticlassMetrics(predictionAndLabel) # Overall statistics precision = metrics.precision() precision = normalize(precision) recall = metrics.recall() recall = normalize(recall) f1Score = metrics.fMeasure() f1Score = normalize(f1Score) print("Summary Stats") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) ''' # Statistics by class labels = data_hashed.map(lambda lp: lp.label).distinct().collect() for label in sorted(labels): print("Class %s precision = %s" % (label, metrics.precision(label))) print("Class %s recall = %s" % (label, metrics.recall(label))) print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0))) ''' '''
def classification_report(actual_data, prediction_data):
    # Build an RDD of (prediction, label) pairs
    prediction_and_labels = prepare_data(actual_data, prediction_data)

    # Calculate class-level metrics
    metrics = MulticlassMetrics(prediction_and_labels)
    classes = set(actual_data.rdd.map(lambda x: x.test_labels[0]).collect())
    results = [(Vectors.dense(float(c)),
                Vectors.dense(round(metrics.precision(c), 3)),
                Vectors.dense(round(metrics.recall(c), 3)),
                Vectors.dense(round(metrics.fMeasure(c), 3)))
               for c in sorted(classes)]
    return sc.parallelize(results).toDF(
        ['Class', 'Precision', 'Recall', 'F-Score'])
def validate_tffm(spark, sc, model, test_df, s3_metrics_path, s3_endpoint_path): # get predictions validation_df = model.transform(test_df) metricsSchema = StructType() \ .add("metric", StringType()) \ .add("value", DoubleType()) metrics_names = [] # apply threshold def thresholdScore(x): retval = 0.0 if x > 0.5: retval = 1.0 return retval thresholdScoreUdf = F.UserDefinedFunction(thresholdScore, T.FloatType()) validation_df_round = validation_df.withColumn('rscore', thresholdScoreUdf(validation_df.score)) predTffm = validation_df_round.select(['label','rscore']) predictionAndLabelsTffm = predTffm.rdd.map(lambda lp: (lp.rscore, lp.label)) metricsTffm = BinaryClassificationMetrics(predictionAndLabelsTffm) metrics_names.append(("Area_under_PR",metricsTffm.areaUnderPR)) metrics_names.append(("Area_under_ROC",metricsTffm.areaUnderROC)) mmetricsTffm = MulticlassMetrics(predictionAndLabelsTffm) metrics_names.append(("Precision",mmetricsTffm.precision())) metrics_names.append(("Recall",mmetricsTffm.recall())) metrics_names.append(("F1",mmetricsTffm.fMeasure())) metrics_names.append(("Weighted_recall",mmetricsTffm.weightedRecall)) metrics_names.append(("Weighted_precision",mmetricsTffm.weightedPrecision)) metrics_names.append(("Weighted_F1",mmetricsTffm.weightedFMeasure())) metrics_names.append(("Weighted_F05",mmetricsTffm.weightedFMeasure(beta=0.5))) metrics_names.append(("Weighted_FP_rate",mmetricsTffm.weightedFalsePositiveRate)) mRdd = sc.parallelize(metrics_names).coalesce(1) dfMetrics = spark.createDataFrame(mRdd, metricsSchema) dfMetrics.write.csv("{0}/{1}".format(s3_metrics_path, model.endpointName), mode="overwrite") endpointSchema = StructType() \ .add("time", StringType()) \ .add("endpoint", StringType()) endpoint_name = [] endpoint_name.append((str(time.time()),str(model.endpointName))) eRdd = sc.parallelize(endpoint_name).coalesce(1) dfEndpoint = spark.createDataFrame(eRdd, endpointSchema) dfEndpoint.write.csv("{0}/endpoint.txt".format(s3_endpoint_path), mode="overwrite")
def generateJson(AlgorithmName, taskid, traindata, predictionAndLabels):
    jsonContent = dict()
    jsonContent['AlgorithmName'] = AlgorithmName
    jsonContent['TaskId'] = taskid

    labels = traindata.map(lambda lp: lp.label).distinct().collect()
    jsonContent['LabelNum'] = len(labels)

    metrics = MulticlassMetrics(predictionAndLabels)
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    confusion_matrix = metrics.confusionMatrix().toArray()

    jsonContent['Precision'] = precision
    jsonContent['Recall'] = recall
    jsonContent['F1Score'] = f1Score
    jsonContent['ConfusionMatrix'] = confusion_matrix.tolist()

    jsonContent['Labels'] = list()
    for label in sorted(labels):
        tempList = dict()
        tempList['Precision'] = metrics.precision(label)
        tempList['Recall'] = metrics.recall(label)
        tempList['F1Measure'] = metrics.fMeasure(label, beta=1.0)
        jsonContent['Labels'].append(tempList)

    jsonContent['WeightedStats'] = dict()
    jsonContent['WeightedStats']['Precision'] = metrics.weightedPrecision
    jsonContent['WeightedStats']['Recall'] = metrics.weightedRecall
    jsonContent['WeightedStats']['F1Score'] = metrics.weightedFMeasure()
    jsonContent['WeightedStats']['FalsePositiveRate'] = metrics.weightedFalsePositiveRate

    with open(taskid + '.json', 'w') as jsonFile:
        json.dump(jsonContent, jsonFile, indent=4, separators=(',', ': '))
        jsonFile.flush()
def performance(predictions):
    predictionRDD = predictions.select(['label', 'prediction']) \
        .rdd.map(lambda line: (line[1], line[0]))
    binmetrics = BinaryClassificationMetrics(predictionRDD)
    metrics = MulticlassMetrics(predictionRDD)

    results = {'predictions': predictions,
               'areaUnderROC': binmetrics.areaUnderROC,
               'areaUnderPR': binmetrics.areaUnderPR,
               'confusionMatrix': metrics.confusionMatrix().toArray(),
               'accuracy': metrics.accuracy,
               'precision': metrics.precision(),
               'recall': metrics.recall(),
               'f1measure': metrics.fMeasure()}
    return results
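# A hedged variant (not from the original source): BinaryClassificationMetrics gives a more
# informative ROC/PR area when fed the predicted probability of the positive class rather
# than the hard 0/1 prediction. This sketch assumes the predictions DataFrame carries the
# usual Spark ML "probability" vector column alongside "label".
from pyspark.mllib.evaluation import BinaryClassificationMetrics

scoreAndLabels = predictions.select(['probability', 'label']) \
    .rdd.map(lambda row: (float(row['probability'][1]), float(row['label'])))
binmetrics = BinaryClassificationMetrics(scoreAndLabels)
print("Area under ROC (probability scores) = {}".format(binmetrics.areaUnderROC))
print("Area under PR  (probability scores) = {}".format(binmetrics.areaUnderPR))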
def overall_report(actual_data, prediction_data):
    # Build an RDD of (prediction, label) pairs
    prediction_and_labels = prepare_data(actual_data, prediction_data)
    metrics = MulticlassMetrics(prediction_and_labels)

    # Calculate overall-level metrics
    # print('Precision:', metrics.precision(), type(metrics.precision()))
    # return sc.parallelize([(Vectors.dense(metrics.accuracy),
    #                         Vectors.dense(metrics.precision()),
    #                         Vectors.dense(metrics.recall()),
    #                         Vectors.dense(metrics.fMeasure()))]).toDF(['Accuracy', 'Precision', 'Recall', 'F-Score'])
    print('Accuracy\tPrecision\tRecall\tF-Score')
    print('{}\t{}\t{}\t{}'.format(metrics.accuracy,
                                  metrics.precision(),
                                  metrics.recall(),
                                  metrics.fMeasure()))
def performancerdd(self):
    self.calculator = 'RDDs'
    print('Calculating performance metrics using RDDs...')
    predictionRDD = self.predictions.select(['label', 'prediction']) \
        .rdd.map(lambda line: (line[1], line[0]))
    binmetrics = BinaryClassificationMetrics(predictionRDD)
    metrics = MulticlassMetrics(predictionRDD)

    self.areaUnderROC = binmetrics.areaUnderROC
    self.areaUnderPR = binmetrics.areaUnderPR
    self.confusionMatrix = metrics.confusionMatrix().toArray()
    self.accuracy = metrics.accuracy
    self.precision = metrics.precision()
    self.recall = metrics.recall()
    self.f1measure = metrics.fMeasure()
    self.falsePositive = metrics.falsePositiveRate(1.0)
    # false positive rate of class 0.0, which equals the false-negative rate of class 1.0
    self.falseNegative = metrics.falsePositiveRate(0.0)
def printFinalResultMetrics(predictions_and_labels):
    metrics = MulticlassMetrics(predictions_and_labels)
    print('\n')
    print('Precision of Setosa    ', metrics.precision(1))
    print('Precision of Versicolor', metrics.precision(2))
    print('Precision of Virginica ', metrics.precision(3))
    print('\n')
    print('Recall of Setosa    ', metrics.recall(1))
    print('Recall of Versicolor', metrics.recall(2))
    print('Recall of Virginica ', metrics.recall(3))
    print('\n')
    print('F-1 Score ', metrics.fMeasure())
    print('\n\n')
    print('Confusion Matrix\n', metrics.confusionMatrix().toArray())
    print('\n\n')
    return
def modelStatistics(labelsAndPredictions):
    metrics = MulticlassMetrics(labelsAndPredictions)
    print(metrics.confusionMatrix())

    # Overall statistics
    precision = metrics.precision()
    recall = metrics.recall()
    f1Score = metrics.fMeasure()
    print("Summary Stats")
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)

    # Weighted stats
    print("Weighted recall = %s" % metrics.weightedRecall)
    print("Weighted precision = %s" % metrics.weightedPrecision)
    print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
    print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
    print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
model = SVMWithSGD.train(trainParsed, iterations=100)

# Training Error
trainLabelsAndPreds = trainParsed.map(lambda p: (p.label, float(model.predict(p.features))))
trainErr = trainLabelsAndPreds.filter(lambda vp: vp[0] != vp[1]).count() / float(trainParsed.count())
print(trainErr)

# Test Error
testLabelsAndPreds = testParsed.map(lambda p: (p.label, float(model.predict(p.features))))
testErr = testLabelsAndPreds.filter(lambda vp: vp[0] != vp[1]).count() / float(testParsed.count())
print(testErr)

metrics = BinaryClassificationMetrics(testLabelsAndPreds)
print(metrics.areaUnderROC)
print(metrics.areaUnderPR)

mcMetrics = MulticlassMetrics(testLabelsAndPreds)
# TODO: Do this for classes 1.0, 0.0 and not just overall
print(mcMetrics.precision())
print(mcMetrics.recall())
print(mcMetrics.fMeasure())

model.save(sc, "SVMModel")

### Run Model on Validation Set
## TODO: output file of zipcodes and predicted success metrics
## TODO: Use bokeh on file to make visualization of the US
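# A hedged sketch (not from the original source) addressing the TODO above: per-class
# precision, recall and F1 for labels 0.0 and 1.0. MulticlassMetrics expects (prediction,
# label) pairs, so the (label, prediction) tuples from the snippet above are flipped first.
from pyspark.mllib.evaluation import MulticlassMetrics

predsAndLabels = testLabelsAndPreds.map(lambda lp: (lp[1], lp[0]))
perClassMetrics = MulticlassMetrics(predsAndLabels)
for cls in [0.0, 1.0]:
    print("Class %s precision = %s" % (cls, perClassMetrics.precision(cls)))
    print("Class %s recall    = %s" % (cls, perClassMetrics.recall(cls)))
    print("Class %s F1        = %s" % (cls, perClassMetrics.fMeasure(cls, beta=1.0)))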
def train_model (conf): sc = SparkUtil.get_spark_context (conf.spark_conf) conf.output_dir = conf.output_dir.replace ("file:", "") conf.output_dir = "file://{0}".format (conf.output_dir) labeled = Evaluate.load_all (sc, conf). \ map (lambda b : LabeledPoint ( label = 1.0 if b.fact else 0.0, features = [ b.paraDist, b.sentDist, b.docDist ] ) ) # labeled = sc.parallelize ([ round ((x/10) * 9) for x in random.sample(range(1, 100000000), 30000) ]). \ # map (lambda b : LabeledPoint ( 1.0 if b % 2 == 0 else 0.0, # [ b, b * 2, b * 9 ] ) ) # print (labeled.collect ()) train, test = labeled.randomSplit (weights=[ 0.8, 0.2 ], seed=12345) count = train.count () start = time.time () model = LogisticRegressionWithLBFGS.train (train) elapsed = time.time () - start print ("Trained model on training set of size {0} in {1} seconds".format (count, elapsed)) start = time.time () model_path = os.path.join (conf.output_dir, "eval", "model") file_path = model_path.replace ("file://", "") if os.path.isdir (file_path): print ("Removing existing model {0}".format (file_path)) shutil.rmtree (file_path) model.save(sc, model_path) sameModel = LogisticRegressionModel.load(sc, model_path) elapsed = time.time () - start print ("Saved and restored model to {0} in {1} seconds".format (model_path, elapsed)) # Metrics labelsAndPreds = test.map (lambda p: (p.label, model.predict (p.features))) trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count () / float (train.count()) print("Training Error => {0}".format (trainErr)) predictionsAndLabels = labelsAndPreds.map (lambda x : ( float(x[1]), float(x[0]) )) metrics = MulticlassMetrics (predictionsAndLabels) print (" --------------> {0}".format (predictionsAndLabels.take (1000))) #print (labelsAndPreds.collect ()) print ("\nMETRICS:") try: print ("false positive (0.0): {0}".format (metrics.falsePositiveRate(0.0))) print ("false positive (1.0): {0}".format (metrics.falsePositiveRate(1.0))) except: traceback.print_exc () try: print ("precision : {0}".format (metrics.precision(1.0))) except: traceback.print_exc () try: print ("recall : {0}".format (metrics.recall(1.0))) except: traceback.print_exc () try: print ("fMeasure : {0}".format (metrics.fMeasure(0.0, 2.0))) except: traceback.print_exc () print ("confusion matrix : {0}".format (metrics.confusionMatrix().toArray ())) print ("precision : {0}".format (metrics.precision())) print ("recall : {0}".format (metrics.recall())) print ("weighted false pos : {0}".format (metrics.weightedFalsePositiveRate)) print ("weighted precision : {0}".format (metrics.weightedPrecision)) print ("weighted recall : {0}".format (metrics.weightedRecall)) print ("weight f measure : {0}".format (metrics.weightedFMeasure())) print ("weight f measure 2 : {0}".format (metrics.weightedFMeasure(2.0))) print ("") # Regression metrics predictedAndObserved = test.map (lambda p: (model.predict (p.features) / 1.0 , p.label / 1.0 ) ) regression_metrics = RegressionMetrics (predictedAndObserved) print ("explained variance......: {0}".format (regression_metrics.explainedVariance)) print ("absolute error..........: {0}".format (regression_metrics.meanAbsoluteError)) print ("mean squared error......: {0}".format (regression_metrics.meanSquaredError)) print ("root mean squared error.: {0}".format (regression_metrics.rootMeanSquaredError)) print ("r2......................: {0}".format (regression_metrics.r2)) print ("") labelsAndPreds = test.map (lambda p: (p.label, sameModel.predict (p.features))) testErr = labelsAndPreds.filter (lambda (v, p): v != p).count () / 
float (test.count ()) print ("Testing Error => {0}".format (testErr))
training, test = data.randomSplit([0.6, 0.4], seed=11)
training.cache()

# Run training algorithm to build the model
model = LogisticRegressionWithLBFGS.train(training, numClasses=3)

# Compute raw scores on the test set
predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp.label))

# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels)

# Overall statistics
precision = metrics.precision()
recall = metrics.recall()
f1Score = metrics.fMeasure()
print("Summary Stats")
print("Precision = %s" % precision)
print("Recall = %s" % recall)
print("F1 Score = %s" % f1Score)

# Statistics by class
labels = data.map(lambda lp: lp.label).distinct().collect()
for label in sorted(labels):
    print("Class %s precision = %s" % (label, metrics.precision(label)))
    print("Class %s recall = %s" % (label, metrics.recall(label)))
    print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

# Weighted stats
print("Weighted recall = %s" % metrics.weightedRecall)
print("Weighted precision = %s" % metrics.weightedPrecision)
def logisticRegression(trainFile, testFile, taskid, sc): # Load training data in LIBSVM format trainData = MLUtils.loadLibSVMFile(sc, trainFile) testData = MLUtils.loadLibSVMFile(sc, testFile) # Split data into training (60%) and test (40%) # traindata, testdata = data.randomSplit([0.6, 0.4], seed = 11L) # traindata.cache() # Load testing data in LIBSVM format #testdata = MLUtils.loadLibSVMFile(sc, loadTestingFilePath) labelNum = trainData.map(lambda lp: lp.label).distinct().count() # Run training algorithm to build the model model = LogisticRegressionWithLBFGS.train(trainData, numClasses=labelNum) # Compute raw scores on the test set predictionAndLabels = testData.map(lambda lp: (float(model.predict(lp.features)), lp.label)) Json.generateJson("LogisticRegression", taskid, trainData, predictionAndLabels); # Instantiate metrics object metrics = MulticlassMetrics(predictionAndLabels) # Overall statistics precision = metrics.precision() recall = metrics.recall() f1Score = metrics.fMeasure() #confusion_matrix = metrics.confusionMatrix().toArray() print("Summary Stats") print("Precision = %s" % precision) print("Recall = %s" % recall) print("F1 Score = %s" % f1Score) # Statistics by class labels = trainData.map(lambda lp: lp.label).distinct().collect() for label in sorted(labels): print("Class %s precision = %s" % (label, metrics.precision(label))) print("Class %s recall = %s" % (label, metrics.recall(label))) print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0))) # Weighted stats print("Weighted recall = %s" % metrics.weightedRecall) print("Weighted precision = %s" % metrics.weightedPrecision) print("Weighted F(1) Score = %s" % metrics.weightedFMeasure()) print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5)) print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate) # #return model parameters # res = [('1','Yes','TP Rate', metrics.truePositiveRate(0.0)), # ('2','Yes','FP Rate', metrics.falsePositiveRate(0.0)), # ('3','Yes','Precision', metrics.precision(0.0)), # ('4','Yes','Recall', metrics.recall(0.0)), # ('5','Yes','F-Measure', metrics.fMeasure(0.0, beta=1.0)), # ('1','Yes','TP Rate', metrics.truePositiveRate(1.0)), # ('2','Yes','FP Rate', metrics.falsePositiveRate(1.0)), # ('3','Yes','Precision', metrics.precision(1.0)), # ('4','Yes','Recall', metrics.recall(1.0)), # ('5','Yes','F-Measure', metrics.fMeasure(1.0, beta=1.0)), # ('1','Yes','TP Rate', metrics.truePositiveRate(2.0)), # ('2','Yes','FP Rate', metrics.falsePositiveRate(2.0)), # ('3','Yes','Precision', metrics.precision(2.0)), # ('4','Yes','Recall', metrics.recall(2.0)), # ('5','Yes','F-Measure', metrics.fMeasure(2.0, beta=1.0))] # #save output file path as JSON and dump into dumpFilePath # rdd = sc.parallelize(res) # SQLContext.createDataFrame(rdd).collect() # df = SQLContext.createDataFrame(rdd,['Order','CLass','Name', 'Value']) #tempDumpFilePath = dumpFilePath + "/part-00000" #if os.path.exists(tempDumpFilePath): # os.remove(tempDumpFilePath) #df.toJSON().saveAsTextFile(hdfsFilePath) #tmpHdfsFilePath = hdfsFilePath + "/part-00000" #subprocess.call(["hadoop","fs","-copyToLocal", tmpHdfsFilePath, dumpFilePath]) # Save and load model #clusters.save(sc, "myModel") #sameModel = KMeansModel.load(sc, "myModel")
training, test = data.randomSplit([0.85, 0.15], seed=11)
training.cache()

# Run training algorithm to build the model
model = LogisticRegressionWithLBFGS.train(training, numClasses=39)

# Compute raw scores on the test set
predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp.label))

# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels)

# Overall statistics
precision = metrics.precision()
recall = metrics.recall()
f1Score = metrics.fMeasure()
# accuracy = metrics.accuracy
accuracy = 1.0 * predictionAndLabels.filter(lambda xv: xv[0] == xv[1]).count() / test.count()
# print("Summary Stats")
# print("Precision = %s" % precision)
# print("Recall = %s" % recall)
# print("F1 Score = %s" % f1Score)
# print("Accuracy = %s" % accuracy)

# Statistics by class
labels = data.map(lambda lp: lp.label).distinct().collect()
# for label in sorted(labels):
#     print("Class %s precision = %s" % (label, metrics.precision(label)))
#     print("Class %s recall = %s" % (label, metrics.recall(label)))
#     print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

# Weighted stats
def test_prfs(): # TODO: revised so that it will take user's inputs instead of hardcoded values """ Test Precision, Recall, Fscore, and Support on multiclass classification data Input data: https://github.com/apache/spark/blob/master/data/mllib/sample_multiclass_classification_data.txt. """ # load the schemas (if existed) # create a hdfs directory #os.system("hdfs dfs -mkdir datasets") # load the data file into the hdfs directory os.system("hdfs dfs -put sample_multiclass_classification_data.txt datasets/sample_multiclass_classification_data.txt") data = MLUtils.loadLibSVMFile(scsingleton.sc, "hdfs://localhost:9000/datasets/sample_multiclass_classification_data.txt") # print data.take(1) # ie. [LabeledPoint(1.0, (4,[0,1,2,3],[-0.222222,0.5,-0.762712,-0.833333]))] # [ ( finalClassification, (numLabels, [label0, label1, label2, ..., labelN], [prob0, prob1, prob2, ..., probN]) ) ] # split data into train (60%), test (40%) trainingRDD, testRDD = data.randomSplit([0.6, 0.4]) trainingRDD.cache() testRDD.cache() with Timer() as t: numTest = testRDD.count() print "testRDD.count(): %s seconds" % t.secs # run training algorithm to build the model # without validation with Timer() as t: model = LogisticRegressionWithLBFGS.train(trainingRDD, numClasses=3) print "LogisticRegressionWithLBFGS.train(trainingRDD, numClasses=3): %s seconds" % t.secs # make a prediction with Timer() as t: testPredAndLabel = testRDD.map(lambda lp: (float(model.predict(lp.features)), lp.label)) print "testPredAndLabel: %s seconds" % t.secs # calculate Precision, Recall, F1-score metrics = MulticlassMetrics(testPredAndLabel) print( "precision = %s" % metrics.precision() ) print( "recall = %s" % metrics.recall() ) print( "f1-score = %s" % metrics.fMeasure() ) # statistics by class labels = data.map(lambda lp: lp.label).distinct().collect() for label in sorted(labels): print( "Class %s precision = %s" % (label, metrics.precision(label)) ) print( "Class %s recall = %s" % (label, metrics.recall(label)) ) print( "Class %s f1-score = %s" % (label, metrics.fMeasure(label, beta=1.0)) ) # weighted stats print( "Weighted precision = %s" % metrics.weightedPrecision ) print( "Weighted recall = %s" % metrics.weightedRecall ) print( "Weighted f1-score = %s" % metrics.weightedFMeasure() ) print( "Weighted f(0.5)-score = %s" % metrics.weightedFMeasure(beta=0.5) ) print( "Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate ) return
# Load testing data in LIBSVM format
# testdata = MLUtils.loadLibSVMFile(sc, loadTestingFilePath)

# Run training algorithm to build the model
model = LogisticRegressionWithLBFGS.train(traindata, numClasses=3)

# Compute raw scores on the test set
predictionAndLabels = testdata.map(lambda lp: (float(model.predict(lp.features)), lp.label))

# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels)

# Overall statistics
precision = metrics.precision()
recall = metrics.recall()
f1Score = metrics.fMeasure()
# confusion_matrix = metrics.confusionMatrix().toArray()
print("Summary Stats")
print("Precision = %s" % precision)
print("Recall = %s" % recall)
print("F1 Score = %s" % f1Score)

# Statistics by class
labels = traindata.map(lambda lp: lp.label).distinct().collect()
for label in sorted(labels):
    print("Class %s precision = %s" % (label, metrics.precision(label)))
    print("Class %s recall = %s" % (label, metrics.recall(label)))
    print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
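# A hedged closing sketch (not from the original source): deriving overall accuracy and
# per-class support directly from the confusion matrix as a cross-check. It reuses the
# `metrics` object from the snippet above; MulticlassMetrics orders rows/columns by
# ascending class label, with actual classes in rows and predicted classes in columns.
import numpy as np

cm = metrics.confusionMatrix().toArray()
accuracy = np.trace(cm) / cm.sum()   # diagonal holds the correctly classified counts
support = cm.sum(axis=1)             # actual instances per class (row sums)
print("Accuracy from confusion matrix = %s" % accuracy)
print("Support per class = %s" % support)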