コード例 #1
0
ファイル: Q1.py プロジェクト: prabhd33ps/ScalableML
# Hyper-parameter search space for `rf`: three tree depths, two impurity
# criteria and three bin counts are registered with the grid builder.
rf_paramGrid = (
    ParamGridBuilder()
    .addGrid(rf.maxDepth, [1, 5, 10])
    .addGrid(rf.impurity, ['entropy', 'gini'])
    .addGrid(rf.maxBins, [2, 10, 20])
    .build()
)

# Area-under-ROC scorer: reads the 'rawPrediction' column against 'label'.
AUC_evaluator = BinaryClassificationEvaluator(
    metricName="areaUnderROC",
    labelCol='label',
    rawPredictionCol='rawPrediction',
)

# Accuracy scorer: compares the 'prediction' column against 'label'.
accuracy_evaluator = MulticlassClassificationEvaluator(
    metricName="accuracy",
    labelCol="label",
    predictionCol="prediction",
)

# NOTE: this rebinds `rf_paramGrid`. The parameter grid is handed to the
# CrossValidator below, and from this statement on the name refers to the
# CrossValidator itself rather than to the grid.
rf_paramGrid = CrossValidator(
    numFolds=3,
    evaluator=accuracy_evaluator,
    estimatorParamMaps=rf_paramGrid,
    estimator=rf_pipeline,
)

# Fit the 3-fold cross-validator on trainingData; candidates are scored with
# the accuracy evaluator configured above.
cvModel_rf = rf_paramGrid.fit(trainingData)

# Best model found by the cross-validated fit.
BestPipeline_rf = cvModel_rf.bestModel

# Flatten the parameter map of the pipeline's last stage into a plain
# {parameter name: value} dictionary.
paramDict = {
    hyperparam.name: setting
    for hyperparam, setting
    in BestPipeline_rf.stages[-1].extractParamMap().items()
}

# Pretty-print the parameters as indented JSON; any other readable format
# would serve equally well.
print(json.dumps(paramDict, indent=4))

# Apply the fitted cross-validation model to testData, then compute the
# area under the ROC curve of the resulting predictions.
predction_rfc = cvModel_rf.transform(testData)
AUC_rfc = AUC_evaluator.evaluate(predction_rfc)