Exemple #1
0
    # Tune the pipeline with 3-fold cross-validation over paramGrid.
    # Evaluator choice depends on the task: RegressionEvaluator for
    # regression, BinaryClassificationEvaluator for binary labels, and
    # MulticlassClassificationEvaluator for multi-class problems.
    # No metricName is set on this evaluator, so model selection uses the
    # evaluator's default metric (F1 per the PySpark docs).
    cv = CrossValidator().setEstimator(lrPipeline).setEvaluator(
        MulticlassClassificationEvaluator().setLabelCol(
            "indexedLabel").setPredictionCol(
                "prediction")).setEstimatorParamMaps(paramGrid).setNumFolds(3)
    cvModel = cv.fit(train)

    # Score the held-out data with the best model found by cross-validation
    # and dump every prediction. collect() brings all selected rows back to
    # the driver, which is only reasonable for small test sets.
    lrPredictions = cvModel.transform(test)
    lrPreRel = lrPredictions.select("predictedLabel", "label", "features",
                                    "probability").collect()
    for item in lrPreRel:
        print("{},{}-->prob={},predictedLabel{}".format(
            item['label'], item['features'], item['probability'],
            item['predictedLabel']))

    # Explicitly request accuracy: left at its default the evaluator reports
    # F1, which would be mislabelled as "lrAccuracy" below.
    evaluator = MulticlassClassificationEvaluator().setLabelCol(
        "indexedLabel").setPredictionCol("prediction").setMetricName(
            "accuracy")
    lrAccuracy = evaluator.evaluate(lrPredictions)
    print("lrAccuracy:{}".format(lrAccuracy))

    # Fetch the best logistic-regression model and inspect its parameters.
    # NOTE(review): stages[2] assumes the classifier is the third stage of
    # lrPipeline -- confirm against the pipeline definition.
    bestModel = cvModel.bestModel
    lrModel = bestModel.stages[2]
    print("Coefficients: " + str(lrModel.coefficientMatrix) + "Intercept: " +
          str(lrModel.interceptVector) + "numClasses: " +
          str(lrModel.numClasses) + "numFeatures: " + str(lrModel.numFeatures))

    # Show the documentation and current/default values of the tuned params.
    print(lr.explainParam(lr.regParam))
    print(lr.explainParam(lr.elasticNetParam))
Exemple #2
0
# lrModel = lrPipelineModel.stages[2]
# print("Coefficients: " + str(lrModel.coefficients) + "Intercept: " + str(lrModel.intercept) + "numClasses: " + str(
#     lrModel.numClasses) + "numFeatures: " + str(lrModel.numFeatures))
#

# Hyper-parameter grid: 2 elasticNetParam values x 3 regParam values, i.e.
# 6 candidate settings for `lr` (presumably the LogisticRegression stage of
# lrPipeline -- it is defined outside this snippet).
paramGrid = ParamGridBuilder().addGrid(lr.elasticNetParam, [0.2, 0.8]).addGrid(
    lr.regParam, [0.01, 0.1, 0.5]).build()

# 3-fold cross-validation of the whole pipeline over the grid. No metricName
# is set on this evaluator, so model selection uses the evaluator's default
# metric (F1 per the PySpark docs).
cv = CrossValidator().setEstimator(lrPipeline).setEvaluator(
    MulticlassClassificationEvaluator().setLabelCol(
        "indexedLabel").setPredictionCol("prediction")).setEstimatorParamMaps(
            paramGrid).setNumFolds(3)
cvModel = cv.fit(trainingData)

# Score the held-out data with the best model and dump every prediction.
# collect() brings all selected rows back to the driver, which is only
# reasonable for small test sets.
lrPredictions = cvModel.transform(testData)
lrPreRel = lrPredictions.select("predictedLabel", "label", "features",
                                "probability").collect()
for item in lrPreRel:
    print("{},{}-->prob={},predictedLabel{}".format(
        item['label'], item['features'], item['probability'],
        item['predictedLabel']))

# Explicitly request accuracy: left at its default the evaluator reports F1,
# which would not match the name lrAccuracy.
evaluator = MulticlassClassificationEvaluator().setLabelCol(
    "indexedLabel").setPredictionCol("prediction").setMetricName("accuracy")
lrAccuracy = evaluator.evaluate(lrPredictions)
# The score was previously computed but never reported.
print("lrAccuracy:{}".format(lrAccuracy))

# Inspect the best model chosen by cross-validation.
# NOTE(review): stages[2] assumes the classifier is the third stage of
# lrPipeline -- confirm against the pipeline definition.
bestModel = cvModel.bestModel
lrModel = bestModel.stages[2]
print("Coefficients: " + str(lrModel.coefficientMatrix) + "Intercept: " +
      str(lrModel.interceptVector) + "numClasses: " + str(lrModel.numClasses) +
      "numFeatures: " + str(lrModel.numFeatures))

# explainParam() only returns a string; outside an interactive shell it must
# be printed to be visible.
print(lr.explainParam(lr.regParam))
print(lr.explainParam(lr.elasticNetParam))