# MAGIC dataset given the specified metric
# MAGIC
# MAGIC 3. The **`ComputeModelStatistics`** Transformer computes the different
# MAGIC metrics on a scored dataset (in our case, the `validation` dataset)
# MAGIC at the same time

# COMMAND ----------

from mmlspark.train import TrainClassifier, ComputeModelStatistics
from mmlspark.automl import FindBestModel

# Prepare data for learning: 60/20/20 train/test/validation split,
# seeded so the partitioning is reproducible across runs.
train, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)

# Fit one logistic-regression classifier on 'train' per regularization strength.
# (lrmodels is also consumed by a later cell that pools all candidate models.)
lrmodels = []
for reg_param in [0.05, 0.1, 0.2, 0.4]:
    learner = TrainClassifier(model=LogisticRegression(regParam=reg_param),
                              labelCol="label",
                              numFeatures=10000)
    lrmodels.append(learner.fit(train))

# Keep whichever candidate scores the highest AUC on the held-out test split.
bestModel = FindBestModel(evaluationMetric="AUC", models=lrmodels).fit(test)

# Score the validation split and report the winner's AUC as a percentage.
predictions = bestModel.transform(validation)
metrics = ComputeModelStatistics().transform(predictions)
print("Best model's AUC on validation set = " + "{0:.2f}%".format(metrics.first()["AUC"] * 100))
                 for gbt in gbtclassifiers ]  # NOTE(review): tail of a GBT-training list comprehension — its opening bracket is in an earlier cell outside this chunk; presumably it fits each GBT classifier, like the LR/RF comprehensions. Confirm upstream.

# Pool every trained candidate (logistic regression + random forest + GBT)
# so model selection below compares all families at once.
trainedModels = lrmodels + rfmodels + gbtmodels

# COMMAND ----------

# MAGIC %md Find the best model for the given test dataset.

# COMMAND ----------

from mmlspark.automl import FindBestModel
# Select the candidate with the best AUC on the 'ptest' split.
bestModel = FindBestModel(evaluationMetric="AUC", models=trainedModels).fit(ptest)

# Display the per-model evaluation table, the winner's metrics,
# and the metrics for every candidate.
bestModel.getEvaluationResults().show()
bestModel.getBestModelMetrics().show()
bestModel.getAllModelMetrics().show()

# COMMAND ----------

# MAGIC %md Get the accuracy from the validation dataset.

# COMMAND ----------

from mmlspark.train import ComputeModelStatistics
# Score the held-out 'pvalidation' split with the chosen model and
# report both accuracy and AUC as percentages.
predictions = bestModel.transform(pvalidation)
metrics = ComputeModelStatistics().transform(predictions)
print("Best model's accuracy on validation set = " + "{0:.2f}%".format(metrics.first()["accuracy"] * 100))
print("Best model's AUC on validation set = " + "{0:.2f}%".format(metrics.first()["AUC"] * 100))