# MAGIC to evaluate the predictions.  The following statistics are calculated for
# MAGIC regression models to evaluate:
# MAGIC * Mean squared error
# MAGIC * Root mean squared error
# MAGIC * R^2
# MAGIC * Mean absolute error
# MAGIC
# MAGIC Use the `ComputeModelStatistics` API to compute basic statistics for
# MAGIC the Poisson and the Random Forest models.

# COMMAND ----------

from mmlspark.train import ComputeModelStatistics
poissonMetrics = ComputeModelStatistics().transform(poissonPrediction)
print("Poisson Metrics")
poissonMetrics.toPandas()

# COMMAND ----------

randomForestMetrics = ComputeModelStatistics().transform(
    randomForestPrediction)
print("Random Forest Metrics")
randomForestMetrics.toPandas()

# COMMAND ----------

# MAGIC %md We can also compute per instance statistics for `poissonPrediction`:

# COMMAND ----------

from mmlspark.train import ComputePerInstanceStatistics
Exemple #2
0
# COMMAND ----------

# MAGIC %md Score the regressor on the test data.

# COMMAND ----------

scoredData = model.transform(testCat)
scoredData.limit(10).toPandas()

# COMMAND ----------

# MAGIC %md Compute model metrics against the entire scored dataset

# COMMAND ----------

from mmlspark.train import ComputeModelStatistics
metrics = ComputeModelStatistics().transform(scoredData)
metrics.toPandas()

# COMMAND ----------

# MAGIC %md Finally, compute and show statistics on individual predictions in the test
# MAGIC dataset, demonstrating the usage of `ComputePerInstanceStatistics`

# COMMAND ----------

from mmlspark.train import ComputePerInstanceStatistics
evalPerInstance = ComputePerInstanceStatistics().transform(scoredData)
evalPerInstance.select("ArrDelay", "Scores", "L1_loss", "L2_loss") \
               .limit(10).toPandas()
# COMMAND ----------

# MAGIC %md We evaluate the prediction result by using `mmlspark.train.ComputeModelStatistics` which returns four metrics:
# MAGIC * [MSE (Mean Squared Error)](https://en.wikipedia.org/wiki/Mean_squared_error)
# MAGIC * [RMSE (Root Mean Squared Error)](https://en.wikipedia.org/wiki/Root-mean-square_deviation) = sqrt(MSE)
# MAGIC * [R quared](https://en.wikipedia.org/wiki/Coefficient_of_determination)
# MAGIC * [MAE (Mean Absolute Error)](https://en.wikipedia.org/wiki/Mean_absolute_error)

# COMMAND ----------

metrics = ComputeModelStatistics(
    evaluationMetric='regression', labelCol='target',
    scoresCol='prediction').transform(lr_predictions)

results = metrics.toPandas()
results.insert(0, 'model', ['Spark MLlib - Linear Regression'])
display(results)

# COMMAND ----------

# MAGIC %md ## Vowpal Wabbit

# COMMAND ----------

# MAGIC %md Perform VW-style feature hashing. Many types (numbers, string, bool, map of string to (number, string)) are supported.

# COMMAND ----------

vw_featurizer = VowpalWabbitFeaturizer(
    inputCols=feature_cols,