Example #1
# MAGIC %md
# MAGIC
# MAGIC ## Cross Validation

# COMMAND ----------

# MAGIC %md
# MAGIC For each model, you can run the command below to see its parameters and a brief explanation of each.

# COMMAND ----------

print(lr.explainParams())

# COMMAND ----------

print(gb.explainParams())

# COMMAND ----------

# MAGIC %md
# MAGIC
# MAGIC #### Logistic Regression - Param Grid

# COMMAND ----------

from pyspark.ml.tuning import ParamGridBuilder, CrossValidator

# Create ParamGrid for Cross Validation
lrParamGrid = (ParamGridBuilder()
               .addGrid(lr.regParam, [0.01, 0.5, 2.0])
               # elastic-net mixing values below are assumed (0.0 = pure L2, 1.0 = pure L1)
               .addGrid(lr.elasticNetParam, [0.0, 0.5, 1.0])
               .build())
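
# COMMAND ----------

# MAGIC %md
# MAGIC
# MAGIC A minimal sketch of feeding `lrParamGrid` to a `CrossValidator`: the evaluator choice, the fold count, and the `train` DataFrame are assumed here and may differ from the rest of the notebook.

# COMMAND ----------

from pyspark.ml.evaluation import BinaryClassificationEvaluator

# Assumed evaluator for a binary label column
lrEvaluator = BinaryClassificationEvaluator()

# 3-fold cross-validation over the logistic regression grid defined above
lrCv = CrossValidator(estimator=lr,
                      estimatorParamMaps=lrParamGrid,
                      evaluator=lrEvaluator,
                      numFolds=3)

# `train` is assumed to be the training DataFrame prepared earlier in the notebook
lrCvModel = lrCv.fit(train)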
Example #2
# 12. All steps
from pyspark.ml.classification import GBTClassifier
# 12.1
gbt = GBTClassifier(maxIter=10)
gbtModel = gbt.fit(train)
# 12.2
predictions = gbtModel.transform(test)
predictions.select('age', 'job', 'label', 'rawPrediction',
                   'prediction', 'probability').show(10)

# 12.3
from pyspark.ml.evaluation import BinaryClassificationEvaluator
evaluator = BinaryClassificationEvaluator()
evaluator.evaluate(predictions)  # defaults to area under the ROC curve

# 12.4
print(gbt.explainParams())


############# GG. Gradient Boosting Cross-validation ##################

# 12.5 Cross validation using parameter grid
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator

# 12.6
paramGrid = (ParamGridBuilder()
             .addGrid(gbt.maxDepth, [2, 4, 6])
             .addGrid(gbt.maxBins, [20, 60])
             .addGrid(gbt.maxIter, [10, 20])
             .build())

# 12.7
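# A minimal sketch, assuming step 12.7 is meant to run the grid search: gbt,
# paramGrid, and the evaluator defined above are wired into a CrossValidator.
# The fold count and the cvModel/cvPredictions names are illustrative assumptions.
cv = CrossValidator(estimator=gbt,
                    estimatorParamMaps=paramGrid,
                    evaluator=evaluator,
                    numFolds=5)
cvModel = cv.fit(train)

# Score the best model found by the grid search on the held-out test set
cvPredictions = cvModel.transform(test)
evaluator.evaluate(cvPredictions)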
Example #3
from pyspark.ml.classification import DecisionTreeClassifier
dt = DecisionTreeClassifier()
print(dt.explainParams())
dtModel = dt.fit(bInput)

# COMMAND ----------

from pyspark.ml.classification import RandomForestClassifier
rfClassifier = RandomForestClassifier()
print(rfClassifier.explainParams())
trainedModel = rfClassifier.fit(bInput)

# COMMAND ----------

from pyspark.ml.classification import GBTClassifier
gbtClassifier = GBTClassifier()
print(gbtClassifier.explainParams())
trainedModel = gbtClassifier.fit(bInput)

# COMMAND ----------

from pyspark.ml.classification import NaiveBayes
nb = NaiveBayes()
print(nb.explainParams())
trainedModel = nb.fit(bInput.where("label != 0"))

# COMMAND ----------

from pyspark.mllib.evaluation import BinaryClassificationMetrics
out = trainedModel.transform(bInput)\
  .select("prediction", "label")\
  .rdd.map(lambda x: (float(x[0]), float(x[1])))
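
# A minimal sketch of how `out` would typically be consumed, given the
# BinaryClassificationMetrics import above; the `metrics` name is an assumption.
metrics = BinaryClassificationMetrics(out)
print(metrics.areaUnderROC)  # area under the ROC curve
print(metrics.areaUnderPR)   # area under the precision-recall curve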