# Fit a random forest classifier on the prepared binary input DataFrame.
# NOTE(fix): the original used Python 2 `print` statements, which are syntax
# errors on Python 3; converted to print() calls for consistency with the
# rest of the file.
rfClassifier = RandomForestClassifier()
print(rfClassifier.explainParams())
trainedModel = rfClassifier.fit(bInput)

# COMMAND ----------

# Gradient-boosted trees on the same input; rebinds trainedModel.
from pyspark.ml.classification import GBTClassifier
gbtClassifier = GBTClassifier()
print(gbtClassifier.explainParams())
trainedModel = gbtClassifier.fit(bInput)

# COMMAND ----------

# Naive Bayes, trained only on rows where label != 0 (per the example);
# rebinds trainedModel again, so the metrics below evaluate this model.
from pyspark.ml.classification import NaiveBayes
nb = NaiveBayes()
print(nb.explainParams())
trainedModel = nb.fit(bInput.where("label != 0"))

# COMMAND ----------

# Evaluate the most recently trained model: map (prediction, label) pairs
# to an RDD of float tuples, as required by the RDD-based metrics API.
from pyspark.mllib.evaluation import BinaryClassificationMetrics
out = trainedModel.transform(bInput)\
  .select("prediction", "label")\
  .rdd.map(lambda x: (float(x[0]), float(x[1])))
metrics = BinaryClassificationMetrics(out)

# COMMAND ----------

print(metrics.areaUnderPR)
print(metrics.areaUnderROC)
# Train a random forest model and inspect its configurable parameters.
rfClassifier = RandomForestClassifier()
print(rfClassifier.explainParams())
trainedModel = rfClassifier.fit(bInput)

# COMMAND ----------

# Same workflow with gradient-boosted trees; trainedModel is rebound.
from pyspark.ml.classification import GBTClassifier
gbtClassifier = GBTClassifier()
print(gbtClassifier.explainParams())
trainedModel = gbtClassifier.fit(bInput)

# COMMAND ----------

# Naive Bayes variant, fit on the subset where label != 0.
from pyspark.ml.classification import NaiveBayes
nb = NaiveBayes()
print(nb.explainParams())
trainedModel = nb.fit(bInput.where("label != 0"))

# COMMAND ----------

# Build an RDD of (prediction, label) float pairs for the RDD-based
# evaluation API, then compute binary classification metrics.
from pyspark.mllib.evaluation import BinaryClassificationMetrics
scoredRows = trainedModel.transform(bInput).select("prediction", "label")
out = scoredRows.rdd.map(lambda row: (float(row[0]), float(row[1])))
metrics = BinaryClassificationMetrics(out)

# COMMAND ----------

print(metrics.areaUnderPR)
print(metrics.areaUnderROC)
print("Receiver Operating Characteristic")