##### Trees #####
##### Now let's try three variants of tree-based classification.
##### The API is slightly different from previous algos: the tree trainers
##### expose trainClassifier(...) and take the class count and the
##### categorical-feature map explicitly.
from pyspark.mllib.tree import DecisionTree
from pyspark.mllib.tree import GradientBoostedTrees
from pyspark.mllib.tree import RandomForest

# Single decision tree on a two-class problem. No feature is declared
# categorical (empty map).
algo = DecisionTree()
model = algo.trainClassifier(
    training_data,
    numClasses=2,
    categoricalFeaturesInfo={},
)
score(model)

# Gradient-boosted trees with 10 boosting iterations. No numClasses is
# passed here, unlike the other two tree trainers.
algo = GradientBoostedTrees()
model = algo.trainClassifier(
    training_data,
    categoricalFeaturesInfo={},
    numIterations=10,
)
score(model)

# Random forest made of 16 trees, two classes.
algo = RandomForest()
model = algo.trainClassifier(
    training_data,
    numClasses=2,
    categoricalFeaturesInfo={},
    numTrees=16,
)
score(model)

#### Naive Bayes ####
#### Last but not least, let's try the Naive Bayes classifier.
from pyspark.mllib.classification import NaiveBayes

# NOTE(review): MLlib's NaiveBayes is documented to expect non-negative
# feature values -- confirm training_data satisfies this.
algo = NaiveBayes()
model = algo.train(training_data)
score(model)