# Example #1
0
##### Trees
#####
##### Now let's try three variants of tree-based classification.
##### The API is slightly different from the previous algorithms.
from pyspark.mllib.tree import DecisionTree

from pyspark.mllib.tree import GradientBoostedTrees

from  pyspark.mllib.tree import RandomForest

# Single decision tree, binary classification (numClasses=2).
# trainClassifier is a classmethod, so it is reached through the class itself
# instead of a throwaway DecisionTree() instance; `algo` stays bound because
# the surrounding script uses that name for the current algorithm.
# An empty categoricalFeaturesInfo declares no categorical features, i.e.
# every feature is treated as continuous.
algo = DecisionTree
model = algo.trainClassifier(
    training_data,
    numClasses=2,
    categoricalFeaturesInfo={},
)
# score() is defined outside this snippet; presumably it evaluates the trained
# model -- confirm against its definition.
score(model)


# Gradient-boosted trees with 10 boosting iterations.
# trainClassifier is a classmethod, so it is reached through the class itself
# instead of a throwaway GradientBoostedTrees() instance; `algo` stays bound
# because the surrounding script uses that name for the current algorithm.
# No numClasses is passed here: unlike the other tree trainers, this one does
# not take it (per the MLlib API docs the GBT classifier is binary-only).
algo = GradientBoostedTrees
model = algo.trainClassifier(
    training_data,
    categoricalFeaturesInfo={},
    numIterations=10,
)
# score() is defined outside this snippet; presumably it evaluates the trained
# model -- confirm against its definition.
score(model)

# Random forest of 16 trees, binary classification (numClasses=2).
# trainClassifier is a classmethod, so it is reached through the class itself
# instead of a throwaway RandomForest() instance; `algo` stays bound because
# the surrounding script uses that name for the current algorithm.
# NOTE(review): no seed is passed, so per the MLlib API docs the forest is
# seeded from system time and scores may differ between runs -- pass
# seed=<int> if reproducible results are wanted.
algo = RandomForest
model = algo.trainClassifier(
    training_data,
    numClasses=2,
    categoricalFeaturesInfo={},
    numTrees=16,
)
# score() is defined outside this snippet; presumably it evaluates the trained
# model -- confirm against its definition.
score(model)

#### Naive Bayes
#### Last but not least, let's try the Naive Bayes classifier.
from pyspark.mllib.classification import NaiveBayes
# Naive Bayes with the trainer's default settings.
# train is a classmethod, so it is reached through the class itself instead
# of a throwaway NaiveBayes() instance; `algo` stays bound because the
# surrounding script uses that name for the current algorithm.
# NOTE(review): MLlib's Naive Bayes expects non-negative feature values --
# confirm that training_data satisfies this.
algo = NaiveBayes
model = algo.train(training_data)
# score() is defined outside this snippet; presumably it evaluates the trained
# model -- confirm against its definition.
score(model)