Python NaiveBayes.train Examples

Programming Language: Python

Namespace/Package Name: pyspark.ml.classification

Class/Type: NaiveBayes

Method/Function: train

Examples at hotexamples.com: 3

Python NaiveBayes.train - 3 examples found. These are the top-rated real-world Python examples of pyspark.ml.classification.NaiveBayes.train, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

NaiveBayes(30)

fit(30)

transform(4)

train(3)

explainParams(2)

setLabelCol(2)

setPredictionCol(2)

getModelType(1)

getSmoothing(1)

load(1)

save(1)

setFeaturesCol(1)

setThresholds(1)

write(1)

Example #1

Show file

 def train(self):
     """Fit a Naive Bayes model on the stored documents.

     Each document is a mapping with a 'label' and a 'text' entry. The
     documents are distributed through the Spark context, turned into
     labeled points via ``compTF``, re-weighted with ``compIDF`` /
     ``compTFIDF`` (helpers defined elsewhere), and the trained model
     is kept on the instance.
     """
     self.logger.log("info", "Training Model")

     def to_labeled_point(doc):
         # Pair the document's label with the feature vector built from its text.
         return LabeledPoint(doc['label'], compTF(doc['text']))

     tf_points = self.__sc.parallelize(self.__documents).map(to_labeled_point)
     idf_model = compIDF(tf_points)
     tfidf_points = compTFIDF(tf_points, idf_model)

     self.__model = NaiveBayes.train(tfidf_points)
     self.logger.log("info", "Complate")

Example #2

Show file

# Load training data.
# NOTE(review): `fit`, `transform` and `show` below are DataFrame-based calls,
# while `sc.textFile(...).map(parseLine)` yields an RDD. Confirm that the
# input is turned into a DataFrame with "label" and "features" columns
# (e.g. through a SparkSession) before relying on this script.
sc = SparkContext(appName="PythonNaiveBayesExample")
data = sc.textFile("C:\\PySpark_MLib\\data\\classification\\Immunotherapy.csv").map(parseLine)

# Split the data into train (60%) and test (40%); the seed 0 keeps the split repeatable.
splits = data.randomSplit([0.6, 0.4], 0)
train = splits[0]
test = splits[1]

# Create the trainer and set its parameters.
nb = NaiveBayes(smoothing=1.0, modelType="multinomial")

# Train the model. The fitted estimator is the only model used from here on:
# `transform` below is only available on the model returned by `fit`.
model = nb.fit(train)

# Select example rows to display.
predictions = model.transform(test)
predictions.show()

# Compute accuracy on the test set.
evaluator = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction",
                                              metricName="accuracy")
accuracy = evaluator.evaluate(predictions)
print("Test set accuracy = " + str(accuracy))

Example #3

Show file

File: mllib_toy.py Project: tianruip1994/Toy_Application

            new_string = "1 "
        elif "Iris-virginica" in line[-1]:
            new_string = "3 "
        elif "Iris-versicolor" in line[-1]:
            new_string = "3 "
        # new_string = line[0] + " "
        count = 1
        for i in line[:-1]:
            new_string += str(count) + ":" + str(i) + " "
            count += 1
        new_string += "\n"
        output_string += new_string
        # print(repr(row))
        # print(repr(new_string))
        # break
    output_file = open("iris.txt", "w")
    output_file.write(output_string)
    output_file.close()

    sc = SparkContext()
    sc.setLogLevel('ERROR')
    data = MLUtils.loadLibSVMFile(sc, "iris.txt")
    training, test = data.randomSplit([0.8, 0.2])
    model = NaiveBayes.train(training, 1.0)
    # Make prediction and test accuracy.
    predictionAndLabel = test.map(lambda p:
                                  (model.predict(p.features), p.label))
    accuracy = 1.0 * predictionAndLabel.filter(
        lambda (x, v): x == v).count() / test.count()
    print('model accuracy {}'.format(accuracy))