Example #1

import elizabeth
from pyspark.ml.classification import NaiveBayes
from pyspark.ml.feature import (CountVectorizer, IDF, IndexToString, NGram,
                                StringIndexer)
from pyspark.sql.functions import avg

def main(train_x,
         train_y,
         test_x,
         test_y=None,
         idf=False,
         ngram=1,
         base='gs',
         asm=False):
    # Load : DF[id, url, features, label?]
    # The DataFrames only have a labels column if labels are given.
    # Drop the text column, since Naive Bayes doesn't use it and we already have the tokens.
    kind = 'asm' if asm else 'bytes'
    train = elizabeth.load(train_x, train_y, base=base, kind=kind).drop('text')
    test = elizabeth.load(test_x, test_y, base=base, kind=kind).drop('text')

    # Convert the string labels to numeric indices.
    # handleInvalid='skip' makes the indexer drop rows whose labels weren't seen during fitting.
    label_indexer = StringIndexer(inputCol='label',
                                  outputCol='indexedLabel',
                                  handleInvalid="skip")
    label_indexer = label_indexer.fit(train)
    train = label_indexer.transform(train)
    # the test set won't always have labels
    if test_y is not None:
        test = label_indexer.transform(test)

    index_labeller = IndexToString(inputCol='prediction',
                                   outputCol='predictedClass',
                                   labels=label_indexer.labels)

    # Train the preprocessor and transform the data.
    prep = elizabeth.Preprocessor()
    prep.add(NGram(n=int(ngram)))
    prep.add(CountVectorizer())
    if idf:
        prep.add(IDF())
    train = prep.fit(train)
    test = prep.transform(test)

    # Naive Bayes : DF[id, url, features, label?, rawPrediction, probability, prediction]
    nb = NaiveBayes(labelCol='indexedLabel').fit(train)
    test = nb.transform(test)
    # DF[id, url, ..., prediction, predictedClass]
    test = index_labeller.transform(test)

    # If labels are given for the test set, print an accuracy score.
    if test_y is not None:
        test = test.orderBy(test.id)
        test = test.withColumn(
            'correct', (test.label == test.predictedClass).cast('double'))
        test = test.select(avg(test.correct))
        test.show()  # show() prints the DataFrame and returns None, so don't wrap it in print()

    # If no labels are given for the test set, print predictions.
    else:
        test = test.orderBy(test.id).select(test.predictedClass)
        test = test.rdd.map(lambda row: int(row.predictedClass))
        test = test.toLocalIterator()
        print(*test, sep='\n')
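
As a standalone illustration of the label round trip in `main` above, here is a minimal sketch with a toy DataFrame (the data and the `spark` session variable are hypothetical):

from pyspark.ml.feature import IndexToString, StringIndexer

# StringIndexer maps string labels to double indices; IndexToString maps back.
toy = spark.createDataFrame([('a', 'cat'), ('b', 'dog'), ('c', 'cat')],
                            ['id', 'label'])
indexer = StringIndexer(inputCol='label', outputCol='indexedLabel',
                        handleInvalid='skip').fit(toy)
indexed = indexer.transform(toy)  # adds indexedLabel: 0.0 for 'cat', 1.0 for 'dog'
back = IndexToString(inputCol='indexedLabel', outputCol='labelAgain',
                     labels=indexer.labels).transform(indexed)
back.show()  # labelAgain matches the original label column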
Example #2
from pyspark.ml.classification import NaiveBayes

model = NaiveBayes()
model = model.fit(train_data)

# # model evaluation

# In[ ]:

from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# In[ ]:

acc_eval = MulticlassClassificationEvaluator()

# In[ ]:

test_results = model.transform(test_data)

# In[ ]:

# Keep only rows whose predicted label index is non-zero.
test_results = test_results.filter(test_results['prediction'] > 0)

# In[ ]:

test_results.count()

# In[ ]:

# The evaluator's default metric is F1.
print('F1:', acc_eval.evaluate(test_results))
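
The same evaluator can report other scores; `metricName` selects which one. A small sketch of a hypothetical follow-up cell:

# Evaluate a few additional multiclass metrics on the same predictions.
for metric in ('accuracy', 'weightedPrecision', 'weightedRecall'):
    ev = MulticlassClassificationEvaluator(metricName=metric)
    print(metric, ev.evaluate(test_results))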

Example #3
import datetime

from pyspark.ml.classification import NaiveBayes

train = splits[0]
test = splits[1]

# Create the Naive Bayes model, train it, and run the prediction
now = datetime.datetime.now()
print(now.year, now.month, now.day, now.hour, now.minute, now.second)

nb = NaiveBayes(labelCol='attack_cat_index',
                featuresCol='features',
                predictionCol='prediction')
nb = nb.fit(train)

now = datetime.datetime.now()
print(now.year, now.month, now.day, now.hour, now.minute, now.second)

result = nb.transform(test)

# Build a list of (true label, prediction) pairs and a helper for the TPR (true positive rate).
prediction_list = result.select("attack_cat_index",
                                "prediction").toPandas().values.tolist()


def truePositiveRate(pairs, label):
    # Fraction of rows with true label `label` that were also predicted as `label`.
    tot_count = 0
    true_count = 0
    for true_label, predicted in pairs:
        if true_label == label:
            tot_count += 1
            if predicted == label:
                true_count += 1
    return true_count / tot_count if tot_count else 0.0
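
A usage sketch for the helper (assuming the `prediction_list` built above): print the TPR for every class that appears in the test labels.

for lbl in sorted({pair[0] for pair in prediction_list}):
    print(lbl, truePositiveRate(prediction_list, lbl))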
Example #4
from pyspark.ml.classification import LogisticRegression, NaiveBayes
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StringIndexer, VectorAssembler

assemblerInputs = indexedCategoricalCols + numericColList
assembler = VectorAssembler(inputCols=assemblerInputs, outputCol="features")
df = assembler.transform(df)

# Indexing binary labels
labeller = StringIndexer(inputCol=label, outputCol="label").fit(df)
df = labeller.transform(df).select(["features", "label"])

# Randomly split data into training and test sets; set the seed for reproducibility.
(trainingData, testData) = df.randomSplit([0.7, 0.3], seed=100)

# dt = DecisionTreeClassifier(labelCol="label", featuresCol="features")
lr = LogisticRegression(regParam=0.01)
model = lr.fit(trainingData)

# Make predictions.
predictions = model.transform(testData)
# Evaluator is abstract, so instantiate a concrete evaluator (default metric: F1).
evaluator = MulticlassClassificationEvaluator()
# Select example rows to display.
predictions.select("prediction", "label", "features").show()
# Evaluate the learned model
print("LogRegression Test %s: %f" %
      (evaluator.getMetricName(), evaluator.evaluate(predictions)))

nb = NaiveBayes(thresholds=[0.1, 1.0])
model = nb.fit(trainingData)
predictions = model.transform(testData)
predictions.select("prediction", "label", "features").show()

print("Bayes Test %s: %f" %
      (evaluator.getMetricName(), evaluator.evaluate(predictions)))
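
As a design note, Spark's Pipeline API can chain the assembler, label indexer, and classifier so a single fit() runs every stage in order. A minimal sketch, assuming `raw_df` (a hypothetical name) holds the DataFrame before the transforms above:

from pyspark.ml import Pipeline

# Chain feature assembly, label indexing, and the classifier into one estimator.
stages = [
    VectorAssembler(inputCols=assemblerInputs, outputCol="features"),
    StringIndexer(inputCol=label, outputCol="label"),
    LogisticRegression(regParam=0.01),
]
pipeline_model = Pipeline(stages=stages).fit(raw_df)
predictions = pipeline_model.transform(raw_df)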