Python NaiveBayes.predict 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: naivebayes

클래스/타입: NaiveBayes

메소드/함수: predict

hotexamples.com에서의 예제들: 10

Python NaiveBayes.predict - 10개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python naivebayes.NaiveBayes.predict의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

NaiveBayes(30)

train(14)

predict(9)

classify(7)

fit(6)

evaluate(3)

predict_all(2)

show(2)

printMostindicativeHamWords(1)

predict_(1)

prediksi(1)

printMostPopularHamWords(1)

printMostPopularSpamWords(1)

train_set(1)

printMostindicativeSpamWords(1)

save(1)

train_model(1)

perfomance(1)

score(1)

teach(1)

update(1)

saveToDB(1)

load_json(1)

mulai_training(1)

evaluate_model(1)

calMetric(1)

class_perfomance(1)

classifier(1)

classifyAndEvaluateAllInFolder(1)

compute(1)

computeCrossValidation(1)

dump_json(1)

getTestWords(1)

calBayes(1)

get_word_list(1)

human_labels(1)

init(1)

learn_parameters(1)

load(1)

loadModelFromDB(1)

load_data_training(1)

value_weight(1)

예제 #1

파일 보기

파일: cross_validation.py 프로젝트: IshitaTakeshi/NaiveBayes

class CrossValidation(object):
    """Hold-out evaluation of a ``NaiveBayes`` classifier on labelled tweets."""

    def __init__(self):
        self.classifier = NaiveBayes()

    def create_data(self, user_ids):
        """Return a shuffled list of ``(tweet, category)`` pairs.

        user_ids: Mapping from category to the Twitter IDs of that category.
        """
        data = []
        for category, ids in user_ids.items():
            tweets = get_tweets(ids)
            categories = [category] * len(tweets)
            data += list(zip(tweets, categories))

        np.random.shuffle(data)
        return data

    def split(self, data, test_percentage):
        """Split ``data`` into a training part and a test part.

        The last ``int(len(data) * test_percentage)`` pairs form the test
        part. Each part is returned as an ``(items, labels)`` pair of tuples.
        An empty part is returned as ``((), ())`` so that
        ``items, labels = part`` stays valid when ``test_percentage`` is
        0 or 1.
        """
        n_test = int(len(data) * test_percentage)
        n_training = len(data) - n_test

        def unzip(pairs):
            # Inverse of zip. zip(*[]) yields nothing at all, so the two
            # empty columns have to be supplied explicitly.
            columns = tuple(zip(*pairs))
            return columns if columns else ((), ())

        return unzip(data[:n_training]), unzip(data[n_training:])

    def show_tweets_with_labels(self, tweets, labels):
        """Print every tweet under its label."""
        for tweet, label in zip(tweets, labels):
            print("{}:\n{}\n".format(label, tweet))

    def evaluate(self, user_ids, test_percentage=0.2, verbose=True):
        """Fit on the training part and predict the held-out part.

        user_ids: Twitter IDs separated into categories.
        test_percentage: Ratio of the amount of test data extracted
        from tweets.
        verbose: When true, print each test tweet with its predicted label.

        Returns ``(results, answers)``: the predicted labels and the true
        labels of the test part.

        Raises ValueError if ``test_percentage`` is outside [0, 1].
        """
        if not (0 <= test_percentage <= 1):
            raise ValueError("test_percentage must be between 0 and 1 "
                             "(inclusive).")

        data = self.create_data(user_ids)
        training, test = self.split(data, test_percentage)

        tweets, categories = training
        self.classifier.fit(tweets, categories)

        tweets, answers = test
        results = self.classifier.predict(tweets)

        if verbose:
            self.show_tweets_with_labels(tweets, results)

        return results, answers

예제 #2

파일 보기

class CrossValidation(object):
    """Train a ``NaiveBayes`` classifier on tweets and score a held-out part."""

    def __init__(self):
        self.classifier = NaiveBayes()

    def create_data(self, user_ids):
        """Collect ``(tweet, category)`` pairs for all categories, shuffled.

        user_ids: Mapping from category to the Twitter IDs of that category.
        """
        data = []
        for category, ids in user_ids.items():
            tweets = get_tweets(ids)
            categories = [category] * len(tweets)
            data += list(zip(tweets, categories))

        np.random.shuffle(data)
        return data

    @staticmethod
    def _unzip(pairs):
        """Turn ``[(item, label), ...]`` into ``(items, labels)`` tuples."""
        if len(pairs) == 0:
            # zip(*[]) would produce no columns, which cannot be unpacked
            # into two names by the caller.
            return (), ()
        return tuple(zip(*pairs))

    def split(self, data, test_percentage):
        """Cut ``data`` into ``(training, test)``.

        The test part is the trailing ``int(len(data) * test_percentage)``
        pairs. Both parts are ``(items, labels)`` pairs of tuples; a part
        without any data is ``((), ())``, so unpacking it never fails.
        """
        n_test = int(len(data) * test_percentage)
        n_training = len(data) - n_test

        training = self._unzip(data[:n_training])
        test = self._unzip(data[n_training:])
        return training, test

    def show_tweets_with_labels(self, tweets, labels):
        """Print each tweet preceded by its label."""
        for tweet, label in zip(tweets, labels):
            print("{}:\n{}\n".format(label, tweet))

    def evaluate(self, user_ids, test_percentage=0.2, verbose=True):
        """Fit on the training part, then predict the test part.

        user_ids: Twitter IDs separated into categories.
        test_percentage: Ratio of the amount of test data extracted
        from tweets.
        verbose: When true, print each test tweet with its predicted label.

        Returns the predicted labels and the true labels of the test part.

        Raises ValueError if ``test_percentage`` is not within [0, 1].
        """
        if not (0 <= test_percentage <= 1):
            raise ValueError("test_percentage must be between 0 and 1 "
                             "(inclusive).")

        data = self.create_data(user_ids)
        training, test = self.split(data, test_percentage)

        tweets, categories = training
        self.classifier.fit(tweets, categories)

        tweets, answers = test
        results = self.classifier.predict(tweets)

        if verbose:
            self.show_tweets_with_labels(tweets, results)

        return results, answers

예제 #3

파일 보기

파일: old-rocs.py 프로젝트: balqui/PyDaMelo

    if c_true == pos_class:
        pos += 1
    else:
        neg += 1

# One list of scored test instances per ranking criterion.
result_pos = []
result_neg = []
result_dif = []
result_nor = []
for v, c_true in d.test_set:
    # Prepare predictions for sorting. In case of equal weight, positive
    # instances come first. Both the true class and the first NB prediction
    # are stored next to the score.
    c_pred_nb = prnb.predict(v)
    first_pred = c_pred_nb[0]

    # Accumulate the weight of the positive class and of all other classes.
    wy = 0
    wn = 0
    for c in prnb.clssprobs:
        weight = prnb.value_weight(v, c)
        if c == pos_class:
            wy += weight
        else:
            wn += weight

    is_pos = c_true == pos_class
    result_dif.append((wy - wn, is_pos, c_true, first_pred))
    result_pos.append((wy, is_pos, c_true, first_pred))
    result_neg.append((wn, c_true != pos_class, c_true, first_pred))
    normalized = wy / (wy + wn)
    result_nor.append((normalized, is_pos, c_true, first_pred))

# Diagonal reference line.
plt.plot([-0.001, 1.001], [-0.001, 1.001], color="orange")

trpos = 0

예제 #4

파일 보기

파일: naivebayes_tests.py 프로젝트: hamzaouazzi/MLAlgorithms

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from naivebayes import NaiveBayes


def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both arguments are converted to NumPy arrays before comparing, so plain
    Python sequences are compared element by element instead of as whole
    objects (``[1, 0] == [1, 1]`` is a single ``False``).
    """
    matches = np.asarray(y_true) == np.asarray(y_pred)
    return np.sum(matches) / len(y_true)


# Synthetic two-class problem: 1000 samples with 10 features each.
X, y = datasets.make_classification(
    n_samples=1000, n_features=10, n_classes=2, random_state=123
)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Fit on the training part and predict the held-out part.
nb = NaiveBayes()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)

score = accuracy(y_test, predictions)
print("Naive Bayes classification accuracy", score)

예제 #5

파일 보기

from data import Data
from naivebayes import NaiveBayes

# Dataset to load; the commented alternatives select other datasets.
filename = "datasets/weatherNominal.td"
# filename = "datasets/titanic.td"
# filename = "datasets/cmc.td"

d = Data(filename)
d.report()

# Train a Naive Bayes predictor on the loaded data and display it.
pr = NaiveBayes(d)
pr.train()
pr.show()

# Print the first prediction for every test instance next to its true class.
for v, c_true in d.test_set:
    predicted = pr.predict(v)
    c_pred = predicted[0]
    print(v, ":")
    print("   ", c_pred, "( true class:", c_true, ")")

# Ad-hoc single predictions (titanic dataset), kept for reference:
#     print(pr.predict(("Class:1st","Sex:Female","Age:Child")))
#     print(pr.predict(("Class:Crew","Sex:Female","Age:Child")))

예제 #6

파일 보기

파일: main.py 프로젝트: ErnstRoell/MachineLearningAlgorithms

def accuracy(y_true, y_pred):
    """Return the share of predictions in ``y_pred`` that match ``y_true``.

    The inputs are converted to NumPy arrays first; comparing two plain
    lists with ``==`` would give one boolean for the whole list rather
    than one per element.
    """
    hits = np.asarray(y_true) == np.asarray(y_pred)
    return np.sum(hits) / len(y_true)


# Three blobs of 2-D points, 1000 samples in total.
X, y = datasets.make_blobs(
    n_samples=1000,
    n_features=2,
    centers=3,
    cluster_std=1.0,
    center_box=(-10.0, 10.0),
    shuffle=True,
    random_state=123,
    return_centers=False,
)

# 90% of the samples are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.9, random_state=1234
)

clf = NaiveBayes()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(accuracy(y_test, y_pred))

# Colour every test point by its predicted class.
color_map = {0: 'r', 1: 'k', 2: 'g'}

label_color = [color_map[label] for label in y_pred]
plt.scatter(X_test[:, 0], X_test[:, 1], c=label_color)
plt.show()

예제 #7

파일 보기

파일: compare.py 프로젝트: pereverges/master/master-master/adm/compare.py

# Compare the predictions of a MaxAPost predictor and a NaiveBayes predictor
# on the test set of one dataset, and list the instances that need a look.
# NOTE: the print statements below are Python 2 syntax.
filename = "ds/titanicTr.txt"

# NOTE(review): the meaning of the second argument (75) is not visible here;
# presumably a train/test split percentage - confirm against Data.
d = Data(filename, 75)

prmap = MaxAPost(d)
prmap.train()

prnb = NaiveBayes(d)
prnb.train()

# One confusion matrix per predictor, built from its class counts.
cmmap = ConfMat(prmap.clsscnts)
cmnb = ConfMat(prnb.clsscnts)
# Instances where the predictors disagree or at least one gave no prediction.
comparing = set([])
for (v, c_true) in d.test_set:
    c_pred_map = tuple(prmap.predict(v))
    c_pred_nb = tuple(prnb.predict(v))
    if len(c_pred_map) and len(c_pred_nb):
        # Both predictors returned something: compare their first predictions
        # and count each one in its matrix at [predicted, true].
        warn = (c_pred_map[0] != c_pred_nb[0])
        cmmap.mat[c_pred_map[0], c_true] += 1
        cmnb.mat[c_pred_nb[0], c_true] += 1
    else:
        # At least one predictor returned an empty prediction.
        warn = True
    if warn:
        comparing.add((v, c_true, c_pred_map, c_pred_nb))

# Bare Python 2 print statement: emits an empty line.
print
for r in sorted(comparing):
    print r[0], ": true class ", r[1]
    # The trailing comma keeps the NB prediction on the same output line.
    print "    MAP pred", r[2],
    print "    NB pred", r[3]

예제 #8

파일 보기

파일: main.py 프로젝트: kovkev/McGill_Fall2017_COMP551_P2

    # Train an AdaBoost model (ab) and a Naive Bayes model (nb), check them
    # on a validation split, then start producing predictions for the test
    # file. This excerpt is the interior of a function whose header is not
    # shown.
    validation_set_size = 10000
    train_set, validation_set = split_train_validation(dataset,
                                                       validation_set_size)
    num_to_train_on = 10000000
    # NOTE(review): both models are trained on dataset[:num_to_train_on], not
    # on train_set. If validation_set is drawn from dataset, the models may
    # have seen the validation examples during training - confirm.
    time_before("training adaboost")
    ab.train_set(dataset[:num_to_train_on])
    time_after("training adaboost")
    time_before("training naive bayes")
    nb.train_set(dataset[:num_to_train_on])
    time_after("training naive bayes")

    # One boolean per validation example: True when the prediction equals the
    # first field of the example (i[0]); the remaining fields are passed to
    # predict as positional arguments.
    kg_validations_nb = []
    kg_validations_ab = []

    for i in validation_set:
        kg_validations_nb.append(nb.predict(*i[1:]) == i[0])
        kg_validations_ab.append(ab.predict(*i[1:]) == i[0])

    # Only the AdaBoost error count is printed; the Naive Bayes one is
    # commented out.
    # print("Errors nb: %s " % sum([0 if i else 1 for i in kg_validations_nb]))
    print("Errors ab: %s " % sum([0 if i else 1 for i in kg_validations_ab]))

    # import pdb; pdb.set_trace()

    predictions = []

    print("creating predictions...")
    with open(testset, "r") as testfile:
        data = testfile.read()
        # Drop the first line and keep at most num_to_train_on lines.
        # NOTE(review): the first line is presumably a header - confirm.
        lines = data.split('\n')[1:][:num_to_train_on]
        for line in lines:
            if not line:

예제 #9

파일 보기

# Train a NaiveBayes model on a features matrix, validate it on the rows
# after the first 22000, then refit on everything and write test predictions.
# The excerpt ends inside the final loop.

# Warming up text-processing engines
textPrep = TextPreprocessing(ngrams_n=4,
                             ngrams_count=2000).load(dataset.stopWords)

# Warming up the FeaturesMatrixBuilder
featuresMatrix = FeaturesMatrixBuilder(dataset, textPrep)

# Doing the actual training on the first 22000 reviews
XTrain, yTrain = featuresMatrix.buildTrainingData()
nb = NaiveBayes()
nb.fit(XTrain[:22000, :], yTrain[:22000])

# Validating on the remaining
# NOTE(review): y is assigned but not used in the visible code; the next
# lines slice yTrain[22000:] again instead.
y = yTrain[22000:]
yhat = nb.predict(XTrain[22000:, :])
m = getConfusionMatrix(yTrain[22000:], yhat)
print("\n=== RESULTS ===")
# NOTE(review): t is not assigned anywhere above in this excerpt; presumably
# a timer started earlier in the file - confirm.
endTimer(t)
printResults(m)

# Running the model on the test set
print("Training using the whole training set this time")
nb.fit(XTrain, yTrain)
(XTest, ids) = featuresMatrix.buildTestData()
yhat = nb.predict(XTest)
with open("output/test.txt", "w") as f:
    t = timer()
    print("Writing the test results file")
    # Header line of the output file.
    f.write("Id,Category\n")
    for i, yi in tqdm(enumerate(yhat)):

예제 #10

파일 보기

def simplified_bayes(train_letters, test_letters, prior):
    """Predict every test letter and return the predictions as one string.

    A ``NaiveBayes`` instance is built from ``train_letters`` and ``prior``;
    its ``predict`` result for each element of ``test_letters`` is
    concatenated in order.
    """
    classifier = NaiveBayes(train_letters, prior)
    return ''.join(map(classifier.predict, test_letters))