def _bnb(t, min_freq, save=False):
    """Return a ``('bnb', classifier)`` pair, either freshly trained or loaded.

    Args:
        t: Forwarded unchanged to ``save_classifier`` / ``load_classifier``.
        min_freq: Forwarded unchanged to ``save_classifier`` /
            ``load_classifier``.
        save: When truthy, train a new ``bnb()`` model on the module-level
            ``records`` and ``labels`` and persist it with
            ``save_classifier``. Otherwise fetch an existing model with
            ``load_classifier``.

    Returns:
        A 2-tuple whose first item is the literal string ``'bnb'`` and whose
        second item is the classifier object.
    """
    if not save:
        # Reuse whatever was stored under (t, 'bnb', min_freq).
        model = load_classifier(t, 'bnb', min_freq)
    else:
        # The value returned by fit() is what gets saved and returned.
        model = bnb().fit(records, labels)
        save_classifier(model, t, 'bnb', min_freq)
    return ('bnb', model)
def naive_bayes_bnb(x_train, y_train, x_test, y_test):
    """Fit a fresh ``bnb()`` model and predict labels for the test samples.

    Args:
        x_train: Training samples passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        x_test: Samples passed to ``predict``.
        y_test: Reference labels for ``x_test``; returned untouched so the
            caller can compare them against the predictions.

    Returns:
        A ``(expected, predicted)`` tuple, where ``expected`` is ``y_test``
        and ``predicted`` is the output of ``predict(x_test)``.
    """
    classifier = bnb()
    classifier.fit(x_train, y_train)
    return y_test, classifier.predict(x_test)
# Give every user who asked or answered a question a dense column index.
# nbuserDict (userId -> column) and nbuserId (next free column) are expected
# to be initialised before this point.
for qid in loadData.questions:
    question = loadData.questions[qid]
    if question.userId not in nbuserDict:
        nbuserDict[question.userId] = nbuserId
        nbuserId += 1
    for aid in question.answers:
        answerUserId = loadData.answers[aid].userId
        if answerUserId not in nbuserDict:
            nbuserDict[answerUserId] = nbuserId
            nbuserId += 1

# Training is done in mini-batches of one fifth of numQuestions so that the
# dense question-by-user matrix never has to be held in memory at once.
# `//` keeps the size an integer on Python 3 as well as Python 2.
batchSize = numQuestions // 5
X = np.zeros((batchSize, nbuserId))
y = np.zeros(batchSize)
yvalues = np.array([0, 1])

# One binary classifier per tag, for the first ten entries of sortedTags.
for t in range(0, 10):
    tag = sortedTags[t][0]
    # Single-argument call form prints identically on Python 2 and 3.
    print(loadData.tags[tag].tag)
    clf = bnb(alpha=0.001)
    i = 0
    for qid in loadData.questions:
        question = loadData.questions[qid]
        i = i % batchSize
        # The buffer row is reused across batches and tags; clear it first so
        # users from the question that previously occupied it do not leak
        # into this sample.
        X[i, :] = 0
        X[i][nbuserDict[question.userId]] = 1
        for aid in question.answers:
            X[i][nbuserDict[loadData.answers[aid].userId]] = 1
        y[i] = 1 if tag in question.tags else 0
        if i == batchSize - 1:
            # Buffer is full: feed this batch to the incremental learner.
            clf.partial_fit(X, y, classes=yvalues)
        i += 1
    # Feed the trailing, partially filled batch (if any) incrementally.
    # A plain fit() here would re-train from scratch on the buffer and throw
    # away everything learned by the partial_fit() calls above (assuming bnb
    # is scikit-learn's BernoulliNB, whose API this matches).
    remainder = i % batchSize
    if remainder:
        clf.partial_fit(X[:remainder], y[:remainder], classes=yvalues)
from sklearn.naive_bayes import GaussianNB as gnb, BernoulliNB as bnb

# Load the training recipes; each entry is read for its "cuisine" label and
# its "ingredients" list.
with open('train.json') as data:
    train = json.load(data)

cuisine = []
ingredients = []
for recipe in train:
    cuisine.append(recipe["cuisine"])
    ingredients.extend(recipe["ingredients"])

# Vocabulary of distinct ingredients and the column each one maps to.
singredients = list(set(ingredients))
d = {name: column for column, name in enumerate(singredients)}

# One binary presence/absence row per recipe.
traind = []
for recipe in train:
    row = [0] * len(singredients)
    for name in recipe["ingredients"]:
        row[d[name]] = 1
    traind.append(row)

# 3-fold evaluation of both Naive Bayes variants on the same folds.
# NOTE(review): kf / cvs are imported elsewhere; presumably KFold and
# cross_val_score from sklearn.model_selection -- confirm.
k_fold = kf(n_splits=3)
ga = cvs(gnb(), traind, cuisine, cv=k_fold, n_jobs=-1)
ba = cvs(bnb(), traind, cuisine, cv=k_fold, n_jobs=-1)

# The context manager guarantees the report file is flushed and closed.
# The file is opened in binary mode, so the ASCII text is encoded explicitly;
# this writes the same bytes on Python 2 and also works on Python 3.
# NOTE(review): no separator is written between the two messages, exactly as
# before -- confirm whether a newline is wanted.
with open('2d', 'wb') as f:
    s = "Gaussian accuracy is: " + str(np.mean(ga))
    print(s)
    f.write(s.encode('ascii'))
    s = "Bernoulli accuracy is: " + str(np.mean(ba))
    print(s)
    f.write(s.encode('ascii'))
def predictByBNB(features, classes, test):
    """Train a ``bnb`` model with ``binarize=2`` and predict labels for ``test``.

    Args:
        features: Training samples passed to ``fit``.
        classes: Training labels passed to ``fit``.
        test: Samples to classify.

    Returns:
        Whatever the fitted model's ``predict(test)`` returns.
    """
    model = bnb(binarize=2)
    model.fit(features, classes)
    predictions = model.predict(test)
    return predictions