Ejemplo n.º 1
0
def _bnb(t, min_freq, save=False):
    """Return a ``('bnb', classifier)`` pair, training or loading the model.

    When ``save`` is true, a new ``bnb()`` model is fitted on the
    module-level ``records`` / ``labels`` and handed to ``save_classifier``
    together with ``t``, the tag ``'bnb'`` and ``min_freq``.  Otherwise the
    classifier is obtained from ``load_classifier`` with the same three
    identifying arguments.
    """
    if not save:
        # Reuse a previously stored classifier instead of training.
        return ('bnb', load_classifier(t, 'bnb', min_freq))

    model = bnb().fit(records, labels)
    save_classifier(model, t, 'bnb', min_freq)
    return ('bnb', model)
Ejemplo n.º 2
0
def naive_bayes_bnb(x_train, y_train, x_test, y_test):
    """Fit a ``bnb`` model on the training split and predict the test split.

    Returns a ``(expected, predicted)`` tuple where ``expected`` is
    ``y_test`` passed through untouched and ``predicted`` is the model's
    output for ``x_test``.
    """
    # Relies on fit() returning the fitted estimator (scikit-learn convention).
    classifier = bnb().fit(x_train, y_train)
    return y_test, classifier.predict(x_test)
Ejemplo n.º 3
0
# Assign the next free integer id (nbuserId) to every user that asked a
# question or wrote one of its answers; users already present in nbuserDict
# keep the id they have.
for qid in loadData.questions:
  question = loadData.questions[qid]
  asker = question.userId
  if asker not in nbuserDict:
    nbuserDict[asker] = nbuserId
    nbuserId += 1
  for aid in question.answers:
    answerer = loadData.answers[aid].userId
    if answerer not in nbuserDict:
      nbuserDict[answerer] = nbuserId
      nbuserId += 1
      
# Train one incremental classifier per tag for the first ten entries of
# sortedTags.  Each question becomes one row of user-indicator features
# (a 1 in the column of the asker and of every answerer); the label is 1
# when the question carries the current tag.
#
# Mini-batch size is one fifth of the questions.  Floor division keeps it an
# integer on both Python 2 and Python 3.
batchSize = numQuestions // 5
X = np.zeros((batchSize, nbuserId))
y = np.zeros(batchSize)
yvalues = np.array([0, 1])
for t in range(0, 10):
  tag = sortedTags[t][0]
  print(loadData.tags[tag].tag)
  clf = bnb(alpha=0.001)
  i = 0
  for qid in loadData.questions:
    # Wrap around so the same buffer is reused for every mini-batch.
    i = i % batchSize
    # Clear the row first: the buffer is shared across batches (and tags),
    # so without this, indicator bits from an earlier question would leak
    # into the current sample.
    X[i] = 0
    X[i][nbuserDict[loadData.questions[qid].userId]] = 1
    for aid in loadData.questions[qid].answers:
      X[i][nbuserDict[loadData.answers[aid].userId]] = 1
    if tag in loadData.questions[qid].tags:
      y[i] = 1
    else:
      y[i] = 0
    if i == batchSize - 1:
      # Buffer is full: feed this mini-batch to the model.
      clf.partial_fit(X, y, classes=yvalues)
    i += 1

  # Feed the trailing, incomplete batch (rows 0..i-1).  This must be
  # partial_fit on the leftover rows only: a plain fit() would re-train
  # from scratch and throw away every batch learned above, and the rows
  # past i still hold samples that were already consumed.
  if 0 < i < batchSize:
    clf.partial_fit(X[:i], y[:i], classes=yvalues)
Ejemplo n.º 4
0
from sklearn.naive_bayes import GaussianNB as gnb, BernoulliNB as bnb

# Compare Gaussian and Bernoulli naive Bayes on a bag-of-ingredients
# encoding of the recipes in train.json, and record the mean
# cross-validation score of each in the file '2d'.
with open('train.json') as data:
    train = json.load(data)

# One label per recipe, plus the flat list of every ingredient mention.
cuisine = [recipe["cuisine"] for recipe in train]
ingredients = []
for recipe in train:
    ingredients.extend(recipe["ingredients"])

# Distinct ingredients define the feature columns; d maps name -> column.
singredients = list(set(ingredients))
d = {name: column for column, name in enumerate(singredients)}

# Binary presence vector per recipe.
traind = []
for recipe in train:
    row = [0] * len(singredients)
    for name in recipe["ingredients"]:
        row[d[name]] = 1
    traind.append(row)

# NOTE(review): kf / cvs are presumably sklearn's KFold and
# cross_val_score imported elsewhere in the file - confirm.
k_fold = kf(n_splits=3)
ga = cvs(gnb(), traind, cuisine, cv=k_fold, n_jobs=-1)
ba = cvs(bnb(), traind, cuisine, cv=k_fold, n_jobs=-1)

# The context manager guarantees the report file is flushed and closed,
# even if printing or writing raises.  As before, the two messages are
# written back to back with no separator between them.
with open('2d', 'wb') as f:
    s = "Gaussian accuracy is: " + str(np.mean(ga))
    print(s)
    f.write(s)
    s = "Bernoulli accuracy is: " + str(np.mean(ba))
    print(s)
    f.write(s)
Ejemplo n.º 5
0
def predictByBNB(features, classes, test):
    """Fit a ``bnb`` model built with ``binarize=2`` and predict ``test``.

    The model is trained on ``features`` / ``classes``; the return value is
    whatever its ``predict`` method yields for ``test``.
    """
    # Relies on fit() returning the fitted estimator (scikit-learn convention).
    classifier = bnb(binarize=2)
    return classifier.fit(features, classes).predict(test)