Exemple #1
0
from db import News, session
from pprint import pprint
import string
from bayes import NaiveBayesClassifier

def clean(s):
    translator = str.maketrans("", "", string.punctuation)
    return s.translate(translator)

s = session()
titles = [clean(news.title).lower() for news in s.query(News).filter(News.label != None).all()]
marks = [news.label for news in s.query(News).filter(News.label != None).all()]

print(len(titles), len(marks))
pprint(titles[:4])

seventy_percent = round(len(titles) * 0.70)


x_train, y_train, x_test, y_test = titles[:seventy_percent], marks[:seventy_percent], titles[seventy_percent:], marks[seventy_percent:]
model = NaiveBayesClassifier()
model.fit(x_train, y_train)
print(model.score(x_test, y_test))
model.export_model('news_model.json')