예제 #1
0
from bottle import (route, run, template, request, redirect)
from parse import get_news, extract_next_page
from db import News, session
from scripts import save
from classify import Classifier

# Train the module-level classifier once, at import time, on every news row
# that already carries a label.
s = session()
classifier = Classifier()

# NOTE: `!= None` is deliberate. The comparison is evaluated by the ORM column
# (this looks like a SQLAlchemy-style query API) to build the filter, so it
# must not be rewritten as `is not None`.
mark_news = s.query(News).filter(News.label != None).all()  # noqa: E711

# Collect titles (features) and labels (targets) in row order.
_labelled_pairs = [(row.title, row.label) for row in mark_news]
x_title = [title for title, _ in _labelled_pairs]
y_lable = [label for _, label in _labelled_pairs]

classifier.fit(x_title, y_lable)
예제 #2
0
import csv
import string
from classify import Classifier


# Deletion table for str.translate: maps every character of
# string.punctuation to None. Built once instead of on every clean() call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean(s: str) -> str:
    """Return *s* with every ``string.punctuation`` character removed.

    Only the ASCII punctuation listed in ``string.punctuation`` is stripped;
    all other characters (letters, digits, whitespace, non-ASCII symbols)
    are kept unchanged and in their original order.

    >>> clean("Hello, world!")
    'Hello world'
    """
    return s.translate(_PUNCTUATION_TABLE)


# Number of leading rows used for training; the remainder is the test set.
TRAIN_SIZE = 3900

# Load the tab-separated corpus. Every row is expected to hold exactly two
# fields: the target label followed by the message text.
# newline='' is what the csv module documents for files handed to csv.reader,
# so that newlines inside fields are interpreted by the reader itself.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used -- confirm the file's encoding and pass it explicitly.
with open('SMSSpamCollection.csv', newline='') as f:
    data = list(csv.reader(f, delimiter="\t"))

# Build features and targets in a single pass: messages are stripped of
# punctuation and lower-cased, labels are kept as read from the file.
X, y = [], []
for target, msg in data:
    X.append(clean(msg).lower())
    y.append(target)

# Positional split (no shuffling): first TRAIN_SIZE rows train, the rest test.
X_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
X_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

test_model = Classifier()
test_model.fit(X_train, y_train)
# Print whatever Classifier.score returns for the held-out rows
# (presumably an accuracy value -- see the classify module).
print(test_model.score(X_test, y_test))