Beispiel #1
0
def classify_news():
    """Label all unlabelled news with the saved model and render the result.

    Loads the pickled classifier from ``model/model.pickle``, predicts a
    label for every ``News`` row whose ``label`` is NULL, stores the
    predictions and renders ``classified_template`` with all labelled rows
    ordered by label.

    Returns:
        Whatever ``template("classified_template", rows=rows)`` returns.

    Raises:
        ValueError: if the pickled model file does not exist.
    """
    # Validate and load the model before touching the database, so that a
    # missing model does not leave an opened session behind.
    model_path = pathlib.Path("model/model.pickle")
    if not model_path.is_file():
        raise ValueError("Классификатор не натренирован")
    # NOTE(security): pickle.load can execute arbitrary code; the model file
    # must only ever come from a trusted source (our own training script).
    with model_path.open("rb") as model_file:
        model = pickle.load(model_file)

    s = session()
    # `== None` is deliberate: presumably SQLAlchemy, where the overloaded
    # operator builds an `IS NULL` clause (`is None` would not).
    unclassified = s.query(News).filter(News.label == None).all()
    if unclassified:
        X: tp.List[str] = [stemmer.clear(item.title) for item in unclassified]
        # Reuse the objects that are already loaded instead of re-querying
        # each one by id, and persist all labels in a single commit.
        for item, label in zip(unclassified, model.predict(X)):
            item.label = label
        s.commit()

    rows = s.query(News).filter(News.label != None).order_by(News.label).all()
    return template("classified_template", rows=rows)
Beispiel #2
0
import typing as tp

import bayes
import stemmer
from db import News, session

# Evaluation script: train the naive Bayes classifier on the first 70% of
# the stored news (by id) and print its score on the remaining 30%.
s = session()
# Fetch every row once, ordered by primary key. The previous version looked
# rows up one by one with `id == i + 1`, which crashed as soon as the id
# sequence had a gap (e.g. after a deletion) and issued one query per row.
rows = s.query(News).order_by(News.id).all()
stop_sign = int(0.7 * len(rows))  # index separating train from test data

# Normalised titles are the features, stored labels are the targets.
# NOTE(review): rows whose label is still NULL are included here as well,
# exactly as before — confirm the table is fully labelled before running.
extracts: tp.List[str] = [stemmer.clear(row.title).lower() for row in rows]
labels: tp.List[str] = [row.label for row in rows]

X_train, X_test = extracts[:stop_sign], extracts[stop_sign:]
y_train, y_test = labels[:stop_sign], labels[stop_sign:]

model = bayes.NaiveBayesClassifier(alpha=0.93)
model.fit(X_train, y_train)
print("Точность: ", end="")
print(model.score(X_test, y_test))
Beispiel #3
0
                    1 for i, e in enumerate(predicted)
                    if e != c and y_test[i] == c
                ])
                class_accuracies[c] = true_positives / (true_positives +
                                                        false_negatives)
        score = sum([i for i in class_accuracies.values()]) / len(
            list(set(y_test)))
        return score


if __name__ == "__main__":
    # Train the classifier on every labelled news item and pickle it to
    # model/model.pickle, unless a saved model already exists.
    model_path = pathlib.Path("model/model.pickle")
    if not model_path.is_file():
        model = NaiveBayesClassifier(alpha=0.1)
        s = session()
        # `!= None` is deliberate: presumably SQLAlchemy, where the
        # overloaded operator builds an `IS NOT NULL` clause.
        classified = [(i.title, i.label)
                      for i in s.query(News).filter(News.label != None).all()]
        # Each pair is (title, label): titles are normalised into features,
        # labels are used as targets.
        X_train = [stemmer.clear(title).lower() for title, _ in classified]
        y_train = [label for _, label in classified]
        print(f"Достано {len(X_train)} промаркированных новостей")
        # Make sure the target directory exists *before* training, so a
        # missing "model/" folder cannot throw away a finished training run.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        print("Тренируюсь...")
        model.fit(X_train, y_train)
        print("Модель натренирована. Сохраняю...")
        with model_path.open("wb") as model_file:
            pickle.dump(model, model_file)
        print("Сохранил!")
    else:
        print("Модель уже существует")