def classify_news():
    """Label all still-unlabelled news with the saved model and render them.

    Loads every ``News`` row whose ``label`` is unset, predicts a label for
    each from its cleaned title using the classifier pickled at
    ``model/model.pickle``, commits the new labels, and renders
    ``classified_template`` with all labelled rows ordered by label.

    Returns:
        Whatever ``template("classified_template", rows=...)`` returns.

    Raises:
        ValueError: if ``model/model.pickle`` does not exist.
    """
    s = session()
    # `== None` is deliberate: this is a query-filter expression (presumably a
    # SQLAlchemy column comparison), not a plain Python identity check.
    unlabelled = s.query(News).filter(News.label == None).all()  # noqa: E711

    # Lowercase after cleaning so the features match what the training code
    # in this project feeds to the model (`stemmer.clear(x).lower()`).
    X: tp.List[str] = [stemmer.clear(item.title).lower() for item in unlabelled]

    model_path = pathlib.Path("model/model.pickle")
    if not model_path.is_file():
        raise ValueError("Классификатор не натренирован")

    if unlabelled:
        # NOTE: pickle can execute arbitrary code on load; this is acceptable
        # only because the file is produced locally by the training script.
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)

        # Assign straight onto the rows already loaded above instead of
        # re-querying each one by id (one query instead of N+1).
        for item, label in zip(unlabelled, model.predict(X)):
            item.label = label
        s.commit()

    rows = (
        s.query(News)
        .filter(News.label != None)  # noqa: E711
        .order_by(News.label)
        .all()
    )
    return template("classified_template", rows=rows)
import typing as tp

import bayes
import stemmer
from db import News, session

# Hold-out evaluation of the naive Bayes classifier: train on the first 70%
# of the stored news (by id) and print the score on the remaining 30%.

s = session()
# One ordered query replaces the per-row lookup by `id == i + 1`, which
# crashed on the first gap in the id sequence (`.first()` returned None).
rows = s.query(News).order_by(News.id).all()
stop_sign = int(0.7 * len(rows))

# NOTE(review): the query does not filter on `label`, so rows whose label is
# still None (if any) end up in the split — confirm that is intended.
extracts: tp.List[str] = [stemmer.clear(row.title).lower() for row in rows]
labels: tp.List[str] = [row.label for row in rows]

X_train, X_test = extracts[:stop_sign], extracts[stop_sign:]
y_train, y_test = labels[:stop_sign], labels[stop_sign:]

model = bayes.NaiveBayesClassifier(alpha=0.93)
model.fit(X_train, y_train)

print("Точность: ", end="")
print(model.score(X_test, y_test))
1 for i, e in enumerate(predicted) if e != c and y_test[i] == c ]) class_accuracies[c] = true_positives / (true_positives + false_negatives) score = sum([i for i in class_accuracies.values()]) / len( list(set(y_test))) return score if __name__ == "__main__": if not pathlib.Path("model/model.pickle").is_file(): model = NaiveBayesClassifier(alpha=0.1) s = session() classified = [(i.title, i.label) for i in s.query(News).filter(News.label != None).all()] X_train, y_train = [], [] for label, extract in classified: X_train.append(label) y_train.append(extract) X_train = [stemmer.clear(x).lower() for x in X_train] print(f"Достано {len(X_train)} промаркированных новостей") print("Тренируюсь...") model.fit(X_train, y_train) print("Модель натренирована. Сохраняю...") with open("model/model.pickle", "wb") as model_file: pickle.dump(model, model_file) print("Сохранил!") else: print(f"Модель уже существует")