def pred_label(n):
    """Print label, prediction and predicted probabilities for the first n samples.

    Loads the persisted model and vectorizer from ``model.pkl`` and
    ``vectorizer.pkl``, vectorizes the first ``n`` texts returned by
    ``make_data_x_y()`` and prints one tab-separated line per sample:
    the reference label, ``model.predict`` output and
    ``model.predict_proba`` output.

    Args:
        n: Number of leading samples to vectorize and report.
    """
    classifier = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    texts, labels = make_data_x_y()

    features = vec.transform(texts[:n])
    # zip stops at the shorter input, so only the labels matching the
    # vectorized samples are consumed.
    for row, label in zip(features, labels):
        predicted = classifier.predict(row)
        probabilities = classifier.predict_proba(row)
        print(f'{label}\t{predicted}\t{probabilities}')
def accuracy():
    """Print a classification report and the accuracy over the full data set.

    Loads the persisted model and vectorizer, vectorizes every text
    returned by ``make_data_x_y()``, predicts all samples in one call and
    prints ``classification_report`` followed by ``accuracy_score``.
    """
    classifier = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    texts, labels = make_data_x_y()

    features = vec.transform(texts)
    label_true = np.array(labels)
    label_pred = classifier.predict(features)

    print(classification_report(label_true, label_pred))
    score = accuracy_score(label_true, label_pred)
    print(f'accuracy = {score}')
def test(n):
    """Print the input text, prediction and probabilities for the first n samples.

    Loads the persisted model and vectorizer, vectorizes the first ``n``
    texts returned by ``make_data_x_y()`` and, for each of them, prints
    the stripped input text, the ``model.predict`` output, the
    ``model.predict_proba`` output and a blank separator line.

    Args:
        n: Number of leading samples to vectorize and report.
    """
    classifier = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    texts, _labels = make_data_x_y()

    features = vec.transform(texts[:n])
    # zip stops at the shorter input, so each vectorized sample is paired
    # with its raw text from the start of the list.
    for row, raw_text in zip(features, texts):
        sentence = raw_text.strip()
        print(f'入力文 : {sentence}')
        predicted = classifier.predict(row)
        print(f'予測結果 : {predicted}')
        probabilities = classifier.predict_proba(row)
        print(f'予測確率 : {probabilities}')
        print()
def p_r_curve():
    """Plot precision against recall for the persisted model.

    Loads the model and vectorizer, vectorizes every text returned by
    ``make_data_x_y()``, feeds column 1 of ``model.predict_proba`` to
    ``precision_recall_curve`` and shows the resulting curve with
    matplotlib.
    """
    classifier = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    texts, labels = make_data_x_y()

    features = vec.transform(texts)
    targets = np.array(labels)

    # Column 1 of predict_proba is used as the score
    # (presumably the positive class -- confirm against model.classes_).
    scores = classifier.predict_proba(features)[:, 1]
    precision, recall, _ = precision_recall_curve(targets, scores)

    # Precision goes on the x axis and recall on the y axis here.
    plt.plot(precision, recall)
    plt.xlabel('Precision')
    plt.ylabel('Recall')
    plt.show()
def kfold():
    """Run 5-fold stratified cross-validation and print the mean of each result.

    Loads the persisted model and vectorizer, vectorizes every text
    returned by ``make_data_x_y()`` and calls ``cross_validate`` with
    accuracy, precision, recall and f1 scoring. The mean of every entry
    in the returned mapping is printed as ``key : mean``.
    """
    classifier = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    texts, labels = make_data_x_y()

    features = vec.transform(texts)
    targets = np.array(labels)

    # Each metric is registered under its own scorer name.
    metric_names = ('accuracy', 'precision', 'recall', 'f1')
    scoring = {metric: metric for metric in metric_names}

    # No random_state is passed, so the shuffled folds are not pinned
    # to a seed.
    # NOTE(review): per the scikit-learn docs, cross_validate fits the
    # estimator on each fold, so the loaded weights are presumably not what
    # gets scored -- confirm this is intended.
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    results = cross_validate(
        classifier, features, targets, cv=splitter, scoring=scoring)

    for name in results:
        print(f'{name} : {results[name].mean()}')