"""79. Plotting a precision-recall graph.

Vary the classification threshold of the logistic regression model and
draw the precision-recall graph.

Running this script writes two images to the working directory:

* ``out79_2d.png`` -- precision plotted against recall.
* ``out79_3d.png`` -- 3-D scatter of precision, recall and their harmonic
  mean (labelled "f1").
"""
import matplotlib.pyplot as plt
# Not referenced by name below; kept because older Matplotlib releases
# presumably need this import to register the '3d' projection -- confirm
# before removing.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from sklearn.metrics import precision_recall_curve
from scipy.stats import hmean

from knock72 import load

model = load("model")
labels = load("labels")
features = load("features")

# The columns of predict_proba follow the order of model.classes_
# (expected to be array([-1, 1])).  Look the positive class up explicitly
# instead of hard-coding column 1, so a different class order cannot
# silently select the wrong probabilities.
positive_column = list(model.classes_).index(1)
probs = model.predict_proba(features)[:, positive_column]

# The third return value (the thresholds) is not needed for plotting.
pre, rec, _ = precision_recall_curve(labels, probs)

# 2-D curve.  Precision is on the x-axis and recall on the y-axis, exactly
# as in the original script.
fig_2d, ax_2d = plt.subplots()
ax_2d.plot(pre, rec)
ax_2d.set_xlabel("precision")
ax_2d.set_ylabel("recall")
fig_2d.savefig("out79_2d.png")
plt.close(fig_2d)  # release the figure once it has been written

# 3-D scatter of (precision, recall, harmonic mean).  Points where either
# precision or recall is zero are skipped before hmean is evaluated.
x1, x2, y = zip(*[(p, r, hmean([p, r])) for p, r in zip(pre, rec) if p and r])

fig_3d = plt.figure()
ax = fig_3d.add_subplot(111, projection='3d')
ax.scatter3D(x1, x2, y)
ax.set_xlabel("precision")
ax.set_ylabel("recall")
ax.set_zlabel("f1")
fig_3d.savefig("out79_3d.png")
plt.close(fig_3d)
"""74. Prediction.

Using the logistic regression model trained in 73, compute the polarity
label of each given sentence ("+1" for a positive example, "-1" for a
negative example) together with its predicted probability.
"""
from sklearn.feature_extraction.text import TfidfVectorizer

from knock72 import load, extract_features

model = load("model")
vocab = load("vocabs")
_, docs = extract_features("./test.txt")

# NOTE(review): the original author marked the next two statements as
# "suspicious code" / "review later".  fit_transform is called on the test
# documents, so the TF-IDF weighting is presumably re-learned from them
# rather than reused from training -- confirm against how knock72 built the
# training features.
vectorizer = TfidfVectorizer(vocabulary=vocab)
tfidf_matrix = vectorizer.fit_transform(docs)
features = tfidf_matrix.toarray()

predicted_labels = model.predict(features)
class_probabilities = model.predict_proba(features)

# One line per document: the signed label and the largest class probability.
for predict, proba in zip(predicted_labels, class_probabilities):
    print(f"{predict:+d} : {max(proba):f}")

# Reference notes:
#
# * sklearn.linear_model.LogisticRegression
#   https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
#   - predict(self, X)        [Methods] Predict class labels for samples in X.
#   - predict_proba(self, X)  [Methods] Probability estimates.
"""73. Training.

Train a logistic regression model on the features extracted in 72 and
store the fitted model under the name "model".
"""
from sklearn.linear_model import LogisticRegression

from knock72 import load, save

labels = load("labels")
features = load("features")

# Build the estimator with its default settings, fit it, then persist it.
model = LogisticRegression()
model.fit(features, labels)
save("model", model)