Ejemplo n.º 1
0
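'''
75. Feature weights
For the logistic regression model trained in problem 73, inspect the ten
features with the highest weights and the ten features with the lowest weights.
'''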
from sklearn.linear_model import LogisticRegression
from no73 import load_feats
import sys

# Train a logistic regression model on the feature file named by the first
# command-line argument, then print the features with the most extreme weights.
# `sents` is returned by load_feats but is not used in this script.
feats, labels, sents = load_feats(sys.argv[1])
# C is the inverse regularisation strength in scikit-learn, so a large value
# such as 1e5 means the model is only very weakly regularised.
model = LogisticRegression(C=1e5)
model.fit(feats, labels)

# Feature names are taken from the header row of the CSV file.
# NOTE(review): this path is hard-coded while the training data comes from
# sys.argv[1]; presumably both refer to the same file -- confirm.
with open('data/feats.csv') as f:
    header = f.readline()
# Strip the line terminator before splitting; otherwise the last feature name
# would keep a trailing newline and be stored/printed with it.
# The first two columns are skipped (presumably label and sentence, i.e. not
# features -- verify against the CSV layout).
header = header.rstrip("\r\n").split(",")[2:]

# Map each feature name to its learned coefficient (row 0 of coef_).
feats_dict = dict(zip(header, model.coef_[0]))

# Ten lowest-weighted features, in ascending order of weight.
for word, weight in sorted(feats_dict.items(), key=lambda x: x[1])[:10]:
    print("{}: {}".format(word, weight))
# Ten highest-weighted features, in descending order of weight.
# reverse=True keeps the sort stable, so ties stay in header order.
for word, weight in sorted(feats_dict.items(), key=lambda x: x[1],
                           reverse=True)[:10]:
    print("{}: {}".format(word, weight))
Ejemplo n.º 2
0
            predicts.append(1)
        else:
            predicts.append(-1)
    return predicts


if __name__ == '__main__':
    from no73 import load_feats
    from no77 import get_metrics
    from no78 import split_seq, get_train
    from sklearn.linear_model import LogisticRegression
    import matplotlib.pyplot as plt
    import sys
    fname = sys.argv[1]
    K = 5
    feats, labels, _ = load_feats(fname)
    split_feats = split_seq(feats, K)
    split_labels = split_seq(labels, K)

    probs_list = []
    for i in range(K):
        model = LogisticRegression(C=1e5)
        model.fit(get_train(split_feats, i), get_train(split_labels, i))
        probs = model.predict_proba(split_feats[i])
        probs_list.append(probs)
    threshold_list = []
    precision_list = []
    recall_list = []
    for threshold in range(101):
        threshold /= 100
        metrics_list = []