Example No. 1
def predict_function():
    x_list = []
    predict_doc = joblib.load('logreg.pkl')
    # Reuse the DictVectorizer fitted at training time ('word_vec.pkl'); a freshly
    # fitted vectorizer is not guaranteed to produce the same feature columns.
    word_vec = joblib.load('word_vec.pkl')
    y_train, x_train = get_feature()
    for line in x_train:
        x_list.append(dict(Counter(line)))
    X = word_vec.transform(x_list)
    pred = predict_doc.predict(X)
    prob = predict_doc.predict_proba(X)
    return pred, y_train, prob
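
All of these snippets build their feature matrix the same way: each document is reduced to a word-count dict with collections.Counter, and DictVectorizer turns the list of dicts into a sparse matrix. A minimal, self-contained sketch of just that step (the toy documents are illustrative, not taken from the examples):

from collections import Counter
from sklearn.feature_extraction import DictVectorizer

docs = [["good", "good", "movie"], ["bad", "movie"]]   # already tokenized documents
counts = [dict(Counter(tokens)) for tokens in docs]    # e.g. {'good': 2, 'movie': 1}

vec = DictVectorizer()
X = vec.fit_transform(counts)                          # 2 x 3 sparse count matrix
print(vec.get_feature_names_out())                     # ['bad' 'good' 'movie'] (get_feature_names() in older versions)
print(X.toarray())                                     # [[0 2 1], [1 0 1]]
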
Example No. 2
def log_regression():
    x_list = []
    logreg = LogisticRegression()
    y_train, x_train = get_feature()
    for line in x_train:
        x_list.append(dict(Counter(line)))
    word_vec = DictVectorizer()
    X = word_vec.fit_transform(x_list)
    logreg.fit(X, y_train)
    joblib.dump(logreg, 'logreg.pkl')
    joblib.dump(word_vec, "word_vec.pkl")
Example No. 3
def log_regression():
    x_list = []
    logreg = LogisticRegression()
    y_train, x_train = get_feature()
    for line in x_train:
        x_list.append(dict(Counter(line)))
    word_vec = DictVectorizer()
    X = word_vec.fit_transform(x_list)
    logreg.fit(X, y_train)
    joblib.dump(logreg, 'logreg.pkl')
    joblib.dump(word_vec,"word_vec.pkl")
Example No. 4
def predict_function():
    x_list = []
    line_list = []
    line_dict = {}
    predict_doc = joblib.load('logreg.pkl')
    feature_doc = joblib.load("word_vec.pkl")
    y_train, x_train = get_feature()
    line = "bad bad good good"
    line_list = line.split()
    for line in x_train:
        for key in line:
            line_dict[key] = 0
    line_dict.update(dict(Counter(line_list)))
    for a in sorted(line_dict.items(), key=lambda x: x[1]):
        print(a)
    x_list.append(line_dict)
    print(x_list)
    # Vectorize with the DictVectorizer fitted at training time ('word_vec.pkl');
    # it keeps the training feature columns and ignores unseen words.
    X = feature_doc.transform(x_list)
    pred = predict_doc.predict(X)
    prob = predict_doc.predict_proba(X)
    for p, pr in zip(pred, prob):
        print(p, pr)
Example No. 5
def predict_function():
    x_list = []
    line_list = []
    line_dict = {}
    predict_doc = joblib.load('logreg.pkl')
    feature_doc = joblib.load("word_vec.pkl")
    y_train, x_train = get_feature()
    line = "bad bad good good"
    line_list = line.split()
    for line in x_train:
        for key in line:
            line_dict[key] = 0
    line_dict.update(dict(Counter(line_list)))
    for a in sorted(line_dict.items(), key=lambda x: x[1]):
        print(a)
    x_list.append(line_dict)
    print(x_list)
    # Vectorize with the DictVectorizer fitted at training time ('word_vec.pkl');
    # it keeps the training feature columns and ignores unseen words.
    X = feature_doc.transform(x_list)
    pred = predict_doc.predict(X)
    prob = predict_doc.predict_proba(X)
    for p, pr in zip(pred, prob):
        print(p, pr)
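
Examples 4 and 5 zero-fill a dict over the whole training vocabulary so the single new sentence lines up with the columns the model was trained on. A DictVectorizer fitted at training time already guarantees this: its transform method keeps exactly the features seen during fit, drops unseen words, and leaves missing ones at zero. A short sketch under the assumption that the logreg.pkl / word_vec.pkl pair dumped in Examples 2 and 3 is available:

import joblib
from collections import Counter

logreg = joblib.load('logreg.pkl')       # LogisticRegression fitted at training time
word_vec = joblib.load('word_vec.pkl')   # DictVectorizer fitted at training time

line = "bad bad good good"
x_new = [dict(Counter(line.split()))]    # [{'bad': 2, 'good': 2}]

# transform (not fit_transform): columns stay aligned with the training matrix
X = word_vec.transform(x_new)
print(logreg.predict(X), logreg.predict_proba(X))
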
Example No. 6
def main():
    # Load the model and vocabulary
    model = joblib.load('model')
    vocab = joblib.load('vocab')

    # sentence = input('Sentence -> ').strip()[3:]
    f = open('sentiment.txt', encoding='utf8')
    for _ in range(124):
        next(f)
    sentence = f.readline()
    print('input\n', sentence)
    sentence = sentence.strip()[3:]
    feature = get_feature(sentence)

    vectorizer = TfidfVectorizer(vocabulary=vocab)
    feature_vec = vectorizer.fit_transform([feature]).toarray()

    predict = model.predict(feature_vec)[0]
    probability = model.predict_proba(feature_vec)[0]   # columns follow model.classes_, here [-1, 1]

    if predict == -1:
        print(f'label : negative ({probability[0] * 100:.3f}%)')
    else:
        print(f'label : positive ({probability[1] * 100:.3f}%)')
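
Example 6 presupposes that a classifier ('model') and a vocabulary ('vocab') were saved at training time, so that TfidfVectorizer(vocabulary=vocab) reproduces the training columns for a single sentence. A hedged sketch of what that training side might look like; the file names mirror Example 6, while the data and the choice of LogisticRegression are illustrative assumptions:

import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Illustrative stand-ins for the real feature strings and labels
texts = ["good great fun", "bad boring awful", "great good film", "awful bad film"]
labels = [1, -1, 1, -1]

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

model = LogisticRegression()
model.fit(X, labels)

joblib.dump(model, 'model')
joblib.dump(vectorizer.vocabulary_, 'vocab')   # the dict Example 6 passes as vocabulary=
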
Example No. 7
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction import DictVectorizer
import joblib  # sklearn.externals.joblib has been removed from recent scikit-learn
from knock72 import get_feature


clf = LogisticRegression()
dic2vec = DictVectorizer()
features = list()
y = list()
for line in open('sentiment.txt'):
    spl = line.rstrip('\n').split()
    label, sent = spl[0], ' '.join(spl[1:])
    features.append(get_feature(sent))
    y.append(int(label))
x = dic2vec.fit_transform(features)
clf.fit(x, y)

joblib.dump(clf, 'knock73.model')
Example No. 8
            for key, value in dict(Counter(feature[test])).items():
                if key in feature_dict:
                    feature_dict[key] = value
            pred_list.append(feature_dict)
        # transform with the already-fitted vectorizer; refitting it here is unnecessary
        preds.append(logreg.predict(word_vec.transform(pred_list)))
    return preds, test_numbers


def get_score(preds, target, test_numbers):
    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f_values = []
    for tests, pred in zip(test_numbers, preds):
        answers = []
        for test in tests:
            answers.append(target[test])
        all_accuracy.append(accuracy_score(answers, pred))
        all_precision.append(precision_score(answers, pred))
        all_recall.append(recall_score(answers, pred))
        all_f_values.append(f1_score(answers, pred))
    return (np.array(all_accuracy).mean(), np.array(all_precision).mean(),
            np.array(all_recall).mean(), np.array(all_f_values).mean())


if __name__ == '__main__':
    target, feature = get_feature()
    preds, test_numbers = make_kfold(target, feature)
    print("正解率:{}\t適合率:{}\t再現率:{}\tF1スコア:{}".format(
        *get_score(preds, target, test_numbers)))
Example No. 9
from sklearn.feature_extraction import DictVectorizer
import joblib  # sklearn.externals.joblib has been removed from recent scikit-learn
from knock72 import get_feature

clf = joblib.load('knock73.model')
dic2vec = DictVectorizer()
features = list()
y = list()
sents = list()
for line in open('sentiment.txt'):
    spl = line.rstrip('\n').split()
    label, sent = spl[0], ' '.join(spl[1:])
    sents.append(sent)
    features.append(get_feature(sent))
    y.append(int(label))
x = dic2vec.fit_transform(features)  # same file and feature extraction as training, so the sorted columns match knock73.model
pred = clf.predict(x)
pred_proba = clf.predict_proba(x)

for pre, t, (prob_n, prob_p) in zip(pred, y, pred_proba):
    prob = prob_n if pre == -1 else prob_p
    print('{}\t{}\t{}'.format(t, pre, prob))
Example No. 10
        for test in tests:
            feature_dict = defaultdict(int)
            for f in word_vec.get_feature_names():
                feature_dict[f] = 0
            for key, value in dict(Counter(feature[test])).items():
                if key in feature_dict:
                    feature_dict[key] = value
            pred_list.append(feature_dict)
        # transform with the already-fitted vectorizer; refitting it here is unnecessary
        preds.append(logreg.predict(word_vec.transform(pred_list)))
    return preds, test_numbers

def get_score(preds, target, test_numbers):
    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f_values = []
    for tests, pred in zip(test_numbers, preds):
        answers = []
        for test in tests:
            answers.append(target[test])
        all_accuracy.append(accuracy_score(answers, pred))
        all_precision.append(precision_score(answers, pred))
        all_recall.append(recall_score(answers, pred))
        all_f_values.append(f1_score(answers, pred))
    return np.array(all_accuracy).mean(), np.array(all_precision).mean(), np.array(all_recall).mean(), np.array(all_f_values).mean()

if __name__ == '__main__':
    target, feature = get_feature()
    preds, test_numbers = make_kfold(target, feature)
    print("正解率:{}\t適合率:{}\t再現率:{}\tF1スコア:{}".format(*get_score(preds, target, test_numbers)))
Example No. 11
if __name__ == '__main__':

    # Load the model
    with open('model', 'rb') as f:
        model = pickle.load(f)

    # Load the list of stopwords
    with open('stopwords', 'rb') as f:
        stopwords = pickle.load(f)
      
    # Load the vocab size
    with open('vocab_size', 'rb') as f:
        vocab = pickle.load(f)

    # Read the input and extract features
    sentence = input('Please input sentence -> ').strip()[3:]
    sent_feature = get_feature(sentence, stopwords)

    # Convert the features into a vector
    vectorizer = CountVectorizer(vocabulary=vocab)
    feature_vec = vectorizer.fit_transform([sent_feature]).toarray()

    # Predict
    label_predict = model.predict(feature_vec)
    prob = model.predict_proba(feature_vec)  # column order follows model.classes_
    
    print(f'label : {label_predict[0]}')
    print(f'prob(+1): {round(prob[0][0] * 100, 3)}%')
    print(f'prob(-1): {round(prob[0][1] * 100, 3)}%')
Example No. 12
from knock72 import get_feature
from sklearn.feature_extraction.text import CountVectorizer
import pickle

with open("./chapter08/model", "rb") as f:
    model = pickle.load(f)
with open("./chapter08/vocab_size", "rb") as f:
    vocab_size = pickle.load(f)
sentence = "This is a very good wonderful fantastic movie."
sentence_feature = get_feature(sentence)
vectorizer = CountVectorizer(vocabulary=vocab_size)
feature_vec = vectorizer.fit_transform([sentence_feature]).toarray()
label_predict = model.predict(feature_vec)
prob = model.predict_proba(feature_vec)
print(label_predict, prob)
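
Examples 11 and 12 call fit_transform on a fresh CountVectorizer for a single sentence, which only works because vocabulary= pins the columns to the training-time vocabulary; without it, the one-sentence matrix would live in its own tiny, misaligned feature space. A minimal sketch of the difference (the vocabulary dict is illustrative):

from sklearn.feature_extraction.text import CountVectorizer

vocab = {"bad": 0, "good": 1, "movie": 2}          # illustrative training vocabulary
sentence = "a very good wonderful movie"

# Fixed vocabulary: columns always mean (bad, good, movie); unknown words are ignored.
fixed = CountVectorizer(vocabulary=vocab).fit_transform([sentence]).toarray()
print(fixed)                                        # [[0 1 1]]

# No vocabulary: the columns are rebuilt from this one sentence and no longer
# line up with whatever the model was trained on.
free = CountVectorizer().fit_transform([sentence]).toarray()
print(free)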