Esempio n. 1
0
def predict_titles(titles):
    """Print the predicted label and top class probability for each title.

    Obtains a fitted model and vectorizer from ``train()``, vectorizes
    ``titles`` and prints two things:

    1. the array returned by ``lr_model.predict``;
    2. a list holding, for each title, the largest value in its
       ``predict_proba`` row.

    Args:
        titles: Collection of titles accepted by ``vectorizer.transform``
            (presumably raw strings -- confirm against ``train()``).
    """
    lr_model, _data, vectorizer = train()
    # Vectorize once; both predict() and predict_proba() use the same matrix.
    features = vectorizer.transform(titles).toarray()
    print(lr_model.predict(features))
    print([max(row) for row in lr_model.predict_proba(features)])
Esempio n. 2
0
def knock57():
    """Print the 10 highest- and 10 lowest-weighted features for each class.

    For every entry in ``lr.classes_`` the class name is printed, followed by
    the ten terms with the largest coefficients (descending), then the ten
    terms with the smallest coefficients (ascending), each as
    ``term : weight``, and finally a blank line.

    Features are ranked by column index rather than looked up by weight
    value, so terms that share the same weight are each reported once
    instead of the first matching term being printed repeatedly.
    """
    lr, _data, vectorizer = train()
    # vocabulary_ is inverted so a coefficient column can be mapped to its term.
    index_to_term = {
        index: term
        for term, index in vectorizer.vocabulary_.items()
    }
    for cnt, class_name in enumerate(lr.classes_):
        weights = lr.coef_[cnt]
        print(class_name)
        # Stable sorts keep tied weights in ascending column order.
        descending = np.argsort(-weights, kind="stable")
        ascending = np.argsort(weights, kind="stable")
        for order in (descending, ascending):
            for index in order[:10]:
                print(index_to_term[index], ":", weights[index])
        print()
Esempio n. 3
0
def knock56():
    """Print precision, recall, F-score and support for the ``data[1]`` split.

    Prints three reports computed from the same predictions: the per-class
    result, then the macro-averaged and micro-averaged results (prefixed
    with ``macro:`` and ``micro:``).

    NOTE(review): ``data[1]`` is whichever split ``train()`` returns second;
    confirm it is the split this evaluation is meant to use.
    """
    lr, data, _vectorizer = train()
    y_true = data[1][1]
    # Predict once and reuse the result for all three reports.
    y_pred = lr.predict(data[1][0])

    # First row is precision, second row is recall, ...
    # First column is category 1, second column is category 2, ...
    pprint(precision_recall_fscore_support(y_true=y_true, y_pred=y_pred))

    # Each result is ordered: precision, recall, f-score, support.
    print(
        "macro:",
        precision_recall_fscore_support(y_true=y_true,
                                        y_pred=y_pred,
                                        average="macro"))
    print(
        "micro:",
        precision_recall_fscore_support(y_true=y_true,
                                        y_pred=y_pred,
                                        average="micro"))
Esempio n. 4
0
def knock55():
    """Print a confusion matrix for ``data[0]`` and then for ``data[1]``.

    Each split is indexed as ``split[0]`` (model inputs) and ``split[1]``
    (true labels); predictions come from the model returned by ``train()``.
    """
    model, splits, _vectorizer = train()
    for split_index in (0, 1):
        split = splits[split_index]
        expected = split[1]
        predicted = model.predict(split[0])
        print(confusion_matrix(y_true=expected, y_pred=predicted))
Esempio n. 5
0
    # NOTE(review): this is the tail of a function whose header (and the
    # definitions of train_ftr / train_label) lies outside the visible code.

    # Load the validation and test splits; read_data returns a
    # (features, labels) pair for each open file.
    # NOTE(review): neither file handle is closed in the visible code --
    # consider `with open(...)`.
    valid = open('valid.feature.txt')
    valid_ftr, valid_label = read_data(valid)
    test = open('test.feature.txt')
    test_ftr, test_label = read_data(test)

    # Build the vectorizer from the training features and persist it so that
    # accuracy() can be pointed at it by file name below.
    vectorizer = vectorize(train_ftr)
    joblib.dump(vectorizer, 'vectorizer.pkl')

    x_train = vectorizer.transform(train_ftr)
    y_train = train_label
    
    # One tab-separated record per setting: label, train/valid/test accuracy.
    regularization = []
    # c takes the integer values -5 .. 4.
    for c in range(-5,5):
        # train with regularization; the model is referred to by file name
        model_name = 'model_reg_10**' + str(c) + '.pkl'
        train(x_train, y_train, model_name, c)
        # NOTE(review): '10e-5' parses as 10 * 10**-5 == 1e-4, i.e. ten times
        # the 10**c used in model_name -- confirm which value is intended.
        reg = '10e' + str(c)

        # calculate accuracy on train, valid, test
        acc_train = accuracy(train_ftr, train_label, model_name,'vectorizer.pkl')
        acc_valid = accuracy(valid_ftr, valid_label, model_name,'vectorizer.pkl')
        acc_test= accuracy(test_ftr, test_label, model_name,'vectorizer.pkl')
    
        # Accuracies are rounded to 6 decimal places before being stored.
        regularization.append(reg + '\t' + str(round(acc_train,6)) + '\t' + str(round(acc_valid,6)) + '\t' + str(round(acc_test,6)))

    # Parse the records back into parallel numeric lists.
    # NOTE(review): acc_ts is not filled in the visible lines; the code
    # appears to continue past this point.
    reg_val, acc_tr, acc_val, acc_ts = [],[],[],[]
    for i in regularization:
        temp = i.strip().split('\t')
        reg_val.append(float(temp[0]))
        acc_tr.append(float(temp[1]))
        acc_val.append(float(temp[2]))
Esempio n. 6
0
def knock54():
    """Print the model's score on the first two splits returned by ``train()``.

    ``data[0]`` is reported under the label ``train`` and ``data[1]`` under
    the label ``valid``; each split is indexed as ``split[0]`` (inputs) and
    ``split[1]`` (labels) and passed to ``lr.score``.
    """
    lr, data, _vectorizer = train()
    print("train", lr.score(data[0][0], data[0][1]))
    # Label corrected from the misspelled "vaild".
    print("valid", lr.score(data[1][0], data[1][1]))