Example #1
File: k78.py  Project: OnizukaLab/100knock
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score

def cross_validation():
    _, vocab = train()
    # Bag-of-words features: sum of one-hot vectors over each sentence's word IDs
    X_tensor = np.array([np.sum([np.eye(vocab.vocab_size+1)[wid] for wid in sentence], axis=0) for sentence in vocab.sentences])
    Y_tensor = np.array(vocab.labels)
    # One out-of-fold prediction per sample from 5-fold cross-validation
    preds = cross_val_predict(LogisticRegression(), X_tensor, Y_tensor, cv=5)
    print('Precision: ', precision_score(vocab.labels, preds))
    print('Recall: ', recall_score(vocab.labels, preds))
    print('F-measure: ', f1_score(vocab.labels, preds))
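For reference, the same cross_val_predict pattern can be reproduced on synthetic data; the feature matrix and labels below are invented for illustration, and only the scikit-learn calls mirror the example above.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score

# Toy data standing in for the bag-of-words features built by the example.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 20))
y = (X[:, 0] > 0).astype(int)

# One out-of-fold prediction per sample from 5-fold cross-validation.
preds = cross_val_predict(LogisticRegression(), X, y, cv=5)
print('Precision: ', precision_score(y, preds))
print('Recall: ', recall_score(y, preds))
print('F-measure: ', f1_score(y, preds))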
Example #2
import numpy as np

def labeling():
    model, vocab = train()
    # Bag-of-words features: sum of one-hot vectors over each sentence's word IDs
    X_test = np.array([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ])
    true_label = [label[0] for label in vocab.labels]
    pred_label = model.predict(X_test)
    pred_prob = model.predict_proba(X_test)   # per-class probabilities, shape (n_samples, n_classes)
    print('true|pred|prob')
    # Print the gold label, the predicted label, and the probability of the predicted class
    for true, pred, prob in zip(true_label, pred_label, pred_prob):
        print('{}\t{}\t{}'.format(true, pred, max(prob)))
    return true_label, pred_label, pred_prob
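A self-contained sketch of the predict/predict_proba pattern used above, with made-up data: predict_proba returns one probability per class, so max(prob) is the model's confidence in the predicted label.

import numpy as np
from sklearn.linear_model import LogisticRegression

# Toy data in place of the bag-of-words features from train().
rng = np.random.default_rng(1)
X = rng.normal(size=(50, 5))
y = (X[:, 0] > 0).astype(int)

model = LogisticRegression().fit(X, y)
pred_label = model.predict(X)        # hard class labels
pred_prob = model.predict_proba(X)   # shape (n_samples, n_classes)

print('true|pred|prob')
for true, pred, prob in zip(y[:5], pred_label[:5], pred_prob[:5]):
    print('{}\t{}\t{}'.format(true, pred, max(prob)))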
Example #3
File: k74.py  Project: OnizukaLab/100knock
import numpy as np

def predict():
    model, vocab = train()
    # Bag-of-words features: sum of one-hot vectors over each sentence's word IDs
    X_test = np.array([np.sum([np.eye(vocab.vocab_size+1)[wid] for wid in sentence], axis=0) for sentence in vocab.sentences])
    print(model.predict(X_test))
Example #4
File: k78.py  Project: kamuiroeru/NLP_Knock
    # Append the leftover items to the last chunk
    splitList[-2] = splitList[-2] + splitList[-1]
    splitList = splitList[:-1]
    print('Starting {}-fold cross-validation.'.format(len(splitList)))

    accuracys, presicions, recalls, F_measures = np.zeros(k * 4).reshape(
        4, k)  # four np.arrays, each with k elements

    for i in range(k):
        print('i = ' + str(i))
        testList = splitList[i]
        trainList = [
            line for l in splitList[:i] + splitList[i + 1:] for line in l
        ]  # flatten

        wdic = train(trainList)

        # Count confusion-matrix cells over the held-out fold
        paramdic = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}
        for line in testList:
            ans, odds = likelihood(line, wdic)
            prediction = '+1' if odds > border else '-1'
            paramdic[classify('{}\t{}\t{}'.format(ans, prediction, odds))] += 1

        accuracys[i], presicions[i], recalls[i], F_measures[i] \
            = ret_score(*map(lambda x: paramdic[x], ['TP', 'FP', 'TN', 'FN']))

    print('Accuracy:\t{}'.format(np.average(accuracys)))
    print('Precision:\t{}'.format(np.average(presicions)))
    print('Recall:\t{}'.format(np.average(recalls)))
    print('F1 score:\t{}'.format(np.average(F_measures)))
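The ret_score helper is not part of the excerpt; assuming it maps the four confusion-matrix counts (TP, FP, TN, FN) to accuracy, precision, recall and F1, a plausible reconstruction would look like this:

def ret_score(tp, fp, tn, fn):
    # Hypothetical reconstruction using the standard definitions of the four metrics.
    accuracy = (tp + tn) / (tp + fp + tn + fn)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return accuracy, precision, recall, f1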
Example #5
def main():
    _, vocab = train()
    # Corpus-wide word counts: sum the bag-of-words vectors of all sentences
    X_tensor = np.sum([np.sum([np.eye(vocab.vocab_size+1)[wid] for wid in sentence], axis=0) for sentence in vocab.sentences], axis=0)
    # Print the 10 most frequent, then the 10 least frequent, words
    for idx in X_tensor.argsort()[::-1][:10]:
        print(vocab.id2word[idx])
    for idx in X_tensor.argsort()[:10]:
        print(vocab.id2word[idx])
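The argsort top-/bottom-k idiom above can be tried in isolation; the frequency vector and id2word mapping below are invented purely for illustration.

import numpy as np

# Made-up word counts and id-to-word mapping.
counts = np.array([5, 42, 3, 17, 1, 9])
id2word = {0: 'the', 1: 'movie', 2: 'dull', 3: 'great', 4: 'rarely', 5: 'plot'}

top = counts.argsort()[::-1][:3]   # indices of the 3 largest counts
bottom = counts.argsort()[:3]      # indices of the 3 smallest counts
print('most frequent: ', [id2word[i] for i in top])
print('least frequent:', [id2word[i] for i in bottom])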