import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score


def cross_validation():
    _, vocab = train()
    # Bag-of-words features: each sentence is the sum of one-hot vectors
    # over its word IDs (vocab_size + 1 leaves room for a padding ID).
    X_tensor = np.array([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ])
    Y_tensor = np.array(vocab.labels)
    preds = cross_val_predict(LogisticRegression(), X_tensor, Y_tensor, cv=5)
    print('Precision: ', precision_score(Y_tensor, preds))
    print('Recall: ', recall_score(Y_tensor, preds))
    print('F-measure: ', f1_score(Y_tensor, preds))
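# A minimal, self-contained sketch of the bag-of-words encoding used above
# (the toy vocabulary size and the helper name are illustrative only): each
# sentence becomes the sum of one-hot rows of an identity matrix indexed by
# its word IDs, so repeated words accumulate counts.
def _bow_example():
    sentence = [1, 3, 3]                       # word IDs in a toy sentence
    bow = np.sum(np.eye(5)[sentence], axis=0)  # vocab_size 4 -> length-5 rows
    print(bow)                                 # [0. 1. 0. 2. 0.]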
def labeling():
    model, vocab = train()
    X_test = np.array([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ])
    true_label = [label[0] for label in vocab.labels]
    pred_label = model.predict(X_test)
    pred_prob = model.predict_proba(X_test)
    print('true|pred|prob')
    for true, pred, prob in zip(true_label, pred_label, pred_prob):
        # max(prob) is the probability of the predicted (highest-scoring) class.
        print('{}\t{}\t{}'.format(true, pred, max(prob)))
    return true_label, pred_label, pred_prob
def predict():
    model, vocab = train()
    X_test = np.array([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ])
    print(model.predict(X_test))
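# train() itself is defined elsewhere in this repository. The sketch below is
# only an assumption about its interface, kept under a different name so it
# does not shadow the real function: the functions above rely on train()
# returning a fitted LogisticRegression plus a vocab object exposing
# sentences (lists of word IDs), labels, vocab_size, and id2word.
def _train_sketch(vocab):
    X = np.array([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ])
    model = LogisticRegression().fit(X, np.array(vocab.labels))
    return model, vocab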
# Append the leftover chunk to the last fold.
splitList[-2] = splitList[-2] + splitList[-1]
splitList = splitList[:-1]
print('Starting {}-fold cross-validation.'.format(len(splitList)))
# Four np.arrays of k elements each: one score per fold.
# k (number of folds) and border (decision threshold) are defined in the
# surrounding context.
accuracies, precisions, recalls, F_measures = np.zeros(k * 4).reshape(4, k)
for i in range(k):
    print('i = ' + str(i))
    testList = splitList[i]
    # Flatten the remaining folds into a single training list.
    trainList = [
        line for l in splitList[:i] + splitList[i + 1:] for line in l
    ]
    wdic = train(trainList)
    paramdic = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}
    for line in testList:
        ans, odds = likelihood(line, wdic)
        prediction = '+1' if odds > border else '-1'
        paramdic[classify('{}\t{}\t{}'.format(ans, prediction, odds))] += 1
    accuracies[i], precisions[i], recalls[i], F_measures[i] \
        = ret_score(*map(lambda x: paramdic[x], ['TP', 'FP', 'TN', 'FN']))
print('Accuracy:\t{}'.format(np.average(accuracies)))
print('Precision:\t{}'.format(np.average(precisions)))
print('Recall:\t{}'.format(np.average(recalls)))
print('F1 score:\t{}'.format(np.average(F_measures)))
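# classify() and ret_score() above belong to this hand-rolled classifier, not
# to the sklearn-based functions earlier in the file; their real definitions
# live elsewhere. The sketches below are assumptions inferred only from how
# they are called: classify() maps an 'answer\tprediction\todds' line to a
# confusion-matrix cell, and ret_score() turns the four counts into
# (accuracy, precision, recall, F1).
def _classify_sketch(line):
    ans, pred, _ = line.split('\t')
    if pred == '+1':
        return 'TP' if ans == '+1' else 'FP'
    return 'TN' if ans == '-1' else 'FN'


def _ret_score_sketch(TP, FP, TN, FN):
    accuracy = (TP + TN) / (TP + FP + TN + FN)
    precision = TP / (TP + FP) if TP + FP else 0.0
    recall = TP / (TP + FN) if TP + FN else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return accuracy, precision, recall, f1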
def main():
    _, vocab = train()
    # Total count of each word ID summed over all sentences.
    X_tensor = np.sum([
        np.sum([np.eye(vocab.vocab_size + 1)[wid] for wid in sentence], axis=0)
        for sentence in vocab.sentences
    ], axis=0)
    # Ten most frequent words, then ten least frequent words.
    for idx in X_tensor.argsort()[::-1][:10]:
        print(vocab.id2word[idx])
    for idx in X_tensor.argsort()[:10]:
        print(vocab.id2word[idx])