Code Example #1
File: model.py Project: lyssym/nlp_base
 def train(self):
     """
     Train the model.
     """
     self.initialize_model()
     x_train, y_train = self.corpus.generator()
     self.model.fit(x_train, y_train)
     labels = list(self.model.classes_)
     x_test, y_test = self.corpus.generator(train=False)
     y_predict = self.model.predict(x_test)
     metrics.flat_f1_score(y_test, y_predict, average='weighted', labels=labels)
     sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
     print(metrics.flat_classification_report(y_test, y_predict, labels=sorted_labels, digits=3))
     self.save_model()
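A note on what the flat_* helpers used throughout these examples compute: sklearn-crfsuite's metrics module flattens the per-sentence label sequences and delegates to the corresponding scikit-learn metric. Below is a minimal, self-contained sketch with made-up tag sequences (not taken from any of the listed projects):

from sklearn_crfsuite import metrics

# Toy gold and predicted label sequences, one inner list per sentence.
y_true = [['B-PER', 'I-PER', 'O'], ['B-LOC', 'O']]
y_pred = [['B-PER', 'O', 'O'], ['B-LOC', 'O']]

# Sequences are flattened before scoring, so sentence boundaries do not affect the result.
labels = ['B-PER', 'I-PER', 'B-LOC']  # scoring restricted to entity tags for illustration
print(metrics.flat_f1_score(y_true, y_pred, average='weighted', labels=labels))
print(metrics.flat_classification_report(y_true, y_pred, labels=labels, digits=3))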
Code Example #2
File: train.py Project: duongkstn/word_tokenize
def train_full(data=None):
    data = data or get_tokenizer()
    train_sents, test_sents = train_test_split(data, test_size=0.2, shuffle=False)

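    # NOTE: this function fits the CRF on the full dataset (data), not just train_sents;
    # test_sents is used only for the evaluation below.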
    X_train = [sent2features(sent2tokens(s)) for s in data]
    y_train = [sent2labels(s) for s in data]

    X_test = [sent2features(sent2tokens(s)) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]

    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True,
        model_filename='models/model.bin'
    )
    crf.fit(X_train, y_train)
    start = time.time()
    y_pred = crf.predict(X_test)
    end = time.time()
    test_time = end - start
    F1 = metrics.flat_f1_score(y_test, y_pred, average='weighted')
    print(F1)
    print("Test time: ", test_time)

    print(metrics.flat_classification_report(
        y_test, y_pred, digits=3
    ))
Code Example #3
File: train.py Project: duongkstn/word_tokenize
def train_test(data=None):
    train_sents, dev_sents, test_sents = data or get_tokenizer()

    X_train = [sent2features(sent2tokens(s)) for s in train_sents]
    y_train = [sent2labels(s) for s in train_sents]

    print(len(X_train), len(y_train))

    X_dev = [sent2features(sent2tokens(s)) for s in dev_sents]
    y_dev = [sent2labels(s) for s in dev_sents]

    X_test = [sent2features(sent2tokens(s)) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]

    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True,
        model_filename='model/model.bin'
    )
    crf.fit(X_train, y_train, X_dev=X_dev, y_dev=y_dev)
    start = time.time()
    y_pred = crf.predict(X_test)
    end = time.time()
    test_time = end - start
    F1 = metrics.flat_f1_score(y_test, y_pred, average='weighted')
    print("F1: ", F1)
    print("Test time: ", test_time)

    print(metrics.flat_classification_report(
        y_test, y_pred, digits=3
    ))
Code Example #4
File: train.py Project: scrapinghub/webstruct
def _print_metrics(y_pred, y_true):
    labels = get_labels(y_true)
    print("Sequence accuracy: {:0.1%}".format(
        metrics.sequence_accuracy_score(y_true, y_pred))
    )
    print("Per-tag F1: {:0.3f}".format(
        metrics.flat_f1_score(y_true, y_pred,
                              average='macro',
                              labels=labels)
    ))
    print("Per-tag Classification report: \n{}".format(
        metrics.flat_classification_report(y_true, y_pred,
                                           labels=labels, digits=3))
    )
Code Example #5
def fscore_crf(Y, y_pred, labels):
    labels.remove('O')
    return metrics.flat_f1_score(Y, y_pred, average='weighted', labels=labels)
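Dropping the 'O' (outside) tag before scoring, as several of these examples do, keeps the dominant non-entity class from inflating the weighted average. A small illustrative sketch with toy data (the tag names are made up):

from sklearn_crfsuite import metrics

y_true = [['B-LOC', 'O', 'O', 'O'], ['O', 'B-PER', 'O', 'O']]
y_pred = [['O', 'O', 'O', 'O'], ['O', 'B-PER', 'O', 'O']]

# Scoring over every tag rewards the model for the easy majority class ...
print(metrics.flat_f1_score(y_true, y_pred, average='weighted',
                            labels=['B-LOC', 'B-PER', 'O']))
# ... while restricting to entity tags (as fscore_crf does) is stricter.
print(metrics.flat_f1_score(y_true, y_pred, average='weighted',
                            labels=['B-LOC', 'B-PER']))

Note that fscore_crf also mutates the labels list it receives, since list.remove works in place.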
Code Example #6
Xtrain = [stofeatures(s) for s in trainwords]
ytrain = [stolabels(s) for s in trainwords]

Xtest = [stofeatures(s) for s in testwords]
ytest = [stolabels(s) for s in testwords]

# In[7]:

crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                           c1=0.15,
                           c2=0.15,
                           max_iterations=100,
                           all_possible_transitions=True)
crf.fit(Xtrain, ytrain)

# In[8]:

labels = list(crf.classes_)
predicted = crf.predict(Xtest)
metrics.flat_f1_score(ytest, predicted, average='weighted', labels=labels)

# In[9]:

labelsorted = sorted(labels, key=lambda n: (n[1:], n[0]))
print(
    metrics.flat_classification_report(ytest,
                                       predicted,
                                       labels=labelsorted,
                                       digits=3))
Code Example #7
crf = CRF_baseline_NER()
print(crf.sent2features(conll.sentences[0])[0])
train_sents = conll.sentences[:40000]
test_sents = conll.sentences[40000:]
crf.X_train = [crf.sent2features(s) for s in train_sents]
crf.y_train = [crf.sent2labels(s) for s in train_sents]

crf.X_test = [crf.sent2features(s) for s in test_sents]
crf.y_test = [crf.sent2labels(s) for s in test_sents]
crf.train()
labels = list(crf.crf_model.classes_)
labels.remove('O')
print(labels)

y_pred = crf.crf_model.predict(crf.X_test)
f1_score = metrics.flat_f1_score(crf.y_test, y_pred,
                      average='weighted', labels=labels)

precision_score = metrics.flat_precision_score(crf.y_test, y_pred,
                      average='weighted', labels=labels)

recall_score = metrics.flat_recall_score(crf.y_test, y_pred,
                      average='weighted', labels=labels)
stats = metrics.flat_classification_report(crf.y_test, y_pred,
                       labels=labels)
print("Precision: "+str(precision_score))
print("Recall: "+str(recall_score))
print("F1-score: "+str(f1_score))
print(stats)
filename = '../Models/crf_baseline_model.sav'
pickle.dump(crf.crf_model, open(filename, 'wb'))
print("Done with all")
Code Example #8
                        verbose=1,
                        n_jobs=1,
                        n_iter=20,
                        scoring=f1_scorer)
rs.fit(X_train, y_train)

# In[78]:

print('Best params:', rs.best_params_)
print('Best F-1 score:', rs.best_score_)

# In[14]:

# fitting the model with the obtained hyperparameters c1=0.055 and c2=0.066

crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                           c1=0.055,
                           c2=0.066,
                           max_iterations=1000,
                           all_possible_transitions=True,
                           verbose=False)
crf.fit(X_train, y_train)
labels = ["O", "D", "T"]

#predicting the entities for test data
y_pred = crf.predict(X_test)
print("F1 score for D, T and O label(average) is %lf " %
      (metrics.flat_f1_score(y_test, y_pred, average='macro', labels=labels)))
#printing the classification report
showreport(y_test, y_pred)
Code Example #9
def lbfgs(train_X, train_Y, test_X, test_Y):
    algorithms = ['lbfgs']
    min_frequencies = [0, 0.02]
    all_states = [True, False]
    all_transitions = [True, False]
    c1s = [0, 0.01, 0.05, 0.1]
    c2s = [0, 0.01, 0.05, 0.1]

    i = 1
    N = len(algorithms) * len(min_frequencies) * len(all_states) * len(
        all_transitions) * len(c1s) * len(c2s)
    start = time.time()

    results = []

    for algo in algorithms:
        for min_freq in min_frequencies:
            for all_state in all_states:
                for all_transition in all_transitions:
                    for c1 in c1s:
                        for c2 in c2s:
                            print(round(100 * i / N), '%')
                            print('Time elapsed: {} s'.format(
                                round(time.time() - start)))
                            i += 1
                            params = {
                                'algo': algo,
                                'min_freq': min_freq,
                                'all_state': all_state,
                                'all_transition': all_transition,
                                'c1': c1,
                                'c2': c2
                            }
                            print(params)
                            try:
                                crf = sklearn_crfsuite.CRF(
                                    algorithm=algo,
                                    c1=c1,
                                    c2=c2,
                                    max_iterations=1000,
                                    all_possible_transitions=all_transition,
                                    all_possible_states=all_state,
                                    min_freq=min_freq)

                                crf.fit(train_X, train_Y)
                                pred_Y = crf.predict(test_X)

                                f1 = metrics.flat_f1_score(test_Y,
                                                           pred_Y,
                                                           average='weighted',
                                                           labels=[
                                                               'per', 'org',
                                                               'misc', 'loc',
                                                               'notpropn'
                                                           ])
                                res = metrics.flat_classification_report(
                                    test_Y,
                                    pred_Y,
                                    labels=[
                                        'per', 'org', 'misc', 'loc', 'notpropn'
                                    ],
                                    digits=4)

                                results.append((f1, params))

                                print(res)
                                print()

                            except Exception:
                                print('Invalid parameter combination.')
                                continue

    file = open('results/lbfgs', 'wb')
    pickle.dump(results, file)
    file.close()
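As an aside, the six nested loops in Example #9 can be flattened with itertools.product. The sketch below walks the same parameter grid under that assumption (the helper name lbfgs_grid is made up; the label list is carried over from the example):

import itertools
import time

import sklearn_crfsuite
from sklearn_crfsuite import metrics


def lbfgs_grid(train_X, train_Y, test_X, test_Y):
    # Same parameter grid as Example #9, expressed as a dict.
    grid = {
        'min_freq': [0, 0.02],
        'all_possible_states': [True, False],
        'all_possible_transitions': [True, False],
        'c1': [0, 0.01, 0.05, 0.1],
        'c2': [0, 0.01, 0.05, 0.1],
    }
    keys = list(grid)
    combos = list(itertools.product(*(grid[k] for k in keys)))
    results = []
    start = time.time()
    for i, values in enumerate(combos, 1):
        params = dict(zip(keys, values))
        print('{}% ({} s elapsed)'.format(round(100 * i / len(combos)),
                                          round(time.time() - start)))
        try:
            crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                       max_iterations=1000,
                                       **params)
            crf.fit(train_X, train_Y)
            pred_Y = crf.predict(test_X)
            f1 = metrics.flat_f1_score(test_Y, pred_Y, average='weighted',
                                       labels=['per', 'org', 'misc', 'loc', 'notpropn'])
            results.append((f1, params))
        except Exception:
            print('Invalid parameter combination:', params)
    return results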
Code Example #10
File: load_data.py Project: hunaif/graphical-models
X_test = [sent2features(s) for s in chat_sequence_all]
y_test = [sent2labels(s) for s in chat_sequence_all]

crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                           c1=0.1,
                           c2=0.1,
                           max_iterations=100,
                           all_possible_transitions=True,
                           verbose=True)

print("starting train..........\n")
crf.fit(X_train, y_train)

print("Following are the classes: \n")
labels = list(crf.classes_)
print(labels)

y_pred = crf.predict(X_test)
print("weighted f1 score......\n")
print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))

sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

print("class wise distribution.......\n")
print(
    metrics.flat_classification_report(y_test,
                                       y_pred,
                                       labels=sorted_labels,
                                       digits=3))

file_out.close()
Code Example #11
# The True Labels for the Test Set Data
a = []
for i in test_data:
    t = []
    for j in i:
        t.append(j[1])
    a.append(t)

# In[22]:

# To check Accuracy
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

# Weighted F1 score over the flattened tag sequences
metrics.flat_f1_score(a, ans, average='weighted', labels=labels)

# In[23]:

# Confusion Matrix for the Model
print(
    metrics.flat_classification_report(a, ans, labels=sorted_labels, digits=3))

# ## Module to implement CRF.

# In[24]:

# pip3 install sklearn-crfsuite # install this please

train_sents = corpus
Code Example #12
def test_flat_f1_score_binary():
    s = [["x", "y"], ["x", "y"]]
    score = metrics.flat_f1_score(s, s, average='weighted')
    assert score == 1.0
Code Example #13
File: ne.py Project: Resses/nlp-final-assignment
crf_final = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                 c1=0.5,
                                 c2=0.5,
                                 max_iterations=100,
                                 all_possible_transitions=True)
crf_final.fit(X_train, y_train)

# Just keep the 'B' and 'I' for F-1 scoring
labels = list(crf_final.classes_)
labels.remove('O')

######## CLASSIFICATION #########
print("Running on the training set")
y_train_pred = crf_final.predict(X_train)
print("F1-score: " + str(
    metrics.flat_f1_score(
        y_train, y_train_pred, average='weighted', labels=labels)))

if args.dev:
    print("Running on the dev set")
    y_dev_pred = crf_final.predict(X_dev)
    print("F1-score: " + str(
        metrics.flat_f1_score(
            y_dev, y_dev_pred, average='weighted', labels=labels)))

print("Running on the testing set")
y_test_pred = crf_final.predict(X_test)


######## OUTPUT #########
def generate_output(pred, outputfile):
    f = open(outputfile, 'w')
Code Example #14
                if tok.find("PER") != -1 or tok.find("per") != -1 or tok.find(
                        "musicartist") != -1:
                    temp.append(3)
                else:
                    if tok.find("MISC") != -1:
                        temp.append(4)
                    else:
                        temp.append(4)

    y.append(temp)

sorted_labels = definitions.KLASSES.copy()
del sorted_labels[4]

print("------------------------------------------------------")
print(flat_f1_score(y, new, average='weighted', labels=list(sorted_labels.keys())))
print(flat_f1_score(y, old, average='weighted', labels=list(sorted_labels.keys())))
print("-----------------------------------------")
print(
    flat_classification_report(y,
                               new,
                               labels=list(sorted_labels.keys()),
                               target_names=list(sorted_labels.values()),
                               digits=3))
print(
    flat_classification_report(y,
                               old,
                               labels=list(sorted_labels.keys()),
                               target_names=list(sorted_labels.values()),
                               digits=3))
Code Example #15
def test_flat_fscore():
    score = metrics.flat_f1_score(y1, y2, average='macro')
    assert score == 2 / 3
    assert metrics.flat_fbeta_score(y1, y2, beta=1, average='macro') == score
Code Example #16
    def gen_model(self, x_train, y_train, x_test, y_test):

        for i in range(len(y_train)):
            for j in range(len(y_train[i])):
                y_train[i][j] = y_train[i][j].replace('B-', '')
                y_train[i][j] = y_train[i][j].replace('O-', '')
                y_train[i][j] = y_train[i][j].replace('I-', '')

        for i in range(len(y_test)):
            for j in range(len(y_test[i])):
                y_test[i][j] = y_test[i][j].replace('B-', '')
                y_test[i][j] = y_test[i][j].replace('O-', '')
                y_test[i][j] = y_test[i][j].replace('I-', '')

        labels = ['DOS', 'UNIT', 'FREQ', 'DUR', 'WHO']
        # labels = ['O-DOS', 'B-DOS', 'I-UNIT', 'B-UNIT', 'O-UNIT', 'I-FREQ', 'B-FREQ', 'O-FREQ', 'I-DUR', 'B-DUR', 'O-DUR', 'I-WHO', 'B-WHO', 'O-WHO']
        # labels = ['m', 'r', 'f', 'do', 'du', 'mo']
        crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                   max_iterations=100,
                                   all_possible_transitions=True)
        params_space = {
            'c1': scipy.stats.expon(scale=0.5),
            'c2': scipy.stats.expon(scale=0.05),
        }

        # use the same metric for evaluation
        f1_scorer = make_scorer(metrics.flat_f1_score,
                                average='weighted',
                                labels=labels)

        # search
        rand_search = RandomizedSearchCV(crf,
                                         params_space,
                                         cv=3,
                                         verbose=1,
                                         n_jobs=-1,
                                         n_iter=50,
                                         scoring=f1_scorer)
        rand_search.fit(x_train, y_train)

        crf = rand_search.best_estimator_

        y_prediction = crf.predict(x_test)

        # group B and I results
        sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

        joblib.dump(crf, 'model.pkl')

        precision = metrics.flat_precision_score(y_test,
                                                 y_prediction,
                                                 labels=sorted_labels,
                                                 average='micro')
        recall = metrics.flat_recall_score(y_test,
                                           y_prediction,
                                           labels=sorted_labels,
                                           average='micro')
        f1 = metrics.flat_f1_score(y_test,
                                   y_prediction,
                                   labels=sorted_labels,
                                   average='micro')

        print('MICRO')
        print(precision, recall, f1)

        precision = metrics.flat_precision_score(y_test,
                                                 y_prediction,
                                                 labels=sorted_labels,
                                                 average='macro')
        recall = metrics.flat_recall_score(y_test,
                                           y_prediction,
                                           labels=sorted_labels,
                                           average='macro')
        f1 = metrics.flat_f1_score(y_test,
                                   y_prediction,
                                   labels=sorted_labels,
                                   average='macro')

        print('MACRO')
        print(precision, recall, f1)

        return metrics.flat_classification_report(y_test,
                                                  y_prediction,
                                                  labels=sorted_labels,
                                                  digits=3)
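Since gen_model persists the tuned estimator with joblib.dump, here is a minimal sketch of loading it back for tagging; the 'model.pkl' path and the pre-featurized input format are assumptions carried over from the example above:

import joblib

# Load the CRF persisted by gen_model() above (assumes the same 'model.pkl' path).
crf = joblib.load('model.pkl')

def tag_sentence(sentence_features):
    """Tag one pre-featurized sentence (a list of feature dicts) with the loaded CRF."""
    return crf.predict_single(sentence_features)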
Code Example #17
    def validate_performance(self, test_set):
        sentences = self.__load_corpus__(test_set)

        y_test = [self.model.sentence2labels(s) for s in sentences]

        y_prediction = []
        for i, sent in enumerate(sentences):
            new_sent = ' '.join([word[0] for word in sent])
            prediction = self.model.predict(new_sent)
            new_prediction = []
            if len(prediction) > 1:
                for p in prediction:
                    new_prediction += [p1 for p1 in p]
                # print(prediction)
                # print(new_prediction)

                prediction = new_prediction
            else:
                prediction = prediction[0]

            try:
                pred = [w[1] for w in prediction]
            except Exception:
                print(prediction)
                return

            # if len(pred) != len(y_test[i]):
            #     print(sent)
            #     print(new_sent)
            #     print(y_test[i])
            #     print(len(y_test[i]))
            #     print(pred)
            #     print(len(pred))

            y_prediction.append(pred)

        labels = [
            'O-DOS', 'B-DOS', 'I-UNIT', 'B-UNIT', 'O-UNIT', 'I-FREQ', 'B-FREQ',
            'O-FREQ', 'I-DUR', 'B-DUR', 'O-DUR', 'I-WHO', 'B-WHO', 'O-WHO'
        ]

        for i in range(len(y_prediction)):
            for j in range(len(y_prediction[i])):
                y_prediction[i][j] = y_prediction[i][j].replace('B-', '')
                y_prediction[i][j] = y_prediction[i][j].replace('O-', '')
                y_prediction[i][j] = y_prediction[i][j].replace('I-', '')

        for i in range(len(y_test)):
            for j in range(len(y_test[i])):
                y_test[i][j] = y_test[i][j].replace('B-', '')
                y_test[i][j] = y_test[i][j].replace('O-', '')
                y_test[i][j] = y_test[i][j].replace('I-', '')

        labels = ['DOS', 'UNIT', 'FREQ', 'DUR', 'WHO']

        # labels = ['DOS', 'UNIT', 'WHO', 'DUR', 'FREQ']

        sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

        precision = metrics.flat_precision_score(y_test,
                                                 y_prediction,
                                                 labels=sorted_labels,
                                                 average='micro')
        recall = metrics.flat_recall_score(y_test,
                                           y_prediction,
                                           labels=sorted_labels,
                                           average='micro')
        f1 = metrics.flat_f1_score(y_test,
                                   y_prediction,
                                   labels=sorted_labels,
                                   average='micro')

        print('MICRO')
        print(precision, recall, f1)

        precision = metrics.flat_precision_score(y_test,
                                                 y_prediction,
                                                 labels=sorted_labels,
                                                 average='macro')
        recall = metrics.flat_recall_score(y_test,
                                           y_prediction,
                                           labels=sorted_labels,
                                           average='macro')
        f1 = metrics.flat_f1_score(y_test,
                                   y_prediction,
                                   labels=sorted_labels,
                                   average='macro')

        print('MACRO')
        print(precision, recall, f1)

        print(
            metrics.flat_classification_report(y_test,
                                               y_prediction,
                                               labels=sorted_labels,
                                               digits=3))
Code Example #18
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True
    )
    crf.fit(X_train, y_train)

    labels = list(crf.classes_)
    labels.remove('O')

    y_pred = crf.predict(X_test)
    # print(y_pred)
    print(metrics.flat_f1_score(y_test, 
                          y_pred,
                          average='weighted', 
                          labels=['B-geo', 'I-geo']))
    
    tweetsFile = 'datasets/tweet-dataset.csv'
    tweetsTestData = readTweetForTesting(tweetsFile)['tweets'].head(50)
    # pipelineModelFile = 'multinomialNB.pkl'
    # pipeline = loadModel(pipelineModelFile)
    # result = predictLocation(tweetsTestData)
    # print(result)

    # Save Model
    # saveModel(crf, 'namedEntityRecognition.pkl')



Code Example #19
    def train(self, test_size=0.2, max_iterations=100, fold5valid=False):
        full_set_labels = []
        for sent in self.full_set:
            set_lab = []
            for word in sent:
                set_lab.append(word[1])
            full_set_labels.append(set_lab)

        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            self.full_set,
            full_set_labels,
            test_size=test_size,
            random_state=0)
        self.x_train = [self.sent2features(s) for s in self.x_train]
        self.x_test = [self.sent2features(s) for s in self.x_test]

        print("Starting Training on " + str(len(self.x_train)) +
              " sentences...")
        batch_size = len(self.x_train) // 5
        scores = []
        if fold5valid:
            for i in range(5):
                self.crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                                c1=0.1,
                                                c2=0.1,
                                                max_iterations=max_iterations,
                                                all_possible_transitions=True)

                indices = range(i * batch_size, (i + 1) * batch_size)
                train_batch = [
                    i for j, i in enumerate(self.x_train) if j not in indices
                ]
                test_batch = [
                    i for j, i in enumerate(self.x_train) if j in indices
                ]

                train_labels = [
                    i for j, i in enumerate(self.y_train) if j not in indices
                ]
                test_labels = [
                    i for j, i in enumerate(self.y_train) if j in indices
                ]

                self.crf.fit(train_batch, train_labels)
                labels = list(self.crf.classes_)
                labels.remove("N")
                y_pred = self.crf.predict(test_batch)
                val = metrics.flat_f1_score(test_labels,
                                            y_pred,
                                            average='weighted',
                                            labels=labels)
                scores.append(val)

            import numpy
            scores = numpy.array(scores)
            print("5 Fold scores:" + str(scores))
            f1score = scores.mean(), scores.std() * 2
            print("F1 Score: %0.2f (+/- %0.2f)" % (f1score))

            #self.crf.fit(self.x_train, self.y_train)
            self.trained = True
            print("Finished training...")
            return f1score

        else:
            import scipy
            from sklearn.metrics import make_scorer
            from sklearn.model_selection import RandomizedSearchCV

            self.crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                            all_possible_transitions=True)
            params_space = {
                'c1': scipy.stats.expon(scale=0.5),
                'c2': scipy.stats.expon(scale=0.05),
                'max_iterations': range(20, 100),
            }
            self.crf.fit(self.x_train, self.y_train)
            labels = list(self.crf.classes_)
            labels.remove('N')
            # use the same metric for evaluation
            f1_scorer = make_scorer(metrics.flat_f1_score,
                                    average='weighted',
                                    labels=labels)

            # search
            rs = RandomizedSearchCV(self.crf,
                                    params_space,
                                    cv=5,
                                    verbose=1,
                                    n_jobs=-1,
                                    n_iter=100,
                                    scoring=f1_scorer)
            rs.fit(self.x_train, self.y_train)
            # crf = rs.best_estimator_
            print('best params:', rs.best_params_)
            print('best CV score:', rs.best_score_)
            print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ /
                                                1000000))
            self.trained = True
            return rs
Code Example #20
                           c1=0.1,
                           c2=0.1,
                           max_iterations=100,
                           all_possible_transitions=True)
crf.fit(X_train, y_train)

labels = list(crf.classes_)
labels.remove('O')

y_pred = crf.predict(X_test)

print("Full Test Accuracy:", crf.score(X_test, y_test))
print(
    "Full Test F1 Score:",
    metrics.flat_f1_score(y_test,
                          y_pred,
                          average='weighted',
                          labels=crf.classes_))
print("Trimmed Test F1 Score:",
      metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))

sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
print(
    metrics.flat_classification_report(y_test,
                                       y_pred,
                                       labels=sorted_labels,
                                       digits=3))
"""========================== append prior predictions and re-classify ========================"""
for sent, labels in enumerate(y_train):
    for word, label in enumerate(labels):
        if word > 0:
            X_train[sent][word]['prev.ent'] = labels[
Code Example #21
File: sklearn_crf.py Project: ynandwan/ner_tagging
    X_test = [X[i] for i in range(len(X)) if groups[i] == gid]
    y_test = [Y[i] for i in range(len(Y)) if groups[i] == gid]
    
    # %%time  (Jupyter cell magic from the original notebook; not valid in plain Python)
    crf = sklearn_crfsuite.CRF(
        algorithm='pa',
        c=0.1,
        max_iterations=100,
        all_possible_transitions=False
    )
    crf.fit(X_train, y_train)
    labels = list(crf.classes_)
    labels.remove('O')
    labels
    y_pred = crf.predict(X_test)
    cross_val_results.append(metrics.flat_f1_score(y_test, y_pred,
                average='macro', labels=labels))
    
np.mean(cross_val_results)


def grid_search(X, y, labels):
    crf = sklearn_crfsuite.CRF(
        algorithm='pa',
        max_iterations=100,
        all_possible_transitions=False
    )
    params_space = {
        'c': [0.1]
    }

Code Example #22
from os.path import join, dirname
import time
import joblib
import pycrfsuite
from sklearn_crfsuite import metrics

from load_data import load_dataset

transformer = joblib.load(join(dirname(__file__), "model", "transformer.bin"))
path = join(dirname(__file__), "model", "model.bin")
estimator = pycrfsuite.Tagger()
estimator.open(path)

test_set = load_dataset(
    join(dirname(dirname(dirname(__file__))), "data", "vlsp2016", "corpus",
         "test.txt"))
X_test, y_test = transformer.transform(test_set)
start = time.time()
y_pred = [estimator.tag(x) for x in X_test]
end = time.time()
test_time = end - start
f1_test_score = metrics.flat_f1_score(y_test, y_pred, average='weighted')
print("F1 score: ", f1_test_score)
print("Test time: ", test_time)
with open("report.txt", "w") as f:
    f.write("F1 score: " + str(f1_test_score) + "\n" + "Test time: " +
            str(test_time))
Code Example #23
          c1=0.1,
          c2=0.1,
          max_iterations=100,
          all_possible_transitions=False)

from sklearn.model_selection import cross_val_predict
from sklearn_crfsuite.metrics import flat_classification_report

pred = cross_val_predict(estimator=crf, X=Xtrain, y=Ytrain, cv=5)

report = flat_classification_report(y_pred=pred, y_true=Ytrain)
#%%
crf.fit(Xtrain, Ytrain)

y_pred = crf.predict(Xtest)
metrics.flat_f1_score(Ytest, y_pred, average='weighted')

print(metrics.flat_classification_report(Ytest, y_pred, digits=3))
#%%
print(report)

print(y_pred[0])
print(output_sql[2000])
print(main_lst1[2000])
print()

#%%

X = CountVectorizer(tokenizer=lambda doc: doc,
                    lowercase=False).fit_transform(lem_data)
#print(ngram_vectorizer.fit(data))
Code Example #24
def unified_approach(file_name, file_2):
    # train_path = "../Data/bio-ner/train"
    # dev_path = "../Data/bio-ner/dev"
    # create_file(train_path, "train")
    # create_file(dev_path, "dev")
    #exclude = ["Value", "Time", "Unit", "Location"]
    train_sentences = file_opener(file_name)
    dev_sentences = file_opener(file_2)

    x_train = [sentence_features(s) for s in train_sentences]
    y_train = [sentence_labels(s) for s in train_sentences]

    x_dev = [sentence_features(s) for s in dev_sentences]
    y_dev = [sentence_labels(s) for s in dev_sentences]

    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.09684573395986483,
        c2=0.0800864058815976,
        max_iterations=100,
        all_possible_transitions=True
    )
    crf.fit(x_train, y_train)
    labels = list(crf.classes_)
    labels.remove('O')
    y_predicted = crf.predict(x_dev)

    # Get the various lists for evaluation of separate label parts
    y_pred_flat = []
    y_pred_iob = []
    y_pred_class = []
    y_dev_flat = []
    y_dev_iob = []
    y_dev_class = []

    for x in y_predicted:
        y_pred_flat += x
        for xx in x:
            y_pred_iob.append(xx[0])
            if xx != 'O':
                y_pred_class.append(xx[2:])
            else:
                y_pred_class.append('O')
    for x in y_dev:
        y_dev_flat += x
        for xx in x:
            y_dev_iob.append(xx[0])
            if xx != 'O':
                y_dev_class.append(xx[2:])
            else:
                y_dev_class.append('O')

    # print(set(y_pred_flat) - set(y_dev_flat))
    # print(set(y_dev_flat) - set(y_pred_flat))
    # print(set(y_pred_flat))
    # print(set(y_dev_flat))
    # print(labels)
    labels = list(set(y_pred_flat))
    labels.remove("O")
    print(labels)
    #labels = ["B-Biotic_Entity-L"]
    f1 = metrics.flat_f1_score(y_dev, y_predicted, average='weighted', labels=labels)

    # labels = list(set(y_pred_iob))
    # labels.remove('O')
    # iob_score = f1_score(y_dev_iob, y_pred_iob, average='weighted', labels=labels)
    # print("IOB Score:", iob_score)
    # labels = list(set(y_pred_class))
    # labels.remove('O')
    # class_score = f1_score(y_dev_class, y_pred_class, average='weighted', labels=labels)
    # print("Class Score:", class_score)
    print("Overall Score:", f1)
    return f1
Code Example #25
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.6)

crf = CRF(algorithm = 'lbfgs',
         c1 = 0.1,
         c2 = 0.1,
         max_iterations = 100,
         all_possible_transitions = False)
crf.fit(X_train, y_train)

#Predicting on the test set.
y_pred = crf.predict(X_test)

for i in range(len(y_pred)):
	prediction = y_pred[i]
	testList = X_test[i]
	testSentence = ""
	for testTuple in testList:
		testSentence = testSentence + testTuple['word.lower()'] + ' '
	words = testSentence.split(" ")
	x = 0
	for wordPrediction in prediction:
		if wordPrediction == 'B-date' or wordPrediction == 'B-amt' or wordPrediction == 'B-mer' or wordPrediction == 'I-mer' or wordPrediction == 'I-date':
			print(words[x],wordPrediction)
		x +=1

f1_score = flat_f1_score(y_test, y_pred, average = 'weighted')
print(f1_score)

report = flat_classification_report(y_test, y_pred)
print(report)
Code Example #26
crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                           c1=0.1,
                           c2=0.1,
                           max_iterations=100,
                           all_possible_transitions=True)
crf.fit(X_train, y_train)

y_pred = crf.predict(X_test)
labels = list(crf.classes_)

sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

print('Number of test sentences used = 10')
print('----------------------Viterbi Results---------------------------')
print(
    'Viterbi F1 Score (weighted) :',
    metrics.flat_f1_score(actual_tag, seq, average='weighted', labels=labels))
print(
    metrics.flat_classification_report(actual_tag,
                                       seq,
                                       labels=sorted_labels,
                                       digits=3))
print('------------------------CRF Results-----------------------------')
print('CRF F1 Score (weighted) :',
      metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
print(
    metrics.flat_classification_report(y_test,
                                       y_pred,
                                       labels=sorted_labels,
                                       digits=3))
Code Example #27
    def cross_validate(self,
                       training_dataset=None,
                       num_folds=5,
                       prediction_directory=None,
                       groundtruth_directory=None,
                       asynchronous=False):
        """
        Performs k-fold stratified cross-validation using our model and pipeline.

        If the training dataset, groundtruth_directory, and prediction_directory are passed, intermediate
        predictions made during cross validation are written to `prediction_directory`. This allows one to
        construct a confusion matrix or to compute the prediction ambiguity with the methods present in the
        Dataset class, supporting pipeline development without a designated evaluation set.

        :param training_dataset: Dataset that is being cross validated (optional)
        :param num_folds: number of folds to split training data into for cross validation
        :param prediction_directory: directory to write predictions of cross validation to, or `True` for the default predictions sub-directory
        :param groundtruth_directory: directory to write the ground truth that medaCy evaluates on
        :param asynchronous: Boolean for whether the preprocessing should be done asynchronously
        :return: a Dataset built from the written predictions if prediction_directory is given, otherwise the statistics aggregated over all folds
        """

        if num_folds <= 1:
            raise ValueError(
                "Number of folds for cross validation must be greater than 1, but is %s"
                % repr(num_folds))

        if prediction_directory is not None and training_dataset is None:
            raise ValueError(
                "Cannot generate predictions during cross validation if training dataset is not given."
                " Please pass the training dataset in the 'training_dataset' parameter."
            )
        if groundtruth_directory is not None and training_dataset is None:
            raise ValueError(
                "Cannot generate groundtruth during cross validation if training dataset is not given."
                " Please pass the training dataset in the 'training_dataset' parameter."
            )

        pipeline_report = self.pipeline.get_report()

        self.preprocess(training_dataset, asynchronous)

        if not (self.X_data and self.y_data):
            raise RuntimeError(
                "Must have features and labels extracted for cross validation")

        tags = sorted(training_dataset.get_labels(as_list=True))
        self.pipeline.entities = tags
        logging.info('Tagset: %s', tags)

        eval_stats = {}

        # Dict for storing mapping of sequences to their corresponding file
        groundtruth_by_document = {
            filename: []
            for filename in {x[2]
                             for x in self.X_data}
        }
        preds_by_document = {
            filename: []
            for filename in {x[2]
                             for x in self.X_data}
        }

        folds = create_folds(self.y_data, num_folds)

        for fold_num, fold_data in enumerate(folds, 1):
            train_indices, test_indices = fold_data
            fold_statistics = {}
            learner_name, learner = self.pipeline.get_learner()

            X_train = [self.X_data[index] for index in train_indices]
            y_train = [self.y_data[index] for index in train_indices]

            X_test = [self.X_data[index] for index in test_indices]
            y_test = [self.y_data[index] for index in test_indices]

            logging.info("Training Fold %i", fold_num)
            train_data = [x[0] for x in X_train]
            test_data = [x[0] for x in X_test]
            learner.fit(train_data, y_train)
            y_pred = learner.predict(test_data)

            if groundtruth_directory is not None:
                # Flattening nested structures into 2d lists
                document_indices = []
                span_indices = []
                for sequence in X_test:
                    document_indices += [sequence[2]] * len(sequence[0])
                    span_indices += list(sequence[1])
                groundtruth = [
                    element for sentence in y_test for element in sentence
                ]

                # Map the predicted sequences to their corresponding documents
                i = 0

                while i < len(groundtruth):
                    if groundtruth[i] == 'O':
                        i += 1
                        continue

                    entity = groundtruth[i]
                    document = document_indices[i]
                    first_start, first_end = span_indices[i]
                    # Ensure that consecutive tokens with the same label are merged
                    while i < len(groundtruth) - 1 and groundtruth[
                            i +
                            1] == entity:  # If inside entity, keep incrementing
                        i += 1

                    last_start, last_end = span_indices[i]
                    groundtruth_by_document[document].append(
                        (entity, first_start, last_end))
                    i += 1

            if prediction_directory is not None:
                # Flattening nested structures into 2d lists
                document_indices = []
                span_indices = []

                for sequence in X_test:
                    document_indices += [sequence[2]] * len(sequence[0])
                    span_indices += list(sequence[1])

                predictions = [
                    element for sentence in y_pred for element in sentence
                ]

                # Map the predicted sequences to their corresponding documents
                i = 0

                while i < len(predictions):
                    if predictions[i] == 'O':
                        i += 1
                        continue

                    entity = predictions[i]
                    document = document_indices[i]
                    first_start, first_end = span_indices[i]

                    # Ensure that consecutive tokens with the same label are merged
                    while i < len(predictions) - 1 and predictions[
                            i +
                            1] == entity:  # If inside entity, keep incrementing
                        i += 1

                    last_start, last_end = span_indices[i]
                    preds_by_document[document].append(
                        (entity, first_start, last_end))
                    i += 1

            # Write the metrics for this fold.
            for label in tags:
                fold_statistics[label] = {
                    "recall":
                    metrics.flat_recall_score(y_test,
                                              y_pred,
                                              average='weighted',
                                              labels=[label]),
                    "precision":
                    metrics.flat_precision_score(y_test,
                                                 y_pred,
                                                 average='weighted',
                                                 labels=[label]),
                    "f1":
                    metrics.flat_f1_score(y_test,
                                          y_pred,
                                          average='weighted',
                                          labels=[label])
                }

            # add averages
            fold_statistics['system'] = {
                "recall":
                metrics.flat_recall_score(y_test,
                                          y_pred,
                                          average='weighted',
                                          labels=tags),
                "precision":
                metrics.flat_precision_score(y_test,
                                             y_pred,
                                             average='weighted',
                                             labels=tags),
                "f1":
                metrics.flat_f1_score(y_test,
                                      y_pred,
                                      average='weighted',
                                      labels=tags)
            }

            table_data = [[
                label,
                format(fold_statistics[label]['precision'], ".3f"),
                format(fold_statistics[label]['recall'], ".3f"),
                format(fold_statistics[label]['f1'], ".3f")
            ] for label in tags + ['system']]

            logging.info(
                '\n' +
                tabulate(table_data,
                         headers=['Entity', 'Precision', 'Recall', 'F1'],
                         tablefmt='orgtbl'))

            eval_stats[fold_num] = fold_statistics

        statistics_all_folds = {}

        for label in tags + ['system']:
            statistics_all_folds[label] = {
                'precision_average':
                mean(eval_stats[fold][label]['precision']
                     for fold in eval_stats),
                'precision_max':
                max(eval_stats[fold][label]['precision']
                    for fold in eval_stats),
                'precision_min':
                min(eval_stats[fold][label]['precision']
                    for fold in eval_stats),
                'recall_average':
                mean(eval_stats[fold][label]['recall'] for fold in eval_stats),
                'recall_max':
                max(eval_stats[fold][label]['recall'] for fold in eval_stats),
                'f1_average':
                mean(eval_stats[fold][label]['f1'] for fold in eval_stats),
                'f1_max':
                max(eval_stats[fold][label]['f1'] for fold in eval_stats),
                'f1_min':
                min(eval_stats[fold][label]['f1'] for fold in eval_stats),
            }

        entity_counts = training_dataset.compute_counts()

        table_data = [
            [
                f"{label} ({entity_counts[label]})",  # Entity (Count)
                format(statistics_all_folds[label]['precision_average'],
                       ".3f"),
                format(statistics_all_folds[label]['recall_average'], ".3f"),
                format(statistics_all_folds[label]['f1_average'], ".3f"),
                format(statistics_all_folds[label]['f1_min'], ".3f"),
                format(statistics_all_folds[label]['f1_max'], ".3f")
            ] for label in tags + ['system']
        ]

        # Combine the pipeline report and the resulting data, then log it or print it (whichever ensures that it prints)

        output_str = '\n' + pipeline_report + '\n\n' + tabulate(
            table_data,
            headers=[
                'Entity (Count)', 'Precision', 'Recall', 'F1', 'F1_Min',
                'F1_Max'
            ],
            tablefmt='orgtbl')

        if logging.root.level > logging.INFO:
            print(output_str)
        else:
            logging.info(output_str)

        if prediction_directory:

            prediction_directory = os.path.join(
                training_dataset.data_directory, "predictions")
            groundtruth_directory = os.path.join(
                training_dataset.data_directory, "groundtruth")

            # Write annotations generated from cross-validation
            self.create_annotation_directory(directory=prediction_directory,
                                             training_dataset=training_dataset,
                                             option="predictions")

            # Write medaCy ground truth generated from cross-validation
            self.create_annotation_directory(directory=groundtruth_directory,
                                             training_dataset=training_dataset,
                                             option="groundtruth")

            # Add predicted/known annotations to the folders containing groundtruth and predictions respectively
            self.predict_annotation_evaluation(
                directory=groundtruth_directory,
                training_dataset=training_dataset,
                preds_by_document=preds_by_document,
                groundtruth_by_document=groundtruth_by_document,
                option="groundtruth")

            self.predict_annotation_evaluation(
                directory=prediction_directory,
                training_dataset=training_dataset,
                preds_by_document=preds_by_document,
                groundtruth_by_document=groundtruth_by_document,
                option="predictions")

            return Dataset(prediction_directory)
        else:
            return statistics_all_folds
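Stripped of the medaCy-specific bookkeeping, the fold evaluation in Example #27 boils down to a weighted flat F1 per label and over the whole tag set for each fold, followed by aggregation. A condensed, self-contained sketch of that core loop (the helper name and input format are illustrative, not medaCy's API):

from statistics import mean

from sklearn_crfsuite import metrics


def per_fold_f1(folds, tags):
    """folds: iterable of (y_test, y_pred) pairs of label sequences; tags: list of entity labels."""
    scores = {label: [] for label in tags + ['system']}
    for y_test, y_pred in folds:
        for label in tags:
            scores[label].append(metrics.flat_f1_score(
                y_test, y_pred, average='weighted', labels=[label]))
        scores['system'].append(metrics.flat_f1_score(
            y_test, y_pred, average='weighted', labels=tags))
    return {label: {'f1_average': mean(vals), 'f1_min': min(vals), 'f1_max': max(vals)}
            for label, vals in scores.items()}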
Code Example #28
File: training.py Project: mieczkowski-m/inl-zal
 def get_f1_score(self):
     return metrics.flat_f1_score(self.y_test,
                                  self.y_predict,
                                  average='weighted',
                                  labels=self.labels)
Code Example #29
    print("=======================")
    print("Load trained model ...")
    model = pickle.load(open("./models/" + MODEL_NAME, "rb"))
    print("Done!!!")

    predict = model.predict(X_test)

    print("=======================")
    print("Testing ....")
    print(len(y_test), len(predict))

    avg_count = 0
    print(predict[0])
    for i in range(len(y_test)):
        acc = evaluate(predict[i], y_test[i])
        # print(acc)
        avg_count += acc

    # print(score)

    print("Avg acc:", avg_count / float(len(y_test)))
    print(model.classes_)
    print("Accuracy\t:", metrics.flat_accuracy_score(y_test, predict))
    print("Precision\t:",
          metrics.flat_precision_score(y_test, predict, average=None))
    print("Recall\t:",
          metrics.flat_recall_score(y_test, predict, average=None))
    print("F1\t:", metrics.flat_f1_score(y_test, predict, average=None))

    print("Done!!!")
Code Example #30
def test_flat_fscore():
    score = metrics.flat_f1_score(y1, y2, average='macro')
    assert score == 2 / 3
    assert metrics.flat_fbeta_score(y1, y2, beta=1, average='macro') == score
Code Example #31
File: spacy_model.py Project: yushu-liu/medaCy
    def cross_validate(self,
                       folds=10,
                       training_dataset=None,
                       spacy_model_name=None,
                       epochs=None):
        """
        Runs a cross validation.

        :param folds: Number of folds to use for the cross validation.
        :param training_dataset: Path to the directory of BRAT files to use for the training data.
        :param spacy_model_name: Name of the spaCy model to start from.
        :param epochs: Number of epochs to use for training each fold.
        """
        if folds <= 1:
            raise ValueError(
                "Number of folds for cross validation must be greater than 1")

        if training_dataset is None:
            raise ValueError("Need a dataset to evaluate")

        if spacy_model_name is None:
            raise ValueError("Need a spacy model to start with")

        train_data = training_dataset.get_training_data()

        x_data, y_data = zip(*train_data)

        skipped_files = []
        evaluation_statistics = {}

        folds = SequenceStratifiedKFold(folds=folds)
        fold = 1

        for train_indices, test_indices in folds(x_data, y_data):
            logging.info("\n----EVALUATING FOLD %d----", fold)
            self.model = None
            fold_statistics = {}

            x_subdataset = training_dataset.get_subdataset(train_indices)
            self.fit(x_subdataset, spacy_model_name, epochs)
            logging.info('Done training!\n')

            nlp = self.model
            labels = list(x_subdataset.get_labels())

            y_subdataset = training_dataset.get_subdataset(test_indices)

            y_test = []
            y_pred = []

            for data_file in y_subdataset.get_data_files():
                ann_path = data_file.get_annotation_path()
                annotations = Annotations(ann_path)
                txt_path = data_file.get_text_path()

                with open(txt_path, 'r') as source_text_file:
                    text = source_text_file.read()

                doc = nlp(text)

                test_entities = annotations.get_spacy_entities()
                test_entities = self.entities_to_biluo(doc, test_entities)
                y_test.append(test_entities)

                pred_entities = self.predict(text)
                pred_entities = self.entities_to_biluo(doc, pred_entities)
                y_pred.append(pred_entities)

            logging.debug('\n------y_test------')
            logging.debug(y_test)
            logging.debug('\n------y_pred------')
            logging.debug(y_pred)

            # Write the metrics for this fold.
            for label in labels:
                fold_statistics[label] = {}
                recall = metrics.flat_recall_score(y_test,
                                                   y_pred,
                                                   average='weighted',
                                                   labels=[label])
                precision = metrics.flat_precision_score(y_test,
                                                         y_pred,
                                                         average='weighted',
                                                         labels=[label])
                f1_score = metrics.flat_f1_score(y_test,
                                                 y_pred,
                                                 average='weighted',
                                                 labels=[label])
                fold_statistics[label]['precision'] = precision
                fold_statistics[label]['recall'] = recall
                fold_statistics[label]['f1'] = f1_score

            # add averages
            fold_statistics['system'] = {}
            recall = metrics.flat_recall_score(y_test,
                                               y_pred,
                                               average='weighted',
                                               labels=labels)
            precision = metrics.flat_precision_score(y_test,
                                                     y_pred,
                                                     average='weighted',
                                                     labels=labels)
            f1_score = metrics.flat_f1_score(y_test,
                                             y_pred,
                                             average='weighted',
                                             labels=labels)
            fold_statistics['system']['precision'] = precision
            fold_statistics['system']['recall'] = recall
            fold_statistics['system']['f1'] = f1_score

            table_data = [[
                label,
                format(fold_statistics[label]['precision'], ".3f"),
                format(fold_statistics[label]['recall'], ".3f"),
                format(fold_statistics[label]['f1'], ".3f")
            ] for label in labels + ['system']]

            logging.info(
                tabulate(table_data,
                         headers=['Entity', 'Precision', 'Recall', 'F1'],
                         tablefmt='orgtbl'))

            evaluation_statistics[fold] = fold_statistics
            fold += 1

        if skipped_files:
            logging.info('\nWARNING. SKIPPED THE FOLLOWING ANNOTATIONS:')
            logging.info(skipped_files)

        statistics_all_folds = {}

        for label in labels + ['system']:
            statistics_all_folds[label] = {}
            statistics_all_folds[label]['precision_average'] = mean([
                evaluation_statistics[fold][label]['precision']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['precision_max'] = max([
                evaluation_statistics[fold][label]['precision']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['precision_min'] = min([
                evaluation_statistics[fold][label]['precision']
                for fold in evaluation_statistics
            ])

            statistics_all_folds[label]['recall_average'] = mean([
                evaluation_statistics[fold][label]['recall']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['recall_max'] = max([
                evaluation_statistics[fold][label]['recall']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['recall_min'] = min([
                evaluation_statistics[fold][label]['recall']
                for fold in evaluation_statistics
            ])

            statistics_all_folds[label]['f1_average'] = mean([
                evaluation_statistics[fold][label]['f1']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['f1_max'] = max([
                evaluation_statistics[fold][label]['f1']
                for fold in evaluation_statistics
            ])
            statistics_all_folds[label]['f1_min'] = min([
                evaluation_statistics[fold][label]['f1']
                for fold in evaluation_statistics
            ])

        table_data = [[
            label,
            format(statistics_all_folds[label]['precision_average'], ".3f"),
            format(statistics_all_folds[label]['recall_average'], ".3f"),
            format(statistics_all_folds[label]['f1_average'], ".3f"),
            format(statistics_all_folds[label]['f1_min'], ".3f"),
            format(statistics_all_folds[label]['f1_max'], ".3f")
        ] for label in labels + ['system']]

        table_string = '\n' + tabulate(table_data,
                                       headers=[
                                           'Entity', 'Precision', 'Recall',
                                           'F1', 'F1_Min', 'F1_Max'
                                       ],
                                       tablefmt='orgtbl')
        logging.info(table_string)
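
A note on the block above: the per-label aggregation repeats the same mean/min/max pattern for precision, recall, and F1. A minimal sketch of how it could be condensed, assuming evaluation_statistics and labels have the same shape as in the snippet:

from statistics import mean

def aggregate_folds(evaluation_statistics, labels):
    """Collapse per-fold precision/recall/f1 scores into average/min/max per label."""
    summary = {}
    for label in labels + ['system']:
        summary[label] = {}
        for metric in ('precision', 'recall', 'f1'):
            values = [evaluation_statistics[fold][label][metric]
                      for fold in evaluation_statistics]
            summary[label][metric + '_average'] = mean(values)
            summary[label][metric + '_min'] = min(values)
            summary[label][metric + '_max'] = max(values)
    return summary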
Code example #32

X_train = [sent2features(s) for s in train_sents]
y_train = [sent2labels(s) for s in train_sents]

X_test = [sent2features(s) for s in test_sents]
y_test = [sent2labels(s) for s in test_sents]

pprint.pprint(X_train[0])
print(len(X_train))

pprint.pprint(y_train[0])
print(len(y_train))

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)
crf.fit(X_train, y_train)

labels = list(crf.classes_)
labels.remove('O')
print(labels)


y_pred = crf.predict(X_test)
print(metrics.flat_f1_score(y_test, y_pred,
                      average='weighted', labels=labels))
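
A short follow-up sketch, assuming the metrics module, y_test, y_pred, and labels from the example above: sorting the labels groups the B- and I- tags of each entity type together before printing a per-label report.

# Group B-XXX and I-XXX of the same entity type next to each other.
sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
print(metrics.flat_classification_report(
    y_test, y_pred, labels=sorted_labels, digits=3))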
Code example #33
if __name__ == '__main__':
    # load data
    train_set = []
    for f in ["train.txt", "dev.txt", "test.txt"]:
        file = join(dirname(dirname(dirname(__file__))), "data", "vlsp2016",
                    "corpus", f)
        train_set += load_dataset(file)

    # transformer
    transformer = CustomTransformer(template)
    X, y = transformer.transform(train_set)

    # train
    crf_params = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,  # maximum number of optimization iterations
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }
    model_path = join(dirname(__file__), "final_model", "model.bin")
    X_train, X_dev, y_train, y_dev = train_test_split(X, y, test_size=0.01)
    estimator = CRF(params=crf_params, filename=model_path)
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_dev)
    f1_score = metrics.flat_f1_score(y_dev, y_pred, average='weighted')
    print("Dev score: ", f1_score)

    joblib.dump(transformer, "final_model/transformer.bin")
Code example #34
def cross_validate(x_folds, y_folds, params):
    f1_per = []
    f1_org = []
    f1_misc = []
    f1_loc = []
    f1_not = []

    precision_per = []
    precision_org = []
    precision_misc = []
    precision_loc = []
    precision_not = []

    recall_per = []
    recall_org = []
    recall_misc = []
    recall_loc = []
    recall_not = []

    for i in range(len(x_folds)):
        print('\rWorking on fold {}/{} ...'.format(i + 1, len(x_folds)),
              end='')

        crf = sklearn_crfsuite.CRF(**params)

        test_x, test_y, train_x, train_y = folds_2_tt(x_folds, y_folds, i)

        crf.fit(train_x, train_y)
        pred_y = crf.predict(test_x)

        f1_per.append(
            metrics.flat_f1_score(test_y, pred_y, average=None,
                                  labels=['per']))
        f1_org.append(
            metrics.flat_f1_score(test_y, pred_y, average=None,
                                  labels=['org']))
        f1_misc.append(
            metrics.flat_f1_score(test_y,
                                  pred_y,
                                  average=None,
                                  labels=['misc']))
        f1_loc.append(
            metrics.flat_f1_score(test_y, pred_y, average=None,
                                  labels=['loc']))
        f1_not.append(
            metrics.flat_f1_score(test_y,
                                  pred_y,
                                  average=None,
                                  labels=['notpropn']))

        precision_per.append(
            metrics.flat_precision_score(test_y,
                                         pred_y,
                                         average=None,
                                         labels=['per']))
        precision_org.append(
            metrics.flat_precision_score(test_y,
                                         pred_y,
                                         average=None,
                                         labels=['org']))
        precision_misc.append(
            metrics.flat_precision_score(test_y,
                                         pred_y,
                                         average=None,
                                         labels=['misc']))
        precision_loc.append(
            metrics.flat_precision_score(test_y,
                                         pred_y,
                                         average=None,
                                         labels=['loc']))
        precision_not.append(
            metrics.flat_precision_score(test_y,
                                         pred_y,
                                         average=None,
                                         labels=['notpropn']))

        recall_per.append(
            metrics.flat_recall_score(test_y,
                                      pred_y,
                                      average=None,
                                      labels=['per']))
        recall_org.append(
            metrics.flat_recall_score(test_y,
                                      pred_y,
                                      average=None,
                                      labels=['org']))
        recall_misc.append(
            metrics.flat_recall_score(test_y,
                                      pred_y,
                                      average=None,
                                      labels=['misc']))
        recall_loc.append(
            metrics.flat_recall_score(test_y,
                                      pred_y,
                                      average=None,
                                      labels=['loc']))
        recall_not.append(
            metrics.flat_recall_score(test_y,
                                      pred_y,
                                      average=None,
                                      labels=['notpropn']))

    print()
    avg_per_f1 = sum(f1_per) / len(f1_per)
    avg_org_f1 = sum(f1_org) / len(f1_org)
    avg_loc_f1 = sum(f1_loc) / len(f1_loc)
    avg_misc_f1 = sum(f1_misc) / len(f1_misc)
    avg_not_f1 = sum(f1_not) / len(f1_not)

    avg_per_precision = sum(precision_per) / len(precision_per)
    avg_org_precision = sum(precision_org) / len(precision_org)
    avg_loc_precision = sum(precision_loc) / len(precision_loc)
    avg_misc_precision = sum(precision_misc) / len(precision_misc)
    avg_not_precision = sum(precision_not) / len(precision_not)

    avg_per_recall = sum(recall_per) / len(recall_per)
    avg_org_recall = sum(recall_org) / len(recall_org)
    avg_loc_recall = sum(recall_loc) / len(recall_loc)
    avg_misc_recall = sum(recall_misc) / len(recall_misc)
    avg_not_recall = sum(recall_not) / len(recall_not)

    result = {
        'per': (avg_per_precision, avg_per_recall, avg_per_f1),
        'org': (avg_org_precision, avg_org_recall, avg_org_f1),
        'misc': (avg_misc_precision, avg_misc_recall, avg_misc_f1),
        'loc': (avg_loc_precision, avg_loc_recall, avg_loc_f1),
        'not': (avg_not_precision, avg_not_recall, avg_not_f1)
    }

    return result
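
The per-label calls above can be collapsed, because flat_f1_score, flat_precision_score, and flat_recall_score accept a list of labels and return one score per label when average=None. A minimal sketch, assuming the same test_y/pred_y lists used inside the loop:

from sklearn_crfsuite import metrics

label_list = ['per', 'org', 'misc', 'loc', 'notpropn']
# Each call returns an array with one score per label, in label_list order.
p = metrics.flat_precision_score(test_y, pred_y, average=None, labels=label_list)
r = metrics.flat_recall_score(test_y, pred_y, average=None, labels=label_list)
f = metrics.flat_f1_score(test_y, pred_y, average=None, labels=label_list)
per_label = {label: scores for label, scores in zip(label_list, zip(p, r, f))}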
Code example #35
File: analyze.py Project: duongkstn/word_tokenize
from os.path import join, dirname
import time
import joblib
import pycrfsuite
from sklearn_crfsuite import metrics

from load_data import load_dataset


transformer = joblib.load(join(dirname(__file__), "model", "transformer.bin"))
path = join(dirname(__file__), "model", "model.bin")
estimator = pycrfsuite.Tagger()
estimator.open(path)

test_set = load_dataset(join(dirname(dirname(dirname(__file__))), "data", "vlsp2016", "corpus", "test.txt"))
X_test, y_test = transformer.transform(test_set)
start = time.time()
y_pred = [estimator.tag(x) for x in X_test]
end = time.time()
test_time = end - start
f1_test_score = metrics.flat_f1_score(y_test, y_pred, average='weighted')
print("F1 score: ", f1_test_score)
print("Test time: ", test_time)
with open("report.txt", "w") as f:
    f.write("F1 score: " + str(f1_test_score) + "\n" + "Test time: " + str(test_time))
Code example #36
# Calculate the features
x_train_features = [Parser().addr2features(address) for address in x_train]
x_test_features = [Parser().addr2features(address) for address in x_test]

# Train the model
crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                           c1=0.1,
                           c2=0.1,
                           max_iterations=100,
                           all_possible_transitions=True)

crf.fit(x_train_features, y_train)
y_pred = crf.predict(x_test_features)

print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=label_types))

# group B and I results
sorted_labels = sorted(label_types, key=lambda name: (name[1:], name[0]))

print(
    metrics.flat_classification_report(y_test,
                                       y_pred,
                                       labels=sorted_labels,
                                       digits=3))

# Model fit statistics
"""
                           precision    recall  f1-score   support

            AddressNumber       1.00      1.00      1.00       119
Code example #37
def test_flat_f1_score_binary():
    s = [["x", "y"], ["x", "y"]]
    score = metrics.flat_f1_score(s, s, average='weighted')
    assert score == 1.0
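
A hypothetical companion test (not part of the original suite) sketching the weighted average on an imperfect prediction; per-label F1 is 0.8 for "x" and 2/3 for "y", and both labels have the same support:

import pytest

def test_flat_f1_score_binary_imperfect():
    y_true = [["x", "y"], ["x", "y"]]
    y_pred = [["x", "x"], ["x", "y"]]
    # label "x": precision 2/3, recall 1.0 -> F1 = 0.8
    # label "y": precision 1.0, recall 0.5 -> F1 = 2/3
    score = metrics.flat_f1_score(y_true, y_pred, average='weighted')
    assert score == pytest.approx((0.8 + 2 / 3) / 2)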
Code example #38
def evaluate(args, model, tokenizer, labels, pad_token_label_id, mode, prefix=""):
    eval_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode=mode)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {"input_ids": batch[0],
                      "attention_mask": batch[1],
                      "labels": batch[3]}
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = batch[2] if args.model_type in ["bert", "xlnet"] else None  # XLM and RoBERTa don"t use segment_ids
            outputs = model(**inputs)
            tmp_eval_loss, logits, predicted_tags = outputs

            if args.n_gpu > 1:
                tmp_eval_loss = tmp_eval_loss.mean()  # mean() to average on multi-gpu parallel evaluating

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            #preds = logits.detach().cpu().numpy()
            preds = predicted_tags
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            #preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            preds.extend(predicted_tags)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    #preds_logits = softmax(preds, axis=2)
    #preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])
    
    results = {
        "loss": eval_loss,
        "precision": precision_score(out_label_list, preds_list),
        "recall": recall_score(out_label_list, preds_list),
        "f1": f1_score(out_label_list, preds_list),
        "flat_f1": metrics.flat_f1_score(out_label_list, preds_list, average='micro', labels=["B-PROP", "I-PROP"])
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list
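
A small illustration of why the entity-level f1 and the token-level flat_f1 above can disagree, assuming the entity-level precision_score/recall_score/f1_score come from seqeval, which matches the list-of-lists call signature used in the function:

from seqeval.metrics import f1_score
from sklearn_crfsuite import metrics

gold = [["B-PROP", "I-PROP", "O"]]
pred = [["B-PROP", "O", "O"]]  # the span boundary is wrong

# Entity-level: the predicted span does not match the gold span exactly -> 0.0
print(f1_score(gold, pred))
# Token-level micro F1 over the PROP tags: one of two tokens is still correct -> ~0.667
print(metrics.flat_f1_score(gold, pred, average='micro', labels=["B-PROP", "I-PROP"]))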