Exemple #1
0
	def __init__(self, train_sents, feature_detector=prev_next_pos_iob, **kwargs):
		"""Train the wrapped classifier-based tagger on chunked sentences.

		``train_sents`` is converted with ``chunk_trees2train_chunks`` before
		being handed to ``ClassifierBasedTagger``. A falsy
		``feature_detector`` falls back to ``self.feature_detector``. Any
		extra keyword arguments are forwarded to ``ClassifierBasedTagger``.
		"""
		detector = feature_detector or self.feature_detector

		chunked_sents = chunk_trees2train_chunks(train_sents)
		self.tagger = ClassifierBasedTagger(
			train=chunked_sents,
			feature_detector=detector,
			**kwargs
		)
Exemple #2
0
    def __init__(self,
                 train_sents=None,
                 tagger="ClassifierBasedTagger",
                 model=None,
                 model_name="../results/modelCRF_featured",
                 entities=None,
                 language="english",
                 **kwargs):
        """Build the underlying tagger, training it or loading a saved model.

        Args:
            train_sents: Training sentences. Must be iterable whenever
                ``model`` is falsy.
            tagger: Backend to use, either ``"ClassifierBasedTagger"`` or
                ``"CRFTagger"``.
            model: When truthy, ``train_sents`` is not validated and the
                CRF backend loads this model file instead of training.
            model_name: Formatted into ``"../results/{}"`` to give the
                ``model_file`` passed to ``CRFTagger.train``.
            entities: Passed to ``self.set_entities`` for the CRF backend.
            language: Stored on the instance as ``self.language``.
            **kwargs: Forwarded to ``ClassifierBasedTagger``.

        Raises:
            AssertionError: If ``model`` is falsy and ``train_sents`` is not
                iterable (only when assertions are enabled).
            ValueError: If ``tagger`` is not one of the supported backends.
        """
        self.all_entities = []
        self.acronyms = []
        self.language = language

        # Without a pre-trained model there must be something to train on.
        if not model:
            assert isinstance(train_sents, Iterable)

        if tagger == "ClassifierBasedTagger":
            self.feature_detector = iob_features
            self.tagger = ClassifierBasedTagger(train=train_sents,
                                                feature_detector=iob_features,
                                                **kwargs)

        elif tagger == "CRFTagger":
            self.set_entities(entities)
            # The train and load paths both start from the same tagger object.
            self.tagger = CRFTagger(feature_func=self.crf_features)
            if model:
                self.tagger.set_model_file(model)
            else:
                # NOTE(review): the default model_name already starts with
                # "../results/", so the default path carries that prefix
                # twice -- confirm the intended output location.
                self.tagger.train(
                    train_data=train_sents,
                    model_file="../results/{}".format(model_name))
        else:
            # ValueError is still an Exception, so existing handlers keep
            # working; the message now names the rejected value.
            raise ValueError('Unknown tagger: {!r}'.format(tagger))
    def __init__(self, train_sents, **kwargs):
        """Train a classifier-based tagger on ``train_sents``.

        ``train_sents`` must be iterable (checked with an ``assert``). The
        ``features`` function is stored as ``self.feature_detector`` and is
        also used as the tagger's feature detector. Extra keyword arguments
        are forwarded to ``ClassifierBasedTagger``.
        """
        assert isinstance(train_sents, Iterable)

        detector = features
        self.feature_detector = detector
        self.tagger = ClassifierBasedTagger(
            train=train_sents,
            feature_detector=detector,
            **kwargs
        )
Exemple #4
0
# Score tag1b_tagger on val_sents; the call is bracketed by tic()/toc()
# (presumably a stopwatch pair defined elsewhere -- confirm) and both the
# accuracy and the toc() result are stored in tag1_eval.
tic()
tag1_eval['test_accuracy'] = tag1b_tagger.evaluate(val_sents)
tag1_eval['test_time'] = toc()
# Hand the collected metrics to the reporting helper.
display_training_metrics(tag1_eval)
"""
# =============================================================================
# finalise a classification-based tagger
# =============================================================================
"""
""" 1. Naive Bayes classifier tagger with features and Brill """
# NOTE(review): the heading above mentions Brill, but no Brill tagger is
# built anywhere in this section -- confirm whether that step is missing.
# Metrics for this tagger: 'train_time', 'test_accuracy', 'test_time'.
nb_eval = dict()
# Train: the tagger is constructed with train= and the add_features feature
# detector; no classifier_builder is passed, so the library default applies.
tic()
nb_tagger = ClassifierBasedTagger(train=train_sents,
                                  feature_detector=add_features)
nb_eval['train_time'] = toc()
# Evaluate: despite the 'test_*' key names, accuracy is measured on val_sents.
tic()
nb_eval['test_accuracy'] = nb_tagger.evaluate(val_sents)
nb_eval['test_time'] = toc()
# Hand the collected metrics to the reporting helper.
display_training_metrics(nb_eval)
"""
# =============================================================================
# finalise a deep learning tagger
# =============================================================================
"""
""" 1. prepare the data """
# Split the training sentences into the two values create_observation
# returns, named here as inputs (train_X) and targets (train_y).
train_X, train_y = create_observation(train_sents)
Exemple #5
0
 def __init__(self, train_sents, **kwargs):
     """Train a classifier-based tagger using this object's ``features``.

     ``train_sents`` is passed through unchanged as the training data and
     ``self.features`` is used as the feature detector. Extra keyword
     arguments are forwarded to ``ClassifierBasedTagger``.
     """
     detector = self.features
     self.tagger = ClassifierBasedTagger(
         train=train_sents,
         feature_detector=detector,
         **kwargs
     )
Exemple #6
0
from nltk.tag import ClassifierBasedTagger
#from utils import read_ud_pos_data
#from tag import pos_features
if __name__ == "__main__":
    # Train a classifier-based POS tagger on a small slice of the UD English
    # EWT training split and print its accuracy on the dev split.

    # Imported locally: the file's module-level ``import time`` sits below
    # this block, so it has not executed yet when the timing calls run.
    import time

    # NOTE(review): read_ud_pos_data and pos_features are only referenced by
    # commented-out imports above, and train_scikit_classifier is not defined
    # in the visible part of the file -- confirm they are in scope.
    print("Loading data ...")
    train_data = list(
        read_ud_pos_data(r'C:\UD_English-EWT-master\en_ewt-ud-train.conllu'))
    test_data = list(
        read_ud_pos_data(r'C:\UD_English-EWT-master\en_ewt-ud-dev.conllu'))
    # NOTE(review): this prints the entire training set; it looks like
    # leftover debugging output -- confirm before removing.
    print("train_data", train_data)
    print("Data loaded .")

    start_time = time.time()
    print("Starting training ...")
    # Only the first 100 training sentences are used.
    tagger = ClassifierBasedTagger(
        feature_detector=pos_features,
        train=train_data[:100],
        classifier_builder=train_scikit_classifier,
    )
    end_time = time.time()
    print("Training complete. Time={0:.2f}s".format(end_time - start_time))

    print("Computing test set accuracy ...")
    print(tagger.evaluate(test_data))  # 0.8949021790997296

import time
import itertools
from sklearn.feature_extraction import FeatureHasher
from sklearn.linear_model import Perceptron


def incremental_train_scikit_classifier(sentences, feature_detector,
                                        batch_size, max_iterations):
    """Train a Naive Bayes NER chunker over all categories and print its score.

    For every entry in the module-level ``categories``, entity instances are
    pulled from the train and test sentence sets and turned into feature
    sets with ``train_feats``. The train feature sets are used to fit a
    ``NaiveBayesClassifier``; a ``ClassifierChunker`` is then built around a
    ``ClassifierBasedTagger`` and the result of ``evaluate2`` on the test
    feature sets is printed.

    NOTE(review): none of the four parameters is used by the body, which
    works entirely on module-level names (``categories``,
    ``completeTaggedSentencesTrain``, ``completeTaggedSentencesTest``). The
    name and signature suggest a different implementation was intended --
    confirm.

    Returns:
        None. The evaluation result is printed, not returned.
    """
    # Both accumulators start empty and take their first value from the
    # first category; trainFeats must exist before the loop reads it.
    trainFeats = None
    testFeats = None

    for category in categories:
        instancesOfEntityTrain = getInstancesOfEntity(
            category, completeTaggedSentencesTrain)
        instancesOfEntityTest = getInstancesOfEntity(
            category, completeTaggedSentencesTest)

        entityFeatsTrain = train_feats(category, instancesOfEntityTrain)
        # Test features must come from the test instances, not the train ones.
        entityFeatsTest = train_feats(category, instancesOfEntityTest)

        if trainFeats is None:
            trainFeats = entityFeatsTrain
            testFeats = entityFeatsTest
        else:
            trainFeats += entityFeatsTrain
            testFeats += entityFeatsTest

    features = prev_next_pos_iob

    #naiveBayers
    naiveBayers = NaiveBayesClassifier.train(trainFeats)
    # NOTE(review): classifier_builder is documented by NLTK as a callable
    # that trains a classifier, but an already-trained classifier is passed
    # here -- confirm whether ``classifier=`` (without ``train=``) or
    # ``NaiveBayesClassifier.train`` was intended.
    naiveBayersTagger = ClassifierBasedTagger(
        train=completeTaggedSentencesTrain,
        feature_detector=features,
        classifier_builder=naiveBayers)
    nerChunkerNaiveBayers = ClassifierChunker(completeTaggedSentencesTrain,
                                              naiveBayersTagger)
    evalNaiveBayers = nerChunkerNaiveBayers.evaluate2(testFeats)
    print(evalNaiveBayers)
 def __init__(self, train_sents, feature_detector, **kwargs):
     """Train the wrapped tagger on chunk data derived from ``train_sents``.

     ``train_sents`` is first converted with ``chunk_trees2train_chunks``;
     the result, the given ``feature_detector`` and any extra keyword
     arguments are then passed to ``ClassifierBasedTagger``.
     """
     chunk_data = chunk_trees2train_chunks(train_sents)
     self.tagger = ClassifierBasedTagger(
         train=chunk_data,
         feature_detector=feature_detector,
         **kwargs
     )