def __init__(self, train_sents, feature_detector=prev_next_pos_iob, **kwargs):
    """Create the tagger that backs this chunker.

    Args:
        train_sents: Training sentences; they are passed through
            ``chunk_trees2train_chunks`` before being handed to the tagger.
        feature_detector: Feature function given to the tagger. Any falsy
            value (e.g. ``None``) selects ``self.feature_detector`` instead.
        **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
    """
    # Only touch self.feature_detector when the caller supplied nothing usable.
    detector = feature_detector or self.feature_detector
    converted = chunk_trees2train_chunks(train_sents)
    self.tagger = ClassifierBasedTagger(
        train=converted,
        feature_detector=detector,
        **kwargs
    )
def __init__(self, train_sents=None, tagger="ClassifierBasedTagger",
             model=None, model_name="../results/modelCRF_featured",
             entities=None, language="english", **kwargs):
    """Initialise the chunker around one of the two supported taggers.

    Args:
        train_sents: Training data. Must be an ``Iterable`` whenever no
            ``model`` is given.
        tagger: Either ``"ClassifierBasedTagger"`` or ``"CRFTagger"``.
        model: CRF only. When truthy it is passed to ``set_model_file`` and
            no training takes place.
        model_name: CRF only. Substituted into ``"../results/{}"`` to form
            the ``model_file`` argument used when training.
        entities: CRF only. Passed to ``self.set_entities``.
        language: Stored on ``self.language``.
        **kwargs: Forwarded to ``ClassifierBasedTagger`` only.

    Raises:
        AssertionError: ``model`` is falsy and ``train_sents`` is not an
            ``Iterable``.
        Exception: ``tagger`` is not one of the two supported names.
    """
    self.all_entities = []
    self.acronyms = []
    self.language = language

    # Training data is only mandatory when there is no saved model to load.
    if not model:
        assert isinstance(train_sents, Iterable)

    if tagger == "ClassifierBasedTagger":
        self.feature_detector = iob_features
        self.tagger = ClassifierBasedTagger(
            train=train_sents, feature_detector=iob_features, **kwargs)
        return

    if tagger != "CRFTagger":
        raise Exception('Unknown tagger')

    self.set_entities(entities)
    crf = CRFTagger(feature_func=self.crf_features)
    self.tagger = crf
    if model:
        crf.set_model_file(model)
    else:
        # NOTE(review): with the default model_name this expands to
        # "../results/../results/modelCRF_featured" - confirm that is intended.
        crf.train(
            train_data=train_sents,
            model_file="../results/{}".format(model_name))
def __init__(self, train_sents, **kwargs):
    """Train a ``ClassifierBasedTagger`` using the module-level ``features``.

    Args:
        train_sents: Training data; must be an ``Iterable``.
        **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.

    Raises:
        AssertionError: ``train_sents`` is not an ``Iterable``.
    """
    assert isinstance(train_sents, Iterable)

    detector = features
    # Keep the detector on the instance as well as handing it to the tagger.
    self.feature_detector = detector
    self.tagger = ClassifierBasedTagger(
        feature_detector=detector,
        train=train_sents,
        **kwargs
    )
# test
tic()
tag1_eval['test_accuracy'] = tag1b_tagger.evaluate(val_sents)
tag1_eval['test_time'] = toc()

# display results
display_training_metrics(tag1_eval)

# =============================================================================
# finalise a classification-based tagger
# =============================================================================

# 1. Naive Bayes classifier tagger with features and Brill
# NOTE(review): no Brill step is visible in this part of the script.
nb_eval = {}

# train: the tic()/toc() pair brackets only the tagger construction
tic()
nb_tagger = ClassifierBasedTagger(
    train=train_sents,
    feature_detector=add_features,
)
nb_eval['train_time'] = toc()

# test: the tic()/toc() pair brackets only the evaluation on val_sents
tic()
nb_eval['test_accuracy'] = nb_tagger.evaluate(val_sents)
nb_eval['test_time'] = toc()

# display results
display_training_metrics(nb_eval)

# =============================================================================
# finalise a deep learning tagger
# =============================================================================

# 1. prepare the data

# for train, test and validation
train_X, train_y = create_observation(train_sents)
def __init__(self, train_sents, **kwargs):
    """Train the underlying tagger with this object's own feature function.

    Args:
        train_sents: Training data passed to ``ClassifierBasedTagger`` as
            ``train``.
        **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
    """
    tagger_args = dict(train=train_sents, feature_detector=self.features)
    self.tagger = ClassifierBasedTagger(**tagger_args, **kwargs)
from nltk.tag import ClassifierBasedTagger
#from utils import read_ud_pos_data
#from tag import pos_features

# Script entry point: load the UD English-EWT train/dev data, train a
# ClassifierBasedTagger on the first 100 training sentences, time the
# training, and print the accuracy on the dev data.
# NOTE(review): read_ud_pos_data, pos_features and train_scikit_classifier are
# not imported in this span (the imports above are commented out); they are
# presumably defined elsewhere in the file - confirm before running.
if __name__ == "__main__":
    # The only visible `import time` sits below this block, which would make
    # the time.time() calls here fail with NameError; import it up front.
    # Re-importing is harmless if the module was already loaded.
    import time

    print("Loading data ...")
    train_data = list(
        read_ud_pos_data(r'C:\UD_English-EWT-master\en_ewt-ud-train.conllu'))
    test_data = list(
        read_ud_pos_data(r'C:\UD_English-EWT-master\en_ewt-ud-dev.conllu'))
    print("train_data", train_data)
    print("Data loaded .")

    start_time = time.time()
    print("Starting training ...")
    tagger = ClassifierBasedTagger(
        feature_detector=pos_features,
        train=train_data[:100],
        classifier_builder=train_scikit_classifier,
    )
    end_time = time.time()
    print("Training complete. Time={0:.2f}s".format(end_time - start_time))

    print("Computing test set accuracy ...")
    print(tagger.evaluate(test_data))  # 0.8949021790997296

import time
import itertools
from sklearn.feature_extraction import FeatureHasher
from sklearn.linear_model import Perceptron

# NOTE(review): only the header of the following definition is present here;
# its body is not visible, so the header is preserved as text rather than
# completed with a guessed implementation.
# def incremental_train_scikit_classifier(sentences, feature_detector, batch_size, max_iterations):
# Accumulate per-category feature sets for training and for evaluation.
# NOTE(review): trainFeats is read below before it is assigned in this span;
# it is presumably initialised to None just above - confirm.
testFeats = None
for category in categories:
    instancesOfEntityTrain = getInstancesOfEntity(
        category, completeTaggedSentencesTrain)
    instancesOfEntityTest = getInstancesOfEntity(
        category, completeTaggedSentencesTest)
    entityFeatsTrain = train_feats(category, instancesOfEntityTrain)
    # Build the evaluation features from the *test* instances. Previously the
    # training instances were reused here, so the evaluation below was scored
    # on training data and instancesOfEntityTest was never used.
    entityFeatsTest = train_feats(category, instancesOfEntityTest)
    if trainFeats is None:
        # First category: start both accumulators.
        trainFeats = entityFeatsTrain
        testFeats = entityFeatsTest
    else:
        trainFeats += entityFeatsTrain
        testFeats += entityFeatsTest

features = prev_next_pos_iob

#naiveBayers
naiveBayers = NaiveBayesClassifier.train(trainFeats)
# NOTE(review): NLTK documents `classifier_builder` as a callable that builds
# a classifier from training data, but an already-trained classifier is passed
# here - confirm whether `classifier=` (or the builder function) was intended.
naiveBayersTagger = ClassifierBasedTagger(
    train=completeTaggedSentencesTrain,
    feature_detector=features,
    classifier_builder=naiveBayers)
nerChunkerNaiveBayers = ClassifierChunker(completeTaggedSentencesTrain,
                                          naiveBayersTagger)
evalNaiveBayers = nerChunkerNaiveBayers.evaluate2(testFeats)
print(evalNaiveBayers)
def __init__(self, train_sents, feature_detector, **kwargs):
    """Create the tagger that backs this chunker.

    Args:
        train_sents: Training sentences; they are passed through
            ``chunk_trees2train_chunks`` before being handed to the tagger.
        feature_detector: Feature function given to the tagger as-is.
        **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
    """
    self.tagger = ClassifierBasedTagger(
        train=chunk_trees2train_chunks(train_sents),
        feature_detector=feature_detector,
        **kwargs
    )