# Define a shallow regex chunk grammar, parse a POS-tagged sentence with
# it, and evaluate the resulting chunker on held-out data.
#
# NOTE(review): this was the same script repeated four times (three copies
# with Python 2 `print` statements, one Python 3 copy); deduplicated here
# to the single Python 3 version.
# Assumes `tagged_sentence`, `train_data`, `test_data`, and `RegexpParser`
# (nltk.chunk) are defined earlier in the file — confirm against caller.
print(tagged_sentence)

grammar = """
NP: {<DT>?<JJ>?<NN.*>}
ADJP: {<JJ>}
ADVP: {<RB.*>}
PP: {<IN>}
VP: {<MD>?<VB.*>+}
"""
rc = RegexpParser(grammar)

# Parse one sentence and score the chunker on the test set.
c = rc.parse(tagged_sentence)
print(c)
print(rc.evaluate(test_data))

# Round-trip a training tree through CoNLL IOB (word, tag, chunk) triples
# and back, to illustrate the two representations are equivalent.
from nltk.chunk.util import tree2conlltags, conlltags2tree

train_sent = train_data[7]
print(train_sent)
wtc = tree2conlltags(train_sent)
tree = conlltags2tree(wtc)
print(tree)
# Making a chunker and testing it's accuracy # 1.1: Chunk optional determiner with nouns # 1.2: Merge adjective with noun chunk # 2.1: Chunk preposition # 3.1: Chunk optional modal with verb chunker = RegexpParser(r''' NP: {<DT>?<NN.*>+} <JJ>{}<NN.*> PP: {<IN>} VP: {<MD>?<VB.*>} ''') score = chunker.evaluate(conll2000.chunked_sents()) print(f"Accuracy of regex chunker: {score.accuracy()}") # Tagger-based chunker def conll_tag_chunks(chunk_sents): """ Extracts a list of tuples (pos, iob) from a list of trees. """ tagged_sents = [tree2conlltags(tree) for tree in chunk_sents] return [[(t, c) for (w, t, c) in sent] for sent in tagged_sents] def make_backoffs(training, tagger_classes, backoff=None): """