# Code example #1
# Persist the induced grammar so later stages (deep parsing) can reload it.
# Use a context manager so the file handle is closed even if pickling fails;
# the original bare open() call leaked the handle.
with open("grammar.txt", "wb") as grammar_file:
    pickle.dump(grammar, grammar_file)
print("Grammar induction finished.")
'''========= Part IV: Chunking ========'''
# Chunk sentences into phrases using IOB (Inside-Outside-Beginning) tags.
# Three kinds of phrases are recognised: noun phrases (NP), verb phrases (VP)
# and preposition phrases (PP).

# Fetch the CoNLL-2000 corpora used to train and evaluate the chunker.
conll2000 = nltk.corpus.conll2000
chunkTrain = conll2000.chunked_sents("train.txt")
chunkTest = conll2000.chunked_sents("test.txt")

# Train a Chunker on the training corpus, then report its performance
# on the held-out test corpus.
chunker = Chunker(chunkTrain)
print(chunker.evaluate(chunkTest))

# Apply the trained chunker to our own tokenized texts.
chunkedSents = funcs.ChunkSents(tokens, chunker)

# Persist the chunked sentences for later stages. A context manager
# guarantees the file handle is closed even if pickling raises;
# the original bare open() call leaked the handle.
with open("chunked_sents.txt", "wb") as chunk_file:
    pickle.dump(chunkedSents, chunk_file)
print("Chunking finished.")
'''======== Part V: Deep parsing ========'''
# Parse the texts with the grammar induced in the previous step, using a
# shift-reduce parsing algorithm to discover whether larger phrases are
# built on top of the smaller ones found during chunking.

# Initiate a parser object loaded with the induced grammar.
# NOTE(review): assumes `grammar` (from the grammar-induction step above) is
# a grammar object compatible with ShiftReduceParser — confirm upstream.
parser = ShiftReduceParser(grammar)

# Use the parser to parse our own texts.