Code Example #1
File: run.py Project: danjamker/N-Fly
    def __init__(self,
                 llwl='Brown',
                 llNL=2,
                 percen=80,
                 NE=True,
                 Col=True,
                 Gram=True,
                 Chu=True):
        '''
        @param llwl: log-likelihood corpus name ('Brown', 'AmE06', 'BE06')
        @param llNL: log-likelihood n-gram length (default 2)
        @param percen: percentage of output returned (default 80)
        @param NE: use named entities (default True)
        @param Col: use collocations (default True)
        @param Gram: use n-grams (default True)
        @param Chu: use chunking (default True)
        '''

        self.NEs = NE
        self.Col = Col
        self.Gram = Gram
        self.Chu = Chu
        self.p = percen
        print('Starting to build', llwl)
        self.LL = LogLikelihood(wordlist=llwl, NLength=llNL)
        print('LL Loaded')
        self.POS = POS()
        print('POS Loaded')
        self.GD = GetData()
        print('GD Loaded')
        self.Cu = Chunker(self.POS)
        print('Cu Loaded')
        self.FL = Filter()
        print('FL Loaded')
        self.CC = Collocation(self.POS)
        print('CC Loaded')
        self.Ng = NGram()
        print('Ng Loaded')
        self.S = Select(percentil=self.p)
        print('S Loaded')
        self.To = Tokenize(self.FL)
        print('To Loaded')
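
(Aside: the constructor above builds its log-likelihood model over a reference corpus, 'Brown' by default. The sketch below, which is not part of the project, uses NLTK's copy of the Brown corpus to show the kind of reference frequency data such a model is built from.)

# Illustration only: a reference frequency distribution over the Brown corpus,
# the kind of data a log-likelihood keyword model compares document terms against.
import nltk
from nltk.corpus import brown

nltk.download('brown', quiet=True)  # fetch the corpus if it is not already cached
ref_freq = nltk.FreqDist(w.lower() for w in brown.words())
print(ref_freq.most_common(5))  # the highest-frequency reference words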
Code Example #2
# Convert the grammar trees in the corpus into a CFG (Context-Free Grammar).
grammar = funcs.InduceNonTerminal(grammarTrain)

# Save the grammar file.
with open("grammar.txt", "wb") as f:
    pickle.dump(grammar, f)
print("Grammar induction finished.")
'''======== Part IV: Chunking ========'''
# In this part, we chunk sentences into phrases using IOB (Inside-Outside-Beginning) tags. There are
# three kinds of phrases: noun phrases (NP), verb phrases (VP) and prepositional phrases (PP).
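
# (Illustration, not part of the original script: an IOB-tagged sentence is a
#  list of (word, POS-tag, chunk-tag) triples, e.g.
#      [('the', 'DT', 'B-NP'), ('cat', 'NN', 'I-NP'), ('sat', 'VBD', 'B-VP'), ...]
#  NLTK can flatten any chunk tree into this form with nltk.chunk.tree2conlltags(tree),
#  e.g. tree2conlltags(chunkTrain[0]) for the first training sentence loaded below.)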

# Load the train and test dataset for chunking.
chunkTrain = nltk.corpus.conll2000.chunked_sents("train.txt")
chunkTest = nltk.corpus.conll2000.chunked_sents("test.txt")

# Instantiate a Chunker object; train it on the training corpus.
chunker = Chunker(chunkTrain)

# Evaluate the chunker's performance on the test corpus.
print(chunker.evaluate(chunkTest))
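
# (Assuming Chunker follows NLTK's ChunkParserI interface, evaluate() returns a
#  ChunkScore; printing it reports IOB tag accuracy, precision, recall and
#  F-measure over the test corpus.)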

# Use the trained chunker to chunk our own texts.
chunkedSents = funcs.ChunkSents(tokens, chunker)

# Save the chunked texts.
with open("chunked_sents.txt", "wb") as f:
    pickle.dump(chunkedSents, f)
print("Chunking finished.")
'''======== Part V: Deep parsing ========'''
# In this part, we use the grammar induced in the previous step to parse our texts. Basically, we use a
# shift-reduce parsing algorithm to parse the texts and find out whether larger phrases are built from smaller phrases.

# Instantiate a parser object and load it with the grammar.
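
# (Illustration, not part of the original script: with NLTK, a shift-reduce
#  parser can be built directly from a CFG, e.g.
#      parser = nltk.ShiftReduceParser(grammar)
#      for tree in parser.parse(sentence_tokens):
#          print(tree)
#  where sentence_tokens is one tokenized sentence.)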