Example #1
def TrainPCFG(CPDist, Start='S'):
    # Build one PCFG production per (node, sample) pair observed in the
    # conditional probability distribution CPDist.
    PcfgProds = []
    for node in CPDist.conditions():  # populate the PCFG productions
        for sample in CPDist[node].samples():
            RHS = []
            #try:
            if isinstance(sample, str): RHS = [sample]
            elif isinstance(sample, Nonterminal): RHS.append(sample)
            elif isinstance(sample, Tree): RHS.append(Nonterminal(sample.node))
            elif isinstance(sample, Token): RHS.append(sample)
            elif isinstance(sample, list) or isinstance(sample, tuple):
                for token in sample:
                    if isinstance(token, Tree):
                        RHS.append(Nonterminal(token.node))
                    elif isinstance(token, Nonterminal):
                        RHS.append(token)
                    elif isinstance(token, Token):
                        RHS.append(token)
                    else:
                        RHS.append(token)
            else:
                RHS.append(sample)  # unknown sample type: fall back to the raw sample
                print 'Unknown ', node, sample
            PcfgProds.append(
                PCFGProduction(Nonterminal(node),
                               RHS,
                               prob=CPDist[node].prob(sample)))
            #except: print 'missed on', node,sample
    return HashPCFG(Nonterminal(Start), PcfgProds)
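
For comparison, the same induction step can be sketched with the current NLTK API, which provides induce_pcfg; the two toy trees below are made up for illustration, and this is not the codebase's own TrainPCFG:

from nltk import Tree, Nonterminal
from nltk.grammar import induce_pcfg

# Two toy parses; in practice these would come from a treebank.
trees = [
    Tree.fromstring('(S (NP (Det the) (N dog)) (VP (V saw) (NP (Det a) (N man))))'),
    Tree.fromstring('(S (NP (Det the) (N man)) (VP (V saw) (NP (Det a) (N dog))))'),
]

productions = []
for tree in trees:
    productions += tree.productions()   # CFG productions observed in each tree

# Relative frequencies of the observed productions become rule probabilities.
grammar = induce_pcfg(Nonterminal('S'), productions)
print(grammar)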
Example #2
 def get_rand_rhs(self, lhs):
     # Choose a production for lhs at random, weighted by rule probability.
     if not isinstance(lhs, Nonterminal): lhs = Nonterminal(lhs.upper())
     r = random.random()
     s = 0
     for i in self._index[lhs]:
         p = self._productions[i]
         s += p.prob()
         if s > r: return p
     return None
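
The loop above is ordinary roulette-wheel sampling over rule probabilities. A minimal self-contained sketch of the same idea, assuming a plain list of (item, probability) pairs that sum to 1.0 (weighted_choice is a hypothetical stand-in, not part of the grammar class):

import random

def weighted_choice(pairs):
    # pairs: list of (item, probability); probabilities are assumed to sum to 1.0.
    r = random.random()              # uniform draw in [0, 1)
    cumulative = 0.0
    for item, prob in pairs:
        cumulative += prob
        if cumulative > r:           # first item whose cumulative mass passes r
            return item
    return pairs[-1][0]              # guard against floating-point shortfall

print(weighted_choice([('NP -> Det N', 0.7), ('NP -> NP PP', 0.3)]))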
Example #4
 def get_prob_rhs(self,lhs,prob):
     # Collect productions for lhs until their cumulative probability exceeds prob.
     if not isinstance(lhs,Nonterminal): lhs = Nonterminal(lhs.upper())
     rhs = []
     if prob > 1 and prob < 100: prob /= 100.0  # treat values above 1 as percentages
     p = 0.0
     for r in self._index[lhs]:
         rule = self._productions[r]
         p += rule.prob()
         rhs.append(rule)
         if p > prob: break
     return rhs
def demo2():
    from nltk.cfg import Nonterminal, CFGProduction, CFG
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V,
     Det) = [Nonterminal(s) for s in nonterminals.split()]
    productions = (
        # Syntactic Productions
        CFGProduction(S, NP, VP),
        CFGProduction(NP, Det, N),
        CFGProduction(NP, NP, PP),
        CFGProduction(VP, VP, PP),
        CFGProduction(VP, V, NP, PP),
        CFGProduction(VP, V, NP),
        CFGProduction(PP, P, NP),
        CFGProduction(PP),
        CFGProduction(PP, 'up', 'over', NP),

        # Lexical Productions
        CFGProduction(NP, 'I'),
        CFGProduction(Det, 'the'),
        CFGProduction(Det, 'a'),
        CFGProduction(N, 'man'),
        CFGProduction(V, 'saw'),
        CFGProduction(P, 'in'),
        CFGProduction(P, 'with'),
        CFGProduction(N, 'park'),
        CFGProduction(N, 'dog'),
        CFGProduction(N, 'statue'),
        CFGProduction(Det, 'my'),
    )
    grammar = CFG(S, productions)

    text = 'I saw a man in the park'.split()
    d = CFGDemo(grammar, text)
    d.mainloop()
Example #7
 def print_all_rhs(self, lhs=''):
     for i, p in enumerate(self.get_all_rhs(lhs)):
         print p
     if not lhs:
         print len(self._lexicon_grammar), 'Lexicon Rules;',
         print len(self._nt_grammar), 'Nonterminal Rules;',
     else:
         print len([
             i for i in self._index[Nonterminal(lhs)]
             if i in self._lexicon_grammar
         ]), 'Lexicon Rules;',
         print len([
             i for i in self._index[Nonterminal(lhs)]
             if i in self._nt_grammar
         ]), 'Nonterminal Rules;',
     print i + 1, 'Rules.'  # enumerate() is zero-based, so the total is i + 1
Example #8
def CountSubTree(CFDist, token):
    # Recursively record, for each subtree, the tuple of child node labels
    # (or lowercased words) under the parent node's label.
    Output = []
    for child in token:
        if isinstance(child, Tree):
            Output.append(Nonterminal(child.node))
            CountSubTree(CFDist, child)
        elif isinstance(child, Token):
            text_token = child['TEXT'].lower()
            Output.append(text_token)
        else:
            print 'Unmatched token', token, child
    CFDist[token.node].inc(tuple(Output))
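
The same counting step can be sketched against the current NLTK API (Tree and ConditionalFreqDist); count_subtrees below is a hypothetical stand-in for CountSubTree, and the input tree is a made-up example:

from nltk import Tree, ConditionalFreqDist

def count_subtrees(cfd, tree):
    # Record the tuple of child labels (or lowercased words) under the parent label.
    children = tuple(child.label() if isinstance(child, Tree) else child.lower()
                     for child in tree)
    cfd[tree.label()][children] += 1
    for child in tree:
        if isinstance(child, Tree):
            count_subtrees(cfd, child)

cfd = ConditionalFreqDist()
count_subtrees(cfd, Tree.fromstring('(S (NP (Det the) (N dog)) (VP (V barks)))'))
print(dict(cfd['NP']))    # {('Det', 'N'): 1}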
def demo():
    from nltk.cfg import Nonterminal, CFGProduction, CFG
    from Tkinter import Tk, Label, Button  # Python 2 Tkinter widgets used below
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V,
     Det) = [Nonterminal(s) for s in nonterminals.split()]

    productions = (
        # Syntactic Productions
        CFGProduction(S, NP, VP),
        CFGProduction(NP, Det, N),
        CFGProduction(NP, NP, PP),
        CFGProduction(VP, VP, PP),
        CFGProduction(VP, V, NP, PP),
        CFGProduction(VP, V, NP),
        CFGProduction(PP, P, NP),
        CFGProduction(PP),
        CFGProduction(PP, 'up', 'over', NP),

        # Lexical Productions
        CFGProduction(NP, 'I'),
        CFGProduction(Det, 'the'),
        CFGProduction(Det, 'a'),
        CFGProduction(N, 'man'),
        CFGProduction(V, 'saw'),
        CFGProduction(P, 'in'),
        CFGProduction(P, 'with'),
        CFGProduction(N, 'park'),
        CFGProduction(N, 'dog'),
        CFGProduction(N, 'statue'),
        CFGProduction(Det, 'my'),
    )

    #productions *= 10
    grammar = CFG(S, productions)

    def cb(cfg):
        print cfg

    top = Tk()
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
Example #10
def demo():
    """
    Create a shift reduce parser demo, using a simple grammar and
    text. 
    """

    from nltk.cfg import Nonterminal, CFGProduction, CFG
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V,
     Det) = [Nonterminal(s) for s in nonterminals.split()]

    productions = (
        # Syntactic Productions
        CFGProduction(S, [NP, VP]),
        CFGProduction(NP, [Det, N]),
        CFGProduction(NP, [NP, PP]),
        CFGProduction(VP, [VP, PP]),
        CFGProduction(VP, [V, NP, PP]),
        CFGProduction(VP, [V, NP]),
        CFGProduction(PP, [P, NP]),

        # Lexical Productions
        CFGProduction(NP, ['I']),
        CFGProduction(Det, ['the']),
        CFGProduction(Det, ['a']),
        CFGProduction(N, ['man']),
        CFGProduction(V, ['saw']),
        CFGProduction(P, ['in']),
        CFGProduction(P, ['with']),
        CFGProduction(N, ['park']),
        CFGProduction(N, ['dog']),
        CFGProduction(N, ['statue']),
        CFGProduction(Det, ['my']),
    )

    grammar = CFG(S, productions)

    # tokenize the sentence
    sent = Token(TEXT='my dog saw a man in the park with a statue')
    WhitespaceTokenizer().tokenize(sent)

    ShiftReduceParserDemo(grammar, sent).mainloop()
    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        self._parent = parent
        if cfg is not None: self._cfg = cfg
        else: self._cfg = CFG(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()
Example #12
def regexp():
    """
    Demo regexp grammar
    """
    from nltk.cfg import Nonterminal, CFGProduction, CFG
    nonterminals = 'NP N AND'
    (NP, N, AND) = [Nonterminal(s) for s in nonterminals.split()]
    productions = (
        CFGProduction(NP, NP, AND, NP),
        CFGProduction(NP, N),

        CFGProduction(N, 'cabbages'),
        CFGProduction(AND, 'and'),
        CFGProduction(N, 'kings'),
        )
    grammar = CFG(NP, productions)

    sent = 'cabbages and kings'
    text = WSTokenizer().tokenize(sent)

    RecursiveDescentParserDemo(grammar, text).mainloop()
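
A rough non-GUI sketch of the same parse with the current NLTK API; ChartParser is used here instead of plain recursive descent, since the left-recursive NP -> NP AND NP rule would send a recursive-descent parser into an infinite loop:

import nltk

grammar = nltk.CFG.fromstring("""
    NP -> NP AND NP | N
    N -> 'cabbages' | 'kings'
    AND -> 'and'
""")

parser = nltk.ChartParser(grammar)
for tree in parser.parse('cabbages and kings'.split()):
    print(tree)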
Example #13
 def get_rand_rhs(self,lhs):
     # Choose uniformly at random among the productions for lhs.
     if not isinstance(lhs,Nonterminal): lhs = Nonterminal(lhs.upper())
     return self._productions[random.choice(self._index[lhs])]
 def _apply(self, *e):
     productions = self._parse_productions()
     start = Nonterminal(self._start.get())
     cfg = CFG(start, productions)
     if self._set_cfg_callback is not None:
         self._set_cfg_callback(cfg)
Example #15
 def get_max_rhs(self,lhs):
     if not isinstance(lhs,Nonterminal): lhs = Nonterminal(lhs.upper())
     # Assumes self._index[lhs] is ordered most-probable first.
     return self._productions[self._index[lhs][0]]
Example #16
 def get_all_rhs(self,lhs=''):
     if not lhs: return self._productions
     if not isinstance(lhs,Nonterminal): lhs = Nonterminal(lhs.upper())
     return [self._productions[n] for n in self._index[lhs]]