def trees(self, parse, nbest=1, post_process=False, filter=None, mrepr='tokens'):
        """
        Build Tree structures for a classified word.

        A grammar is compiled from ``parse`` with ``make_grammar`` and
        installed on ``self.parser``; the morphemes, rendered according to
        ``mrepr``, are then passed to ``self.parser.nbest_parse``.

        For a word such as `aandeelhoudersvergadering` the output looks like::

            >>> trees = classifier.trees(results)
            >>> for tree in trees:
            ...    print(tree)
            (NOU
               (NOU (NOU (ADP aan) (NOU deel)) (VRB houd) (SUF er))
               (LE s)
               (NOU (VRB vergader) (SUF ing)))

        Args:
            - parse: the return value of :func:`mbmp.MBMA.classify` for a
              given word.
            - nbest (int): how many parses should be returned.
            - post_process (bool): perform some cleaning operation after
              parsing.
            - filter (function): predicate used to filter certain trees
              (must return True or False).
            - mrepr (str): how the leaves are represented in the Tree:
              token forms ('tokens'), lemma forms ('lemmas') or both
              ('tokens-and-lemmas').

        Returns:
            The result of ``self.parser.nbest_parse``, i.e. the nbest parses.

        """
        # The grammar must be installed before parsing starts.
        word_grammar = make_grammar(parse, mrepr)
        self.parser.set_grammar(word_grammar)
        leaves = [morpheme.pprint(mrepr) for morpheme in parse]
        return self.parser.nbest_parse(leaves, nbest, post_process, filter)
Exemple #2
0
def demo():
    """
    A demo showing some basic functionality.

    The segmented word "ver koop baar" is parsed twice: first by a
    ``CKYParser`` built from the grammar unpickled from ``PCFG``, then by an
    ``MbmaCKYParser`` whose grammar is replaced with the rules that
    ``make_grammar`` compiles from a hand-written MBMA parse.

    Everything is written to standard output; nothing is returned.
    """
    from mbmp.parse.util import make_grammar
    from mbmp.datatypes import Morpheme
    parse = [
        Morpheme(pos='V|*V', token='ver', lemma='ver'),
        Morpheme(pos='V', token='koop', lemma='koop'),
        Morpheme(pos='A|V*', token='baar', lemma='baar')
    ]
    # Single-argument parenthesised prints emit the same text as the bare
    # print statement; print('') stands in for an empty ``print``.
    print('Parse produced by MBMA:')
    print(['(%s %s)' % (m.pos, m) for m in parse])
    print('')
    # Close the grammar file as soon as it has been read instead of leaving
    # the handle open until garbage collection. The open mode is unchanged.
    with open(PCFG) as grammar_file:
        grammar = cPickle.load(grammar_file)
    print('Parsing word "ver koop baar" with default CKY Parser')
    parser = CKYParser(grammar)
    for tree in parser.nbest_parse('ver koop baar'.split()):
        print(tree)
    print('')
    print('Parsing word "ver koop baar" with Mbma CKY Parser')
    print('Compiling grammar rules from parse...')
    g = make_grammar(parse)
    for prod in g:
        print(prod)
    print('')
    # The parser is constructed with the pickled grammar, then switched to
    # the word-specific rules before parsing.
    parser = MbmaCKYParser(grammar)
    parser.set_grammar(g)
    for tree in parser.nbest_parse('ver koop baar'.split(), n=5):
        print(tree)
Exemple #3
0
def demo():
    """
    Demonstrate the basic workflow on the segmented word "ver eis t".

    A hand-written MBMA parse is printed, compiled into grammar rules with
    ``make_grammar``, and those rules are handed to ``MbmaParser`` to parse
    the word. All results go to standard output; nothing is returned.
    """
    from mbmp.parse.util import make_grammar
    from mbmp.datatypes import Morpheme

    # Single-argument parenthesised prints write the same text as the print
    # statement; print('') produces the empty separator line.
    morphemes = [
        Morpheme(pos='V|*V', token='ver', lemma='ver'),
        Morpheme(pos='V', token='eis', lemma='eis'),
        Morpheme(pos='V|VINFL', token='t', lemma='t'),
    ]
    print('Parse produced by MBMA:')
    print(morphemes)
    print('')

    print('Compiling grammar rules from parse...')
    rules = make_grammar(morphemes)
    for rule in rules:
        print(rule)
    print('')

    print('Parsing word "ver eis t"')
    word_parser = MbmaParser(rules)
    segments = 'ver eis t'.split()
    for tree in word_parser.nbest_parse(segments):
        print(tree)
Exemple #4
0
    def trees(self, parse, nbest=1, post_process=False, filter=None,
              mrepr='tokens'):
        """
        Build Tree structures for a classified word.

        A grammar is compiled from ``parse`` with ``make_grammar`` and
        installed on ``self.parser``; the morphemes, rendered according to
        ``mrepr``, are then passed to ``self.parser.nbest_parse``.

        For a word such as `aandeelhoudersvergadering` the output looks like::

            >>> trees = classifier.trees(results)
            >>> for tree in trees:
            ...    print(tree)
            (NOU
               (NOU (NOU (ADP aan) (NOU deel)) (VRB houd) (SUF er))
               (LE s)
               (NOU (VRB vergader) (SUF ing)))

        Args:
            - parse: the return value of :func:`mbmp.MBMA.classify` for a
              given word.
            - nbest (int): how many parses should be returned.
            - post_process (bool): perform some cleaning operation after
              parsing.
            - filter (function): predicate used to filter certain trees
              (must return True or False).
            - mrepr (str): how the leaves are represented in the Tree:
              token forms ('tokens'), lemma forms ('lemmas') or both
              ('tokens-and-lemmas').

        Returns:
            The result of ``self.parser.nbest_parse``, i.e. the nbest parses.

        """
        # The grammar must be installed before parsing starts.
        word_grammar = make_grammar(parse, mrepr)
        self.parser.set_grammar(word_grammar)
        leaves = [morpheme.pprint(mrepr) for morpheme in parse]
        return self.parser.nbest_parse(leaves, nbest, post_process, filter)
Exemple #5
0
    # NOTE(review): this is a function body whose ``def`` line is not part of
    # this snippet; presumably it belongs to ``demo`` (called at the bottom of
    # the file) -- confirm against the full source.
    from mbmp.parse.util import make_grammar
    from mbmp.datatypes import Morpheme
    # Hand-written MBMA parse of the segmented word "ver koop baar".
    parse = [Morpheme(pos='V|*V', token='ver', lemma='ver'),
             Morpheme(pos='V', token='koop', lemma='koop'),
             Morpheme(pos='A|V*', token='baar', lemma='baar')]
    print 'Parse produced by MBMA:'
    print ['(%s %s)' % (m.pos, m) for m in parse]
    print
    # Load the pickled grammar stored at PCFG.
    # NOTE(review): the file object returned by open() is never closed
    # explicitly; consider a ``with`` block.
    grammar = cPickle.load(open(PCFG))
    print 'Parsing word "ver koop baar" with default CKY Parser'
    parser = CKYParser(grammar)
    for tree in parser.nbest_parse('ver koop baar'.split()):
        print tree
    print
    print 'Parsing word "ver koop baar" with Mbma CKY Parser'
    print 'Compiling grammar rules from parse...'
    # Compile grammar rules from the parse above and print each one.
    g = make_grammar(parse)
    for prod in g:
        print prod
    print
    # The parser is built with the pickled grammar, then switched to the
    # rules compiled from the parse before requesting up to n=5 parses.
    parser = MbmaCKYParser(grammar)
    parser.set_grammar(g)
    for tree in parser.nbest_parse('ver koop baar'.split(), n=5):
        print tree


# Names exported by a star-import of this module.
__all__ = ['CKYParser', 'MbmaCKYParser']

# Run the demo when the file is executed as a script.
if __name__ == '__main__':
    demo()