Example #1
0
    def from_sentence(cls, sentence, cacheable=True, opts=None):
        """Build an instance from a raw sentence string. [English only]

        The sentence is run through the shared parser and the resulting
        parse is handed to ``cls`` together with ``opts``.

        Note: ``cacheable`` is only passed to ``Parser.get_instance`` when the
        module-level ``_PARSER`` has not been created yet; once it exists it is
        reused as-is and ``cacheable`` is ignored.

        """
        global _PARSER
        from predpatt import Parser
        if _PARSER is None:
            # First use: create the shared parser and remember it globally.
            _PARSER = Parser.get_instance(cacheable)
        return cls(_PARSER(sentence), opts=opts)
Example #2
0
    def from_constituency(cls, parse_string, cacheable=True, opts=None):
        """Build an instance from a constituency parse string. [English only]

        The constituency parse is converted to UD with the shared parser's
        ``to_ud`` method, and the result is handed to ``cls`` together with
        ``opts``.

        Note: ``cacheable`` is only passed to ``Parser.get_instance`` when the
        module-level ``_PARSER`` has not been created yet; once it exists it is
        reused as-is and ``cacheable`` is ignored.

        """
        global _PARSER
        from predpatt import Parser
        if _PARSER is None:
            # First use: create the shared parser and remember it globally.
            _PARSER = Parser.get_instance(cacheable)
        return cls(_PARSER.to_ud(parse_string), opts=opts)
Example #3
0
 * Support multiple sentences?

 * Add option to list rules which fired.

 * Show calibration score.
"""
import re
from bottle import route, run, template, request
from predpatt import Parser, PredPatt, PredPattOpts
from textwrap import dedent

# TODO eventually we'll want to accept paragraphs as input.
#import nltk
#sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

# Obtain a Parser instance once, at module import time.
parser = Parser.get_instance()


@route('/')
@route('/', method='GET')
def main():

    patterns = ''
    sentence = 'The quick brown fox jumped over the lazy dog .'
    tags = ''
    parse = ''
    if request.GET.get('sentence', '').strip():
        sentence = request.GET.get('sentence', '').strip()

    pp_opts = PredPattOpts()
    for k, v in sorted(PredPattOpts().__dict__.iteritems()):
Example #4
0
# NOTE: this script fragment uses Python 2 print statements. `sentence` and
# `PredPatt` are not defined in this fragment -- presumably they are set up
# earlier in the script.
print

# To change certain behaviors, you can pass different options for the PredPatt
# instance. For example, to disable expansion of conjunctions and extraction of
# amods, use the following:
from predpatt import PredPattOpts
P = PredPatt.from_sentence(sentence,
                           opts=PredPattOpts(resolve_amod=0, resolve_conj=0))

# Print the pprint() rendering with color enabled.
print P.pprint(color=1)

# Visual separator between sections of the demo output.
print '______________________________________________________________________________'
print

#______________________________________________________________________________
# Bonus material

# Already have a constituency parse? No problem!
P = PredPatt.from_constituency(
    '( (S (NP (NNP Chris)) (VP (VBZ loves) (NP (NNP Pat))) (. .)) )')
# track_rule=True is passed here in addition to color.
print P.pprint(track_rule=True, color=True)

# Visual separator between sections of the demo output.
print '______________________________________________________________________________'
print

# Using PredPatt's Parser interface
from predpatt import Parser
parser = Parser.get_instance()  # Create UD parser instance
parse = parser(sentence)  # Parse sentence
print parse.pprint()
Example #5
0
def test():
    """Run the markdown doctests and print a pass/fail summary.

    The file named by ``--filename`` (default ``doc/DOCTEST.md``) is scanned
    for entries that start with a ``> sentence`` line followed by a chunk of
    text.  For each non-empty sentence:

    * If the chunk contains a ``<!--parse=...-->`` comment, the parse is
      rebuilt from it (together with the ``<!--tags=...-->`` comment) instead
      of running the parser; otherwise the sentence is parsed.
    * The expected output is the set of chunk lines indented by four spaces.
    * The actual output is ``PredPatt.pprint(track_rule=True)`` with the first
      four characters of each line dropped and bracketed ``[...]`` annotations
      removed.

    Mismatches are printed in detail.  A summary line is always printed; the
    count of entries with blank expected output is only reported when at
    least one entry failed.

    Relies on names expected at module level: ``re``, ``codecs``, ``Parser``,
    ``PredPatt``, ``ppattopts`` and ``colored``.

    Raises:
        ValueError: if a chunk has a cached parse but not exactly one
            ``<!--parse=...-->`` / ``<!--tags=...-->`` comment.
    """
    from argparse import ArgumentParser
    p = ArgumentParser()
    p.add_argument('--filename', default='doc/DOCTEST.md')
    args = p.parse_args()

    # Read the whole file up front; the context manager guarantees the handle
    # is closed even if reading/decoding fails.
    with codecs.open(args.filename, encoding='utf-8') as f:
        contents = f.read()

    # Raw strings keep the regex escapes (\w, \S, \d, ...) from being treated
    # as (invalid) string escapes.  Patterns are compiled once, outside the
    # loop.
    entry_re = re.compile(r'^> (.*)\n([\w\W]*?)(?=^>|<END>)', re.MULTILINE)
    parse_re = re.compile(r'<\!--parse=([\w\W]+?)-->')
    tags_re = re.compile(r'<\!--tags=([\w\W]+?)-->')
    triple_re = re.compile(r'(\S+)\(\S+?/(\d+), \S+?/(\d+)\)')
    tag_re = re.compile(r'\S+/(\S+)')
    rule_re = re.compile(r'\s*\[.*\]')

    # The '<END>' sentinel lets the lookahead terminate the last entry.
    sentences = entry_re.findall(contents + '\n<END>')

    # TODO: Use PredPatt.from_string instead of duplicating code here.
    parser = Parser.get_instance()

    passed = 0
    failed = 0
    blank = 0
    for s, chunk in sentences:
        s = s.strip()
        if not s:
            continue

        # use cached parse listed in doctest chunk.
        parse_chunk = parse_re.findall(chunk)
        if parse_chunk:
            from predpatt.UDParse import DepTriple, UDParse
            [parse_chunk] = parse_chunk
            # Each match is (relation, first index, second index); the two
            # indices are passed to DepTriple in swapped order.
            triples = [
                DepTriple(r, int(b), int(a))
                for r, a, b in triple_re.findall(parse_chunk)
            ]
            tokens = s.split()
            [tags_chunk] = tags_re.findall(chunk)
            tags = tag_re.findall(tags_chunk)
            parse = UDParse(tokens, tags, triples)

        else:
            parse = parser(s)

        P = PredPatt(parse, ppattopts)
        relations = P.pprint(track_rule=True)
        # From here on `tags` and `parse` hold printable strings used only in
        # the failure report.
        tags = ' '.join('%s/%s' % x for x in zip(parse.tokens, parse.tags))
        parse = parse.pprint(K=4)

        # Normalize tabs and drop the first four characters of every line.
        relations = relations.replace('\t', '    ')
        relations = '\n'.join(line[4:].rstrip()
                              for line in relations.split('\n'))

        # Expected output: the chunk lines that are indented by four spaces.
        chunk = chunk.replace('\t', '    ')
        expected = '\n'.join(line[4:].rstrip()
                             for line in chunk.split('\n')
                             if line.startswith('    '))

        if not expected.strip():
            blank += 1

        got = relations.strip() or '<empty>'
        # Strip the bracketed annotations before comparing.
        got = rule_re.sub('', got)

        if expected.strip() == got.strip():
            passed += 1
        else:
            print()
            print(colored('> ' + s, 'yellow'))
            print(colored('fail', 'red'))
            print('expected:')
            for line in expected.split('\n'):
                print('   ', colored(line, 'blue'))
            print('got:')
            for line in got.split('\n'):
                print('   ', line)
            print()
            print(colored(tags, 'magenta'))
            print()
            print(colored(parse, 'magenta'))
            failed += 1

    total = passed + failed
    # Guard against a file with no test sentences, which would otherwise
    # raise ZeroDivisionError; report 0% (0/0) in that case.
    percent = passed * 100.0 / total if total else 0.0
    msg = '[doctest] %.f%% (%s/%s) passed' % (percent, passed, total)
    if failed == 0:
        print(msg)
    else:
        print()
        print(msg)
        print()
        if blank:
            print('blank:', blank)