def from_sentence(cls, sentence, cacheable=True, opts=None):
    """Build a PredPatt instance from a raw sentence string.

    The sentence is parsed and converted to UD automatically using the
    shared module-level parser, and the resulting parse is passed to the
    class constructor together with ``opts``. [English only]

    Note: ``cacheable`` is only consulted when the shared parser is first
    created; later calls reuse the parser already stored in ``_PARSER``.
    """
    global _PARSER
    from predpatt import Parser

    # Lazily create the shared parser on first use.
    if _PARSER is None:
        _PARSER = Parser.get_instance(cacheable)
    return cls(_PARSER(sentence), opts=opts)
def from_constituency(cls, parse_string, cacheable=True, opts=None):
    """Build a PredPatt instance from a constituency parse string.

    The constituency parse is converted to UD automatically through the
    shared module-level parser's ``to_ud`` method, and the converted parse
    is passed to the class constructor together with ``opts``.
    [English only]

    Note: ``cacheable`` is only consulted when the shared parser is first
    created; later calls reuse the parser already stored in ``_PARSER``.
    """
    global _PARSER
    from predpatt import Parser

    # Lazily create the shared parser on first use.
    if _PARSER is None:
        _PARSER = Parser.get_instance(cacheable)
    return cls(_PARSER.to_ud(parse_string), opts=opts)
* Support multiple sentences? * Add option to list rules which fired. * Show calibration score. """ import re from bottle import route, run, template, request from predpatt import Parser, PredPatt, PredPattOpts from textwrap import dedent # TODO eventually we'll want to accept paragraphs as input. #import nltk #sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') parser = Parser.get_instance() @route('/') @route('/', method='GET') def main(): patterns = '' sentence = 'The quick brown fox jumped over the lazy dog .' tags = '' parse = '' if request.GET.get('sentence', '').strip(): sentence = request.GET.get('sentence', '').strip() pp_opts = PredPattOpts() for k, v in sorted(PredPattOpts().__dict__.iteritems()):
# NOTE: every print below uses the parenthesized single-argument form, which
# produces the same output under both Python 2 and Python 3 (the bare
# ``print`` statement is a SyntaxError on Python 3). ``print('')`` emits one
# empty line, exactly like a bare Python 2 ``print``.
print('')

# To change certain behaviors, you can pass different options for the PredPatt
# instance. For example, to disable expansion of conjunctions and extraction of
# amods, use the following:
from predpatt import PredPattOpts
P = PredPatt.from_sentence(sentence, opts=PredPattOpts(resolve_amod=0, resolve_conj=0))
print(P.pprint(color=1))

print('______________________________________________________________________________')
print('')

#______________________________________________________________________________
# Bonus material

# Already have a constituency parse? No problem!
P = PredPatt.from_constituency(
    '( (S (NP (NNP Chris)) (VP (VBZ loves) (NP (NNP Pat))) (. .)) )')
print(P.pprint(track_rule=True, color=True))

print('______________________________________________________________________________')
print('')

# Using PredPatt's Parser interface
from predpatt import Parser
parser = Parser.get_instance()  # Create UD parser instance
parse = parser(sentence)        # Parse sentence
print(parse.pprint())
def _parse_from_chunk(sentence, chunk):
    """Return the parse cached inside a doctest chunk, or None if absent.

    A chunk may embed ``<!--parse=...-->`` and ``<!--tags=...-->`` HTML
    comments. When a parse comment is present, a ``UDParse`` is rebuilt from
    the whitespace-split tokens of ``sentence``, the tags listed in the tags
    comment, and the dependency triples listed in the parse comment.

    Raises:
        ValueError: if the chunk holds more than one parse comment, or a
            parse comment without exactly one tags comment.
    """
    cached = re.findall(r'<\!--parse=([\w\W]+?)-->', chunk)
    if not cached:
        return None

    from predpatt.UDParse import DepTriple, UDParse

    [parse_text] = cached
    # Each entry looks like ``rel(word/idx, word/idx)``; the two indices are
    # handed to DepTriple in the opposite order from how they appear in text.
    triples = [
        DepTriple(rel, int(second_idx), int(first_idx))
        for rel, first_idx, second_idx in re.findall(
            r'(\S+)\(\S+?/(\d+), \S+?/(\d+)\)', parse_text)
    ]
    tokens = sentence.split()
    [tags_text] = re.findall(r'<\!--tags=([\w\W]+?)-->', chunk)
    tags = re.findall(r'\S+/(\S+)', tags_text)
    return UDParse(tokens, tags, triples)


def test():
    """Run the markdown doctests and print a pass/fail summary.

    The file given by ``--filename`` (default ``doc/DOCTEST.md``) is split
    into test cases: each line starting with ``> `` is a sentence, and the
    text up to the next such line is its chunk. The indented lines of the
    chunk are the expected output. For every non-empty sentence the parse is
    taken from the chunk when cached there, otherwise produced by the parser;
    PredPatt is run on it (using the module-level ``ppattopts``) and its
    pretty-printed relations, with bracketed annotations removed, are
    compared with the expected text. Mismatches are printed in detail.
    """
    from argparse import ArgumentParser
    p = ArgumentParser()
    p.add_argument('--filename', default='doc/DOCTEST.md')
    args = p.parse_args()

    # Read through a context manager so the file handle is always closed.
    with codecs.open(args.filename, encoding='utf-8') as f:
        text = f.read()

    # The '<END>' sentinel lets the lookahead terminate the final chunk.
    sentences = re.findall(r'^> (.*)\n([\w\W]*?)(?=^>|<END>)',
                           text + '\n<END>',
                           re.MULTILINE)

    # TODO: Use PredPatt.from_string instead of duplicating code here.
    parser = Parser.get_instance()

    passed = 0
    failed = 0
    blank = 0
    for s, chunk in sentences:
        s = s.strip()
        if not s:
            continue

        # Use the cached parse listed in the doctest chunk when available.
        parse = _parse_from_chunk(s, chunk)
        if parse is None:
            parse = parser(s)

        P = PredPatt(parse, ppattopts)
        relations = P.pprint(track_rule=True)
        tags = ' '.join('%s/%s' % x for x in zip(parse.tokens, parse.tags))
        parse_text = parse.pprint(K=4)

        # NOTE(review): tabs become a single space here while four columns
        # are sliced off each line below (and expected lines are detected by
        # a single leading space) -- confirm the intended indent width.
        relations = relations.replace('\t', ' ')
        relations = '\n'.join(line[4:].rstrip() for line in relations.split('\n'))

        # Only indented chunk lines count as expected output.
        expected = '\n'.join(
            line[4:].rstrip()
            for line in chunk.replace('\t', ' ').split('\n')
            if line.startswith(' ')
        )

        if not expected.strip():
            blank += 1

        got = relations.strip() or '<empty>'
        # Drop bracketed annotations before comparing.
        got = re.sub(r'\s*\[.*\]', '', got)

        if expected.strip() == got.strip():
            passed += 1
            continue

        print()
        print(colored('> ' + s, 'yellow'))
        print(colored('fail', 'red'))
        print('expected:')
        for line in expected.split('\n'):
            print(' ', colored(line, 'blue'))
        print('got:')
        for line in got.split('\n'):
            print(' ', line)
        print()
        print(colored(tags, 'magenta'))
        print()
        print(colored(parse_text, 'magenta'))
        failed += 1

    total = passed + failed
    # Guard against a doctest file with no test cases (avoids dividing by 0).
    percent = passed * 100.0 / total if total else 0.0
    msg = '[doctest] %.f%% (%s/%s) passed' % (percent, passed, total)
    if failed == 0:
        print(msg)
    else:
        print()
        print(msg)
        print()
        # NOTE(review): the nesting of this check was ambiguous in the
        # reviewed source; it is kept under the failure branch -- confirm.
        if blank:
            print('blank:', blank)