def parse(text):
    """
    Primary function to run syntaxnet and PredPatt over input sentences.

    The text is annotated with ``annotate_text``, converted to CoNLL with
    ``parse_to_conll``, and the first parse produced by ``load_conllu`` is
    handed to PredPatt.

    Returns a dict with three keys: ``'predpatt'`` (PredPatt's pretty-printed
    output), ``'conll'`` (the CoNLL text) and ``'original'`` (the input text,
    unchanged).
    """
    parse_tree, _trace = annotate_text(text)
    conll_parsed = parse_to_conll(parse_tree)

    # Collect every parse from the CoNLL text, then keep only the first one.
    ud_parses = [ud_parse for _sent_id, ud_parse in load_conllu(conll_parsed)]
    first_parse = ud_parses[0]

    # PredPatt options. Modify as needed.
    opts = PredPattOpts(
        resolve_relcl=True,   # relative clauses
        resolve_appos=True,   # appositional modifiers
        resolve_amod=True,    # adjectival modifiers
        resolve_conj=True,    # conjunction
        resolve_poss=True,    # possessives
        ud=dep_v2.VERSION,    # the version of UD
    )
    extractor = PredPatt(first_parse, opts=opts)

    # NOTE:
    # The pretty-printed string from PredPatt is returned as-is. It is largely
    # a placeholder for JSON compatibility within the REST API.
    return {
        'predpatt': extractor.pprint(),
        'conll': conll_parsed,
        'original': text,
    }
def main():
    """Render the PredPatt demo page for the current request.

    Reads the ``sentence`` query parameter from ``request.GET`` (falling back
    to a built-in example sentence when it is missing or blank) together with
    one query parameter per ``PredPattOpts`` attribute. The sentence is
    parsed, run through PredPatt, and the propositions, tags and parse are
    rendered into the HTML page returned by ``template``.

    Raises:
        ValueError: if an option query parameter is not numeric.
    """
    query = request.GET

    patterns = ''
    tags = ''
    parse_text = ''

    # Use the demo sentence unless the request supplied a non-blank one.
    sentence = (query.get('sentence', '').strip()
                or 'The quick brown fox jumped over the lazy dog .')
    # Bound unconditionally so the template call below can never hit an
    # unbound local.
    original_sentence = sentence

    pp_opts = PredPattOpts()
    # `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2
    # only). sorted() snapshots the pairs, so updating attributes inside the
    # loop is safe.
    for name, default in sorted(pp_opts.__dict__.items()):
        # All options are true/false for now, submitted as "0"/"1".
        setattr(pp_opts, name, int(float(query.get(name, default))))

    if sentence:
        ud_parse = parser(sentence, tokenized=False)
        extractor = PredPatt(ud_parse, opts=pp_opts)
        patterns = extractor.pprint(track_rule=True)
        tags = ' '.join('%s/%s' % pair
                        for pair in zip(ud_parse.tokens, ud_parse.tags))
        parse_text = ud_parse.pprint(K=3)

    # Remove predpatt's bracketed comments.
    patterns = re.sub(r'\s*\[.*?\]', '', patterns)
    patterns = dedent(patterns)

    inputs = []
    for name, value in sorted(pp_opts.__dict__.items()):
        # Create a hidden textbox with the false value because the values of
        # "unchecked" boxes don't get posted with the form.
        inputs.append('<input type="hidden" value="0" name="%s">' % (name, ))
        inputs.append('<input type="checkbox" name="%s" value="1" %s> %s<br/>'
                      % (name, 'checked' if value else '', name))
    options = '\n'.join(inputs)

    return template("""
<html>
<head>

<!-- JQuery -->
<script src="//code.jquery.com/jquery-2.1.4.min.js"></script>

<!-- Bootstrap -->
<link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css"/>
<link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap-theme.min.css"/>
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.1/js/bootstrap.min.js"></script>

<!-- Chosen Dropdown Library -->
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/chosen/1.4.2/chosen.css"/>
<script src="//cdnjs.cloudflare.com/ajax/libs/chosen/1.4.2/chosen.jquery.min.js"></script>

<style>
html {
    overflow: -moz-scrollbars-vertical;
    overflow: scroll;
}
</style>

</head>
<body>

<div style="width: 800px; padding: 10px; margin-left: auto; margin-right: auto;">
<h1>PredPatt</h1>

<strong>Sentence</strong>
<pre>{{sentence}}</pre>

<strong>Propositions</strong>
<div id="propositions">
<pre>
{{patterns}}
</pre>

<div>
<button class="btn" data-toggle="collapse" data-target="#parse" style="margin-bottom: 10px;">Toggle Parse</button>
<div id="parse" class="collapse">
<strong>Tags</strong>
<pre>
{{tags}}
</pre>
<strong>Parse</strong>
<pre>
{{parse}}
</pre>
</div>
</div>

<strong>Input</strong>
<form action="/" method="GET">
<textarea type="text" name="sentence" style="height:50px; width: 100%;" placeholder="e.g., The quick brown fox jumped over the lazy dog." class="form-control" autofocus>{{original_sentence}}</textarea>
<div style="padding: 10px;"><strong>Options</strong><br/>""" + options + """
</div>
<br/>
<input type="submit" name="save" value="submit">
</form>
</div>

</body>
</html>
""", sentence=sentence, original_sentence=original_sentence,
                    patterns=patterns, tags=tags, parse=parse_text,
                    options=options)
def test():
    """Run the markdown doctests and print a pass/fail summary.

    The file named by ``--filename`` (default ``doc/DOCTEST.md``) is split
    into entries: a line starting with ``> `` gives the sentence, and the text
    up to the next ``>`` line is its chunk. For each sentence the parse is
    taken from the ``<!--parse=...-->`` / ``<!--tags=...-->`` comments in the
    chunk when present, otherwise the sentence is parsed with ``Parser``.
    PredPatt's pretty-printed output (bracketed rule annotations removed) is
    compared with the chunk text; mismatches are printed in detail.

    Nothing is returned; results are reported on stdout only.
    """
    from argparse import ArgumentParser

    p = ArgumentParser()
    p.add_argument('--filename', default='doc/DOCTEST.md')
    args = p.parse_args()

    # Close the file deterministically instead of leaking the handle.
    with codecs.open(args.filename, encoding='utf-8') as f:
        contents = f.read()

    # The '<END>' sentinel lets the lookahead terminate the last chunk.
    sentences = re.findall(
        r'^> (.*)\n([\w\W]*?)(?=^>|<END>)',
        contents + '\n<END>',
        re.MULTILINE)

    # TODO: Use PredPatt.from_string instead of duplicating code here.
    parser = Parser.get_instance()

    passed = 0
    failed = 0
    blank = 0
    for s, chunk in sentences:
        s = s.strip()
        if not s:
            continue

        # Use the cached parse listed in the doctest chunk, if any.
        parse_chunk = re.findall(r'<!--parse=([\w\W]+?)-->', chunk)
        if parse_chunk:
            from predpatt.UDParse import DepTriple, UDParse
            [parse_chunk] = parse_chunk
            # Each dependency reads "rel(word/idx, word/idx)"; the two indices
            # are passed to DepTriple in swapped order.
            triples = [
                DepTriple(r, int(b), int(a))
                for r, a, b in re.findall(
                    r'(\S+)\(\S+?/(\d+), \S+?/(\d+)\)', parse_chunk)
            ]
            tokens = s.split()
            [tags_chunk] = re.findall(r'<!--tags=([\w\W]+?)-->', chunk)
            tags = re.findall(r'\S+/(\S+)', tags_chunk)
            parse = UDParse(tokens, tags, triples)
        else:
            parse = parser(s)

        P = PredPatt(parse, ppattopts)
        relations = P.pprint(track_rule=True)
        tags = ' '.join('%s/%s' % x for x in zip(parse.tokens, parse.tags))
        parse = parse.pprint(K=4)

        # NOTE(review): the 4-character slices below suggest tabs were meant
        # to expand to four spaces and the indent test to be four spaces wide;
        # the literals are kept exactly as found -- confirm.
        relations = relations.replace('\t', ' ')
        relations = '\n'.join(
            line[4:].rstrip() for line in relations.split('\n'))

        expected = []
        chunk = chunk.replace('\t', ' ')
        for line in chunk.split('\n'):
            if line.startswith(' '):
                line = line[4:].rstrip()
                expected.append(line)
        expected = '\n'.join(expected)

        if not expected.strip():
            blank += 1

        got = relations.strip() or '<empty>'
        # Drop the bracketed rule annotations before comparing.
        got = re.sub(r'\s*\[.*\]', '', got)

        if expected.strip() == got.strip():
            passed += 1
        else:
            print()
            print(colored('> ' + s, 'yellow'))
            print(colored('fail', 'red'))
            print('expected:')
            for line in expected.split('\n'):
                print(' ', colored(line, 'blue'))
            print('got:')
            for line in got.split('\n'):
                print(' ', line)
            print()
            print(colored(tags, 'magenta'))
            print()
            print(colored(parse, 'magenta'))
            failed += 1

    total = passed + failed
    if total == 0:
        # Nothing was run; avoid dividing by zero in the percentage below.
        print('[doctest] no tests found in %s' % args.filename)
        return

    msg = '[doctest] %.f%% (%s/%s) passed' % (
        passed * 100.0 / total, passed, total)
    if failed == 0:
        print(msg)
    else:
        print()
        print(msg)
        print()
    if blank:
        print('blank:', blank)