def reparsesubtree():
    """Re-parse the selected subtree and return its n-best parses as HTML.

    Request arguments read: ``sentno`` (1-indexed position in ``QUEUE``),
    ``tree`` (the tree being edited) and ``nodeid`` (leading ``t``
    characters are stripped; the remainder must have the form
    ``<treeid>_<nodeid>`` with an integer node id).

    The words covered by the selected node are joined into a sub-sentence
    that is submitted to the current user's worker, with the node's label
    passed as ``root``.

    Returns:
        The ``ValueError`` message as a plain string when ``validate``
        rejects the tree; otherwise a ``Markup`` ``<pre>`` fragment with
        the number of parses, a cancel link, and one drawn tree per parse
        with a "use this subtree" link. When the worker returns no parses
        the fragment simply reports ``0 parse trees``.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    username = session['username']
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    dt = DrawTree(tree, senttok)
    _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(nodeid)
    subseq = sorted(dt.nodes[nodeid].leaves())
    subsent = ' '.join(senttok[n] for n in subseq)
    # FIXME only works when root label of tree matches label in grammar.
    # need a single label that works across all stages.
    root = dt.nodes[nodeid].label
    # root = grammar.tolabel[next(iter(grammar.tblabelmapping[root]))]
    resp = WORKERS[username].submit(
        worker.getparses, subsent, (), (), root=root).result()
    _senttok, parsetrees, _messages, _elapsed = resp
    app.logger.info('%d-%d. [parse trees=%d] %s',
                    sentno, nodeid, len(parsetrees), subsent)
    # The unconditional debugging print of parsetrees[0][1] raised
    # IndexError when no parse was found; log it only when it exists.
    if parsetrees:
        app.logger.debug('best subtree: %s', parsetrees[0][1])
    entry = (
        '%(n)d. [%(prob)s] '
        '<a href="#" onClick="picksubtree(%(n)d); ">'
        'use this subtree</a>; '
        '\n\n'
        '%(tree)s')
    listing = '\n'.join(
        entry % dict(
            n=n + 1,
            prob=probstr(prob),
            tree=DrawTree(subtree, subsent.split()).text(
                unicodelines=True, html=True, funcsep='-', morphsep='/',
                nodeprops='t%d' % (n + 1)))
        for n, (prob, subtree, _treestr, _fragments)
        in enumerate(parsetrees))
    return Markup(
        '<pre>%d parse trees\n'
        '<a href="javascript: toggle(\'nbest\'); ">cancel</a>\n'
        '%s</pre>' % (len(parsetrees), listing))
def filterparsetrees():
    """Return the n-best parses of a sentence that satisfy the filters.

    The sentence selected by ``sentno`` (1-indexed) is parsed by the
    current user's worker under the ``require``/``block`` constraints; the
    resulting parses are then filtered with the ``frequire``/``fblock``
    constraints. Parses whose tree string is ``None`` are always kept.
    Every call increments the session's ``CONSTRAINTS`` action counter.

    Returns a ``Markup`` listing of the surviving parses (numbered by
    their position in the unfiltered list) with accept/edit/derivation
    links, or a plain message when nothing survives the filter.
    """
    username = session['username']
    session['actions'][CONSTRAINTS] += 1
    session.modified = True
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]

    # Only non-empty constraints are propagated into the generated links.
    urlprm = {'sentno': sentno}
    rawrequire = request.args.get('require', '')
    rawblock = request.args.get('block', '')
    if rawrequire:
        urlprm['require'] = rawrequire
    if rawblock:
        urlprm['block'] = rawblock
    require, block = parseconstraints(rawrequire, rawblock)
    frequire, fblock = parseconstraints(
        request.args.get('frequire', ''),
        request.args.get('fblock', ''))

    future = WORKERS[username].submit(worker.getparses, sent, require, block)
    senttok, parsetrees, _messages, _elapsed = future.result()

    kept = []
    for idx, (prob, tree, treestr, frags) in enumerate(parsetrees):
        if treestr is None or testconstraints(treestr, frequire, fblock):
            kept.append((idx, prob, tree, treestr, frags))
    if not kept:
        return ('No parse trees after filtering; try pressing Re-parse, '
                'or reload page to clear constraints.\n')

    template = (
        '%(n)d. [%(prob)s] '
        '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
        '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
        '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
        '%(tree)s')
    entries = []
    for idx, prob, tree, _treestr, _frags in kept:
        rank = idx + 1
        entries.append(template % dict(
            n=rank,
            prob=probstr(prob),
            urlprm=urlencode(dict(urlprm, n=rank)),
            tree=DrawTree(tree, senttok).text(
                unicodelines=True, html=True, funcsep='-', morphsep='/',
                nodeprops='t%d' % rank)))
    return Markup('%d parse trees\n%s' % (len(kept), '\n'.join(entries)))
def rec(tree, n=0, depth=0):
    """Render node ``n`` of a decision tree, and everything below it, as HTML.

    A node whose left and right child entries are equal is treated as a
    leaf: it becomes a hidden ``<span>`` with accept/edit/derivation links
    for the parse tree it selects. Any other node becomes a ``<span>``
    naming the feature tested at that node, with "good constituent" and
    "bad constituent" links, followed by the rendering of both children.

    NOTE(review): relies on ``parsetrees``, ``urlprm``, ``featurenames``
    and ``path`` from an enclosing scope that is not visible here.
    """
    left = tree.children_left[n]
    right = tree.children_right[n]

    if left == right:
        # Leaf: the column of the first nonzero value picks the parse tree.
        treeno = tree.value[n].nonzero()[1][0]
        prob, _tree, _treestr, _fragments = parsetrees[treeno]
        links = (
            '%(n)d. [%(prob)s] '
            '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
            '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
            '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
            % dict(
                n=treeno + 1,
                prob=probstr(prob),
                urlprm=urlencode(dict(urlprm, n=treeno + 1, dec=depth))))
        return ('<span id="d%d" style="display: none; ">%stree %d:\n'
                '%s</span>' % (n, depth * '\t', treeno + 1, links))

    # Internal node: only the root (n == 0) is visible initially.
    fields = dict(
        n=n,
        display='block' if n == 0 else 'none',
        indent=depth * 4 * ' ',
        constituent=featurenames[tree.feature[n]],
        left=left,
        right=right,
        exleft=path[:, left].nonzero()[0][0],
        exright=path[:, right].nonzero()[0][0],
        numtrees=len(parsetrees),
        subtree1=rec(tree, left, depth + 1),
        subtree2=rec(tree, right, depth + 1),
    )
    template = (
        '<span id=d%(n)d style="display: %(display)s; ">'
        '%(indent)s%(constituent)s '
        '<a href="javascript: showhide(\'d%(right)s\', \'d%(left)s\', '
        '\'dd%(exright)s\', \'%(numtrees)s\'); ">'
        'good constituent</a> '
        '<a href="javascript: showhide(\'d%(left)s\', \'d%(right)s\', '
        '\'dd%(exleft)s\', \'%(numtrees)s\'); ">'
        'bad constituent</a> '
        '%(subtree1)s%(subtree2)s</span>')
    return template % fields
def parse():
    """Parse a sentence and return a textual rendering of its parse tree.

    Output is either an HTML fragment or plain text. To be invoked by an
    AJAX call.

    Request arguments: ``sent`` (the sentence; an empty string is returned
    when missing), ``est``, ``marg``, ``objfun``, ``coarse``, ``lang``
    (``'detect'`` guesses the language from the tokens) and ``html`` (its
    mere presence selects HTML output).

    Returns:
        A rendered ``parsetree.html`` template when ``html`` is given,
        otherwise a ``text/plain`` ``Response``; a plain error string for
        an out-of-range sentence length or an unknown language.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # The cached trees are rendered with html=html, so the output format
    # must be part of the key; otherwise a plain-text request could be
    # answered with cached HTML markup (and vice versa).
    key = (senttok, est, marg, objfun, coarse, lang, html)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(sent=sent, est=est, marg=marg,
                                          objfun=objfun, coarse=coarse,
                                          html=html))
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A single-stage parser has no second stage to configure.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                for node in results[-1].parsetree.subtrees(
                        lambda n: n and not isinstance(n[0], Tree)):
                    treebank.handlemorphology(
                        'replace', None, node, node.source)
                    node.label = node.label.replace('[]', '')
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            # Post-process each n-best tree before drawing it.
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    for node in tree.subtrees(
                            lambda n: n and not isinstance(n[0], Tree)):
                        treebank.handlemorphology(
                            'replace', None, node, node.source)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))

        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (n + 1, probstr(prob),
                                 writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template('parsetree.html', sent=sent, result=result,
                               frags=frags, nbest=nbest, info=info,
                               link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def parse():
    """Display the parse of a queued sentence. To be invoked by an AJAX call.

    Reads ``sentno`` (1-indexed) and the optional ``require``/``block``
    constraints from the request, obtains the parses from the current
    user's worker and renders ``annotatetree.html`` with the decision
    tree, the best tree, an optional dependency rendering and timing
    information. The session's ``CONSTRAINTS`` counter is incremented when
    any constraint is active.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    username = session['username']

    # Only non-empty constraints are carried over into generated links.
    rawrequire = request.args.get('require', '')
    rawblock = request.args.get('block', '')
    urlprm = {'sentno': sentno}
    if rawrequire:
        urlprm['require'] = rawrequire
    if rawblock:
        urlprm['block'] = rawblock
    require, block = parseconstraints(rawrequire, rawblock)
    if require or block:
        session['actions'][CONSTRAINTS] += 1
        session.modified = True

    # The in-process path is switched off by the literal False.
    if False and app.config['DEBUG']:
        resp = worker.getparses(sent, require, block)
    else:
        resp = WORKERS[username].submit(
            worker.getparses, sent, require, block).result()
    senttok, parsetrees, messages, elapsed = resp

    maxdepth = ''
    dep = depsvg = ''
    if parsetrees:
        if workerattr('headrules'):
            dep = writedependencies(parsetrees[0][1], senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        result = ''
        dectree, maxdepth, _ = decisiontree(parsetrees, senttok, urlprm)
        prob, tree, _treestr, _fragments = parsetrees[0]
        besttree = (
            '%(n)d. [%(prob)s] '
            '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
            '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
            '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
            '%(tree)s' % dict(
                n=1,
                prob=probstr(prob),
                urlprm=urlencode(dict(urlprm, n=1)),
                tree=DrawTree(tree, senttok).text(
                    unicodelines=True, html=True, funcsep='-',
                    morphsep='/', nodeprops='t1')))
        nbest = Markup('%s\nbest tree: %s' % (dectree, besttree))
    else:
        result = ('no parse! reload page to clear constraints, '
                  'or continue with next sentence.')
        nbest = ''

    msg = '\n'.join(messages)
    timing = 'CPU time elapsed: %s => %gs' % (
        ' '.join('%gs' % a for a in elapsed), sum(elapsed))
    # Parses without a tree string are left out of the listing.
    listing = ''.join(
        '%d. [%s] %s' % (n + 1, probstr(prob),
                         writediscbrackettree(treestr, senttok))
        for n, (prob, _tree, treestr, _deriv) in enumerate(parsetrees)
        if treestr is not None)
    info = '\n'.join((
        'length: %d;' % len(senttok),
        msg,
        timing,
        'most probable parse trees:',
        listing + '\n'))
    return render_template(
        'annotatetree.html', sent=sent, result=result, nbest=nbest,
        info=info, dep=dep, depsvg=depsvg, maxdepth=maxdepth,
        msg='%d parse trees' % len(parsetrees))
def parse():
    """Parse a sentence and return a textual rendering of its parse tree.

    Output is an HTML fragment or plain text. To be invoked by an AJAX
    call.

    Request arguments: ``sent`` (an empty string is returned when
    missing), ``est``, ``marg``, ``objfun``, ``coarse``, ``lang``
    (``'detect'`` guesses the language from the tokens) and ``html`` (its
    mere presence selects HTML output).

    The finished response object is cached under a key that includes the
    output format, and is returned directly.

    Returns:
        A rendered ``parsetree.html`` template or a ``text/plain``
        ``Response``; a plain error string for an out-of-range sentence
        length or an unknown language.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'dop1')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    key = (senttok, est, marg, objfun, coarse, lang, html)
    # Look the entry up once: a second lookup could miss if the entry
    # expired in between, making the view return None.
    cached = CACHE.get(key)
    if cached is not None:
        return cached
    link = url_encode(dict(sent=sent, est=est, marg=marg, objfun=objfun,
                           coarse=coarse, html=html))
    parser = PARSERS[lang]
    parser.stages[-1].estimator = est
    parser.stages[-1].objective = objfun
    parser.stages[-1].kbest = marg in ('nbest', 'both')
    parser.stages[-1].sample = marg in ('sample', 'both')
    if parser.stages[0].mode.startswith('pcfg') and coarse:
        parser.stages[0].mode = coarse
        # A single-stage parser has no second stage to configure.
        if len(parser.stages) > 1:
            parser.stages[1].k = 1e-5 if coarse == 'pcfg-posterior' else 50

    results = list(parser.parse(senttok))
    if results[-1].noparse:
        parsetrees = []
        result = 'no parse!'
        frags = nbest = ''
    else:
        if parser.relationalrealizational:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or {}
        parsetrees = heapq.nlargest(10, parsetrees.items(),
                                    key=itemgetter(1))
        fragments = results[-1].fragments or ()
        APP.logger.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(DrawTree(tree, senttok, abbr=True).text(
            unicodelines=True, html=html))
        frags = Markup(
            'Phrasal fragments used in the most probable derivation'
            ' of the highest ranked parse tree:\n' + '\n\n'.join(
                DrawTree(Tree.parse(frag, parse_leaf=int), terminals).text(
                    unicodelines=True, html=html)
                for frag, terminals in fragments))
        nbest = Markup('\n\n'.join(
            '%d. [%s]\n%s' % (
                n + 1, probstr(prob),
                DrawTree(parser.postprocess(tree)[0], senttok,
                         abbr=True).text(unicodelines=True, html=html))
            for n, (tree, prob) in enumerate(parsetrees)))

    msg = '\n'.join(stage.msg for stage in results)
    elapsed = [stage.elapsedtime for stage in results]
    elapsed = 'CPU time elapsed: %s => %gs' % (
        ' '.join('%gs' % a for a in elapsed), sum(elapsed))
    info = '\n'.join((
        'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
            len(senttok), lang, est, objfun, marg),
        msg, elapsed,
        '10 most probable parse trees:',
        '\n'.join('%d. [%s] %s' % (n + 1, probstr(prob), tree)
                  for n, (tree, prob) in enumerate(parsetrees)) + '\n'))
    if html:
        response = render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        response = Response('\n'.join((nbest, frags, info, result)),
                            mimetype='text/plain')
    CACHE.set(key, response, timeout=5000)
    # Return the object just built instead of reading it back from the
    # cache, which yields None if the backend did not keep the entry.
    return response
def parse():
    """Parse a sentence and return a textual rendering of its parse tree.

    Output is either an HTML fragment or plain text. To be invoked by an
    AJAX call.

    Request arguments: ``sent`` (an empty string is returned when
    missing; when it matches ``POSTAGS`` every token is split on its last
    ``/`` into word and tag), ``objfun``, ``est``, ``marg``, ``coarse``,
    ``lang`` (``'detect'`` guesses the language), ``html`` (its mere
    presence selects HTML output), and ``require``/``block`` (JSON lists
    of ``[label, indices]`` pairs).

    Returns:
        A rendered ``parsetree.html`` template when ``html`` is given,
        otherwise a ``text/plain`` ``Response``; a plain error string for
        an out-of-range sentence length or an unknown language.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Normalize the constraints into sorted, hashable tuples so they can
    # be part of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    # The cached trees are rendered with html=html, so the output format
    # must be part of the key; otherwise a plain-text request could be
    # answered with cached HTML markup (and vice versa).
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block,
           html)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, lang=lang, est=est, marg=marg,
                         objfun=objfun, coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = '?' + url_encode(urlparams)
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if SHOWMORPH:
            replacemorph(results[-1].parsetree)
        if SHOWFUNC:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or []
        parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
        parsetrees_ = []
        LOG.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(DrawTree(tree, senttok).text(
            unicodelines=True, html=html, funcsep='-'))
        # Post-process each n-best tree before drawing it.
        for tree, prob, x in parsetrees:
            tree = parser.postprocess(tree, senttok, -1)[0]
            if SHOWMORPH:
                replacemorph(tree)
            if SHOWFUNC:
                treebank.handlefunctions('add', tree, pos=True)
            parsetrees_.append((tree, prob, x))
        # parsetrees may be empty; indexing it unconditionally raised
        # IndexError whenever head rules were configured.
        if parser.headrules and parsetrees:
            xtree = parser.postprocess(parsetrees[0][0], senttok, -1)[0]
            dep = treebank.writedependencies(xtree, senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        else:
            dep = depsvg = ''
        rid = randid()
        nbest = Markup('\n\n'.join(
            '%d. [%s] '
            '<a href=\'javascript: toggle("f%s%d"); \'>'
            'derivation</a>\n'
            '<span id=f%s%d style="display: none; margin-left: 3em; ">'
            'Fragments used in the highest ranked derivation'
            ' of this parse tree:\n%s</span>\n%s' % (
                n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                '\n\n'.join(
                    '%s\n%s' % (w, DrawTree(frag).text(
                        unicodelines=True, html=html))
                    for frag, w in fragments or ()),
                DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        deriv = Markup(
            'Fragments used in the highest ranked derivation'
            ' of best parse tree:\n%s' % (
                '\n\n'.join(
                    '%s\n%s' % (w, DrawTree(frag).text(
                        unicodelines=True, html=html))
                    for frag, w in parsetrees_[0][2] or ()))
        ) if parsetrees_ else ''
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (n + 1, probstr(prob),
                                 writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key,
                  (sent, result, nbest, deriv, info, link, dep, depsvg),
                  timeout=5000)
    else:
        (sent, result, nbest, deriv, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            deriv=deriv, info=info, link=link, dep=dep, depsvg=depsvg,
            randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse a sentence and return a textual rendering of its parse tree.

    Output is either an HTML fragment or plain text. To be invoked by an
    AJAX call.

    Request arguments: ``sent`` (the sentence; an empty string is returned
    when missing), ``est``, ``marg``, ``objfun``, ``coarse``, ``lang``
    (``'detect'`` guesses the language from the tokens) and ``html`` (its
    mere presence selects HTML output).

    Returns:
        A rendered ``parsetree.html`` template when ``html`` is given,
        otherwise a ``text/plain`` ``Response``; a plain error string for
        an out-of-range sentence length or an unknown language.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # The cached trees are rendered with html=html, so the output format
    # must be part of the key; otherwise a plain-text request could be
    # answered with cached HTML markup (and vice versa).
    key = (senttok, est, marg, objfun, coarse, lang, html)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(sent=sent, est=est, marg=marg,
                                          objfun=objfun, coarse=coarse,
                                          html=html))
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A single-stage parser has no second stage to configure.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            # Post-process each n-best tree before drawing it.
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))

        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (n + 1, probstr(prob),
                                 writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template('parsetree.html', sent=sent, result=result,
                               frags=frags, nbest=nbest, info=info,
                               link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse a sentence and return a textual rendering of its parse tree.

    Output is either an HTML fragment or plain text. To be invoked by an
    AJAX call.

    Request arguments: ``sent`` (an empty string is returned when
    missing; when it matches ``POSTAGS`` every token is split on its last
    ``/`` into word and tag), ``objfun``, ``est``, ``marg``, ``coarse``,
    ``lang`` (``'detect'`` guesses the language), ``html`` (its mere
    presence selects HTML output), and ``require``/``block`` (JSON lists
    of ``[label, indices]`` pairs).

    Returns:
        A rendered ``parsetree.html`` template when ``html`` is given,
        otherwise a ``text/plain`` ``Response``; a plain error string for
        an out-of-range sentence length or an unknown language.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Normalize the constraints into sorted, hashable tuples so they can
    # be part of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    # The cached trees are rendered with html=html, so the output format
    # must be part of the key; otherwise a plain-text request could be
    # answered with cached HTML markup (and vice versa).
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block,
           html)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, est=est, marg=marg, objfun=objfun,
                         coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = 'parse?' + url_encode(urlparams)
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            nbest = dep = depsvg = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            # Post-process each n-best tree before drawing it.
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            # parsetrees may be empty even after a successful parse (it
            # falls back to []); indexing it unconditionally raised
            # IndexError whenever head rules were configured.
            if parser.headrules and parsetrees:
                xtree = parser.postprocess(
                    parsetrees[0][0], senttok, -1)[0]
                dep = treebank.writedependencies(xtree, senttok, 'conll')
                depsvg = Markup(DrawDependencies.fromconll(dep).svg())
            else:
                dep = depsvg = ''
            rid = randid()
            nbest = Markup('\n\n'.join(
                '%d. [%s] '
                '<a href=\'javascript: toggle("f%s%d"); \'>'
                'derivation</a>\n'
                '<span id=f%s%d style="display: none; margin-left: 3em; ">'
                'Fragments used in the highest ranked derivation'
                ' of this parse tree:\n%s</span>\n%s' % (
                    n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                    '\n\n'.join(
                        '%s\n%s' % (w, DrawTree(frag).text(
                            unicodelines=True, html=html))
                        for frag, w in fragments or ()),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, fragments) in enumerate(parsetrees_)))

        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (n + 1, probstr(prob),
                                 writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, nbest, info, link, dep, depsvg),
                  timeout=5000)
    else:
        (sent, result, nbest, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            info=info, link=link, dep=dep, depsvg=depsvg, randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')