def annotate(sentno):
    """Serve the main annotation page for a sentence.

    :param sentno: 1-indexed position in QUEUE; the value -1 selects the
        first sentence for which firstunannotated() reports no annotation
        by the logged-in user.
    :returns: a redirect to the canonical URL of the selected sentence
        when sentno is -1; a redirect to the edit view when
        getannotation() returns an existing annotation; otherwise the
        rendered 'annotate.html' template.
    """
    username = session['username']
    if sentno == -1:
        # The response object has to be returned to take effect; calling
        # redirect() and dropping the result silently rendered the page
        # under the '-1' URL instead.
        return redirect(
            url_for('annotate', sentno=firstunannotated(username)))
    # Reset the per-sentence action log: seven counters (indexed elsewhere
    # with constants such as NBEST, RELABEL, ...) followed by the current
    # time().
    session['actions'] = [0, 0, 0, 0, 0, 0, 0, time()]
    lineno = QUEUE[sentno - 1][0]
    sent = SENTENCES[lineno]
    senttok, _ = worker.postokenize(sent)
    annotation, n = getannotation(username, lineno)
    if annotation is not None:
        # Already annotated: convert the stored annotation back to a
        # bracketed tree and hand it to the editor.
        item = exporttree(annotation.splitlines(), functions='add')
        canonicalize(item.tree)
        worker.domorph(item.tree)
        tree = writediscbrackettree(item.tree, item.sent)
        return redirect(
            url_for('edit', sentno=sentno, annotated=1, tree=tree, n=n))
    return render_template(
        'annotate.html',
        prevlink=str(sentno - 1) if sentno > 1 else '#',
        nextlink=str(sentno + 1) if sentno < len(SENTENCES) else '#',
        sentno=sentno,
        lineno=lineno + 1,
        totalsents=len(SENTENCES),
        numannotated=numannotated(username),
        annotationhelp=ANNOTATIONHELP,
        sent=' '.join(senttok))
def edit():
    """Edit tree manually.

    Query parameters read from request.args:

    - sentno: 1-indexed position in QUEUE (required).
    - dec: optional integer added to the DECTREE action counter.
    - n: 1-based rank of the parse to edit; the sentence is re-parsed with
      the optional 'require' / 'block' constraints and the n-th tree is
      taken.
    - tree: alternative to n; a bracketed tree that is edited as given.
    - annotated: when truthy, a notice is shown that the sentence has
      already been annotated.

    :returns: the rendered 'edittree.html' template, or an error string
        when neither n nor tree is given.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    lineno = QUEUE[sentno - 1][0]
    sent = SENTENCES[lineno]
    username = session['username']
    if 'dec' in request.args:
        session['actions'][DECTREE] += int(request.args.get('dec', 0))
        session.modified = True
    if 'n' in request.args:
        n = int(request.args.get('n', 1))
        session['actions'][NBEST] = n
        # Item assignment on the nested list is not detected by the
        # session object; flag it explicitly so the value is persisted
        # even when no 'dec' argument was passed.
        session.modified = True
        require = request.args.get('require', '')
        block = request.args.get('block', '')
        require, block = parseconstraints(require, block)
        resp = WORKERS[username].submit(
            worker.getparses, sent, require, block).result()
        senttok, parsetrees, _messages, _elapsed = resp
        tree = parsetrees[n - 1][1]
    elif 'tree' in request.args:
        tree, senttok = discbrackettree(request.args.get('tree'))
    else:
        return 'ERROR: pass n or tree argument.'
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    msg = ''
    if request.args.get('annotated', False):
        msg = Markup('<font color=red>You have already annotated '
                     'this sentence.</font>')
    return render_template(
        'edittree.html',
        prevlink=('/annotate/annotate/%d' % (sentno - 1))
        if sentno > 1 else '#',
        nextlink=('/annotate/annotate/%d' % (sentno + 1))
        if sentno < len(SENTENCES) else '#',
        unextlink=('/annotate/annotate/%d' % firstunannotated(username))
        if sentno < len(SENTENCES) else '#',
        treestr=treestr,
        senttok=' '.join(senttok),
        sentno=sentno,
        lineno=lineno + 1,
        totalsents=len(SENTENCES),
        numannotated=numannotated(username),
        poslabels=sorted(workerattr('poslabels')),
        phrasallabels=sorted(workerattr('phrasallabels')),
        functiontags=sorted(
            workerattr('functiontags')
            | set(app.config['FUNCTIONTAGWHITELIST'])),
        morphtags=sorted(workerattr('morphtags')),
        annotationhelp=ANNOTATIONHELP,
        # Text area height: one row per line of the tree, at least five.
        rows=max(5, treestr.count('\n') + 1),
        cols=100,
        msg=msg)
def newlabel():
    """Re-draw tree with newly picked label.

    Reads 'sentno', 'tree' and 'nodeid' from the query string, plus exactly
    one of 'label', 'function' or 'morph' holding the replacement for that
    part of the node's label. Returns the validation error as a string
    when the tree is rejected, otherwise markup with an accept link, the
    redrawn tree and the bracketed tree string.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    lineno = QUEUE[sentno - 1][0]
    senttok, _ = worker.postokenize(SENTENCES[lineno])
    submitted = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(submitted, senttok)
    except ValueError as err:
        return str(err)
    # FIXME: re-factor; check label AFTER replacing it
    # now actually replace label at nodeid
    idparts = request.args.get('nodeid', '').lstrip('t').split('_')
    _treeid, nodeid = idparts
    nodeid = int(nodeid)
    drawn = DrawTree(tree, senttok)
    target = drawn.nodes[nodeid]
    # LABELRE groups as used below: 1 = category, 2 = function suffix,
    # 3 = morphology suffix.
    match = LABELRE.match(target.label)
    if 'label' in request.args:
        category = request.args.get('label', '')
        relabeled = (
            category + (match.group(2) or '') + (match.group(3) or ''))
    elif 'function' in request.args:
        function = request.args.get('function', '')
        relabeled = (
            '%s%s' % (match.group(1), match.group(3) or '')
            if function == '' else
            '%s-%s%s' % (match.group(1), function, match.group(3) or ''))
    elif 'morph' in request.args:
        morph = request.args.get('morph', '')
        relabeled = (
            '%s%s' % (match.group(1), match.group(2) or '')
            if morph == '' else
            '%s%s/%s' % (match.group(1), match.group(2) or '', morph))
    else:
        raise ValueError('expected label or function argument')
    target.label = relabeled
    tree = drawn.nodes[0]
    drawn = DrawTree(tree, senttok)  # kludge..
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    session['actions'][RELABEL] += 1
    session.modified = True
    drawing = drawn.text(
        unicodelines=True, html=True, funcsep='-', morphsep='/',
        nodeprops='t0')
    return Markup('%s\n\n%s\t%s' % (link, drawing, treestr))
def replacesubtree():
    """Re-parse the words under a node and splice the result into the tree.

    Query parameters read from request.args:

    - sentno: 1-indexed position in QUEUE (required).
    - tree: the current tree as a bracketed string.
    - nodeid: identifier of the form 't<treeid>_<nodeid>' selecting the
      subtree to replace.
    - n: rank of the parse to use; the list is indexed with n - 1, so the
      default of 0 selects the last entry of the returned parses.

    :returns: the validation error as a string when the tree is rejected;
        otherwise markup with an accept link, an optional error line, the
        redrawn tree and the bracketed tree string. When no parse is
        available the tree is returned unchanged with an error line and
        the REPARSE counter is not incremented.
    """
    n = int(request.args.get('n', 0))
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    username = session['username']
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    error = ''
    dt = DrawTree(tree, senttok)
    _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(nodeid)
    # Sentence positions covered by the selected node, in sentence order.
    subseq = sorted(dt.nodes[nodeid].leaves())
    subsent = ' '.join(senttok[idx] for idx in subseq)
    root = dt.nodes[nodeid].label
    resp = WORKERS[username].submit(
        worker.getparses, subsent, (), (), root=root).result()
    _senttok, parsetrees, _messages, _elapsed = resp
    try:
        newsubtree = parsetrees[n - 1][1]
    except IndexError:
        # No parse at all, or n beyond the number of parses: report it in
        # the response instead of failing the request.
        error = ('ERROR: no parse available for this subtree;'
                 ' tree left unchanged\n')
    else:
        # The sub-parse numbers its leaves from 0; map them back onto the
        # original sentence positions.
        pos = sorted(
            newsubtree.subtrees(lambda node: isinstance(node[0], int)),
            key=lambda node: node[0])
        for i, postag in enumerate(pos):
            postag[0] = subseq[i]
        dt.nodes[nodeid][:] = newsubtree[:]
        tree = canonicalize(dt.nodes[0])
        dt = DrawTree(tree, senttok)  # kludge..
        session['actions'][REPARSE] += 1
        session.modified = True
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    link = ('<a href="/annotate/accept?%s">accept this tree</a>'
            % urlencode(dict(sentno=sentno, tree=treestr)))
    return Markup('%s\n\n%s%s\t%s' % (
        link, error,
        dt.text(unicodelines=True, html=True, funcsep='-', morphsep='/',
                nodeprops='t0'),
        treestr))
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by
    an AJAX call.

    Query parameters read from request.args: 'sent' (the sentence),
    'est', 'marg', 'objfun' and 'coarse' (parser settings), 'html'
    (presence selects HTML output) and 'lang' ('detect' to guess the
    language from the tokens).

    :returns: an empty string when no sentence is given, an error string
        for an unusable sentence length or unknown language, otherwise
        the rendered 'parsetree.html' template or a text/plain Response.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    key = (senttok, est, marg, objfun, coarse, lang)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(
            sent=sent, est=est, marg=marg, objfun=objfun, coarse=coarse,
            html=html))
        # The requested settings are written onto the module-level parser
        # object before parsing.
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A parser with a single stage has no second stage to tune;
            # indexing stages[1] unconditionally raised IndexError.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                for node in results[-1].parsetree.subtrees(
                        lambda n: n and not isinstance(n[0], Tree)):
                    treebank.handlemorphology(
                        'replace', None, node, node.source)
                    node.label = node.label.replace('[]', '')
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep the ten entries with the largest second element.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    for node in tree.subtrees(
                            lambda n: n and not isinstance(n[0], Tree)):
                        treebank.handlemorphology(
                            'replace', None, node, node.source)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def reattach():
    """Re-draw tree after re-attaching node under new parent.

    Reads 'sentno', 'tree', 'nodeid' and 'newparent' from the query string
    and performs one of three edits on the tree:

    - newparent == 'deletenode': remove the node given by nodeid and put
      its children in its place.
    - nodeid starts with 'newlabel_': insert a new node, labelled with the
      remainder of nodeid, between the node given by newparent and its
      children.
    - otherwise: move the node given by nodeid under the node given by
      newparent.

    Returns the validation error as a string when the tree is rejected;
    otherwise markup with an accept link, an optional error line, the
    drawn tree and the bracketed tree string. The REATTACH counter is
    only incremented when no error was recorded.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    dt = DrawTree(tree, senttok)
    error = ''
    if request.args.get('newparent') == 'deletenode':
        # remove nodeid by replacing it with its children
        _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
        nodeid = int(nodeid)
        x = dt.nodes[nodeid]
        if nodeid == 0 or isinstance(x[0], int):
            error = 'ERROR: cannot remove ROOT or POS node'
        else:
            children = list(x)
            x[:] = []
            # Locate the parent by object identity, then replace the
            # emptied node with its former children.
            for y in dt.nodes[0].subtrees():
                if any(child is x for child in y):
                    i = y.index(x)
                    y[i:i + 1] = children
                    tree = canonicalize(dt.nodes[0])
                    dt = DrawTree(tree, senttok)  # kludge..
                    break
    elif request.args.get('nodeid', '').startswith('newlabel_'):
        # splice in a new node under parentid
        _treeid, newparent = request.args.get('newparent', '').lstrip('t').split('_')
        newparent = int(newparent)
        # Everything after the first underscore is the new node's label.
        label = request.args.get('nodeid').split('_', 1)[1]
        y = dt.nodes[newparent]
        if isinstance(y[0], int):
            error = 'ERROR: cannot add node under POS tag'
        else:
            children = list(y)
            y[:] = []
            y[:] = [Tree(label, children)]
            tree = canonicalize(dt.nodes[0])
            dt = DrawTree(tree, senttok)  # kludge..
    else:
        # re-attach existing node at existing new parent
        _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
        nodeid = int(nodeid)
        _treeid, newparent = request.args.get('newparent', '').lstrip('t').split('_')
        newparent = int(newparent)
        # remove node from old parent
        # dt.nodes[nodeid].parent.pop(dt.nodes[nodeid].parent_index)
        x = dt.nodes[nodeid]
        y = dt.nodes[newparent]
        # Refuse the move when the new parent is found among the subtrees
        # of the node being moved.
        for node in x.subtrees():
            if node is y:
                error = ('ERROR: cannot re-attach subtree'
                        ' under (descendant of) itself\n')
                break
        else:
            # Only reached when the loop above finished without a break.
            for node in dt.nodes[0].subtrees():
                if any(child is x for child in node):
                    if len(node) > 1:
                        node.remove(x)
                        dt.nodes[newparent].append(x)
                        tree = canonicalize(dt.nodes[0])
                        dt = DrawTree(tree, senttok)  # kludge..
                    else:
                        error = ('ERROR: re-attaching only child creates'
                                ' empty node %s; remove manually\n' % node)
                    break
    # When an error was recorded, tree and dt are still the validated
    # input, so the unchanged tree is drawn below the error line.
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    link = ('<a href="/annotate/accept?%s">accept this tree</a>'
            % urlencode(dict(sentno=sentno, tree=treestr)))
    if error == '':
        session['actions'][REATTACH] += 1
        session.modified = True
    return Markup('%s\n\n%s%s\t%s' % (link, error,
            dt.text(unicodelines=True, html=True, funcsep='-',
                morphsep='/', nodeprops='t0'),
            treestr))
def parse():
    """Display parse. To be invoked by an AJAX call.

    Reads 'sentno' and the optional 'require' / 'block' constraints from
    the query string, parses the sentence in the user's worker and renders
    'annotatetree.html' with the decision tree, the best tree, an optional
    dependency view and timing information.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    username = session['username']
    rawrequire = request.args.get('require', '')
    rawblock = request.args.get('block', '')
    # Only non-empty constraints are carried over into generated links.
    urlprm = {'sentno': sentno}
    if rawrequire:
        urlprm['require'] = rawrequire
    if rawblock:
        urlprm['block'] = rawblock
    require, block = parseconstraints(rawrequire, rawblock)
    if require or block:
        session['actions'][CONSTRAINTS] += 1
        session.modified = True
    # For debugging, worker.getparses(sent, require, block) can be called
    # directly instead of going through the worker pool.
    future = WORKERS[username].submit(
        worker.getparses, sent, require, block)
    senttok, parsetrees, messages, elapsed = future.result()
    maxdepth = ''
    dep = depsvg = ''
    if parsetrees:
        if workerattr('headrules'):
            dep = writedependencies(parsetrees[0][1], senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        result = ''
        dectree, maxdepth, _ = decisiontree(parsetrees, senttok, urlprm)
        prob, tree, _treestr, _fragments = parsetrees[0]
        besttree = (
            '%(n)d. [%(prob)s] '
            '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
            '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
            '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
            '%(tree)s' % dict(
                n=1,
                prob=probstr(prob),
                urlprm=urlencode(dict(urlprm, n=1)),
                tree=DrawTree(tree, senttok).text(
                    unicodelines=True, html=True, funcsep='-',
                    morphsep='/', nodeprops='t1')))
        nbest = Markup('%s\nbest tree: %s' % (dectree, besttree))
    else:
        result = ('no parse! reload page to clear constraints, '
                  'or continue with next sentence.')
        nbest = ''
    msg = '\n'.join(messages)
    timing = 'CPU time elapsed: %s => %gs' % (
        ' '.join('%gs' % a for a in elapsed), sum(elapsed))
    lines = [
        'length: %d;' % len(senttok),
        msg,
        timing,
        'most probable parse trees:',
    ]
    lines.append(''.join(
        '%d. [%s] %s' % (
            rank + 1, probstr(treeprob),
            writediscbrackettree(treestr, senttok))
        for rank, (treeprob, _tree, treestr, _deriv)
        in enumerate(parsetrees)
        if treestr is not None) + '\n')
    info = '\n'.join(lines)
    return render_template(
        'annotatetree.html',
        sent=sent,
        result=result,
        nbest=nbest,
        info=info,
        dep=dep,
        depsvg=depsvg,
        maxdepth=maxdepth,
        msg='%d parse trees' % len(parsetrees))
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by
    an AJAX call.

    Query parameters read from request.args: 'sent' (the sentence; when it
    matches POSTAGS it is split into word/tag pairs), 'objfun', 'est',
    'marg' and 'coarse' (parser settings), 'html' (presence selects HTML
    output), 'lang' ('detect' to guess the language), and 'require' /
    'block' (JSON lists of (label, indices) constraints).

    :returns: an empty string when no sentence is given, an error string
        for an unusable sentence length or unknown language, otherwise
        the rendered 'parsetree.html' template or a text/plain Response.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        # Each whitespace-separated item is split at its last slash.
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Constraints are converted to sorted nested tuples so that they can
    # be part of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, lang=lang, est=est, marg=marg,
                         objfun=objfun, coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = '?' + url_encode(urlparams)
        # The requested settings are written onto the module-level parser
        # object before parsing.
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if SHOWMORPH:
            replacemorph(results[-1].parsetree)
        if SHOWFUNC:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or []
        # Keep the ten entries with the largest second element.
        parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
        parsetrees_ = []
        LOG.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(DrawTree(tree, senttok).text(
            unicodelines=True, html=html, funcsep='-'))
        for tree, prob, x in parsetrees:
            tree = parser.postprocess(tree, senttok, -1)[0]
            if SHOWMORPH:
                replacemorph(tree)
            if SHOWFUNC:
                treebank.handlefunctions('add', tree, pos=True)
            parsetrees_.append((tree, prob, x))
        # parsetrees may be empty (see the 'or []' above); without the
        # extra check, parsetrees[0] raised IndexError whenever head rules
        # were configured.
        if parser.headrules and parsetrees:
            xtree = parser.postprocess(parsetrees[0][0], senttok, -1)[0]
            dep = treebank.writedependencies(xtree, senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        else:
            dep = depsvg = ''
        rid = randid()
        nbest = Markup('\n\n'.join(
            '%d. [%s] '
            '<a href=\'javascript: toggle("f%s%d"); \'>'
            'derivation</a>\n'
            '<span id=f%s%d style="display: none; margin-left: 3em; ">'
            'Fragments used in the highest ranked derivation'
            ' of this parse tree:\n%s</span>\n%s' % (
                n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                '\n\n'.join(
                    '%s\n%s' % (
                        w,
                        DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in fragments or ()
                    # if frag.count('(') > 1
                ),
                DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        deriv = Markup(
            'Fragments used in the highest ranked derivation'
            ' of best parse tree:\n%s' % (
                '\n\n'.join(
                    '%s\n%s' % (
                        w,
                        DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in parsetrees_[0][2] or ()
                    # if frag.count('(') > 1
                ))) if parsetrees_ else ''
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(
            key, (sent, result, nbest, deriv, info, link, dep, depsvg),
            timeout=5000)
    else:
        (sent, result, nbest, deriv, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            deriv=deriv, info=info, link=link, dep=dep, depsvg=depsvg,
            randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by
    an AJAX call.

    Query parameters read from request.args: 'sent' (the sentence),
    'est', 'marg', 'objfun' and 'coarse' (parser settings), 'html'
    (presence selects HTML output) and 'lang' ('detect' to guess the
    language from the tokens).

    :returns: an empty string when no sentence is given, an error string
        for an unusable sentence length or unknown language, otherwise
        the rendered 'parsetree.html' template or a text/plain Response.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    key = (senttok, est, marg, objfun, coarse, lang)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(
            sent=sent, est=est, marg=marg, objfun=objfun, coarse=coarse,
            html=html))
        # The requested settings are written onto the module-level parser
        # object before parsing.
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A parser with a single stage has no second stage to tune;
            # indexing stages[1] unconditionally raised IndexError.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep the ten entries with the largest second element.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by
    an AJAX call.

    Query parameters read from request.args: 'sent' (the sentence; when it
    matches POSTAGS it is split into word/tag pairs), 'objfun', 'est',
    'marg' and 'coarse' (parser settings), 'html' (presence selects HTML
    output), 'lang' ('detect' to guess the language), and 'require' /
    'block' (JSON lists of (label, indices) constraints).

    :returns: an empty string when no sentence is given, an error string
        for an unusable sentence length or unknown language, otherwise
        the rendered 'parsetree.html' template or a text/plain Response.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        # Each whitespace-separated item is split at its last slash.
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Constraints are converted to sorted nested tuples so that they can
    # be part of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, est=est, marg=marg, objfun=objfun,
                         coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = 'parse?' + url_encode(urlparams)
        # The requested settings are written onto the module-level parser
        # object before parsing.
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            nbest = dep = depsvg = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep the ten entries with the largest second element.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            # parsetrees may be empty (see the 'or []' above); without the
            # extra check, parsetrees[0] raised IndexError whenever head
            # rules were configured.
            if parser.headrules and parsetrees:
                xtree = parser.postprocess(
                    parsetrees[0][0], senttok, -1)[0]
                dep = treebank.writedependencies(xtree, senttok, 'conll')
                depsvg = Markup(DrawDependencies.fromconll(dep).svg())
            else:
                dep = depsvg = ''
            rid = randid()
            nbest = Markup('\n\n'.join(
                '%d. [%s] '
                '<a href=\'javascript: toggle("f%s%d"); \'>'
                'derivation</a>\n'
                '<span id=f%s%d style="display: none; margin-left: 3em; ">'
                'Fragments used in the highest ranked derivation'
                ' of this parse tree:\n%s</span>\n%s' % (
                    n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                    '\n\n'.join(
                        '%s\n%s' % (
                            w,
                            DrawTree(frag).text(
                                unicodelines=True, html=html))
                        for frag, w in fragments or ()
                        # if frag.count('(') > 1
                    ),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg, elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, nbest, info, link, dep, depsvg),
                  timeout=5000)
    else:
        (sent, result, nbest, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            info=info, link=link, dep=dep, depsvg=depsvg, randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')