def domorph(tree):
    """Add morphological tags to the POS tags of ``tree`` where available.

    Visits every non-empty subtree whose first child is not a ``Tree``
    (i.e., the POS-level nodes). A node is processed only when it has a
    truthy ``source`` attribute whose ``treebank.MORPH`` field is non-empty
    and not the placeholder ``'--'``; such nodes are handed to
    ``treebank.handlemorphology`` with the ``'add'`` action.

    :param tree: object offering ``subtrees(predicate)``.
    :returns: None; any change is made by ``treebank.handlemorphology``
        on the nodes it is given.
    """
    def ispreterminal(node):
        return node and not isinstance(node[0], Tree)

    for preterminal in tree.subtrees(ispreterminal):
        # Nodes without source information carry no morphology to add.
        if not hasattr(preterminal, 'source') or not preterminal.source:
            continue
        morph = preterminal.source[treebank.MORPH]
        # '--' marks an absent morphological tag.
        if not morph or morph == '--':
            continue
        treebank.handlemorphology(
                'add', None, preterminal, preterminal.source)
def replacemorph(tree):
    """Substitute morphological tags for the POS tags of ``tree``.

    Visits every non-empty subtree whose first child is not a ``Tree``
    (i.e., the POS-level nodes). When such a node has a truthy ``source``
    attribute whose ``treebank.MORPH`` field is non-empty and not the
    placeholder ``'--'``, the node is handed to
    ``treebank.handlemorphology`` with the ``'replace'`` action.
    Afterwards any ``'[]'`` occurring in the label of each visited node is
    removed.

    :param tree: object offering ``subtrees(predicate)``.
    :returns: None; the visited nodes are modified in place.
    """
    def ispreterminal(node):
        return node and not isinstance(node[0], Tree)

    for preterminal in tree.subtrees(ispreterminal):
        if hasattr(preterminal, 'source') and preterminal.source:
            morph = preterminal.source[treebank.MORPH]
            # '--' marks an absent morphological tag; keep the POS tag then.
            if morph and morph != '--':
                treebank.handlemorphology(
                        'replace', None, preterminal, preterminal.source)
        # Drop empty bracket pairs from the label.
        preterminal.label = preterminal.label.replace('[]', '')
def parse():
    """Parse a sentence and return a textual representation of a parse tree.

    Output is either an HTML fragment or plain text. To be invoked by an
    AJAX call. The following query parameters are read from the request:

    :sent: the sentence to parse; without it an empty string is returned.
    :est: value assigned to the ``estimator`` of the last parser stage
        (default ``'rfe'``).
    :marg: ``'nbest'`` (default), ``'sample'`` or ``'both'``; toggles the
        ``kbest`` and ``sample`` flags of the last parser stage.
    :objfun: value assigned to the ``objective`` of the last parser stage
        (default ``'mpp'``).
    :coarse: optional mode for the first stage; only applied when that
        stage is currently in a mode starting with ``'pcfg'``.
    :html: when present (with any value), the ``parsetree.html`` template
        is rendered; otherwise a ``text/plain`` response is returned.
    :lang: a key of ``PARSERS``, or ``'detect'`` (default) to let
        ``guesslang`` choose based on the tokens.

    Results are memoized in ``CACHE``. The key includes ``html`` because
    the drawn trees and the link are rendered differently for HTML and
    plain-text output and must not be served across the two modes.

    :returns: an empty string when there is nothing to parse, an error
        message string for over-long sentences or unknown languages, the
        rendered template for HTML requests, or a plain-text ``Response``.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    senttok = tokenize(sent)
    if not senttok:
        # Nothing left after tokenization: treat like an empty sentence
        # instead of reporting it as "too long".
        return ''
    if len(senttok) > LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # html is part of the key: result, frags, nbest and link depend on it.
    key = (senttok, est, marg, objfun, coarse, lang, html)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(sent=sent, est=est, marg=marg,
                objfun=objfun, coarse=coarse, html=html))
        # NOTE(review): the parser object is shared module state that is
        # reconfigured on every uncached request.
        parser = PARSERS[lang]
        laststage = parser.stages[-1]
        laststage.estimator = est
        laststage.objective = objfun
        laststage.kbest = marg in ('nbest', 'both')
        laststage.sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # presumably a posterior threshold vs. a k-best count for the
            # pruning of the next stage -- TODO confirm
            parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(parser.parse(senttok))
        best = results[-1]
        if best.noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                # Shared helper: skips nodes without usable morphology.
                replacemorph(best.parsetree)
            if SHOWFUNC:
                treebank.handlefunctions('add', best.parsetree, pos=True)
            treestr = str(best.parsetree)
            # Keep at most the ten highest-probability candidate trees.
            parsetrees = heapq.nlargest(
                    10, best.parsetrees or [], key=itemgetter(1))
            fragments = best.fragments or ()
            APP.logger.info('[%s] %s', probstr(best.prob), treestr)
            besttree = Tree.parse(treestr, parse_leaf=int)
            result = Markup(DrawTree(besttree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            frags = Markup('Phrasal fragments used in the most probable '
                    'derivation of the highest ranked parse tree:\n'
                    + '\n\n'.join(
                        DrawTree(frag).text(unicodelines=True, html=html)
                        for frag in fragments if frag.count('(') > 1))
            # Post-process the candidates for drawing; the unprocessed
            # ones in parsetrees are still needed for the info listing.
            drawable = []
            for cand, candprob, extra in parsetrees:
                cand = parser.postprocess(cand, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(cand)
                if SHOWFUNC:
                    treebank.handlefunctions('add', cand, pos=True)
                drawable.append((cand, candprob, extra))
            nbest = Markup('\n\n'.join(
                    '%d. [%s]\n%s' % (n + 1, probstr(candprob),
                        DrawTree(cand, senttok).text(
                            unicodelines=True, html=html, funcsep='-'))
                    for n, (cand, candprob, _) in enumerate(drawable)))
        msg = '\n'.join(stage.msg for stage in results)
        times = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
                ' '.join('%gs' % a for a in times), sum(times))
        info = '\n'.join((
                'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                    len(senttok), lang, est, objfun, marg),
                msg, elapsed,
                '10 most probable parse trees:',
                '\n'.join('%d. [%s] %s' % (n + 1, probstr(candprob),
                        writediscbrackettree(cand, senttok))
                    for n, (cand, candprob, _) in enumerate(parsetrees))
                + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link), timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
                info, link) = resp
    if html:
        return render_template('parsetree.html', sent=sent, result=result,
                frags=frags, nbest=nbest, info=info, link=link,
                randid=randid())
    return Response('\n'.join((nbest, frags, info, result)),
            mimetype='text/plain')