Exemple #1
0
def annotate(sentno):
    """Serve the main annotation page for a sentence.

    :param sentno: 1-indexed position in ``QUEUE``; the special value -1
        stands for the first sentence the current user has not annotated.
    :returns: a redirect to the concrete sentence URL when ``sentno`` is -1,
        a redirect to the edit view when an annotation already exists for
        this user and sentence, and otherwise the rendered
        ``annotate.html`` page.
    """
    username = session['username']
    if sentno == -1:
        # The redirect response only takes effect when it is returned;
        # previously it was constructed and silently discarded.
        return redirect(
            url_for('annotate', sentno=firstunannotated(username)))
    # Reset the per-sentence action counters; the last slot holds time().
    session['actions'] = [0, 0, 0, 0, 0, 0, 0, time()]
    lineno = QUEUE[sentno - 1][0]
    sent = SENTENCES[lineno]
    senttok, _ = worker.postokenize(sent)
    annotation, n = getannotation(username, lineno)
    if annotation is not None:
        # Already annotated: hand the stored tree over to the edit view.
        item = exporttree(annotation.splitlines(), functions='add')
        canonicalize(item.tree)
        worker.domorph(item.tree)
        tree = writediscbrackettree(item.tree, item.sent)
        return redirect(
            url_for('edit', sentno=sentno, annotated=1, tree=tree, n=n))
    return render_template('annotate.html',
                           prevlink=str(sentno - 1) if sentno > 1 else '#',
                           nextlink=str(sentno +
                                        1) if sentno < len(SENTENCES) else '#',
                           sentno=sentno,
                           lineno=lineno + 1,
                           totalsents=len(SENTENCES),
                           numannotated=numannotated(username),
                           annotationhelp=ANNOTATIONHELP,
                           sent=' '.join(senttok))
Exemple #2
0
def edit():
    """Edit tree manually.

    Expects a 1-indexed ``sentno`` query argument plus either ``n`` (pick the
    n-th parse returned by the user's worker, optionally restricted by the
    ``require`` / ``block`` constraints) or ``tree`` (a tree string that is
    read with ``discbrackettree``). Renders ``edittree.html``, or returns an
    error string when neither ``n`` nor ``tree`` is supplied.
    """
    args = request.args
    sentno = int(args.get('sentno'))  # 1-indexed
    lineno = QUEUE[sentno - 1][0]
    sent = SENTENCES[lineno]
    username = session['username']
    if 'dec' in args:
        session['actions'][DECTREE] += int(args.get('dec', 0))
    # The action list is mutated in place, so flag the session as changed.
    session.modified = True

    if 'n' in args:
        n = int(args.get('n', 1))
        session['actions'][NBEST] = n
        require, block = parseconstraints(
            args.get('require', ''), args.get('block', ''))
        future = WORKERS[username].submit(
            worker.getparses, sent, require, block)
        senttok, parsetrees, _messages, _elapsed = future.result()
        tree = parsetrees[n - 1][1]
    elif 'tree' in args:
        tree, senttok = discbrackettree(args.get('tree'))
    else:
        return 'ERROR: pass n or tree argument.'

    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    if args.get('annotated', False):
        msg = Markup('<font color=red>You have already annotated '
                     'this sentence.</font>')
    else:
        msg = ''

    sentlink = '/annotate/annotate/%d'
    notlast = sentno < len(SENTENCES)
    context = dict(
        prevlink=sentlink % (sentno - 1) if sentno > 1 else '#',
        nextlink=sentlink % (sentno + 1) if notlast else '#',
        # Only look up the next unannotated sentence when there is a next one.
        unextlink=sentlink % firstunannotated(username) if notlast else '#',
        treestr=treestr,
        senttok=' '.join(senttok),
        sentno=sentno,
        lineno=lineno + 1,
        totalsents=len(SENTENCES),
        numannotated=numannotated(username),
        poslabels=sorted(workerattr('poslabels')),
        phrasallabels=sorted(workerattr('phrasallabels')),
        functiontags=sorted(
            workerattr('functiontags')
            | set(app.config['FUNCTIONTAGWHITELIST'])),
        morphtags=sorted(workerattr('morphtags')),
        annotationhelp=ANNOTATIONHELP,
        rows=max(5, treestr.count('\n') + 1),
        cols=100,
        msg=msg)
    return render_template('edittree.html', **context)
Exemple #3
0
def newlabel():
    """Re-draw tree with newly picked label.

    Reads ``sentno``, ``tree`` and ``nodeid`` from the query string together
    with exactly one of ``label``, ``function`` or ``morph``. The label of
    the addressed node is split with ``LABELRE`` into three groups; the
    group that corresponds to the supplied argument is replaced and the
    other two are kept. Returns the validation error text when the tree
    does not validate, otherwise markup holding an accept link, the
    re-drawn tree and the tree string.
    """
    args = request.args
    sentno = int(args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    treestr = args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    # FIXME: re-factor; check label AFTER replacing it
    # now actually replace label at nodeid
    _treeid, nodeid = args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(nodeid)
    dt = DrawTree(tree, senttok)
    node = dt.nodes[nodeid]
    match = LABELRE.match(node.label)
    if 'label' in args:
        category = args.get('label', '')
        node.label = (category + (match.group(2) or '') +
                      (match.group(3) or ''))
    elif 'function' in args:
        function = args.get('function', '')
        # An empty value removes the function tag altogether.
        funcpart = '-%s' % function if function != '' else ''
        node.label = '%s%s%s' % (match.group(1), funcpart,
                                 match.group(3) or '')
    elif 'morph' in args:
        morph = args.get('morph', '')
        # An empty value removes the morphology tag altogether.
        morphpart = '/%s' % morph if morph != '' else ''
        node.label = '%s%s%s' % (match.group(1), match.group(2) or '',
                                 morphpart)
    else:
        raise ValueError('expected label or function argument')
    tree = dt.nodes[0]
    dt = DrawTree(tree, senttok)  # kludge..
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    session['actions'][RELABEL] += 1
    session.modified = True
    drawing = dt.text(unicodelines=True,
                      html=True,
                      funcsep='-',
                      morphsep='/',
                      nodeprops='t0')
    return Markup('%s\n\n%s\t%s' % (link, drawing, treestr))
Exemple #4
0
def replacesubtree():
    """Re-parse the words under one node and splice the result into the tree.

    Reads ``n``, ``sentno``, ``tree`` and ``nodeid`` from the query string.
    The tokens covered by the addressed node are parsed by the user's worker
    with the node's label as root; the children of the n-th parse replace the
    children of the node. Returns the validation error text when the tree
    does not validate, otherwise markup holding an accept link, the re-drawn
    tree and the tree string.
    """
    # NOTE(review): with the default n=0 the lookup below is parsetrees[-1],
    # i.e. the last parse in the list -- confirm this is intended.
    choice = int(request.args.get('n', 0))
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    username = session['username']
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    error = ''
    dt = DrawTree(tree, senttok)
    _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(nodeid)
    target = dt.nodes[nodeid]
    subseq = sorted(target.leaves())
    subsent = ' '.join(senttok[idx] for idx in subseq)
    root = dt.nodes[nodeid].label
    future = WORKERS[username].submit(worker.getparses,
                                      subsent, (), (),
                                      root=root)
    _senttok, parsetrees, _messages, _elapsed = future.result()
    newsubtree = parsetrees[choice - 1][1]
    # Nodes whose first child is an int index, ordered by that index; map
    # the indices of the sub-sentence back onto the full sentence.
    preterminals = sorted(
        newsubtree.subtrees(lambda node: isinstance(node[0], int)),
        key=lambda node: node[0])
    for position, preterminal in enumerate(preterminals):
        preterminal[0] = subseq[position]
    dt.nodes[nodeid][:] = newsubtree[:]
    tree = canonicalize(dt.nodes[0])
    dt = DrawTree(tree, senttok)  # kludge..
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    session['actions'][REPARSE] += 1
    session.modified = True
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    drawing = dt.text(unicodelines=True,
                      html=True,
                      funcsep='-',
                      morphsep='/',
                      nodeprops='t0')
    return Markup('%s\n\n%s%s\t%s' % (link, error, drawing, treestr))
Exemple #5
0
def parse():
	"""Parse sentence and return a textual representation of a parse tree.

	Output is either in a HTML fragment or in plain text. To be invoked by an
	AJAX call.

	Reads the query arguments ``sent``, ``est``, ``marg``, ``objfun``,
	``coarse``, ``html`` and ``lang``. Results are stored in ``CACHE`` under
	a key built from the tokens and the parser settings.

	:returns: an empty string when ``sent`` is missing, an error message when
		the sentence length is out of range or the language is unknown, the
		rendered ``parsetree.html`` template when ``html`` is requested,
		and a ``text/plain`` response otherwise.
	"""
	sent = request.args.get('sent', None)
	est = request.args.get('est', 'rfe')
	marg = request.args.get('marg', 'nbest')
	objfun = request.args.get('objfun', 'mpp')
	coarse = request.args.get('coarse', None)
	html = 'html' in request.args
	lang = request.args.get('lang', 'detect')
	if not sent:
		return ''
	frags = nbest = None
	senttok = tokenize(sent)
	if not senttok or not 1 <= len(senttok) <= LIMIT:
		return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
	if lang == 'detect':
		lang = guesslang(senttok)
	elif lang not in PARSERS:
		return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
	key = (senttok, est, marg, objfun, coarse, lang)
	resp = CACHE.get(key)
	if resp is None:
		link = 'parse?' + url_encode(dict(sent=sent, est=est, marg=marg,
				objfun=objfun, coarse=coarse, html=html))
		# Configure the parser object for this request.
		PARSERS[lang].stages[-1].estimator = est
		PARSERS[lang].stages[-1].objective = objfun
		PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
		PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
		if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
			PARSERS[lang].stages[0].mode = coarse
			# A single-stage parser has no second stage to configure;
			# indexing stages[1] unconditionally raised IndexError.
			if len(PARSERS[lang].stages) > 1:
				PARSERS[lang].stages[1].k = (1e-5
						if coarse == 'pcfg-posterior' else 50)

		results = list(PARSERS[lang].parse(senttok))
		if results[-1].noparse:
			parsetrees = []
			result = 'no parse!'
			frags = nbest = ''
		else:
			if SHOWMORPH:
				for node in results[-1].parsetree.subtrees(
						lambda n: n and not isinstance(n[0], Tree)):
					treebank.handlemorphology(
							'replace', None, node, node.source)
					node.label = node.label.replace('[]', '')
			if SHOWFUNC:
				treebank.handlefunctions('add', results[-1].parsetree, pos=True)
			tree = str(results[-1].parsetree)
			prob = results[-1].prob
			parsetrees = results[-1].parsetrees or []
			# Keep at most the 10 highest-scoring entries.
			parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
			parsetrees_ = []
			fragments = results[-1].fragments or ()
			APP.logger.info('[%s] %s', probstr(prob), tree)
			tree = Tree.parse(tree, parse_leaf=int)
			result = Markup(DrawTree(tree, senttok).text(
					unicodelines=True, html=html, funcsep='-'))
			frags = Markup('Phrasal fragments used in the most probable '
					'derivation of the highest ranked parse tree:\n'
					+ '\n\n'.join(
					DrawTree(frag).text(unicodelines=True, html=html)
					for frag in fragments if frag.count('(') > 1))
			# Post-process each n-best entry for display; note that this
			# loop rebinds ``tree`` and ``prob``.
			for tree, prob, x in parsetrees:
				tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
				if SHOWMORPH:
					for node in tree.subtrees(
							lambda n: n and not isinstance(n[0], Tree)):
						treebank.handlemorphology(
								'replace', None, node, node.source)
				if SHOWFUNC:
					treebank.handlefunctions('add', tree, pos=True)
				parsetrees_.append((tree, prob, x))
			nbest = Markup('\n\n'.join('%d. [%s]\n%s' % (n + 1, probstr(prob),
						DrawTree(tree, senttok).text(
							unicodelines=True, html=html, funcsep='-'))
					for n, (tree, prob, _) in enumerate(parsetrees_)))
		msg = '\n'.join(stage.msg for stage in results)
		elapsed = [stage.elapsedtime for stage in results]
		elapsed = 'CPU time elapsed: %s => %gs' % (
				' '.join('%gs' % a for a in elapsed), sum(elapsed))
		info = '\n'.join(('length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
				len(senttok), lang, est, objfun, marg), msg, elapsed,
				'10 most probable parse trees:',
				'\n'.join('%d. [%s] %s' % (n + 1, probstr(prob),
						writediscbrackettree(tree, senttok))
						for n, (tree, prob, _) in enumerate(parsetrees))
				+ '\n'))
		CACHE.set(key, (sent, result, frags, nbest, info, link), timeout=5000)
	else:
		(sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
				info, link) = resp  # pylint: disable=unpacking-non-sequence
	if html:
		return render_template('parsetree.html', sent=sent, result=result,
				frags=frags, nbest=nbest, info=info, link=link,
				randid=randid())
	else:
		return Response('\n'.join((nbest, frags, info, result)),
				mimetype='text/plain')
Exemple #6
0
def reattach():
    """Re-draw tree after re-attaching node under new parent.

    Three operations are selected by the query arguments:

    - ``newparent=deletenode``: remove the node ``nodeid`` and put its
      children in its place;
    - ``nodeid`` starting with ``newlabel_``: insert a new node with that
      label between ``newparent`` and its current children;
    - otherwise: move the node ``nodeid`` so that it becomes the last child
      of ``newparent``.

    Returns the validation error text when the tree does not validate,
    otherwise markup holding an accept link, an optional error message, the
    (re-)drawn tree and the tree string.
    """
    args = request.args
    sentno = int(args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    treestr = args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)

    def nodeindex(param):
        """Strip leading 't's from an argument; return the int after '_'."""
        _treeid, index = args.get(param, '').lstrip('t').split('_')
        return int(index)

    dt = DrawTree(tree, senttok)
    error = ''
    nodearg = args.get('nodeid', '')
    if args.get('newparent') == 'deletenode':
        # remove nodeid by replacing it with its children
        nodeid = nodeindex('nodeid')
        doomed = dt.nodes[nodeid]
        if nodeid == 0 or isinstance(doomed[0], int):
            error = 'ERROR: cannot remove ROOT or POS node'
        else:
            orphans = list(doomed)
            doomed[:] = []
            for parent in dt.nodes[0].subtrees():
                if any(child is doomed for child in parent):
                    i = parent.index(doomed)
                    parent[i:i + 1] = orphans
                    tree = canonicalize(dt.nodes[0])
                    dt = DrawTree(tree, senttok)  # kludge..
                    break
    elif nodearg.startswith('newlabel_'):
        # splice in a new node under parentid
        newparent = nodeindex('newparent')
        label = nodearg.split('_', 1)[1]
        parent = dt.nodes[newparent]
        if isinstance(parent[0], int):
            error = 'ERROR: cannot add node under POS tag'
        else:
            children = list(parent)
            parent[:] = []
            parent[:] = [Tree(label, children)]
            tree = canonicalize(dt.nodes[0])
            dt = DrawTree(tree, senttok)  # kludge..
    else:  # re-attach existing node at existing new parent
        nodeid = nodeindex('nodeid')
        newparent = nodeindex('newparent')
        moved = dt.nodes[nodeid]
        target = dt.nodes[newparent]
        if any(node is target for node in moved.subtrees()):
            error = ('ERROR: cannot re-attach subtree'
                     ' under (descendant of) itself\n')
        else:
            # Find the current parent by identity and detach from it.
            for parent in dt.nodes[0].subtrees():
                if not any(child is moved for child in parent):
                    continue
                if len(parent) > 1:
                    parent.remove(moved)
                    dt.nodes[newparent].append(moved)
                    tree = canonicalize(dt.nodes[0])
                    dt = DrawTree(tree, senttok)  # kludge..
                else:
                    error = ('ERROR: re-attaching only child creates'
                             ' empty node %s; remove manually\n' % parent)
                break
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    if error == '':
        # Only successful edits count as an action.
        session['actions'][REATTACH] += 1
        session.modified = True
    drawing = dt.text(unicodelines=True,
                      html=True,
                      funcsep='-',
                      morphsep='/',
                      nodeprops='t0')
    return Markup('%s\n\n%s%s\t%s' % (link, error, drawing, treestr))
Exemple #7
0
def parse():
    """Display parse. To be invoked by an AJAX call.

    Parses the sentence selected by the 1-indexed ``sentno`` argument with
    the current user's worker, honouring the optional ``require`` and
    ``block`` constraints, and renders ``annotatetree.html``.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    username = session['username']
    require = request.args.get('require', '')
    block = request.args.get('block', '')
    # Non-empty raw constraints are passed on in the generated links.
    urlprm = dict(sentno=sentno)
    if require:
        urlprm['require'] = require
    if block:
        urlprm['block'] = block
    require, block = parseconstraints(require, block)
    if require or block:
        session['actions'][CONSTRAINTS] += 1
        session.modified = True
    if False and app.config['DEBUG']:
        resp = worker.getparses(sent, require, block)
    else:
        future = WORKERS[username].submit(worker.getparses, sent, require,
                                          block)
        resp = future.result()
    senttok, parsetrees, messages, elapsed = resp

    maxdepth = nbest = dep = depsvg = ''
    if parsetrees:
        if workerattr('headrules'):
            dep = writedependencies(parsetrees[0][1], senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        result = ''
        dectree, maxdepth, _ = decisiontree(parsetrees, senttok, urlprm)
        prob, tree, _treestr, _fragments = parsetrees[0]
        besttree = (
            '%(n)d. [%(prob)s] '
            '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
            '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
            '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
            '%(tree)s' % dict(
                n=1,
                prob=probstr(prob),
                urlprm=urlencode(dict(urlprm, n=1)),
                tree=DrawTree(tree, senttok).text(unicodelines=True,
                                                  html=True,
                                                  funcsep='-',
                                                  morphsep='/',
                                                  nodeprops='t1')))
        nbest = Markup('%s\nbest tree: %s' % (dectree, besttree))
    else:
        result = ('no parse! reload page to clear constraints, '
                  'or continue with next sentence.')

    msg = '\n'.join(messages)
    timings = ' '.join('%gs' % a for a in elapsed)
    elapsed = 'CPU time elapsed: %s => %gs' % (timings, sum(elapsed))
    listing = ''.join(
        '%d. [%s] %s' %
        (n + 1, probstr(prob), writediscbrackettree(treestr, senttok))
        for n, (prob, _tree, treestr, _deriv) in enumerate(parsetrees)
        if treestr is not None)
    info = '\n'.join(('length: %d;' % len(senttok), msg, elapsed,
                      'most probable parse trees:', listing + '\n'))
    return render_template('annotatetree.html',
                           sent=sent,
                           result=result,
                           nbest=nbest,
                           info=info,
                           dep=dep,
                           depsvg=depsvg,
                           maxdepth=maxdepth,
                           msg='%d parse trees' % len(parsetrees))
Exemple #8
0
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Reads the query arguments ``sent``, ``objfun``, ``est``, ``marg``,
    ``coarse``, ``html``, ``lang``, ``require`` and ``block``; the last two
    are decoded with ``json.loads``. When ``sent`` matches ``POSTAGS`` it is
    split into ``word/tag`` pairs, otherwise it is tokenized. Results are
    stored in ``CACHE`` under a key built from the tokens, tags, parser
    settings and constraints.

    :returns: an empty string when ``sent`` is missing, an error message when
        the sentence length is out of range or the language is unknown, the
        rendered ``parsetree.html`` template when ``html`` is requested,
        and a ``text/plain`` response otherwise.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Convert the constraints to sorted nested tuples so that they can be
    # part of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent,
                         lang=lang,
                         est=est,
                         marg=marg,
                         objfun=objfun,
                         coarse=coarse,
                         html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = '?' + url_encode(urlparams)
        # Configure the parser object for this request.
        PARSERS[lang].stages[-1].estimator = est
        PARSERS[lang].stages[-1].objective = objfun
        PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
        PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
        if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
            PARSERS[lang].stages[0].mode = ('pcfg' if coarse
                                            == 'pcfg-posterior' else coarse)
            if len(PARSERS[lang].stages) > 1:
                PARSERS[lang].stages[1].k = (1e-5 if coarse == 'pcfg-posterior'
                                             else 50)
        results = list(PARSERS[lang].parse(senttok,
                                           tags=tags,
                                           require=require,
                                           block=block))
        if SHOWMORPH:
            replacemorph(results[-1].parsetree)
        if SHOWFUNC:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or []
        # Keep at most the 10 highest-scoring entries.
        parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
        parsetrees_ = []
        LOG.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(
            DrawTree(tree, senttok).text(unicodelines=True,
                                         html=html,
                                         funcsep='-'))
        # Post-process each n-best entry for display; note that this loop
        # rebinds ``tree`` and ``prob``.
        for tree, prob, x in parsetrees:
            tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
            if SHOWMORPH:
                replacemorph(tree)
            if SHOWFUNC:
                treebank.handlefunctions('add', tree, pos=True)
            parsetrees_.append((tree, prob, x))
        # ``parsetrees`` may be empty (see the ``or []`` above); indexing it
        # unconditionally raised IndexError, so skip the dependency view.
        if PARSERS[lang].headrules and parsetrees:
            xtree = PARSERS[lang].postprocess(parsetrees[0][0], senttok, -1)[0]
            dep = treebank.writedependencies(xtree, senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        else:
            dep = depsvg = ''
        rid = randid()
        nbest = Markup('\n\n'.join(
            '%d. [%s] '
            '<a href=\'javascript: toggle("f%s%d"); \'>'
            'derivation</a>\n'
            '<span id=f%s%d style="display: none; margin-left: 3em; ">'
            'Fragments used in the highest ranked derivation'
            ' of this parse tree:\n%s</span>\n%s' % (
                n + 1,
                probstr(prob),
                rid,
                n + 1,
                rid,
                n + 1,
                '\n\n'.join(
                    '%s\n%s' %
                    (w, DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in fragments or ()  # if frag.count('(') > 1
                ),
                DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        deriv = Markup(
            'Fragments used in the highest ranked derivation'
            ' of best parse tree:\n%s' % (
                '\n\n'.join(
                    '%s\n%s' %
                    (w, DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in parsetrees_[0][2] or ()
                    # if frag.count('(') > 1
                ))) if parsetrees_ else ''
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (' '.join(
            '%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' %
            (len(senttok), lang, est, objfun, marg), msg, elapsed,
            '10 most probable parse trees:',
            ''.join('%d. [%s] %s' %
                    (n + 1, probstr(prob), writediscbrackettree(tree, senttok))
                    for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, nbest, deriv, info, link, dep, depsvg),
                  timeout=5000)
    else:
        (sent, result, nbest, deriv, info, link, dep, depsvg) = resp
    if html:
        return render_template('parsetree.html',
                               sent=sent,
                               result=result,
                               nbest=nbest,
                               deriv=deriv,
                               info=info,
                               link=link,
                               dep=dep,
                               depsvg=depsvg,
                               randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')
Exemple #9
0
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Reads the query arguments ``sent``, ``est``, ``marg``, ``objfun``,
    ``coarse``, ``html`` and ``lang``. Results are stored in ``CACHE`` under
    a key built from the tokens and the parser settings.

    :returns: an empty string when ``sent`` is missing, an error message when
        the sentence length is out of range or the language is unknown, the
        rendered ``parsetree.html`` template when ``html`` is requested,
        and a ``text/plain`` response otherwise.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    key = (senttok, est, marg, objfun, coarse, lang)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(
            dict(sent=sent,
                 est=est,
                 marg=marg,
                 objfun=objfun,
                 coarse=coarse,
                 html=html))
        # Configure the parser object for this request.
        PARSERS[lang].stages[-1].estimator = est
        PARSERS[lang].stages[-1].objective = objfun
        PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
        PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
        if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
            PARSERS[lang].stages[0].mode = coarse
            # A single-stage parser has no second stage to configure;
            # indexing stages[1] unconditionally raised IndexError.
            if len(PARSERS[lang].stages) > 1:
                PARSERS[lang].stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)

        results = list(PARSERS[lang].parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions('add',
                                         results[-1].parsetree,
                                         pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep at most the 10 highest-scoring entries.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(
                DrawTree(tree, senttok).text(unicodelines=True,
                                             html=html,
                                             funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n' + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            # Post-process each n-best entry for display; note that this
            # loop rebinds ``tree`` and ``prob``.
            for tree, prob, x in parsetrees:
                tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' %
                (n + 1, probstr(prob), DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (' '.join(
            '%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join(
            ('length: %d; lang=%s; est=%s; objfun=%s; marg=%s' %
             (len(senttok), lang, est, objfun, marg), msg, elapsed,
             '10 most probable parse trees:', '\n'.join(
                 '%d. [%s] %s' %
                 (n + 1, probstr(prob), writediscbrackettree(tree, senttok))
                 for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link), timeout=5000)
    else:
        (
            sent,
            result,
            frags,
            nbest,  # pylint: disable=unpacking-non-sequence
            info,
            link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template('parsetree.html',
                               sent=sent,
                               result=result,
                               frags=frags,
                               nbest=nbest,
                               info=info,
                               link=link,
                               randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
Exemple #10
0
def parse():
	"""Parse sentence and return a textual representation of a parse tree.

	Output is either in a HTML fragment or in plain text. To be invoked by an
	AJAX call.

	Reads the query arguments ``sent``, ``objfun``, ``est``, ``marg``,
	``coarse``, ``html``, ``lang``, ``require`` and ``block``; the last two
	are decoded with ``json.loads``. When ``sent`` matches ``POSTAGS`` it is
	split into ``word/tag`` pairs, otherwise it is tokenized. Results are
	stored in ``CACHE`` under a key built from the tokens, tags, parser
	settings and constraints.

	:returns: an empty string when ``sent`` is missing, an error message when
		the sentence length is out of range or the language is unknown, the
		rendered ``parsetree.html`` template when ``html`` is requested,
		and a ``text/plain`` response otherwise.
	"""
	sent = request.args.get('sent', None)
	objfun = request.args.get('objfun', 'mpp')
	est = request.args.get('est', 'rfe')
	marg = request.args.get('marg', 'nbest')
	coarse = request.args.get('coarse', 'pcfg')
	html = 'html' in request.args
	lang = request.args.get('lang', 'detect')
	require = request.args.get('require', None)
	block = request.args.get('block', None)
	if not sent:
		return ''
	nbest = None
	if POSTAGS.match(sent):
		senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
	else:
		senttok, tags = tuple(tokenize(sent)), None
	if not senttok or not 1 <= len(senttok) <= LIMIT:
		return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
	if lang == 'detect':
		lang = guesslang(senttok)
	elif lang not in PARSERS:
		return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
	# Convert the constraints to sorted nested tuples so that they can be
	# part of the cache key.
	if require:
		require = tuple((label, tuple(indices))
				for label, indices in sorted(json.loads(require)))
	if block:
		block = tuple((label, tuple(indices))
				for label, indices in sorted(json.loads(block)))
	key = (senttok, tags, est, marg, objfun, coarse, lang, require, block)
	resp = CACHE.get(key)
	if resp is None:
		urlparams = dict(sent=sent, est=est, marg=marg, objfun=objfun,
				coarse=coarse, html=html)
		if require:
			urlparams['require'] = json.dumps(require)
		if block:
			urlparams['block'] = json.dumps(block)
		link = 'parse?' + url_encode(urlparams)
		# Configure the parser object for this request.
		PARSERS[lang].stages[-1].estimator = est
		PARSERS[lang].stages[-1].objective = objfun
		PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
		PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
		if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
			PARSERS[lang].stages[0].mode = (
					'pcfg' if coarse == 'pcfg-posterior' else coarse)
			if len(PARSERS[lang].stages) > 1:
				PARSERS[lang].stages[1].k = (1e-5
						if coarse == 'pcfg-posterior' else 50)
		results = list(PARSERS[lang].parse(
				senttok, tags=tags, require=require, block=block))
		if results[-1].noparse:
			parsetrees = []
			result = 'no parse!'
			nbest = dep = depsvg = ''
		else:
			if SHOWMORPH:
				replacemorph(results[-1].parsetree)
			if SHOWFUNC:
				treebank.handlefunctions('add', results[-1].parsetree, pos=True)
			tree = str(results[-1].parsetree)
			prob = results[-1].prob
			parsetrees = results[-1].parsetrees or []
			# Keep at most the 10 highest-scoring entries.
			parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
			parsetrees_ = []
			APP.logger.info('[%s] %s', probstr(prob), tree)
			tree = Tree.parse(tree, parse_leaf=int)
			result = Markup(DrawTree(tree, senttok).text(
					unicodelines=True, html=html, funcsep='-'))
			# Post-process each n-best entry for display; note that this
			# loop rebinds ``tree`` and ``prob``.
			for tree, prob, x in parsetrees:
				tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
				if SHOWMORPH:
					replacemorph(tree)
				if SHOWFUNC:
					treebank.handlefunctions('add', tree, pos=True)
				parsetrees_.append((tree, prob, x))
			# ``parsetrees`` may be empty (see the ``or []`` above);
			# indexing it unconditionally raised IndexError, so skip the
			# dependency view in that case.
			if PARSERS[lang].headrules and parsetrees:
				xtree = PARSERS[lang].postprocess(
						parsetrees[0][0], senttok, -1)[0]
				dep = treebank.writedependencies(xtree, senttok, 'conll')
				depsvg = Markup(DrawDependencies.fromconll(dep).svg())
			else:
				dep = depsvg = ''
			rid = randid()
			nbest = Markup('\n\n'.join('%d. [%s] '
					'<a href=\'javascript: toggle("f%s%d"); \'>'
					'derivation</a>\n'
					'<span id=f%s%d style="display: none; margin-left: 3em; ">'
					'Fragments used in the highest ranked derivation'
					' of this parse tree:\n%s</span>\n%s' % (
						n + 1,
						probstr(prob),
						rid, n + 1,
						rid, n + 1,
						'\n\n'.join('%s\n%s' % (w,
							DrawTree(frag).text(unicodelines=True, html=html))
							for frag, w in fragments or ()  # if frag.count('(') > 1
						),
						DrawTree(tree, senttok).text(
							unicodelines=True, html=html, funcsep='-'))
					for n, (tree, prob, fragments) in enumerate(parsetrees_)))
		msg = '\n'.join(stage.msg for stage in results)
		elapsed = [stage.elapsedtime for stage in results]
		elapsed = 'CPU time elapsed: %s => %gs' % (
				' '.join('%gs' % a for a in elapsed), sum(elapsed))
		info = '\n'.join(('length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
				len(senttok), lang, est, objfun, marg), msg, elapsed,
				'10 most probable parse trees:',
				''.join('%d. [%s] %s' % (n + 1, probstr(prob),
						writediscbrackettree(tree, senttok))
						for n, (tree, prob, _) in enumerate(parsetrees))
				+ '\n'))
		CACHE.set(key, (sent, result, nbest, info, link, dep, depsvg),
				timeout=5000)
	else:
		(sent, result, nbest, info, link, dep, depsvg) = resp
	if html:
		return render_template('parsetree.html', sent=sent, result=result,
				nbest=nbest, info=info, link=link, dep=dep,
				depsvg=depsvg, randid=randid())
	else:
		return Response('\n'.join((nbest, info, result)),
				mimetype='text/plain')