Beispiel #1
0
    def compare_hybridtrees(self, gold, system):
        """Score a system tree against a gold tree and return the 'LF' score.

        Both trees are converted with ``convert_tree`` and compared with
        ``TreePairResult`` using ``self.param``.

        :param gold: the reference tree
        :type gold: ConstituentTree
        :param system: the tree to be evaluated
        :type system: ConstituentTree
        :return: the ``'LF'`` entry of the scores as a float, or ``0.0`` when
            that value is NaN
        :rtype: float
        :raises AssertionError: when scoring failed with a ``KeyError``,
            ``IndexError`` or ``ValueError`` and the diagnostic re-run did
            not raise again (if the re-run raises, that exception propagates)
        """
        gtree, gsent = convert_tree(gold)
        stree, ssent = convert_tree(system)
        try:
            result = TreePairResult(0, gtree, gsent, stree, ssent, self.param).scores()
            f1 = float(result['LF'])
            return 0.0 if math.isnan(f1) else f1
        except (KeyError, IndexError, ValueError):
            # Diagnostics: convert both trees afresh and dump them before
            # failing, so that the offending pair can be inspected.
            gtree, gsent = convert_tree(gold)
            stree, ssent = convert_tree(system)
            print('gold tree:')
            print(DrawTree(gtree, gsent))
            print(gold)
            print(gold.root)

            print('system tree')
            print(DrawTree(stree, ssent))
            print(system)
            print(system.root)
            # Re-run the scoring outside of the ``try`` so that a repeated
            # failure surfaces with its own traceback.
            TreePairResult(0, gtree, gsent, stree, ssent, self.param).scores()
            # An explicit raise instead of ``assert False``: assert
            # statements are removed under ``python -O``, which would make
            # this method silently return None.
            raise AssertionError('scoring of tree pair failed')
Beispiel #2
0
def newlabel():
    """Change one component of a node label and return the re-drawn tree.

    Request arguments: ``sentno`` (1-indexed), ``tree``, ``nodeid`` and one
    of ``label``, ``function`` or ``morph`` carrying the new value. The
    current label of the node is split with ``LABELRE``; only the selected
    component is replaced, the other groups are carried over.

    Returns the validation error message as plain text when ``tree`` does
    not validate; otherwise markup consisting of an accept link, the drawn
    tree, a tab, and the bracketed tree string.

    Raises ValueError when none of the three value arguments is present.
    """
    args = request.args
    sentno = int(args.get('sentno'))  # 1-indexed
    tokens, _ = worker.postokenize(SENTENCES[QUEUE[sentno - 1][0]])
    treestr = args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, tokens)
    except ValueError as err:
        return str(err)
    # FIXME: re-factor; check label AFTER replacing it
    _treeid, rawid = args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(rawid)
    drawn = DrawTree(tree, tokens)
    node = drawn.nodes[nodeid]
    match = LABELRE.match(node.label)
    if 'label' in args:
        # new category; keep function and morphology suffixes
        value = args.get('label', '')
        node.label = ''.join(
            (value, match.group(2) or '', match.group(3) or ''))
    elif 'function' in args:
        # new function tag; an empty value removes the tag
        value = args.get('function', '')
        if value:
            node.label = '%s-%s%s' % (
                match.group(1), value, match.group(3) or '')
        else:
            node.label = '%s%s' % (match.group(1), match.group(3) or '')
    elif 'morph' in args:
        # new morphology; an empty value removes it
        value = args.get('morph', '')
        if value:
            node.label = '%s%s/%s' % (
                match.group(1), match.group(2) or '', value)
        else:
            node.label = '%s%s' % (match.group(1), match.group(2) or '')
    else:
        raise ValueError('expected label or function argument')
    tree = drawn.nodes[0]
    drawn = DrawTree(tree, tokens)  # kludge: rebuild drawing from new tree
    treestr = writediscbrackettree(tree, tokens, pretty=True).rstrip()
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    session['actions'][RELABEL] += 1
    session.modified = True
    rendered = drawn.text(
        unicodelines=True,
        html=True,
        funcsep='-',
        morphsep='/',
        nodeprops='t0')
    return Markup('%s\n\n%s\t%s' % (link, rendered, treestr))
def main():
    """Demo: draw and score corpus trees against a small hand-built tree.

    Loads the sentence ``s40564`` from the German SPMRL dev set, builds a
    minimal system tree from its first token plus a punctuation node, and
    for every loaded tree prints drawings, the token forms, the POS tags,
    the pairwise scores and the result of comparing the tree with itself.
    """
    corpus_path = '../res/SPMRL_SHARED_2014_NO_ARABIC/GERMAN_SPMRL/gold/xml/dev/dev.German.gold.xml'
    corpus = sentence_names_to_hybridtrees(
        ['s40564'], path=corpus_path, hold=False)

    comparator = TreeComparator()

    # hand-built system tree: one 'PN' constituent over the first token
    tree_sys = ConstituentTree()
    tree_sys.add_node('0', ConstituentCategory('PN'))
    tree_sys.add_node('1', corpus[0].token_yield()[0], order=True)
    tree_sys.add_punct("3", '$.', '.')
    tree_sys.add_to_root('0')
    tree_sys.add_child('0', '1')

    param = build_param()

    for index, hybridtree in enumerate(corpus):
        print(index)

        gold_tree, gold_sent = convert_tree(hybridtree)
        sys_tree, sys_sent = convert_tree(tree_sys)

        print(DrawTree(gold_tree, gold_sent))

        forms = (token.form() for token in hybridtree.full_token_yield())
        print(' '.join(forms))

        print(DrawTree(sys_tree, sys_sent))

        print(gold_tree[::-1])

        print('POS', gold_tree.pos())

        pair = TreePairResult(
            index, gold_tree, gold_sent, sys_tree, sys_sent, param)
        print(pair.scores())

        print("Comparator: ",
              comparator.compare_hybridtrees(hybridtree, hybridtree))
Beispiel #4
0
def reparsesubtree():
    """Re-parse the words under a selected node and list the candidates.

    Request arguments: ``sentno`` (1-indexed), ``tree`` and ``nodeid``.
    The tokens dominated by the selected node are joined into a sentence
    and submitted to the current user's worker with the node's label as
    root label.

    Returns the validation error message as plain text when ``tree`` does
    not validate; otherwise markup listing every returned parse tree with a
    link to pick it as replacement subtree.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    username = session['username']
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    dt = DrawTree(tree, senttok)
    _treeid, nodeid = request.args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(nodeid)
    subseq = sorted(dt.nodes[nodeid].leaves())
    subsent = ' '.join(senttok[n] for n in subseq)
    # FIXME only works when root label of tree matches label in grammar.
    # need a single label that works across all stages.
    root = dt.nodes[nodeid].label
    # root = grammar.tolabel[next(iter(grammar.tblabelmapping[root]))]
    resp = WORKERS[username].submit(worker.getparses,
                                    subsent, (), (),
                                    root=root).result()
    _senttok, parsetrees, _messages, _elapsed = resp
    app.logger.info('%d-%d. [parse trees=%d] %s', sentno, nodeid,
                    len(parsetrees), subsent)
    # Debug output of the best subtree; guarded because an empty result
    # would otherwise raise IndexError before the (valid) "0 parse trees"
    # response below could be produced.
    if parsetrees:
        print(parsetrees[0][1])
    # The candidate trees are drawn over the tokens of the sub-sentence.
    subtokens = subsent.split()
    entries = [
        '%(n)d. [%(prob)s] '
        '<a href="#" onClick="picksubtree(%(n)d); ">'
        'use this subtree</a>; '
        '\n\n'
        '%(tree)s' %
        dict(n=n + 1,
             prob=probstr(prob),
             tree=DrawTree(subtree, subtokens).text(unicodelines=True,
                                                    html=True,
                                                    funcsep='-',
                                                    morphsep='/',
                                                    nodeprops='t%d' %
                                                    (n + 1)))
        for n, (prob, subtree, _treestr, _fragments) in enumerate(parsetrees)
    ]
    nbest = Markup(
        '<pre>%d parse trees\n'
        '<a href="javascript: toggle(\'nbest\'); ">cancel</a>\n'
        '%s</pre>' % (len(parsetrees), '\n'.join(entries)))
    return nbest
Beispiel #5
0
def showderiv():
    """Show the fragments of the derivation behind one cached parse tree.

    Request arguments: ``n`` (1-indexed rank of the parse tree), ``sentno``
    (1-indexed) and the optional constraints ``require`` and ``block``.
    Returns markup with each fragment preceded by its weight, followed by
    the drawing of the selected parse tree.
    """
    username = session['username']
    n = int(request.args.get('n'))  # 1-indexed
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    require, block = parseconstraints(
        request.args.get('require', ''),
        request.args.get('block', ''))
    future = WORKERS[username].submit(
        worker.getparses, sent, require, block)
    senttok, parsetrees, _messages, _elapsed = future.result()
    _prob, tree, _treestr, fragments = parsetrees[n - 1]
    fragtext = '\n\n'.join(
        '%s\n%s' % (weight,
                    DrawTree(frag).text(unicodelines=True, html=True))
        for frag, weight in fragments or ())
    treetext = DrawTree(tree, senttok).text(
        unicodelines=True, html=True, funcsep='-')
    return Markup(
        '<pre>Fragments used in the highest ranked derivation'
        ' of this parse tree:\n%s\n%s</pre>' % (fragtext, treetext))
Beispiel #6
0
def filterparsetrees():
    """Return the n-best parses of a sentence that satisfy the filters.

    The sentence is parsed (or fetched by the worker) under the ``require``
    and ``block`` constraints; the resulting trees are then filtered with
    the ``frequire`` / ``fblock`` constraints. Trees without a tree string
    are always kept. The constraints counter of the session is incremented
    on every call.
    """
    username = session['username']
    session['actions'][CONSTRAINTS] += 1
    session.modified = True
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    # parameters repeated in the links of every listed tree
    urlprm = dict(sentno=sentno)
    require = request.args.get('require', '')
    block = request.args.get('block', '')
    if require:
        urlprm['require'] = require
    if block:
        urlprm['block'] = block
    require, block = parseconstraints(require, block)
    frequire, fblock = parseconstraints(
        request.args.get('frequire', ''),
        request.args.get('fblock', ''))
    future = WORKERS[username].submit(
        worker.getparses, sent, require, block)
    senttok, parsetrees, _messages, _elapsed = future.result()

    # keep the original rank of each tree that passes the filter
    kept = []
    for rank, (prob, tree, treestr, frags) in enumerate(parsetrees):
        if treestr is None or testconstraints(treestr, frequire, fblock):
            kept.append((rank, prob, tree, treestr, frags))
    if not kept:
        return ('No parse trees after filtering; try pressing Re-parse, '
                'or reload page to clear constraints.\n')

    template = (
        '%(n)d. [%(prob)s] '
        '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
        '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
        '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
        '%(tree)s')
    entries = []
    for rank, prob, tree, _treestr, _frags in kept:
        number = rank + 1
        entries.append(template % dict(
            n=number,
            prob=probstr(prob),
            urlprm=urlencode(dict(urlprm, n=number)),
            tree=DrawTree(tree, senttok).text(
                unicodelines=True,
                html=True,
                funcsep='-',
                morphsep='/',
                nodeprops='t%d' % number)))
    return Markup('%d parse trees\n%s' % (len(kept), '\n'.join(entries)))
Beispiel #7
0
def browsetrees():
    """Browse through the trees of a corpus file, one chunk per request.

    Without both ``text`` and ``sent`` request arguments, an index of the
    available texts is returned. Otherwise the chunk of trees containing
    sentence ``sent`` of text number ``text`` is rendered: as bare HTML
    fragments when ``ajax`` is given, else through ``browse.html``.
    """
    chunk = 20  # number of trees to fetch for one request
    if 'text' not in request.args or 'sent' not in request.args:
        return '<h1>Browse through trees</h1>\n<ol>\n%s</ol>\n' % '\n'.join(
            '<li><a href="browse?text=%d;sent=1;nomorph">%s</a> ' % (n, text)
            for n, text in enumerate(TEXTS))

    textno = int(request.args['text'])
    sentno = int(request.args['sent'])
    start = max(1, sentno - sentno % chunk)
    stop = start + chunk
    nofunc = 'nofunc' in request.args
    nomorph = 'nomorph' in request.args
    ajax = 'ajax' in request.args
    filename = os.path.join(CORPUS_DIR, TEXTS[textno] + '.export')
    trees = CORPORA[filename].itertrees(start, stop)

    # for ajax requests the fragments are delivered hidden
    style = ' style="display: none; "' if ajax else ''
    results = []
    for n, (_key, item) in enumerate(trees, start):
        measures = ', '.join(
            '%s: %.3g' % (name, addsentweight(FILTERS[name](item))[1])
            for name in sorted(FILTERS))
        drawing = DrawTree(item.tree, item.sent).text(
            unicodelines=True, html=True)
        results.append(
            '<pre id="t%s"%s>%s\n%s</pre>' % (n, style, measures, drawing))
    if ajax:
        return '\n'.join(results)

    if sentno > chunk:
        prevlink = '<a href="browse?text=%d;sent=%d" id=prev>prev</a>' % (
            textno, sentno - chunk + 1)
    else:
        prevlink = '<a id=prev>prev</a>'
    nextlink = '<a href="browse?text=%d;sent=%d" id=next>next</a>' % (
        textno, sentno + chunk + 1)
    return render_template(
        'browse.html',
        textno=textno,
        sentno=sentno,
        text=TEXTS[textno],
        totalsents=1000,
        trees=results,
        prevlink=prevlink,
        nextlink=nextlink,
        chunk=chunk,
        nofunc=nofunc,
        nomorph=nomorph,
        mintree=start,
        maxtree=stop)
Beispiel #8
0
def parse(args, stdinput):
    """Induce a grammar from a corpus, parse one sentence, print the result.

    Parameters
    ----------
    args : list(str)
        ``args[0]`` is the corpus file, ``args[1]`` the sentence to parse.
    stdinput : list(str)
        Serialized pruning policy; when empty, a fresh ``PruningPolicy``
        is used instead.

    """
    reader = TigerXMLCorpusReader(args[0], encoding='utf8')
    sent = args[1]

    # grammar induction; gold trees are those whose sentence equals `sent`
    trees = [ImmutableTree.convert(canonicalize(tree))
             for tree in reader.trees().values()]
    sentences = list(reader.sents().values())
    grammar = Grammar(trees, sentences)
    goldtrees = [tree for words, tree in zip(sentences, trees)
                 if ' '.join(words) == sent]

    # initial pruning policy
    if stdinput:
        policy = deserialize(stdinput, FEATURES, grammar)
    else:
        policy = PruningPolicy()

    derivationtree = Parser(grammar).parse(sent, policy).get_tree()
    stdout.flush()

    tokens = sent.split()
    parsed = isinstance(derivationtree, Tree)
    if parsed:
        # the sentence could be parsed: print bracketing and drawing
        print(derivationtree.pprint())
        print("\n derivation tree: \n"
              + DrawTree(derivationtree, tokens).text())
    else:
        # otherwise the returned object is printed as the error message
        print(derivationtree)
    if goldtrees:
        gold = goldtrees[0]
        print("\n gold tree: \n" + DrawTree(gold, tokens).text())
        # the comparison is only possible when both trees are available
        if parsed:
            print("\n recall: %f" % accuracy(derivationtree, gold))
Beispiel #9
0
def draw():
	"""Read the tree(s) in the ``tree`` request argument and draw them.

	Input longer than ``LIMIT`` is rejected. Any exception raised while
	reading or drawing is returned to the client as a plain text response
	carrying the exception message."""
	data = request.args['tree']
	if len(data) > LIMIT:
		return 'Too much data. Limit: %d bytes' % LIMIT
	try:
		reader = incrementaltreereader(
				data.splitlines(),
				morphology='add' if 'morph' in request.args else None,
				functions='add' if 'func' in request.args else None)
		items = list(reader)
	except Exception as err:  # pylint: disable=broad-except
		return Response(str(err), mimetype='text/plain')
	abbreviate = 'abbr' in request.args
	drawings = []
	for tree, sent, _rest in items:
		try:
			drawing = DrawTree(tree, sent, abbr=abbreviate)
		except Exception as err:  # pylint: disable=broad-except
			return Response(str(err), mimetype='text/plain')
		drawings.append(drawing)
	if drawings:
		return drawtrees(request.args, drawings)
	return Response('No trees!', mimetype='text/plain')
Beispiel #10
0
def redraw():
    """Validate the submitted tree and return its drawing.

    Request arguments: ``sentno`` (1-indexed), ``tree`` and optionally
    ``oldtree``. When ``oldtree`` is given and differs from ``tree``, the
    edit distance between the two is added to the session's counter.
    Returns the validation error message as plain text when ``tree`` does
    not validate; otherwise markup with an accept link and the drawing.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    tokens, _ = worker.postokenize(SENTENCES[QUEUE[sentno - 1][0]])
    treestr = request.args.get('tree')
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    try:
        tree, _sent1 = validate(treestr, tokens)
    except ValueError as err:
        return str(err)
    previous = request.args.get('oldtree', '')
    if previous and treestr != previous:
        session['actions'][EDITDIST] += editdistance(treestr, previous)
        session.modified = True
    drawing = DrawTree(tree, tokens).text(
        unicodelines=True,
        html=True,
        funcsep='-',
        morphsep='/',
        nodeprops='t0')
    return Markup('%s\n\n%s' % (link, drawing))
Beispiel #11
0
def replacesubtree():
    """Replace the subtree under a node by one of its re-parsed candidates.

    Request arguments: ``n`` (rank of the candidate, default 0), ``sentno``
    (1-indexed), ``tree`` and ``nodeid``. The words under the selected node
    are re-parsed with the node's label as root label; the children of the
    node are then replaced by those of candidate ``n``, with the candidate's
    leaf indices mapped back to the positions in the full sentence.

    Returns the validation error message as plain text when ``tree`` does
    not validate; otherwise markup with an accept link, the drawn tree, a
    tab, and the bracketed tree string.
    """
    choice = int(request.args.get('n', 0))
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    username = session['username']
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    error = ''
    dt = DrawTree(tree, senttok)
    _treeid, rawid = request.args.get('nodeid', '').lstrip('t').split('_')
    nodeid = int(rawid)
    target = dt.nodes[nodeid]
    subseq = sorted(target.leaves())
    subsent = ' '.join(senttok[index] for index in subseq)
    future = WORKERS[username].submit(
        worker.getparses, subsent, (), (), root=target.label)
    _senttok, parsetrees, _messages, _elapsed = future.result()
    newsubtree = parsetrees[choice - 1][1]

    # Preterminals of the candidate, ordered by their (sub-sentence) leaf
    # index; each leaf is renumbered to its index in the full sentence.
    preterminals = sorted(
        newsubtree.subtrees(lambda node: isinstance(node[0], int)),
        key=lambda node: node[0])
    for position, preterminal in enumerate(preterminals):
        preterminal[0] = subseq[position]
    target[:] = newsubtree[:]

    tree = canonicalize(dt.nodes[0])
    dt = DrawTree(tree, senttok)  # kludge: rebuild drawing from new tree
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    session['actions'][REPARSE] += 1
    session.modified = True
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    drawing = dt.text(
        unicodelines=True,
        html=True,
        funcsep='-',
        morphsep='/',
        nodeprops='t0')
    return Markup('%s\n\n%s%s\t%s' % (link, error, drawing, treestr))
Beispiel #12
0
def parse():
    """Parse the sentence in the ``sent`` request argument and render it.

    Output is either an HTML fragment or plain text; intended to be invoked
    by an AJAX call.

    Request arguments read: ``sent``, ``est``, ``marg``, ``objfun``,
    ``coarse``, ``lang`` and the flag ``html``.

    Rendered results are looked up in ``CACHE`` under a key built from the
    tokens and the settings; on a miss the parser of the selected language
    is configured, run, and the rendered pieces are stored in ``CACHE``.

    Returns an empty string when ``sent`` is missing or empty, a plain
    message when the token count is out of range or the language is
    unknown, the rendered ``parsetree.html`` template when ``html`` is
    given, and a ``text/plain`` response otherwise.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    # NOTE(review): this message is also returned when the token list is
    # empty, in which case "too long" is misleading.
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # NOTE(review): senttok is part of the cache key; presumably tokenize()
    # returns something hashable -- confirm.
    key = (senttok, est, marg, objfun, coarse, lang)
    resp = CACHE.get(key)
    if resp is None:
        # Cache miss: build the permalink, configure the parser, parse.
        link = 'parse?' + url_encode(
            dict(sent=sent,
                 est=est,
                 marg=marg,
                 objfun=objfun,
                 coarse=coarse,
                 html=html))
        # These assignments modify the shared parser object in PARSERS.
        PARSERS[lang].stages[-1].estimator = est
        PARSERS[lang].stages[-1].objective = objfun
        PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
        PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
        if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
            PARSERS[lang].stages[0].mode = coarse
            PARSERS[lang].stages[1].k = (1e-5
                                         if coarse == 'pcfg-posterior' else 50)

        results = list(PARSERS[lang].parse(senttok))
        if results[-1].noparse:
            # The last stage produced no parse: nothing to draw.
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions('add',
                                         results[-1].parsetree,
                                         pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep at most 10 trees, largest second tuple element first.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(
                DrawTree(tree, senttok).text(unicodelines=True,
                                             html=html,
                                             funcsep='-'))
            # Only fragments containing more than one '(' are drawn.
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n' + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            # Post-process each kept tree for display in the n-best list.
            for tree, prob, x in parsetrees:
                tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' %
                (n + 1, probstr(prob), DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        # Summary: settings, stage messages, timings, bracketed n-best list.
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (' '.join(
            '%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join(
            ('length: %d; lang=%s; est=%s; objfun=%s; marg=%s' %
             (len(senttok), lang, est, objfun, marg), msg, elapsed,
             '10 most probable parse trees:', '\n'.join(
                 '%d. [%s] %s' %
                 (n + 1, probstr(prob), writediscbrackettree(tree, senttok))
                 for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link), timeout=5000)
    else:
        # Cache hit: reuse the stored pieces.
        (
            sent,
            result,
            frags,
            nbest,  # pylint: disable=unpacking-non-sequence
            info,
            link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template('parsetree.html',
                               sent=sent,
                               result=result,
                               frags=frags,
                               nbest=nbest,
                               info=info,
                               link=link,
                               randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
Beispiel #13
0
def parse():
    """Parse the sentence in the ``sent`` request argument and render it.

    Output is either an HTML fragment or plain text; intended to be invoked
    by an AJAX call.

    Request arguments read: ``sent``, ``objfun``, ``est``, ``marg``,
    ``coarse``, ``lang``, the JSON-encoded constraints ``require`` and
    ``block``, and the flag ``html``. When ``sent`` matches ``POSTAGS``,
    every token is split at its last ``/`` into word and tag.

    Rendered results are looked up in ``CACHE`` under a key built from the
    tokens, tags, settings and constraints; on a miss the parser of the
    selected language is configured, run, and the rendered pieces are
    stored in ``CACHE``.

    Returns an empty string when ``sent`` is missing or empty, a plain
    message when the token count is out of range or the language is
    unknown, the rendered ``parsetree.html`` template when ``html`` is
    given, and a ``text/plain`` response otherwise.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Constraints are converted to sorted tuples so that they can be part
    # of the cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block)
    resp = CACHE.get(key)
    if resp is None:
        # Cache miss: build the permalink, configure the parser, parse.
        urlparams = dict(sent=sent,
                         lang=lang,
                         est=est,
                         marg=marg,
                         objfun=objfun,
                         coarse=coarse,
                         html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = '?' + url_encode(urlparams)
        # These assignments modify the shared parser object in PARSERS.
        PARSERS[lang].stages[-1].estimator = est
        PARSERS[lang].stages[-1].objective = objfun
        PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
        PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
        if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
            PARSERS[lang].stages[0].mode = ('pcfg' if coarse
                                            == 'pcfg-posterior' else coarse)
            if len(PARSERS[lang].stages) > 1:
                PARSERS[lang].stages[1].k = (1e-5 if coarse == 'pcfg-posterior'
                                             else 50)
        results = list(PARSERS[lang].parse(senttok,
                                           tags=tags,
                                           require=require,
                                           block=block))
        if SHOWMORPH:
            replacemorph(results[-1].parsetree)
        if SHOWFUNC:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or []
        # Keep at most 10 trees, largest second tuple element first.
        parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
        parsetrees_ = []
        LOG.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(
            DrawTree(tree, senttok).text(unicodelines=True,
                                         html=html,
                                         funcsep='-'))
        # Post-process each kept tree for display in the n-best list.
        for tree, prob, x in parsetrees:
            tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
            if SHOWMORPH:
                replacemorph(tree)
            if SHOWFUNC:
                treebank.handlefunctions('add', tree, pos=True)
            parsetrees_.append((tree, prob, x))
        # Dependencies are derived from the best parse tree; without any
        # parse tree there is nothing to convert, so fall back to empty
        # output instead of failing on parsetrees[0] with an IndexError.
        if PARSERS[lang].headrules and parsetrees:
            xtree = PARSERS[lang].postprocess(parsetrees[0][0], senttok, -1)[0]
            dep = treebank.writedependencies(xtree, senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        else:
            dep = depsvg = ''
        # One random id per response keeps the toggle targets unique.
        rid = randid()
        nbest = Markup('\n\n'.join(
            '%d. [%s] '
            '<a href=\'javascript: toggle("f%s%d"); \'>'
            'derivation</a>\n'
            '<span id=f%s%d style="display: none; margin-left: 3em; ">'
            'Fragments used in the highest ranked derivation'
            ' of this parse tree:\n%s</span>\n%s' % (
                n + 1,
                probstr(prob),
                rid,
                n + 1,
                rid,
                n + 1,
                '\n\n'.join(
                    '%s\n%s' %
                    (w, DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in fragments or ()  # if frag.count('(') > 1
                ),
                DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        deriv = Markup(
            'Fragments used in the highest ranked derivation'
            ' of best parse tree:\n%s' % (
                '\n\n'.join(
                    '%s\n%s' %
                    (w, DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in parsetrees_[0][2] or ()
                    # if frag.count('(') > 1
                ))) if parsetrees_ else ''
        # Summary: settings, stage messages, timings, bracketed n-best list.
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (' '.join(
            '%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' %
            (len(senttok), lang, est, objfun, marg), msg, elapsed,
            '10 most probable parse trees:',
            ''.join('%d. [%s] %s' %
                    (n + 1, probstr(prob), writediscbrackettree(tree, senttok))
                    for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, nbest, deriv, info, link, dep, depsvg),
                  timeout=5000)
    else:
        # Cache hit: reuse the stored pieces.
        (sent, result, nbest, deriv, info, link, dep, depsvg) = resp
    if html:
        return render_template('parsetree.html',
                               sent=sent,
                               result=result,
                               nbest=nbest,
                               deriv=deriv,
                               info=info,
                               link=link,
                               dep=dep,
                               depsvg=depsvg,
                               randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')
Beispiel #14
0
def reattach():
    """Apply a structural edit to the tree and return the re-drawn result.

    Depending on the request arguments one of three edits is made:

    - ``newparent=deletenode``: the node ``nodeid`` is removed and replaced
      by its children;
    - ``nodeid=newlabel_<label>``: a new node with that label is inserted
      between the node ``newparent`` and its children;
    - otherwise: the node ``nodeid`` is moved under the node ``newparent``.

    An edit that is not possible leaves the tree as it is and puts an error
    message in the output; the session's re-attach counter is incremented
    only when no error message was produced.
    """
    def argnodeid(name):
        """Return the node number of a ``t<tree>_<node>`` argument."""
        _treeid, number = request.args.get(name, '').lstrip('t').split('_')
        return int(number)

    def parentof(root, target):
        """Return the first subtree of root with target as child, if any."""
        return next((node for node in root.subtrees()
                     if any(child is target for child in node)), None)

    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    senttok, _ = worker.postokenize(sent)
    treestr = request.args.get('tree', '')
    try:
        tree, _sent1 = validate(treestr, senttok)
    except ValueError as err:
        return str(err)
    dt = DrawTree(tree, senttok)
    error = ''
    if request.args.get('newparent') == 'deletenode':
        # remove the node by splicing its children into its parent
        nodeid = argnodeid('nodeid')
        victim = dt.nodes[nodeid]
        if nodeid == 0 or isinstance(victim[0], int):
            error = 'ERROR: cannot remove ROOT or POS node'
        else:
            orphans = list(victim)
            victim[:] = []
            holder = parentof(dt.nodes[0], victim)
            if holder is not None:
                position = holder.index(victim)
                holder[position:position + 1] = orphans
                tree = canonicalize(dt.nodes[0])
                dt = DrawTree(tree, senttok)  # kludge: rebuild drawing
    elif request.args.get('nodeid', '').startswith('newlabel_'):
        # splice in a new node between the parent and its children
        newparent = argnodeid('newparent')
        label = request.args.get('nodeid').split('_', 1)[1]
        parent = dt.nodes[newparent]
        if isinstance(parent[0], int):
            error = 'ERROR: cannot add node under POS tag'
        else:
            children = list(parent)
            parent[:] = []
            parent[:] = [Tree(label, children)]
            tree = canonicalize(dt.nodes[0])
            dt = DrawTree(tree, senttok)  # kludge: rebuild drawing
    else:
        # move an existing node under an existing new parent
        nodeid = argnodeid('nodeid')
        newparent = argnodeid('newparent')
        moved = dt.nodes[nodeid]
        destination = dt.nodes[newparent]
        if any(node is destination for node in moved.subtrees()):
            error = ('ERROR: cannot re-attach subtree'
                     ' under (descendant of) itself\n')
        else:
            holder = parentof(dt.nodes[0], moved)
            if holder is None:
                pass  # no parent found: nothing to do
            elif len(holder) > 1:
                holder.remove(moved)
                destination.append(moved)
                tree = canonicalize(dt.nodes[0])
                dt = DrawTree(tree, senttok)  # kludge: rebuild drawing
            else:
                error = ('ERROR: re-attaching only child creates'
                         ' empty node %s; remove manually\n' % holder)
    treestr = writediscbrackettree(tree, senttok, pretty=True).rstrip()
    query = urlencode(dict(sentno=sentno, tree=treestr))
    link = '<a href="/annotate/accept?%s">accept this tree</a>' % query
    if error == '':
        session['actions'][REATTACH] += 1
        session.modified = True
    drawing = dt.text(
        unicodelines=True,
        html=True,
        funcsep='-',
        morphsep='/',
        nodeprops='t0')
    return Markup('%s\n\n%s%s\t%s' % (link, error, drawing, treestr))
Beispiel #15
0
def test_treedraw():
    """Smoke test for tree drawing: passes as long as nothing raises.

    Every line of ``trees`` (bracketed trees with integer leaf indices) is
    paired with the corresponding line of ``sents`` (whitespace-separated
    tokens); each pair is drawn and printed.
    """
    trees = '''(ROOT (S (ADV 0) (VVFIN 1) (NP (PDAT 2) (NN 3)) (PTKNEG 4) \
				(PP (APPRART 5) (NN 6) (NP (ART 7) (ADJA 8) (NN 9)))) ($. 10))
			(S (NP (NN 1) (EX 3)) (VP (VB 0) (JJ 2)))
			(S (VP (PDS 0) (ADV 3) (VVINF 4)) (PIS 2) (VMFIN 1))
			(top (du (comp 0) (smain (noun 1) (verb 2) (inf (verb 8) (inf \
				(adj 3) (pp (prep 4) (np (det 5) (noun 6))) (part 7) (verb 9) \
				(pp (prep 10) (np (det 11) (noun 12) (pp (prep 13) (mwu \
				(noun 14) (noun 15))))))))) (punct 16))
			(top (smain (noun 0) (verb 1) (inf (verb 5) (inf (np (det 2) \
				(adj 3) (noun 4)) (verb 6) (pp (prep 7) (noun 8))))) (punct 9))
			(top (smain (noun 0) (verb 1) (noun 2) (inf (adv 3) (verb 4))) \
				(punct 5))
			(top (punct 5) (du (smain (noun 0) (verb 1) (ppart (np (det 2) \
				(noun 3)) (verb 4))) (conj (sv1 (conj (noun 6) (vg 7) (np \
				(det 8) (noun 9))) (verb 10) (noun 11) (part 12)) (vg 13) \
				(sv1 (verb 14) (ti (comp 19) (inf (np (conj (det 15) (vg 16) \
				(det 17)) (noun 18)) (verb 20)))))) (punct 21))
			(top (punct 10) (punct 16) (punct 18) (smain (np (det 0) (noun 1) \
				(pp (prep 2) (np (det 3) (noun 4)))) (verb 5) (adv 6) (np \
				(noun 7) (noun 8)) (part 9) (np (det 11) (noun 12) (pp \
				(prep 13) (np (det 14) (noun 15)))) (conj (vg 20) (ppres \
				(adj 17) (pp (prep 22) (np (det 23) (adj 24) (noun 25)))) \
				(ppres (adj 19)) (ppres (adj 21)))) (punct 26))
			(top (punct 10) (punct 11) (punct 16) (smain (np (det 0) \
				(noun 1)) (verb 2) (np (det 3) (noun 4)) (adv 5) (du (cp \
				(comp 6) (ssub (noun 7) (verb 8) (inf (verb 9)))) (du \
				(smain (noun 12) (verb 13) (adv 14) (part 15)) (noun 17)))) \
				(punct 18) (punct 19))
			(top (smain (noun 0) (verb 1) (inf (verb 8) (inf (verb 9) (inf \
				(adv 2) (pp (prep 3) (noun 4)) (pp (prep 5) (np (det 6) \
				(noun 7))) (verb 10))))) (punct 11))
			(top (smain (noun 0) (verb 1) (pp (prep 2) (np (det 3) (adj 4) \
				(noun 5) (rel (noun 6) (ssub (noun 7) (verb 10) (ppart \
				(adj 8) (part 9) (verb 11))))))) (punct 12))
			(top (smain (np (det 0) (noun 1)) (verb 2) (ap (adv 3) (num 4) \
				(cp (comp 5) (np (det 6) (adj 7) (noun 8) (rel (noun 9) (ssub \
				(noun 10) (verb 11) (pp (prep 12) (np (det 13) (adj 14) \
				(adj 15) (noun 16))))))))) (punct 17))
			(top (smain (np (det 0) (noun 1)) (verb 2) (adv 3) (pp (prep 4) \
				(np (det 5) (noun 6)) (part 7))) (punct 8))
			(top (punct 7) (conj (smain (noun 0) (verb 1) (np (det 2) \
				(noun 3)) (pp (prep 4) (np (det 5) (noun 6)))) (smain \
				(verb 8) (np (det 9) (num 10) (noun 11)) (part 12)) (vg 13) \
				(smain (verb 14) (noun 15) (pp (prep 16) (np (det 17) \
				(noun 18) (pp (prep 19) (np (det 20) (noun 21))))))) \
				(punct 22))
			(top (smain (np (det 0) (noun 1) (rel (noun 2) (ssub (np (num 3) \
				(noun 4)) (adj 5) (verb 6)))) (verb 7) (ppart (verb 8) (pp \
				(prep 9) (noun 10)))) (punct 11))
			(top (conj (sv1 (np (det 0) (noun 1)) (verb 2) (ppart (verb 3))) \
				(vg 4) (sv1 (verb 5) (pp (prep 6) (np (det 7) (adj 8) \
				(noun 9))))) (punct 10))
			(top (smain (noun 0) (verb 1) (np (det 2) (noun 3)) (inf (adj 4) \
				(verb 5) (cp (comp 6) (ssub (noun 7) (adv 8) (verb 10) (ap \
				(num 9) (cp (comp 11) (np (det 12) (adj 13) (noun 14) (pp \
				(prep 15) (conj (np (det 16) (noun 17)) (vg 18) (np \
				(noun 19))))))))))) (punct 20))
			(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) \
				(inf (verb 6) (conj (inf (pp (prep 2) (np (det 3) (noun 4))) \
				(verb 7)) (inf (verb 9)) (vg 10) (inf (verb 11)))))) \
				(punct 12))
			(top (smain (verb 2) (noun 3) (adv 4) (ppart (np (det 0) \
				(noun 1)) (verb 5))) (punct 6))
			(top (conj (smain (np (det 0) (noun 1)) (verb 2) (adj 3) (pp \
				(prep 4) (np (det 5) (noun 6)))) (vg 7) (smain (np (det 8) \
				(noun 9) (pp (prep 10) (np (det 11) (noun 12)))) (verb 13) \
				(pp (prep 14) (np (det 15) (noun 16))))) (punct 17))
			(top (conj (smain (noun 0) (verb 1) (inf (ppart (np (noun 2) \
				(noun 3)) (verb 4)) (verb 5))) (vg 6) (smain (noun 7) \
				(inf (ppart (np (det 8) (noun 9)))))) (punct 10))
			(A (B1 (t 6) (t 13)) (B2 (t 3) (t 7) (t 10))  (B3 (t 1) \
				(t 9) (t 11) (t 14) (t 16)) (B4 (t 0) (t 5) (t 8)))
			(A (B1 6 13) (B2 3 7 10)  (B3 1 \
				9 11 14 16) (B4 0 5 8))
			(VP (VB 0) (PRT 2))
			(VP (VP 0 3) (NP (PRP 1) (NN 2)))
			(ROOT (S (VP_2 (PP (APPR 0) (ART 1) (NN 2) (PP (APPR 3) (ART 4) \
				(ADJA 5) (NN 6))) (ADJD 10) (PP (APPR 11) (NN 12)) (VVPP 13)) \
				(VAFIN 7) (NP (ART 8) (NN 9))) ($. 14))'''
    sents = '''Leider stehen diese Fragen nicht im Vordergrund der \
				augenblicklichen Diskussion .
			is Mary happy there
			das muss man jetzt machen
			Of ze had gewoon met haar vriendinnen rond kunnen slenteren in de \
				buurt van Trafalgar Square .
			Het had een prachtige dag kunnen zijn in Londen .
			Cathy zag hen wild zwaaien .
			Het was een spel geworden , zij en haar vriendinnen kozen iemand \
				uit en probeerden zijn of haar nationaliteit te raden .
			Elk jaar in het hoogseizoen trokken daar massa's toeristen \
				voorbij , hun fototoestel in de aanslag , pratend , gillend \
				en lachend in de vreemdste talen .
			Haar vader stak zijn duim omhoog alsof hij wilde zeggen : " het \
				komt wel goed , joch " .
			Ze hadden languit naast elkaar op de strandstoelen kunnen gaan \
				liggen .
			Het hoorde bij de warme zomerdag die ze ginds achter had gelaten .
			De oprijlaan was niet meer dan een hobbelige zandstrook die zich \
				voortslingerde tussen de hoge grijze boomstammen .
			Haar moeder kleefde bijna tegen het autoraampje aan .
			Ze veegde de tranen uit haar ooghoeken , tilde haar twee koffers \
				op en begaf zich in de richting van het landhuis .
			Het meisje dat vijf keer juist raadde werd getrakteerd op ijs .
			Haar neus werd platgedrukt en leek op een jonge champignon .
			Cathy zag de BMW langzaam verdwijnen tot hij niet meer was dan \
				een zilveren schijnsel tussen de bomen en struiken .
			Ze had met haar moeder kunnen gaan winkelen , zwemmen of \
				terrassen .
			Dat werkwoord had ze zelf uitgevonden .
			De middagzon hing klein tussen de takken en de schaduwen van de \
				wolken drentelden over het gras .
			Zij zou mams rug ingewreven hebben en mam de hare .
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			Mit einer Messe in der Sixtinischen Kapelle ist das Konklave \
				offiziell zu Ende gegangen .'''
    from discodop.tree import DrawTree
    # One tree / one sentence per physical line of the literals above; the
    # backslash-newline pairs inside the literals join continuation lines.
    parsed = [Tree(line) for line in trees.splitlines()]
    tokenized = [line.split() for line in sents.splitlines()]
    # Sentences with gaps (None tokens) cannot be spelled in the literal.
    tokenized += [['Wake', None, 'up'], [None, 'your', 'friend', None]]
    # NOTE(review): zip() stops at the shorter sequence; the two literals
    # appear to hold the same number of entries, so the two appended
    # sentences look like they are never drawn -- confirm.
    for n, (tree, sent) in enumerate(zip(parsed, tokenized)):
        drawing = DrawTree(tree, sent)
        words = ' '.join('...' if tok is None else tok for tok in sent)
        print('\ntree, sent', n, tree, words, repr(drawing), sep='\n')
        try:
            print(drawing.text(unicodelines=True, ansi=True), sep='\n')
        except (UnicodeDecodeError, UnicodeEncodeError):
            # fall back to plain ASCII art without colour codes
            print(drawing.text(unicodelines=False, ansi=False), sep='\n')
Beispiel #16
0
def test_treedraw():
	"""Smoke test for tree drawing: passes as long as nothing raises.

	Every line of ``trees`` (bracketed trees with integer leaf indices) is
	paired with the corresponding line of ``sents`` (whitespace-separated
	tokens); each pair is drawn and printed.
	"""
	trees = '''(ROOT (S (ADV 0) (VVFIN 1) (NP (PDAT 2) (NN 3)) (PTKNEG 4) \
				(PP (APPRART 5) (NN 6) (NP (ART 7) (ADJA 8) (NN 9)))) ($. 10))
			(S (NP (NN 1) (EX 3)) (VP (VB 0) (JJ 2)))
			(S (VP (PDS 0) (ADV 3) (VVINF 4)) (PIS 2) (VMFIN 1))
			(top (du (comp 0) (smain (noun 1) (verb 2) (inf (verb 8) (inf \
				(adj 3) (pp (prep 4) (np (det 5) (noun 6))) (part 7) (verb 9) \
				(pp (prep 10) (np (det 11) (noun 12) (pp (prep 13) (mwu \
				(noun 14) (noun 15))))))))) (punct 16))
			(top (smain (noun 0) (verb 1) (inf (verb 5) (inf (np (det 2) \
				(adj 3) (noun 4)) (verb 6) (pp (prep 7) (noun 8))))) (punct 9))
			(top (smain (noun 0) (verb 1) (noun 2) (inf (adv 3) (verb 4))) \
				(punct 5))
			(top (punct 5) (du (smain (noun 0) (verb 1) (ppart (np (det 2) \
				(noun 3)) (verb 4))) (conj (sv1 (conj (noun 6) (vg 7) (np \
				(det 8) (noun 9))) (verb 10) (noun 11) (part 12)) (vg 13) \
				(sv1 (verb 14) (ti (comp 19) (inf (np (conj (det 15) (vg 16) \
				(det 17)) (noun 18)) (verb 20)))))) (punct 21))
			(top (punct 10) (punct 16) (punct 18) (smain (np (det 0) (noun 1) \
				(pp (prep 2) (np (det 3) (noun 4)))) (verb 5) (adv 6) (np \
				(noun 7) (noun 8)) (part 9) (np (det 11) (noun 12) (pp \
				(prep 13) (np (det 14) (noun 15)))) (conj (vg 20) (ppres \
				(adj 17) (pp (prep 22) (np (det 23) (adj 24) (noun 25)))) \
				(ppres (adj 19)) (ppres (adj 21)))) (punct 26))
			(top (punct 10) (punct 11) (punct 16) (smain (np (det 0) \
				(noun 1)) (verb 2) (np (det 3) (noun 4)) (adv 5) (du (cp \
				(comp 6) (ssub (noun 7) (verb 8) (inf (verb 9)))) (du \
				(smain (noun 12) (verb 13) (adv 14) (part 15)) (noun 17)))) \
				(punct 18) (punct 19))
			(top (smain (noun 0) (verb 1) (inf (verb 8) (inf (verb 9) (inf \
				(adv 2) (pp (prep 3) (noun 4)) (pp (prep 5) (np (det 6) \
				(noun 7))) (verb 10))))) (punct 11))
			(top (smain (noun 0) (verb 1) (pp (prep 2) (np (det 3) (adj 4) \
				(noun 5) (rel (noun 6) (ssub (noun 7) (verb 10) (ppart \
				(adj 8) (part 9) (verb 11))))))) (punct 12))
			(top (smain (np (det 0) (noun 1)) (verb 2) (ap (adv 3) (num 4) \
				(cp (comp 5) (np (det 6) (adj 7) (noun 8) (rel (noun 9) (ssub \
				(noun 10) (verb 11) (pp (prep 12) (np (det 13) (adj 14) \
				(adj 15) (noun 16))))))))) (punct 17))
			(top (smain (np (det 0) (noun 1)) (verb 2) (adv 3) (pp (prep 4) \
				(np (det 5) (noun 6)) (part 7))) (punct 8))
			(top (punct 7) (conj (smain (noun 0) (verb 1) (np (det 2) \
				(noun 3)) (pp (prep 4) (np (det 5) (noun 6)))) (smain \
				(verb 8) (np (det 9) (num 10) (noun 11)) (part 12)) (vg 13) \
				(smain (verb 14) (noun 15) (pp (prep 16) (np (det 17) \
				(noun 18) (pp (prep 19) (np (det 20) (noun 21))))))) \
				(punct 22))
			(top (smain (np (det 0) (noun 1) (rel (noun 2) (ssub (np (num 3) \
				(noun 4)) (adj 5) (verb 6)))) (verb 7) (ppart (verb 8) (pp \
				(prep 9) (noun 10)))) (punct 11))
			(top (conj (sv1 (np (det 0) (noun 1)) (verb 2) (ppart (verb 3))) \
				(vg 4) (sv1 (verb 5) (pp (prep 6) (np (det 7) (adj 8) \
				(noun 9))))) (punct 10))
			(top (smain (noun 0) (verb 1) (np (det 2) (noun 3)) (inf (adj 4) \
				(verb 5) (cp (comp 6) (ssub (noun 7) (adv 8) (verb 10) (ap \
				(num 9) (cp (comp 11) (np (det 12) (adj 13) (noun 14) (pp \
				(prep 15) (conj (np (det 16) (noun 17)) (vg 18) (np \
				(noun 19))))))))))) (punct 20))
			(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) \
				(inf (verb 6) (conj (inf (pp (prep 2) (np (det 3) (noun 4))) \
				(verb 7)) (inf (verb 9)) (vg 10) (inf (verb 11)))))) \
				(punct 12))
			(top (smain (verb 2) (noun 3) (adv 4) (ppart (np (det 0) \
				(noun 1)) (verb 5))) (punct 6))
			(top (conj (smain (np (det 0) (noun 1)) (verb 2) (adj 3) (pp \
				(prep 4) (np (det 5) (noun 6)))) (vg 7) (smain (np (det 8) \
				(noun 9) (pp (prep 10) (np (det 11) (noun 12)))) (verb 13) \
				(pp (prep 14) (np (det 15) (noun 16))))) (punct 17))
			(top (conj (smain (noun 0) (verb 1) (inf (ppart (np (noun 2) \
				(noun 3)) (verb 4)) (verb 5))) (vg 6) (smain (noun 7) \
				(inf (ppart (np (det 8) (noun 9)))))) (punct 10))
			(A (B1 (t 6) (t 13)) (B2 (t 3) (t 7) (t 10))  (B3 (t 1) \
				(t 9) (t 11) (t 14) (t 16)) (B4 (t 0) (t 5) (t 8)))
			(A (B1 6 13) (B2 3 7 10)  (B3 1 \
				9 11 14 16) (B4 0 5 8))
			(VP (VB 0) (PRT 2))
			(VP (VP 0 3) (NP (PRP 1) (NN 2)))
			(ROOT (S (VP_2 (PP (APPR 0) (ART 1) (NN 2) (PP (APPR 3) (ART 4) \
				(ADJA 5) (NN 6))) (ADJD 10) (PP (APPR 11) (NN 12)) (VVPP 13)) \
				(VAFIN 7) (NP (ART 8) (NN 9))) ($. 14))'''
	sents = '''Leider stehen diese Fragen nicht im Vordergrund der \
				augenblicklichen Diskussion .
			is Mary happy there
			das muss man jetzt machen
			Of ze had gewoon met haar vriendinnen rond kunnen slenteren in de \
				buurt van Trafalgar Square .
			Het had een prachtige dag kunnen zijn in Londen .
			Cathy zag hen wild zwaaien .
			Het was een spel geworden , zij en haar vriendinnen kozen iemand \
				uit en probeerden zijn of haar nationaliteit te raden .
			Elk jaar in het hoogseizoen trokken daar massa's toeristen \
				voorbij , hun fototoestel in de aanslag , pratend , gillend \
				en lachend in de vreemdste talen .
			Haar vader stak zijn duim omhoog alsof hij wilde zeggen : " het \
				komt wel goed , joch " .
			Ze hadden languit naast elkaar op de strandstoelen kunnen gaan \
				liggen .
			Het hoorde bij de warme zomerdag die ze ginds achter had gelaten .
			De oprijlaan was niet meer dan een hobbelige zandstrook die zich \
				voortslingerde tussen de hoge grijze boomstammen .
			Haar moeder kleefde bijna tegen het autoraampje aan .
			Ze veegde de tranen uit haar ooghoeken , tilde haar twee koffers \
				op en begaf zich in de richting van het landhuis .
			Het meisje dat vijf keer juist raadde werd getrakteerd op ijs .
			Haar neus werd platgedrukt en leek op een jonge champignon .
			Cathy zag de BMW langzaam verdwijnen tot hij niet meer was dan \
				een zilveren schijnsel tussen de bomen en struiken .
			Ze had met haar moeder kunnen gaan winkelen , zwemmen of \
				terrassen .
			Dat werkwoord had ze zelf uitgevonden .
			De middagzon hing klein tussen de takken en de schaduwen van de \
				wolken drentelden over het gras .
			Zij zou mams rug ingewreven hebben en mam de hare .
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
			Mit einer Messe in der Sixtinischen Kapelle ist das Konklave \
				offiziell zu Ende gegangen .'''
	from discodop.tree import DrawTree
	# One tree / one sentence per physical line of the literals above; the
	# backslash-newline pairs inside the literals join continuation lines.
	parsed = [Tree(line) for line in trees.splitlines()]
	tokenized = [line.split() for line in sents.splitlines()]
	# Sentences with gaps (None tokens) cannot be spelled in the literal.
	tokenized += [['Wake', None, 'up'], [None, 'your', 'friend', None]]
	# NOTE(review): zip() stops at the shorter sequence; the two literals
	# appear to hold the same number of entries, so the two appended
	# sentences look like they are never drawn -- confirm.
	for n, (tree, sent) in enumerate(zip(parsed, tokenized)):
		drawing = DrawTree(tree, sent)
		words = ' '.join('...' if tok is None else tok for tok in sent)
		print('\ntree, sent', n, tree, words, repr(drawing), sep='\n')
		try:
			print(drawing.text(unicodelines=True, ansi=True), sep='\n')
		except (UnicodeDecodeError, UnicodeEncodeError):
			# fall back to plain ASCII art without colour codes
			print(drawing.text(unicodelines=False, ansi=False), sep='\n')
Beispiel #17
0
def parse():
    """Parse the requested sentence and render the result (AJAX endpoint).

    Reads ``sentno`` (1-indexed) and the optional ``require`` / ``block``
    constraints from the query string, obtains the parses through the
    current user's entry in ``WORKERS`` and renders ``annotatetree.html``
    with the best tree, the decision tree over the n-best list and some
    timing information.
    """
    sentno = int(request.args.get('sentno'))  # 1-indexed
    sent = SENTENCES[QUEUE[sentno - 1][0]]
    username = session['username']
    rawrequire = request.args.get('require', '')
    rawblock = request.args.get('block', '')
    # Only non-empty constraints are carried over into the generated links.
    urlprm = dict(sentno=sentno)
    for key, value in (('require', rawrequire), ('block', rawblock)):
        if value:
            urlprm[key] = value
    require, block = parseconstraints(rawrequire, rawblock)
    if require or block:
        # count the use of constraints as an annotation action
        session['actions'][CONSTRAINTS] += 1
        session.modified = True
    if False and app.config['DEBUG']:  # disabled switch: parse in-process
        resp = worker.getparses(sent, require, block)
    else:
        future = WORKERS[username].submit(
            worker.getparses, sent, require, block)
        resp = future.result()  # blocks until the worker is done
    senttok, parsetrees, messages, elapsed = resp

    maxdepth = dep = depsvg = ''
    if parsetrees:
        if workerattr('headrules'):
            dep = writedependencies(parsetrees[0][1], senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        result = ''
        dectree, maxdepth, _ = decisiontree(parsetrees, senttok, urlprm)
        prob, tree, _treestr, _fragments = parsetrees[0]
        fields = dict(
            n=1,
            prob=probstr(prob),
            urlprm=urlencode(dict(urlprm, n=1)),
            tree=DrawTree(tree, senttok).text(unicodelines=True,
                                              html=True,
                                              funcsep='-',
                                              morphsep='/',
                                              nodeprops='t1'))
        best = ('%(n)d. [%(prob)s] '
                '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
                '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
                '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n'
                '%(tree)s' % fields)
        nbest = Markup('%s\nbest tree: %s' % (dectree, best))
    else:
        result = ('no parse! reload page to clear constraints, '
                  'or continue with next sentence.')
        nbest = ''

    msg = '\n'.join(messages)
    timings = ' '.join('%gs' % a for a in elapsed)
    elapsed = 'CPU time elapsed: %s => %gs' % (timings, sum(elapsed))
    lengthline = 'length: %d;' % len(senttok)
    # entries without a tree string are left out of the listing
    listing = ''.join(
        '%d. [%s] %s' %
        (n + 1, probstr(prob), writediscbrackettree(treestr, senttok))
        for n, (prob, _tree, treestr, _deriv) in enumerate(parsetrees)
        if treestr is not None)
    info = '\n'.join((lengthline, msg, elapsed,
                      'most probable parse trees:', listing + '\n'))
    return render_template('annotatetree.html',
                           sent=sent,
                           result=result,
                           nbest=nbest,
                           info=info,
                           dep=dep,
                           depsvg=depsvg,
                           maxdepth=maxdepth,
                           msg='%d parse trees' % len(parsetrees))
Beispiel #18
0
def decisiontree(parsetrees, sent, urlprm):
    """Create a decision tree to select among n trees.

    :param parsetrees: sequence of ``(prob, tree, treestr, fragments)``
        tuples; ``prob`` is used as sample weight and ``tree`` is passed to
        ``getspans`` and ``DrawTree``.
    :param sent: the tokens of the sentence, indexed by the leaf indices
        of the spans.
    :param urlprm: dict of URL parameters; a copy extended with ``n`` and
        ``dec`` is URL-encoded into the links of each candidate tree.
    :returns: a tuple ``(html, maxdepth, path)`` with the HTML for the
        decision nodes followed by the hidden tree drawings, the depth of
        the fitted tree, and the result of ``decision_path`` on the
        training data. With fewer than two parse trees there is nothing
        to decide and ``('', 0, None)`` is returned.
    """
    if len(parsetrees) <= 1:
        return '', 0, None
    # The class labels are the n-best trees 0..n
    # The attributes are the labeled spans in the trees; they split the n-best
    # trees into two sets with and without that span.
    spans = {}
    for n, (_prob, tree, _, _) in enumerate(parsetrees):
        for span in getspans(tree):
            # simplest strategy: store presence of span as binary feature
            # perhaps better: use weight from tree probability
            spans.setdefault(span, set()).add(n)

    # create decision tree with scikit-learn
    features = list(spans)
    # NOTE(review): tokens are interpolated into HTML without escaping
    # here -- confirm that sentences cannot contain markup characters.
    featurenames = [
        '[%s %s]' % (label, ' '.join(sent[n] for n in leaves))
        for label, leaves in features
    ]
    # One row per tree, one boolean column per span. The builtin ``bool``
    # is used because the ``np.bool`` alias was removed in NumPy 1.24.
    data = np.array([[n in spans[span] for span in features]
                     for n in range(len(parsetrees))],
                    dtype=bool)
    estimator = DecisionTreeClassifier(random_state=0)
    estimator.fit(data,
                  range(len(parsetrees)),
                  sample_weight=[prob for prob, _, _, _ in parsetrees])
    path = estimator.decision_path(data)

    def rec(tree, n=0, depth=0):
        """Recursively produce a string representation of a decision tree."""
        if tree.children_left[n] == tree.children_right[n]:
            # both children equal: node n is a leaf; x is the index of the
            # first class (= parse tree) with a nonzero value at this node
            x = tree.value[n].nonzero()[1][0]
            prob, _tree, _treestr, _fragments = parsetrees[x]
            thistree = (
                '%(n)d. [%(prob)s] '
                '<a href="/annotate/accept?%(urlprm)s">accept this tree</a>; '
                '<a href="/annotate/edit?%(urlprm)s">edit</a>; '
                '<a href="/annotate/deriv?%(urlprm)s">derivation</a>\n\n' %
                dict(n=x + 1,
                     prob=probstr(prob),
                     urlprm=urlencode(dict(urlprm, n=x + 1, dec=depth))))
            return ('<span id="d%d" style="display: none; ">%stree %d:\n'
                    '%s</span>' % (n, depth * '\t', x + 1, thistree))
        left = tree.children_left[n]
        right = tree.children_right[n]
        # exleft / exright: index of the first training row (= parse tree)
        # whose decision path passes through the respective child node
        return ('<span id=d%(n)d style="display: %(display)s; ">'
                '%(indent)s%(constituent)s '
                '<a href="javascript: showhide(\'d%(right)s\', \'d%(left)s\', '
                '\'dd%(exright)s\', \'%(numtrees)s\'); ">'
                'good constituent</a> '
                '<a href="javascript: showhide(\'d%(left)s\', \'d%(right)s\', '
                '\'dd%(exleft)s\', \'%(numtrees)s\'); ">'
                'bad constituent</a> '
                '%(subtree1)s%(subtree2)s</span>' % dict(
                    n=n,
                    display='block' if n == 0 else 'none',
                    indent=depth * 4 * ' ',
                    constituent=featurenames[tree.feature[n]],
                    left=left,
                    right=right,
                    exleft=path[:, left].nonzero()[0][0],
                    exright=path[:, right].nonzero()[0][0],
                    numtrees=len(parsetrees),
                    subtree1=rec(tree, left, depth + 1),
                    subtree2=rec(tree, right, depth + 1),
                ))

    nodes = rec(estimator.tree_)
    # Emit one hidden drawing per distinct parse tree referenced by any node.
    leaves = []
    seen = set()
    for n in range(estimator.tree_.node_count):
        x = estimator.tree_.value[n].nonzero()[1][0]
        if x in seen:
            continue
        seen.add(x)
        _prob, xtree, _treestr, _fragments = parsetrees[x]
        thistree = DrawTree(xtree, sent).text(unicodelines=True,
                                              html=True,
                                              funcsep='-',
                                              morphsep='/',
                                              nodeprops='t%d' % (x + 1))
        leaves.append('<span id="dd%d" style="display: none; ">%s</span>' %
                      (x, thistree))
    return nodes + ''.join(leaves), estimator.tree_.max_depth, path