Beispiel #1
0
    def fix_nongap_extraction(self, _, n, pred, k):
        """Repair a non-gap extraction in the subtree ``n``.

        The null element is removed from ``n`` first.  A trace pattern is then
        built from the trace index carried by ``k.tag``, and every match of it
        under ``n`` is handed to ``fix_object_gap``.  After each match, when
        ``relabel_relativiser(pred)`` returns a false value, the first node
        matching the ``/[ICV]P/=TOP $ *=SS`` pattern is wrapped in a unary
        ``NN`` node whose category is ``X / X``, with ``X`` the category of
        the node captured as ``SS``.

        The first positional argument is not used.
        """
        debug("Fixing nongap extraction: %s", pprint(n))
        debug("k %s", pprint(k))
        self.remove_null_element(n)

        trace_index = get_trace_index_from_tag(k.tag)
        trace_pattern = (
            r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
            % dict(tags=ModifierTagsRegex, index=trace_index))

        # "<<" is used in the pattern because fix_*_topicalisation runs before
        # fix_nongap_extraction and can introduce an extra layer between the
        # phrasal tag and the trace.
        for _match, found in find_all(n, trace_pattern, with_context=True):
            # Remove T from P, then replace P with S.
            self.fix_object_gap(found.pp, found.p, found.t, found.s)

            if self.relabel_relativiser(pred):
                continue

            # No relativiser could be relabelled: insert a null relativiser.
            top, top_ctx = get_first(
                n, r'/[ICV]P/=TOP $ *=SS', with_context=True)
            sister = top_ctx.ss
            relativiser_cat = sister.category / sister.category

            debug("Creating null relativiser unary category: %s",
                  relativiser_cat)
            replace_kid(
                top.parent, top,
                Node("NN", [top], relativiser_cat, head_index=0))
Beispiel #2
0
    def fix_nongap_extraction(self, _, n, pred, k):
        """Repair a non-gap extraction in the subtree ``n``.

        Steps, in order:

        1. remove the null element from ``n``;
        2. build a trace pattern from the trace index found in ``k.tag``;
        3. pass every match of that pattern under ``n`` to
           ``fix_object_gap``;
        4. after each match, if ``relabel_relativiser(pred)`` returns a false
           value, wrap the first ``/[ICV]P/=TOP $ *=SS`` match in a unary
           ``NN`` node of category ``X / X``, where ``X`` is the category of
           the node captured as ``SS``.

        The first positional argument is ignored.
        """
        root = n
        debug("Fixing nongap extraction: %s", pprint(root))
        debug("k %s", pprint(k))
        self.remove_null_element(root)

        substitutions = {
            'tags': ModifierTagsRegex,
            'index': get_trace_index_from_tag(k.tag),
        }
        trace_pattern = (
            r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
            % substitutions)

        # The pattern uses "<<" because fix_*_topicalisation comes before
        # fix_nongap_extraction and may have added an extra layer between the
        # phrasal tag and the trace.
        for _match, captured in find_all(root, trace_pattern, with_context=True):
            outer, parent, trace, sister = (
                captured.pp, captured.p, captured.t, captured.s)

            # Remove T from P, then replace P with S.
            self.fix_object_gap(outer, parent, trace, sister)

            relabelled = self.relabel_relativiser(pred)
            if not relabelled:
                top, top_captured = get_first(
                    root, r'/[ICV]P/=TOP $ *=SS', with_context=True)
                top_sister = top_captured.ss
                null_rel_cat = top_sister.category / top_sister.category

                debug("Creating null relativiser unary category: %s",
                      null_rel_cat)
                replace_kid(
                    top.parent, top,
                    Node("NN", [top], null_rel_cat, head_index=0))
Beispiel #3
0
    def accept_derivation(self, bundle):
        """Print a bundle: its label, a dashed underline of the same length,
        the pretty-printed derivation, and a trailing blank line."""
        derivation = bundle.derivation

        # Each print takes exactly one parenthesised argument, so the output
        # is the same whether ``print`` is a statement or a function.
        print(bundle.label())
        print("-" * len(bundle.label()))

        print(pprint(derivation, sep='   '))
        print("")
Beispiel #4
0
 def accept_derivation(self, bundle):
     """Write the bundle's label, an underline of dashes as long as the
     label, the pretty-printed derivation and an empty line to stdout."""
     derivation = bundle.derivation

     # Heading block: label followed by a matching underline.
     print(bundle.label())
     print("-" * len(bundle.label()))

     # Tree body, then an empty separator line.
     rendered = pprint(derivation, sep='   ')
     print(rendered)
     print("")
Beispiel #5
0
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        """Repair the object gap of a long bei construction under ``top``.

        Unless ``reduced`` is true, the null element is removed from ``top``
        first.  The gap is located with a pattern keyed on the trace index of
        ``n.tag`` (or an escaped asterisk when ``n`` is falsy), fixed with
        ``fix_object_gap``, and categories are then recomputed from the
        captured ``S`` node up to ``top``.  Finally the bei category is
        relabelled and ``top.category`` is set to the ``left`` of its first
        child's category.

        ``node`` is only used for the debug message; ``bei`` is not used.
        """
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)

        # With no trace node available, fall back to the fragment r'\*'.
        trace_index = get_trace_index_from_tag(n.tag) if n else r'\*'

        gap_pattern = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % trace_index
        _match, found = get_first(top, gap_pattern, with_context=True)

        outer, parent, trace, sister = found.pp, found.p, found.t, found.s
        # Remove T from P, then replace P with S.
        self.fix_object_gap(outer, parent, trace, sister)

        self.fix_categories_starting_from(sister, until=top)
        self.relabel_bei_category(top, pred)

        first_kid = top[0]
        top.category = first_kid.category.left

        debug("done %s", pprint(top))
Beispiel #6
0
    def fix_topicalisation_with_gap(self, node, p, s, t):
        """Repair a topicalisation whose topic ``t`` leaves a gap inside ``s``.

        ``t`` has its tag reduced with ``base_tag`` and is wrapped, in place
        under ``p``, by a unary node carrying the type-raised topicalised
        category.  Every gap under ``s`` matching the trace index of ``t`` is
        then fixed with ``fix_object_gap``, and categories are recomputed from
        the captured ``S`` node up to the matched ``TOP`` node.

        ``node`` is only used for the debug message.
        """
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t))

        # Reduce the tag first so that this method does not match the same
        # node again (in case there's absorption on the top node, cf 2:22(5)).
        t.tag = base_tag(t.tag, strip_cptb_tag=False)

        # The topicalised category is derived from T itself.
        topic_cat = ptb_to_cat(t)

        # Wrap T in a unary node carrying the type-raised category.
        wrapper = Node(
            base_tag(t.tag, strip_cptb_tag=False),
            [t],
            typeraise(topic_cat, S, TR_TOPICALISATION),
            head_index=0)
        replace_kid(p, t, wrapper)

        trace_index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        gap_pattern = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % trace_index

        for top, found in find_all(s, gap_pattern, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(found.pp, found.p, found.t, found.s)
            self.fix_categories_starting_from(found.s, until=top)
Beispiel #7
0
    def clusterfix(self, top, pp, p, s, t):
        """Restructure an argument cluster coordination rooted at ``top``.

        The verb node ``t`` is first shrunk via ``fix_object_gap``.  The
        former children of ``top`` are then moved under a new ``TAG`` node,
        and ``top`` receives ``[t, <new node>]`` as its children.  Finally
        every VP under ``top`` matching the cluster pattern has its NP and
        QP children wrapped in unary nodes with argument-cluster categories,
        and categories are recomputed from the new NP wrapper up to ``top``.
        """
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)

        # Step 1: shrink the verb (node T).
        self.fix_object_gap(pp, p, t, s)

        # Step 2: reattach the verb above the TOP node.
        lowered = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, lowered]
        # The moved children must point at their new parent.
        for child in lowered:
            child.parent = lowered

        # Step 3: find and relabel argument clusters.
        cluster_pattern = r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP'
        for _match, found in find_all(top, cluster_pattern, with_context=True):
            vp, np_node, qp_node = found.vp, found.np, found.qp

            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            verb_result = t.category.left.left
            qp_cat, np_cat = qp_node.category, np_node.category

            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            np_cluster_cat = (verb_result / qp_cat) | ((verb_result / qp_cat) / np_cat)
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            qp_cluster_cat = (verb_result) | ((verb_result) / qp_cat)

            # Insert the unary wrapper nodes and fix the parent pointers.
            np_wrapper = Node(np_node.tag, [np_node], np_cluster_cat, head_index=0)
            np_node.parent = np_wrapper
            qp_wrapper = Node(qp_node.tag, [qp_node], qp_cluster_cat, head_index=0)
            qp_node.parent = qp_wrapper

            replace_kid(vp, np_node, np_wrapper)
            replace_kid(vp, qp_node, qp_wrapper)

            self.fix_categories_starting_from(np_wrapper, top)
Beispiel #8
0
    def clusterfix(self, top, pp, p, s, t):
        """Rewrite the subtree under ``top`` for argument cluster coordination.

        In order:

        1. ``fix_object_gap(pp, p, t, s)`` shrinks the verb node ``t``;
        2. a new ``TAG`` node takes over the current children of ``top``, and
           ``top`` gets ``t`` and that new node as its two children;
        3. for each VP matched by the cluster pattern, the NP and QP children
           are wrapped in unary nodes with newly built categories, after which
           categories are recomputed from the NP wrapper up to ``top``.
        """
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)

        # 1. Shrink the verb (node T).
        self.fix_object_gap(pp, p, t, s)

        # 2. Reattach the verb above the TOP node.
        cluster_root = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, cluster_root]
        # (Parent pointers of the moved children are reattached here.)
        for moved in cluster_root:
            moved.parent = cluster_root

        # 3. Find and relabel argument clusters.
        matches = find_all(
            top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True)
        for _match, captured in matches:
            vp, np_kid, qp_kid = captured.vp, captured.np, captured.qp

            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            s_back_np = t.category.left.left
            qp_category, np_category = qp_kid.category, np_kid.category

            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            raised_np_category = (
                (s_back_np / qp_category)
                | ((s_back_np / qp_category) / np_category))
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            raised_qp_category = (s_back_np) | ((s_back_np) / qp_category)

            # Insert unary nodes above NP and QP.
            np_unary = Node(
                np_kid.tag, [np_kid], raised_np_category, head_index=0)
            np_kid.parent = np_unary
            qp_unary = Node(
                qp_kid.tag, [qp_kid], raised_qp_category, head_index=0)
            qp_kid.parent = qp_unary

            replace_kid(vp, np_kid, np_unary)
            replace_kid(vp, qp_kid, qp_unary)

            self.fix_categories_starting_from(np_unary, top)
Beispiel #9
0
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        """Fix the object gap inside a long bei construction under ``top``.

        In order: remove the null element from ``top`` (skipped when
        ``reduced`` is true); find the gap with a pattern built from the trace
        index of ``n.tag``, or from an escaped asterisk when ``n`` is falsy;
        fix it with ``fix_object_gap``; recompute categories from the captured
        ``S`` node up to ``top``; relabel the bei category; and set
        ``top.category`` to the ``left`` of the first child's category.

        ``node`` only appears in the debug output and ``bei`` is unused.
        """
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)

        # Default to the fragment r'\*' and refine it when a trace node exists.
        trace_index = r'\*'
        if n:
            trace_index = get_trace_index_from_tag(n.tag)

        gap_pattern = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % trace_index
        _match, captured = get_first(top, gap_pattern, with_context=True)

        grandparent, parent, trace, sister = (
            captured.pp, captured.p, captured.t, captured.s)
        # Remove T from P, then replace P with S.
        self.fix_object_gap(grandparent, parent, trace, sister)

        self.fix_categories_starting_from(sister, until=top)
        self.relabel_bei_category(top, pred)

        top.category = top[0].category.left

        debug("done %s", pprint(top))
Beispiel #10
0
    def fix_topicalisation_without_gap(self, node, p, s, t):
        """Wrap the gapless topic ``t`` in a unary node under ``p``.

        A copy of ``t`` with its tag reduced by ``base_tag`` becomes the only
        child of a new node that keeps the original tag of ``t``.  The new
        node's category is the featureless category of ``p`` over the
        featureless category of ``s``, and it replaces ``t`` among the
        children of ``p``.  ``node`` is only used for the debug message.
        """
        debug("Fixing topicalisation without gap: %s", pprint(node))

        inner = copy(t)
        inner.tag = base_tag(inner.tag, strip_cptb_tag=False)

        topic_cat = featureless(p.category) / featureless(s.category)
        wrapper = Node(t.tag, [inner], topic_cat, head_index=0)
        replace_kid(p, t, wrapper)
Beispiel #11
0
    def fix_topicalisation_without_gap(self, node, p, s, t):
        """Replace topic ``t`` under ``p`` with a unary node around a copy of it.

        The copy gets the ``base_tag`` form of its tag, while the new unary
        node keeps the tag ``t`` had.  The unary node's category is built as
        ``featureless(p.category) / featureless(s.category)``.  ``node`` is
        used only for logging.
        """
        debug("Fixing topicalisation without gap: %s", pprint(node))

        # The stripped tag goes on the copy; the original tag goes on the
        # new parent node.
        topic_copy = copy(t)
        topic_copy.tag = base_tag(topic_copy.tag, strip_cptb_tag=False)

        result_cat = featureless(p.category)
        argument_cat = featureless(s.category)
        replace_kid(
            p, t,
            Node(t.tag, [topic_copy], result_cat / argument_cat, head_index=0))
Beispiel #12
0
def view_deriv(env, start_response):
    """Render one derivation of one corpus document as an HTML page.

    The document and derivation numbers are read from
    ``env['selector.vars']`` (keys ``doc`` and ``deriv``) and converted to
    integers.  The document is loaded from ``chtb_<doc, 4 digits>.fid`` under
    ``CORPORA_PATH``.

    ``start_response`` is always called with ``200 OK`` and an HTML content
    type.  The generator then yields exactly one item: the laid-out page when
    both the document and the requested derivation are truthy, otherwise the
    result of ``error_document()``.
    """
    # NOTE(review): node_index is a module-level counter reset per request;
    # presumably it is consumed by html_node_repr -- confirm.
    global node_index
    node_index = 0

    start_response('200 OK', [('Content-type', 'text/html')])
    variables = env['selector.vars']

    doc_id, deriv_id = int(variables['doc']), int(variables['deriv'])
    filename = 'chtb_%04d.fid' % doc_id

    doc = GuessReader(os.path.join(CORPORA_PATH, filename))
    bundle = doc[deriv_id] if doc else None
    if not bundle:
        # Covers both a missing document and a missing derivation.
        yield error_document()
        return

    word_template = '''<span class="word"><span id="word%(index)d" onmouseover="$('pos').show();$('pos%(index)s').show();$('tree%(index)s').addClassName('highlighted');" onmouseout="$('tree%(index)s').removeClassName('highlighted');$('pos%(index)s').hide();$('pos').hide();">%(body)s</span></span>'''

    # Fragments are collected in a list and joined once at the end instead of
    # growing a string with repeated "+=".
    parts = [
        '<div id="tree">',
        pprint(bundle.derivation,
               sep='&nbsp;',
               newline='<br/>',
               node_repr=html_node_repr),
        '</div>',
    ]

    # The non-ignored leaves feed both the word list and the POS list, so
    # they are collected once.
    visible_leaves = list(
        leaves(bundle.derivation, lambda e: not is_ignored(e)))

    parts.append('<div id="main">')
    parts.extend(
        word_template % {'index': n, 'body': leaf.lex}
        for n, leaf in enumerate(visible_leaves))
    parts.append(prev_next_links(doc, doc_id, deriv_id))
    parts.append('</div>')

    parts.append('<div id="pos">')
    parts.append('<span id="pos_display">')
    parts.extend(
        '<span id="pos%d" style="display:none">%s</span>' % (n, leaf.tag)
        for n, leaf in enumerate(visible_leaves))
    parts.append('</span>')
    parts.append('</div>')

    yield layout(''.join(parts))
Beispiel #13
0
    def fix_topicalisation_with_gap(self, node, p, s, t):
        """Handle a topicalised constituent ``t`` that leaves a gap in ``s``.

        In order:

        1. the tag of ``t`` is replaced by its ``base_tag`` form;
        2. ``t`` is replaced under ``p`` by a unary node that contains it and
           carries the type-raised topicalised category;
        3. for each match under ``s`` of a gap pattern keyed on the trace
           index of ``t``, the gap is fixed with ``fix_object_gap`` and
           categories are recomputed from the captured ``S`` node up to the
           matched ``TOP`` node.

        ``node`` is used only for logging.
        """
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
              lrp_repr(node), pprint(s), pprint(t))

        # Changing the tag stops this method from matching again (in case
        # there's absorption on the top node, cf 2:22(5)).
        t.tag = base_tag(t.tag, strip_cptb_tag=False)

        # Build the topicalised category from T, then put a node carrying it
        # between P and T.
        raised_from = ptb_to_cat(t)
        topic_node = Node(
            base_tag(t.tag, strip_cptb_tag=False), [t],
            typeraise(raised_from, S, TR_TOPICALISATION),
            head_index=0)
        replace_kid(p, t, topic_node)

        trace_index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        gap_pattern = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % trace_index

        for top, captured in find_all(s, gap_pattern, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(
                captured.pp, captured.p, captured.t, captured.s)
            self.fix_categories_starting_from(captured.s, until=top)
Beispiel #14
0
        import psyco
        psyco.full()
    except ImportError: pass
    
    from munge.ccg.parse import *

    file = "final/%s" % sys.argv[1]
    t=naive_label_derivation(parse_tree(open(file).readlines()[2*int(sys.argv[2])+1]))
    print t
    print "sent:"
    print "-----"
    print ' '.join(t.text())
    deps = mkdeps(t)
    
    print "deps:"
    print "-----"
    for l, r in deps: print "%s|%s" % (l, r)
    
    print "leaves:"
    print "-------"
    for leaf in leaves(t):
        print leaf.lex, leaf.cat
        
    print "unhandled combs:"
    print "----------------"
    for comb in unanalysed:
        print comb
        
    print "finished:"
    print pprint(t)
Beispiel #15
0
 def show_pp_tree(match_node, bundle):
     """Print the bundle's whole derivation, pretty-printed with
     ``match_node`` passed as the ``focus`` argument."""
     rendered = pprint(bundle.derivation, focus=match_node)
     print(rendered)
Beispiel #16
0
 def show_pp_node(match_node, bundle):
     """Print only the matched node, pretty-printed; ``bundle`` is unused."""
     rendered = pprint(match_node)
     print(rendered)
Beispiel #17
0
 def show_pp_tree(match_node, bundle):
     """Pretty-print the full derivation of ``bundle`` to stdout, handing
     ``match_node`` to ``pprint`` as its ``focus``."""
     whole_tree = bundle.derivation
     print(pprint(whole_tree, focus=match_node))
Beispiel #18
0
 def show_pp_node(match_node, bundle):
     """Pretty-print ``match_node`` alone to stdout.

     ``bundle`` is accepted but not used.
     """
     text = pprint(match_node)
     print(text)