Esempio n. 1
0
    def fix_rnr(self, rnr, g):
        """Fix a right node raising (RNR) construction rooted at _g_.

        First shrinks the *RNR* trace under each conjunct, then excises the
        shared argument from the last conjunct, and finally re-attaches one
        copy of each shared argument above _g_, one per distinct trace index.

        rnr -- the matched *RNR* trace leaf (NOTE: shadowed by the inner loop below)
        g   -- the node dominating all the conjuncts
        """
        # Collect the trace index of every *RNR* trace found under any conjunct.
        rnr_tags = []
        for node, ctx in find_all(
            g, r'/:c/a', with_context=True):
            for rnr in find_all(
                node, r'^/\*RNR\*/'):
                rnr_tags.append(get_trace_index_from_tag(rnr.lex))

        # Shrink each *RNR* trace: sibling S absorbs P's tag and replaces it.
        for index in rnr_tags:
            for node, ctx in find_all(
                g,
                r'*=PP < { *=P < { *=T < ^/\*RNR\*%s/ $ *=S } }' % index,
                with_context=True
            ):
                inherit_tag(ctx.s, ctx.p)
                self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
                self.fix_categories_starting_from(ctx.s, g)

        # This breaks with the IP (LC CC LC) case in 9:19(11) -- last_conjunct returns None
        # because the last conjunct has been shrunk
        last_conjunct = list(find_first(g, r'/:c/a', left_to_right=False))

        args = []
        # Here, we uniquify the rnr tags so that we excise each shared argument only once
        for index in set(rnr_tags):
            # find_first, because we only want to find one match, the shallowest.
            # cf 7:27(10), if NP-OBJ-2(NN NP-OBJ-2(JJ NN)), then we only want to identify
            # one matching node for index -2 -- the shallowest -- and not two.
            for node, ctx in find_first(
                last_conjunct[0],
                r'*=P < { /%s/a=T $ *=S }' % index,
                with_context=True
            ):
                args.append(ctx.t)

                # Note: last_conjunct may be disconnected from
                # the tree by replace_kid (when ctx.p == last_conjunct)
                replace_kid(ctx.p.parent, ctx.p, ctx.s)
                self.fix_categories_starting_from(ctx.s, g)

        # Because the find_all which retrieved the args is an in-order left-to-right traversal, it will find
        # shallower nodes before deeper nodes. Therefore, if a verb has two args V A1 A2, the _args_ list will
        # contain [A2, A1] because A2 is shallower (further from the head) than A1.
        # We reverse the list of args, so that args are re-attached from the inside out (starting from A1).
        # args.reverse()

        # Re-attach each excised argument above _g_, building a left-branching spine.
        new_g = g
        for arg in args:
            new_g = Node(new_g.tag, [new_g, arg], new_g.category.left, head_index=0)
            arg.parent = new_g

        replace_kid(g.parent, g, new_g)
Esempio n. 2
0
    def fix_rnr(self, rnr, g):
        """Fix a right node raising (RNR) construction rooted at _g_.

        First shrinks the *RNR* trace under each conjunct, then excises the
        shared argument from the last conjunct, and finally re-attaches one
        copy of each shared argument above _g_, one per distinct trace index.

        rnr -- the matched *RNR* trace leaf (NOTE: shadowed by the inner loop below)
        g   -- the node dominating all the conjuncts
        """
        # Collect the trace index of every *RNR* trace found under any conjunct.
        rnr_tags = []
        for node, ctx in find_all(g, r'/:c/a', with_context=True):
            for rnr in find_all(node, r'^/\*RNR\*/'):
                rnr_tags.append(get_trace_index_from_tag(rnr.lex))

        # Shrink each *RNR* trace: sibling S absorbs P's tag and replaces it.
        for index in rnr_tags:
            for node, ctx in find_all(
                    g,
                    r'*=PP < { *=P < { *=T < ^/\*RNR\*%s/ $ *=S } }' % index,
                    with_context=True):
                inherit_tag(ctx.s, ctx.p)
                self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
                self.fix_categories_starting_from(ctx.s, g)

        # This breaks with the IP (LC CC LC) case in 9:19(11) -- last_conjunct returns None
        # because the last conjunct has been shrunk
        last_conjunct = list(find_first(g, r'/:c/a', left_to_right=False))

        args = []
        # Here, we uniquify the rnr tags so that we excise each shared argument only once
        for index in set(rnr_tags):
            # find_first, because we only want to find one match, the shallowest.
            # cf 7:27(10), if NP-OBJ-2(NN NP-OBJ-2(JJ NN)), then we only want to identify
            # one matching node for index -2 -- the shallowest -- and not two.
            for node, ctx in find_first(last_conjunct[0],
                                        r'*=P < { /%s/a=T $ *=S }' % index,
                                        with_context=True):
                args.append(ctx.t)

                # Note: last_conjunct may be disconnected from
                # the tree by replace_kid (when ctx.p == last_conjunct)
                replace_kid(ctx.p.parent, ctx.p, ctx.s)
                self.fix_categories_starting_from(ctx.s, g)

        # Because the find_all which retrieved the args is an in-order left-to-right traversal, it will find
        # shallower nodes before deeper nodes. Therefore, if a verb has two args V A1 A2, the _args_ list will
        # contain [A2, A1] because A2 is shallower (further from the head) than A1.
        # We reverse the list of args, so that args are re-attached from the inside out (starting from A1).
        # args.reverse()

        # Re-attach each excised argument above _g_, building a left-branching spine.
        new_g = g
        for arg in args:
            new_g = Node(new_g.tag, [new_g, arg],
                         new_g.category.left,
                         head_index=0)
            arg.parent = new_g

        replace_kid(g.parent, g, new_g)
Esempio n. 3
0
 def accept_derivation(self, bundle):
     """Tally, per pattern, whether each matched trace was discharged.

     A trace counts as discharged when its leaf index is absent from the
     token alignment; otherwise it counts as not discharged.  Derivations
     with missing token data are logged to stderr and skipped.
     """
     root = bundle.derivation
     for (pattern, name) in self.Patterns:
         for node, ctx in find_all(root, pattern, with_context=True):
             toks = self.toks.get(bundle.label(), None)
             cn_toks = text(root)
             
             trace = ctx.t
             
             if not (toks and cn_toks):
                 # Missing token data: record the failure and skip this
                 # match -- align() cannot be run without both token
                 # sequences.  (Fix: the original fell through and called
                 # align() with missing input.)
                 print >>sys.stderr, bundle.label()
                 if trace:
                     self.results[name].not_discharged += 1
                 continue
             
             alignment = align(cn_toks, toks)
             
             if trace is not None:
                 trace_index = get_index_of_leaf(root, trace)
                 # An alignment entry at the trace's index means the trace
                 # survived (was not discharged).
                 if alignment.get(trace_index, None) is not None:
                     self.results[name].not_discharged += 1
                 else:
                     self.results[name].discharged += 1
             else:
                 print >>sys.stderr, "t was not bound to a trace node"
Esempio n. 4
0
    def clusterfix(self, top, pp, p, s, t):
        """Fix argument cluster coordination.

        top -- the node dominating the coordinated argument clusters
        pp, p, s, t -- context nodes from the matching expression;
                       t is the verb node to be shrunk and re-attached
        """
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)
        # 1. Shrink the verb (node T)
        self.fix_object_gap(pp, p, t, s)
        # 2. Reattach the verb above the TOP node
        new_node = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, new_node]
        # (Reattaching parent pointers)
        for kid in new_node: kid.parent = new_node
        
        # 3. Find and relabel argument clusters
        for node, ctx in find_all(top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True):
            vp, np, qp = ctx.vp, ctx.np, ctx.qp
            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            SbNP = t.category.left.left
            QP, NP = qp.category, np.category
            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            new_np_category = (SbNP/QP)|((SbNP/QP)/NP)
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            new_qp_category = (SbNP)|((SbNP)/QP)

            # insert unary nodes (each argument is wrapped and re-parented)
            new_np_node = Node(np.tag, [np], new_np_category, head_index=0); np.parent = new_np_node
            new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0); qp.parent = new_qp_node

            replace_kid(vp, np, new_np_node)
            replace_kid(vp, qp, new_qp_node)
            
            self.fix_categories_starting_from(new_np_node, top)
Esempio n. 5
0
    def fix_nongap_extraction(self, _, n, pred, k):
        """Fix a non-gap extraction: remove the null element under _n_,
        shrink the coindexed *T* trace, and insert a null relativiser
        when no overt one can be relabelled.

        k -- node whose tag carries the trace index to match against
        """
        node = n
        debug("Fixing nongap extraction: %s", pprint(node))
        debug("k %s", pprint(k))
        self.remove_null_element(node)

        index = get_trace_index_from_tag(k.tag)
        expr = (r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }' 
             % { 'tags': ModifierTagsRegex, 'index': index })

        # we use "<<" in the expression, because fix_*_topicalisation comes
        # before fix_nongap_extraction, and this can introduce an extra layer between
        # the phrasal tag and the trace
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # remove T from P
            # replace P with S
            self.fix_object_gap(pp, p, t, s)

            if not self.relabel_relativiser(pred):
                # No overt relativiser found: insert a unary NN node with
                # identity category SS/SS above the verbal projection.
                top, context = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
                ss = context.ss

                debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))
Esempio n. 6
0
    def fix_topicalisation_with_gap(self, node, p, s, t):
        """Fix topicalisation with a gap: type-raise the topicalised
        constituent _t_, then shrink its coindexed *T* trace under _s_."""
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t))

        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(p, t, Node(
            base_tag(t.tag, strip_cptb_tag=False),
            [t],
            typeraise(typeraise_t_category, S, TR_TOPICALISATION),
            head_index=0))

        index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

        for top, ctx in find_all(s, expr, with_context=True):
            debug('top: %s', pprint(top))
            # Shrink the trace (S replaces P), then repair categories up to TOP.
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Esempio n. 7
0
    def fix_nongap_extraction(self, _, n, pred, k):
        """Fix a non-gap extraction: remove the null element under _n_,
        shrink the coindexed *T* trace, and insert a null relativiser
        when no overt one can be relabelled.

        k -- node whose tag carries the trace index to match against
        """
        node = n
        debug("Fixing nongap extraction: %s", pprint(node))
        debug("k %s", pprint(k))
        self.remove_null_element(node)

        index = get_trace_index_from_tag(k.tag)
        expr = (
            r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
            % {
                'tags': ModifierTagsRegex,
                'index': index
            })

        # we use "<<" in the expression, because fix_*_topicalisation comes
        # before fix_nongap_extraction, and this can introduce an extra layer between
        # the phrasal tag and the trace
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # remove T from P
            # replace P with S
            self.fix_object_gap(pp, p, t, s)

            if not self.relabel_relativiser(pred):
                # No overt relativiser found: insert a unary NN node with
                # identity category SS/SS above the verbal projection.
                top, context = get_first(node,
                                         r'/[ICV]P/=TOP $ *=SS',
                                         with_context=True)
                ss = context.ss

                debug("Creating null relativiser unary category: %s",
                      ss.category / ss.category)
                replace_kid(
                    top.parent, top,
                    Node("NN", [top], ss.category / ss.category, head_index=0))
Esempio n. 8
0
    def clusterfix(self, top, pp, p, s, t):
        """Fix argument cluster coordination.

        top -- the node dominating the coordinated argument clusters
        pp, p, s, t -- context nodes from the matching expression;
                       t is the verb node to be shrunk and re-attached
        """
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)
        # 1. Shrink the verb (node T)
        self.fix_object_gap(pp, p, t, s)
        # 2. Reattach the verb above the TOP node
        new_node = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, new_node]
        # (Reattaching parent pointers)
        for kid in new_node:
            kid.parent = new_node

        # 3. Find and relabel argument clusters
        for node, ctx in find_all(top,
                                  r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP',
                                  with_context=True):
            vp, np, qp = ctx.vp, ctx.np, ctx.qp
            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            SbNP = t.category.left.left
            QP, NP = qp.category, np.category
            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            new_np_category = (SbNP / QP) | ((SbNP / QP) / NP)
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            new_qp_category = (SbNP) | ((SbNP) / QP)

            # insert unary nodes (each argument is wrapped and re-parented)
            new_np_node = Node(np.tag, [np], new_np_category, head_index=0)
            np.parent = new_np_node
            new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0)
            qp.parent = new_qp_node

            replace_kid(vp, np, new_np_node)
            replace_kid(vp, qp, new_qp_node)

            self.fix_categories_starting_from(new_np_node, top)
Esempio n. 9
0
 def accept_derivation(self, bundle):
     """Collect word-length signatures for matched nodes, plus the binned
     length of the first non-ignored leaf following each match."""
     root = bundle.derivation
 
     for node, ctx in find_all(root, filter_expression, with_context=True):
         if node_filter_function(node):
             words = list(text_without_quotes_or_traces(node))
             # NOTE(review): the `not n.tag == "PU"` clause is redundant --
             # "PU" is not in the accepted tag tuple; confirm before removing.
             words_filtered = list(text_without_quotes_or_traces(node, 
                 pred=lambda n: n.tag in ('NN', 'NR', 'NT', 'JJ') and not n.tag == "PU"))
     
             # Binned character lengths of the filtered words (assumes UTF-8 lexemes).
             lengths = [bin_lengths(len(w.decode('u8'))) for w in words_filtered]
             # Collapse a leading 1,1 pair into a single 1.
             if len(lengths)> 2 and lengths[0] == 1 and lengths[1] == 1: lengths[0:2] = [1]
             # Keep only the first and last bins as the signature.
             lengths[1:-1] = []
             sig = ' '.join(imap(str, lengths))
     
             self.sigs[sig][0] += 1
             self.sigs[sig][1].append(' '.join(words))
     
             # Find the next non-ignored leaf after this node (if any).
             nn = next_leaf(node)
             while nn and is_ignored(nn): nn = next_leaf(nn)
             if not nn: continue
     
             next_len = bin_lengths(len(nn.lex.decode('u8')))
             key = '%s;%s' % (sig,next_len)
             self.atboundary[key][0] += 1
             self.atboundary[key][1].append(' '.join(words + ['* '+nn.lex]))
Esempio n. 10
0
    def accept_derivation(self, bundle):
        """Tally, per pattern, whether each matched trace was discharged.

        A trace counts as discharged when its leaf index is absent from the
        token alignment; otherwise it counts as not discharged.  Derivations
        with missing token data are logged to stderr and skipped.
        """
        root = bundle.derivation
        for (pattern, name) in self.Patterns:
            for node, ctx in find_all(root, pattern, with_context=True):
                toks = self.toks.get(bundle.label(), None)
                cn_toks = text(root)

                trace = ctx.t

                if not (toks and cn_toks):
                    # Missing token data: record the failure and skip this
                    # match -- align() cannot be run without both token
                    # sequences.  (Fix: the original fell through and
                    # called align() with missing input.)
                    print >> sys.stderr, bundle.label()
                    if trace:
                        self.results[name].not_discharged += 1
                    continue

                alignment = align(cn_toks, toks)

                if trace is not None:
                    trace_index = get_index_of_leaf(root, trace)
                    # An alignment entry at the trace's index means the
                    # trace survived (was not discharged).
                    if alignment.get(trace_index, None) is not None:
                        self.results[name].not_discharged += 1
                    else:
                        self.results[name].discharged += 1
                else:
                    print >> sys.stderr, "t was not bound to a trace node"
Esempio n. 11
0
 def accept_derivation(self, bundle):
     """Count verbs heading constituents whose sibling dominates a *pro*
     trace, counting each distinct head at most once."""
     seen_heads = set()
     matches = find_all(bundle.derivation,
                        r'* $ { * < ^/\*pro\*/ }',
                        with_context=True)
     for matched_node, _ctx in matches:
         head_node = find_head(matched_node)
         if head_node in seen_heads:
             continue
         seen_heads.add(head_node)
         self.verbs[' '.join(head_node.text())] += 1
Esempio n. 12
0
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        """Fix subject extraction: shrink the NP-SBJ *T* trace under _n_
        and insert a null relativiser when no overt one can be relabelled.

        w       -- the wh-word, if any; its tag supplies the trace index
        reduced -- True for reduced relatives (skips null-element removal)
        """
        global use_bare_N

        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        # Trace index comes from the wh-word's tag when one is present.
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(
                    node,
                    r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/',
                    with_context=True,
                    left_to_right=True)
                if not result:
                    debug(
                        'Could not find verbal category; did not create null relativiser.'
                    )
                    return

                top, context = result
                SS = context.ss.category

                debug("Creating null relativiser unary category: %s", SS / SS)
                replace_kid(top.parent, top,
                            Node("NN", [top], SS / SS, head_index=0))
Esempio n. 13
0
 def accept_derivation(self, bundle):
     """Bucket matched (L, R) word pairs by their binned character lengths."""
     def binned(word):
         return bin_lengths(len(word))
 
     for matched, ctx in find_all(bundle.derivation, filter_expression, with_context=True):
         if not node_filter_function(matched):
             continue
         left = ctx['L'].lex.decode('u8')
         right = ctx['R'].lex.decode('u8')
         entry = self.sigs[(binned(left), binned(right))]
         entry[0] += 1
         entry[1].append(' '.join((left, right)).encode('u8'))
Esempio n. 14
0
 def accept_derivation(self, bundle):
     """Split each matched leaf whose first character is a surname (in
     _baixing_) into surname + remainder leaves under a new NR node, then
     write out the derivation."""
     for _match, ctx in find_all(bundle.derivation, expr, with_context=True):
         leaf = ctx.n
         decoded = leaf.lex.decode('u8')
         if decoded[0] not in baixing:
             continue
         new_kids = [
             Leaf(leaf.tag, decoded[0].encode('u8'), None),
             Leaf(leaf.tag, decoded[1:].encode('u8'), None),
         ]
         replace_kid(leaf.parent, leaf, Node('NR', new_kids))
 
     self.write_derivation(bundle)
Esempio n. 15
0
 def accept_derivation(self, bundle):
     """Count verbs heading constituents whose sibling dominates a *pro*
     trace; each distinct head contributes at most one count."""
     counted = set()
     derivation_root = bundle.derivation
     for match, _ctx in find_all(derivation_root,
                                 r'* $ { * < ^/\*pro\*/ }',
                                 with_context=True):
         h = find_head(match)
         if h not in counted:
             counted.add(h)
             self.verbs[' '.join(h.text())] += 1
Esempio n. 16
0
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        """Fix object extraction: shrink the NP-OBJ/NP-EXT *T* trace under
        _n_ and insert a null relativiser when no overt one can be
        relabelled.

        w       -- the wh-word, if any; its tag supplies the trace index
        reduced -- True for reduced relatives (skips null-element removal)
        """
        global use_bare_N

        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        # Trace index comes from the wh-word's tag when one is present.
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top,
                                   r'* $ *=SS',
                                   with_context=True,
                                   nonrecursive=True)
                if result:
                    _, ctx = result
                    ss = ctx.ss
                    debug("Creating null relativiser unary category: %s",
                          ss.category / ss.category)
                    replace_kid(
                        top.parent, top,
                        Node("NN", [top],
                             ss.category / ss.category,
                             head_index=0))
Esempio n. 17
0
        def accept_derivation(self, bundle):
            """Bucket matched (L, R) word pairs by their binned character
            lengths."""
            for matched, ctx in find_all(bundle.derivation,
                                         filter_expression,
                                         with_context=True):
                if not node_filter_function(matched):
                    continue
                left = ctx['L'].lex.decode('u8')
                right = ctx['R'].lex.decode('u8')
                key = (bin_lengths(len(left)), bin_lengths(len(right)))
                entry = self.sigs[key]
                entry[0] += 1
                entry[1].append(' '.join((left, right)).encode('u8'))
Esempio n. 18
0
 def accept_derivation(self, bundle):
     """Move each -TPC- topicalised node down to replace the parent of its
     coindexed trace, then write out the modified derivation."""
     top = bundle.derivation
     for node, ctx in find_all(top, r'* < /-TPC-\d/a=T', with_context=True):
         trace = find_coindexed_trace(top, ctx.t)
         if trace:
             topicalised_node = ctx.t
             
             # Re-tag the moved node with the trace parent's tag, splice it
             # in at the trace site, then detach it from its old parent.
             topicalised_node.tag = trace.parent.tag
             replace_kid(trace.parent.parent, trace.parent, topicalised_node)
             node.kids.remove(topicalised_node)
             
     self.write_derivation(bundle)
Esempio n. 19
0
    def accept_derivation(self, bundle):
        """Move each -TPC- topicalised node down to replace the parent of
        its coindexed trace, then write out the modified derivation."""
        top = bundle.derivation
        for node, ctx in find_all(top, r'* < /-TPC-\d/a=T', with_context=True):
            trace = find_coindexed_trace(top, ctx.t)
            if trace:
                topicalised_node = ctx.t

                # Re-tag the moved node with the trace parent's tag, splice
                # it in at the trace site, then detach it from its old parent.
                topicalised_node.tag = trace.parent.tag
                replace_kid(trace.parent.parent, trace.parent,
                            topicalised_node)
                node.kids.remove(topicalised_node)

        self.write_derivation(bundle)
Esempio n. 20
0
    def accept_derivation(self, bundle):
        """Split each matched leaf whose first character is a surname (in
        _baixing_) into surname + remainder leaves under a new NR node,
        then write out the derivation."""
        for _match, ctx in find_all(bundle.derivation, expr, with_context=True):
            leaf = ctx.n
            decoded = leaf.lex.decode('u8')
            if decoded[0] not in baixing:
                continue
            new_kids = [
                Leaf(leaf.tag, decoded[0].encode('u8'), None),
                Leaf(leaf.tag, decoded[1:].encode('u8'), None),
            ]
            replace_kid(leaf.parent, leaf, Node('NR', new_kids))

        self.write_derivation(bundle)
Esempio n. 21
0
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        """Fix subject extraction: shrink the NP-SBJ *T* trace under _n_
        and insert a null relativiser when no overt one can be relabelled.

        w       -- the wh-word, if any; its tag supplies the trace index
        reduced -- True for reduced relatives (skips null-element removal)
        """
        global use_bare_N

        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        # Trace index comes from the wh-word's tag when one is present.
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(node, r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/', with_context=True, left_to_right=True)
                if not result:
                    debug('Could not find verbal category; did not create null relativiser.')
                    return

                top, context = result
                SS = context.ss.category

                debug("Creating null relativiser unary category: %s", SS/SS)
                replace_kid(top.parent, top, Node("NN", [top], SS/SS, head_index=0))
Esempio n. 22
0
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        """Fix object extraction: shrink the NP-OBJ/NP-EXT *T* trace under
        _n_ and insert a null relativiser when no overt one can be
        relabelled.

        w       -- the wh-word, if any; its tag supplies the trace index
        reduced -- True for reduced relatives (skips null-element removal)
        """
        global use_bare_N

        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        # Trace index comes from the wh-word's tag when one is present.
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)
                if result:
                    _, ctx = result; ss = ctx.ss
                    debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                    replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))
Esempio n. 23
0
 def fix_whword_topicalisation(self, node, p, s, t):
     """Fix wh-word topicalisation: type-raise the topicalised NP _t_,
     then shrink its coindexed *T* trace under a VP."""
     debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
     # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
     t.tag = base_tag(t.tag, strip_cptb_tag=False)
     # create topicalised category based on the tag of T
     typeraise_t_category = ptb_to_cat(t)
     # insert a node with the topicalised category
     replace_kid(p, t, Node(
         base_tag(t.tag, strip_cptb_tag=False),
         [t],
         typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
         head_index=0))

     index = get_trace_index_from_tag(t.tag)

     expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

     for top, ctx in find_all(p, expr, with_context=True):
         # Shrink the trace: sibling S replaces P, then repair categories.
         replace_kid(ctx.pp, ctx.p, ctx.s)
         self.fix_categories_starting_from(ctx.s, until=top)
Esempio n. 24
0
    def fix_ba_object_gap(self, top, ba, c, d):
        """Fix an object gap in a ba-construction.

        Shrinks each NP-OBJ trace found under _d_, then relabels the
        ba node's category.
        """
        for trace_NP, ctx in find_all(d, r'{ { { /NP-OBJ/=T < ^/\*-/ } $ *=S } > { *=P > *=PP } }', with_context=True):
            debug("Found %s", trace_NP)
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # Shrink the trace: remove T from P and replace P with S.
            self.fix_object_gap(pp, p, t, s)

        # NOTE(review): _s_ is only bound inside the loop above; if no trace
        # matched, this raises NameError -- confirm that every caller
        # guarantees at least one match.
        self.relabel_ba_category(top, ba, s)
Esempio n. 25
0
    def fix_whword_topicalisation(self, node, p, s, t):
        """Fix wh-word topicalisation: type-raise the topicalised NP _t_,
        then shrink its coindexed *T* trace under a VP."""
        debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(
            p, t,
            Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                 typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
                 head_index=0))

        index = get_trace_index_from_tag(t.tag)

        expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

        for top, ctx in find_all(p, expr, with_context=True):
            # Shrink the trace: sibling S replaces P, then repair categories.
            replace_kid(ctx.pp, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Esempio n. 26
0
    def fix_ba_object_gap(self, top, ba, c, d):
        """Fix an object gap in a ba-construction.

        Shrinks each NP-OBJ trace found under _d_, then relabels the
        ba node's category.
        """
        for trace_NP, ctx in find_all(
                d,
                r'{ { { /NP-OBJ/=T < ^/\*-/ } $ *=S } > { *=P > *=PP } }',
                with_context=True):
            debug("Found %s", trace_NP)
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # Shrink the trace: remove T from P and replace P with S.
            self.fix_object_gap(pp, p, t, s)

        # NOTE(review): _s_ is only bound inside the loop above; if no trace
        # matched, this raises NameError -- confirm that every caller
        # guarantees at least one match.
        self.relabel_ba_category(top, ba, s)
Esempio n. 27
0
        def accept_derivation(self, bundle):
            '''Collects binned word-length signatures for matched nodes, and a
            second tally keyed by (signature, length of next non-ignored leaf).'''
            root = bundle.derivation

            for node, ctx in find_all(root,
                                      filter_expression,
                                      with_context=True):
                if node_filter_function(node):
                    # All words under the node, and the subset whose tags are
                    # content-word tags.
                    # NOTE(review): the `not n.tag == "PU"` clause is redundant:
                    # 'PU' is not in the tuple, so the membership test already
                    # excludes it.
                    words = list(text_without_quotes_or_traces(node))
                    words_filtered = list(
                        text_without_quotes_or_traces(
                            node,
                            pred=lambda n: n.tag in
                            ('NN', 'NR', 'NT', 'JJ') and not n.tag == "PU"))

                    # Binned character length of each filtered word
                    # ('u8' = UTF-8 decode, so lengths count characters not bytes).
                    lengths = [
                        bin_lengths(len(w.decode('u8')))
                        for w in words_filtered
                    ]
                    # Collapse a leading pair of length-1 bins into a single 1.
                    if len(lengths
                           ) > 2 and lengths[0] == 1 and lengths[1] == 1:
                        lengths[0:2] = [1]
                    # Drop the middle: the signature keeps only first and last bins.
                    lengths[1:-1] = []
                    sig = ' '.join(imap(str, lengths))

                    # Tally the signature and keep the words that produced it.
                    self.sigs[sig][0] += 1
                    self.sigs[sig][1].append(' '.join(words))

                    # Advance to the next leaf after this node, skipping ignored ones.
                    nn = next_leaf(node)
                    while nn and is_ignored(nn):
                        nn = next_leaf(nn)
                    if not nn: continue

                    # Tally (signature ; binned length of the boundary leaf).
                    next_len = bin_lengths(len(nn.lex.decode('u8')))
                    key = '%s;%s' % (sig, next_len)
                    self.atboundary[key][0] += 1
                    self.atboundary[key][1].append(' '.join(words +
                                                            ['* ' + nn.lex]))
Esempio n. 28
0
    def fix_topicalisation_with_gap(self, node, p, s, t):
        '''Wraps the topicalised constituent T in a type-raised node, then
        excises each co-indexed *T* trace found under _s_ and repairs the
        categories up to the dominating IP.'''
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
              lrp_repr(node), pprint(s), pprint(t))

        # Strip the CPTB function tag so this method cannot match the same node
        # again (e.g. absorption on the top node, cf 2:22(5)).
        t.tag = base_tag(t.tag, strip_cptb_tag=False)

        # Build the topicalised (type-raised over S) category from T's tag and
        # splice in a wrapper node carrying it.
        topicalised_category = typeraise(ptb_to_cat(t), S, TR_TOPICALISATION)
        wrapper = Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                       topicalised_category, head_index=0)
        replace_kid(p, t, wrapper)

        trace_index = get_trace_index_from_tag(t.tag)

        # Attested gap tags (by frequency): IP-TPC (575), NP-TPC (134),
        # IP-Q-TPC (10), CP-TPC (8), NP-PN-TPC (4), QP-TPC (2), NP-TTL-TPC (2),
        # PP-TPC, IP-IJ-TPC, INTJ-TPC, CP-Q-TPC, CP-CND-TPC (1 each).
        pattern = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % trace_index

        for top, ctx in find_all(s, pattern, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Esempio n. 29
0
 def accept_derivation(self, bundle):
     # Tally verb lexical items per frame signature, for VPs headed by a
     # VV/VA/VC/VE verb and not containing a coordinating conjunction.
     pattern = r'/VP/ < /V[VACE]/=T ! < /CC/'
     for vp_node, ctx in find_all(bundle.derivation, pattern, with_context=True):
         verb_lex = ':'.join(ctx.t.text())
         self.frames[signature(vp_node)][verb_lex] += 1
Esempio n. 30
0
 def match_generator(self, deriv, expr):
     # Yield every node in _deriv_ matching the search expression _expr_.
     return find_all(deriv, expr)
Esempio n. 31
0
 def match_generator(self, deriv, expr, with_context):
     # Yield every node in _deriv_ matching _expr_; when with_context is true,
     # (node, context) pairs are yielded instead.
     # Pass with_context by keyword: every other call site in this file uses
     # the keyword form, and passing it positionally risks binding a different
     # parameter of find_all.
     return find_all(deriv, expr, with_context=with_context)
Esempio n. 32
0
 def match_generator(self, deriv, expr, with_context):
     # Yield every node in _deriv_ matching _expr_; when with_context is true,
     # (node, context) pairs are yielded instead.
     # Pass with_context by keyword: every other call site in this file uses
     # the keyword form, and passing it positionally risks binding a different
     # parameter of find_all.
     return find_all(deriv, expr, with_context=with_context)
Esempio n. 33
0
 def accept_derivation(self, bundle):
     # Count each VV/VA/VC/VE verb occurrence, keyed by its surface text.
     derivation_root = bundle.derivation
     for verb_node, _ in find_all(derivation_root, r'/V[VACE]/', with_context=True):
         self.verbs[' '.join(verb_node.text())] += 1
Esempio n. 34
0
 def match_generator(self, deriv, expr):
     # Yield every node in _deriv_ matching the search expression _expr_.
     return find_all(deriv, expr)
Esempio n. 35
0
 def accept_derivation(self, bundle):
     # Tally the frame signature of every node whose first child is a VV.
     matches = find_all(bundle.derivation, '* <1 /VV/')
     for matched_node in matches:
         self.frames[signature(matched_node)] += 1
Esempio n. 36
0
 def accept_derivation(self, bundle):
     # Count occurrences of each VV/VA/VC/VE verb by its surface text.
     for verb, _ctx in find_all(bundle.derivation, r'/V[VACE]/', with_context=True):
         key = ' '.join(verb.text())
         self.verbs[key] += 1
Esempio n. 37
0
 def accept_derivation(self, bundle):
     # Increment the frame-signature count for each node with a VV first child.
     for hit in find_all(bundle.derivation, '* <1 /VV/'):
         sig_key = signature(hit)
         self.frames[sig_key] += 1