def clusterfix(self, top, pp, p, s, t):
    '''Repairs argument cluster coordination.

    top: the coordination node dominating the clusters.
    pp, p, s, t: context nodes matched by the caller's pattern; t is the
    verb node which is first shrunk out and then reattached above top.
    '''
    debug("Fixing argument cluster coordination: %s", pprint(top))
    debug('T: %s', t)
    # 1. Shrink the verb (node T)
    self.fix_object_gap(pp, p, t, s)
    # 2. Reattach the verb above the TOP node
    new_node = Node('TAG', top.kids, top.category, head_index=0)
    top.kids = [t, new_node]
    # (Reattaching parent pointers)
    for kid in new_node:
        kid.parent = new_node
    # 3. Find and relabel argument clusters
    for node, ctx in find_all(top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True):
        vp, np, qp = ctx.vp, ctx.np, ctx.qp
        # Now, VP should have category ((S[dcl]\NP)/QP)/NP
        # NOTE(review): assumes t.category has the shape (X/QP)/NP so that
        # .left.left yields S[dcl]\NP -- confirm against the category class.
        SbNP = t.category.left.left
        QP, NP = qp.category, np.category
        # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
        # NOTE(review): '|' presumably constructs a backward-slash category.
        new_np_category = (SbNP / QP) | ((SbNP / QP) / NP)
        # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
        new_qp_category = (SbNP) | ((SbNP) / QP)
        # insert unary nodes
        new_np_node = Node(np.tag, [np], new_np_category, head_index=0)
        np.parent = new_np_node
        new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0)
        qp.parent = new_qp_node
        replace_kid(vp, np, new_np_node)
        replace_kid(vp, qp, new_qp_node)
        self.fix_categories_starting_from(new_np_node, top)
def fix_ip_app(self, p, a, s):
    '''Rewrites an IP-APP appositive: wraps a copy of node a in a unary
    NN node bearing the modifier category S/S (built from s.category).'''
    debug("Fixing IP-APP NX: %s", lrp_repr(p))
    appositive = copy(a)
    # Strip the tag so the calling pattern cannot match the new node forever.
    appositive.tag = base_tag(appositive.tag)
    modifier_category = s.category / s.category
    wrapper = Node("NN", [appositive], modifier_category, head_index=0)
    replace_kid(p, a, wrapper)
def fix_nongap_extraction(self, _, n, pred, k):
    '''Repairs non-gap extraction: removes the null element under n, shrinks
    every trace subtree matched under n, and, when no overt relativiser can
    be relabelled on pred, inserts a null-relativiser unary NN node.

    k supplies the trace index via its tag.
    '''
    node = n
    debug("Fixing nongap extraction: %s", pprint(node))
    debug("k %s", pprint(k))
    self.remove_null_element(node)

    index = get_trace_index_from_tag(k.tag)
    expr = (r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }' % {
        'tags': ModifierTagsRegex,
        'index': index
    })

    # we use "<<" in the expression, because fix_*_topicalisation comes
    # before fix_nongap_extraction, and this can introduce an extra layer between
    # the phrasal tag and the trace
    for trace_NP, ctx in find_all(node, expr, with_context=True):
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

    # No overt relativiser found: synthesise the null relativiser category
    # SS/SS from the sibling of the [ICV]P node.
    if not self.relabel_relativiser(pred):
        # NOTE(review): get_first is assumed to always match here; a failed
        # match would make this unpacking raise -- confirm.
        top, context = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
        ss = context.ss
        debug("Creating null relativiser unary category: %s", ss.category / ss.category)
        replace_kid(top.parent, top, Node("NN", [top], ss.category / ss.category, head_index=0))
def fix_topicalisation_without_gap(self, node, p, s, t):
    '''Handles topicalisation with no gap: the topic t becomes a sentence
    modifier with category P/S (both features stripped).'''
    debug("Fixing topicalisation without gap: %s", pprint(node))

    # Copy the topic node; re-labelling the copy keeps the original tag intact
    # on t while the wrapper carries it.
    topic = copy(t)
    topic.tag = base_tag(topic.tag, strip_cptb_tag=False)

    modifier_category = featureless(p.category) / featureless(s.category)
    replace_kid(p, t, Node(t.tag, [topic], modifier_category, head_index=0))
def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
    '''Repairs subject extraction from a relative clause.

    n: the clause node to repair; pred: the NP predicate node.
    w: optional wh-node whose tag supplies the trace index.
    reduced: when True, the null element is not removed (reduced relative).
    '''
    global use_bare_N
    debug("%s", reduced)
    node = n
    debug("Fixing subject extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                 NP
        #   /  \                |
        # WHNP  CP     -->      CP
        #      /  \            /  \
        #     IP  DEC         IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

    if w:
        index = get_trace_index_from_tag(w.tag)
    else:
        index = ''

    expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

    for trace_NP, ctx in find_all(node, expr, with_context=True):
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

        # Shrink the trace subtree, then repair categories up to node.
        self.fix_object_gap(pp, p, t, s)
        self.fix_categories_starting_from(s, until=node)

    if not self.relabel_relativiser(pred):
        # TOP is the shrunk VP
        # after shrinking, we can get VV or VA here
        # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
        result = get_first(node, r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/', with_context=True, left_to_right=True)
        if not result:
            debug('Could not find verbal category; did not create null relativiser.')
            return

        top, context = result
        SS = context.ss.category
        debug("Creating null relativiser unary category: %s", SS / SS)
        replace_kid(top.parent, top, Node("NN", [top], SS / SS, head_index=0))
def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
    '''Repairs object extraction from a relative clause.

    n: the clause node to repair; pred: the NP predicate node.
    w: optional wh-node whose tag supplies the trace index.
    reduced: when True, the null element is not removed (reduced relative).
    '''
    global use_bare_N

    node = n
    debug("Fixing object extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                 NP
        #   /  \                |
        # WHNP  CP     -->      CP
        #      /  \            /  \
        #     IP  DEC         IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

    if w:
        index = get_trace_index_from_tag(w.tag)
    else:
        index = ''

    expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

    for trace_NP, ctx in find_all(node, expr, with_context=True):
        top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

        # Shrink the trace subtree, then repair categories up to the IP/CP top.
        self.fix_object_gap(pp, p, t, s)
        self.fix_categories_starting_from(s, until=top)

        # If we couldn't find the DEC node, this is the null relativiser case
        if not self.relabel_relativiser(pred):
            # TOP is the S node
            # null relativiser category comes from sibling of TOP
            # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
            result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)
            if result:
                # NOTE(review): this rebinds ctx (and shadows the _ parameter).
                _, ctx = result
                ss = ctx.ss
                debug("Creating null relativiser unary category: %s", ss.category / ss.category)
                replace_kid(top.parent, top, Node("NN", [top], ss.category / ss.category, head_index=0))
def fix_modification(self, node, p, s, t):
    '''Turns the modifier node t into a unary node carrying the modifier
    category P/S (features stripped from both sides).'''
    debug("Fixing modification: %s", lrp_repr(node))
    parent_category = p.category
    sibling_category = s.category

    # If you don't strip the tag :m from the newly created child (new_kid),
    # the fix_modification pattern will match infinitely when tgrep visits new_kid
    modifier = copy(t)
    modifier.tag = base_tag(modifier.tag, strip_cptb_tag=False)

    modifier_category = featureless(parent_category) / featureless(sibling_category)
    debug("Creating category %s", modifier_category)
    replace_kid(p, t, Node(t.tag, [modifier], modifier_category, head_index=0))
def accept_derivation(self, bundle):
    '''Splits matched leaves whose first character is in baixing (a surname
    set) into a two-leaf NR node: surname character + remainder.'''
    for match_node, ctx in find_all(bundle.derivation, expr, with_context=True):
        leaf = ctx.n
        text = leaf.lex.decode('u8')
        if text[0] in baixing:
            surname = Leaf(leaf.tag, text[0].encode('u8'), None)
            remainder = Leaf(leaf.tag, text[1:].encode('u8'), None)
            replace_kid(leaf.parent, leaf, Node('NR', [surname, remainder]))
    self.write_derivation(bundle)
def fix_rnr(self, rnr, g):
    '''Repairs right node raising: shrinks *RNR* traces in every conjunct,
    excises each shared argument once from the last conjunct, and reattaches
    the shared arguments above g.

    NOTE(review): the rnr parameter is shadowed by the inner loop variable
    below and is never read -- confirm whether callers rely on it.
    '''
    # G is the node dominating all the conjuncts
    rnr_tags = []
    for node, ctx in find_all(g, r'/:c/a', with_context=True):
        for rnr in find_all(node, r'^/\*RNR\*/'):
            rnr_tags.append(get_trace_index_from_tag(rnr.lex))

    # Shrink every *RNR* trace subtree in every conjunct.
    for index in rnr_tags:
        for node, ctx in find_all(g, r'*=PP < { *=P < { *=T < ^/\*RNR\*%s/ $ *=S } }' % index, with_context=True):
            inherit_tag(ctx.s, ctx.p)
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, g)

    # This breaks with the IP (LC CC LC) case in 9:19(11) -- last_conjunct returns None
    # because the last conjunct has been shrunk
    last_conjunct = list(find_first(g, r'/:c/a', left_to_right=False))

    args = []
    # Here, we uniquify the rnr tags so that we excise each shared argument only once
    for index in set(rnr_tags):
        # find_first, because we only want to find one match, the shallowest.
        # cf 7:27(10), if NP-OBJ-2(NN NP-OBJ-2(JJ NN)), then we only want to identify
        # one matching node for index -2 -- the shallowest -- and not two.
        for node, ctx in find_first(last_conjunct[0], r'*=P < { /%s/a=T $ *=S }' % index, with_context=True):
            args.append(ctx.t)

            # Note: last_conjunct may be disconnected from
            # the tree by replace_kid (when ctx.p == last_conjunct)
            replace_kid(ctx.p.parent, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, g)

    # Because the find_all which retrieved the args is an in-order left-to-right traversal, it will find
    # shallower nodes before deeper nodes. Therefore, if a verb has two args V A1 A2, the _args_ list will
    # contain [A2, A1] because A2 is shallower (further from the head) than A1.
    # We reverse the list of args, so that args are re-attached from the inside out (starting from A1).
    # args.reverse()

    # Reattach the excised shared arguments above g, one level per argument.
    new_g = g
    for arg in args:
        new_g = Node(new_g.tag, [new_g, arg], new_g.category.left, head_index=0)
        arg.parent = new_g

    replace_kid(g.parent, g, new_g)
def fix_whword_topicalisation(self, node, p, s, t):
    '''Handles wh-word topicalisation: type-raises the topic node t into a
    topicalised category over SbNP, then shrinks the corresponding *T* trace
    and repairs categories up to each matched subtree.'''
    debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
    # Re-labelling T stops this method from matching the same node again
    # (absorption on the top node can otherwise re-trigger it, cf 2:22(5)).
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # Topicalised category is derived from T's tag via type-raising.
    base_category = ptb_to_cat(t)
    topicalised = typeraise(base_category, SbNP, TR_TOPICALISATION)
    wrapper = Node(base_tag(t.tag, strip_cptb_tag=False), [t], topicalised, head_index=0)
    replace_kid(p, t, wrapper)

    trace_index = get_trace_index_from_tag(t.tag)
    pattern = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % trace_index
    for top, ctx in find_all(p, pattern, with_context=True):
        # Excise the trace NP, then repair categories up to the match root.
        replace_kid(ctx.pp, ctx.p, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)
def fix_topicalisation_with_gap(self, node, p, s, t):
    '''Handles topicalisation with a gap: type-raises the topic node t over S,
    then shrinks the matching *T* trace inside s and repairs categories up to
    the dominating IP.'''
    debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t))
    # Re-labelling T stops this method from matching the same node again
    # (absorption on the top node can otherwise re-trigger it, cf 2:22(5)).
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # Topicalised category is derived from T's tag via type-raising over S.
    base_category = ptb_to_cat(t)
    raised = typeraise(base_category, S, TR_TOPICALISATION)
    replace_kid(p, t, Node(base_tag(t.tag, strip_cptb_tag=False), [t], raised, head_index=0))

    trace_index = get_trace_index_from_tag(t.tag)
    # attested gaps:
    #   575 IP-TPC:t    134 NP-TPC:t    10 IP-Q-TPC:t     8 CP-TPC:t
    #     4 NP-PN-TPC:t   2 QP-TPC:t     2 NP-TTL-TPC:t   1 PP-TPC:t
    #     1 IP-IJ-TPC:t   1 INTJ-TPC:t   1 CP-Q-TPC:t     1 CP-CND-TPC:t
    pattern = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % trace_index
    for top, ctx in find_all(s, pattern, with_context=True):
        debug('top: %s', pprint(top))
        self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)
def fix_categories_starting_from(self, node, until):
    '''Adjusts category labels from _node_ to _until_ (not inclusive) to obtain the correct CCG analysis.

    Walks up the tree one parent at a time; at each binary node whose
    L R -> P labelling is not a valid CCG rule (per analyse), tries a series
    of repairs: type-raising one daughter, coordination/absorption relabelling,
    and finally the generalised composition rules.
    '''
    while node is not until:
        # Only fix binary rules
        if (not node.parent) or node.parent.count() < 2:
            break

        l, r, p = node.parent[0], node.parent[1], node.parent
        L, R, P = (n.category for n in (l, r, p))
        debug("L: %s R: %s P: %s", L, R, P)

        applied_rule = analyse(L, R, P)
        debug("[ %s'%s' %s'%s' -> %s'%s' ] %s", L, ''.join(l.text()), R, ''.join(r.text()), P, ''.join(p.text()), applied_rule)

        if applied_rule is None:
            debug("invalid rule %s %s -> %s", L, R, P)

            if R.is_complex() and R.left.is_complex() and L == R.left.right:
                # L (X|L)|Y -> X|Y becomes
                # X|(X|L) (X|L)|Y -> X|Y
                T = R.left.left
                new_category = typeraise(L, T, TR_FORWARD)  #T/(T|L)
                node.parent[0] = Node(l.tag, [l], new_category, head_index=0)

                new_parent_category = fcomp(new_category, R)
                if new_parent_category:
                    debug("new parent category: %s", new_parent_category)
                    p.category = new_parent_category

                debug("New category: %s", new_category)

            elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                # (X|R)|Y R -> X|Y becomes
                # (X|R)|Y X|(X|R) -> X|Y
                T = L.left.left
                new_category = typeraise(R, T, TR_BACKWARD)  #T|(T/R)
                node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                new_parent_category = bxcomp(L, new_category)
                if new_parent_category:
                    debug("new parent category: %s", new_parent_category)
                    p.category = new_parent_category

                debug("New category: %s", new_category)

            # conj R -> P
            # Make P into R[conj]
            # L cannot be the comma category (,), otherwise we get a mis-analysis
            # in 2:22(5)
            if str(L) in ('conj', 'LCM'):
                p.category = R.clone_adding_feature('conj')
                debug("New category: %s", p.category)

            # L R[conj] -> P
            elif R.has_feature('conj'):
                new_L = L.clone()
                r.category = new_L.clone_adding_feature('conj')
                p.category = new_L
                debug("New category: %s", new_L)

            elif L.is_leaf():
                # , R -> P[conj] becomes , R -> R[conj]
                if P.has_feature('conj') and l.tag in ('PU', 'CC'):  # treat as partial coordination
                    debug("Fixing coordination: %s" % P)
                    p.category = r.category.clone_adding_feature('conj')
                    debug("new parent category: %s" % p.category)

                # , R -> P becomes , R -> R
                elif l.tag == "PU" and not P.has_feature('conj'):  # treat as absorption
                    debug("Fixing left absorption: %s" % P)
                    p.category = r.category

                # L (X|L)|Y -> X|Y becomes
                # X|(X|L) (X|L)|Y -> X|Y
                elif R.is_complex() and R.left.is_complex() and L == R.left.right:
                    T = R.left.left
                    new_category = typeraise(L, T, TR_FORWARD)  #T/(T|L)
                    node.parent[0] = Node(l.tag, [l], new_category, head_index=0)

                    new_parent_category = fcomp(new_category, R)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

            elif R.is_leaf():
                # R , -> P becomes R , -> R
                if r.tag == "PU":  # treat as absorption
                    debug("Fixing right absorption: %s" % P)
                    p.category = l.category

                # (X|R)|Y R -> X|Y becomes
                # (X|R)|Y X|(X|R) -> X|Y
                elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                    T = L.left.left
                    new_category = typeraise(R, T, TR_BACKWARD)  #T|(T/R)
                    node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                    new_parent_category = bxcomp(L, new_category)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

            else:
                new_parent_category = None

                # try typeraising fix
                # T/(T/X) (T\A)/X -> T can be fixed:
                # (T\A)/((T\A)/X) (T\A)/X -> T\A
                if self.is_topicalisation(L) and (L.right.right == R.right and P == L.left and P == R.left.left):
                    T_A = R.left
                    X = R.right

                    l.category = T_A / (T_A / X)
                    new_parent_category = T_A

                # (X|X)|Z Y -> X becomes
                # (X|X)|Z X|(X|X) -> X|Z
                elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                    T = L.left.left
                    new_category = typeraise(R, R, TR_BACKWARD, strip_features=False)  #T/(T|L)
                    node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                    new_parent_category = bxcomp(L, new_category)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

                # Generalise over right modifiers of verbal categories (S[dcl]\X)$
                elif self.is_verbal_category(L) and L.is_complex() and L.left.is_complex():
                    T = L.left.right
                    new_category = typeraise(R, T, TR_BACKWARD)
                    debug('Trying out %s', new_category)

                    if bxcomp(L, new_category):
                        node.parent[1] = Node(r.tag, [r], new_category, head_index=0)
                        new_parent_category = bxcomp(L, new_category)

                # Last ditch: try all of the composition rules to generalise over L R -> P
                if not new_parent_category:
                    # having fxcomp creates bad categories in NP(IP DEC) construction (1:97(3))
                    # but, we need fxcomp to create the gap NP-TPC NP-SBJ(*T*) VP, so allow it when the rhs doesn't look like the DEC category
                    new_parent_category = (
                        fcomp(L, R) or bcomp(L, R, when=not self.is_relativiser(R))
                        or bxcomp(L, R, when=not self.is_relativiser(R))  #or bxcomp2(L, R, when=self.is_verbal_category(L))
                        or fxcomp(L, R, when=not self.is_relativiser(R)))

                if new_parent_category:
                    debug("new parent category: %s", new_parent_category)
                    p.category = new_parent_category
                else:
                    debug("couldn't fix, skipping")

        node = node.parent
        debug('')