Example 1
    def do_fix(C, node):
        def bxcomp(L, R, P):
            A = featureless(L.left)
            return A | A

        def bxcomp2(L, R, P):
            A = featureless(L.left.left)
            return A | A

        def innermost_VP(c):
            while c.left.is_complex():
                c = c.left
            return c

        if node.parent and node.parent.count() > 1:
            l, r, p = node.parent[0], node.parent[1], node.parent
            L, R, P = (n.category for n in (l, r, p))

            if p.tag.startswith("VSB"):
                v = innermost_VP(L)
                l.category = v / v
            elif is_modifier_category(R) and L.is_complex() and r is node:

                if C.is_bxcomp2_candidate(L, R, P):
                    node.category = bxcomp2(L, R, P)
                    debug("Generalised %s to %s", R, node.category)
                elif C.is_bxcomp_candidate(L, R, P):
                    node.category = bxcomp(L, R, P)
                    debug("Generalised %s to %s", R, node.category)
Example 2
    def clusterfix(self, top, pp, p, s, t):
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)
        # 1. Shrink the verb (node T)
        self.fix_object_gap(pp, p, t, s)
        # 2. Reattach the verb above the TOP node
        new_node = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, new_node]
        # (Reattaching parent pointers)
        for kid in new_node:
            kid.parent = new_node

        # 3. Find and relabel argument clusters
        for node, ctx in find_all(top,
                                  r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP',
                                  with_context=True):
            vp, np, qp = ctx.vp, ctx.np, ctx.qp
            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            SbNP = t.category.left.left
            QP, NP = qp.category, np.category
            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            new_np_category = (SbNP / QP) | ((SbNP / QP) / NP)
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            new_qp_category = (SbNP) | ((SbNP) / QP)

            # insert unary nodes
            new_np_node = Node(np.tag, [np], new_np_category, head_index=0)
            np.parent = new_np_node
            new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0)
            qp.parent = new_qp_node

            replace_kid(vp, np, new_np_node)
            replace_kid(vp, qp, new_qp_node)

            self.fix_categories_starting_from(new_np_node, top)
Example 3
    def output(self):
        appl_accepted, _ = self.appl_only_filter.compute_accepts_and_rejects()
        null_accepted, _ = self.null_only_filter.compute_accepts_and_rejects()

        # Start by collecting the manually annotated slashes from the file
        aggregate = self.parse_annoform(self.manual_fn)

        for (accepted_set, mode_name) in ((appl_accepted, 'apply'),
                                          (null_accepted, 'null')):
            # sort by applied frequency, most frequent first
            for (cat_string, slash_index, applied_frequency, total_frequency) in \
                sorted(accepted_set, key=lambda this: this[2], reverse=True):

                # aggregate[re.sub(r'[-*@.]', '', cat_string)][slash_index] = mode_name
                # If there is a slash-mode entry in _aggregate_ already (from the manual list),
                # do not overwrite it.
                slash_to_mode_map = aggregate[re.sub(r'[-*@.]', '',
                                                     cat_string)]
                if slash_index not in slash_to_mode_map:
                    debug("Slash %d of %s will have mode %s", slash_index,
                          cat_string, mode_name)
                    slash_to_mode_map[slash_index] = mode_name
                else:
                    debug("Not overwriting slash %d of %s", slash_index,
                          cat_string)

        for (category_string, slashes) in aggregate.iteritems():
            for (slash_index, mode_name) in slashes.iteritems():
                print " ".join((category_string, mode_name, str(slash_index)))
Example 4
    def fix_topicalisation_with_gap(self, node, p, s, t):
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t))

        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(p, t, Node(
            base_tag(t.tag, strip_cptb_tag=False),
            [t],
            typeraise(typeraise_t_category, S, TR_TOPICALISATION),
            head_index=0))

        index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

        for top, ctx in find_all(s, expr, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 5
 def process_annotator_into_substs(self, fn):
     substs = {}
     
     slashes = defaultdict(set)
     with file(fn, 'r') as f:
         for lineno, line in enumerate(f):
             line = line.rstrip()
             
             fields = line.split()
             if len(fields) != 3:
                 raise FilterException, ("Expected 3 fields at line %d of annotator file %s."
                                         % (lineno + 1, fn))
                                         
             category_string, replacement_mode_string, slash_index = fields
             debug("Slash %s of %s goes to %s=%d", slash_index, re.sub(r'[-.*@]', '', category_string), replacement_mode_string,self.mode_string_to_index(replacement_mode_string))
             slashes[re.sub(r'[-.*@]', '', category_string)].add(
                                     ( int(slash_index), self.mode_string_to_index(replacement_mode_string) ))
             
         for (category_string, replacements) in slashes.iteritems():
             moded_category = parse_category(category_string)
             moded_category.labelled()
             
             for (subcategory, slash_index) in moded_category.slashes():
                 result = find(lambda (index, mode): index == slash_index, replacements)
                 if result:
                     replacement_slash, replacement_mode = result
                     debug("Setting mode of slash %s of %s to %s", slash_index, moded_category, replacement_mode)
                     subcategory.mode = replacement_mode
                     
             substs[category_string] = moded_category
     
     return substs
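
A hedged sketch of the annotator file this method expects: three whitespace-separated fields per line, in the order category string, replacement mode, slash index (matching the unpacking above). The rows are illustrative only:

    (S[dcl]\NP)/NP apply 0
    NP/N null 0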
Example 6
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)

        if n:
            index = get_trace_index_from_tag(n.tag)
        else:
            index = r'\*'

        expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index
        trace_NP, ctx = get_first(top, expr, with_context=True)

        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        self.fix_categories_starting_from(s, until=top)
        self.relabel_bei_category(top, pred)

        top.category = top[0].category.left

        debug("done %s", pprint(top))
Example 7
    def fix_cat_for(self, leaf, slash_index, mode):
        key_category = re.sub(r'[-.*@]', '', str(leaf.cat))
        if key_category not in self.permitted_cats:
            warn("No entry in splitdef file for category %s", leaf.cat)
            return
            
        alternatives = self.permitted_cats[key_category]
        #print "All alternatives: %s" % alternatives
            
        old_modes = self.modes_for_cat(leaf.cat)
        
        def is_invalid_alternative(alt):
            alt_modes = self.modes_for_cat(alt)
            if len(alt_modes) != len(old_modes):
                warn("Replacement category %s has different size to original category %s", alt, leaf.cat)
                
            modes_for_comparison = zip(alt_modes, old_modes)
            del modes_for_comparison[slash_index]  # ignore the slash we are about to replace

            return str(leaf.cat) == str(alt) or \
                   any((ModeTier[alt] < ModeTier[old]) for (alt, old) in modes_for_comparison)
                   
        valids = list(reject(alternatives, is_invalid_alternative))
        if not valids:
            warn("No valid alternative for %s which preserves mode `%s' on slash %d", leaf.cat, mode, slash_index)
            return
            
        #print "Alternatives: %s" % valids
        alternative = min(valids, key=lambda e: self.permissiveness(e, slash_index))
        debug("%s `%s' -> %s", leaf.cat, leaf.lex, alternative)
        
        leaf.cat = alternative
Example 8
    def clusterfix(self, top, pp, p, s, t):
        debug("Fixing argument cluster coordination: %s", pprint(top))
        debug('T: %s', t)
        # 1. Shrink the verb (node T)
        self.fix_object_gap(pp, p, t, s)
        # 2. Reattach the verb above the TOP node
        new_node = Node('TAG', top.kids, top.category, head_index=0)
        top.kids = [t, new_node]
        # (Reattaching parent pointers)
        for kid in new_node: kid.parent = new_node
        
        # 3. Find and relabel argument clusters
        for node, ctx in find_all(top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True):
            vp, np, qp = ctx.vp, ctx.np, ctx.qp
            # Now, VP should have category ((S[dcl]\NP)/QP)/NP
            SbNP = t.category.left.left
            QP, NP = qp.category, np.category
            # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
            new_np_category = (SbNP/QP)|((SbNP/QP)/NP)
            # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
            new_qp_category = (SbNP)|((SbNP)/QP)

            # insert unary nodes
            new_np_node = Node(np.tag, [np], new_np_category, head_index=0); np.parent = new_np_node
            new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0); qp.parent = new_qp_node

            replace_kid(vp, np, new_np_node)
            replace_kid(vp, qp, new_qp_node)
            
            self.fix_categories_starting_from(new_np_node, top)
Example 9
    def do_fix(C, node):
        def bxcomp(L, R, P):
            A = featureless(L.left)
            return A | A

        def bxcomp2(L, R, P):
            A = featureless(L.left.left)
            return A | A

        def innermost_VP(c):
            while c.left.is_complex():
                c = c.left
            return c

        if node.parent and node.parent.count() > 1:
            l, r, p = node.parent[0], node.parent[1], node.parent
            L, R, P = (n.category for n in (l, r, p))

            if p.tag.startswith('VSB'):
                v = innermost_VP(L)
                l.category = v / v
            elif (is_modifier_category(R) and L.is_complex() and r is node):

                if C.is_bxcomp2_candidate(L, R, P):
                    node.category = bxcomp2(L, R, P)
                    debug("Generalised %s to %s", R, node.category)
                elif C.is_bxcomp_candidate(L, R, P):
                    node.category = bxcomp(L, R, P)
                    debug("Generalised %s to %s", R, node.category)
Example 10
 def fix_ip_app(self, p, a, s):
     debug("Fixing IP-APP NX: %s", lrp_repr(p))
     new_kid = copy(a)
     new_kid.tag = base_tag(new_kid.tag)  # relabel to stop infinite matching
     replace_kid(p, a, Node("NN", [new_kid], s.category / s.category, head_index=0))
Example 11
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)
            
        if n:
            index = get_trace_index_from_tag(n.tag)
        else:
            index = r'\*'

        expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index
        trace_NP, ctx = get_first(top, expr, with_context=True)

        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        self.fix_categories_starting_from(s, until=top)
        self.relabel_bei_category(top, pred)
        
        top.category = top[0].category.left

        debug("done %s", pprint(top))
Example 12
 def fix_short_bei_obj_gap(self, node, pp, bei, beis, t, p, s):
     debug("fixing short bei object gap: pp:%s\np:%s\ns:%s", lrp_repr(pp), lrp_repr(p), lrp_repr(s))
     
     # simple test case in 29:71(3) for bei with extracted NP
     replace_kid(pp, p, s)
     self.fix_categories_starting_from(s, until=bei.parent[1])
     bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 13
    def fix_short_bei_io_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei io gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=pp)
        bei.category = bei.category.clone_with(right=beis.category)
Example 14
 def fix_short_bei_subj_gap(self, node, bei, pp, p, t, s):
     debug("fixing short bei subject gap: %s", lrp_repr(pp))
     # take the VP sibling of SB
     # replace T with S
     # this analysis isn't entirely correct
     replace_kid(pp, p, s)
     self.fix_categories_starting_from(s, pp)
     bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 15
    def fix_topicalisation_without_gap(self, node, p, s, t):
        debug("Fixing topicalisation without gap: %s", pprint(node))

        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(p.category) / featureless(s.category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 16
    def fix_topicalisation_without_gap(self, node, p, s, t):
        debug("Fixing topicalisation without gap: %s", pprint(node))

        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(p.category)/featureless(s.category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 17
    def fix_short_bei_obj_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei object gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        # simple test case in 29:71(3) for bei with extracted NP
        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=bei.parent[1])
        bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 18
 def fix_short_bei_subj_gap(self, node, bei, pp, p, t, s):
     debug("fixing short bei subject gap: %s", lrp_repr(pp))
     # take the VP sibling of SB
     # replace T with S
     # this analysis isn't entirely correct
     replace_kid(pp, p, s)
     self.fix_categories_starting_from(s, pp)
     bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 19
def label_partial_coordination(node, inside_np=False, ucp=False):
    node[0].category = ptb_to_cat(node[0])
    node.kids[0] = label(node[0], inside_np)
    
    debug('label_partial_coordination node.category: %s, %s', node.category, node)
    node[1].category = ptb_to_cat(node[1]) if ucp else node.category 
    node.kids[1] = label(node[1], inside_np)
    
    return node
Example 20
def register_unary(unaries, node, filler):
    '''
    If _node_ represents the result (RHS) of a unary rule, this records that a new
dependency must be created between it and its filler, adding it to _unaries_, a list
of such dependencies created in a given derivation.
    '''
    node.cat.parg_labelled()
    node.cat.slot.head.lex = filler
    debug("%s head lex <- %s", node, filler)
    unaries.append(node)
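
A hedged usage sketch: _unaries_ accumulates the recorded nodes for one derivation, and _filler_ is the lexical head installed into the node's slot. The node and filler names below are hypothetical stand-ins for values produced by the conversion pass:

    unaries = []
    # hypothetical call site: record a unary-rule result node with its filler lexeme
    register_unary(unaries, unary_result_node, filler_lexeme)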
Example 21
def label_partial_coordination(node, inside_np=False, ucp=False):
    node[0].category = ptb_to_cat(node[0])
    node.kids[0] = label(node[0], inside_np)

    debug('label_partial_coordination node.category: %s, %s', node.category,
          node)
    node[1].category = ptb_to_cat(node[1]) if ucp else node.category
    node.kids[1] = label(node[1], inside_np)

    return node
Example 22
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N

        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top,
                                   r'* $ *=SS',
                                   with_context=True,
                                   nonrecursive=True)
                if result:
                    _, ctx = result
                    ss = ctx.ss
                    debug("Creating null relativiser unary category: %s",
                          ss.category / ss.category)
                    replace_kid(
                        top.parent, top,
                        Node("NN", [top],
                             ss.category / ss.category,
                             head_index=0))
Example 23
    def fix_modification(self, node, p, s, t):
        debug("Fixing modification: %s", lrp_repr(node))
        S, P = s.category, p.category

        # If you don't strip the tag :m from the newly created child (new_kid),
        # the fix_modification pattern will match infinitely when tgrep visits new_kid
        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(P) / featureless(S)
        debug("Creating category %s", new_category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 24
    def fix_modification(self, node, p, s, t):
        debug("Fixing modification: %s", lrp_repr(node))
        S, P = s.category, p.category

        # If you don't strip the tag :m from the newly created child (new_kid),
        # the fix_modification pattern will match infinitely when tgrep visits new_kid
        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(P) / featureless(S)
        debug("Creating category %s", new_category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 25
    def relabel_bei_category(self, top, pred):
        # the particle 由 (you) is tagged as a preposition but acts as the BEI marker
        bei, ctx = get_first(top, r'*=S [ $ /LB/=BEI | $ ^"由"=BEI | $ ^"经"=BEI | $ ^"经过"=BEI | $ ^"随"=BEI | $ ^"为"=BEI | $ ^"以"=BEI | $ ^"经由"=BEI ]', with_context=True)
        s, bei = ctx.s, ctx.bei

        bei.category = bei.category.clone_with(right=s.category)
        bei.category.left._right = pred.category
        
        bei.parent.category = bei.category.left
        
        debug("new bei category: %s", bei.category)
        return bei
Example 26
    def do_tgrep_with_callback(root, pattern, callback, **kwargs):
        new_root = None
        for match_node, context in tgrep(root, pattern, with_context=True, **kwargs):
            debug("Callback %s matched", callback.__name__)
            if context:  # only supply a context if the expression binds variables
                # smash the case, variables in tgrep expressions are case insensitive
                result = callback(match_node, **smash_key_case(context))
            else:
                result = callback(match_node)

            # a new root will be returned if one has been installed
            if result:
                new_root = result

        return new_root or root
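
A hedged usage sketch: the callback receives the matched node plus any named captures as keyword arguments (case-smashed, so a =T capture arrives as t), and may return a replacement root. The pattern and names below are illustrative:

    def on_match(node, t=None, s=None):
        debug("matched %s (T=%s, S=%s)", node, t, s)
        # return a new root only if the fix installs one

    root = do_tgrep_with_callback(root, r'*=T $ *=S', on_match)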
Example 27
    def relabel_bei_category(self, top, pred):
        # the particle 由 (you) is tagged as a preposition but acts as the BEI marker
        bei, ctx = get_first(
            top,
            r'*=S [ $ /LB/=BEI | $ ^"由"=BEI | $ ^"经"=BEI | $ ^"经过"=BEI | $ ^"随"=BEI | $ ^"为"=BEI | $ ^"以"=BEI | $ ^"经由"=BEI ]',
            with_context=True)
        s, bei = ctx.s, ctx.bei

        bei.category = bei.category.clone_with(right=s.category)
        bei.category.left._right = pred.category

        bei.parent.category = bei.category.left

        debug("new bei category: %s", bei.category)
        return bei
Example 28
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N

        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(
                    node,
                    r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/',
                    with_context=True,
                    left_to_right=True)
                if not result:
                    debug(
                        'Could not find verbal category; did not create null relativiser.'
                    )
                    return

                top, context = result
                SS = context.ss.category

                debug("Creating null relativiser unary category: %s", SS / SS)
                replace_kid(top.parent, top,
                            Node("NN", [top], SS / SS, head_index=0))
Example 29
    def relabel_relativiser(self, node):
        # Relabel the relativiser category (NP/NP)\S to (NP/NP)\(S|NP)
        
        result = get_first(node, r'*=S $ /(DEC|SP)/=REL', with_context=True, left_to_right=True)

        if result is not None:
            _, context = result
            s, relativiser = context.s, context.rel
            
            relativiser.category = relativiser.category.clone_with(right=s.category)
            debug("New rel category: %s", relativiser.category)

            return True
        else:
            warn("Couldn't find relativiser under %s", node)
            return False
Example 30
    def do_tgrep_with_callback(root, pattern, callback, **kwargs):
        new_root = None
        for match_node, context in tgrep(root,
                                         pattern,
                                         with_context=True,
                                         **kwargs):
            debug("Callback %s matched", callback.__name__)
            if context:  # only supply a context if the expression binds variables
                # smash the case, variables in tgrep expressions are case insensitive
                result = callback(match_node, **smash_key_case(context))
            else:
                result = callback(match_node)

            # a new root will be returned if one has been installed
            if result: new_root = result

        return new_root or root
Example 31
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N
        
        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))
        
        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |  
            #      WHNP  CP     -->      CP              
            #            / \            /  \           
            #          IP  DEC         IP   DEC          
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)
        
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''
            
        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index
        
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)
                if result:
                    _, ctx = result; ss = ctx.ss
                    debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                    replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))
Example 32
    def fix_whword_topicalisation(self, node, p, s, t):
        debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(
            p, t,
            Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                 typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
                 head_index=0))

        index = get_trace_index_from_tag(t.tag)

        expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

        for top, ctx in find_all(p, expr, with_context=True):
            replace_kid(ctx.pp, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 33
    def fix_ba_object_gap(self, top, ba, c, d):
        # for trace_NP, ctx in find_all(d, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
        for trace_NP, ctx in find_all(d, r'{ { { /NP-OBJ/=T < ^/\*-/ } $ *=S } > { *=P > *=PP } }', with_context=True):
            debug("Found %s", trace_NP)
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            # self.fix_categories_starting_from(s, until=top)
        
        # debug("Fixing ba-construction object gap: %s" % lrp_repr(node))
        # 
        # for trace_NP, ctx in find_all(top, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
        #     debug("Found %s", trace_NP)
        #     pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # 
        #     self.fix_object_gap(pp, p, t, s)
        #     self.fix_categories_starting_from(s, until=c)
        #     
        self.relabel_ba_category(top, ba, s)
Example 34
 def fix_whword_topicalisation(self, node, p, s, t):
     debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
     # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
     t.tag = base_tag(t.tag, strip_cptb_tag=False)
     # create topicalised category based on the tag of T
     typeraise_t_category = ptb_to_cat(t)
     # insert a node with the topicalised category
     replace_kid(p, t, Node(
         base_tag(t.tag, strip_cptb_tag=False),
         [t],
         typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
         head_index=0))
         
     index = get_trace_index_from_tag(t.tag)
     
     expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index
     
     for top, ctx in find_all(p, expr, with_context=True):
         replace_kid(ctx.pp, ctx.p, ctx.s)
         self.fix_categories_starting_from(ctx.s, until=top)
Example 35
    def relabel_relativiser(self, node):
        # Relabel the relativiser category (NP/NP)\S to (NP/NP)\(S|NP)

        result = get_first(node,
                           r'*=S $ /(DEC|SP)/=REL',
                           with_context=True,
                           left_to_right=True)

        if result is not None:
            _, context = result
            s, relativiser = context.s, context.rel

            relativiser.category = relativiser.category.clone_with(
                right=s.category)
            debug("New rel category: %s", relativiser.category)

            return True
        else:
            warn("Couldn't find relativiser under %s", node)
            return False
Example 36
def multi_tgrep(deriv, query_callback_map):
    if not query_callback_map: raise RuntimeError('No query expressions given.')
    initialise()
    
    if _tgrep_debug:
        for expression in query_callback_map.keys():
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("\t%s %s", tok.type, tok.value)
    
    queries = [yacc.parse(expression) for expression in query_callback_map.keys()]
    for node in nodes(deriv):
        for query_expr, query_str in izip(queries, query_callback_map.keys()):
            context = Context()
            if query_expr.is_satisfied_by(node, context):
                if context:
                    query_callback_map[query_str](node, **smash_key_case(context))
                else:
                    query_callback_map[query_str](node)
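
A hedged usage sketch: the map's keys are tgrep expressions and its values are callbacks fired once per matching node, with any captures passed as keyword arguments; _deriv_ and the pattern are illustrative:

    def report_np(node, **captures):
        debug("NP match: %s", node)

    multi_tgrep(deriv, {r'/NP/': report_np})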
Example 37
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N
        
        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |  
            #      WHNP  CP     -->      CP              
            #            / \            /  \           
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''
            
        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(node, r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/', with_context=True, left_to_right=True)
                if not result:
                    debug('Could not find verbal category; did not create null relativiser.')
                    return
                
                top, context = result
                SS = context.ss.category
                
                debug("Creating null relativiser unary category: %s", SS/SS)
                replace_kid(top.parent, top, Node("NN", [top], SS/SS, head_index=0))
Example 38
    def fix_nongap_extraction(self, _, n, pred, k):
        node = n
        debug("Fixing nongap extraction: %s", pprint(node))
        debug("k %s", pprint(k))
        self.remove_null_element(node)

        index = get_trace_index_from_tag(k.tag)
        expr = (r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }' 
             % { 'tags': ModifierTagsRegex, 'index': index })

        # we use "<<" in the expression, because fix_*_topicalisation comes
        # before fix_nongap_extraction, and this can introduce an extra layer between
        # the phrasal tag and the trace
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # remove T from P
            # replace P with S
            self.fix_object_gap(pp, p, t, s)

            if not self.relabel_relativiser(pred):
                top, context = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
                ss = context.ss

                debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))
Example 39
    def fix_nongap_extraction(self, _, n, pred, k):
        node = n
        debug("Fixing nongap extraction: %s", pprint(node))
        debug("k %s", pprint(k))
        self.remove_null_element(node)

        index = get_trace_index_from_tag(k.tag)
        expr = (
            r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
            % {
                'tags': ModifierTagsRegex,
                'index': index
            })

        # we use "<<" in the expression, because fix_*_topicalisation comes
        # before fix_nongap_extraction, and this can introduce an extra layer between
        # the phrasal tag and the trace
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            # remove T from P
            # replace P with S
            self.fix_object_gap(pp, p, t, s)

            if not self.relabel_relativiser(pred):
                top, context = get_first(node,
                                         r'/[ICV]P/=TOP $ *=SS',
                                         with_context=True)
                ss = context.ss

                debug("Creating null relativiser unary category: %s",
                      ss.category / ss.category)
                replace_kid(
                    top.parent, top,
                    Node("NN", [top], ss.category / ss.category, head_index=0))
Example 40
def unify(L, R, ignore=False, copy_vars=True):
    '''Unifies the head slots of the categories _L_ and _R_: wherever one side's slot is
filled and the other's is not, the filled head is copied across. Returns the list of
assignments made. Unless _ignore_ is True, two filled slots with differing heads raise
UnificationException.'''
    assgs = []

    for (Ls, Rs) in izip(L.nested_compound_categories(), R.nested_compound_categories()):
        if Ls.slot.is_filled() and Rs.slot.is_filled():
            if (not ignore) and Ls.slot.head.lex != Rs.slot.head.lex:
                raise UnificationException('%s (%s) and %s (%s) both filled' % (Ls, Ls.slot, Rs, Rs.slot))

        elif Ls.slot.is_filled():
            debug('R %s <- L %s', Rs.slot, Ls.slot)
            
            Rs.slot.head.lex = Ls.slot.head.lex
            Rs.slot.head.filler = L
                    
            assgs.append( (Rs, Ls.slot.head.lex) )

        elif Rs.slot.is_filled():
            debug('L %s <- R %s', Ls.slot, Rs.slot)
                
            Ls.slot.head.lex = Rs.slot.head.lex
            Ls.slot.head.filler = R
                
            assgs.append( (Ls, Rs.slot.head.lex) )

        else: # both slots are variables, need to unify variables
            if Ls.slot == Rs.slot: continue

            debug('%s <-> %s (copy_vars=%s)', Ls.slot, Rs.slot, copy_vars)
            if copy_vars:
                Rs.slot.unify_heads(Ls.slot)

            assgs.append( (Rs, Ls) )

    return assgs
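
A hedged sketch of the contract: unify walks the paired sub-categories of _L_ and _R_, copies each filled head across to the unfilled side, and returns the assignments it made. The category values are assumed to come from the combinatory rule invoking it:

    assgs = unify(L, R)  # L, R: the two categories being combined (assumed)
    for target, value in assgs:
        debug('assignment: %s <- %s', target, value)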
Example 41
def tgrep(deriv, expression, with_context=False, nonrecursive=False, left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True, each match
yields a pair (node, context), and captured nodes are accessible by name through the dict-like
context. The context object is reused between matches, so make a copy to keep it around.'''
    if not expression: raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)
        
    if query is None:
        initialise()
            
        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)

        query = yacc.parse(expression)
        expression_cache[expression] = query
    
    # Default traversal method is right to left
    traversal_method = (single if nonrecursive  else 
                        nodes  if left_to_right else 
                        nodes_reversed)
                        
    context = Context()
    for node in traversal_method(deriv):
        context.clear()
        
        if query.is_satisfied_by(node, context):
            if _tgrep_debug: debug("%s matched %s", lrp_repr(node), query)
            if with_context:
                yield node, context
            else: yield node
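
A hedged usage sketch, using the capture syntax seen throughout this section; _deriv_ is assumed to be a derivation tree, and attribute access on the context follows the fix_* methods above. Per the docstring, copy the context if it must outlive the iteration:

    for node, ctx in tgrep(deriv, r'/NP-SBJ/=T $ *=S', with_context=True):
        debug('T=%s S=%s', ctx.t, ctx.s)  # captures bound by =T and =S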
Example 42
    def fix_ba_object_gap(self, top, ba, c, d):
        # for trace_NP, ctx in find_all(d, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
        for trace_NP, ctx in find_all(
                d,
                r'{ { { /NP-OBJ/=T < ^/\*-/ } $ *=S } > { *=P > *=PP } }',
                with_context=True):
            debug("Found %s", trace_NP)
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            # self.fix_categories_starting_from(s, until=top)

        # debug("Fixing ba-construction object gap: %s" % lrp_repr(node))
        #
        # for trace_NP, ctx in find_all(top, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
        #     debug("Found %s", trace_NP)
        #     pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        #
        #     self.fix_object_gap(pp, p, t, s)
        #     self.fix_categories_starting_from(s, until=c)
        #
        self.relabel_ba_category(top, ba, s)
Example 43
    def fix_topicalisation_with_gap(self, node, p, s, t):
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
              lrp_repr(node), pprint(s), pprint(t))

        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(
            p, t,
            Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                 typeraise(typeraise_t_category, S, TR_TOPICALISATION),
                 head_index=0))

        index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

        for top, ctx in find_all(s, expr, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 44
def multi_tgrep(deriv, query_callback_map):
    if not query_callback_map:
        raise RuntimeError('No query expressions given.')
    initialise()

    if _tgrep_debug:
        for expression in query_callback_map.keys():
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("\t%s %s", tok.type, tok.value)

    queries = [
        yacc.parse(expression) for expression in query_callback_map.keys()
    ]
    for node in nodes(deriv):
        for query_expr, query_str in izip(queries, query_callback_map.keys()):
            context = Context()
            if query_expr.is_satisfied_by(node, context):
                if context:
                    query_callback_map[query_str](node,
                                                  **smash_key_case(context))
                else:
                    query_callback_map[query_str](node)
Example 45
    def output(self):
        appl_accepted, _ = self.appl_only_filter.compute_accepts_and_rejects()
        null_accepted, _ = self.null_only_filter.compute_accepts_and_rejects()
        
        # Start by collecting the manually annotated slashes from the file
        aggregate = self.parse_annoform(self.manual_fn)

        for (accepted_set, mode_name) in ( (appl_accepted, 'apply'), (null_accepted, 'null') ):
            # sort by applied frequency, most frequent first
            for (cat_string, slash_index, applied_frequency, total_frequency) in \
                sorted(accepted_set, key=lambda this: this[2], reverse=True):
                
#                 aggregate[re.sub(r'[-*@.]', '', cat_string)][slash_index] = mode_name
                # If there is a slash-mode entry in _aggregate_ already (from the manual list),
                # do not overwrite it.
                slash_to_mode_map = aggregate[re.sub(r'[-*@.]', '', cat_string)]
                if slash_index not in slash_to_mode_map:
                    debug("Slash %d of %s will have mode %s", slash_index, cat_string, mode_name)
                    slash_to_mode_map[slash_index] = mode_name
                else:
                    debug("Not overwriting slash %d of %s", slash_index, cat_string)
                
        for (category_string, slashes) in aggregate.iteritems():
            for (slash_index, mode_name) in slashes.iteritems():
                print " ".join((category_string, mode_name, str(slash_index)))
Example 46
def tgrep(deriv,
          expression,
          with_context=False,
          nonrecursive=False,
          left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True, each match
yields a pair (node, context), and captured nodes are accessible by name through the dict-like
context. The context object is reused between matches, so make a copy to keep it around.'''
    if not expression: raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)

    if query is None:
        initialise()

        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)

        query = yacc.parse(expression)
        expression_cache[expression] = query

    # Default traversal method is right to left
    traversal_method = (single if nonrecursive else
                        nodes if left_to_right else nodes_reversed)

    context = Context()
    for node in traversal_method(deriv):
        context.clear()

        if query.is_satisfied_by(node, context):
            if _tgrep_debug: debug("%s matched %s", lrp_repr(node), query)
            if with_context:
                yield node, context
            else:
                yield node
Example 47
# Chinese CCGbank conversion
# ==========================
# (c) 2008-2012 Daniel Tse <*****@*****.**>
# University of Sydney

# Use of this software is governed by the attached "Chinese CCGbank converter Licence Agreement"
# supplied in the Chinese CCGbank conversion distribution. If the LICENCE file is missing, please
# notify the maintainer Daniel Tse <*****@*****.**>.

try:
    import ply.lex as lex
    import ply.yacc as yacc
except ImportError:
    import lex, yacc

from munge.util.err_utils import debug, info
import munge.proc.tgrep.parse as parse

if __name__ == '__main__':
    import sys
    lex.lex(module=parse)

    expr = sys.argv[1]
    debug("Lexing %s", expr)
    lex.input(expr)
    for tok in iter(lex.token, None):
        debug("%s %s", tok.type, tok.value)
Example 48
# Chinese CCGbank conversion
# ==========================
# (c) 2008-2012 Daniel Tse <*****@*****.**>
# University of Sydney

# Use of this software is governed by the attached "Chinese CCGbank converter Licence Agreement"
# supplied in the Chinese CCGbank conversion distribution. If the LICENCE file is missing, please
# notify the maintainer Daniel Tse <*****@*****.**>.

try:
    import ply.lex as lex
    import ply.yacc as yacc
except ImportError:
    import lex, yacc
    
from munge.util.err_utils import debug, info
import munge.proc.tgrep.parse as parse

if __name__ == '__main__':
    import sys
    lex.lex(module=parse)
    
    expr = sys.argv[1]
    debug("Lexing %s", expr)
    lex.input(expr)
    for tok in iter(lex.token, None):
        debug("%s %s", tok.type, tok.value)
Example 49
 def accept_leaf(self, leaf):
     cat_string_without_modes = leaf.cat.__repr__(show_modes=False) # Hide modes
     if cat_string_without_modes in self.substs:
         debug("Substituting %s with %s", cat_string_without_modes, self.substs[cat_string_without_modes])
         leaf.cat = self.substs[cat_string_without_modes]
Example 50
    def fix_reduced_long_bei_gap(self, node, *args, **kwargs):
        debug("Fixing reduced long bei gap: %s", lrp_repr(node))

        return self.fix_long_bei_gap(node, *args, **update(kwargs,
                                                           reduced=True))
Example 51
    def fix_categories_starting_from(self, node, until):
        '''Adjusts category labels from _node_ to _until_ (not inclusive) to obtain the correct
CCG analysis.'''
        while node is not until:
            # Only fix binary rules
            if (not node.parent) or node.parent.count() < 2: break

            l, r, p = node.parent[0], node.parent[1], node.parent
            L, R, P = (n.category for n in (l, r, p))
            debug("L: %s R: %s P: %s", L, R, P)

            applied_rule = analyse(L, R, P)
            debug("[ %s'%s' %s'%s' -> %s'%s' ] %s", L, ''.join(l.text()), R,
                  ''.join(r.text()), P, ''.join(p.text()), applied_rule)

            if applied_rule is None:
                debug("invalid rule %s %s -> %s", L, R, P)

                if R.is_complex() and R.left.is_complex() and L == R.left.right:
                    # L       (X|L)|Y -> X|Y becomes
                    # X|(X|L) (X|L)|Y -> X|Y
                    T = R.left.left
                    new_category = typeraise(L, T, TR_FORWARD)  #T/(T|L)
                    node.parent[0] = Node(l.tag, [l],
                                          new_category,
                                          head_index=0)

                    new_parent_category = fcomp(new_category, R)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

                elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                    # (X|R)|Y R       -> X|Y  becomes
                    # (X|R)|Y X|(X|R) -> X|Y
                    T = L.left.left
                    new_category = typeraise(R, T, TR_BACKWARD)  #T|(T/R)
                    node.parent[1] = Node(r.tag, [r],
                                          new_category,
                                          head_index=0)

                    new_parent_category = bxcomp(L, new_category)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

                # conj R -> P
                # Make P into R[conj]
                # L cannot be the comma category (,), otherwise we get a mis-analysis
                # in 2:22(5)
                if str(L) in ('conj', 'LCM'):
                    p.category = R.clone_adding_feature('conj')
                    debug("New category: %s", p.category)

                # L R[conj] -> P
                elif R.has_feature('conj'):
                    new_L = L.clone()

                    r.category = new_L.clone_adding_feature('conj')
                    p.category = new_L

                    debug("New category: %s", new_L)

                elif L.is_leaf():
                    # , R -> P[conj] becomes , R -> R[conj]
                    if P.has_feature('conj') and l.tag in ('PU', 'CC'):  # treat as partial coordination
                        debug("Fixing coordination: %s" % P)
                        p.category = r.category.clone_adding_feature('conj')
                        debug("new parent category: %s" % p.category)

                    # , R -> P becomes , R -> R
                    elif l.tag == "PU" and not P.has_feature(
                            'conj'):  # treat as absorption
                        debug("Fixing left absorption: %s" % P)
                        p.category = r.category

                    # L       (X|L)|Y -> X|Y becomes
                    # X|(X|L) (X|L)|Y -> X|Y
                    elif R.is_complex() and R.left.is_complex() and L == R.left.right:
                        T = R.left.left
                        new_category = typeraise(L, T, TR_FORWARD)  #T/(T|L)
                        node.parent[0] = Node(l.tag, [l],
                                              new_category,
                                              head_index=0)

                        new_parent_category = fcomp(new_category, R)
                        if new_parent_category:
                            debug("new parent category: %s",
                                  new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)

                elif R.is_leaf():
                    # R , -> P becomes R , -> R
                    if r.tag == "PU":  # treat as absorption
                        debug("Fixing right absorption: %s" % P)
                        p.category = l.category

                    # (X|R)|Y R       -> X|Y  becomes
                    # (X|R)|Y X|(X|R) -> X|Y
                    elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                        T = L.left.left
                        new_category = typeraise(R, T, TR_BACKWARD)  #T|(T/R)
                        node.parent[1] = Node(r.tag, [r],
                                              new_category,
                                              head_index=0)

                        new_parent_category = bxcomp(L, new_category)
                        if new_parent_category:
                            debug("new parent category: %s",
                                  new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)

                else:
                    new_parent_category = None

                    # try typeraising fix
                    # T/(T/X) (T\A)/X -> T can be fixed:
                    # (T\A)/((T\A)/X) (T\A)/X -> T\A
                    if self.is_topicalisation(L) and (L.right.right == R.right
                                                      and P == L.left
                                                      and P == R.left.left):
                        T_A = R.left
                        X = R.right

                        l.category = T_A / (T_A / X)
                        new_parent_category = T_A

                    # (X|X)|Z Y       -> X becomes
                    # (X|X)|Z X|(X|X) -> X|Z
                    elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                        T = L.left.left
                        new_category = typeraise(
                            R, R, TR_BACKWARD, strip_features=False)  #T/(T|L)
                        node.parent[1] = Node(r.tag, [r],
                                              new_category,
                                              head_index=0)

                        new_parent_category = bxcomp(L, new_category)
                        if new_parent_category:
                            debug("new parent category: %s",
                                  new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)

                    # Generalise over right modifiers of verbal categories (S[dcl]\X)$
                    elif self.is_verbal_category(L) and L.is_complex() and L.left.is_complex():
                        T = L.left.right
                        new_category = typeraise(R, T, TR_BACKWARD)
                        debug('Trying out %s', new_category)

                        if bxcomp(L, new_category):
                            node.parent[1] = Node(r.tag, [r],
                                                  new_category,
                                                  head_index=0)
                            new_parent_category = bxcomp(L, new_category)

                    # Last ditch: try all of the composition rules to generalise over L R -> P
                    if not new_parent_category:
                        # having fxcomp creates bad categories in NP(IP DEC) construction (1:97(3))
                        # but, we need fxcomp to create the gap NP-TPC NP-SBJ(*T*) VP, so allow it when the rhs doesn't look like the DEC category
                        new_parent_category = (
                            fcomp(L, R)
                            or bcomp(L, R, when=not self.is_relativiser(R))
                            or bxcomp(L, R, when=not self.is_relativiser(R))  # or bxcomp2(L, R, when=self.is_verbal_category(L))
                            or fxcomp(L, R, when=not self.is_relativiser(R)))

                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category
                    else:
                        debug("couldn't fix, skipping")

            node = node.parent
            debug('')
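The repairs in this function hinge on type-raising one daughter and then recombining by (crossed) composition. As a rough illustration only, with toy Atom/Complex classes standing in for the project's actual Category implementation: forward raising takes X to T/(T\X), and backward raising takes X to T\(T/X).

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Atom:
        name: str
        def __str__(self):
            return self.name

    @dataclass(frozen=True)
    class Complex:
        left: object
        slash: str          # '/' (forward) or '\' (backward)
        right: object
        def __str__(self):
            return '(%s%s%s)' % (self.left, self.slash, self.right)

    def typeraise_forward(X, T):
        # X becomes T/(T\X): it can now consume a following T\X by application
        return Complex(T, '/', Complex(T, '\\', X))

    def typeraise_backward(X, T):
        # X becomes T\(T/X): it can now consume a preceding T/X by application
        return Complex(T, '\\', Complex(T, '/', X))

    NP, S = Atom('NP'), Atom('S')
    print(typeraise_forward(NP, S))    # prints (S/(S\NP))
    print(typeraise_backward(NP, S))   # prints (S\(S/NP))

In the fixes above, the raised daughter is then composed with its sister via fcomp or bxcomp to recover a category for the parent node.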
Example No. 52
    def fix_short_bei_io_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei io gap: pp:%s\np:%s\ns:%s", lrp_repr(pp), lrp_repr(p), lrp_repr(s))

        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=pp)
        bei.category = bei.category.clone_with(right=beis.category)
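The replace_kid helper used here splices a rebuilt subtree in place of an existing child. A minimal sketch of what such a helper might look like, assuming nodes expose kids and parent attributes; the project's real implementation may differ.

    def replace_kid(parent, old, new):
        # Swap _old_ for _new_ among the parent's children, repairing the
        # child's back-pointer; identity comparison avoids matching an
        # equal-but-distinct subtree elsewhere under the parent.
        for i, kid in enumerate(parent.kids):
            if kid is old:
                parent.kids[i] = new
                new.parent = parent
                return
        raise ValueError('node to replace is not a child of the given parent')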
Example No. 53
    def fix_categories_starting_from(self, node, until):
        '''Adjusts category labels from _node_ to _until_ (not inclusive) to obtain the correct
CCG analysis.'''
        while node is not until:
            # Only fix binary rules
            if (not node.parent) or node.parent.count() < 2: break

            l, r, p = node.parent[0], node.parent[1], node.parent
            L, R, P = (n.category for n in (l, r, p))
            debug("L: %s R: %s P: %s", L, R, P)

            applied_rule = analyse(L, R, P)
            debug("[ %s'%s' %s'%s' -> %s'%s' ] %s",
                L, ''.join(l.text()), R, ''.join(r.text()), P, ''.join(p.text()),
                applied_rule)

            if applied_rule is None:
                debug("invalid rule %s %s -> %s", L, R, P)
                
                if R.is_complex() and R.left.is_complex() and L == R.left.right:
                    # L       (X|L)|Y -> X|Y becomes
                    # X|(X|L) (X|L)|Y -> X|Y
                    T = R.left.left
                    new_category = typeraise(L, T, TR_FORWARD)#T/(T|L)
                    node.parent[0] = Node(l.tag, [l], new_category, head_index=0)

                    new_parent_category = fcomp(new_category, R)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)
                
                elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                    # (X|R)|Y R       -> X|Y  becomes
                    # (X|R)|Y X|(X|R) -> X|Y
                    T = L.left.left
                    new_category = typeraise(R, T, TR_BACKWARD)#T|(T/R)
                    node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                    new_parent_category = bxcomp(L, new_category)
                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category

                    debug("New category: %s", new_category)

                # conj R -> P
                # Make P into R[conj]
                # L cannot be the comma category (,), otherwise we get a mis-analysis
                # in 2:22(5)
                if str(L) in ('conj', 'LCM'):
                    p.category = R.clone_adding_feature('conj')
                    debug("New category: %s", p.category)

                # L R[conj] -> P
                elif R.has_feature('conj'):
                    new_L = L.clone()

                    r.category = new_L.clone_adding_feature('conj')
                    p.category = new_L

                    debug("New category: %s", new_L)

                elif L.is_leaf():
                    # , R -> P[conj] becomes , R -> R[conj]
                    if P.has_feature('conj') and l.tag in ('PU', 'CC'): # treat as partial coordination
                        debug("Fixing coordination: %s" % P)
                        p.category = r.category.clone_adding_feature('conj')
                        debug("new parent category: %s" % p.category)
                        
                    # , R -> P becomes , R -> R
                    elif l.tag == "PU" and not P.has_feature('conj'): # treat as absorption
                        debug("Fixing left absorption: %s" % P)
                        p.category = r.category

                    # L       (X|L)|Y -> X|Y becomes
                    # X|(X|L) (X|L)|Y -> X|Y
                    elif R.is_complex() and R.left.is_complex() and L == R.left.right:
                        T = R.left.left
                        new_category = typeraise(L, T, TR_FORWARD)#T/(T|L)
                        node.parent[0] = Node(l.tag, [l], new_category, head_index=0)

                        new_parent_category = fcomp(new_category, R)
                        if new_parent_category:
                            debug("new parent category: %s", new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)
                        
                elif R.is_leaf():
                    # R , -> P becomes R , -> R
                    if r.tag == "PU": # treat as absorption
                        debug("Fixing right absorption: %s" % P)
                        p.category = l.category

                    # (X|R)|Y R       -> X|Y  becomes
                    # (X|R)|Y X|(X|R) -> X|Y
                    elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                        T = L.left.left
                        new_category = typeraise(R, T, TR_BACKWARD)#T|(T/R)
                        node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                        new_parent_category = bxcomp(L, new_category)
                        if new_parent_category:
                            debug("new parent category: %s", new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)

                else:
                    new_parent_category = None
                    
                    # try typeraising fix
                    # T/(T/X) (T\A)/X -> T can be fixed:
                    # (T\A)/((T\A)/X) (T\A)/X -> T\A
                    if self.is_topicalisation(L) and (
                        L.right.right == R.right and
                        P == L.left and P == R.left.left):
                        T_A = R.left
                        X = R.right

                        l.category = T_A/(T_A/X)
                        new_parent_category = T_A
                        
                    # (X|X)|Z Y       -> X becomes
                    # (X|X)|Z X|(X|X) -> X|Z
                    elif L.is_complex() and L.left.is_complex() and R == L.left.right:
                        T = L.left.left
                        new_category = typeraise(R, R, TR_BACKWARD, strip_features=False)#T/(T|L)
                        node.parent[1] = Node(r.tag, [r], new_category, head_index=0)

                        new_parent_category = bxcomp(L, new_category)
                        if new_parent_category:
                            debug("new parent category: %s", new_parent_category)
                            p.category = new_parent_category

                        debug("New category: %s", new_category)
                                            
                    # Generalise over right modifiers of verbal categories (S[dcl]\X)$
                    elif self.is_verbal_category(L) and L.is_complex() and L.left.is_complex():
                        T = L.left.right
                        new_category = typeraise(R, T, TR_BACKWARD)
                        debug('Trying out %s', new_category)
                        
                        if bxcomp(L, new_category):
                            node.parent[1] = Node(r.tag, [r], new_category, head_index=0)
                            new_parent_category = bxcomp(L, new_category)

                    # Last ditch: try all of the composition rules to generalise over L R -> P
                    if not new_parent_category:
                        # having fxcomp creates bad categories in NP(IP DEC) construction (1:97(3))
                        # but, we need fxcomp to create the gap NP-TPC NP-SBJ(*T*) VP, so allow it when the rhs doesn't look like the DEC category
                        new_parent_category = (fcomp(L, R) or bcomp(L, R, when=not self.is_relativiser(R)) 
                                            or bxcomp(L, R, when=not self.is_relativiser(R)) #or bxcomp2(L, R, when=self.is_verbal_category(L)) 
                                            or fxcomp(L, R, when=not self.is_relativiser(R)))

                    if new_parent_category:
                        debug("new parent category: %s", new_parent_category)
                        p.category = new_parent_category
                    else:
                        debug("couldn't fix, skipping")

            node = node.parent
            debug('')
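Stripped of the individual rewrite cases, the control flow above is a bottom-up walk that stops at _until_ and only touches invalid binary steps. A simplified skeleton, with analyse and the combinator rules passed in as parameters since they belong to the surrounding project rather than to this sketch:

    def repair_upwards(node, until, analyse, rules):
        # Walk from a rewritten node up towards _until_, revalidating each
        # binary derivation step along the way.
        while node is not until:
            parent = node.parent
            if parent is None or parent.count() < 2:
                break                                # only binary rules are fixed
            L, R, P = parent[0].category, parent[1].category, parent.category
            if analyse(L, R, P) is None:             # this step is no longer licensed
                for rule in rules:                   # e.g. fcomp, bcomp, bxcomp, fxcomp
                    new_P = rule(L, R)
                    if new_P:
                        parent.category = new_P
                        break
            node = parent

The full function interleaves this walk with the special-case rewrites (absorption, coordination, type-raising) before falling back to the plain composition rules.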
Example No. 54
    def fix_reduced_long_bei_gap(self, node, *args, **kwargs):
        debug("Fixing reduced long bei gap: %s", lrp_repr(node))

        return self.fix_long_bei_gap(node, *args, **update(kwargs, reduced=True))
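The update call here is presumably a non-mutating merge of extra keyword arguments into kwargs; a plausible stand-in:

    def update(d, **extra):
        # Return a copy of d with the extra keyword arguments merged in,
        # leaving the caller's mapping untouched.
        merged = dict(d)
        merged.update(extra)
        return merged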
Example No. 55
    def fix_ip_app(self, p, a, s):
        debug("Fixing IP-APP NX: %s", lrp_repr(p))
        new_kid = copy(a)
        new_kid.tag = base_tag(new_kid.tag)  # relabel to stop infinite matching
        replace_kid(p, a, Node("NN", [new_kid], s.category / s.category, head_index=0))
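base_tag strips the functional-tag suffix so that the relabelled node no longer matches the IP-APP pattern, while the new unary node gets the modifier category s.category/s.category (an X/X adjunct over the sister's category). A hypothetical stand-in for base_tag, assuming Penn-style hyphen and equals suffixes; the project's real helper may handle further decorations:

    import re

    def base_tag(tag):
        # 'IP-APP' -> 'IP', 'NP-SBJ-1' -> 'NP'
        return re.split(r'[-=]', tag, maxsplit=1)[0]

    assert base_tag('IP-APP') == 'IP'
    assert base_tag('NP-SBJ-1') == 'NP'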