Example 1
    def fix_short_bei_io_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei io gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=pp)
        bei.category = bei.category.clone_with(right=beis.category)
Example 2
    def fix_short_bei_obj_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei object gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        # simple test case in 29:71(3) for bei with extracted NP
        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=bei.parent[1])
        bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 3
    def fix_short_bei_obj_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei object gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        # simple test case in 29:71(3) for bei with extracted NP
        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=bei.parent[1])
        bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 4
def replace_kid(node, old, new):
    # make sure you go through Node#__setitem__, not by modifying Node.kids directly,
    # otherwise parent pointers won't get updated
    try:
        i = node.kids.index(old)
        node[i] = new
    except ValueError:
        raise MungeException(
            "Tried to replace:\n\t%s\nwith:\t%s\nactual kids:\n\t%s" %
            (lrp_repr(old), lrp_repr(new), '\n\t'.join(
                (lrp_repr(kid) for kid in node.kids))))
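The comment above is the whole point of replace_kid: the assignment has to go through Node.__setitem__ so that the new child's parent pointer gets set. A minimal sketch of that invariant, using a toy Node class (hypothetical, not the project's actual implementation):

class Node(object):
    """Toy stand-in for the real Node class; for illustration only."""
    def __init__(self, tag, kids=None):
        self.tag = tag
        self.parent = None
        self.kids = kids or []
        for kid in self.kids:
            kid.parent = self

    def __setitem__(self, i, new_kid):
        # routing replacement through __setitem__ keeps parent pointers consistent
        self.kids[i] = new_kid
        new_kid.parent = self

parent = Node('NP', [Node('WHNP'), Node('CP')])
replacement = Node('CP')
parent[1] = replacement   # what replace_kid does: replacement.parent is now parent
orphan = Node('IP')
parent.kids[1] = orphan   # bypasses __setitem__: orphan.parent stays None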
Example 5
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)

        if n:
            index = get_trace_index_from_tag(n.tag)
        else:
            index = r'\*'

        expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index
        trace_NP, ctx = get_first(top, expr, with_context=True)

        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        self.fix_categories_starting_from(s, until=top)
        self.relabel_bei_category(top, pred)

        top.category = top[0].category.left

        debug("done %s", pprint(top))
Example 6
    def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
        debug("Fixing long bei gap: %s", lrp_repr(node))

        if not reduced:
            self.remove_null_element(top)
            
        if n:
            index = get_trace_index_from_tag(n.tag)
        else:
            index = r'\*'

        expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index
        trace_NP, ctx = get_first(top, expr, with_context=True)

        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        self.fix_categories_starting_from(s, until=top)
        self.relabel_bei_category(top, pred)
        
        top.category = top[0].category.left

        debug("done %s", pprint(top))
Example 7
    def fix_topicalisation_with_gap(self, node, p, s, t):
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t))

        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(p, t, Node(
            base_tag(t.tag, strip_cptb_tag=False),
            [t],
            typeraise(typeraise_t_category, S, TR_TOPICALISATION),
            head_index=0))

        index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

        for top, ctx in find_all(s, expr, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 8
    def fix_ip_app(self, p, a, s):
        debug("Fixing IP-APP NX: %s", lrp_repr(p))
        new_kid = copy(a)
        new_kid.tag = base_tag(new_kid.tag)  # relabel to stop infinite matching
        replace_kid(
            p, a, Node("NN", [new_kid], s.category / s.category, head_index=0))
Example 9
def tgrep(deriv, expression, with_context=False, nonrecursive=False, left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True, each matched node
yields a pair (node, context), and captured nodes are accessible by name using the dict-like context.
If the user wants to keep context around, a copy must be made.'''
    if not expression: raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)
        
    if query is None:
        initialise()
            
        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)

        query = yacc.parse(expression)
        expression_cache[expression] = query
    
    # Default traversal method is right to left
    traversal_method = (single if nonrecursive  else 
                        nodes  if left_to_right else 
                        nodes_reversed)
                        
    context = Context()
    for node in traversal_method(deriv):
        context.clear()
        
        if query.is_satisfied_by(node, context):
            if _tgrep_debug: debug("%s matched %s", lrp_repr(node), query)
            if with_context:
                yield node, context
            else: yield node
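A hedged usage sketch of tgrep with with_context=True; the derivation tree deriv is assumed to have been loaded elsewhere, and the capture syntax follows the expressions used in the fix_* methods above. Because the same Context object is cleared and reused between matches, a copy is taken when a match needs to outlive the loop:

from copy import copy

# deriv: a derivation tree obtained elsewhere (assumed)
expr = r'*=PP < { *=P < { /NP-SBJ/=T $ *=S } }'

kept = []
for node, ctx in tgrep(deriv, expr, with_context=True):
    debug("matched %s; sibling is %s", lrp_repr(ctx.t), lrp_repr(ctx.s))
    kept.append((node, copy(ctx)))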
Example 10
    def fix_short_bei_subj_gap(self, node, bei, pp, p, t, s):
        debug("fixing short bei subject gap: %s", lrp_repr(pp))
        # take the VP sibling of SB
        # replace T with S
        # this analysis isn't entirely correct
        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, pp)
        bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 11
    def fix_short_bei_subj_gap(self, node, bei, pp, p, t, s):
        debug("fixing short bei subject gap: %s", lrp_repr(pp))
        # take the VP sibling of SB
        # replace T with S
        # this analysis isn't entirely correct
        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, pp)
        bei.category = bei.category.clone_with(right=bei.parent[1].category)
Example 12
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N

        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(
                    node,
                    r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/',
                    with_context=True,
                    left_to_right=True)
                if not result:
                    debug(
                        'Could not find verbal category; did not create null relativiser.'
                    )
                    return

                top, context = result
                SS = context.ss.category

                debug("Creating null relativiser unary category: %s", SS / SS)
                replace_kid(top.parent, top,
                            Node("NN", [top], SS / SS, head_index=0))
Example 13
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N

        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |
            #      WHNP  CP     -->      CP
            #            / \            /  \
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top,
                                   r'* $ *=SS',
                                   with_context=True,
                                   nonrecursive=True)
                if result:
                    _, ctx = result
                    ss = ctx.ss
                    debug("Creating null relativiser unary category: %s",
                          ss.category / ss.category)
                    replace_kid(
                        top.parent, top,
                        Node("NN", [top],
                             ss.category / ss.category,
                             head_index=0))
Example 14
    def fix_modification(self, node, p, s, t):
        debug("Fixing modification: %s", lrp_repr(node))
        S, P = s.category, p.category

        # If you don't strip the tag :m from the newly created child (new_kid),
        # the fix_modification pattern will match infinitely when tgrep visits new_kid
        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(P) / featureless(S)
        debug("Creating category %s", new_category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 15
    def fix_modification(self, node, p, s, t):
        debug("Fixing modification: %s", lrp_repr(node))
        S, P = s.category, p.category

        # If you don't strip the tag :m from the newly created child (new_kid),
        # the fix_modification pattern will match infinitely when tgrep visits new_kid
        new_kid = copy(t)
        new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

        new_category = featureless(P) / featureless(S)
        debug("Creating category %s", new_category)
        replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
Example 16
    def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N
        
        debug("%s", reduced)
        node = n
        debug("Fixing subject extraction: %s", lrp_repr(node))

        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |  
            #      WHNP  CP     -->      CP              
            #            / \            /  \           
            #          IP  DEC         IP   DEC
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''
            
        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

            if not self.relabel_relativiser(pred):
                # TOP is the shrunk VP
                # after shrinking, we can get VV or VA here
                # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
                result = get_first(node, r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/', with_context=True, left_to_right=True)
                if not result:
                    debug('Could not find verbal category; did not create null relativiser.')
                    return
                
                top, context = result
                SS = context.ss.category
                
                debug("Creating null relativiser unary category: %s", SS/SS)
                replace_kid(top.parent, top, Node("NN", [top], SS/SS, head_index=0))
Example 17
    def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
        global use_bare_N
        
        node = n
        debug("Fixing object extraction: %s", lrp_repr(node))
        
        # We only want this if we are using the N -> NP unary rule
        # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
        if use_bare_N and pred.tag.startswith('NP'):
            # Fix for the NP(VP de) case:
            # ---------------------------
            #         NP                 NP
            #        /  \                |  
            #      WHNP  CP     -->      CP              
            #            / \            /  \           
            #          IP  DEC         IP   DEC          
            if not pred.is_leaf():
                pred.kids.pop(0)
                pred.head_index = 0
        else:
            if not reduced:
                self.remove_null_element(node)
        
        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''
            
        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index
        
        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)
                if result:
                    _, ctx = result; ss = ctx.ss
                    debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                    replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))
Example 18
    def fix_whword_topicalisation(self, node, p, s, t):
        debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(p, t, Node(
            base_tag(t.tag, strip_cptb_tag=False),
            [t],
            typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
            head_index=0))

        index = get_trace_index_from_tag(t.tag)

        expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

        for top, ctx in find_all(p, expr, with_context=True):
            replace_kid(ctx.pp, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 19
    def fix_whword_topicalisation(self, node, p, s, t):
        debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))
        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(
            p, t,
            Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                 typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
                 head_index=0))

        index = get_trace_index_from_tag(t.tag)

        expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

        for top, ctx in find_all(p, expr, with_context=True):
            replace_kid(ctx.pp, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 20
def tgrep(deriv,
          expression,
          with_context=False,
          nonrecursive=False,
          left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True, each matched node
yields a pair (node, context), and captured nodes are accessible by name using the dict-like context.
If the user wants to keep context around, a copy must be made.'''
    if not expression: raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)

    if query is None:
        initialise()

        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)

        query = yacc.parse(expression)
        expression_cache[expression] = query

    # Default traversal method is right to left
    traversal_method = (single if nonrecursive else
                        nodes if left_to_right else nodes_reversed)

    context = Context()
    for node in traversal_method(deriv):
        context.clear()

        if query.is_satisfied_by(node, context):
            if _tgrep_debug: debug("%s matched %s", lrp_repr(node), query)
            if with_context:
                yield node, context
            else:
                yield node
Example 21
    def fix_topicalisation_with_gap(self, node, p, s, t):
        debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
              lrp_repr(node), pprint(s), pprint(t))

        # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
        t.tag = base_tag(t.tag, strip_cptb_tag=False)
        # create topicalised category based on the tag of T
        typeraise_t_category = ptb_to_cat(t)
        # insert a node with the topicalised category
        replace_kid(
            p, t,
            Node(base_tag(t.tag, strip_cptb_tag=False), [t],
                 typeraise(typeraise_t_category, S, TR_TOPICALISATION),
                 head_index=0))

        index = get_trace_index_from_tag(t.tag)

        # attested gaps:
        # 575 IP-TPC:t
        # 134 NP-TPC:t
        #  10 IP-Q-TPC:t
        #   8 CP-TPC:t
        #   4 NP-PN-TPC:t
        #   2 QP-TPC:t
        #   2 NP-TTL-TPC:t
        #   1 PP-TPC:t
        #   1 IP-IJ-TPC:t
        #   1 INTJ-TPC:t
        #   1 CP-Q-TPC:t
        #   1 CP-CND-TPC:t
        expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

        for top, ctx in find_all(s, expr, with_context=True):
            debug('top: %s', pprint(top))
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, until=top)
Example 22
    def fix_reduced_long_bei_gap(self, node, *args, **kwargs):
        debug("Fixing reduced long bei gap: %s", lrp_repr(node))

        return self.fix_long_bei_gap(node, *args, **update(kwargs,
                                                           reduced=True))
Example 23
    def fix_reduced_long_bei_gap(self, node, *args, **kwargs):
        debug("Fixing reduced long bei gap: %s", lrp_repr(node))

        return self.fix_long_bei_gap(node, *args, **update(kwargs, reduced=True))
Example 24
    def fix_ip_app(self, p, a, s):
        debug("Fixing IP-APP NX: %s", lrp_repr(p))
        new_kid = copy(a)
        new_kid.tag = base_tag(new_kid.tag)  # relabel to stop infinite matching
        replace_kid(
            p, a, Node("NN", [new_kid], s.category / s.category, head_index=0))
Example 25
    def fix_short_bei_io_gap(self, node, pp, bei, beis, t, p, s):
        debug("fixing short bei io gap: pp:%s\np:%s\ns:%s", lrp_repr(pp),
              lrp_repr(p), lrp_repr(s))

        replace_kid(pp, p, s)
        self.fix_categories_starting_from(s, until=pp)
        bei.category = bei.category.clone_with(right=beis.category)
Example 26
def replace_kid(node, old, new):
    # make sure you go through Node#__setitem__, not by modifying Node.kids directly,
    # otherwise parent pointers won't get updated 
    try:
        i = node.kids.index(old)
        node[i] = new
    except ValueError:
        raise MungeException(
            "Tried to replace:\n\t%s\nwith:\t%s\nactual kids:\n\t%s" %
            (lrp_repr(old), lrp_repr(new),
             '\n\t'.join(lrp_repr(kid) for kid in node.kids)))