def fix_rnr(self, rnr, g):
    # G is the node dominating all the conjuncts
    rnr_tags = []
    for node, ctx in find_all(g, r'/:c/a', with_context=True):
        for rnr in find_all(node, r'^/\*RNR\*/'):
            rnr_tags.append(get_trace_index_from_tag(rnr.lex))

    for index in rnr_tags:
        for node, ctx in find_all(g, r'*=PP < { *=P < { *=T < ^/\*RNR\*%s/ $ *=S } }' % index,
                                  with_context=True):
            inherit_tag(ctx.s, ctx.p)
            self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
            self.fix_categories_starting_from(ctx.s, g)

    # This breaks with the IP (LC CC LC) case in 9:19(11) -- last_conjunct returns None
    # because the last conjunct has been shrunk
    last_conjunct = list(find_first(g, r'/:c/a', left_to_right=False))

    args = []
    # Here, we uniquify the rnr tags so that we excise each shared argument only once
    for index in set(rnr_tags):
        # find_first, because we only want to find one match, the shallowest.
        # cf 7:27(10), if NP-OBJ-2(NN NP-OBJ-2(JJ NN)), then we only want to identify
        # one matching node for index -2 -- the shallowest -- and not two.
        for node, ctx in find_first(last_conjunct[0], r'*=P < { /%s/a=T $ *=S }' % index,
                                    with_context=True):
            args.append(ctx.t)

            # Note: last_conjunct may be disconnected from
            # the tree by replace_kid (when ctx.p == last_conjunct)
            replace_kid(ctx.p.parent, ctx.p, ctx.s)
            self.fix_categories_starting_from(ctx.s, g)

    # Because the find_all which retrieved the args is an in-order left-to-right traversal, it will find
    # shallower nodes before deeper nodes. Therefore, if a verb has two args V A1 A2, the _args_ list will
    # contain [A2, A1] because A2 is shallower (further from the head) than A1.
    # We reverse the list of args, so that args are re-attached from the inside out (starting from A1).
    # args.reverse()

    new_g = g
    for arg in args:
        new_g = Node(new_g.tag, [new_g, arg], new_g.category.left, head_index=0)
        arg.parent = new_g

    replace_kid(g.parent, g, new_g)

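# Illustrative sketch only (hypothetical names, not the converter's API): the final
# loop of fix_rnr wraps the coordination node g in one new binary node per excised
# shared argument, so "V1 conj V2" with shared arguments [A1, A2] becomes
# ((V1 conj V2) A1) A2, with the first argument in the list attached innermost.
class ToyNode(object):
    def __init__(self, tag, kids):
        self.tag, self.kids, self.parent = tag, kids, None
        for kid in kids:
            kid.parent = self

def attach_shared_args(g, args):
    # Mirror of the re-attachment loop above, minus the CCG category bookkeeping.
    new_g = g
    for arg in args:
        new_g = ToyNode(new_g.tag, [new_g, arg])
    return new_g
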
def accept_derivation(self, bundle):
    root = bundle.derivation
    for (pattern, name) in self.Patterns:
        # print pattern, name
        for node, ctx in find_all(root, pattern, with_context=True):
            toks = self.toks.get(bundle.label(), None)
            cn_toks = text(root)
            trace = ctx.t

            if not (toks and cn_toks):
                print >>sys.stderr, bundle.label()
                if trace:
                    self.results[name].not_discharged += 1

            alignment = align(cn_toks, toks)

            if trace is not None:
                trace_index = get_index_of_leaf(root, trace)
                if alignment.get(trace_index, None) is not None:
                    self.results[name].not_discharged += 1
                else:
                    self.results[name].discharged += 1
            else:
                print >>sys.stderr, "t was not bound to a trace node"

def clusterfix(self, top, pp, p, s, t):
    debug("Fixing argument cluster coordination: %s", pprint(top))
    debug('T: %s', t)

    # 1. Shrink the verb (node T)
    self.fix_object_gap(pp, p, t, s)

    # 2. Reattach the verb above the TOP node
    new_node = Node('TAG', top.kids, top.category, head_index=0)
    top.kids = [t, new_node]
    # (Reattaching parent pointers)
    for kid in new_node:
        kid.parent = new_node

    # 3. Find and relabel argument clusters
    for node, ctx in find_all(top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True):
        vp, np, qp = ctx.vp, ctx.np, ctx.qp

        # Now, VP should have category ((S[dcl]\NP)/QP)/NP
        SbNP = t.category.left.left
        QP, NP = qp.category, np.category

        # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
        new_np_category = (SbNP/QP)|((SbNP/QP)/NP)
        # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
        new_qp_category = (SbNP)|((SbNP)/QP)

        # insert unary nodes
        new_np_node = Node(np.tag, [np], new_np_category, head_index=0)
        np.parent = new_np_node
        new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0)
        qp.parent = new_qp_node

        replace_kid(vp, np, new_np_node)
        replace_kid(vp, qp, new_qp_node)

        self.fix_categories_starting_from(new_np_node, top)

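# Illustrative sketch only: the category arithmetic in clusterfix relies on the
# converter's Category objects overloading '/' for a forward slash and '|' for a
# backward slash, as the comments above indicate. The ToyCategory class below is a
# hypothetical stand-in that mimics just that surface syntax so the expressions can
# be read in isolation; the real Category class carries features, heads, etc.
class ToyCategory(object):
    def __init__(self, s):
        self.s = s
    def __div__(self, other):   # '/' under Python 2: forward-slash category X/Y
        return ToyCategory('(%s/%s)' % (self.s, other.s))
    def __or__(self, other):    # '|' standing in for the backward slash X\Y
        return ToyCategory('(%s\\%s)' % (self.s, other.s))
    def __repr__(self):
        return self.s

SbNP, QP, NP = ToyCategory('S[dcl]\\NP'), ToyCategory('QP'), ToyCategory('NP')
new_np_category = (SbNP/QP) | ((SbNP/QP)/NP)
print new_np_category   # ((S[dcl]\NP/QP)\((S[dcl]\NP/QP)/NP))
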
def fix_nongap_extraction(self, _, n, pred, k):
    node = n
    debug("Fixing nongap extraction: %s", pprint(node))
    debug("k %s", pprint(k))
    self.remove_null_element(node)

    index = get_trace_index_from_tag(k.tag)
    expr = (r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
            % {'tags': ModifierTagsRegex, 'index': index})

    # we use "<<" in the expression, because fix_*_topicalisation comes
    # before fix_nongap_extraction, and this can introduce an extra layer between
    # the phrasal tag and the trace
    for trace_NP, ctx in find_all(node, expr, with_context=True):
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        if not self.relabel_relativiser(pred):
            top, context = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
            ss = context.ss

            debug("Creating null relativiser unary category: %s", ss.category/ss.category)
            replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))

def fix_topicalisation_with_gap(self, node, p, s, t):
    debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
          lrp_repr(node), pprint(s), pprint(t))

    # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # create topicalised category based on the tag of T
    typeraise_t_category = ptb_to_cat(t)
    # insert a node with the topicalised category
    replace_kid(p, t, Node(
        base_tag(t.tag, strip_cptb_tag=False), [t],
        typeraise(typeraise_t_category, S, TR_TOPICALISATION), head_index=0))

    index = get_trace_index_from_tag(t.tag)

    # attested gaps:
    # 575 IP-TPC:t
    # 134 NP-TPC:t
    #  10 IP-Q-TPC:t
    #   8 CP-TPC:t
    #   4 NP-PN-TPC:t
    #   2 QP-TPC:t
    #   2 NP-TTL-TPC:t
    #   1 PP-TPC:t
    #   1 IP-IJ-TPC:t
    #   1 INTJ-TPC:t
    #   1 CP-Q-TPC:t
    #   1 CP-CND-TPC:t
    expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

    for top, ctx in find_all(s, expr, with_context=True):
        debug('top: %s', pprint(top))
        self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)

def accept_derivation(self, bundle):
    root = bundle.derivation
    for node, ctx in find_all(root, filter_expression, with_context=True):
        if node_filter_function(node):
            words = list(text_without_quotes_or_traces(node))
            words_filtered = list(text_without_quotes_or_traces(
                node,
                pred=lambda n: n.tag in ('NN', 'NR', 'NT', 'JJ') and not n.tag == "PU"))

            lengths = [bin_lengths(len(w.decode('u8'))) for w in words_filtered]
            if len(lengths) > 2 and lengths[0] == 1 and lengths[1] == 1:
                lengths[0:2] = [1]
            lengths[1:-1] = []

            sig = ' '.join(imap(str, lengths))
            self.sigs[sig][0] += 1
            self.sigs[sig][1].append(' '.join(words))

            nn = next_leaf(node)
            while nn and is_ignored(nn):
                nn = next_leaf(nn)
            if not nn:
                continue

            next_len = bin_lengths(len(nn.lex.decode('u8')))
            key = '%s;%s' % (sig, next_len)
            self.atboundary[key][0] += 1
            self.atboundary[key][1].append(' '.join(words + ['* ' + nn.lex]))

def accept_derivation(self, bundle):
    top = bundle.derivation
    heads = set()
    for node, ctx in find_all(top, r'* $ { * < ^/\*pro\*/ }', with_context=True):
        head = find_head(node)
        if head not in heads:
            self.verbs[' '.join(head.text())] += 1
            heads.add(head)

def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
    global use_bare_N
    debug("%s", reduced)

    node = n
    debug("Fixing subject extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                NP
        #   /  \               |
        # WHNP  CP     -->     CP
        #      /  \           /  \
        #     IP  DEC        IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

    if w:
        index = get_trace_index_from_tag(w.tag)
    else:
        index = ''

    expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

    for trace_NP, ctx in find_all(node, expr, with_context=True):
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
        self.fix_object_gap(pp, p, t, s)
        self.fix_categories_starting_from(s, until=node)

    if not self.relabel_relativiser(pred):
        # TOP is the shrunk VP
        # after shrinking, we can get VV or VA here
        # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
        result = get_first(node,
                           r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/',
                           with_context=True, left_to_right=True)
        if not result:
            debug('Could not find verbal category; did not create null relativiser.')
            return

        top, context = result
        SS = context.ss.category

        debug("Creating null relativiser unary category: %s", SS/SS)
        replace_kid(top.parent, top, Node("NN", [top], SS/SS, head_index=0))

def accept_derivation(self, bundle):
    root = bundle.derivation
    length = lambda s: bin_lengths(len(s))

    for node, ctx in find_all(root, filter_expression, with_context=True):
        if node_filter_function(node):
            L, R = ctx['L'].lex.decode('u8'), ctx['R'].lex.decode('u8')
            self.sigs[(length(L), length(R))][0] += 1
            self.sigs[(length(L), length(R))][1].append(' '.join((L, R)).encode('u8'))

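# Hedged sketch: the self.sigs and self.atboundary counters used above are assumed
# to map a signature to a [count, examples] pair. A minimal stand-alone equivalent
# with hypothetical names, using collections.defaultdict:
from collections import defaultdict

sigs = defaultdict(lambda: [0, []])

def record(sigs, sig, example):
    # Bump the count for this signature and keep the attested example string.
    sigs[sig][0] += 1
    sigs[sig][1].append(example)

record(sigs, (1, 2), 'L R')
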
def accept_derivation(self, bundle):
    for node, ctx in find_all(bundle.derivation, expr, with_context=True):
        u = ctx.n.lex.decode('u8')
        if u[0] in baixing:
            leaf = ctx.n
            kids = [Leaf(leaf.tag, u[0].encode('u8'), None),
                    Leaf(leaf.tag, u[1:].encode('u8'), None)]
            replace_kid(ctx.n.parent, ctx.n, Node('NR', kids))
            # node.kids = kids

    self.write_derivation(bundle)

def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
    global use_bare_N

    node = n
    debug("Fixing object extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                NP
        #   /  \               |
        # WHNP  CP     -->     CP
        #      /  \           /  \
        #     IP  DEC        IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

    if w:
        index = get_trace_index_from_tag(w.tag)
    else:
        index = ''

    expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

    for trace_NP, ctx in find_all(node, expr, with_context=True):
        top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s
        self.fix_object_gap(pp, p, t, s)
        self.fix_categories_starting_from(s, until=top)

        # If we couldn't find the DEC node, this is the null relativiser case
        if not self.relabel_relativiser(pred):
            # TOP is the S node
            # null relativiser category comes from sibling of TOP
            # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
            result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)
            if result:
                _, ctx = result
                ss = ctx.ss

                debug("Creating null relativiser unary category: %s", ss.category/ss.category)
                replace_kid(top.parent, top, Node("NN", [top], ss.category/ss.category, head_index=0))

def accept_derivation(self, bundle):
    top = bundle.derivation
    for node, ctx in find_all(top, r'* < /-TPC-\d/a=T', with_context=True):
        trace = find_coindexed_trace(top, ctx.t)
        if trace:
            topicalised_node = ctx.t
            topicalised_node.tag = trace.parent.tag

            replace_kid(trace.parent.parent, trace.parent, topicalised_node)
            node.kids.remove(topicalised_node)

    self.write_derivation(bundle)

def fix_whword_topicalisation(self, node, p, s, t):
    debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))

    # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # create topicalised category based on the tag of T
    typeraise_t_category = ptb_to_cat(t)
    # insert a node with the topicalised category
    replace_kid(p, t, Node(
        base_tag(t.tag, strip_cptb_tag=False), [t],
        typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION), head_index=0))

    index = get_trace_index_from_tag(t.tag)
    expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

    for top, ctx in find_all(p, expr, with_context=True):
        replace_kid(ctx.pp, ctx.p, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)

def fix_ba_object_gap(self, top, ba, c, d):
    # for trace_NP, ctx in find_all(d, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
    for trace_NP, ctx in find_all(d, r'{ { { /NP-OBJ/=T < ^/\*-/ } $ *=S } > { *=P > *=PP } }',
                                  with_context=True):
        print 'FOUND'
        debug("Found %s", trace_NP)
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

        self.fix_object_gap(pp, p, t, s)
        # self.fix_categories_starting_from(s, until=top)

    # debug("Fixing ba-construction object gap: %s" % lrp_repr(node))
    #
    # for trace_NP, ctx in find_all(top, r'*=PP < {*=P < { /NP-OBJ/=T < ^/\*-/ $ *=S } }', with_context=True):
    #     debug("Found %s", trace_NP)
    #     pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
    #
    #     self.fix_object_gap(pp, p, t, s)
    #     self.fix_categories_starting_from(s, until=c)
    #     self.relabel_ba_category(top, ba, s)

def accept_derivation(self, bundle):
    for node, ctx in find_all(bundle.derivation, r'/VP/ < /V[VACE]/=T ! < /CC/', with_context=True):
        lex = ':'.join(ctx.t.text())
        self.frames[signature(node)][lex] += 1

def match_generator(self, deriv, expr):
    return find_all(deriv, expr)

def match_generator(self, deriv, expr, with_context):
    return find_all(deriv, expr, with_context)

def accept_derivation(self, bundle):
    top = bundle.derivation
    for node, ctx in find_all(top, r'/V[VACE]/', with_context=True):
        self.verbs[' '.join(node.text())] += 1

def accept_derivation(self, bundle):
    for node in find_all(bundle.derivation, '* <1 /VV/'):
        self.frames[signature(node)] += 1