def parse_markedup_file(self, lines):
    """Parse a markedup file into a mapping from category string to category.

    The format, as read by this parser:

    * blank lines and lines starting with ``#`` or ``=`` are skipped;
    * an unindented line starts a new entry and is taken verbatim (minus
      trailing whitespace) as the entry's category string;
    * an indented line belongs to the current entry and is expected to hold
      exactly two whitespace-separated fields, ``<index> <category>``.
      Lines with any other number of fields, or whose category field is
      ``ignore``, are skipped.

    For each entry, the first usable indented line supplies the category,
    which is run through ``parse_category``; later lines of the same entry
    are still parsed but do not overwrite the stored value.

    Parameters:
        lines: any iterable of text lines (an open file, a list, ...).

    Returns:
        dict mapping each entry's category string to its parsed category.
        Entries with no usable indented line do not appear in the result.
    """
    result = {}
    # No entry header seen yet; indented lines are meaningless until one is.
    cur_cat_string = None

    # A plain for loop works for any iterable on both Python 2 and 3,
    # unlike the Python-2-only iterator method ``lines.next()``.
    for raw_line in lines:
        line = raw_line.rstrip()

        # Covers '\n', '\r\n', whitespace-only lines and '' alike, so that
        # line[0] below can never raise IndexError.
        if not line:
            continue
        if line[0] in '#=':
            continue

        if line[0] not in ' \t':
            cur_cat_string = line
            continue

        # Indented line before any entry header: nothing to attach it to.
        if cur_cat_string is None:
            continue

        # split() with no arguments already discards leading whitespace.
        bits = line.split()
        if len(bits) != 2:
            continue

        _index, cat = bits
        if cat == 'ignore':
            continue

        # Parse before the membership test so malformed categories are
        # reported even for entries that already have a value.
        cat = parse_category(cat)
        if cur_cat_string not in result:
            result[cur_cat_string] = cat

    return result
def body(toks):
    """Read the inside of one bracketed derivation node from *toks*.

    Consumes, in order: the node tag; an optional ``<head_index>``; an
    optional ``{category}``; then any mix of nested nodes (each starting
    with ``(`` and read through ``self.read_deriv``) and bare lexical
    tokens, stopping in front of the closing ``)`` without consuming it.

    Returns an ``A.Leaf`` when a lexical item was read and no nested nodes
    were, otherwise an ``A.Node`` whose children have had their ``parent``
    set to the new node.
    """
    # HACK
    tag = toks.next()

    # Optional "<n>" head index; never looked for after a PU tag.
    # NOTE(review): presumably because a PU token may be followed by a
    # literal '<' -- confirm.
    head_index = None
    if toks.peek() == '<' and tag != 'PU':
        toks.next()
        head_index = int(toks.next())
        shift_and_check( '>', toks )

    # Optional "{category}" annotation.
    category = None
    if toks.peek() == '{':
        toks.next()
        category = parse_category(toks.next())
        shift_and_check( '}', toks )

    children = []
    lex = None
    while True:
        upcoming = toks.peek()
        if upcoming == ')':
            break
        if upcoming == '(':
            children.append( self.read_deriv(toks) )
        else:
            # If several bare tokens occur, the last one wins.
            lex = toks.next()

    if lex and not children:
        return A.Leaf(tag, lex, category, parent)

    node = A.Node(tag, children, category, parent, head_index)
    for child in node:
        child.parent = node
    return node
def testMarked(self):
    """Check repr(label(parse_category(before))) against the expected string.

    Every literal is a raw string: the categories contain backslashes
    followed by ``N`` or ``S``.  In a non-raw Python 2 byte string these
    happen to survive unchanged, but as unicode / Python 3 literals ``\\N``
    starts a named-character escape and the file no longer compiles.  Raw
    strings have the same value everywhere.
    """
    cases = (
        (r'N/N', r'(N{Y}/N{Y}){_}'),
        (r'NP/N', r'(NP{Y}/N{Y}){_}'),
        (r'S[q]\S[dcl]', r'(S[q]{Y}\S[dcl]{Y}){_}'),
        (r'(S\NP)/(S\NP)', r'((S{Y}\NP{Z}){Y}/(S{Y}\NP{Z}){Y}){_}'),
        (r'X', r'X{_}'),
        (r'(S[dcl]\NP)/NP', r'((S[dcl]{_}\NP{Y}){_}/NP{Z}){_}'),
        (r',', r',{_}'),
        (r'(S[dcl]\NP)/(S[dcl]\NP)',
         r'((S[dcl]{_}\NP{Y}){_}/(S[dcl]{Z}\NP{Y}){Z}){_}'),
    )
    for (before, after) in cases:
        self.assertEqual(repr(label(parse_category(before))), after)
def testMarked(self):
    """Verify the labelled repr produced for a fixed set of categories.

    Each pair is (input category string, expected repr of the labelled
    category).  The literals are written as raw strings because they
    contain backslash-N and backslash-S sequences: harmless in Python 2
    byte strings, but a malformed named-character escape (a compile error)
    in unicode / Python 3 literals.  The raw form yields identical values.
    """
    expectations = (
        (r'N/N', r'(N{Y}/N{Y}){_}'),
        (r'NP/N', r'(NP{Y}/N{Y}){_}'),
        (r'S[q]\S[dcl]', r'(S[q]{Y}\S[dcl]{Y}){_}'),
        (r'(S\NP)/(S\NP)', r'((S{Y}\NP{Z}){Y}/(S{Y}\NP{Z}){Y}){_}'),
        (r'X', r'X{_}'),
        (r'(S[dcl]\NP)/NP', r'((S[dcl]{_}\NP{Y}){_}/NP{Z}){_}'),
        (r',', r',{_}'),
        (r'(S[dcl]\NP)/(S[dcl]\NP)',
         r'((S[dcl]{_}\NP{Y}){_}/(S[dcl]{Z}\NP{Y}){Z}){_}'),
    )
    for (before, after) in expectations:
        self.assertEqual(repr(label(parse_category(before))), after)
def mkdeps(root, postprocessor=identity):
    '''
    Computes the set of dependencies for the derivation rooted at _root_.

    The work is done in four passes:
      1. every leaf gets its index appended to its lex, its category is
         labelled via parg_labelled(), and the head of its outermost slot is
         given the (uniquified) lex;
      2. _label_result is applied to every (l, r, p) triple yielded by
         pairs_postorder;
      3. for every such triple, the combinator name returned by analyse()
         selects how the parent category's slots and variables are assigned,
         and the resulting unifier is applied to the parent category;
      4. dependencies are read off the argument slots of every leaf and of
         every node registered through register_unary.

    _root_ is passed to leaves() and pairs_postorder(); the nodes and
    categories reachable from it are modified in place.
    _postprocessor_ is called on both lexical items of each dependency before
    the dependency is added to the result (the default is _identity_).

    Returns a set of 4-tuples
        (postprocessor(lex of functor slot head),
         postprocessor(lex of argument slot head),
         category of the leaf/unary node the dependency was read from,
         label of the category the argument was taken from).

    Side effects: besides mutating the derivation, combinators that fall
    through to the final else branch are added to the module global
    _unanalysed_.
    '''
    # Pass 1: prepare the leaves.
    for i, leaf in enumerate(leaves(root)):
        # Uniquify each leaf with an index
        leaf.lex += IndexSeparatorTemplate % i
        # Apply the left to right slash labelling
        # (we abuse this to refer to slots, not slashes)
        leaf.cat.parg_labelled()
        # Populate the outermost (_) variable of each leaf
        leaf.cat.slot.head.lex = leaf.lex

    # Pass 2: label the result of every rule application.
    for (l, r, p) in pairs_postorder(root):
        _label_result(l, r, p)

    global unanalysed

    # Nodes recorded by register_unary; they are treated like leaves when
    # dependencies are collected below.
    unaries = []

    # Pass 3: per-combinator slot/variable assignment.
    for l, r, p in pairs_postorder(root):
        # L, R, P are the categories of l, r, p; a falsy node stays as is
        # (R is tested for truthiness further down).
        L, R, P = map(lambda x: x and x.cat, (l, r, p))
        comb = analyse(L, R, P)
        # Only logged here; what happens to the triple is decided by the
        # if/elif chain below.
        if not comb: debug("Unrecognised rule %s %s -> %s", L, R, P)

        # Reset per triple; only some branches produce a unifier.
        unifier = []

        if config.debug:
            debug("%s %s %s (%s)", L, R, P, str(comb))

        if comb == 'fwd_appl': # [Xx/Yy]l Yy -> Xx
            unifier = unify(L.right, R)
            p.cat = L.left

        elif comb == 'bwd_appl': # Yy [Xx\Yy]r -> Xx
            unifier = unify(L, R.right)
            p.cat = R.left

        # Pro-drops which drop their outer argument
        # [(S_\NPy)_/NPx]_ -> [S_\NPy]_
        elif comb in ('object_prodrop', 'vp_vp_object_prodrop', 'yi_subject_prodrop', 'vp_modifier_subject_prodrop'):
            p.cat = L.left

        # [Xx/Yy]l [Yy/Zz]r -> [Xx/Zz]r
        elif comb == 'fwd_comp': # X/Y Y/Z -> X/Z
            if is_rooted_in(Sdcl, L, respecting_features=True):
                P.slot = L.slot
            else:
                P.slot = R.slot # lexical head comes from R (Y/Z)

            P.slot.var = fresh_var(prefix='K')

            unifier = unify(L.right, R.left)
            p.cat._left = L.left
            p.cat._right = R.right

        # [Yy\Zz]l [Xx\Yy]r -> [Xx\Zz]l
        elif comb == 'bwd_comp': # Y\Z X\Y -> X\Z
            if is_rooted_in(Sdcl, R, respecting_features=True):
                P.slot = R.slot
            else:
                P.slot = L.slot # lexical head comes from L (Y\Z)

            P.slot.var = fresh_var(prefix='K')

            unifier = unify(R.right, L.left)
            p.cat._left = R.left
            p.cat._right = L.right

        elif comb in ('s_np_apposition', 'vp_np_apposition'): # { S[dcl], S[dcl]\NP } NPy -> NPy
            P.slot = R.slot # = copy_vars
            unifier = unify(P, R)

        # NP NP -> N/N
        elif comb == 'np_np_to_nfn_apposition':
            # do the same as NP NP -> NP, except fill in the vars Ny/Ny
            P.right.slot.var = fresh_var(prefix='N')
            P.left.slot = P.right.slot

            register_unary(unaries, p, L.slot.head.lex)
            make_set_head_from(l, r, p)

        elif comb in ('conjoin', 'np_np_apposition'): # X X[conj] -> X
            make_set_head_from(l, r, p)

        elif comb in ('conj_absorb', 'conj_comma_absorb'): # conj X -> X[conj]
            copy_vars(frm=R, to=P)
            # The unifier returned here is deliberately not kept.
            unify(P, R) # R.slot.head = P.slot.head

        elif comb == 'funny_conj': # conj X -> X
            p.cat = R

        elif comb == 'nongap_topicalisation': # {N, NP, S[dcl], QP}x -> [Sy/Sy]x
            P.slot = L.slot
            P.right.slot.var = fresh_var()
            P.left.slot = P.right.slot

            register_unary(unaries, p, L.slot.head.lex)

        elif comb in ('np_gap_topicalisation', 's_gap_topicalisation', 'qp_gap_topicalisation'): # NPx -> [ Sy/(Sy/NPx)y ]y
            P.right.right.slot = L.slot
            P.slot.var = fresh_var()
            P.left.slot = P.right.left.slot = P.right.slot = P.slot

        elif comb == 'subject_prodrop': # (S[dcl]y\NPx)y -> S[dcl]y | [(S[dcl]y\NPx)y/NPz]y -> (S[dcl]y/NPz)y
            if P == parse_category(r'S[dcl]'):
                P.slot = L.slot
            elif P == parse_category(r'S[dcl]/NP'):
                P.slot = P.left.slot = L.slot
                P.right.slot = L.right.slot
            else:
                warn("Invalid parent category %s for subject prodrop.", P)

        elif comb == 'fwd_xcomp': # [Xx/Yy]l [Yy\Zz]r -> [Xx/Zz]r
            if is_rooted_in(Sdcl, L, respecting_features=True):
                P.slot = L.slot
            else:
                P.slot = R.slot # lexical head comes from R (Y/Z)

            P.slot.var = fresh_var(prefix='K')

            unifier = unify(L.right, R.left)
            p.cat._left = L.left
            p.cat._right = R.right

        elif comb == 'bwd_xcomp': # [Yy/Zz]l [Xx\Yy]r -> [Xx/Zz]l
            if is_rooted_in(Sdcl, R, respecting_features=True):
                P.slot = R.slot
            else:
                P.slot = L.slot # lexical head comes from L (Y\Z)

            # P.slot = L.slot
            P.slot.var = fresh_var(prefix='K')

            unifier = unify(R.right, L.left)
            p.cat._left = R.left
            p.cat._right = L.right

        elif comb == 'bwd_r1xcomp': # [(Yy/Zz)k/Ww]l [Xx\Yy]r -> [(Xx\Zz)k/Ww]l
            # TODO: where should P's lexical head come from? L or R?

            unifier = unify(L.left.left, R.right)
            p.cat._left._left = R.left
            p.cat._left._right = L.left.right
            p.cat._right = L.right

        elif comb in ('fwd_raise', 'bwd_raise'): # Xx -> [ Tf|(Tf|Xx)f ]f
            # Three specific parent categories get hand-written variable
            # assignments; everything else uses the generic else branch.
            if P == parse_category(r'(S[dcl]\NP)\((S[dcl]\NP)/(S[dcl]\NP))'):
                # (S[dcl]y\NPz)y -> [ (S[dcl]f\NPg)f/((S[dcl]f\NPg)f\(S[dcl]y\NPz)y)f ]f
                P.left.slot.var = P.left.left.slot.var = P.right.slot.var = P.slot.var = fresh_var() # f
                P.left.right.slot.var = fresh_var() # g

                copy_vars(frm=P.left, to=P.right.left)
                copy_vars(frm=L, to=P.right.right)

                unifier = unify(L, P.right.right)
            elif P == parse_category(r'((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)'):
                # NPy -> [ ((S[dcl]v\NPw)v/QPz)v \ ( ((S[dcl]v\NPw)v/QPz)v/NPy )v ]v
                P.slot.var = fresh_var()
                P.left.slot = P.right.slot = \
                    P.left. left.slot = P.left. left.left.slot = \
                    P.right.left.slot = P.right.left.left.slot = \
                    P.right.left.left.left.slot = P.slot # v
                # P.right.right.slot = fresh_var() # y
                P.right.right.slot = L.slot

                P.left.right.slot.var = fresh_var('Z')
                P.right.left.right.slot = P.left.right.slot # z

                P.left.left.right.slot.var = fresh_var('W')
                P.right.left.left.right.slot = P.left.left.right.slot # w

                unifier = unify(L, P.right.right)
            elif P == parse_category(r'(S[dcl]\NP)\((S[dcl]\NP)/QP)'):
                # QPy -> [ (S[dcl]v\NPz)v \ ((S[dcl]v\NPz)v/QPy)v ]v
                P.slot.var = fresh_var()
                P.left.slot = P.left.left.slot = \
                    P.right.slot = P.right.left.slot = P.right.left.left.slot = P.slot # v
                # P.right.right.slot = fresh_var() # y
                P.right.right.slot = L.slot

                P.left.right.slot.var = fresh_var('Z')
                P.right.left.right.slot = P.left.right.slot # z

                unifier = unify(L, P.right.right)
            else:
                P.slot.var = fresh_var()

                P.right.left.slot = P.left.slot = P.right.slot = P.slot
                P.right.right.slot = L.slot

                unifier = unify(L, P.right.right)

        elif comb == 'np_typechange':
            P.slot = L.slot # = copy_vars
            unifier = unify(P, L)

        elif comb == 'lcp_np_typechange':
            P.slot = L.slot
            unifier = unify(P, L)

        elif comb in ('lcp_sfs_typechange', 'lcp_nfn_typechange'):
            P.left.slot.var = fresh_var()
            P.right.slot = P.left.slot

            P.slot = L.slot

            register_unary(unaries, p, L.slot.head.lex)

        elif comb == 'lcp_sbnpfsbnp_typechange':
            # [(Sy\NPz)y/(Sy\NPz)y]_
            P.left.slot.var = fresh_var()
            P.left.left.slot = P.right.left.slot = P.right.slot = P.left.slot

            register_unary(unaries, p, L.slot.head.lex)

        elif comb == 'null_relativiser_typechange': # Xy -> (Nf/Nf)y
            P.slot = L.slot

            if P == _NfN:
                P.left.slot.var = fresh_var()
                P.right.slot = P.left.slot

                register_unary(unaries, p, L.slot.head.lex)

            elif P == _NfNfNfN:
                P.left.slot.var = fresh_var()
                P.left.left.slot.var = fresh_var(prefix="G")
                P.left.right.slot = P.left.left.slot
                P.right.slot = P.left.slot

                register_unary(unaries, p, L.slot.head.lex)
            else:
                warn("Unhandled null relativiser typechange: %s -> %s", L, P)

        # [NP/NP]y -> NPy
        elif comb == 'de_nominalisation':
            P.slot = L.slot

            register_unary(unaries, p, L.slot.head.lex)

        # {M, QP}y -> (Nf/Nf)y
        elif comb == 'measure_word_number_elision':
            P.slot = L.slot

            P.left.slot.var = fresh_var()
            P.right.slot = P.left.slot

            register_unary(unaries, p, L.slot.head.lex)

        elif comb == 'l_punct_absorb':
            # , X -> X[conj]
            # need to put conj feature back on parent
            p.cat = R.clone_adding_feature('conj')

        elif comb == 'r_punct_absorb':
            p.cat = L

        elif R and L == R and is_rooted_in(parse_category('S'), L): # VCD (stopgap)
            make_set_head_from(l, r, p)

        else:
            # Fallback: log, remember the combinator in the module-level
            # collection, and give the parent the slot of the right child
            # when there is one, else of the left child.
            debug('Unhandled combinator %s (%s %s -> %s)', comb, L, R, P)
            unanalysed.add(comb)

            P.slot = R.slot if R else L.slot

        # unifier is iterated as (dest, src) pairs; only pairs whose src is
        # a string or list are acted upon.
        for (dest, src) in unifier:
            if isinstance(src, (basestring, list)):
                # Fake bidirectional unification:
                # -------------------------------
                # If variable X has been unified with value v,
                # rewrite all mentions of v in the output category to point to variable X
                # (v is uniquified by concatenating it with an ID, so this should hold)
                for subcat in p.cat.nested_compound_categories():
                    if subcat.slot.head.lex == src:
                        subcat.slot = dest.slot

        if config.debug:
            debug("> %s" % p.cat)
            debug('---')

        # Optional sanity check, enabled through configuration.
        if config.fail_on_unassigned_variables:
            assert no_unassigned_variables(p.cat), "Unassigned variables in %s" % p.cat

    if config.debug:
        debug('unaries: %s', unaries)

    # Pass 4.
    # Collect deps from arguments
    deps = []
    for l in chain( leaves(root), unaries ):
        if config.debug: debug("%s %s", l, l.cat)

        # Walk down the left spine of the category, looking at the right
        # (argument) side at each level.
        C = l.cat
        while not C.is_leaf():
            arg = C.right
            if arg.slot.head.filler: #and not l.cat.left.slot == l.cat.right.slot):
        #        print "%s %s %s %s %s %s" % (C.slot.head.lex, C, arg.slot.head.lex, arg, l.cat, C.label)
                if C.label is None:
                    warn("Dependency generated on slash without label: %s %s", C, arg)
                deps.append( (C.slot.head.lex, arg.slot.head.lex, l.cat, C.label) )
            # Stop descending once a modifier category has been processed.
            if is_modifier(C): break
            C = C.left

    # Produce dep pairs
    result = set()
    for depl, depr, head_cat, head_label in deps:
        # seqify() turns each side into a sequence, so one collected entry
        # can expand into several dependencies (one per pairing).
        for sdepl in set(seqify(depl)):
            for sdepr in set(seqify(depr)):
                # Skip pairings with a falsy side (logged at debug level).
                if not (sdepl and sdepr):
                    debug("Dependency with None: %s %s", sdepl, sdepr)
                    continue

                result.add( (postprocessor(sdepl), postprocessor(sdepr), head_cat, head_label) )

    if config.debug:
        for line in write_deps(result):
            debug(line)

    return result
from munge.cats.trace import analyse
from munge.trees.traverse import leaves, pairs_postorder
from munge.util.iter_utils import flatten, seqify
from munge.util.err_utils import debug, warn, err
from munge.util.func_utils import identity
from munge.trees.pprint import pprint
from munge.cats.labels import label_result, _label_result
# NOTE(review): repeats the first import above; harmless.
from munge.cats.trace import analyse

from apps.cn.output import OutputDerivation
from apps.cn.mkmarked import naive_label_derivation, is_modifier
from apps.util.mkdeps_utils import *
from apps.cn.fix_rc import is_rooted_in

# Categories that mkdeps compares the parent category against when handling
# 'null_relativiser_typechange'. Which pair is used depends on
# config.use_bare_N, evaluated once at import time.
# NOTE(review): `config` and `parse_category` are not imported by name in the
# lines above; presumably they come from the wildcard import or from imports
# outside this view -- confirm.
if config.use_bare_N:
    _NfN = parse_category('N/N')
    _NfNfNfN = parse_category('(N/N)/(N/N)')
else:
    _NfN = parse_category('NP/NP')
    _NfNfNfN = parse_category('(NP/NP)/(NP/NP)')

def register_unary(unaries, node, filler):
    '''
    If _node_ represents the result (RHS) of a unary rule, this records that a new dependency must be created
    between it and its filler, adding it to _unaries_, a list of such dependencies created in a given derivation.

    Concretely: _node_'s category is labelled via parg_labelled(), the head of
    its outermost slot has its lex set to _filler_, the assignment is logged
    through debug(), and _node_ is appended to _unaries_.

    _unaries_ is modified in place; nothing is returned.
    '''
    node.cat.parg_labelled()
    node.cat.slot.head.lex = filler
    debug("%s head lex <- %s", node, filler)
    unaries.append(node)