# These tags are attested NP modifiers.  Each entry is matched as a *prefix*
# of a child's tag.  References such as 24:98(3) are carried over from the
# original inline comments.
_NP_KID_TAG_PREFIXES = (
    'ADJP',
    'QP',
    'CD',
    'DP',
    'DT',    # 24:98(3)
    'PN',
    'CP',
    'DNP',
    'ADVP',
    'AD',
    'IP',
    'JJ',    # JJ is in here because ADJP < JJ may have been shrunk already
    'LCP',
    'CLP',   # 0:57(12) reduced M
    'PP',
    'NP',
    'WHNP',  # 9:30(13)
    'FLR',   # ignore FLR
    'SP',    # ignore SP
)


def is_np_structure(node):
    """Return whether *node* is an NP whose direct children are all NP material.

    The result is ``False`` when any direct child satisfies
    ``has_tags(kid, 'cC')``, or when ``node.tag`` does not start with ``'NP'``.
    Otherwise every direct child must satisfy at least one of:

    * its tag starts with an entry of ``NominalCategories``;
    * its tag starts with an entry of ``_NP_KID_TAG_PREFIXES``;
    * its tag is exactly ``'PU'`` or ``'CC'``;
    * ``has_tag(kid, 'p')`` holds.

    An NP with no children yields ``True``.

    NOTE(review): an earlier comment on this function read "rule out NP-PRD as
    NP structure (0:88(15))", but no such check exists here -- a tag such as
    ``'NP-PRD'`` passes the ``startswith('NP')`` test.  Confirm which is
    intended.
    """
    # rule out things already tagged explicitly as coordination by tag.py
    if any(has_tags(kid, 'cC') for kid in node):
        return False

    if not node.tag.startswith('NP'):
        return False

    return all(
        any(kid.tag.startswith(cat) for cat in NominalCategories)
        or kid.tag.startswith(_NP_KID_TAG_PREFIXES)
        or kid.tag in ('PU', 'CC')  # CC for underspecified NP (27:24(3))
        or has_tag(kid, 'p')
        for kid in node
    )
def is_np_internal_structure(node):
    """Return whether *node* is an NP whose leaves are all nominal-internal.

    The result is ``False`` when any direct child satisfies
    ``has_tags(kid, 'cC')``, or when ``node.tag`` does not start with ``'NP'``.
    Otherwise every element produced by ``leaves(node)`` must satisfy at least
    one of:

    * ``has_tags(kid, 'nN')`` holds;
    * its tag starts with an entry of ``NominalCategories``;
    * its tag is exactly ``'PU'`` or ``'CC'``;
    * its tag starts with ``'JJ'``, ``'CD'`` or ``'OD'``;
    * ``has_tag(kid, '&')`` holds.

    Note that the coordination guard inspects the direct children of *node*,
    while the main test inspects ``leaves(node)``.
    """
    # rule out things already tagged explicitly as coordination by tag.py
    if any(has_tags(kid, 'cC') for kid in node):
        return False

    if not node.tag.startswith('NP'):
        return False

    return all(
        has_tags(kid, 'nN')
        or any(kid.tag.startswith(tag) for tag in NominalCategories)
        or kid.tag in ('PU', 'CC')
        or kid.tag.startswith(('JJ', 'CD', 'OD'))
        or has_tag(kid, '&')
        for kid in leaves(node)
    )
def is_topicalisation(node):
    """Return ``has_tag`` for the ``'t'`` marker on the first child of *node*."""
    first_kid = node[0]
    return has_tag(first_kid, 't')
def is_apposition(node):
    """Return ``True`` if any direct child of *node* satisfies ``has_tag(kid, 'A')``.

    A node with no children yields ``False``.
    """
    for kid in node:
        if has_tag(kid, 'A'):
            return True
    return False
def is_modification(node):
    """Return ``has_tag`` for the ``'m'`` marker on the first child of *node*."""
    first_kid = node[0]
    return has_tag(first_kid, 'm')
def is_coordination(node):
    """Return whether either of the first two children of *node* has the ``'c'`` marker.

    The second child is only inspected when the first does not carry the marker.
    """
    left_marked = has_tag(node[0], 'c')
    if left_marked:
        return left_marked
    return has_tag(node[1], 'c')
def is_ucp(node):
    """Return whether the first child has the ``'C'`` marker and the second is not ``'PU'``."""
    # TODO: The test for node[1].tag != PU is to prevent a rare mis-analysis
    # of punctuation in a UCP being identified as a conjunct.
    # Need to investigate further
    first_marked = has_tag(node[0], 'C')
    if not first_marked:
        return first_marked
    return node[1].tag != 'PU'
def is_prn(node):
    """Return ``has_tag`` for the ``'p'`` marker on *node* itself (not its children)."""
    marker = 'p'
    return has_tag(node, marker)
def is_head_initial(node):
    """Return whether *node* is marked as head-initial.

    Yields ``False`` when ``first_nonpunct_kid(node)`` is falsy.  Otherwise the
    result is true when that child has the ``'h'`` marker or, failing that,
    when the second child of *node* has the ``'r'`` marker.
    """
    leading = first_nonpunct_kid(node)
    if not leading:
        return False
    return has_tag(leading, 'h') or has_tag(node[1], 'r')
def is_head_final(node):
    """Return whether *node* is marked as head-final.

    Yields ``False`` when ``last_nonpunct_kid(node)`` is falsy.  Otherwise the
    result is true when that child has the ``'h'`` marker or, failing that,
    when the first child of *node* has the ``'l'`` marker.
    """
    trailing = last_nonpunct_kid(node)
    if not trailing:
        return False
    return has_tag(trailing, 'h') or has_tag(node[0], 'l')
def is_topicalisation_without_gap(node):
    """Return ``has_tag`` for the ``'T'`` marker on the first child of *node*."""
    first_kid = node[0]
    return has_tag(first_kid, 'T')
def is_etc(node):
    """Return whether *node* has more than one child and the second has the ``'&'`` marker.

    The second child is only inspected when ``node.count()`` exceeds 1.
    """
    multiple_kids = node.count() > 1
    return has_tag(node[1], '&') if multiple_kids else multiple_kids
def is_argument_cluster(node):
    """Return ``True`` if every direct child of *node* satisfies ``has_tag(kid, '@')``.

    A node with no children yields ``True``.
    """
    for kid in node:
        if not has_tag(kid, '@'):
            return False
    return True
def is_adjunction(node):
    """Return ``has_tag`` for the ``'a'`` marker on the first child of *node*."""
    first_kid = node[0]
    return has_tag(first_kid, 'a')
def is_partial_ucp(node):
    """Return whether *node* looks like a partial UCP.

    All of the following must hold, checked in this order:

    * the first child is a leaf (``is_leaf()``);
    * the first child's tag starts with ``'CC'`` or is exactly ``'PU'``;
    * the second child has the ``'C'`` marker;
    * ``base_tag`` of *node*'s tag differs from ``base_tag`` of the second
      child's tag.

    The second child is only accessed once the first two conditions hold.
    """
    lead = node[0]
    return (
        lead.is_leaf()
        and (lead.tag == 'PU' or lead.tag.startswith('CC'))
        and has_tag(node[1], 'C')
        and base_tag(node.tag) != base_tag(node[1].tag)
    )