예제 #1
0
def signature(node, d):
    '''Return "<base tag of d[0]> <base tag of the last eligible child before DNP>".

    The eligible children of node are those located before its first child
    tagged 'DNP' whose tag is not 'PU', 'PRN' or 'FLR'.  If node has no DNP
    child, every child is a candidate.

    Raises IndexError when no eligible child exists.
    '''
    kids = list(node)
    # The original compared each position against a *list* of DNP positions,
    # which is a TypeError on Python 3 and always true on Python 2; use the
    # integer position of the first DNP child instead.
    dnp_index = next(
        (i for (i, k) in enumerate(kids) if k.tag == 'DNP'),
        len(kids))
    candidate_tags = [
        k.tag for k in kids[:dnp_index]
        if k.tag not in ('PU', 'PRN', 'FLR')
    ]
    last_nonpunct_tag = candidate_tags[-1]
    return ' '.join((base_tag(d[0].tag), base_tag(last_nonpunct_tag)))
예제 #2
0
파일: conjs.py 프로젝트: Oneplus/cnccgbank
 def accept_derivation(self, bundle):
     '''Tally the lexical items of CC children found under coordination nodes.

     Each non-leaf node of the derivation for which is_coordination() holds
     contributes one count per child tagged 'CC' to two tables: self.conjs,
     keyed by base_tag(node.tag) and then by the child's lex, and
     self.inverse, keyed the other way round.
     '''
     def is_conjunction(kid):
         return kid.tag == 'CC'

     for node in nodes(bundle.derivation):
         # Leaves are rejected before is_coordination() is ever consulted.
         if node.is_leaf() or not is_coordination(node):
             continue
         for conj in list(where(is_conjunction, node.kids)):
             self.conjs[base_tag(node.tag)][conj.lex] += 1
             self.inverse[conj.lex][base_tag(node.tag)] += 1
예제 #3
0
 def accept_derivation(self, bundle):
     '''Record which CC lexical items occur under which coordination tags.

     For every non-leaf node accepted by is_coordination(), each child whose
     tag is 'CC' bumps self.conjs[base_tag(node.tag)][lex] and
     self.inverse[lex][base_tag(node.tag)] by one.
     '''
     def tagged_cc(kid):
         return kid.tag == 'CC'

     for node in nodes(bundle.derivation):
         if node.is_leaf():
             continue
         if not is_coordination(node):
             continue
         conjunctions = list(where(tagged_cc, node.kids))
         for conjunction in conjunctions:
             self.conjs[base_tag(node.tag)][conjunction.lex] += 1
             self.inverse[conjunction.lex][base_tag(node.tag)] += 1
예제 #4
0
 def format(self, leaf):
     '''Fill self.format_string with the fields of leaf.

     The mapping offered to the %-format has the keys 'lex', 'pos',
     'cat' (the string form of leaf.cat) and 'stemmed_pos'
     (base_tag() applied to leaf.pos1).
     '''
     fields = dict(
         lex=leaf.lex,
         pos=leaf.pos1,
         cat=str(leaf.cat),
         stemmed_pos=base_tag(leaf.pos1)
     )
     return self.format_string % fields
예제 #5
0
 def ccgbank_repr(self):
     '''Return the "(<L ...>)" string for this object.

     The output lists the category, base_tag(self.tag) twice, the lexical
     item, and the category again.
     '''
     template = "(<L %(cat)s %(basetag)s %(basetag)s %(lex)s %(cat)s>)"
     fields = {}
     fields['cat'] = self.category
     fields['basetag'] = base_tag(self.tag)
     # 'tag' is not referenced by the template, but detag() is still invoked
     # so the call sequence matches what callers have always seen.
     fields['tag'] = self.detag(self.tag)
     fields['lex'] = self.lex
     return template % fields
예제 #6
0
 def accept_derivation(self, bundle):
     '''Update the running counts with one derivation.

     Increments self.nderivs, adds the length of the derivation's text() to
     self.nwords, and then visits every leaf: leaves for which
     self.is_trace() holds increase self.ecs and the self.ec_types counter
     for base_tag(leaf.lex); all other leaves have their lex added to
     self.tokens.
     '''
     self.nderivs += 1
     derivation = bundle.derivation
     self.nwords += len(derivation.text())
     for leaf in leaves(derivation):
         if not self.is_trace(leaf):
             self.tokens.add(leaf.lex)
             continue
         self.ecs += 1
         self.ec_types[base_tag(leaf.lex)] += 1
예제 #7
0
 def accept_derivation(self, bundle):
     '''Count the derivation and flag it if it contains a suspect node.

     A node is suspect when all of the following hold:
       * it has more than one child (node.count() > 1);
       * its tag does not start with NP, ADJP, FRAG or FLR;
       * its base tag is not one of VCD, VRD, VCP, VNV, VPT, VSB;
       * its children do not all share a tag (see kids_have_same_tag);
       * the base tag of every child is in WordTags.

     The first suspect node found is printed, self.nbad is incremented once,
     and the rest of the derivation is not examined.
     '''
     def kids_have_same_tag(node):
         # Two tags match when they are identical, or when both begin with
         # 'V' or both begin with 'N'.
         def tags_are_equal(t1, t2):
             if t1[0] == 'V' and t2[0] == 'V': return True
             if t1[0] == 'N' and t2[0] == 'N': return True
             return t1 == t2
         return all(tags_are_equal(node[0].tag, other.tag) for other in node[1:])

     self.nderivs += 1
     for node in nodes(bundle.derivation):
         if (node.count() > 1 and
             not node.tag.startswith(('NP', 'ADJP', 'FRAG', 'FLR')) and
             base_tag(node.tag) not in ('VCD', 'VRD', 'VCP', 'VNV', 'VPT', 'VSB') and
             not kids_have_same_tag(node) and
             all(base_tag(kid.tag) in WordTags for kid in node)):
             self.nbad += 1
             # Parenthesised form works as a statement on Python 2 and as a
             # function call on Python 3, with the same output.
             print(node)
             break
예제 #8
0
파일: tag.py 프로젝트: VikingMew/cnccgbank
def is_right_absorption(node):
    '''Detect nodes in the CPTB annotation (possibly noisy) that resemble CCGbank-style right absorption.

    True when node has exactly two children, the first child has the same
    base tag as node itself, and the second child is tagged 'PU'.'''
    if node.count() != 2:
        return False
    if base_tag(node.tag) != base_tag(node[0].tag):
        return False
    return node[1].tag == 'PU'
예제 #9
0
 def stem_tag(tag):
     '''Collapse tags beginning with VV, VA, VC or VE to 'V'; pass every other tag to base_tag().

     A tag consisting of the single character 'V' has no second character to
     inspect; it is handed to base_tag() rather than raising IndexError.
     '''
     # The slice yields '' for a one-character tag, and '' is not in the tuple
     # (whereas '' in 'VACE' would be true for a string membership test).
     if tag.startswith('V') and tag[1:2] in ('V', 'A', 'C', 'E'):
         return 'V'
     return base_tag(tag)
예제 #10
0
파일: cnc.py 프로젝트: Oneplus/cnccgbank
 def format(self, leaf):
     '''Apply self.format_string to a mapping built from leaf.

     Keys supplied: 'lex', 'pos' (leaf.pos1), 'cat' (str of leaf.cat) and
     'stemmed_pos' (base_tag of leaf.pos1).
     '''
     values = {
         'lex': leaf.lex,
         'pos': leaf.pos1,
         'cat': str(leaf.cat),
         'stemmed_pos': base_tag(leaf.pos1),
     }
     return self.format_string % values
예제 #11
0
 def __init__(self, var):
     '''Initialise the AtomValue base with var and a two-argument predicate.

     The predicate is true when base_tag() of the two arguments' cat
     attributes compare equal.
     '''
     def same_base_cat(a, b):
         return base_tag(a.cat) == base_tag(b.cat)

     AtomValue.__init__(self, var, same_base_cat)
예제 #12
0
def is_right_absorption(node):
    '''Return True for CPTB nodes (some possibly noisy) shaped like CCGbank-style right absorption.

    That shape is: exactly two children, the first sharing node's base tag,
    the second tagged 'PU'.'''
    if node.count() != 2:
        return False
    same_base_as_first_kid = base_tag(node.tag) == base_tag(node[0].tag)
    if not same_base_as_first_kid:
        return False
    return node[1].tag == 'PU'
예제 #13
0
파일: subcat.py 프로젝트: Oneplus/cnccgbank
def signature(node):
    '''Space-join the base tags of node's children after the first one.

    Children tagged AS, PU, PRN or FLR are left out.
    '''
    skipped_tags = ('AS', 'PU', 'PRN', 'FLR')
    parts = []
    for kid in node[1:]:
        if kid.tag in skipped_tags:
            continue
        parts.append(base_tag(kid.tag))
    return ' '.join(parts)
예제 #14
0
def signature(node):
    '''Return the base tags of node[1:], separated by single spaces.

    Any child whose tag is exactly AS, PU, PRN or FLR does not contribute.
    '''
    excluded = ('AS', 'PU', 'PRN', 'FLR')
    kept = [kid for kid in node[1:] if kid.tag not in excluded]
    return ' '.join([base_tag(kid.tag) for kid in kept])
예제 #15
0
파일: tag.py 프로젝트: VikingMew/cnccgbank
def is_repeated_unary_projection(tag, node):
    '''True if node's tag starts with tag, node has a single child, and that child is a non-leaf whose base tag equals tag.'''
    if not node.tag.startswith(tag):
        return False
    if node.count() != 1:
        return False
    if base_tag(node[0].tag) != tag:
        return False
    return not node[0].is_leaf()
예제 #16
0
def is_partial_ucp(node):
    '''Test whether node opens with a conjunction or punctuation leaf followed by a differently-tagged 'C' child.

    All of these must hold: node[0] is a leaf; node[0]'s tag starts with
    'CC' or equals 'PU'; has_tag(node[1], 'C') is true; and the base tags of
    node and node[1] differ.
    '''
    head = node[0]
    # node[1] is only touched once the checks on the first child have passed.
    leading_conj_or_punct = head.is_leaf() and (
        head.tag.startswith('CC') or head.tag == 'PU')
    return (leading_conj_or_punct
            and has_tag(node[1], 'C')
            and base_tag(node.tag) != base_tag(node[1].tag))
예제 #17
0
파일: mod.py 프로젝트: Oneplus/cnccgbank
def signature(node, d):
    '''Return "<base tag of d[0]> <base tag of the last eligible child before DNP>".

    The eligible children of node are those located before its first child
    tagged 'DNP' whose tag is not 'PU', 'PRN' or 'FLR'.  If node has no DNP
    child, every child is a candidate.

    Raises IndexError when no eligible child exists.
    '''
    kids = list(node)
    # The original compared each position against a *list* of DNP positions,
    # which is a TypeError on Python 3 and always true on Python 2; use the
    # integer position of the first DNP child instead.
    dnp_index = next((i for (i, k) in enumerate(kids) if k.tag == 'DNP'), len(kids))
    candidate_tags = [k.tag for k in kids[:dnp_index] if k.tag not in ('PU', 'PRN', 'FLR')]
    last_nonpunct_tag = candidate_tags[-1]
    return ' '.join((base_tag(d[0].tag), base_tag(last_nonpunct_tag)))
예제 #18
0
파일: nodes.py 프로젝트: Oneplus/cnccgbank
 def __init__(self, var):
     '''Delegate to AtomValue.__init__ with var and a predicate comparing base_tag() of two items' cat attributes.'''
     def base_cats_match(a, b):
         return base_tag(a.cat) == base_tag(b.cat)

     AtomValue.__init__(self, var, base_cats_match)
예제 #19
0
def is_partial_ucp(node):
    '''Check for a leading CC*/PU leaf followed by a 'C' child whose base tag differs from node's.

    The second child is inspected only after the first child has been found
    to be a leaf whose tag starts with 'CC' or equals 'PU'.
    '''
    first = node[0]
    first_is_conj_or_punct = first.is_leaf() and (
        first.tag.startswith('CC') or first.tag == 'PU')
    return (first_is_conj_or_punct and has_tag(node[1], 'C')
            and base_tag(node.tag) != base_tag(node[1].tag))
예제 #20
0
def is_repeated_unary_projection(tag, node):
    '''True if node carries tag (as a prefix) and its only child, a non-leaf, has tag as its base tag.'''
    if not node.tag.startswith(tag):
        return False
    if node.count() != 1:
        return False
    child_repeats_tag = base_tag(node[0].tag) == tag
    if not child_repeats_tag:
        return False
    return not node[0].is_leaf()
예제 #21
0
 def signature(node):
     '''Return node's base tag followed by the base tag of each child, space-separated.'''
     tags = [base_tag(node.tag)]
     for kid in node:
         tags.append(base_tag(kid.tag))
     return ' '.join(tags)
예제 #22
0
파일: nrules.py 프로젝트: Oneplus/cnccgbank
 def signature(node):
     '''Build a space-separated string: base tag of node first, then the base tags of its children in order.'''
     parent_tag = base_tag(node.tag)
     kid_tags = [base_tag(kid.tag) for kid in node]
     return ' '.join([parent_tag] + kid_tags)