def test__bool__(self):
    assert not Lnk(None)
    assert not Lnk.charspan(-1, -1)
    assert Lnk.charspan(0, 0)
    assert Lnk.chartspan(0, 0)
    assert Lnk.tokens([])
    assert Lnk.edge(0)
def abrams_barked_dmrs():
    d = DMRS(
        30, 30,
        nodes=[Node(10, 'udef_q'),
               Node(20, 'named', type='x', carg='Abrams',
                    lnk=Lnk.charspan(0, 6)),
               Node(30, '_bark_v_1', type='e',
                    properties={'TENSE': 'past'},
                    lnk=Lnk.charspan(7, 13))],
        links=[Link(10, 20, 'RSTR', 'H'),
               Link(30, 20, 'ARG1', 'NEQ')],
        lnk=Lnk.charspan(0, 14),
        surface='Abrams barked.',
        identifier='1000380')
    return d
def _decode_lnk(cfrom, cto):
    if cfrom is cto is None:
        return None
    elif None in (cfrom, cto):
        raise ValueError(
            'Both cfrom and cto, or neither, must be specified.')
    else:
        return Lnk.charspan(cfrom, cto)
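# Illustrative contract for _decode_lnk (a sketch, not in the original
# source): both offsets yield a Lnk, neither yields None, and a mixed
# pair is rejected.
#
#     _decode_lnk(0, 5)        # -> Lnk.charspan(0, 5), i.e. <0:5>
#     _decode_lnk(None, None)  # -> None
#     _decode_lnk(0, None)     # raises ValueError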
def testCharSpanLnk(self):
    lnk = Lnk.charspan(0, 1)
    assert lnk.type == Lnk.CHARSPAN
    assert lnk.data == (0, 1)
    assert str(lnk) == '<0:1>'
    repr(lnk)  # no error
    lnk = Lnk.charspan('0', '1')
    assert lnk.data == (0, 1)
    with pytest.raises(TypeError):
        Lnk.charspan(1)
    with pytest.raises(TypeError):
        Lnk.charspan([1, 2])
    with pytest.raises(TypeError):
        Lnk.charspan(1, 2, 3)
    with pytest.raises(ValueError):
        Lnk.charspan('a', 'b')
def from_string(cls, s):
    """
    Decode from the YY token lattice format.
    """
    def _qstrip(s):
        return s[1:-1]  # remove assumed quote characters

    tokens = []
    for match in _yy_re.finditer(s):
        d = match.groupdict()
        lnk, pos = None, []
        if d['lnkfrom'] is not None:
            lnk = Lnk.charspan(d['lnkfrom'], d['lnkto'])
        if d['pos'] is not None:
            ps = d['pos'].strip().split()
            pos = list(zip(map(_qstrip, ps[::2]),
                           map(float, ps[1::2])))
        tokens.append(
            YYToken(
                int(d['id']),
                int(d['start']),
                int(d['end']),
                lnk,
                list(map(int, d['paths'].strip().split())),
                _qstrip(d['form']),
                None if d['surface'] is None else _qstrip(d['surface']),
                int(d['ipos']),
                list(map(_qstrip, d['lrules'].strip().split())),
                pos
            )
        )
    return cls(tokens)
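# Illustrative usage (a sketch, not in the original source; assumes the
# enclosing class is YYTokenLattice and an input in the YY format
# exercised by test_fromstring below):
#
#     s = ('(42, 0, 1, <0:12>, 1, "Tokenization", 0, "null",'
#          ' "NNP" 0.7677 "NN" 0.2323)')
#     lattice = YYTokenLattice.from_string(s)
#     lattice.tokens[0].form  # -> 'Tokenization'
#     lattice.tokens[0].lnk   # -> Lnk.charspan(0, 12)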
def tokenize_result(self, result, pattern=DEFAULT_TOKENIZER):
    logger.info('tokenize_result(%r, %r)', result, pattern)
    tokens = [
        YYToken(id=i, start=i, end=(i + 1),
                lnk=Lnk.charspan(tok[0], tok[1]),
                form=tok[2])
        for i, tok in enumerate(_tokenize(result, pattern))
    ]
    return YYTokenLattice(tokens)
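# Note (not in the original source): each regex match becomes one
# YYToken whose lattice vertices are consecutive integers, so a
# three-token result gets (start, end) pairs (0, 1), (1, 2), (2, 3),
# while the Lnk records the match's character offsets in the input.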
def from_triples(triples):
    """
    Decode triples, as from :func:`to_triples`, into a DMRS object.
    """
    top = lnk = surface = identifier = None
    nids, nd, edges = [], {}, []
    for src, rel, tgt in triples:
        rel = rel.lstrip(':')
        # check for an explicit (None, 'top', id) triple before src is
        # stringified, otherwise this branch could never be taken
        if src is None and rel == 'top':
            top = str(tgt)
            continue
        src, tgt = str(src), str(tgt)  # in case penman converts ids to ints
        if src not in nd:
            if top is None:
                top = src
            nids.append(src)
            nd[src] = {'pred': None, 'lnk': None, 'type': None,
                       'props': {}, 'carg': None}
        if rel == 'instance':
            nd[src]['pred'] = tgt
        elif rel == 'lnk':
            cfrom, cto = tgt.strip('"<>').split(':')
            nd[src]['lnk'] = Lnk.charspan(int(cfrom), int(cto))
        elif rel == 'carg':
            if (tgt[0], tgt[-1]) == ('"', '"'):
                tgt = tgt[1:-1]
            nd[src]['carg'] = tgt
        elif rel == CVARSORT:
            nd[src]['type'] = tgt
        elif rel.islower():
            nd[src]['props'][rel] = tgt
        else:
            rargname, post = rel.rsplit('-', 1)
            edges.append((src, tgt, rargname, post))
    nidmap = dict((nid, FIRST_NODE_ID + i) for i, nid in enumerate(nids))
    nodes = [
        Node(id=nidmap[nid],
             predicate=nd[nid]['pred'],
             type=nd[nid]['type'],
             properties=nd[nid]['props'],
             lnk=nd[nid]['lnk'],
             carg=nd[nid]['carg'])
        for nid in nids
    ]
    links = [Link(nidmap[s], nidmap[t], r, p) for s, t, r, p in edges]
    return DMRS(top=nidmap[top], nodes=nodes, links=links,
                lnk=lnk, surface=surface, identifier=identifier)
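# Illustrative input (a sketch with hypothetical node ids; mirrors the
# shape of triples that to_triples would emit for "Abrams barked."):
#
#     triples = [
#         ('10000', ':instance', '_bark_v_1'),
#         ('10000', ':lnk', '"<7:13>"'),
#         ('10000', ':ARG1-NEQ', '10001'),
#         ('10001', ':instance', 'named'),
#         ('10001', ':carg', '"Abrams"'),
#         ('10001', ':lnk', '"<0:6>"'),
#     ]
#     d = from_triples(triples)  # top defaults to the first source id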
def test_len_change_with_capturing_group():
    x = r.from_string(r"!wo(n't) will \1").apply("I won't go")
    assert x.string == "I will n't go"
    assert x.startmap.tolist() == [
        1, 0, 0, 0, -1, -2, -3, -4, -3, -3, -3, -3, -3, -3, -3
    ]
    assert x.endmap.tolist() == [
        0, 0, 0, 1, 0, -1, -2, -3, -3, -3, -3, -3, -3, -3, -4
    ]
    x = r.from_string(r"!wo(n't) will \1").tokenize("I won't go")
    assert len(x.tokens) == 4
    assert x.tokens[0].form == 'I'
    assert x.tokens[0].lnk == Lnk.charspan(0, 1)
    assert x.tokens[1].form == 'will'
    assert x.tokens[1].lnk == Lnk.charspan(2, 4)
    assert x.tokens[2].form == "n't"
    assert x.tokens[2].lnk == Lnk.charspan(4, 7)
    assert x.tokens[3].form == 'go'
    assert x.tokens[3].lnk == Lnk.charspan(8, 10)
def test_from_dict(self):
    t = YYToken.from_dict({'id': 1, 'start': 0, 'end': 1, 'form': "dog"})
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YYToken.from_dict({
        'id': 1, 'start': 0, 'end': 1,
        'from': 0, 'to': 1,
        # 'paths': [1],
        'form': "dog",
        'surface': "Dog",
        # 'ipos': 0,
        # 'lrules': ["null"],
        'tags': ["NN"], 'probabilities': [1.0]
    })
    check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                0, ["null"], [("NN", 1.0)])
def test_to_dict(self):
    t = YYToken(1, 0, 1, form="dog")
    assert t.to_dict() == {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
    t = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)])
    assert t.to_dict() == {
        'id': 1, 'start': 0, 'end': 1,
        'from': 0, 'to': 1,
        # 'paths': [1],
        'form': "dog",
        'surface': "Dog",
        # 'ipos': 0,
        # 'lrules': ["null"],
        'tags': ["NN"], 'probabilities': [1.0]
    }
def test_to_list(self):
    tl = YY([YYToken(1, 0, 1, form="dog")])
    assert tl.to_list() == [{'id': 1, 'start': 0, 'end': 1, 'form': "dog"}]
    tl = YY([
        YYToken(1, 0, 1, Lnk.charspan(0, 4), [1], "dogs", "Dogs",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)]),
        YYToken(2, 1, 2, Lnk.charspan(5, 9), [1], "bark",
                ipos=0, lrules=["null"], pos=[("VBZ", 1.0)])
    ])
    assert tl.to_list() == [
        {'id': 1, 'start': 0, 'end': 1,
         'from': 0, 'to': 4,
         'form': "dogs", 'surface': "Dogs",
         # 'ipos': 0,
         # 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]},
        {'id': 2, 'start': 1, 'end': 2,
         'from': 5, 'to': 9,
         'form': "bark",
         # 'ipos': 0,
         # 'lrules': ["null"],
         'tags': ["VBZ"], 'probabilities': [1.0]}
    ]
def from_dict(cls, d):
    """
    Decode from a dictionary as from :meth:`to_dict`.
    """
    return cls(
        d['id'],
        d['start'],
        d['end'],
        Lnk.charspan(d['from'], d['to']) if 'from' in d else None,
        # d.get('paths', [1]),
        form=d['form'],
        surface=d.get('surface'),
        # ipos=
        # lrules=
        pos=list(zip(d.get('tags', []), d.get('probabilities', [])))
    )
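# Illustrative usage (a sketch, mirroring test_from_dict above; note
# that paths, ipos, and lrules are not read from the dictionary, so
# they keep their defaults):
#
#     t = YYToken.from_dict({'id': 1, 'start': 0, 'end': 1,
#                            'from': 0, 'to': 1, 'form': 'dog'})
#     t.lnk  # -> Lnk.charspan(0, 1)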
def test_init(self):
    with pytest.raises(TypeError):
        YYToken()
        YYToken(1)
        YYToken(1, 0)
    YYToken(1, 0, 1)
    YYToken(1, 0, 1, Lnk.charspan(0, 1))
    YYToken(1, 0, 1, Lnk.charspan(0, 1), [1])
    YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".")
    YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".", ipos=0)
    YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".",
            ipos=0, lrules=["null"])
    YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".",
            ipos=0, lrules=["null"], pos=[(".", 1.0)])
    t = YYToken(1, 0, 1, form="dog")
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)])
    check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                0, ["null"], [("NN", 1.0)])
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into an EDS object.
    """
    top = d.get('top')
    nodes = []
    for nodeid, node in d.get('nodes', {}).items():
        props = node.get('properties', None)
        nodetype = node.get('type')
        lnk = None
        if 'lnk' in node:
            lnk = Lnk.charspan(node['lnk']['from'], node['lnk']['to'])
        nodes.append(
            Node(id=nodeid,
                 predicate=node['label'],
                 type=nodetype,
                 edges=node.get('edges', {}),
                 properties=props,
                 carg=node.get('carg'),
                 lnk=lnk))
    nodes.sort(key=lambda n: (n.cfrom, -n.cto))
    return EDS(top, nodes=nodes)
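# Illustrative input (a sketch with hypothetical identifiers) showing
# the dictionary shape this expects; nodes are sorted afterwards by
# their character spans:
#
#     from_dict({
#         'top': 'e2',
#         'nodes': {
#             'e2': {'label': '_bark_v_1',
#                    'edges': {'ARG1': 'x4'},
#                    'lnk': {'from': 7, 'to': 13}},
#             'x4': {'label': 'named',
#                    'carg': 'Abrams',
#                    'lnk': {'from': 0, 'to': 6}},
#         },
#     })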
def test__eq__(self):
    assert Lnk(None) == Lnk(None)
    assert Lnk(None) != Lnk.charspan(0, 1)
    assert Lnk.charspan(0, 1) == Lnk.charspan(0, 1)
    assert Lnk.charspan(0, 1) != Lnk.charspan(0, 2)
    assert Lnk.charspan(0, 1) != Lnk.chartspan(0, 1)
def _lnk(o):
    return None if o is None else Lnk.charspan(o['from'], o['to'])
def test_fromstring(self):
    assert len(YY.from_string(token_v1_basic).tokens) == 1
    t = YY.from_string(token_v1_basic).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YY.from_string(token_v1_surface).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])
    t = YY.from_string(token_v1_pos).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"],
                [("NN", 0.8), ("VV", 0.2)])
    t = YY.from_string(token_v1_surface_pos).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
                [("NN", 1.0)])
    t = YY.from_string(token_v1_lrules).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0,
                ["lrule1", "lrule2"], [])
    t = YY.from_string(token_v2).tokens[0]
    check_token(t, 1, 0, 1, Lnk.charspan(1, 3), [1], "dog", "Dog", 0,
                ["null"], [("NN", 1.0)])
    tl = YY.from_string(tokenstring)
    assert len(tl.tokens) == 9
    check_token(
        tl.tokens[0],
        42, 0, 1, Lnk.charspan(0, 12), [1], "Tokenization", None,
        0, ["null"], [("NNP", 0.7677), ("NN", 0.2323)]
    )
    check_token(
        tl.tokens[1],
        43, 1, 2, Lnk.charspan(12, 13), [1], ",", None,
        0, ["null"], [(",", 1.0000)]
    )
    check_token(
        tl.tokens[2],
        44, 2, 3, Lnk.charspan(14, 15), [1], "a", None,
        0, ["null"], [("DT", 1.0000)]
    )
    check_token(
        tl.tokens[3],
        45, 3, 4, Lnk.charspan(16, 27), [1], "non-trivial", None,
        0, ["null"], [("JJ", 1.0000)]
    )
    check_token(
        tl.tokens[4],
        46, 4, 5, Lnk.charspan(28, 36), [1], "exercise", None,
        0, ["null"], [("NN", 0.9887), ("VB", 0.0113)]
    )
    check_token(
        tl.tokens[5],
        47, 5, 6, Lnk.charspan(36, 37), [1], ",", None,
        0, ["null"], [(",", 1.0000)]
    )
    check_token(
        tl.tokens[6],
        48, 6, 7, Lnk.charspan(38, 44), [1], "bazed", None,
        0, ["null"], [("VBD", 0.5975), ("VBN", 0.4025)]
    )
    check_token(
        tl.tokens[7],
        49, 7, 8, Lnk.charspan(45, 58), [1], "*****@*****.**", None,
        0, ["null"], [("NN", 0.7342), ("JJ", 0.2096)]
    )
    check_token(
        tl.tokens[8],
        50, 8, 9, Lnk.charspan(58, 59), [1], ".", None,
        0, ["null"], [(".", 1.0000)]
    )
def _lnk(x):
    return None if x is None else Lnk.charspan(x['from'], x['to'])
def _decode_lnk(elem):
    return Lnk.charspan(elem.get('cfrom', '-1'), elem.get('cto', '-1'))
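# Illustrative usage (a sketch; assumes `elem` is an
# xml.etree.ElementTree.Element): missing attributes fall back to '-1',
# producing Lnk.charspan(-1, -1), which is falsy (see test__bool__
# above).
#
#     import xml.etree.ElementTree as ET
#     _decode_lnk(ET.fromstring('<node cfrom="0" cto="6"/>'))  # -> <0:6>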
def __init__(self):
    self.lnk = Lnk.charspan(0, 1)