def test_init(self):
    """Check which argument combinations the node constructor accepts.

    The assertions expect a regular node to need both an id and an
    entity, a root (id of None) to take only an entity plus exactly one
    node daughter, and a root nested under another node to be rejected.
    """
    # a regular node cannot be built from fewer than two arguments
    for too_few in ((), (1,)):
        with pytest.raises(TypeError):
            D(*too_few)

    # minimal and fully specified regular nodes are accepted
    D(1, 'some-thing')
    D(1, 'some-thing', 0.5, 0, 3, [T('some-token')])

    # roots are special: id is None, entity is root, daughters must be
    # exactly 1 node; rest are None
    malformed_roots = (
        ((None,), {}),
        ((None, 'some-root', 0.5), {}),
        ((None, 'some-root'), {'start': 1}),
        ((None, 'some-root'), {'end': 1}),
    )
    for positional, keywords in malformed_roots:
        with pytest.raises(TypeError):
            D(*positional, **keywords)

    # a root with two daughters, or with a terminal daughter, is invalid
    with pytest.raises(ValueError):
        two_nodes = [N(1, 'some-thing'), N(2, 'some-thing')]
        D(None, 'some-root', daughters=two_nodes)
    with pytest.raises(ValueError):
        D(None, 'some-root', daughters=[T('some-token')])

    # well-formed roots, with the optional fields omitted or given as None
    D(None, 'some-root', daughters=[N(1, 'some-thing')])
    D(None, 'some-root', None, None, None, daughters=[N(1, 'some-thing')])

    # root not as top
    with pytest.raises(ValueError):
        lexical = N(2, 'a-lex', daughters=[T('some-token')])
        nested_root = N(None, 'some-root', daughters=[lexical])
        D(1, 'some-thing', daughters=[nested_root])
def test_attributes(self):
    """Check the attribute values exposed by freshly constructed nodes."""

    def expect(node, **wanted):
        # compare the attributes one at a time, in the order they are given
        for attr, value in wanted.items():
            assert getattr(node, attr) == value

    # only id and entity supplied: the remaining fields are expected to
    # read as -1 and the daughters as an empty list
    bare = D(1, 'some-thing')
    expect(
        bare,
        id=1,
        entity='some-thing',
        score=-1,
        start=-1,
        end=-1,
        daughters=[],
    )

    # every field supplied explicitly
    full = D(1, 'some-thing', 0.5, 1, 6, [T('some token')])
    expect(
        full,
        id=1,
        entity='some-thing',
        score=0.5,
        start=1,
        end=6,
        daughters=[T('some token')],
    )

    # a root keeps None for everything except its entity and daughters
    root = D(None, 'some-root', daughters=[D(1, 'some-thing')])
    assert root.id is None
    assert root.entity == 'some-root'
    for unset in ('score', 'start', 'end'):
        assert getattr(root, unset) is None
    assert len(root.daughters) == 1
def test_fromstring(self):
    """Exercise ``from_string`` on malformed and well-formed UDF strings.

    Malformed inputs must raise; well-formed inputs are parsed and the
    resulting node fields and daughters are compared with expected values.

    NOTE(review): a second ``test_fromstring(self)`` is visible later in
    this file. If both definitions sit in the same class, the later one
    replaces this one and this test never runs -- confirm against the
    enclosing class.
    """

    def check_fields(node, ident, entity, score, start, end):
        # the five scalar fields are compared in a fixed order
        assert node.id == ident
        assert node.entity == entity
        assert node.score == score
        assert node.start == start
        assert node.end == end

    rejected = (
        (DerivationSyntaxError, ''),
        # root with no children
        # TODO: this should be a DerivationSyntaxError but the current
        # UDF parser doesn't make that straightforward. Revisit this
        # when/if the UDF parsing changes
        (ValueError, '(some-root)'),
        # does not start with `(` or end with `)`
        (DerivationSyntaxError, ' (1 some-thing -1 -1 -1 ("token"))'),
        (DerivationSyntaxError, ' (1 some-thing -1 -1 -1 ("token")) '),
        # uneven parens
        (DerivationSyntaxError, '(1 some-thing -1 -1 -1 ("token")'),
    )
    for error, text in rejected:
        with pytest.raises(error):
            from_string(text)

    # ok
    node = from_string('(1 some-thing -1 -1 -1 ("token"))')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('token')]

    # newlines in tree
    # An explicit "\n" is used so the input really spans two lines; a
    # literal with only spaces would not exercise the multi-line case.
    node = from_string('(1 some-thing -1 -1 -1\n    ("token"))')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('token')]

    # LKB-style terminals
    node = from_string('''(1 some-thing -1 -1 -1 ("to ken" 1 2))''')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('to ken')]  # start/end ignored

    # TFS-style terminals
    node = from_string(r'''(1 some-thing -1 -1 -1 ("to ken" 2 "token [ +FORM \"to\" ]" 3 "token [ +FORM \"ken\" ]"))''')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [
        T('to ken', [
            Tk(2, r'token [ +FORM \"to\" ]'),
            Tk(3, r'token [ +FORM \"ken\" ]'),
        ])
    ]

    # longer example
    root = from_string(r'''(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 ("a" 1 "token [ +FORM \"a\" ]")) (3 bcd-lex 0.5 2 5 ("bcd" 2 "token [ +FORM \"bcd\" ]"))) )''')
    assert root.entity == 'root'
    assert len(root.daughters) == 1

    top = root.daughters[0]
    check_fields(top, 1, 'some-thing', 0.4, 0, 5)
    assert len(top.daughters) == 2

    lex = top.daughters[0]
    check_fields(lex, 2, 'a-lex', 0.8, 0, 1)
    assert lex.daughters == [T('a', [Tk(1, r'token [ +FORM \"a\" ]')])]

    lex = top.daughters[1]
    check_fields(lex, 3, 'bcd-lex', 0.5, 2, 5)
    assert lex.daughters == [T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])]
def test_fromstring(self):
    """Exercise ``D.from_string`` on malformed and well-formed UDF strings.

    Malformed inputs must raise ``ValueError``; well-formed inputs are
    parsed and the resulting node fields and daughters are compared with
    expected values.

    NOTE(review): another ``test_fromstring(self)`` is visible earlier in
    this file. If both definitions sit in the same class, this one
    replaces the earlier one, which then never runs -- confirm against
    the enclosing class.
    """

    def check_fields(node, ident, entity, score, start, end):
        # the five scalar fields are compared in a fixed order
        assert node.id == ident
        assert node.entity == entity
        assert node.score == score
        assert node.start == start
        assert node.end == end

    rejected = (
        '',
        # root with no children
        '(some-root)',
        # does not start with `(` or end with `)`
        ' (1 some-thing -1 -1 -1 ("token"))',
        ' (1 some-thing -1 -1 -1 ("token")) ',
        # uneven parens
        '(1 some-thing -1 -1 -1 ("token")',
    )
    for text in rejected:
        with pytest.raises(ValueError):
            D.from_string(text)

    # ok
    node = D.from_string('(1 some-thing -1 -1 -1 ("token"))')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('token')]

    # newlines in tree
    # An explicit "\n" is used so the input really spans two lines; a
    # literal with only spaces would not exercise the multi-line case.
    node = D.from_string('(1 some-thing -1 -1 -1\n    ("token"))')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('token')]

    # LKB-style terminals
    node = D.from_string('''(1 some-thing -1 -1 -1 ("to ken" 1 2))''')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [T('to ken')]  # start/end ignored

    # TFS-style terminals
    node = D.from_string(r'''(1 some-thing -1 -1 -1 ("to ken" 2 "token [ +FORM \"to\" ]" 3 "token [ +FORM \"ken\" ]"))''')
    check_fields(node, 1, 'some-thing', -1.0, -1, -1)
    assert node.daughters == [
        T('to ken', [
            Tk(2, r'token [ +FORM \"to\" ]'),
            Tk(3, r'token [ +FORM \"ken\" ]'),
        ])
    ]

    # longer example
    root = D.from_string(r'''(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 ("a" 1 "token [ +FORM \"a\" ]")) (3 bcd-lex 0.5 2 5 ("bcd" 2 "token [ +FORM \"bcd\" ]"))) )''')
    assert root.entity == 'root'
    assert len(root.daughters) == 1

    top = root.daughters[0]
    check_fields(top, 1, 'some-thing', 0.4, 0, 5)
    assert len(top.daughters) == 2

    lex = top.daughters[0]
    check_fields(lex, 2, 'a-lex', 0.8, 0, 1)
    assert lex.daughters == [T('a', [Tk(1, r'token [ +FORM \"a\" ]')])]

    lex = top.daughters[1]
    check_fields(lex, 3, 'bcd-lex', 0.5, 2, 5)
    assert lex.daughters == [T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])]