def test_unusual_misc():
    """
    Regression check on the columns produced for RUSSIAN_SAMPLE.

    One particular implementation of the conll code left the misc field
    blank for this sample, which made the checks below fail.
    """
    parsed = CoNLL.conll2doc(input_str=RUSSIAN_SAMPLE)
    conll_sentences = CoNLL.doc2conll(parsed)
    assert len(conll_sentences) == 1

    only_sentence = conll_sentences[0]
    assert len(only_sentence) == 14

    for line in only_sentence:
        fields = line.split("\t")
        field_count = len(fields)
        # A line is either tab-free (a single piece) or a full 10-column row.
        assert field_count in (1, 10)
        if field_count == 10:
            # No column of a 10-column row may be an empty string.
            assert all(fields)
def test_doc_with_comments():
    """
    Round-trip RUSSIAN_SAMPLE and verify that its comments are kept.

    The first three lines of the sample text must appear as the comments of
    the single parsed sentence, and again as the first three lines of the
    sentence obtained by converting the doc back.
    """
    expected = RUSSIAN_SAMPLE.split("\n")
    num_comments = 3

    doc = CoNLL.conll2doc(input_str=RUSSIAN_SAMPLE)
    assert len(doc.sentences) == 1
    parsed_comments = doc.sentences[0].comments
    assert len(parsed_comments) == num_comments
    for idx in range(num_comments):
        assert expected[idx] == parsed_comments[idx]

    converted = CoNLL.doc2conll(doc)
    assert len(converted) == 1
    first_sentence = converted[0]
    assert len(first_sentence) == 14
    # The converted sentence must start with the same three comment lines.
    for idx in range(num_comments):
        assert expected[idx] == first_sentence[idx]