def test_view_shrink_whitespace_2(self):
    """Check the plain text of a whitespace-shrunk view over ``input_xml3``.

    Builds a ``Standoff`` from the fixture, applies ``shrink_whitespace`` to
    a view over its table and compares the resulting plain text verbatim.
    """
    tree = etree.fromstring(input_xml3)
    so = standoffconverter.Standoff(tree)
    view = standoffconverter.View(so.table).shrink_whitespace()
    # Only the plain text is checked; the lookup object is not needed here.
    plain, _ = view.get_plain()
    # assertEqual prints both strings when they differ.
    self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
def test_lazy_add(self):
    """Wrap every detected sentence in an ``<s>`` element using lazy inserts.

    A view over ``input_xml4`` (``lb`` rendered as a newline, everything
    outside ``p`` excluded) supplies the plain text. Sentences found by the
    ``sentencizer`` pipe are mapped back through the view's lookup and added
    with ``lazy=True``; the ``body`` subtree is then recreated and the whole
    tree is compared against the expected serialization.
    """
    tree = etree.fromstring(input_xml4)
    so = standoffconverter.Standoff(tree)
    view = (
        standoffconverter.View(so.table)
        .insert_tag_text("lb", "\n")
        .exclude_outside("p")
    )
    plain, lookup = view.get_plain()

    nlp = English()
    nlp.add_pipe('sentencizer')

    for isent, sent in enumerate(nlp(plain).sents):
        # Translate offsets in the view's plain text via the lookup. The end
        # is derived from the last character of the sentence plus one, so it
        # stays an exclusive bound.
        start_ind = lookup.get_pos(sent.start_char)
        end_ind = lookup.get_pos(sent.end_char - 1) + 1
        so.add_inline(
            begin=start_ind,
            end=end_ind,
            tag="s",
            depth=None,
            attrib={'id': str(isent)},
            lazy=True,
        )

    # NOTE(review): presumably lazy=True defers the tree rebuild, which is
    # why the subtree is recreated once after all inserts -- confirm.
    so.recreate_subtree(so.text_el.find('./body'))

    output_xml = etree.tostring(so.tree).decode("utf-8")
    # The comparison is an exact string match, whitespace included.
    expected_output = """<TEI> <teiHeader> </teiHeader> <text> <body> <p><s id="0">1 2 3 4.</s> <s id="1">5 6<lb/> 7 9 10.</s></p> <p> <s id="2">11 12 13 14</s></p> </body> </text></TEI>"""
    # assertEqual prints a diff of both documents on mismatch.
    self.assertEqual(output_xml, expected_output)
def test_collapsed_table_1(self):
    """Check the ``text`` of rows 0 and 3 of the collapsed table for ``input_xml1``."""
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    collapsed_table = so.collapsed_table
    # assertEqual shows the actual cell text when a row does not match.
    self.assertEqual(collapsed_table.iloc[0].text, "1 2 3 4 5 6 7 9 10")
    self.assertEqual(collapsed_table.iloc[3].text, " 12 13 14")
def test_remove_annotation(self):
    """Remove the element of the third standoff entry and check the XML.

    In the expected serialization the text ``1 2 ... 10`` sits directly
    under ``<body>``, while the second ``<p>`` and its ``<lb/>`` are kept.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    to_remove = so.standoffs[2]
    so.remove_inline(to_remove["el"])
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_output = '<text><body>1 2 3 4 5 6 7 9 10<p> 11<lb/> 12 13 14</p></body></text>'
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(output_xml, expected_output)
def test_view_shrink_whitespace_1(self):
    """Check plain text and lookup of a whitespace-shrunk view over ``input_xml2``.

    Besides the plain text itself, the position of ``"7"`` in the plain text
    must map back, via the lookup, to the table row whose text is ``"7"``.
    """
    tree = etree.fromstring(input_xml2)
    so = standoffconverter.Standoff(tree)
    view = standoffconverter.View(so.table).shrink_whitespace()
    plain, lookup = view.get_plain()

    # Resolve the plain-text offset of "7" to its row in the underlying table.
    table_index = lookup.get_table_index(plain.index("7"))
    self.assertEqual(so.table.df.iloc[table_index].text, "7")
    self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
def test_remove_empty_element(self):
    """Remove the element of the last standoff entry and check the XML.

    The expected serialization contains both ``<p>`` elements and no
    ``<lb/>``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    to_remove = so.standoffs[-1]
    so.remove_inline(to_remove['el'])
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_out = '''<text><body><p>1 2 3 4 5 6 7 9 10</p><p> 11 12 13 14</p></body></text>'''
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(expected_out, output_xml)
def test_add_empty_element(self):
    """Insert an empty ``<lb/>`` at position 1 (``begin == end``) and check the XML."""
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(begin=1, end=1, tag="lb", depth=None, attrib={})
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_out = '''<text><body><p>1<lb/> 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'''
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(expected_out, output_xml)
def test_add_annotation_fail1(self):
    """Adding an inline element over positions 17-19 must raise ``ValueError``.

    NOTE(review): presumably the span is rejected because it straddles the
    boundary between the two ``<p>`` elements -- confirm against
    ``add_inline``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    # Keep only the call expected to raise inside the context manager.
    with self.assertRaises(ValueError):
        so.add_inline(
            begin=17,
            end=19,
            tag="xx",
            depth=3,
            attrib={"resp": "machine"},
        )
def test_add_annotation_1(self):
    """Wrap the first character in ``<xx resp="machine">`` and check the XML."""
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(
        begin=0,
        end=1,
        tag="xx",
        depth=None,
        attrib={"resp": "machine"},
    )
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_out = '''<text><body><p><xx resp="machine">1</xx> 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'''
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(expected_out, output_xml)
def test_view_insert_tag_text(self):
    """Render ``<lb/>`` as a newline in the view and check text and lookup.

    The plain text must contain ``"\\n"`` where the ``lb`` element sits, and
    the offset of ``"12"`` must map back to the table row holding ``"1"``,
    the first character of that token.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    # Rebind the result, as other tests in this file do with View methods,
    # so the test does not depend on in-place mutation.
    view = standoffconverter.View(so.table).insert_tag_text("lb", "\n")
    plain, lookup = view.get_plain()

    table_index = lookup.get_table_index(plain.index("12"))
    self.assertEqual(so.table.df.iloc[table_index].text, "1")
    self.assertEqual(plain, '1 2 3 4 5 6 7 9 10 11\n 12 13 14')
def test_add_annotation_fail2(self):
    """A second inline element over 3-5 must raise after one over 2-4 exists."""
    standoff = standoffconverter.Standoff(etree.fromstring(input_xml1))
    # Both calls share the tag and depth; only the span differs.
    shared = {"tag": "xx", "depth": None}

    standoff.add_inline(begin=2, end=4, attrib={"resp": "machine"}, **shared)

    with self.assertRaises(ValueError):
        standoff.add_inline(begin=3, end=5, attrib={"resp": "machine"}, **shared)
def test_view_exclude_2(self):
    """Keep only text inside ``xx`` and check the view's plain text.

    An ``xx`` element is added over positions 2-5; after
    ``exclude_outside(["xx"])`` the plain text must be exactly ``'2 3'``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(
        begin=2,
        end=5,
        tag="xx",
        depth=None,
        attrib={"resp": "machine"},
    )
    view = standoffconverter.View(so.table).exclude_outside(["xx"])
    # Only the plain text is checked; the lookup object is not needed here.
    plain, _ = view.get_plain()
    self.assertEqual(plain, '2 3')
def test_span_2(self):
    """Add a span over positions 2-22 with id ``test2`` and check the XML.

    The expected serialization holds an empty ``<span spanTo="test2"/>`` in
    the first ``<p>`` and an ``<anchor id="test2"/>`` in the second.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_span(
        begin=2,
        end=22,
        tag="span",
        depth=None,
        attrib=None,
        id_="test2",
    )
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_output = "<text><body><p>1 <span spanTo=\"test2\"/>2 3 4 5 6 7 9 10</p><p> 11<lb/> <anchor id=\"test2\"/>12 13 14</p></body></text>"
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(output_xml, expected_output)
def test_view_exclude_1(self):
    """Exclude text inside ``xx`` and check that the lookup still resolves.

    An ``xx`` element is added over positions 2-4; after
    ``exclude_inside(["xx"])`` the offset of ``"5"`` in the view's plain
    text must map back to the table row whose text is ``"5"``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(
        begin=2,
        end=4,
        tag="xx",
        depth=None,
        attrib={"resp": "machine"},
    )
    view = standoffconverter.View(so.table).exclude_inside(["xx"])
    plain, lookup = view.get_plain()

    table_index = lookup.get_table_index(plain.index("5"))
    # assertEqual shows the actual cell text on mismatch.
    self.assertEqual(so.table.df.iloc[table_index].text, "5")
def test_add_annotation_3(self):
    """Add ``xx`` then ``vv`` over the same span at depth 3 and check nesting.

    In the expected serialization the later ``vv`` element wraps the
    earlier ``xx`` element.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    for tag in ("xx", "vv"):
        so.add_inline(
            begin=2,
            end=3,
            tag=tag,
            depth=3,
            attrib={"resp": "machine"},
        )
    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_out = '<text><body><p>1 <vv resp="machine"><xx resp="machine">2</xx></vv> 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'
    # assertEqual prints both serializations on mismatch, so no debug
    # prints are needed.
    self.assertEqual(expected_out, output_xml)
def test_add_remove_annotation2(self):
    """Add five identical ``vv`` elements, remove them again, and check the XML.

    After as many removals as insertions, the serialization must contain no
    ``vv`` element.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)

    for _ in range(5):
        so.add_inline(
            begin=2,
            end=3,
            tag="vv",
            depth=3,
            attrib={"resp": "machine"},
        )

    for _ in range(5):
        # Take the first remaining 'vv' element; so.standoffs is re-read on
        # every pass because each removal changes it.
        to_remove = next(
            it["el"] for it in so.standoffs if it["el"].tag == 'vv'
        )
        so.remove_inline(to_remove)

    output_xml = etree.tostring(so.text_el).decode("utf-8")
    expected_output = "<text><body><p>1 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>"
    # assertEqual prints both serializations on mismatch.
    self.assertEqual(output_xml, expected_output)
def test_from_tree_plain(self):
    """Check ``Standoff.plain`` for the ``input_xml1`` fixture."""
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    # assertEqual prints both strings when they differ.
    self.assertEqual(so.plain, '1 2 3 4 5 6 7 9 10 11 12 13 14')
def test_collapsed_table_2(self):
    """Check that row 0 of the collapsed table has context ``text>body>p``."""
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    collapsed_table = so.collapsed_table
    # The context object is compared through its string form.
    self.assertEqual(str(collapsed_table.iloc[0].context), "text>body>p")
def test_json(self):
    """Check the JSON serialization of the standoff entries for ``input_xml1``.

    The expected document lists ``text``, ``body``, two ``p`` elements and
    one empty ``lb`` with their ``begin``/``end`` offsets and ``depth``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    output_json = so.json
    # One entry per line for readability; adjacent literals concatenate into
    # a single string.
    expected_out = (
        '[{"tag": "text", "attrib": {}, "begin": 0, "end": 30, "depth": 0}, '
        '{"tag": "body", "attrib": {}, "begin": 0, "end": 30, "depth": 1}, '
        '{"tag": "p", "attrib": {}, "begin": 0, "end": 18, "depth": 2}, '
        '{"tag": "p", "attrib": {}, "begin": 18, "end": 30, "depth": 2}, '
        '{"tag": "lb", "attrib": {}, "begin": 21, "end": 21, "depth": 3}]'
    )
    # assertEqual prints both JSON strings on mismatch.
    self.assertEqual(expected_out, output_json)