Example #1
0
    def test_lazy_add(self):
        tree = etree.fromstring(input_xml4)
        so = standoffconverter.Standoff(tree)

        view = (standoffconverter.View(so.table).insert_tag_text(
            "lb", "\n").exclude_outside("p"))

        plain, lookup = view.get_plain()

        nlp = English()
        nlp.add_pipe('sentencizer')

        for isent, sent in enumerate(nlp(plain).sents):

            start_ind = lookup.get_pos(sent.start_char)
            end_ind = lookup.get_pos(sent.end_char - 1) + 1

            so.add_inline(begin=start_ind,
                          end=end_ind,
                          tag="s",
                          depth=None,
                          attrib={'id': f'{isent}'},
                          lazy=True)

        so.recreate_subtree(so.text_el.find('./body'))
        output_xml = etree.tostring(so.tree).decode("utf-8")
        expected_output = """<TEI>
<teiHeader> </teiHeader>
<text>
    <body>
        <p><s id="0">1 2 3 4.</s> <s id="1">5 6<lb/> 7 9 10.</s></p>
        <p> <s id="2">11 12 13 14</s></p>
    </body>
</text></TEI>"""
        self.assertTrue(output_xml == expected_output)
Example #2
0
 def test_view_shrink_whitespace_2(self):
     """Check the plain text of ``input_xml3`` after ``shrink_whitespace``."""
     tree = etree.fromstring(input_xml3)
     so = standoffconverter.Standoff(tree)

     view = standoffconverter.View(so.table).shrink_whitespace()
     # Only the plain text is inspected; the lookup object is not needed.
     plain, _ = view.get_plain()

     # assertEqual shows both strings when they differ.
     self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
    def test_view_1(self):
        """Check ``standoff_char_pos(0)`` for a view masked to rows 10-19.

        A boolean mask with one entry per ``so.table`` element is created,
        entries 10 to 19 are set to ``True``, and the view built from that
        mask is expected to report ``(10, 10)`` for position 0.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Converter(tree)

        mask = np.zeros(len(so.table), dtype=bool)
        mask[10:20] = True
        view = standoffconverter.View(so, mask)

        # assertEqual prints the actual value when the expectation is not met.
        self.assertEqual(view.standoff_char_pos(0), (10, 10))
Example #4
0
    def test_view_shrink_whitespace_1(self):
        """Check plain text and lookup of ``input_xml2`` after whitespace shrinking.

        The position of ``"7"`` in the plain text must map back, through the
        lookup, to a table row whose ``text`` is ``"7"``, and the plain text
        itself must equal the expected shrunk string.
        """
        tree = etree.fromstring(input_xml2)
        so = standoffconverter.Standoff(tree)

        view = standoffconverter.View(so.table).shrink_whitespace()
        plain, lookup = view.get_plain()

        # Map the plain-text offset of "7" to its row in the table.
        table_index = lookup.get_table_index(plain.index("7"))
        self.assertEqual(so.table.df.iloc[table_index].text, "7")

        self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
Example #5
0
    def test_view_insert_tag_text(self):
        """Check that ``lb`` tags are rendered as a newline in the plain text.

        The plain-text offset of ``"12"`` must map back, through the lookup,
        to a table row whose ``text`` is ``"1"``, and the plain text must
        contain the newline inserted for the ``lb`` tag.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        view = standoffconverter.View(so.table)
        # Keep the view returned by insert_tag_text so the test also holds if
        # the call returns a new View instead of mutating this one in place.
        view = view.insert_tag_text("lb", "\n")

        plain, lookup = view.get_plain()

        # The offset of "12" is the offset of its first character, "1".
        table_index = lookup.get_table_index(plain.index("12"))
        self.assertEqual(so.table.df.iloc[table_index].text, "1")

        self.assertEqual(plain, '1 2 3 4 5 6 7 9 10 11\n 12 13 14')
Example #6
0
    def test_view_exclude_2(self):
        """Check that ``exclude_outside`` keeps only the text inside ``xx``.

        An ``xx`` inline element is added for positions 2 to 5, and the view
        restricted to ``xx`` is expected to yield the plain text ``'2 3'``.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)
        so.add_inline(begin=2,
                      end=5,
                      tag="xx",
                      depth=None,
                      attrib={"resp": "machine"})

        view = standoffconverter.View(so.table).exclude_outside(["xx"])
        # Only the plain text is inspected; the lookup object is not needed.
        plain, _ = view.get_plain()

        # assertEqual shows both strings when they differ.
        self.assertEqual(plain, '2 3')
Example #7
0
    def test_view_exclude_1(self):
        """Check the lookup of a view that excludes the text inside ``xx``.

        An ``xx`` inline element is added for positions 2 to 4.  After
        ``exclude_inside``, the plain-text offset of ``"5"`` must still map
        back to a table row whose ``text`` is ``"5"``.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)
        so.add_inline(begin=2,
                      end=4,
                      tag="xx",
                      depth=None,
                      attrib={"resp": "machine"})

        view = standoffconverter.View(so.table).exclude_inside(["xx"])
        plain, lookup = view.get_plain()

        # Map the plain-text offset of "5" to its row in the table.
        table_index = lookup.get_table_index(plain.index("5"))
        self.assertEqual(so.table.df.iloc[table_index].text, "5")