def get_text_line_from_xml_element(xml_element):
    """Build a ``TextLine`` from a text-line XML element.

    The line's bounding box is parsed from the element's ``bbox``
    attribute. Every direct ``<text>`` child that has at least one
    attribute is converted with ``get_text_from_xml_element`` and appended
    to the line's ``texts``; children without attributes are skipped.

    Raises:
        KeyError: if ``xml_element`` has no ``bbox`` attribute.
    """
    line_bbox = bbox_from_string(xml_element.attrib['bbox'])
    text_line = TextLine(line_bbox)

    for child in xml_element.findall('./text'):
        # An attribute-less child has no font/size data to build a Text from.
        if child.attrib:
            text_line.texts.append(get_text_from_xml_element(child))

    return text_line
def get_text_from_xml_element(xml_element):
    """Build a ``Text`` from a ``<text>`` XML element.

    The character is the first character of the element's text content; a
    single space is used when the element has no string text (e.g.
    ``None``) or when its text is empty. The bounding box is parsed from
    the ``bbox`` attribute when present, otherwise it is ``None``.

    Raises:
        KeyError: if ``font``, ``size``, ``colourspace`` or ``ncolour`` is
            missing from the element's attributes.
        ValueError: if ``size`` cannot be converted to ``float``.
    """
    raw_text = xml_element.text
    # ``raw_text[0]`` is already a one-character string, so indexing it a
    # second time is unnecessary. The truthiness check also covers an empty
    # string, which would otherwise raise IndexError on ``[0]``.
    if isinstance(raw_text, str) and raw_text:
        text = raw_text[0]
    else:
        text = ' '

    attrib = xml_element.attrib
    bbox = bbox_from_string(attrib['bbox']) if 'bbox' in attrib else None

    return Text(
        bbox, text,
        TextStyle(attrib['font'],
                  float(attrib['size']),
                  attrib['colourspace'],
                  attrib['ncolour']))
def get_page_from_xml_element(xml_element):
    """Build a ``Page`` from a page XML element.

    The page's bounding box is parsed from the element's ``bbox``
    attribute, and each direct ``<textbox>`` child is converted with
    ``get_text_box_from_xml_element`` and appended to ``text_boxes``.

    Raises:
        KeyError: if ``xml_element`` has no ``bbox`` attribute.
    """
    page_bbox = bbox_from_string(xml_element.attrib['bbox'])
    result = Page(page_bbox)
    for node in xml_element.findall('./textbox'):
        text_box = get_text_box_from_xml_element(node)
        result.text_boxes.append(text_box)
    return result
예제 #4
0
 def setUp(self):
     """Store the bbox parsed from a four-value string on ``self.bbox``."""
     four_values = '459.840,753.697,462.075,765.210'
     self.bbox = bbox_from_string(four_values)
예제 #5
0
 def test_five_points(self):
     """Five comma-separated values must raise InvalidBboxString."""
     five_values = '1,2,3,4,5'
     with self.assertRaises(InvalidBboxString):
         bbox_from_string(five_values)
예제 #6
0
 def test_two_points(self):
     """Two comma-separated values must raise InvalidBboxString."""
     two_values = '1,2'
     with self.assertRaises(InvalidBboxString):
         bbox_from_string(two_values)
예제 #7
0
 def test_one_point(self):
     """A single value must raise InvalidBboxString."""
     single_value = '1'
     with self.assertRaises(InvalidBboxString):
         bbox_from_string(single_value)