def test_find_all(self):
    """Check which elements ``find_all`` reports for a small XML tree."""
    document = make_xml(b"<one><two><three/><three/></two></one>")

    # Searching for the root element's own tag is expected to find nothing.
    self.assertEqual(find_all(document, "one"), [])

    # Both <three/> elements are expected back.
    threes = find_all(document, "three")
    self.assertEqual(list(elements_to_tags(threes)), ["three", "three"])

    # The single <two> element is expected back.
    twos = find_all(document, "two")
    self.assertEqual(list(elements_to_tags(twos)), ["two"])
def test_find_all(self):
    """Exercise ``find_all`` against a three-level XML document."""
    root = make_xml(b'<one><two><three/><three/></two></one>')

    # Can't find the root element
    root_matches = find_all(root, 'one')
    self.assertEqual(root_matches, [])

    # Each case pairs the tag searched for with the tags expected back.
    cases = (
        ('three', ['three', 'three']),
        ('two', ['two']),
    )
    for tag, expected_tags in cases:
        matches = find_all(root, tag)
        self.assertEqual(list(elements_to_tags(matches)), expected_tags)
def _get_rowspan(self, el, v_merge):
    """Return the ``rowspan`` value to use for the table cell ``el``.

    ``v_merge`` is the vertical-merge element belonging to ``el``, or
    ``None`` when the cell has none.

    Only a cell whose ``v_merge`` has a ``val`` attribute containing
    ``'restart'`` can span several rows.  For such a cell the span is one
    plus the number of consecutive cells below it, in the same column, for
    which ``self.pre_processor.vmerge_continue`` is true.

    Returns the span as a string when it is greater than one.  Returns an
    empty string otherwise, including when ``el`` has no ``tbl`` ancestor.
    """
    # A cell that does not restart a merge never spans rows, so the table
    # does not have to be scanned for it at all.
    if v_merge is None or 'val' not in v_merge.attrib:
        return ''
    if 'restart' not in v_merge.attrib['val']:
        return ''

    tbl = find_ancestor_with_tag(self.pre_processor, el, 'tbl')
    if tbl is None:
        return ''

    row_index = self.pre_processor.row_index
    column_index = self.pre_processor.column_index
    current_row = row_index(el)
    current_col = column_index(el)

    # We only want table cells that have a row_index greater than the
    # current_row and that are on the current_col.
    # NOTE(review): assumes find_all yields cells top to bottom - confirm.
    cells_below = [
        tc for tc in find_all(tbl, 'tc')
        if row_index(tc) > current_row and column_index(tc) == current_col
    ]

    rowspan = 1
    for tc in cells_below:
        # The merge ends at the first cell that does not continue it.
        # Scanning further would count merges that start lower down in the
        # same column and belong to a different cell.
        if not self.pre_processor.vmerge_continue(tc):
            break
        rowspan += 1

    if rowspan > 1:
        return str(rowspan)
    return ''
def test_get_image_sizes(self):
    """Check the sizes ``_get_image_size`` returns for image elements."""
    parser = XMLDocx2Html(
        document_xml=self.get_xml(),
        relationships=self.relationships,
    )
    tree = parse_xml_from_string(self.get_xml())

    # Gather every 'drawing' element first, then every 'pict' element.
    image_elements = [
        element
        for tag in ('drawing', 'pict')
        for element in find_all(tree, tag)
    ]
    sizes = [parser._get_image_size(element) for element in image_elements]

    # Compared as sets, so neither order nor duplicates matter.
    self.assertEqual(
        set(sizes),
        set([('40px', '20px'), ('41pt', '21pt')]),
    )
def test_get_image_sizes(self):
    """Compare ``_get_image_size`` results with the expected size pairs."""
    converter = XMLDocx2Html(
        document_xml=self.get_xml(),
        rels_dict=self.relationship_dict,
    )
    document_tree = parse_xml_from_string(self.get_xml())

    # 'drawing' elements come first, followed by 'pict' elements.
    nodes = list(find_all(document_tree, 'drawing'))
    nodes += find_all(document_tree, 'pict')

    observed = []
    for node in nodes:
        size = converter._get_image_size(node)
        observed.append(size)

    wanted = [
        ('40px', '20px'),
        ('41pt', '21pt'),
    ]
    # The comparison ignores ordering because both sides become sets.
    self.assertEqual(set(observed), set(wanted))
def test_get_image_id(self):
    """Check the ids ``_get_image_id`` returns for image elements."""
    parser = XMLDocx2Html(
        document_xml=self.get_xml(),
        rels_dict=self.relationship_dict,
    )
    tree = parse_xml_from_string(self.get_xml())

    # Look up 'drawing' elements first, then 'pict' elements.
    drawing_els = list(find_all(tree, 'drawing'))
    pict_els = list(find_all(tree, 'pict'))
    found_ids = list(map(parser._get_image_id, drawing_els + pict_els))

    # Only the set of ids is checked, not their order.
    self.assertEqual(
        set(found_ids),
        set(['rId0', 'rId1']),
    )
def test_get_image_id(self):
    """Compare ``_get_image_id`` results with the expected ids."""
    parser = XMLDocx2Html(
        document_xml=self.get_xml(),
        relationships=self.relationships,
    )
    root = parse_xml_from_string(self.get_xml())

    # Collect candidates tag by tag: 'drawing' first, then 'pict'.
    candidates = []
    for tag_name in ('drawing', 'pict'):
        candidates.extend(find_all(root, tag_name))

    actual_ids = [parser._get_image_id(candidate) for candidate in candidates]
    wanted_ids = ['rId0', 'rId1']

    # Sets are compared, so ordering is irrelevant.
    self.assertEqual(set(actual_ids), set(wanted_ids))
def _parse_styles(self):
    """Build a dict describing each style, keyed by its ``styleId``.

    Returns an empty dict when ``self.styles_text`` is ``None``.  Otherwise
    ``self.styles_text`` is parsed and every ``style`` element found adds
    an entry holding the ``val`` of its ``name`` element and the result of
    ``self._parse_run_properties`` for its ``rPr`` element.
    """
    if self.styles_text is None:
        return {}

    parsed_styles = {}
    styles_tree = parse_xml_from_string(self.styles_text)
    for style_el in find_all(styles_tree, "style"):
        name_el = find_first(style_el, "name")
        entry = {"style_name": name_el.attrib["val"]}
        entry["default_run_properties"] = self._parse_run_properties(
            find_first(style_el, "rPr"),
        )
        parsed_styles[style_el.attrib["styleId"]] = entry
    return parsed_styles
def _parse_styles(self):
    """Return style information from ``self.styles_text`` by ``styleId``.

    An empty dict is returned when ``self.styles_text`` is ``None``.
    """
    if self.styles_text is None:
        return {}

    def describe(style):
        # Build the entry stored for a single 'style' element.
        style_name = find_first(style, 'name').attrib['val']
        run_props = find_first(style, 'rPr')
        return {
            'style_name': style_name,
            'default_run_properties': self._parse_run_properties(run_props),
        }

    root = parse_xml_from_string(self.styles_text)
    styles = {}
    for style in find_all(root, 'style'):
        description = describe(style)
        styles[style.attrib['styleId']] = description
    return styles
def _parse_styles(self):
    """Map each ``styleId`` in ``self.styles_text`` to its style details.

    The result stays empty when ``self.styles_text`` is ``None``.  Each
    entry holds ``style_name`` (the ``val`` of the style's ``name``
    element) and ``default_run_properties`` (the style's ``rPr`` element
    passed through ``self._parse_run_properties``).
    """
    styles_by_id = {}
    if self.styles_text is not None:
        tree = parse_xml_from_string(self.styles_text)
        for node in find_all(tree, 'style'):
            name = find_first(node, 'name').attrib['val']
            defaults = self._parse_run_properties(find_first(node, 'rPr'))
            styles_by_id[node.attrib['styleId']] = {
                'style_name': name,
                'default_run_properties': defaults,
            }
    return styles_by_id
def _parse_styles(self):
    """Build a dict describing each style, keyed by its ``styleId``.

    The styles are read from the root element of the main document part's
    style definitions part.  An empty dict is returned when that part is
    falsy.
    """
    main_part = self.document.main_document_part
    definitions = main_part.style_definitions_part
    if not definitions:
        return {}

    collected = {}
    for style_el in find_all(definitions.root_element, 'style'):
        name_el = find_first(style_el, 'name')
        style_name = name_el.attrib['val']
        rpr_el = find_first(style_el, 'rPr')
        run_defaults = self._parse_run_properties(rpr_el)
        collected[style_el.attrib['styleId']] = {
            'style_name': style_name,
            'default_run_properties': run_defaults,
        }
    return collected
def _parse_styles(self):
    """Return style details from the style definitions part by ``styleId``.

    The result stays empty when the main document part's
    ``style_definitions_part`` is falsy.
    """
    styles = {}
    part = self.document.main_document_part.style_definitions_part
    if part:
        root = part.root_element
        for style in find_all(root, 'style'):
            # The style's name is the 'val' attribute of its 'name' element.
            name = find_first(style, 'name').attrib['val']
            entry = {'style_name': name}
            entry['default_run_properties'] = self._parse_run_properties(
                find_first(style, 'rPr'),
            )
            styles[style.attrib['styleId']] = entry
    return styles