def convert_run(self, run):
    """Build the HTML element for one run and return it.

    A ``SPAN`` is created for ``run`` and recorded in ``self.object_map``.
    Each child of ``run`` is then translated in document order:

    * ``w:t``: text. It is wrapped in a ``white-space:pre-wrap`` span only
      when ``self.ms_pat`` or ``self.ws_pat`` matches it; otherwise it is
      buffered as plain text.
    * ``w:cr`` / ``w:br``: a ``BR``. Page and column breaks, and the
      ``clear`` attribute, become inline styles.
    * ``w:drawing`` / ``w:pict``: every element yielded by
      ``self.images.to_html``.
    * ``w:footnoteReference`` / ``w:endnoteReference``: a link with class
      ``noteref``, emitted only when both an anchor and a name are found.
    * ``w:tab``: a span of class ``tab`` filled with ``NBSP`` characters.
    * ``w:noBreakHyphen`` / ``w:softHyphen``: the matching hyphen character
      is buffered as plain text.

    Afterwards the resolved run style is applied: sub/superscript changes
    the tag, a language different from ``self.doc_lang`` sets ``lang``,
    right-to-left sets ``dir``, and symbol fonts have their text remapped.

    Note: for symbol fonts the resolved style object is mutated; its
    ``font_family`` is replaced by ``'sans-serif'``.
    """
    span = SPAN()
    self.object_map[span] = run
    # ``pending`` buffers plain text in ``pending.buf``; flushing writes the
    # joined buffer to the ``pending.attr`` attribute of ``pending.elem``.
    pending = Text(span, 'text', [])
    is_tag = self.namespace.is_tag

    def emit(node):
        # Hand the node to the text tracker first, then attach the
        # tracker's current element to the run's span.
        pending.add_elem(node)
        span.append(pending.elem)

    for child in run:
        if is_tag(child, 'w:t'):
            content = child.text
            if not content:
                continue
            if child.get(XML('space'), None) != 'preserve':
                # Word ignores leading and trailing whitespace unless the
                # element asks for it to be preserved, so drop it here.
                content = content.strip(' \n\r\t')
            # A pre-wrap span is only worth emitting when the text really
            # needs it: runs of several spaces, or newlines/tabs.
            needs_pre_wrap = (
                self.ms_pat.search(content) is not None
                or self.ws_pat.search(content) is not None
            )
            if needs_pre_wrap:
                emit(SPAN(content, style="white-space:pre-wrap"))
            else:
                pending.buf.append(content)
            continue

        if is_tag(child, 'w:cr'):
            emit(BR())
            continue

        if is_tag(child, 'w:br'):
            break_type = self.namespace.get(child, 'w:type')
            if break_type in {'column', 'page'}:
                line_break = BR(style='page-break-after:always')
            else:
                side = child.get('clear', None)
                if side not in {'all', 'left', 'right'}:
                    line_break = BR()
                else:
                    line_break = BR(style='clear:%s' % ('both' if side == 'all' else side))
            emit(line_break)
            continue

        if is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for image in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                emit(image)
            continue

        if is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                link = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name)
                link.set('class', 'noteref')
                emit(link)
            continue

        if is_tag(child, 'w:tab'):
            # The tab is approximated by a number of NBSP characters derived
            # from the document's default tab stop setting.
            nbsp_count = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            emit(SPAN(NBSP * nbsp_count))
            span[-1].set('class', 'tab')
            continue

        if is_tag(child, 'w:noBreakHyphen'):
            pending.buf.append('\u2011')
        elif is_tag(child, 'w:softHyphen'):
            pending.buf.append('\u00ad')

    # Flush whatever plain text is still buffered after the last child.
    if pending.buf:
        setattr(pending.elem, pending.attr, ''.join(pending.buf))

    run_style = self.styles.resolve_run(run)

    if run_style.vert_align in {'superscript', 'subscript'}:
        if run_style.vert_align == 'subscript':
            span.tag = 'sub'
        else:
            span.tag = 'sup'

    if run_style.lang is not inherit:
        lang_code = html_lang(run_style.lang)
        if lang_code is not None and lang_code != self.doc_lang:
            span.set('lang', lang_code)

    if run_style.rtl is True:
        span.set('dir', 'rtl')

    family = run_style.font_family
    if is_symbol_font(family):
        # Iterating the Text helper yields elements (presumably every one it
        # has tracked); remap both their text and their tail.
        for node in pending:
            if node.text:
                node.text = map_symbol_text(node.text, family)
            if node.tail:
                node.tail = map_symbol_text(node.tail, family)
        run_style.font_family = 'sans-serif'

    return span
def convert_run(self, run):
    """Build the HTML element for one run and return it.

    A ``SPAN`` is created for ``run`` and recorded in ``self.object_map``.
    Each child of ``run`` is then translated in document order:

    * ``w:t``: text. It is wrapped in a ``white-space:pre-wrap`` span only
      when ``self.ms_pat`` or ``self.ws_pat`` matches it; otherwise it is
      buffered as plain text.
    * ``w:cr`` / ``w:br``: a ``BR``. Page and column breaks, and the
      ``clear`` attribute, become inline styles.
    * ``w:drawing`` / ``w:pict``: every element yielded by
      ``self.images.to_html``.
    * ``w:footnoteReference`` / ``w:endnoteReference``: a link with class
      ``noteref``, emitted only when both an anchor and a name are found.
    * ``w:tab``: a span of class ``tab`` filled with ``NBSP`` characters.
    * ``w:noBreakHyphen`` / ``w:softHyphen``: the matching hyphen character
      is buffered as plain text.

    Afterwards the resolved run style is applied: sub/superscript changes
    the tag, a language different from ``self.doc_lang`` sets ``lang``,
    right-to-left sets ``dir``, and symbol fonts have their text remapped.

    Note: for symbol fonts the resolved style object is mutated; its
    ``font_family`` is replaced by ``'sans-serif'``.
    """
    span = SPAN()
    self.object_map[span] = run
    # ``pending`` buffers plain text in ``pending.buf``; flushing writes the
    # joined buffer to the ``pending.attr`` attribute of ``pending.elem``.
    pending = Text(span, 'text', [])
    is_tag = self.namespace.is_tag

    def emit(node):
        # Hand the node to the text tracker first, then attach the
        # tracker's current element to the run's span.
        pending.add_elem(node)
        span.append(pending.elem)

    for child in run:
        if is_tag(child, 'w:t'):
            content = child.text
            if not content:
                continue
            if child.get(XML('space'), None) != 'preserve':
                # Word ignores leading and trailing whitespace unless the
                # element asks for it to be preserved, so drop it here.
                content = content.strip(' \n\r\t')
            # A pre-wrap span is only worth emitting when the text really
            # needs it: runs of several spaces, or newlines/tabs.
            needs_pre_wrap = (
                self.ms_pat.search(content) is not None
                or self.ws_pat.search(content) is not None
            )
            if needs_pre_wrap:
                emit(SPAN(content, style="white-space:pre-wrap"))
            else:
                pending.buf.append(content)
            continue

        if is_tag(child, 'w:cr'):
            emit(BR())
            continue

        if is_tag(child, 'w:br'):
            break_type = self.namespace.get(child, 'w:type')
            if break_type in {'column', 'page'}:
                line_break = BR(style='page-break-after:always')
            else:
                side = child.get('clear', None)
                if side not in {'all', 'left', 'right'}:
                    line_break = BR()
                else:
                    line_break = BR(style='clear:%s' % ('both' if side == 'all' else side))
            emit(line_break)
            continue

        if is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for image in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                emit(image)
            continue

        if is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                link = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name)
                link.set('class', 'noteref')
                emit(link)
            continue

        if is_tag(child, 'w:tab'):
            # The tab is approximated by a number of NBSP characters derived
            # from the document's default tab stop setting.
            nbsp_count = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            emit(SPAN(NBSP * nbsp_count))
            span[-1].set('class', 'tab')
            continue

        if is_tag(child, 'w:noBreakHyphen'):
            pending.buf.append(u'\u2011')
        elif is_tag(child, 'w:softHyphen'):
            pending.buf.append(u'\u00ad')

    # Flush whatever plain text is still buffered after the last child.
    if pending.buf:
        setattr(pending.elem, pending.attr, ''.join(pending.buf))

    run_style = self.styles.resolve_run(run)

    if run_style.vert_align in {'superscript', 'subscript'}:
        if run_style.vert_align == 'subscript':
            span.tag = 'sub'
        else:
            span.tag = 'sup'

    if run_style.lang is not inherit:
        lang_code = html_lang(run_style.lang)
        if lang_code is not None and lang_code != self.doc_lang:
            span.set('lang', lang_code)

    if run_style.rtl is True:
        span.set('dir', 'rtl')

    family = run_style.font_family
    if is_symbol_font(family):
        # Iterating the Text helper yields elements (presumably every one it
        # has tracked); remap both their text and their tail.
        for node in pending:
            if node.text:
                node.text = map_symbol_text(node.text, family)
            if node.tail:
                node.tail = map_symbol_text(node.tail, family)
        run_style.font_family = 'sans-serif'

    return span