def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    Walks the children of ``run`` and appends the matching HTML to a newly
    created ``<span>``: text (``w:t``), line/page breaks (``w:cr``,
    ``w:br``), images (``w:drawing``, ``w:pict``), foot/endnote references
    and field separators (``w:fldChar``).  The span is recorded in
    ``self.object_map`` and is re-tagged as ``sub``/``sup`` when the
    resolved run style requests vertical alignment.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # Text collects plain strings in ``buf`` and is handed every new child
    # element through add_elem(); presumably it flushes the buffer onto the
    # previous element first -- verify against the Text definition.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # Read ``clear`` with the same namespace-aware helper used
                # for ``w:type``; a bare child.get('clear') only matches an
                # un-namespaced attribute and so never sees w:clear.
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            # Skip the reference entirely when either part is missing/empty.
            if anchor and name:
                note_link = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                note_link.set('class', 'noteref')
                text.add_elem(note_link)
                ans.append(text.elem)
        elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
            # A field separator contributes a single non-breaking space.
            text.buf.append('\xa0')

    # Flush whatever plain text is still buffered onto the current target.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this is an attribute assignment, not
        # ans.set('lang', ...); confirm it ends up in the serialized HTML.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    Each child of ``run`` is translated and appended to a new ``<span>``:
    ``w:t`` text, ``w:cr``/``w:br`` breaks, ``w:drawing``/``w:pict`` images
    and foot/endnote references.  The span is stored in
    ``self.object_map`` keyed to its source run, and becomes ``sub`` or
    ``sup`` if the resolved run style specifies vertical alignment.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # ``text.buf`` accumulates plain strings; add_elem() receives each new
    # element.  Presumably add_elem() flushes the buffer and retargets
    # ``text.elem`` -- TODO confirm in the Text class.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # ``clear`` must be looked up through the namespace-aware
                # helper, exactly like ``w:type`` above; the un-namespaced
                # child.get('clear') would miss a w:clear attribute.
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            # Nothing is emitted unless both the anchor and the name are set.
            if anchor and name:
                note_link = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                note_link.set('class', 'noteref')
                text.add_elem(note_link)
                ans.append(text.elem)

    # Write any remaining buffered text to the current element/attribute.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): plain attribute assignment rather than
        # ans.set('lang', ...); verify this reaches the output markup.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    Builds a ``<span>`` from the children of ``run``: ``w:t`` text (wrapped
    in a ``white-space:pre-wrap`` span whenever ``xml:space`` is
    ``preserve``), ``w:cr``/``w:br`` breaks, images, foot/endnote
    references and ``w:fldChar`` separators.  The span is recorded in
    ``self.object_map`` and re-tagged ``sub``/``sup`` according to the
    resolved run style.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # Plain text is buffered in ``text.buf``; elements go through
    # add_elem().  Presumably add_elem() flushes the buffer and makes the
    # new element the flush target -- verify against the Text class.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            if space == 'preserve':
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            # Both attributes are read through the namespace-aware helper
            # (as w:fldCharType is below); the bare child.get('type') /
            # child.get('clear') only match un-namespaced attributes, so
            # page breaks and clears in the w: namespace were never seen.
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            # Emit the back-linked superscript only when both parts exist.
            if anchor and name:
                note_link = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                note_link.set('class', 'noteref')
                text.add_elem(note_link)
                ans.append(text.elem)
        elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
            # A field separator contributes a single non-breaking space.
            text.buf.append('\xa0')

    # Flush the text still sitting in the buffer.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): attribute assignment, not ans.set('lang', ...);
        # confirm the language actually appears in the generated HTML.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    Translates the children of ``run`` into a new ``<span>``: ``w:t`` text
    (in a ``white-space:pre-wrap`` span when ``xml:space`` is
    ``preserve``), ``w:cr``/``w:br`` breaks, ``w:drawing``/``w:pict``
    images and foot/endnote references.  The span is registered in
    ``self.object_map`` and re-tagged ``sub``/``sup`` when the resolved run
    style asks for it.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # ``text.buf`` holds pending plain text; add_elem() is given each new
    # element.  Presumably it flushes the buffer and retargets
    # ``text.elem`` -- TODO confirm in the Text class.
    text = Text(ans, "text", [])

    for child in run:
        if is_tag(child, "w:t"):
            if not child.text:
                continue
            space = child.get(XML("space"), None)
            if space == "preserve":
                # The CSS property is spelled ``white-space``; the previous
                # ``whitespace`` is not a CSS property, so the preserve
                # span had no effect.
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, "w:cr"):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, "w:br"):
            # Use the namespace-aware helper: a bare child.get("type") /
            # child.get("clear") only matches un-namespaced attributes and
            # therefore misses w:type / w:clear.
            typ = get(child, "w:type")
            if typ in {"column", "page"}:
                br = BR(style="page-break-after:always")
            else:
                clear = get(child, "w:clear")
                if clear in {"all", "left", "right"}:
                    br = BR(style="clear:%s" % ("both" if clear == "all" else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, "w:drawing") or is_tag(child, "w:pict"):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, "w:footnoteReference") or is_tag(child, "w:endnoteReference"):
            anchor, name = self.footnotes.get_ref(child)
            # Only emit a reference when both anchor and name are non-empty.
            if anchor and name:
                note_link = SUP(A(name, href="#" + anchor, title=name), id="back_%s" % anchor)
                note_link.set("class", "noteref")
                text.add_elem(note_link)
                ans.append(text.elem)

    # Flush any text left in the buffer onto the current target.
    if text.buf:
        setattr(text.elem, text.attr, "".join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {"superscript", "subscript"}:
        ans.tag = "sub" if style.vert_align == "subscript" else "sup"
    if style.lang is not inherit:
        # NOTE(review): attribute assignment rather than
        # ans.set("lang", ...); verify it is serialized as intended.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    The children of ``run`` are mapped onto a new ``<span>``: ``w:t`` text
    (wrapped in a ``white-space:pre-wrap`` span when ``xml:space`` is
    ``preserve``), ``w:cr``/``w:br`` breaks, images and foot/endnote
    references.  The result is recorded in ``self.object_map`` and turned
    into ``sub``/``sup`` when the resolved run style is sub/superscript.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # Pending plain text lives in ``text.buf``; each new element is passed
    # to add_elem(), which presumably flushes the buffer and becomes the
    # next flush target -- verify against the Text class.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            if space == 'preserve':
                # ``white-space`` is the real CSS property name; the old
                # ``whitespace`` spelling left the span without any
                # whitespace-preserving effect.
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            # w:type and w:clear are namespaced attributes, so they are
            # read with the namespace-aware helper; the bare
            # child.get('type') / child.get('clear') never matched them.
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            # A reference with a missing anchor or name is dropped.
            if anchor and name:
                note_link = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                note_link.set('class', 'noteref')
                text.add_elem(note_link)
                ans.append(text.elem)

    # Flush remaining buffered text.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns an attribute instead of calling
        # ans.set('lang', ...); confirm the value is emitted in the HTML.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert a DOCX run element into an HTML inline element.

    Walks the children of ``run`` and appends their HTML equivalents to a
    new ``<span>``: ``w:t`` text, ``w:cr``/``w:br`` breaks, images,
    foot/endnote references, ``w:tab`` (rendered as a span of ``NBSP``
    characters with class ``tab``), ``w:noBreakHyphen`` and
    ``w:softHyphen``.  The span is recorded in ``self.object_map``,
    re-tagged ``sub``/``sup`` per the resolved run style, and given a
    ``lang`` attribute when the run's language differs from
    ``self.doc_lang``.

    :param run: the run element; it is iterated directly for its children.
    :return: the element built for this run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # Plain text is queued in ``text.buf``; new elements are passed to
    # add_elem(), which presumably flushes the queue and retargets
    # ``text.elem`` -- TODO confirm in the Text class.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            ctext = child.text
            if space != 'preserve':
                # Remove leading and trailing whitespace. Word ignores
                # leading and trailing whitespace without preserve
                ctext = ctext.strip(' \n\r\t')
            # Only use a <span> with white-space:pre-wrap if this element
            # actually needs it, i.e. if it has more than one
            # consecutive space or it has newlines or tabs.
            multi_spaces = self.ms_pat.search(ctext) is not None
            preserve = multi_spaces or self.ws_pat.search(ctext) is not None
            if preserve:
                text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(ctext)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # Look ``clear`` up through the namespace-aware helper, the
                # same way ``w:type`` is read; the un-namespaced
                # child.get('clear') cannot match a w:clear attribute.
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            # Skip references whose anchor or name is missing/empty.
            if anchor and name:
                note_link = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                note_link.set('class', 'noteref')
                text.add_elem(note_link)
                ans.append(text.elem)
        elif is_tag(child, 'w:tab'):
            # Emit a run of NBSP whose length is derived from the
            # document's default tab stop.
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            # ans[-1] is the span that was just appended.
            ans[-1].set('class', 'tab')
        elif is_tag(child, 'w:noBreakHyphen'):
            text.buf.append(u'\u2011')
        elif is_tag(child, 'w:softHyphen'):
            text.buf.append(u'\u00ad')

    # Flush the text that is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        lang = html_lang(style.lang)
        # Only annotate runs whose language differs from the document's.
        if lang is not None and lang != self.doc_lang:
            ans.set('lang', lang)
    return ans