def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images, foot/endnote references and ``w:fldChar`` separators. The
    created span is registered in ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``. Its tag is switched to
        ``sub``/``sup`` when the resolved run style requests vertical
        alignment.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # The clear attribute is namespace-qualified just like
                # w:type, so look it up the same way. The unqualified
                # lookup is kept as a fallback for backward compatibility.
                clear = get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
            # A field separator contributes a single no-break space.
            text.buf.append('\xa0')

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this sets a Python attribute on the element object,
        # not an XML ``lang`` attribute -- confirm a later pass reads it.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images and foot/endnote references. The created span is registered in
    ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``. Its tag is switched to
        ``sub``/``sup`` when the resolved run style requests vertical
        alignment.
    """

    def attr_by_local_name(elem, name):
        # WordprocessingML attributes are expected to be namespace-qualified
        # (w:type, w:clear), so a bare ``elem.get("type")`` does not match
        # them. Matching on the local name finds either form.
        for key, value in elem.attrib.items():
            if key.rpartition("}")[2] == name:
                return value
        return None

    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, "text", [])

    for child in run:
        if is_tag(child, "w:t"):
            if not child.text:
                continue
            space = child.get(XML("space"), None)
            if space == "preserve":
                # The CSS property is spelled ``white-space``; the previous
                # ``whitespace`` spelling is not a CSS property, so the
                # preserved whitespace was collapsed on rendering.
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, "w:cr"):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, "w:br"):
            typ = attr_by_local_name(child, "type")
            if typ in {"column", "page"}:
                br = BR(style="page-break-after:always")
            else:
                clear = attr_by_local_name(child, "clear")
                if clear in {"all", "left", "right"}:
                    br = BR(style="clear:%s" % ("both" if clear == "all" else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, "w:drawing") or is_tag(child, "w:pict"):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, "w:footnoteReference") or is_tag(child, "w:endnoteReference"):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href="#" + anchor, title=name), id="back_%s" % anchor)
                noteref.set("class", "noteref")
                text.add_elem(noteref)
                ans.append(text.elem)

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, "".join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {"superscript", "subscript"}:
        ans.tag = "sub" if style.vert_align == "subscript" else "sup"
    if style.lang is not inherit:
        # NOTE(review): this sets a Python attribute on the element object,
        # not an XML ``lang`` attribute -- confirm a later pass reads it.
        ans.lang = style.lang
    return ans
def commit(current_run):
    """Wrap a pending run of list items in one list element, then clear it.

    A new ``OL``/``UL`` wrapper is inserted at the position of the first
    item, every item of *current_run* is moved into it, and the temporary
    ``list-*`` bookkeeping attributes are removed. Relies on ``self``,
    ``styles``, ``images`` and ``templates`` from the enclosing scope.

    :param current_run: mutable list of item elements; emptied in place.
        An empty (or falsy) value is a no-op.
    """
    if not current_run:
        return

    first = current_run[0]
    container = first.getparent()
    position = container.index(first)
    list_id = first.get('list-id')
    list_lvl = first.get('list-lvl')
    level = self.instances[list_id].levels[int(list_lvl)]
    lvlid = list_id + list_lvl
    templated = 'list-template' in first.attrib

    make_list = OL if (level.is_numbered or templated) else UL
    wrap = make_list('\n\t')
    if templated:
        wrap.set('lvlid', lvlid)
    else:
        list_css = level.css(images, self.pic_map, self.rid_map)
        wrap.set('class', styles.register(list_css, 'list'))

    # Character-level CSS for the marker; replaced by its registered class.
    ccss = level.char_css()
    if ccss:
        ccss = styles.register(ccss, 'bullet')

    container.insert(position, wrap)
    unordered = wrap.tag == 'ul'
    previous = None
    for item in current_run:
        wrap.append(item)
        item.tail = '\n\t'
        if templated:
            # Move the item's existing content into a span of its own ...
            content = SPAN()
            content.text = item.text
            item.text = None
            for grandchild in list(item):
                content.append(grandchild)
            item.append(content)
            # ... and put the rendered marker text in a span before it.
            marker = SPAN(item.get('list-template'))
            if ccss:
                marker.set('class', ccss)
            longest = templates.get(lvlid, '')
            if marker.text and len(marker.text) > len(longest):
                templates[lvlid] = marker.text
            item.insert(0, marker)
        for name in ('list-lvl', 'list-id', 'list-template'):
            item.attrib.pop(name, None)
        current = int(item.get('value'))
        # An explicit value is dropped for unordered lists and for items
        # that simply continue the previous number.
        if unordered or previous == current - 1:
            item.attrib.pop('value')
        previous = current

    current_run[-1].tail = '\n'
    del current_run[:]
def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images and foot/endnote references. The created span is registered in
    ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``. Its tag is switched to
        ``sub``/``sup`` when the resolved run style requests vertical
        alignment.
    """

    def attr_by_local_name(elem, name):
        # WordprocessingML attributes are expected to be namespace-qualified
        # (w:type, w:clear), so a bare ``elem.get('type')`` does not match
        # them. Matching on the local name finds either form.
        for key, value in elem.attrib.items():
            if key.rpartition('}')[2] == name:
                return value
        return None

    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            if space == 'preserve':
                # The CSS property is spelled ``white-space``; the previous
                # ``whitespace`` spelling is not a CSS property, so the
                # preserved whitespace was collapsed on rendering.
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = attr_by_local_name(child, 'type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                clear = attr_by_local_name(child, 'clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this sets a Python attribute on the element object,
        # not an XML ``lang`` attribute -- confirm a later pass reads it.
        ans.lang = style.lang
    return ans
def commit(current_run):
    """Wrap a pending run of list items in one list element, then clear it.

    A new ``OL``/``UL`` wrapper is inserted at the position of the first
    item, every item of *current_run* is moved into it, and the temporary
    ``list-*`` bookkeeping attributes are removed. Relies on ``self``,
    ``styles``, ``images`` and ``templates`` from the enclosing scope.

    :param current_run: mutable list of item elements; emptied in place.
        An empty (or falsy) value is a no-op.
    """
    if not current_run:
        return

    first = current_run[0]
    container = first.getparent()
    position = container.index(first)
    list_id = first.get('list-id')
    list_lvl = first.get('list-lvl')
    level = self.instances[list_id].levels[int(list_lvl)]
    lvlid = list_id + list_lvl
    templated = 'list-template' in first.attrib

    make_list = OL if (level.is_numbered or templated) else UL
    wrap = make_list('\n\t')
    if templated:
        wrap.set('lvlid', lvlid)
    else:
        list_css = level.css(images, self.pic_map, self.rid_map)
        wrap.set('class', styles.register(list_css, 'list'))

    # Character-level CSS for the marker; replaced by its registered class.
    ccss = level.char_css()
    if ccss:
        ccss = styles.register(ccss, 'bullet')

    container.insert(position, wrap)
    unordered = wrap.tag == 'ul'
    previous = None
    for item in current_run:
        wrap.append(item)
        item.tail = '\n\t'
        if templated:
            # Move the item's existing content into a span of its own ...
            content = SPAN()
            content.text = item.text
            item.text = None
            for grandchild in list(item):
                content.append(grandchild)
            item.append(content)
            # ... and put the rendered marker text in a span before it.
            marker = SPAN(item.get('list-template'))
            if ccss:
                marker.set('class', ccss)
            longest = templates.get(lvlid, '')
            if marker.text and len(marker.text) > len(longest):
                templates[lvlid] = marker.text
            item.insert(0, marker)
        for name in ('list-lvl', 'list-id', 'list-template'):
            item.attrib.pop(name, None)
        current = int(item.get('value'))
        # An explicit value is dropped for unordered lists, for items that
        # continue the previous number, and for a first item numbered 1.
        starts_at_one = previous is None and current == 1
        if unordered or previous == current - 1 or starts_at_one:
            item.attrib.pop('value')
        previous = current

    current_run[-1].tail = '\n'
    del current_run[:]
def commit(current_run):
    """Wrap a pending run of list items in one list element, then clear it.

    A new ``OL``/``UL`` wrapper is inserted at the position of the first
    item, every item of *current_run* is moved into it, and the temporary
    ``list-*`` bookkeeping attributes are removed. Relies on ``self``,
    ``styles``, ``images`` and ``templates`` from the enclosing scope.

    :param current_run: mutable list of item elements; emptied in place.
        An empty (or falsy) value is a no-op.
    """
    if not current_run:
        return

    first = current_run[0]
    container = first.getparent()
    position = container.index(first)
    list_id = first.get("list-id")
    list_lvl = first.get("list-lvl")
    level = self.instances[list_id].levels[int(list_lvl)]
    lvlid = list_id + list_lvl
    templated = "list-template" in first.attrib

    make_list = OL if (level.is_numbered or templated) else UL
    wrap = make_list("\n\t")
    if templated:
        wrap.set("lvlid", lvlid)
    else:
        list_css = level.css(images, self.pic_map, self.rid_map)
        wrap.set("class", styles.register(list_css, "list"))

    # Character-level CSS for the marker; replaced by its registered class.
    ccss = level.char_css()
    if ccss:
        ccss = styles.register(ccss, "bullet")

    container.insert(position, wrap)
    unordered = wrap.tag == "ul"
    previous = None
    for item in current_run:
        wrap.append(item)
        item.tail = "\n\t"
        if templated:
            # Move the item's existing content into a span of its own ...
            content = SPAN()
            content.text = item.text
            item.text = None
            for grandchild in list(item):
                content.append(grandchild)
            item.append(content)
            # ... and put the rendered marker text in a span before it.
            marker = SPAN(item.get("list-template"))
            if ccss:
                marker.set("class", ccss)
            longest = templates.get(lvlid, "")
            if marker.text and len(marker.text) > len(longest):
                templates[lvlid] = marker.text
            item.insert(0, marker)
        for name in ("list-lvl", "list-id", "list-template"):
            item.attrib.pop(name, None)
        current = int(item.get("value"))
        # An explicit value is dropped for unordered lists and for items
        # that simply continue the previous number.
        if unordered or previous == current - 1:
            item.attrib.pop("value")
        previous = current

    current_run[-1].tail = "\n"
    del current_run[:]
def commit(current_run):
    """Wrap a pending run of list items in one list element, then clear it.

    A new ``OL``/``UL`` wrapper (chosen only by ``lvl.is_numbered``) is
    inserted at the position of the first item, every item of
    *current_run* is moved into it, and the temporary ``list-*``
    bookkeeping attributes are removed. Relies on ``self``, ``styles`` and
    ``templates`` from the enclosing scope.

    :param current_run: mutable list of item elements; emptied in place.
        An empty (or falsy) value is a no-op.
    """
    if not current_run:
        return

    first = current_run[0]
    container = first.getparent()
    position = container.index(first)
    list_id = first.get('list-id')
    list_lvl = first.get('list-lvl')
    level = self.instances[list_id].levels[int(list_lvl)]
    lvlid = list_id + list_lvl

    make_list = OL if level.is_numbered else UL
    wrap = make_list('\n\t')
    templated = 'list-template' in first.attrib
    if templated:
        wrap.set('lvlid', lvlid)
    else:
        list_css = {'list-style-type': level.fmt}
        wrap.set('class', styles.register(list_css, 'list'))

    container.insert(position, wrap)
    unordered = wrap.tag == 'ul'
    previous = None
    for item in current_run:
        wrap.append(item)
        item.tail = '\n\t'
        if templated:
            # Move the item's existing content into a span of its own ...
            content = SPAN()
            content.text = item.text
            item.text = None
            for grandchild in list(item):
                content.append(grandchild)
            item.append(content)
            # ... and put the rendered marker text in a span before it.
            marker = SPAN(item.get('list-template'))
            longest = templates.get(lvlid, '')
            if marker.text and len(marker.text) > len(longest):
                templates[lvlid] = marker.text
            item.insert(0, marker)
        for name in ('list-lvl', 'list-id', 'list-template'):
            item.attrib.pop(name, None)
        current = int(item.get('value'))
        # An explicit value is dropped for unordered lists and for items
        # that simply continue the previous number.
        if unordered or previous == current - 1:
            item.attrib.pop('value')
        previous = current

    current_run[-1].tail = '\n'
    del current_run[:]
def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images, foot/endnote references, tabs and the two hyphen elements.
    After that the resolved run style is applied (vertical alignment,
    language, text direction, symbol-font mapping). The created span is
    registered in ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``, possibly retagged to
        ``sub``/``sup``.
    """
    ns = self.namespace
    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, 'text', [])

    for child in run:
        if ns.is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            ctext = child.text
            if space != 'preserve':
                # Remove leading and trailing whitespace. Word ignores
                # leading and trailing whitespace without preserve
                ctext = ctext.strip(' \n\r\t')
            # Only use a <span> with white-space:pre-wrap if this element
            # actually needs it, i.e. if it has more than one
            # consecutive space or it has newlines or tabs.
            multi_spaces = self.ms_pat.search(ctext) is not None
            preserve = multi_spaces or self.ws_pat.search(ctext) is not None
            if preserve:
                text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(ctext)
        elif ns.is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif ns.is_tag(child, 'w:br'):
            typ = ns.get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # The clear attribute is namespace-qualified just like
                # w:type, so look it up the same way. The unqualified
                # lookup is kept as a fallback for backward compatibility.
                clear = ns.get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif ns.is_tag(child, 'w:drawing') or ns.is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif ns.is_tag(child, 'w:footnoteReference') or ns.is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif ns.is_tag(child, 'w:tab'):
            # A tab becomes a run of non-breaking spaces whose count is
            # derived from the document's default tab stop.
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')
        elif ns.is_tag(child, 'w:noBreakHyphen'):
            text.buf.append('\u2011')
        elif ns.is_tag(child, 'w:softHyphen'):
            text.buf.append('\u00ad')

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        lang = html_lang(style.lang)
        # Only annotate runs whose language differs from the document's.
        if lang is not None and lang != self.doc_lang:
            ans.set('lang', lang)
    if style.rtl is True:
        ans.set('dir', 'rtl')
    if is_symbol_font(style.font_family):
        for elem in text:
            if elem.text:
                elem.text = map_symbol_text(elem.text, style.font_family)
            if elem.tail:
                elem.tail = map_symbol_text(elem.tail, style.font_family)
        # After the text has been remapped the symbol font is replaced.
        style.font_family = 'sans-serif'
    return ans
def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images, foot/endnote references and tabs. The created span is
    registered in ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``. Its tag is switched to
        ``sub``/``sup`` when the resolved run style requests vertical
        alignment.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(
                    child.text) is not None
            if preserve:
                text.add_elem(
                    SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # The clear attribute is namespace-qualified just like
                # w:type, so look it up the same way. The unqualified
                # lookup is kept as a fallback for backward compatibility.
                clear = get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' %
                            ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page,
                                           self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(
                child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name),
                              id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif is_tag(child, 'w:tab'):
            # A tab becomes a run of non-breaking spaces whose count is
            # derived from the document's default tab stop.
            spaces = int(
                math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this sets a Python attribute on the element object,
        # not an XML ``lang`` attribute -- confirm a later pass reads it.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert one WordprocessingML run (``w:r``) into an HTML ``<span>``.

    The children of *run* are walked in document order and translated:
    ``w:t`` text, ``w:cr`` / ``w:br`` breaks, ``w:drawing`` / ``w:pict``
    images, foot/endnote references, tabs and the two hyphen elements.
    The created span is registered in ``self.object_map`` against *run*.

    :param run: the run element; it is iterated for its child elements.
    :return: the element built by ``SPAN()``. Its tag is switched to
        ``sub``/``sup`` when the resolved run style requests vertical
        alignment, and a ``lang`` attribute is set when the run's language
        differs from ``self.doc_lang``.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # ``Text`` is defined elsewhere. NOTE(review): it appears to buffer plain
    # strings in ``buf`` and write them to ``elem.<attr>``; ``add_elem`` is
    # presumably what switches the current element -- confirm against Text.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            ctext = child.text
            if space != 'preserve':
                # Remove leading and trailing whitespace. Word ignores
                # leading and trailing whitespace without preserve
                ctext = ctext.strip(' \n\r\t')
            # Only use a <span> with white-space:pre-wrap if this element
            # actually needs it, i.e. if it has more than one
            # consecutive space or it has newlines or tabs.
            multi_spaces = self.ms_pat.search(ctext) is not None
            preserve = multi_spaces or self.ws_pat.search(ctext) is not None
            if preserve:
                text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(ctext)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # The clear attribute is namespace-qualified just like
                # w:type, so look it up the same way. The unqualified
                # lookup is kept as a fallback for backward compatibility.
                clear = get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif is_tag(child, 'w:tab'):
            # A tab becomes a run of non-breaking spaces whose count is
            # derived from the document's default tab stop.
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')
        elif is_tag(child, 'w:noBreakHyphen'):
            text.buf.append(u'\u2011')
        elif is_tag(child, 'w:softHyphen'):
            text.buf.append(u'\u00ad')

    # Flush whatever text is still buffered onto the current element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        lang = html_lang(style.lang)
        # Only annotate runs whose language differs from the document's.
        if lang is not None and lang != self.doc_lang:
            ans.set('lang', lang)
    return ans