def commit(current_run):
    """Wrap a run of consecutive list items in one ``<ol>``/``<ul>`` element.

    The wrapper is inserted into the tree where the first item of the run
    used to be and every item of the run is moved into it. The bookkeeping
    attributes ``list-lvl``, ``list-id`` and ``list-template`` are removed
    from each item. *current_run* is emptied in place before returning.

    :param current_run: List of list-item elements; an empty list is a no-op.
    """
    if not current_run:
        return

    first = current_run[0]
    container = first.getparent()
    position = container.index(first)

    list_id = first.get('list-id')
    level_text = first.get('list-lvl')
    level = self.instances[list_id].levels[int(level_text)]
    lvlid = list_id + level_text

    list_factory = OL if level.is_numbered else UL
    wrap = list_factory('\n\t')
    templated = 'list-template' in first.attrib
    if templated:
        wrap.set('lvlid', lvlid)
    else:
        css_class = styles.register(level.css(images, self.pic_map, self.rid_map), 'list')
        wrap.set('class', css_class)
    container.insert(position, wrap)

    previous_value = None
    for item in current_run:
        wrap.append(item)
        item.tail = '\n\t'
        if templated:
            # Move the item's own text and children into a trailing <span>,
            # then put a leading <span> holding the template text in front.
            content = SPAN()
            content.text, item.text = item.text, None
            for grandchild in item:
                content.append(grandchild)
            item.append(content)
            label = SPAN(item.get('list-template'))
            # Remember the longest template text seen for this list level.
            longest = templates.get(lvlid, '')
            if label.text and len(label.text) > len(longest):
                templates[lvlid] = label.text
            item.insert(0, label)
        for name in ('list-lvl', 'list-id', 'list-template'):
            item.attrib.pop(name, None)
        value = int(item.get('value'))
        # An explicit value is dropped when it simply continues the sequence,
        # and always for <ul> wrappers.
        if wrap.tag == 'ul' or previous_value == value - 1:
            item.attrib.pop('value')
        previous_value = value

    current_run[-1].tail = '\n'
    del current_run[:]
def commit(current_run):
    """Move a run of consecutive list items into a new list wrapper element.

    An ``<ol>`` is used when the level is numbered or the first item carries
    a ``list-template`` attribute, otherwise a ``<ul>``. The wrapper takes the
    tree position of the first item. Each item loses its ``list-lvl``,
    ``list-id`` and ``list-template`` attributes, and *current_run* is cleared
    in place at the end.

    :param current_run: List of list-item elements; nothing happens if empty.
    """
    if not current_run:
        return

    head = current_run[0]
    owner = head.getparent()
    slot = owner.index(head)

    num_id = head.get("list-id")
    lvl_attr = head.get("list-lvl")
    lvl = self.instances[num_id].levels[int(lvl_attr)]
    lvlid = num_id + lvl_attr

    has_template = "list-template" in head.attrib
    if lvl.is_numbered or has_template:
        wrap = OL("\n\t")
    else:
        wrap = UL("\n\t")

    if has_template:
        wrap.set("lvlid", lvlid)
    else:
        wrap.set("class", styles.register(lvl.css(images, self.pic_map, self.rid_map), "list"))

    # Character-level CSS is registered once and reused for every label span.
    char_css = lvl.char_css()
    bullet_class = styles.register(char_css, "bullet") if char_css else char_css

    owner.insert(slot, wrap)

    prev = None
    for li in current_run:
        wrap.append(li)
        li.tail = "\n\t"
        if has_template:
            # The existing content goes into a trailing <span>; a leading
            # <span> with the template text is inserted before it.
            inner = SPAN()
            inner.text = li.text
            li.text = None
            for node in li:
                inner.append(node)
            li.append(inner)

            label = SPAN(li.get("list-template"))
            if bullet_class:
                label.set("class", bullet_class)
            # Keep the longest template text seen so far for this level id.
            if label.text and len(label.text) > len(templates.get(lvlid, "")):
                templates[lvlid] = label.text
            li.insert(0, label)

        for attr_name in ("list-lvl", "list-id", "list-template"):
            li.attrib.pop(attr_name, None)

        number = int(li.get("value"))
        redundant = wrap.tag == "ul" or prev == number - 1
        if redundant:
            li.attrib.pop("value")
        prev = number

    current_run[-1].tail = "\n"
    del current_run[:]
def convert_run(self, run):
    """Convert one run element into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

    The children of *run* are walked in order. Text, carriage returns, breaks,
    drawings/pictures and footnote/endnote references are converted and
    attached to the returned element. The new element is also recorded in
    ``self.object_map`` with *run* as its value.

    :param run: The run element whose children are converted.
    :return: The HTML element created for the run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # NOTE(review): Text is defined elsewhere; it appears to buffer plain text
    # in ``buf`` and track where it must be written (``elem``/``attr``) - confirm.
    text = Text(ans, "text", [])

    for child in run:
        if is_tag(child, "w:t"):
            if not child.text:
                continue
            space = child.get(XML("space"), None)
            if space == "preserve":
                # The CSS property is spelled "white-space". The previous
                # un-hyphenated spelling is not a CSS property, so the
                # declaration was ignored and preserved spaces collapsed.
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, "w:cr"):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, "w:br"):
            # NOTE(review): "type" and "clear" are looked up without a
            # namespace here; if the document writes them as w:type/w:clear
            # these lookups will not match - confirm against the input.
            typ = child.get("type", None)
            if typ in {"column", "page"}:
                br = BR(style="page-break-after:always")
            else:
                clear = child.get("clear", None)
                if clear in {"all", "left", "right"}:
                    br = BR(style="clear:%s" % ("both" if clear == "all" else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, "w:drawing") or is_tag(child, "w:pict"):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, "w:footnoteReference") or is_tag(child, "w:endnoteReference"):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href="#" + anchor, title=name), id="back_%s" % anchor)
                noteref.set("class", "noteref")
                text.add_elem(noteref)
                ans.append(text.elem)

    # Write out whatever plain text is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, "".join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {"superscript", "subscript"}:
        ans.tag = "sub" if style.vert_align == "subscript" else "sup"
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute on the element object,
        # not an HTML "lang" attribute - confirm that is what readers expect.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert one run element into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

    The children of *run* are walked in order. Text, carriage returns, breaks,
    drawings/pictures, footnote/endnote references, tabs and the special
    hyphen elements are converted and attached to the returned element. The
    new element is also recorded in ``self.object_map`` with *run* as value.

    :param run: The run element whose children are converted.
    :return: The HTML element created for the run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # NOTE(review): Text is defined elsewhere; it appears to buffer plain text
    # in ``buf`` and track where it must be written (``elem``/``attr``) - confirm.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # Read "clear" the same way as "type" above: through the
                # namespace-aware helper. A bare child.get('clear') only sees
                # an un-namespaced attribute and so never matched w:clear.
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif is_tag(child, 'w:tab'):
            # A tab becomes a run of non-breaking spaces whose length is
            # derived from the document's default tab stop.
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')
        elif is_tag(child, 'w:noBreakHyphen'):
            text.buf.append(u'\u2011')
        elif is_tag(child, 'w:softHyphen'):
            text.buf.append(u'\u00ad')

    # Write out whatever plain text is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute on the element object,
        # not an HTML "lang" attribute - confirm that is what readers expect.
        ans.lang = style.lang
    return ans
def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, prefix):
    """Build the HTML document for one page of the mobile book listing.

    :param books: Iterable of mappings, one per book. The keys read here are
        ``id``, ``formats`` (comma separated), ``authors``, ``title``,
        ``series``, ``series_index``, ``tags``, ``size`` and ``timestamp``,
        plus any key listed in *CKEYS*.
    :param num, search, sort, order: Passed through to ``build_search_box``.
    :param start, total: Passed (with *num*) to ``build_navigation``.
    :param url_base: Appended to *prefix* for the navigation links.
    :param CKEYS: Extra book keys to show; each non-empty value is expected
        to look like ``name:#:value``.
    :param prefix: String prepended to every server-local URL.
    :return: The ``HTML`` element for the whole page.
    """
    logo = DIV(IMG(src=prefix+'/static/calibre.png', alt=__appname__), id='logo')
    search_box = build_search_box(num, search, sort, order, prefix)
    # Two separate navigation bars are built, one above and one below the list.
    navigation = build_navigation(start, num, total, prefix+url_base)
    navigation2 = build_navigation(start, num, total, prefix+url_base)
    bookt = TABLE(id='listing')

    body = BODY(
        logo,
        search_box,
        navigation,
        HR(CLASS('spacer')),
        bookt,
        HR(CLASS('spacer')),
        navigation2
    )

    # Book list {{{
    for book in books:
        thumbnail = TD(
            IMG(type='image/jpeg', border='0', src=prefix+'/get/thumb/%s' % book['id']),
            CLASS('thumbnail'))

        data = TD()
        for fmt in book['formats'].split(','):
            if not fmt or fmt.lower().startswith('original_'):
                continue
            a = quote(ascii_filename(book['authors']))
            t = quote(ascii_filename(book['title']))
            s = SPAN(
                A(
                    fmt.lower(),
                    href=prefix+'/get/%s/%s-%s_%d.%s' % (fmt, a, t, book['id'], fmt.lower())
                ),
                CLASS('button'))
            s.tail = u''
            data.append(s)

        div = DIV(CLASS('data-container'))
        data.append(div)

        series = u'[%s - %s]' % (book['series'], book['series_index']) \
            if book['series'] else ''
        tags = u'Tags=[%s]' % book['tags'] if book['tags'] else ''

        # partition() always yields exactly one name and one value, so a
        # value that itself contains ':#:' (or has no separator at all) no
        # longer makes the %-format raise TypeError.
        custom = []
        for key in CKEYS:
            val = book.get(key, None)
            if val:
                name, sep, value = val.partition(':#:')
                custom.append('%s=[%s] ' % (name, value))
        ctext = ''.join(custom)

        first = SPAN(u'\u202f%s %s by %s' % (book['title'], series, book['authors']),
                     CLASS('first-line'))
        div.append(first)
        second = SPAN(u'%s - %s %s %s' % (book['size'], book['timestamp'], tags, ctext),
                      CLASS('second-line'))
        div.append(second)

        bookt.append(TR(thumbnail, data))
    # }}}

    body.append(DIV(
        A(_('Switch to the full interface (non-mobile interface)'),
          href=prefix+"/browse",
          style="text-decoration: none; color: blue",
          title=_('The full interface gives you many more features, '
                  'but it may not work well on a small screen')),
        style="text-align:center"))
    return HTML(
        HEAD(
            TITLE(__appname__ + ' Library'),
            LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
                 type='image/x-icon'),
            LINK(rel='stylesheet', type='text/css',
                 href=prefix+'/mobile/style.css'),
            # Like every other server-local URL on this page, the touch icon
            # must carry the prefix or it breaks when a prefix is in use.
            LINK(rel='apple-touch-icon', href=prefix+"/static/calibre.png")
        ),  # End head
        body
    )  # End html
def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, prefix, have_kobo_browser=False):
    """Build the HTML document for one page of the mobile book listing.

    :param books: Iterable of mappings, one per book. The keys read here are
        ``id``, ``formats`` (comma separated), ``authors``, ``title``,
        ``series``, ``series_index``, ``tags``, ``size`` and ``timestamp``,
        plus any key listed in *CKEYS*.
    :param num, search, sort, order: Passed through to ``build_search_box``.
    :param start, total: Passed (with *num*) to ``build_navigation``.
    :param url_base: Appended to *prefix* for the navigation links.
    :param CKEYS: Extra book keys to show; each non-empty value is expected
        to look like ``name:#:value``.
    :param prefix: String prepended to every server-local URL.
    :param have_kobo_browser: When true, download links for the ``kepub``
        format use the file extension ``kepub.epub``.
    :return: The ``HTML`` element for the whole page.
    """
    logo = DIV(IMG(src=prefix + "/static/calibre.png", alt=__appname__), id="logo")
    search_box = build_search_box(num, search, sort, order, prefix)
    # Two separate navigation bars are built, one above and one below the list.
    navigation = build_navigation(start, num, total, prefix + url_base)
    navigation2 = build_navigation(start, num, total, prefix + url_base)
    bookt = TABLE(id="listing")

    body = BODY(logo, search_box, navigation, HR(CLASS("spacer")), bookt, HR(CLASS("spacer")), navigation2)

    # Book list {{{
    for book in books:
        thumbnail = TD(
            IMG(type="image/jpeg", border="0", src=prefix + "/get/thumb/%s" % book["id"]), CLASS("thumbnail")
        )

        data = TD()
        for fmt in book["formats"].split(","):
            if not fmt or fmt.lower().startswith("original_"):
                continue
            file_extension = "kepub.epub" if have_kobo_browser and fmt.lower() == "kepub" else fmt
            a = quote(ascii_filename(book["authors"]))
            t = quote(ascii_filename(book["title"]))
            s = SPAN(
                A(fmt.lower(), href=prefix + "/get/%s/%s-%s_%d.%s" % (fmt, a, t, book["id"], file_extension.lower())),
                CLASS("button"),
            )
            s.tail = u""
            data.append(s)

        div = DIV(CLASS("data-container"))
        data.append(div)

        series = u"[%s - %s]" % (book["series"], book["series_index"]) if book["series"] else ""
        tags = u"Tags=[%s]" % book["tags"] if book["tags"] else ""

        # partition() always yields exactly one name and one value, so a
        # value that itself contains ':#:' (or has no separator at all) no
        # longer makes the %-format raise TypeError.
        custom = []
        for key in CKEYS:
            val = book.get(key, None)
            if val:
                name, sep, value = val.partition(":#:")
                custom.append("%s=[%s] " % (name, value))
        ctext = "".join(custom)

        first = SPAN(
            u"\u202f%s %s by %s"
            % (clean_xml_chars(book["title"]), clean_xml_chars(series), clean_xml_chars(book["authors"])),
            CLASS("first-line"),
        )
        div.append(first)
        second = SPAN(u"%s - %s %s %s" % (book["size"], book["timestamp"], tags, ctext), CLASS("second-line"))
        div.append(second)

        bookt.append(TR(thumbnail, data))
    # }}}

    body.append(
        DIV(
            A(
                _("Switch to the full interface (non-mobile interface)"),
                href=prefix + "/browse",
                style="text-decoration: none; color: blue",
                title=_(
                    "The full interface gives you many more features, "
                    "but it may not work well on a small screen"
                ),
            ),
            style="text-align:center",
        )
    )
    return HTML(
        HEAD(
            TITLE(__appname__ + " Library"),
            LINK(rel="icon", href="//calibre-ebook.com/favicon.ico", type="image/x-icon"),
            LINK(rel="stylesheet", type="text/css", href=prefix + "/mobile/style.css"),
            # Like every other server-local URL on this page, the touch icon
            # must carry the prefix or it breaks when a prefix is in use.
            LINK(rel="apple-touch-icon", href=prefix + "/static/calibre.png"),
            META(name="robots", content="noindex"),
        ),  # End head
        body,
    )  # End html
def convert_run(self, run):
    """Convert one run element into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

    The children of *run* are walked in order. Text, carriage returns, breaks,
    drawings/pictures, footnote/endnote references and field separators are
    converted and attached to the returned element. The new element is also
    recorded in ``self.object_map`` with *run* as its value.

    :param run: The run element whose children are converted.
    :return: The HTML element created for the run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # NOTE(review): Text is defined elsewhere; it appears to buffer plain text
    # in ``buf`` and track where it must be written (``elem``/``attr``) - confirm.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            if space == 'preserve':
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            # The attributes of <w:br> are namespace-qualified, so they are
            # read through get() exactly like w:fldCharType below. A bare
            # child.get('type') only sees un-namespaced attributes, which
            # meant page/column breaks and "clear" were never recognised.
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
            # A field separator contributes a single non-breaking space.
            text.buf.append('\xa0')

    # Write out whatever plain text is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute on the element object,
        # not an HTML "lang" attribute - confirm that is what readers expect.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert one run element into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

    The children of *run* are walked in order. Text, carriage returns, breaks,
    drawings/pictures, footnote/endnote references, tabs and the special
    hyphen elements are converted and attached to the returned element. The
    new element is also recorded in ``self.object_map`` with *run* as value.
    ``lang`` and ``dir`` attributes are set from the resolved run style.

    :param run: The run element whose children are converted.
    :return: The HTML element created for the run.
    """
    ns = self.namespace
    ans = SPAN()
    self.object_map[ans] = run
    # NOTE(review): Text is defined elsewhere; it appears to buffer plain text
    # in ``buf`` and track where it must be written (``elem``/``attr``) - confirm.
    text = Text(ans, 'text', [])

    for child in run:
        if ns.is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            ctext = child.text
            if space != 'preserve':
                # Remove leading and trailing whitespace. Word ignores
                # leading and trailing whitespace without preserve
                ctext = ctext.strip(' \n\r\t')
            # Only use a <span> with white-space:pre-wrap if this element
            # actually needs it, i.e. if it has more than one
            # consecutive space or it has newlines or tabs.
            multi_spaces = self.ms_pat.search(ctext) is not None
            preserve = multi_spaces or self.ws_pat.search(ctext) is not None
            if preserve:
                text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(ctext)
        elif ns.is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif ns.is_tag(child, 'w:br'):
            typ = ns.get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # Read "clear" the same way as "type" above: through the
                # namespace-aware helper. A bare child.get('clear') only sees
                # an un-namespaced attribute and so never matched w:clear.
                clear = ns.get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif ns.is_tag(child, 'w:drawing') or ns.is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif ns.is_tag(child, 'w:footnoteReference') or ns.is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)
        elif ns.is_tag(child, 'w:tab'):
            # A tab becomes a run of non-breaking spaces whose length is
            # derived from the document's default tab stop.
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')
        elif ns.is_tag(child, 'w:noBreakHyphen'):
            text.buf.append('\u2011')
        elif ns.is_tag(child, 'w:softHyphen'):
            text.buf.append('\u00ad')

    # Write out whatever plain text is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        lang = html_lang(style.lang)
        # Only mark the run when its language differs from the document's.
        if lang is not None and lang != self.doc_lang:
            ans.set('lang', lang)
    if style.rtl is True:
        ans.set('dir', 'rtl')
    return ans
def resolve_links(self):
    """Turn collected hyperlinks into ``<a>`` elements with resolved targets.

    Three sources are processed in turn: ``self.link_map`` (hyperlink element
    -> list of spans), ``self.fields.hyperlink_fields`` (hyperlink mapping
    plus the runs it covers) and ``self.images.links`` (linked images).
    ``self.resolved_link_map`` is rebuilt from the first source only.
    """
    self.resolved_link_map = {}
    for hyperlink, spans in self.link_map.items():
        relationships_by_id = self.link_source_map[hyperlink]
        # A link covering several spans gets a single wrapper element;
        # a link covering one span reuses that span.
        span = spans[0]
        if len(spans) > 1:
            span = self.wrap_elems(spans, SPAN())
        span.tag = 'a'
        self.resolved_link_map[hyperlink] = span
        tgt = self.namespace.get(hyperlink, 'w:tgtFrame')
        if tgt:
            span.set('target', tgt)
        tt = self.namespace.get(hyperlink, 'w:tooltip')
        if tt:
            span.set('title', tt)
        # A relationship id takes precedence over an in-document anchor.
        rid = self.namespace.get(hyperlink, 'r:id')
        if rid and rid in relationships_by_id:
            span.set('href', relationships_by_id[rid])
            continue
        anchor = self.namespace.get(hyperlink, 'w:anchor')
        if anchor and anchor in self.anchor_map:
            span.set('href', '#' + self.anchor_map[anchor])
            continue
        # Neither lookup succeeded: the <a> is left without an href.
        self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), ignoring' % (rid, anchor))
        # hrefs that point nowhere give epubcheck a hernia. The element
        # should be styled explicitly by Word anyway.
        # span.set('href', '#')

    # Reverse of object_map, used to find the element recorded for a run.
    rmap = {v:k for k, v in self.object_map.items()}
    for hyperlink, runs in self.fields.hyperlink_fields:
        # Runs with no recorded element are skipped.
        spans = [rmap[r] for r in runs if r in rmap]
        if not spans:
            continue
        span = spans[0]
        if len(spans) > 1:
            span = self.wrap_elems(spans, SPAN())
        span.tag = 'a'
        tgt = hyperlink.get('target', None)
        if tgt:
            span.set('target', tgt)
        tt = hyperlink.get('title', None)
        if tt:
            span.set('title', tt)
        url = hyperlink.get('url', None)
        if url is None:
            anchor = hyperlink.get('anchor', None)
            if anchor in self.anchor_map:
                span.set('href', '#' + self.anchor_map[anchor])
                continue
            self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
        else:
            # A url that is itself a known anchor name becomes an internal
            # link; anything else is used as the href unchanged.
            if url in self.anchor_map:
                span.set('href', '#' + self.anchor_map[url])
                continue
            span.set('href', url)

    for img, link, relationships_by_id in self.images.links:
        # The position is taken before A(img) is built, and the new <a>
        # (now containing the image) is inserted at that same position.
        parent = img.getparent()
        idx = parent.index(img)
        a = A(img)
        # Text following the image must now follow the wrapping <a>.
        a.tail, img.tail = img.tail, None
        parent.insert(idx, a)
        tgt = link.get('target', None)
        if tgt:
            a.set('target', tgt)
        tt = link.get('title', None)
        if tt:
            a.set('title', tt)
        rid = link['id']
        if rid in relationships_by_id:
            dest = relationships_by_id[rid]
            if dest.startswith('#'):
                # Internal destination: only linked when the anchor is known.
                if dest[1:] in self.anchor_map:
                    a.set('href', '#' + self.anchor_map[dest[1:]])
            else:
                a.set('href', dest)
def convert_p(self, p):
    """Convert one paragraph element into an HTML ``<p>`` (or ``<h1>``-``<h6>``).

    Runs that belong directly to *p* are converted with ``convert_run`` and
    appended to the result. Bookmarks and TOC field instructions create
    anchors, and runs inside hyperlinks are recorded in ``self.link_map``.
    The new element is stored in ``self.object_map`` with *p* as its value.

    :param p: The paragraph element to convert.
    :return: The HTML element created for the paragraph.
    """
    dest = P()
    self.object_map[dest] = p
    style = self.styles.resolve_paragraph(p)
    self.layers[p] = []
    self.frame_map[p] = style.frame
    self.add_frame(dest, style.frame)

    # Anchor created by a bookmark/TOC field that has not yet been attached
    # to an element, and the most recently seen hyperlink element.
    current_anchor = None
    current_hyperlink = None
    hl_xpath = self.namespace.XPath('ancestor::w:hyperlink[1]')

    def p_parent(x):
        # Ensure that nested <w:p> tags are handled. These can occur if a
        # textbox is present inside a paragraph.
        # Returns the nearest ancestor whose tag ends with '}p', or None
        # once an ancestor without a usable ``tag`` is reached.
        while True:
            x = x.getparent()
            try:
                if x.tag.endswith('}p'):
                    return x
            except AttributeError:
                break

    for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink', 'w:instrText'):
        # Skip anything that belongs to a nested paragraph.
        if p_parent(x) is not p:
            continue
        if x.tag.endswith('}r'):
            span = self.convert_run(x)
            if current_anchor is not None:
                # The pending anchor goes on the paragraph itself when this
                # is its first run, otherwise on this run's element.
                (dest if len(dest) == 0 else span).set('id', current_anchor)
                current_anchor = None
            if current_hyperlink is not None:
                try:
                    hl = hl_xpath(x)[0]
                    self.link_map[hl].append(span)
                    self.link_source_map[hl] = self.current_rels
                    x.set('is-link', '1')
                except IndexError:
                    # This run has no hyperlink ancestor: the link has ended.
                    current_hyperlink = None
            dest.append(span)
            self.layers[p].append(x)
        elif x.tag.endswith('}bookmarkStart'):
            anchor = self.namespace.get(x, 'w:name')
            if anchor and anchor not in self.anchor_map and anchor != '_GoBack':
                # _GoBack is a special bookmark inserted by Word 2010 for
                # the return to previous edit feature, we ignore it
                old_anchor = current_anchor
                self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(iter(self.anchor_map.values())))
                if old_anchor is not None:
                    # The previous anchor was not applied to any element
                    # so every name that pointed to it is redirected.
                    for a, t in tuple(self.anchor_map.items()):
                        if t == old_anchor:
                            self.anchor_map[a] = current_anchor
        elif x.tag.endswith('}hyperlink'):
            current_hyperlink = x
        elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '):
            # A TOC field gets an anchor of its own, registered under a
            # random key and remembered in self.toc_anchor.
            old_anchor = current_anchor
            anchor = str(uuid.uuid4())
            self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(iter(self.anchor_map.values())))
            self.toc_anchor = current_anchor
            if old_anchor is not None:
                # The previous anchor was not applied to any element
                for a, t in tuple(self.anchor_map.items()):
                    if t == old_anchor:
                        self.anchor_map[a] = current_anchor
    if current_anchor is not None:
        # This paragraph had no <w:r> descendants
        # (or none after the last anchor), so the paragraph takes the id.
        dest.set('id', current_anchor)
        current_anchor = None

    # Style names of the form "heading N" select the tag, clamped to h1..h6.
    m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
    if m is not None:
        n = min(6, max(1, int(m.group(1))))
        dest.tag = 'h%d' % n

    if style.bidi is True:
        dest.set('dir', 'rtl')

    # Collect groups of consecutive spans whose run styles report the same
    # border, so that each group can share one wrapper.
    # NOTE(review): a group is only recorded when a span with a different
    # border follows it, and that span does not start a new group; the
    # group still open when the loop ends is never recorded - confirm intended.
    border_runs = []
    common_borders = []
    for span in dest:
        run = self.object_map[span]
        style = self.styles.resolve_run(run)
        if not border_runs or border_runs[-1][1].same_border(style):
            border_runs.append((span, style))
        elif border_runs:
            if len(border_runs) > 1:
                common_borders.append(border_runs)
            border_runs = []

    for border_run in common_borders:
        spans = []
        bs = {}
        for span, style in border_run:
            style.get_border_css(bs)
            style.clear_border_css()
            spans.append(span)
        if bs:
            cls = self.styles.register(bs, 'text_border')
            wrapper = self.wrap_elems(spans, SPAN())
            wrapper.set('class', cls)

    # ``style`` is rebound by the loops above, but when len(dest) == 0 they
    # never run, so here it is still the paragraph style.
    if not dest.text and len(dest) == 0 and not style.has_visible_border():
        # Empty paragraph add a non-breaking space so that it is rendered
        # by WebKit
        dest.text = NBSP

    # If the last element in a block is a <br> the <br> is not rendered in
    # HTML, unless it is followed by a trailing space. Word, on the other
    # hand inserts a blank line for trailing <br>s.
    if len(dest) > 0 and not dest[-1].tail:
        if dest[-1].tag == 'br':
            dest[-1].tail = NBSP
        elif len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and not dest[-1][-1].tail:
            dest[-1][-1].tail = NBSP

    return dest
def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, prefix, have_kobo_browser=False):
    """Build the HTML document for one page of the mobile book listing.

    :param books: Iterable of mappings, one per book. The keys read here are
        ``id``, ``formats`` (comma separated), ``authors``, ``title``,
        ``series``, ``series_index``, ``tags``, ``size`` and ``timestamp``,
        plus any key listed in *CKEYS*.
    :param num, search, sort, order: Passed through to ``build_search_box``.
    :param start, total: Passed (with *num*) to ``build_navigation``.
    :param url_base: Appended to *prefix* for the navigation links.
    :param CKEYS: Extra book keys to show; each non-empty value is expected
        to look like ``name:#:value``.
    :param prefix: String prepended to every server-local URL.
    :param have_kobo_browser: When true, download links for the ``kepub``
        format use the file extension ``kepub.epub``.
    :return: The ``HTML`` element for the whole page.
    """
    logo = DIV(IMG(src=prefix + '/static/calibre.png', alt=__appname__), id='logo')
    search_box = build_search_box(num, search, sort, order, prefix)
    # Two separate navigation bars are built, one above and one below the list.
    navigation = build_navigation(start, num, total, prefix + url_base)
    navigation2 = build_navigation(start, num, total, prefix + url_base)
    bookt = TABLE(id='listing')

    body = BODY(logo, search_box, navigation, HR(CLASS('spacer')), bookt,
                HR(CLASS('spacer')), navigation2)

    # Book list {{{
    for book in books:
        thumbnail = TD(
            IMG(type='image/jpeg', border='0', src=prefix + '/get/thumb/%s' % book['id']),
            CLASS('thumbnail'))

        data = TD()
        for fmt in book['formats'].split(','):
            if not fmt or fmt.lower().startswith('original_'):
                continue
            file_extension = "kepub.epub" if have_kobo_browser and fmt.lower() == "kepub" else fmt
            a = quote(ascii_filename(book['authors']))
            t = quote(ascii_filename(book['title']))
            s = SPAN(
                A(fmt.lower(),
                  href=prefix + '/get/%s/%s-%s_%d.%s' % (fmt, a, t, book['id'], file_extension.lower())),
                CLASS('button'))
            s.tail = u''
            data.append(s)

        div = DIV(CLASS('data-container'))
        data.append(div)

        series = u'[%s - %s]' % (book['series'], book['series_index']) \
            if book['series'] else ''
        tags = u'Tags=[%s]' % book['tags'] if book['tags'] else ''

        # partition() always yields exactly one name and one value, so a
        # value that itself contains ':#:' (or has no separator at all) no
        # longer makes the %-format raise TypeError.
        custom = []
        for key in CKEYS:
            val = book.get(key, None)
            if val:
                name, sep, value = val.partition(':#:')
                custom.append('%s=[%s] ' % (name, value))
        ctext = ''.join(custom)

        first = SPAN(
            u'\u202f%s %s by %s' % (clean_xml_chars(book['title']), clean_xml_chars(series),
                                    clean_xml_chars(book['authors'])),
            CLASS('first-line'))
        div.append(first)
        second = SPAN(
            u'%s - %s %s %s' % (book['size'], book['timestamp'], tags, ctext),
            CLASS('second-line'))
        div.append(second)

        bookt.append(TR(thumbnail, data))
    # }}}

    body.append(
        DIV(A(_('Switch to the full interface (non-mobile interface)'),
              href=prefix + "/browse",
              style="text-decoration: none; color: blue",
              title=_('The full interface gives you many more features, '
                      'but it may not work well on a small screen')),
            style="text-align:center"))
    return HTML(
        HEAD(
            TITLE(__appname__ + ' Library'),
            LINK(rel='icon', href='//calibre-ebook.com/favicon.ico', type='image/x-icon'),
            LINK(rel='stylesheet', type='text/css', href=prefix + '/mobile/style.css'),
            # Like every other server-local URL on this page, the touch icon
            # must carry the prefix or it breaks when a prefix is in use.
            LINK(rel='apple-touch-icon', href=prefix + "/static/calibre.png"),
            META(name="robots", content="noindex")),  # End head
        body)  # End html
def convert_run(self, run):
    """Convert one run element into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

    The children of *run* are walked in order. Text, carriage returns, breaks,
    drawings/pictures and footnote/endnote references are converted and
    attached to the returned element. The new element is also recorded in
    ``self.object_map`` with *run* as its value.

    :param run: The run element whose children are converted.
    :return: The HTML element created for the run.
    """
    ans = SPAN()
    self.object_map[ans] = run
    # NOTE(review): Text is defined elsewhere; it appears to buffer plain text
    # in ``buf`` and track where it must be written (``elem``/``attr``) - confirm.
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # Read "clear" the same way as "type" above: through the
                # namespace-aware helper. A bare child.get('clear') only sees
                # an un-namespaced attribute and so never matched w:clear.
                clear = get(child, 'w:clear')
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                noteref = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                noteref.set('class', 'noteref')
                text.add_elem(noteref)
                ans.append(text.elem)

    # Write out whatever plain text is still buffered.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute on the element object,
        # not an HTML "lang" attribute - confirm that is what readers expect.
        ans.lang = style.lang
    return ans
def convert_p(self, p):
    """Convert one paragraph element into an HTML ``<p>`` (or ``<h1>``-``<h6>``).

    Every run found under *p* is converted with ``convert_run`` and appended
    to the result. Bookmarks create anchors in ``self.anchor_map`` and runs
    inside hyperlinks are recorded in ``self.link_map``. The new element is
    stored in ``self.object_map`` with *p* as its value.

    :param p: The paragraph element to convert.
    :return: The HTML element created for the paragraph.
    """
    para = P()
    self.object_map[para] = p
    style = self.styles.resolve_paragraph(p)
    self.layers[p] = []
    self.add_frame(para, style.frame)

    pending_anchor = None  # anchor not yet attached to any element
    open_hyperlink = None  # most recently seen hyperlink element
    find_hyperlink = XPath('ancestor::w:hyperlink[1]')

    for node in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
        tag = node.tag

        if tag.endswith('}r'):
            span = self.convert_run(node)
            if pending_anchor is not None:
                # First run of the paragraph: the paragraph takes the id,
                # otherwise the run's own element does.
                target = span if len(para) else para
                target.set('id', pending_anchor)
                pending_anchor = None
            if open_hyperlink is not None:
                try:
                    link = find_hyperlink(node)[0]
                    self.link_map[link].append(span)
                    node.set('is-link', '1')
                except IndexError:
                    # No hyperlink ancestor: the link has ended.
                    open_hyperlink = None
            para.append(span)
            self.layers[p].append(node)
            continue

        if tag.endswith('}hyperlink'):
            open_hyperlink = node
            continue

        if not tag.endswith('}bookmarkStart'):
            continue

        name = get(node, 'w:name')
        if not name or name in self.anchor_map:
            continue
        stale_anchor = pending_anchor
        pending_anchor = generate_anchor(name, frozenset(self.anchor_map.itervalues()))
        self.anchor_map[name] = pending_anchor
        if stale_anchor is not None:
            # The earlier anchor was never attached to an element, so every
            # name that pointed at it is redirected to the new one.
            for key, value in tuple(self.anchor_map.iteritems()):
                if value == stale_anchor:
                    self.anchor_map[key] = pending_anchor

    # Style names of the form "heading N" pick the tag, clamped to h1..h6.
    heading = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
    if heading is not None:
        depth = int(heading.group(1))
        para.tag = 'h%d' % min(6, max(1, depth))

    if style.direction == 'rtl':
        para.set('dir', 'rtl')

    # Group consecutive spans whose run styles report the same border. A
    # group is recorded only when a span with a different border follows it
    # and the group has more than one member.
    border_runs = []
    common_borders = []
    for span in para:
        run_style = self.styles.resolve_run(self.object_map[span])
        if border_runs and not border_runs[-1][1].same_border(run_style):
            if len(border_runs) > 1:
                common_borders.append(border_runs)
            border_runs = []
        else:
            border_runs.append((span, run_style))

    # Each recorded group is wrapped in one <span> carrying the shared
    # border CSS, and the border CSS of its members is cleared.
    for group in common_borders:
        members = []
        border_css = {}
        for span, run_style in group:
            run_style.get_border_css(border_css)
            run_style.clear_border_css()
            members.append(span)
        if border_css:
            cls = self.styles.register(border_css, 'text_border')
            wrapper = self.wrap_elems(members, SPAN())
            wrapper.set('class', cls)

    if not para.text and len(para) == 0:
        # Empty paragraph: add a non-breaking space so that it is rendered
        # by WebKit
        para.text = '\xa0'

    return para