def get_navbar(self, f, feeds, top=True):
    """Build the section-level navigation bar for feed number ``f``.

    Returns an empty ``DIV`` when there are fewer than two feeds. Otherwise
    the bar holds, in order, an optional 'Next section' link, a 'Main menu'
    link and an optional 'Previous section' link, separated by ``|``
    characters stored in the element tails. ``top`` selects whether the
    ``HR`` rule follows the links (``True``) or precedes them (``False``).
    """
    feed_count = len(feeds)
    if feed_count < 2:
        return DIV()

    navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_70',
        style='text-align:center'))
    if not top:
        # The rule comes first, so the leading separator moves from the
        # DIV text into the tail of the rule.
        rule = HR()
        navbar.append(rule)
        navbar.text = None
        rule.tail = '| '

    # (label, href, trailing separator) for every link, in display order.
    entries = []
    if f + 1 < feed_count:
        entries.append((_('Next section'),
                        '../feed_%d/index.html' % (f + 1), ' | '))
    entries.append((_('Main menu'), "../index.html", ' | '))
    if f > 0:
        entries.append((_('Previous section'),
                        '../feed_%d/index.html' % (f - 1), ' |'))

    for label, href, separator in entries:
        anchor = A(label, href=href)
        anchor.tail = separator
        navbar.append(anchor)

    if top:
        navbar.append(HR())
    return navbar
def _generate(self, bottom, feed, art, number_of_articles_in_feed,
              two_levels, url, __appname__, prefix='', center=True,
              extra_css=None, style=None):
    """Build the per-article navigation bar document into ``self.root``.

    For the bottom bar a "downloaded by" attribution paragraph is emitted
    (unless ``url`` is a ``file://`` URL); for the top bar a 'Next' link is
    emitted instead. Both variants then get 'Section menu' and 'Main menu'
    links, and the top bar additionally gets a 'Previous' link when ``art``
    is not the first article. ``prefix`` is prepended to every href after
    being given a trailing slash if it lacks one.
    """
    head = HEAD(TITLE('navbar'))
    for css in (style, extra_css):
        if css:
            head.append(STYLE(css, type='text/css'))

    if prefix and not prefix.endswith('/'):
        prefix += '/'
    navbar = DIV(attrs('calibre_navbar', rescale=70,
        style='text-align:' + ('center' if center else 'left')))

    def mark_separator():
        # The separator lives in the tail of whichever child is currently last.
        next(navbar.iterchildren(reversed=True)).tail = ' | '

    if not bottom:
        if art == number_of_articles_in_feed - 1:
            # Last article of the feed: 'Next' jumps to the following feed.
            next_art, up = 'feed_%d' % (feed + 1), '../..'
        else:
            next_art, up = 'article_%d' % (art + 1), '..'
        next_href = '%s%s/%s/index.html' % (prefix, up, next_art)
        navbar.text = '| '
        navbar.append(A(_('Next'), href=next_href))
    else:
        if not url.startswith('file://'):
            navbar.append(HR())
            p = PT(
                'This article was downloaded by ',
                STRONG(__appname__),
                A(url, href=url, rel='calibre-downloaded-from'),
                style='text-align:left; max-width: 100%; overflow: hidden;'
            )
            p[0].tail = ' from '
            navbar.append(p)
            navbar.append(BR())
        navbar.append(BR())

    section_href = '%s../index.html#article_%d' % (prefix, art)
    mark_separator()
    navbar.append(A(_('Section menu'), href=section_href))

    main_href = '%s../../index.html#feed_%d' % (prefix, feed)
    mark_separator()
    navbar.append(A(_('Main menu'), href=main_href))

    if art > 0 and not bottom:
        previous_href = '%s../article_%d/index.html' % (prefix, art - 1)
        mark_separator()
        navbar.append(A(_('Previous'), href=previous_href))

    mark_separator()
    if not bottom:
        navbar.append(HR())

    self.root = HTML(head, BODY(navbar))
def _generate(self, bottom, feed, art, number_of_articles_in_feed,
              two_levels, url, __appname__, prefix='', center=True,
              extra_css=None, style=None):
    """Build the touchscreen per-article navigation bar into ``self.root``.

    The bar is a one-row table with four cells: Previous (empty for the
    first article), Articles, Sections and Next. For the last article of a
    feed the Next link points at the following feed rather than the
    following article.

    ``prefix`` is prepended to every href. A trailing slash is added when it
    is missing so that a prefix such as ``'../..'`` does not fuse with the
    relative path that follows it.
    """
    head = HEAD(TITLE('navbar'))
    if style:
        head.append(STYLE(style, type='text/css'))
    if extra_css:
        head.append(STYLE(extra_css, type='text/css'))

    # Without this, '%s../index.html' % '../..' yields '../..../index.html'.
    if prefix and not prefix.endswith('/'):
        prefix += '/'

    navbar = DIV()
    navbar_t = TABLE(CLASS('touchscreen_navbar'))
    navbar_tr = TR()

    # | Previous
    if art > 0:
        link = A(CLASS('article_link'), _('Previous'),
                 href='%s../article_%d/index.html' % (prefix, art - 1))
        navbar_tr.append(TD(CLASS('article_prev'), link))
    else:
        navbar_tr.append(TD(CLASS('article_prev'), ''))

    # | Articles | Sections |
    link = A(CLASS('articles_link'), _('Articles'),
             href='%s../index.html#article_%d' % (prefix, art))
    navbar_tr.append(TD(CLASS('article_articles_list'), link))

    link = A(CLASS('sections_link'), _('Sections'),
             href='%s../../index.html#feed_%d' % (prefix, feed))
    navbar_tr.append(TD(CLASS('article_sections_list'), link))

    # | Next
    # Named next_art rather than next so the builtin is not shadowed.
    is_last_article = art == number_of_articles_in_feed - 1
    next_art = 'feed_%d' % (feed + 1) if is_last_article \
        else 'article_%d' % (art + 1)
    up = '../..' if is_last_article else '..'
    link = A(CLASS('article_link'), _('Next'),
             href='%s%s/%s/index.html' % (prefix, up, next_art))
    navbar_tr.append(TD(CLASS('article_next'), link))

    navbar_t.append(navbar_tr)
    navbar.append(navbar_t)

    self.root = HTML(head, BODY(navbar))
def _generate(self, title, masthead, datefmt, feeds, extra_css=None, style=None):
    """Build the top-level index page into ``self.root``.

    The page shows the masthead image, the current date formatted with
    ``datefmt`` and a list with one link per truthy feed. ``datefmt`` is
    encoded with ``preferred_encoding`` first when it is a ``unicode``
    object. The root element gets a ``lang`` attribute when
    ``self.html_lang`` is set.
    """
    self.IS_HTML = False
    if isinstance(datefmt, unicode):
        datefmt = datefmt.encode(preferred_encoding)
    date = strftime(datefmt)

    head = HEAD(TITLE(title))
    for css in (style, extra_css):
        if css:
            head.append(STYLE(css, type='text/css'))

    feed_list = UL(CLASS('calibre_feed_list'))
    for index, feed in enumerate(feeds):
        if not feed:
            continue
        anchor = A(feed.title, CLASS('feed', 'calibre_rescale_120',
            href='feed_%d/index.html' % index))
        feed_list.append(LI(anchor, id='feed_%d' % index))

    masthead_par = PT(IMG(src=masthead, alt="masthead"), style='text-align:center')
    date_par = PT(date, style='text-align:right')
    container = DIV(masthead_par, date_par, feed_list, CLASS('calibre_rescale_100'))

    self.root = HTML(head, BODY(container))
    if self.html_lang:
        self.root.set('lang', self.html_lang)
def inject_toc(tree, rules, ty_map_fn=None):
    """Insert ``dashAnchor`` anchors before the nodes matched by ``rules``.

    Each rule is a mapping with an ``"xpath"`` expression, an ``"fn"``
    callable returning a ``(name, type)`` pair (or a falsy value to skip the
    node) and an optional ``"place_fn"`` that picks the node the anchor is
    inserted before. ``ty_map_fn``, when given, translates the type before
    it is written into the anchor name.

    Returns ``True`` if at least one anchor was inserted, else ``False``.
    """
    modified = False
    for rule in rules:
        for node in tree.xpath(rule["xpath"]):
            pair = rule["fn"](node)
            if not pair:
                continue
            name, child_ty = pair

            toc_node = A(CLASS('dashAnchor'))
            ty = ty_map_fn(child_ty) if ty_map_fn else child_ty
            toc_node.attrib["name"] = "//apple_ref/cpp/%s/%s" % (ty, name)

            place_fn = rule.get('place_fn', None)
            place_node = place_fn(node) if place_fn else node

            modified = True
            place_node.addprevious(toc_node)
    return modified
def _generate(self, title, masthead, datefmt, feeds, extra_css=None, style=None):
    """Build the touchscreen index page into ``self.root``.

    The page shows the masthead image, a publication date heading and a
    table with one row per truthy feed (title link plus article count).
    ``datefmt`` is accepted but not used here: the date is always rendered
    as weekday, month name, day without leading zero, and year.
    """
    self.IS_HTML = False

    weekday = strftime('%A')
    month = strftime('%B')
    day = strftime('%d').lstrip('0')
    year = strftime('%Y')
    date = '%s, %s %s, %s' % (weekday, month, day, year)

    masthead_p = etree.Element("p")
    masthead_p.set("style","text-align:center")
    masthead_img = etree.SubElement(masthead_p, "img")
    masthead_img.set("src",masthead)
    masthead_img.set("alt","masthead")

    head = HEAD(TITLE(title))
    for css in (style, extra_css):
        if css:
            head.append(STYLE(css, type='text/css'))

    toc = TABLE(CLASS('toc'),width="100%",border="0",cellpadding="3px")
    for index, feed in enumerate(feeds):
        if not feed:
            continue
        title_cell = TD(CLASS('calibre_rescale_120'),
                        A(feed.title, href='feed_%d/index.html'%index))
        count_cell = TD('%s' % len(feed.articles), style="text-align:right")
        row = TR()
        row.append(title_cell)
        row.append(count_cell)
        toc.append(row)

    container = DIV(
        masthead_p,
        H3(CLASS('publish_date'),date),
        DIV(CLASS('divider')),
        toc)

    self.root = HTML(head, BODY(container))
    if self.html_lang:
        self.root.set('lang', self.html_lang)
def _generate(self, title, masthead, datefmt, feeds, extra_css=None, style=None):
    """Build the top-level index page into ``self.root``.

    The page shows the masthead image, the current date formatted with
    ``datefmt`` and a list with one link per feed whose ``len()`` is
    non-zero. The root element gets a ``lang`` attribute when
    ``self.html_lang`` is set.
    """
    self.IS_HTML = False

    head = HEAD(TITLE(title))
    for css in (style, extra_css):
        if css:
            head.append(STYLE(css, type='text/css'))

    date = strftime(datefmt)

    feed_list = UL(attrs('calibre_feed_list'))
    for index, feed in enumerate(feeds):
        if not len(feed):
            continue
        anchor = A(feed.title,
                   attrs('feed', rescale=120, href='feed_%d/index.html' % index))
        feed_list.append(LI(anchor, id='feed_%d' % index))

    masthead_par = PT(IMG(src=masthead, alt="masthead"), style='text-align:center')
    date_par = PT(date, style='text-align:right')
    container = DIV(masthead_par, date_par, feed_list, attrs(rescale=100))

    self.root = HTML(head, BODY(container))
    if self.html_lang:
        self.root.set('lang', self.html_lang)
def _generate(self, bottom, feed, art, number_of_articles_in_feed,
              two_levels, url, __appname__, prefix='', center=True,
              extra_css=None, style=None):
    """Build the touchscreen per-article navigation bar into ``self.root``.

    For the bottom bar (``bottom`` true and ``url`` not a ``file://`` URL) a
    "downloaded by" attribution paragraph precedes the navigation table.
    The table has one row with four cells: Previous (empty for the first
    article), Articles, Sections and Next. For the last article of a feed
    the Next link points at the following feed.

    ``prefix`` is prepended to every href. A trailing slash is added when it
    is missing so that a prefix such as ``'../..'`` does not fuse with the
    relative path that follows it.
    """
    head = HEAD(TITLE('navbar'))
    if style:
        head.append(STYLE(style, type='text/css'))
    if extra_css:
        head.append(STYLE(extra_css, type='text/css'))

    # Without this, '%s../index.html' % '../..' yields '../..../index.html'.
    if prefix and not prefix.endswith('/'):
        prefix += '/'

    navbar = DIV()
    navbar_t = TABLE(CLASS('touchscreen_navbar'))
    navbar_tr = TR()

    if bottom and not url.startswith('file://'):
        navbar.append(HR())
        text = 'This article was downloaded by '
        p = PT(text, STRONG(__appname__),
               A(url, href=url, rel='calibre-downloaded-from'),
               style='text-align:left; max-width: 100%; overflow: hidden;')
        # p[0] is the STRONG element; its tail sits before the link.
        p[0].tail = ' from '
        navbar.append(p)
        navbar.append(BR())

    # | Previous
    if art > 0:
        link = A(CLASS('article_link'), _('Previous'),
                 href='%s../article_%d/index.html' % (prefix, art - 1))
        navbar_tr.append(TD(CLASS('article_prev'), link))
    else:
        navbar_tr.append(TD(CLASS('article_prev'), ''))

    # | Articles | Sections |
    link = A(CLASS('articles_link'), _('Articles'),
             href='%s../index.html#article_%d' % (prefix, art))
    navbar_tr.append(TD(CLASS('article_articles_list'), link))

    link = A(CLASS('sections_link'), _('Sections'),
             href='%s../../index.html#feed_%d' % (prefix, feed))
    navbar_tr.append(TD(CLASS('article_sections_list'), link))

    # | Next
    # Named next_art rather than next so the builtin is not shadowed.
    is_last_article = art == number_of_articles_in_feed - 1
    next_art = 'feed_%d' % (feed + 1) if is_last_article \
        else 'article_%d' % (art + 1)
    up = '../..' if is_last_article else '..'
    link = A(CLASS('article_link'), _('Next'),
             href='%s%s/%s/index.html' % (prefix, up, next_art))
    navbar_tr.append(TD(CLASS('article_next'), link))

    navbar_t.append(navbar_tr)
    navbar.append(navbar_t)

    self.root = HTML(head, BODY(navbar))
def build_navigation(start, num, total, url_base):  # {{{
    """Build the paging bar for a listing of ``total`` books.

    ``start`` is the 1-based index of the first book on the current page
    and ``num`` the page size. The returned ``DIV`` holds a
    "Books X to Y of Z" tagline and a table with First/Previous buttons
    (when not on the first page) and Next/Last buttons (when books remain
    after the current page). Each button links to
    ``<url_base>;start=<index>``.
    """
    end = min((start + num - 1), total)
    tagline = SPAN('Books %d to %d of %d' % (start, end, total),
                   style='display: block; text-align: center;')
    left_buttons = TD(CLASS('button', style='text-align:left'))
    right_buttons = TD(CLASS('button', style='text-align:right'))

    if start > 1:
        for t, s in [('First', 1), ('Previous', max(start - num, 1))]:
            left_buttons.append(A(t, href='%s;start=%d' % (url_base, s)))

    # Offer Next/Last whenever the page stops short of the final book.
    # The former test, total > start + num, hid the buttons when exactly
    # one book was left (e.g. start=1, num=10, total=11), making that
    # book unreachable.
    if end < total:
        for t, s in [('Next', start + num), ('Last', total - num + 1)]:
            right_buttons.append(A(t, href='%s;start=%d' % (url_base, s)))

    buttons = TABLE(TR(left_buttons, right_buttons), CLASS('buttons'))
    return DIV(tagline, buttons, CLASS('navigation'))
def convert_run(self, run):
    """Convert one run element into an HTML ``SPAN`` (or ``sub``/``sup``).

    The new element is registered in ``self.object_map`` with ``run`` as its
    value. Children of the run are translated as follows: ``w:t`` becomes
    text (wrapped in a ``white-space:pre-wrap`` span only when
    ``xml:space="preserve"`` is set and the text matches ``ms_pat`` or
    ``ws_pat``), ``w:cr`` and ``w:br`` become ``BR`` elements,
    ``w:drawing``/``w:pict`` become whatever ``self.images.to_html`` yields,
    and foot/endnote references become ``SUP`` links.

    Returns the converted element.
    """
    ans = SPAN()
    self.object_map[ans] = run
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            preserve = False
            if space == 'preserve':
                # Only use a <span> with white-space:pre-wrap if this element
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(child.text) is not None
                preserve = multi_spaces or self.ws_pat.search(child.text) is not None
            if preserve:
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # WordprocessingML attributes are namespace-qualified, so
                # look for w:clear first; the bare name is kept as a
                # fallback for backward compatibility.
                clear = get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                l.set('class', 'noteref')
                text.add_elem(l)
                ans.append(text.elem)

    # Flush text buffered since the last inserted element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute, not an XML
        # attribute, so it will not serialise as lang="..." - confirm
        # whether ans.set('lang', ...) was intended.
        ans.lang = style.lang
    return ans
def convert_run(self, run):
    """Convert one run element into an HTML ``SPAN`` (or ``sub``/``sup``).

    The new element is registered in ``self.object_map`` with ``run`` as its
    value. Children of the run are translated as follows: ``w:t`` becomes
    text (wrapped in a ``white-space:pre-wrap`` span when
    ``xml:space="preserve"`` is set), ``w:cr`` and ``w:br`` become ``BR``
    elements, ``w:drawing``/``w:pict`` become whatever
    ``self.images.to_html`` yields, foot/endnote references become ``SUP``
    links, and a ``w:fldChar`` of type ``separate`` contributes a
    non-breaking space.

    Returns the converted element.
    """
    ans = SPAN()
    self.object_map[ans] = run
    text = Text(ans, 'text', [])

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            if space == 'preserve':
                text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(child.text)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            # WordprocessingML attributes are namespace-qualified
            # (w:type, w:clear). Reading the bare names never matched, so
            # page and column breaks were silently turned into plain
            # line breaks. The bare names remain as a fallback.
            typ = get(child, 'w:type') or child.get('type', None)
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                clear = get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                l.set('class', 'noteref')
                text.add_elem(l)
                ans.append(text.elem)
        elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
            text.buf.append('\xa0')

    # Flush text buffered since the last inserted element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        # NOTE(review): this assigns a Python attribute, not an XML
        # attribute, so it will not serialise as lang="..." - confirm
        # whether ans.set('lang', ...) was intended.
        ans.lang = style.lang
    return ans
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
    """Build the page for feed number ``f`` into ``self.root``.

    The page consists of a top navbar, the feed title, an optional feed
    image and description, a list of the downloaded articles (title link,
    date and, when present, a summary shortened by ``cutoff``) and a bottom
    navbar. The root element gets a ``lang`` attribute when
    ``self.html_lang`` is set.
    """
    from calibre.utils.cleantext import clean_xml_chars

    feed = feeds[f]
    head = HEAD(TITLE(feed.title))
    for css in (style, extra_css):
        if css:
            head.append(STYLE(css, type='text/css'))

    body = BODY()
    body.append(self.get_navbar(f, feeds))

    container = DIV(
        H2(feed.title, CLASS('calibre_feed_title', 'calibre_rescale_160')),
        CLASS('calibre_rescale_100')
    )
    body.append(container)

    if getattr(feed, 'image', None):
        image = IMG(alt=feed.image_alt or '', src=feed.image_url)
        container.append(DIV(image, CLASS('calibre_feed_image')))

    if getattr(feed, 'description', None):
        description = DIV(clean_xml_chars(feed.description),
            CLASS('calibre_feed_description', 'calibre_rescale_80'))
        description.append(BR())
        container.append(description)

    article_list = UL(CLASS('calibre_article_list'))
    for index, article in enumerate(feed.articles):
        # Articles that were not downloaded get no entry; the index still
        # counts them so that ids stay aligned with the article numbering.
        if not getattr(article, 'downloaded', False):
            continue
        entry = LI(
            A(article.title, CLASS('article calibre_rescale_120',
                href=article.url)),
            SPAN(article.formatted_date, CLASS('article_date')),
            CLASS('calibre_rescale_100', id='article_%d'%index,
                style='padding-bottom:0.5em')
        )
        if article.summary:
            summary = clean_xml_chars(cutoff(article.text_summary))
            entry.append(DIV(summary,
                CLASS('article_description', 'calibre_rescale_70')))
        article_list.append(entry)
    container.append(article_list)

    container.append(self.get_navbar(f, feeds, top=False))

    self.root = HTML(head, body)
    if self.html_lang:
        self.root.set('lang', self.html_lang)
def donode(item, parent, base, subpath):
    """Recursively append one ``DIV`` with a link per titled child of ``item``.

    For each child with a title, the link target is the basename of the
    child's unquoted href placed under ``subpath``; if that file does not
    exist below ``base`` but the unquoted href itself does, the href is used
    instead. Targets without a ``%`` are URL-quoted, and a child with no
    href gets an empty target. Children without a title are skipped together
    with their descendants.
    """
    for entry in item:
        label = entry.title
        if not label:
            continue

        raw = unquote_path(entry.href or '')
        target = os.path.join(subpath, os.path.basename(raw))

        in_subpath = os.path.exists(os.path.join(base, target))
        if not in_subpath and os.path.exists(os.path.join(base, raw)):
            target = raw
        if '%' not in target:
            target = urlquote(target)
        if not raw:
            target = ''

        node = DIV(A(label, href=target))
        donode(entry, node, base, subpath)
        parent.append(node)
def clean_stars(div: HtmlElement) -> None:
    """Tidy the markup of the "Unspeakable Desolation" piece in place.

    Drops every ``p[strong[em]]`` and ``hr`` element, inserts an ``H1`` title
    at the top, turns the first paragraph into a title-cased ``H2`` and
    replaces the ``p[strong[a]]`` paragraph with a ``breakabove`` paragraph
    holding a single ``internal`` link.
    """
    for xpath in ("p[strong[em]]", ".//hr"):
        for unwanted in elements(div, xpath):
            unwanted.drop_tree()

    div.insert(0, H1("Unspeakable Desolation Pouring Down From the Stars"))

    first_par = element(div, "./p[1]")
    replace(first_par, H2(first_par.text_content().title()))

    link_par = element(div, "./p[strong[a]]")
    anchor = element(div, "./p/strong/a")
    link = A(link_par.text_content(), CLASS("internal"), href=anchor.attrib["href"])
    replace(link_par, P(CLASS("breakabove"), link))
def __call__(self):
    """Run the conversion of the loaded document and return ``self.write(doc)``.

    The steps are, in order: read fields, styles, images and page
    properties; convert every paragraph in ``self.page_map``; append the
    foot/endnotes; turn leading tab runs into ``text-indent``; resolve
    links; apply table, numbering and frame markup; assign CSS classes; and
    finally clean up the generated markup.
    """
    doc = self.docx.document
    relationships_by_id, relationships_by_type = self.docx.document_relationships
    self.fields(doc, self.log)
    self.read_styles(relationships_by_type)
    self.images(relationships_by_id)
    # Per-conversion state, reset on every call.
    self.layers = OrderedDict()
    self.framed = [[]]
    self.frame_map = {}
    self.framed_map = {}
    self.anchor_map = {}
    self.link_map = defaultdict(list)
    self.link_source_map = {}
    self.toc_anchor = None
    self.block_runs = []
    paras = []

    self.log.debug('Converting Word markup to HTML')

    self.read_page_properties(doc)
    self.resolve_alternate_content(doc)
    self.current_rels = relationships_by_id
    for wp, page_properties in iteritems(self.page_map):
        self.current_page = page_properties
        # Only paragraph elements are converted in this loop.
        if wp.tag.endswith('}p'):
            p = self.convert_p(wp)
            self.body.append(p)
            paras.append(wp)

    self.read_block_anchors(doc)
    self.styles.apply_contextual_spacing(paras)
    self.mark_block_runs(paras)
    # Apply page breaks at the start of every section, except the first
    # section (since that will be the start of the file)
    self.styles.apply_section_page_breaks(self.section_starts[1:])

    notes_header = None
    # Remembered so it can be restored once the notes have been converted.
    orig_rid_map = self.images.rid_map
    if self.footnotes.has_notes:
        self.body.append(H1(self.notes_text))
        notes_header = self.body[-1]
        notes_header.set('class', 'notes-header')
        for anchor, text, note in self.footnotes:
            dl = DL(id=anchor)
            dl.set('class', 'footnote')
            self.body.append(dl)
            dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text)))
            # dl[-1][0] is the back-link; its tail closes the bracket.
            dl[-1][0].tail = ']'
            dl.append(DD())
            paras = []
            # While converting a note, use the relationships of that note.
            self.images.rid_map = self.current_rels = note.rels[0]
            for wp in note:
                if wp.tag.endswith('}tbl'):
                    self.tables.register(wp, self.styles)
                    self.page_map[wp] = self.current_page
                else:
                    p = self.convert_p(wp)
                    dl[-1].append(p)
                    paras.append(wp)
            self.styles.apply_contextual_spacing(paras)
            self.mark_block_runs(paras)

    for p, wp in iteritems(self.object_map):
        if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
            # Paragraph uses tabs for indentation, convert to text-indent
            parent = p[0]
            tabs = []
            # Collect the leading run of tab spans, stopping at the first
            # non-tab child or the first tab that is followed by text.
            for child in parent:
                if child.get('class', None) == 'tab':
                    tabs.append(child)
                    if child.tail:
                        break
                else:
                    break
            indent = len(tabs) * self.settings.default_tab_stop
            style = self.styles.resolve(wp)
            # Only handled when text-indent is inherited or given in pt.
            if style.text_indent is inherit or (hasattr(style.text_indent, 'endswith') and style.text_indent.endswith('pt')):
                if style.text_indent is not inherit:
                    indent = float(style.text_indent[:-2]) + indent
                style.text_indent = '%.3gpt' % indent
                # Keep the text that followed the last removed tab.
                parent.text = tabs[-1].tail or ''
                list(map(parent.remove, tabs))

    self.images.rid_map = orig_rid_map

    self.resolve_links()

    self.styles.cascade(self.layers)

    self.tables.apply_markup(self.object_map, self.page_map)

    numbered = []
    for html_obj, obj in iteritems(self.object_map):
        raw = obj.get('calibre_num_id', None)
        if raw is not None:
            # raw has the form "<level>:<num_id>".
            lvl, num_id = raw.partition(':')[0::2]
            try:
                lvl = int(lvl)
            except (TypeError, ValueError):
                lvl = 0
            numbered.append((html_obj, num_id, lvl))
    self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images)
    self.apply_frames()

    # Put each top-level child of the body on its own tab-indented line.
    if len(self.body) > 0:
        self.body.text = '\n\t'
        for child in self.body:
            child.tail = '\n\t'
        self.body[-1].tail = '\n'

    self.log.debug('Converting styles to CSS')
    self.styles.generate_classes()
    for html_obj, obj in iteritems(self.object_map):
        style = self.styles.resolve(obj)
        if style is not None:
            css = style.css
            if css:
                cls = self.styles.class_name(css)
                if cls:
                    html_obj.set('class', cls)
    for html_obj, css in iteritems(self.framed_map):
        cls = self.styles.class_name(css)
        if cls:
            html_obj.set('class', cls)

    if notes_header is not None:
        # Copy tag and class from the first heading found in the body;
        # the unconditional break means only that first one is looked at.
        for h in self.namespace.children(self.body, 'h1', 'h2', 'h3'):
            notes_header.tag = h.tag
            cls = h.get('class', None)
            if cls and cls != 'notes-header':
                notes_header.set('class', '%s notes-header' % cls)
            break

    self.fields.polish_markup(self.object_map)

    self.log.debug('Cleaning up redundant markup generated by Word')
    self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)

    return self.write(doc)
def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, prefix):
    """Build the complete mobile index page and return its ``HTML`` element.

    The page holds the logo, the search box, a navigation bar above and
    below the book table, one table row per entry in ``books`` (thumbnail,
    one download button per format, title/series/author line and a
    size/timestamp/tags/custom-field line) and a link to the full
    interface. ``prefix`` is prepended to every server-relative URL.
    ``CKEYS`` lists the custom-field keys whose values have the form
    ``name:#:value``.
    """
    logo = DIV(IMG(src=prefix + '/static/calibre.png', alt=__appname__), id='logo')
    search_box = build_search_box(num, search, sort, order, prefix)
    # Two separate bars are built because an element can only sit at one
    # place in the tree.
    navigation = build_navigation(start, num, total, prefix + url_base)
    navigation2 = build_navigation(start, num, total, prefix + url_base)
    bookt = TABLE(id='listing')

    body = BODY(
        logo,
        search_box,
        navigation,
        HR(CLASS('spacer')),
        bookt,
        HR(CLASS('spacer')),
        navigation2
    )

    # Book list {{{
    for book in books:
        thumbnail = TD(
            IMG(type='image/jpeg', border='0',
                src=prefix + '/get/thumb/%s' % book['id']),
            CLASS('thumbnail'))

        data = TD()
        # The quoted author and title are the same for every format, so
        # they are computed once per book.
        a = quote(ascii_filename(book['authors']))
        t = quote(ascii_filename(book['title']))
        for fmt in book['formats'].split(','):
            if not fmt or fmt.lower().startswith('original_'):
                continue
            s = SPAN(
                A(fmt.lower(),
                  href=prefix + '/get/%s/%s-%s_%d.%s' % (
                      fmt, a, t, book['id'], fmt.lower())),
                CLASS('button'))
            s.tail = u''
            data.append(s)

        div = DIV(CLASS('data-container'))
        data.append(div)

        series = u'[%s - %s]'%(book['series'], book['series_index']) \
            if book['series'] else ''
        tags = u'Tags=[%s]' % book['tags'] if book['tags'] else ''

        custom_parts = []
        for key in CKEYS:
            val = book.get(key, None)
            if val:
                # partition() always gives exactly one name and one value,
                # so a missing or repeated ':#:' can no longer make the
                # format operation raise.
                name, sep, value = val.partition(':#:')
                custom_parts.append('%s=[%s] ' % (name, value))
        ctext = ''.join(custom_parts)

        first = SPAN(
            u'\u202f%s %s by %s' % (book['title'], series, book['authors']),
            CLASS('first-line'))
        div.append(first)
        second = SPAN(
            u'%s - %s %s %s' % (book['size'], book['timestamp'], tags, ctext),
            CLASS('second-line'))
        div.append(second)

        bookt.append(TR(thumbnail, data))
    # }}}

    body.append(
        DIV(A(_('Switch to the full interface (non-mobile interface)'),
              href=prefix + "/browse",
              style="text-decoration: none; color: blue",
              title=_('The full interface gives you many more features, '
                      'but it may not work well on a small screen')),
            style="text-align:center"))

    return HTML(
        HEAD(
            TITLE(__appname__ + ' Library'),
            LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
                 type='image/x-icon'),
            LINK(rel='stylesheet', type='text/css',
                 href=prefix + '/mobile/style.css'),
            # Uses prefix like every other server-relative URL on the
            # page, so the icon also resolves behind a URL prefix.
            LINK(rel='apple-touch-icon', href=prefix + "/static/calibre.png")),
        body)
def resolve_links(self):
    """Turn the collected hyperlinks into ``<a>`` elements.

    Three sources are processed in turn: hyperlink elements recorded in
    ``self.link_map``, hyperlink fields from ``self.fields.hyperlink_fields``
    and linked images from ``self.images.links``. Converted hyperlink
    elements are also recorded in ``self.resolved_link_map``.
    """
    self.resolved_link_map = {}
    for hyperlink, spans in iteritems(self.link_map):
        relationships_by_id = self.link_source_map[hyperlink]
        span = spans[0]
        # Several spans are wrapped in a single element that becomes the link.
        if len(spans) > 1:
            span = self.wrap_elems(spans, SPAN())
        span.tag = 'a'
        self.resolved_link_map[hyperlink] = span
        tgt = self.namespace.get(hyperlink, 'w:tgtFrame')
        if tgt:
            span.set('target', tgt)
        tt = self.namespace.get(hyperlink, 'w:tooltip')
        if tt:
            span.set('title', tt)
        # A relationship id is tried first, then an anchor.
        rid = self.namespace.get(hyperlink, 'r:id')
        if rid and rid in relationships_by_id:
            span.set('href', relationships_by_id[rid])
            continue
        anchor = self.namespace.get(hyperlink, 'w:anchor')
        if anchor and anchor in self.anchor_map:
            span.set('href', '#' + self.anchor_map[anchor])
            continue
        self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), ignoring' %
                      (rid, anchor))
        # hrefs that point nowhere give epubcheck a hernia. The element
        # should be styled explicitly by Word anyway.
        # span.set('href', '#')

    # Reverse of object_map: source object -> generated HTML element.
    rmap = {v:k for k, v in iteritems(self.object_map)}
    for hyperlink, runs in self.fields.hyperlink_fields:
        spans = [rmap[r] for r in runs if r in rmap]
        if not spans:
            continue
        span = spans[0]
        if len(spans) > 1:
            span = self.wrap_elems(spans, SPAN())
        span.tag = 'a'
        tgt = hyperlink.get('target', None)
        if tgt:
            span.set('target', tgt)
        tt = hyperlink.get('title', None)
        if tt:
            span.set('title', tt)
        url = hyperlink.get('url', None)
        if url is None:
            anchor = hyperlink.get('anchor', None)
            if anchor in self.anchor_map:
                span.set('href', '#' + self.anchor_map[anchor])
                continue
            self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
        else:
            # A url that names a known anchor becomes an in-document link.
            if url in self.anchor_map:
                span.set('href', '#' + self.anchor_map[url])
                continue
            span.set('href', url)

    for img, link, relationships_by_id in self.images.links:
        parent = img.getparent()
        idx = parent.index(img)
        # Wrap the image in a new <a> that takes over its position and tail.
        a = A(img)
        a.tail, img.tail = img.tail, None
        parent.insert(idx, a)
        tgt = link.get('target', None)
        if tgt:
            a.set('target', tgt)
        tt = link.get('title', None)
        if tt:
            a.set('title', tt)
        rid = link['id']
        if rid in relationships_by_id:
            dest = relationships_by_id[rid]
            if dest.startswith('#'):
                # Internal destinations get an href only when the anchor is known.
                if dest[1:] in self.anchor_map:
                    a.set('href', '#' + self.anchor_map[dest[1:]])
            else:
                a.set('href', dest)
def __call__(self):
    """Run the conversion of the loaded document and return ``self.write(doc)``.

    The steps are, in order: read fields, styles, images and page
    properties; convert every paragraph in ``self.page_map``; append the
    foot/endnotes as a definition list; resolve links; apply table,
    numbering and frame markup; assign CSS classes; and finally clean up the
    generated markup.
    """
    doc = self.docx.document
    relationships_by_id, relationships_by_type = self.docx.document_relationships
    self.fields(doc, self.log)
    self.read_styles(relationships_by_type)
    self.images(relationships_by_id)
    # Per-conversion state, reset on every call.
    self.layers = OrderedDict()
    self.framed = [[]]
    self.framed_map = {}
    self.anchor_map = {}
    self.link_map = defaultdict(list)
    paras = []

    self.log.debug('Converting Word markup to HTML')

    self.read_page_properties(doc)
    for wp, page_properties in self.page_map.iteritems():
        self.current_page = page_properties
        # Only paragraph elements are converted in this loop.
        if wp.tag.endswith('}p'):
            p = self.convert_p(wp)
            self.body.append(p)
            paras.append(wp)

    self.read_block_anchors(doc)
    self.styles.apply_contextual_spacing(paras)
    # Apply page breaks at the start of every section, except the first
    # section (since that will be the start of the file)
    self.styles.apply_section_page_breaks(self.section_starts[1:])

    notes_header = None
    if self.footnotes.has_notes:
        dl = DL()
        dl.set('class', 'notes')
        self.body.append(H1(self.notes_text))
        notes_header = self.body[-1]
        notes_header.set('class', 'notes-header')
        self.body.append(dl)
        for anchor, text, note in self.footnotes:
            dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
            # dl[-1][0] is the back-link; its tail closes the bracket.
            dl[-1][0].tail = ']'
            dl.append(DD())
            paras = []
            for wp in note:
                if wp.tag.endswith('}tbl'):
                    self.tables.register(wp, self.styles)
                    self.page_map[wp] = self.current_page
                else:
                    p = self.convert_p(wp)
                    dl[-1].append(p)
                    paras.append(wp)
            self.styles.apply_contextual_spacing(paras)

    self.resolve_links(relationships_by_id)

    self.styles.cascade(self.layers)

    self.tables.apply_markup(self.object_map, self.page_map)

    numbered = []
    for html_obj, obj in self.object_map.iteritems():
        raw = obj.get('calibre_num_id', None)
        if raw is not None:
            # raw has the form "<level>:<num_id>".
            lvl, num_id = raw.partition(':')[0::2]
            try:
                lvl = int(lvl)
            except (TypeError, ValueError):
                lvl = 0
            numbered.append((html_obj, num_id, lvl))
    self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images)
    self.apply_frames()

    # Put each top-level child of the body on its own tab-indented line.
    if len(self.body) > 0:
        self.body.text = '\n\t'
        for child in self.body:
            child.tail = '\n\t'
        self.body[-1].tail = '\n'

    self.log.debug('Converting styles to CSS')
    self.styles.generate_classes()
    for html_obj, obj in self.object_map.iteritems():
        style = self.styles.resolve(obj)
        if style is not None:
            css = style.css
            if css:
                cls = self.styles.class_name(css)
                if cls:
                    html_obj.set('class', cls)
    for html_obj, css in self.framed_map.iteritems():
        cls = self.styles.class_name(css)
        if cls:
            html_obj.set('class', cls)

    if notes_header is not None:
        # Copy tag and class from the first heading found in the body;
        # the unconditional break means only that first one is looked at.
        for h in children(self.body, 'h1', 'h2', 'h3'):
            notes_header.tag = h.tag
            cls = h.get('class', None)
            if cls and cls != 'notes-header':
                notes_header.set('class', '%s notes-header' % cls)
            break

    self.log.debug('Cleaning up redundant markup generated by Word')
    self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover)

    return self.write(doc)
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
    """Build the touchscreen page for feed number ``f`` into ``self.root``.

    The page consists of a navigation table (previous section, section
    list, next section), the feed title, an optional feed image and
    description, one summary block per downloaded article (headline link,
    optional byline, optional summary shortened by ``cutoff``) and a copy
    of the navigation table at the bottom.
    """
    from calibre.utils.cleantext import clean_xml_chars

    def trim_title(title,clip=18):
        # Shorten a title longer than clip characters: keep whole words
        # while their combined length stays below clip, then add '...'.
        if len(title)>clip:
            tokens = title.split(' ')
            new_title_tokens = []
            new_title_len = 0
            if len(tokens[0]) > clip:
                return tokens[0][:clip] + '...'
            for token in tokens:
                if len(token) + new_title_len < clip:
                    new_title_tokens.append(token)
                    new_title_len += len(token)
                else:
                    new_title_tokens.append('...')
                    title = ' '.join(new_title_tokens)
                    break
        return title

    self.IS_HTML = False
    feed = feeds[f]

    # Construct the navbar
    navbar_t = TABLE(CLASS('touchscreen_navbar'))
    navbar_tr = TR()

    # Previous Section
    link = ''
    if f > 0:
        link = A(CLASS('feed_link'),
                 trim_title(feeds[f-1].title),
                 href='../feed_%d/index.html' % int(f-1))
    navbar_tr.append(TD(CLASS('feed_prev'),link))

    # Up to Sections
    link = A(_('Sections'), href="../index.html")
    navbar_tr.append(TD(CLASS('feed_up'),link))

    # Next Section
    link = ''
    if f < len(feeds)-1:
        link = A(CLASS('feed_link'),
                 trim_title(feeds[f+1].title),
                 href='../feed_%d/index.html' % int(f+1))
    navbar_tr.append(TD(CLASS('feed_next'),link))
    navbar_t.append(navbar_tr)
    top_navbar = navbar_t
    # A separate copy is needed because an element can only sit at one
    # place in the tree.
    bottom_navbar = copy.copy(navbar_t)

    # Build the page
    head = HEAD(TITLE(feed.title))
    if style:
        head.append(STYLE(style, type='text/css'))
    if extra_css:
        head.append(STYLE(extra_css, type='text/css'))
    body = BODY()
    div = DIV(
        top_navbar,
        H2(feed.title, CLASS('feed_title'))
    )
    body.append(div)

    if getattr(feed, 'image', None):
        div.append(DIV(IMG(
            alt=feed.image_alt if feed.image_alt else '',
            src=feed.image_url
        ),
            CLASS('calibre_feed_image')))
    if getattr(feed, 'description', None):
        d = DIV(clean_xml_chars(feed.description), CLASS('calibre_feed_description',
            'calibre_rescale_80'))
        d.append(BR())
        div.append(d)

    for i, article in enumerate(feed.articles):
        if not getattr(article, 'downloaded', False):
            continue

        div_td = DIV(CLASS('article_summary'),
                     A(article.title, CLASS('summary_headline','calibre_rescale_120',
                         href=article.url)))
        if article.author:
            div_td.append(DIV(article.author,
                CLASS('summary_byline', 'calibre_rescale_100')))
        if article.summary:
            # Cleaned like the feed description above: characters that are
            # not valid in XML would otherwise be rejected by the builder.
            div_td.append(DIV(clean_xml_chars(cutoff(article.text_summary)),
                CLASS('summary_text', 'calibre_rescale_100')))
        div.append(div_td)

    div.append(bottom_navbar)
    self.root = HTML(head, body)
    if self.html_lang:
        self.root.set('lang', self.html_lang)
def convert_run(self, run):
    """Convert one run element into an HTML ``SPAN`` (or ``sub``/``sup``).

    The new element is registered in ``self.object_map`` with ``run`` as its
    value. Children of the run are translated as follows: ``w:t`` becomes
    text (stripped unless ``xml:space="preserve"`` is set, and wrapped in a
    ``white-space:pre-wrap`` span when it matches ``ms_pat`` or ``ws_pat``),
    ``w:cr`` and ``w:br`` become ``BR`` elements, ``w:drawing``/``w:pict``
    become whatever ``self.images.to_html`` yields, foot/endnote references
    become links, ``w:tab`` becomes a span of non-breaking spaces with
    class ``tab``, and the two hyphen elements become U+2011 and U+00AD.

    After the children, the resolved run style decides the tag
    (``sub``/``sup``), the ``lang`` and ``dir`` attributes and whether the
    text is remapped from a symbol font.

    Returns the converted element.
    """
    ans = SPAN()
    self.object_map[ans] = run
    text = Text(ans, 'text', [])
    is_tag = self.namespace.is_tag

    for child in run:
        if is_tag(child, 'w:t'):
            if not child.text:
                continue
            space = child.get(XML('space'), None)
            ctext = child.text
            if space != 'preserve':
                # Remove leading and trailing whitespace. Word ignores
                # leading and trailing whitespace without preserve
                ctext = ctext.strip(' \n\r\t')
            # Only use a <span> with white-space:pre-wrap if this element
            # actually needs it, i.e. if it has more than one
            # consecutive space or it has newlines or tabs.
            multi_spaces = self.ms_pat.search(ctext) is not None
            preserve = multi_spaces or self.ws_pat.search(ctext) is not None
            if preserve:
                text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                ans.append(text.elem)
            else:
                text.buf.append(ctext)
        elif is_tag(child, 'w:cr'):
            text.add_elem(BR())
            ans.append(text.elem)
        elif is_tag(child, 'w:br'):
            typ = self.namespace.get(child, 'w:type')
            if typ in {'column', 'page'}:
                br = BR(style='page-break-after:always')
            else:
                # WordprocessingML attributes are namespace-qualified, so
                # look for w:clear first; the bare name is kept as a
                # fallback for backward compatibility.
                clear = self.namespace.get(child, 'w:clear') or child.get('clear', None)
                if clear in {'all', 'left', 'right'}:
                    br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
                else:
                    br = BR()
            text.add_elem(br)
            ans.append(text.elem)
        elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
            for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                text.add_elem(img)
                ans.append(text.elem)
        elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
            anchor, name = self.footnotes.get_ref(child)
            if anchor and name:
                l = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name)
                l.set('class', 'noteref')
                text.add_elem(l)
                ans.append(text.elem)
        elif is_tag(child, 'w:tab'):
            spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
            text.add_elem(SPAN(NBSP * spaces))
            ans.append(text.elem)
            ans[-1].set('class', 'tab')
        elif is_tag(child, 'w:noBreakHyphen'):
            text.buf.append('\u2011')
        elif is_tag(child, 'w:softHyphen'):
            text.buf.append('\u00ad')

    # Flush text buffered since the last inserted element.
    if text.buf:
        setattr(text.elem, text.attr, ''.join(text.buf))

    style = self.styles.resolve_run(run)
    if style.vert_align in {'superscript', 'subscript'}:
        ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
    if style.lang is not inherit:
        lang = html_lang(style.lang)
        # The attribute is only written when it differs from self.doc_lang.
        if lang is not None and lang != self.doc_lang:
            ans.set('lang', lang)
    if style.rtl is True:
        ans.set('dir', 'rtl')
    if is_symbol_font(style.font_family):
        for elem in text:
            if elem.text:
                elem.text = map_symbol_text(elem.text, style.font_family)
            if elem.tail:
                elem.tail = map_symbol_text(elem.tail, style.font_family)
        style.font_family = 'sans-serif'
    return ans
def __call__(self):
    """Run the conversion of the loaded document and return ``self.write()``.

    The steps are, in order: read styles, images and page properties;
    convert every entry of ``self.page_map``; append the foot/endnotes as a
    definition list; resolve links; apply table, numbering and frame markup;
    and assign CSS classes.
    """
    doc = self.docx.document
    relationships_by_id, relationships_by_type = self.docx.document_relationships
    self.read_styles(relationships_by_type)
    self.images(relationships_by_id)
    # Per-conversion state, reset on every call.
    self.layers = OrderedDict()
    self.framed = [[]]
    self.framed_map = {}
    self.anchor_map = {}
    self.link_map = defaultdict(list)

    self.read_page_properties(doc)
    for wp, page_properties in self.page_map.iteritems():
        self.current_page = page_properties
        p = self.convert_p(wp)
        self.body.append(p)

    notes_header = None
    if self.footnotes.has_notes:
        dl = DL()
        dl.set('class', 'notes')
        self.body.append(H1(self.notes_text))
        notes_header = self.body[-1]
        notes_header.set('class', 'notes-header')
        self.body.append(dl)
        for anchor, text, note in self.footnotes:
            dl.append(
                DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
            # dl[-1][0] is the back-link; its tail closes the bracket.
            dl[-1][0].tail = ']'
            dl.append(DD())
            in_table = False
            for wp in note:
                if wp.tag.endswith('}tbl'):
                    self.tables.register(wp)
                    in_table = True
                    continue
                if in_table:
                    # NOTE(review): presumably elements following a table
                    # are handed to it for as long as they sit inside a
                    # w:tbl - confirm against self.tables.add.
                    if ancestor(wp, 'w:tbl') is not None:
                        self.tables.add(wp)
                    else:
                        in_table = False
                p = self.convert_p(wp)
                dl[-1].append(p)

    self.resolve_links(relationships_by_id)

    self.styles.cascade(self.layers)

    self.tables.apply_markup(self.object_map)

    numbered = []
    for html_obj, obj in self.object_map.iteritems():
        raw = obj.get('calibre_num_id', None)
        if raw is not None:
            # raw has the form "<level>:<num_id>".
            lvl, num_id = raw.partition(':')[0::2]
            try:
                lvl = int(lvl)
            except (TypeError, ValueError):
                lvl = 0
            numbered.append((html_obj, num_id, lvl))
    self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
    self.apply_frames()

    # Put each top-level child of the body on its own tab-indented line.
    if len(self.body) > 0:
        self.body.text = '\n\t'
        for child in self.body:
            child.tail = '\n\t'
        self.body[-1].tail = '\n'

    self.styles.generate_classes()
    for html_obj, obj in self.object_map.iteritems():
        style = self.styles.resolve(obj)
        if style is not None:
            css = style.css
            if css:
                cls = self.styles.class_name(css)
                if cls:
                    html_obj.set('class', cls)
    for html_obj, css in self.framed_map.iteritems():
        cls = self.styles.class_name(css)
        if cls:
            html_obj.set('class', cls)

    if notes_header is not None:
        # Copy tag and class from the first heading found in the body;
        # the unconditional break means only that first one is looked at.
        for h in self.body.iterchildren('h1', 'h2', 'h3'):
            notes_header.tag = h.tag
            cls = h.get('class', None)
            if cls and cls != 'notes-header':
                notes_header.set('class', '%s notes-header' % cls)
            break

    return self.write()