def link_replacer(base, url):
    """Rewrite ``url`` (found in the file named ``base``) to a resource URL.

    Links that are not rewritten are returned unchanged, except that a
    relative link which does not resolve to a name is returned as its bare
    path (the fragment is dropped). Whenever a link is rewritten, ``base`` is
    recorded in the enclosing scope's ``changed`` set.

    ``self``, ``changed`` and ``resource_template`` are free variables taken
    from the enclosing scope.
    """
    if url.startswith('#'):
        # Fragment-only link: it targets an anchor inside ``base`` itself.
        anchor = urlunquote(url[1:])
        if anchor:
            changed.add(base)
            return resource_template.format(encode_url(base, anchor))
        return url

    parsed = urlparse(url)
    # Anything with a host, a query string or a non-file scheme is left alone.
    if parsed.netloc or parsed.query:
        return url
    if parsed.scheme and parsed.scheme != 'file':
        return url

    path = parsed.path
    # Empty and absolute paths are not rewritten.
    if not path or path.startswith('/'):
        return url

    name = self.href_to_name(path, base)
    if not name:
        return path

    if self.has_name_and_is_not_empty(name):
        anchor = urlunquote(parsed.fragment)
        rewritten = resource_template.format(encode_url(name, anchor))
    else:
        # The target is missing or empty: emit a marker URL instead.
        if isinstance(name, unicode_type):
            name = name.encode('utf-8')
        rewritten = 'missing:' + force_unicode(quote(name), 'utf-8')
    changed.add(base)
    return rewritten
def link_replacer(base, url):
    """Map a link inside the file ``base`` onto the resource URL template.

    Returns the rewritten link, or the input unchanged when the link has a
    network location, a query, a non-``file`` scheme, or an empty/absolute
    path. A relative link that resolves to no name is returned as its bare
    path. ``base`` is added to the outer ``changed`` set on every rewrite.

    ``container``, ``changed`` and ``resource_template`` come from the
    enclosing scope.
    """
    if url.startswith('#'):
        # Link to an anchor within the same file.
        anchor = urlunquote(url[1:])
        if not anchor:
            return url
        changed.add(base)
        return resource_template.format(encode_url(base, anchor))

    parts = urlparse(url)
    external = parts.netloc or parts.query
    foreign_scheme = parts.scheme and parts.scheme != 'file'
    if external or foreign_scheme:
        return url
    if not parts.path or parts.path.startswith('/'):
        return url

    target_path = parts.path
    name = container.href_to_name(target_path, base)
    if not name:
        return target_path

    if container.has_name_and_is_not_empty(name):
        result = resource_template.format(
            encode_url(name, urlunquote(parts.fragment)))
    else:
        # Missing or empty target: produce a "missing:" marker URL.
        raw_name = name.encode('utf-8') if isinstance(name, unicode_type) else name
        result = 'missing:' + force_unicode(quote(raw_name), 'utf-8')
    changed.add(base)
    return result
def __call__(self, oeb, opts):
    """Replace ``data:image/...`` URIs on <img> tags with converted images.

    Every spine item whose data exposes ``xpath`` is scanned. Each embedded
    image payload is decoded, its format sniffed, and handed to
    ``self.convert_image_data_uri``; the <img> ``src`` is then pointed at the
    returned href, made relative to the item. Undecodable or unrecognised
    payloads are logged and skipped.
    """
    self.log = oeb.log
    find_images = XPath('//h:img[@src]')
    for item in oeb.spine:
        root = item.data
        if not hasattr(root, 'xpath'):
            continue
        for img in find_images(root):
            src = img.get('src', '')
            if not src.startswith('data:'):
                continue
            # Split "data:<header>,<payload>" at the first comma.
            header, _sep, payload = src.partition(',')
            if not (header.startswith('data:image/') and payload):
                continue
            if ';base64' in header:
                payload = re.sub(r'\s+', '', payload)
                from polyglot.binary import from_base64_bytes
                try:
                    payload = from_base64_bytes(payload)
                except Exception:
                    self.log.error('Found invalid base64 encoded data URI, ignoring it')
                    continue
            else:
                payload = urlunquote(payload)
            from imghdr import what
            fmt = what(None, payload)
            if not fmt:
                self.log.warn('Image encoded as data URL has unknown format, ignoring')
                continue
            new_href = self.convert_image_data_uri(payload, fmt, oeb)
            img.set('src', item.relhref(new_href))
def __call__(self, oeb, opts):
    """Convert images embedded as ``data:`` URIs into separate resources.

    Walks the <img src> attributes of every spine item that has an ``xpath``
    method. Base64 payloads are decoded with ``b64decode``, other payloads
    are URL-unquoted. Images whose payload cannot be decoded or whose format
    cannot be detected are logged and left untouched.
    """
    self.log = oeb.log
    img_with_src = XPath('//h:img[@src]')
    for item in oeb.spine:
        root = item.data
        if not hasattr(root, 'xpath'):
            continue
        for img in img_with_src(root):
            uri = img.get('src', '')
            if not uri.startswith('data:'):
                continue
            # Header is everything before the first comma, payload the rest.
            pieces = uri.partition(',')
            header, body = pieces[0], pieces[2]
            if not header.startswith('data:image/'):
                continue
            if not body:
                continue
            if ';base64' in header:
                body = re.sub(r'\s+', '', body)
                from base64 import b64decode
                try:
                    body = b64decode(body)
                except Exception:
                    self.log.error('Found invalid base64 encoded data URI, ignoring it')
                    continue
            else:
                body = urlunquote(body)
            from imghdr import what
            fmt = what(None, body)
            if not fmt:
                self.log.warn('Image encoded as data URL has unknown format, ignoring')
                continue
            replacement = self.convert_image_data_uri(body, fmt, oeb)
            img.set('src', item.relhref(replacement))
def create_image_markup(self, html_img, stylizer, href, as_block=False):
    """Build the ``w:drawing`` element for ``html_img``.

    :param html_img: The source <img> element.
    :param stylizer: Provides the computed style via ``stylizer.style()``.
    :param href: Key into ``self.images`` identifying the image.
    :param as_block: If True the image is laid out as a block: horizontal
        alignment is derived from auto margins and text wraps above/below.
    :return: The newly created ``w:drawing`` element.

    Increments ``self.count`` as a side effect.
    """
    # TODO: img inside a link (clickable image)
    style = stylizer.style(html_img)
    floating = style['float']
    if floating not in {'left', 'right'}:
        floating = None
    if as_block:
        ml, mr = style._get('margin-left'), style._get('margin-right')
        # An auto margin absorbs the free space on its side, so the image is
        # pushed towards the opposite side; both auto means centered.
        if ml == 'auto' and mr == 'auto':
            floating = 'center'
        elif ml == 'auto':
            floating = 'right'
        elif mr == 'auto':
            floating = 'left'
    else:
        parent = html_img.getparent()
        if len(parent) == 1 and not (parent.text or '').strip() and not (html_img.tail or '').strip():
            # We have an inline image alone inside a block
            pstyle = stylizer.style(parent)
            if pstyle['text-align'] in ('center', 'right') and 'block' in pstyle['display']:
                floating = pstyle['text-align']
    fake_margins = floating is None
    self.count += 1
    img = self.images[href]
    name = urlunquote(posixpath.basename(href))
    width, height = style.img_size(img.width, img.height)
    # Constrain the image to the page before converting to EMU.
    scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
    width, height = map(pt_to_emu, (width, height))

    makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces

    root = etree.Element('root', nsmap=namespaces)
    ans = makeelement(root, 'w:drawing', append=False)
    if floating is None:
        parent = makeelement(ans, 'wp:inline')
    else:
        parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
        # The next three lines are boilerplate that Word requires, even
        # though the DOCX specs define defaults for all of them
        parent.set('simplePos', '0')
        parent.set('relativeHeight', '1')
        parent.set('behindDoc', "0")
        parent.set('locked', "0")
        parent.set('layoutInCell', "1")
        parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
        makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
    makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
    if fake_margins:
        # DOCX does not support setting margins for inline images, so we
        # fake it by using effect extents to simulate margins
        makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in get_image_margins(style).iteritems()})
    else:
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
    if floating is not None:
        # The idiotic Word requires this to be after the extent settings
        if as_block:
            makeelement(parent, 'wp:wrapTopAndBottom')
        else:
            makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
    self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
    return ans
def url_to_local_path(cls, url, base):
    """Convert a parsed URL into a local filesystem path.

    :param url: A parsed URL exposing ``path``, ``params`` and ``query``.
    :param base: Directory that relative paths are resolved against.
    :return: The unquoted path; made absolute against ``base`` unless it is
        already absolute.
    """
    raw_path = url.path
    # On Windows a leading "/" is stripped and the remainder is treated as
    # an absolute path.
    strip_root = bool(iswindows and raw_path.startswith('/'))
    if strip_root:
        raw_path = raw_path[1:]
    rebuilt = urlunparse(('', '', raw_path, url.params, url.query, ''))
    local = urlunquote(rebuilt)
    if not (strip_root or os.path.isabs(local)):
        local = os.path.abspath(os.path.join(base, local))
    return local
def link_replacer(base, url):
    """Rewrite ``url`` from the file ``base`` using ``resource_template``.

    Fragment-only links are resolved against ``base``; relative links are
    resolved through ``self.href_to_name``. Links with a network location, a
    query, a non-``file`` scheme or an empty/absolute path are returned
    unchanged; a relative link that resolves to no name is returned as its
    bare path. ``base`` is added to the outer ``changed`` set on each rewrite.
    """
    if url.startswith('#'):
        anchor = urlunquote(url[1:])
        if anchor:
            changed.add(base)
            return resource_template.format(encode_url(base, anchor))
        return url

    parsed = urlparse(url)
    if parsed.netloc or parsed.query or (parsed.scheme and parsed.scheme != 'file'):
        return url
    path = parsed.path
    if not path or path.startswith('/'):
        return url

    name = self.href_to_name(path, base)
    if not name:
        return path
    anchor = urlunquote(parsed.fragment)
    rewritten = resource_template.format(encode_url(name, anchor))
    changed.add(base)
    return rewritten
def create_filename(self, href, fmt):
    """Return a unique file name for ``href`` with extension ``fmt``.

    The stem is the ASCII-fied, unquoted basename of ``href`` without its
    extension, truncated to 75 characters (``'image'`` if that leaves
    nothing). A numeric suffix is appended until the lower-cased stem is not
    in ``self.seen_filenames``; the chosen stem is then recorded there.
    """
    basename = urlunquote(posixpath.basename(href))
    stem = posixpath.splitext(ascii_filename(basename))[0]
    stem = stem[:75].rstrip('.') or 'image'

    candidate, suffix = stem, 0
    # Uniqueness is case-insensitive and ignores the extension.
    while candidate.lower() in self.seen_filenames:
        suffix += 1
        candidate = stem + str(suffix)
    self.seen_filenames.add(candidate.lower())
    return candidate + os.extsep + fmt.lower()
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
    """Open the EPUB at ``pathtoepub`` by extracting it into ``tdir``.

    :param pathtoepub: Path to the EPUB file.
    :param log: Logger, passed on to the base container.
    :param clone_data: If not None, initialise from this previously captured
        state instead of extracting anything.
    :param tdir: Directory to extract into; a persistent temporary directory
        is created when None.
    :raises InvalidEpub: If META-INF/container.xml is missing, has no OPF
        rootfile entry, or points at an OPF file that does not exist.
    """
    if clone_data is not None:
        super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
        for x in ('pathtoepub', 'obfuscated_fonts'):
            setattr(self, x, clone_data[x])
        return

    self.pathtoepub = pathtoepub
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_epub_container')
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir
    with open(self.pathtoepub, 'rb') as stream:
        try:
            zf = ZipFile(stream)
            zf.extractall(tdir)
        except Exception:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                          ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            # Extract into the container directory, like the primary path;
            # without an explicit path the fallback would not target tdir.
            extractall(stream, path=tdir)
    try:
        os.remove(join(tdir, 'mimetype'))
    except EnvironmentError:
        pass
    container_path = join(self.root, 'META-INF', 'container.xml')
    if not exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    # Use a context manager so the file handle is closed promptly.
    with open(container_path, 'rb') as f:
        container = etree.fromstring(f.read())
    opf_files = container.xpath(
        (r'child::ocf:rootfiles/ocf:rootfile'
         '[@media-type="%s" and @full-path]' % guess_type('a.opf')),
        namespaces={'ocf': OCF_NS})
    if not opf_files:
        raise InvalidEpub(
            'META-INF/container.xml contains no link to OPF file')
    # full-path is a URL-quoted, "/"-separated path relative to the root.
    opf_path = os.path.join(
        self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
    if not exists(opf_path):
        raise InvalidEpub('OPF file does not exist at location pointed to'
                          ' by META-INF/container.xml')

    super(EpubContainer, self).__init__(tdir, opf_path, log)

    self.obfuscated_fonts = {}
    if 'META-INF/encryption.xml' in self.name_path_map:
        self.process_encryption()
    # Reuse the already parsed container.xml.
    self.parsed_cache['META-INF/container.xml'] = container
def create_image_markup(self, html_img, stylizer, href):
    """Build the ``w:drawing`` element for ``html_img``.

    :param html_img: The source <img> element.
    :param stylizer: Provides the computed style via ``stylizer.style()``.
    :param href: Key into ``self.images`` identifying the image.
    :return: The newly created ``w:drawing`` element.

    Increments ``self.count`` as a side effect.
    """
    # TODO: img inside a link (clickable image)
    style = stylizer.style(html_img)
    floating = style['float']
    if floating not in {'left', 'right'}:
        floating = None
    fake_margins = floating is None
    self.count += 1
    img = self.images[href]
    name = urlunquote(posixpath.basename(href))
    width, height = map(pt_to_emu, style.img_size(img.width, img.height))

    makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces

    root = etree.Element('root', nsmap=namespaces)
    ans = makeelement(root, 'w:drawing', append=False)
    if floating is None:
        parent = makeelement(ans, 'wp:inline')
    else:
        parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
        # The next three lines are boilerplate that Word requires, even
        # though the DOCX specs define defaults for all of them
        parent.set('simplePos', '0')
        parent.set('relativeHeight', '1')
        parent.set('behindDoc', "0")
        parent.set('locked', "0")
        parent.set('layoutInCell', "1")
        parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
        makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
    # cy must be the height (it previously repeated the width), matching
    # the a:ext element emitted below.
    makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
    if fake_margins:
        # DOCX does not support setting margins for inline images, so we
        # fake it by using effect extents to simulate margins
        makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in get_image_margins(style).iteritems()})
    else:
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
    if floating is not None:
        # The idiotic Word requires this to be after the extent settings
        makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
    makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=html_img.get('alt') or name)
    makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
    g = makeelement(parent, 'a:graphic')
    gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
    pic = makeelement(gd, 'pic:pic')
    nvPicPr = makeelement(pic, 'pic:nvPicPr')
    makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=html_img.get('alt') or name)
    makeelement(nvPicPr, 'pic:cNvPicPr')
    bf = makeelement(pic, 'pic:blipFill')
    makeelement(bf, 'a:blip', r_embed=img.rid)
    makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
    spPr = makeelement(pic, 'pic:spPr')
    xfrm = makeelement(spPr, 'a:xfrm')
    makeelement(xfrm, 'a:off', x='0', y='0')
    makeelement(xfrm, 'a:ext', cx=str(width), cy=str(height))
    makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
    return ans
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
    """Open the EPUB at ``pathtoepub`` by extracting it into ``tdir``.

    :param pathtoepub: Path to the EPUB file.
    :param log: Logger, passed on to the base container.
    :param clone_data: If not None, initialise from this previously captured
        state instead of extracting anything.
    :param tdir: Directory to extract into; a persistent temporary directory
        is created when None.
    :raises InvalidEpub: If META-INF/container.xml is missing, has no OPF
        rootfile entry, or points at an OPF file that does not exist.
    """
    if clone_data is not None:
        super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
        for x in ('pathtoepub', 'obfuscated_fonts'):
            setattr(self, x, clone_data[x])
        return

    self.pathtoepub = pathtoepub
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_epub_container')
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir
    with open(self.pathtoepub, 'rb') as stream:
        try:
            zf = ZipFile(stream)
            zf.extractall(tdir)
        except Exception:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            # Extract into the container directory, like the primary path;
            # without an explicit path the fallback would not target tdir.
            extractall(stream, path=tdir)
    try:
        os.remove(join(tdir, 'mimetype'))
    except EnvironmentError:
        pass
    container_path = join(self.root, 'META-INF', 'container.xml')
    if not exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    # Use a context manager so the file handle is closed promptly.
    with open(container_path, 'rb') as f:
        container = etree.fromstring(f.read())
    opf_files = container.xpath((
        r'child::ocf:rootfiles/ocf:rootfile'
        '[@media-type="%s" and @full-path]'%guess_type('a.opf')
        ), namespaces={'ocf':OCF_NS}
    )
    if not opf_files:
        raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
    # full-path is a URL-quoted, "/"-separated path relative to the root.
    opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
    if not exists(opf_path):
        raise InvalidEpub('OPF file does not exist at location pointed to'
                ' by META-INF/container.xml')

    super(EpubContainer, self).__init__(tdir, opf_path, log)

    self.obfuscated_fonts = {}
    if 'META-INF/encryption.xml' in self.name_path_map:
        self.process_encryption()
    # Reuse the already parsed container.xml.
    self.parsed_cache['META-INF/container.xml'] = container
def href_to_name(self, href, base=None):
    '''
    Translate an href into a name.

    :param href: The link, interpreted relative to ``base``.
    :param base: A name, or None to resolve against ``self.root``.
    :return: The name, or None when ``href`` has a scheme, has no path, or
        has a path starting with ``/``.
    '''
    if base is None:
        base_dir = self.root
    else:
        base_dir = os.path.dirname(self.name_to_abspath(base))

    parsed = urlparse(href)
    path = parsed.path
    if parsed.scheme or not path or path.startswith('/'):
        return None

    # hrefs always use "/" as the separator, whatever the platform.
    segments = urlunquote(path).split('/')
    return self.abspath_to_name(os.path.join(base_dir, *segments))
def __init__(self, url, base):
    '''
    Parse ``url`` and classify it as local and/or internal.

    :param url:  The url this link points to. Must be an unquoted unicode string.
    :param base: The base directory that relative URLs are with respect to.
                 Must be a unicode string.
    '''
    assert isinstance(url, unicode)
    assert isinstance(base, unicode)
    parsed = urlparse(url)
    self.url = url
    self.parsed_url = parsed
    # Local links have no scheme or the file scheme; internal links are
    # local links without a path.
    self.is_local = parsed.scheme in ('', 'file')
    self.is_internal = self.is_local and not parsed.path
    self.path = None
    self.fragment = urlunquote(parsed.fragment)
    if self.is_local and not self.is_internal:
        self.path = self.url_to_local_path(parsed, base)
def __init__(self, url, base):
    '''
    Parse ``url`` and classify it as local and/or internal.

    :param url:  The url this link points to. Must be an unquoted unicode string.
    :param base: The base directory that relative URLs are with respect to.
                 Must be a unicode string.
    '''
    assert isinstance(url, unicode_type)
    assert isinstance(base, unicode_type)
    parsed = urlparse(url)
    self.url = url
    self.parsed_url = parsed
    # Local links have no scheme or the file scheme; internal links are
    # local links without a path.
    self.is_local = parsed.scheme in ('', 'file')
    self.is_internal = self.is_local and not parsed.path
    self.path = None
    self.fragment = urlunquote(parsed.fragment)
    if self.is_local and not self.is_internal:
        self.path = self.url_to_local_path(parsed, base)
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
    """Open the EPUB at ``pathtoepub`` by extracting it into ``tdir``.

    :param pathtoepub: Path to the EPUB file.
    :param log: Logger, passed on to the base container.
    :param clone_data: If not None, initialise from this previously captured
        state instead of extracting anything.
    :param tdir: Directory to extract into; a persistent temporary directory
        is created when None.
    :raises InvalidEpub: If META-INF/container.xml is missing, has no OPF
        rootfile entry, or points at an OPF file that does not exist.
    """
    if clone_data is not None:
        super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
        for x in ("pathtoepub", "obfuscated_fonts"):
            setattr(self, x, clone_data[x])
        return

    self.pathtoepub = pathtoepub
    if tdir is None:
        tdir = PersistentTemporaryDirectory("_epub_container")
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir
    with open(self.pathtoepub, "rb") as stream:
        try:
            zf = ZipFile(stream)
            zf.extractall(tdir)
        except Exception:
            log.exception("EPUB appears to be invalid ZIP file, trying a" " more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            # Extract into the container directory, like the primary path;
            # without an explicit path the fallback would not target tdir.
            extractall(stream, path=tdir)
    try:
        os.remove(join(tdir, "mimetype"))
    except EnvironmentError:
        pass
    container_path = join(self.root, "META-INF", "container.xml")
    if not exists(container_path):
        raise InvalidEpub("No META-INF/container.xml in epub")
    # Use a context manager so the file handle is closed promptly.
    with open(container_path, "rb") as f:
        container = etree.fromstring(f.read())
    opf_files = container.xpath(
        (r"child::ocf:rootfiles/ocf:rootfile" '[@media-type="%s" and @full-path]' % guess_type("a.opf")),
        namespaces={"ocf": OCF_NS},
    )
    if not opf_files:
        raise InvalidEpub("META-INF/container.xml contains no link to OPF file")
    # full-path is a URL-quoted, "/"-separated path relative to the root.
    opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get("full-path")).split("/")))
    if not exists(opf_path):
        raise InvalidEpub("OPF file does not exist at location pointed to" " by META-INF/container.xml")

    super(EpubContainer, self).__init__(tdir, opf_path, log)

    self.obfuscated_fonts = {}
    if "META-INF/encryption.xml" in self.name_path_map:
        self.process_encryption()
    # Reuse the already parsed container.xml.
    self.parsed_cache["META-INF/container.xml"] = container
def verify_links(self):
    """Record, per spine item, which of its links resolve inside the spine.

    A link is verified when it has no scheme or network location, its target
    is a spine item, and its fragment (if any) is in that item's
    ``anchor_map``. Verified links are stored as ``(target, fragment)`` in
    the item's ``verified_links`` set.
    """
    # Identity map: looking up an equal path yields the spine object itself.
    by_path = dict((entry, entry) for entry in self.spine)
    for item in self.spine:
        item_dir = os.path.dirname(item)
        for link in item.all_links:
            try:
                parsed = urlparse(urlunquote(link))
            except Exception:
                continue
            if parsed.scheme or parsed.netloc:
                continue
            if parsed.path:
                target = os.path.abspath(os.path.join(item_dir, parsed.path))
            else:
                # No path component: the link points into the item itself.
                target = item
            try:
                target = by_path[target]
            except Exception:
                continue
            fragment = parsed.fragment
            if not fragment or fragment in target.anchor_map:
                item.verified_links.add((target, fragment))
def rewrite_links(self, url):
    """Redirect ``url`` according to ``self.map``, if its target is mapped.

    The URL is resolved against ``self.current_item``; when the normalized
    href is a key of ``self.map`` the replacement is looked up by fragment
    (``None`` when there is no fragment) and returned relative to the current
    item. Unparseable or unmapped URLs are returned unchanged.
    """
    target, fragment = urldefrag(url)
    try:
        target = self.current_item.abshref(target)
    except ValueError:
        # Unparseable URL
        return url
    target = urlnormalize(target)
    if target not in self.map:
        return url

    anchors = self.map[target]
    replacement = self.current_item.relhref(anchors[fragment or None])
    if fragment:
        # Re-attach the original fragment to the unquoted replacement href.
        replacement = '#'.join((urlunquote(replacement), fragment))
    return replacement
def read_font_fule(self, basedir, css):
    """Register the fonts referenced by ``css`` with Qt.

    For each rule that has both a ``src`` URI and a ``font-family`` value,
    the font file (resolved against ``basedir``) is loaded into the Qt font
    database once. If Qt reports a family name different from the CSS one,
    the pair is stored (UTF-8 encoded) in ``self.replace_map``. Rules and
    stylesheets that cannot be parsed are silently skipped.
    """
    from PyQt4.Qt import QFontDatabase
    import cssutils
    cssutils.log.setLevel(logging.ERROR)
    try:
        sheet = cssutils.parseString(css, validate=False)
    except:
        return
    for rule in sheet.cssRules:
        try:
            decl = rule.style
            src = decl.getProperty('src').propertyValue[0].uri
            font_family = decl.getProperty('font-family').propertyValue[0].value
        except:
            continue
        if not (src and font_family):
            continue
        font_file = os.path.normcase(os.path.abspath(os.path.join(basedir, src)))
        if font_file in self.added_fonts:
            # Already handled this file.
            continue
        self.added_fonts.add(font_file)
        if not os.path.exists(font_file):
            # The path may still be URL-quoted; try the unquoted form.
            from calibre.ebooks.oeb.base import urlunquote
            unquoted = urlunquote(font_file, error_handling='replace')
            if os.path.exists(unquoted):
                font_file = unquoted
        if not os.path.exists(font_file):
            continue
        with open(font_file, 'rb') as f:
            idx = QFontDatabase.addApplicationFontFromData(f.read())
        if not idx > -1:
            continue
        family = map(unicode, QFontDatabase.applicationFontFamilies(idx)).next()
        self.log('Extracted embedded font:', family, 'from', os.path.basename(font_file))
        needs_replacing = family and family != font_family and family not in self.replace_map
        if needs_replacing:
            self.log('Replacing font family value:', font_family, 'with', family)
            self.replace_map[font_family.encode('utf-8')] = family.encode('utf-8')
def check_links(container):
    """Check the links in ``container`` and return a list of problems found.

    Two passes are made over ``container.mime_map``: the first inspects every
    link in documents, stylesheets and OPF/NCX files and records which names
    link to which; the second uses that map to report resources not reachable
    from the spine and files missing from the manifest.

    :param container: The book container to check.
    :return: A list of error objects (EmptyLink, BadLink, CaseMismatch,
        DanglingLink, FileLink, LocalLink, InvalidCharInLink,
        UnreferencedResource, UnreferencedDoc, Unmanifested, Bookmarks).
    """
    # name -> set of names it links to (only targets present in mime_map)
    links_map = defaultdict(set)
    xml_types = {guess_type('a.opf'), guess_type('a.ncx')}
    errors = []
    a = errors.append

    def fl(x):
        # Format a link for display: its repr() without a leading 'u' prefix
        x = repr(x)
        if x.startswith('u'):
            x = x[1:]
        return x

    for name, mt in iteritems(container.mime_map):
        if mt in OEB_DOCS or mt in OEB_STYLES or mt in xml_types:
            for href, lnum, col in container.iterlinks(name):
                if not href:
                    a(EmptyLink(_('The link is empty'), name, lnum, col))
                try:
                    tname = container.href_to_name(href, name)
                except ValueError:
                    tname = None  # Absolute paths to files on another drive in windows cause this
                if tname is not None:
                    if container.exists(tname):
                        if tname in container.mime_map:
                            links_map[name].add(tname)
                        else:
                            # Filesystem says the file exists, but it is not in
                            # the mime_map, so either there is a case mismatch
                            # or the link is a directory
                            apath = container.name_to_abspath(tname)
                            if os.path.isdir(apath):
                                a(BadLink(_('The linked resource %s is a directory') % fl(href), name, lnum, col))
                            else:
                                a(CaseMismatch(href, actual_case_for_name(container, tname), name, lnum, col))
                    else:
                        # Target does not exist: report a case mismatch if a
                        # case-corrected name is found, else a dangling link
                        cname = corrected_case_for_name(container, tname)
                        if cname is not None:
                            a(CaseMismatch(href, cname, name, lnum, col))
                        else:
                            a(DanglingLink(_('The linked resource %s does not exist') % fl(href), tname, name, lnum, col))
                else:
                    # The href could not be converted to a name; classify it
                    purl = urlparse(href)
                    if purl.scheme == 'file':
                        a(FileLink(_('The link %s is a file:// URL') % fl(href), name, lnum, col))
                    elif purl.path and purl.path.startswith('/') and purl.scheme in {'', 'file'}:
                        a(LocalLink(_('The link %s points to a file outside the book') % fl(href), name, lnum, col))
                    elif purl.path and purl.scheme in {'', 'file'} and ':' in urlunquote(purl.path):
                        a(InvalidCharInLink(_('The link %s contains a : character, this will cause errors on Windows computers') % fl(href), name, lnum, col))

    spine_docs = {name for name, linear in container.spine_names}
    spine_styles = {tname for name in spine_docs for tname in links_map[name] if container.mime_map.get(tname, None) in OEB_STYLES}
    num = -1
    # Grow spine_styles until a pass adds nothing new
    while len(spine_styles) > num:
        # Handle import rules in stylesheets
        num = len(spine_styles)
        spine_styles |= {tname for name in spine_styles for tname in links_map[name] if container.mime_map.get(tname, None) in OEB_STYLES}
    seen = set(OEB_DOCS) | set(OEB_STYLES)
    # Non-document, non-stylesheet files linked from spine documents/styles
    spine_resources = {tname for name in spine_docs | spine_styles for tname in links_map[name] if container.mime_map[tname] not in seen}
    unreferenced = set()

    cover_name = container.guide_type_map.get('cover', None)
    nav_items = frozenset(container.manifest_items_with_property('nav'))

    for name, mt in iteritems(container.mime_map):
        if mt in OEB_STYLES and name not in spine_styles:
            a(UnreferencedResource(name))
        elif mt in OEB_DOCS and name not in spine_docs and name not in nav_items:
            a(UnreferencedDoc(name))
        elif (mt in OEB_FONTS or mt.partition('/')[0] in {'image', 'audio', 'video'}) and name not in spine_resources and name != cover_name:
            # The raster cover image is not reported as unreferenced
            if mt.partition('/')[0] == 'image' and name == get_raster_cover_name(container):
                continue
            a(UnreferencedResource(name))
        else:
            continue
        # Only reached when one of the branches above reported the name
        unreferenced.add(name)

    manifest_names = set(itervalues(container.manifest_id_map))
    for name in container.mime_map:
        if name not in manifest_names and not container.ok_to_be_unmanifested(name):
            a(Unmanifested(name, unreferenced=name in unreferenced))
        if name == 'META-INF/calibre_bookmarks.txt':
            a(Bookmarks(name))

    return errors
def convert_epub3_nav(self, nav_path, opf, log, opts):
    """Generate an NCX table of contents from an EPUB 3 navigation document.

    The nav document at ``nav_path`` is parsed and the <ol> tree of its
    ``toc`` <nav> element is converted into NCX navPoints. The NCX is written
    to a new ``.ncx`` file next to the nav document, added to the OPF
    manifest and set as the ``toc`` of every spine element. The nav href and
    parsed tree are stored on ``opts``.

    Returns early, without creating anything, when no ``toc`` <nav> with an
    <ol> child is found.

    :param nav_path: Filesystem path of the nav document.
    :param opf: OPF object providing ``create_manifest_item`` and ``root``.
    :param log: Logger passed to the parser.
    :param opts: Options object; ``epub3_nav_href`` and ``epub3_nav_parsed``
        are set on it.
    """
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
    from calibre.ebooks.oeb.polish.toc import first_child
    from tempfile import NamedTemporaryFile
    with lopen(nav_path, 'rb') as f:
        raw = f.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
    root = parse(raw, log=log)
    # Empty NCX skeleton; navPoints are appended to its <navMap>
    ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
    navmap = ncx[0]
    et = '{%s}type' % EPUB_NS
    bn = os.path.basename(nav_path)

    def add_from_li(li, parent):
        # Create a navPoint under parent from the first <a>/<span> child of li
        href = text = None
        for x in li.iterchildren(XHTML('a'), XHTML('span')):
            # Use the element text, falling back to any title attributes
            text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
            href = x.get('href')
            if href:
                if href.startswith('#'):
                    # Fragment-only links point into the nav document itself
                    href = bn + href
            break
        np = parent.makeelement(NCX('navPoint'))
        parent.append(np)
        np.append(np.makeelement(NCX('navLabel')))
        np[0].append(np.makeelement(NCX('text')))
        np[0][0].text = text
        if href:
            np.append(np.makeelement(NCX('content'), attrib={'src':href}))
        return np

    def process_nav_node(node, toc_parent):
        # Recursively convert the <li> children of node (and nested <ol>s)
        for li in node.iterchildren(XHTML('li')):
            child = add_from_li(li, toc_parent)
            ol = first_child(li, XHTML('ol'))
            if child is not None and ol is not None:
                process_nav_node(ol, child)

    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(et) == 'toc':
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, navmap)
                break
    else:
        # The loop never hit break: no toc nav with an <ol> was processed
        return

    # delete=False keeps the generated NCX on disk after the with block
    with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
        f.write(etree.tostring(ncx, encoding='utf-8'))
    ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/')
    ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
    for spine in opf.root.xpath('//*[local-name()="spine"]'):
        spine.set('toc', ncx_id)
    opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
    opts.epub3_nav_parsed = root
    if getattr(self, 'removed_cover', None):
        # Mark links that point at the removed cover and, if any were
        # marked, write the modified nav document back to nav_path
        changed = False
        base_path = os.path.dirname(nav_path)
        for elem in root.xpath('//*[@href]'):
            href, frag = elem.get('href').partition('#')[::2]
            link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
            abs_href = urlnormalize(link_path)
            if abs_href == self.removed_cover:
                changed = True
                elem.set('data-calibre-removed-titlepage', '1')
        if changed:
            with open(nav_path, 'wb') as f:
                f.write(serialize(root, 'application/xhtml+xml'))
def create_image_markup(self, html_img, stylizer, href, as_block=False):
    """Build the ``w:drawing`` element for ``html_img``.

    :param html_img: The source <img> element.
    :param stylizer: Provides the computed style via ``stylizer.style()``.
    :param href: Key into ``self.images`` identifying the image.
    :param as_block: If True the image is laid out as a block: horizontal
        alignment is derived from auto margins and text wraps above/below.
    :return: The newly created ``w:drawing`` element.

    Increments ``self.count`` as a side effect.
    """
    # TODO: img inside a link (clickable image)
    style = stylizer.style(html_img)
    floating = style['float']
    if floating not in {'left', 'right'}:
        floating = None
    if as_block:
        ml, mr = style._get('margin-left'), style._get('margin-right')
        # An auto margin absorbs the free space on its side, so the image is
        # pushed towards the opposite side; both auto means centered.
        if ml == 'auto' and mr == 'auto':
            floating = 'center'
        elif ml == 'auto':
            floating = 'right'
        elif mr == 'auto':
            floating = 'left'
    fake_margins = floating is None
    self.count += 1
    img = self.images[href]
    name = urlunquote(posixpath.basename(href))
    width, height = map(pt_to_emu, style.img_size(img.width, img.height))

    makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces

    root = etree.Element('root', nsmap=namespaces)
    ans = makeelement(root, 'w:drawing', append=False)
    if floating is None:
        parent = makeelement(ans, 'wp:inline')
    else:
        parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
        # The next three lines are boilerplate that Word requires, even
        # though the DOCX specs define defaults for all of them
        parent.set('simplePos', '0')
        parent.set('relativeHeight', '1')
        parent.set('behindDoc', "0")
        parent.set('locked', "0")
        parent.set('layoutInCell', "1")
        parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(
            makeelement(parent, 'wp:positionH', relativeFrom='margin'),
            'wp:align').text = floating
        makeelement(
            makeelement(parent, 'wp:positionV', relativeFrom='line'),
            'wp:align').text = 'top'
    # cy must be the height (it previously repeated the width), matching
    # the a:ext element emitted below.
    makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
    if fake_margins:
        # DOCX does not support setting margins for inline images, so we
        # fake it by using effect extents to simulate margins
        makeelement(
            parent, 'wp:effectExtent', **{
                k[-1].lower(): v
                for k, v in get_image_margins(style).iteritems()
            })
    else:
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
    if floating is not None:
        # The idiotic Word requires this to be after the extent settings
        if as_block:
            makeelement(parent, 'wp:wrapTopAndBottom')
        else:
            makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
    makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=html_img.get('alt') or name)
    makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
    g = makeelement(parent, 'a:graphic')
    gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
    pic = makeelement(gd, 'pic:pic')
    nvPicPr = makeelement(pic, 'pic:nvPicPr')
    makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=html_img.get('alt') or name)
    makeelement(nvPicPr, 'pic:cNvPicPr')
    bf = makeelement(pic, 'pic:blipFill')
    makeelement(bf, 'a:blip', r_embed=img.rid)
    makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
    spPr = makeelement(pic, 'pic:spPr')
    xfrm = makeelement(spPr, 'a:xfrm')
    makeelement(xfrm, 'a:off', x='0', y='0')
    makeelement(xfrm, 'a:ext', cx=str(width), cy=str(height))
    makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
    return ans
def workaround_ade_quirks(self):  # {{{
    '''
    Perform various markup transforms to get the output to render correctly
    in the quirky ADE.

    Operates in place on ``self.oeb``: first the TOC hrefs are cleaned of
    fragments ADE cannot handle, then every spine item's markup is rewritten,
    and rules in the main stylesheet (if there is one) are adjusted.
    '''
    from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

    stylesheet = self.oeb.manifest.main_stylesheet

    # ADE cries big wet tears when it encounters an invalid fragment
    # identifier in the NCX toc.
    frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
    for node in self.oeb.toc.iter():
        href = getattr(node, 'href', None)
        if hasattr(href, 'partition'):
            base, _, frag = href.partition('#')
            frag = urlunquote(frag)
            if frag and frag_pat.match(frag) is None:
                self.log.warn(
                        'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
                node.href = base

    for x in self.oeb.spine:
        # root is captured here; x is reused as a loop variable further down
        root = x.data
        body = XPath('//h:body')(root)
        if body:
            body = body[0]

        if hasattr(body, 'xpath'):
            # remove <img> tags with empty src elements
            bad = []
            for x in XPath('//h:img')(body):
                src = x.get('src', '').strip()
                if src in ('', '#') or src.startswith('http:'):
                    bad.append(x)
            for img in bad:
                img.getparent().remove(img)

            # Add id attribute to <a> tags that have name
            for x in XPath('//h:a[@name]')(body):
                if not x.get('id', False):
                    x.set('id', x.get('name'))
                # The delightful epubcheck has started complaining about <a> tags that
                # have name attributes.
                x.attrib.pop('name')

            # Replace <br> that are children of <body> as ADE doesn't handle them
            for br in XPath('./h:br')(body):
                if br.getparent() is None:
                    continue
                try:
                    prior = br.itersiblings(preceding=True).next()
                    priortag = barename(prior.tag)
                    priortext = prior.tail
                except:
                    # No preceding sibling: the <br> is the first child of body
                    priortag = 'body'
                    priortext = body.text
                if priortext:
                    priortext = priortext.strip()
                br.tag = XHTML('p')
                br.text = u'\u00a0'
                style = br.get('style', '').split(';')
                style = filter(None, map(lambda x: x.strip(), style))
                style.append('margin:0pt; border:0pt')
                # If the prior tag is a block (including a <br> we replaced)
                # then this <br> replacement should have a 1-line height.
                # Otherwise it should have no height.
                if not priortext and priortag in block_level_tags:
                    style.append('height:1em')
                else:
                    style.append('height:0pt')
                br.set('style', '; '.join(style))

        # Remove <embed> and all <object> tags except SVG ones
        for tag in XPath('//h:embed')(root):
            tag.getparent().remove(tag)
        for tag in XPath('//h:object')(root):
            if tag.get('type', '').lower().strip() in {'image/svg+xml', 'application/svg+xml'}:
                continue
            tag.getparent().remove(tag)

        # Remove empty <title>/<style> tags, empty <script> tags (except
        # MathJax config) and every <script> inside <body>
        for tag in XPath('//h:title|//h:style')(root):
            if not tag.text:
                tag.getparent().remove(tag)
        for tag in XPath('//h:script')(root):
            if (not tag.text and not tag.get('src', False) and tag.get('type', None) != 'text/x-mathjax-config'):
                tag.getparent().remove(tag)
        for tag in XPath('//h:body/descendant::h:script')(root):
            tag.getparent().remove(tag)

        # Forms with form controls as direct children are removed; other
        # <form> tags are turned into <div>
        formchildren = XPath('./h:input|./h:button|./h:textarea|'
                './h:label|./h:fieldset|./h:legend')
        for tag in XPath('//h:form')(root):
            if formchildren(tag):
                tag.getparent().remove(tag)
            else:
                # Not a real form
                tag.tag = XHTML('div')

        for tag in XPath('//h:center')(root):
            tag.tag = XHTML('div')
            tag.set('style', 'text-align:center')
        # ADE can't handle & in an img url
        for tag in XPath('//h:img[@src]')(root):
            tag.set('src', tag.get('src', '').replace('&', ''))

        # ADE whimpers in fright when it encounters a <td> outside a
        # <table>
        in_table = XPath('ancestor::h:table')
        for tag in XPath('//h:td|//h:tr|//h:th')(root):
            if not in_table(tag):
                tag.tag = XHTML('div')

        # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces
        special_chars = re.compile(u'[\u200b\u00ad]')
        for elem in root.iterdescendants('*'):
            if elem.text:
                elem.text = special_chars.sub('', elem.text)
                elem.text = elem.text.replace(u'\u2011', '-')
            if elem.tail:
                elem.tail = special_chars.sub('', elem.tail)
                elem.tail = elem.tail.replace(u'\u2011', '-')

        if stylesheet is not None:
            # ADE doesn't render lists correctly if they have left margins
            from cssutils.css import CSSRule
            for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
                sel = '.'+lb.get('class')
                for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                    if sel == rule.selectorList.selectorText:
                        rule.style.removeProperty('margin-left')
                        # padding-left breaks rendering in webkit and gecko
                        rule.style.removeProperty('padding-left')
            # Change whitespace:pre to pre-wrap to accommodate readers that
            # cannot scroll horizontally
            for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                style = rule.style
                ws = style.getPropertyValue('white-space')
                if ws == 'pre':
                    style.setProperty('white-space', 'pre-wrap')
def workaround_ade_quirks(self):  # {{{
    '''
    Perform various markup transforms to get the output to render correctly
    in the quirky ADE.

    Everything is done in place: the hrefs of ``self.oeb.toc`` nodes, the
    parsed tree (``item.data``) of every item in ``self.oeb.spine`` and, when
    it is not None, ``self.oeb.manifest.main_stylesheet`` are modified.
    Nothing is returned.
    '''
    from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

    stylesheet = self.oeb.manifest.main_stylesheet

    # ADE cries big wet tears when it encounters an invalid fragment
    # identifier in the NCX toc.
    frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
    for node in self.oeb.toc.iter():
        href = getattr(node, 'href', None)
        if hasattr(href, 'partition'):
            base, frag = href.partition('#')[::2]
            frag = urlunquote(frag)
            if frag and frag_pat.match(frag) is None:
                self.log.warn(
                    'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it' % frag)
                node.href = base

    # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces.
    # The pattern does not depend on the spine item, so compile it only once.
    special_chars = re.compile('[\u200b\u00ad]')

    for item in self.oeb.spine:
        root = item.data
        body = XPath('//h:body')(root)
        if body:
            body = body[0]

        # When no <body> was found, ``body`` is still the (empty) XPath result
        # and has no ``xpath`` attribute, so the body-only fixes are skipped.
        if hasattr(body, 'xpath'):
            # Remove <img> tags whose src is empty, a bare '#' or an http: URL.
            # Collect first, remove afterwards, so the tree is not mutated
            # while the XPath result is being walked.
            # NOTE(review): lxml's remove() presumably drops the tail text of
            # the removed element as well -- confirm that is acceptable.
            bad = []
            for img in XPath('//h:img')(body):
                src = img.get('src', '').strip()
                if src in ('', '#') or src.startswith('http:'):
                    bad.append(img)
            for img in bad:
                img.getparent().remove(img)

            # Add id attribute to <a> tags that have name
            for anchor in XPath('//h:a[@name]')(body):
                if not anchor.get('id', False):
                    anchor.set('id', anchor.get('name'))
                # The delightful epubcheck has started complaining about <a> tags that
                # have name attributes.
                anchor.attrib.pop('name')

            # Replace <br> that are children of <body> as ADE doesn't handle them
            for br in XPath('./h:br')(body):
                if br.getparent() is None:
                    continue
                try:
                    prior = next(br.itersiblings(preceding=True))
                    priortag = barename(prior.tag)
                    priortext = prior.tail
                except Exception:
                    # Deliberately broad: covers both "no preceding sibling"
                    # (StopIteration) and any failure while inspecting that
                    # sibling. Unlike a bare ``except:`` it lets
                    # KeyboardInterrupt/SystemExit propagate.
                    priortag = 'body'
                    priortext = body.text
                if priortext:
                    priortext = priortext.strip()
                br.tag = XHTML('p')
                br.text = '\u00a0'
                # Keep the existing non-empty style declarations
                style = [
                    part.strip()
                    for part in br.get('style', '').split(';')
                    if part.strip()
                ]
                style.append('margin:0pt; border:0pt')
                # If the prior tag is a block (including a <br> we replaced)
                # then this <br> replacement should have a 1-line height.
                # Otherwise it should have no height.
                if not priortext and priortag in block_level_tags:
                    style.append('height:1em')
                else:
                    style.append('height:0pt')
                br.set('style', '; '.join(style))

        for tag in XPath('//h:embed')(root):
            tag.getparent().remove(tag)
        for tag in XPath('//h:object')(root):
            # SVG objects are kept, every other <object> is dropped
            if tag.get('type', '').lower().strip() in {
                    'image/svg+xml', 'application/svg+xml'}:
                continue
            tag.getparent().remove(tag)

        # Drop empty <title>/<style> elements
        for tag in XPath('//h:title|//h:style')(root):
            if not tag.text:
                tag.getparent().remove(tag)
        # Drop <script> elements that have neither text nor src, unless they
        # carry a MathJax configuration type
        for tag in XPath('//h:script')(root):
            if (not tag.text and not tag.get('src', False) and
                    tag.get('type', None) != 'text/x-mathjax-config'):
                tag.getparent().remove(tag)
        # Scripts inside <body> are removed unconditionally
        for tag in XPath('//h:body/descendant::h:script')(root):
            tag.getparent().remove(tag)

        formchildren = XPath('./h:input|./h:button|./h:textarea|'
                             './h:label|./h:fieldset|./h:legend')
        for tag in XPath('//h:form')(root):
            if formchildren(tag):
                tag.getparent().remove(tag)
            else:
                # Not a real form
                tag.tag = XHTML('div')

        for tag in XPath('//h:center')(root):
            tag.tag = XHTML('div')
            tag.set('style', 'text-align:center')

        # ADE can't handle & in an img url
        for tag in XPath('//h:img[@src]')(root):
            tag.set('src', tag.get('src', '').replace('&', ''))

        # ADE whimpers in fright when it encounters a <td> outside a
        # <table>
        in_table = XPath('ancestor::h:table')
        for tag in XPath('//h:td|//h:tr|//h:th')(root):
            if not in_table(tag):
                tag.tag = XHTML('div')

        # Strip the characters ADE cannot render and turn non breaking
        # hyphens into plain hyphens, in both text and tail
        for elem in root.iterdescendants('*'):
            if elem.text:
                elem.text = special_chars.sub('', elem.text)
                elem.text = elem.text.replace('\u2011', '-')
            if elem.tail:
                elem.tail = special_chars.sub('', elem.tail)
                elem.tail = elem.tail.replace('\u2011', '-')

        if stylesheet is not None:
            # ADE doesn't render lists correctly if they have left margins
            from cssutils.css import CSSRule
            for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
                sel = '.' + lb.get('class')
                for rule in stylesheet.data.cssRules.rulesOfType(
                        CSSRule.STYLE_RULE):
                    if sel == rule.selectorList.selectorText:
                        rule.style.removeProperty('margin-left')
                        # padding-left breaks rendering in webkit and gecko
                        rule.style.removeProperty('padding-left')
            # Change whitespace:pre to pre-wrap to accommodate readers that
            # cannot scroll horizontally
            for rule in stylesheet.data.cssRules.rulesOfType(
                    CSSRule.STYLE_RULE):
                style = rule.style
                ws = style.getPropertyValue('white-space')
                if ws == 'pre':
                    style.setProperty('white-space', 'pre-wrap')
def workaround_ade_quirks(self):  # {{{
    """
    Perform various markup transforms to get the output to render correctly
    in the quirky ADE.

    Everything is done in place: the hrefs of ``self.oeb.toc`` nodes, the
    parsed tree (``item.data``) of every item in ``self.oeb.spine`` and, when
    it is not None, ``self.oeb.manifest.main_stylesheet`` are modified.
    Nothing is returned.
    """
    from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

    stylesheet = self.oeb.manifest.main_stylesheet

    # ADE cries big wet tears when it encounters an invalid fragment
    # identifier in the NCX toc.
    frag_pat = re.compile(r"[-A-Za-z0-9_:.]+$")
    for node in self.oeb.toc.iter():
        href = getattr(node, "href", None)
        if hasattr(href, "partition"):
            base, frag = href.partition("#")[::2]
            frag = urlunquote(frag)
            if frag and frag_pat.match(frag) is None:
                self.log.warn("Removing invalid fragment identifier %r from TOC" % frag)
                node.href = base

    # Zero width space and soft hyphen are stripped from all text below. The
    # pattern does not depend on the spine item, so compile it only once.
    special_chars = re.compile(u"[\u200b\u00ad]")

    for item in self.oeb.spine:
        root = item.data
        body = XPath("//h:body")(root)
        if body:
            body = body[0]

        # When no <body> was found, ``body`` is still the (empty) XPath result
        # and has no ``xpath`` attribute, so the body-only fixes are skipped.
        if hasattr(body, "xpath"):
            # Remove <img> tags whose src is empty, a bare '#' or an http: URL.
            # Collect first, remove afterwards, so the tree is not mutated
            # while the XPath result is being walked.
            bad = []
            for img in XPath("//h:img")(body):
                src = img.get("src", "").strip()
                if src in ("", "#") or src.startswith("http:"):
                    bad.append(img)
            for img in bad:
                img.getparent().remove(img)

            # Add id attribute to <a> tags that have name
            for anchor in XPath("//h:a[@name]")(body):
                if not anchor.get("id", False):
                    anchor.set("id", anchor.get("name"))
                # The delightful epubcheck has started complaining about <a> tags that
                # have name attributes.
                anchor.attrib.pop("name")

            # Replace <br> that are children of <body> as ADE doesn't handle them
            for br in XPath("./h:br")(body):
                if br.getparent() is None:
                    continue
                try:
                    # The next() builtin works on Python 2.6+ and Python 3;
                    # the iterator's .next() method exists on Python 2 only.
                    prior = next(br.itersiblings(preceding=True))
                    priortag = barename(prior.tag)
                    priortext = prior.tail
                except Exception:
                    # Deliberately broad: covers both "no preceding sibling"
                    # (StopIteration) and any failure while inspecting that
                    # sibling. Unlike a bare ``except:`` it lets
                    # KeyboardInterrupt/SystemExit propagate.
                    priortag = "body"
                    priortext = body.text
                if priortext:
                    priortext = priortext.strip()
                br.tag = XHTML("p")
                br.text = u"\u00a0"
                # A real list is needed because it is appended to below;
                # filter()/map() return lazy iterators on Python 3.
                style = [
                    part.strip()
                    for part in br.get("style", "").split(";")
                    if part.strip()
                ]
                style.append("margin:0pt; border:0pt")
                # If the prior tag is a block (including a <br> we replaced)
                # then this <br> replacement should have a 1-line height.
                # Otherwise it should have no height.
                if not priortext and priortag in block_level_tags:
                    style.append("height:1em")
                else:
                    style.append("height:0pt")
                br.set("style", "; ".join(style))

        for tag in XPath("//h:embed")(root):
            tag.getparent().remove(tag)
        for tag in XPath("//h:object")(root):
            # SVG objects are kept, every other <object> is dropped
            if tag.get("type", "").lower().strip() in {"image/svg+xml", "application/svg+xml"}:
                continue
            tag.getparent().remove(tag)

        # Drop empty <title>/<style> elements
        for tag in XPath("//h:title|//h:style")(root):
            if not tag.text:
                tag.getparent().remove(tag)
        # Drop <script> elements that have neither text nor src, unless they
        # carry a MathJax configuration type
        for tag in XPath("//h:script")(root):
            if (not tag.text and not tag.get("src", False)
                    and tag.get("type", None) != "text/x-mathjax-config"):
                tag.getparent().remove(tag)
        # Scripts inside <body> are removed unconditionally
        for tag in XPath("//h:body/descendant::h:script")(root):
            tag.getparent().remove(tag)

        formchildren = XPath("./h:input|./h:button|./h:textarea|"
                             "./h:label|./h:fieldset|./h:legend")
        for tag in XPath("//h:form")(root):
            if formchildren(tag):
                tag.getparent().remove(tag)
            else:
                # Not a real form
                tag.tag = XHTML("div")

        for tag in XPath("//h:center")(root):
            tag.tag = XHTML("div")
            tag.set("style", "text-align:center")

        # ADE can't handle & in an img url
        for tag in XPath("//h:img[@src]")(root):
            tag.set("src", tag.get("src", "").replace("&", ""))

        # ADE whimpers in fright when it encounters a <td> outside a
        # <table>
        in_table = XPath("ancestor::h:table")
        for tag in XPath("//h:td|//h:tr|//h:th")(root):
            if not in_table(tag):
                tag.tag = XHTML("div")

        # iterdescendants() without a tag filter may also yield non-element
        # nodes, hence the defensive getattr() lookups.
        for elem in root.iterdescendants():
            if getattr(elem, "text", False):
                elem.text = special_chars.sub("", elem.text)
                elem.text = elem.text.replace(u"\u2011", "-")
            if getattr(elem, "tail", False):
                elem.tail = special_chars.sub("", elem.tail)
                elem.tail = elem.tail.replace(u"\u2011", "-")

        if stylesheet is not None:
            # ADE doesn't render lists correctly if they have left margins
            from cssutils.css import CSSRule
            for lb in XPath("//h:ul[@class]|//h:ol[@class]")(root):
                sel = "." + lb.get("class")
                for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                    if sel == rule.selectorList.selectorText:
                        rule.style.removeProperty("margin-left")
                        # padding-left breaks rendering in webkit and gecko
                        rule.style.removeProperty("padding-left")
            # Change whitespace:pre to pre-wrap to accommodate readers that
            # cannot scroll horizontally
            for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                style = rule.style
                ws = style.getPropertyValue("white-space")
                if ws == "pre":
                    style.setProperty("white-space", "pre-wrap")
def convert_epub3_nav(self, nav_path, opf, log, opts):
    '''
    Generate an NCX file from the EPUB 3 navigation document at ``nav_path``.

    The first ``<nav>`` whose epub ``type`` attribute is ``toc`` and that has
    an ``<ol>`` is converted into a ``navMap``. If no such element exists the
    method returns early without touching ``opf`` or ``opts``.

    Otherwise the NCX is written to a new ``.ncx`` file next to the nav
    document, added to the manifest of ``opf``, and referenced from every
    spine element via its ``toc`` attribute. ``opts.epub3_nav_href`` and
    ``opts.epub3_nav_parsed`` are set as well.

    When ``self.removed_cover`` is set, links in the nav document that
    resolve to it are tagged with ``data-calibre-removed-titlepage`` and the
    nav document is rewritten on disk.

    :param nav_path: Filesystem path of the navigation document.
    :param opf: OPF object providing ``create_manifest_item`` and ``root``.
    :param log: Log object handed to the parser.
    :param opts: Options object that receives the ``epub3_nav_*`` attributes.
    :return: None
    '''
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
    from calibre.ebooks.oeb.polish.toc import first_child
    from tempfile import NamedTemporaryFile

    with lopen(nav_path, 'rb') as f:
        raw = f.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
    root = parse(raw, log=log)
    ncx = etree.fromstring(
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>'
    )
    navmap = ncx[0]
    et = '{%s}type' % EPUB_NS
    bn = os.path.basename(nav_path)

    def add_from_li(li, parent):
        # Create a navPoint under ``parent`` from the first <a>/<span> child
        # of ``li`` (if any) and return it.
        href = text = None
        for x in li.iterchildren(XHTML('a'), XHTML('span')):
            # Passing the string 'unicode' asks lxml for a text (not bytes)
            # result on both Python 2 and 3; the ``unicode`` builtin only
            # exists on Python 2. Fall back to title attributes when the
            # element has no text of its own.
            text = etree.tostring(
                x, method='text', encoding='unicode',
                with_tail=False).strip() or ' '.join(
                    x.xpath('descendant-or-self::*/@title')).strip()
            href = x.get('href')
            if href:
                if href.startswith('#'):
                    # Fragment-only link: it points into the nav document
                    href = bn + href
            # Only the first matching child is considered
            break
        np = parent.makeelement(NCX('navPoint'))
        parent.append(np)
        np.append(np.makeelement(NCX('navLabel')))
        np[0].append(np.makeelement(NCX('text')))
        np[0][0].text = text
        if href:
            np.append(np.makeelement(NCX('content'), attrib={'src': href}))
        return np

    def process_nav_node(node, toc_parent):
        # Recursively mirror the <li>/<ol> nesting of ``node`` as navPoints
        for li in node.iterchildren(XHTML('li')):
            child = add_from_li(li, toc_parent)
            ol = first_child(li, XHTML('ol'))
            if child is not None and ol is not None:
                process_nav_node(ol, child)

    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(et) == 'toc':
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, navmap)
                break
    else:
        # No usable toc <nav> was found, nothing to convert
        return

    # delete=False: the file has to outlive the with block, it becomes part
    # of the book
    with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
        f.write(etree.tostring(ncx, encoding='utf-8'))
    # os.getcwdu() only exists on Python 2; on Python 3 os.getcwd() already
    # returns text.
    getcwd = getattr(os, 'getcwdu', os.getcwd)
    ncx_href = os.path.relpath(f.name, getcwd()).replace(os.sep, '/')
    ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
    for spine in opf.root.xpath('//*[local-name()="spine"]'):
        spine.set('toc', ncx_id)
    opts.epub3_nav_href = urlnormalize(
        os.path.relpath(nav_path).replace(os.sep, '/'))
    opts.epub3_nav_parsed = root

    if getattr(self, 'removed_cover', None):
        changed = False
        base_path = os.path.dirname(nav_path)
        for elem in root.xpath('//*[@href]'):
            # Only the part before the fragment identifies the target file
            href = elem.get('href').partition('#')[0]
            link_path = os.path.relpath(
                os.path.join(base_path, urlunquote(href)), base_path)
            abs_href = urlnormalize(link_path)
            if abs_href == self.removed_cover:
                changed = True
                elem.set('data-calibre-removed-titlepage', '1')
        if changed:
            with open(nav_path, 'wb') as f:
                f.write(serialize(root, 'application/xhtml+xml'))
def check_links(container):
    '''
    Check the links in all documents, stylesheets, OPF and NCX files of
    ``container`` and report problems.

    Reported are: empty links, links to directories, links whose case does
    not match the actual file, dangling links, file:// links, links to
    absolute local paths, links containing a ``:``, resources and documents
    that are not referenced from the spine, files missing from the manifest
    and the calibre bookmarks file.

    :param container: The book container to check.
    :return: A list of error objects, in the order they were detected.
    '''
    links_map = defaultdict(set)
    xml_types = {guess_type('a.opf'), guess_type('a.ncx')}
    errors = []
    a = errors.append

    def fl(x):
        # repr() of the link, without the Python 2 unicode prefix
        x = repr(x)
        if x.startswith('u'):
            x = x[1:]
        return x

    # .items() is available on both Python 2 and 3, iteritems() is not
    for name, mt in container.mime_map.items():
        if mt in OEB_DOCS or mt in OEB_STYLES or mt in xml_types:
            for href, lnum, col in container.iterlinks(name):
                if not href:
                    a(EmptyLink(_('The link is empty'), name, lnum, col))
                try:
                    tname = container.href_to_name(href, name)
                except ValueError:
                    tname = None  # Absolute paths to files on another drive in windows cause this
                if tname is not None:
                    if container.exists(tname):
                        if tname in container.mime_map:
                            links_map[name].add(tname)
                        else:
                            # Filesystem says the file exists, but it is not in
                            # the mime_map, so either there is a case mismatch
                            # or the link is a directory
                            apath = container.name_to_abspath(tname)
                            if os.path.isdir(apath):
                                a(BadLink(
                                    _('The linked resource %s is a directory') % fl(href),
                                    name, lnum, col))
                            else:
                                a(CaseMismatch(
                                    href, actual_case_for_name(container, tname),
                                    name, lnum, col))
                    else:
                        cname = corrected_case_for_name(container, tname)
                        if cname is not None:
                            a(CaseMismatch(href, cname, name, lnum, col))
                        else:
                            a(DanglingLink(
                                _('The linked resource %s does not exist') % fl(href),
                                tname, name, lnum, col))
                else:
                    purl = urlparse(href)
                    if purl.scheme == 'file':
                        a(FileLink(
                            _('The link %s is a file:// URL') % fl(href),
                            name, lnum, col))
                    elif purl.path and purl.path.startswith('/') and purl.scheme in {'', 'file'}:
                        a(LocalLink(
                            _('The link %s points to a file outside the book') % fl(href),
                            name, lnum, col))
                    elif purl.path and purl.scheme in {'', 'file'} and ':' in urlunquote(purl.path):
                        a(InvalidCharInLink(
                            _('The link %s contains a : character, this will cause errors on Windows computers') % fl(href),
                            name, lnum, col))

    spine_docs = {name for name, linear in container.spine_names}
    spine_styles = {
        tname for name in spine_docs for tname in links_map[name]
        if container.mime_map.get(tname, None) in OEB_STYLES
    }
    num = -1
    while len(spine_styles) > num:
        # Handle import rules in stylesheets: keep adding the stylesheets
        # linked from already known ones until the set stops growing
        num = len(spine_styles)
        spine_styles |= {
            tname for name in spine_styles for tname in links_map[name]
            if container.mime_map.get(tname, None) in OEB_STYLES
        }
    seen = set(OEB_DOCS) | set(OEB_STYLES)
    # Everything that is neither a document nor a stylesheet, but is linked
    # from a spine document or one of its stylesheets
    spine_resources = {
        tname for name in spine_docs | spine_styles
        for tname in links_map[name]
        if container.mime_map[tname] not in seen
    }
    unreferenced = set()

    cover_name = container.guide_type_map.get('cover', None)
    nav_items = frozenset(container.manifest_items_with_property('nav'))

    for name, mt in container.mime_map.items():
        if mt in OEB_STYLES and name not in spine_styles:
            a(UnreferencedResource(name))
        elif mt in OEB_DOCS and name not in spine_docs and name not in nav_items:
            a(UnreferencedDoc(name))
        elif (mt in OEB_FONTS or mt.partition('/')[0] in {'image', 'audio', 'video'}
              ) and name not in spine_resources and name != cover_name:
            if mt.partition('/')[0] == 'image' and name == get_raster_cover_name(container):
                # The raster cover image is allowed to be unreferenced
                continue
            a(UnreferencedResource(name))
        else:
            continue
        # Only reached when one of the branches above reported the file
        unreferenced.add(name)

    manifest_names = set(container.manifest_id_map.values())
    for name in container.mime_map:
        if name not in manifest_names and not container.ok_to_be_unmanifested(name):
            a(Unmanifested(name, unreferenced=name in unreferenced))
        if name == 'META-INF/calibre_bookmarks.txt':
            a(Bookmarks(name))

    return errors
def create_image_markup(self, html_img, stylizer, href, as_block=False):
    '''
    Build the ``w:drawing`` element for the image ``html_img``.

    The image is emitted inline (``wp:inline``) when no horizontal placement
    can be derived from the styles, otherwise as an anchored
    (``wp:anchor``) image aligned left, right or center. ``self.count`` is
    incremented on every call.

    :param html_img: The ``<img>`` element being converted.
    :param stylizer: Object whose ``style(elem)`` returns the computed style.
    :param href: Key into ``self.images`` for this image.
    :param as_block: If True, place the image as a block, deriving the
        alignment from ``auto`` margins. If False, an image that is alone
        inside a block parent is still promoted to a block.
    :return: The new ``w:drawing`` element.
    '''
    # TODO: img inside a link (clickable image)
    style = stylizer.style(html_img)
    floating = style['float']
    if floating not in {'left', 'right'}:
        floating = None
    if as_block:
        ml, mr = style._get('margin-left'), style._get('margin-right')
        # An auto margin absorbs the free space on its side: both auto
        # centers the image, only the left one pushes it to the right and
        # only the right one pushes it to the left.
        if ml == 'auto':
            floating = 'center' if mr == 'auto' else 'right'
        if mr == 'auto':
            floating = 'center' if ml == 'auto' else 'left'
    else:
        parent = html_img.getparent()
        if len(parent) == 1 and not (parent.text or '').strip() and not (
                html_img.tail or '').strip():
            pstyle = stylizer.style(parent)
            if 'block' in pstyle['display']:
                # We have an inline image alone inside a block
                as_block = True
                floating = pstyle['float']
                if floating not in {'left', 'right'}:
                    floating = None
                    if pstyle['text-align'] in ('center', 'right'):
                        floating = pstyle['text-align']
                floating = floating or 'left'
    # Inline images cannot carry margins, they are simulated further down
    fake_margins = floating is None
    self.count += 1
    img = self.images[href]
    name = urlunquote(posixpath.basename(href))
    width, height = style.img_size(img.width, img.height)
    scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
    width, height = map(pt_to_emu, (width, height))

    namespace = self.document_relationships.namespace
    makeelement, namespaces = namespace.makeelement, namespace.namespaces

    root = etree.Element('root', nsmap=namespaces)
    ans = makeelement(root, 'w:drawing', append=False)
    if floating is None:
        parent = makeelement(ans, 'wp:inline')
    else:
        parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
        # The following attributes are boilerplate that Word requires, even
        # though the DOCX specs define defaults for all of them
        parent.set('simplePos', '0')
        parent.set('relativeHeight', '1')
        parent.set('behindDoc', "0")
        parent.set('locked', "0")
        parent.set('layoutInCell', "1")
        parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(
            makeelement(parent, 'wp:positionH', relativeFrom='margin'),
            'wp:align').text = floating
        makeelement(
            makeelement(parent, 'wp:positionV', relativeFrom='line'),
            'wp:align').text = 'top'
    makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
    if fake_margins:
        # DOCX does not support setting margins for inline images, so we
        # fake it by using effect extents to simulate margins. The attribute
        # name is the lower-cased last character of each margin key.
        # .items() works on Python 2 and 3, iteritems() on Python 2 only.
        makeelement(
            parent, 'wp:effectExtent',
            **{k[-1].lower(): v for k, v in get_image_margins(style).items()})
    else:
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
    if floating is not None:
        # The idiotic Word requires this to be after the extent settings
        if as_block:
            makeelement(parent, 'wp:wrapTopAndBottom')
        else:
            makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
    self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
    return ans