def check_filenames(container):
    '''
    Collect an EscapedName error for every file name in the container that
    is not left unchanged by urlquote().

    Names listed in ``container.names_that_must_not_be_changed`` are never
    reported.
    '''
    renamable = set(container.name_path_map) - container.names_that_must_not_be_changed
    return [EscapedName(name) for name in renamable if urlquote(name) != name]
def name_to_href(self, name, base=None):
    '''
    Return a URL-quoted href for ``name``, relative to ``base``.

    :param name: The name to build a link to.
    :param base: Either another name, in which case the href is relative to
        the directory containing that name's file, or None, in which case
        the href is relative to self.root.
    '''
    target = self.name_to_abspath(name)
    if base is None:
        start = self.root
    else:
        start = os.path.dirname(self.name_to_abspath(base))
    # hrefs always use forward slashes, whatever the platform separator is
    relative = relpath(target, start).replace(os.sep, '/')
    return urlquote(relative)
def read_image(self, href):
    '''
    Return the Image record for ``href``, creating and caching it in
    self.images on first use.

    Returns None (after logging a warning) when no manifest item with
    bytes data can be found for ``href``. Image data that identify()
    cannot handle is replaced by the bundled blank.png.
    '''
    if href in self.images:
        return self.images[href]

    hrefs = self.oeb.manifest.hrefs
    # Fall back to the URL-quoted form of the href if the raw one is unknown
    item = hrefs.get(href) or hrefs.get(urlquote(href))
    try:
        # Reading item.data may raise FileNotFoundError
        usable = item is not None and isinstance(item.data, bytes)
    except FileNotFoundError:
        usable = False
    if not usable:
        self.log.warning('Failed to find image:', href)
        return

    try:
        fmt, width, height = identify(item.data)
    except Exception:
        self.log.warning('Replacing corrupted image with blank: %s' % href)
        item.data = I('blank.png', data=True, allow_user_override=False)
        fmt, width, height = identify(item.data)

    image_fname = 'media/' + self.create_filename(href, fmt)
    image_rid = self.document_relationships.add_image(image_fname)
    self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
    item.unload_data_from_memory()
    return self.images[href]
def __init__(self, name):
    '''
    Build the error for the file ``name``.

    Sets self.sname to make_filename_safe(name) as the suggested
    replacement, and fills in the HELP and INDIVIDUAL_FIX messages.
    '''
    BaseError.__init__(self, _('Filename contains unsafe characters'), name)
    quoted = urlquote(name)
    self.sname = make_filename_safe(name)

    help_template = _(
        'The filename {0} contains unsafe characters, that must be escaped, like'
        ' this {1}. This can cause problems with some e-book readers. To be'
        ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
        ' underscores and hyphens in your file names. While many other characters'
        ' are allowed, they may cause problems with some software.')
    self.HELP = help_template.format(name, quoted)

    fix_template = _('Rename the file {0} to {1}')
    self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
def __init__(self, name):
    '''
    Build the error for the file ``name``.

    The suggested replacement name, self.sname, is computed one
    '/'-separated component at a time: each component is passed through
    ascii_filename() and every character not in URL_SAFE is then replaced
    by an underscore.
    '''
    from calibre.utils.filenames import ascii_filename
    BaseError.__init__(self, _('Filename contains unsafe characters'), name)
    quoted = urlquote(name)

    safe_parts = []
    for component in name.split('/'):
        ascii_part = ascii_filename(component)
        safe_parts.append(
            ''.join(ch if ch in URL_SAFE else '_' for ch in ascii_part))
    self.sname = '/'.join(safe_parts)

    help_template = _(
        'The filename {0} contains unsafe characters, that must be escaped, like'
        ' this {1}. This can cause problems with some ebook readers. To be'
        ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
        ' underscores and hyphens in your file names. While many other characters'
        ' are allowed, they may cause problems with some software.')
    self.HELP = help_template.format(name, quoted)

    fix_template = _('Rename the file {0} to {1}')
    self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
def donode(item, parent, base, subpath):
    '''
    Recursively append one DIV(A(...)) per titled child of ``item`` to
    ``parent``.

    Children without a title are skipped, together with their descendants.
    The link target is the basename of the child's href joined to
    ``subpath``, unless that file does not exist under ``base`` while the
    original href path does, in which case the original path is used.
    '''
    for child in item:
        label = child.title
        if not label:
            continue

        decoded = unquote_path(child.href or '')
        target = os.path.join(subpath, os.path.basename(decoded))
        missing_in_subpath = not os.path.exists(os.path.join(base, target))
        if missing_in_subpath and os.path.exists(os.path.join(base, decoded)):
            target = decoded
        # A path that already contains % is not quoted again
        if '%' not in target:
            target = urlquote(target)
        # A child without any href gets an empty link
        if not decoded:
            target = ''

        node = DIV(A(label, href=target))
        donode(child, node, base, subpath)
        parent.append(node)
def serialize_hyperlink(self, parent, link):
    '''
    Create the w:hyperlink element for ``link`` inside ``parent``.

    :param parent: The element the hyperlink is created in.
    :param link: A tuple of (item, url, tooltip).
    :return: The new w:hyperlink element, or ``parent`` itself when the
        link is neither a known internal destination nor an http, https
        or ftp URL.
    '''
    item, url, tooltip = link
    parsed = urlparse(url)

    def make_link(container, anchor=None, rid=None, tip=None):
        # anchor takes precedence over the relationship id
        attrs = {}
        if anchor is not None:
            attrs['w_anchor'] = anchor
        elif rid is not None:
            attrs['r_id'] = rid
        if tip:
            attrs['w_tooltip'] = tip
        return self.namespace.makeelement(container, 'w:hyperlink', **attrs)

    if not parsed.scheme:
        # No scheme: treat as a link to another file of the document
        target = item.abshref(parsed.path)
        if target not in self.document_hrefs:
            target = urlquote(target)
        if target not in self.document_hrefs:
            self.log.warn(
                'Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url)
            # With an empty scheme the external-link check below cannot match
            return parent
        key = (target, parsed.fragment or self.top_anchor)
        if key in self.anchor_map:
            bookmark = self.anchor_map[key]
        else:
            # Unknown fragment: point at the top of the destination file
            bookmark = self.anchor_map[(target, self.top_anchor)]
        return make_link(parent, anchor=bookmark, tip=tooltip)

    if parsed.scheme in {'http', 'https', 'ftp'}:
        if url not in self.external_links:
            self.external_links[url] = self.document_relationships.add_relationship(
                url, self.namespace.names['LINKS'], target_mode='External')
        return make_link(parent, rid=self.external_links[url], tip=tooltip)

    return parent
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
    '''
    Build and return an OEB book for the HTML file ``htmlpath``.

    The book is created unpopulated, then: missing language, creator and
    title metadata are filled in and a uuid identifier is added; every
    non-binary file returned by get_filelist() is added to the manifest
    and spine; links in the HTML files and stylesheets are rewritten
    through self.resource_adder; and a table of contents is generated
    from the document titles, or from the first headings when the titles
    are not unique.
    '''
    import uuid
    from calibre.ebooks.conversion.plumber import create_oebbook
    from calibre.ebooks.oeb.base import (
        DirContainer, rewrite_links, urlnormalize, urldefrag, BINARY_MIME,
        OEB_STYLES, xpath, urlquote)
    from calibre import guess_type
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    from calibre.ebooks.html.input import get_filelist
    from calibre.ebooks.metadata import string_to_authors
    from calibre.utils.localization import canonicalize_lang
    import css_parser
    import logging

    css_parser.log.setLevel(logging.WARN)
    self.OEB_STYLES = OEB_STYLES
    oeb = create_oebbook(
        log, None, opts, self, encoding=opts.input_encoding, populate=False)
    self.oeb = oeb

    # ---- metadata ----
    metadata = oeb.metadata
    meta_info_to_oeb_metadata(mi, metadata, log)

    if not metadata.language:
        lang = canonicalize_lang(getattr(opts, 'language', None))
        if not lang:
            oeb.logger.warn('Language not specified')
            lang = get_lang().replace('_', '-')
        metadata.add('language', lang)

    if not metadata.creator:
        authors = getattr(opts, 'authors', None)
        if authors:
            authors = string_to_authors(authors)
        if not authors:
            oeb.logger.warn('Creator not specified')
            authors = [self.oeb.translate(__('Unknown'))]
        for author in authors:
            metadata.add('creator', author)

    if not metadata.title:
        oeb.logger.warn('Title not specified')
        metadata.add('title', self.oeb.translate(__('Unknown')))

    book_uuid = unicode_type(uuid.uuid4())
    metadata.add('identifier', book_uuid, id='uuid_id', scheme='uuid')
    # NOTE(review): the uid is always the first identifier, not necessarily
    # the one that carries the id attribute -- confirm this is intended.
    if any('id' in ident.attrib for ident in metadata.identifier):
        self.oeb.uid = metadata.identifier[0]

    # ---- manifest and spine ----
    html_files = [
        entry for entry in get_filelist(htmlpath, basedir, opts, log)
        if not entry.is_binary]
    href_by_path = {}
    for entry in html_files:
        path = entry.path
        oeb.container = DirContainer(
            os.path.dirname(path), log, ignore_opf=True)
        base_name = os.path.basename(path)
        item_id, href = oeb.manifest.generate(
            id='html', href=sanitize_file_name(base_name))
        href_by_path[path] = href
        item = oeb.manifest.add(item_id, href, 'text/html')
        needs_quoting = path == htmlpath and '%' in path
        item.html_input_href = urlquote(base_name) if needs_quoting else base_name
        oeb.spine.add(item, True)

    self.added_resources = {}
    self.log = log
    self.log('Normalizing filename cases')
    for path, href in href_by_path.items():
        key = path if self.is_case_sensitive(path) else path.lower()
        self.added_resources[key] = href

    # Stored on self alongside the resource bookkeeping above
    self.urlnormalize = urlnormalize
    self.DirContainer = DirContainer
    self.urldefrag = urldefrag
    self.guess_type = guess_type
    self.BINARY_MIME = BINARY_MIME

    # ---- link rewriting ----
    self.log('Rewriting HTML links')
    for entry in html_files:
        path = entry.path
        folder = os.path.dirname(path)
        oeb.container = DirContainer(folder, log, ignore_opf=True)
        href = href_by_path[path]
        try:
            item = oeb.manifest.hrefs[href]
        except KeyError:
            item = oeb.manifest.hrefs[urlnormalize(href)]
        rewrite_links(item.data, partial(self.resource_adder, base=folder))

    for item in oeb.manifest.values():
        if item.media_type not in self.OEB_STYLES:
            continue
        # Directory of the first added resource with this href, if any
        folder = next(
            (os.path.dirname(path)
             for path, href in self.added_resources.items()
             if href == item.href),
            None)
        css_parser.replaceUrls(
            item.data, partial(self.resource_adder, base=folder))

    # ---- table of contents ----
    toc = self.oeb.toc
    self.oeb.auto_generated_toc = True
    heading_expr = '/h:html/h:body//h:%s[position()=1]/text()'
    titles = []
    headers = []
    for item in self.oeb.spine:
        if not item.linear:
            continue
        html = item.data
        raw_title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = re.sub(r'\s+', ' ', raw_title.strip())
        if title:
            titles.append(title)
        headers.append('(unlabled)')
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            raw_header = ''.join(xpath(html, heading_expr % tag))
            header = re.sub(r'\s+', ' ', raw_header.strip())
            if header:
                headers[-1] = header
                break

    # Duplicate titles are useless as TOC labels, use the headings instead
    use = headers if len(titles) > len(set(titles)) else titles
    # NOTE(review): titles only holds the non-empty titles, so it can be
    # shorter than the spine and pair a title with the wrong item here.
    for title, item in zip(use, self.oeb.spine):
        if item.linear:
            toc.add(title, item.href)

    oeb.container = DirContainer(getcwd(), oeb.log, ignore_opf=True)
    return oeb
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
    '''
    Build and return an OEB book for the HTML file ``htmlpath``.

    The book is created unpopulated, then: missing language, creator and
    title metadata are filled in and a uuid identifier is added; every
    non-binary file returned by get_filelist() is added to the manifest
    and spine; links in the HTML files and stylesheets are rewritten
    through self.resource_adder; and a table of contents is generated
    from the document titles, or from the first headings when the titles
    are not unique.
    '''
    import uuid
    from calibre.ebooks.conversion.plumber import create_oebbook
    from calibre.ebooks.oeb.base import (
        DirContainer, rewrite_links, urlnormalize, urldefrag, BINARY_MIME,
        OEB_STYLES, xpath, urlquote)
    from calibre import guess_type
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    from calibre.ebooks.html.input import get_filelist
    from calibre.ebooks.metadata import string_to_authors
    from calibre.utils.localization import canonicalize_lang
    import css_parser
    import logging

    css_parser.log.setLevel(logging.WARN)
    self.OEB_STYLES = OEB_STYLES
    oeb = create_oebbook(
        log, None, opts, self, encoding=opts.input_encoding, populate=False)
    self.oeb = oeb

    # ---- metadata ----
    metadata = oeb.metadata
    meta_info_to_oeb_metadata(mi, metadata, log)

    if not metadata.language:
        lang = canonicalize_lang(getattr(opts, 'language', None))
        if not lang:
            oeb.logger.warn(u'Language not specified')
            lang = get_lang().replace('_', '-')
        metadata.add('language', lang)

    if not metadata.creator:
        authors = getattr(opts, 'authors', None)
        if authors:
            authors = string_to_authors(authors)
        if not authors:
            oeb.logger.warn('Creator not specified')
            authors = [self.oeb.translate(__('Unknown'))]
        for author in authors:
            metadata.add('creator', author)

    if not metadata.title:
        oeb.logger.warn('Title not specified')
        metadata.add('title', self.oeb.translate(__('Unknown')))

    book_uuid = str(uuid.uuid4())
    metadata.add('identifier', book_uuid, id='uuid_id', scheme='uuid')
    # NOTE(review): the uid is always the first identifier, not necessarily
    # the one that carries the id attribute -- confirm this is intended.
    if any('id' in ident.attrib for ident in metadata.identifier):
        self.oeb.uid = metadata.identifier[0]

    # ---- manifest and spine ----
    html_files = [
        entry for entry in get_filelist(htmlpath, basedir, opts, log)
        if not entry.is_binary]
    href_by_path = {}
    for entry in html_files:
        path = entry.path
        oeb.container = DirContainer(
            os.path.dirname(path), log, ignore_opf=True)
        base_name = os.path.basename(path)
        item_id, href = oeb.manifest.generate(
            id='html', href=sanitize_file_name(base_name))
        href_by_path[path] = href
        item = oeb.manifest.add(item_id, href, 'text/html')
        needs_quoting = path == htmlpath and '%' in path
        item.html_input_href = urlquote(base_name) if needs_quoting else base_name
        oeb.spine.add(item, True)

    self.added_resources = {}
    self.log = log
    self.log('Normalizing filename cases')
    for path, href in href_by_path.items():
        key = path if self.is_case_sensitive(path) else path.lower()
        self.added_resources[key] = href

    # Stored on self alongside the resource bookkeeping above
    self.urlnormalize = urlnormalize
    self.DirContainer = DirContainer
    self.urldefrag = urldefrag
    self.guess_type = guess_type
    self.BINARY_MIME = BINARY_MIME

    # ---- link rewriting ----
    self.log('Rewriting HTML links')
    for entry in html_files:
        path = entry.path
        folder = os.path.dirname(path)
        oeb.container = DirContainer(folder, log, ignore_opf=True)
        href = href_by_path[path]
        try:
            item = oeb.manifest.hrefs[href]
        except KeyError:
            item = oeb.manifest.hrefs[urlnormalize(href)]
        rewrite_links(item.data, partial(self.resource_adder, base=folder))

    for item in oeb.manifest.values():
        if item.media_type not in self.OEB_STYLES:
            continue
        # Directory of the first added resource with this href, if any
        folder = next(
            (os.path.dirname(path)
             for path, href in self.added_resources.items()
             if href == item.href),
            None)
        css_parser.replaceUrls(
            item.data, partial(self.resource_adder, base=folder))

    # ---- table of contents ----
    toc = self.oeb.toc
    self.oeb.auto_generated_toc = True
    heading_expr = '/h:html/h:body//h:%s[position()=1]/text()'
    titles = []
    headers = []
    for item in self.oeb.spine:
        if not item.linear:
            continue
        html = item.data
        raw_title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = re.sub(r'\s+', ' ', raw_title.strip())
        if title:
            titles.append(title)
        headers.append('(unlabled)')
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            raw_header = ''.join(xpath(html, heading_expr % tag))
            header = re.sub(r'\s+', ' ', raw_header.strip())
            if header:
                headers[-1] = header
                break

    # Duplicate titles are useless as TOC labels, use the headings instead
    use = headers if len(titles) > len(set(titles)) else titles
    # NOTE(review): titles only holds the non-empty titles, so it can be
    # shorter than the spine and pair a title with the wrong item here.
    for title, item in zip(use, self.oeb.spine):
        if item.linear:
            toc.add(title, item.href)

    oeb.container = DirContainer(getcwd(), oeb.log, ignore_opf=True)
    return oeb