Ejemplo n.º 1
0
def check_filenames(container):
    '''Collect an EscapedName error for every renamable file name in
    container that is not left unchanged by urlquote().

    Names listed in container.names_that_must_not_be_changed are skipped.
    Returns a (possibly empty) list of EscapedName objects.'''
    renamable = set(container.name_path_map) - container.names_that_must_not_be_changed
    return [EscapedName(n) for n in renamable if urlquote(n) != n]
Ejemplo n.º 2
0
 def name_to_href(self, name, base=None):
     '''Return the URL-quoted href of name relative to base.

     base must itself be a name; when it is None the href is computed
     relative to self.root, otherwise relative to the directory that
     contains base.'''
     target = self.name_to_abspath(name)
     if base is None:
         start = self.root
     else:
         start = os.path.dirname(self.name_to_abspath(base))
     # hrefs always use forward slashes, whatever the platform separator is
     href = relpath(target, start)
     return urlquote(href.replace(os.sep, '/'))
Ejemplo n.º 3
0
def check_filenames(container):
    '''Collect an EscapedName error for every renamable file name in
    container that is not left unchanged by urlquote().

    Names listed in container.names_that_must_not_be_changed are skipped.
    Returns a (possibly empty) list of EscapedName objects.'''
    renamable = set(container.name_path_map) - container.names_that_must_not_be_changed
    return [EscapedName(n) for n in renamable if urlquote(n) != n]
Ejemplo n.º 4
0
 def read_image(self, href):
     '''Return the Image record for href, creating and caching it in
     self.images on first use.

     The manifest item is looked up by href and, failing that, by the
     URL-quoted href. If no item is found, its data is not bytes, or
     reading it raises FileNotFoundError, a warning is logged and None is
     returned. Data that identify() cannot handle is replaced by the
     bundled blank.png before the image is registered.'''
     if href in self.images:
         return self.images[href]

     def warn_missing():
         self.log.warning('Failed to find image:', href)

     manifest_hrefs = self.oeb.manifest.hrefs
     item = manifest_hrefs.get(href) or manifest_hrefs.get(urlquote(href))
     try:
         if item is None or not isinstance(item.data, bytes):
             warn_missing()
             return None
     except FileNotFoundError:
         warn_missing()
         return None

     try:
         fmt, width, height = identify(item.data)
     except Exception:
         self.log.warning('Replacing corrupted image with blank: %s' % href)
         item.data = I('blank.png', data=True, allow_user_override=False)
         fmt, width, height = identify(item.data)

     fname = 'media/' + self.create_filename(href, fmt)
     rid = self.document_relationships.add_image(fname)
     self.images[href] = Image(rid, fname, width, height, fmt, item)
     # the Image record keeps a reference to item; drop the raw bytes now
     item.unload_data_from_memory()
     return self.images[href]
Ejemplo n.º 5
0
 def name_to_href(self, name, base=None):
     '''Return the URL-quoted href of name relative to base.

     base must itself be a name; when it is None the href is computed
     relative to self.root, otherwise relative to the directory that
     contains base.'''
     target = self.name_to_abspath(name)
     if base is None:
         start = self.root
     else:
         start = os.path.dirname(self.name_to_abspath(base))
     # hrefs always use forward slashes, whatever the platform separator is
     href = relpath(target, start)
     return urlquote(href.replace(os.sep, '/'))
Ejemplo n.º 6
0
    def __init__(self, name):
        '''Set up the "unsafe characters" error for the file called name.

        Stores a safe replacement name in self.sname and builds the
        translated HELP and INDIVIDUAL_FIX messages from name, its
        URL-quoted form and the replacement name.'''
        BaseError.__init__(self, _('Filename contains unsafe characters'), name)
        quoted = urlquote(name)
        self.sname = make_filename_safe(name)

        help_template = _(
            'The filename {0} contains unsafe characters, that must be escaped, like'
            ' this {1}. This can cause problems with some e-book readers. To be'
            ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
            ' underscores and hyphens in your file names. While many other characters'
            ' are allowed, they may cause problems with some software.')
        self.HELP = help_template.format(name, quoted)

        fix_template = _('Rename the file {0} to {1}')
        self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
Ejemplo n.º 7
0
    def __init__(self, name):
        '''Set up the "unsafe characters" error for the file called name.

        Stores a safe replacement name in self.sname and builds the
        translated HELP and INDIVIDUAL_FIX messages from name, its
        URL-quoted form and the replacement name.'''
        BaseError.__init__(self, _('Filename contains unsafe characters'), name)
        quoted = urlquote(name)
        self.sname = make_filename_safe(name)

        help_template = _(
            'The filename {0} contains unsafe characters, that must be escaped, like'
            ' this {1}. This can cause problems with some e-book readers. To be'
            ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
            ' underscores and hyphens in your file names. While many other characters'
            ' are allowed, they may cause problems with some software.')
        self.HELP = help_template.format(name, quoted)

        fix_template = _('Rename the file {0} to {1}')
        self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
Ejemplo n.º 8
0
 def __init__(self, name):
     '''Set up the "unsafe characters" error for the file called name.

     self.sname is the suggested replacement: each '/'-separated component
     of name is passed through ascii_filename() and every character that
     is not in URL_SAFE is then replaced by an underscore.'''
     from calibre.utils.filenames import ascii_filename
     BaseError.__init__(self, _('Filename contains unsafe characters'), name)
     quoted = urlquote(name)

     def sanitize(component):
         return ''.join(
             ch if ch in URL_SAFE else '_' for ch in ascii_filename(component))

     self.sname = '/'.join(map(sanitize, name.split('/')))

     help_template = _(
         'The filename {0} contains unsafe characters, that must be escaped, like'
         ' this {1}. This can cause problems with some ebook readers. To be'
         ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
         ' underscores and hyphens in your file names. While many other characters'
         ' are allowed, they may cause problems with some software.')
     self.HELP = help_template.format(name, quoted)

     fix_template = _('Rename the file {0} to {1}')
     self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
Ejemplo n.º 9
0
 def __init__(self, name):
     '''Set up the "unsafe characters" error for the file called name.

     self.sname is the suggested replacement: each '/'-separated component
     of name is passed through ascii_filename() and every character that
     is not in URL_SAFE is then replaced by an underscore.'''
     from calibre.utils.filenames import ascii_filename
     BaseError.__init__(self, _('Filename contains unsafe characters'), name)
     quoted = urlquote(name)

     def sanitize(component):
         return ''.join(
             ch if ch in URL_SAFE else '_' for ch in ascii_filename(component))

     self.sname = '/'.join(map(sanitize, name.split('/')))

     help_template = _(
         'The filename {0} contains unsafe characters, that must be escaped, like'
         ' this {1}. This can cause problems with some ebook readers. To be'
         ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
         ' underscores and hyphens in your file names. While many other characters'
         ' are allowed, they may cause problems with some software.')
     self.HELP = help_template.format(name, quoted)

     fix_template = _('Rename the file {0} to {1}')
     self.INDIVIDUAL_FIX = fix_template.format(name, self.sname)
Ejemplo n.º 10
0
        def donode(item, parent, base, subpath):
            '''Recursively append one DIV-wrapped link to parent for every
            titled child of item. Children without a title are skipped,
            together with their descendants.'''
            for entry in item:
                label = entry.title
                if not label:
                    continue
                raw = unquote_path(entry.href or '')
                # Default to the file's base name placed under subpath; use
                # the raw href only when that default does not exist on disk
                # below base but the raw path does.
                flattened = os.path.join(subpath, os.path.basename(raw))
                if (os.path.exists(os.path.join(base, flattened)) or
                        not os.path.exists(os.path.join(base, raw))):
                    target = flattened
                else:
                    target = raw

                # a '%' is taken to mean the path is already quoted
                if '%' not in target:
                    target = urlquote(target)
                if not raw:
                    target = ''
                node = DIV(A(label, href=target))
                donode(entry, node, base, subpath)
                parent.append(node)
Ejemplo n.º 11
0
        def donode(item, parent, base, subpath):
            '''Recursively append one DIV-wrapped link to parent for every
            titled child of item. Children without a title are skipped,
            together with their descendants.'''
            for entry in item:
                label = entry.title
                if not label:
                    continue
                raw = unquote_path(entry.href or '')
                # Default to the file's base name placed under subpath; use
                # the raw href only when that default does not exist on disk
                # below base but the raw path does.
                flattened = os.path.join(subpath, os.path.basename(raw))
                if (os.path.exists(os.path.join(base, flattened)) or
                        not os.path.exists(os.path.join(base, raw))):
                    target = flattened
                else:
                    target = raw

                # a '%' is taken to mean the path is already quoted
                if '%' not in target:
                    target = urlquote(target)
                if not raw:
                    target = ''
                node = DIV(A(label, href=target))
                donode(entry, node, base, subpath)
                parent.append(node)
Ejemplo n.º 12
0
    def serialize_hyperlink(self, parent, link):
        '''Create the w:hyperlink element for link under parent.

        link is an (item, url, tooltip) triple. A URL without a scheme is
        resolved with item.abshref() and, when it names a known document
        href, becomes a hyperlink carrying a w_anchor bookmark. http, https
        and ftp URLs become hyperlinks carrying the r_id of an external
        relationship. In every other case no element is created and parent
        itself is returned.'''
        item, url, tooltip = link
        parsed = urlparse(url)
        scheme = parsed.scheme

        def make_link(container, anchor=None, rel_id=None, tip=None):
            attrs = {}
            if anchor is not None:
                attrs['w_anchor'] = anchor
            elif rel_id is not None:
                attrs['r_id'] = rel_id
            if tip:
                attrs['w_tooltip'] = tip
            return self.namespace.makeelement(container, 'w:hyperlink', **attrs)

        if not scheme:
            href = item.abshref(parsed.path)
            # retry with the URL-quoted form before giving up
            if href not in self.document_hrefs:
                href = urlquote(href)
            if href not in self.document_hrefs:
                self.log.warn(
                    'Ignoring internal hyperlink with href (%s) pointing to unknown destination'
                    % url)
                return parent
            key = (href, parsed.fragment or self.top_anchor)
            if key not in self.anchor_map:
                # unknown fragment: fall back to the top of the target file
                key = (href, self.top_anchor)
            return make_link(parent, anchor=self.anchor_map[key], tip=tooltip)

        if scheme in {'http', 'https', 'ftp'}:
            if url not in self.external_links:
                relationship = self.document_relationships.add_relationship(
                    url, self.namespace.names['LINKS'], target_mode='External')
                self.external_links[url] = relationship
            return make_link(parent, rel_id=self.external_links[url], tip=tooltip)

        return parent
Ejemplo n.º 13
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''Create and return an OEB book built from the HTML input at htmlpath.

        The book is created unpopulated, its metadata is filled in from mi
        (with fallbacks for language, creator and title), every non-binary
        file reported by get_filelist() is added to the manifest and spine,
        links in the HTML files and stylesheets are passed through
        self.resource_adder, and a table of contents is generated from the
        document titles or first headings.

        :param htmlpath: path of the main HTML file, passed to get_filelist()
        :param basedir: passed through to get_filelist()
        :param opts: options object; input_encoding is read directly,
                     language and authors are read with getattr()
        :param log: logger used for the containers and stored as self.log
        :param mi: metadata handed to meta_info_to_oeb_metadata()
        :return: the oeb object, which is also stored as self.oeb
        '''
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer, rewrite_links,
                                             urlnormalize, urldefrag,
                                             BINARY_MIME, OEB_STYLES, xpath,
                                             urlquote)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        from calibre.ebooks.metadata import string_to_authors
        from calibre.utils.localization import canonicalize_lang
        import css_parser, logging
        # Raise css_parser's own log threshold to WARN
        css_parser.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log,
                             None,
                             opts,
                             self,
                             encoding=opts.input_encoding,
                             populate=False)
        self.oeb = oeb

        # --- Metadata: copy from mi, then fill in anything still missing ---
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn('Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate(__('Unknown'))]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        # Always add a freshly generated UUID identifier
        bookid = unicode_type(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop finds the first identifier that has an 'id'
        # attribute but then assigns metadata.identifier[0], not ident.
        # Confirm whether that is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # --- Manifest/spine: one text/html item per non-binary input file ---
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        # Maps each input file path to the manifest href generated for it
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            # The container is re-pointed at the directory of the current file
            oeb.container = DirContainer(os.path.dirname(path),
                                         log,
                                         ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                                             href=sanitize_file_name(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            # Only the main file gets its base name URL-quoted, and only
            # when its path contains a literal '%'
            if path == htmlpath and '%' in path:
                bname = urlquote(bname)
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        # Keys are lower-cased for paths that is_case_sensitive() reports
        # as not case sensitive
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Keep the locally imported helpers on self so other methods can
        # reach them (presumably resource_adder; verify against the class)
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        # --- Rewrite links in every HTML file through resource_adder ---
        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            href = htmlfile_map[path]
            try:
                item = oeb.manifest.hrefs[href]
            except KeyError:
                # Fall back to the normalized form of the href
                item = oeb.manifest.hrefs[urlnormalize(href)]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # --- Rewrite url() references in every stylesheet ---
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Recover the stylesheet's source directory by reverse lookup
                # in added_resources; dpath stays None when nothing matches
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                css_parser.replaceUrls(
                    item.data, partial(self.resource_adder, base=dpath))

        # --- Table of contents from <title> text or first headings ---
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            # Placeholder, replaced below by the first non-empty heading text
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Prefer the titles; switch to the headings when titles repeat
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): titles only has entries for linear items with a
        # non-empty title, but it is zipped against the whole spine here, so
        # labels can be paired with the wrong item when a spine item is
        # non-linear or has no title. Confirm and fix separately.
        for title, item in zip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        # Leave the container pointing at the current working directory
        oeb.container = DirContainer(getcwd(), oeb.log, ignore_opf=True)
        return oeb
Ejemplo n.º 14
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''Create and return an OEB book built from the HTML input at htmlpath.

        The book is created unpopulated, its metadata is filled in from mi
        (with fallbacks for language, creator and title), every non-binary
        file reported by get_filelist() is added to the manifest and spine,
        links in the HTML files and stylesheets are passed through
        self.resource_adder, and a table of contents is generated from the
        document titles or first headings.

        :param htmlpath: path of the main HTML file, passed to get_filelist()
        :param basedir: passed through to get_filelist()
        :param opts: options object; input_encoding is read directly,
                     language and authors are read with getattr()
        :param log: logger used for the containers and stored as self.log
        :param mi: metadata handed to meta_info_to_oeb_metadata()
        :return: the oeb object, which is also stored as self.oeb
        '''
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath, urlquote)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        from calibre.ebooks.metadata import string_to_authors
        from calibre.utils.localization import canonicalize_lang
        import css_parser, logging
        # Raise css_parser's own log threshold to WARN
        css_parser.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # --- Metadata: copy from mi, then fill in anything still missing ---
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn(u'Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate(__('Unknown'))]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        # Always add a freshly generated UUID identifier
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop finds the first identifier that has an 'id'
        # attribute but then assigns metadata.identifier[0], not ident.
        # Confirm whether that is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # --- Manifest/spine: one text/html item per non-binary input file ---
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        # Maps each input file path to the manifest href generated for it
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            # The container is re-pointed at the directory of the current file
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html', href=sanitize_file_name(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            # Only the main file gets its base name URL-quoted, and only
            # when its path contains a literal '%'
            if path == htmlpath and '%' in path:
                bname = urlquote(bname)
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        # Keys are lower-cased for paths that is_case_sensitive() reports
        # as not case sensitive
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Keep the locally imported helpers on self so other methods can
        # reach them (presumably resource_adder; verify against the class)
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        # --- Rewrite links in every HTML file through resource_adder ---
        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            href = htmlfile_map[path]
            try:
                item = oeb.manifest.hrefs[href]
            except KeyError:
                # Fall back to the normalized form of the href
                item = oeb.manifest.hrefs[urlnormalize(href)]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # --- Rewrite url() references in every stylesheet ---
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Recover the stylesheet's source directory by reverse lookup
                # in added_resources; dpath stays None when nothing matches
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                css_parser.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        # --- Table of contents from <title> text or first headings ---
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            # Placeholder, replaced below by the first non-empty heading text
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Prefer the titles; switch to the headings when titles repeat
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): titles only has entries for linear items with a
        # non-empty title, but it is zipped against the whole spine here, so
        # labels can be paired with the wrong item when a spine item is
        # non-linear or has no title. Confirm and fix separately.
        for title, item in zip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        # Leave the container pointing at the current working directory
        oeb.container = DirContainer(getcwd(), oeb.log, ignore_opf=True)
        return oeb