Esempio n. 1
0
 def _metadata_from_opf(self, opf):
     from calibre.ebooks.metadata.opf2 import OPF
     from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
     stream = cStringIO.StringIO(etree.tostring(opf, xml_declaration=True, encoding='utf-8'))
     o = OPF(stream)
     pwm = o.primary_writing_mode
     if pwm:
         self.oeb.metadata.primary_writing_mode = pwm
     mi = o.to_book_metadata()
     if not mi.language:
         mi.language = get_lang().replace('_', '-')
     self.oeb.metadata.add('language', mi.language)
     if not mi.book_producer:
         mi.book_producer = '%(a)s (%(v)s) [http://%(a)s-ebook.com]'%\
             dict(a=__appname__, v=__version__)
     meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger)
     m = self.oeb.metadata
     m.add('identifier', str(uuid.uuid4()), id='uuid_id', scheme='uuid')
     self.oeb.uid = self.oeb.metadata.identifier[-1]
     if not m.title:
         m.add('title', self.oeb.translate(__('Unknown')))
     has_aut = False
     for x in m.creator:
         if getattr(x, 'role', '').lower() in ('', 'aut'):
             has_aut = True
             break
     if not has_aut:
         m.add('creator', self.oeb.translate(__('Unknown')), role='aut')
Esempio n. 2
0
def mark_as_titlepage(container, name, move_to_start=True):
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name == q:
                break
        if not linear:
            item.set('linear', 'yes')
        if item.getparent().index(item) > 0:
            container.insert_into_xml(item.getparent(), item, 0)
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
        ref.getparent().remove(ref)

    for guide in container.opf_xpath('//opf:guide'):
        container.insert_into_xml(
            guide,
            guide.makeelement(OPF('reference'),
                              type='cover',
                              href=container.name_to_href(
                                  name, container.opf_name)))
    container.dirty(container.opf_name)
Esempio n. 3
0
    def convert_text(self, oeb_book):
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.ebooks.pdf.render.from_html import PDFWriter

        self.log.debug('Serializing oeb input to disk for processing...')
        self.get_cover_data()

        self.process_fonts()

        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts,
                               self.log)

            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
            opf = OPF(opfpath, os.path.dirname(opfpath))

            self.write(PDFWriter, [s.path for s in opf.spine],
                       getattr(opf, 'toc', None))
Esempio n. 4
0
def set_azw3_cover(container, cover_path, report):
    name = None
    found = True
    for gi in container.opf_xpath(
            '//opf:guide/opf:reference[@href and contains(@type, "cover")]'):
        href = gi.get('href')
        name = container.href_to_name(href, container.opf_name)
        container.remove_from_xml(gi)
    if name is None or not container.has_name(name):
        item = container.generate_item(name='cover.jpeg', id_prefix='cover')
        name = container.href_to_name(item.get('href'), container.opf_name)
        found = False
    href = container.name_to_href(name, container.opf_name)
    guide = container.opf_xpath('//opf:guide')[0]
    container.insert_into_xml(
        guide, guide.makeelement(OPF('reference'), href=href, type='cover'))
    with open(cover_path, 'rb') as src, container.open(name, 'wb') as dest:
        shutil.copyfileobj(src, dest)
    container.dirty(container.opf_name)
    report('Cover updated' if found else 'Cover inserted')
Esempio n. 5
0
 def writer(root, prefixes, refines, ival=None):
     uid = root.get('unique-identifier')
     package_identifier = None
     for ident in XPath('./opf:metadata/dc:identifier')(root):
         is_package_id = uid is not None and uid == ident.get('id')
         if is_package_id:
             package_identifier = ident
         val = (ident.text or '').strip()
         if (val.startswith(name + ':')
                 or ident.get(OPF('scheme')) == name) and not is_package_id:
             remove_element(ident, refines)
     metadata = XPath('./opf:metadata')(root)[0]
     if ival:
         ident = metadata.makeelement(DC('identifier'))
         ident.text = '%s:%s' % (name, ival)
         if package_identifier is None:
             metadata.append(ident)
         else:
             p = package_identifier.getparent()
             p.insert(p.index(package_identifier), ident)
Esempio n. 6
0
    def generate_item(self, name, id_prefix=None, media_type=None):
        '''Add an item to the manifest with href derived from the given
        name. Ensures uniqueness of href and id automatically. Returns
        generated item.'''
        id_prefix = id_prefix or 'id'
        media_type = media_type or guess_type(name)
        href = self.name_to_href(name, self.opf_name)
        base, ext = href.rpartition('.')[0::2]
        all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
        c = 0
        item_id = id_prefix
        while item_id in all_ids:
            c += 1
            item_id = id_prefix + '%d'%c
        all_names = {x.get('href') for x in self.opf_xpath(
                '//opf:manifest/opf:item[@href]')}

        def exists(h):
            return self.exists(self.href_to_name(h, self.opf_name))

        c = 0
        while href in all_names or exists(href):
            c += 1
            href = '%s_%d.%s'%(base, c, ext)
        manifest = self.opf_xpath('//opf:manifest')[0]
        item = manifest.makeelement(OPF('item'),
                                    id=item_id, href=href)
        item.set('media-type', media_type)
        self.insert_into_xml(manifest, item)
        self.dirty(self.opf_name)
        name = self.href_to_name(href, self.opf_name)
        self.name_path_map[name] = path = self.name_to_abspath(name)
        self.mime_map[name] = media_type
        # Ensure that the file corresponding to the newly created item exists
        # otherwise cloned containers will fail when they try to get the number
        # of links to the file
        base = os.path.dirname(path)
        if not os.path.exists(base):
            os.makedirs(base)
        open(path, 'wb').close()
        return item
Esempio n. 7
0
    def convert_text(self, oeb_book):
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.ebooks.pdf.render.from_html import PDFWriter

        self.log.debug('Serializing oeb input to disk for processing...')
        self.get_cover_data()

        self.process_fonts()
        if self.opts.pdf_use_document_margins and self.stored_page_margins:
            import json
            for href, margins in iteritems(self.stored_page_margins):
                item = oeb_book.manifest.hrefs.get(href)
                if item is not None:
                    root = item.data
                    if hasattr(root, 'xpath') and margins:
                        root.set('data-calibre-pdf-output-page-margins',
                                 json.dumps(margins))

        # Remove javascript
        for item in self.oeb.spine:
            root = item.data
            if hasattr(root, 'xpath'):
                for script in root.xpath('//*[local-name()="script"]'):
                    script.text = None
                    script.attrib.clear()
                for elem in root.iter('*'):
                    for attr in tuple(elem.attrib):
                        if attr.startswith('on'):
                            elem.set(attr, '')

        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts,
                               self.log)

            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
            opf = OPF(opfpath, os.path.dirname(opfpath))

            self.write(PDFWriter, [s.path for s in opf.spine],
                       getattr(opf, 'toc', None))
Esempio n. 8
0
def mark_as_titlepage(container, name, move_to_start=True):
    '''
    Mark the specified HTML file as the titlepage of the EPUB.

    :param move_to_start: If True the HTML file is moved to the start of the spine
    '''
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name == q:
                break
        if not linear:
            item.set('linear', 'yes')
        if item.getparent().index(item) > 0:
            container.insert_into_xml(item.getparent(), item, 0)
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
        ref.getparent().remove(ref)

    for guide in get_guides(container):
        container.insert_into_xml(guide, guide.makeelement(
            OPF('reference'), type='cover', href=container.name_to_href(name, container.opf_name)))
    container.dirty(container.opf_name)
Esempio n. 9
0
    def fix_opf(self, container):
        spine_names = {n for n, l in container.spine_names}
        spine = container.opf_xpath('//opf:spine')[0]
        rmap = {v: k for k, v in container.manifest_id_map.iteritems()}
        # Add unreferenced text files to the spine
        for name, mt in container.mime_map.iteritems():
            if mt in OEB_DOCS and name not in spine_names:
                spine_names.add(name)
                container.insert_into_xml(
                    spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

        # Remove duplicate entries from spine
        seen = set()
        for item, name, linear in container.spine_iter:
            if name in seen:
                container.remove_from_xml(item)
            seen.add(name)

        # Ensure that the meta cover tag is correct
        cover_id = rmap['_static/' + self.config.epub_cover[0]]
        for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta.set('content', cover_id)

        # Add description metadata
        metadata = container.opf_xpath('//opf:metadata')[0]
        container.insert_into_xml(metadata,
                                  metadata.makeelement(DC('description')))
        metadata[-1].text = 'Comprehensive documentation for calibre'

        # Remove search.html since it is useless in EPUB
        container.remove_item('search.html')

        # Remove unreferenced files
        for error in check_links(container):
            if error.__class__ is UnreferencedResource:
                container.remove_item(error.name)

        # Pretty print the OPF
        pretty_opf(container.parsed(container.opf_name))
        container.dirty(container.opf_name)
Esempio n. 10
0
    def fix_opf(self, container):
        spine_names = {n for n, l in container.spine_names}
        spine = container.opf_xpath('//opf:spine')[0]
        rmap = {v: k for k, v in iteritems(container.manifest_id_map)}
        # Add unreferenced text files to the spine
        for name, mt in iteritems(container.mime_map):
            if mt in OEB_DOCS and name not in spine_names:
                spine_names.add(name)
                container.insert_into_xml(
                    spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

        # Remove duplicate entries from spine
        seen = set()
        for item, name, linear in container.spine_iter:
            if name in seen:
                container.remove_from_xml(item)
            seen.add(name)

        # Remove the <guide> which is not needed in EPUB 3
        for guide in container.opf_xpath('//*[local-name()="guide"]'):
            guide.getparent().remove(guide)

        # Ensure that the cover-image property is set
        cover_id = rmap['_static/' + self.config.epub_cover[0]]
        for item in container.opf_xpath(
                '//opf:item[@id="{}"]'.format(cover_id)):
            item.set('properties', 'cover-image')

        # Remove any <meta cover> tag as it is not needed in epub 3
        for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta.getparent().remove(meta)

        # Remove unreferenced files
        for error in check_links(container):
            if error.__class__ is UnreferencedResource:
                container.remove_item(error.name)

        # Pretty print the OPF
        pretty_opf(container.parsed(container.opf_name))
        container.dirty(container.opf_name)
Esempio n. 11
0
def add_or_replace_jacket(container):
    name = find_existing_jacket(container)
    found = True
    if name is None:
        jacket_item = container.generate_item('jacket.xhtml',
                                              id_prefix='jacket')
        name = container.href_to_name(jacket_item.get('href'),
                                      container.opf_name)
        found = False
    replace_jacket(container, name)
    if not found:
        # Insert new jacket into spine
        index = 0
        sp = container.abspath_to_name(container.spine_items.next())
        if sp == find_cover_page(container):
            index = 1
        itemref = container.opf.makeelement(OPF('itemref'),
                                            idref=jacket_item.get('id'))
        container.insert_into_xml(container.opf_xpath('//opf:spine')[0],
                                  itemref,
                                  index=index)
    return found
Esempio n. 12
0
 def convert_metadata(self, oeb):
     E = ElementMaker(
         namespace=namespaces['cp'],
         nsmap={x: namespaces[x]
                for x in 'cp dc dcterms xsi'.split()})
     cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
     ts = utcnow().isoformat(str('T')).rpartition('.')[0] + 'Z'
     for x in 'created modified'.split():
         x = cp.makeelement(
             '{%s}%s' % (namespaces['dcterms'], x),
             **{'{%s}type' % namespaces['xsi']: 'dcterms:W3CDTF'})
         x.text = ts
         cp.append(x)
     package = etree.Element(OPF('package'),
                             attrib={'version': '2.0'},
                             nsmap={None: OPF2_NS})
     oeb.metadata.to_opf2(package)
     self.mi = ReadOPF(BytesIO(xml2str(package)),
                       populate_spine=False,
                       try_to_guess_cover=False).to_book_metadata()
     update_doc_props(cp, self.mi)
     return xml2str(cp)
Esempio n. 13
0
def parse_identifier(ident, val, refines):
    idid = ident.get('id')
    refines = refines[idid]
    scheme = None
    lval = val.lower()

    def finalize(scheme, val):
        if not scheme or not val:
            return None, None
        scheme = scheme.lower()
        if scheme in ('http', 'https'):
            return None, None
        if scheme.startswith('isbn'):
            scheme = 'isbn'
        if scheme == 'isbn':
            val = val.split(':')[-1]
            val = check_isbn(val)
            if val is None:
                return None, None
        return scheme, val

    # Try the OPF 2 style opf:scheme attribute, which will be present, for
    # example, in EPUB 3 files that have had their metadata set by an
    # application that only understands EPUB 2.
    scheme = ident.get(OPF('scheme'))
    if scheme and not lval.startswith('urn:'):
        return finalize(scheme, val)

    # Technically, we should be looking for refines that define the scheme, but
    # the IDioticPF created such a bad spec that they got their own
    # examples wrong, so I cannot be bothered doing this.
    # http://www.idpf.org/epub/301/spec/epub-publications-errata/

    # Parse the value for the scheme
    if lval.startswith('urn:'):
        val = val[4:]

    prefix, rest = val.partition(':')[::2]
    return finalize(prefix, rest)
Esempio n. 14
0
    def create_cover_page(self, input_fmt):
        templ = '''
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
        <head><style>
        html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }
        img {
            width: auto; height: auto;
            margin-left: auto; margin-right: auto;
            max-width: 100vw; max-height: 100vh
        }
        </style></head><body><img src="%s"/></body></html>
        '''
        if input_fmt == 'epub':

            def cover_path(action, data):
                if action == 'write_image':
                    data.write(BLANK_JPEG)

            return set_epub_cover(self,
                                  cover_path, (lambda *a: None),
                                  options={'template': templ})
        raster_cover_name = find_cover_image(self, strict=True)
        if raster_cover_name is None:
            item = self.generate_item(name='cover.jpeg', id_prefix='cover')
            raster_cover_name = self.href_to_name(item.get('href'),
                                                  self.opf_name)
        with self.open(raster_cover_name, 'wb') as dest:
            dest.write(BLANK_JPEG)
        item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
        titlepage_name = self.href_to_name(item.get('href'), self.opf_name)
        raw = templ % prepare_string_for_xml(
            self.name_to_href(raster_cover_name, titlepage_name), True)
        with self.open(titlepage_name, 'wb') as f:
            f.write(raw.encode('utf-8'))
        spine = self.opf_xpath('//opf:spine')[0]
        ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
        self.insert_into_xml(spine, ref, index=0)
        self.dirty(self.opf_name)
        return raster_cover_name, titlepage_name
Esempio n. 15
0
 def create_cover_page(self, input_fmt):
     if input_fmt == 'epub':
         def cover_path(action, data):
             if action == 'write_image':
                 data.write(BLANK_JPEG)
         return set_epub_cover(self, cover_path, (lambda *a: None))
     from calibre.ebooks.oeb.transforms.cover import CoverManager
     raster_cover_name = find_cover_image(self, strict=True)
     if raster_cover_name is None:
         item = self.generate_item(name='cover.jpeg', id_prefix='cover')
         raster_cover_name = self.href_to_name(item.get('href'), self.opf_name)
     with self.open(raster_cover_name, 'wb') as dest:
         dest.write(BLANK_JPEG)
     item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
     titlepage_name = self.href_to_name(item.get('href'), self.opf_name)
     templ = CoverManager.SVG_TEMPLATE
     raw = templ % self.name_to_href(raster_cover_name, titlepage_name)
     with self.open(titlepage_name, 'wb') as f:
         f.write(raw.encode('utf-8'))
     spine = self.opf_xpath('//opf:spine')[0]
     ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
     self.insert_into_xml(spine, ref, index=0)
     self.dirty(self.opf_name)
     return raster_cover_name, titlepage_name
Esempio n. 16
0
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb

        if self.opts.epub_inline_toc:
            from calibre.ebooks.mobi.writer8.toc import TOCAdder
            opts.mobi_toc_at_start = not opts.epub_toc_at_end
            opts.mobi_passthrough = False
            opts.no_inline_toc = False
            TOCAdder(oeb,
                     opts,
                     replace_previous_inline_toc=True,
                     ignore_existing_toc=True)

        if self.opts.epub_flatten:
            from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
            FlatFilenames()(oeb, opts)
        else:
            from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
            UniqueFilenames()(oeb, opts)

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.upshift_markup()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages(check_colorspaces=True)(oeb, opts)

        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                      max_flow_size=self.opts.flow_size * 1024)
        split(self.oeb, self.opts)

        from calibre.ebooks.oeb.transforms.cover import CoverManager
        cm = CoverManager(
            no_default_cover=self.opts.no_default_epub_cover,
            no_svg_cover=self.opts.no_svg_cover,
            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
        cm(self.oeb, self.opts, self.log)

        self.workaround_sony_quirks()

        if self.oeb.toc.count() == 0:
            self.log.warn('This EPUB file has no Table of Contents. '
                          'Creating a default TOC')
            first = next(iter(self.oeb.spine))
            self.oeb.toc.add(_('Start'), first.href)

        from calibre.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
        uuid = None
        for x in identifiers:
            if x.get(OPF('scheme'),
                     None).lower() == 'uuid' or str(x).startswith('urn:uuid:'):
                uuid = str(x).split(':')[-1]
                break
        encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

        if uuid is None:
            self.log.warn('No UUID identifier found')
            from uuid import uuid4
            uuid = str(uuid4())
            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

        if encrypted_fonts and not uuid.startswith('urn:uuid:'):
            # Apparently ADE requires this value to start with urn:uuid:
            # for some absurd reason, or it will throw a hissy fit and refuse
            # to use the obfuscated fonts.
            for x in identifiers:
                if str(x) == uuid:
                    x.content = 'urn:uuid:' + uuid

        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            metadata_xml = None
            extra_entries = []
            if self.is_periodical:
                if self.opts.output_profile.epub_periodical_format == 'sony':
                    from calibre.ebooks.epub.periodical import sony_metadata
                    metadata_xml, atom_xml = sony_metadata(oeb)
                    extra_entries = [('atom.xml', 'application/atom+xml',
                                      atom_xml)]
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
            self.condense_ncx([
                os.path.join(tdir, x) for x in os.listdir(tdir)
                if x.endswith('.ncx')
            ][0])
            encryption = None
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
            with initialize_container(output_path,
                                      os.path.basename(opf),
                                      extra_entries=extra_entries) as epub:
                epub.add_dir(tdir)
                if encryption is not None:
                    epub.writestr('META-INF/encryption.xml', encryption)
                if metadata_xml is not None:
                    epub.writestr('META-INF/metadata.xml',
                                  metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                from calibre.utils.zipfile import ZipFile
                if os.path.exists(opts.extract_to):
                    if os.path.isdir(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    else:
                        os.remove(opts.extract_to)
                os.mkdir(opts.extract_to)
                with ZipFile(output_path) as zf:
                    zf.extractall(path=opts.extract_to)
                self.log.info('EPUB extracted to', opts.extract_to)
Esempio n. 17
0
def split(container, name, loc_or_xpath, before=True):
    ''' Split the file specified by name at the position specified by loc_or_xpath. '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(
        root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(
        root2.xpath('//*/@name'))
    manifest_item = container.generate_item(
        name, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'),
                                         container.opf_name)

    # Fix links in the split trees
    for r, rname, anchors in [(root1, bottom_name, anchors_in_bottom),
                              (root2, name, anchors_in_top)]:
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname == name:
                purl = urlparse(url)
                if purl.fragment in anchors:
                    a.set(
                        'href', '%s#%s' %
                        (container.name_to_href(rname, name), purl.fragment))

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in container.mime_map.iteritems():
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name,
                                     bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
Esempio n. 18
0
    def create_cover_page(self, input_fmt):
        templ = '''
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
        <head><style>
        html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }
        img {
            width: 100%%; height: 100%%;
            object-fit: contain;
            margin-left: auto; margin-right: auto;
            max-width: 100vw; max-height: 100vh;
            top: 50vh; transform: translateY(-50%%);
            position: relative;
        }
        body.cover-fill img { object-fit: fill; }
        </style></head><body><img src="%s"/></body></html>
        '''

        def generic_cover():
            if self.book_metadata is not None:
                from calibre.ebooks.covers import create_cover
                mi = self.book_metadata
                return create_cover(mi.title, mi.authors, mi.series,
                                    mi.series_index)
            return BLANK_JPEG

        if input_fmt == 'epub':

            def image_callback(cover_image, wrapped_image):
                if cover_image:
                    image_callback.cover_data = self.raw_data(cover_image,
                                                              decode=False)
                if wrapped_image and not getattr(image_callback, 'cover_data',
                                                 None):
                    image_callback.cover_data = self.raw_data(wrapped_image,
                                                              decode=False)

            def cover_path(action, data):
                if action == 'write_image':
                    cdata = getattr(image_callback, 'cover_data',
                                    None) or generic_cover()
                    data.write(cdata)

            raster_cover_name, titlepage_name = set_epub_cover(
                self,
                cover_path, (lambda *a: None),
                options={'template': templ},
                image_callback=image_callback)
        else:
            raster_cover_name = find_cover_image(self, strict=True)
            if raster_cover_name is None:
                item = self.generate_item(name='cover.jpeg', id_prefix='cover')
                raster_cover_name = self.href_to_name(item.get('href'),
                                                      self.opf_name)
                with self.open(raster_cover_name, 'wb') as dest:
                    dest.write(generic_cover())
            item = self.generate_item(name='titlepage.html',
                                      id_prefix='titlepage')
            titlepage_name = self.href_to_name(item.get('href'), self.opf_name)
            raw = templ % prepare_string_for_xml(
                self.name_to_href(raster_cover_name, titlepage_name), True)
            with self.open(titlepage_name, 'wb') as f:
                f.write(raw.encode('utf-8'))
            spine = self.opf_xpath('//opf:spine')[0]
            ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
            self.insert_into_xml(spine, ref, index=0)
            self.dirty(self.opf_name)
        return raster_cover_name, titlepage_name
Esempio n. 19
0
    def rationalize_cover2(self, opf, log):
        ''' Ensure that the cover information in the guide is correct. That
        means, at most one entry with type="cover" that points to a raster
        cover and at most one entry with type="titlepage" that points to an
        HTML titlepage. '''
        from calibre.ebooks.oeb.base import OPF
        removed = None
        from lxml import etree
        guide_cover, guide_elem = None, None
        for guide_elem in opf.iterguide():
            if guide_elem.get('type', '').lower() == 'cover':
                guide_cover = guide_elem.get('href', '').partition('#')[0]
                break
        if not guide_cover:
            raster_cover = opf.raster_cover
            if raster_cover:
                if guide_elem is None:
                    g = opf.root.makeelement(OPF('guide'))
                    opf.root.append(g)
                else:
                    g = guide_elem.getparent()
                guide_cover = raster_cover
                guide_elem = g.makeelement(OPF('reference'),
                                           attrib={
                                               'href': raster_cover,
                                               'type': 'cover'
                                           })
                g.append(guide_elem)
            return
        spine = list(opf.iterspine())
        if not spine:
            return
        # Check if the cover specified in the guide is also
        # the first element in spine
        idref = spine[0].get('idref', '')
        manifest = list(opf.itermanifest())
        if not manifest:
            return
        elem = [x for x in manifest if x.get('id', '') == idref]
        if not elem or elem[0].get('href', None) != guide_cover:
            return
        log('Found HTML cover', guide_cover)

        # Remove from spine as covers must be treated
        # specially
        if not self.for_viewer:
            if len(spine) == 1:
                log.warn(
                    'There is only a single spine item and it is marked as the cover. Removing cover marking.'
                )
                for guide_elem in tuple(opf.iterguide()):
                    if guide_elem.get('type', '').lower() == 'cover':
                        guide_elem.getparent().remove(guide_elem)
                return
            else:
                spine[0].getparent().remove(spine[0])
                removed = guide_cover
        else:
            # Ensure the cover is displayed as the first item in the book, some
            # epub files have it set with linear='no' which causes the cover to
            # display in the end
            spine[0].attrib.pop('linear', None)
            opf.spine[0].is_linear = True
        # Ensure that the guide has a cover entry pointing to a raster cover
        # and a titlepage entry pointing to the html titlepage. The titlepage
        # entry will be used by the epub output plugin, the raster cover entry
        # by other output plugins.

        # Search for a raster cover identified in the OPF
        raster_cover = opf.raster_cover

        # Set the cover guide entry
        if raster_cover is not None:
            guide_elem.set('href', raster_cover)
        else:
            # Render the titlepage to create a raster cover
            from calibre.ebooks import render_html_svg_workaround
            guide_elem.set('href', 'calibre_raster_cover.jpg')
            t = etree.SubElement(elem[0].getparent(),
                                 OPF('item'),
                                 href=guide_elem.get('href'),
                                 id='calibre_raster_cover')
            t.set('media-type', 'image/jpeg')
            if os.path.exists(guide_cover):
                renderer = render_html_svg_workaround(guide_cover, log)
                if renderer is not None:
                    open('calibre_raster_cover.jpg', 'wb').write(renderer)

        # Set the titlepage guide entry
        self.set_guide_type(opf, 'titlepage', guide_cover, 'Title Page')
        return removed
Esempio n. 20
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                          ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = self.find_opf()
        if opf is None:
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                '%s is not a valid EPUB file (could not find opf)' % path)

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1]) + '/'

            def normpath(x):
                return posixpath.normpath(delta + elem.get('href'))

            for elem in opf.itermanifest():
                elem.set('href', normpath(elem.get('href')))
            for elem in opf.iterguide():
                elem.set('href', normpath(elem.get('href')))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
        self.removed_cover = f(opf, log)
        if self.removed_cover:
            self.removed_items_to_ignore = (self.removed_cover, )
        epub3_nav = opf.epub3_nav
        if epub3_nav is not None:
            self.convert_epub3_nav(epub3_nav, opf, log, options)

        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_:
                mt = y.get('media-type', None)
                if mt in {
                        'application/vnd.adobe-page-template+xml',
                        'application/vnd.adobe.page-template+xml',
                        'application/adobe-page-template+xml',
                        'application/adobe.page-template+xml',
                        'application/text'
                }:
                    not_for_spine.add(id_)
                ext = y.get('href', '').rpartition('.')[-1].lower()
                if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                    # some epub authoring software sets font mime types to
                    # text/plain
                    not_for_spine.add(id_)
                    y.set('media-type', 'application/font')

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError('No valid entries in the spine of this EPUB')

        with lopen('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Esempio n. 21
0
def build_exth(metadata,
               prefer_author_sort=False,
               is_periodical=False,
               share_not_sync=True,
               cover_offset=None,
               thumbnail_offset=None,
               start_offset=None,
               mobi_doctype=2,
               num_of_resources=None,
               kf8_unknown_count=0,
               be_kindlegen2=False,
               kf8_header_index=None,
               opts=None):
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES: continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [
                    authors_to_sort_string([unicode(c)]) for c in items
                ]
            else:
                creators = [unicode(c) for c in items]
            items = creators
        elif term == 'rights':
            try:
                rights = utf8_text(unicode(metadata.rights[0]))
            except:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1
            continue

        for item in items:
            data = unicode(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif item.scheme.lower() == 'isbn':
                    pass
                else:
                    continue
            if term == 'language':
                d2 = usr_lang_as_iso639_1(data)
                if d2:
                    data = d2
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        if (x.get(OPF('scheme'), None).lower() == 'uuid'
                or unicode(x).startswith('urn:uuid:')):
            uuid = unicode(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write UUID as SOURCE
    c_uuid = b'calibre:%s' % uuid
    exth.write(pack(b'>II', 112, len(c_uuid) + 8))
    exth.write(c_uuid)
    nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101: b'NWPR', 0x103: b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    datestr = None
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if not datestr:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'],
                        len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        vals = {204: 201, 205: 2, 206: 5, 207: 0}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204: 201, 205: 2, 206: 0, 207: 101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204: 201, 205: 1, 206: 2, 207: 33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12, cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['thumboffset'], 12, thumbnail_offset))
        thumbnail_uri_str = bytes(
            'kindle:embed:%s' %
            (to_base(thumbnail_offset, base=32, min_num_digits=4)))
        exth.write(
            pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'],
                 len(thumbnail_uri_str) + 8))
        exth.write(thumbnail_uri_str)
        nrecs += 2

    if start_offset is not None:
        try:
            len(start_offset)
        except TypeError:
            start_offset = [start_offset]
        for so in start_offset:
            if so is not None:
                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, so))
                nrecs += 1

    if kf8_header_index is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
                 kf8_header_index))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['num_of_resources'], 12,
                 num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
                 kf8_unknown_count))
        nrecs += 1

    #Extra metadata for fullscrenn
    if opts and opts.book_mode == 'comic':  #added for kindleear [insert0003 2017-09-03]
        exth.write(pack(b'>II', EXTH_CODES['RegionMagnification'], 13))
        exth.write(b'false')
        exth.write(pack(b'>II', EXTH_CODES['book-type'], 13))
        exth.write(b'comic')
        exth.write(pack(b'>II', EXTH_CODES['zero-gutter'], 12))
        exth.write(b'true')
        exth.write(pack(b'>II', EXTH_CODES['zero-margin'], 12))
        exth.write(b'true')
        exth.write(pack(b'>II', EXTH_CODES['primary-writing-mode'], 21))
        exth.write(b'horizontal-lr')
        exth.write(pack(b'>II', EXTH_CODES['fixed-layout'], 12))
        exth.write(b'true')
        exth.write(pack(b'>II', EXTH_CODES['orientation-lock'], 16))
        exth.write(b'portrait')
        original_resolution = b'%dx%d' % opts.dest.comic_screen_size  #sth like comic_screen_size = (1072, 1430)
        exth.write(
            pack(b'>II', EXTH_CODES['original-resolution'],
                 len(original_resolution) + 8))
        exth.write(original_resolution)
        nrecs += 8

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail)  # Always pad w/ at least 1 byte
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
Esempio n. 22
0
def create_rating(root, prefixes, val):
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(OPF('meta'), attrib={'property': 'calibre:rating'})
    d.text = val
    m.append(d)
Esempio n. 23
0
    def fb2_header(self):
        from calibre.ebooks.oeb.base import OPF
        metadata = {}
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
        metadata['date'] = '%i.%i.%i' % (
            datetime.now().day, datetime.now().month, datetime.now().year)
        if self.oeb_book.metadata.language:
            lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
            if not lc:
                lc = self.oeb_book.metadata.language[0].value
            metadata['lang'] = lc or 'en'
        else:
            metadata['lang'] = u'en'
        metadata['id'] = None
        metadata['cover'] = self.get_cover()
        metadata['genre'] = self.opts.fb2_genre

        metadata['author'] = u''
        for auth in self.oeb_book.metadata.creator:
            author_first = u''
            author_middle = u''
            author_last = u''
            author_parts = auth.value.split(' ')
            if len(author_parts) == 1:
                author_last = author_parts[0]
            elif len(author_parts) == 2:
                author_first = author_parts[0]
                author_last = author_parts[1]
            else:
                author_first = author_parts[0]
                author_middle = ' '.join(author_parts[1:-1])
                author_last = author_parts[-1]
            metadata['author'] += '<author>'
            metadata[
                'author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(
                    author_first)
            if author_middle:
                metadata[
                    'author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(
                        author_middle)
            metadata[
                'author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(
                    author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
            metadata[
                'author'] = u'<author><first-name></first-name><last-name><last-name></author>'

        metadata['sequence'] = u''
        if self.oeb_book.metadata.series:
            index = '1'
            if self.oeb_book.metadata.series_index:
                index = self.oeb_book.metadata.series_index[0]
            metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (
                prepare_string_for_xml(
                    u'%s' % self.oeb_book.metadata.series[0]), index)

        identifiers = self.oeb_book.metadata['identifier']
        for x in identifiers:
            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(
                    x).startswith('urn:uuid:'):
                metadata['id'] = unicode(x).split(':')[-1]
                break
        if metadata['id'] is None:
            self.log.warn('No UUID identifier found')
            metadata['id'] = str(uuid.uuid4())

        for key, value in metadata.items():
            if key not in ('author', 'cover', 'sequence'):
                metadata[key] = prepare_string_for_xml(value)

        return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
                '<description>' \
                    '<title-info>' \
                        '<genre>%(genre)s</genre>' \
                            '%(author)s' \
                        '<book-title>%(title)s</book-title>' \
                        '%(cover)s' \
                        '<lang>%(lang)s</lang>' \
                        '%(sequence)s' \
                    '</title-info>' \
                    '<document-info>' \
                        '%(author)s' \
                        '<program-used>%(appname)s %(version)s</program-used>' \
                        '<date>%(date)s</date>' \
                        '<id>%(id)s</id>' \
                        '<version>1.0</version>' \
                    '</document-info>' \
                '</description>' % metadata
Esempio n. 24
0
def check_opf(container):
    errors = []
    opf_version = container.opf_version_parsed

    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)

    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    all_ids = set(container.opf_xpath('//*/@id'))
    if '' in all_ids:
        for empty_id_tag in container.opf_xpath('//*[@id=""]'):
            errors.append(EmptyID(container.opf_name, empty_id_tag.sourceline))
    all_ids.discard('')
    for elem in container.opf_xpath('//*[@idref]'):
        if elem.get('idref') not in all_ids:
            errors.append(IncorrectIdref(container.opf_name, elem.get('idref'), elem.sourceline))

    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(container.opf_name, item.get('id', None), item.sourceline))
        else:
            hname = container.href_to_name(href, container.opf_name)
            if not hname or not container.exists(hname):
                errors.append(MissingHref(container.opf_name, href, item.sourceline))
            if href in seen:
                if href not in dups:
                    dups[href] = [seen[href]]
                dups[href].append(item.sourceline)
            else:
                seen[href] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs) for eid, locs in iteritems(dups))

    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            if ref not in dups:
                dups[ref] = [seen[ref]]
            dups[ref].append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs, for_spine=True) for eid, locs in iteritems(dups))

    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == spine.get('toc')]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(container.opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(container.opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                rmap = {v:k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(container.opf_name, spine.sourceline, ncx_id))

    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(container.opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if len(covers) > 0:
        if len(covers) > 1:
            errors.append(MultipleCovers(container.opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(container.opf_name, cover.sourceline, cover.get('content', '')))
            raw = etree.tostring(cover)
            try:
                n, c = raw.index(b'name="'), raw.index(b'content="')
            except ValueError:
                n = c = -1
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    uid = container.opf.get('unique-identifier', None)
    if uid is None or not container.opf_xpath('/opf:package/opf:metadata/dc:identifier[@id=%r]' % uid):
        errors.append(NoUID(container.opf_name))
    for elem in container.opf_xpath('/opf:package/opf:metadata/dc:identifier'):
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(container.opf_name, elem.sourceline))

    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                mitem = container.opf_xpath('/opf:package/opf:manifest/opf:item[@id=%r]' % iid)
                if mitem:
                    lnum = mitem[0].sourceline
                else:
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, container.opf_name))

    return errors
Esempio n. 25
0
def create_series(root, refines, series, series_index):
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(OPF('meta'), attrib={'property':'belongs-to-collection'})
    d.text = series
    m.append(d)
    set_refines(d, refines, refdef('collection-type', 'series'), refdef('group-position', series_index))
Esempio n. 26
0
    def fb2_header(self):
        from calibre.ebooks.oeb.base import OPF
        metadata = {}
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
        metadata['date'] = '%i.%i.%i' % (
            datetime.now().day, datetime.now().month, datetime.now().year)
        if self.oeb_book.metadata.language:
            lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
            if not lc:
                lc = self.oeb_book.metadata.language[0].value
            metadata['lang'] = lc or 'en'
        else:
            metadata['lang'] = u'en'
        metadata['id'] = None
        metadata['cover'] = self.get_cover()
        metadata['genre'] = self.opts.fb2_genre

        metadata['author'] = ''
        for auth in self.oeb_book.metadata.creator:
            author_first = ''
            author_middle = ''
            author_last = ''
            author_parts = auth.value.split(' ')
            if len(author_parts) == 1:
                author_last = author_parts[0]
            elif len(author_parts) == 2:
                author_first = author_parts[0]
                author_last = author_parts[1]
            else:
                author_first = author_parts[0]
                author_middle = ' '.join(author_parts[1:-1])
                author_last = author_parts[-1]
            metadata['author'] += '<author>'
            metadata[
                'author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(
                    author_first)
            if author_middle:
                metadata[
                    'author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(
                        author_middle)
            metadata[
                'author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(
                    author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
            metadata[
                'author'] = '<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = ''
        tags = list(map(unicode_type, self.oeb_book.metadata.subject))
        if tags:
            tags = ', '.join(prepare_string_for_xml(x) for x in tags)
            metadata['keywords'] = '<keywords>%s</keywords>' % tags

        metadata['sequence'] = ''
        if self.oeb_book.metadata.series:
            index = '1'
            if self.oeb_book.metadata.series_index:
                index = self.oeb_book.metadata.series_index[0]
            metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
                prepare_string_for_xml(
                    '%s' % self.oeb_book.metadata.series[0]), index)

        year = publisher = isbn = ''
        identifiers = self.oeb_book.metadata['identifier']
        for x in identifiers:
            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(
                    x).startswith('urn:uuid:'):
                metadata['id'] = unicode_type(x).split(':')[-1]
                break
        if metadata['id'] is None:
            self.log.warn('No UUID identifier found')
            metadata['id'] = unicode_type(uuid.uuid4())

        try:
            date = self.oeb_book.metadata['date'][0]
        except IndexError:
            pass
        else:
            year = '<year>%s</year>' % prepare_string_for_xml(
                date.value.partition('-')[0])

        try:
            publisher = self.oeb_book.metadata['publisher'][0]
        except IndexError:
            pass
        else:
            publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(
                publisher.value)

        for x in identifiers:
            if x.get(OPF('scheme'), None).lower() == 'isbn':
                isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

        metadata['year'], metadata['isbn'], metadata[
            'publisher'] = year, isbn, publisher
        for key, value in metadata.items():
            if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
                           'publisher', 'isbn'):
                metadata[key] = prepare_string_for_xml(value)

        try:
            comments = self.oeb_book.metadata['description'][0]
        except Exception:
            metadata['comments'] = ''
        else:
            from calibre.utils.html2text import html2text
            metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
                prepare_string_for_xml(html2text(comments.value).strip()))

        # Keep the indentation level of the description the same as the body.
        header = textwrap.dedent('''\
            <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
            <description>
                <title-info>
                    <genre>%(genre)s</genre>
                    %(author)s
                    <book-title>%(title)s</book-title>
                    %(cover)s
                    <lang>%(lang)s</lang>
                    %(keywords)s
                    %(sequence)s
                    %(comments)s
                </title-info>
                <document-info>
                    %(author)s
                    <program-used>%(appname)s %(version)s</program-used>
                    <date>%(date)s</date>
                    <id>%(id)s</id>
                    <version>1.0</version>
                </document-info>
                <publish-info>
                    %(publisher)s
                    %(year)s
                    %(isbn)s
                </publish-info>
            </description>''') % metadata

        # Remove empty lines.
        return '\n'.join(filter(unicode_type.strip, header.splitlines()))
Esempio n. 27
0
def create_epub_cover(container, cover_path, existing_image, options=None):
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition('.')[-1].lower()
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        manifest_id = {v: k
                       for k, v in container.manifest_id_map.iteritems()
                       }[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' %
                                                manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'),
                                              container.opf_name)

        with open(cover_path, 'rb') as src, container.open(raster_cover,
                                                           'wb') as dest:
            shutil.copyfileobj(src, dest)
    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    if no_svg:
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        width, height = 600, 800
        try:
            if existing_image:
                width, height = identify_data(
                    container.raw_data(existing_image, decode=False))[:2]
            else:
                width, height = identify(cover_path)[:2]
        except:
            container.log.exception("Failed to get width and height of cover")
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
        templ = templ.replace('__viewbox__', '0 0 %d %d' % (width, height))
        templ = templ.replace('__width__', str(width))
        templ = templ.replace('__height__', str(height))
    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'),
                                       container.opf_name)
    raw = templ % container.name_to_href(raster_cover,
                                         titlepage).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # We have to make sure the raster cover item has id="cover" for the moron
    # that wrote the Nook firmware
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    guide = container.opf_get_or_create('guide')
    container.insert_into_xml(
        guide,
        guide.makeelement(OPF('reference'),
                          type='cover',
                          title=_('Cover'),
                          href=container.name_to_href(
                              titlepage, base=container.opf_name)))
    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
Esempio n. 28
0
def check_opf(container):
    errors = []

    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'),
                        container.opf_name)
        err.HELP = xml(
            _('The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">'
              )).format(OPF2_NS)
        errors.append(err)

    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    all_ids = set(container.opf_xpath('//*/@id'))
    for elem in container.opf_xpath('//*[@idref]'):
        if elem.get('idref') not in all_ids:
            errors.append(
                IncorrectIdref(container.opf_name, elem.get('idref'),
                               elem.sourceline))

    nl_items = [
        elem.sourceline for elem in container.opf_xpath(
            '//opf:spine/opf:itemref[@linear="no"]')
    ]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    seen, dups = {}, {}
    for item in container.opf_xpath(
            '/opf:package/opf:manifest/opf:item[@href]'):
        href = item.get('href')
        if not container.exists(
                container.href_to_name(href, container.opf_name)):
            errors.append(
                MissingHref(container.opf_name, href, item.sourceline))
        if href in seen:
            if href not in dups:
                dups[href] = [seen[href]]
            dups[href].append(item.sourceline)
        else:
            seen[href] = item.sourceline
    errors.extend(
        DuplicateHref(container.opf_name, eid, locs)
        for eid, locs in dups.iteritems())

    seen, dups = {}, {}
    for item in container.opf_xpath(
            '/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            if ref not in dups:
                dups[ref] = [seen[ref]]
            dups[ref].append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(
        DuplicateHref(container.opf_name, eid, locs, for_spine=True)
        for eid, locs in dups.iteritems())

    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [
            x for x in container.opf_xpath(
                '/opf:package/opf:manifest/opf:item[@id]')
            if x.get('id') == spine.get('toc')
        ]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(
                    IncorrectToc(container.opf_name,
                                 mitem.sourceline,
                                 bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(
                IncorrectToc(container.opf_name,
                             spine.sourceline,
                             bad_idref=spine.get('toc')))

    covers = container.opf_xpath(
        '/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if len(covers) > 0:
        if len(covers) > 1:
            errors.append(
                MultipleCovers(container.opf_name,
                               [c.sourceline for c in covers]))
        manifest_ids = set(
            container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(
                    IncorrectCover(container.opf_name, cover.sourceline,
                                   cover.get('content', '')))
            raw = etree.tostring(cover)
            try:
                n, c = raw.index('name="'), raw.index('content="')
            except ValueError:
                n = c = -1
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    uid = container.opf.get('unique-identifier', None)
    if uid is None or not container.opf_xpath(
            '/opf:package/opf:metadata/dc:identifier[@id=%r]' % uid):
        errors.append(NoUID(container.opf_name))

    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                mitem = container.opf_xpath(
                    '/opf:package/opf:manifest/opf:item[@id=%r]' % iid)
                if mitem:
                    lnum = mitem[0].sourceline
                else:
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum,
                                       container.opf_name))

    return errors
Esempio n. 29
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except:
            log.exception("EPUB appears to be invalid ZIP file, trying a" " more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)
        encfile = os.path.abspath(os.path.join("META-INF", "encryption.xml"))
        opf = self.find_opf()
        if opf is None:
            for f in walk(u"."):
                if f.lower().endswith(".opf") and "__MACOSX" not in f and not os.path.basename(f).startswith("."):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, "name", "stream")

        if opf is None:
            raise ValueError("%s is not a valid EPUB file (could not find opf)" % path)

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            delta = "/".join(parts[:-1]) + "/"
            for elem in opf.itermanifest():
                elem.set("href", delta + elem.get("href"))
            for elem in opf.iterguide():
                elem.set("href", delta + elem.get("href"))

        self.removed_cover = self.rationalize_cover(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError("EPUB files with DTBook markup are not supported")

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get("id", None)
            if id_ and y.get("media-type", None) in ("application/vnd.adobe-page-template+xml", "application/text"):
                not_for_spine.add(id_)

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get("idref", None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError("No valid entries in the spine of this EPUB")

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u"content.opf")
Esempio n. 30
0
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    from calibre.ebooks.oeb.base import OPF
    if not mi.is_null('title'):
        m.clear('title')
        m.add('title', mi.title)
    if mi.title_sort:
        if not m.title:
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)
    if not mi.is_null('authors'):
        m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
        for a in mi.authors:
            attrib = {'role':'aut'}
            if mi.author_sort:
                attrib[OPF('file-as')] = mi.author_sort
            m.add('creator', a, attrib=attrib)
    if not mi.is_null('book_producer'):
        m.filter('contributor', lambda x : x.role.lower() == 'bkp')
        m.add('contributor', mi.book_producer, role='bkp')
    elif override_input_metadata:
        m.filter('contributor', lambda x : x.role.lower() == 'bkp')
    if not mi.is_null('comments'):
        m.clear('description')
        m.add('description', mi.comments)
    elif override_input_metadata:
        m.clear('description')
    if not mi.is_null('publisher'):
        m.clear('publisher')
        m.add('publisher', mi.publisher)
    elif override_input_metadata:
        m.clear('publisher')
    if not mi.is_null('series'):
        m.clear('series')
        m.add('series', mi.series)
    elif override_input_metadata:
        m.clear('series')
    identifiers = mi.get_identifiers()
    set_isbn = False
    for typ, val in identifiers.iteritems():
        has = False
        if typ.lower() == 'isbn':
            set_isbn = True
        for x in m.identifier:
            if x.scheme.lower() == typ.lower():
                x.content = val
                has = True
        if not has:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not set_isbn:
        m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')
    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            if lang and lang.lower() not in ('und', ''):
                m.add('language', lang)
    if not mi.is_null('series_index'):
        m.clear('series_index')
        m.add('series_index', mi.format_series_index())
    elif override_input_metadata:
        m.clear('series_index')
    if not mi.is_null('rating'):
        m.clear('rating')
        m.add('rating', '%.2f'%mi.rating)
    elif override_input_metadata:
        m.clear('rating')
    if not mi.is_null('tags'):
        m.clear('subject')
        for t in mi.tags:
            m.add('subject', t)
    elif override_input_metadata:
        m.clear('subject')
    if not mi.is_null('pubdate'):
        m.clear('date')
        m.add('date', isoformat(mi.pubdate))
    if not mi.is_null('timestamp'):
        m.clear('timestamp')
        m.add('timestamp', isoformat(mi.timestamp))
    if not mi.is_null('rights'):
        m.clear('rights')
        m.add('rights', mi.rights)
    if not mi.is_null('publication_type'):
        m.clear('publication_type')
        m.add('publication_type', mi.publication_type)

    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
Esempio n. 31
0
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
        share_not_sync=True, cover_offset=None, thumbnail_offset=None,
        start_offset=None, mobi_doctype=2, num_of_resources=None,
        kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None,
        page_progression_direction=None, primary_writing_mode=None):
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [authors_to_sort_string([unicode_type(c)]) for c in
                            items]
            else:
                creators = [unicode_type(c) for c in items]
            items = creators
        elif term == 'rights':
            try:
                rights = utf8_text(unicode_type(metadata.rights[0]))
            except:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1
            continue

        for item in items:
            data = unicode_type(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif item.scheme.lower() == 'isbn':
                    pass
                else:
                    continue
            if term == 'language':
                d2 = lang_as_iso639_1(data)
                if d2:
                    data = d2
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        if (x.get(OPF('scheme'), None).lower() == 'uuid' or
                unicode_type(x).startswith('urn:uuid:')):
            uuid = unicode_type(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode_type):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write UUID as SOURCE
    c_uuid = b'calibre:%s' % uuid
    exth.write(pack(b'>II', 112, len(c_uuid) + 8))
    exth.write(c_uuid)
    nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        mv = 200 if iswindows else 202 if isosx else 201
        vals = {204:mv, 205:2, 206:9, 207:0}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204:201, 205:2, 206:0, 207:101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204:201, 205:1, 206:2, 207:33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1
    if be_kindlegen2:
        revnum = b'0730-890adc2'
        exth.write(pack(b'>II', 535, 8 + len(revnum)) + revnum)
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
            cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
            thumbnail_offset))
        thumbnail_uri_str = bytes('kindle:embed:%s' %(to_base(thumbnail_offset, base=32, min_num_digits=4)))
        exth.write(pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'], len(thumbnail_uri_str) + 8))
        exth.write(thumbnail_uri_str)
        nrecs += 2

    if start_offset is not None:
        try:
            len(start_offset)
        except TypeError:
            start_offset = [start_offset]
        for so in start_offset:
            if so is not None:
                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
                    so))
                nrecs += 1

    if kf8_header_index is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
            kf8_header_index))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
            num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
            kf8_unknown_count))
        nrecs += 1

    if primary_writing_mode:
        pwm = primary_writing_mode.encode('utf-8')
        exth.write(pack(b'>II', EXTH_CODES['primary_writing_mode'], len(pwm) + 8))
        exth.write(pwm)
        nrecs += 1

    if page_progression_direction in {'rtl', 'ltr', 'default'}:
        ppd = bytes(page_progression_direction)
        exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'], len(ppd) + 8))
        exth.write(ppd)
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail)  # Always pad w/ at least 1 byte
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
Esempio n. 32
0
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file specified by name at the position specified by loc_or_xpath.
    Splitting automatically migrates all links and references to the affected
    files.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. Can also be a *loc* which is used internally to
        implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element otherwise after it.
    :param totals: Used internally
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser have yielded
            # different node counts, this can happen if the file is valid XML
            # but contains constructs like nested <p> tags. So force parse it
            # with the HTML 5 parser and try again.
            raw = container.raw_data(name)
            root = container.parse_xhtml(raw,
                                         fname=name,
                                         force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(
                    _('The file %s has malformed markup. Try running the Fix HTML tool'
                      ' before splitting') % name)
            container.replace(name, root)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(
        root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(
        root2.xpath('//*/@name'))
    base, ext = name.rpartition('.')[0::2]
    base = re.sub(r'_split\d+$', '', base)
    nname, s = None, 0
    while not nname or container.exists(nname):
        s += 1
        nname = '%s_split%d.%s' % (base, s, ext)
    manifest_item = container.generate_item(
        nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'),
                                         container.opf_name)

    # Fix links in the split trees
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname == name:
                purl = urlparse(url)
                if purl.fragment in anchors_in_top:
                    if r is root2:
                        a.set(
                            'href', '%s#%s' % (container.name_to_href(
                                name, bottom_name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)
                elif purl.fragment in anchors_in_bottom:
                    if r is root1:
                        a.set(
                            'href', '%s#%s' % (container.name_to_href(
                                bottom_name, name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in iteritems(container.mime_map):
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name,
                                     bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
Esempio n. 33
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = self.find_opf()
        if opf is None:
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        epub3_nav = opf.epub3_nav
        if epub3_nav is not None:
            self.convert_epub3_nav(epub3_nav, opf, log)

        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1])+'/'
            for elem in opf.itermanifest():
                elem.set('href', delta+elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta+elem.get('href'))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
        self.removed_cover = f(opf, log)

        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml', 'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml', 'application/adobe.page-template+xml',
                    'application/text'}:
                not_for_spine.add(id_)

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError('No valid entries in the spine of this EPUB')

        with lopen('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')