Esempio n. 1
0
    def get_embed_font_info(self, family, failure_critical=True):
        """Add every face of a font family to the manifest.

        :param family: Name of the font family to embed. A falsy value
            means there is nothing to do.
        :param failure_critical: When true, a family without any usable
            face raises ``ValueError``; otherwise a warning is logged.
        :return: A ``(body_font_family, rules)`` tuple: the CSS
            ``font-family`` value built from the first face (``None`` when
            nothing was embedded) and the parsed ``@font-face`` rules, one
            per face.
        :raises ValueError: No face was found and ``failure_critical`` is
            set.
        """
        rules = []
        body_font_family = None
        if not family:
            return body_font_family, rules
        from ebook_converter.utils.fonts.scanner import font_scanner, NoFonts
        from ebook_converter.utils.fonts.utils import panose_to_css_generic_family

        def nothing_to_embed():
            # Shared outcome for "scanner raised NoFonts" and "scanner
            # returned no faces": fail hard or warn and return empty-handed.
            message = ('No embeddable fonts found for family: %r' % family)
            if failure_critical:
                raise ValueError(message)
            self.oeb.log.warn(message)
            return body_font_family, rules

        try:
            faces = font_scanner.fonts_for_family(family)
        except NoFonts:
            return nothing_to_embed()
        if not faces:
            return nothing_to_embed()

        for index, face in enumerate(faces):
            extension = 'otf' if face['is_otf'] else 'ttf'
            base_name = ascii_filename(face['full_name']).replace(' ', '-')
            font_id, font_href = self.oeb.manifest.generate(
                id=u'font', href='fonts/%s.%s' % (base_name, extension))
            mime = mimetypes.guess_type('dummy.' + extension)[0]
            item = self.oeb.manifest.add(
                font_id, font_href, mime,
                data=font_scanner.get_font_data(face))
            item.unload_data_from_memory()

            descriptors = {
                'font-family': '"%s"' % face['font-family'],
                'panose-1': ' '.join(map(str, face['panose'])),
                'src': 'url(%s)' % item.href,
            }

            if index == 0:
                # The first face decides the family used for body text.
                generic_family = panose_to_css_generic_family(face['panose'])
                body_font_family = "'%s',%s" % (face['font-family'],
                                                generic_family)
                self.oeb.log('Embedding font: %s' % face['font-family'])

            # Only non-default weight/style/stretch values are emitted.
            descriptors.update(
                (prop, face[prop])
                for prop in ('font-weight', 'font-style', 'font-stretch')
                if face[prop] != 'normal')

            declarations = '; '.join(
                '%s:%s' % pair for pair in descriptors.items())
            rules.append(
                css_parser.parseString('@font-face { %s }' % declarations))

        return body_font_family, rules
Esempio n. 2
0
 def create_filename(self, href, fmt):
     """Build a file name for ``href`` that was not handed out before.

     The base name of ``href`` is URL-unquoted, passed through
     ``ascii_filename``, stripped of its extension, cut to 75 characters
     and falls back to ``'image'`` when nothing is left. A number is
     appended until the lower-cased stem is absent from
     ``self.seen_filenames``; the stem is then recorded there.

     :param href: Reference whose last path component seeds the name.
     :param fmt: Format name; its lower-cased form becomes the extension.
     :return: The chosen stem followed by ``os.extsep`` and the extension.
     """
     leaf = urllib.parse.unquote(posixpath.basename(href))
     stem = posixpath.splitext(ascii_filename(leaf))[0]
     stem = stem[:75].rstrip('.') or 'image'
     candidate = stem
     attempt = 0
     # Uniqueness is judged case-insensitively.
     while candidate.lower() in self.seen_filenames:
         attempt += 1
         candidate = stem + str(attempt)
     self.seen_filenames.add(candidate.lower())
     return candidate + os.extsep + fmt.lower()
Esempio n. 3
0
 def write_header(self):  # PalmDB header {{{
     '''
     Write the PalmDB header followed by one table entry per record.

     The name field is the ASCII form of the first title with spaces
     replaced by underscores, cut to 31 bytes and NUL-padded to 32. Each
     table entry is a 4-byte big-endian offset, a zero byte and the low
     three bytes of ``2 * index``; two zero bytes close the table.
     '''
     name = ascii_filename(str(self.oeb.metadata.title[0]))
     name = name.replace(' ', '_')
     if not isinstance(name, bytes):
         name = name.encode('ascii')
     # At most 31 name bytes, so the 32-byte field always ends in NUL.
     name = name[:31].ljust(32, b'\0')
     timestamp = int(time.time())
     count = len(self.records)
     self.write(
         name,
         pack(b'>HHIIIIII', 0, 0, timestamp, timestamp, 0, 0, 0, 0),
         b'BOOK', b'MOBI',
         pack(b'>IIH', (2 * count) - 1, 0, count))
     # First record starts after the table (8 bytes per entry) and the
     # two trailing zero bytes.
     position = self.tell() + (8 * count) + 2
     for index, record in enumerate(self.records):
         self.write(pack(b'>I', position), b'\0',
                    pack(b'>I', 2 * index)[1:])
         position += len(record)
     self.write(b'\0\0')
Esempio n. 4
0
    def write(self, name, dest_dir, docx, variant):
        """Extract one embedded font variant into ``dest_dir``.

        :param name: Key into ``self.fonts``; also used in the file name.
        :param dest_dir: Directory that receives the font file.
        :param docx: Object whose ``read`` returns the raw embedded data.
        :param variant: Key into the font's ``embedded`` mapping.
        :return: The written file name, or ``None`` when the (decoded)
            first 32 bytes are not accepted by ``is_truetype_font``.
        """
        embedded = self.fonts[name].embedded[variant]
        raw = docx.read(embedded.name)
        header = raw[:32]
        if embedded.key:
            # The key's hex digits, taken pairwise and in reverse byte
            # order, are XORed cyclically over the first 32 bytes.
            hex_digits = re.sub(r'[^A-Fa-f0-9]', '', embedded.key)
            mask = bytearray(
                int(hex_digits[pos:pos + 2], 16)
                for pos in range(0, len(hex_digits), 2))
            mask.reverse()
            header = bytes(bytearray(
                byte ^ mask[pos % len(mask)]
                for pos, byte in enumerate(bytearray(header))))
        if not is_truetype_font(header):
            return None
        ext = 'otf' if header.startswith(b'OTTO') else 'ttf'
        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
        with open(os.path.join(dest_dir, fname), 'wb') as dest:
            dest.write(header)
            dest.write(raw[32:])

        return fname
Esempio n. 5
0
def sanitize_file_name(x):
    """Return a cleaned-up, ASCII version of the file name ``x``.

    The characters ``? & = ; #`` become ``_``, runs of whitespace collapse
    to a single space, and surrounding whitespace and trailing dots are
    removed. Whitespace around the last dot (between stem and extension)
    is removed as well.

    :param x: File name to clean; passed through ``ascii_filename`` first.
    :return: The sanitized name. A name without an extension is returned
        without a dot being added.
    """
    cleaned = re.sub(r'[?&=;#]', '_', ascii_filename(x))
    cleaned = re.sub(r'\s+', ' ', cleaned).strip().rstrip('.')
    stem, dot, ext = cleaned.rpartition('.')
    if not dot:
        # rpartition puts the whole string in the last slot when there is
        # no separator; joining stem and extension would then prepend a
        # dot and turn "name" into ".name".
        return cleaned
    return (stem.strip() + '.' + ext.strip()).rstrip('.')
Esempio n. 6
0
def image_filename(x):
    """Return the ASCII form of ``x`` with spaces and ``#`` turned into ``_``."""
    underscored = {ord(' '): '_', ord('#'): '_'}
    return ascii_filename(x).translate(underscored)
Esempio n. 7
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert an SNB container read from ``stream`` into an OEB book.

        Metadata from ``snbf/book.snbf`` is copied into the OEB metadata,
        every chapter listed in ``snbf/toc.snbf`` is rendered to a
        ``ch_N.htm`` file inside a kept temporary directory, and the image
        files reported by ``OutputImageFiles`` are added to the manifest.

        :param stream: Input handed to ``SNBFile.Parse``.
        :param options: Conversion options; ``input_encoding`` is read
            here and the object is passed on to ``create_oebbook``.
        :param file_ext: Not used by this method.
        :param log: Logger used for debug output.
        :param accelerators: Not used by this method.
        :return: The populated OEB book.
        :raises ValueError: The stream cannot be parsed, or the parsed file
            reports itself as invalid.
        """
        import uuid

        from ebook_converter.ebooks.oeb.base import DirContainer
        from ebook_converter.ebooks.snb.snbfile import SNBFile

        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception as err:
            raise ValueError("Invalid SNB file") from err
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")
        log.debug("Handle meta data ...")
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options,
                             encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)
            item_map = {'title': './/head/name',
                        'creator': './/head/author',
                        'language': './/head/language',
                        'generator': './/head/generator',
                        'publisher': './/head/publisher',
                        'cover': './/head/cover'}
            # Missing elements and elements without text both map to ''.
            d = {}
            for key, path in item_map.items():
                node = meta.find(path)
                d[key] = (node.text or '') if node is not None else ''

            oeb.metadata.add('title', d['title'])
            oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
            oeb.metadata.add('language',
                             d['language'].lower().replace('_', '-'))
            oeb.metadata.add('generator', d['generator'])
            oeb.metadata.add('publisher', d['publisher'])
            if d['cover'] != '':
                oeb.guide.add('cover', 'Cover', d['cover'])

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # Use the identifier that actually carries the id
                # attribute, not whichever one happens to be first.
                oeb.uid = ident
                break

        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                toc_body = toc.find('.//body')
                # A TOC without a <body> lists no chapters.
                chapters = toc_body if toc_body is not None else ()
                # Counts only the chapters that were actually written.
                i = 1
                for ch in chapters:
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    if chapterSrc is None:
                        # No source file is named, nothing can be loaded.
                        continue
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = etree.fromstring(data)
                    chapter_body = snbc.find('.//body')
                    if chapter_body is None:
                        chapter_body = ()
                    lines = []
                    for line in chapter_body:
                        if line.tag == 'text':
                            lines.append('<p>%s</p>' % html_encode(line.text))
                        elif line.tag == 'img':
                            lines.append('<p><img src="%s" /></p>' %
                                         html_encode(line.text))
                    with open(os.path.join(tdir, fname), 'wb') as chapter_file:
                        chapter_file.write(
                            (HTML_TEMPLATE %
                             (chapterName,
                              '\n'.join(lines))).encode('utf-8', 'replace'))
                    oeb.toc.add(chapterName, fname)
                    item_id, href = oeb.manifest.generate(
                        id='html', href=ascii_filename(fname))
                    item = oeb.manifest.add(item_id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for image_name, mime in imageFiles:
                    item_id, href = oeb.manifest.generate(
                        id='image', href=ascii_filename(image_name))
                    item = oeb.manifest.add(item_id, href, mime)
                    item.html_input_href = image_name

        return oeb
Esempio n. 8
0
    def to_xml(self, write_files=True):
        """Serialize the book as a ``<BBeBXylog>`` XML string.

        The result consists of the book information, the page trees (the
        first as ``<Main>``, the others as ``<PageTree>``), then the styles
        and the remaining objects that were not already referenced by an
        ``objid`` in the pages.

        :param write_files: When true, the thumbnail (if any) is written
            to a file in the current directory and ``self.write_files()``
            is called.
        :return: The XML document as a string.
        """
        meta = self.metadata
        info_parts = [
            '<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
            '<Title reading="%s">%s</Title>\n'
            '<Author reading="%s">%s</Author>\n'
            '<BookID>%s</BookID>\n'
            '<Publisher reading="">%s</Publisher>\n'
            '<Label reading="">%s</Label>\n'
            '<Category reading="">%s</Category>\n'
            '<Classification reading="">%s</Classification>\n'
            '<FreeText reading="">%s</FreeText>\n'
            '</BookInfo>\n<DocInfo>\n' %
            (meta.title_reading, meta.title,
             meta.author_reading, meta.author,
             meta.book_id, meta.publisher,
             meta.label, meta.category,
             meta.classification, meta.free_text)
        ]
        doc_info = self.doc_info
        thumbnail = doc_info.thumbnail
        if thumbnail:
            # The thumbnail file is named after the ASCII form of the title.
            thumbnail_name = (ascii_filename(meta.title) + '_thumbnail.' +
                              doc_info.thumbnail_extension)
            info_parts.append('<CThumbnail file="%s" />\n' % thumbnail_name)
            if write_files:
                with open(thumbnail_name, 'wb') as f:
                    f.write(thumbnail)
        info_parts.append(
            '<Language reading="">%s</Language>\n'
            '<Creator reading="">%s</Creator>\n'
            '<Producer reading="">%s</Producer>\n'
            '<SumPage>%s</SumPage>\n'
            '</DocInfo>\n</Info>\n%s</BookInformation>\n' %
            (doc_info.language, doc_info.creator,
             doc_info.producer, doc_info.page, self.toc))
        bookinfo = ''.join(info_parts)

        page_parts = []
        main_id = -1
        for position, page_tree in enumerate(self):
            if position == 0:
                # The first page tree is emitted as <Main>.
                opener, closer = '<Main>\n', '</Main>\n'
                main_id = page_tree.id
            else:
                opener = '<PageTree objid="%d">\n' % (page_tree.id, )
                closer = '</PageTree>\n'
            page_parts.append(opener)
            page_parts.extend(str(page) for page in page_tree)
            page_parts.append(closer)
        pages = ''.join(page_parts)

        # Ids already present in the page markup, plus the main page tree.
        traversed_ids = {
            int(found) for found in re.findall(r'objid="(\w+)"', pages)
        }
        traversed_ids.add(main_id)

        object_parts = ['\n<Objects>\n']
        style_parts = ['\n<Style>\n']
        for key in self.objects:
            obj = self.objects[key]
            if obj.id in traversed_ids:
                continue
            if isinstance(obj, (Font, Text, TOCObject)):
                continue
            target = style_parts if isinstance(obj, StyleObject) \
                else object_parts
            target.append(str(obj))
        style_parts.append('</Style>\n')
        object_parts.append('</Objects>\n')
        if write_files:
            self.write_files()
        return ''.join(['<BBeBXylog version="1.0">\n', bookinfo, pages] +
                       style_parts + object_parts + ['</BBeBXylog>'])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` to ``output_path`` as an HTMLZ archive.

        The book is rendered to a single HTML file (``index.html``, or a
        name derived from the title when ``opts.htmlz_title_filename`` is
        set), accompanied by an optional ``style.css``, the referenced
        images, an optional ``cover.jpg`` and ``metadata.opf``. Everything
        is assembled in a temporary directory that is then zipped.

        :param oeb_book: The OEB book to serialize.
        :param output_path: Destination of the resulting zip archive.
        :param input_plugin: Not used by this method.
        :param opts: Options; ``htmlz_css_type``, ``htmlz_class_style`` and
            ``htmlz_title_filename`` are read here.
        :param log: Logger handed to the HTML generator.
        """
        from lxml import etree
        from ebook_converter.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from ebook_converter.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from ebook_converter.utils.zipfile import ZipFile
        from ebook_converter.utils.filenames import ascii_filename

        def as_bytes(payload):
            # Every file below is opened in binary mode, so text has to be
            # encoded before it can be written.
            if isinstance(payload, str):
                return payload.encode('utf-8')
            return payload

        # HTML
        if opts.htmlz_css_type == 'inline':
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from ebook_converter.utils.filenames import shorten_components_to
                fname = shorten_components_to(
                    100,
                    (ascii_filename(str(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
                tf.write(as_bytes(html))

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(as_bytes(htmlizer.get_css(oeb_book)))

            # Images
            images = htmlizer.images
            if images:
                images_dir = os.path.join(tdir, u'images')
                if not os.path.exists(images_dir):
                    os.makedirs(images_dir)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # Serialized as text here; encoded on write.
                            data = etree.tostring(item.data, encoding='unicode')
                        else:
                            data = item.data
                        image_path = os.path.join(images_dir, images[item.href])
                        with open(image_path, 'wb') as img:
                            img.write(as_bytes(data))

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from ebook_converter.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the file before handing its path to the saver.
                    with open(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # The cover is best effort: report the problem and carry on
                # without swallowing KeyboardInterrupt/SystemExit.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(io.BytesIO(etree.tostring(
                    oeb_book.metadata.to_opf1(), encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                # Close explicitly so the archive is finalized before the
                # temporary directory goes away.
                htmlz.close()