def get_embed_font_info(self, family, failure_critical=True):
    """Add every face of ``family`` to the manifest and build its CSS rules.

    :param family: Font family name to look up; a falsy value short-circuits.
    :param failure_critical: When true, a family with no faces raises
        ``ValueError``; otherwise a warning is logged instead.
    :return: ``(body_font_family, efi)``. ``body_font_family`` is a CSS
        ``font-family`` value built from the first face (or ``None`` when
        nothing was embedded) and ``efi`` is the list of parsed
        ``@font-face`` rules, one per face.
    :raises ValueError: if no faces are found and ``failure_critical`` is set.
    """
    efi = []
    body_font_family = None
    if not family:
        return body_font_family, efi

    from ebook_converter.utils.fonts.scanner import font_scanner, NoFonts
    from ebook_converter.utils.fonts.utils import panose_to_css_generic_family

    def nothing_to_embed():
        # Single failure path shared by "scanner raised NoFonts" and
        # "scanner returned an empty result".
        msg = ('No embeddable fonts found for family: %r' % family)
        if failure_critical:
            raise ValueError(msg)
        self.oeb.log.warn(msg)
        return body_font_family, efi

    try:
        faces = font_scanner.fonts_for_family(family)
    except NoFonts:
        return nothing_to_embed()
    if not faces:
        return nothing_to_embed()

    for index, face in enumerate(faces):
        extension = 'otf' if face['is_otf'] else 'ttf'
        safe_name = ascii_filename(face['full_name']).replace(' ', '-')
        fid, href = self.oeb.manifest.generate(
            id=u'font', href='fonts/%s.%s' % (safe_name, extension))
        media_type = mimetypes.guess_type('dummy.' + extension)[0]
        item = self.oeb.manifest.add(
            fid, href, media_type, data=font_scanner.get_font_data(face))
        item.unload_data_from_memory()

        # Insertion order matters: it is the order of the declarations in
        # the generated @font-face rule.
        cfont = {
            'font-family': '"%s"' % face['font-family'],
            'panose-1': ' '.join(str(part) for part in face['panose']),
            'src': 'url(%s)' % item.href,
        }
        if index == 0:
            # The first face determines the family used for body text.
            generic_family = panose_to_css_generic_family(face['panose'])
            body_font_family = "'%s',%s" % (face['font-family'],
                                            generic_family)
            self.oeb.log('Embedding font: %s' % face['font-family'])

        # Only non-default weight/style/stretch values are written out.
        cfont.update(
            (prop, face[prop])
            for prop in ('font-weight', 'font-style', 'font-stretch')
            if face[prop] != 'normal')

        declarations = '; '.join('%s:%s' % pair for pair in cfont.items())
        efi.append(css_parser.parseString('@font-face { %s }' % declarations))

    return body_font_family, efi
def create_filename(self, href, fmt):
    """Derive a unique file name for ``href`` with extension ``fmt``.

    The base name of ``href`` is URL-unquoted, passed through
    ``ascii_filename``, stripped of its extension and cut to 75 characters.
    If the lower-cased result is already in ``self.seen_filenames`` a
    numeric suffix is appended until it is unique; the chosen name is then
    recorded there.

    :param href: Path or URL whose last component seeds the name.
    :param fmt: Extension to append (lower-cased), without the separator.
    :return: The unique file name including its extension.
    """
    leaf = urllib.parse.unquote(posixpath.basename(href))
    stem, _ext = posixpath.splitext(ascii_filename(leaf))
    # Fall back to a generic name when nothing usable is left.
    stem = stem[:75].rstrip('.') or 'image'

    candidate = stem
    counter = 0
    # Uniqueness is case-insensitive.
    while candidate.lower() in self.seen_filenames:
        counter += 1
        candidate = stem + str(counter)
    self.seen_filenames.add(candidate.lower())

    return candidate + os.extsep + fmt.lower()
def write_header(self):  # PalmDB header {{{
    """Write the PalmDB header followed by the record offset table.

    The database name is the first metadata title passed through
    ``ascii_filename``, with spaces replaced by underscores, cut to 31
    bytes and NUL-padded to 32. Each record then gets an 8-byte entry: its
    4-byte offset, one zero byte and the low three bytes of ``2 * index``.
    Two zero bytes end the table.
    """
    name = ascii_filename(str(self.oeb.metadata.title[0])).replace(' ', '_')
    if not isinstance(name, bytes):
        name = name.encode('ascii')
    # At most 31 name bytes, so the 32-byte field always ends in a NUL.
    name = name[:31].ljust(32, b'\0')

    timestamp = int(time.time())
    record_count = len(self.records)
    self.write(
        name,
        pack(b'>HHIIIIII', 0, 0, timestamp, timestamp, 0, 0, 0, 0),
        b'BOOK',
        b'MOBI',
        pack(b'>IIH', (2 * record_count) - 1, 0, record_count))

    # The first record starts right after the offset table (8 bytes per
    # entry) and the two trailing zero bytes.
    position = self.tell() + (8 * record_count) + 2
    for index, record in enumerate(self.records):
        entry_id = pack(b'>I', 2 * index)[1:]
        self.write(pack(b'>I', position), b'\0', entry_id)
        position += len(record)
    self.write(b'\0\0')
def write(self, name, dest_dir, docx, variant):
    """Extract one embedded font variant from ``docx`` into ``dest_dir``.

    When the embedded entry carries a key, the first 32 bytes of the font
    data are XOR-ed with the key bytes (hex digits of the key, taken in
    reverse byte order) before the data is inspected and written.

    :param name: Key into ``self.fonts``.
    :param dest_dir: Directory the font file is written to.
    :param docx: Object whose ``read(name)`` returns the raw font bytes.
    :param variant: Key into the font's ``embedded`` mapping.
    :return: The file name written, or ``None`` if ``is_truetype_font``
        rejects the (possibly XOR-ed) leading bytes.
    """
    entry = self.fonts[name].embedded[variant]
    raw = docx.read(entry.name)
    header, remainder = raw[:32], raw[32:]

    if entry.key:
        # Keep only the hex digits, decode them pairwise, then reverse.
        hexdigits = re.sub(r'[^A-Fa-f0-9]', '', entry.key)
        key = bytearray(
            int(hexdigits[pos:pos + 2], 16)
            for pos in range(0, len(hexdigits), 2))
        key.reverse()
        header = bytes(
            byte ^ key[pos % len(key)]
            for pos, byte in enumerate(bytearray(header)))

    if not is_truetype_font(header):
        return None

    extension = 'otf' if header.startswith(b'OTTO') else 'ttf'
    fname = ascii_filename('%s - %s.%s' % (name, variant, extension))
    with open(os.path.join(dest_dir, fname), 'wb') as dest:
        dest.write(header)
        dest.write(remainder)
    return fname
def sanitize_file_name(x):
    """Return a cleaned-up version of the file name ``x``.

    The name is passed through ``ascii_filename``, the characters
    ``? & = ; #`` become underscores, whitespace runs collapse to a single
    space, and surrounding whitespace and trailing dots are removed.
    Whitespace next to the extension separator is also stripped.

    A name without any ``.`` is returned without an extension. Previously
    ``rpartition`` put the whole name in the "extension" slot, so such a
    name came back with a spurious leading dot (``README`` -> ``.README``).
    """
    cleaned = re.sub(r'[?&=;#]', '_', ascii_filename(x))
    cleaned = re.sub(r'\s+', ' ', cleaned).strip().rstrip('.')
    stem, sep, ext = cleaned.rpartition('.')
    if not sep:
        # No extension separator: nothing to split, do not invent one.
        return cleaned.strip()
    return (stem.strip() + '.' + ext.strip()).rstrip('.')
def image_filename(x):
    """Return ``ascii_filename(x)`` with spaces and ``#`` replaced by ``_``."""
    underscore_map = str.maketrans({' ': '_', '#': '_'})
    return ascii_filename(x).translate(underscore_map)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an SNB container read from ``stream`` into an OEB book.

    Metadata is taken from ``snbf/book.snbf`` and the chapter list from
    ``snbf/toc.snbf``. Every chapter whose data can be read is rendered
    into a ``ch_<n>.htm`` file in a kept temporary directory and added to
    the TOC, manifest and spine. Image files extracted by the SNB reader
    are added to the manifest.

    :param stream: File-like object containing the SNB data.
    :param options: Conversion options; ``input_encoding`` is read here.
    :param file_ext: Unused in this method.
    :param log: Logger used for debug output.
    :param accelerators: Unused in this method.
    :return: The populated OEB book object.
    :raises ValueError: if the SNB file cannot be parsed or is not valid.
    """
    import uuid
    from ebook_converter.ebooks.oeb.base import DirContainer
    from ebook_converter.ebooks.snb.snbfile import SNBFile

    log.debug("Parsing SNB file...")
    snbFile = SNBFile()
    try:
        snbFile.Parse(stream)
    except Exception as err:
        # Keep the original failure attached for easier diagnosis.
        raise ValueError("Invalid SNB file") from err
    if not snbFile.IsValid():
        log.debug("Invalid SNB file")
        raise ValueError("Invalid SNB file")

    log.debug("Handle meta data ...")
    from ebook_converter.ebooks.conversion.plumber import create_oebbook
    oeb = create_oebbook(log, None, options,
                         encoding=options.input_encoding, populate=False)
    meta = snbFile.GetFileStream('snbf/book.snbf')
    if meta is not None:
        meta = etree.fromstring(meta)
        item_map = {'title': './/head/name',
                    'creator': './/head/author',
                    'language': './/head/language',
                    'generator': './/head/generator',
                    'publisher': './/head/publisher',
                    'cover': './/head/cover'}
        # Missing nodes and nodes without text both map to ''.
        d = {}
        for key, item in item_map.items():
            node = meta.find(item)
            if node is not None:
                d[key] = node.text if node.text is not None else ''
            else:
                d[key] = ''

        oeb.metadata.add('title', d['title'])
        oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
        oeb.metadata.add('language',
                         d['language'].lower().replace('_', '-'))
        oeb.metadata.add('generator', d['generator'])
        oeb.metadata.add('publisher', d['publisher'])
        if d['cover'] != '':
            oeb.guide.add('cover', 'Cover', d['cover'])

    bookid = str(uuid.uuid4())
    oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in oeb.metadata.identifier:
        if 'id' in ident.attrib:
            oeb.uid = oeb.metadata.identifier[0]
            break

    with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
        log.debug('Process TOC ...')
        toc = snbFile.GetFileStream('snbf/toc.snbf')
        oeb.container = DirContainer(tdir, log)
        if toc is not None:
            toc = etree.fromstring(toc)
            # A TOC without <body> has no chapters; iterating None would
            # raise TypeError.
            toc_body = toc.find('.//body')
            chapters = toc_body if toc_body is not None else ()
            i = 1
            for ch in chapters:
                chapterName = ch.text
                chapterSrc = ch.get('src')
                fname = 'ch_%d.htm' % i
                if chapterSrc is None:
                    # Without a source there is nothing to load.
                    log.debug('Skipping TOC entry without src attribute')
                    continue
                data = snbFile.GetFileStream('snbc/' + chapterSrc)
                if data is None:
                    continue
                snbc = etree.fromstring(data)
                snbc_body = snbc.find('.//body')
                lines = []
                # A chapter without <body> yields an empty page.
                for line in (snbc_body if snbc_body is not None else ()):
                    if line.tag == 'text':
                        lines.append('<p>%s</p>' % html_encode(line.text))
                    elif line.tag == 'img':
                        lines.append('<p><img src="%s" /></p>' %
                                     html_encode(line.text))
                with open(os.path.join(tdir, fname), 'wb') as f:
                    f.write((HTML_TEMPLATE %
                             (chapterName, '\n'.join(lines))
                             ).encode('utf-8', 'replace'))
                oeb.toc.add(ch.text, fname)
                item_id, href = oeb.manifest.generate(
                    id='html', href=ascii_filename(fname))
                item = oeb.manifest.add(item_id, href, 'text/html')
                item.html_input_href = fname
                oeb.spine.add(item, True)
                # Only chapters actually written consume a number.
                i = i + 1
            imageFiles = snbFile.OutputImageFiles(tdir)
            for f, m in imageFiles:
                item_id, href = oeb.manifest.generate(
                    id='image', href=ascii_filename(f))
                item = oeb.manifest.add(item_id, href, m)
                item.html_input_href = f

    return oeb
def to_xml(self, write_files=True):
    """Serialise this document to a ``BBeBXylog`` XML string.

    The output consists of the book information block, the page trees (the
    first one as ``<Main>``, later ones as ``<PageTree>``), a ``<Style>``
    section and an ``<Objects>`` section. Objects whose id already appears
    as an ``objid`` in the page markup, or is the main page tree's id, are
    not repeated; ``Font``, ``Text`` and ``TOCObject`` instances are left
    out as well.

    :param write_files: When true, the thumbnail (if any) is written to the
        current directory and ``self.write_files()`` is called.
    :return: The complete XML document as a string.
    """
    meta = self.metadata
    info = self.doc_info

    bookinfo = ('<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
                '<Title reading="%s">%s</Title>\n'
                '<Author reading="%s">%s</Author>\n'
                '<BookID>%s</BookID>\n'
                '<Publisher reading="">%s</Publisher>\n'
                '<Label reading="">%s</Label>\n'
                '<Category reading="">%s</Category>\n'
                '<Classification reading="">%s</Classification>\n'
                '<FreeText reading="">%s</FreeText>\n'
                '</BookInfo>\n<DocInfo>\n' % (
                    meta.title_reading, meta.title,
                    meta.author_reading, meta.author,
                    meta.book_id, meta.publisher, meta.label,
                    meta.category, meta.classification, meta.free_text))

    th = info.thumbnail
    if th:
        thumbnail_name = (ascii_filename(meta.title) + '_thumbnail.' +
                          info.thumbnail_extension)
        bookinfo += '<CThumbnail file="%s" />\n' % (thumbnail_name)
        if write_files:
            with open(thumbnail_name, 'wb') as f:
                f.write(th)

    bookinfo += ('<Language reading="">%s</Language>\n'
                 '<Creator reading="">%s</Creator>\n'
                 '<Producer reading="">%s</Producer>\n'
                 '<SumPage>%s</SumPage>\n'
                 '</DocInfo>\n</Info>\n%s</BookInformation>\n' % (
                     info.language, info.creator, info.producer,
                     info.page, self.toc))

    # The first page tree is emitted as <Main>, all others as <PageTree>.
    page_parts = []
    pt_id = -1
    for position, page_tree in enumerate(self):
        if position == 0:
            page_parts.append('<Main>\n')
            closing = '</Main>\n'
            pt_id = page_tree.id
        else:
            page_parts.append('<PageTree objid="%d">\n' % (page_tree.id, ))
            closing = '</PageTree>\n'
        page_parts.extend(str(page) for page in page_tree)
        page_parts.append(closing)
    pages = ''.join(page_parts)

    # Ids already present in the page markup, plus the main tree's own id.
    traversed_objects = [int(objid)
                         for objid in re.findall(r'objid="(\w+)"', pages)]
    traversed_objects.append(pt_id)

    style_parts = ['\n<Style>\n']
    object_parts = ['\n<Objects>\n']
    for key in self.objects:
        obj = self.objects[key]
        if obj.id in traversed_objects or isinstance(
                obj, (Font, Text, TOCObject)):
            continue
        section = style_parts if isinstance(obj, StyleObject) else object_parts
        section.append(str(obj))
    style_parts.append('</Style>\n')
    object_parts.append('</Objects>\n')

    if write_files:
        self.write_files()

    return ''.join(['<BBeBXylog version="1.0">\n', bookinfo, pages,
                    ''.join(style_parts), ''.join(object_parts),
                    '</BBeBXylog>'])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as an HTMLZ archive at ``output_path``.

    A temporary directory is filled with the HTML file, an optional
    external ``style.css``, the referenced images, an optional
    ``cover.jpg`` and ``metadata.opf``; the directory is then added to a
    zip archive.

    :param oeb_book: The book to convert.
    :param output_path: Destination path of the archive.
    :param input_plugin: Unused in this method.
    :param opts: Options; ``htmlz_css_type``, ``htmlz_class_style`` and
        ``htmlz_title_filename`` are read here.
    :param log: Logger handed to the HTML generator.
    """
    from lxml import etree
    from ebook_converter.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from ebook_converter.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from ebook_converter.utils.zipfile import ZipFile
    from ebook_converter.utils.filenames import ascii_filename

    # HTML
    if opts.htmlz_css_type == 'inline':
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory(u'_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = u'index'
        if opts.htmlz_title_filename:
            from ebook_converter.utils.filenames import shorten_components_to
            fname = shorten_components_to(
                100, (ascii_filename(str(oeb_book.metadata.title[0])),))[0]
        with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
            if isinstance(html, str):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if (opts.htmlz_css_type == 'class'
                and opts.htmlz_class_style == 'external'):
            css = htmlizer.get_css(oeb_book)
            # The file is opened in binary mode, so text must be encoded
            # first (same treatment as the HTML above).
            if isinstance(css, str):
                css = css.encode('utf-8')
            with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                tf.write(css)

        # Images
        images = htmlizer.images
        if images:
            os.makedirs(os.path.join(tdir, u'images'), exist_ok=True)
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        # encoding='unicode' yields str; writing it
                        # unencoded to a 'wb' file raises TypeError.
                        data = etree.tostring(item.data, encoding='unicode')
                    else:
                        data = item.data
                    if isinstance(data, str):
                        data = data.encode('utf-8')
                    img_path = os.path.join(tdir, u'images',
                                            images[item.href])
                    with open(img_path, 'wb') as img:
                        img.write(data)

        # Cover
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from ebook_converter.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, u'cover.jpg')
                with open(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except Exception:
            # The cover is best-effort: report the problem and carry on.
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
            opf = OPF(io.BytesIO(etree.tostring(
                oeb_book.metadata.to_opf1(), encoding='UTF-8')))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = u'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        htmlz = ZipFile(output_path, 'w')
        try:
            htmlz.add_dir(tdir)
        finally:
            # Close explicitly so the archive is finalised here rather
            # than whenever the object is garbage collected.
            htmlz.close()