def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object

    ``stream`` is either a file-like object (detected by the presence of a
    ``write`` attribute; it is rewound before parsing) or the raw bytes of
    an SNB file, which are wrapped in a ``BytesIO``.

    Title, author, language and publisher are read from the ``<head>``
    element of ``snbf/book.snbf``. Each field is optional: a missing or
    empty element leaves the corresponding default untouched instead of
    aborting the whole extraction. When ``extract_cover`` is true and the
    book names a cover image, ``cover_data`` is set to an
    ``(extension, data)`` tuple.

    Reading is best-effort: any exception is printed via ``traceback`` and
    whatever metadata was gathered up to that point is returned.
    """
    # Function-scope import of the hardened XML parser, since SNB files are
    # untrusted input.
    from calibre.utils.xml_parse import safe_xml_fromstring

    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(io.BytesIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = safe_xml_fromstring(meta)

            def head_text(name):
                # Text of <head>/<name>, or None when the element is
                # missing or empty.
                node = meta.find('.//head/' + name)
                if node is None:
                    return None
                return node.text

            title = head_text('name')
            if title is not None:
                mi.title = title

            author = head_text('author')
            if author is not None:
                mi.authors = [author]

            language = head_text('language')
            if language is not None:
                mi.language = language.lower().replace('_', '-')

            publisher = head_text('publisher')
            if publisher is not None:
                mi.publisher = publisher

            if extract_cover:
                cover_name = head_text('cover')
                if cover_name is not None:
                    ext = os.path.splitext(cover_name)[1]
                    if ext == '.jpeg':
                        ext = '.jpg'
                    # ext[-3:] drops the leading dot of a three-letter
                    # extension.
                    mi.cover_data = (
                        ext[-3:],
                        snbFile.GetFileStream('snbc/images/' + cover_name))
    except Exception:
        # Deliberately broad: a damaged file must not prevent returning
        # the metadata collected so far.
        import traceback
        traceback.print_exc()

    return mi
def convert(self, stream, options, file_ext, log, accelerators):
    """
    Convert the SNB book in ``stream`` into an OEB book and return it.

    ``stream`` is handed to ``SNBFile.Parse``. ``options.input_encoding``
    is forwarded to ``create_oebbook``. ``log`` receives progress and
    warning messages. ``file_ext`` and ``accelerators`` are not used by
    this method.

    Metadata is taken from ``snbf/book.snbf`` and the chapter list from
    ``snbf/toc.snbf``. Every chapter whose content can be found is
    rendered to a ``ch_<n>.htm`` file in a temporary directory that also
    backs ``oeb.container``; extracted image files are added to the
    manifest.

    Raises ``ValueError`` if the stream cannot be parsed or the parsed
    file reports itself invalid.
    """
    import uuid

    from calibre.ebooks.oeb.base import DirContainer
    from calibre.ebooks.snb.snbfile import SNBFile
    from calibre.utils.xml_parse import safe_xml_fromstring

    log.debug("Parsing SNB file...")
    snbFile = SNBFile()
    try:
        snbFile.Parse(stream)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate
        # instead of being reported as a broken file.
        raise ValueError("Invalid SNB file")
    if not snbFile.IsValid():
        log.debug("Invalid SNB file")
        raise ValueError("Invalid SNB file")

    log.debug("Handle meta data ...")
    from calibre.ebooks.conversion.plumber import create_oebbook
    oeb = create_oebbook(log, None, options,
                         encoding=options.input_encoding, populate=False)

    meta = snbFile.GetFileStream('snbf/book.snbf')
    if meta is not None:
        meta = safe_xml_fromstring(meta)
        xpaths = {
            'title': './/head/name',
            'creator': './/head/author',
            'language': './/head/language',
            'generator': './/head/generator',
            'publisher': './/head/publisher',
            'cover': './/head/cover',
        }
        # Missing and empty elements both collapse to ''.
        d = {}
        for key, xpath in xpaths.items():
            node = meta.find(xpath)
            if node is not None and node.text is not None:
                d[key] = node.text
            else:
                d[key] = ''

        oeb.metadata.add('title', d['title'])
        oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
        oeb.metadata.add('language', d['language'].lower().replace('_', '-'))
        oeb.metadata.add('generator', d['generator'])
        oeb.metadata.add('publisher', d['publisher'])
        if d['cover'] != '':
            oeb.guide.add('cover', 'Cover', d['cover'])

    bookid = unicode_type(uuid.uuid4())
    oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in oeb.metadata.identifier:
        if 'id' in ident.attrib:
            # Use the identifier that actually carries the id attribute,
            # not whichever one happens to be first.
            oeb.uid = ident
            break

    with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
        log.debug('Process TOC ...')
        toc = snbFile.GetFileStream('snbf/toc.snbf')
        oeb.container = DirContainer(tdir, log)
        if toc is not None:
            toc = safe_xml_fromstring(toc)
            tocBody = toc.find('.//body')
            # A TOC without <body> simply has no chapters.
            chapters = tocBody if tocBody is not None else ()

            # Only incremented for chapters that are actually written, so
            # the generated file names stay contiguous.
            i = 1
            for ch in chapters:
                chapterName = ch.text
                chapterSrc = ch.get('src')
                if chapterSrc is None:
                    log.warn('Skipping TOC entry without a src attribute')
                    continue
                fname = 'ch_%d.htm' % i
                data = snbFile.GetFileStream('snbc/' + chapterSrc)
                if data is None:
                    continue

                snbc = safe_xml_fromstring(data)
                chapterBody = snbc.find('.//body')
                lines = []
                for line in (chapterBody if chapterBody is not None else ()):
                    # An empty element has text None; encode it as ''.
                    text = line.text or ''
                    if line.tag == 'text':
                        lines.append('<p>%s</p>' % html_encode(text))
                    elif line.tag == 'img':
                        lines.append(
                            '<p><img src="%s" /></p>' % html_encode(text))

                with open(os.path.join(tdir, fname), 'wb') as f:
                    f.write((HTML_TEMPLATE % (
                        chapterName, '\n'.join(lines))).encode(
                            'utf-8', 'replace'))

                oeb.toc.add(ch.text, fname)
                item_id, href = oeb.manifest.generate(
                    id='html', href=ascii_filename(fname))
                item = oeb.manifest.add(item_id, href, 'text/html')
                item.html_input_href = fname
                oeb.spine.add(item, True)
                i += 1

            # NOTE(review): kept inside the TOC branch; the flattened source
            # does not show its nesting level - confirm against the original.
            imageFiles = snbFile.OutputImageFiles(tdir)
            for imageName, mediaType in imageFiles:
                item_id, href = oeb.manifest.generate(
                    id='image', href=ascii_filename(imageName))
                item = oeb.manifest.add(item_id, href, mediaType)
                item.html_input_href = imageName

    return oeb
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """
    Write ``oeb_book`` to ``output_path`` as an SNB file.

    The book is assembled in a temporary directory and then packaged:

    * ``snbf/book.snbf`` - book information built from the OEB metadata
    * ``snbf/toc.snbf``  - chapter list built from the OEB TOC
    * ``snbc/*.snbc``    - one or more chapter files per spine document
      referenced by the TOC; spine documents the TOC does not reference
      are merged into the chapter written before them
    * ``snbc/images/``   - manifest images, written by ``self.HandleImage``

    ``opts`` is stored on ``self.opts`` and passed to the SVG rasterizer
    and the SNBML writer. ``log`` receives progress messages.
    ``input_plugin`` is not used by this method.
    """
    from lxml import etree

    # Imported once up front: OEB_IMAGES is needed after the spine loop too.
    from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
    from calibre.ebooks.snb.snbfile import SNBFile
    from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName

    self.opts = opts
    from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
    try:
        rasterizer = SVGRasterizer()
        rasterizer(oeb_book, opts)
    except Unavailable:
        log.warn('SVG rasterizer unavailable, SVG will not be converted')

    def write_tree(tree, path):
        # Serialise an lxml tree to path as pretty-printed UTF-8.
        with open(path, 'wb') as f:
            f.write(etree.tostring(tree, pretty_print=True,
                                   encoding='utf-8'))

    # Create temp dir
    with TemporaryDirectory('_snb_output') as tdir:
        # Create stub directories
        snbfDir = os.path.join(tdir, 'snbf')
        snbcDir = os.path.join(tdir, 'snbc')
        snbiDir = os.path.join(tdir, 'snbc/images')
        os.mkdir(snbfDir)
        os.mkdir(snbcDir)
        os.mkdir(snbiDir)

        # Process Meta data
        meta = oeb_book.metadata
        title = str(meta.title[0]) if meta.title else ''
        authors = [str(x) for x in meta.creator if x.role == 'aut']
        publishers = str(meta.publisher[0]) if meta.publisher else ''
        lang = str(meta.language[0]).upper() if meta.language else ''
        abstract = str(meta.description[0]) if meta.description else ''

        # Process Cover: the guide's cover entry is only used when the
        # guide has no 'titlepage' entry.
        g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
        href = None
        if 'titlepage' not in g and 'cover' in g:
            href = g['cover'].href

        # Output book info file
        bookInfoTree = etree.Element("book-snbf", version="1.0")
        headTree = etree.SubElement(bookInfoTree, "head")
        etree.SubElement(headTree, "name").text = title
        etree.SubElement(headTree, "author").text = ' '.join(authors)
        etree.SubElement(headTree, "language").text = lang
        etree.SubElement(headTree, "rights")
        etree.SubElement(headTree, "publisher").text = publishers
        etree.SubElement(
            headTree, "generator").text = __appname__ + ' ' + __version__
        etree.SubElement(headTree, "created")
        etree.SubElement(headTree, "abstract").text = abstract
        coverNode = etree.SubElement(headTree, "cover")
        if href is not None:
            coverNode.text = ProcessFileName(href)
        write_tree(bookInfoTree, os.path.join(snbfDir, 'book.snbf'))

        # Output TOC
        tocInfoTree = etree.Element("toc-snbf")
        tocHead = etree.SubElement(tocInfoTree, "head")
        tocBody = etree.SubElement(tocInfoTree, "body")

        def add_chapter(chapterHref, chapterTitle):
            # Append a <chapter> entry for chapterHref to the TOC body.
            ch = etree.SubElement(tocBody, "chapter")
            ch.set("src", ProcessFileName(chapterHref) + ".snbc")
            ch.text = chapterTitle

        # Maps a document href to its list of (fragment id, title) pairs;
        # the fragment id '' stands for the start of the document.
        outputFiles = {}
        if oeb_book.toc.count() == 0:
            log.warn('This SNB file has no Table of Contents. '
                     'Creating a default TOC')
            first = next(iter(oeb_book.spine))
            oeb_book.toc.add(_('Start page'), first.href)
        else:
            first = next(iter(oeb_book.spine))
            if oeb_book.toc[0].href != first.href:
                # The pages before the first item in toc will be stored as
                # "Cover Pages".
                # oeb_book.toc does not support "insert", so we generate
                # the tocInfoTree directly instead of modifying the toc
                add_chapter(first.href, _('Cover pages'))
                outputFiles[first.href] = [("", _("Cover pages"))]

        for tocitem in oeb_book.toc:
            if '#' in tocitem.href:
                parts = tocitem.href.split('#')
                if len(parts) != 2:
                    log.error('Error in TOC item: %s' % tocitem)
                    continue
                docHref, fragment = parts
                if docHref not in outputFiles:
                    # First reference to this document points into its
                    # middle: everything before the anchor becomes a
                    # "(Preface)" chapter.
                    prefaceTitle = tocitem.title + _(" (Preface)")
                    outputFiles[docHref] = [("", prefaceTitle)]
                    add_chapter(docHref, prefaceTitle)
                outputFiles[docHref].append((fragment, tocitem.title))
            elif tocitem.href in outputFiles:
                outputFiles[tocitem.href].append(("", tocitem.title))
            else:
                outputFiles[tocitem.href] = [("", tocitem.title)]
                add_chapter(tocitem.href, tocitem.title)

        etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)
        write_tree(tocInfoTree, os.path.join(snbfDir, 'toc.snbf'))

        # Output Files
        oldTree = None      # tree of the most recently written chapter
        lastName = None     # file name that tree was written to
        mergeLast = False   # True while documents are merged into oldTree
        for item in s:
            if m.hrefs[item.href].media_type not in OEB_DOCS:
                continue

            if item.href not in outputFiles:
                log.debug(
                    'File %s is unused in TOC. Continue in last chapter'
                    % item.href)
                mergeLast = True
            else:
                if oldTree is not None and mergeLast:
                    # The previous chapter grew through merging after it
                    # was first written, so write it again.
                    log.debug('Output the modified chapter again: %s'
                              % lastName)
                    write_tree(oldTree, os.path.join(snbcDir, lastName))
                mergeLast = False

            log.debug('Converting %s to snbc...' % item.href)
            snbwriter = SNBMLizer(log)
            if mergeLast:
                log.debug('Merge %s with last TOC item...' % item.href)
                snbwriter.merge_content(
                    oldTree, oeb_book, item, [('', _("Start"))], opts)
            else:
                snbcTrees = snbwriter.extract_content(
                    oeb_book, item, outputFiles[item.href], opts)
                for subName in snbcTrees:
                    postfix = '' if subName == '' else '_' + subName
                    lastName = ProcessFileName(
                        item.href + postfix + ".snbc")
                    oldTree = snbcTrees[subName]
                    write_tree(oldTree, os.path.join(snbcDir, lastName))

        # Output the last one if needed
        if oldTree is not None and mergeLast:
            log.debug('Output the last modified chapter again: %s'
                      % lastName)
            write_tree(oldTree, os.path.join(snbcDir, lastName))

        for item in m:
            if m.hrefs[item.href].media_type in OEB_IMAGES:
                log.debug('Converting image: %s ...' % item.href)
                content = m.hrefs[item.href].data
                # Convert & Resize image
                self.HandleImage(
                    content,
                    os.path.join(snbiDir, ProcessFileName(item.href)))

        # Package as SNB File
        snbFile = SNBFile()
        snbFile.FromDir(tdir)
        snbFile.Output(output_path)