Ejemplo n.º 1
0
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(io.BytesIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = meta.find('.//head/publisher').text

            if extract_cover:
                cover = meta.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    root, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))

    except Exception:
        import traceback
        traceback.print_exc()

    return mi
Ejemplo n.º 2
0
    def convert(self, stream, options, file_ext, log, accelerators):
        import uuid

        from calibre.ebooks.oeb.base import DirContainer
        from calibre.ebooks.snb.snbfile import SNBFile
        from calibre.utils.xml_parse import safe_xml_fromstring

        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except:
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")
        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log,
                             None,
                             options,
                             encoding=options.input_encoding,
                             populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = safe_xml_fromstring(meta)
            l = {
                'title': './/head/name',
                'creator': './/head/author',
                'language': './/head/language',
                'generator': './/head/generator',
                'publisher': './/head/publisher',
                'cover': './/head/cover',
            }
            d = {}
            for item in l:
                node = meta.find(l[item])
                if node is not None:
                    d[item] = node.text if node.text is not None else ''
                else:
                    d[item] = ''

            oeb.metadata.add('title', d['title'])
            oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
            oeb.metadata.add('language',
                             d['language'].lower().replace('_', '-'))
            oeb.metadata.add('generator', d['generator'])
            oeb.metadata.add('publisher', d['publisher'])
            if d['cover'] != '':
                oeb.guide.add('cover', 'Cover', d['cover'])

        bookid = unicode_type(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                oeb.uid = oeb.metadata.identifier[0]
                break

        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = safe_xml_fromstring(toc)
                i = 1
                for ch in toc.find('.//body'):
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = safe_xml_fromstring(data)
                    lines = []
                    for line in snbc.find('.//body'):
                        if line.tag == 'text':
                            lines.append('<p>%s</p>' % html_encode(line.text))
                        elif line.tag == 'img':
                            lines.append('<p><img src="%s" /></p>' %
                                         html_encode(line.text))
                    with open(os.path.join(tdir, fname), 'wb') as f:
                        f.write((HTML_TEMPLATE %
                                 (chapterName, '\n'.join(lines))).encode(
                                     'utf-8', 'replace'))
                    oeb.toc.add(ch.text, fname)
                    id, href = oeb.manifest.generate(
                        id='html', href=ascii_filename(fname))
                    item = oeb.manifest.add(id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    id, href = oeb.manifest.generate(id='image',
                                                     href=ascii_filename(f))
                    item = oeb.manifest.add(id, href, m)
                    item.html_input_href = f

        return oeb
Ejemplo n.º 3
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.ebooks.snb.snbfile import SNBFile
        from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName

        self.opts = opts
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        try:
            rasterizer = SVGRasterizer()
            rasterizer(oeb_book, opts)
        except Unavailable:
            log.warn('SVG rasterizer unavailable, SVG will not be converted')

        # Create temp dir
        with TemporaryDirectory('_snb_output') as tdir:
            # Create stub directories
            snbfDir = os.path.join(tdir, 'snbf')
            snbcDir = os.path.join(tdir, 'snbc')
            snbiDir = os.path.join(tdir, 'snbc/images')
            os.mkdir(snbfDir)
            os.mkdir(snbcDir)
            os.mkdir(snbiDir)

            # Process Meta data
            meta = oeb_book.metadata
            if meta.title:
                title = str(meta.title[0])
            else:
                title = ''
            authors = [str(x) for x in meta.creator if x.role == 'aut']
            if meta.publisher:
                publishers = str(meta.publisher[0])
            else:
                publishers = ''
            if meta.language:
                lang = str(meta.language[0]).upper()
            else:
                lang = ''
            if meta.description:
                abstract = str(meta.description[0])
            else:
                abstract = ''

            # Process Cover
            g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
            href = None
            if 'titlepage' not in g:
                if 'cover' in g:
                    href = g['cover'].href

            # Output book info file
            bookInfoTree = etree.Element("book-snbf", version="1.0")
            headTree = etree.SubElement(bookInfoTree, "head")
            etree.SubElement(headTree, "name").text = title
            etree.SubElement(headTree, "author").text = ' '.join(authors)
            etree.SubElement(headTree, "language").text = lang
            etree.SubElement(headTree, "rights")
            etree.SubElement(headTree, "publisher").text = publishers
            etree.SubElement(
                headTree, "generator").text = __appname__ + ' ' + __version__
            etree.SubElement(headTree, "created")
            etree.SubElement(headTree, "abstract").text = abstract
            if href is not None:
                etree.SubElement(headTree,
                                 "cover").text = ProcessFileName(href)
            else:
                etree.SubElement(headTree, "cover")
            with open(os.path.join(snbfDir, 'book.snbf'), 'wb') as f:
                f.write(
                    etree.tostring(bookInfoTree,
                                   pretty_print=True,
                                   encoding='utf-8'))

            # Output TOC
            tocInfoTree = etree.Element("toc-snbf")
            tocHead = etree.SubElement(tocInfoTree, "head")
            tocBody = etree.SubElement(tocInfoTree, "body")
            outputFiles = {}
            if oeb_book.toc.count() == 0:
                log.warn('This SNB file has no Table of Contents. '
                         'Creating a default TOC')
                first = next(iter(oeb_book.spine))
                oeb_book.toc.add(_('Start page'), first.href)
            else:
                first = next(iter(oeb_book.spine))
                if oeb_book.toc[0].href != first.href:
                    # The pages before the fist item in toc will be stored as
                    # "Cover Pages".
                    # oeb_book.toc does not support "insert", so we generate
                    # the tocInfoTree directly instead of modifying the toc
                    ch = etree.SubElement(tocBody, "chapter")
                    ch.set("src", ProcessFileName(first.href) + ".snbc")
                    ch.text = _('Cover pages')
                    outputFiles[first.href] = []
                    outputFiles[first.href].append(("", _("Cover pages")))

            for tocitem in oeb_book.toc:
                if tocitem.href.find('#') != -1:
                    item = tocitem.href.split('#')
                    if len(item) != 2:
                        log.error('Error in TOC item: %s' % tocitem)
                    else:
                        if item[0] in outputFiles:
                            outputFiles[item[0]].append(
                                (item[1], tocitem.title))
                        else:
                            outputFiles[item[0]] = []
                            if "" not in outputFiles[item[0]]:
                                outputFiles[item[0]].append(
                                    ("", tocitem.title + _(" (Preface)")))
                                ch = etree.SubElement(tocBody, "chapter")
                                ch.set("src",
                                       ProcessFileName(item[0]) + ".snbc")
                                ch.text = tocitem.title + _(" (Preface)")
                            outputFiles[item[0]].append(
                                (item[1], tocitem.title))
                else:
                    if tocitem.href in outputFiles:
                        outputFiles[tocitem.href].append(("", tocitem.title))
                    else:
                        outputFiles[tocitem.href] = []
                        outputFiles[tocitem.href].append(("", tocitem.title))
                ch = etree.SubElement(tocBody, "chapter")
                ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
                ch.text = tocitem.title

            etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)

            with open(os.path.join(snbfDir, 'toc.snbf'), 'wb') as f:
                f.write(
                    etree.tostring(tocInfoTree,
                                   pretty_print=True,
                                   encoding='utf-8'))

            # Output Files
            oldTree = None
            mergeLast = False
            lastName = None
            for item in s:
                from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
                if m.hrefs[item.href].media_type in OEB_DOCS:
                    if item.href not in outputFiles:
                        log.debug(
                            'File %s is unused in TOC. Continue in last chapter'
                            % item.href)
                        mergeLast = True
                    else:
                        if oldTree is not None and mergeLast:
                            log.debug('Output the modified chapter again: %s' %
                                      lastName)
                            with open(os.path.join(snbcDir, lastName),
                                      'wb') as f:
                                f.write(
                                    etree.tostring(oldTree,
                                                   pretty_print=True,
                                                   encoding='utf-8'))
                            mergeLast = False

                    log.debug('Converting %s to snbc...' % item.href)
                    snbwriter = SNBMLizer(log)
                    snbcTrees = None
                    if not mergeLast:
                        snbcTrees = snbwriter.extract_content(
                            oeb_book, item, outputFiles[item.href], opts)
                        for subName in snbcTrees:
                            postfix = ''
                            if subName != '':
                                postfix = '_' + subName
                            lastName = ProcessFileName(item.href + postfix +
                                                       ".snbc")
                            oldTree = snbcTrees[subName]
                            with open(os.path.join(snbcDir, lastName),
                                      'wb') as f:
                                f.write(
                                    etree.tostring(oldTree,
                                                   pretty_print=True,
                                                   encoding='utf-8'))
                    else:
                        log.debug('Merge %s with last TOC item...' % item.href)
                        snbwriter.merge_content(oldTree, oeb_book, item,
                                                [('', _("Start"))], opts)

            # Output the last one if needed
            log.debug('Output the last modified chapter again: %s' % lastName)
            if oldTree is not None and mergeLast:
                with open(os.path.join(snbcDir, lastName), 'wb') as f:
                    f.write(
                        etree.tostring(oldTree,
                                       pretty_print=True,
                                       encoding='utf-8'))
                mergeLast = False

            for item in m:
                if m.hrefs[item.href].media_type in OEB_IMAGES:
                    log.debug('Converting image: %s ...' % item.href)
                    content = m.hrefs[item.href].data
                    # Convert & Resize image
                    self.HandleImage(
                        content,
                        os.path.join(snbiDir, ProcessFileName(item.href)))

            # Package as SNB File
            snbFile = SNBFile()
            snbFile.FromDir(tdir)
            snbFile.Output(output_path)