Beispiel #1
0
def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1
    path = args[1]
    stream_type = os.path.splitext(path)[1].replace('.', '').lower()

    trying_to_set = False
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'get_cover'):
            continue
        if getattr(opts, pref.name) is not None:
            trying_to_set = True
            break
    with open(path, 'rb') as stream:
        mi = get_metadata(stream, stream_type, force_read_metadata=True)
    if trying_to_set:
        prints(_('Original metadata')+'::')
    metadata = unicode_type(mi)
    if trying_to_set:
        metadata = '\t'+'\n\t'.join(metadata.split('\n'))
    prints(metadata, safe_encode=True)

    if trying_to_set:
        with open(path, 'r+b') as stream:
            do_set_metadata(opts, mi, stream, stream_type)
            stream.seek(0)
            stream.flush()
            lrf = None
            if stream_type == 'lrf':
                if opts.lrf_bookid is not None:
                    lrf = LRFMetaFile(stream)
                    lrf.book_id = opts.lrf_bookid
            mi = get_metadata(stream, stream_type, force_read_metadata=True)
        prints('\n' + _('Changed metadata') + '::')
        metadata = unicode_type(mi)
        metadata = '\t'+'\n\t'.join(metadata.split('\n'))
        prints(metadata, safe_encode=True)
        if lrf is not None:
            prints('\tBookID:', lrf.book_id)

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(getcwd(), mi)
        with open(opts.to_opf, 'wb') as f:
            opf.render(f)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if mi.cover_data and mi.cover_data[1]:
            with open(opts.get_cover, 'wb') as f:
                f.write(mi.cover_data[1])
                prints(_('Cover saved to'), f.name)
        else:
            prints(_('No cover found'), file=sys.stderr)

    return 0
Beispiel #2
0
def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1
    path = args[1]
    stream = open(path, 'r+b')
    stream_type = os.path.splitext(path)[1].replace('.', '').lower()

    trying_to_set = False
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'get_cover'):
            continue
        if getattr(opts, pref.name) is not None:
            trying_to_set = True
            break
    mi = get_metadata(stream, stream_type, force_read_metadata=True)
    if trying_to_set:
        prints(_('Original metadata') + '::')
    metadata = unicode(mi)
    if trying_to_set:
        metadata = '\t' + '\n\t'.join(metadata.split('\n'))
    prints(metadata, safe_encode=True)

    if trying_to_set:
        stream.seek(0)
        do_set_metadata(opts, mi, stream, stream_type)
        stream.seek(0)
        stream.flush()
        lrf = None
        if stream_type == 'lrf':
            if opts.lrf_bookid is not None:
                lrf = LRFMetaFile(stream)
                lrf.book_id = opts.lrf_bookid
        mi = get_metadata(stream, stream_type, force_read_metadata=True)
        prints('\n' + _('Changed metadata') + '::')
        metadata = unicode(mi)
        metadata = '\t' + '\n\t'.join(metadata.split('\n'))
        prints(metadata, safe_encode=True)
        if lrf is not None:
            prints('\tBookID:', lrf.book_id)

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(os.getcwdu(), mi)
        with open(opts.to_opf, 'wb') as f:
            opf.render(f)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if mi.cover_data and mi.cover_data[1]:
            with open(opts.get_cover, 'wb') as f:
                f.write(mi.cover_data[1])
                prints(_('Cover saved to'), f.name)
        else:
            prints(_('No cover found'), file=sys.stderr)

    return 0
Beispiel #3
0
def do_show_metadata(db, id, as_opf):
    if not db.has_id(id):
        raise ValueError('Id #%d is not present in database.'%id)
    mi = db.get_metadata(id, index_is_id=True)
    if as_opf:
        mi = OPFCreator(os.getcwdu(), mi)
        mi.render(sys.stdout)
    else:
        prints(unicode(mi))
Beispiel #4
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.toc import TOC
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.utils.zipfile import ZipFile

        self.options = options
        self.log = log
        pages, images = [], []
        toc = TOC()

        if file_ext == 'pmlz':
            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                zf = ZipFile(stream)
                zf.extractall(tdir)

                pmls = glob.glob(os.path.join(tdir, '*.pml'))
                for pml in pmls:
                    html_name = os.path.splitext(
                        os.path.basename(pml))[0] + '.html'
                    html_path = os.path.join(os.getcwd(), html_name)

                    pages.append(html_name)
                    log.debug('Processing PML item %s...' % pml)
                    ttoc = self.process_pml(pml, html_path)
                    toc += ttoc
                images = self.get_images(stream, tdir, True)
        else:
            toc = self.process_pml(stream, 'index.html')
            pages.append('index.html')

            if hasattr(stream, 'name'):
                images = self.get_images(
                    stream, os.path.abspath(os.path.dirname(stream.name)))

        # We want pages to be ordered alphabetically.
        pages.sort()

        manifest_items = []
        for item in pages + images:
            manifest_items.append((item, None))

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        if 'images/cover.png' in images:
            mi.cover = 'images/cover.png'
        opf = OPFCreator(os.getcwd(), mi)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        opf.set_toc(toc)
        with lopen('metadata.opf', 'wb') as opffile:
            with lopen('toc.ncx', 'wb') as tocfile:
                opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(os.getcwd(), 'metadata.opf')
Beispiel #5
0
def do_show_metadata(db, id, as_opf):
    if not db.has_id(id):
        raise ValueError('Id #%d is not present in database.'%id)
    mi = db.get_metadata(id, index_is_id=True)
    if as_opf:
        mi = OPFCreator(os.getcwdu(), mi)
        mi.render(sys.stdout)
    else:
        prints(unicode(mi))
Beispiel #6
0
    def write(self, doc):
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with lopen(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
        if css:
            with lopen(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                item.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)

        def process_guide(E, guide):
            if self.toc_anchor is not None:
                guide.append(E.reference(
                    href='index.html#' + self.toc_anchor, title=_('Table of Contents'), type='toc'))
        toc_file = os.path.join(self.dest_dir, 'toc.ncx')
        with lopen(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #7
0
    def write(self, doc):
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log)
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                item.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)
        toc_file = os.path.join(self.dest_dir, 'toc.ncx')
        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #8
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.metadata.toc import TOC
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.utils.zipfile import ZipFile

        self.options = options
        self.log = log
        pages, images = [], []
        toc = TOC()

        if file_ext == 'pmlz':
            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                zf = ZipFile(stream)
                zf.extractall(tdir)

                pmls = glob.glob(os.path.join(tdir, '*.pml'))
                for pml in pmls:
                    html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
                    html_path = os.path.join(getcwd(), html_name)

                    pages.append(html_name)
                    log.debug('Processing PML item %s...' % pml)
                    ttoc = self.process_pml(pml, html_path)
                    toc += ttoc
                images = self.get_images(stream, tdir, True)
        else:
            toc = self.process_pml(stream, 'index.html')
            pages.append('index.html')

            if hasattr(stream, 'name'):
                images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))

        # We want pages to be orded alphabetically.
        pages.sort()

        manifest_items = []
        for item in pages+images:
            manifest_items.append((item, None))

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        if 'images/cover.png' in images:
            mi.cover = 'images/cover.png'
        opf = OPFCreator(getcwd(), mi)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        opf.set_toc(toc)
        with lopen('metadata.opf', 'wb') as opffile:
            with lopen('toc.ncx', 'wb') as tocfile:
                opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(getcwd(), 'metadata.opf')
Beispiel #9
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(os.getcwdu(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwdu(), mi)

        manifest = [(u'index.html', None)]

        images = os.listdir(os.getcwdu())
        images.remove('index.html')
        for i in images:
            manifest.append((i, None))
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine([u'index.html'])
        log.debug('Rendering manifest...')
        with open(u'metadata.opf', 'wb') as opffile:
            opf.render(opffile)

        return os.path.join(os.getcwdu(), u'metadata.opf')
Beispiel #10
0
def main(opts, args, dbctx):
    if len(args) < 1:
        raise SystemExit(_('You must specify an id'))
    book_id = int(args[0])
    mi = dbctx.run('show_metadata', book_id)
    if mi is None:
        raise SystemExit('Id #%d is not present in database.' % id)
    if opts.as_opf:
        mi = OPFCreator(os.getcwdu(), mi)
        mi.render(sys.stdout)
    else:
        prints(unicode(mi))

    return 0
Beispiel #11
0
    def write(self, doc):
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log)
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                item.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)
        toc_file = os.path.join(self.dest_dir, 'toc.ncx')
        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #12
0
    def write(self, doc):
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with lopen(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
        if css:
            with lopen(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                item.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)

        def process_guide(E, guide):
            if self.toc_anchor is not None:
                guide.append(E.reference(
                    href='index.html#' + self.toc_anchor, title=_('Table of Contents'), type='toc'))
        toc_file = os.path.join(self.dest_dir, 'toc.ncx')
        with lopen(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #13
0
def main(opts, args, dbctx):
    if len(args) < 1:
        raise SystemExit(_('You must specify an id'))
    book_id = int(args[0])
    mi = dbctx.run('show_metadata', book_id)
    if mi is None:
        raise SystemExit(f'Id #{book_id} is not present in database.')
    if opts.as_opf:
        stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        mi = OPFCreator(os.getcwd(), mi)
        mi.render(stdout)
    else:
        prints(str(mi))

    return 0
    def convert(self, stream, options, file_ext, log, accelerators):
        log.debug('Enter convert() ...')
        dest_dir = os.getcwdu()  # note: temp dir from calibre process
        log.debug('dest_dir: ' + dest_dir)

        mi = None

        # call latex2mobi with markup output only
        from subprocess import check_output, STDOUT, CalledProcessError

        args = [
            self.java_exec, '-jar',
            os.path.join(self.plugin_dir, JAR_FILENAME), '-i', stream.name,
            '-n', '-o', dest_dir
        ]

        from calibre_plugins.latexformulas_input.config import prefs

        if prefs['pandoc_exec'] != None and prefs['pandoc_exec'] != '':
            args.append('-p')
            args.append(prefs['pandoc_exec'])

        try:
            log.debug(check_output(args, stderr=STDOUT))
        except CalledProcessError as e:
            log.debug(e.returncode)
            log.debug(e.cmd)
            log.debug(e.output)

        opf = OPFCreator(dest_dir, mi)
        markup_dir = dest_dir + os.path.sep + os.path.basename(
            stream.name) + '-markup'
        log.debug('Markup-dir: ' + markup_dir)
        log.debug('CreateManifestFromFilesIn()')

        opf.create_manifest_from_files_in([markup_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                log.debug('Item ' + str(item) + ' is of type text/html')
                item.media_type = guess_type('a.html')[0]
                log.debug('Guess type result: ' + item.media_type)
            if item.media_type == 'text/css':
                log.debug('Item ' + str(item) + ' is of type text/css')
                item.media_type = guess_type('a.css')[0]
                log.debug('Guess type result: ' + item.media_type)

        log.debug('Create_spine()')
        opf.create_spine(
            [os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])

        output_path = os.path.join(dest_dir, 'metadata.opf')
        with open(output_path, 'wb') as of:
            opf.render(of)

        log('Exit convert() ...')
        return output_path
Beispiel #15
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(os.getcwdu(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwdu(), mi)

        manifest = [(u'index.html', None)]

        images = os.listdir(os.getcwdu())
        images.remove('index.html')
        for i in images:
            manifest.append((i, None))
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine([u'index.html'])
        log.debug('Rendering manifest...')
        with open(u'metadata.opf', 'wb') as opffile:
            opf.render(opffile)

        return os.path.join(os.getcwdu(), u'metadata.opf')
Beispiel #16
0
    def create_opf(self, output_dir, images, toc):
        with CurrentDir(output_dir):
            if 'cover.png' in images:
                self.mi.cover = os.path.join('images', 'cover.png')

            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            opf.set_toc(toc)
            with open('metadata.opf', 'wb') as opffile:
                with open('toc.ncx', 'wb') as tocfile:
                    opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(output_dir, 'metadata.opf')
Beispiel #17
0
    def write(self):
        toc = self.create_toc()
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        opf.create_spine(['index.html'])
        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #18
0
    def create_opf(self, output_dir, pages, images):
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = []
            for page in pages + images:
                manifest.append((page, None))

            opf.create_manifest(manifest)
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
Beispiel #19
0
    def create_opf(self, output_dir, images):
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images/', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
Beispiel #20
0
    def create_opf(self, output_dir, images, toc):
        with CurrentDir(output_dir):
            if 'cover.png' in images:
                self.mi.cover = os.path.join('images', 'cover.png')

            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            opf.set_toc(toc)
            with open('metadata.opf', 'wb') as opffile:
                with open('toc.ncx', 'wb') as tocfile:
                    opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(output_dir, 'metadata.opf')
    def convert(self, stream, options, file_ext, log, accelerators):
        log.debug('Enter convert() ...')
        dest_dir = os.getcwdu()  # note: temp dir from calibre process
        log.debug('dest_dir: ' + dest_dir)

        mi = None

        # call latex2mobi with markup output only
        from subprocess import check_output, STDOUT, CalledProcessError

        args = [self.java_exec, '-jar', os.path.join(self.plugin_dir, JAR_FILENAME), '-i', stream.name,
                '-n', '-o', dest_dir]

        from calibre_plugins.latexformulas_input.config import prefs

        if prefs['pandoc_exec'] != None and prefs['pandoc_exec'] != '':
            args.append('-p')
            args.append(prefs['pandoc_exec'])

        try:
            log.debug(check_output(args, stderr=STDOUT))
        except CalledProcessError as e:
            log.debug(e.returncode)
            log.debug(e.cmd)
            log.debug(e.output)

        opf = OPFCreator(dest_dir, mi)
        markup_dir = dest_dir + os.path.sep + os.path.basename(stream.name) + '-markup'
        log.debug('Markup-dir: ' + markup_dir)
        log.debug('CreateManifestFromFilesIn()')

        opf.create_manifest_from_files_in([markup_dir])
        for item in opf.manifest:
            if item.media_type == 'text/html':
                log.debug('Item ' + str(item) + ' is of type text/html')
                item.media_type = guess_type('a.html')[0]
                log.debug('Guess type result: ' + item.media_type)
            if item.media_type == 'text/css':
                log.debug('Item ' + str(item) + ' is of type text/css')
                item.media_type = guess_type('a.css')[0]
                log.debug('Guess type result: ' + item.media_type)

        log.debug('Create_spine()')
        opf.create_spine([os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])

        output_path = os.path.join(dest_dir, 'metadata.opf')
        with open(output_path, 'wb') as of:
            opf.render(of)

        log('Exit convert() ...')
        return output_path
Beispiel #22
0
    def write(self):
        toc = self.create_toc()
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        opf.create_spine(['index.html'])
        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #23
0
    def create_opf(self, output_dir, pages, images):
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = []
            for page in pages+images:
                manifest.append((page, None))

            opf.create_manifest(manifest)
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
Beispiel #24
0
    def create_opf(self, output_dir, images):
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images/', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
Beispiel #25
0
    def write(self):
        toc = self.create_toc()
        raw = html.tostring(self.html, encoding="utf-8", doctype="<!DOCTYPE html>")
        with open(os.path.join(self.dest_dir, "index.html"), "wb") as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, "docx.css"), "wb") as f:
                f.write(css.encode("utf-8"))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        opf.create_spine(["index.html"])
        with open(os.path.join(self.dest_dir, "metadata.opf"), "wb") as of, open(
            os.path.join(self.dest_dir, "toc.ncx"), "wb"
        ) as ncx:
            opf.render(of, ncx, "toc.ncx")
        return os.path.join(self.dest_dir, "metadata.opf")
Beispiel #26
0
    def write(self, doc):
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map)
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css(self.dest_dir, self.docx)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)
        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return os.path.join(self.dest_dir, 'metadata.opf')
Beispiel #27
0
    def __call__(self, stream, odir, log):
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag
            html = html.replace('<title></title>',
                                '<title>%s</title>' % (mi.title, ))
            try:
                html = self.fix_markup(html, log)
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(as_bytes(html))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(getcwd()), mi)
            opf.create_manifest([(os.path.abspath(f2), None)
                                 for f2 in walk(getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Beispiel #28
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(os.getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwd(), mi)

        manifest = [('index.html', None)]

        images = os.listdir(os.getcwd())
        images.remove('index.html')
        for i in images:
            manifest.append((i, None))
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine(['index.html'])
        log.debug('Rendering manifest...')
        with lopen('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
        if os.path.exists('toc.ncx'):
            ncxid = opf.manifest.id_for_path('toc.ncx')
            if ncxid:
                with lopen('metadata.opf', 'r+b') as f:
                    raw = f.read().replace(
                        b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                    f.seek(0)
                    f.write(raw)

        return os.path.join(os.getcwd(), 'metadata.opf')
Beispiel #29
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(getcwd(), mi)

        manifest = [('index.html', None)]

        images = os.listdir(getcwd())
        images.remove('index.html')
        for i in images:
            manifest.append((i, None))
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine(['index.html'])
        log.debug('Rendering manifest...')
        with lopen('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
        if os.path.exists('toc.ncx'):
            ncxid = opf.manifest.id_for_path('toc.ncx')
            if ncxid:
                with lopen('metadata.opf', 'r+b') as f:
                    raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                    f.seek(0)
                    f.write(raw)

        return os.path.join(getcwd(), 'metadata.opf')
Beispiel #30
0
    def __call__(self, stream, odir, log):
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag
            html = html.replace('<title></title>','<title>%s</title>'%(mi.title,))
            try:
                html = self.fix_markup(html, log)
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(html.encode('utf-8'))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
            opf.create_manifest([(os.path.abspath(f2), None) for f2 in
                walk(os.getcwdu())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Beispiel #31
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            imap = {}
            try:
                imap = self.extract_images(d[0])
            except:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = u'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            #clean multiple \n
            res = re.sub('\n+', '\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
                    # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwdu(), mi)
        opf.create_manifest([(u'index.xhtml', None)])
        opf.create_spine([u'index.xhtml'])
        opf.render(open(u'metadata.opf', 'wb'))
        return os.path.abspath(u'metadata.opf')
Beispiel #32
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
        from calibre.ebooks.chardet import xml_to_unicode
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
        self.log = log
        log.debug('Parsing XML...')
        raw = stream.read().replace('\0', '')
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
            assume_utf8=True, resolve_entities=True)[0]
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except:
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                        parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''
        for s in stylesheets:
            css += etree.tostring(s, encoding=unicode, method='text',
                    with_tail=False) + '\n\n'
        if css:
            import cssutils, logging
            parser = cssutils.CSSParser(fetcher=None,
                    log=logging.getLogger('calibre.css'))

            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
            stylesheet.namespaces['h'] = XHTML_NS
            css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
            css = re.sub(r'name\s*=\s*', 'class=', css)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        ss = open(P('templates/fb2.xsl'), 'rb').read()
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                    re.DOTALL).sub('', ss)

        styledoc = etree.fromstring(ss)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))
        index = transform.tostring(result)
        open(u'index.xhtml', 'wb').write(index)
        open(u'inline-styles.css', 'wb').write(css)
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        cpath = None
        if mi.cover_data and mi.cover_data[1]:
            with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
                f.write(mi.cover_data[1])
            cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
        else:
            for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
                href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
                if href is not None:
                    if href.startswith('#'):
                        href = href[1:]
                    cpath = os.path.abspath(href)
                    break

        opf = OPFCreator(os.getcwdu(), mi)
        entries = [(f2, guess_type(f)[0]) for f2 in os.listdir(u'.')]
        opf.create_manifest(entries)
        opf.create_spine([u'index.xhtml'])
        if cpath:
            opf.guide.set_cover(cpath)
        with open(u'metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(os.getcwdu(), u'metadata.opf')
Beispiel #33
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.fb2 import ensure_namespace
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
        from calibre.ebooks.chardet import xml_to_unicode
        self.log = log
        log.debug('Parsing XML...')
        raw = stream.read().replace('\0', '')
        raw = xml_to_unicode(raw,
                             strip_encoding_pats=True,
                             assume_utf8=True,
                             resolve_entities=True)[0]
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except:
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                                       parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        doc = ensure_namespace(doc)
        try:
            fb_ns = doc.nsmap[doc.prefix]
        except Exception:
            fb_ns = FB2NS

        NAMESPACES = {'f': fb_ns, 'l': XLINK_NS}
        stylesheets = doc.xpath(
            '//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''
        for s in stylesheets:
            css += etree.tostring(
                s, encoding=str, method='text', with_tail=False) + '\n\n'
        if css:
            import cssutils, logging
            parser = cssutils.CSSParser(fetcher=None,
                                        log=logging.getLogger('calibre.css'))

            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
            stylesheet.namespaces['h'] = XHTML_NS
            css = str(stylesheet.cssText).replace('h|style', 'h|span')
            css = re.sub(r'name\s*=\s*', 'class=', css)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        ss = open(P('templates/fb2.xsl'), 'rb').read()
        ss = ss.replace("__FB_NS__", fb_ns)
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                            re.DOTALL).sub('', ss)

        styledoc = etree.fromstring(ss)

        transform = etree.XSLT(styledoc)
        result = transform(doc)

        # Handle links of type note and cite
        notes = {
            a.get('href')[1:]: a
            for a in result.xpath('//a[@link_note and @href]')
            if a.get('href').startswith('#')
        }
        cites = {
            a.get('link_cite'): a
            for a in result.xpath('//a[@link_cite]') if not a.get('href', '')
        }
        all_ids = {x for x in result.xpath('//*/@id')}
        for cite, a in cites.items():
            note = notes.get(cite, None)
            if note:
                c = 1
                while 'cite%d' % c in all_ids:
                    c += 1
                if not note.get('id', None):
                    note.set('id', 'cite%d' % c)
                    all_ids.add(note.get('id'))
                a.set('href', '#%s' % note.get('id'))
        for x in result.xpath('//*[@link_note or @link_cite]'):
            x.attrib.pop('link_note', None)
            x.attrib.pop('link_cite', None)

        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))
        index = transform.tostring(result)
        open('index.xhtml', 'wb').write(index)
        open('inline-styles.css', 'wb').write(css)
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        cpath = None
        if mi.cover_data and mi.cover_data[1]:
            with open('fb2_cover_calibre_mi.jpg', 'wb') as f:
                f.write(mi.cover_data[1])
            cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
        else:
            for img in doc.xpath('//f:coverpage/f:image',
                                 namespaces=NAMESPACES):
                href = img.get('{%s}href' % XLINK_NS, img.get('href', None))
                if href is not None:
                    if href.startswith('#'):
                        href = href[1:]
                    cpath = os.path.abspath(href)
                    break

        opf = OPFCreator(os.getcwd(), mi)
        entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])
        if cpath:
            opf.guide.set_cover(cpath)
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(os.getcwd(), 'metadata.opf')
Beispiel #34
0
    def create_opf(self, htmlfile, guide=None, root=None):
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
        if mi is None:
            mi = MetaInformation(self.book_header.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
            opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
            opf.cover = u'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                * opf.cover.split('/'))):
                opf.cover = None

        cover = opf.cover
        cover_copied = None
        if cover is not None:
            cover = cover.replace('/', os.sep)
            if os.path.exists(cover):
                ncover = u'images'+os.sep+u'calibre_cover.jpg'
                if os.path.exists(ncover):
                    os.remove(ncover)
                shutil.copyfile(cover, ncover)
                cover_copied = os.path.abspath(ncover)
                opf.cover = ncover.replace(os.sep, '/')

        manifest = [(htmlfile, 'application/xhtml+xml'),
            (os.path.abspath(u'styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        added = set([])
        for i in getattr(self, 'image_names', []):
            path = os.path.join(bp, 'images', i)
            added.add(path)
            manifest.append((path, 'image/jpeg'))
        if cover_copied is not None:
            manifest.append((cover_copied, 'image/jpeg'))

        opf.create_manifest(manifest)
        opf.create_spine([os.path.basename(htmlfile)])
        toc = None
        if guide is not None:
            opf.create_guide(guide)
            for ref in opf.guide:
                if ref.type.lower() == 'toc':
                    toc = ref.href()

        ncx_manifest_entry = None
        if toc:
            ncx_manifest_entry = 'toc.ncx'
            elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            if elems:
                tocobj = TOC()
                found = False
                reached = False
                for x in root.iter():
                    if x == elems[-1]:
                        reached = True
                        continue
                    if reached and x.tag == 'a':
                        href = x.get('href', '')
                        if href and re.match('\w+://', href) is None:
                            try:
                                text = u' '.join([t.strip() for t in
                                    x.xpath('descendant::text()')])
                            except:
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            item = tocobj.add_item(toc.partition('#')[0], href[1:],
                                text)
                            item.left_space = int(self.get_left_whitespace(x))
                            found = True
                    if reached and found and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
                opf.set_toc(tocobj)

        return opf, ncx_manifest_entry
Beispiel #35
0
    def write_opf(self, guide, toc, spine, resource_map):
        mi = self.header.exth.mi
        if self.cover_offset is not None and self.cover_offset < len(resource_map):
            mi.cover = resource_map[self.cover_offset]

        if len(list(toc)) < 2:
            self.log.warn("KF8 has no metadata Table of Contents")

            for ref in guide:
                if ref.type == "toc":
                    href = ref.href()
                    href, frag = urldefrag(href)
                    if os.path.exists(href.replace("/", os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
                        except:
                            self.log.exception("Failed to read inline ToC")

        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = guide

        def exclude(path):
            return os.path.basename(path) == "debug-raw.html"

        opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
        for entry in opf.manifest:
            if entry.mime_type == "text/html":
                entry.mime_type = "application/xhtml+xml"
        opf.create_spine(spine)
        opf.set_toc(toc)
        ppd = self.resc_data.get("page-progression-direction", None)
        if ppd:
            opf.page_progression_direction = ppd

        with open("metadata.opf", "wb") as of, open("toc.ncx", "wb") as ncx:
            opf.render(of, ncx, "toc.ncx")
        return "metadata.opf"
Beispiel #36
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
        from calibre.ebooks.chardet import xml_to_unicode
        self.log = log
        log.debug('Parsing XML...')
        raw = get_fb2_data(stream)[0]
        raw = raw.replace(b'\0', b'')
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
            assume_utf8=True, resolve_entities=True)[0]
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except:
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                        parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        doc = ensure_namespace(doc)
        try:
            fb_ns = doc.nsmap[doc.prefix]
        except Exception:
            fb_ns = FB2NS

        NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''
        for s in stylesheets:
            css += etree.tostring(s, encoding='unicode', method='text',
                    with_tail=False) + '\n\n'
        if css:
            import css_parser, logging
            parser = css_parser.CSSParser(fetcher=None,
                    log=logging.getLogger('calibre.css'))

            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
            stylesheet.namespaces['h'] = XHTML_NS
            css = stylesheet.cssText
            if isinstance(css, bytes):
                css = css.decode('utf-8', 'replace')
            css = css.replace('h|style', 'h|span')
            css = re.sub(r'name\s*=\s*', 'class=', css)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        with open(P('templates/fb2.xsl'), 'rb') as f:
            ss = f.read().decode('utf-8')
        ss = ss.replace("__FB_NS__", fb_ns)
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                    re.DOTALL).sub('', ss)

        styledoc = etree.fromstring(ss)

        transform = etree.XSLT(styledoc)
        result = transform(doc)

        # Handle links of type note and cite
        notes = {a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#')}
        cites = {a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '')}
        all_ids = {x for x in result.xpath('//*/@id')}
        for cite, a in iteritems(cites):
            note = notes.get(cite, None)
            if note:
                c = 1
                while 'cite%d' % c in all_ids:
                    c += 1
                if not note.get('id', None):
                    note.set('id', 'cite%d' % c)
                    all_ids.add(note.get('id'))
                a.set('href', '#%s' % note.get('id'))
        for x in result.xpath('//*[@link_note or @link_cite]'):
            x.attrib.pop('link_note', None)
            x.attrib.pop('link_cite', None)

        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))
        index = transform.tostring(result)
        with open(u'index.xhtml', 'wb') as f:
            f.write(index.encode('utf-8'))
        with open(u'inline-styles.css', 'wb') as f:
            f.write(css.encode('utf-8'))
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        cpath = None
        if mi.cover_data and mi.cover_data[1]:
            with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
                f.write(mi.cover_data[1])
            cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
        else:
            for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
                href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
                if href is not None:
                    if href.startswith('#'):
                        href = href[1:]
                    cpath = os.path.abspath(href)
                    break

        opf = OPFCreator(getcwd(), mi)
        entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir(u'.')]
        opf.create_manifest(entries)
        opf.create_spine([u'index.xhtml'])
        if cpath:
            opf.guide.set_cover(cpath)
        with open(u'metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(getcwd(), u'metadata.opf')
Beispiel #37
0
    def write_opf(self, guide, toc, spine, resource_map):
        mi = self.header.exth.mi
        if self.cover_offset is not None and self.cover_offset < len(resource_map):
            mi.cover = resource_map[self.cover_offset]

        if len(list(toc)) < 2:
            self.log.warn("KF8 has no metadata Table of Contents")

            for ref in guide:
                if ref.type == "toc":
                    href = ref.href()
                    href, frag = urldefrag(href)
                    if os.path.exists(href.replace("/", os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
                        except:
                            self.log.exception("Failed to read inline ToC")

        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = guide

        def exclude(path):
            return os.path.basename(path) == "debug-raw.html"

        # If there are no images then the azw3 input plugin dumps all
        # binary records as .unknown images, remove them
        if self.for_tweak and os.path.exists("images") and os.path.isdir("images"):
            files = os.listdir("images")
            unknown = [x for x in files if x.endswith(".unknown")]
            if len(files) == len(unknown):
                [os.remove("images/" + f) for f in files]

        if self.for_tweak:
            try:
                os.remove("debug-raw.html")
            except:
                pass

        opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
        for entry in opf.manifest:
            if entry.mime_type == "text/html":
                entry.mime_type = "application/xhtml+xml"
        opf.create_spine(spine)
        opf.set_toc(toc)
        ppd = getattr(self.header.exth, "page_progression_direction", None)
        if ppd in {"ltr", "rtl", "default"}:
            opf.page_progression_direction = ppd

        with open("metadata.opf", "wb") as of, open("toc.ncx", "wb") as ncx:
            opf.render(of, ncx, "toc.ncx")
        return "metadata.opf"
Beispiel #38
0
    def write_opf(self, guide, toc, spine, resource_map):
        mi = self.header.exth.mi
        if (self.cover_offset is not None and self.cover_offset <
                len(resource_map)):
            mi.cover = resource_map[self.cover_offset]

        if len(list(toc)) < 2:
            self.log.warn('KF8 has no metadata Table of Contents')

            for ref in guide:
                if ref.type == 'toc':
                    href = ref.href()
                    href, frag = urldefrag(href)
                    if os.path.exists(href.replace('/', os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
                        except:
                            self.log.exception('Failed to read inline ToC')

        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = guide

        def exclude(path):
            return os.path.basename(path) == 'debug-raw.html'

        opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
        opf.create_spine(spine)
        opf.set_toc(toc)

        with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return 'metadata.opf'
Beispiel #39
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log = opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = u'comic_%d' % (i + 1) if len(comics_) > 1 else u'.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s' % stream.name)

        mi = MetaInformation(
            os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
        opf = OPFCreator(os.getcwdu(), mi)
        entries = []

        def href(x):
            if len(comics) == 1:
                return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x),
                             None,
                             _('Page') + ' %d' % (i + 1),
                             play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                                    None,
                                    comic[0],
                                    play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x),
                                      None,
                                      _('Page') + ' %d' % (i + 1),
                                      play_order=po)
                        po += 1
        opf.set_toc(toc)
        m, n = open(u'metadata.opf', 'wb'), open('toc.ncx', 'wb')
        opf.render(m, n, u'toc.ncx')
        return os.path.abspath(u'metadata.opf')
Beispiel #40
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log = opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        num_pages_per_comic = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = 'comic_%d' % (i + 1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            num_pages_per_comic.append(len(pages))
            if self.for_viewer:
                comics.append(
                    (title, pages, [self.create_viewer_wrapper(pages, cdir)]))
            else:
                wrappers = self.create_wrappers(pages)
                comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s' % stream.name)

        mi = MetaInformation(
            os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
        opf = OPFCreator(os.getcwd(), mi)
        entries = []

        def href(x):
            if len(comics) == 1:
                return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        cover_href = None
        for comic in comics:
            pages, wrappers = comic[1:]
            page_entries = [(x, None) for x in map(href, pages)]
            entries += [(w, None) for w in map(href, wrappers)] + page_entries
            if cover_href is None and page_entries:
                cover_href = page_entries[0][0]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        if self.for_viewer and cover_href:
            if os.path.isabs(cover_href):
                cover_href = os.path.relpath(cover_href).replace(os.sep, '/')
            opf.guide.set_cover(cover_href)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            if self.for_viewer:
                wrapper_page_href = href(wrappers[0])
                for i in range(num_pages_per_comic[0]):
                    toc.add_item('{}#page_{}'.format(wrapper_page_href, i + 1),
                                 None,
                                 _('Page') + ' %d' % (i + 1),
                                 play_order=i)

            else:
                for i, x in enumerate(wrappers):
                    toc.add_item(href(x),
                                 None,
                                 _('Page') + ' %d' % (i + 1),
                                 play_order=i)
        else:
            po = 0
            for num_pages, comic in zip(num_pages_per_comic, comics):
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                                    None,
                                    comic[0],
                                    play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    if self.for_viewer:
                        wrapper_page_href = href(wrappers[0])
                        for i in range(num_pages):
                            stoc.add_item('{}#page_{}'.format(
                                wrapper_page_href, i + 1),
                                          None,
                                          _('Page') + ' %d' % (i + 1),
                                          play_order=po)
                            po += 1
                    else:
                        for i, x in enumerate(wrappers):
                            stoc.add_item(href(x),
                                          None,
                                          _('Page') + ' %d' % (i + 1),
                                          play_order=po)
                            po += 1
        opf.set_toc(toc)
        with open('metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
            opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')
Beispiel #41
0
def opf_writer(path, opf_name, manifest, spine, mi):
    opf = OPFCreator(path, mi)
    opf.create_manifest(manifest)
    opf.create_spine(spine)
    with open(os.path.join(path, opf_name), 'wb') as opffile:
        opf.render(opffile)
Beispiel #42
0
    def write_opf(self, guide, toc, spine, resource_map):
        mi = self.header.exth.mi
        if (self.cover_offset is not None
                and self.cover_offset < len(resource_map)):
            mi.cover = resource_map[self.cover_offset]

        if len(list(toc)) < 2:
            self.log.warn('KF8 has no metadata Table of Contents')

            for ref in guide:
                if ref.type == 'toc':
                    href = ref.href()
                    href, frag = urldefrag(href)
                    if os.path.exists(href.replace('/', os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
                        except:
                            self.log.exception('Failed to read inline ToC')

        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = guide

        def exclude(path):
            return os.path.basename(path) == 'debug-raw.html'

        # If there are no images then the azw3 input plugin dumps all
        # binary records as .unknown images, remove them
        if self.for_tweak and os.path.exists('images') and os.path.isdir(
                'images'):
            files = os.listdir('images')
            unknown = [x for x in files if x.endswith('.unknown')]
            if len(files) == len(unknown):
                [os.remove('images/' + f) for f in files]

        if self.for_tweak:
            try:
                os.remove('debug-raw.html')
            except:
                pass

        opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
        for entry in opf.manifest:
            if entry.mime_type == 'text/html':
                entry.mime_type = 'application/xhtml+xml'
        opf.create_spine(spine)
        opf.set_toc(toc)
        ppd = getattr(self.header.exth, 'page_progression_direction', None)
        if ppd in {'ltr', 'rtl', 'default'}:
            opf.page_progression_direction = ppd

        with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return 'metadata.opf'
Beispiel #43
0
    def create_opf(self, htmlfile, guide=None, root=None):
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
        if mi is None:
            mi = MetaInformation(self.book_header.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
            opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
            opf.cover = u'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                * opf.cover.split('/'))):
                opf.cover = None

        cover = opf.cover
        cover_copied = None
        if cover is not None:
            cover = cover.replace('/', os.sep)
            if os.path.exists(cover):
                ncover = u'images'+os.sep+u'calibre_cover.jpg'
                if os.path.exists(ncover):
                    os.remove(ncover)
                shutil.copyfile(cover, ncover)
                cover_copied = os.path.abspath(ncover)
                opf.cover = ncover.replace(os.sep, '/')

        manifest = [(htmlfile, 'application/xhtml+xml'),
            (os.path.abspath(u'styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        added = set([])
        for i in getattr(self, 'image_names', []):
            path = os.path.join(bp, 'images', i)
            added.add(path)
            manifest.append((path, 'image/jpeg'))
        if cover_copied is not None:
            manifest.append((cover_copied, 'image/jpeg'))

        opf.create_manifest(manifest)
        opf.create_spine([os.path.basename(htmlfile)])
        toc = None
        if guide is not None:
            opf.create_guide(guide)
            for ref in opf.guide:
                if ref.type.lower() == 'toc':
                    toc = ref.href()

        ncx_manifest_entry = None
        if toc:
            ncx_manifest_entry = 'toc.ncx'
            elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            if elems:
                tocobj = TOC()
                found = False
                reached = False
                for x in root.iter():
                    if x == elems[-1]:
                        reached = True
                        continue
                    if reached and x.tag == 'a':
                        href = x.get('href', '')
                        if href and re.match('\\w+://', href) is None:
                            try:
                                text = u' '.join([t.strip() for t in
                                    x.xpath('descendant::text()')])
                            except:
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            item = tocobj.add_item(toc.partition('#')[0], href[1:],
                                text)
                            item.left_space = int(self.get_left_whitespace(x))
                            found = True
                    if reached and found and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
                opf.set_toc(tocobj)

        return opf, ncx_manifest_entry
Beispiel #44
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = u'comic_%d'%(i+1) if len(comics_) > 1 else u'.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages: continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.getcwdu(), mi)
        entries = []

        def href(x):
            if len(comics) == 1: return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x), None,
                                _('Page')+' %d'%(i+1), play_order=po)
                        po += 1
        opf.set_toc(toc)
        m, n = open(u'metadata.opf', 'wb'), open('toc.ncx', 'wb')
        opf.render(m, n, u'toc.ncx')
        return os.path.abspath(u'metadata.opf')
Beispiel #45
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        from calibre.utils.xml_parse import safe_xml_fromstring
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            self.log.exception('Unable to parse RTF')
            raise ValueError(
                _('This RTF file has a feature calibre does not '
                  'support. Convert it to HTML first and then try it.\n%s') %
                e)

        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            imap = {}
            try:
                imap = self.extract_images(d[0])
            except:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        doc = safe_xml_fromstring(xml)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath(
                '//rtf:pict[@num]',
                namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True),
                                       recover=False)
        extensions = {('calibre', 'inline-class'): inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = as_bytes(transform.tostring(result))
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            # clean multiple \n
            res = re.sub(b'\n+', b'\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
            # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        opf.render(open('metadata.opf', 'wb'))
        return os.path.abspath('metadata.opf')
Beispiel #46
0
def opf_writer(path, opf_name, manifest, spine, mi):
    opf = OPFCreator(path, mi)
    opf.create_manifest(manifest)
    opf.create_spine(spine)
    with open(os.path.join(path, opf_name), 'wb') as opffile:
        opf.render(opffile)
Beispiel #47
0
    def write_opf(self, guide, toc, spine, resource_map):
        mi = self.header.exth.mi
        if (self.cover_offset is not None and self.cover_offset <
                len(resource_map)):
            mi.cover = resource_map[self.cover_offset]

        if len(list(toc)) < 2:
            self.log.warn('KF8 has no metadata Table of Contents')

            for ref in guide:
                if ref.type == 'toc':
                    href = ref.href()
                    href, frag = urldefrag(href)
                    if os.path.exists(href.replace('/', os.sep)):
                        try:
                            toc = self.read_inline_toc(href, frag)
                        except:
                            self.log.exception('Failed to read inline ToC')

        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = guide

        def exclude(path):
            return os.path.basename(path) == 'debug-raw.html'

        # If there are no images then the azw3 input plugin dumps all
        # binary records as .unknown images, remove them
        if self.for_tweak and os.path.exists('images') and os.path.isdir('images'):
            files = os.listdir('images')
            unknown = [x for x in files if x.endswith('.unknown')]
            if len(files) == len(unknown):
                [os.remove('images/'+f) for f in files]

        if self.for_tweak:
            try:
                os.remove('debug-raw.html')
            except:
                pass

        opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
        for entry in opf.manifest:
            if entry.mime_type == 'text/html':
                entry.mime_type = 'application/xhtml+xml'
        opf.create_spine(spine)
        opf.set_toc(toc)
        ppd = getattr(self.header.exth, 'page_progression_direction', None)
        if ppd in {'ltr', 'rtl', 'default'}:
            opf.page_progression_direction = ppd

        with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        return 'metadata.opf'