Example #1
0
def tweak(ebook_file):
    ''' Command line interface to the Tweak Book tool '''
    fmt = ebook_file.rpartition('.')[-1].lower()
    exploder, rebuilder = get_tools(fmt)
    if exploder is None:
        prints('Cannot tweak %s files. Supported formats are: EPUB, HTMLZ, AZW3, MOBI'
                , file=sys.stderr)
        raise SystemExit(1)

    with TemporaryDirectory('_tweak_'+
            os.path.basename(ebook_file).rpartition('.')[0]) as tdir:
        try:
            opf = exploder(ebook_file, tdir, question=ask_cli_question)
        except WorkerError as e:
            prints('Failed to unpack', ebook_file)
            prints(e.orig_tb)
            raise SystemExit(1)
        except Error as e:
            prints(as_unicode(e), file=sys.stderr)
            raise SystemExit(1)

        if opf is None:
            # The question was answered with No
            return

        ed = os.environ.get('EDITOR', 'dummy')
        cmd = shlex.split(ed)
        isvim = bool([x for x in cmd[0].split('/') if x.endswith('vim')])

        proceed = False
        prints('Book extracted to', tdir)

        if not isvim:
            prints('Make your tweaks and once you are done,', __appname__,
                    'will rebuild', ebook_file, 'from', tdir)
            print()
            proceed = ask_cli_question('Rebuild ' + ebook_file + '?')
        else:
            base = os.path.basename(ebook_file)
            with TemporaryFile(base+'.zip') as zipf:
                with ZipFile(zipf, 'w') as zf:
                    zf.add_dir(tdir)
                try:
                    subprocess.check_call(cmd + [zipf])
                except:
                    prints(ed, 'failed, aborting...')
                    raise SystemExit(1)
                with ZipFile(zipf, 'r') as zf:
                    shutil.rmtree(tdir)
                    os.mkdir(tdir)
                    zf.extractall(path=tdir)
            proceed = True

        if proceed:
            prints('Rebuilding', ebook_file, 'please wait ...')
            try:
                rebuilder(tdir, ebook_file)
            except WorkerError as e:
                prints('Failed to rebuild', ebook_file)
                prints(e.orig_tb)
                raise SystemExit(1)
            prints(ebook_file, 'successfully tweaked')
Example #2
0
    def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
        from calibre.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            zf.extractall()
            zf.close()
            with lopen('download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.environ.get('CALIBRE_RECIPE_URN'):
                from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
                urn = os.environ['CALIBRE_RECIPE_URN']
                log('Downloading recipe urn: ' + urn)
                rtype, recipe_id = urn.partition(':')[::2]
                if not recipe_id:
                    raise ValueError('Invalid recipe urn: ' + urn)
                if rtype == 'custom':
                    self.recipe_source = get_custom_recipe(recipe_id)
                else:
                    self.recipe_source = get_builtin_recipe_by_id(
                        urn, log=log, download_recipe=True)
                if not self.recipe_source:
                    raise ValueError('Could not find recipe with urn: ' + urn)
                if not isinstance(self.recipe_source, bytes):
                    self.recipe_source = self.recipe_source.encode('utf-8')
                recipe = compile_recipe(self.recipe_source)
            elif os.access(recipe_or_file, os.R_OK):
                with lopen(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from calibre.web.feeds.recipes.collection import (
                    get_builtin_recipe_by_title, get_builtin_recipe_titles)
                title = getattr(opts, 'original_recipe_input_arg',
                                recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                titles = frozenset(get_builtin_recipe_titles())
                if title not in titles:
                    title = getattr(opts, 'original_recipe_input_arg',
                                    recipe_or_file)
                    title = title.rpartition('.')[0]

                raw = get_builtin_recipe_by_title(
                    title,
                    log=log,
                    download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        log.warn(
                            'Downloaded recipe needs calibre version at least: %s'
                            % ('.'.join(recipe.requires_version)))
                        builtin = True
                except:
                    log.exception(
                        'Failed to compile downloaded recipe. Falling '
                        'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title,
                                                      log=log,
                                                      download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: ' +
                                         title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError(
                    '%r is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        for f in walk('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Example #3
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(getcwd())
        except:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = self.find_opf()
        if opf is None:
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)

        opf = os.path.relpath(opf, getcwd())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1])+'/'

            def normpath(x):
                return posixpath.normpath(delta + elem.get('href'))

            for elem in opf.itermanifest():
                elem.set('href', normpath(elem.get('href')))
            for elem in opf.iterguide():
                elem.set('href', normpath(elem.get('href')))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
        self.removed_cover = f(opf, log)
        if self.removed_cover:
            self.removed_items_to_ignore = (self.removed_cover,)
        epub3_nav = opf.epub3_nav
        if epub3_nav is not None:
            self.convert_epub3_nav(epub3_nav, opf, log, options)

        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_:
                mt = y.get('media-type', None)
                if mt in {
                        'application/vnd.adobe-page-template+xml',
                        'application/vnd.adobe.page-template+xml',
                        'application/adobe-page-template+xml',
                        'application/adobe.page-template+xml',
                        'application/text'
                }:
                    not_for_spine.add(id_)
                ext = y.get('href', '').rpartition('.')[-1].lower()
                if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                    # some epub authoring software sets font mime types to
                    # text/plain
                    not_for_spine.add(id_)
                    y.set('media-type', 'application/font')

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError('No valid entries in the spine of this EPUB')

        with lopen('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Example #4
0
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
        from calibre.ebooks.chardet import detect
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.txt.processor import (
            convert_basic, convert_markdown_with_metadata,
            separate_paragraphs_single_line,
            separate_paragraphs_print_formatted, preserve_spaces,
            detect_paragraph_type, detect_formatting_type,
            normalize_line_endings, convert_textile, remove_indents,
            block_to_single_line, separate_hard_scene_breaks)

        self.log = log
        txt = ''
        log.debug('Reading text from file...')
        length = 0
        base_dir = os.getcwdu()

        # Extract content from zip archive.
        if file_ext == 'txtz':
            zf = ZipFile(stream)
            zf.extractall('.')

            for x in walk('.'):
                if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                    with open(x, 'rb') as tf:
                        txt += tf.read() + '\n\n'
        else:
            if getattr(stream, 'name', None):
                base_dir = os.path.dirname(stream.name)
            txt = stream.read()
            if file_ext in {'md', 'textile', 'markdown'}:
                options.formatting_type = {
                    'md': 'markdown'
                }.get(file_ext, file_ext)
                log.info('File extension indicates particular formatting. '
                         'Forcing formatting type to: %s' %
                         options.formatting_type)
                options.paragraph_type = 'off'

        # Get the encoding of the document.
        if options.input_encoding:
            ienc = options.input_encoding
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt[:4096])
            det_encoding, confidence = det_encoding['encoding'], det_encoding[
                'confidence']
            if det_encoding and det_encoding.lower().replace(
                    '_',
                    '-').strip() in ('gb2312', 'chinese', 'csiso58gb231280',
                                     'euc-cn', 'euccn', 'eucgb2312-cn',
                                     'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
            ienc = det_encoding
            log.debug(
                'Detected input encoding as %s with a confidence of %s%%' %
                (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug(
                'No input encoding specified and could not auto detect using %s'
                % ienc)
        # Remove BOM from start of txt as its presence can confuse markdown
        import codecs
        for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                    codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
            if txt.startswith(bom):
                txt = txt[len(bom):]
                break
        txt = txt.decode(ienc, 'replace')

        # Replace entities
        txt = _ent_pat.sub(xml_entity_to_unicode, txt)

        # Normalize line endings
        txt = normalize_line_endings(txt)

        # Determine the paragraph type of the document.
        if options.paragraph_type == 'auto':
            options.paragraph_type = detect_paragraph_type(txt)
            if options.paragraph_type == 'unknown':
                log.debug(
                    'Could not reliably determine paragraph type using block')
                options.paragraph_type = 'block'
            else:
                log.debug('Auto detected paragraph type as %s' %
                          options.paragraph_type)

        # Detect formatting
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)
            log.debug('Auto detected formatting as %s' %
                      options.formatting_type)

        if options.formatting_type == 'heuristic':
            setattr(options, 'enable_heuristics', True)
            setattr(options, 'unwrap_lines', False)
            setattr(options, 'smarten_punctuation', True)

        # Reformat paragraphs to block formatting based on the detected type.
        # We don't check for block because the processor assumes block.
        # single and print at transformed to block for processing.
        if options.paragraph_type == 'single':
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'print':
            txt = separate_hard_scene_breaks(txt)
            txt = separate_paragraphs_print_formatted(txt)
            txt = block_to_single_line(txt)
        elif options.paragraph_type == 'unformatted':
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            # unwrap lines based on punctuation
            docanalysis = DocAnalysis('txt', txt)
            length = docanalysis.line_length(.5)
            preprocessor = HeuristicProcessor(options,
                                              log=getattr(self, 'log', None))
            txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'block':
            txt = separate_hard_scene_breaks(txt)
            txt = block_to_single_line(txt)

        if getattr(options, 'enable_heuristics', False) and getattr(
                options, 'dehyphenate', False):
            docanalysis = DocAnalysis('txt', txt)
            if not length:
                length = docanalysis.line_length(.5)
            dehyphenator = Dehyphenator(options.verbose, log=self.log)
            txt = dehyphenator(txt, 'txt', length)

        # User requested transformation on the text.
        if options.txt_in_remove_indents:
            txt = remove_indents(txt)

        # Preserve spaces will replace multiple spaces to a space
        # followed by the   entity.
        if options.preserve_spaces:
            txt = preserve_spaces(txt)

        # Process the text using the appropriate text processor.
        self.shifted_files = []
        try:
            html = ''
            input_mi = None
            if options.formatting_type == 'markdown':
                log.debug('Running text through markdown conversion...')
                try:
                    input_mi, html = convert_markdown_with_metadata(
                        txt,
                        extensions=[
                            x.strip()
                            for x in options.markdown_extensions.split(',')
                            if x.strip()
                        ])
                except RuntimeError:
                    raise ValueError(
                        'This txt file has malformed markup, it cannot be'
                        ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax'
                    )
                html = self.fix_resources(html, base_dir)
            elif options.formatting_type == 'textile':
                log.debug('Running text through textile conversion...')
                html = convert_textile(txt)
                html = self.fix_resources(html, base_dir)
            else:
                log.debug('Running text through basic conversion...')
                flow_size = getattr(options, 'flow_size', 0)
                html = convert_basic(txt, epub_split_size_kb=flow_size)

            # Run the HTMLized text through the html processing plugin.
            from calibre.customize.ui import plugin_for_input_format
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
            options.input_encoding = 'utf-8'
            htmlfile = self.shift_file(base_dir, 'index.html',
                                       html.encode('utf-8'))
            odi = options.debug_pipeline
            options.debug_pipeline = None
            # Generate oeb from html conversion.
            oeb = html_input.convert(open(htmlfile, 'rb'), options, 'html',
                                     log, {})
            options.debug_pipeline = odi
        finally:
            for x in self.shifted_files:
                os.remove(x)

        # Set metadata from file.
        if input_mi is None:
            from calibre.customize.ui import get_file_type_metadata
            input_mi = get_file_type_metadata(stream, file_ext)
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
        self.html_postprocess_title = input_mi.title

        return oeb
Example #5
0
    def compile_website_translations(self):
        from calibre.utils.zipfile import ZipFile, ZipInfo, ZIP_STORED
        from calibre.ptempfile import TemporaryDirectory
        from calibre.utils.localization import get_iso639_translator, get_language, get_iso_language
        self.info('Compiling website translations...')
        srcbase = self.j(self.d(self.SRC), 'translations', 'website')
        fmap = {}
        files = []
        stats = {}
        done = []

        def handle_stats(src, nums):
            locale = fmap[src]
            trans = nums[0]
            total = trans if len(nums) == 1 else (trans + nums[1])
            stats[locale] = int(round(100 * trans / total))

        with TemporaryDirectory() as tdir, ZipFile(
                self.j(srcbase, 'locales.zip'), 'w', ZIP_STORED) as zf:
            for f in os.listdir(srcbase):
                if f.endswith('.po'):
                    l = f.partition('.')[0]
                    pf = l.split('_')[0]
                    if pf in {'en'}:
                        continue
                    d = os.path.join(tdir, l + '.mo')
                    f = os.path.join(srcbase, f)
                    fmap[f] = l
                    files.append((f, d))
            self.compile_group(files, handle_stats=handle_stats)

            for locale, translated in iteritems(stats):
                if translated >= 20:
                    with open(os.path.join(tdir, locale + '.mo'), 'rb') as f:
                        raw = f.read()
                    zi = ZipInfo(os.path.basename(f.name))
                    zi.compress_type = ZIP_STORED
                    zf.writestr(zi, raw)
                    done.append(locale)
            dl = done + ['en']

            lang_names = {}
            for l in dl:
                if l == 'en':
                    t = get_language
                else:
                    t = getattr(get_iso639_translator(l),
                                'gettext' if ispy3 else 'ugettext')
                    t = partial(get_iso_language, t)
                lang_names[l] = {x: t(x) for x in dl}
            zi = ZipInfo('lang-names.json')
            zi.compress_type = ZIP_STORED
            zf.writestr(
                zi,
                json.dumps(lang_names, ensure_ascii=False).encode('utf-8'))
        dest = self.j(self.d(self.stats), 'website-languages.txt')
        data = ' '.join(sorted(done))
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(dest, 'wb') as f:
            f.write(data)
Example #6
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory('_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = 'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(
                    100,
                    (ascii_filename(str(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + '.html'), 'wb') as tf:
                if isinstance(html, str):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                    tf.write(htmlizer.get_css(oeb_book).encode('utf-8'))

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, 'images')):
                    os.makedirs(os.path.join(tdir, 'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            data = etree.tostring(
                                item.data, encoding='unicode').encode('utf-8')
                        else:
                            data = item.data
                        fname = os.path.join(tdir, 'images', images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, 'cover.jpg')
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except:
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                opf = OPF(
                    io.BytesIO(
                        etree.tostring(oeb_book.metadata.to_opf1(),
                                       encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = 'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            htmlz.add_dir(tdir)
def books(self, oncard=None, end_session=True):
    '''
    Return a list of ebooks on the device.
    @param oncard:  If 'carda' or 'cardb' return a list of ebooks on the
                    specific storage card, otherwise return list of ebooks
                    in main memory of device. If a card is specified and no
                    books are on the card return empty list.
    @return: A BookList.

    '''
    if oncard:
        return BookList()

    self._log_location()
    booklist = BookList()
    cached_books = {}

    # Fetch current assets from Media folder
    assets_profile = self._localize_database_path(self.assets_subpath)

    #Fetch current metadata from iBooks's DB
    db_profile = self._localize_database_path(self.books_subpath)
    con = sqlite3.connect(db_profile['path'])

    # Mount the Media folder
    self.ios.mount_ios_media_folder()

    # Get Books.plist so we can find the covers
    books_plist = {}

    if True:
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Sync/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                #print(book)
                #print
                continue

            if 'Cover Path' in book:
                    books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        # Process any outliers
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                #print(book)
                #print
                continue

            # Don't overwrite existing cover_paths
            if not '/'.join(['/Books', book['Path']]) in books_plist:
                if 'Cover Path' in book and not ['/'.join(['/Books', book['Path']])] in book_plist:
                        books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
                else:
                    books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Purchases/Purchases.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue

            # Don't overwrite existing cover_paths
            if not '/'.join(['/Books', book['Path']]) in books_plist:
                if 'Cover Path' in book:
                        books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books/Purchases', book['Path'], book['Cover Path']]))
                else:
                    books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    else:
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue

            if 'Cover Path' in book:
                    books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Purchases/Purchases.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue

            if 'Cover Path' in book:
                    books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books/Purchases', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    print(books_plist)

    with con:
        con.row_factory = sqlite3.Row
        # Build a collection map
        collections_map = {}

        # Get the books
        cur = con.cursor()
        #cur.execute("ATTACH DATABASE '{0}' as 'ASSETS'".format(assets_profile['path'])

        cur.execute('''SELECT ZASSETURL,
                              ZBOOKAUTHOR,
                              ZSORTAUTHOR,
                              ZBOOKTITLE,
                              ZSORTTITLE,
                              ZDATABASEKEY,
                              ZDATEADDED
                       FROM ZBKBOOKINFO
                       WHERE ZASSETURL LIKE 'file://localhost%' AND
                             ZASSETURL LIKE '%.epub/'
                    ''')
        rows = cur.fetchall()
        book_count = len(rows)
        for i, row in enumerate(rows):
            book_id = row[b'ZDATABASEKEY']

            # Get the collection assignments
            collections = []

            # Get the primary metadata
            this_book = Book(row[b'ZBOOKTITLE'], row[b'ZBOOKAUTHOR'])
            original_path = row[b'ZASSETURL']
            path = original_path[original_path.find('Media/') + len('Media'):-1]
            this_book.path = path.replace('%20', ' ')
            timestamp = int(row[b'ZDATEADDED']) + NSTimeIntervalSince1970
            this_book.datetime = datetime.fromtimestamp(timestamp).timetuple()
            this_book.device_collections = collections
            this_book.uuid = None
            this_book.thumbnail = self._generate_thumbnail(this_book, books_plist[this_book.path])

            # Retrieve folder size from cache or compute and cache
            try:
                zfr = ZipFile(self.folder_archive_path)
                file_size = zfr.read(this_book.path)
                this_book.size = int(file_size)
                self._log_diagnostic("returning folder size from cache")
            except:
                self._log_diagnostic("opening folder cache for appending")
                zfw = ZipFile(self.folder_archive_path, mode='a')
                stats = self.ios.stat(this_book.path)
                this_book.size = self.ios.get_folder_size(this_book.path)
                zfw.writestr(this_book.path, str(this_book.size))
                zfw.close()
            finally:
                zfr.close()

            booklist.add_book(this_book, False)

            if self.report_progress is not None:
                self.report_progress(float((i + 1)*100 / book_count)/100,
                    '%(num)d of %(tot)d' % dict(num=i + 1, tot=book_count))

            cached_books[this_book.path] = {
                'title': this_book.title,
                'author': this_book.author,
                'authors': this_book.author.split(' & '),
                'uuid': this_book.uuid
                }
        cur.close()

    # Close the connection
    self.ios.dismount_ios_media_folder()

    if self.report_progress is not None:
        self.report_progress(1.0, _('finished'))

    self.cached_books = cached_books
    return booklist
Example #8
0
 def init_zipfile(self, stream):
     self.zipf = ZipFile(stream)
     self.names = frozenset(self.zipf.namelist())
Example #9
0
def get_metadata(stream, extract_cover=True):
    whitespace = re.compile(r'\s+')

    def normalize(s):
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            ns, tag = fields[field]
            ans = root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns})
            if ans:
                return normalize(
                    tostring(ans[0],
                             method='text',
                             encoding='unicode',
                             with_tail=False)).strip()

        def find_all(field):
            ns, tag = fields[field]
            for x in root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns}):
                yield normalize(
                    tostring(x,
                             method='text',
                             encoding='unicode',
                             with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        keywords = []
        for q in ('keyword', 'keywords'):
            for kw in find_all(q):
                keywords += [x.strip() for x in kw.split(',') if x.strip()]
        mi.tags = uniq(keywords)
        data = {}
        for tag in root.xpath('//ns0:user-defined',
                              namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val
        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
                if data.get('opf.seriesindex', ''):
                    try:
                        mi.series_index = float(data['opf.seriesindex'])
                    except Exception:
                        mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #10
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
            '_TITLE_', prepare_string_for_xml(mi.title)).replace(
                '_AUTHORS_',
                prepare_string_for_xml(authors_to_string(
                    mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang),
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf,
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Example #11
0
    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
        from calibre.utils.logging import default_log as log
        from calibre.utils.config import JSONConfig

        # If preset specified from the cli, insert stored options from JSON file
        if hasattr(opts, 'preset') and opts.preset:
            available_presets = JSONConfig("catalog_presets")
            if opts.preset not in available_presets:
                if available_presets:
                    print(_('Error: Preset "%s" not found.' % opts.preset))
                    print(
                        _('Stored presets: %s' % ', '.join(
                            [p for p in sorted(available_presets.keys())])))
                else:
                    print(_('Error: No stored presets.'))
                return 1

            # Copy the relevant preset values to the opts object
            for item in available_presets[opts.preset]:
                if item not in [
                        'exclusion_rules_tw', 'format', 'prefix_rules_tw'
                ]:
                    setattr(opts, item, available_presets[opts.preset][item])

            # Provide an unconnected device
            opts.connected_device = {
                'is_device_connected': False,
                'kind': None,
                'name': None,
                'save_template': None,
                'serial': None,
                'storage': None,
            }

            # Convert prefix_rules and exclusion_rules from JSON lists to tuples
            prs = []
            for rule in opts.prefix_rules:
                prs.append(tuple(rule))
            opts.prefix_rules = tuple(prs)

            ers = []
            for rule in opts.exclusion_rules:
                ers.append(tuple(rule))
            opts.exclusion_rules = tuple(ers)

        opts.log = log
        opts.fmt = self.fmt = path_to_output.rpartition('.')[2]

        # Add local options
        opts.creator = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'),
                                          strftime('%d').lstrip('0'),
                                          strftime('%Y'))
        opts.creator_sort_as = '%s %s' % ('calibre', strftime('%Y-%m-%d'))
        opts.connected_kindle = False

        # Finalize output_profile
        op = opts.output_profile
        if op is None:
            op = 'default'

        if opts.connected_device['name'] and 'kindle' in opts.connected_device[
                'name'].lower():
            opts.connected_kindle = True
            if opts.connected_device['serial'] and \
               opts.connected_device['serial'][:4] in ['B004', 'B005']:
                op = "kindle_dx"
            else:
                op = "kindle"

        opts.description_clip = 380 if op.endswith(
            'dx') or 'kindle' not in op else 100
        opts.author_clip = 100 if op.endswith(
            'dx') or 'kindle' not in op else 60
        opts.output_profile = op

        opts.basename = "Catalog"
        opts.cli_environment = not hasattr(opts, 'sync')

        # Hard-wired to always sort descriptions by author, with series after non-series
        opts.sort_descriptions_by_author = True

        build_log = []

        build_log.append(
            "%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
            (self.name, current_library_name(), self.fmt,
             'for %s ' % opts.output_profile if opts.output_profile else '',
             'CLI' if opts.cli_environment else 'GUI',
             calibre_langcode_to_name(canonicalize_lang(get_lang()),
                                      localize=False)))

        # If exclude_genre is blank, assume user wants all tags as genres
        if opts.exclude_genre.strip() == '':
            # opts.exclude_genre = '\[^.\]'
            # build_log.append(" converting empty exclude_genre to '\[^.\]'")
            opts.exclude_genre = 'a^'
            build_log.append(" converting empty exclude_genre to 'a^'")
        if opts.connected_device['is_device_connected'] and \
           opts.connected_device['kind'] == 'device':
            if opts.connected_device['serial']:
                build_log.append(" connected_device: '%s' #%s%s " %
                                 (opts.connected_device['name'],
                                  opts.connected_device['serial'][0:4], 'x' *
                                  (len(opts.connected_device['serial']) - 4)))
                for storage in opts.connected_device['storage']:
                    if storage:
                        build_log.append("  mount point: %s" % storage)
            else:
                build_log.append(" connected_device: '%s'" %
                                 opts.connected_device['name'])
                try:
                    for storage in opts.connected_device['storage']:
                        if storage:
                            build_log.append("  mount point: %s" % storage)
                except:
                    build_log.append("  (no mount points)")
        else:
            build_log.append(" connected_device: '%s'" %
                             opts.connected_device['name'])

        opts_dict = vars(opts)
        if opts_dict['ids']:
            build_log.append(" book count: %d" % len(opts_dict['ids']))

        sections_list = []
        if opts.generate_authors:
            sections_list.append('Authors')
        if opts.generate_titles:
            sections_list.append('Titles')
        if opts.generate_series:
            sections_list.append('Series')
        if opts.generate_genres:
            sections_list.append('Genres')
        if opts.generate_recently_added:
            sections_list.append('Recently Added')
        if opts.generate_descriptions:
            sections_list.append('Descriptions')

        if not sections_list:
            if opts.cli_environment:
                opts.log.warn(
                    '*** No Section switches specified, enabling all Sections ***'
                )
                opts.generate_authors = True
                opts.generate_titles = True
                opts.generate_series = True
                opts.generate_genres = True
                opts.generate_recently_added = True
                opts.generate_descriptions = True
                sections_list = [
                    'Authors', 'Titles', 'Series', 'Genres', 'Recently Added',
                    'Descriptions'
                ]
            else:
                opts.log.warn(
                    '\n*** No enabled Sections, terminating catalog generation ***'
                )
                return [
                    "No Included Sections",
                    "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"
                ]
        if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
            warning = _(
                "\n*** Adding 'By authors' section required for MOBI output ***"
            )
            opts.log.warn(warning)
            sections_list.insert(0, 'Authors')
            opts.generate_authors = True

        opts.log(" Sections: %s" % ', '.join(sections_list))
        opts.section_list = sections_list

        # Limit thumb_width to 1.0" - 2.0"
        try:
            if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
                log.warning("coercing thumb_width from '%s' to '%s'" %
                            (opts.thumb_width, self.THUMB_SMALLEST))
                opts.thumb_width = self.THUMB_SMALLEST
            if float(opts.thumb_width) > float(self.THUMB_LARGEST):
                log.warning("coercing thumb_width from '%s' to '%s'" %
                            (opts.thumb_width, self.THUMB_LARGEST))
                opts.thumb_width = self.THUMB_LARGEST
            opts.thumb_width = "%.2f" % float(opts.thumb_width)
        except:
            log.error("coercing thumb_width from '%s' to '%s'" %
                      (opts.thumb_width, self.THUMB_SMALLEST))
            opts.thumb_width = "1.0"

        # eval prefix_rules if passed from command line
        if type(opts.prefix_rules) is not tuple:
            try:
                opts.prefix_rules = eval(opts.prefix_rules)
            except:
                log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
                raise
            for rule in opts.prefix_rules:
                if len(rule) != 4:
                    log.error(
                        "incorrect number of args for --prefix-rules: %s" %
                        repr(rule))

        # eval exclusion_rules if passed from command line
        if type(opts.exclusion_rules) is not tuple:
            try:
                opts.exclusion_rules = eval(opts.exclusion_rules)
            except:
                log.error("malformed --exclusion-rules: %s" %
                          opts.exclusion_rules)
                raise
            for rule in opts.exclusion_rules:
                if len(rule) != 3:
                    log.error(
                        "incorrect number of args for --exclusion-rules: %s" %
                        repr(rule))

        # Display opts
        keys = sorted(opts_dict.keys())
        build_log.append(" opts:")
        for key in keys:
            if key in [
                    'catalog_title', 'author_clip', 'connected_kindle',
                    'creator', 'cross_reference_authors', 'description_clip',
                    'exclude_book_marker', 'exclude_genre', 'exclude_tags',
                    'exclusion_rules', 'fmt', 'genre_source_field',
                    'header_note_source_field', 'merge_comments_rule',
                    'output_profile', 'prefix_rules', 'preset',
                    'read_book_marker', 'search_text', 'sort_by',
                    'sort_descriptions_by_author', 'sync', 'thumb_width',
                    'use_existing_cover', 'wishlist_tag'
            ]:
                build_log.append("  %s: %s" % (key, repr(opts_dict[key])))
        if opts.verbose:
            log('\n'.join(line for line in build_log))

        # Capture start_time
        opts.start_time = time.time()

        self.opts = opts

        if opts.verbose:
            log.info(" Begin catalog source generation (%s)" % str(
                datetime.timedelta(seconds=int(time.time() -
                                               opts.start_time))))

        # Launch the Catalog builder
        catalog = CatalogBuilder(db, opts, self, report_progress=notification)

        try:
            catalog.build_sources()
            if opts.verbose:
                log.info(" Completed catalog source generation (%s)\n" % str(
                    datetime.timedelta(seconds=int(time.time() -
                                                   opts.start_time))))
        except (AuthorSortMismatchException, EmptyCatalogException) as e:
            log.error(" *** Terminated catalog generation: %s ***" % e)
        except:
            log.error(" unhandled exception in catalog generator")
            raise

        else:
            recommendations = []
            recommendations.append(
                ('remove_fake_margins', False, OptionRecommendation.HIGH))
            recommendations.append(('comments', '', OptionRecommendation.HIGH))
            """
            >>> Use to debug generated catalog code before pipeline conversion <<<
            """
            GENERATE_DEBUG_EPUB = False
            if GENERATE_DEBUG_EPUB:
                catalog_debug_path = os.path.join(os.path.expanduser('~'),
                                                  'Desktop', 'Catalog debug')
                setattr(opts, 'debug_pipeline',
                        os.path.expanduser(catalog_debug_path))

            dp = getattr(opts, 'debug_pipeline', None)
            if dp is not None:
                recommendations.append(
                    ('debug_pipeline', dp, OptionRecommendation.HIGH))

            if opts.output_profile and opts.output_profile.startswith(
                    "kindle"):
                recommendations.append(('output_profile', opts.output_profile,
                                        OptionRecommendation.HIGH))
                recommendations.append(('book_producer', opts.output_profile,
                                        OptionRecommendation.HIGH))
                if opts.fmt == 'mobi':
                    recommendations.append(
                        ('no_inline_toc', True, OptionRecommendation.HIGH))
                    recommendations.append(
                        ('verbose', 2, OptionRecommendation.HIGH))

            # Use existing cover or generate new cover
            cpath = None
            existing_cover = False
            try:
                search_text = 'title:"%s" author:%s' % (
                    opts.catalog_title.replace('"', '\\"'), 'calibre')
                matches = db.search(search_text,
                                    return_matches=True,
                                    sort_results=False)
                if matches:
                    cpath = db.cover(matches[0],
                                     index_is_id=True,
                                     as_path=True)
                    if cpath and os.path.exists(cpath):
                        existing_cover = True
            except:
                pass

            if self.opts.use_existing_cover and not existing_cover:
                log.warning("no existing catalog cover found")

            if self.opts.use_existing_cover and existing_cover:
                recommendations.append(
                    ('cover', cpath, OptionRecommendation.HIGH))
                log.info("using existing catalog cover")
            else:
                from calibre.ebooks.covers import calibre_cover2
                log.info("replacing catalog cover")
                new_cover_path = PersistentTemporaryFile(suffix='.jpg')
                new_cover = calibre_cover2(opts.catalog_title, 'calibre')
                new_cover_path.write(new_cover)
                new_cover_path.close()
                recommendations.append(
                    ('cover', new_cover_path.name, OptionRecommendation.HIGH))

            # Run ebook-convert
            from calibre.ebooks.conversion.plumber import Plumber
            plumber = Plumber(os.path.join(catalog.catalog_path,
                                           opts.basename + '.opf'),
                              path_to_output,
                              log,
                              report_progress=notification,
                              abort_after_input_dump=False)
            plumber.merge_ui_recommendations(recommendations)
            plumber.run()

            try:
                os.remove(cpath)
            except:
                pass

            if GENERATE_DEBUG_EPUB:
                from calibre.ebooks.epub import initialize_container
                from calibre.ebooks.tweak import zip_rebuilder
                from calibre.utils.zipfile import ZipFile
                input_path = os.path.join(catalog_debug_path, 'input')
                epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
                initialize_container(epub_shell, opf_name='content.opf')
                with ZipFile(epub_shell, 'r') as zf:
                    zf.extractall(path=input_path)
                os.remove(epub_shell)
                zip_rebuilder(input_path,
                              os.path.join(catalog_debug_path, 'input.epub'))

            if opts.verbose:
                log.info(" Catalog creation complete (%s)\n" % str(
                    datetime.timedelta(seconds=int(time.time() -
                                                   opts.start_time))))

        # returns to gui2.actions.catalog:catalog_generated()
        return catalog.error
Example #12
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert a KePub file into a structure calibre can process."""
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            zf = ZipFile(stream)
            zf.extractall(unicode(os.getcwd()))
        except Exception:
            log.exception(
                "KEPUB appears to be invalid ZIP file, trying a "
                "more forgiving ZIP parser"
            )
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            for f in walk("."):
                if (
                    f.lower().endswith(".opf")
                    and "__MACOSX" not in f
                    and not os.path.basename(f).startswith(".")
                ):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, "name", "stream")

        if opf is None:
            raise ValueError(
                _(  # noqa: F821
                    "{0} is not a valid KEPUB file (could not find opf)"
                ).format(path)
            )

        encfile = os.path.abspath("rights.xml")
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, unicode(os.getcwd()))
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            delta = "/".join(parts[:-1]) + "/"
            for elem in opf.itermanifest():
                elem.set("href", delta + elem.get("href"))
            for elem in opf.iterguide():
                elem.set("href", delta + elem.get("href"))

        f = (
            self.rationalize_cover3
            if opf.package_version >= 3.0
            else self.rationalize_cover2
        )
        self.removed_cover = f(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError(
                    _("EPUB files with DTBook markup are not supported")  # noqa: F821
                )

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get("id", None)
            if id_ and y.get("media-type", None) in {
                "application/vnd.adobe-page-template+xml",
                "application/vnd.adobe.page-template+xml",
                "application/adobe-page-template+xml",
                "application/adobe.page-template+xml",
                "application/text",
            }:
                not_for_spine.add(id_)

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get("idref", None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath("content.opf")
Example #13
0
            from calibre.ebooks.conversion.plumber import Plumber
            plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
                            path_to_output, log, report_progress=notification,
                            abort_after_input_dump=False)
            plumber.merge_ui_recommendations(recommendations)
            plumber.run()

            try:
                os.remove(cpath)
            except:
                pass

            if GENERATE_DEBUG_EPUB:
                from calibre.ebooks.epub import initialize_container
                from calibre.ebooks.tweak import zip_rebuilder
                from calibre.utils.zipfile import ZipFile
                input_path = os.path.join(catalog_debug_path, 'input')
                epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
                initialize_container(epub_shell, opf_name='content.opf')
                with ZipFile(epub_shell, 'r') as zf:
                    zf.extractall(path=input_path)
                os.remove(epub_shell)
                zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))

            if opts.verbose:
                log.info(" Catalog creation complete (%s)\n" %
                     str(datetime.timedelta(seconds=int(time.time() - opts.start_time))))

        # returns to gui2.actions.catalog:catalog_generated()
        return catalog.error
def _generate_thumbnail(self, book, cover_path):
    '''
    Fetch the cover image, generate a thumbnail, cache
    Specific implementation for iBooks
    '''
    self._log_location(book.title)
    self._log_diagnostic(" book_path: %s" % book.path)
    self._log_diagnostic("cover_path: %s" % repr(cover_path))

    thumb_data = None
    thumb_path = book.path.rpartition('.')[0] + '.jpg'

    # Try getting the cover from the cache
    try:
        zfr = ZipFile(self.archive_path)
        thumb_data = zfr.read(thumb_path)
        if thumb_data == 'None':
            self._log_diagnostic("returning None from cover cache")
            zfr.close()
            return None
    except:
        self._log_diagnostic("opening cover cache for appending")
        zfw = ZipFile(self.archive_path, mode='a')
    else:
        self._log_diagnostic("returning thumb from cover cache")
        return thumb_data

    '''
    # Is book.path a directory (iBooks) or an epub?
    stats = self.ios.stat(book.path)
    if stats['st_ifmt'] == 'S_IFDIR':
        # ***  This needs to fetch the cover data from the directory  ***
        self._log_diagnostic("returning None, can't read iBooks covers yet")
        return thumb_data

    # Get the cover from the book
    try:
        stream = cStringIO.StringIO(self.ios.read(book.path, mode='rb'))
        mi = get_metadata(stream)
        if mi.cover_data is not None:
            img_data = cStringIO.StringIO(mi.cover_data[1])
    except:
        if self.verbose:
            self._log_diagnostic("ERROR: unable to get cover from '%s'" % book.title)
            import traceback
            #traceback.print_exc()
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        return thumb_data
    '''

    try:
        img_data = cStringIO.StringIO(self.ios.read(cover_path, mode='rb'))
    except:
        if self.verbose:
            self._log_diagnostic("ERROR fetching cover data for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
        return thumb_data


    # Generate a thumb
    try:
        im = PILImage.open(img_data)
        scaled, width, height = fit_image(im.size[0], im.size[1], 60, 80)
        im = im.resize((int(width), int(height)), PILImage.ANTIALIAS)

        thumb = cStringIO.StringIO()
        im.convert('RGB').save(thumb, 'JPEG')
        thumb_data = thumb.getvalue()
        thumb.close()
        self._log_diagnostic("SUCCESS: generated thumb for '%s', caching" % book.title)
        # Cache the tagged thumb
        zfw.writestr(thumb_path, thumb_data)
    except:
        if self.verbose:
            self._log_diagnostic("ERROR generating thumb for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
    finally:
        #img_data.close()
        zfw.close()

    return thumb_data
Example #15
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.utils.zipfile import ZipFile

        self.log = log
        html = u''
        top_levels = []

        # Extract content from zip archive.
        zf = ZipFile(stream)
        zf.extractall()

        # Find the HTML file in the archive. It needs to be
        # top level.
        index = u''
        multiple_html = False
        # Get a list of all top level files in the archive.
        for x in os.listdir(u'.'):
            if os.path.isfile(x):
                top_levels.append(x)
        # Try to find an index. file.
        for x in top_levels:
            if x.lower() in (u'index.html', u'index.xhtml', u'index.htm'):
                index = x
                break
        # Look for multiple HTML files in the archive. We look at the
        # top level files only as only they matter in HTMLZ.
        for x in top_levels:
            if os.path.splitext(x)[1].lower() in (u'.html', u'.xhtml', u'.htm'):
                # Set index to the first HTML file found if it's not
                # called index.
                if not index:
                    index = x
                else:
                    multiple_html = True
        # Warn the user if there multiple HTML file in the archive. HTMLZ
        # supports a single HTML file. A conversion with a multiple HTML file
        # HTMLZ archive probably won't turn out as the user expects. With
        # Multiple HTML files ZIP input should be used in place of HTMLZ.
        if multiple_html:
            log.warn(_('Multiple HTML files found in the archive. Only %s will be used.') % index)

        if index:
            with open(index, 'rb') as tf:
                html = tf.read()
        else:
            raise Exception(_('No top level HTML file found.'))

        if not html:
            raise Exception(_('Top level HTML file %s is empty') % index)

        # Encoding
        if options.input_encoding:
            ienc = options.input_encoding
        else:
            ienc = xml_to_unicode(html[:4096])[-1]
        html = html.decode(ienc, 'replace')

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = getcwd()
        htmlfile = os.path.join(base, u'index.html')
        c = 0
        while os.path.exists(htmlfile):
            c += 1
            htmlfile = u'index%d.html'%c
        with open(htmlfile, 'wb') as f:
            f.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        with open(htmlfile, 'rb') as f:
            oeb = html_input.convert(f, options, 'html', log,
                {})
        options.debug_pipeline = odi
        os.remove(htmlfile)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        # Get the cover path from the OPF.
        cover_path = None
        opf = None
        for x in top_levels:
            if os.path.splitext(x)[1].lower() == u'.opf':
                opf = x
                break
        if opf:
            opf = OPF(opf, basedir=getcwd())
            cover_path = opf.raster_cover or opf.cover
        # Set the cover.
        if cover_path:
            cdata = None
            with open(os.path.join(getcwd(), cover_path), 'rb') as cf:
                cdata = cf.read()
            cover_name = os.path.basename(cover_path)
            id, href = oeb.manifest.generate('cover', cover_name)
            oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
            oeb.guide.add('cover', 'Cover', href)

        return oeb
Example #16
0
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb

        if self.opts.epub_inline_toc:
            from calibre.ebooks.mobi.writer8.toc import TOCAdder
            opts.mobi_toc_at_start = not opts.epub_toc_at_end
            opts.mobi_passthrough = False
            opts.no_inline_toc = False
            TOCAdder(oeb,
                     opts,
                     replace_previous_inline_toc=True,
                     ignore_existing_toc=True)

        if self.opts.epub_flatten:
            from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
            FlatFilenames()(oeb, opts)
        else:
            from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
            UniqueFilenames()(oeb, opts)

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.upshift_markup()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages(check_colorspaces=True)(oeb, opts)

        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                      max_flow_size=self.opts.flow_size * 1024)
        split(self.oeb, self.opts)

        from calibre.ebooks.oeb.transforms.cover import CoverManager
        cm = CoverManager(
            no_default_cover=self.opts.no_default_epub_cover,
            no_svg_cover=self.opts.no_svg_cover,
            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
        cm(self.oeb, self.opts, self.log)

        self.workaround_sony_quirks()

        if self.oeb.toc.count() == 0:
            self.log.warn('This EPUB file has no Table of Contents. '
                          'Creating a default TOC')
            first = iter(self.oeb.spine).next()
            self.oeb.toc.add(_('Start'), first.href)

        from calibre.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
        uuid = None
        for x in identifiers:
            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(
                    x).startswith('urn:uuid:'):
                uuid = unicode(x).split(':')[-1]
                break
        encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

        if uuid is None:
            self.log.warn('No UUID identifier found')
            from uuid import uuid4
            uuid = str(uuid4())
            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

        if encrypted_fonts and not uuid.startswith('urn:uuid:'):
            # Apparently ADE requires this value to start with urn:uuid:
            # for some absurd reason, or it will throw a hissy fit and refuse
            # to use the obfuscated fonts.
            for x in identifiers:
                if unicode(x) == uuid:
                    x.content = 'urn:uuid:' + uuid

        with TemporaryDirectory(u'_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            metadata_xml = None
            extra_entries = []
            if self.is_periodical:
                if self.opts.output_profile.epub_periodical_format == 'sony':
                    from calibre.ebooks.epub.periodical import sony_metadata
                    metadata_xml, atom_xml = sony_metadata(oeb)
                    extra_entries = [(u'atom.xml', 'application/atom+xml',
                                      atom_xml)]
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
            self.condense_ncx([
                os.path.join(tdir, x) for x in os.listdir(tdir)
                if x.endswith('.ncx')
            ][0])
            encryption = None
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
            with initialize_container(output_path,
                                      os.path.basename(opf),
                                      extra_entries=extra_entries) as epub:
                epub.add_dir(tdir)
                if encryption is not None:
                    epub.writestr('META-INF/encryption.xml', encryption)
                if metadata_xml is not None:
                    epub.writestr('META-INF/metadata.xml',
                                  metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                from calibre.utils.zipfile import ZipFile
                if os.path.exists(opts.extract_to):
                    if os.path.isdir(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    else:
                        os.remove(opts.extract_to)
                os.mkdir(opts.extract_to)
                with ZipFile(output_path) as zf:
                    zf.extractall(path=opts.extract_to)
                self.log.info('EPUB extracted to', opts.extract_to)
Example #17
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'md':
        with open(path, 'w', encoding='utf-8') as f:
            if not mi.is_null('title'):
                print('#', mi.title, file=f)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
            '_TITLE_', prepare_string_for_xml(mi.title)).replace(
                '_AUTHORS_',
                prepare_string_for_xml(authors_to_string(
                    mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang),
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf,
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Example #18
0
def create_themeball(report, progress=None, abort=None):
    pool = ThreadPool(processes=cpu_count())
    buf = BytesIO()
    num = count()
    error_occurred = Event()

    def optimize(name):
        if abort is not None and abort.is_set():
            return
        if error_occurred.is_set():
            return
        try:
            i = next(num)
            if progress is not None:
                progress(i, _('Optimizing %s') % name)
            srcpath = os.path.join(report.path, name)
            ext = srcpath.rpartition('.')[-1].lower()
            if ext == 'png':
                optimize_png(srcpath)
            elif ext in ('jpg', 'jpeg'):
                optimize_jpeg(srcpath)
        except Exception:
            return sys.exc_info()

    errors = tuple(
        filter(None, pool.map(optimize, tuple(report.name_map.iterkeys()))))
    pool.close(), pool.join()
    if abort is not None and abort.is_set():
        return
    if errors:
        e = errors[0]
        reraise(*e)

    if progress is not None:
        progress(next(num), _('Creating theme file'))
    with ZipFile(buf, 'w') as zf:
        for name in report.name_map:
            srcpath = os.path.join(report.path, name)
            with lopen(srcpath, 'rb') as f:
                zf.writestr(name, f.read(), compression=ZIP_STORED)
    buf.seek(0)
    out = BytesIO()
    if abort is not None and abort.is_set():
        return None, None
    if progress is not None:
        progress(next(num), _('Compressing theme file'))
    compress(buf, out, level=9)
    buf = BytesIO()
    prefix = report.name
    if abort is not None and abort.is_set():
        return None, None
    with ZipFile(buf, 'w') as zf:
        with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f:
            zf.writestr(prefix + '/' + THEME_METADATA, f.read())
        zf.writestr(prefix + '/' + THEME_COVER, create_cover(report))
        zf.writestr(prefix + '/' + 'icons.zip.xz',
                    out.getvalue(),
                    compression=ZIP_STORED)
    if progress is not None:
        progress(next(num), _('Finished'))
    return buf.getvalue(), prefix
Example #19
0
def do_dump(path, dest):
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)
    pretty_all_xml_in_dir(dest)
    def _get_epub_toc(self, cid=None, path=None, prepend_title=None):
        '''
        Given a calibre id, return the epub TOC indexed by section
        If cid, use copy in library, else use path to copy on device
        '''
        toc = None
#         if cid is not None:
#             mi = self.opts.gui.current_db.get_metadata(cid, index_is_id=True)
#             toc = None
#             if 'EPUB' in mi.formats:
#                 fpath = self.opts.gui.current_db.format(cid, 'EPUB', index_is_id=True, as_path=True)
#             else:
#                 return toc
#         elif path is not None:
#             fpath = path
#         else:
#             return toc
        fpath = path

        # iBooks stores books unzipped
        # Marvin stores books zipped
        # Need spine, ncx_tree to construct toc

        if self.ios.stat(fpath) and self.ios.stat(fpath)['st_ifmt'] == 'S_IFDIR':
            # Find the OPF in the unzipped ePub
            fp = '/'.join([fpath, 'META-INF', 'container.xml'])
            cf = cStringIO.StringIO(self.ios.read(fp))
            container = etree.parse(cf)
            opf_file = container.xpath('.//*[local-name()="rootfile"]')[0].get('full-path')
            oebps = opf_file.rpartition('/')[0]

            fp = '/'.join([fpath, opf_file])
            opf = cStringIO.StringIO(self.ios.read(fp))
            opf_tree = etree.parse(opf)
            spine = opf_tree.xpath('.//*[local-name()="spine"]')[0]
            ncx_fs = spine.get('toc')
            manifest = opf_tree.xpath('.//*[local-name()="manifest"]')[0]
            ncx_file = manifest.find('.//*[@id="%s"]' % ncx_fs).get('href')

            fp = '/'.join([fpath, oebps, ncx_file])
            ncxf = cStringIO.StringIO(self.ios.read(fp))
            ncx_tree = etree.parse(ncxf)
            #self._log(etree.tostring(ncx_tree, pretty_print=True))

        else:
            # Find the OPF file in the zipped ePub
            zfo = cStringIO.StringIO(self.ios.read(fpath, mode='rb'))
            try:
                zf = ZipFile(zfo, 'r')
                container = etree.fromstring(zf.read('META-INF/container.xml'))
                opf_tree = etree.fromstring(zf.read(container.xpath('.//*[local-name()="rootfile"]')[0].get('full-path')))

                spine = opf_tree.xpath('.//*[local-name()="spine"]')[0]
                ncx_fs = spine.get('toc')
                manifest = opf_tree.xpath('.//*[local-name()="manifest"]')[0]
                ncx = manifest.find('.//*[@id="%s"]' % ncx_fs).get('href')

                # Find the ncx file
                fnames = zf.namelist()
                _ncx = [x for x in fnames if ncx in x][0]
                ncx_tree = etree.fromstring(zf.read(_ncx))
            except:
                import traceback
                self._log_location()
                self._log(" unable to unzip '%s'" % fpath)
                self._log(traceback.format_exc())
                return toc

        # fpath points to epub (zipped or unzipped dir)
        # spine, ncx_tree populated
        try:
            toc = OrderedDict()
            # 1. capture idrefs from spine
            for i, el in enumerate(spine):
                toc[str(i)] = el.get('idref')

            # 2. Resolve <spine> idrefs to <manifest> hrefs
            for el in toc:
                toc[el] = manifest.find('.//*[@id="%s"]' % toc[el]).get('href')

            # 3. Build a dict of src:toc_entry
            src_map = OrderedDict()
            navMap = ncx_tree.xpath('.//*[local-name()="navMap"]')[0]
            for navPoint in navMap:
                # Get the first-level entry
                src = re.sub(r'#.*$', '', navPoint.xpath('.//*[local-name()="content"]')[0].get('src'))
                toc_entry = navPoint.xpath('.//*[local-name()="text"]')[0].text
                src_map[src] = toc_entry

                # Get any nested navPoints
                nested_navPts = navPoint.xpath('.//*[local-name()="navPoint"]')
                for nnp in nested_navPts:
                    src = re.sub(r'#.*$', '', nnp.xpath('.//*[local-name()="content"]')[0].get('src'))
                    toc_entry = nnp.xpath('.//*[local-name()="text"]')[0].text
                    src_map[src] = toc_entry

            # Resolve src paths to toc_entry
            for section in toc:
                if toc[section] in src_map:
                    if prepend_title:
                        toc[section] = "%s &middot; %s" % (prepend_title,  src_map[toc[section]])
                    else:
                        toc[section] = src_map[toc[section]]
                else:
                    toc[section] = None

            # 5. Fill in the gaps
            current_toc_entry = None
            for section in toc:
                if toc[section] is None:
                    toc[section] = current_toc_entry
                else:
                    current_toc_entry = toc[section]
        except:
            import traceback
            self._log_location()
            self._log("{:~^80}".format(" error parsing '%s' " % fpath))
            self._log(traceback.format_exc())
            self._log("{:~^80}".format(" end traceback "))

        return toc