def tweak(ebook_file):
    ''' Command line interface to the Tweak Book tool.

    Explodes the book into a temporary directory, lets the user edit it
    (either interactively or, when $EDITOR looks like vim, by feeding the
    editor a zip of the exploded tree), then rebuilds the book in place.
    Exits the process with status 1 on any failure.
    '''
    fmt = ebook_file.rpartition('.')[-1].lower()
    exploder, rebuilder = get_tools(fmt)
    if exploder is None:
        prints('Cannot tweak %s files. Supported formats are: EPUB, HTMLZ, AZW3, MOBI'
                , file=sys.stderr)
        raise SystemExit(1)

    with TemporaryDirectory('_tweak_'+ os.path.basename(ebook_file).rpartition('.')[0]) as tdir:
        try:
            opf = exploder(ebook_file, tdir, question=ask_cli_question)
        except WorkerError as e:
            prints('Failed to unpack', ebook_file)
            prints(e.orig_tb)
            raise SystemExit(1)
        except Error as e:
            prints(as_unicode(e), file=sys.stderr)
            raise SystemExit(1)

        if opf is None:
            # The question was answered with No
            return

        ed = os.environ.get('EDITOR', 'dummy')
        cmd = shlex.split(ed)
        isvim = bool([x for x in cmd[0].split('/') if x.endswith('vim')])

        proceed = False
        prints('Book extracted to', tdir)

        if not isvim:
            prints('Make your tweaks and once you are done,', __appname__,
                    'will rebuild', ebook_file, 'from', tdir)
            print()
            proceed = ask_cli_question('Rebuild ' + ebook_file + '?')
        else:
            # vim can edit a zip archive in place, so hand it one and unpack
            # the (possibly modified) archive back over tdir afterwards.
            base = os.path.basename(ebook_file)
            with TemporaryFile(base+'.zip') as zipf:
                with ZipFile(zipf, 'w') as zf:
                    zf.add_dir(tdir)
                try:
                    subprocess.check_call(cmd + [zipf])
                except Exception:
                    # FIX: was a bare ``except:`` which also caught
                    # KeyboardInterrupt/SystemExit, misreporting Ctrl-C in the
                    # editor as an editor failure.
                    prints(ed, 'failed, aborting...')
                    raise SystemExit(1)
                with ZipFile(zipf, 'r') as zf:
                    shutil.rmtree(tdir)
                    os.mkdir(tdir)
                    zf.extractall(path=tdir)
                proceed = True

        if proceed:
            prints('Rebuilding', ebook_file, 'please wait ...')
            try:
                rebuilder(tdir, ebook_file)
            except WorkerError as e:
                prints('Failed to rebuild', ebook_file)
                prints(e.orig_tb)
                raise SystemExit(1)
            prints(ebook_file, 'successfully tweaked')
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    '''Compile/download a recipe, run it, and return the path to the OPF it produced.

    ``recipe_or_file`` may be a packaged downloaded recipe ('downloaded_recipe'),
    a readable .recipe file, or the title of a builtin recipe (optionally
    overridden by the CALIBRE_RECIPE_URN environment variable).
    '''
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        with lopen('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(
                    urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            with lopen(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the input as the title of a builtin recipe.
            from calibre.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry without stripping the directory part of the argument.
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = title.rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log,
                download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # FIX: requires_version is a tuple of ints (it compares
                    # against numeric_version); str.join needs strings, so the
                    # original '.'.join(recipe.requires_version) raised TypeError.
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(map(str, recipe.requires_version))))
                    builtin = True
            except Exception:
                # FIX: narrowed from a bare ``except:``; still best-effort —
                # any compile failure falls back to the bundled recipe.
                log.exception(
                    'Failed to compile downloaded recipe. Falling '
                    'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: ' + title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
        if recipe is None:
            raise ValueError(
                '%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)

    # The recipe run leaves an OPF in the current directory; find and return it.
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, stream, options, file_ext, log, accelerators):
    # EPUB input: unzip the book, locate and sanitize its OPF, and return the
    # absolute path of the rewritten 'content.opf' in the current directory.
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        zf.extractall(getcwd())
    except:
        # NOTE(review): bare except is deliberate best-effort here — any
        # failure of the strict parser falls through to the forgiving one.
        log.exception('EPUB appears to be invalid ZIP file, trying a'
                ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
    opf = self.find_opf()
    if opf is None:
        # Fall back to scanning the extracted tree for any plausible OPF,
        # skipping macOS resource-fork junk and hidden files.
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')

    if opf is None:
        raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)

    opf = os.path.relpath(opf, getcwd())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris

    # If the OPF lives in a subdirectory, rebase all manifest/guide hrefs
    # so they are relative to the extraction root.
    if len(parts) > 1 and parts[0]:
        delta = '/'.join(parts[:-1])+'/'

        def normpath(x):
            # NOTE(review): intentionally ignores its argument and reads the
            # enclosing loop variable ``elem`` — only valid inside the two
            # loops below.
            return posixpath.normpath(delta + elem.get('href'))

        for elem in opf.itermanifest():
            elem.set('href', normpath(elem.get('href')))
        for elem in opf.iterguide():
            elem.set('href', normpath(elem.get('href')))

    # Cover handling differs between EPUB 2 and EPUB 3.
    f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
    self.removed_cover = f(opf, log)
    if self.removed_cover:
        self.removed_items_to_ignore = (self.removed_cover,)
    epub3_nav = opf.epub3_nav
    if epub3_nav is not None:
        self.convert_epub3_nav(epub3_nav, opf, log, options)

    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                'EPUB files with DTBook markup are not supported')

    # Collect manifest ids that must not appear in the spine: Adobe page
    # templates and mis-typed font files.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_:
            mt = y.get('media-type', None)
            if mt in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml',
                    'application/text'
            }:
                not_for_spine.add(id_)
            ext = y.get('href', '').rpartition('.')[-1].lower()
            if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                # some epub authoring software sets font mime types to
                # text/plain
                not_for_spine.add(id_)
                y.set('media-type', 'application/font')

    # Drop spine entries that are excluded, duplicated, or have no idref.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError('No valid entries in the spine of this EPUB')

    with lopen('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    # TXT input: read plain text (or a txtz archive), detect its encoding,
    # paragraph style and formatting flavor, transform it to HTML and hand
    # it to the HTML input plugin, returning the resulting OEB book.
    from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
    from calibre.ebooks.chardet import detect
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.txt.processor import (
        convert_basic, convert_markdown_with_metadata,
        separate_paragraphs_single_line, separate_paragraphs_print_formatted,
        preserve_spaces, detect_paragraph_type, detect_formatting_type,
        normalize_line_endings, convert_textile, remove_indents,
        block_to_single_line, separate_hard_scene_breaks)

    self.log = log
    txt = ''
    log.debug('Reading text from file...')
    length = 0
    # NOTE(review): os.getcwdu() is Python-2-only — verify this module still
    # targets py2 or should use os.getcwd().
    base_dir = os.getcwdu()

    # Extract content from zip archive.
    if file_ext == 'txtz':
        zf = ZipFile(stream)
        zf.extractall('.')

        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                with open(x, 'rb') as tf:
                    # NOTE(review): tf.read() is bytes while '\n\n' is str —
                    # on py3 this concatenation would raise TypeError; confirm
                    # intended interpreter.
                    txt += tf.read() + '\n\n'
    else:
        if getattr(stream, 'name', None):
            base_dir = os.path.dirname(stream.name)
        txt = stream.read()

    if file_ext in {'md', 'textile', 'markdown'}:
        options.formatting_type = {'md': 'markdown'}.get(file_ext, file_ext)
        log.info('File extension indicates particular formatting. '
                'Forcing formatting type to: %s' % options.formatting_type)
        options.paragraph_type = 'off'

    # Get the encoding of the document.
    if options.input_encoding:
        ienc = options.input_encoding
        log.debug('Using user specified input encoding of %s' % ienc)
    else:
        det_encoding = detect(txt[:4096])
        det_encoding, confidence = det_encoding['encoding'], det_encoding[
            'confidence']
        if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
            # Microsoft Word exports to HTML with encoding incorrectly set to
            # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
            det_encoding = 'gbk'
        ienc = det_encoding
        log.debug(
            'Detected input encoding as %s with a confidence of %s%%' %
            (ienc, confidence * 100))
    if not ienc:
        ienc = 'utf-8'
        log.debug(
            'No input encoding specified and could not auto detect using %s' % ienc)
    # Remove BOM from start of txt as its presence can confuse markdown
    import codecs
    for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        if txt.startswith(bom):
            txt = txt[len(bom):]
            break
    txt = txt.decode(ienc, 'replace')

    # Replace entities
    txt = _ent_pat.sub(xml_entity_to_unicode, txt)

    # Normalize line endings
    txt = normalize_line_endings(txt)

    # Determine the paragraph type of the document.
    if options.paragraph_type == 'auto':
        options.paragraph_type = detect_paragraph_type(txt)
        if options.paragraph_type == 'unknown':
            log.debug(
                'Could not reliably determine paragraph type using block')
            options.paragraph_type = 'block'
        else:
            log.debug('Auto detected paragraph type as %s' %
                      options.paragraph_type)

    # Detect formatting
    if options.formatting_type == 'auto':
        options.formatting_type = detect_formatting_type(txt)
        log.debug('Auto detected formatting as %s' % options.formatting_type)

    if options.formatting_type == 'heuristic':
        setattr(options, 'enable_heuristics', True)
        setattr(options, 'unwrap_lines', False)
        setattr(options, 'smarten_punctuation', True)

    # Reformat paragraphs to block formatting based on the detected type.
    # We don't check for block because the processor assumes block.
    # single and print are transformed to block for processing.
    if options.paragraph_type == 'single':
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'print':
        txt = separate_hard_scene_breaks(txt)
        txt = separate_paragraphs_print_formatted(txt)
        txt = block_to_single_line(txt)
    elif options.paragraph_type == 'unformatted':
        from calibre.ebooks.conversion.utils import HeuristicProcessor
        # unwrap lines based on punctuation
        docanalysis = DocAnalysis('txt', txt)
        length = docanalysis.line_length(.5)
        preprocessor = HeuristicProcessor(options,
                                          log=getattr(self, 'log', None))
        txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'block':
        txt = separate_hard_scene_breaks(txt)
        txt = block_to_single_line(txt)

    if getattr(options, 'enable_heuristics', False) and getattr(
            options, 'dehyphenate', False):
        docanalysis = DocAnalysis('txt', txt)
        if not length:
            length = docanalysis.line_length(.5)
        dehyphenator = Dehyphenator(options.verbose, log=self.log)
        txt = dehyphenator(txt, 'txt', length)

    # User requested transformation on the text.
    if options.txt_in_remove_indents:
        txt = remove_indents(txt)

    # Preserve spaces will replace multiple spaces to a space
    # followed by the entity.
    if options.preserve_spaces:
        txt = preserve_spaces(txt)

    # Process the text using the appropriate text processor.
    self.shifted_files = []
    try:
        html = ''
        input_mi = None
        if options.formatting_type == 'markdown':
            log.debug('Running text through markdown conversion...')
            try:
                input_mi, html = convert_markdown_with_metadata(
                    txt,
                    extensions=[
                        x.strip()
                        for x in options.markdown_extensions.split(',')
                        if x.strip()
                    ])
            except RuntimeError:
                raise ValueError(
                    'This txt file has malformed markup, it cannot be'
                    ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax'
                )
            html = self.fix_resources(html, base_dir)
        elif options.formatting_type == 'textile':
            log.debug('Running text through textile conversion...')
            html = convert_textile(txt)
            html = self.fix_resources(html, base_dir)
        else:
            log.debug('Running text through basic conversion...')
            flow_size = getattr(options, 'flow_size', 0)
            html = convert_basic(txt, epub_split_size_kb=flow_size)

        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        htmlfile = self.shift_file(base_dir, 'index.html',
                                   html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        oeb = html_input.convert(open(htmlfile, 'rb'), options, 'html', log,
                                 {})
        options.debug_pipeline = odi
    finally:
        # Always clean up the temporary files created by shift_file().
        for x in self.shifted_files:
            os.remove(x)

    # Set metadata from file.
    if input_mi is None:
        from calibre.customize.ui import get_file_type_metadata
        input_mi = get_file_type_metadata(stream, file_ext)
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
    self.html_postprocess_title = input_mi.title

    return oeb
def compile_website_translations(self):
    # Compile the website .po files into .mo catalogs, pack the sufficiently
    # translated ones (>= 20%) into translations/website/locales.zip together
    # with a lang-names.json of localized language names, and write the list
    # of shipped locales to the stats directory.
    from calibre.utils.zipfile import ZipFile, ZipInfo, ZIP_STORED
    from calibre.ptempfile import TemporaryDirectory
    from calibre.utils.localization import get_iso639_translator, get_language, get_iso_language
    self.info('Compiling website translations...')
    srcbase = self.j(self.d(self.SRC), 'translations', 'website')
    fmap = {}      # compiled .po path -> locale code
    files = []     # (source .po, destination .mo) pairs for compile_group
    stats = {}     # locale code -> percent translated
    done = []      # locales actually packed into locales.zip

    def handle_stats(src, nums):
        # nums is (translated,) or (translated, untranslated); store the
        # completion percentage for the locale that produced src.
        locale = fmap[src]
        trans = nums[0]
        total = trans if len(nums) == 1 else (trans + nums[1])
        stats[locale] = int(round(100 * trans / total))

    with TemporaryDirectory() as tdir, ZipFile(
            self.j(srcbase, 'locales.zip'), 'w', ZIP_STORED) as zf:
        for f in os.listdir(srcbase):
            if f.endswith('.po'):
                l = f.partition('.')[0]
                pf = l.split('_')[0]
                # English is the source language, nothing to compile.
                if pf in {'en'}:
                    continue
                d = os.path.join(tdir, l + '.mo')
                f = os.path.join(srcbase, f)
                fmap[f] = l
                files.append((f, d))
        self.compile_group(files, handle_stats=handle_stats)

        for locale, translated in iteritems(stats):
            # Only ship locales that are at least 20% translated.
            if translated >= 20:
                with open(os.path.join(tdir, locale + '.mo'), 'rb') as f:
                    raw = f.read()
                zi = ZipInfo(os.path.basename(f.name))
                zi.compress_type = ZIP_STORED
                zf.writestr(zi, raw)
                done.append(locale)
        dl = done + ['en']

        # For every shipped locale, map each locale code to its name as
        # rendered in that locale.
        lang_names = {}
        for l in dl:
            if l == 'en':
                t = get_language
            else:
                t = getattr(get_iso639_translator(l),
                            'gettext' if ispy3 else 'ugettext')
                t = partial(get_iso_language, t)
            lang_names[l] = {x: t(x) for x in dl}
        zi = ZipInfo('lang-names.json')
        zi.compress_type = ZIP_STORED
        zf.writestr(
            zi,
            json.dumps(lang_names, ensure_ascii=False).encode('utf-8'))

    dest = self.j(self.d(self.stats), 'website-languages.txt')
    data = ' '.join(sorted(done))
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    with open(dest, 'wb') as f:
        f.write(data)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''HTMLZ output: render the OEB book to a single HTML file (plus CSS,
    images, cover and an OPF) in a temporary directory, then zip that
    directory into ``output_path``.
    '''
    from lxml import etree
    from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from calibre.utils.zipfile import ZipFile
    from calibre.utils.filenames import ascii_filename

    # HTML: pick the CSS strategy requested by the user.
    if opts.htmlz_css_type == 'inline':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory('_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = 'index'
        if opts.htmlz_title_filename:
            from calibre.utils.filenames import shorten_components_to
            fname = shorten_components_to(
                100, (ascii_filename(str(oeb_book.metadata.title[0])), ))[0]
        with open(os.path.join(tdir, fname + '.html'), 'wb') as tf:
            if isinstance(html, str):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
            with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                tf.write(htmlizer.get_css(oeb_book).encode('utf-8'))

        # Images referenced from the generated HTML.
        images = htmlizer.images
        if images:
            if not os.path.exists(os.path.join(tdir, 'images')):
                os.makedirs(os.path.join(tdir, 'images'))
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        data = etree.tostring(
                            item.data, encoding='unicode').encode('utf-8')
                    else:
                        data = item.data
                    fname = os.path.join(tdir, 'images', images[item.href])
                    with open(fname, 'wb') as img:
                        img.write(data)

        # Cover — best effort; a failure here must not abort the conversion.
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from calibre.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, 'cover.jpg')
                with lopen(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except Exception:
            # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt /
            # SystemExit are no longer swallowed.
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
            opf = OPF(
                io.BytesIO(
                    etree.tostring(oeb_book.metadata.to_opf1(),
                                   encoding='UTF-8')))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = 'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        # FIX: the archive was previously never closed, so the zip central
        # directory might never be flushed to disk; use a context manager.
        with ZipFile(output_path, 'w') as htmlz:
            htmlz.add_dir(tdir)
def books(self, oncard=None, end_session=True):
    '''
    Return a list of ebooks on the device.
    @param oncard: If 'carda' or 'cardb' return a list of ebooks
                   on the specific storage card, otherwise return list of
                   ebooks in main memory of device. If a card is specified
                   and no books are on the card return empty list.
    @return: A BookList.
    '''
    if oncard:
        # iOS devices have no storage cards.
        return BookList()

    self._log_location()
    booklist = BookList()
    cached_books = {}

    # Fetch current assets from Media folder
    assets_profile = self._localize_database_path(self.assets_subpath)

    # Fetch current metadata from iBooks's DB
    db_profile = self._localize_database_path(self.books_subpath)
    con = sqlite3.connect(db_profile['path'])

    # Mount the Media folder
    self.ios.mount_ios_media_folder()

    # Get Books.plist so we can find the covers.
    # Maps book folder path -> cover image path on the device.
    books_plist = {}
    if True:
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Sync/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                #print(book)
                #print
                continue
            if 'Cover Path' in book:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        # Process any outliers
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue
            # Don't overwrite existing cover_paths
            if not '/'.join(['/Books', book['Path']]) in books_plist:
                # FIX: original condition also tested
                # ``not ['/'.join([...])] in book_plist`` — ``book_plist`` is
                # an undefined name and the list-wrapped key is unhashable;
                # the parallel branches show the intended test is simply
                # whether the book carries its own cover path.
                if 'Cover Path' in book:
                    books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
                else:
                    books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Purchases/Purchases.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue
            # Don't overwrite existing cover_paths
            if not '/'.join(['/Books', book['Path']]) in books_plist:
                if 'Cover Path' in book:
                    books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books/Purchases', book['Path'], book['Cover Path']]))
                else:
                    books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))
    else:
        # Dead branch kept for reference (covers without the Sync plist).
        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Books.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue
            if 'Cover Path' in book:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

        raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Purchases/Purchases.plist'))['Books']
        for book in raw_plist:
            if not 'Path' in book:
                print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
                print(book)
                print
                continue
            if 'Cover Path' in book:
                books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books/Purchases', book['Path'], book['Cover Path']]))
            else:
                books_plist['/'.join(['/Books/Purchases', book['Path']])] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    print(books_plist)

    with con:
        con.row_factory = sqlite3.Row

        # Build a collection map
        collections_map = {}

        # Get the books
        cur = con.cursor()
        #cur.execute("ATTACH DATABASE '{0}' as 'ASSETS'".format(assets_profile['path'])
        cur.execute('''SELECT ZASSETURL,
                              ZBOOKAUTHOR,
                              ZSORTAUTHOR,
                              ZBOOKTITLE,
                              ZSORTTITLE,
                              ZDATABASEKEY,
                              ZDATEADDED
                       FROM ZBKBOOKINFO
                       WHERE ZASSETURL LIKE 'file://localhost%' AND
                             ZASSETURL LIKE '%.epub/'
                    ''')
        rows = cur.fetchall()
        book_count = len(rows)
        for i, row in enumerate(rows):
            book_id = row[b'ZDATABASEKEY']

            # Get the collection assignments
            collections = []

            # Get the primary metadata
            this_book = Book(row[b'ZBOOKTITLE'], row[b'ZBOOKAUTHOR'])
            original_path = row[b'ZASSETURL']
            path = original_path[original_path.find('Media/') + len('Media'):-1]
            this_book.path = path.replace('%20', ' ')
            timestamp = int(row[b'ZDATEADDED']) + NSTimeIntervalSince1970
            this_book.datetime = datetime.fromtimestamp(timestamp).timetuple()
            this_book.device_collections = collections
            this_book.uuid = None
            this_book.thumbnail = self._generate_thumbnail(this_book, books_plist[this_book.path])

            # Retrieve the folder size from the cache zip, computing and
            # caching it on a miss.
            zfr = None  # FIX: ensure defined even if opening the cache fails
            try:
                zfr = ZipFile(self.folder_archive_path)
                file_size = zfr.read(this_book.path)
                this_book.size = int(file_size)
                self._log_diagnostic("returning folder size from cache")
            except Exception:
                # Cache miss (or cache zip missing): compute and append.
                self._log_diagnostic("opening folder cache for appending")
                zfw = ZipFile(self.folder_archive_path, mode='a')
                stats = self.ios.stat(this_book.path)
                this_book.size = self.ios.get_folder_size(this_book.path)
                zfw.writestr(this_book.path, str(this_book.size))
                zfw.close()
            finally:
                # FIX: ``zfr.close()`` raised NameError whenever the ZipFile
                # constructor itself failed.
                if zfr is not None:
                    zfr.close()

            booklist.add_book(this_book, False)

            if self.report_progress is not None:
                self.report_progress(float((i + 1)*100 / book_count)/100,
                    '%(num)d of %(tot)d' % dict(num=i + 1, tot=book_count))

            cached_books[this_book.path] = {
                'title': this_book.title,
                'author': this_book.author,
                'authors': this_book.author.split(' & '),
                'uuid': this_book.uuid
            }
        cur.close()

    # Close the connection
    self.ios.dismount_ios_media_folder()

    if self.report_progress is not None:
        self.report_progress(1.0, _('finished'))

    self.cached_books = cached_books
    return booklist
def init_zipfile(self, stream):
    """Open *stream* as a zip archive and cache the set of member names."""
    archive = ZipFile(stream)
    self.zipf = archive
    self.names = frozenset(archive.namelist())
def get_metadata(stream, extract_cover=True):
    # Read book metadata from an OpenDocument file's meta.xml, honouring
    # calibre's custom 'opf.*' user-defined fields when present. Returns a
    # MetaInformation object.
    whitespace = re.compile(r'\s+')

    def normalize(s):
        # Collapse runs of whitespace to single spaces.
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            # First matching element's text content, normalized, or None.
            ns, tag = fields[field]
            ans = root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns})
            if ans:
                return normalize(
                    tostring(ans[0], method='text', encoding='unicode',
                             with_tail=False)).strip()

        def find_all(field):
            # Yield the normalized text of every matching element.
            ns, tag = fields[field]
            for x in root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns}):
                yield normalize(
                    tostring(x, method='text', encoding='unicode',
                             with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        keywords = []
        for q in ('keyword', 'keywords'):
            for kw in find_all(q):
                keywords += [x.strip() for x in kw.split(',') if x.strip()]
        mi.tags = uniq(keywords)

        # Collect user-defined fields; calibre stores OPF-style metadata here.
        data = {}
        for tag in root.xpath('//ns0:user-defined',
                              namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val
        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
                if data.get('opf.seriesindex', ''):
                    try:
                        mi.series_index = float(data['opf.seriesindex'])
                    except Exception:
                        mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object used to seed the OPF/title page.
    :param path: Destination file path for the created book.
    :param fmt: 'epub' (zip container) or 'azw3'.
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Pick the first non-empty language declared in the generated OPF.
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Build a minimal manifest (one HTML flow + NCX) and spine.
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
'''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            # mimetype must be first and stored uncompressed per the EPUB spec.
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # FIX: was ``0755`` — a Python-2-only octal literal that is a
            # SyntaxError on Python 3; ``0o755`` is valid on both.
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    '''
    Entry point for catalog generation: normalise the option set, build the
    catalog sources with CatalogBuilder, then convert them to the output
    format with Plumber.

    :param path_to_output: destination file; its extension selects the format
    :param opts: option namespace mutated in place throughout this method
    :param db: calibre library database
    :param notification: progress reporter callable
    :return: catalog.error list (or 1 / message list on early failure)
    '''
    # NOTE(review): mutable/stateful default argument DummyReporter() is
    # shared across calls — presumably intentional here; confirm before changing.
    from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
    from calibre.utils.logging import default_log as log
    from calibre.utils.config import JSONConfig

    # If preset specified from the cli, insert stored options from JSON file
    if hasattr(opts, 'preset') and opts.preset:
        available_presets = JSONConfig("catalog_presets")
        if opts.preset not in available_presets:
            if available_presets:
                print(_('Error: Preset "%s" not found.' % opts.preset))
                print(_('Stored presets: %s' % ', '.join(
                    [p for p in sorted(available_presets.keys())])))
            else:
                print(_('Error: No stored presets.'))
            return 1

        # Copy the relevant preset values to the opts object
        for item in available_presets[opts.preset]:
            if item not in ['exclusion_rules_tw', 'format', 'prefix_rules_tw']:
                setattr(opts, item, available_presets[opts.preset][item])

        # Provide an unconnected device
        opts.connected_device = {
            'is_device_connected': False,
            'kind': None,
            'name': None,
            'save_template': None,
            'serial': None,
            'storage': None,
        }

        # Convert prefix_rules and exclusion_rules from JSON lists to tuples
        prs = []
        for rule in opts.prefix_rules:
            prs.append(tuple(rule))
        opts.prefix_rules = tuple(prs)

        ers = []
        for rule in opts.exclusion_rules:
            ers.append(tuple(rule))
        opts.exclusion_rules = tuple(ers)

    opts.log = log
    # Output format is taken from the destination file's extension.
    opts.fmt = self.fmt = path_to_output.rpartition('.')[2]

    # Add local options
    opts.creator = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'),
                                      strftime('%d').lstrip('0'), strftime('%Y'))
    opts.creator_sort_as = '%s %s' % ('calibre', strftime('%Y-%m-%d'))
    opts.connected_kindle = False

    # Finalize output_profile
    op = opts.output_profile
    if op is None:
        op = 'default'

    # Kindle detection: serial prefixes B004/B005 identify a Kindle DX.
    if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
        opts.connected_kindle = True
        if opts.connected_device['serial'] and \
                opts.connected_device['serial'][:4] in ['B004', 'B005']:
            op = "kindle_dx"
        else:
            op = "kindle"
    # Clip lengths are shorter on non-DX Kindles.
    opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
    opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
    opts.output_profile = op

    opts.basename = "Catalog"
    # GUI invocations attach a 'sync' attribute; the CLI does not.
    opts.cli_environment = not hasattr(opts, 'sync')

    # Hard-wired to always sort descriptions by author, with series after non-series
    opts.sort_descriptions_by_author = True

    build_log = []

    build_log.append("%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
                     (self.name, current_library_name(), self.fmt,
                      'for %s ' % opts.output_profile if opts.output_profile else '',
                      'CLI' if opts.cli_environment else 'GUI',
                      calibre_langcode_to_name(canonicalize_lang(get_lang()), localize=False)))

    # If exclude_genre is blank, assume user wants all tags as genres
    if opts.exclude_genre.strip() == '':
        # opts.exclude_genre = '\[^.\]'
        # build_log.append(" converting empty exclude_genre to '\[^.\]'")
        # 'a^' is a regex that can never match, i.e. exclude nothing.
        opts.exclude_genre = 'a^'
        build_log.append(" converting empty exclude_genre to 'a^'")
    if opts.connected_device['is_device_connected'] and \
            opts.connected_device['kind'] == 'device':
        if opts.connected_device['serial']:
            # Mask all but the first four characters of the serial number.
            build_log.append(" connected_device: '%s' #%s%s " %
                             (opts.connected_device['name'],
                              opts.connected_device['serial'][0:4],
                              'x' * (len(opts.connected_device['serial']) - 4)))
            for storage in opts.connected_device['storage']:
                if storage:
                    build_log.append(" mount point: %s" % storage)
        else:
            build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
            try:
                for storage in opts.connected_device['storage']:
                    if storage:
                        build_log.append(" mount point: %s" % storage)
            except:
                build_log.append(" (no mount points)")
    else:
        build_log.append(" connected_device: '%s'" % opts.connected_device['name'])

    opts_dict = vars(opts)
    if opts_dict['ids']:
        build_log.append(" book count: %d" % len(opts_dict['ids']))

    # Collect the enabled sections, in fixed display order.
    sections_list = []
    if opts.generate_authors:
        sections_list.append('Authors')
    if opts.generate_titles:
        sections_list.append('Titles')
    if opts.generate_series:
        sections_list.append('Series')
    if opts.generate_genres:
        sections_list.append('Genres')
    if opts.generate_recently_added:
        sections_list.append('Recently Added')
    if opts.generate_descriptions:
        sections_list.append('Descriptions')

    if not sections_list:
        if opts.cli_environment:
            # CLI: no switches means "everything".
            opts.log.warn('*** No Section switches specified, enabling all Sections ***')
            opts.generate_authors = True
            opts.generate_titles = True
            opts.generate_series = True
            opts.generate_genres = True
            opts.generate_recently_added = True
            opts.generate_descriptions = True
            sections_list = ['Authors', 'Titles', 'Series', 'Genres',
                             'Recently Added', 'Descriptions']
        else:
            # GUI: nothing enabled is a user error; bail out with a message pair.
            opts.log.warn('\n*** No enabled Sections, terminating catalog generation ***')
            return ["No Included Sections",
                    "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"]
    if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
        warning = _("\n*** Adding 'By authors' section required for MOBI output ***")
        opts.log.warn(warning)
        sections_list.insert(0, 'Authors')
        opts.generate_authors = True

    opts.log(" Sections: %s" % ', '.join(sections_list))
    opts.section_list = sections_list

    # Limit thumb_width to 1.0" - 2.0"
    try:
        if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
            log.warning("coercing thumb_width from '%s' to '%s'" %
                        (opts.thumb_width, self.THUMB_SMALLEST))
            opts.thumb_width = self.THUMB_SMALLEST
        if float(opts.thumb_width) > float(self.THUMB_LARGEST):
            log.warning("coercing thumb_width from '%s' to '%s'" %
                        (opts.thumb_width, self.THUMB_LARGEST))
            opts.thumb_width = self.THUMB_LARGEST
        opts.thumb_width = "%.2f" % float(opts.thumb_width)
    except:
        log.error("coercing thumb_width from '%s' to '%s'" %
                  (opts.thumb_width, self.THUMB_SMALLEST))
        opts.thumb_width = "1.0"

    # eval prefix_rules if passed from command line
    # NOTE(review): eval() on a CLI-supplied string executes arbitrary code;
    # acceptable only because the string comes from the local user's own
    # command line — do not reuse this pattern for untrusted input.
    if type(opts.prefix_rules) is not tuple:
        try:
            opts.prefix_rules = eval(opts.prefix_rules)
        except:
            log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
            raise
        for rule in opts.prefix_rules:
            if len(rule) != 4:
                log.error("incorrect number of args for --prefix-rules: %s" % repr(rule))

    # eval exclusion_rules if passed from command line
    if type(opts.exclusion_rules) is not tuple:
        try:
            opts.exclusion_rules = eval(opts.exclusion_rules)
        except:
            log.error("malformed --exclusion-rules: %s" % opts.exclusion_rules)
            raise
        for rule in opts.exclusion_rules:
            if len(rule) != 3:
                log.error("incorrect number of args for --exclusion-rules: %s" % repr(rule))

    # Display opts
    keys = sorted(opts_dict.keys())
    build_log.append(" opts:")
    for key in keys:
        # Only whitelisted, user-meaningful options are echoed to the log.
        if key in ['catalog_title', 'author_clip', 'connected_kindle', 'creator',
                   'cross_reference_authors', 'description_clip', 'exclude_book_marker',
                   'exclude_genre', 'exclude_tags', 'exclusion_rules', 'fmt',
                   'genre_source_field', 'header_note_source_field', 'merge_comments_rule',
                   'output_profile', 'prefix_rules', 'preset', 'read_book_marker',
                   'search_text', 'sort_by', 'sort_descriptions_by_author', 'sync',
                   'thumb_width', 'use_existing_cover', 'wishlist_tag']:
            build_log.append(" %s: %s" % (key, repr(opts_dict[key])))
    if opts.verbose:
        log('\n'.join(line for line in build_log))

    # Capture start_time
    opts.start_time = time.time()

    self.opts = opts

    if opts.verbose:
        log.info(" Begin catalog source generation (%s)" % str(
            datetime.timedelta(seconds=int(time.time() - opts.start_time))))

    # Launch the Catalog builder
    catalog = CatalogBuilder(db, opts, self, report_progress=notification)

    try:
        catalog.build_sources()
        if opts.verbose:
            log.info(" Completed catalog source generation (%s)\n" % str(
                datetime.timedelta(seconds=int(time.time() - opts.start_time))))
    except (AuthorSortMismatchException, EmptyCatalogException) as e:
        # Known, user-correctable failures: report and fall through to
        # `return catalog.error` without converting.
        log.error(" *** Terminated catalog generation: %s ***" % e)
    except:
        log.error(" unhandled exception in catalog generator")
        raise
    else:
        # Source generation succeeded: convert the sources to the target format.
        recommendations = []
        recommendations.append(('remove_fake_margins', False, OptionRecommendation.HIGH))
        recommendations.append(('comments', '', OptionRecommendation.HIGH))

        """ >>> Use to debug generated catalog code before pipeline conversion <<< """
        GENERATE_DEBUG_EPUB = False
        if GENERATE_DEBUG_EPUB:
            catalog_debug_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Catalog debug')
            setattr(opts, 'debug_pipeline', os.path.expanduser(catalog_debug_path))

        dp = getattr(opts, 'debug_pipeline', None)
        if dp is not None:
            recommendations.append(('debug_pipeline', dp, OptionRecommendation.HIGH))

        if opts.output_profile and opts.output_profile.startswith("kindle"):
            recommendations.append(('output_profile', opts.output_profile,
                                    OptionRecommendation.HIGH))
            recommendations.append(('book_producer', opts.output_profile,
                                    OptionRecommendation.HIGH))
            if opts.fmt == 'mobi':
                recommendations.append(('no_inline_toc', True, OptionRecommendation.HIGH))

        recommendations.append(('verbose', 2, OptionRecommendation.HIGH))

        # Use existing cover or generate new cover
        cpath = None
        existing_cover = False
        try:
            search_text = 'title:"%s" author:%s' % (
                opts.catalog_title.replace('"', '\\"'), 'calibre')
            matches = db.search(search_text, return_matches=True, sort_results=False)
            if matches:
                cpath = db.cover(matches[0], index_is_id=True, as_path=True)
                if cpath and os.path.exists(cpath):
                    existing_cover = True
        except:
            pass

        if self.opts.use_existing_cover and not existing_cover:
            log.warning("no existing catalog cover found")

        if self.opts.use_existing_cover and existing_cover:
            recommendations.append(('cover', cpath, OptionRecommendation.HIGH))
            log.info("using existing catalog cover")
        else:
            from calibre.ebooks.covers import calibre_cover2
            log.info("replacing catalog cover")
            new_cover_path = PersistentTemporaryFile(suffix='.jpg')
            new_cover = calibre_cover2(opts.catalog_title, 'calibre')
            new_cover_path.write(new_cover)
            new_cover_path.close()
            recommendations.append(('cover', new_cover_path.name, OptionRecommendation.HIGH))

        # Run ebook-convert
        from calibre.ebooks.conversion.plumber import Plumber
        plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
                          path_to_output, log, report_progress=notification,
                          abort_after_input_dump=False)
        plumber.merge_ui_recommendations(recommendations)
        plumber.run()

        # Best-effort cleanup of the temporary cover file.
        try:
            os.remove(cpath)
        except:
            pass

        if GENERATE_DEBUG_EPUB:
            from calibre.ebooks.epub import initialize_container
            from calibre.ebooks.tweak import zip_rebuilder
            from calibre.utils.zipfile import ZipFile
            input_path = os.path.join(catalog_debug_path, 'input')
            epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
            initialize_container(epub_shell, opf_name='content.opf')
            with ZipFile(epub_shell, 'r') as zf:
                zf.extractall(path=input_path)
            os.remove(epub_shell)
            zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))

        if opts.verbose:
            log.info(" Catalog creation complete (%s)\n" % str(
                datetime.timedelta(seconds=int(time.time() - opts.start_time))))

    # returns to gui2.actions.catalog:catalog_generated()
    return catalog.error
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    Extracts the (zip-based) KePub into the current working directory,
    locates and normalises its OPF, prunes invalid spine entries, and
    writes the result as content.opf, returning its absolute path.
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    try:
        zf = ZipFile(stream)
        zf.extractall(unicode(os.getcwd()))
    except Exception:
        # Fall back to the more forgiving non-standard unzipper for
        # archives the strict ZipFile rejects.
        log.exception(
            "KEPUB appears to be invalid ZIP file, trying a "
            "more forgiving ZIP parser"
        )
        from calibre.utils.localunzip import extractall

        stream.seek(0)
        extractall(stream)
    opf = self.find_opf()
    if opf is None:
        # No OPF at the expected location: scan the extracted tree,
        # skipping macOS resource-fork junk and hidden files.
        for f in walk("."):
            if (
                f.lower().endswith(".opf")
                and "__MACOSX" not in f
                and not os.path.basename(f).startswith(".")
            ):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")
    if opf is None:
        raise ValueError(
            _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)"
            ).format(path)
        )
    # A rights.xml in the archive indicates DRM we cannot process.
    encfile = os.path.abspath("rights.xml")
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))

    opf = os.path.relpath(opf, unicode(os.getcwd()))
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self.encrypted_fonts = []

    if len(parts) > 1 and parts[0]:
        # The OPF lives in a subdirectory: rebase all manifest and
        # guide hrefs onto that directory.
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))

    # Choose the EPUB2 or EPUB3 cover-normalisation routine by package version.
    f = (
        self.rationalize_cover3
        if opf.package_version >= 3.0
        else self.rationalize_cover2
    )
    self.removed_cover = f(opf, log)

    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported")  # noqa: F821
            )

    # Collect manifest ids whose media types must not appear in the spine
    # (Adobe page templates and plain text entries).
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }:
            not_for_spine.add(id_)

    # Drop empty, excluded, or duplicate spine itemrefs in place.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )

    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())

    return os.path.abspath("content.opf")
from calibre.ebooks.conversion.plumber import Plumber plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'), path_to_output, log, report_progress=notification, abort_after_input_dump=False) plumber.merge_ui_recommendations(recommendations) plumber.run() try: os.remove(cpath) except: pass if GENERATE_DEBUG_EPUB: from calibre.ebooks.epub import initialize_container from calibre.ebooks.tweak import zip_rebuilder from calibre.utils.zipfile import ZipFile input_path = os.path.join(catalog_debug_path, 'input') epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip') initialize_container(epub_shell, opf_name='content.opf') with ZipFile(epub_shell, 'r') as zf: zf.extractall(path=input_path) os.remove(epub_shell) zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub')) if opts.verbose: log.info(" Catalog creation complete (%s)\n" % str(datetime.timedelta(seconds=int(time.time() - opts.start_time)))) # returns to gui2.actions.catalog:catalog_generated() return catalog.error
def _generate_thumbnail(self, book, cover_path):
    '''
    Fetch the cover image, generate a thumbnail, cache
    Specific implementation for iBooks

    :param book: book object providing .title and .path
    :param cover_path: device path of the cover image to fetch
    :return: JPEG thumbnail bytes, or None when the cache holds the
             'None' marker / generation failed
    '''
    self._log_location(book.title)
    self._log_diagnostic(" book_path: %s" % book.path)
    self._log_diagnostic("cover_path: %s" % repr(cover_path))

    thumb_data = None
    thumb_path = book.path.rpartition('.')[0] + '.jpg'

    # Try getting the cover from the cache
    try:
        zfr = ZipFile(self.archive_path)
        thumb_data = zfr.read(thumb_path)
        if thumb_data == 'None':
            self._log_diagnostic("returning None from cover cache")
            zfr.close()
            return None
    except:
        # Cache miss (or unreadable archive): reopen for appending so we
        # can write the thumb (or an empty marker) below.
        self._log_diagnostic("opening cover cache for appending")
        zfw = ZipFile(self.archive_path, mode='a')
    else:
        self._log_diagnostic("returning thumb from cover cache")
        # FIX: the read handle was leaked on this (cache-hit) path; close it
        # before returning, matching the 'None'-marker branch above.
        zfr.close()
        return thumb_data

    '''
    # Is book.path a directory (iBooks) or an epub?
    stats = self.ios.stat(book.path)
    if stats['st_ifmt'] == 'S_IFDIR':
        # *** This needs to fetch the cover data from the directory ***
        self._log_diagnostic("returning None, can't read iBooks covers yet")
        return thumb_data

    # Get the cover from the book
    try:
        stream = cStringIO.StringIO(self.ios.read(book.path, mode='rb'))
        mi = get_metadata(stream)
        if mi.cover_data is not None:
            img_data = cStringIO.StringIO(mi.cover_data[1])
    except:
        if self.verbose:
            self._log_diagnostic("ERROR: unable to get cover from '%s'" % book.title)
            import traceback
            #traceback.print_exc()
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        return thumb_data
    '''

    # Fetch the raw cover bytes from the device.
    try:
        img_data = cStringIO.StringIO(self.ios.read(cover_path, mode='rb'))
    except:
        if self.verbose:
            self._log_diagnostic("ERROR fetching cover data for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
        # FIX: close the append handle on this early return — the finally
        # clause below does not cover this path, so it previously leaked.
        zfw.close()
        return thumb_data

    # Generate a thumb
    try:
        im = PILImage.open(img_data)
        scaled, width, height = fit_image(im.size[0], im.size[1], 60, 80)
        im = im.resize((int(width), int(height)), PILImage.ANTIALIAS)
        thumb = cStringIO.StringIO()
        im.convert('RGB').save(thumb, 'JPEG')
        thumb_data = thumb.getvalue()
        thumb.close()
        self._log_diagnostic("SUCCESS: generated thumb for '%s', caching" % book.title)
        # Cache the tagged thumb
        zfw.writestr(thumb_path, thumb_data)
    except:
        if self.verbose:
            self._log_diagnostic("ERROR generating thumb for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
    finally:
        # img_data.close()
        zfw.close()
    return thumb_data
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an HTMLZ archive to an OEB book.

    Extracts the archive into the current working directory, locates the
    single top-level HTML file, runs it through the HTML input plugin,
    then applies metadata and cover from the archive's OPF if present.
    """
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.utils.zipfile import ZipFile

    self.log = log
    html = u''
    top_levels = []

    # Extract content from zip archive.
    zf = ZipFile(stream)
    zf.extractall()

    # Find the HTML file in the archive. It needs to be
    # top level.
    index = u''
    multiple_html = False
    # Get a list of all top level files in the archive.
    for x in os.listdir(u'.'):
        if os.path.isfile(x):
            top_levels.append(x)
    # Try to find an index. file.
    for x in top_levels:
        if x.lower() in (u'index.html', u'index.xhtml', u'index.htm'):
            index = x
            break
    # Look for multiple HTML files in the archive. We look at the
    # top level files only as only they matter in HTMLZ.
    for x in top_levels:
        if os.path.splitext(x)[1].lower() in (u'.html', u'.xhtml', u'.htm'):
            # Set index to the first HTML file found if it's not
            # called index.
            if not index:
                index = x
            else:
                multiple_html = True
    # Warn the user if there multiple HTML file in the archive. HTMLZ
    # supports a single HTML file. A conversion with a multiple HTML file
    # HTMLZ archive probably won't turn out as the user expects. With
    # Multiple HTML files ZIP input should be used in place of HTMLZ.
    if multiple_html:
        log.warn(_('Multiple HTML files found in the archive. Only %s will be used.') % index)

    if index:
        with open(index, 'rb') as tf:
            html = tf.read()
    else:
        raise Exception(_('No top level HTML file found.'))

    if not html:
        raise Exception(_('Top level HTML file %s is empty') % index)

    # Encoding
    if options.input_encoding:
        ienc = options.input_encoding
    else:
        # Sniff the encoding from the first 4KB.
        ienc = xml_to_unicode(html[:4096])[-1]
    html = html.decode(ienc, 'replace')

    # Run the HTML through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(options, opt.option.name, opt.recommended_value)
    options.input_encoding = 'utf-8'
    base = getcwd()
    # Write the decoded HTML to a collision-free temp file name.
    htmlfile = os.path.join(base, u'index.html')
    c = 0
    while os.path.exists(htmlfile):
        c += 1
        # NOTE(review): the fallback name is not re-joined with `base`;
        # this only works because `base` is the cwd — confirm intent.
        htmlfile = u'index%d.html'%c
    with open(htmlfile, 'wb') as f:
        f.write(html.encode('utf-8'))
    # Temporarily disable pipeline debugging for the nested conversion.
    odi = options.debug_pipeline
    options.debug_pipeline = None
    # Generate oeb from html conversion.
    with open(htmlfile, 'rb') as f:
        oeb = html_input.convert(f, options, 'html', log, {})
    options.debug_pipeline = odi
    os.remove(htmlfile)

    # Set metadata from file.
    from calibre.customize.ui import get_file_type_metadata
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    mi = get_file_type_metadata(stream, file_ext)
    meta_info_to_oeb_metadata(mi, oeb.metadata, log)

    # Get the cover path from the OPF.
    cover_path = None
    opf = None
    for x in top_levels:
        if os.path.splitext(x)[1].lower() == u'.opf':
            opf = x
            break
    if opf:
        opf = OPF(opf, basedir=getcwd())
        cover_path = opf.raster_cover or opf.cover
    # Set the cover.
    if cover_path:
        cdata = None
        with open(os.path.join(getcwd(), cover_path), 'rb') as cf:
            cdata = cf.read()
        cover_name = os.path.basename(cover_path)
        id, href = oeb.manifest.generate('cover', cover_name)
        oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
        oeb.guide.add('cover', 'Cover', href)

    return oeb
def convert(self, oeb, output_path, input_plugin, opts, log):
    """Serialize an OEB book to an EPUB file at output_path.

    Applies inline-TOC, filename, image, split and cover transforms, ensures
    a UUID identifier (required for font obfuscation), then packages the
    result into an EPUB container, optionally extracting it for debugging.
    """
    self.log, self.opts, self.oeb = log, opts, oeb

    if self.opts.epub_inline_toc:
        from calibre.ebooks.mobi.writer8.toc import TOCAdder
        opts.mobi_toc_at_start = not opts.epub_toc_at_end
        opts.mobi_passthrough = False
        opts.no_inline_toc = False
        TOCAdder(oeb, opts, replace_previous_inline_toc=True, ignore_existing_toc=True)

    if self.opts.epub_flatten:
        from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
        FlatFilenames()(oeb, opts)
    else:
        from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
        UniqueFilenames()(oeb, opts)

    self.workaround_ade_quirks()
    self.workaround_webkit_quirks()
    self.upshift_markup()
    from calibre.ebooks.oeb.transforms.rescale import RescaleImages
    RescaleImages(check_colorspaces=True)(oeb, opts)

    # Split over-large flows; flow_size option is in KB.
    from calibre.ebooks.oeb.transforms.split import Split
    split = Split(not self.opts.dont_split_on_page_breaks,
                  max_flow_size=self.opts.flow_size * 1024)
    split(self.oeb, self.opts)

    from calibre.ebooks.oeb.transforms.cover import CoverManager
    cm = CoverManager(
        no_default_cover=self.opts.no_default_epub_cover,
        no_svg_cover=self.opts.no_svg_cover,
        preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
    cm(self.oeb, self.opts, self.log)

    self.workaround_sony_quirks()

    if self.oeb.toc.count() == 0:
        # A TOC is mandatory; synthesize a single entry pointing at the
        # first spine item.
        self.log.warn('This EPUB file has no Table of Contents. '
                      'Creating a default TOC')
        first = iter(self.oeb.spine).next()
        self.oeb.toc.add(_('Start'), first.href)

    from calibre.ebooks.oeb.base import OPF
    identifiers = oeb.metadata['identifier']
    uuid = None
    for x in identifiers:
        # NOTE(review): x.get(OPF('scheme'), None).lower() raises
        # AttributeError when the identifier has no scheme attribute —
        # confirm whether upstream guarantees a scheme here.
        if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(
                x).startswith('urn:uuid:'):
            uuid = unicode(x).split(':')[-1]
            break
    encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

    if uuid is None:
        self.log.warn('No UUID identifier found')
        from uuid import uuid4
        uuid = str(uuid4())
        oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

    if encrypted_fonts and not uuid.startswith('urn:uuid:'):
        # Apparently ADE requires this value to start with urn:uuid:
        # for some absurd reason, or it will throw a hissy fit and refuse
        # to use the obfuscated fonts.
        for x in identifiers:
            if unicode(x) == uuid:
                x.content = 'urn:uuid:' + uuid

    with TemporaryDirectory(u'_epub_output') as tdir:
        from calibre.customize.ui import plugin_for_output_format
        metadata_xml = None
        extra_entries = []
        if self.is_periodical:
            if self.opts.output_profile.epub_periodical_format == 'sony':
                # Sony periodicals need extra atom.xml and metadata.xml entries.
                from calibre.ebooks.epub.periodical import sony_metadata
                metadata_xml, atom_xml = sony_metadata(oeb)
                extra_entries = [(u'atom.xml', 'application/atom+xml', atom_xml)]
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        self.condense_ncx([
            os.path.join(tdir, x) for x in os.listdir(tdir)
            if x.endswith('.ncx')
        ][0])
        encryption = None
        if encrypted_fonts:
            encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

        from calibre.ebooks.epub import initialize_container
        with initialize_container(output_path, os.path.basename(opf),
                                  extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                              metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            # Debug aid: unpack the finished EPUB into extract_to.
            from calibre.utils.zipfile import ZipFile
            if os.path.exists(opts.extract_to):
                if os.path.isdir(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
                else:
                    os.remove(opts.extract_to)
            os.mkdir(opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            self.log.info('EPUB extracted to', opts.extract_to)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: metadata object (title, authors, language) seeding the book
    :param path: destination file path
    :param fmt: one of valid_empty_formats ('txt', 'md', 'docx', 'epub', 'azw3', ...)
    :param opf_name: name of the OPF package inside epub/azw3 output
    :param html_name: name of the single content document
    :param toc_name: name of the NCX table of contents
    :raises ValueError: if fmt is not in valid_empty_formats
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    # Trivial formats are handled inline and return early.
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'md':
        with open(path, 'w', encoding='utf-8') as f:
            if not mi.is_null('title'):
                print('#', mi.title, file=f)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return

    path = os.path.abspath(path)
    # Pick the first non-empty <dc:language>; fall back to 'und' (undetermined).
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Build a minimal manifest (content doc + NCX) and one-item spine;
    # insert positions 1 and 2 keep metadata first in the package.
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)

    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill the bundled template with escaped metadata, then pretty-print it.
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)

    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            # The EPUB spec requires the mimetype entry first and uncompressed.
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def create_themeball(report, progress=None, abort=None): pool = ThreadPool(processes=cpu_count()) buf = BytesIO() num = count() error_occurred = Event() def optimize(name): if abort is not None and abort.is_set(): return if error_occurred.is_set(): return try: i = next(num) if progress is not None: progress(i, _('Optimizing %s') % name) srcpath = os.path.join(report.path, name) ext = srcpath.rpartition('.')[-1].lower() if ext == 'png': optimize_png(srcpath) elif ext in ('jpg', 'jpeg'): optimize_jpeg(srcpath) except Exception: return sys.exc_info() errors = tuple( filter(None, pool.map(optimize, tuple(report.name_map.iterkeys())))) pool.close(), pool.join() if abort is not None and abort.is_set(): return if errors: e = errors[0] reraise(*e) if progress is not None: progress(next(num), _('Creating theme file')) with ZipFile(buf, 'w') as zf: for name in report.name_map: srcpath = os.path.join(report.path, name) with lopen(srcpath, 'rb') as f: zf.writestr(name, f.read(), compression=ZIP_STORED) buf.seek(0) out = BytesIO() if abort is not None and abort.is_set(): return None, None if progress is not None: progress(next(num), _('Compressing theme file')) compress(buf, out, level=9) buf = BytesIO() prefix = report.name if abort is not None and abort.is_set(): return None, None with ZipFile(buf, 'w') as zf: with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f: zf.writestr(prefix + '/' + THEME_METADATA, f.read()) zf.writestr(prefix + '/' + THEME_COVER, create_cover(report)) zf.writestr(prefix + '/' + 'icons.zip.xz', out.getvalue(), compression=ZIP_STORED) if progress is not None: progress(next(num), _('Finished')) return buf.getvalue(), prefix
def do_dump(path, dest): if os.path.exists(dest): shutil.rmtree(dest) with ZipFile(path) as zf: zf.extractall(dest) pretty_all_xml_in_dir(dest)
def _get_epub_toc(self, cid=None, path=None, prepend_title=None):
    '''
    Given a calibre id, return the epub TOC indexed by section
    If cid, use copy in library, else use path to copy on device

    :param cid: calibre book id (currently unused — see commented block below)
    :param path: device path of the ePub (zipped file or unzipped directory)
    :param prepend_title: optional title prefixed to every TOC entry
    :return: OrderedDict mapping spine index (as str) -> TOC entry text,
             or None when the book cannot be read/parsed
    '''
    toc = None
    # if cid is not None:
    #     mi = self.opts.gui.current_db.get_metadata(cid, index_is_id=True)
    #     toc = None
    #     if 'EPUB' in mi.formats:
    #         fpath = self.opts.gui.current_db.format(cid, 'EPUB', index_is_id=True, as_path=True)
    #     else:
    #         return toc
    # elif path is not None:
    #     fpath = path
    # else:
    #     return toc
    fpath = path

    # iBooks stores books unzipped
    # Marvin stores books zipped
    # Need spine, ncx_tree to construct toc
    if self.ios.stat(fpath) and self.ios.stat(fpath)['st_ifmt'] == 'S_IFDIR':
        # Find the OPF in the unzipped ePub
        fp = '/'.join([fpath, 'META-INF', 'container.xml'])
        cf = cStringIO.StringIO(self.ios.read(fp))
        container = etree.parse(cf)
        opf_file = container.xpath('.//*[local-name()="rootfile"]')[0].get('full-path')
        oebps = opf_file.rpartition('/')[0]
        fp = '/'.join([fpath, opf_file])
        opf = cStringIO.StringIO(self.ios.read(fp))
        opf_tree = etree.parse(opf)
        spine = opf_tree.xpath('.//*[local-name()="spine"]')[0]
        ncx_fs = spine.get('toc')
        manifest = opf_tree.xpath('.//*[local-name()="manifest"]')[0]
        # Resolve the spine's toc idref to the NCX href in the manifest.
        ncx_file = manifest.find('.//*[@id="%s"]' % ncx_fs).get('href')
        fp = '/'.join([fpath, oebps, ncx_file])
        ncxf = cStringIO.StringIO(self.ios.read(fp))
        ncx_tree = etree.parse(ncxf)
        # self._log(etree.tostring(ncx_tree, pretty_print=True))
    else:
        # Find the OPF file in the zipped ePub
        zfo = cStringIO.StringIO(self.ios.read(fpath, mode='rb'))
        try:
            zf = ZipFile(zfo, 'r')
            container = etree.fromstring(zf.read('META-INF/container.xml'))
            opf_tree = etree.fromstring(
                zf.read(container.xpath('.//*[local-name()="rootfile"]')[0].get('full-path')))
            spine = opf_tree.xpath('.//*[local-name()="spine"]')[0]
            ncx_fs = spine.get('toc')
            manifest = opf_tree.xpath('.//*[local-name()="manifest"]')[0]
            ncx = manifest.find('.//*[@id="%s"]' % ncx_fs).get('href')

            # Find the ncx file
            # NOTE(review): substring match picks the first archive member
            # whose name contains the href — presumably to tolerate an
            # OEBPS/ prefix; confirm it cannot match the wrong file.
            fnames = zf.namelist()
            _ncx = [x for x in fnames if ncx in x][0]
            ncx_tree = etree.fromstring(zf.read(_ncx))
        except:
            import traceback
            self._log_location()
            self._log(" unable to unzip '%s'" % fpath)
            self._log(traceback.format_exc())
            return toc

    # fpath points to epub (zipped or unzipped dir)
    # spine, ncx_tree populated
    try:
        toc = OrderedDict()
        # 1. capture idrefs from spine
        for i, el in enumerate(spine):
            toc[str(i)] = el.get('idref')

        # 2. Resolve <spine> idrefs to <manifest> hrefs
        for el in toc:
            toc[el] = manifest.find('.//*[@id="%s"]' % toc[el]).get('href')

        # 3. Build a dict of src:toc_entry
        src_map = OrderedDict()
        navMap = ncx_tree.xpath('.//*[local-name()="navMap"]')[0]
        for navPoint in navMap:
            # Get the first-level entry
            # Strip any #fragment so srcs compare equal to manifest hrefs.
            src = re.sub(r'#.*$', '', navPoint.xpath('.//*[local-name()="content"]')[0].get('src'))
            toc_entry = navPoint.xpath('.//*[local-name()="text"]')[0].text
            src_map[src] = toc_entry

            # Get any nested navPoints
            nested_navPts = navPoint.xpath('.//*[local-name()="navPoint"]')
            for nnp in nested_navPts:
                src = re.sub(r'#.*$', '', nnp.xpath('.//*[local-name()="content"]')[0].get('src'))
                toc_entry = nnp.xpath('.//*[local-name()="text"]')[0].text
                src_map[src] = toc_entry

        # Resolve src paths to toc_entry
        for section in toc:
            if toc[section] in src_map:
                if prepend_title:
                    toc[section] = "%s · %s" % (prepend_title, src_map[toc[section]])
                else:
                    toc[section] = src_map[toc[section]]
            else:
                toc[section] = None

        # 5. Fill in the gaps
        # Spine items without their own navPoint inherit the entry of the
        # nearest preceding section that had one.
        current_toc_entry = None
        for section in toc:
            if toc[section] is None:
                toc[section] = current_toc_entry
            else:
                current_toc_entry = toc[section]
    except:
        import traceback
        self._log_location()
        self._log("{:~^80}".format(" error parsing '%s' " % fpath))
        self._log(traceback.format_exc())
        self._log("{:~^80}".format(" end traceback "))

    return toc