def convert_to_cbz(self):
        '''
        Converts a cbr-comic to a cbz-comic.

        The "cbr" format of ``self.book_id`` is fetched from ``self.db`` as a
        temporary file, unpacked into a temporary directory and re-packed as
        a ZIP archive. A non-empty RAR archive comment is carried over as the
        ZIP comment. The result is added to the book as its "cbz" format and
        ``self.format`` is set to "cbz".
        '''
        from calibre.utils.unrar import RARFile, extract

        with TemporaryDirectory('_cbr2cbz') as tdir:
            ffile = self.db.format(self.book_id, "cbr", as_path=True)
            try:
                # extract the rar file
                extract(ffile, tdir)
                # get the comment
                with open(ffile, 'rb') as stream:
                    zr = RARFile(stream, get_comment=True)
                    comment = zr.comment
            finally:
                # drop the temporary copy even when the archive is unreadable
                delete_temp_file(ffile)

            # make the cbz file
            with TemporaryFile("comic.cbz") as tf:
                zf = ZipFile(tf, "w")
                try:
                    zf.add_dir(tdir)
                finally:
                    # never leave the archive handle open on failure
                    zf.close()
                # write comment
                if comment:
                    writeZipComment(tf, comment)
                # add the cbz format to calibres library
                self.db.add_format(self.book_id, "cbz", tf)
                self.format = "cbz"
Example #2
0
class OCFZipReader(OCFReader):
    '''
    OCF reader whose container is a ZIP archive.
    '''

    def __init__(self, stream, mode='r', root=None):
        '''
        Wrap ``stream`` as the archive.

        An existing ``LocalZipFile``/``ZipFile`` is used as is; anything else
        is opened with ``ZipFile`` (``EPubException`` if it is not a valid
        ZIP). When ``root`` is not given it is the directory of the stream's
        ``name`` attribute, or the current directory if the stream has none.
        '''
        if isinstance(stream, (LocalZipFile, ZipFile)):
            archive = stream
        else:
            try:
                archive = ZipFile(stream, mode=mode)
            except BadZipfile:
                raise EPubException("not a ZIP .epub OCF container")
        self.archive = archive

        if root is None:
            source_name = getattr(stream, 'name', False)
            if source_name:
                root = os.path.abspath(os.path.dirname(source_name))
            else:
                root = getcwd()
        self.root = root
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        '''
        Return a file-like object for the member ``name``. ``mode`` is
        accepted but not used.
        '''
        if not isinstance(self.archive, LocalZipFile):
            # Regular zip archives are read fully into an in-memory buffer.
            return io.BytesIO(self.archive.read(name))
        return self.archive.open(name)

    def read_bytes(self, name):
        '''
        Return the raw contents of the member ``name``.
        '''
        return self.archive.read(name)
Example #3
0
def get_metadata(stream):
    '''
    Read metadata from the first recognised ebook inside a ZIP archive.

    If the member names look like a comic, the whole stream is read as
    'cbz'. Otherwise the first member whose extension is a known ebook type
    is extracted to a temporary directory and its metadata returned, with
    ``timestamp`` reset to None.

    :param stream: file-like object containing the ZIP archive
    :raises ValueError: if no member has a recognised ebook extension
    '''
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.archive import is_comic
    zf = ZipFile(stream, 'r')
    names = zf.namelist()
    if is_comic(names):
        # Is probably a comic
        return get_metadata(stream, 'cbz')

    for f in names:
        # Extension without the leading dot; empty when there is none.
        stream_type = os.path.splitext(f)[1].lower()[1:]
        if stream_type not in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
            continue
        with TemporaryDirectory() as tdir:
            with CurrentDir(tdir):
                path = zf.extract(f)
                # Close the extracted file before the temporary directory is
                # removed; an open handle blocks deletion on some platforms.
                with open(path, 'rb') as member:
                    mi = get_metadata(member, stream_type)
                if stream_type == 'opf' and mi.application_id is None:
                    try:
                        # zip archive opf files without an application_id were assumed not to have a cover
                        # reparse the opf and if cover exists read its data from zip archive for the metadata
                        nmi = zip_opf_metadata(path, zf)
                        nmi.timestamp = None
                        return nmi
                    except Exception:
                        # Best effort only: fall back to the plain OPF metadata.
                        pass
                mi.timestamp = None
                return mi
    raise ValueError('No ebook found in ZIP archive (%s)' % os.path.basename(getattr(stream, 'name', '') or '<stream>'))
Example #4
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert PML (or zipped PMLZ) input into HTML pages plus an OPF.

        Writes ``metadata.opf`` and ``toc.ncx`` in the current directory and
        returns the full path of ``metadata.opf``.
        '''
        from calibre.ebooks.metadata.toc import TOC
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.utils.zipfile import ZipFile

        self.options = options
        self.log = log
        pages = []
        images = []
        toc = TOC()

        if file_ext != 'pmlz':
            # Plain PML: a single page rendered as index.html.
            toc = self.process_pml(stream, 'index.html')
            pages.append('index.html')

            if hasattr(stream, 'name'):
                source_dir = os.path.abspath(os.path.dirname(stream.name))
                images = self.get_images(stream, source_dir)
        else:
            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                archive = ZipFile(stream)
                archive.extractall(tdir)

                for pml_path in glob.glob(os.path.join(tdir, '*.pml')):
                    stem = os.path.splitext(os.path.basename(pml_path))[0]
                    html_name = stem+'.html'
                    html_path = os.path.join(getcwd(), html_name)

                    pages.append(html_name)
                    log.debug('Processing PML item %s...' % pml_path)
                    toc += self.process_pml(pml_path, html_path)
                images = self.get_images(stream, tdir, True)

        # We want pages to be ordered alphabetically.
        pages.sort()

        manifest_items = [(entry, None) for entry in pages+images]

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        if 'images/cover.png' in images:
            mi.cover = 'images/cover.png'
        opf = OPFCreator(getcwd(), mi)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        opf.set_toc(toc)
        with lopen('metadata.opf', 'wb') as opf_out:
            with lopen('toc.ncx', 'wb') as ncx_out:
                opf.render(opf_out, ncx_out, 'toc.ncx')

        return os.path.join(getcwd(), 'metadata.opf')
Example #5
0
 def safe_replace(self, name, datastream, extra_replacements={},
     add_missing=False):
     '''
     Rewrite the archive in ``self.stream`` with some members replaced.

     ``name`` is replaced by the contents of ``datastream``;
     ``extra_replacements`` maps further member names to file-like objects.
     With ``add_missing`` set, requested names that are not already in the
     archive are appended. The new archive is assembled in a spooled
     temporary file and then copied over the original stream.
     '''
     from calibre.utils.zipfile import ZipFile, ZipInfo
     replacements = {name:datastream}
     replacements.update(extra_replacements)
     wanted = frozenset(replacements.keys())
     replaced = set()
     with SpooledTemporaryFile(max_size=100*1024*1024) as spool:
         rebuilt = ZipFile(spool, 'w')
         for _offset, header in self.file_info.itervalues():
             if header.filename not in wanted:
                 # Untouched member: copy it across unchanged.
                 rebuilt.writestr(header.filename, self.read(header.filename,
                     spool_size=0))
                 continue
             info = ZipInfo(header.filename)
             info.compress_type = header.compression_method
             rebuilt.writestr(info, replacements[header.filename].read())
             replaced.add(header.filename)
         if add_missing:
             for missing in wanted - replaced:
                 rebuilt.writestr(missing, replacements[missing].read())
         rebuilt.close()
         # Overwrite the original stream in place with the rebuilt archive.
         target = self.stream
         spool.seek(0)
         target.seek(0)
         target.truncate()
         shutil.copyfileobj(spool, target)
         target.flush()
Example #6
0
def initialize_container(path_to_container, opf_name='metadata.opf',
        extra_entries=[]):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
                path, mimetype)
    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
      {extra_entries}
   </rootfiles>
</container>
    '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    return zf
Example #7
0
def run(epub, opts, log):
    '''
    Unpack ``epub`` into a temporary directory, run every fixer returned by
    ``epub_fixers()`` over it and write the result back to ``epub``.

    Each fixer receives ``fix=`` the value of the ``opts`` attribute named by
    its ``fix_name`` (False when that attribute is absent).
    '''
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                zf.extractall()
            finally:
                # Release the handle on the input file even if extraction fails.
                zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)
Example #8
0
    def run(self, archive):
        '''
        Pull the single ebook out of a ZIP/RAR archive, when there is one.

        Returns the path of a temporary file holding either a .cbr/.cbz copy
        of the whole archive (if its contents look like a comic) or the one
        recognised ebook member. Returns ``archive`` unchanged if it holds
        zero or several candidate files, or one of an unsupported type.
        '''
        from calibre.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.utils.unrar import extract_member, names
            with open(archive, 'rb') as rf:
                fnames = list(names(rf))
        else:
            zf = ZipFile(archive, 'r')

        def fname_ok(fname):
            # Skip thumbnail caches, extension-less names, scene info files
            # and anything under a __MACOSX directory.
            bn = os.path.basename(fname).lower()
            if bn == 'thumbs.db':
                return False
            if '.' not in bn:
                return False
            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
                return False
            if '__MACOSX' in fname.split('/'):
                return False
            return True

        try:
            if zf is not None:
                fnames = zf.namelist()

            fnames = list(filter(fname_ok, fnames))
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                with closing(of):
                    with open(archive, 'rb') as f:
                        of.write(f.read())
                return of.name
            if len(fnames) > 1 or not fnames:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in {
                    'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                    'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    with open(archive, 'rb') as f:
                        data = extract_member(f, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The zip handle was previously left open on every return path.
            if zf is not None:
                zf.close()
Example #9
0
    def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
        '''
        Unpack the EPUB at ``pathtoepub`` and locate its OPF file.

        :param pathtoepub: path of the EPUB file to open
        :param log: logger, also handed to the base class
        :param clone_data: if not None, initialise from this data instead of
            unpacking anything
        :param tdir: directory to unpack into; a persistent temporary
            directory is created when omitted
        :raises InvalidEpub: if META-INF/container.xml is missing, has no
            link to an OPF file, or links to a file that does not exist
        '''
        if clone_data is not None:
            super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
            for x in ('pathtoepub', 'obfuscated_fonts'):
                setattr(self, x, clone_data[x])
            return

        self.pathtoepub = pathtoepub
        if tdir is None:
            tdir = PersistentTemporaryDirectory('_epub_container')
        tdir = os.path.abspath(os.path.realpath(tdir))
        self.root = tdir
        with open(self.pathtoepub, 'rb') as stream:
            try:
                zf = ZipFile(stream)
                zf.extractall(tdir)
            except Exception:
                log.exception('EPUB appears to be invalid ZIP file, trying a'
                        ' more forgiving ZIP parser')
                from calibre.utils.localunzip import extractall
                stream.seek(0)
                # The fallback must unpack into tdir as well; everything
                # below looks for the files there.
                extractall(stream, tdir)
        try:
            os.remove(join(tdir, 'mimetype'))
        except EnvironmentError:
            pass

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        with open(container_path, 'rb') as f:
            container = etree.fromstring(f.read())
        opf_files = container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        super(EpubContainer, self).__init__(tdir, opf_path, log)

        self.obfuscated_fonts = {}
        if 'META-INF/encryption.xml' in self.name_path_map:
            self.process_encryption()
        self.parsed_cache['META-INF/container.xml'] = container
Example #10
0
def get_fb2_data(stream):
    '''
    Return ``(data, zip_file_name)`` for an FB2 stream that may be zipped.

    If ``stream`` is not a ZIP archive, the rest of the stream from its
    current position is returned and ``zip_file_name`` is None. Otherwise
    the first member ending in '.fb2' is read, or the first member of any
    kind when there is no such member.
    '''
    from calibre.utils.zipfile import ZipFile, BadZipfile
    start = stream.tell()
    try:
        archive = ZipFile(stream)
    except BadZipfile:
        # Plain FB2: rewind to where we started and return the raw bytes.
        stream.seek(start)
        return stream.read(), None

    members = archive.namelist()
    fb2_members = [m for m in members if m.lower().endswith('.fb2')]
    zip_file_name = (fb2_members or members)[0]
    return archive.open(zip_file_name).read(), zip_file_name
Example #11
0
    def extract(self, stream):
        '''
        Unpack the DOCX container in ``stream`` into a new persistent
        temporary directory (``self.tdir``) and index its files.

        ``self.names`` maps each archive-style name (relative path with '/'
        separators) to the extracted file's path. If the regular ZIP reader
        fails, the more forgiving ``localunzip`` parser is tried.
        '''
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are no longer reported as a broken ZIP file.
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f
Example #12
0
    def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
        """
        Unpack the EPUB at ``pathtoepub`` and locate its OPF file.

        :param pathtoepub: path of the EPUB file to open
        :param log: logger, also handed to the base class
        :param clone_data: if not None, initialise from this data instead of
            unpacking anything
        :param tdir: directory to unpack into; a persistent temporary
            directory is created when omitted
        :raises InvalidEpub: if META-INF/container.xml is missing, has no
            link to an OPF file, or links to a file that does not exist
        """
        if clone_data is not None:
            super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
            for x in ("pathtoepub", "obfuscated_fonts"):
                setattr(self, x, clone_data[x])
            return

        self.pathtoepub = pathtoepub
        if tdir is None:
            tdir = PersistentTemporaryDirectory("_epub_container")
        tdir = os.path.abspath(os.path.realpath(tdir))
        self.root = tdir
        with open(self.pathtoepub, "rb") as stream:
            try:
                zf = ZipFile(stream)
                zf.extractall(tdir)
            except Exception:
                log.exception("EPUB appears to be invalid ZIP file, trying a" " more forgiving ZIP parser")
                from calibre.utils.localunzip import extractall

                stream.seek(0)
                # The fallback must unpack into tdir as well; everything
                # below looks for the files there.
                extractall(stream, tdir)
        try:
            os.remove(join(tdir, "mimetype"))
        except EnvironmentError:
            pass

        container_path = join(self.root, "META-INF", "container.xml")
        if not exists(container_path):
            raise InvalidEpub("No META-INF/container.xml in epub")
        with open(container_path, "rb") as f:
            container = etree.fromstring(f.read())
        opf_files = container.xpath(
            (r"child::ocf:rootfiles/ocf:rootfile" '[@media-type="%s" and @full-path]' % guess_type("a.opf")),
            namespaces={"ocf": OCF_NS},
        )
        if not opf_files:
            raise InvalidEpub("META-INF/container.xml contains no link to OPF file")
        opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get("full-path")).split("/")))
        if not exists(opf_path):
            raise InvalidEpub("OPF file does not exist at location pointed to" " by META-INF/container.xml")

        super(EpubContainer, self).__init__(tdir, opf_path, log)

        self.obfuscated_fonts = {}
        if "META-INF/encryption.xml" in self.name_path_map:
            self.process_encryption()
        self.parsed_cache["META-INF/container.xml"] = container
Example #13
0
    def dump_input(self, ret, output_dir):
        '''
        Save the input stage's result under ``<debug_pipeline>/input``.

        If ``ret`` is a string, ``output_dir`` is copied there as a tree;
        otherwise ``ret`` is written with ``self.dump_oeb``. For the
        'recipe' input format the dump is also zipped into
        ``periodical.downloaded_recipe`` and the input plugin's
        ``save_download`` is called with that archive.
        '''
        out_dir = os.path.join(self.opts.debug_pipeline, 'input')
        if isinstance(ret, basestring):
            shutil.copytree(output_dir, out_dir)
        else:
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            self.dump_oeb(ret, out_dir)
        if self.input_fmt == 'recipe':
            zf = ZipFile(os.path.join(self.opts.debug_pipeline,
                'periodical.downloaded_recipe'), 'w')
            try:
                zf.add_dir(out_dir)
                with self.input_plugin:
                    self.input_plugin.save_download(zf)
            finally:
                # Always finalise the archive, even if the plugin fails.
                zf.close()

        self.log.info('Input debug saved to:', out_dir)
    def update_cover(self):
        '''
        Embed the book's cover from ``self.db`` into the cbz file as
        ``00000000_cover.<ext>``.

        A member already named ``00000000_cover`` (any extension) is
        replaced in place; otherwise the cover is appended under the new
        name. The temporary cover file is removed afterwards.
        '''
        # get the calibre cover
        cover_path = self.db.cover(self.book_id, as_path=True)
        fmt = cover_path.rpartition('.')[-1]
        new_cover_name = "00000000_cover." + fmt

        try:
            self.make_temp_cbz_file()

            # search for a previously embedded cover
            cover_info = ""
            zf = ZipFile(self.file)
            try:
                for name in zf.namelist():
                    if name.rsplit(".", 1)[0] == "00000000_cover":
                        cover_info = name
                        break
            finally:
                # Close the reader before the same file is reopened for
                # writing below.
                zf.close()

            if cover_info != "":
                # replace the previous cover
                with open(self.file, 'r+b') as zf, open(cover_path, 'r+b') as cp:
                    safe_replace(zf, cover_info, cp)
            else:
                # save the cover in the file
                zf = ZipFile(self.file, "a")
                try:
                    zf.write(cover_path, new_cover_name)
                finally:
                    zf.close()
        finally:
            delete_temp_file(cover_path)
Example #15
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path`` as a PMLZ archive.

        The PML text is written as ``index.pml`` (encoded with
        ``opts.pml_output_encoding``, unencodable characters replaced),
        images go into ``index_img``, and the directory is then zipped.
        '''
        from calibre.ebooks.pml.pmlml import PMLMLizer
        from calibre.utils.zipfile import ZipFile

        with TemporaryDirectory('_pmlz_output') as tdir:
            pmlmlizer = PMLMLizer(log)
            pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
            with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
                out.write(pml.encode(opts.pml_output_encoding, 'replace'))

            img_path = os.path.join(tdir, 'index_img')
            if not os.path.exists(img_path):
                os.makedirs(img_path)
            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts)

            log.debug('Compressing output...')
            pmlz = ZipFile(output_path, 'w')
            try:
                pmlz.add_dir(tdir)
            finally:
                # Close explicitly so the archive is finalised before this
                # method returns instead of whenever the object is collected.
                pmlz.close()
Example #16
0
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    ``stream`` must have a ``name`` attribute. For names ending in '.pmlz'
    the archive is unpacked and every top-level ``*.pml`` file is read;
    otherwise the stream itself is read as PML. TITLE, AUTHOR, PUBLISHER,
    COPYRIGHT and ISBN values are taken from the ``\\v ... \\v`` comment
    blocks. With ``extract_cover`` set, ``cover_data`` is filled in via
    ``get_cover``.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    def field_value(match):
        # Decode as cp1252, escape for XML, then drop control characters.
        decoded = match.group(1).strip().decode('cp1252', 'replace')
        return re.sub('[\x00-\x1f]', '', prepare_string_for_xml(decoded))

    if not stream.name.endswith('.pmlz'):
        pml = stream.read()
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
    else:
        with TemporaryDirectory('_unpmlz') as tdir:
            archive = ZipFile(stream)
            archive.extractall(tdir)

            chunks = []
            for pml_file in glob.glob(os.path.join(tdir, '*.pml')):
                with open(pml_file, 'r+b') as p_stream:
                    chunks.append(p_stream.read())
            pml = ''.join(chunks)
            if extract_cover:
                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)

    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        title = re.search(r'TITLE="(.*?)"', comment)
        if title:
            mi.title = field_value(title)
        author = re.search(r'AUTHOR="(.*?)"', comment)
        if author:
            # The first real author replaces the placeholder list.
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(field_value(author))
        publisher = re.search(r'PUBLISHER="(.*?)"', comment)
        if publisher:
            mi.publisher = field_value(publisher)
        rights = re.search(r'COPYRIGHT="(.*?)"', comment)
        if rights:
            mi.rights = field_value(rights)
        isbn = re.search(r'ISBN="(.*?)"', comment)
        if isbn:
            mi.isbn = field_value(isbn)

    return mi
Example #17
0
def initialize_container(path_to_container, opf_name='metadata.opf',
        extra_entries=[]):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
                path, mimetype)
    CONTAINER = simple_container_xml(opf_name, rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    return zf
    def embed_cix_metadata(self):
        '''
        Embeds the cix_metadata.

        ``self.comic_metadata`` is serialised with ``ComicInfoXml``. If
        ``self.zipinfo`` names an existing metadata member, that member is
        replaced; otherwise the data is appended as ``ComicInfo.xml``.
        '''
        from io import StringIO

        cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)

        # ensure we have a temp file
        self.make_temp_cbz_file()

        # make a new cbz if a metadata file is already there, to prevent corruption
        if self.zipinfo is not None:
            with open(self.file, 'r+b') as zf:
                safe_replace(zf, self.zipinfo, StringIO(cix_string.decode('utf-8', 'ignore')))

        else:
            # save the metadata in the file
            zf = ZipFile(self.file, "a")
            try:
                zf.writestr("ComicInfo.xml", cix_string.decode('utf-8', 'ignore'))
            finally:
                # Close the archive even when the write fails.
                zf.close()
Example #19
0
def get_cover(opf, opf_path, stream, reader=None):
    '''
    Return cover image data for the EPUB in ``stream``.

    If the OPF declares a raster cover that exists in the archive, its raw
    bytes are returned, or None when ``reader`` reports that member as
    encrypted. In every other case the result of ``render_cover`` is
    returned.
    '''
    raster_cover = opf.raster_cover
    stream.seek(0)
    try:
        zf = ZipFile(stream)
    except Exception:
        # Fall back to the more forgiving local ZIP reader.
        stream.seek(0)
        zf = LocalZipFile(stream)

    if raster_cover:
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and \
            reader.encryption_meta.is_encrypted(cpath):
                # Do not leave the archive open on this early exit.
                zf.close()
                return
        try:
            member = zf.getinfo(cpath)
        except Exception:
            # Declared cover is not in the archive: render one instead.
            pass
        else:
            f = zf.open(member)
            try:
                data = f.read()
            finally:
                f.close()
                zf.close()
            return data

    return render_cover(opf, opf_path, zf, reader=reader)
Example #20
0
 def __enter__(self, *args):
     '''
     Add this plugin to the python path so that its contents become directly importable.
     Useful when bundling large python libraries into the plugin. Use it like this::
         with plugin:
             import something

     A plugin zip with no pyd/so/dll/dylib members is put on ``sys.path``
     directly; otherwise it is unpacked into a temporary directory and that
     directory is put on ``sys.path`` instead.
     '''
     if self.plugin_path is not None:
         from calibre.utils.zipfile import ZipFile
         zf = ZipFile(self.plugin_path)
         try:
             extensions = set([x.rpartition('.')[-1].lower() for x in
                 zf.namelist()])
             zip_safe = not extensions.intersection(
                 ('pyd', 'so', 'dll', 'dylib'))
             if zip_safe:
                 sys.path.insert(0, self.plugin_path)
                 self.sys_insertion_path = self.plugin_path
             else:
                 from calibre.ptempfile import TemporaryDirectory
                 self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
                 self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
                 zf.extractall(self.sys_insertion_path)
                 sys.path.insert(0, self.sys_insertion_path)
         finally:
             # Release the plugin file even if extraction fails.
             zf.close()
Example #21
0
    def run(self, archive):
        '''
        Pull the single ebook out of a ZIP/RAR archive, when there is one.

        Returns the path of a temporary file holding either a .cbr/.cbz copy
        of the whole archive (if its contents look like a comic) or the one
        recognised ebook member. Returns ``archive`` unchanged if it holds
        zero or several candidate files, or one of an unsupported type.
        '''
        from calibre.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.utils.unrar import extract_member, names
            with open(archive, 'rb') as rf:
                fnames = list(names(rf))
        else:
            zf = ZipFile(archive, 'r')

        try:
            if zf is not None:
                fnames = zf.namelist()

            # Ignore extension-less names and thumbnail caches.
            fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db']
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                with closing(of):
                    with open(archive, 'rb') as f:
                        of.write(f.read())
                return of.name
            if len(fnames) > 1 or not fnames:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
                    'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'):
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    with open(archive, 'rb') as f:
                        data = extract_member(f, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The zip handle was previously left open on every return path.
            if zf is not None:
                zf.close()
    def get_comic_metadata_from_cbz(self):
        '''
        Reads the comic metadata from the comic cbz file as comictagger metadata.

        Sets ``self.cix_metadata`` and ``self.zipinfo`` from a member named
        ``comicinfo.xml`` (any letter case), sets ``self.cbi_metadata`` when
        the archive comment validates as ComicBookInfo, then combines them.
        '''
        self.make_temp_cbz_file()
        # open the zipfile
        zf = ZipFile(self.file)
        try:
            # get cix metadata
            for name in zf.namelist():
                if name.lower() == "comicinfo.xml":
                    self.cix_metadata = ComicInfoXml().metadataFromString(zf.read(name))
                    self.zipinfo = name
                    break

            # get the cbi metadata
            if ComicBookInfo().validateString(zf.comment):
                self.cbi_metadata = ComicBookInfo().metadataFromString(zf.comment)
        finally:
            # Close the archive even if one of the parsers raises.
            zf.close()

        # get combined metadata
        self._get_combined_metadata()
Example #23
0
 def __init__(self, stream, mode='r', root=None):
     '''
     Open ``stream`` as the ZIP archive of an OCF container.

     Raises ``EPubException`` if the stream is not a valid ZIP file. When
     ``root`` is not given it is the directory of the stream's ``name``
     attribute, or the current directory if the stream has none.
     '''
     try:
         self.archive = ZipFile(stream, mode=mode)
     except BadZipfile:
         raise EPubException("not a ZIP .epub OCF container")

     if root is None:
         source_name = getattr(stream, 'name', False)
         if source_name:
             root = os.path.abspath(os.path.dirname(source_name))
         else:
             root = os.getcwdu()
     self.root = root
     super(OCFZipReader, self).__init__()
Example #24
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path`` as a TXTZ archive.

        The archive holds the text (``index.txt``, or ``index.text`` for
        textile formatting), the manifest's images and ``metadata.opf``.
        '''
        from calibre.ebooks.oeb.base import OEB_IMAGES
        from calibre.utils.zipfile import ZipFile
        from lxml import etree

        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            txt_name = 'index.txt'
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            with TemporaryFile(txt_name) as tf:
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES:
                    if hasattr(self.writer, 'images'):
                        path = os.path.join(tdir, 'images')
                        if item.href in self.writer.images:
                            href = self.writer.images[item.href]
                        else:
                            continue
                    else:
                        path = os.path.join(tdir, os.path.dirname(item.href))
                        href = os.path.basename(item.href)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    with open(os.path.join(path, href), 'wb') as imgf:
                        imgf.write(item.data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

            txtz = ZipFile(output_path, 'w')
            try:
                txtz.add_dir(tdir)
            finally:
                # Close explicitly so the archive is finalised before this
                # method returns instead of whenever the object is collected.
                txtz.close()
Example #25
0
def build_plugin(path):
    '''
    Zip the plugin source directory ``path`` and install it with
    ``add_plugin``.

    Exits with status 1 if the directory has no ``__init__.py``. The names
    .git, .bzr, .svn and .hg are passed to ``add_dir`` as its
    ``simple_filter``. The temporary zip is removed afterwards, whether or
    not the install succeeded.
    '''
    from calibre import prints
    from calibre.ptempfile import PersistentTemporaryFile
    from calibre.utils.zipfile import ZipFile, ZIP_STORED
    path = type(u'')(path)
    names = frozenset(os.listdir(path))
    if u'__init__.py' not in names:
        prints(path, ' is not a valid plugin')
        raise SystemExit(1)
    t = PersistentTemporaryFile(u'.zip')
    try:
        try:
            with ZipFile(t, u'w', ZIP_STORED) as zf:
                zf.add_dir(path, simple_filter=lambda x:x in {'.git', '.bzr', '.svn', '.hg'})
        finally:
            t.close()
        plugin = add_plugin(t.name)
    finally:
        # The persistent temp file is not cleaned up automatically.
        os.remove(t.name)
    prints(u'Plugin updated:', plugin.name, plugin.version)
Example #26
0
def initialize_container(path_to_container,
                         opf_name='metadata.opf',
                         extra_entries=[]):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
            path, mimetype)
    CONTAINER = simple_container_xml(opf_name, rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    return zf
Example #27
0
def run(epub, opts, log):
    '''
    Unpack ``epub`` into a temporary directory, run every fixer returned by
    ``epub_fixers()`` over it and write the result back to ``epub``.

    Each fixer receives ``fix=`` the value of the ``opts`` attribute named by
    its ``fix_name`` (False when that attribute is absent).
    '''
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                zf.extractall()
            finally:
                # Release the handle on the input file even if extraction fails.
                zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)
Example #28
0
class OCFZipReader(OCFReader):
    '''
    OCF reader whose container is a ZIP archive.
    '''

    def __init__(self, stream, mode='r', root=None):
        '''
        Open ``stream`` as the archive.

        Raises ``EPubException`` if the stream is not a valid ZIP file. When
        ``root`` is not given it is the directory of the stream's ``name``
        attribute, or the current directory if the stream has none.
        '''
        try:
            self.archive = ZipFile(stream, mode=mode)
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")

        if root is None:
            source_name = getattr(stream, 'name', False)
            if source_name:
                root = os.path.abspath(os.path.dirname(source_name))
            else:
                root = os.getcwdu()
        self.root = root
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        '''
        Return an in-memory file-like object with the contents of the member
        ``name``. ``mode`` is accepted but not used.
        '''
        contents = self.archive.read(name)
        return StringIO(contents)
Example #29
0
 def write(self, path):
     '''
     Flush all dirtied entries to disk and zip ``self.root`` into ``path``.

     Cached objects that have an ``xpath`` attribute are serialised as UTF-8
     XML with a declaration; anything else is written as is. The
     ``mimetype`` entry is written first and stored uncompressed.
     '''
     for name in self.dirtied:
         payload = self.cache[name]
         if hasattr(payload, 'xpath'):
             payload = etree.tostring(payload, encoding='utf-8',
                     xml_declaration=True)
         with open(self.name_map[name], 'wb') as out:
             out.write(payload)
     self.dirtied.clear()

     epub_zip = ZipFile(path, 'w')
     mimetype = bytes(guess_type('a.epub')[0])
     epub_zip.writestr('mimetype', mimetype, compression=ZIP_STORED)
     epub_zip.add_dir(self.root)
     epub_zip.close()
Example #30
0
def zip_rebuilder(tdir, path):
    '''
    Zip the directory tree ``tdir`` into a new deflated archive at ``path``.

    A top-level ``mimetype`` file, if present, is written first and stored
    uncompressed. Files named .DS_Store, mimetype or iTunesMetadata.plist
    are left out of the walk. Member names are NFC-normalised relative
    paths with '/' separators.
    '''
    skipped = {'.DS_Store', 'mimetype', 'iTunesMetadata.plist'}
    with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
        # Write mimetype
        mimetype_path = os.path.join(tdir, 'mimetype')
        if os.path.exists(mimetype_path):
            archive.write(mimetype_path, 'mimetype', compress_type=ZIP_STORED)
        # Write everything else
        for folder, _subdirs, filenames in os.walk(tdir):
            for filename in (fn for fn in filenames if fn not in skipped):
                source = os.path.join(folder, filename)
                relative = os.path.relpath(source, tdir).replace(os.sep, '/')
                archive.write(source, unicodedata.normalize('NFC', relative))
Example #31
0
def set_metadata(stream, mi):
    '''
    Update the OPF inside the ZIP archive in stream with the metadata in mi.

    The first OPF found by ``get_first_opf_name`` is read, updated with
    ``smart_update`` and written back with ``safe_replace``. When cover
    bytes are available, from ``mi.cover_data[1]`` or else from the file at
    ``mi.cover``, they are written to a temporary file by
    ``_write_new_cover`` and replace (or add) the cover entry, and
    ``mi.cover`` is set to the cover path used inside the archive.

    :param stream: Stream containing the ZIP archive; modified in place.
    :param mi: Metadata object to apply.
    '''
    replacements = {}

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = io.BytesIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the file named by mi.cover.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        pass
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as f:
                new_cdata = f.read()
        except Exception:
            # Best effort: no readable cover simply means no cover update
            pass

    cover_tmp_path = None
    cover_file = None
    try:
        if new_cdata:
            cpath = opf.raster_cover
            if not cpath:
                cpath = 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            cover_tmp_path = new_cover.name
            cover_file = open(cover_tmp_path, 'rb')
            replacements[cpath] = cover_file
            mi.cover = cpath

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = io.BytesIO(opf.render())
        safe_replace(stream,
                     opf_path,
                     newopf,
                     extra_replacements=replacements,
                     add_missing=True)
    finally:
        # Cleanup temporary files, also when the update itself failed.
        if cover_file is not None:
            cover_file.close()
        if cover_tmp_path is not None:
            try:
                os.remove(cover_tmp_path)
            except OSError:
                pass
Example #32
0
 def write(self, path_or_stream, mi, create_empty_document=False):
     '''
     Serialize this document as a ZIP package to path_or_stream.

     :param path_or_stream: Destination handed to ``ZipFile`` in write mode.
     :param mi: Metadata, converted with ``convert_metadata`` into
         ``docProps/core.xml``.
     :param create_empty_document: When true, ``create_empty_document(mi)``
         is called before anything is written.
     '''
     if create_empty_document:
         self.create_empty_document(mi)
     with ZipFile(path_or_stream, 'w') as archive:
         add = archive.writestr
         # Package description and document properties
         add('[Content_Types].xml', self.contenttypes)
         add('_rels/.rels', self.containerrels)
         add('docProps/core.xml', self.convert_metadata(mi))
         add('docProps/app.xml', self.appproperties)
         # Main document parts
         add('word/webSettings.xml', self.websettings)
         add('word/document.xml', xml2str(self.document))
         add('word/styles.xml', xml2str(self.styles))
         add('word/fontTable.xml', xml2str(self.font_table))
         # Relationship parts
         add('word/_rels/document.xml.rels',
             self.document_relationships.serialize())
         add('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
         # Images are produced lazily by their getter, fonts are stored as is
         for image_name, get_image_data in self.images.iteritems():
             add(image_name, get_image_data())
         for font_name, font_data in self.fonts.iteritems():
             add(font_name, font_data)
Example #33
0
    def embed_cbi_metadata(self):
        '''
        Embeds the cbi_metadata

        The metadata string built by ComicBookInfo from
        ``self.comic_metadata`` is stored, utf-8 encoded, as the comment of
        the zip archive at ``self.file``.
        '''
        cbi_string = ComicBookInfo().stringFromMetadata(self.comic_metadata)

        # ensure we have a temp file
        self.make_temp_cbz_file()
        # save the metadata in the comment
        zf = ZipFile(self.file, 'a')
        try:
            zf.comment = cbi_string.encode("utf-8")
            # flag the archive as modified; presumably close() only rewrites
            # the comment when this is set -- TODO confirm
            zf._didModify = True
        finally:
            # release the file even when encoding the comment fails
            zf.close()
Example #34
0
def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
    '''
    Import the dictionaries contained in an OXT archive.

    The archive manifest is searched for the configuration-data (xcu) entry,
    which is parsed with ``parse_xcu`` to find the dic/aff file pairs and
    their locales. For every pair with at least one locale that has a
    country code, a new directory is created below dest_dir containing a
    ``locales`` file (name followed by the locales, one per line) and the
    dic/aff files converted by ``convert_to_utf8``.

    :param source_path: Path or stream of the OXT (zip) file.
    :param name: Name written as first line of each ``locales`` file.
    :param dest_dir: Target directory, defaults to the ``dictionaries``
        folder inside ``config_dir``.
    :param prefix: Prefix for the created per-dictionary directories.
    :return: Number of dictionaries imported.
    '''
    from calibre.spell.dictionary import parse_lang_code
    dest_dir = dest_dir or os.path.join(config_dir, 'dictionaries')
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    num = 0
    with ZipFile(source_path) as zf:

        def read_file(key):
            try:
                return zf.open(key).read()
            except KeyError:
                # Some dictionaries apparently put the xcu in a sub-directory
                # and incorrectly make paths relative to that directory instead
                # of the root, for example:
                # http://extensions.libreoffice.org/extension-center/italian-dictionary-thesaurus-hyphenation-patterns/releases/4.1/dict-it.oxt
                while key.startswith('../'):
                    key = key[3:]
                return zf.open(key.lstrip('/')).read()

        root = etree.fromstring(zf.open('META-INF/manifest.xml').read())
        xcu = XPath(
            '//manifest:file-entry[@manifest:media-type="application/vnd.sun.star.configuration-data"]'
        )(root)[0].get('{%s}full-path' % NS_MAP['manifest'])
        for (dic, aff), locales in iteritems(
                parse_xcu(zf.open(xcu).read(), origin='')):
            dic, aff = dic.lstrip('/'), aff.lstrip('/')
            locales = uniq([
                x for x in map(fill_country_code, locales)
                if parse_lang_code(x).countrycode
            ])
            if not locales:
                continue
            # Read and convert the data before touching the disk, so neither
            # a skipped nor an unreadable dictionary leaves a directory behind
            dd, ad = convert_to_utf8(read_file(dic), read_file(aff))
            d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
            metadata = [name] + list(locales)
            with open(os.path.join(d, 'locales'), 'wb') as f:
                f.write(('\n'.join(metadata)).encode('utf-8'))
            with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:
                f.write(dd)
            with open(os.path.join(d, '%s.aff' % locales[0]), 'wb') as f:
                f.write(ad)
            num += 1
    return num
Example #35
0
def get_metadata(stream):
    '''
    Read the metadata of the DOCX file in stream, including a cover guess.

    The document properties come from ``DOCX(...).metadata``. The cover is
    the first image entry (jpeg, jpg, png or gif) in the archive that
    ``identify_data`` can read and whose height/width ratio lies between
    0.8 and 1.8 with an area of at least 12000 pixels.

    :param stream: Seekable stream containing the DOCX file.
    :return: The metadata object, with ``cover_data`` set when a suitable
        image was found.
    '''
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Release the container even when reading the properties fails
        c.close()
    stream.seek(0)
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Not an image we can identify, try the next entry
                    continue
                # width > 0 guards the division below
                if (width > 0 and 0.8 <= height / width <= 1.8 and
                        height * width >= 12000):
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata

    return mi
Example #36
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The metadata is taken from the first OPF found in the ZIP archive in
    stream. If anything goes wrong while reading, whatever metadata has
    been gathered up to that point is returned (at worst a placeholder
    with unknown title and author).
    '''
    metadata = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as archive:
            opf_name = get_first_opf_name(archive)
            opf = OPF(StringIO(archive.read(opf_name)))
            metadata = opf.to_book_metadata()
            cover_href = opf.raster_cover if extract_cover else None
            if cover_href:
                extension = os.path.splitext(cover_href)[1]
                metadata.cover_data = (extension, archive.read(cover_href))
    except:
        # Reading is best effort; fall through with what we have so far
        pass
    return metadata
Example #37
0
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
    '''
    Rewrite the FB2 document in stream with the metadata from mi.

    The document is obtained with ``get_fb2_data``; if that reports a zip
    member name, the result is written back as a zip archive with that
    single member, otherwise the raw XML is written to the stream. The
    apply_null and update_timestamp arguments are accepted but not used
    here.
    '''
    stream.seek(0)
    raw, zip_file_name = get_fb2_data(stream)
    root = _get_fbroot(raw)
    ctx = Context(root)
    description = ctx.get_or_create(root, 'description')
    title_info = ctx.get_or_create(description, 'title-info')
    publish_info = ctx.get_or_create(description, 'publish-info')

    # Remembered so the children can be re-indented after the update
    indent = title_info.text

    updates = (
        (_set_comments, title_info),
        (_set_series, title_info),
        (_set_tags, title_info),
        (_set_authors, title_info),
        (_set_title, title_info),
        (_set_publisher, publish_info),
        (_set_pubdate, publish_info),
        (_set_cover, title_info),
    )
    for apply_update, section in updates:
        apply_update(section, mi, ctx)

    for child in title_info:
        child.tail = indent

    # Apparently there exists FB2 reading software that chokes on the use of
    # single quotes in xml declaration. Sigh. See
    # https://www.mobileread.com/forums/showthread.php?p=2273184#post2273184
    body = etree.tostring(root,
                          method='xml',
                          encoding='utf-8',
                          xml_declaration=False)
    raw = b'<?xml version="1.0" encoding="UTF-8"?>\n' + body

    stream.seek(0)
    stream.truncate()
    if not zip_file_name:
        stream.write(raw)
        return
    from calibre.utils.zipfile import ZipFile
    with ZipFile(stream, 'w') as zf:
        zf.writestr(zip_file_name, raw)
Example #38
0
    def __call__(self, stream, odir, log):
        '''
        Extract the ODT file in stream into odir as XHTML plus an OPF.

        Inside odir this writes ``index.xhtml`` (the converted document),
        the pictures extracted by ``extract_pictures`` and
        ``metadata.opf``, whose manifest lists every file found in odir.

        :param stream: Seekable stream containing the ODT file.
        :param odir: Output directory, created when missing.
        :param log: Callable logger that also provides ``exception``.
        :return: Absolute path of the written ``metadata.opf``.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag
            html = html.replace('<title></title>',
                                '<title>%s</title>' % (mi.title, ))
            try:
                html = self.fix_markup(html, log)
            except Exception:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(as_bytes(html))
            zf = ZipFile(stream, 'r')
            try:
                self.extract_pictures(zf)
            finally:
                # The archive is only needed for the pictures
                zf.close()
            opf = OPFCreator(os.path.abspath(getcwd()), mi)
            opf.create_manifest([(os.path.abspath(f2), None)
                                 for f2 in walk(getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Example #39
0
def dump(path):
    '''
    Unpack the zip file at path and pretty print its XML parts.

    The archive is extracted into a directory in the current working
    directory named after the file (without extension) plus ``-dumped``;
    an existing directory of that name is removed first. Every extracted
    ``.xml`` and ``.rels`` file is then re-serialized in place with
    indentation and an XML declaration.
    '''
    base_name = os.path.basename(path)
    dest = os.path.splitext(base_name)[0] + '-dumped'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as archive:
        archive.extractall(dest)

    for extracted in walk(dest):
        if not extracted.endswith(('.xml', '.rels')):
            continue
        with open(extracted, 'r+b') as stream:
            root = etree.fromstring(stream.read())
            # Rewind and empty the file before writing the new content
            stream.seek(0)
            stream.truncate()
            stream.write(
                etree.tostring(root,
                               pretty_print=True,
                               encoding='utf-8',
                               xml_declaration=True))

    print(path, 'dumped to', dest)
Example #40
0
 def __enter__(self, *args):
     '''
     Make the plugin at ``self.plugin_path`` importable via ``sys.path``.

     Nothing happens when ``self.plugin_path`` is None. If the plugin zip
     contains no pyd/so/dll/dylib files, the zip itself is put at the
     front of ``sys.path``. Otherwise it is extracted into a temporary
     directory and that directory is used instead. The path that was
     inserted is stored in ``self.sys_insertion_path``.
     '''
     if self.plugin_path is not None:
         from calibre.utils.zipfile import ZipFile
         zf = ZipFile(self.plugin_path)
         try:
             extensions = set([x.rpartition('.')[-1].lower() for x in
                 zf.namelist()])
             zip_safe = True
             for ext in ('pyd', 'so', 'dll', 'dylib'):
                 if ext in extensions:
                     zip_safe = False
             if zip_safe:
                 sys.path.insert(0, self.plugin_path)
                 self.sys_insertion_path = self.plugin_path
             else:
                 from calibre.ptempfile import TemporaryDirectory
                 self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
                 self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
                 zf.extractall(self.sys_insertion_path)
                 sys.path.insert(0, self.sys_insertion_path)
         finally:
             # Do not leave the archive open when extraction fails
             zf.close()
Example #41
0
 def compile_content_server_translations(self):
     '''
     Build ``content-server/locales.zip`` from the content-server .po files.

     Every ``*.po`` file in the content-server translations folder is
     compiled with ``msgfmt`` to JSON, reduced to the entries that have a
     non-empty translation and stored in the archive under the file's base
     name without extension. The result for each file is cached via
     ``read_cache``/``write_cache`` keyed on the hash of the source.
     '''
     self.info('Compiling content-server translations')
     from calibre.utils.rapydscript import msgfmt
     from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo, ZIP_STORED
     with ZipFile(self.j(self.RESOURCES, 'content-server', 'locales.zip'),
                  'w', ZIP_DEFLATED) as zf:
         for src in glob.glob(
                 os.path.join(self.TRANSLATIONS, 'content-server', '*.po')):
             data, h = self.hash_and_data(src)
             current_hash = h.digest()
             saved_hash, saved_data = self.read_cache(src)
             if current_hash == saved_hash:
                 # Source unchanged: reuse the cached output
                 # NOTE(review): for a catalog without translated entries the
                 # miss branch below caches b'{}' but writes nothing to the
                 # archive. If read_cache returns that value on a later hit,
                 # it is truthy and would be written -- confirm this
                 # asymmetry is intended.
                 raw = saved_data
             else:
                 # self.info('\tParsing ' + os.path.basename(src))
                 raw = None
                 po_data = data.decode('utf-8')
                 data = json.loads(msgfmt(po_data))
                 # Keep only entries with at least one non-empty translation
                 translated_entries = {
                     k: v
                     for k, v in iteritems(data['entries'])
                     if v and sum(map(len, v))
                 }
                 data[u'entries'] = translated_entries
                 data[u'hash'] = h.hexdigest()
                 # Placeholder cached when there is nothing translated
                 cdata = b'{}'
                 if translated_entries:
                     raw = json.dumps(data,
                                      ensure_ascii=False,
                                      sort_keys=True)
                     if isinstance(raw, type(u'')):
                         raw = raw.encode('utf-8')
                     cdata = raw
                 self.write_cache(cdata, current_hash, src)
             if raw:
                 zi = ZipInfo(os.path.basename(src).rpartition('.')[0])
                 # Entries are stored uncompressed when is_ci is set
                 zi.compress_type = ZIP_STORED if is_ci else ZIP_DEFLATED
                 zf.writestr(zi, raw)
Example #42
0
def get_metadata(stream):
    '''
    Read document properties and a cover guess from the zip file in stream.

    ``docprops/core.xml`` and ``docprops/app.xml`` (matched case
    insensitively) are fed to ``_read_doc_props`` and ``_read_app_props``.
    The cover is the first image entry (jpeg, jpg, png or gif) that
    ``identify_data`` can read and whose height/width ratio lies between
    0.8 and 1.8 with an area of at least 12000 pixels.
    '''
    image_extensions = {'jpeg', 'jpg', 'png', 'gif'}
    with ZipFile(stream, 'r') as zf:

        mi = Metadata(_('Unknown'))
        cdata = None

        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if zi.filename.lower() == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
                continue
            if zi.filename.lower() == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
                continue
            if cdata is not None or ext not in image_extensions:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except:
                # Not an image that can be identified, try the next entry
                continue
            aspect_ratio = height / width
            if 0.8 <= aspect_ratio <= 1.8 and height * width >= 12000:
                cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata

    return mi
Example #43
0
 def compile_content_server_translations(self):
     '''
     Build ``content-server/locales.zip`` from the content-server .po files.

     Every ``*.po`` file is compiled with ``msgfmt`` to JSON and reduced to
     the entries that have a non-empty translation. Files with at least one
     such entry are stored, deflated, under their base name without
     extension.
     '''
     self.info('\nCompiling content-server translations')
     from calibre.utils.rapydscript import msgfmt
     from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
     archive_path = self.j(self.RESOURCES, 'content-server', 'locales.zip')
     with ZipFile(archive_path, 'w', ZIP_DEFLATED) as zf:
         pattern = os.path.join(self.TRANSLATIONS, 'content-server', '*.po')
         for src in glob.glob(pattern):
             with open(src, 'rb') as f:
                 po_data = f.read().decode('utf-8')
             data = json.loads(msgfmt(po_data))
             translated = {}
             for key, value in data['entries'].iteritems():
                 # keep only entries with at least one non-empty translation
                 if value and sum(map(len, value)):
                     translated[key] = value
             data['entries'] = translated
             if not translated:
                 continue
             raw = json.dumps(data, ensure_ascii=False, sort_keys=True)
             if isinstance(raw, type(u'')):
                 raw = raw.encode('utf-8')
             entry_name = os.path.basename(src).rpartition('.')[0]
             zi = ZipInfo(entry_name)
             zi.compress_type = ZIP_DEFLATED
             zf.writestr(zi, raw)
Example #44
0
 def handle_zip_of_opf_files(self, stream):
     '''
     Given a zip up of a bunch of opf files, either merge them or add them to library

     For every ``.opf`` entry the metadata is parsed and, when
     ``extract_id`` finds an id in it, either applied to the existing book
     found by ``get_casanova_metadata`` or imported as a new book. For
     every jpg/png/gif entry whose name (up to the first dot, lowercased)
     is a key of ``self.book_map``, the image is set as cover of the
     mapped book.

     A failure while handling one OPF entry is reported with a traceback
     and does not stop the remaining entries; the counters only include
     entries that were handled successfully.

     :param stream: Path or stream of the zip file.
     :return: dict with the number of ``updated`` and ``added`` books.
     '''
     result = {'updated': 0, 'added': 0}
     with ZipFile(stream, 'r') as zf:
         self.start_applying_updates()
         try:
             for zi in zf.infolist():
                 ext = zi.filename.rpartition('.')[-1].lower()
                 if ext == 'opf':
                     try:
                         raw = zf.open(zi)
                         opf = OPF(raw)
                         mi = opf.to_book_metadata()
                         casanova_id = self.extract_id(mi)
                         if casanova_id:
                             book_mi = self.get_casanova_metadata(
                                 casanova_id['id'])
                             if book_mi:
                                 # Update an existing book's metadata!
                                 self.apply_metadata_update(
                                     casanova_id['id'], book_mi, mi)
                                 result['updated'] += 1
                             else:
                                 # Create a new book entry
                                 self.model.db.import_book(mi, [])
                                 result['added'] += 1
                     except Exception:
                         # Best effort: skip the broken entry, but leave a
                         # trace instead of hiding the problem
                         import traceback
                         traceback.print_exc()
                 if ext in {'jpg', 'png', 'gif'}:
                     # try and handle the cover
                     casanova_id = zi.filename.partition('.')[0].lower()
                     if casanova_id in self.book_map:
                         book_id = self.book_map[casanova_id]
                         raw = zf.open(zi)
                         self.db.set_cover(book_id, raw)
         finally:
             # Always pair start_applying_updates with its counterpart
             self.finish_applying_updates()
     return result
Example #45
0
    def convert_cbr_to_cbz(self):
        '''
        Converts a rar or cbr-comic to a cbz-comic

        The current format of the book is extracted into a temporary
        directory, zipped up again (keeping the archive comment, if any)
        and added to the library as cbz format. ``self.format`` is set to
        "cbz" afterwards.
        '''
        from calibre.utils.unrar import extract, comment

        with TemporaryDirectory('_cbr2cbz') as tdir:
            # extract the rar file
            ffile = self.db.format(self.book_id, self.format, as_path=True)
            try:
                extract(ffile, tdir)
                comments = comment(ffile)
            finally:
                # remove the temporary copy even when extraction fails
                delete_temp_file(ffile)

            # make the cbz file
            with TemporaryFile("comic.cbz") as tf:
                zf = ZipFile(tf, "w")
                try:
                    zf.add_dir(tdir)
                    if comments:
                        zf.comment = comments.encode("utf-8")
                finally:
                    zf.close()
                # add the cbz format to calibres library
                self.db.add_format(self.book_id, "cbz", tf)
                self.format = "cbz"
    def embed_cix_metadata(self):
        '''
        Embeds the cix_metadata

        The ComicInfo string built from ``self.comic_metadata`` replaces
        the archive entry named by ``self.zipinfo`` when that is set, and
        is otherwise appended to the archive as "ComicInfo.xml".
        '''
        from io import StringIO

        cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)

        # ensure we have a temp file
        self.make_temp_cbz_file()

        if self.zipinfo is None:
            # no existing entry: save the metadata in the file
            archive = ZipFile(self.file, "a")
            archive.writestr("ComicInfo.xml",
                             cix_string.decode('utf-8', 'ignore'))
            archive.close()
            return

        # use the safe_replace function from calibre to prevent corruption
        with open(self.file, 'r+b') as cbz:
            replacement = StringIO(cix_string.decode('utf-8', 'ignore'))
            safe_replace(cbz, self.zipinfo, replacement)
Example #47
0
 def __enter__(self, *args):
     '''
     Put the plugin at ``self.plugin_path`` at the front of ``sys.path``.

     A plugin zip without pyd/so/dll/dylib files is inserted directly.
     Otherwise the zip is unpacked into a temporary directory and that
     directory is inserted. ``self.sys_insertion_path`` records what was
     inserted. Does nothing when ``self.plugin_path`` is None.
     '''
     if self.plugin_path is None:
         return
     from calibre.utils.zipfile import ZipFile
     archive = ZipFile(self.plugin_path)
     suffixes = {member.rpartition('.')[-1].lower()
                 for member in archive.namelist()}
     needs_unpacking = any(
         ext in suffixes for ext in ('pyd', 'so', 'dll', 'dylib'))
     if needs_unpacking:
         from calibre.ptempfile import TemporaryDirectory
         self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
         self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
         archive.extractall(self.sys_insertion_path)
         sys.path.insert(0, self.sys_insertion_path)
     else:
         sys.path.insert(0, self.plugin_path)
         self.sys_insertion_path = self.plugin_path
     archive.close()
def _initialize_overlay(self):
    '''
    Perform any additional initialization

    Sets the database sub-paths and makes sure the cache directory and
    the "folders.zip" archive inside it exist.
    '''
    self._log_location(self.ios_reader_app)
    self.assets_subpath = '/Media/Books/Sync/Database/OutstandingAssets_4.sqlite'
    self.books_subpath = '/Documents/BKLibrary_database/iBooks_*.sqlite'

    # Confirm/create folder size archive
    cache_dir = self.cache_dir
    if not os.path.exists(cache_dir):
        self._log_diagnostic("creating folder cache at '%s'" % cache_dir)
        os.makedirs(cache_dir)

    archive_path = os.path.join(cache_dir, "folders.zip")
    self.folder_archive_path = archive_path
    if os.path.exists(archive_path):
        self._log_diagnostic("existing folder cache at '%s'" % archive_path)
        return

    # Seed a new archive with a single, empty marker entry
    self._log_diagnostic("creating folder cache")
    archive = ZipFile(archive_path, mode='w', compression=0)
    archive.writestr("%s Folder Size Archive" % self.name, '')
    archive.close()
Example #49
0
def get_cover(opf, opf_path, stream, reader=None):
    '''
    Return the cover image data for the book in stream.

    When the OPF names a raster cover that exists in the archive, its
    bytes are returned. When that cover is marked as encrypted by
    ``reader.encryption_meta``, None is returned. In all other cases the
    result of ``render_cover`` is returned.

    :param opf: Parsed OPF providing ``raster_cover``.
    :param opf_path: Path of the OPF inside the archive; the cover path is
        resolved relative to its directory.
    :param stream: Seekable stream containing the ZIP archive.
    :param reader: Optional object providing ``encryption_meta``.
    '''
    raster_cover = opf.raster_cover
    stream.seek(0)
    zf = ZipFile(stream)
    try:
        if raster_cover:
            base = posixpath.dirname(opf_path)
            cpath = posixpath.normpath(posixpath.join(base, raster_cover))
            if reader is not None and \
                reader.encryption_meta.is_encrypted(cpath):
                return
            try:
                member = zf.getinfo(cpath)
            except Exception:
                # Cover is not in the archive, fall back to rendering
                pass
            else:
                f = zf.open(member)
                try:
                    return f.read()
                finally:
                    f.close()

        # NOTE(review): assumes render_cover is done with zf when it
        # returns, since zf is closed right after -- confirm
        return render_cover(opf, opf_path, zf, reader=reader)
    finally:
        # Close the archive on every path, including the early returns
        zf.close()
    def get_comic_metadata_from_cbz(self):
        '''
        Reads the comic metadata from the comic cbz file as comictagger metadata

        The cix metadata comes from the first archive entry named
        "comicinfo.xml" (in any case), the cbi metadata from the archive
        comment when ComicBookInfo accepts it. Both are then merged by
        ``_get_combined_metadata``.
        '''
        self.make_temp_cbz_file()
        # open the zipfile
        archive = ZipFile(self.file)

        # get cix metadata
        cix_name = next(
            (member for member in archive.namelist()
             if member.lower() == "comicinfo.xml"),
            None)
        if cix_name is not None:
            self.cix_metadata = ComicInfoXml().metadataFromString(
                archive.read(cix_name))
            self.zipinfo = cix_name

        # get the cbi metadata
        archive_comment = archive.comment
        if ComicBookInfo().validateString(archive_comment):
            self.cbi_metadata = ComicBookInfo().metadataFromString(
                archive_comment)
        archive.close()

        # get combined metadata
        self._get_combined_metadata()
Example #51
0
class DOCX(object):
    '''
    Read access to the parts of a DOCX (zip) container.

    The container is either extracted into a temporary directory
    (``extract=True``) or read directly from the zip file. In both cases
    ``self.names`` holds the names of the parts and ``read`` returns the
    raw bytes of a part.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        '''
        :param path_or_stream: Path of the file or an object with ``read``.
        :param log: Logger, ``default_log`` when not given.
        :param extract: Extract the container to a temporary directory
            instead of reading from the zip file on demand.
        :raises InvalidDOCX: If a mandatory part is missing.
        '''
        self.docx_is_transitional = True
        opened_here = not hasattr(path_or_stream, 'read')
        stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
        # Only a file opened here is ours to close, never the caller's stream
        self._owned_stream = stream if opened_here else None
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
            # Everything is read from the extracted copy from here on
            self._close_owned_stream()
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def _close_owned_stream(self):
        ''' Close the file opened by __init__, if there is one. '''
        stream = getattr(self, '_owned_stream', None)
        if stream is not None:
            self._owned_stream = None
            stream.close()

    def init_zipfile(self, stream):
        ''' Read parts on demand from the zip file in stream. '''
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        '''
        Extract the container into a new temporary directory.

        When the regular zip reader fails, ``localunzip.extractall`` is
        tried as a fallback. ``self.names`` maps part names to the
        extracted file paths afterwards.
        '''
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            try:
                zf.extractall(self.tdir)
            finally:
                zf.close()
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        ''' Return True if the container has a part called name. '''
        return name in self.names

    def read(self, name):
        '''
        Return the raw bytes of the part called name.

        :raises KeyError: If the extracted container has no such part. In
            zip mode the callers in this class rely on the archive raising
            KeyError as well.
        '''
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        '''
        Parse ``[Content_Types].xml`` into ``self.content_types`` (by part
        name) and ``self.default_content_types`` (by lowercased extension).

        :raises InvalidDOCX: If the part is missing.
        '''
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        '''
        Return the content type of the part called name.

        An explicit override wins over the default for the extension,
        which wins over ``guess_type``.
        '''
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        '''
        Parse ``_rels/.rels`` into ``self.relationships`` (type -> target)
        and ``self.relationships_rmap`` (target -> type).

        Also decides ``self.docx_is_transitional`` from the relationship
        type used for ``word/document.xml``.

        :raises InvalidDOCX: If the part is missing.
        '''
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        '''
        Name of the main document part.

        Taken from the package relationships, falling back to any part
        called ``document.xml``.

        :raises InvalidDOCX: If no main document can be found.
        '''
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        ''' The parsed main document. '''
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        ''' The relationships of the main document, see get_relationships. '''
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        '''
        Return the relationships of the part called name.

        :return: Tuple ``(by_id, by_type)`` of dicts mapping relationship
            id and type to the target. Targets that are neither external
            nor start with ``#`` are prefixed with the directory of name.
            Both dicts are empty when the part has no relationships file.
        '''
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target

        return by_id, by_type

    def get_document_properties_names(self):
        '''
        Yield the names of the core and the app properties parts, in that
        order.

        Each name is taken from the package relationships, falling back to
        a part called ``docprops/core.xml`` respectively
        ``docprops/app.xml`` (compared case insensitively). None is
        yielded for a part that cannot be found.
        '''
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        '''
        The metadata read from the document and app properties.

        When the properties give no language, the default style language
        from ``word/styles.xml`` is tried. Missing parts are skipped.
        '''
        mi = Metadata(_('Unknown'))
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        # ap_name already includes the fallback to docprops/app.xml; it must
        # not be looked up in the relationships again, as that loses it
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        '''
        Release the zip file, or remove the extracted copy, and close the
        file opened by ``__init__`` if there is one.
        '''
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass
        self._close_owned_stream()
Example #52
0
 def init_zipfile(self, stream):
     '''
     Open stream as a zip file, kept in ``self.zipf``, and store the names
     of its members as a frozenset in ``self.names``.
     '''
     archive = ZipFile(stream)
     self.zipf = archive
     self.names = frozenset(archive.namelist())
Example #53
0
                              report_progress=notification,
                              abort_after_input_dump=False)
            plumber.merge_ui_recommendations(recommendations)
            plumber.run()

            try:
                os.remove(cpath)
            except:
                pass

            if GENERATE_DEBUG_EPUB:
                from calibre.ebooks.epub import initialize_container
                from calibre.ebooks.tweak import zip_rebuilder
                from calibre.utils.zipfile import ZipFile
                input_path = os.path.join(catalog_debug_path, 'input')
                epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
                initialize_container(epub_shell, opf_name='content.opf')
                with ZipFile(epub_shell, 'r') as zf:
                    zf.extractall(path=input_path)
                os.remove(epub_shell)
                zip_rebuilder(input_path,
                              os.path.join(catalog_debug_path, 'input.epub'))

            if opts.verbose:
                log.info(" Catalog creation complete (%s)\n" % str(
                    datetime.timedelta(seconds=int(time.time() -
                                                   opts.start_time))))

        # returns to gui2.actions.catalog:catalog_generated()
        return catalog.error
Example #54
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write oeb_book to output_path as an HTMLZ archive.

        The book is rendered to a single HTML file by the htmlizer selected
        through ``opts.htmlz_css_type``. That file, an optional external
        stylesheet, the images referenced by the HTML, the cover (when the
        book has one) and ``metadata.opf`` are written to a temporary
        directory, which is then zipped up.

        :param oeb_book: The book to convert.
        :param output_path: Destination of the archive.
        :param input_plugin: Not used here.
        :param opts: Conversion options; the ``htmlz_*`` options are read.
        :param log: Logger handed to the htmlizer.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(
                    unicode_type(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(htmlizer.get_css(oeb_book))

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, u'images')):
                    os.makedirs(os.path.join(tdir, u'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            data = unicode_type(
                                etree.tostring(item.data,
                                               encoding=unicode_type))
                        else:
                            data = item.data
                        # The file is opened in binary mode, so text (the
                        # serialized SVG) has to be encoded first
                        if isinstance(data, unicode_type):
                            data = data.encode('utf-8')
                        fname = os.path.join(tdir, u'images',
                                             images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A missing or broken cover must not fail the conversion
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(
                    io.BytesIO(
                        etree.tostring(oeb_book.metadata.to_opf1(),
                                       encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                # Close explicitly so the archive is complete on disk when
                # this method returns
                htmlz.close()
Example #55
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    '''
    Write an empty book carrying the metadata ``mi`` to ``path``.

    ``fmt`` must be a member of ``valid_empty_formats``, otherwise a
    ValueError is raised. ``txt`` and ``docx`` are produced directly; every
    other format is assembled from three generated files (the OPF, a single
    HTML page built from the ``new_book.html`` template, and an NCX table of
    contents) stored under ``opf_name``, ``html_name`` and ``toc_name``. For
    ``azw3`` those files are written to a temporary directory and converted;
    otherwise they are written into a ZIP container at ``path``.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)

    if fmt == 'txt':
        # The file is always created; it only gets content if a title is set.
        with open(path, 'wb') as out:
            if not mi.is_null('title'):
                out.write(as_bytes(mi.title))
        return

    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        plumber = Plumber('a.docx', 'b.docx', default_log)
        plumber.setup_options()
        # Use the word default of one inch page margins
        for side in ('left', 'right', 'top', 'bottom'):
            setattr(plumber.opts, 'margin_' + side, 72)
        writer = DOCX(plumber.opts, default_log)
        writer.write(path, mi, create_empty_document=True)
        return

    path = os.path.abspath(path)
    opf = metadata_to_opf(mi, as_string=False)

    # First non-empty <language> element wins, 'und' when there is none.
    declared = (el.text for el in opf.xpath('//*[local-name()="language"]')
                if el.text)
    lang = next(declared, 'und')
    lang = lang_as_iso639_1(lang) or lang

    ns = OPF_NAMESPACES['opf']

    def tag(local_name):
        # Namespace-qualified tag name in the OPF namespace.
        return '{%s}%s' % (ns, local_name)

    # Manifest: the HTML page and the NCX file.
    manifest = opf.makeelement(tag('manifest'))
    opf.insert(1, manifest)
    manifest_items = (
        (html_name, 'start', 'a.xhtml'),
        (toc_name, 'ncx', toc_name),
    )
    for href, item_id, type_source in manifest_items:
        entry = manifest.makeelement(tag('item'), href=href, id=item_id)
        entry.set('media-type', guess_type(type_source))
        manifest.append(entry)

    # Spine: a single reference to the HTML page.
    spine = opf.makeelement(tag('spine'), toc="ncx")
    opf.insert(2, spine)
    spine.append(spine.makeelement(tag('itemref'), idref='start'))

    container_xml = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill in the placeholders of the HTML template one at a time.
    template = P('templates/new_book.html', data=True).decode('utf-8')
    template = template.replace('_LANGUAGE_',
                                prepare_string_for_xml(lang, True))
    template = template.replace('_TITLE_', prepare_string_for_xml(mi.title))
    template = template.replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors)))
    html_tree = parse(template.encode('utf-8'))
    pretty_html_tree(None, html_tree)
    html_bytes = serialize(html_tree, 'text/html')

    xml_out = dict(encoding='utf-8', xml_declaration=True, pretty_print=True)
    ncx_bytes = etree.tostring(create_toc(mi, opf, html_name, lang),
                               **xml_out)
    pretty_xml_tree(opf)
    opf_bytes = etree.tostring(opf, **xml_out)

    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            generated = (
                (opf_name, opf_bytes),
                (html_name, html_bytes),
                (toc_name, ncx_bytes),
            )
            for fname, payload in generated:
                with open(fname, 'wb') as out:
                    out.write(payload)
            container = Container(
                os.path.dirname(os.path.abspath(opf_name)), opf_name,
                DevNull())
            opf_to_azw3(opf_name, path, container)
        return

    with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
        zf.writestr('mimetype',
                    b'application/epub+zip',
                    compression=ZIP_STORED)
        zf.writestr('META-INF/', b'', 0o755)
        zf.writestr('META-INF/container.xml', container_xml)
        zf.writestr(opf_name, opf_bytes)
        zf.writestr(html_name, html_bytes)
        zf.writestr(toc_name, ncx_bytes)
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive into the current directory and prepare its OPF.

        The archive in ``stream`` is extracted (falling back to the more
        forgiving ``localunzip`` parser on any error), the OPF is located,
        manifest/guide hrefs are rebased when the OPF is not at the top
        level, the cover is rationalized and unusable spine entries are
        dropped. The resulting OPF is written to ``content.opf`` and its
        absolute path is returned.

        Raises ValueError when no OPF can be found, when DTBook markup is
        present or when the spine ends up empty, and DRMError when a
        ``rights.xml`` file exists after extraction.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        def _cwd():
            # Python 2 needs the unicode-returning variant.
            if sys.version_info.major == 2:
                return os.getcwdu()
            return os.getcwd()

        try:
            archive = ZipFile(stream)
            target_dir = _cwd()
            archive.extractall(target_dir)
        except Exception:
            log.exception("KEPUB appears to be invalid ZIP file, trying a "
                          "more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        opf_path = self.find_opf()
        if opf_path is None:
            # Fall back to the first visible .opf file outside __MACOSX.
            opf_path = next(
                (os.path.abspath(candidate) for candidate in walk(".")
                 if candidate.lower().endswith(".opf")
                 and "__MACOSX" not in candidate
                 and not os.path.basename(candidate).startswith(".")),
                None)
        path = getattr(stream, "name", "stream")

        if opf_path is None:
            message = _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)")
            raise ValueError(message.format(path))

        if os.path.exists(os.path.abspath("rights.xml")):
            raise DRMError(os.path.basename(path))

        rel_opf = os.path.relpath(opf_path, _cwd())
        parts = os.path.split(rel_opf)
        opf = OPF(rel_opf, os.path.dirname(os.path.abspath(rel_opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF lives in a sub-directory: prefix every manifest and
            # guide href with that directory.
            prefix = "/".join(parts[:-1]) + "/"
            for iterate in (opf.itermanifest, opf.iterguide):
                for elem in iterate():
                    elem.set("href", prefix + elem.get("href"))

        if opf.package_version >= 3.0:
            rationalize = self.rationalize_cover3
        else:
            rationalize = self.rationalize_cover2
        self.removed_cover = rationalize(opf, log)

        self.optimize_opf_parsing = opf
        for entry in opf.itermanifest():
            if entry.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError(
                    _("EPUB files with DTBook markup are not supported"
                      )  # noqa: F821
                )

        # Manifest ids whose media type must never appear in the spine.
        excluded_types = {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }
        not_for_spine = set()
        for entry in opf.itermanifest():
            entry_id = entry.get("id", None)
            if entry_id and entry.get("media-type", None) in excluded_types:
                not_for_spine.add(entry_id)

        # Drop spine items that are empty, excluded or duplicated.
        seen = set()
        for itemref in list(opf.iterspine()):
            ref = itemref.get("idref", None)
            if ref and ref not in not_for_spine and ref not in seen:
                seen.add(ref)
            else:
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as out:
            out.write(opf.render())

        return os.path.abspath("content.opf")
Example #57
0
    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators):
        '''
        Obtain a recipe, run it if necessary and return the OPF it produced.

        When ``file_ext`` is ``'downloaded_recipe'``, ``recipe_or_file`` is
        opened as a ZIP archive and extracted with ``extractall()``; the
        ``download.recipe`` file is then compiled and instantiated, but
        ``download()`` is not called on it.

        Otherwise ``recipe_or_file`` is used as a recipe file if it is
        readable. If not, it is treated as the title of a builtin recipe:
        the downloaded version is tried first and the bundled one is used
        when the downloaded recipe fails to compile or requires a newer
        calibre. The recipe is then instantiated and ``download()`` is run.

        In both cases the recipe's ``conversion_options`` are copied onto
        ``opts``.

        :return: Absolute path of the first ``.opf`` file found in the
                 current directory (its direct entries are checked before
                 a ``walk``); ``None`` when there is none.
        :raises ValueError: if no builtin recipe matches or the recipe
                            compiles to ``None``.
        :raises RecipeDisabled: if the recipe defines ``recipe_disabled``.
        '''
        from calibre.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            try:
                zf.extractall()
            finally:
                # Release the archive even when extraction fails.
                zf.close()
            with open(u'download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.access(recipe_or_file, os.R_OK):
                with open(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from calibre.web.feeds.recipes.collection import \
                        get_builtin_recipe_by_title
                title = getattr(opts, 'original_recipe_input_arg',
                        recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        # The version components are compared against
                        # numeric_version, so they must be stringified
                        # before joining; otherwise the join itself fails
                        # and is misreported below as a compile failure.
                        log.warn(
                            'Downloaded recipe needs calibre version at least: %s' %
                            ('.'.join(map(str, recipe.requires_version))))
                        builtin = True
                except Exception:
                    log.exception('Failed to compile downloaded recipe. Falling '
                            'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title, log=log,
                            download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: '+title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError('%r is not a valid recipe file or builtin recipe' %
                        recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        # Prefer an OPF directly in the current directory over nested ones.
        for f in os.listdir(u'.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        for f in walk(u'.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Example #58
0
 def init_zipfile(self, stream):
     """Open ``stream`` as a ZIP archive and record its member names.

     Stores the archive in ``self.zipf`` and the names reported by
     ``namelist()`` as a frozenset in ``self.names``.
     """
     archive = ZipFile(stream)
     self.zipf = archive
     self.names = frozenset(archive.namelist())
Example #59
0
class DOCX(object):
    '''
    Read access to the parts of a DOCX file, which is handled as a ZIP
    archive.

    The archive is either extracted into a temporary directory
    (``extract=True``, ``self.names`` then maps part names to file paths)
    or kept open as a ZIP file (``extract=False``, ``self.names`` is then a
    frozenset of part names and ``self.zipf`` the open archive). Call
    :meth:`close` when done to release the archive or delete the
    temporary directory.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        '''
        :param path_or_stream: An object with a ``read`` attribute is used
            as is; anything else is opened with ``open(..., 'rb')``.
        :param log: Logger to use, ``default_log`` when not given.
        :param extract: Extract the archive to a temporary directory
            instead of reading parts from the ZIP file on demand.
        :raises InvalidDOCX: if ``[Content_Types].xml`` or ``_rels/.rels``
            is missing.
        '''
        opened_here = not hasattr(path_or_stream, 'read')
        stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        # Only a file opened by this constructor is ever closed by this
        # class; a stream passed in by the caller stays under their control.
        self._owned_stream = None
        if extract:
            try:
                self.extract(stream)
            finally:
                # Everything is on disk now, the source is no longer needed.
                if opened_here:
                    stream.close()
        else:
            try:
                self.init_zipfile(stream)
            except Exception:
                if opened_here:
                    stream.close()
                raise
            if opened_here:
                # The ZIP file reads from this handle until close().
                self._owned_stream = stream
        self.read_content_types()
        self.read_package_relationships()

    def init_zipfile(self, stream):
        ''' Open ``stream`` as a ZIP file and record its member names. '''
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        '''
        Extract ``stream`` into a new temporary directory and map every
        extracted file to its ``/``-separated name relative to it.

        If the normal ZIP reader fails, the more forgiving ``localunzip``
        parser is tried on the rewound stream.
        '''
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            try:
                zf.extractall(self.tdir)
            finally:
                zf.close()
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        ''' Return True if the container has a part called ``name``. '''
        return name in self.names

    def read(self, name):
        '''
        Return the raw bytes of the part ``name``.

        :raises KeyError: if there is no such part.
        '''
        if hasattr(self, 'zipf'):
            return self.zipf.read(name)
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        '''
        Parse ``[Content_Types].xml`` into ``self.content_types`` (part name
        to content type) and ``self.default_content_types`` (lower-cased
        extension to content type).
        '''
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        '''
        Return the content type of ``name``: an explicit override first,
        then the default for its extension, then a guess from the name.
        '''
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        '''
        Parse ``_rels/.rels`` into ``self.relationships`` (type to target)
        and ``self.relationships_rmap`` (target to type).
        '''
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        '''
        Name of the main document part, taken from the package
        relationships or, failing that, the first part named
        ``document.xml``.
        '''
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        ''' The parsed main document part. '''
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        ''' The relationships of the main document part. '''
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        '''
        Return ``(by_id, by_type)`` dictionaries mapping relationship ids
        and types to targets for the part ``name``. Both are empty when the
        part has no ``.rels`` file. Targets that are neither external nor
        fragment-only are prefixed with the directory of ``name``.
        '''
        # NOTE(review): when ``name`` has no directory component, base is ''
        # and internal targets come out with a leading '/'. Confirm that
        # callers expect this.
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target

        return by_id, by_type

    @property
    def metadata(self):
        '''
        Metadata built from the core properties part, the default style
        language (only if no language was found yet) and the application
        properties part. Missing parts are skipped.
        '''
        mi = Metadata(_('Unknown'))
        name = self.relationships.get(DOCPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi)

        name = self.relationships.get(APPPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        '''
        Close the ZIP file, or delete the extraction directory (errors
        while deleting are ignored), and close the source file if it was
        opened by the constructor.
        '''
        try:
            if hasattr(self, 'zipf'):
                self.zipf.close()
            else:
                try:
                    shutil.rmtree(self.tdir)
                except EnvironmentError:
                    pass
        finally:
            # Instances created without __init__ have no _owned_stream.
            owned = getattr(self, '_owned_stream', None)
            if owned is not None:
                self._owned_stream = None
                owned.close()