def __enter__(self, *args):
    '''
    Make the plugin's contents directly importable by adding it to
    ``sys.path``. Useful when bundling large python libraries into the
    plugin. Use it like this::

        with plugin:
            import something
    '''
    if self.plugin_path is None:
        return
    from calibre.utils.zipfile import ZipFile
    archive = ZipFile(self.plugin_path)
    # Native extension modules cannot be imported from inside a zip file
    suffixes = {entry.rpartition('.')[-1].lower() for entry in archive.namelist()}
    has_native_code = any(s in suffixes for s in ('pyd', 'so', 'dll', 'dylib'))
    if not has_native_code:
        # Safe to import straight out of the zip file
        sys.path.insert(0, self.plugin_path)
        self.sys_insertion_path = self.plugin_path
    else:
        # Unpack to a temporary directory and import from there instead
        from calibre.ptempfile import TemporaryDirectory
        self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
        self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
        archive.extractall(self.sys_insertion_path)
        sys.path.insert(0, self.sys_insertion_path)
    archive.close()
def __enter__(self, *args):
    '''
    Add this plugin to the python path so that it's contents become
    directly importable. Useful when bundling large python libraries
    into the plugin. Use it like this::

        with plugin:
            import something
    '''
    if self.plugin_path is None:
        return
    from calibre.utils.zipfile import ZipFile
    zf = ZipFile(self.plugin_path)
    exts = {entry.rpartition('.')[-1].lower() for entry in zf.namelist()}
    # Compiled extension modules cannot be imported from a zip archive
    if {'pyd', 'so', 'dll', 'dylib'}.isdisjoint(exts):
        sys.path.insert(0, self.plugin_path)
        self.sys_insertion_path = self.plugin_path
    else:
        # Unpack into a temporary directory and import from there
        from calibre.ptempfile import TemporaryDirectory
        self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
        self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
        zf.extractall(self.sys_insertion_path)
        sys.path.insert(0, self.sys_insertion_path)
    zf.close()
def get_cover(opf, opf_path, stream, reader=None):
    # Return the raw cover image bytes for the epub in `stream`, or fall
    # back to rendering a cover when no usable raster cover entry exists.
    # `reader`, when given, supplies encryption metadata so encrypted
    # covers are skipped.
    raster_cover = opf.raster_cover
    stream.seek(0)
    try:
        zf = ZipFile(stream)
    except:
        # Not a strictly valid zip; retry with the more forgiving parser
        stream.seek(0)
        zf = LocalZipFile(stream)
    if raster_cover:
        # Cover path in the OPF is relative to the OPF's own directory
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and \
            reader.encryption_meta.is_encrypted(cpath):
            # Encrypted cover cannot be extracted directly
            return
        try:
            member = zf.getinfo(cpath)
        except:
            # Entry missing from the archive; fall through to render_cover
            pass
        else:
            f = zf.open(member)
            data = f.read()
            f.close()
            zf.close()
            return data
    # No raster cover found; render one (render_cover takes ownership of zf)
    return render_cover(opf, opf_path, zf, reader=reader)
def get_picture_size(self):
    # Estimate the page size (in megapixels, rounded to 2 decimals) of the
    # comic by probing up to 9 entries of the cbz archive for the first
    # portrait-oriented image.
    from calibre.utils.magick import Image
    self.make_temp_cbz_file()
    zf = ZipFile(self.file)
    files = zf.namelist()
    size_x, size_y = 0, 0
    # Start at 1: entry 0 is skipped (presumably the cover -- TODO confirm)
    index = 1
    while index < 10 and index < len(files):
        fname = files[index]
        if fname.lower().rpartition('.')[-1] in IMG_EXTENSIONS:
            with zf.open(fname) as ffile:
                img = Image()
                try:
                    img.open(ffile)
                    size_x, size_y = img.size
                except:
                    # Unreadable image: keep the previous size values
                    pass
            if size_x < size_y:
                # Portrait image found; treat it as a representative page
                break
        index += 1
    zf.close()
    size = round(size_x * size_y / 1000000, 2)
    return size
def update_cover(self):
    '''Embed the current calibre cover into the cbz file.'''
    # Export calibre's cover to a temporary file
    cover_path = self.db.cover(self.book_id, as_path=True)
    ext = cover_path.rpartition('.')[-1]
    new_cover_name = "00000000_cover." + ext
    self.make_temp_cbz_file()
    # Look for a cover embedded by a previous run
    archive = ZipFile(self.file)
    existing = next((entry for entry in archive.namelist()
                     if entry.rsplit(".", 1)[0] == "00000000_cover"), "")
    archive.close()
    if existing != "":
        # Replace the previously embedded cover in place
        with open(self.file, 'r+b') as stream, open(cover_path, 'r+b') as cp:
            safe_replace(stream, existing, cp)
    else:
        # No embedded cover yet; append the image to the archive
        archive = ZipFile(self.file, "a")
        archive.write(cover_path, new_cover_name)
        archive.close()
    delete_temp_file(cover_path)
def convert_to_cbz(self):
    '''
    Converts a cbr-comic to a cbz-comic
    '''
    from calibre.utils.unrar import RARFile, extract
    with TemporaryDirectory('_cbr2cbz') as tdir:
        # Unpack the rar archive into a scratch directory
        rar_path = self.db.format(self.book_id, "cbr", as_path=True)
        extract(rar_path, tdir)
        # Preserve the rar archive comment, if any
        with open(rar_path, 'rb') as stream:
            comment = RARFile(stream, get_comment=True).comment
        delete_temp_file(rar_path)
        # Repack the extracted pages as a zip (cbz)
        with TemporaryFile("comic.cbz") as cbz_path:
            archive = ZipFile(cbz_path, "w")
            archive.add_dir(tdir)
            archive.close()
            # Carry the comment over to the new archive
            if comment:
                writeZipComment(cbz_path, comment)
            # Register the new cbz with calibre's library
            self.db.add_format(self.book_id, "cbz", cbz_path)
            self.format = "cbz"
def update_cover(self):
    '''
    Embed the current calibre cover into the cbz file, replacing any
    previously embedded cover.
    '''
    # get the calibre cover
    cover_path = self.db.cover(self.book_id, as_path=True)
    fmt = cover_path.rpartition('.')[-1]
    new_cover_name = "00000000_cover." + fmt
    self.make_temp_cbz_file()
    # search for a previously embedded cover
    zf = ZipFile(self.file)
    cover_info = ""
    for name in zf.namelist():
        if name.rsplit(".", 1)[0] == "00000000_cover":
            cover_info = name
            break
    # FIX: close the archive before self.file is re-opened below. The
    # original left it open, leaking the handle and risking corruption of
    # the in-place replacement (the sibling variant of this method closes it).
    zf.close()
    # delete previous cover
    if cover_info != "":
        with open(self.file, 'r+b') as zf, open(cover_path, 'r+b') as cp:
            safe_replace(zf, cover_info, cp)
    # save the cover in the file
    else:
        zf = ZipFile(self.file, "a")
        zf.write(cover_path, new_cover_name)
        zf.close()
    delete_temp_file(cover_path)
def safe_replace(self, name, datastream, extra_replacements=None,
                 add_missing=False):
    '''
    Rebuild the zip archive, replacing the entry `name` with the contents
    of `datastream` (plus any entries in `extra_replacements`, a mapping of
    entry name -> file-like object). When `add_missing` is True, replacement
    entries not already present in the archive are appended. The rebuilt
    archive is copied back over ``self.stream`` in place.
    '''
    from calibre.utils.zipfile import ZipFile, ZipInfo
    # FIX: avoid a mutable default argument for extra_replacements
    if extra_replacements is None:
        extra_replacements = {}
    replacements = {name: datastream}
    replacements.update(extra_replacements)
    names = frozenset(replacements.keys())
    found = set()
    # Spool the rebuilt archive in memory, overflowing to disk past 100MB
    with SpooledTemporaryFile(max_size=100 * 1024 * 1024) as temp:
        ztemp = ZipFile(temp, 'w')
        for offset, header in self.file_info.itervalues():
            if header.filename in names:
                # Recreate the entry with the new data, preserving the
                # original compression method
                zi = ZipInfo(header.filename)
                zi.compress_type = header.compression_method
                ztemp.writestr(zi, replacements[header.filename].read())
                found.add(header.filename)
            else:
                ztemp.writestr(header.filename,
                               self.read(header.filename, spool_size=0))
        if add_missing:
            for name in names - found:
                ztemp.writestr(name, replacements[name].read())
        ztemp.close()
        # Copy the rebuilt archive back over the original stream
        zipstream = self.stream
        temp.seek(0)
        zipstream.seek(0)
        zipstream.truncate()
        shutil.copyfileobj(temp, zipstream)
        zipstream.flush()
def safe_replace(self, name, datastream, extra_replacements=None,
                 add_missing=False):
    '''
    Rebuild the zip archive, replacing the entry `name` with the contents
    of `datastream` (plus any entries in `extra_replacements`, a mapping of
    entry name -> file-like object). When `add_missing` is True, replacement
    entries not already present in the archive are appended. The rebuilt
    archive is copied back over ``self.stream`` in place.
    '''
    from calibre.utils.zipfile import ZipFile, ZipInfo
    # FIX: avoid a mutable default argument for extra_replacements
    if extra_replacements is None:
        extra_replacements = {}
    replacements = {name: datastream}
    replacements.update(extra_replacements)
    names = frozenset(replacements.keys())
    found = set()
    # Spool the rebuilt archive in memory, overflowing to disk past 100MB
    with SpooledTemporaryFile(max_size=100 * 1024 * 1024) as temp:
        ztemp = ZipFile(temp, 'w')
        for offset, header in self.file_info.itervalues():
            if header.filename in names:
                # Recreate the entry with the new data, preserving the
                # original compression method
                zi = ZipInfo(header.filename)
                zi.compress_type = header.compression_method
                ztemp.writestr(zi, replacements[header.filename].read())
                found.add(header.filename)
            else:
                ztemp.writestr(header.filename,
                               self.read(header.filename, spool_size=0))
        if add_missing:
            for name in names - found:
                ztemp.writestr(name, replacements[name].read())
        ztemp.close()
        # Copy the rebuilt archive back over the original stream
        zipstream = self.stream
        temp.seek(0)
        zipstream.seek(0)
        zipstream.truncate()
        shutil.copyfileobj(temp, zipstream)
        zipstream.flush()
def get_comic_metadata_from_cbz(self):
    '''
    Reads the comic metadata from the comic cbz file as comictagger metadata
    '''
    self.make_temp_cbz_file()
    archive = ZipFile(self.file)
    count_pages = prefs['auto_count_pages']
    # Scan for a ComicInfo.xml (cix) entry; optionally count image entries
    # as pages along the way
    for entry in archive.namelist():
        lowered = entry.lower()
        if lowered == "comicinfo.xml":
            self.cix_metadata = ComicInfoXml().metadataFromString(
                archive.read(entry))
            self.zipinfo = entry
            if not count_pages:
                # Nothing else to collect, stop scanning
                break
        elif count_pages and lowered.rpartition('.')[-1] in IMG_EXTENSIONS:
            self.pages += 1
    # cbi metadata lives in the zip comment
    if ComicBookInfo().validateString(archive.comment):
        self.cbi_metadata = ComicBookInfo().metadataFromString(archive.comment)
    archive.close()
    # merge cix and cbi into the combined metadata
    self._get_combined_metadata()
def count_pages(self):
    '''Return the number of image entries (pages) in the cbz file.'''
    self.make_temp_cbz_file()
    archive = ZipFile(self.file)
    total = sum(1 for entry in archive.namelist()
                if entry.lower().rpartition('.')[-1] in IMG_EXTENSIONS)
    archive.close()
    return total
def run(epub, opts, log):
    '''Unpack `epub`, run every registered epub fixer on it, repack it.'''
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            # Unpack so fixers can operate on the individual files
            archive = ZipFile(epub)
            archive.extractall()
            archive.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                # Each fixer only applies its fix when requested via opts
                should_fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=should_fix)
            container.write(epub)
def make_zip(zipfile_name, base_dir):
    '''Zip the contents of `base_dir` into `zipfile_name`.epub and return
    the path to the created file.'''
    # The output is always named <zipfile_name>.epub
    zipfile_name = os.path.abspath(zipfile_name) + '.epub'
    archive = ZipFile(zipfile_name, 'w', ZIP_DEFLATED)
    # The epub container spec requires mimetype to be the first entry,
    # stored without compression
    archive.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
    for dir_path, _, file_names in os.walk(base_dir):
        # Archive paths are relative to base_dir
        rel_dir = dir_path.replace(base_dir, '')
        for name in file_names:
            src = os.path.normpath(os.path.join(dir_path, name))
            if not os.path.isfile(src):
                continue
            dest = os.path.normpath(os.path.join(rel_dir, name))
            archive.write(src, dest)
    archive.close()
    return zipfile_name
def embed_cbi_metadata(self):
    '''
    Embeds the cbi_metadata
    '''
    cbi_string = ComicBookInfo().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    # cbi metadata is stored in the zip archive comment
    archive = ZipFile(self.file, 'a')
    archive.comment = cbi_string.encode("utf-8")
    # force the (otherwise unmodified) archive to rewrite its comment on close
    archive._didModify = True
    archive.close()
def write(self, path):
    '''Flush dirty cached resources to disk and zip the tree up as an epub
    at `path`.'''
    for name in self.dirtied:
        data = self.cache[name]
        if hasattr(data, 'xpath'):
            # Parsed lxml tree: serialize it back to bytes
            raw = etree.tostring(data, encoding='utf-8', xml_declaration=True)
        else:
            raw = data
        with open(self.name_map[name], 'wb') as f:
            f.write(raw)
    self.dirtied.clear()
    archive = ZipFile(path, 'w')
    # epub container spec: mimetype first, stored uncompressed
    archive.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                     compression=ZIP_STORED)
    archive.add_dir(self.root)
    archive.close()
def dump_input(self, ret, output_dir):
    '''Save the raw input-plugin output under the debug pipeline directory
    for later inspection.'''
    out_dir = os.path.join(self.opts.debug_pipeline, 'input')
    if isinstance(ret, basestring):
        # Plugin returned a path: the input is already on disk, copy it
        shutil.copytree(output_dir, out_dir)
    else:
        # Plugin returned an OEB book object: serialize it
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        self.dump_oeb(ret, out_dir)
    if self.input_fmt == 'recipe':
        # Bundle the downloaded recipe data so the run can be reproduced
        archive = ZipFile(os.path.join(self.opts.debug_pipeline,
                                       'periodical.downloaded_recipe'), 'w')
        archive.add_dir(out_dir)
        with self.input_plugin:
            self.input_plugin.save_download(archive)
        archive.close()
    self.log.info('Input debug saved to:', out_dir)
def embed_cix_metadata(self):
    '''
    Embeds the cix_metadata
    '''
    from io import StringIO
    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    payload = cix_string.decode('utf-8', 'ignore')
    if self.zipinfo is not None:
        # A ComicInfo.xml entry already exists; replace it with calibre's
        # safe_replace to prevent corrupting the archive
        with open(self.file, 'r+b') as stream:
            safe_replace(stream, self.zipinfo, StringIO(payload))
    else:
        # No previous metadata entry, just append a fresh one
        archive = ZipFile(self.file, "a")
        archive.writestr("ComicInfo.xml", payload)
        archive.close()
def __enter__(self, *args):
    '''Add the plugin zip to sys.path, unpacking it first when it contains
    native extension modules (which cannot be imported from a zip).'''
    if self.plugin_path is None:
        return
    from calibre.utils.zipfile import ZipFile
    archive = ZipFile(self.plugin_path)
    suffixes = {name.rpartition('.')[-1].lower() for name in archive.namelist()}
    zip_safe = not any(ext in suffixes for ext in ('pyd', 'so', 'dll', 'dylib'))
    if zip_safe:
        # Import directly from the zip file
        sys.path.insert(0, self.plugin_path)
        self.sys_insertion_path = self.plugin_path
    else:
        # Unpack to a temporary directory and import from there
        from calibre.ptempfile import TemporaryDirectory
        self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
        self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
        archive.extractall(self.sys_insertion_path)
        sys.path.insert(0, self.sys_insertion_path)
    archive.close()
def _initialize_overlay(self):
    '''
    Perform any additional initialization
    '''
    self._log_location(self.ios_reader_app)
    self.assets_subpath = '/Media/Books/Sync/Database/OutstandingAssets_4.sqlite'
    self.books_subpath = '/Documents/BKLibrary_database/iBooks_*.sqlite'

    # Confirm/create folder size archive
    if not os.path.exists(self.cache_dir):
        self._log_diagnostic("creating folder cache at '%s'" % self.cache_dir)
        os.makedirs(self.cache_dir)

    self.folder_archive_path = os.path.join(self.cache_dir, "folders.zip")
    if os.path.exists(self.folder_archive_path):
        self._log_diagnostic("existing folder cache at '%s'" %
                             self.folder_archive_path)
    else:
        # Seed the cache archive with a single marker entry
        self._log_diagnostic("creating folder cache")
        zfw = ZipFile(self.folder_archive_path, mode='w', compression=0)
        zfw.writestr("%s Folder Size Archive" % self.name, '')
        zfw.close()
def embed_cix_metadata(self):
    '''
    Embeds the cix_metadata
    '''
    from io import StringIO
    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    decoded = cix_string.decode('utf-8', 'ignore')
    if self.zipinfo is None:
        # No ComicInfo.xml present yet; append the metadata entry
        archive = ZipFile(self.file, "a")
        archive.writestr("ComicInfo.xml", decoded)
        archive.close()
    else:
        # Replace the existing entry in place, via safe_replace, to
        # prevent corrupting the archive
        with open(self.file, 'r+b') as stream:
            safe_replace(stream, self.zipinfo, StringIO(decoded))
def get_comic_metadata_from_cbz(self):
    '''
    Reads the comic metadata from the comic cbz file as comictagger metadata
    '''
    self.make_temp_cbz_file()
    archive = ZipFile(self.file)
    # cix metadata lives in a ComicInfo.xml entry
    for entry in archive.namelist():
        if entry.lower() == "comicinfo.xml":
            self.cix_metadata = ComicInfoXml().metadataFromString(
                archive.read(entry))
            self.zipinfo = entry
            break
    # cbi metadata lives in the zip archive comment
    if ComicBookInfo().validateString(archive.comment):
        self.cbi_metadata = ComicBookInfo().metadataFromString(archive.comment)
    archive.close()
    # merge cix and cbi into the combined metadata
    self._get_combined_metadata()
def convert_cbr_to_cbz(self):
    '''
    Converts a rar or cbr-comic to a cbz-comic
    '''
    from calibre.utils.unrar import extract, comment
    with TemporaryDirectory('_cbr2cbz') as tdir:
        # Unpack the rar archive and remember its comment
        rar_path = self.db.format(self.book_id, self.format, as_path=True)
        extract(rar_path, tdir)
        comments = comment(rar_path)
        delete_temp_file(rar_path)
        # Repack as a zip (cbz), carrying the comment over
        with TemporaryFile("comic.cbz") as cbz_path:
            archive = ZipFile(cbz_path, "w")
            archive.add_dir(tdir)
            if comments:
                archive.comment = comments.encode("utf-8")
            archive.close()
            # Register the new cbz with calibre's library
            self.db.add_format(self.book_id, "cbz", cbz_path)
            self.format = "cbz"
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve `recipe_or_file` (a previously downloaded recipe bundle, a
    # recipe urn from the environment, a readable recipe file, or a builtin
    # recipe title), compile it, run the download and return the path to
    # the generated OPF.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Re-use an already downloaded recipe bundle (a zip containing
        # download.recipe plus the downloaded content)
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        with lopen('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        # Content was already fetched, no login needed
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            # Recipe identified by urn ("custom:<id>" or builtin id)
            from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            # A readable .recipe file on disk
            with lopen(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the argument as the title of a builtin recipe
            from calibre.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry without stripping the directory part (titles may
                # themselves contain a dot)
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = title.rpartition('.')[0]
            raw = get_builtin_recipe_by_title(title, log=log,
                    download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # Downloaded recipe is too new for this calibre
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception('Failed to compile downloaded recipe. Falling '
                        'back to builtin one')
                builtin = True
            if builtin:
                # Fall back to the recipe bundled with calibre
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
        if recipe is None:
            raise ValueError('%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        # Instantiate the recipe and perform the actual download
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The download left an OPF in the current directory; find and return it
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
class DOCX(object):
    # Container for a .docx file (an OOXML zip package). Provides access to
    # the parts of the package, the content-type map, the package
    # relationships and the document metadata.

    def __init__(self, path_or_stream, log=None, extract=True):
        # `path_or_stream` may be a filesystem path or a file-like object.
        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            # Unpack to a temp dir (more robust for slightly broken zips)
            self.extract(stream)
        else:
            # Read parts directly from the zip file
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()

    def init_zipfile(self, stream):
        # Keep the zip open; read() will pull parts from it on demand
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        # Unpack the whole package into a persistent temp dir; self.names
        # maps part name -> extracted file path
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except:
            # Some docx files are not valid zips; retry with the
            # forgiving pure-python extractor
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)
        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        # True if the package contains the named part
        return name in self.names

    def read(self, name):
        # Return the raw bytes of the named part, from the zip or the
        # extracted tree depending on how the container was opened
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        # Parse [Content_Types].xml into per-part overrides
        # (self.content_types) and per-extension defaults
        # (self.default_content_types)
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        # Resolve the content type for a part: explicit override, then
        # extension default, then a plain mimetype guess
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        # Parse the package-level _rels/.rels into type->target and
        # target->type maps
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        # Name of the main document part; falls back to searching for a
        # document.xml when the relationship is missing
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        # Parsed XML tree of the main document part
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        # Relationships scoped to the main document part
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        # Return (by_id, by_type) relationship maps for the given part.
        # Targets are made package-absolute unless external or fragment-only.
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        # The .rels file for part x/y.xml lives at x/_rels/y.xml.rels
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            # No relationships for this part
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    @property
    def metadata(self):
        # Build a calibre Metadata object from the core and app properties
        # parts, with a language fallback from the default style
        mi = Metadata(_('Unknown'))
        name = self.relationships.get(DOCPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi)
        if mi.is_null('language'):
            # Fall back to the document's default style language
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi)

        name = self.relationships.get(APPPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)
        return mi

    def close(self):
        # Release the zip handle or remove the extraction directory
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve `recipe_or_file` (a previously downloaded recipe bundle, a
    # recipe urn from the environment, a readable recipe file, or a builtin
    # recipe title), compile it, run the download and return the path to
    # the generated OPF.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Re-use an already downloaded recipe bundle (zip containing
        # download.recipe plus the downloaded content)
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        with lopen('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        # Content was already fetched, no login needed
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            # Recipe identified by urn ("custom:<id>" or builtin id)
            from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(
                    urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            # A readable .recipe file on disk
            with lopen(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the argument as the title of a builtin recipe
            from calibre.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry without stripping the directory part (titles may
                # themselves contain a dot)
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = title.rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log, download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # Downloaded recipe is too new for this calibre
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception(
                    'Failed to compile downloaded recipe. Falling '
                    'back to builtin one')
                builtin = True
            if builtin:
                # Fall back to the recipe bundled with calibre
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: ' + title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
        if recipe is None:
            raise ValueError(
                '%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        # Instantiate the recipe and perform the actual download
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The download left an OPF in the current directory; find and return it
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve `recipe_or_file` (downloaded recipe bundle, readable recipe
    # file, or builtin recipe title), compile it, run the download and
    # return the path to the generated OPF.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Re-use an already downloaded recipe bundle (zip containing
        # download.recipe plus the downloaded content)
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        self.recipe_source = open(u'download.recipe', 'rb').read()
        recipe = compile_recipe(self.recipe_source)
        # Content was already fetched, no login needed
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.access(recipe_or_file, os.R_OK):
            # A readable .recipe file on disk
            self.recipe_source = open(recipe_or_file, 'rb').read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the argument as the title of a builtin recipe
            from calibre.web.feeds.recipes.collection import \
                get_builtin_recipe_by_title
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            raw = get_builtin_recipe_by_title(title, log=log,
                    download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # Downloaded recipe is too new for this calibre
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' % \
                        ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception('Failed to compile downloaded recipe. Falling '
                        'back to builtin one')
                builtin = True
            if builtin:
                # Fall back to the recipe bundled with calibre
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
        if recipe is None:
            raise ValueError('%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        # Instantiate the recipe and perform the actual download
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The download left an OPF in the current directory; find and return it
    for f in os.listdir(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve `recipe_or_file` (downloaded recipe bundle, readable recipe
    # file, or builtin recipe title), compile it, run the download and
    # return the path to the generated OPF.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Re-use an already downloaded recipe bundle (zip containing
        # download.recipe plus the downloaded content)
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        self.recipe_source = open(u'download.recipe', 'rb').read()
        recipe = compile_recipe(self.recipe_source)
        # Content was already fetched, no login needed
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.access(recipe_or_file, os.R_OK):
            # A readable .recipe file on disk
            self.recipe_source = open(recipe_or_file, 'rb').read()
            recipe = compile_recipe(self.recipe_source)
        else:
            # Treat the argument as the title of a builtin recipe
            from calibre.web.feeds.recipes.collection import \
                get_builtin_recipe_by_title
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log, download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # Downloaded recipe is too new for this calibre
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' % \
                        ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception(
                    'Failed to compile downloaded recipe. Falling '
                    'back to builtin one')
                builtin = True
            if builtin:
                # Fall back to the recipe bundled with calibre
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: ' + title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
        if recipe is None:
            raise ValueError(
                '%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        # Instantiate the recipe and perform the actual download
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The download left an OPF in the current directory; find and return it
    for f in os.listdir(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def _generate_thumbnail(self, book, cover_path):
    '''
    Fetch the cover image, generate a thumbnail, cache
    Specific implementation for iBooks
    '''
    self._log_location(book.title)
    self._log_diagnostic(" book_path: %s" % book.path)
    self._log_diagnostic("cover_path: %s" % repr(cover_path))
    thumb_data = None
    thumb_path = book.path.rpartition('.')[0] + '.jpg'

    # Try getting the cover from the cache. The literal string 'None' is a
    # sentinel marking books whose cover previously failed to fetch.
    try:
        zfr = ZipFile(self.archive_path)
        thumb_data = zfr.read(thumb_path)
        if thumb_data == 'None':
            self._log_diagnostic("returning None from cover cache")
            zfr.close()
            return None
    except:
        # Cache miss (or no cache yet): open the cache for appending
        self._log_diagnostic("opening cover cache for appending")
        zfw = ZipFile(self.archive_path, mode='a')
    else:
        self._log_diagnostic("returning thumb from cover cache")
        return thumb_data

    # NOTE: the triple-quoted block below is deliberately disabled code,
    # kept for a future iBooks directory-based implementation
    '''
    # Is book.path a directory (iBooks) or an epub?
    stats = self.ios.stat(book.path)
    if stats['st_ifmt'] == 'S_IFDIR':
        # *** This needs to fetch the cover data from the directory ***
        self._log_diagnostic("returning None, can't read iBooks covers yet")
        return thumb_data

    # Get the cover from the book
    try:
        stream = cStringIO.StringIO(self.ios.read(book.path, mode='rb'))
        mi = get_metadata(stream)
        if mi.cover_data is not None:
            img_data = cStringIO.StringIO(mi.cover_data[1])
    except:
        if self.verbose:
            self._log_diagnostic("ERROR: unable to get cover from '%s'" % book.title)
            import traceback
            #traceback.print_exc()
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        return thumb_data
    '''

    # Fetch the raw cover bytes from the device
    try:
        img_data = cStringIO.StringIO(self.ios.read(cover_path, mode='rb'))
    except:
        if self.verbose:
            self._log_diagnostic("ERROR fetching cover data for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
        return thumb_data

    # Generate a thumb (bounded to 60x80, preserving aspect ratio)
    try:
        im = PILImage.open(img_data)
        scaled, width, height = fit_image(im.size[0], im.size[1], 60, 80)
        im = im.resize((int(width), int(height)), PILImage.ANTIALIAS)
        thumb = cStringIO.StringIO()
        im.convert('RGB').save(thumb, 'JPEG')
        thumb_data = thumb.getvalue()
        thumb.close()
        self._log_diagnostic("SUCCESS: generated thumb for '%s', caching" % book.title)
        # Cache the tagged thumb
        zfw.writestr(thumb_path, thumb_data)
    except:
        if self.verbose:
            self._log_diagnostic("ERROR generating thumb for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log_diagnostic(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
    finally:
        #img_data.close()
        zfw.close()

    return thumb_data
def _generate_thumbnail(self, book):
    '''
    Fetch the cover image, generate a thumbnail, cache.
    Extracts covers from zipped epubs read off the device.

    Returns the cached/generated JPEG thumbnail bytes, or None when the
    book has no usable cover (a 'None' marker is cached so the failed
    extraction is not retried on the next call).
    '''
    self._log_location(book.title)
    #self._log("book_path: %s" % book.path)
    #self._log("book: '%s' by %s uuid: %s" % (book.title, book.author, book.uuid))

    # Parse the cover from the connected device, model Fetch_Annotations:_get_epub_toc()

    thumb_data = None
    # Cache entry name: book path with its extension swapped for .jpg
    thumb_path = book.path.rpartition('.')[0] + '.jpg'

    # Try getting the cover from the cache
    try:
        zfr = ZipFile(self.archive_path)
        thumb_data = zfr.read(thumb_path)
        if thumb_data == 'None':
            # A previous attempt failed; honor the cached marker
            self._log("returning None from cover cache")
            zfr.close()
            return None
    except:
        # Cache miss or unreadable archive: open the cache for appending
        self._log("opening cover cache for appending")
        zfw = ZipFile(self.archive_path, mode='a')
    else:
        self._log("returning thumb from cover cache")
        return thumb_data

    # Get the cover from the book.
    # img_data is initialized to None so the finally block below can
    # safely close it only when it was actually created (the original
    # code raised NameError from the finally when mi.cover_data was None).
    img_data = None
    try:
        stream = cStringIO.StringIO(self.ios.read(book.path, mode='rb'))
        mi = get_metadata(stream)
        if mi.cover_data is not None:
            img_data = cStringIO.StringIO(mi.cover_data[1])
    except:
        if self.verbose:
            self._log("ERROR: unable to get cover from '%s'" % book.title)
            import traceback
            #traceback.print_exc()
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Close the cache archive before the early return (was leaked)
        zfw.close()
        return thumb_data

    # Generate a thumb
    try:
        im = PILImage.open(img_data)
        # Scale to fit inside a 60x80 bounding box, preserving aspect ratio
        scaled, width, height = fit_image(im.size[0], im.size[1], 60, 80)
        im = im.resize((int(width), int(height)), PILImage.ANTIALIAS)
        thumb = cStringIO.StringIO()
        im.convert('RGB').save(thumb, 'JPEG')
        thumb_data = thumb.getvalue()
        thumb.close()
        self._log("SUCCESS: generated thumb for '%s', caching" % book.title)
        # Cache the tagged thumb
        zfw.writestr(thumb_path, thumb_data)
    except:
        if self.verbose:
            self._log("ERROR generating thumb for '%s', caching empty marker" % book.title)
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self._log(traceback.format_exception_only(exc_type, exc_value)[0].strip())
        # Cache the empty cover
        zfw.writestr(thumb_path, 'None')
    finally:
        # img_data may be None when the epub carried no cover data
        if img_data is not None:
            img_data.close()
        zfw.close()
    return thumb_data
class DOCX(object):
    '''
    Access a .docx package, either by extracting it to a temporary
    directory (extract=True, the default) or by reading members directly
    from the zip file. Exposes the package's content types, relationships
    and metadata.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        # Assume the common "transitional" OOXML flavor until
        # read_package_relationships() proves otherwise; the namespace
        # object below depends on this flag, so ordering matters here.
        self.docx_is_transitional = True
        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def init_zipfile(self, stream):
        # Zip-backed mode: keep the archive open and remember its members
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        # Directory-backed mode: unpack the archive into a temp dir
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except:
            # Some real-world docx files are not valid zips; fall back to
            # a more tolerant extractor
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)
        # Map zip-style member name ('/'-separated) -> on-disk path
        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        # True if the package contains the named member
        return name in self.names

    def read(self, name):
        # Return the raw bytes of a package member; raises KeyError for a
        # missing member in both zip- and directory-backed modes
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        # Parse [Content_Types].xml into per-extension defaults and
        # per-part-name overrides
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        # Resolve a member's MIME type: explicit override, then extension
        # default, then a generic guess from the file name
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        # Parse the package-level _rels/.rels into type->target and
        # target->type maps; also detects strict (non-transitional) OOXML
        # from the relationship type used for the main document
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                # Strict OOXML uses the purl.oclc.org relationship type
                self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        # Name of the main document part; prefer the declared relationship,
        # fall back to searching for a document.xml member
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        # Parsed XML root of the main document part
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        # Relationships declared for the main document part
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        # Parse the .rels file attached to the given part, returning
        # (by_id, by_type) maps of relationship targets. Internal targets
        # are resolved relative to the part's directory; external targets
        # and fragment-only targets are left as-is.
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        # part x/y.xml has its rels at x/_rels/y.xml.rels
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            # A part with no .rels simply has no relationships
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    def get_document_properties_names(self):
        # Yield exactly two values: the names of the core- and
        # app-properties parts (either may be None when absent)
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        # Build a Metadata object from core properties, the default style
        # language, and app properties, in that order
        mi = Metadata(_('Unknown'))
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        # NOTE(review): ap_name from get_document_properties_names() is
        # discarded and recomputed here via the relationship only
        ap_name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        # Release whichever backing store is in use: the open zip, or the
        # temporary extraction directory
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass
def books(self, oncard=None, end_session=True):
    '''
    Return a list of ebooks on the device.
    @param oncard: If 'carda' or 'cardb' return a list of ebooks
    on the specific storage card, otherwise return list of
    ebooks in main memory of device. If a card is specified and no
    books are on the card return empty list.
    @return: A BookList.
    '''
    # iBooks keeps everything in main memory; cards never hold books
    if oncard:
        return BookList()

    self._log_location()
    booklist = BookList()
    cached_books = {}

    # Fetch current assets from Media folder
    assets_profile = self._localize_database_path(self.assets_subpath)

    # Fetch current metadata from iBooks's DB
    db_profile = self._localize_database_path(self.books_subpath)
    con = sqlite3.connect(db_profile['path'])

    # Mount the Media folder
    self.ios.mount_ios_media_folder()

    # Build a map of book path -> cover path from the device plists.
    # /Books/Sync/Books.plist is authoritative; /Books/Books.plist and
    # /Books/Purchases/Purchases.plist only fill in outliers and never
    # overwrite an existing entry.
    books_plist = {}

    raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Sync/Books.plist'))['Books']
    for book in raw_plist:
        if 'Path' not in book:
            print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
            continue
        key = '/'.join(['/Books', book['Path']])
        if 'Cover Path' in book:
            books_plist[key] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
        else:
            # No cover in the book folder; fall back to synced artwork
            books_plist[key] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    # Process any outliers
    raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Books.plist'))['Books']
    for book in raw_plist:
        if 'Path' not in book:
            print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
            continue
        key = '/'.join(['/Books', book['Path']])
        # Don't overwrite existing cover_paths
        # (fixed: the original re-tested membership against a misspelled
        # 'book_plist' with the key wrapped in a list, raising NameError)
        if key not in books_plist:
            if 'Cover Path' in book:
                books_plist[key] = unicode('/'.join(['/Books', book['Path'], book['Cover Path']]))
            else:
                books_plist[key] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    raw_plist = XmlPropertyListParser().parse(self.ios.read('/Books/Purchases/Purchases.plist'))['Books']
    for book in raw_plist:
        if 'Path' not in book:
            print(" No 'Path' element found for '%s' by '%s'" % (book['Name'], book['Artist']))
            print(book)
            continue
        # Don't overwrite existing cover_paths
        if '/'.join(['/Books', book['Path']]) not in books_plist:
            key = '/'.join(['/Books/Purchases', book['Path']])
            if 'Cover Path' in book:
                books_plist[key] = unicode('/'.join(['/Books/Purchases', book['Path'], book['Cover Path']]))
            else:
                books_plist[key] = unicode('/'.join(['/Books', 'Sync', 'Artwork', book['Persistent ID']]))

    print(books_plist)

    with con:
        con.row_factory = sqlite3.Row

        # Build a collection map
        collections_map = {}

        # Get the books
        cur = con.cursor()
        #cur.execute("ATTACH DATABASE '{0}' as 'ASSETS'".format(assets_profile['path'])
        cur.execute('''SELECT
                        ZASSETURL,
                        ZBOOKAUTHOR,
                        ZSORTAUTHOR,
                        ZBOOKTITLE,
                        ZSORTTITLE,
                        ZDATABASEKEY,
                        ZDATEADDED
                       FROM ZBKBOOKINFO
                       WHERE ZASSETURL LIKE 'file://localhost%'
                       AND ZASSETURL LIKE '%.epub/'
                    ''')
        rows = cur.fetchall()
        book_count = len(rows)
        for i, row in enumerate(rows):
            book_id = row[b'ZDATABASEKEY']

            # Get the collection assignments
            collections = []

            # Get the primary metadata
            this_book = Book(row[b'ZBOOKTITLE'], row[b'ZBOOKAUTHOR'])
            # ZASSETURL is file://localhost/.../Media/<path>/ ; strip the
            # prefix and the trailing slash, then undo URL space encoding
            original_path = row[b'ZASSETURL']
            path = original_path[original_path.find('Media/') + len('Media'):-1]
            this_book.path = path.replace('%20', ' ')
            # ZDATEADDED is seconds since the NSDate epoch (2001-01-01)
            timestamp = int(row[b'ZDATEADDED']) + NSTimeIntervalSince1970
            this_book.datetime = datetime.fromtimestamp(timestamp).timetuple()
            this_book.device_collections = collections
            this_book.uuid = None
            this_book.thumbnail = self._generate_thumbnail(this_book, books_plist[this_book.path])

            # Retrieve folder size from cache or compute and cache.
            # zfr starts as None so the finally block can close it only
            # when the archive actually opened (the original raised
            # NameError from the finally when ZipFile() itself failed).
            zfr = None
            try:
                zfr = ZipFile(self.folder_archive_path)
                file_size = zfr.read(this_book.path)
                this_book.size = int(file_size)
                self._log_diagnostic("returning folder size from cache")
            except:
                self._log_diagnostic("opening folder cache for appending")
                zfw = ZipFile(self.folder_archive_path, mode='a')
                stats = self.ios.stat(this_book.path)
                this_book.size = self.ios.get_folder_size(this_book.path)
                zfw.writestr(this_book.path, str(this_book.size))
                zfw.close()
            finally:
                if zfr is not None:
                    zfr.close()

            booklist.add_book(this_book, False)

            if self.report_progress is not None:
                self.report_progress(float((i + 1) * 100 / book_count) / 100,
                                     '%(num)d of %(tot)d' % dict(num=i + 1, tot=book_count))

            cached_books[this_book.path] = {
                'title': this_book.title,
                'author': this_book.author,
                'authors': this_book.author.split(' & '),
                'uuid': this_book.uuid
            }
        cur.close()

    # Close the connection
    self.ios.dismount_ios_media_folder()

    if self.report_progress is not None:
        self.report_progress(1.0, _('finished'))

    self.cached_books = cached_books
    return booklist