def get_cover(opf, opf_path, stream, reader=None):
    """Return the cover image data for the book contained in *stream*.

    If the OPF declares a raster cover, its path is resolved relative to the
    directory of *opf_path* and the matching archive member is returned as
    raw bytes. If that member is reported as encrypted by *reader*, ``None``
    is returned. If there is no raster cover, or the member is not present
    in the archive, the result of ``render_cover()`` is returned instead.

    :param opf: object exposing a ``raster_cover`` attribute.
    :param opf_path: path of the OPF file inside the archive (POSIX style).
    :param stream: seekable file-like object holding the archive.
    :param reader: optional object exposing
        ``encryption_meta.is_encrypted(path)``.
    """
    raster_cover = opf.raster_cover
    stream.seek(0)
    try:
        zf = ZipFile(stream)
    except Exception:
        # The regular parser rejected the archive: rewind and retry with
        # LocalZipFile.
        stream.seek(0)
        zf = LocalZipFile(stream)

    if raster_cover:
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and reader.encryption_meta.is_encrypted(cpath):
            # Nothing more will be read from the archive object.
            zf.close()
            return None
        try:
            member = zf.getinfo(cpath)
        except Exception:
            # Declared cover is missing from the archive; fall through to
            # rendering a cover below (which still needs the open archive).
            pass
        else:
            try:
                f = zf.open(member)
                try:
                    return f.read()
                finally:
                    f.close()
            finally:
                zf.close()

    return render_cover(opf, opf_path, zf, reader=reader)
class OCFZipReader(OCFReader):
    """OCF container reader backed by a ZIP archive.

    *stream* may be an already opened ``ZipFile``/``LocalZipFile`` (used
    as-is) or anything ``ZipFile`` accepts. ``root`` defaults to the
    directory of ``stream.name`` when the stream has a name, otherwise to
    the current working directory.
    """

    def __init__(self, stream, mode='r', root=None):
        if not isinstance(stream, (LocalZipFile, ZipFile)):
            try:
                self.archive = ZipFile(stream, mode=mode)
            except BadZipfile:
                raise EPubException("not a ZIP .epub OCF container")
        else:
            # Caller handed us an opened archive; reuse it.
            self.archive = stream

        self.root = root
        if self.root is None:
            stream_name = getattr(stream, 'name', False)
            self.root = (
                os.path.abspath(os.path.dirname(stream_name))
                if stream_name else getcwd()
            )
        super().__init__()

    def open(self, name):
        """Return a readable file-like object for the member *name*."""
        if not isinstance(self.archive, LocalZipFile):
            # Wrap the fully read member in an in-memory buffer.
            payload = self.archive.read(name)
            return io.BytesIO(payload)
        return self.archive.open(name)

    def read_bytes(self, name):
        """Return the complete contents of the member *name*."""
        return self.archive.read(name)
def get_picture_size(self):
    """Return the pixel count, in millions, of a sampled image in the archive.

    The archive at ``self.file`` is opened after ``make_temp_cbz_file()``
    has run. Entries at index 1 through 9 are examined (the first entry is
    skipped); for each entry whose extension is in ``IMG_EXTENSIONS`` the
    image is loaded and its size recorded. Scanning stops as soon as the
    first component of the recorded size is smaller than the second.
    The result is the product of the two size components divided by
    1,000,000, rounded to two decimals (0 if no image could be read).

    NOTE(review): ``img.size`` is presumably ``(width, height)``, which
    would make the stop condition "first portrait image" -- confirm.
    """
    from calibre.utils.magick import Image
    self.make_temp_cbz_file()
    zf = ZipFile(self.file)
    size_x, size_y = 0, 0
    try:
        files = zf.namelist()
        # Same bounds as the original index loop: start at 1, stop before 10.
        for fname in files[1:10]:
            if fname.lower().rpartition('.')[-1] not in IMG_EXTENSIONS:
                continue
            with zf.open(fname) as ffile:
                img = Image()
                try:
                    img.open(ffile)
                    size_x, size_y = img.size
                except Exception:
                    # Best effort: an unreadable image keeps the previous
                    # size and the scan moves on.
                    pass
            if size_x < size_y:
                break
    finally:
        # Close the archive even if listing or opening a member fails.
        zf.close()
    size = round(size_x * size_y / 1000000, 2)
    return size
class OCFZipReader(OCFReader):
    """Reader for an OCF container stored in a ZIP file.

    Accepts either an opened ``ZipFile``/``LocalZipFile`` or a stream/path
    that ``ZipFile`` can open. When no *root* is given it is derived from
    the stream's ``name`` attribute, falling back to the current working
    directory.
    """

    def __init__(self, stream, mode='r', root=None):
        archive = stream
        if not isinstance(archive, (LocalZipFile, ZipFile)):
            try:
                archive = ZipFile(stream, mode=mode)
            except BadZipfile:
                raise EPubException("not a ZIP .epub OCF container")
        self.archive = archive

        self.root = root
        if self.root is None:
            name = getattr(stream, 'name', False)
            if not name:
                self.root = getcwd()
            else:
                containing_dir = os.path.dirname(name)
                self.root = os.path.abspath(containing_dir)
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        """Return a file-like object for *name*; *mode* is accepted but unused."""
        archive = self.archive
        if isinstance(archive, LocalZipFile):
            return archive.open(name)
        # Other archives are read fully and served from memory.
        return io.BytesIO(archive.read(name))

    def read_bytes(self, name):
        """Return the raw bytes of the member *name*."""
        data = self.archive.read(name)
        return data
def get_fb2_data(stream):
    """Return ``(data, zip_file_name)`` for an FB2 stream.

    If *stream* is a ZIP archive, the first member whose name ends with
    ``.fb2`` (case-insensitively) is read; when no member matches, the first
    member of the archive is used. ``zip_file_name`` is that member's name.
    If *stream* is not a ZIP archive, it is rewound to the position it had
    on entry, read in full, and ``zip_file_name`` is ``None``.

    An archive with no members raises ``IndexError``.
    """
    from calibre.utils.zipfile import ZipFile, BadZipfile
    pos = stream.tell()
    try:
        zf = ZipFile(stream)
    except BadZipfile:
        # Plain (uncompressed) input: hand back everything from where the
        # caller left the stream.
        stream.seek(pos)
        return stream.read(), None

    names = zf.namelist()
    names = [x for x in names if x.lower().endswith('.fb2')] or names
    zip_file_name = names[0]
    member = zf.open(zip_file_name)
    try:
        ans = member.read()
    finally:
        # Release the member handle instead of leaving it to the GC.
        member.close()
    return ans, zip_file_name
class DOCX(object):
    """Access to the parts of a DOCX package.

    With ``extract=True`` the archive is unpacked into a temporary directory
    and ``self.names`` maps part names to file paths; otherwise the archive
    is kept open as ``self.zipf`` and ``self.names`` is a frozenset of part
    names. Content types and package relationships are read on construction.

    :param path_or_stream: a filesystem path, or an object with ``read``.
    :param log: logger to use; defaults to ``default_log``.
    :param extract: whether to unpack the archive to a temporary directory.
    """

    def __init__(self, path_or_stream, log=None, extract=True):
        self.docx_is_transitional = True
        opened_here = not hasattr(path_or_stream, 'read')
        stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
        # Only set for files this object opened itself, so that a stream
        # supplied by the caller is never closed behind their back.
        self._own_stream = stream if opened_here else None
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            try:
                self.extract(stream)
            finally:
                # After extraction everything is read from the temp dir.
                self._release_own_stream()
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def _release_own_stream(self):
        """Close the file opened by ``__init__``, if any (idempotent)."""
        stream = getattr(self, '_own_stream', None)
        self._own_stream = None
        if stream is not None:
            stream.close()

    def init_zipfile(self, stream):
        """Keep the archive open and record the names of its members."""
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        """Unpack the archive into a temporary directory and index its files."""
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            try:
                zf.extractall(self.tdir)
            finally:
                zf.close()
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            # Part names always use forward slashes, whatever the OS.
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        """Return True if the package contains a part called *name*."""
        return name in self.names

    def read(self, name):
        """Return the bytes of part *name*; raises KeyError if it is missing."""
        if hasattr(self, 'zipf'):
            member = self.zipf.open(name)
            try:
                return member.read()
            finally:
                member.close()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        """Parse ``[Content_Types].xml`` into the two content-type maps.

        ``default_content_types`` is keyed by lower-cased extension and
        ``content_types`` by part name (leading slashes stripped).
        Raises InvalidDOCX if the part is missing.
        """
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        """Return the content type of part *name*.

        Lookup order: explicit override for the part, default for its
        extension, then ``guess_type()``.
        """
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        """Parse ``_rels/.rels`` into ``relationships`` and its reverse map.

        Also clears ``docx_is_transitional`` when the relationship targeting
        ``word/document.xml`` uses the purl.oclc.org officeDocument type.
        Raises InvalidDOCX if the part is missing.
        """
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        """Name of the main document part.

        Taken from the package relationships; failing that, the first part
        named ``document.xml`` in any directory. Raises InvalidDOCX if none.
        """
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        """The parsed main document part."""
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        """Relationships of the main document part, see get_relationships()."""
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        """Return ``(by_id, by_type)`` target maps for the part *name*.

        The relationships are read from ``<dir>/_rels/<file>.rels``. Targets
        that are neither external nor fragment-only (``#...``) are made
        relative to the package root by prefixing the part's directory.
        Both maps are empty when the part has no relationships file.
        """
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    def get_document_properties_names(self):
        """Yield the core-properties part name, then the app-properties one.

        Each comes from the package relationships, falling back to a part
        whose lower-cased name is ``docprops/core.xml`` / ``docprops/app.xml``.
        ``None`` is yielded for a part that cannot be found.
        """
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        """Metadata built from the core properties, styles and app properties."""
        mi = Metadata(_('Unknown'))
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        # Use the name resolved above as-is: looking it up in the
        # relationships again would discard the docprops/app.xml fallback.
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        """Close the archive or remove the extraction directory."""
        try:
            if hasattr(self, 'zipf'):
                self.zipf.close()
            else:
                try:
                    shutil.rmtree(self.tdir)
                except EnvironmentError:
                    pass
        finally:
            self._release_own_stream()
class DOCX(object):
    """Access to the parts of a DOCX package.

    With ``extract=True`` the archive is unpacked into a temporary directory
    and ``self.names`` maps part names to file paths; otherwise the archive
    is kept open as ``self.zipf`` and ``self.names`` is a frozenset of part
    names. Content types and package relationships are read on construction.

    :param path_or_stream: a filesystem path, or an object with ``read``.
    :param log: logger to use; defaults to ``default_log``.
    :param extract: whether to unpack the archive to a temporary directory.
    """

    def __init__(self, path_or_stream, log=None, extract=True):
        opened_here = not hasattr(path_or_stream, 'read')
        stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
        # Only set for files this object opened itself, so that a stream
        # supplied by the caller is never closed behind their back.
        self._own_stream = stream if opened_here else None
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            try:
                self.extract(stream)
            finally:
                # After extraction everything is read from the temp dir.
                self._release_own_stream()
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()

    def _release_own_stream(self):
        """Close the file opened by ``__init__``, if any (idempotent)."""
        stream = getattr(self, '_own_stream', None)
        self._own_stream = None
        if stream is not None:
            stream.close()

    def init_zipfile(self, stream):
        """Keep the archive open and record the names of its members."""
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        """Unpack the archive into a temporary directory and index its files."""
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            try:
                zf.extractall(self.tdir)
            finally:
                zf.close()
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            # Part names always use forward slashes, whatever the OS.
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        """Return True if the package contains a part called *name*."""
        return name in self.names

    def read(self, name):
        """Return the bytes of part *name*; raises KeyError if it is missing."""
        if hasattr(self, 'zipf'):
            member = self.zipf.open(name)
            try:
                return member.read()
            finally:
                member.close()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        """Parse ``[Content_Types].xml`` into the two content-type maps.

        ``default_content_types`` is keyed by lower-cased extension and
        ``content_types`` by part name (leading slashes stripped).
        Raises InvalidDOCX if the part is missing.
        """
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        """Return the content type of part *name*.

        Lookup order: explicit override for the part, default for its
        extension, then ``guess_type()``.
        """
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        """Parse ``_rels/.rels`` into ``relationships`` and its reverse map.

        Raises InvalidDOCX if the part is missing.
        """
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        """Name of the main document part.

        Taken from the package relationships; failing that, the first part
        named ``document.xml`` in any directory. Raises InvalidDOCX if none.
        """
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        """The parsed main document part."""
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        """Relationships of the main document part, see get_relationships()."""
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        """Return ``(by_id, by_type)`` target maps for the part *name*.

        The relationships are read from ``<dir>/_rels/<file>.rels``. Targets
        that are neither external nor fragment-only (``#...``) are made
        relative to the package root by prefixing the part's directory.
        Both maps are empty when the part has no relationships file.
        """
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    @property
    def metadata(self):
        """Metadata built from the core properties, styles and app properties.

        Each properties part is located through the package relationships,
        falling back to a part whose lower-cased name is
        ``docprops/core.xml`` / ``docprops/app.xml``.
        """
        mi = Metadata(_('Unknown'))
        name = self.relationships.get(DOCPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi)

        name = self.relationships.get(APPPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        """Close the archive or remove the extraction directory."""
        try:
            if hasattr(self, 'zipf'):
                self.zipf.close()
            else:
                try:
                    shutil.rmtree(self.tdir)
                except EnvironmentError:
                    pass
        finally:
            self._release_own_stream()