def __init__(self, file_, mime_type=None):
    """**Creating a new document**

    Unzips the package into a fresh temporary directory
    (``self._cache_dir``) and loads its ``[Content_Types].xml``
    declarations.

    :param file_: an opened file(like) object on the document, readable as
      binary. A ``urllib.addinfourl`` object is first copied to a temporary
      file (``self._cache_file``). The object is closed by this constructor.
    :param mime_type: optional MIME type of the document; mandatory when
      ``file_`` has no ``name`` attribute.
    :raise ValueError: if neither a file name nor a MIME type is available,
      or if an archive member would be written outside the cache directory.
    """
    #: The MIME type of the document
    self.mime_type = mime_type

    # Some shortcuts
    op_sep = os.path.sep
    op_join = os.path.join
    op_isdir = os.path.isdir
    op_dirname = os.path.dirname

    # Preliminary settings depending on input
    #: The file name of the document
    self.filename = getattr(file_, 'name', None)
    if self.filename is None and mime_type is None:
        raise ValueError(
            "Cannot guess mime type from such object, you should use the mime_type constructor arg."
        )

    # Need to make a real file for urllib.urlopen objects
    if isinstance(file_, urllib.addinfourl):
        fd, self._cache_file = tempfile.mkstemp()
        spool = os.fdopen(fd, 'wb')
        try:
            spool.write(file_.read())
        finally:
            # Release both handles even when the download fails midway
            spool.close()
            file_.close()
        file_ = open(self._cache_file, 'rb')

    # Inflating the zipped file
    self._cache_dir = tempfile.mkdtemp()
    cache_root = os.path.abspath(self._cache_dir)
    try:
        openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
        try:
            for outpath in openxmldoc.namelist():
                # Makes Windows path when under Windows
                rel_outpath = op_sep.join(outpath.split('/'))
                abs_outpath = os.path.abspath(op_join(cache_root, rel_outpath))
                # Member names come from the (possibly untrusted) archive:
                # refuse "../" or absolute names that escape the cache dir.
                if not abs_outpath.startswith(cache_root + op_sep):
                    raise ValueError(
                        "Unsafe member path in archive: %r" % (outpath,))
                if outpath.endswith('/'):
                    # Explicit directory entry: create it, nothing to write
                    if not op_isdir(abs_outpath):
                        os.makedirs(abs_outpath)
                    continue
                abs_outdir = op_dirname(abs_outpath)
                if not op_isdir(abs_outdir):
                    os.makedirs(abs_outdir)
                # open() rather than the Python 2 only file() builtin
                fh = open(abs_outpath, 'wb')
                try:
                    fh.write(openxmldoc.read(outpath))
                finally:
                    fh.close()
        finally:
            openxmldoc.close()
    finally:
        file_.close()

    # Getting the content types declarations
    ct_file = op_join(self._cache_dir, '[Content_Types].xml')
    #: A :class:`openxmllib.contenttypes.ContentTypes` object for this document
    self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
def __init__(self, file_, mime_type=None):
    """**Creating a new document**

    Unzips the package into a fresh temporary directory
    (``self._cache_dir``) and loads its ``[Content_Types].xml``
    declarations.

    :param file_: an opened file(like) object on the document, readable as
      binary. A ``urllib.addinfourl`` object is first copied to a temporary
      file (``self._cache_file``). The object is closed by this constructor.
    :param mime_type: optional MIME type of the document; mandatory when
      ``file_`` has no ``name`` attribute.
    :raise ValueError: if neither a file name nor a MIME type is available,
      or if an archive member would be written outside the cache directory.
    """
    #: The MIME type of the document
    self.mime_type = mime_type

    # Some shortcuts
    op_sep = os.path.sep
    op_join = os.path.join
    op_isdir = os.path.isdir
    op_dirname = os.path.dirname

    # Preliminary settings depending on input
    #: The file name of the document
    self.filename = getattr(file_, 'name', None)
    if self.filename is None and mime_type is None:
        raise ValueError("Cannot guess mime type from such object, you should use the mime_type constructor arg.")

    # Need to make a real file for urllib.urlopen objects
    if isinstance(file_, urllib.addinfourl):
        fd, self._cache_file = tempfile.mkstemp()
        spool = os.fdopen(fd, 'wb')
        try:
            spool.write(file_.read())
        finally:
            # Release both handles even when the download fails midway
            spool.close()
            file_.close()
        file_ = open(self._cache_file, 'rb')

    # Inflating the zipped file
    self._cache_dir = tempfile.mkdtemp()
    cache_root = os.path.abspath(self._cache_dir)
    try:
        openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
        try:
            for outpath in openxmldoc.namelist():
                # Makes Windows path when under Windows
                rel_outpath = op_sep.join(outpath.split('/'))
                abs_outpath = os.path.abspath(op_join(cache_root, rel_outpath))
                # Member names come from the (possibly untrusted) archive:
                # refuse "../" or absolute names that escape the cache dir.
                if not abs_outpath.startswith(cache_root + op_sep):
                    raise ValueError(
                        "Unsafe member path in archive: %r" % (outpath,))
                if outpath.endswith('/'):
                    # Explicit directory entry: create it, nothing to write
                    if not op_isdir(abs_outpath):
                        os.makedirs(abs_outpath)
                    continue
                abs_outdir = op_dirname(abs_outpath)
                if not op_isdir(abs_outdir):
                    os.makedirs(abs_outdir)
                # open() rather than the Python 2 only file() builtin
                fh = open(abs_outpath, 'wb')
                try:
                    fh.write(openxmldoc.read(outpath))
                finally:
                    fh.close()
        finally:
            openxmldoc.close()
    finally:
        file_.close()

    # Getting the content types declarations
    ct_file = op_join(self._cache_dir, '[Content_Types].xml')
    #: A :class:`openxmllib.contenttypes.ContentTypes` object for this document
    self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
def getTreesFor(self, document, content_type):
    """Provides all XML documents for that content type

    This is a generator: each part is opened and parsed only when the
    caller iterates up to it.

    @param document: a Document or subclass object (its ``_cache_dir``
    attribute is used as the base directory)
    @param content_type: a MIME content type, used as key in
    ``self.overrides``
    @return: iterator over the etree._ElementTree of that content type
    """
    for rel_path in self.overrides[content_type]:
        # os.path.join drops every component that precedes an absolute one,
        # so ALL leading separators must go (not just the first) to keep
        # the resulting path below the document cache directory.
        rel_path = rel_path.lstrip('/\\')
        file_path = os.path.join(document._cache_dir, rel_path)
        yield etree.parse(utils.xmlFile(file_path, 'rb'))
def __init__(self, file_, mime_type=None):
    """Creating a new document

    Unzips the package into a fresh temporary directory (self._cache_dir)
    and loads its '[Content_Types].xml' declarations.

    @param file_: An opened file(like) obj to the document
    A file must be opened in 'rb' mode. A urllib.addinfourl object is first
    copied to a temporary file (self._cache_file). The object is closed by
    this constructor.
    @param mime_type: optional MIME type of the document, mandatory when
    file_ has no 'name' attribute
    @raise ValueError: if neither a file name nor a MIME type is available,
    or if an archive member would be written outside the cache directory
    """
    self.mime_type = mime_type

    # Some shortcuts
    op_sep = os.path.sep
    op_join = os.path.join
    op_isdir = os.path.isdir
    op_dirname = os.path.dirname

    # Preliminary settings depending on input
    self.filename = getattr(file_, 'name', None)
    if self.filename is None and mime_type is None:
        raise ValueError(
            "Cannot guess mime type from such object, you should use the mime_type constructor arg."
        )

    # Need to make a real file for urllib.urlopen objects
    if isinstance(file_, urllib.addinfourl):
        fd, self._cache_file = tempfile.mkstemp()
        spool = os.fdopen(fd, 'wb')
        try:
            spool.write(file_.read())
        finally:
            # Release both handles even when the download fails midway
            spool.close()
            file_.close()
        file_ = open(self._cache_file, 'rb')

    # Inflating the file
    self._cache_dir = tempfile.mkdtemp()
    cache_root = os.path.abspath(self._cache_dir)
    try:
        openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
        try:
            for outpath in openxmldoc.namelist():
                rel_outpath = op_sep.join(outpath.split('/'))
                abs_outpath = os.path.abspath(op_join(cache_root, rel_outpath))
                # Member names come from the (possibly untrusted) archive:
                # refuse "../" or absolute names that escape the cache dir.
                if not abs_outpath.startswith(cache_root + op_sep):
                    raise ValueError(
                        "Unsafe member path in archive: %r" % (outpath,))
                if outpath.endswith('/'):
                    # Explicit directory entry: create it, nothing to write
                    if not op_isdir(abs_outpath):
                        os.makedirs(abs_outpath)
                    continue
                # We need to be sure that target dir exists
                abs_outdir = op_dirname(abs_outpath)
                if not op_isdir(abs_outdir):
                    os.makedirs(abs_outdir)
                # open() rather than the Python 2 only file() builtin
                fh = open(abs_outpath, 'wb')
                try:
                    fh.write(openxmldoc.read(outpath))
                finally:
                    fh.close()
        finally:
            openxmldoc.close()
    finally:
        file_.close()

    # Getting the content types decl
    ct_file = op_join(self._cache_dir, '[Content_Types].xml')
    self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
def __init__(self, file_, mime_type=None):
    """Creating a new document

    Unzips the package into a fresh temporary directory (self._cache_dir)
    and loads its '[Content_Types].xml' declarations.

    @param file_: An opened file(like) obj to the document
    A file must be opened in 'rb' mode. A urllib.addinfourl object is first
    copied to a temporary file (self._cache_file). The object is closed by
    this constructor.
    @param mime_type: optional MIME type of the document, mandatory when
    file_ has no 'name' attribute
    @raise ValueError: if neither a file name nor a MIME type is available,
    or if an archive member would be written outside the cache directory
    """
    self.mime_type = mime_type

    # Some shortcuts
    op_sep = os.path.sep
    op_join = os.path.join
    op_isdir = os.path.isdir
    op_dirname = os.path.dirname

    # Preliminary settings depending on input
    self.filename = getattr(file_, 'name', None)
    if self.filename is None and mime_type is None:
        raise ValueError("Cannot guess mime type from such object, you should use the mime_type constructor arg.")

    # Need to make a real file for urllib.urlopen objects
    if isinstance(file_, urllib.addinfourl):
        fd, self._cache_file = tempfile.mkstemp()
        spool = os.fdopen(fd, 'wb')
        try:
            spool.write(file_.read())
        finally:
            # Release both handles even when the download fails midway
            spool.close()
            file_.close()
        file_ = open(self._cache_file, 'rb')

    # Inflating the file
    self._cache_dir = tempfile.mkdtemp()
    cache_root = os.path.abspath(self._cache_dir)
    try:
        openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
        try:
            for outpath in openxmldoc.namelist():
                rel_outpath = op_sep.join(outpath.split('/'))
                abs_outpath = os.path.abspath(op_join(cache_root, rel_outpath))
                # Member names come from the (possibly untrusted) archive:
                # refuse "../" or absolute names that escape the cache dir.
                if not abs_outpath.startswith(cache_root + op_sep):
                    raise ValueError(
                        "Unsafe member path in archive: %r" % (outpath,))
                if outpath.endswith('/'):
                    # Explicit directory entry: create it, nothing to write
                    if not op_isdir(abs_outpath):
                        os.makedirs(abs_outpath)
                    continue
                # We need to be sure that target dir exists
                abs_outdir = op_dirname(abs_outpath)
                if not op_isdir(abs_outdir):
                    os.makedirs(abs_outdir)
                # open() rather than the Python 2 only file() builtin
                fh = open(abs_outpath, 'wb')
                try:
                    fh.write(openxmldoc.read(outpath))
                finally:
                    fh.close()
        finally:
            openxmldoc.close()
    finally:
        file_.close()

    # Getting the content types decl
    ct_file = op_join(self._cache_dir, '[Content_Types].xml')
    self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
def documentCover(self):
    """Cover page image

    Looks in the ``_rels/.rels`` part of the extracted package for the
    first relationship whose type is the thumbnail one and opens the file
    it targets.

    :return: (file extension, file object) tuple, or ``None`` when no such
      relationship is found. The file object is opened in binary mode,
      positioned at its start, and left open for the caller.
    """
    rels_file = xmlFile(os.path.join(self._cache_dir, "_rels", ".rels"), 'rb')
    rels_tree = lxml.etree.parse(rels_file)

    find_thumbnails = lxml.etree.XPath(
        "relationships:Relationship[@Type='%s']" % ns_map["thumbnails"],
        namespaces=ns_map)
    thumbnails = find_thumbnails(rels_tree)
    if not thumbnails:
        return None
    cover_path = thumbnails[0].attrib["Target"]

    cover_fp = open(os.sep.join((self._cache_dir, cover_path)), "rb")
    # Sniff the image type from the first bytes, then rewind for the caller
    header = cover_fp.read(32)
    detected = imghdr.what(None, h=header)
    cover_fp.seek(0)

    if detected:
        extension = detected.replace("jpeg", "jpg")
    else:
        # some MS docs say the type can be JPEG which is ok,
        # or WMF, which imghdr does not recognize...
        # so fall back on whatever follows the last dot of the target
        extension = cover_path.split('.')[-1]
    return (extension, cover_fp)
# NOTE(review): the statements down to the bare ``return`` look like the tail
# of a constructor whose beginning lies outside this span; code kept as is.
# Every member of the opened archive is written below self._cache_dir.
for outpath in openxmldoc.namelist():
    # We need to be sure that target dir exists
    # (member names use '/', rebuilt here with the platform separator)
    rel_outpath = op_sep.join(outpath.split('/'))
    abs_outpath = op_join(self._cache_dir, rel_outpath)
    abs_outdir = op_dirname(abs_outpath)
    if not op_isdir(abs_outdir):
        os.makedirs(abs_outdir)
    fh = file(abs_outpath, 'wb')
    fh.write(openxmldoc.read(outpath))
    fh.close()
openxmldoc.close()
in_file.close()

# Getting the content types decl
ct_file = op_join(self._cache_dir, '[Content_Types].xml')
self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
return

@property
def mimeType(self):
    """The official MIME type for this document

    An explicitly set (truthy) self.mime_type wins; otherwise the file name
    is matched against the patterns of self._extpattern_to_mime.

    @return: 'application/xxx' for this file. Falls off the end, hence
    implicitly returns None, when no pattern matches the file name.
    """
    if self.mime_type:
        # Supposed validated by the factory
        return self.mime_type
    # Keys are fnmatch-style patterns, values the associated MIME types
    for pattern, mime_type in self._extpattern_to_mime.items():
        if fnmatch.fnmatch(self.filename, pattern):
            return mime_type
# NOTE(review): the statements down to the bare ``return`` look like the tail
# of a constructor whose beginning lies outside this span; code kept as is.
# Every member of the opened archive is written below self._cache_dir.
for outpath in openxmldoc.namelist():
    # We need to be sure that target dir exists
    # (member names use '/', rebuilt here with the platform separator)
    rel_outpath = op_sep.join(outpath.split('/'))
    abs_outpath = op_join(self._cache_dir, rel_outpath)
    abs_outdir = op_dirname(abs_outpath)
    if not op_isdir(abs_outdir):
        os.makedirs(abs_outdir)
    fh = file(abs_outpath, 'wb')
    fh.write(openxmldoc.read(outpath))
    fh.close()
openxmldoc.close()
in_file.close()

# Getting the content types decl
ct_file = op_join(self._cache_dir, '[Content_Types].xml')
self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
return

@property
def mimeType(self):
    """The official MIME type for this document

    An explicitly set (truthy) self.mime_type wins; otherwise the file name
    is matched against the patterns of self._extpattern_to_mime.

    @return: 'application/xxx' for this file, or None when no pattern
    matches the file name.
    """
    if self.mime_type:
        # Supposed validated by the factory
        return self.mime_type
    # Keys are fnmatch-style patterns, values the associated MIME types
    for pattern, mime_type in self._extpattern_to_mime.items():
        if fnmatch.fnmatch(self.filename, pattern):
            return mime_type
    return None