def explode(self):
    '''Explode the epub archive at self.name and load its contents.

    Reads the container file, the OPF and the TOC out of the zip archive,
    then populates self.opf, self.toc, self.authors and self.title before
    handing the open archive to the content loader selected by
    self.use_spine_as_toc.

    Raises:
        InvalidEpubException: if the container file cannot be found (a
            dedicated message is used when it is only present under a
            backslash-separated name), or if the TOC file named by the
            OPF is absent from the archive.
        DRMEpubException: if the archive contains the entry named by
            constants.RIGHTS.
    '''
    # The context manager guarantees the archive is closed on every exit
    # path, including the exceptions raised below.
    with ZipFile(self.name, 'r') as z:
        try:
            container = z.read(self._CONTAINER)
        except KeyError:
            # Is this DOS-format?  If so, report it as a special error.
            try:
                z.read(self._CONTAINER.replace('/', '\\'))
            except KeyError:
                raise InvalidEpubException(
                    'Was not able to locate container file %s' % self._CONTAINER,
                    archive=self)
            else:
                raise InvalidEpubException(
                    "This ePub file was created with DOS/Windows path separators, which is not legal according to the PKZIP specification."
                )

        # A readable rights entry means the book is DRM-protected.
        try:
            z.read(constants.RIGHTS)
        except KeyError:
            pass
        else:
            raise DRMEpubException()

        parsed_container = util.xml_from_string(container)
        opf_filename = self._get_opf_filename(parsed_container)
        content_path = self._get_content_path(opf_filename)

        self.opf = z.read(opf_filename)
        parsed_opf = util.xml_from_string(self.opf)

        items = list(parsed_opf.iterdescendants(tag="{%s}item" % (NS['opf'])))

        toc_filename = self._get_toc(parsed_opf, items, content_path)
        try:
            self.toc = z.read(toc_filename)
        except KeyError:
            raise InvalidEpubException(
                'TOC file was referenced in OPF, but not found in archive: toc file %s' % toc_filename,
                archive=self)

        parsed_toc = util.xml_from_string(self.toc)

        self.authors = self._get_authors(parsed_opf)
        self.title = self._get_title(parsed_opf)

        # Content is loaded while the archive is still open.
        if self.use_spine_as_toc:
            self._get_content(z, parsed_opf, parsed_toc, items, content_path)
        else:
            self._get_content_from_nav_points(z, content_path)
def _get_metadata(self, metadata_tag, opf, plural=False, as_string=False, as_list=False):
    '''Look up Dublin Core metadata elements named metadata_tag.

    The OPF passed in is parsed only when no parsed tree is cached on
    self._parsed_metadata; if that parse raises InvalidEpubException,
    None is returned.

    Return shape:
      * as_list=True: list of stripped, non-empty text values.
      * as_string=True: those same values joined with ', '.
      * otherwise: the unstripped text of every match whose text is not
        None -- a bare value when exactly one matched (wrapped in a
        1-tuple if plural is True), else a list (possibly empty).

    as_list takes precedence over as_string when both are set.
    '''
    if self._parsed_metadata is None:
        try:
            self._parsed_metadata = util.xml_from_string(opf)
        except InvalidEpubException:
            return None

    query = './/{%s}%s' % (NS['dc'], metadata_tag)
    matches = self._parsed_metadata.findall(query)

    if as_list or as_string:
        stripped = [node.text.strip() for node in matches if node.text]
        return stripped if as_list else ', '.join(stripped)

    values = [node.text for node in matches if node.text is not None]
    if len(values) != 1:
        return values

    only = values[0]
    return (only, ) if plural else only
def explode(self):
    """Explode the epub archive at self.name and load its contents.

    Reads the container file, the OPF and the TOC out of the zip archive,
    then populates self.opf, self.toc, self.authors and self.title before
    handing the open archive to the content loader selected by
    self.use_spine_as_toc.

    Raises:
        InvalidEpubException: if the container file cannot be found (a
            dedicated message is used when it is only present under a
            backslash-separated name), or if the TOC file named by the
            OPF is absent from the archive.
        DRMEpubException: if the archive contains the entry named by
            constants.RIGHTS.
    """
    # The context manager guarantees the archive is closed on every exit
    # path, including the exceptions raised below.
    with ZipFile(self.name, "r") as z:
        try:
            container = z.read(self._CONTAINER)
        except KeyError:
            # Is this DOS-format?  If so, report it as a special error.
            try:
                z.read(self._CONTAINER.replace("/", "\\"))
            except KeyError:
                raise InvalidEpubException(
                    "Was not able to locate container file %s" % self._CONTAINER, archive=self
                )
            else:
                raise InvalidEpubException(
                    "This ePub file was created with DOS/Windows path separators, which is not legal according to the PKZIP specification."
                )

        # A readable rights entry means the book is DRM-protected.
        try:
            z.read(constants.RIGHTS)
        except KeyError:
            pass
        else:
            raise DRMEpubException()

        parsed_container = util.xml_from_string(container)
        opf_filename = self._get_opf_filename(parsed_container)
        content_path = self._get_content_path(opf_filename)

        self.opf = z.read(opf_filename)
        parsed_opf = util.xml_from_string(self.opf)

        items = list(parsed_opf.iterdescendants(tag="{%s}item" % (NS["opf"])))

        toc_filename = self._get_toc(parsed_opf, items, content_path)
        try:
            self.toc = z.read(toc_filename)
        except KeyError:
            raise InvalidEpubException(
                "TOC file was referenced in OPF, but not found in archive: toc file %s" % toc_filename, archive=self
            )

        parsed_toc = util.xml_from_string(self.toc)

        self.authors = self._get_authors(parsed_opf)
        self.title = self._get_title(parsed_opf)

        # Content is loaded while the archive is still open.
        if self.use_spine_as_toc:
            self._get_content(z, parsed_opf, parsed_toc, items, content_path)
        else:
            self._get_content_from_nav_points(z, content_path)
def _get_metadata(self, metadata_tag, opf, plural=False, as_string=False, as_list=False):
    """Look up Dublin Core metadata elements named metadata_tag.

    The OPF passed in is parsed only when no parsed tree is cached on
    self._parsed_metadata; if that parse raises InvalidEpubException,
    None is returned.

    Return shape:
      * as_list=True: list of stripped, non-empty text values.
      * as_string=True: those same values joined with ", ".
      * otherwise: the unstripped text of every match whose text is not
        None -- a bare value when exactly one matched (wrapped in a
        1-tuple if plural is True), else a list (possibly empty).

    as_list takes precedence over as_string when both are set.
    """
    if self._parsed_metadata is None:
        try:
            self._parsed_metadata = util.xml_from_string(opf)
        except InvalidEpubException:
            return None

    query = ".//{%s}%s" % (NS["dc"], metadata_tag)
    matches = self._parsed_metadata.findall(query)

    if as_list or as_string:
        stripped = [node.text.strip() for node in matches if node.text]
        return stripped if as_list else ", ".join(stripped)

    values = [node.text for node in matches if node.text is not None]
    if len(values) != 1:
        return values

    only = values[0]
    return (only,) if plural else only