Esempio n. 1
0
    def explode(self):
        '''Explodes an epub archive.

        Opens the zip archive at self.name and reads the container file, the
        OPF and the TOC out of it.  The raw OPF and TOC bytes are stored on
        self.opf and self.toc, self.authors and self.title are filled in from
        the parsed OPF, and content loading is delegated to
        self._get_content (when self.use_spine_as_toc is true) or to
        self._get_content_from_nav_points (otherwise).

        Raises InvalidEpubException if the container file or the TOC file
        cannot be found in the archive, and DRMEpubException if the archive
        contains the entry named by constants.RIGHTS.
        '''
        # The context manager closes the archive handle on every exit path,
        # including the exceptions raised below.
        # NOTE(review): assumes the content helpers finish reading from z
        # before they return and do not keep the handle for later use.
        with ZipFile(self.name, 'r') as z:
            try:
                container = z.read(self._CONTAINER)
            except KeyError:
                # Is this DOS-format?  If so, handle this as a special error
                try:
                    z.read(self._CONTAINER.replace('/', '\\'))
                except KeyError:
                    raise InvalidEpubException(
                        'Was not able to locate container file %s' %
                        self._CONTAINER,
                        archive=self)
                # The backslash variant exists, so the archive is DOS-style.
                raise InvalidEpubException(
                    "This ePub file was created with DOS/Windows path separators, which is not legal according to the PKZIP specification."
                )

            # A readable rights entry is treated as DRM; its absence
            # (KeyError) is the normal case.
            try:
                z.read(constants.RIGHTS)
            except KeyError:
                pass
            else:
                raise DRMEpubException()

            parsed_container = util.xml_from_string(container)

            opf_filename = self._get_opf_filename(parsed_container)

            content_path = self._get_content_path(opf_filename)
            self.opf = z.read(opf_filename)
            parsed_opf = util.xml_from_string(self.opf)

            # Materialise the manifest items once; they are passed to both
            # the TOC lookup and the content loader.
            items = list(
                parsed_opf.iterdescendants(tag="{%s}item" % (NS['opf'])))

            toc_filename = self._get_toc(parsed_opf, items, content_path)
            try:
                self.toc = z.read(toc_filename)
            except KeyError:
                raise InvalidEpubException(
                    'TOC file was referenced in OPF, but not found in archive: toc file %s'
                    % toc_filename,
                    archive=self)

            parsed_toc = util.xml_from_string(self.toc)

            self.authors = self._get_authors(parsed_opf)
            self.title = self._get_title(parsed_opf)
            if self.use_spine_as_toc:
                self._get_content(z, parsed_opf, parsed_toc, items,
                                  content_path)
            else:
                self._get_content_from_nav_points(z, content_path)
Esempio n. 2
0
 def _get_metadata(self,
                   metadata_tag,
                   opf,
                   plural=False,
                   as_string=False,
                   as_list=False):
     '''Returns the text of the Dublin Core metadata elements named metadata_tag.

     The OPF is parsed with util.xml_from_string on first use and the tree is
     kept on self._parsed_metadata; once that is set, the opf argument is no
     longer consulted.  If parsing raises InvalidEpubException, None is
     returned.

     Return value, in order of precedence:
       - as_list: a list of the stripped, non-empty text values.
       - as_string: those same values joined into one comma-delimited string.
       - otherwise the unstripped text values that are not None: the single
         value itself when exactly one matched (wrapped in a 1-tuple if
         plural is set), or a list when zero or multiple names match.
     '''
     if self._parsed_metadata is None:
         try:
             self._parsed_metadata = util.xml_from_string(opf)
         except InvalidEpubException:
             return None

     matches = self._parsed_metadata.findall('.//{%s}%s' %
                                             (NS['dc'], metadata_tag))

     if as_list or as_string:
         # Both formatted variants skip empty text and strip whitespace;
         # as_list wins when both flags are set.
         stripped = [node.text.strip() for node in matches if node.text]
         return stripped if as_list else ', '.join(stripped)

     # The default path keeps the text untouched, dropping only None.
     values = [node.text for node in matches if node.text is not None]
     if len(values) != 1:
         return values
     only = values[0]
     return (only, ) if plural else only
Esempio n. 3
0
    def explode(self):
        """Explodes an epub archive.

        Opens the zip archive at self.name and reads the container file, the
        OPF and the TOC out of it.  The raw OPF and TOC bytes are stored on
        self.opf and self.toc, self.authors and self.title are filled in from
        the parsed OPF, and content loading is delegated to
        self._get_content (when self.use_spine_as_toc is true) or to
        self._get_content_from_nav_points (otherwise).

        Raises InvalidEpubException if the container file or the TOC file
        cannot be found in the archive, and DRMEpubException if the archive
        contains the entry named by constants.RIGHTS.
        """
        # The context manager closes the archive handle on every exit path,
        # including the exceptions raised below.
        # NOTE(review): assumes the content helpers finish reading from z
        # before they return and do not keep the handle for later use.
        with ZipFile(self.name, "r") as z:
            try:
                container = z.read(self._CONTAINER)
            except KeyError:
                # Is this DOS-format?  If so, handle this as a special error
                try:
                    z.read(self._CONTAINER.replace("/", "\\"))
                except KeyError:
                    raise InvalidEpubException(
                        "Was not able to locate container file %s" % self._CONTAINER, archive=self
                    )
                # The backslash variant exists, so the archive is DOS-style.
                raise InvalidEpubException(
                    "This ePub file was created with DOS/Windows path separators, which is not legal according to the PKZIP specification."
                )

            # A readable rights entry is treated as DRM; its absence
            # (KeyError) is the normal case.
            try:
                z.read(constants.RIGHTS)
            except KeyError:
                pass
            else:
                raise DRMEpubException()

            parsed_container = util.xml_from_string(container)

            opf_filename = self._get_opf_filename(parsed_container)

            content_path = self._get_content_path(opf_filename)
            self.opf = z.read(opf_filename)
            parsed_opf = util.xml_from_string(self.opf)

            # Materialise the manifest items once; they are passed to both
            # the TOC lookup and the content loader.
            items = list(parsed_opf.iterdescendants(tag="{%s}item" % (NS["opf"])))

            toc_filename = self._get_toc(parsed_opf, items, content_path)
            try:
                self.toc = z.read(toc_filename)
            except KeyError:
                raise InvalidEpubException(
                    "TOC file was referenced in OPF, but not found in archive: toc file %s" % toc_filename,
                    archive=self,
                )

            parsed_toc = util.xml_from_string(self.toc)

            self.authors = self._get_authors(parsed_opf)
            self.title = self._get_title(parsed_opf)
            if self.use_spine_as_toc:
                self._get_content(z, parsed_opf, parsed_toc, items, content_path)
            else:
                self._get_content_from_nav_points(z, content_path)
Esempio n. 4
0
 def _get_metadata(self, metadata_tag, opf, plural=False, as_string=False, as_list=False):
     """Returns the text of the Dublin Core metadata elements named metadata_tag.

     The OPF is parsed with util.xml_from_string on first use and the tree is
     kept on self._parsed_metadata; once that is set, the opf argument is no
     longer consulted.  If parsing raises InvalidEpubException, None is
     returned.

     Return value, in order of precedence:
       - as_list: a list of the stripped, non-empty text values.
       - as_string: those same values joined into one comma-delimited string.
       - otherwise the unstripped text values that are not None: the single
         value itself when exactly one matched (wrapped in a 1-tuple if
         plural is set), or a list when zero or multiple names match.
     """
     if self._parsed_metadata is None:
         try:
             self._parsed_metadata = util.xml_from_string(opf)
         except InvalidEpubException:
             return None

     found = self._parsed_metadata.findall(".//{%s}%s" % (NS["dc"], metadata_tag))

     if as_list or as_string:
         # Both formatted variants skip empty text and strip whitespace;
         # as_list wins when both flags are set.
         cleaned = [element.text.strip() for element in found if element.text]
         if as_list:
             return cleaned
         return ", ".join(cleaned)

     # The default path keeps the text untouched, dropping only None.
     raw = [element.text for element in found if element.text is not None]
     if len(raw) == 1:
         single = raw[0]
         if plural:
             return (single,)
         return single
     return raw