Ejemplo n.º 1
0
    def _read_opf(self):
        """
        Read the OPF document from the container and return its cleaned tree.

        The raw data is decoded, stripped of everything matched by
        ``XMLDECL_RE`` and has the OEB 1.0 package namespace URL rewritten to
        ``OPF1_NS``. Parsing is then attempted in increasingly forgiving
        steps, each one building on the text produced by the previous step:

        1. the text as it is;
        2. after cleaning XML characters and replacing entities;
        3. after dropping the ``<tours>`` section and declaring the ``dc``
           namespace on ``<dc-metadata>``;
        4. with ``RECOVER_PARSER`` as a last resort.

        A warning is logged whenever anything but the first step was needed.

        Raises :class:`OEBError` if the namespace of the root element is not
        empty, ``OPF1_NS`` or ``OPF2_NS``.
        """
        raw = self.oeb.container.read(None)
        markup = self.oeb.decode(raw)
        markup = XMLDECL_RE.sub('', markup)
        markup = re.sub(
            r'http://openebook.org/namespaces/oeb-package/1.0(/*)',
            OPF1_NS, markup)

        def replace_entities(text):
            return xml_replace_entities(clean_xml_chars(text), encoding=None)

        def repair_tours_and_dc(text):
            text = re.sub(r'(?is)<tours>.+</tours>', '', text)
            return text.replace(
                '<dc-metadata>',
                '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')

        # (repair to apply before parsing, warning to log if parsing works).
        # The repairs are cumulative: each one receives the output of the
        # one before it.
        attempts = (
            (None, None),
            (replace_entities, 'OPF contains invalid HTML named entities'),
            (repair_tours_and_dc, 'OPF contains invalid tours section'),
        )
        for repair, warning in attempts:
            if repair is not None:
                markup = repair(markup)
            try:
                root = etree.fromstring(markup)
            except etree.XMLSyntaxError:
                continue
            if warning is not None:
                self.logger.warn(warning)
            break
        else:
            # Every strict attempt failed; let the recovering parser try.
            from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
            root = etree.fromstring(markup, parser=RECOVER_PARSER)
            self.logger.warn('OPF contains invalid markup, trying to parse it anyway')

        root_ns = namespace(root.tag)
        if root_ns not in ('', OPF1_NS, OPF2_NS):
            raise OEBError('Invalid namespace %r for OPF document' % root_ns)
        return self._clean_opf(root)
Ejemplo n.º 2
0
    def __call__(self, oeb, path):
        """
        Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
        at :param:`path`.

        If :param:`path` has an ``.opf`` extension (compared
        case-insensitively), its base name is used as the name of the OPF
        file and its parent folder as the output folder; a bare file name
        writes into the current directory. The output folder is created,
        together with any missing parent folders, if it does not exist.

        Raises :class:`OEBError`, before anything is created or written, if
        the integer value of ``self.version[0]`` is neither 1 nor 2.
        """
        version = int(self.version[0])
        # Reject unsupported versions up front so a failed call does not
        # leave a partially written output folder behind.
        if version not in (1, 2):
            raise OEBError("Unrecognized OPF version %r" % self.version)

        opfname = None
        if os.path.splitext(path)[1].lower() == '.opf':
            opfname = os.path.basename(path)
            # A bare "book.opf" has an empty dirname, which os.mkdir() and
            # os.makedirs() both reject; fall back to the current directory.
            path = os.path.dirname(path) or os.curdir
        if not os.path.isdir(path):
            # makedirs rather than mkdir so nested output paths also work.
            os.makedirs(path)
        output = DirContainer(path, oeb.log)
        for item in oeb.manifest.values():
            output.write(item.href, item.bytes_representation)

        if version == 1:
            metadata = oeb.to_opf1()
        else:
            metadata = oeb.to_opf2(page_map=self.page_map)
        pretty_print = self.pretty_print
        for mime, (href, data) in metadata.items():
            # Honour the OPF file name requested through ``path``.
            if opfname and mime == OPF_MIME:
                href = opfname
            output.write(href, xml2str(data, pretty_print=pretty_print))
Ejemplo n.º 3
0
 def _spine_from_opf(self, opf):
     """
     Fill the book's spine from the ``itemref`` elements of *opf*.

     For every ``itemref`` whose ``idref`` is known to the manifest:

     * an item with a media type in ``OEB_DOCS`` whose data has an
       ``xpath`` attribute is added to the spine;
     * otherwise, an item whose data has a tag ending in ``}html`` is
       re-typed to ``XHTML_MIME`` and added as well;
     * anything else is not added and a warning is logged.

     Unknown ``idref`` values are skipped with a warning. Afterwards
     ``self._spine_add_extra()`` is called and the spine's
     ``page-progression-direction`` attribute is copied over when its
     value is ``ltr`` or ``rtl``.

     Raises :class:`OEBError` if no item ended up in the spine.
     """
     book_spine = self.oeb.spine
     book_manifest = self.oeb.manifest

     for itemref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
         ref = itemref.get('idref')
         if ref not in book_manifest.ids:
             self.logger.warn('Spine item %r not found' % ref)
             continue
         entry = book_manifest.ids[ref]

         # Regular case: a declared document type with parsed data.
         if entry.media_type.lower() in OEB_DOCS and hasattr(
                 entry.data, 'xpath'):
             book_spine.add(entry, itemref.get('linear'))
             continue

         # Fallback: the data has an <html> root in some namespace even
         # though the declared media type did not qualify.
         has_html_root = (hasattr(entry.data, 'tag') and entry.data.tag
                          and entry.data.tag.endswith('}html'))
         if not has_html_root:
             self.oeb.log.warn('The item %s is not a XML document.'
                               ' Removing it from spine.' % entry.href)
             continue

         entry.media_type = XHTML_MIME
         book_spine.add(entry, itemref.get('linear'))

     if len(book_spine) == 0:
         raise OEBError("Spine is empty")
     self._spine_add_extra()

     directions = xpath(
         opf, '/o2:package/o2:spine/@page-progression-direction')
     for direction in directions:
         if direction in {'ltr', 'rtl'}:
             book_spine.page_progression_direction = direction
Ejemplo n.º 4
0
 def _spine_from_opf(self, opf):
     """
     Fill the book's spine from the ``itemref`` elements of *opf*.

     An ``itemref`` is added to the spine when its ``idref`` is known to
     the manifest, the item's media type is in ``OEB_DOCS`` and the item's
     data has an ``xpath`` attribute. Anything else is left out and a
     warning is logged. ``self._spine_add_extra()`` is called at the end.

     Raises :class:`OEBError` if no item ended up in the spine.
     """
     book_spine = self.oeb.spine
     book_manifest = self.oeb.manifest

     for itemref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
         ref = itemref.get('idref')
         if ref not in book_manifest.ids:
             self.logger.warn(u'Spine item %r not found' % ref)
             continue

         entry = book_manifest.ids[ref]
         is_document = (entry.media_type.lower() in OEB_DOCS
                        and hasattr(entry.data, 'xpath'))
         if not is_document:
             self.oeb.log.warn('The item %s is not a XML document.'
                               ' Removing it from spine.' % entry.href)
             continue

         book_spine.add(entry, itemref.get('linear'))

     if len(book_spine) == 0:
         raise OEBError("Spine is empty")
     self._spine_add_extra()