Exemple #1
0
    def remapLinks(self, webpage):
        """Rewrite image sources and link targets of ``webpage.tree`` in place.

        Images: an ``<img>`` whose ``src`` is a key of ``webpage.images`` is
        pointed at ``config.img_rel_path`` joined with the basename of the
        mapped filename.  Every other ``<img>`` (including one without a
        ``src`` attribute) is removed with ``remove_node``.

        Links: ``<a>`` elements with an empty or missing ``href`` are left
        untouched.  Fragment links (``#...``) are normalised with
        ``safe_xml_id``; if no element carries the resulting id, the id is
        assigned to the link itself.  All other links are resolved against
        ``webpage.url`` and cleaned with ``clean_url``; when the result is a
        key of ``webpage.coll.url2webpage`` the link is redirected to
        ``<id>.xhtml`` of that page, otherwise to the absolute URL.
        """
        for img in webpage.tree.findall('.//img'):
            # .get() rather than attrib[...]: an <img> with no src attribute
            # is dropped like any other unmapped image instead of raising
            # KeyError.
            img_fn = webpage.images.get(img.get('src'))
            if img_fn:
                zip_rel_path = os.path.join(config.img_rel_path, os.path.basename(img_fn))
                img.set('src', zip_rel_path)
            else:
                remove_node(img)

        # A set keeps the per-link membership test cheap; order is never used.
        target_ids = set(safe_xml_id(_id) for _id in webpage.tree.xpath('.//@id'))
        for a in webpage.tree.findall('.//a'):
            href = a.get('href')
            if not href:  # this link is probably just an anchor
                continue
            if href.startswith('#'):
                # safe_xml_id is applied to the whole href (leading '#'
                # included) and the first character is then dropped.
                target_id = safe_xml_id(href)[1:]
                if target_id not in target_ids:
                    # No element has this id yet: give it to the link itself.
                    a.set('id', target_id)
                    target_ids.add(target_id)
                a.set('href', '#' + target_id)
            else:
                url = clean_url(urlparse.urljoin(webpage.url, href))
                linked_wp = webpage.coll.url2webpage.get(url)
                if linked_wp:
                    a.set('href', linked_wp.id + '.xhtml')
                else:
                    a.set('href', url)
Exemple #2
0
    def remapLinks(self, webpage):
        """Rewrite image sources and link targets of ``webpage.tree`` in place.

        Images: an ``<img>`` whose ``src`` is a key of ``webpage.images`` is
        pointed at ``config.img_rel_path`` joined with the basename of the
        mapped filename.  Every other ``<img>`` (including one without a
        ``src`` attribute) is removed with ``remove_node``.

        Links: ``<a>`` elements with an empty or missing ``href`` are left
        untouched.  Fragment links (``#...``) are normalised with
        ``safe_xml_id``; if no element carries the resulting id, the id is
        assigned to the link itself.  All other links are resolved against
        ``webpage.url`` and cleaned with ``clean_url``; when the result is a
        key of ``webpage.coll.url2webpage`` the link is redirected to
        ``<id>.xhtml`` of that page, otherwise to the absolute URL.
        """
        for img in webpage.tree.findall('.//img'):
            # .get() rather than attrib[...]: an <img> with no src attribute
            # is dropped like any other unmapped image instead of raising
            # KeyError.
            img_fn = webpage.images.get(img.get('src'))
            if img_fn:
                zip_rel_path = os.path.join(config.img_rel_path,
                                            os.path.basename(img_fn))
                img.set('src', zip_rel_path)
            else:
                remove_node(img)

        # A set keeps the per-link membership test cheap; order is never used.
        target_ids = set(
            safe_xml_id(_id) for _id in webpage.tree.xpath('.//@id'))
        for a in webpage.tree.findall('.//a'):
            href = a.get('href')
            if not href:  # this link is probably just an anchor
                continue
            if href.startswith('#'):
                # safe_xml_id is applied to the whole href (leading '#'
                # included) and the first character is then dropped.
                target_id = safe_xml_id(href)[1:]
                if target_id not in target_ids:
                    # No element has this id yet: give it to the link itself.
                    a.set('id', target_id)
                    target_ids.add(target_id)
                a.set('href', '#' + target_id)
            else:
                url = clean_url(urlparse.urljoin(webpage.url, href))
                linked_wp = webpage.coll.url2webpage.get(url)
                if linked_wp:
                    a.set('href', linked_wp.id + '.xhtml')
                else:
                    a.set('href', url)
Exemple #3
0
        def writeOPF_manifest():
            """Build and return the ``manifest`` element for the OPF file.

            The element gets one ``item`` per entry of ``self.articles``, one
            ``item`` for the NCX file, and one ``item`` for every entry of
            ``self.added_files`` whose guessed MIME type is CSS, PNG, JPEG or
            GIF.
            """
            manifest = E.manifest()

            # Article documents.
            for article in self.articles:
                manifest.append(E.item({'id': article.id,
                                        'href': article.path,
                                        'media-type': 'application/xhtml+xml'}))

            # Navigation file.
            ncx_href = os.path.basename(config.ncx_fn)
            manifest.append(E.item({'id': 'ncx',
                                    'href': ncx_href,
                                    'media-type': 'application/x-dtbncx+xml'}))

            # FIXME (kept from the original): "add missing resources: images,
            # css".  NOTE(review): the loop below already registers css and
            # image files found in self.added_files -- confirm what is still
            # missing.
            resource_types = ('text/css', 'image/png', 'image/jpeg', 'image/gif')
            for added in self.added_files:
                # Manifest hrefs are written without the leading 'OPS/'.
                href = added[4:] if added.startswith('OPS/') else added
                media_type = mimetypes.guess_type(href)[0]
                if media_type not in resource_types:
                    continue
                if href == self.cover_img_path:
                    item_id = 'cover-image'
                else:
                    item_id = safe_xml_id(href)
                manifest.append(E.item({'id': item_id,
                                        'href': href,
                                        'media-type': media_type}))

            return manifest
Exemple #4
0
    def addArticle(self, webpage):
        """Add ``webpage`` to the book and link the images it references.

        ``webpage.xml`` is stored as ``OPS/<webpage.id>.xhtml`` through
        ``self.add_file`` and an ``ArticleInfo`` record is appended to
        ``self.articles``; its type is ``'article'`` for instances of
        ``collection.WebPage`` and ``'chapter'`` for anything else.

        Afterwards every file in ``webpage.images`` whose basename matches an
        ``<img src>`` of ``webpage.tree`` is linked, uncompressed, under
        ``config.img_abs_path``.  A ``tree`` or ``images`` attribute that is
        missing, ``None`` or ``False`` means there is nothing to link.
        """
        path = 'OPS/%s.xhtml' % webpage.id
        self.add_file(path, webpage.xml)
        self.articles.append(
            ArticleInfo(id=safe_xml_id(webpage.id),
                        path=os.path.basename(path),
                        title=webpage.title,
                        type='article' if isinstance(
                            webpage, collection.WebPage) else 'chapter'))

        # Identity checks instead of ``!= False``: the old comparison let
        # ``None`` through and then failed on ``None.xpath``.  A plain truth
        # test is avoided because the tree object may define its own
        # truthiness (NOTE(review): presumably an lxml element -- confirm).
        tree = getattr(webpage, 'tree', None)
        if tree is None or tree is False:
            used_images = set()
        else:
            # NOTE(review): assumes every src starts with
            # config.img_rel_path; the prefix is cut off by length without
            # being checked.
            prefix_len = len(config.img_rel_path)
            used_images = set(
                src[prefix_len:] for src in tree.xpath('//img/@src'))

        images = getattr(webpage, 'images', None)
        if images is None or images is False:
            return

        # Only the stored filenames are needed; the original src keys are not.
        for img_fn in images.values():
            basename = os.path.basename(img_fn)
            if basename not in used_images:
                continue
            zip_fn = os.path.join(config.img_abs_path, basename)
            self.link_file(img_fn, zip_fn, compression=False)
Exemple #5
0
    def addArticle(self, webpage):
        """Add ``webpage`` to the book and link the images it references.

        ``webpage.xml`` is stored as ``OPS/<webpage.id>.xhtml`` through
        ``self.add_file`` and an ``ArticleInfo`` record is appended to
        ``self.articles``; its type is ``'article'`` for instances of
        ``collection.WebPage`` and ``'chapter'`` for anything else.

        Afterwards every file in ``webpage.images`` whose basename matches an
        ``<img src>`` of ``webpage.tree`` is linked, uncompressed, under
        ``config.img_abs_path``.  A ``tree`` or ``images`` attribute that is
        missing, ``None`` or ``False`` means there is nothing to link.
        """
        path = 'OPS/%s.xhtml' % webpage.id
        self.add_file(path, webpage.xml)
        self.articles.append(ArticleInfo(id=safe_xml_id(webpage.id),
                                         path=os.path.basename(path),
                                         title=webpage.title,
                                         type='article' if isinstance(webpage, collection.WebPage) else 'chapter'))

        # Identity checks instead of ``!= False``: the old comparison let
        # ``None`` through and then failed on ``None.xpath``.  A plain truth
        # test is avoided because the tree object may define its own
        # truthiness (NOTE(review): presumably an lxml element -- confirm).
        tree = getattr(webpage, 'tree', None)
        if tree is None or tree is False:
            used_images = set()
        else:
            # NOTE(review): assumes every src starts with
            # config.img_rel_path; the prefix is cut off by length without
            # being checked.
            prefix_len = len(config.img_rel_path)
            used_images = set(src[prefix_len:] for src in tree.xpath('//img/@src'))

        images = getattr(webpage, 'images', None)
        if images is None or images is False:
            return

        # Only the stored filenames are needed; the original src keys are not.
        for img_fn in images.values():
            basename = os.path.basename(img_fn)
            if basename not in used_images:
                continue
            zip_fn = os.path.join(config.img_abs_path, basename)
            self.link_file(img_fn, zip_fn, compression=False)
Exemple #6
0
        def writeOPF_manifest():
            """Build and return the ``manifest`` element for the OPF file.

            The element gets one ``item`` per entry of ``self.articles``, one
            ``item`` for the NCX file, and one ``item`` for every entry of
            ``self.added_files`` whose guessed MIME type is CSS, PNG, JPEG or
            GIF.
            """
            manifest = E.manifest()

            # Article documents.
            for article in self.articles:
                article_attrs = {
                    'id': article.id,
                    'href': article.path,
                    'media-type': 'application/xhtml+xml'
                }
                manifest.append(E.item(article_attrs))

            # Navigation file.
            ncx_attrs = {
                'id': 'ncx',
                'href': os.path.basename(config.ncx_fn),
                'media-type': 'application/x-dtbncx+xml'
            }
            manifest.append(E.item(ncx_attrs))

            # FIXME (kept from the original): "add missing resources: images,
            # css".  NOTE(review): the loop below already registers css and
            # image files found in self.added_files -- confirm what is still
            # missing.
            resource_types = (
                'text/css',
                'image/png',
                'image/jpeg',
                'image/gif',
            )
            for added in self.added_files:
                # Manifest hrefs are written without the leading 'OPS/'.
                href = added[4:] if added.startswith('OPS/') else added
                media_type = mimetypes.guess_type(href)[0]
                if media_type not in resource_types:
                    continue
                if href == self.cover_img_path:
                    item_id = 'cover-image'
                else:
                    item_id = safe_xml_id(href)
                resource_attrs = {
                    'id': item_id,
                    'href': href,
                    'media-type': media_type
                }
                manifest.append(E.item(resource_attrs))

            return manifest