def remapLinks(self, webpage):
    """Rewrite image sources and link targets in ``webpage.tree`` in place.

    Images: every ``<img>`` whose ``src`` has an entry in ``webpage.images``
    gets its ``src`` replaced by ``config.img_rel_path`` joined with the
    basename of the mapped file; any other ``<img>`` (including one with no
    ``src`` attribute at all) is removed via ``remove_node``.

    Links: ``<a>`` elements without a (non-empty) ``href`` are left alone.
    Fragment links (``#...``) are normalised through ``safe_xml_id``; when
    no element carries the resulting id, the link element itself is given
    that id so the fragment still resolves. All other hrefs are resolved
    against ``webpage.url`` and cleaned; if the result is a page known to
    ``webpage.coll.url2webpage`` the link points at ``<id>.xhtml``,
    otherwise at the absolute URL.

    Returns None; the tree is modified in place.
    """
    for img in webpage.tree.findall('.//img'):
        # ``get`` rather than ``attrib[...]``: an <img> without a src
        # attribute must not abort the whole remap with a KeyError; it is
        # dropped like any other image that cannot be resolved.
        src = img.get('src')
        img_fn = webpage.images.get(src) if src is not None else None
        if img_fn:
            img.set('src', os.path.join(config.img_rel_path,
                                        os.path.basename(img_fn)))
        else:
            remove_node(img)

    # Only used for membership tests, so a set keeps the link loop linear.
    target_ids = set(safe_xml_id(_id)
                     for _id in webpage.tree.xpath('.//@id'))

    for a in webpage.tree.findall('.//a'):
        href = a.get('href')
        if not href:
            # this link is probably just an anchor
            continue

        if href.startswith('#'):
            # The sanitised string is one character longer than the id:
            # the first character (standing for the leading '#') is cut.
            target_id = safe_xml_id(href)[1:]
            if target_id not in target_ids:
                # No element has this id yet: make the link its own target.
                # NOTE(review): this overwrites any id already on the <a>.
                a.set('id', target_id)
                target_ids.add(target_id)
            a.set('href', '#' + target_id)
            continue

        url = clean_url(urlparse.urljoin(webpage.url, href))
        linked_wp = webpage.coll.url2webpage.get(url)
        if linked_wp:
            # The target page is part of the collection: link to its file.
            a.set('href', linked_wp.id + '.xhtml')
        else:
            a.set('href', url)
def remapLinks(self, webpage):
    """Rewrite image sources and link targets in ``webpage.tree`` in place.

    Images: every ``<img>`` whose ``src`` has an entry in ``webpage.images``
    gets its ``src`` replaced by ``config.img_rel_path`` joined with the
    basename of the mapped file; any other ``<img>`` (including one with no
    ``src`` attribute at all) is removed via ``remove_node``.

    Links: ``<a>`` elements without a (non-empty) ``href`` are left alone.
    Fragment links (``#...``) are normalised through ``safe_xml_id``; when
    no element carries the resulting id, the link element itself is given
    that id so the fragment still resolves. All other hrefs are resolved
    against ``webpage.url`` and cleaned; if the result is a page known to
    ``webpage.coll.url2webpage`` the link points at ``<id>.xhtml``,
    otherwise at the absolute URL.

    Returns None; the tree is modified in place.
    """
    for img in webpage.tree.findall('.//img'):
        # ``get`` rather than ``attrib[...]``: an <img> without a src
        # attribute must not abort the whole remap with a KeyError; it is
        # dropped like any other image that cannot be resolved.
        src = img.get('src')
        img_fn = webpage.images.get(src) if src is not None else None
        if img_fn:
            img.set('src', os.path.join(config.img_rel_path,
                                        os.path.basename(img_fn)))
        else:
            remove_node(img)

    # Only used for membership tests, so a set keeps the link loop linear.
    target_ids = set(safe_xml_id(_id)
                     for _id in webpage.tree.xpath('.//@id'))

    for a in webpage.tree.findall('.//a'):
        href = a.get('href')
        if not href:
            # this link is probably just an anchor
            continue

        if href.startswith('#'):
            # The sanitised string is one character longer than the id:
            # the first character (standing for the leading '#') is cut.
            target_id = safe_xml_id(href)[1:]
            if target_id not in target_ids:
                # No element has this id yet: make the link its own target.
                # NOTE(review): this overwrites any id already on the <a>.
                a.set('id', target_id)
                target_ids.add(target_id)
            a.set('href', '#' + target_id)
            continue

        url = clean_url(urlparse.urljoin(webpage.url, href))
        linked_wp = webpage.coll.url2webpage.get(url)
        if linked_wp:
            # The target page is part of the collection: link to its file.
            a.set('href', linked_wp.id + '.xhtml')
        else:
            a.set('href', url)
def writeOPF_manifest():
    """Build and return the OPF ``manifest`` element.

    The manifest gets, in this order: one XHTML item per entry of
    ``self.articles``, the NCX item, and one item for each file in
    ``self.added_files`` whose guessed media type is CSS, PNG, JPEG or GIF.

    ``self`` is not a parameter; it is resolved from an enclosing scope
    that is not part of this definition.
    """
    listed_types = ('text/css', 'image/png', 'image/jpeg', 'image/gif')

    manifest = E.manifest()
    for article in self.articles:
        manifest.append(E.item({'id': article.id,
                                'href': article.path,
                                'media-type': 'application/xhtml+xml'}))

    manifest.append(E.item({'id': 'ncx',
                            'href': os.path.basename(config.ncx_fn),
                            'media-type': 'application/x-dtbncx+xml'}))

    # FIXME (kept from the original): add missing resources: images, css
    # NOTE(review): the loop below already lists CSS and PNG/JPEG/GIF
    # files - confirm whether this FIXME is still open.
    for added in self.added_files:
        # hrefs are written without the leading 'OPS/' directory
        # (presumably relative to the OPF file's location - TODO confirm).
        href = added[4:] if added.startswith('OPS/') else added
        media_type = mimetypes.guess_type(href)[0]
        if media_type not in listed_types:
            continue
        if href == self.cover_img_path:
            # The cover image gets a fixed, well-known id.
            item_id = 'cover-image'
        else:
            item_id = safe_xml_id(href)
        manifest.append(E.item({'id': item_id,
                                'href': href,
                                'media-type': media_type}))
    return manifest
def addArticle(self, webpage):
    """Store *webpage* in the archive, register it, and link its images.

    The page's ``xml`` is added as ``OPS/<webpage.id>.xhtml`` and an
    ``ArticleInfo`` record is appended to ``self.articles`` (type
    ``'article'`` for ``collection.WebPage`` instances, ``'chapter'``
    otherwise). Afterwards every file in ``webpage.images`` whose basename
    is still referenced by an ``<img src>`` in ``webpage.tree`` is linked
    into ``config.img_abs_path`` without compression.

    ``tree`` and ``images`` are optional: a page where either attribute is
    missing, ``None`` or ``False`` simply contributes no images.
    """
    path = 'OPS/%s.xhtml' % webpage.id
    self.add_file(path, webpage.xml)

    if isinstance(webpage, collection.WebPage):
        article_type = 'article'
    else:
        article_type = 'chapter'
    self.articles.append(ArticleInfo(id=safe_xml_id(webpage.id),
                                     path=os.path.basename(path),
                                     title=webpage.title,
                                     type=article_type))

    # Identity checks instead of ``!= False``: the old comparison let a
    # ``None`` tree through and then crashed on ``None.xpath``. Plain
    # truth-testing is avoided on purpose, since the tree object's
    # truthiness is not known here (NOTE(review): presumably an lxml
    # element, whose truth value depends on its children).
    tree = getattr(webpage, 'tree', None)
    if tree is None or tree is False:
        used_images = set()
    else:
        # Drop the first len(config.img_rel_path) characters of every src
        # to get the name compared against image basenames below.
        # NOTE(review): this only yields a bare basename when the srcs
        # start with config.img_rel_path and that value ends with the path
        # separator - confirm against the configuration.
        prefix_len = len(config.img_rel_path)
        used_images = set(src[prefix_len:]
                          for src in tree.xpath('//img/@src'))

    images = getattr(webpage, 'images', None)
    if not images:
        # Missing, None, False or empty: nothing to link.
        return

    for img_fn in images.values():
        basename = os.path.basename(img_fn)
        if basename not in used_images:
            # The image is no longer referenced by the page.
            continue
        zip_fn = os.path.join(config.img_abs_path, basename)
        self.link_file(img_fn, zip_fn, compression=False)
def addArticle(self, webpage):
    """Store *webpage* in the archive, register it, and link its images.

    The page's ``xml`` is added as ``OPS/<webpage.id>.xhtml`` and an
    ``ArticleInfo`` record is appended to ``self.articles`` (type
    ``'article'`` for ``collection.WebPage`` instances, ``'chapter'``
    otherwise). Afterwards every file in ``webpage.images`` whose basename
    is still referenced by an ``<img src>`` in ``webpage.tree`` is linked
    into ``config.img_abs_path`` without compression.

    ``tree`` and ``images`` are optional: a page where either attribute is
    missing, ``None`` or ``False`` simply contributes no images.
    """
    path = 'OPS/%s.xhtml' % webpage.id
    self.add_file(path, webpage.xml)

    if isinstance(webpage, collection.WebPage):
        article_type = 'article'
    else:
        article_type = 'chapter'
    self.articles.append(ArticleInfo(id=safe_xml_id(webpage.id),
                                     path=os.path.basename(path),
                                     title=webpage.title,
                                     type=article_type))

    # Identity checks instead of ``!= False``: the old comparison let a
    # ``None`` tree through and then crashed on ``None.xpath``. Plain
    # truth-testing is avoided on purpose, since the tree object's
    # truthiness is not known here (NOTE(review): presumably an lxml
    # element, whose truth value depends on its children).
    tree = getattr(webpage, 'tree', None)
    if tree is None or tree is False:
        used_images = set()
    else:
        # Drop the first len(config.img_rel_path) characters of every src
        # to get the name compared against image basenames below.
        # NOTE(review): this only yields a bare basename when the srcs
        # start with config.img_rel_path and that value ends with the path
        # separator - confirm against the configuration.
        prefix_len = len(config.img_rel_path)
        used_images = set(src[prefix_len:]
                          for src in tree.xpath('//img/@src'))

    images = getattr(webpage, 'images', None)
    if not images:
        # Missing, None, False or empty: nothing to link.
        return

    for img_fn in images.values():
        basename = os.path.basename(img_fn)
        if basename not in used_images:
            # The image is no longer referenced by the page.
            continue
        zip_fn = os.path.join(config.img_abs_path, basename)
        self.link_file(img_fn, zip_fn, compression=False)
def writeOPF_manifest():
    """Build and return the OPF ``manifest`` element.

    The manifest gets, in this order: one XHTML item per entry of
    ``self.articles``, the NCX item, and one item for each file in
    ``self.added_files`` whose guessed media type is CSS, PNG, JPEG or GIF.

    ``self`` is not a parameter; it is resolved from an enclosing scope
    that is not part of this definition.
    """
    listed_types = ('text/css', 'image/png', 'image/jpeg', 'image/gif')

    manifest = E.manifest()
    for article in self.articles:
        manifest.append(E.item({'id': article.id,
                                'href': article.path,
                                'media-type': 'application/xhtml+xml'}))

    manifest.append(E.item({'id': 'ncx',
                            'href': os.path.basename(config.ncx_fn),
                            'media-type': 'application/x-dtbncx+xml'}))

    # FIXME (kept from the original): add missing resources: images, css
    # NOTE(review): the loop below already lists CSS and PNG/JPEG/GIF
    # files - confirm whether this FIXME is still open.
    for added in self.added_files:
        # hrefs are written without the leading 'OPS/' directory
        # (presumably relative to the OPF file's location - TODO confirm).
        href = added[4:] if added.startswith('OPS/') else added
        media_type = mimetypes.guess_type(href)[0]
        if media_type not in listed_types:
            continue
        if href == self.cover_img_path:
            # The cover image gets a fixed, well-known id.
            item_id = 'cover-image'
        else:
            item_id = safe_xml_id(href)
        manifest.append(E.item({'id': item_id,
                                'href': href,
                                'media-type': media_type}))
    return manifest