def performOPFSourceUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    """Rewrite relative hrefs in OPF xml after a file has been moved/renamed.

    keylist/valuelist form a serialized old-bookpath -> new-bookpath mapping.
    Returns the updated xml as a unicode string.
    """
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    updates = {}
    for idx, key in enumerate(keylist):
        updates[key] = valuelist[idx]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["link", "item", "reference", "site"]):
        if "href" not in tag.attrs:
            continue
        href = tag["href"]
        # leave any href with a scheme (":" present) untouched
        if ":" in href:
            continue
        pieces = href.split('#')
        decoded_path = unquoteurl(pieces[0])
        fragment = pieces[1] if len(pieces) > 1 else ""
        old_target = buildBookPath(decoded_path, startingDir(oldbkpath))
        new_target = updates.get(old_target, old_target)
        new_href = buildRelativePath(newbkpath, new_target)
        if fragment != "":
            new_href = new_href + "#" + fragment
        tag["href"] = quoteurl(new_href)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
    return newdata
def anchorNCXUpdatesAfterMerge(data, ncx_bookpath, sink_bookpath, merged_bookpaths):
    """Redirect ncx <content> src attributes after source files are merged.

    Any src that pointed at one of merged_bookpaths is retargeted at
    sink_bookpath, preserving its fragment identifier when present.
    Returns the updated ncx xml as a unicode string.
    """
    data = _remove_xml_header(data)
    startdir = startingDir(ncx_bookpath)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            # skip srcs with a scheme (mailto:, http:, ...)
            if src.find(":") == -1:
                # fix: str.split never returns None, so the old
                # "if parts is not None" guard was always true — removed
                parts = src.split('#')
                ahref = unquoteurl(parts[0])
                target_bookpath = buildBookPath(ahref, startdir)
                if target_bookpath in merged_bookpaths:
                    attribute_value = buildRelativePath(
                        ncx_bookpath, sink_bookpath)
                    # keep a non-empty fragment identifier
                    if len(parts) > 1 and parts[1] != "":
                        attribute_value += "#" + parts[1]
                    tag["src"] = quoteurl(attribute_value)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
    return newdata
def performPageMapUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    """Rewrite page-map <page> href attributes after a file move/rename.

    keylist/valuelist form a serialized old-bookpath -> new-bookpath mapping.
    Returns the updated xml as a unicode string.
    """
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary of xml_updates properly adjusted
    updates = OrderedDict()
    for idx, key in enumerate(keylist):
        updates[key] = valuelist[idx]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=["page"])
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["page"]):
        if "href" not in tag.attrs:
            continue
        ref = tag["href"]
        # leave any href with a scheme (":" present) untouched
        if ":" in ref:
            continue
        pieces = ref.split('#')
        decoded_path = urldecodepart(pieces[0])
        fragment = urldecodepart(pieces[1]) if len(pieces) > 1 else ""
        old_target = buildBookPath(decoded_path, startingDir(oldbkpath))
        new_target = updates.get(old_target, old_target)
        new_ref = urlencodepart(buildRelativePath(newbkpath, new_target))
        if fragment != "":
            new_ref = new_ref + "#" + urlencodepart(fragment)
        tag["href"] = new_ref
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
    return newdata
def anchorNCXUpdates(data, ncx_bookpath, originating_bookpath, keylist, valuelist):
    """Retarget ncx <content> srcs whose fragment ids moved to other files.

    keylist/valuelist form a serialized fragment-id -> new-bookpath mapping
    for ids that used to live in originating_bookpath.
    Returns the updated ncx xml as a unicode string.
    """
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    id_dict = OrderedDict()
    for idx, key in enumerate(keylist):
        id_dict[key] = valuelist[idx]
    startdir = startingDir(ncx_bookpath)
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" not in tag.attrs:
            continue
        src = tag["src"]
        # leave any src with a scheme (":" present) untouched
        if ":" in src:
            continue
        pieces = src.split('#')
        decoded_path = urldecodepart(pieces[0])
        # convert this path to its target bookpath
        target_bookpath = buildBookPath(decoded_path, startdir)
        if len(pieces) > 1 and pieces[1] != "" and target_bookpath == originating_bookpath:
            fragment_id = urldecodepart(pieces[1])
            if fragment_id in id_dict:
                target_bookpath = id_dict[fragment_id]
                new_src = urlencodepart(buildRelativePath(ncx_bookpath, target_bookpath))
                tag["src"] = new_src + "#" + urlencodepart(fragment_id)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
    return newdata
def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
    """Add a new file to the book under its mime group's default folder.

    Validates the manifest id, basename and mime type, writes the payload
    to disk, registers all manifest mappings, marks the opf modified, and
    returns the manifest id.  Raises WrapperException on any conflict.
    """
    uniqueid = unicode_str(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        # fall back to mapping the file extension to a media type
        ext = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')
    # place the file in the first configured folder for its mime group
    group = mime_group_map.get(mime, "Misc")
    default_path = self.group_paths[group][0]
    bookpath = basename if default_path == "" else default_path + "/" + basename
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('Basename already exists')
    # now actually write out the new file
    relpath = bookpath.replace("/", os.sep)
    self.id_to_filepath[uniqueid] = relpath
    abs_path = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(abs_path)
    if not unipath.exists(parent):
        os.makedirs(parent)
    if mime in TEXT_MIMETYPES or isinstance(data, text_type):
        data = utf8_str(data)
    with open(abs_path, 'wb') as fp:
        fp.write(data)
    # register every manifest mapping for the new file
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = properties
    self.id_to_fall[uniqueid] = fallback
    self.id_to_over[uniqueid] = overlay
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
def addbookpath(self, uniqueid, bookpath, data, mime=None):
    """Add a new file to the book at an explicit bookpath.

    Validates the manifest id and mime type, writes the payload to disk,
    registers all manifest mappings, marks the opf modified, and returns
    the manifest id.  Raises WrapperException on any conflict.
    """
    uniqueid = _unicodestr(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    bookpath = _unicodestr(bookpath)
    basename = bookpath.split("/")[-1]
    mime = _unicodestr(mime)
    if mime is None:
        # fall back to mapping the file extension to a media type
        ext = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('bookpath already exists')
    # now actually write out the new file
    relpath = bookpath.replace("/", os.sep)
    self.id_to_filepath[uniqueid] = relpath
    abs_path = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(abs_path)
    if not os.path.exists(parent):
        os.makedirs(parent)
    if mime in TEXT_MIMETYPES or isinstance(data, str):
        data = _utf8str(data)
    with open(abs_path, 'wb') as fp:
        fp.write(data)
    # register every manifest mapping for the new file
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = None
    self.id_to_fall[uniqueid] = None
    self.id_to_over[uniqueid] = None
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
def get_relativepath(self, from_bookpath, to_bookpath):
    """Return the relative href that reaches to_bookpath from from_bookpath."""
    return buildRelativePath(_unicodestr(from_bookpath), _unicodestr(to_bookpath))