import os

# NOTE: this module runs inside Sigil's Python plugin launcher. The import
# paths below are assumed to match Sigil's sigil_bs4 fork of BeautifulSoup
# (whose soups also expose decodexml()); quoteurl()/unquoteurl() (href
# percent-encoding helpers), TEXT_FOLDER_NAME (the book's text folder), and
# ebook_xml_empty_tags (ebook XML tags allowed to serialize as empty
# elements) are assumed to come from the surrounding launcher modules.
from sigil_bs4 import BeautifulSoup
from sigil_bs4.builder._lxml import LXMLTreeBuilderForXML


def performOPFSourceUpdates(data, currentdir, keylist, valuelist):
    # rebuild the lookup dictionary from the serialized parallel lists
    updates = {}
    for i in range(0, len(keylist)):
        updates[keylist[i]] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    for tag in soup.find_all(["item", "reference", "site"]):
        if "href" in tag.attrs:
            href = tag["href"]
            # leave external urls (anything with a scheme) untouched
            if href.find(":") == -1:
                parts = href.split('#')
                url = parts[0]
                fragment = ""
                if len(parts) > 1:
                    fragment = parts[1]
                # resolve the href to a book-relative path with forward slashes
                bookrelpath = os.path.join(currentdir, unquoteurl(url))
                bookrelpath = os.path.normpath(bookrelpath)
                bookrelpath = bookrelpath.replace(os.sep, "/")
                if bookrelpath in updates:
                    attribute_value = updates[bookrelpath]
                    if fragment != "":
                        attribute_value = attribute_value + "#" + fragment
                    attribute_value = quoteurl(attribute_value)
                    tag["href"] = attribute_value
    newdata = soup.decode(pretty_print=True, formatter='minimal')
    return newdata
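
# Usage sketch (illustrative, not part of the original module): rename a
# manifest href. The OPF snippet and paths are hypothetical; currentdir is
# the OPF's folder relative to the book root ("" when the OPF sits at the
# root), and keylist/valuelist carry old -> new book-relative paths.
def _example_opf_update():
    opf = ('<package version="2.0"><manifest>'
           '<item id="c1" href="Text/chapter01.xhtml" media-type="application/xhtml+xml"/>'
           '</manifest></package>')
    # the item's href comes back as "Text/part01.xhtml"
    return performOPFSourceUpdates(opf, "", ["Text/chapter01.xhtml"], ["Text/part01.xhtml"])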
def anchorNCXUpdates(data, originating_filename, keylist, valuelist):
    # rebuild the lookup dictionary from the serialized parallel lists:
    # fragment id -> name of the file that fragment now lives in
    id_dict = {}
    for i in range(0, len(keylist)):
        id_dict[keylist[i]] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    original_filename_with_relative_path = TEXT_FOLDER_NAME + "/" + originating_filename
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                # only rewrite fragment links that point into the originating file
                if (len(parts) > 1) and (parts[0] == original_filename_with_relative_path) and (parts[1] != ""):
                    fragment_id = parts[1]
                    if fragment_id in id_dict:
                        attribute_value = TEXT_FOLDER_NAME + "/" + quoteurl(id_dict[fragment_id]) + "#" + fragment_id
                        tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
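
# Usage sketch (illustrative): after heading_1 moved from Section0001.xhtml
# into a new file (e.g. following a split), repoint the NCX entry at it.
# Assumes TEXT_FOLDER_NAME is "Text", as in Sigil.
def _example_anchor_update():
    ncx = ('<ncx><navMap><navPoint id="n1">'
           '<content src="Text/Section0001.xhtml#heading_1"/>'
           '</navPoint></navMap></ncx>')
    # content/@src becomes "Text/Section0001_0002.xhtml#heading_1"
    return anchorNCXUpdates(ncx, "Section0001.xhtml", ["heading_1"], ["Section0001_0002.xhtml"])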
def performNCXSourceUpdates(data, currentdir, keylist, valuelist):
    # rebuild the lookup dictionary from the serialized parallel lists
    updates = {}
    for i in range(0, len(keylist)):
        updates[keylist[i]] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            # leave external urls (anything with a scheme) untouched
            if src.find(":") == -1:
                parts = src.split('#')
                url = parts[0]
                fragment = ""
                if len(parts) > 1:
                    fragment = parts[1]
                # resolve the src to a book-relative path with forward slashes
                bookrelpath = os.path.join(currentdir, unquoteurl(url))
                bookrelpath = os.path.normpath(bookrelpath)
                bookrelpath = bookrelpath.replace(os.sep, "/")
                if bookrelpath in updates:
                    attribute_value = updates[bookrelpath]
                    if fragment != "":
                        attribute_value = attribute_value + "#" + fragment
                    attribute_value = quoteurl(attribute_value)
                    tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
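
# Usage sketch (illustrative): the NCX lives at the book root here
# (currentdir ""), and Text/Section0001.xhtml was renamed to Text/Intro.xhtml;
# the #toc fragment is preserved on the rewritten src.
def _example_ncx_update():
    ncx = ('<ncx><navMap><navPoint id="n1">'
           '<content src="Text/Section0001.xhtml#toc"/>'
           '</navPoint></navMap></ncx>')
    return performNCXSourceUpdates(ncx, "", ["Text/Section0001.xhtml"], ["Text/Intro.xhtml"])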
def repairXML(data, self_closing_tags=ebook_xml_empty_tags, indent_chars="  "):
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=self_closing_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)
    return newdata
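
# Usage sketch (illustrative): the tree builder is assumed to run lxml in
# recover mode, so it tolerates the unclosed <meta>; since meta is assumed
# to be in ebook_xml_empty_tags, it serializes as a self-closed empty
# element in the re-indented output.
def _example_repair():
    broken = "<package><metadata><meta name='cover' content='cover-image'></metadata></package>"
    return repairXML(broken)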