def anchorNCXUpdates(data, originating_filename, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    id_dict = {}
    for i in range(0, len(keylist)):
        id_dict[keylist[i]] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    original_filename_with_relative_path = TEXT_FOLDER_NAME + "/" + originating_filename
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                if (parts is not None) and (len(parts) > 1) and \
                        (parts[0] == original_filename_with_relative_path) and (parts[1] != ""):
                    fragment_id = parts[1]
                    if fragment_id in id_dict:
                        attribute_value = TEXT_FOLDER_NAME + "/" + quoteurl(id_dict[fragment_id]) + "#" + fragment_id
                        tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

def performOPFSourceUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    updates = {}
    for i in range(0, len(keylist)):
        updates[keylist[i]] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["link", "item", "reference", "site"]):
        if "href" in tag.attrs:
            href = tag["href"]
            if href.find(":") == -1:
                parts = href.split('#')
                ahref = unquoteurl(parts[0])
                fragment = ""
                if len(parts) > 1:
                    fragment = parts[1]
                oldtarget = buildBookPath(ahref, startingDir(oldbkpath))
                newtarget = updates.get(oldtarget, oldtarget)
                attribute_value = buildRelativePath(newbkpath, newtarget)
                if fragment != "":
                    attribute_value = attribute_value + "#" + fragment
                attribute_value = quoteurl(attribute_value)
                tag["href"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

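# Usage sketch for performOPFSourceUpdates. All bookpaths and the OPF snippet
# below are illustrative assumptions, not values from a real book: keylist and
# valuelist are parallel lists mapping each old target bookpath to its new one.
def _demo_performOPFSourceUpdates():
    opf = ('<?xml version="1.0" encoding="utf-8"?>'
           '<package version="3.0"><manifest>'
           '<item id="ch1" href="Text/ch1.xhtml" media-type="application/xhtml+xml"/>'
           '</manifest></package>')
    # the manifest href should come back rewritten as Text/intro.xhtml
    return performOPFSourceUpdates(opf, "OEBPS/content.opf", "OEBPS/content.opf",
                                   ["OEBPS/Text/ch1.xhtml"], ["OEBPS/Text/intro.xhtml"])
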
def repairXML(data, mtype="", indent_chars=" "): newdata = _remove_xml_header(data) # if well-formed - don't mess with it if _well_formed(newdata): return data newdata = _make_it_sane(newdata) if not _well_formed(newdata): newdata = _reformat(newdata) if mtype == "application/oebps-package+xml": newdata = newdata.decode('utf-8') newdata = Opf_Parser(newdata).rebuild_opfxml() # lxml requires utf-8 on Mac, won't work with unicode if isinstance(newdata, str): newdata = newdata.encode('utf-8') voidtags = get_void_tags(mtype) xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=voidtags) soup = BeautifulSoup(newdata, features=None, from_encoding="utf-8", builder=xmlbuilder) newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars) return newdata
def anchorNCXUpdatesAfterMerge(data, ncx_bookpath, sink_bookpath, merged_bookpaths):
    data = _remove_xml_header(data)
    startdir = startingDir(ncx_bookpath)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                if parts is not None:
                    ahref = unquoteurl(parts[0])
                    target_bookpath = buildBookPath(ahref, startdir)
                    if target_bookpath in merged_bookpaths:
                        attribute_value = buildRelativePath(ncx_bookpath, sink_bookpath)
                        if len(parts) > 1 and parts[1] != "":
                            attribute_value += "#" + parts[1]
                        tag["src"] = quoteurl(attribute_value)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

def performPageMapUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary of xml_updates properly adjusted
    updates = OrderedDict()
    for i in range(0, len(keylist)):
        updates[keylist[i]] = valuelist[i]
    xml_empty_tags = ["page"]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["page"]):
        for att in ["href"]:
            if att in tag.attrs:
                ref = tag[att]
                if ref.find(":") == -1:
                    parts = ref.split('#')
                    apath = urldecodepart(parts[0])
                    fragment = ""
                    if len(parts) > 1:
                        fragment = urldecodepart(parts[1])
                    oldtarget = buildBookPath(apath, startingDir(oldbkpath))
                    newtarget = updates.get(oldtarget, oldtarget)
                    attribute_value = urlencodepart(buildRelativePath(newbkpath, newtarget))
                    if fragment != "":
                        attribute_value = attribute_value + "#" + urlencodepart(fragment)
                    tag[att] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

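# Usage sketch for performPageMapUpdates (assumed paths): as with
# performOPFSourceUpdates, keylist/valuelist are parallel old/new bookpath
# lists; only <page href="..."> attributes are rewritten.
def _demo_performPageMapUpdates():
    pagemap = ('<?xml version="1.0" encoding="utf-8"?>'
               '<page-map xmlns="http://www.idpf.org/2007/opf">'
               '<page name="1" href="Text/ch1.xhtml#p1"/></page-map>')
    # expected rewrite: href="Text/intro.xhtml#p1"
    return performPageMapUpdates(pagemap, "OEBPS/page-map.xml", "OEBPS/page-map.xml",
                                 ["OEBPS/Text/ch1.xhtml"], ["OEBPS/Text/intro.xhtml"])
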
def anchorNCXUpdatesAfterMerge(data, sink_filename, merged_names):
    data = _remove_xml_header(data)
    # build list of urls to replace
    namelist = []
    for fn in merged_names:
        namelist.append(TEXT_FOLDER_NAME + "/" + fn)
    # and build url of file to replace them with
    sink_filename_with_relative_path = TEXT_FOLDER_NAME + "/" + sink_filename
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                if (parts is not None) and (parts[0] in namelist):
                    attribute_value = sink_filename_with_relative_path
                    if len(parts) > 1 and parts[1] != "":
                        attribute_value += "#" + parts[1]
                    tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

def anchorNCXUpdates(data, ncx_bookpath, originating_bookpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    id_dict = OrderedDict()
    for i in range(0, len(keylist)):
        id_dict[keylist[i]] = valuelist[i]
    startdir = startingDir(ncx_bookpath)
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                apath = urldecodepart(parts[0])
                # convert this path to its target bookpath
                target_bookpath = buildBookPath(apath, startdir)
                if (parts is not None) and (len(parts) > 1) and \
                        (target_bookpath == originating_bookpath) and (parts[1] != ""):
                    fragment_id = urldecodepart(parts[1])
                    if fragment_id in id_dict:
                        target_bookpath = id_dict[fragment_id]
                        attribute_value = urlencodepart(buildRelativePath(ncx_bookpath, target_bookpath))
                        attribute_value = attribute_value + "#" + urlencodepart(fragment_id)
                        tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

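# Usage sketch for anchorNCXUpdates (assumed paths): after splitting ch1.xhtml,
# the fragment id "fn1" now lives in ch1_split.xhtml, so keylist maps fragment
# ids to the bookpaths of the files that now contain them.
def _demo_anchorNCXUpdates():
    ncx = ('<?xml version="1.0" encoding="utf-8"?>'
           '<ncx><navMap><navPoint><navLabel><text>Note 1</text></navLabel>'
           '<content src="Text/ch1.xhtml#fn1"/></navPoint></navMap></ncx>')
    # expected rewrite: src="Text/ch1_split.xhtml#fn1"
    return anchorNCXUpdates(ncx, "OEBPS/toc.ncx", "OEBPS/Text/ch1.xhtml",
                            ["fn1"], ["OEBPS/Text/ch1_split.xhtml"])
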
def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)
    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # iterate over a copy so removing a missing parser doesn't skip the next one
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print("I noticed that %s is not installed. Installing it may help." % name)
    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION)))
        except ImportError as e:
            print("lxml is not installed or couldn't be imported.")
    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError as e:
            print("html5lib is not installed or couldn't be imported.")
    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        data = open(data).read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print()
    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())
        print("-" * 80)

def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b - a))
    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b - a))
    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b - a))

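# Quick sanity run (a sketch: 100000 elements can take a while, so a smaller
# generated document keeps the run short while still exercising each parser):
if __name__ == '__main__':
    benchmark_parsers(10000)
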
def repairXML(data, self_closing_tags=ebook_xml_empty_tags, indent_chars="  "):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=self_closing_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)
    return newdata

def performPageMapUpdates(data, currentdir, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary of xml_updates properly adjusted
    updates = {}
    for i in range(0, len(keylist)):
        updates[keylist[i]] = "../" + valuelist[i]
    xml_empty_tags = ["page"]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["page"]):
        for att in ["href"]:
            if att in tag.attrs:
                ref = tag[att]
                if ref.find(":") == -1:
                    parts = ref.split('#')
                    url = parts[0]
                    fragment = ""
                    if len(parts) > 1:
                        fragment = parts[1]
                    bookrelpath = os.path.join(currentdir, unquoteurl(url))
                    bookrelpath = os.path.normpath(bookrelpath)
                    bookrelpath = bookrelpath.replace(os.sep, "/")
                    if bookrelpath in updates:
                        attribute_value = updates[bookrelpath]
                        if fragment != "":
                            attribute_value = attribute_value + "#" + fragment
                        attribute_value = quoteurl(attribute_value)
                        tag[att] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata

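# Usage sketch for the legacy performPageMapUpdates. All paths here are
# assumptions about the old fixed layout: currentdir is the folder holding the
# page-map file, keylist entries are book-relative paths, and valuelist entries
# get the "../" prefix applied above.
def _demo_performPageMapUpdates_legacy():
    pagemap = ('<?xml version="1.0" encoding="utf-8"?>'
               '<page-map xmlns="http://www.idpf.org/2007/opf">'
               '<page name="1" href="../Text/ch1.xhtml#p1"/></page-map>')
    # "Misc/../Text/ch1.xhtml" normalizes to the key "Text/ch1.xhtml",
    # so the href should come back as "../Text/intro.xhtml#p1"
    return performPageMapUpdates(pagemap, "Misc", ["Text/ch1.xhtml"], ["Text/intro.xhtml"])
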
def run(bk):
    # get epub version number
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()
    else:
        epubversion = BeautifulSoup(bk.get_opf(), 'lxml').find('package')['version']

    # get preferences
    prefs = bk.getPrefs()
    if prefs == {}:
        prefs['tag'] = 'span'
        prefs['attribute'] = 'epub:type'
        prefs['value'] = 'pagebreak'
        bk.savePrefs(prefs)
    prefs = bk.getPrefs()
    tag = prefs['tag']
    attribute = prefs['attribute']
    value = prefs['value']

    # get nav doc and toc.ncx ids
    nav_id = ncx_id = None
    ncx_id = bk.gettocid()
    if epubversion.startswith('3'):
        opf_soup = BeautifulSoup(bk.get_opf(), 'lxml')
        if opf_soup.find('item', {'properties': 'nav'}) is not None:
            nav_id = opf_soup.find('item', {'properties': 'nav'})['id']
        else:
            print('Nav document ID not found!')

    ncx_pagelist = '\n  <pageList>\n    <navLabel>\n      <text>Pages</text>\n    </navLabel>'
    nav_pagelist = '  <nav epub:type="page-list" id="page-list">\n    <ol>\n'
    page_targets = 0
    substitutePageNum(bk)

    # get all html files
    page_dic = {}
    errors = 0
    for (html_id, href) in bk.text_iter():
        html = bk.readfile(html_id)
        # load html code into BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')
        # find pagebreaks
        page_numbers = soup.find_all(tag, {attribute: value})
        if not page_numbers:
            print('\nNo page number targets found in ' + os.path.basename(href))
        else:
            page_targets += len(page_numbers)
            print('\n' + str(len(page_numbers)) + ' page number targets found in ' + os.path.basename(href))
        # add pagelist entries to pagelist
        for page_number in page_numbers:
            # title has priority over string
            if page_number.has_attr('title'):
                title = page_number['title']
            else:
                title = page_number.contents[0]
            # generate id, if necessary
            if not page_number.has_attr('id'):
                id = 'page' + title
            else:
                id = page_number['id']
            # check for duplicate titles/ids
            if title not in page_dic:
                page_dic[title] = os.path.basename(href + '#' + id)
            else:
                errors += 1
                page_dic[title] += ' / ' + os.path.basename(href + '#' + id)
                print('ERROR: duplicate page number found:', title, page_dic[title])
            # epub2
            ncx_pagelist += '''\n    <pageTarget id="{}" type="normal" value="{}">
      <navLabel>
        <text>{}</text>
      </navLabel>
      <content src="{}"/>
    </pageTarget>'''.format(id, title, title, href + '#' + id)
            # epub3
            if nav_id:
                nav_pagelist += '      <li>\n        <a href="{}">{}</a>\n      </li>\n'.format('../' + href + '#' + id, title)

    if errors != 0:
        print('Plugin aborted because of {} duplicate page number(s).'.format(str(errors)))
        return -1

    # add/replace NCX pagelist section
    if page_targets:
        ncx_pagelist += '\n  </pageList>'
        if ncx_id:
            # get ncx contents
            ncx = bk.readfile(ncx_id)
            # delete existing pagelist
            ncx = re.sub(r'\s*\<pageList[^>]*\>.+?\<\/pageList\>\s*', '', ncx, flags=re.DOTALL)
            # add new pagelist
            ncx = ncx.replace('</ncx>', ncx_pagelist + '\n</ncx>')
            # update ncx file
            bk.writefile(ncx_id, ncx)
            print('\n' + str(page_targets) + ' page number targets found.\nNCX file updated.')
        else:
            print('\nNCX file couldn\'t be found, so it was not updated.')
    else:
        print('\nNo page number targets found.\nNCX file not updated.')

    # add/replace NAV pagelist section
    if nav_id:
        nav_pagelist += '    </ol>\n  </nav>'
        new_pagelist = BeautifulSoup(nav_pagelist, 'html.parser')
        # get nav contents
        nav = bk.readfile(nav_id)
        nav_soup = BeautifulSoup(nav, 'html.parser')
        orig_nav_soup = str(nav_soup)
        old_page_list = nav_soup.find('nav', {'epub:type': 'page-list'})
        if old_page_list is not None:
            old_page_list.replace_with(new_pagelist)
            #print('Existing page-list updated.')
        else:
            nav_soup.body.insert(2, new_pagelist)
            #print('New page-list section added.')
        # update nav
        if str(nav_soup) != orig_nav_soup:
            try:
                bk.writefile(nav_id, str(nav_soup.prettyprint_xhtml(indent_level=0, eventual_encoding="utf-8", formatter="minimal", indent_chars="  ")))
            except Exception:
                bk.writefile(nav_id, str(nav_soup))
            print('NAV file updated.')
        else:
            print('NAV file NOT updated.')

    print('\nPlease click OK to close the Plugin Runner window.')
    return 0

def run(bk):
    # set Tk parameters for dialog box
    root = Tk()
    root.geometry("320x200+400+400")
    app = Dialog(root, bk)
    if not isosx:
        icon_img = PhotoImage(file=os.path.join(bk._w.plugin_dir, bk._w.plugin_name, 'sigil.png'))
        root.tk.call('wm', 'iconphoto', root._w, icon_img)
    root.mainloop()
    if Cancel:
        print('Plugin terminated by user.\nPlease click OK to close the Plugin Runner window.')
        return -1

    # --------------------------------------
    # get preferences
    # --------------------------------------
    prefs = bk.getPrefs()
    # id prefix for <sup> footnote anchors
    fnanchor_id = prefs['anchorid']
    # id prefix for <p> footnote definitions
    fndef_id = prefs['fndefid']
    # class for <a> backlink numbers in footnote definitions file
    backlink_class = prefs['backlink']
    kindle_compat = prefs['kindle']
    ibooks_compat = prefs['ibooks']
    notesource = prefs['notesource']
    # debug mode
    if 'debug' not in prefs:
        prefs['debug'] = False
        bk.savePrefs(prefs)
    debug = prefs['debug']

    # get epub version number
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()
    else:
        epubversion = BeautifulSoup(bk.get_opf(), 'lxml').find('package')['version']

    # -------------------------
    # footnote linking process
    # -------------------------
    template_anchor = '''<a class="duokan-footnote" href="#{fndef_id}{id}" id="{fnanchor_id}{id}"><img alt="" src="../Images/note.png"/></a>'''
    template_def = '''  <li class="duokan-footnote-item" id="{fndef_id}{id}">
    <a class="{backlink_class}" href="#{fnanchor_id}{id}">◎</a>{text}</li>\n</ol>'''
    if kindle_compat and ibooks_compat:
        template_anchor = '''<a style="text-decoration:none!important;color:black;" class="duokan-footnote" epub:type="noteref" href="#{fndef_id}{id}" id="{fnanchor_id}{id}"><img alt="" src="../Images/note.png"/></a>'''
        template_def = '''  <li class="duokan-footnote-item" id="{fndef_id}{id}">
    <p><a class="{backlink_class}" style="text-decoration:none!important;color:black;" href="#{fnanchor_id}{id}">◎</a>{text}</p></li>\n</ol>'''
    else:
        if kindle_compat:
            template_anchor = '''<a style="text-decoration:none!important;color:black;" class="duokan-footnote" href="#{fndef_id}{id}" id="{fnanchor_id}{id}"><img alt="" src="../Images/note.png"/></a>'''
            template_def = '''  <li class="duokan-footnote-item" id="{fndef_id}{id}">
    <p><a class="{backlink_class}" style="text-decoration:none!important;color:black;" href="#{fnanchor_id}{id}">◎</a>{text}</p></li>\n</ol>'''
        if ibooks_compat:
            template_anchor = '''<a class="duokan-footnote" epub:type="noteref" href="#{fndef_id}{id}" id="{fnanchor_id}{id}"><img alt="" src="../Images/note.png"/></a>'''
            template_def = '''  <li class="duokan-footnote-item" id="{fndef_id}{id}">
    <a class="{backlink_class}" style="color:black;" href="#{fnanchor_id}{id}">◎</a>{text}</li>\n</ol>'''

    anchor_count = 0
    def_count = 0
    pattern_anchor = re.compile(r'(?<!<p>)\[\d+\]')
    pattern_def = re.compile(r'\<p\>\[\d+\](.+)\<\/p\>')

    # validate note source; default to the whole spine so iter_list is always defined
    note_html = None
    note_html_original = note_html
    iter_list = list(bk.spine_iter())
    if notesource:
        if not notesource.startswith('Text/'):
            notesource = 'Text/' + notesource
        temp_list = [opf_href for (manifest_id, linear, opf_href) in bk.spine_iter()]
        if notesource in temp_list:
            iter_list = [(manifest_id, linear, opf_href)
                         for (manifest_id, linear, opf_href) in bk.spine_iter()
                         if opf_href != notesource]
            note_html = bk.readfile(bk.href_to_id(notesource))

    for (manifest_id, linear, opf_href) in iter_list:
        print('-' * 20, opf_href, '-' * 20)
        html = bk.readfile(manifest_id)
        html_original = html
        note_anchor = re.search(pattern_anchor, html)
        if note_anchor is not None:
            # only once for each file with notes
            html = re.sub(r'\<\/head\>', r'<link href="../Styles/footnote.css" rel="stylesheet" type="text/css"/>\n</head>', html)
            if ibooks_compat:
                html = re.sub(r'\<\/body\>', r'<aside epub:type="footnote">\n<ol class="duokan-footnote-content">\n</ol>\n</aside>\n</body>', html)
                soup = BeautifulSoup(html, 'html.parser')
                soup.html['xmlns:epub'] = 'http://www.idpf.org/2007/ops'
                bk.writefile(manifest_id, str(soup))
                del soup
                # update html string
                html = bk.readfile(manifest_id)
                html_original = html
            else:
                html = re.sub(r'\<\/body\>', r'<ol class="duokan-footnote-content">\n</ol>\n</body>', html)
            local_count = 0
            while note_anchor is not None:
                anchor_count = anchor_count + 1
                local_count += 1
                template = template_anchor.format(id=anchor_count, fnanchor_id=fnanchor_id, fndef_id=fndef_id)
                html = re.sub(pattern_anchor, template, html, 1)
                print('Anchor No.' + str(anchor_count) + ': ' + note_anchor.group(0).strip('[]^'))
                note_anchor = re.search(pattern_anchor, html)
            if note_html:
                note_def = re.search(pattern_def, note_html)
                for i in range(1, local_count + 1):
                    def_count = def_count + 1
                    note_html = re.sub(pattern_def, r'', note_html, 1)
                    template = template_def.format(
                        id=def_count,
                        text=note_def.group(1).strip('[]^'),
                        fnanchor_id=fnanchor_id,
                        fndef_id=fndef_id,
                        backlink_class=backlink_class,
                    )
                    html = re.sub(r'\<\/ol\>', template, html, 1)
                    print('Note No.' + str(def_count) + ': ' + note_def.group(1))
                    note_def = re.search(pattern_def, note_html)
            else:
                note_def = re.search(pattern_def, html)
                while note_def is not None:
                    def_count = def_count + 1
                    html = re.sub(pattern_def, r'', html, 1)
                    template = template_def.format(
                        id=def_count,
                        text=note_def.group(1).strip('[]^'),
                        fnanchor_id=fnanchor_id,
                        fndef_id=fndef_id,
                        backlink_class=backlink_class,
                    )
                    html = re.sub(r'\<\/ol\>', template, html, 1)
                    print('Note No.' + str(def_count) + ': ' + note_def.group(1))
                    note_def = re.search(pattern_def, html)
        else:
            print("No notes found.")
        if not html == html_original:
            bk.writefile(manifest_id, html)

    if not note_html == note_html_original:
        bk.writefile(bk.href_to_id(notesource), note_html)
        print('\nInfo: Remember to delete footnote source file %s manually.' % notesource)

    insert_note_css(bk, backlink_class=backlink_class)
    print("\nInfo: Footnote generation succeeded. You may want to re-beautify all text files afterwards.")
    return 0

    def ProcessTextFile(self):
        """ This method runs when the button marked 'Get text file' is clicked """
        # Request name of file to open
        FILEOPENOPTIONS = dict(title='Choose a text file to import',
                               initialfile='',
                               filetypes=[('Text files', ('.txt')), ('All files', ('*.*'))])
        fHandle = filedialog.askopenfile(**FILEOPENOPTIONS)
        # Get the encoding of the text file
        with open(fHandle.name, "rb") as binary_file:
            data = binary_file.read()
        soup = BeautifulSoup(data)
        # Read the file
        with open(fHandle.name, 'rt', encoding=soup.original_encoding) as f:
            content = f.readlines()
        content = [x.strip() for x in content]
        # Escape angular brackets if required
        if self.is_checked.get() == True:
            print("Changing brackets")
            content = [x.replace('<', '&lt;') for x in content]
            content = [x.replace('>', '&gt;') for x in content]
        """
        #Replace newlines with paragraph tags
        bodyText = bodyText.replace('\n', '</p>\n\n<p>').replace('\r', '')
        bodyText = bodyText.replace('<p></p>', '<p>&nbsp;</p>')
        """
        # use PTagger to tag new lines
        if self.rule is None:
            self.set_rules()
        tagger = PTagger(self.rule)
        content = tagger.tag(content)
        # Now write the xhtml file
        xml = '<?xml version="1.0" encoding="utf-8"?>\n'
        xml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n'
        xml += '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        xml += '<head>\n'
        xml += '<title></title>\n'
        xml += '</head>\n'
        xml += '<body>\n'
        for row in content:
            xml += row + '\n'
        xml += '</body>\n'
        xml += '</html>\n'
        # Set the name of the new xhtml section in the ePub to that of the filename
        Filename = fHandle.name
        head, fName = ntpath.split(fHandle.name)
        ChapterName = fName[:fName.index(".")]  # Remove extension
        # Check whether this file already exists in the ePub
        for (id, href) in self.bk.text_iter():
            if id == ChapterName or href == 'Text/' + ChapterName + '.xhtml':
                # If the section already exists
                reply = tkMessageBox.askquestion(
                    "WARNING",
                    "Do you want to delete the current page named " + ChapterName + ".xhtml?")
                if reply == "yes":
                    # and it is not wanted
                    self.bk.deletefile(id)  # then delete it
                else:
                    # otherwise do not import the text file
                    print("Present xhtml page has been retained.")
                    return
        # Add text file to ePub in a new xhtml section
        uid = ChapterName
        basename = uid + '.xhtml'
        mime = 'application/xhtml+xml'
        self.bk.addfile(uid, basename, xml, mime)