def browse_ebook(self, toc_filename):
    """Read an XHTML table-of-contents file and list its books and chapters.

    The file is expected to contain ``body/nav/ol/li`` elements in the XHTML
    namespace. Each top-level ``li`` is one book: its ``a`` child supplies the
    title, and an optional nested ``ol`` lists the chapters.

    Args:
        toc_filename: Path of the table-of-contents file. Chapter ``href``
            values are joined onto this file's directory.

    Returns:
        A list of dicts with keys ``'number'`` (0-based position of the book),
        ``'title'`` and ``'chapters'``. Each chapter is a dict with keys
        ``'number'``, ``'title'`` and ``'file'``. The list is empty when the
        document has no ``nav`` element.
    """
    xhtml = '{http://www.w3.org/1999/xhtml}'
    books = []
    out_dir = os.path.dirname(toc_filename)

    # Read the Table of Contents file. Passing the path (rather than a
    # text-mode file object) lets the parser read raw bytes and honour the
    # document's own encoding declaration instead of the locale default.
    root = xml.etree.ElementTree.parse(toc_filename).getroot()
    nav = root.find('./' + xhtml + 'body/' + xhtml + 'nav')
    if nav is None:
        return books

    book_num = 0
    for book_list in nav.findall(xhtml + 'ol'):
        for item in book_list.findall(xhtml + 'li'):
            book_node = item.find(xhtml + 'a')
            if book_node is None:
                # An entry without a link has no title to report.
                continue
            book = {
                'number': book_num,
                'title': AsciiNormalizer.to_ascii(book_node.text),
                'chapters': []
            }

            chapter_list = item.find(xhtml + 'ol')
            # Compare against None explicitly: the truth value of an Element
            # reflects whether it has children, and testing it is deprecated.
            if chapter_list is not None:
                last_chapter_num = -1
                for chapter_node in chapter_list.findall(xhtml + 'li'):
                    ch = chapter_node.find(xhtml + 'a')
                    if ch is None:
                        continue
                    chapter_title = AsciiNormalizer.to_ascii(ch.text)
                    chapter_file = os.path.join(out_dir, ch.get('href'))

                    # Parse out the chapter number: the first word of the
                    # title, cut at the first '.'.
                    ch_num_str = chapter_title.partition(' ')[0].partition(
                        '.')[0]
                    chapter_num = -1
                    if ch_num_str == 'Prologue:':
                        chapter_num = 0
                    elif ch_num_str == 'Epilogue:':
                        # The epilogue follows the last numbered chapter.
                        chapter_num = last_chapter_num + 1
                    elif ch_num_str.isdecimal():
                        # isdecimal() rather than isnumeric(): the latter
                        # also accepts characters such as superscripts that
                        # int() cannot convert.
                        chapter_num = int(ch_num_str)

                    # NOTE(review): entries whose title yields no chapter
                    # number are skipped here -- confirm this matches the
                    # intended nesting of the append.
                    if chapter_num >= 0:
                        last_chapter_num = chapter_num
                        book['chapters'].append({
                            'number': chapter_num,
                            'title': chapter_title,
                            'file': chapter_file
                        })

            books.append(book)
            book_num += 1

    return books
def convert_chapter(self, chapter_filename):
    """Return the text content of a chapter file.

    The file is parsed as XML, the text of every element is concatenated in
    document order, and the result is passed through
    ``AsciiNormalizer.to_ascii``.

    Args:
        chapter_filename: Path of the chapter file to read.

    Returns:
        The value returned by ``AsciiNormalizer.to_ascii`` for the
        concatenated text.
    """
    #TODO Get chapter number
    #TODO Get chapter title
    #TODO Get chapter icon_file

    # Get chapter text. Parsing straight from the path lets the XML parser
    # decode the bytes using the document's own encoding declaration rather
    # than the platform's default text encoding.
    root = xml.etree.ElementTree.parse(chapter_filename).getroot()
    content = ''.join(root.itertext())
    return AsciiNormalizer.to_ascii(content)
def setUp(self):
    # Build a fresh AsciiNormalizer instance before each test and keep it
    # on the test case as ``func``.
    normalizer = AsciiNormalizer()
    self.func = normalizer
def test_OpenSingleQuote(self):
    # Curly single quotes (U+2018 / U+2019) should come back as plain
    # ASCII apostrophes.
    source = 'The so-called \u2018fob\u2019 was on the table.'
    expected = "The so-called 'fob' was on the table."
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_HorizontalElipsis(self):
    # The single-character horizontal ellipsis (U+2026) should expand to
    # three ASCII periods.
    source = 'Something\u2026 Strange'
    expected = 'Something... Strange'
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_LowerCwithCedilla(self):
    # A lowercase c with cedilla (U+00E7) should be reduced to a plain 'c'.
    source = 'soup\xe7on'
    expected = "soupcon"
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_DoubleQuotes(self):
    # Curly double quotes (U+201C / U+201D) should become straight ASCII
    # double quotes.
    source = "He blinked. \u201cIt is nothing,\u201d he said."
    expected = 'He blinked. "It is nothing," he said.'
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_ENDash(self):
    # An en dash (U+2013) should be replaced by a single hyphen.
    source = "100\u2013500"
    expected = '100-500'
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_EMDash(self):
    # An em dash (U+2014) should be replaced by two hyphens.
    source = "It was the best\u2014and worst\u2014of times."
    expected = "It was the best--and worst--of times."
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_Elipses(self):
    # Periods separated by no-break spaces (U+00A0) should collapse into a
    # plain three-period ellipsis.
    source = 'Something.\xa0.\xa0.\xa0 Strange'
    expected = "Something... Strange"
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)
def test_Apostrophe(self):
    # A right single quotation mark (U+2019) used as an apostrophe inside
    # a word should become an ASCII apostrophe.
    source = 'Tel\u2019aran\u2019rhiod'
    expected = "Tel'aran'rhiod"
    actual = AsciiNormalizer.to_ascii(source)
    self.assertEqual(expected, actual)