Example #1
0
 def browse_ebook(self, toc_filename):
     """Build the list of books and chapters described by an XHTML TOC file.

     The file is expected to contain ``body/nav`` with one or more ``ol``
     lists. Each ``li`` in those lists is a book: its ``a`` element gives
     the title and a nested ``ol`` gives the chapters.

     Args:
         toc_filename: Path to the table-of-contents file. Chapter ``href``
             values are joined onto this file's directory.

     Returns:
         A list of dicts with keys ``'number'`` (0-based, counting only the
         books that were kept), ``'title'`` and ``'chapters'``. Each chapter
         is a dict with keys ``'number'``, ``'title'`` and ``'file'``.
         Books without a nested, non-empty chapter list are omitted.
     """
     xhtml = '{http://www.w3.org/1999/xhtml}'
     books = []
     out_dir = os.path.dirname(toc_filename)

     # Read the Table of Contents file. Passing the path lets the parser
     # read raw bytes and honour the encoding declared by the document,
     # instead of decoding with the platform's default text encoding.
     root = xml.etree.ElementTree.parse(toc_filename).getroot()
     nav = root.find('./' + xhtml + 'body/' + xhtml + 'nav')

     book_num = 0
     for book_list in nav.findall(xhtml + 'ol'):
         for item in book_list.findall(xhtml + 'li'):
             book_node = item.find(xhtml + 'a')
             book = {
                 'number': book_num,
                 'title': AsciiNormalizer.to_ascii(book_node.text),
                 'chapters': []
             }
             chapter_list = item.find(xhtml + 'ol')
             # Element truth-testing is deprecated, so spell out the check:
             # skip entries with no nested list or with an empty one.
             if chapter_list is None or len(chapter_list) == 0:
                 continue

             last_chapter_num = -1
             for chapter_node in chapter_list.findall(xhtml + 'li'):
                 ch = chapter_node.find(xhtml + 'a')
                 chapter_title = AsciiNormalizer.to_ascii(ch.text)
                 chapter_file = os.path.join(out_dir, ch.get('href'))

                 # The chapter number is the first word of the title, up
                 # to the first '.' (e.g. "12. Title" -> "12").
                 ch_num_str = chapter_title.partition(' ')[0].partition(
                     '.')[0]
                 if ch_num_str == 'Prologue:':
                     chapter_num = 0
                 elif ch_num_str == 'Epilogue:':
                     chapter_num = last_chapter_num + 1
                 elif ch_num_str.isdecimal():
                     # isdecimal() rather than isnumeric(): the latter also
                     # accepts characters such as '²' that int() rejects.
                     chapter_num = int(ch_num_str)
                 else:
                     # Entry without a recognisable chapter number.
                     continue

                 last_chapter_num = chapter_num
                 book['chapters'].append({
                     'number': chapter_num,
                     'title': chapter_title,
                     'file': chapter_file
                 })

             books.append(book)
             book_num += 1
     return books
Example #2
0
 def convert_chapter(self, chapter_filename):
     """Return the text of a chapter file after ASCII normalisation.

     The file is parsed as XML, the text of every element is concatenated
     in document order, and the result is passed through
     ``AsciiNormalizer.to_ascii``.

     Args:
         chapter_filename: Path to the chapter's XML/XHTML file.

     Returns:
         The value returned by ``AsciiNormalizer.to_ascii`` for the
         chapter's concatenated text.
     """
     #TODO Get chapter number
     #TODO Get chapter title
     #TODO Get chapter icon_file
     #Get chapter text
     # Let the parser open the file itself: it reads raw bytes and applies
     # the encoding declared by the document, rather than relying on the
     # platform's default text encoding.
     root = xml.etree.ElementTree.parse(chapter_filename).getroot()
     content = ''.join(root.itertext())
     return AsciiNormalizer.to_ascii(content)
Example #3
0
 def setUp(self):
     # Create a fresh AsciiNormalizer and expose it to the test as self.func.
     normalizer = AsciiNormalizer()
     self.func = normalizer
Example #4
0
 def test_OpenSingleQuote(self):
     # U+2018 / U+2019 (curly single quotes) should become plain "'".
     expected = "The so-called 'fob' was on the table."
     source = 'The so-called \u2018fob\u2019 was on the table.'
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #5
0
 def test_HorizontalElipsis(self):
     # U+2026 (horizontal ellipsis) should expand to three full stops.
     expected = 'Something... Strange'
     source = 'Something\u2026 Strange'
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #6
0
 def test_LowerCwithCedilla(self):
     # U+00E7 (c with cedilla) should be reduced to a plain "c".
     expected = "soupcon"
     source = 'soup\xe7on'
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #7
0
 def test_DoubleQuotes(self):
     # U+201C / U+201D (curly double quotes) should become plain '"'.
     expected = 'He blinked. "It is nothing," he said.'
     source = "He blinked. \u201cIt is nothing,\u201d he said."
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #8
0
 def test_ENDash(self):
     # U+2013 (en dash) should become a single hyphen.
     expected = '100-500'
     source = "100\u2013500"
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #9
0
 def test_EMDash(self):
     # U+2014 (em dash) should become a double hyphen.
     expected = "It was the best--and worst--of times."
     source = "It was the best\u2014and worst\u2014of times."
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #10
0
 def test_Elipses(self):
     # Full stops separated by U+00A0 (no-break space) should collapse
     # into a plain three-dot ellipsis.
     expected = "Something... Strange"
     source = 'Something.\xa0.\xa0.\xa0 Strange'
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))
Example #11
0
 def test_Apostrophe(self):
     # U+2019 used as an apostrophe inside a word should become "'".
     expected = "Tel'aran'rhiod"
     source = 'Tel\u2019aran\u2019rhiod'
     self.assertEqual(expected, AsciiNormalizer.to_ascii(source))