def _getFallbackReader(self):
    """Return the secondary Mobi reader, creating and parsing it on first use.

    The reader is cached on ``self.bookFallback``. Failures while opening or
    parsing the book are logged, not raised; whatever ``self.bookFallback``
    holds at that point (possibly ``None``) is returned.
    """
    # Fast path: an earlier call already created the reader.
    if self.bookFallback is not None:
        return self.bookFallback

    try:
        fallback = Mobi(str(self.filePath))
        # Stored before parsing, so the attribute is set even if parse() fails.
        self.bookFallback = fallback
        # Need to parse all the header data in the book
        fallback.parse()
    except:
        # Deliberately broad: this reader is a best-effort secondary path.
        message = "MobiEBook: Expected exception for secondary reader, book %s with error: %s" % (self.filePath, traceback.format_exc())
        log(message, xbmc.LOGERROR)

    return self.bookFallback
class BaseTestCase(unittest.TestCase):
    """Test-case base that opens a Mobi fixture before each test and closes it after."""

    # File name joined onto PATH in setUp; None here, so presumably
    # overridden by concrete subclasses (not visible in this snippet).
    path = None

    def setUp(self):
        """Open the fixture at ``pathjoin(PATH, self.path)`` as ``self.mobi``."""
        self.mobi = Mobi(pathjoin(PATH, self.path))

    def tearDown(self):
        """Close the reader opened in ``setUp``."""
        self.mobi.close()
def _getFallbackReader(self):
    """Return the secondary Mobi reader, creating and parsing it on first use.

    The reader is cached on ``self.bookFallback``. Failures while opening or
    parsing the book are logged, not raised; whatever ``self.bookFallback``
    holds at that point (possibly ``None``) is returned.
    """
    # Fast path: an earlier call already created the reader.
    if self.bookFallback is not None:
        return self.bookFallback

    try:
        fallback = Mobi(self.filePath)
        # Stored before parsing, so the attribute is set even if parse() fails.
        self.bookFallback = fallback
        # Need to parse all the header data in the book
        fallback.parse()
    except:
        # Deliberately broad: this reader is a best-effort secondary path.
        message = "MobiEBook: Expected exception for secondary reader, book %s with error: %s" % (self.filePath, traceback.format_exc())
        log(message, xbmc.LOGERROR)

    return self.bookFallback
def collect_book_tokens(self, book_path):
    """Return the lower-cased word tokens of every ``<p>`` element in a Mobi book.

    Args:
        book_path: Path handed to ``Mobi`` to open the book.

    Returns:
        A flat list of tokens in record order, as produced by
        ``word_tokenize`` on the lower-cased paragraph text.
    """
    book = Mobi(book_path)
    book.parse()

    all_words = []
    for raw_record in tqdm(book, desc='record_no'):
        # 'replace' substitutes undecodable bytes instead of aborting the book.
        markup = raw_record.decode('utf-8', 'replace')
        for paragraph in BeautifulSoup(markup, 'lxml').find_all('p'):
            all_words.extend(word_tokenize(paragraph.text.lower()))
    return all_words
def __init__(self, path_to_folder):
    """Scan a folder tree for ``.mobi`` files and save an "author - title" line for each.

    Every book is handled independently: if opening, parsing, decoding or
    saving fails for one file, ``"pass"`` and the file path are printed and
    the scan continues with the next file.

    Args:
        path_to_folder: Root directory to walk; stored as ``self.path``.
    """
    self.path = path_to_folder

    mobi_paths = []
    for root, _dirs, names in os.walk(self.path):
        for name in names:
            # Test the extension, not a substring, so "notes.mobi.txt" is skipped.
            if name.endswith('.mobi'):
                mobi_paths.append(os.path.join(root, name))

    for mobi_path in mobi_paths:
        try:
            # Opening and parsing are inside the try so that one unreadable
            # book cannot abort the whole scan.
            book = Mobi(mobi_path)
            book.parse()
            title = book.title().decode()
            author = book.author().decode()
            result = author + ' - ' + title + "\n"
            print(result)
            Files().save_file(result)
        except Exception:
            print("pass", mobi_path)
def readMobi(eBook):
    """Return the book's title and author, UTF-8 decoded and joined by one space."""
    # Open up our book and parse it
    book = Mobi(eBook)
    book.parse()

    # Title is fetched before author, as in the return value's order
    title = book.title().decode("utf-8")
    author = book.author().decode("utf-8")
    return " ".join((title, author))
def read_mobi_data(nombre_comp):
    """Read author and title from a Mobi file.

    Args:
        nombre_comp: Full path of the book, passed to ``Mobi``.

    Returns:
        ``[author, title, '.mobi']`` on success. On any error the problem is
        printed as ``"<path> ERROR:<message>"`` and ``[None, None, None]`` is
        returned.
    """
    try:
        book = Mobi(nombre_comp)
        book.parse()
        tit = book.title().decode('utf-8')
        aut = book.author().decode('utf-8')
    except Exception as err:
        # str() keeps the error report itself from failing on bytes paths,
        # None or other non-string input.
        print(str(nombre_comp) + ' ERROR:' + str(err))
        return [None, None, None]
    return [aut, tit, '.mobi']
def extract_metadata_mobi(self, book):
    """Collect metadata for a Mobi file.

    Args:
        book: Path handed to ``Mobi``.

    Returns:
        A list ``[title, author, cover_image, file_name, description,
        identifier, publisher, date, rights, ftags]``. ``date`` and
        ``rights`` are always ``None``; any value whose lookup raises
        ``KeyError`` is ``None`` as well.
    """
    parsed = Mobi(book)
    parsed.parse()

    try:
        cover_image = parsed.readImageRecord(0)
    except KeyError:
        cover_image = None

    title = parsed.title().decode("utf-8")
    author = parsed.author().decode("utf-8")
    config = parsed.config

    def exth_text(record_id):
        # UTF-8 text of one EXTH record; KeyError propagates to the caller.
        return config['exth']['records'][record_id].decode("utf-8")

    try:
        description = self.stripTags(exth_text(103))
    except KeyError:
        description = None

    try:
        identifier = exth_text(104)
    except KeyError:
        identifier = None

    try:
        publisher = exth_text(101)
    except KeyError:
        publisher = None

    date = None
    rights = None

    try:
        ftags = exth_text(105)
    except KeyError:
        ftags = None
    else:
        # Only one delimiter style is normalised to commas: ':' if present,
        # otherwise ';'.
        if ":" in ftags:
            ftags = ftags.replace(":", ",")
        elif ";" in ftags:
            ftags = ftags.replace(";", ",")

    return [
        title,
        author,
        cover_image,
        parsed.f.name,
        description,
        identifier,
        publisher,
        date,
        rights,
        ftags,
    ]
def read_mobi(filepath):
    """Load a Mobi book into a plain dict.

    Returns:
        A dict with the keys ``filepath``, ``full_text`` (all records joined
        by a single space), ``title`` and ``author`` (both UTF-8 decoded).
    """
    book = Mobi(filepath)
    book.parse()

    # The text is assembled before the metadata is read, as before.
    full_text = ' '.join([record for record in book])

    return {
        'filepath': filepath,
        'full_text': full_text,
        'title': book.title().decode('utf-8'),
        'author': book.author().decode('utf-8'),
    }
def generate_mobi_name(in_filename, template=Template(u'$author - $title.$extn')):
    """Build a sanitised file name for a Mobi book from its own metadata.

    Args:
        in_filename: Path of the book. Assumed to already be in a usable
            encoding (ideally a Unicode string).
        template: ``string.Template`` using ``$author``, ``$title`` and
            ``$extn`` (the extension without its leading period).

    Returns:
        The substituted template after it has been passed through
        ``safe_filename``.

    Raises:
        KeyError: If the book has no EXTH author record (100), or its text
            encoding code page is neither 1252 nor 65001.
        AttributeError: If the book has neither EXTH record 503 nor a
            'Full Name' header entry, so no title is available to decode.
    """
    book = Mobi(in_filename)
    book.parse()

    # MOBI header code page -> Python codec name.
    codepage_to_encoding = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }
    # NOTE(review): not confirmed whether 'text Encoding' applies to metadata
    # as well as to the book content; it is used for both here.
    book_codepage = book.config.get('mobi', {}).get('text Encoding', 1252)
    encoding = codepage_to_encoding[book_codepage]

    exth_records = book.config['exth']['records']
    author = exth_records[100]
    # Record 503 is preferred over 'Full Name': it can carry the longer title,
    # e.g. 'Broken Homes (PC Peter Grant Book 4)' rather than 'Broken Homes'.
    # NOTE: if both exist, the longer of the two may be the better choice.
    title = exth_records.get(503) or book.config['mobi'].get('Full Name')

    author = author.decode(encoding)
    title = title.decode(encoding)

    # Extension without the leading period.
    extn = os.path.splitext(in_filename)[1][1:]

    new_filename = template.substitute(author=author, title=title, extn=extn)
    return safe_filename(new_filename)
def __init__(self, path_to_folder):
    """Scan a folder tree for ``.mobi`` files and rename each to "Kindle/<author> - <title>.mobi".

    Every book is handled independently: if opening, parsing, decoding or
    renaming fails for one file, ``"pass"`` and the file path are printed and
    the scan continues with the next file.

    Args:
        path_to_folder: Root directory to walk; stored as ``self.path``.
    """
    self.path = path_to_folder

    mobi_paths = []
    for root, _dirs, names in os.walk(self.path):
        for name in names:
            # Test the extension, not a substring, so "notes.mobi.txt" is skipped.
            if name.endswith('.mobi'):
                mobi_paths.append(os.path.join(root, name))

    for mobi_path in mobi_paths:
        try:
            # Opening and parsing are inside the try so that one unreadable
            # book cannot abort the whole scan.
            book = Mobi(mobi_path)
            book.parse()
            title = book.title().decode()
            author = book.author().decode()
            # The target is relative to the current working directory.
            new_name = "Kindle/" + author + " - " + title + ".mobi"
            os.rename(mobi_path, new_name)
        except Exception:
            print("pass", mobi_path)
# Demo: print every record of the sample book, then its parsed configuration.
from mobi import Mobi
import pprint

book = Mobi("test/CharlesDarwin.mobi")
# parse() is run before the records and book.config are read
book.parse()

for record in book:
    print(record)

pprint.pprint(book.config)
def setUp(self):
    """Open the Mobi file at ``pathjoin(PATH, self.path)`` and keep it as ``self.mobi``."""
    self.mobi = Mobi(pathjoin(PATH, self.path))
#coding: utf-8
# Demo: print the basic metadata attributes and the layout of test/1.mobi.
from mobi import Mobi
import os
from pprint import pprint

book = Mobi('test/1.mobi')

# Each attribute is read immediately before its line is printed.
for fmt, attr in (
    ("Title: %s", "title"),
    ("Author: %s", "author"),
    ("Publisher: %s", "publisher"),
    ("Language: %s", "language"),
    ("ISBN: %s", "isbn"),
):
    print(fmt % getattr(book, attr))

# Further inspection calls, currently disabled:
#pprint(book.palmdb)
#pprint(book.palmdoc)
#pprint(book.mobi)
#book.extract('output')

print(book.layout())
class MobiEBook(EBookBase):
    """eBook implementation for Mobi files.

    Title and author are read through ``KMobi``, falling back to the plain
    ``Mobi`` reader. The cover, chapter list and chapter text are obtained by
    unpacking the book with ``kindleunpack`` into a temporary directory that
    is removed again afterwards.
    """

    def __init__(self, filePath, removeFileWhenComplete=False):
        """Open the book with the primary reader; a failure is logged, not raised."""
        EBookBase.__init__(self, filePath, removeFileWhenComplete)
        self.book = None
        self.bookFallback = None

        try:
            self.book = KMobi(self.filePath)
        except:
            log("MobiEBook: Failed to process eBook %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

    # A secondary Mobi reader, if the first can not handle the given file
    def _getFallbackReader(self):
        """Return the cached secondary reader, creating and parsing it on first use."""
        if self.bookFallback is None:
            try:
                self.bookFallback = Mobi(self.filePath)
                # Need to parse all the header data in the book
                self.bookFallback.parse()
            except:
                log("MobiEBook: Expected exception for secondary reader, book %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)
        return self.bookFallback

    def getTitle(self):
        """Return the title from the primary reader, the fallback reader, or ``getFallbackTitle()``."""
        title = None
        if self.book is not None:
            try:
                title = self.book.title
            except:
                log("MobiEBook: Failed to get title for mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # If we failed to get the title, use the fallback Mobi reader
        if title in [None, ""]:
            fallback = self._getFallbackReader()
            if fallback is not None:
                try:
                    title = fallback.title()
                except:
                    log("MobiEBook: Failed to get title using fallback mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        if title in [None, ""]:
            title = self.getFallbackTitle()

        try:
            log("MobiEBook: Title is %s for book %s" % (title.decode('utf-8', 'ignore'), self.filePath))
        except:
            # Logging is best effort; a title that cannot be decoded is still returned
            pass

        return title

    def getAuthor(self):
        """Return the author from the primary reader or, failing that, the fallback reader."""
        author = ""
        if self.book is not None:
            try:
                author = self.book.author
            except:
                log("MobiEBook: Failed to get author for mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # If we failed to get the author, use the fallback Mobi reader
        if author in [None, ""]:
            fallback = self._getFallbackReader()
            if fallback is not None:
                try:
                    author = fallback.author()
                except:
                    log("MobiEBook: Failed to get author using fallback mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        try:
            log("MobiEBook: Author is %s for book %s" % (author.decode('utf-8', 'ignore'), self.filePath))
        except:
            # Logging is best effort; an author that cannot be decoded is still returned
            pass

        return author

    def extractCoverImage(self):
        """Unpack the book, copy its first cover image into the cover cache and return the cached path.

        Returns ``None`` when the book could not be unpacked or no cover image
        was found.
        """
        log("MobiEBook: Extracting cover for %s" % self.filePath)
        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the cover image
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to extract cover for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        coverTargetName = None
        if dir_exists(extractDir):
            coverImages = self._findCoverImage(extractDir)

            if len(coverImages) > 0:
                coverImageSrc = coverImages[0]
                log("MobiEBook: Found cover file %s" % coverImageSrc)
                coverFileName, oldExt = os.path.splitext(self.fileName)
                cacheCoverName = "%s.jpg" % coverFileName
                coverTargetName = os_path_join(Settings.getCoverCacheLocation(), cacheCoverName)

                # Now move the file to the covers cache directory
                copy = xbmcvfs.copy(coverImageSrc, coverTargetName)
                if copy:
                    log("MobiEBook: copy successful for %s" % coverTargetName)
                else:
                    log("MobiEBook: copy failed from %s to %s" % (coverImageSrc, coverTargetName))
            else:
                log("MobiEBook: No cover image found for %s" % self.filePath)

            # Now tidy up the extracted data
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to tidy up directory %s" % extractDir)
        else:
            log("MobiEBook: Failed to extract Mobi file %s" % self.filePath)

        return coverTargetName

    def _findCoverImage(self, dirPath):
        """Return the paths of all ``cover*`` jpg/jpeg/png files under ``dirPath``, recursively."""
        coverImages = []
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.startswith('cover') and aFile.endswith(('jpg', 'jpeg', 'png')):
                # Add this image to the list
                coverImages.append(os_path_join(dirPath, aFile))

        # Now check any of the directories
        for aDir in dirs:
            coverImages = coverImages + self._findCoverImage(os_path_join(dirPath, aDir))

        return coverImages

    def getChapterDetails(self):
        """Unpack the book and return its chapters as a list of ``{'title', 'link'}`` dicts.

        Chapters come from the unpacked ``toc.ncx``. A whole-book entry is
        inserted first when no chapters were found, or when
        ``Settings.onlyShowWholeBookIfChapters()`` is false.
        """
        log("MobiEBook: Extracting chapter list for %s" % self.filePath)
        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the chapter list
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to unpack book for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        chapterDetails = []
        if dir_exists(extractDir):
            tocNcx = self._findTocNcx(extractDir)

            if tocNcx not in [None, ""]:
                log("MobiEBook: TOC file found: %s" % tocNcx)
                # Now we have the TOC file, we need to parse it, we already have
                # a tool for that, as it is the ePub format
                try:
                    # Read the contents of the TOC file into a string
                    tocFile = xbmcvfs.File(tocNcx, 'r')
                    try:
                        tocStr = tocFile.read()
                    finally:
                        # Close the handle even if the read fails
                        tocFile.close()

                    # Now load it into the parser
                    toc = epub.ncx.parse_toc(tocStr)

                    # Get all the chapters
                    for navPoint in toc.nav_map.nav_point:
                        # Get each of the chapter labels
                        for aLabelGroup in navPoint.labels:
                            if aLabelGroup not in [None, ""]:
                                for aLabel in aLabelGroup:
                                    if aLabel not in [None, ""]:
                                        log("MobiEBook: Adding chapter %s with src %s" % (aLabel, navPoint.src))
                                        detail = {'title': aLabel.encode("utf-8"), 'link': navPoint.src}
                                        chapterDetails.append(detail)
                                        # Only need the first string for this label group
                                        break
                    del toc
                except:
                    log("MobiEBook: Failed to process TOC %s with error: %s" % (tocNcx, traceback.format_exc()), xbmc.LOGERROR)
            else:
                log("MobiEBook: Failed to find TOC file")

            # Check if we have any chapters, if there are none, then we should show the whole book
            if (len(chapterDetails) < 1) or (not Settings.onlyShowWholeBookIfChapters()):
                htmlFiles = self._findHtmlFiles(extractDir)

                # Check if there are any html files
                if len(htmlFiles) > 0:
                    keyHtmlFile = None
                    for htmlFile in htmlFiles:
                        if htmlFile.endswith('book.html'):
                            # Prefer the main book page over any other html file
                            keyHtmlFile = htmlFile
                            break
                    if keyHtmlFile is None:
                        keyHtmlFile = htmlFiles[0]

                    detail = {'title': ADDON.getLocalizedString(32016), 'link': keyHtmlFile}
                    chapterDetails.insert(0, detail)

            # Now tidy up the extracted data
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to tidy up directory %s" % extractDir)
        else:
            log("MobiEBook: Failed to extract Mobi file %s" % self.filePath)

        return chapterDetails

    def _findTocNcx(self, dirPath):
        """Return the path of the first ``toc.ncx`` (any case) under ``dirPath``, or ``None``."""
        tocNcx = None
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.lower() == 'toc.ncx':
                # Found the table of contents file
                tocNcx = os_path_join(dirPath, aFile)
                break

        # Now check any of the directories
        for aDir in dirs:
            if tocNcx is None:
                tocNcx = self._findTocNcx(os_path_join(dirPath, aDir))

        return tocNcx

    def _findHtmlFiles(self, dirPath):
        """Return the bare file names (not paths) of all ``.html`` files under ``dirPath``, recursively."""
        htmlFiles = []
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.endswith('.html'):
                # Add this page to the list
                htmlFiles.append(aFile)

        # Now check any of the directories
        for aDir in dirs:
            htmlFiles = htmlFiles + self._findHtmlFiles(os_path_join(dirPath, aDir))

        return htmlFiles

    # Get the text for a given chapter
    def getChapterContents(self, chapterLink):
        """Return the Kodi-formatted text for ``chapterLink`` (``file`` or ``file#anchor``).

        With an anchor, only the page-break section containing that anchor is
        returned; without one, the whole file is returned. An empty string is
        returned when nothing could be read or the anchor was not found.
        """
        log("MobiEBook: Getting chapter contents for %s" % chapterLink)

        # Find out the name of the page that this chapter is stored in
        sections = chapterLink.split('#')
        bookFileName = None
        chapterStartFlag = None
        if len(sections) > 0:
            bookFileName = sections[0]
        if len(sections) > 1:
            chapterStartFlag = sections[1]

        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the chapter contents
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to unpack book for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # Find the file containing the book contents
        bookFileLocation = self._findBookFile(extractDir, bookFileName)

        bookContents = ""
        if bookFileLocation not in [None, ""]:
            # Read the contents of the file
            try:
                # Read the contents of the book file into a string
                bookFile = xbmcvfs.File(bookFileLocation, 'r')
                try:
                    bookContents = bookFile.read()
                finally:
                    # Close the handle even if the read fails
                    bookFile.close()
            except:
                log("MobiEBook: Failed to read contents of book %s with error: %s" % (bookFileName, traceback.format_exc()), xbmc.LOGERROR)
        else:
            log("MobiEBook: Failed to find book content file %s" % bookFileName)

        # Cleanup the extract directory
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Get the content of the chapter, this will be in HTML
        chapterContent = ""
        if bookContents not in [None, ""]:
            if chapterStartFlag is not None:
                # Split based on page markers
                pageBreaks = bookContents.split('<mbp:pagebreak/>')
                anchorHtml = "<a id=\"%s\"" % chapterStartFlag
                # Find which section contains this anchor
                for page in pageBreaks:
                    if anchorHtml in page.decode("utf-8"):
                        log("MobiEBook: Found page for chapter marker %s" % chapterStartFlag)
                        chapterContent = self._mobiHtmlParsing(page)
                        break
            else:
                log("MobiEBook: No chapter start flag, showing whole book")
                chapterContent = self._mobiHtmlParsing(bookContents)

        if chapterContent not in [None, ""]:
            chapterContent = self.convertHtmlIntoKodiText(chapterContent)

        return chapterContent

    # Finds the given book file
    def _findBookFile(self, dirPath, bookFileName):
        """Return the path of the file named ``bookFileName`` (case-insensitive) under ``dirPath``, or ``None``."""
        bookFile = None
        # Lower-case the wanted name too, otherwise a link such as
        # "Book.html" could never match the lower-cased directory entries
        wantedName = bookFileName.lower() if bookFileName else bookFileName
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.lower() == wantedName:
                # Found a match, set the value
                bookFile = os_path_join(dirPath, aFile)
                break

        # Now check any of the directories
        for aDir in dirs:
            if bookFile is None:
                bookFile = self._findBookFile(os_path_join(dirPath, aDir), bookFileName)

        return bookFile

    def _mobiHtmlParsing(self, chapterContentIn):
        """Add a newline after each ``</p>`` and turn ``<font size="5">`` spans into ``<h1>`` headings."""
        # There are no line-breaks in the mobi file, so add them for each
        # html paragraph tag
        chapterContent = chapterContentIn.replace('</p>', '</p>\n')
        # Headings are just shown in larger fonts for mobi
        chapterContent = re.sub(r'<font size="5">(.*?)</font>', r'<h1>\1</h1>', chapterContent)
        return chapterContent
from mobi import Mobi book = Mobi("test/CharlesDarwin.mobi"); book.parse(); for record in book: print record, import pprint pprint.pprint(book.config)
# Demo: print every non-empty record of one book from a local Calibre library.
from mobi import Mobi

# Two sample books; only `potter` is opened below.
collins = "C:/Users/Anchpop/Google Drive/book/Calibre library/HarperCollins Publishers/Collins French to English (One Way) (115)/Collins French to English (One - HarperCollins Publishers.mobi"
potter = "C:/Users/Anchpop/Google Drive/book/Calibre library/J.K. Rowling/Harry Potter et la Coupe de Feu (La (110)/Harry Potter et la Coupe de Feu - J.K. Rowling.mobi"

book = Mobi(potter)
book.parse()

for record in book:
    # Falsy (e.g. empty) records are skipped
    if not record:
        continue
    print(record)
#!/usr/bin/env python2.7 from mobi import Mobi book = Mobi("../zdic.prc"); book.parse(); for record in book: print record,
# Demo: print every record of the bundled sample book, then its parsed
# configuration.
import os
import pprint

from mobi import Mobi

# Resolve the sample relative to this script. abspath() matters because
# dirname(__file__) can be '' when the script is started from its own
# directory, which would otherwise turn the path into "/test/...".
path = os.path.dirname(os.path.abspath(__file__))
book = Mobi(os.path.join(path, "test", "CharlesDarwin.mobi"))
# parse() is run before the records and book.config are read
book.parse()

for record in book:
    print(record)

pprint.pprint(book.config)
class MobiEBook(EBookBase):
    """eBook implementation for Mobi files.

    Title, author and description are read through ``KMobi`` (title and
    author fall back to the plain ``Mobi`` reader). The cover, chapter list
    and chapter text are obtained by unpacking the book with ``kindleunpack``
    into a temporary directory that is removed again afterwards.
    """

    def __init__(self, filePath, removeFileWhenComplete=False):
        """Open the book with the primary reader; a failure is logged, not raised."""
        EBookBase.__init__(self, filePath, removeFileWhenComplete)
        self.book = None
        self.bookFallback = None

        try:
            self.book = KMobi(self.filePath)
        except:
            log("MobiEBook: Failed to process eBook %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

    # A secondary Mobi reader, if the first can not handle the given file
    def _getFallbackReader(self):
        """Return the cached secondary reader, creating and parsing it on first use."""
        if self.bookFallback is None:
            try:
                self.bookFallback = Mobi(str(self.filePath))
                # Need to parse all the header data in the book
                self.bookFallback.parse()
            except:
                log("MobiEBook: Expected exception for secondary reader, book %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)
        return self.bookFallback

    def getTitle(self):
        """Return the title from the primary reader, the fallback reader, or ``getFallbackTitle()``."""
        title = None
        if self.book is not None:
            try:
                title = self.book.title
            except:
                log("MobiEBook: Failed to get title for mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # If we failed to get the title, use the fallback Mobi reader
        if title in [None, ""]:
            fallback = self._getFallbackReader()
            if fallback is not None:
                try:
                    title = fallback.title()
                except:
                    log("MobiEBook: Failed to get title using fallback mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        if title in [None, ""]:
            title = self.getFallbackTitle()

        try:
            log("MobiEBook: Title is %s for book %s" % (title.decode('utf-8', 'ignore'), self.filePath))
        except:
            # Logging is best effort; a title that cannot be decoded is still returned
            pass

        return title

    def getAuthor(self):
        """Return the author from the primary reader or, failing that, the fallback reader."""
        author = ""
        if self.book is not None:
            try:
                author = self.book.author
            except:
                log("MobiEBook: Failed to get author for mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # If we failed to get the author, use the fallback Mobi reader
        if author in [None, ""]:
            fallback = self._getFallbackReader()
            if fallback is not None:
                try:
                    author = fallback.author()
                except:
                    log("MobiEBook: Failed to get author using fallback mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        try:
            log("MobiEBook: Author is %s for book %s" % (author.decode('utf-8', 'ignore'), self.filePath))
        except:
            # Logging is best effort; an author that cannot be decoded is still returned
            pass

        return author

    def getDescription(self):
        """Return the primary reader's description converted to Kodi text, or ``""`` when there is none."""
        description = ""
        if self.book is not None:
            try:
                description = self.book.description
            except:
                log("MobiEBook: Failed to get description for mobi %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        if description is None:
            description = ""
        else:
            description = self.convertHtmlIntoKodiText(description)

        return description

    def extractCoverImage(self):
        """Unpack the book, copy its first cover image into the cover cache and return the cached path.

        Returns ``None`` when the book could not be unpacked or no cover image
        was found.
        """
        log("MobiEBook: Extracting cover for %s" % self.filePath)
        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the cover image
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to extract cover for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        coverTargetName = None
        if dir_exists(extractDir):
            coverImages = self._findCoverImage(extractDir)

            if len(coverImages) > 0:
                coverImageSrc = coverImages[0]
                log("MobiEBook: Found cover file %s" % coverImageSrc)
                coverFileName, oldExt = os.path.splitext(self.fileName)
                cacheCoverName = "%s.jpg" % coverFileName
                coverTargetName = os_path_join(Settings.getCoverCacheLocation(), cacheCoverName)

                # Now move the file to the covers cache directory
                copy = xbmcvfs.copy(coverImageSrc, coverTargetName)
                if copy:
                    log("MobiEBook: copy successful for %s" % coverTargetName)
                else:
                    log("MobiEBook: copy failed from %s to %s" % (coverImageSrc, coverTargetName))
            else:
                log("MobiEBook: No cover image found for %s" % self.filePath)

            # Now tidy up the extracted data
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to tidy up directory %s" % extractDir)
        else:
            log("MobiEBook: Failed to extract Mobi file %s" % self.filePath)

        return coverTargetName

    def _findCoverImage(self, dirPath):
        """Return the paths of all ``cover*`` jpg/jpeg/png files under ``dirPath``, recursively."""
        coverImages = []
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.startswith('cover') and aFile.endswith(('jpg', 'jpeg', 'png')):
                # Add this image to the list
                coverImages.append(os_path_join(dirPath, aFile))

        # Now check any of the directories
        for aDir in dirs:
            coverImages = coverImages + self._findCoverImage(os_path_join(dirPath, aDir))

        return coverImages

    def getChapterDetails(self):
        """Unpack the book and return its chapters as a list of ``{'title', 'link'}`` dicts.

        Chapters come from the unpacked ``toc.ncx``. A whole-book entry is
        inserted first when no chapters were found, or when
        ``Settings.onlyShowWholeBookIfChapters()`` is false.
        """
        log("MobiEBook: Extracting chapter list for %s" % self.filePath)
        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the chapter list
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to unpack book for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        chapterDetails = []
        if dir_exists(extractDir):
            tocNcx = self._findTocNcx(extractDir)

            if tocNcx not in [None, ""]:
                log("MobiEBook: TOC file found: %s" % tocNcx)
                # Now we have the TOC file, we need to parse it, we already have
                # a tool for that, as it is the ePub format
                try:
                    # Read the contents of the TOC file into a string
                    tocFile = xbmcvfs.File(tocNcx, 'r')
                    try:
                        tocStr = tocFile.read()
                    finally:
                        # Close the handle even if the read fails
                        tocFile.close()

                    # Now load it into the parser
                    toc = epub.ncx.parse_toc(tocStr)

                    # Get all the chapters
                    for navPoint in toc.nav_map.nav_point:
                        # Get each of the chapter labels
                        for aLabelGroup in navPoint.labels:
                            if aLabelGroup not in [None, ""]:
                                for aLabel in aLabelGroup:
                                    if aLabel not in [None, ""]:
                                        log("MobiEBook: Adding chapter %s with src %s" % (aLabel, navPoint.src))
                                        detail = {'title': aLabel.encode("utf-8"), 'link': navPoint.src}
                                        chapterDetails.append(detail)
                                        # Only need the first string for this label group
                                        break
                    del toc
                except:
                    log("MobiEBook: Failed to process TOC %s with error: %s" % (tocNcx, traceback.format_exc()), xbmc.LOGERROR)
            else:
                log("MobiEBook: Failed to find TOC file")

            # Check if we have any chapters, if there are none, then we should show the whole book
            if (len(chapterDetails) < 1) or (not Settings.onlyShowWholeBookIfChapters()):
                htmlFiles = self._findHtmlFiles(extractDir)

                # Check if there are any html files
                if len(htmlFiles) > 0:
                    keyHtmlFile = None
                    for htmlFile in htmlFiles:
                        if htmlFile.endswith('book.html'):
                            # Prefer the main book page over any other html file
                            keyHtmlFile = htmlFile
                            break
                    if keyHtmlFile is None:
                        keyHtmlFile = htmlFiles[0]

                    detail = {'title': ADDON.getLocalizedString(32016), 'link': keyHtmlFile}
                    chapterDetails.insert(0, detail)

            # Now tidy up the extracted data
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to tidy up directory %s" % extractDir)
        else:
            log("MobiEBook: Failed to extract Mobi file %s" % self.filePath)

        return chapterDetails

    def _findTocNcx(self, dirPath):
        """Return the path of the first ``toc.ncx`` (any case) under ``dirPath``, or ``None``."""
        tocNcx = None
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.lower() == 'toc.ncx':
                # Found the table of contents file
                tocNcx = os_path_join(dirPath, aFile)
                break

        # Now check any of the directories
        for aDir in dirs:
            if tocNcx is None:
                tocNcx = self._findTocNcx(os_path_join(dirPath, aDir))

        return tocNcx

    def _findHtmlFiles(self, dirPath):
        """Return the bare file names (not paths) of all ``.html`` files under ``dirPath``, recursively."""
        htmlFiles = []
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.endswith('.html'):
                # Add this page to the list
                htmlFiles.append(aFile)

        # Now check any of the directories
        for aDir in dirs:
            htmlFiles = htmlFiles + self._findHtmlFiles(os_path_join(dirPath, aDir))

        return htmlFiles

    # Get the text for a given chapter
    def getChapterContents(self, chapterLink):
        """Return the Kodi-formatted text for ``chapterLink`` (``file`` or ``file#anchor``).

        With an anchor, only the page-break section containing that anchor is
        returned; without one, the whole file is returned. An empty string is
        returned when nothing could be read or the anchor was not found.
        """
        log("MobiEBook: Getting chapter contents for %s" % chapterLink)

        # Find out the name of the page that this chapter is stored in
        sections = chapterLink.split('#')
        bookFileName = None
        chapterStartFlag = None
        if len(sections) > 0:
            bookFileName = sections[0]
        if len(sections) > 1:
            chapterStartFlag = sections[1]

        # Get the location that the book is to be extracted to
        extractDir = os_path_join(Settings.getTempLocation(), 'mobi_extracted')

        # Check if the mobi extract directory already exists
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Extract the contents of the book so we can get the chapter contents
        try:
            kindleunpack.unpackBook(self.filePath, extractDir, None, '2', True)
        except:
            log("MobiEBook: Failed to unpack book for %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)

        # Find the file containing the book contents
        bookFileLocation = self._findBookFile(extractDir, bookFileName)

        bookContents = ""
        if bookFileLocation not in [None, ""]:
            # Read the contents of the file
            try:
                # Read the contents of the book file into a string
                bookFile = xbmcvfs.File(bookFileLocation, 'r')
                try:
                    bookContents = bookFile.read()
                finally:
                    # Close the handle even if the read fails
                    bookFile.close()
            except:
                log("MobiEBook: Failed to read contents of book %s with error: %s" % (bookFileName, traceback.format_exc()), xbmc.LOGERROR)
        else:
            log("MobiEBook: Failed to find book content file %s" % bookFileName)

        # Cleanup the extract directory
        if dir_exists(extractDir):
            try:
                shutil.rmtree(extractDir, True)
            except:
                log("MobiEBook: Failed to delete directory %s" % extractDir)

        # Get the content of the chapter, this will be in HTML
        chapterContent = ""
        if bookContents not in [None, ""]:
            if chapterStartFlag is not None:
                # Split based on page markers
                pageBreaks = bookContents.split('<mbp:pagebreak/>')
                anchorHtml = "<a id=\"%s\"" % chapterStartFlag
                # Find which section contains this anchor
                for page in pageBreaks:
                    if anchorHtml in page.decode("utf-8"):
                        log("MobiEBook: Found page for chapter marker %s" % chapterStartFlag)
                        chapterContent = self._mobiHtmlParsing(page)
                        break
            else:
                log("MobiEBook: No chapter start flag, showing whole book")
                chapterContent = self._mobiHtmlParsing(bookContents)

        if chapterContent not in [None, ""]:
            chapterContent = self.convertHtmlIntoKodiText(chapterContent)

        return chapterContent

    # Finds the given book file
    def _findBookFile(self, dirPath, bookFileName):
        """Return the path of the file named ``bookFileName`` (case-insensitive) under ``dirPath``, or ``None``."""
        bookFile = None
        # Lower-case the wanted name too, otherwise a link such as
        # "Book.html" could never match the lower-cased directory entries
        wantedName = bookFileName.lower() if bookFileName else bookFileName
        dirs, files = xbmcvfs.listdir(dirPath)

        for aFile in files:
            if aFile.lower() == wantedName:
                # Found a match, set the value
                bookFile = os_path_join(dirPath, aFile)
                break

        # Now check any of the directories
        for aDir in dirs:
            if bookFile is None:
                bookFile = self._findBookFile(os_path_join(dirPath, aDir), bookFileName)

        return bookFile

    def _mobiHtmlParsing(self, chapterContentIn):
        """Add a newline after each ``</p>`` and turn ``<font size="5">`` spans into ``<h1>`` headings."""
        # There are no line-breaks in the mobi file, so add them for each
        # html paragraph tag
        chapterContent = chapterContentIn.replace('</p>', '</p>\n')
        # Headings are just shown in larger fonts for mobi
        chapterContent = re.sub(r'<font size="5">(.*?)</font>', r'<h1>\1</h1>', chapterContent)
        return chapterContent