def __init__(self, fn):
    """Read the book file ``fn`` and populate metadata attributes.

    The first 68 bytes of the file are read to obtain the type tag stored
    at bytes 60-68.  Only ``'BOOKMOBI'`` and ``'TEXtREAd'`` files are
    processed further; anything else is logged and left with the default
    attribute values.

    Attributes always set: ``filename``, ``title``, ``author``,
    ``language``, ``description``, ``is_a_book`` and ``type``.
    Once ``parse_palmdb`` succeeds, ``is_a_book`` becomes True and
    ``records`` is set.  Depending on the file type, ``mobi``, ``exth``,
    ``rawdata`` and ``metadata`` may be set as well.

    Construction never signals failure through its return value; callers
    should inspect ``is_a_book``.  Note that ``is_a_book`` is already True
    when the later "Mobi header missing!" check bails out.

    Args:
        fn: Path of the file to inspect.
    """
    self.filename = fn

    # Defaults, kept whenever the file does not supply a better value.
    self.title = fn
    self.author = "??"
    self.language = "??"
    # Rob Addition: Description
    self.description = ""
    self.is_a_book = False

    # The header is raw binary data, so read it in binary mode; the
    # context manager closes the handle even when the read fails.
    with open(fn, 'rb') as f:
        d = f.read(68)

    encodings = {1252: 'cp1252', 65001: 'utf-8'}
    supported_types = ('BOOKMOBI', 'TEXtREAd')
    self.type = d[60:68]
    if self.type not in supported_types:
        LOG(1, "Unsupported file type %s" % (self.type))
        return None

    try:
        db = parse_palmdb(fn)
    except Exception:
        # Best effort: an unparsable database just means "not a book".
        return None

    self.is_a_book = True
    # now we have a better guess at the title, use it for now
    self.title = db.name
    self.records = db.records
    rec0 = self.records[0].data

    if self.type == 'BOOKMOBI':
        LOG(3, "This is a MOBI book")
        self.mobi = {}
        for field, pos, fmt in MOBI_HDR_FIELDS:
            end = pos + calcsize(fmt)
            # Skip fields that lie beyond record 0 or, once the header
            # length has been decoded, beyond the declared header.
            if (end > len(rec0) or
                    ("header_len" in self.mobi and
                     end > self.mobi["header_len"])):
                continue
            LOG(4, "field: %s, fmt: %s, @ [%d:%d], data: %s" % (
                field, fmt, pos, end, repr(rec0[pos:end])))
            (self.mobi[field], ) = unpack(">%s" % fmt, rec0[pos:end])

        LOG(3, "self.mobi: %s" % repr(self.mobi))

        # Translate the locale codes into a readable language string.
        if self.mobi['locale_language'] in LANGUAGES:
            lang = LANGUAGES[self.mobi['locale_language']]
            if self.mobi['locale_country'] == 0:
                LOG(2, "Book language: %s" % lang[0][1])
                self.language = "%s (%s)" % (lang[0][1], lang[0][0])
            elif self.mobi['locale_country'] in lang:
                country = lang[self.mobi['locale_country']]
                LOG(2, "Book language is %s (%s)" % (
                    lang[0][1], country[1]))
                self.language = "%s (%s-%s)" % (
                    lang[0][1], lang[0][0], country[0])

        # Get and decode the book name stored inside record 0.
        pos = self.mobi['full_name_offs']
        end = pos + self.mobi['full_name_len']
        self.title = rec0[pos:end].decode(encodings[self.mobi['encoding']])
        LOG(2, "Book name: %s" % self.title)

        if self.mobi['id'] != 'MOBI':
            LOG(0, "Mobi header missing!")
            return None

        if (0x40 & self.mobi['exth_flags']):  # check for EXTH
            self.exth = parse_exth(rec0, self.mobi['header_len'] + 16)
            LOG(3, "EXTH header: %s" % repr(self.exth))
            if 'author' in self.exth:
                self.author = ' & '.join(self.exth['author'])
            else:
                self.author = "n/a"
            self.rawdata = d
            # NOTE(review): ' '.join() on a str puts a space between
            # every character -- confirm what parse_exth returns for
            # 'updated title' before relying on this branch.
            if (('updated title' in self.exth) and
                    (type(self.exth['updated title']) is str)):
                self.title = ' '.join(self.exth['updated title'])
            if 'description' in self.exth:
                self.description = ' <P> '.join(self.exth['description'])

    elif self.type == 'TEXtREAd':
        LOG(2, "This is an older MOBI book")
        self.rawdata = d
        compression, data_len, rec_count, rec_size, pos = unpack(
            PRC_HDRFMT, rec0[:calcsize(PRC_HDRFMT)])
        LOG(3, "compression %d, data_len %d, rec_count %d, rec_size %d" %
            (compression, data_len, rec_count, rec_size))
        if compression == 2:
            data = uncompress(self.records[1].data)
        else:
            data = self.records[1].data

        # Imported here so the dependency is only needed for this format.
        from BeautifulSoup import BeautifulSoup
        soup = BeautifulSoup(data)

        self.metadata = soup.fetch("dc-metadata")
        try:
            self.title = soup.fetch("dc:title")[0].getText()
            self.author = soup.fetch("dc:creator")[0].getText()
            self.language = soup.fetch("dc:language")[0].getText()
        except Exception:
            # Any missing tag resets all three fields to placeholders.
            self.title, self.author, self.language = ("Unknown", "Unknown",
                                                      "en-us")
        try:
            self.description = soup.fetch("dc:description")[0].getText()
        except Exception:
            # No usable description tag: keep the empty default.
            pass
def to_html(self):
    """Return the uncompressed book text as a single string.

    Records from index 1 up to (not including) the stop index are passed
    through ``uncompress`` and concatenated.  The stop index is
    ``self.mobi['first_image_idx']`` when the instance has a ``mobi``
    attribute of its own, and ``-1`` otherwise.
    """
    if 'mobi' in vars(self):
        stop = self.mobi['first_image_idx']
    else:
        stop = -1

    pieces = []
    for record in self.records[1:stop]:
        pieces.append(uncompress(record.data))
    return ''.join(pieces)
def to_html(self):
    """Concatenate the uncompressed text records of the book.

    The slice of ``self.records`` that is used starts at index 1 and ends
    before ``self.mobi['first_image_idx']`` if this instance carries a
    ``mobi`` attribute; without one the slice ends at ``-1``.  Each
    selected record's ``data`` is run through ``uncompress``.
    """
    has_mobi_header = 'mobi' in self.__dict__
    end_idx = self.mobi['first_image_idx'] if has_mobi_header else -1
    text_records = self.records[1:end_idx]
    return ''.join(uncompress(rec.data) for rec in text_records)
def __init__(self, fn):
    """Read the book file ``fn`` and populate metadata attributes.

    The first 68 bytes of the file are read to obtain the type tag stored
    at bytes 60-68.  Only ``'BOOKMOBI'`` and ``'TEXtREAd'`` files are
    processed further; anything else is logged and left with the default
    attribute values.

    Attributes always set: ``filename``, ``title``, ``author``,
    ``language``, ``is_a_book`` and ``type``.  Once ``parse_palmdb``
    succeeds, ``is_a_book`` becomes True and ``records`` is set.
    Depending on the file type, ``mobi``, ``exth``, ``rawdata`` and
    ``metadata`` may be set as well.

    Construction never signals failure through its return value; callers
    should inspect ``is_a_book``.  Note that ``is_a_book`` is already True
    when the later "Mobi header missing!" check bails out.

    Args:
        fn: Path of the file to inspect.
    """
    self.filename = fn

    # Defaults, kept whenever the file does not supply a better value.
    self.title = fn
    self.author = "??"
    self.language = "??"
    self.is_a_book = False

    # The header is raw binary data, so read it in binary mode; the
    # context manager closes the handle even when the read fails.
    with open(fn, 'rb') as f:
        d = f.read(68)

    encodings = {
        1252: 'cp1252',
        65001: 'utf-8',
    }
    supported_types = ('BOOKMOBI', 'TEXtREAd')
    self.type = d[60:68]
    if self.type not in supported_types:
        LOG(1, "Unsupported file type %s" % (self.type))
        return None

    try:
        db = parse_palmdb(fn)
    except Exception:
        # Best effort: an unparsable database just means "not a book".
        return None

    self.is_a_book = True
    # now we have a better guess at the title, use it for now
    self.title = db.name
    self.records = db.records
    rec0 = self.records[0].data

    if self.type == 'BOOKMOBI':
        LOG(3, "This is a MOBI book")
        self.mobi = {}
        for field, pos, fmt in MOBI_HDR_FIELDS:
            end = pos + calcsize(fmt)
            # Skip fields that lie beyond record 0 or, once the header
            # length has been decoded, beyond the declared header.
            if (end > len(rec0) or
                    ("header_len" in self.mobi and
                     end > self.mobi["header_len"])):
                continue
            LOG(4, "field: %s, fmt: %s, @ [%d:%d], data: %s" % (
                field, fmt, pos, end, repr(rec0[pos:end])))
            (self.mobi[field], ) = unpack(">%s" % fmt, rec0[pos:end])

        LOG(3, "self.mobi: %s" % repr(self.mobi))

        # Translate the locale codes into a readable language string.
        if self.mobi['locale_language'] in LANGUAGES:
            lang = LANGUAGES[self.mobi['locale_language']]
            if self.mobi['locale_country'] == 0:
                LOG(2, "Book language: %s" % lang[0][1])
                self.language = "%s (%s)" % (lang[0][1], lang[0][0])
            elif self.mobi['locale_country'] in lang:
                country = lang[self.mobi['locale_country']]
                LOG(2, "Book language is %s (%s)" % (
                    lang[0][1], country[1]))
                self.language = "%s (%s-%s)" % (
                    lang[0][1], lang[0][0], country[0])

        # Get and decode the book name stored inside record 0.
        pos = self.mobi['full_name_offs']
        end = pos + self.mobi['full_name_len']
        self.title = rec0[pos:end].decode(encodings[self.mobi['encoding']])
        LOG(2, "Book name: %s" % self.title)

        if self.mobi['id'] != 'MOBI':
            LOG(0, "Mobi header missing!")
            return None

        if (0x40 & self.mobi['exth_flags']):  # check for EXTH
            self.exth = parse_exth(rec0, self.mobi['header_len'] + 16)
            LOG(3, "EXTH header: %s" % repr(self.exth))
            if 'author' in self.exth:
                self.author = ' & '.join(self.exth['author'])
            else:
                self.author = "n/a"
            self.rawdata = d
            # NOTE(review): ' '.join() on a str puts a space between
            # every character -- confirm what parse_exth returns for
            # 'updated title' before relying on this branch.
            if (('updated title' in self.exth) and
                    (type(self.exth['updated title']) is str)):
                self.title = ' '.join(self.exth['updated title'])

    elif self.type == 'TEXtREAd':
        LOG(2, "This is an older MOBI book")
        self.rawdata = d
        compression, data_len, rec_count, rec_size, pos = unpack(
            PRC_HDRFMT, rec0[:calcsize(PRC_HDRFMT)])
        LOG(3, "compression %d, data_len %d, rec_count %d, rec_size %d" %
            (compression, data_len, rec_count, rec_size))
        if compression == 2:
            data = uncompress(self.records[1].data)
        else:
            data = self.records[1].data

        # Imported here so the dependency is only needed for this format.
        from BeautifulSoup import BeautifulSoup
        soup = BeautifulSoup(data)

        self.metadata = soup.fetch("dc-metadata")
        try:
            self.title = soup.fetch("dc:title")[0].getText()
            self.author = soup.fetch("dc:creator")[0].getText()
            self.language = soup.fetch("dc:language")[0].getText()
        except Exception:
            # Any missing tag resets all three fields to placeholders.
            self.title, self.author, self.language = ("Unknown", "Unknown",
                                                      "en-us")
def uncompress_lz77(data):
    """Decompress ``data`` by delegating to ``lz77.uncompress``.

    The argument is handed over unchanged and the callee's result is
    returned as-is.
    """
    decoded = lz77.uncompress(data)
    return decoded