def get_book_length(self):
    """Set ``self.book_length`` from the book file that accompanies this bookmark.

    The book path is derived from ``self.path`` by swapping the bookmark
    extension for ``self.book_format``.  ``self.book_length`` is always
    reset to 0 first and is only overwritten when the length can be read:

    * ``'mbp'``: the length is the big-endian 32-bit value at bytes
      0x04-0x08 of record 0.  As a side effect ``self.stream``,
      ``self.data`` and ``self.nrecs`` are populated.
    * ``'tan'``: the length is taken from ``MetadataUpdater.book_length``.
    * anything else: a message is printed and the length stays 0.

    Reading is best-effort: a missing or malformed book file leaves
    ``self.book_length`` at 0 instead of raising.
    """
    book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                '.%s' % self.book_format)
    self.book_length = 0

    if self.bookmark_extension == 'mbp':
        # Only this branch needs StreamSlicer, so import it here.
        from calibre.ebooks.metadata.mobi import StreamSlicer
        # Read the book len from the header
        try:
            with lopen(book_fs, 'rb') as f:
                self.stream = io.BytesIO(f.read())
            self.data = StreamSlicer(self.stream)
            # Record count: big-endian 16-bit value at offset 76.
            self.nrecs, = unpack('>H', self.data[76:78])
            record0 = self.record(0)
            self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
        except Exception:
            # Deliberately best-effort; unlike a bare ``except:`` this no
            # longer swallows KeyboardInterrupt / SystemExit.
            pass
    elif self.bookmark_extension == 'tan':
        # Read bookLength from metadata
        from calibre.ebooks.metadata.topaz import MetadataUpdater
        try:
            with lopen(book_fs, 'rb') as f:
                mu = MetadataUpdater(f)
                self.book_length = mu.book_length
        except Exception:
            # Same best-effort policy as the 'mbp' branch.
            pass
    else:
        print("unsupported bookmark_extension: %s" % self.bookmark_extension)
def record(self, n):
    """Return a ``StreamSlicer`` over record ``n`` of ``self.stream``.

    The record table is read from ``self.data``: entry ``n`` occupies
    8 bytes starting at offset ``78 + 8 * n`` and begins with the
    big-endian 32-bit start offset of the record.  The record ends where
    the next entry's record starts; for the last record ``stop`` is
    passed as ``None`` (presumably meaning "to the end of the stream" --
    see ``StreamSlicer``).

    Raises:
        ValueError: if ``n`` is negative or not smaller than ``self.nrecs``.
    """
    # Reject negative indexes too: they pass an ``n >= nrecs`` check but
    # would read the offsets from outside the record table.
    if n < 0 or n >= self.nrecs:
        raise ValueError('non-existent record %r' % n)

    from calibre.ebooks.metadata.mobi import StreamSlicer

    offoff = 78 + (8 * n)
    start, = unpack('>I', self.data[offoff + 0:offoff + 4])
    stop = None
    if n < (self.nrecs - 1):
        # Start offset of the following record marks the end of this one.
        stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
    return StreamSlicer(self.stream, start, stop)
def get_bookmark_data(self):
    """Parse the bookmark file at ``self.path`` and store what it contains.

    Nothing is returned; the results are stored on the instance:

    * ``self.timestamp`` -- read from the file header for 'mbp', taken
      from the file's modification time for 'tan' and 'pdr'.
    * ``self.last_read`` and ``self.last_read_location`` -- the raw
      last-read value and the location derived from it.
    * ``self.pdf_page_offset`` -- 'pdr' only.
    * ``self.user_notes`` -- dict mapping a location key to a dict with
      the keys ``id``, ``displayed_location``, ``type`` and ``text``.

    For an unsupported ``self.bookmark_extension`` a message is printed
    and only ``self.user_notes`` is set (to an empty dict).
    """
    user_notes = {}
    extension = self.bookmark_extension

    if extension not in ('mbp', 'tan', 'pdr'):
        print("unsupported bookmark_extension: %s" % self.bookmark_extension)
        self.user_notes = user_notes
        return

    from calibre.ebooks.metadata.mobi import StreamSlicer

    if extension == 'mbp':
        # Divisor used to turn a raw offset into a displayed location.
        MAGIC_MOBI_CONSTANT = 150
        with open(self.path, 'rb') as f:
            stream = StringIO(f.read())
        data = StreamSlicer(stream)

        self.timestamp, = unpack('>I', data[0x24:0x28])
        bpar_offset, = unpack('>I', data[0x4e:0x52])
        lrlo = bpar_offset + 0x0c
        self.last_read = int(unpack('>I', data[lrlo:lrlo + 4])[0])
        # Explicit floor division: locations are whole numbers.
        self.last_read_location = self.last_read // MAGIC_MOBI_CONSTANT + 1

        # Store the annotations/locations
        bpl = bpar_offset + 4
        bpar_len, = unpack('>I', data[bpl:bpl + 4])
        bpar_len += 8
        eo = bpar_offset + bpar_len

        # Walk bookmark entries.  Each DATA record is classified as an
        # empty record, an EBAR header (which carries the location) or a
        # text block; what preceded a text block decides its type.
        sig = data[eo:eo + 4]
        previous_block = None
        location = None
        while sig == b'DATA':
            text = None
            entry_type = None
            rec_len, = unpack('>I', data[eo + 4:eo + 8])
            if rec_len == 0:
                current_block = "empty_data"
            elif data[eo + 8:eo + 12] == b"EBAR":
                current_block = "data_header"
                location, = unpack('>I', data[eo + 0x34:eo + 0x38])
            else:
                current_block = "text_block"
                if previous_block == 'empty_data':
                    entry_type = 'Note'
                elif previous_block == 'data_header':
                    entry_type = 'Highlight'
                text = data[eo + 8:eo + 8 + rec_len].decode('utf-16-be')

            # A text block seen before any EBAR header has no location to
            # be filed under; skip it instead of failing on an unbound name.
            if entry_type and location is not None:
                displayed_location = location // MAGIC_MOBI_CONSTANT + 1
                user_notes[location] = dict(
                    id=self.id,
                    displayed_location=displayed_location,
                    type=entry_type,
                    text=text)

            eo += rec_len + 8
            previous_block = current_block
            sig = data[eo:eo + 4]

        while sig == b'BKMK':
            # Fix start location for Highlights using BKMK data
            end_loc, = unpack('>I', data[eo + 0x10:eo + 0x14])

            if (end_loc in user_notes and
                    user_notes[end_loc]['type'] in ('Highlight', 'Note')):
                # Switch location to start (0x08:0x0c).  Pop before
                # re-inserting so the entry survives when start == end_loc.
                start, = unpack('>I', data[eo + 8:eo + 12])
                note = user_notes.pop(end_loc)
                note['displayed_location'] = start // MAGIC_MOBI_CONSTANT + 1
                user_notes[start] = note
            elif end_loc != self.last_read:
                # If a bookmark coincides with a user annotation, the locs
                # could be the same - cheat by nudging -1.
                # The bookmark for last_read_location is skipped.
                displayed_location = end_loc // MAGIC_MOBI_CONSTANT + 1
                user_notes[end_loc - 1] = dict(
                    id=self.id,
                    displayed_location=displayed_location,
                    type='Bookmark',
                    text=None)

            rec_len, = unpack('>I', data[eo + 4:eo + 8])
            eo += rec_len + 8
            sig = data[eo:eo + 4]

    elif extension == 'tan':
        from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

        def get_topaz_highlight(displayed_location):
            # Parse My Clippings.txt for a matching highlight
            # Search looks for book title match, highlight match, and location match
            # Author is not matched
            # This will find the first instance of a clipping only
            book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                        '.%s' % self.book_format)
            with open(book_fs, 'rb') as f2:
                stream = StringIO(f2.read())
                mi = get_topaz_metadata(stream)

            my_clippings = self.path
            cut = my_clippings.find('documents') + len('documents/')
            my_clippings = my_clippings[:cut] + "My Clippings.txt"

            try:
                with open(my_clippings, 'r') as f2:
                    marker_found = 0
                    pieces = []
                    search_str1 = '%s' % (mi.title)
                    search_str2 = '- Highlight Loc. %d' % (displayed_location)
                    for line in f2:
                        if marker_found == 0:
                            if line.startswith(search_str1):
                                marker_found = 1
                        elif marker_found == 1:
                            if line.startswith(search_str2):
                                marker_found = 2
                        elif marker_found == 2:
                            if line.startswith('=========='):
                                # Separator reached: the clipping is complete.
                                return ''.join(pieces)
                            pieces.append(line.strip())
            except Exception:
                # Best-effort lookup: fall through to the placeholder.
                pass
            # No terminated clipping was found (or the file was unreadable).
            return '(Unable to extract highlight text from My Clippings.txt)'

        # Divisor used to turn a raw offset into a displayed location.
        MAGIC_TOPAZ_CONSTANT = 33.33
        self.timestamp = os.path.getmtime(self.path)
        with open(self.path, 'rb') as f:
            stream = StringIO(f.read())
        data = StreamSlicer(stream)

        self.last_read = int(unpack('>I', data[5:9])[0])
        self.last_read_location = self.last_read / MAGIC_TOPAZ_CONSTANT + 1
        entries, = unpack('>I', data[9:13])
        current_entry = 0
        e_base = 0x0d
        while current_entry < entries:
            location, = unpack('>I', data[e_base + 2:e_base + 6])
            text = None
            text_len, = unpack('>I', data[e_base + 0xA:e_base + 0xE])
            e_type, = unpack('>B', data[e_base + 1])
            if e_type == 0:
                e_type = 'Bookmark'
            elif e_type == 1:
                e_type = 'Highlight'
                text = get_topaz_highlight(location / MAGIC_TOPAZ_CONSTANT + 1)
            elif e_type == 2:
                e_type = 'Note'
                text = data[e_base + 0x10:e_base + 0x10 + text_len]
            else:
                e_type = 'Unknown annotation type'

            displayed_location = location / MAGIC_TOPAZ_CONSTANT + 1
            user_notes[location] = dict(
                id=self.id,
                displayed_location=displayed_location,
                type=e_type,
                text=text)

            # A text length of 0xFFFFFFFF means the entry carries no text.
            if text_len == 0xFFFFFFFF:
                e_base = e_base + 14
            else:
                e_base = e_base + 14 + 2 + text_len
            current_entry += 1

        # Drop the entry that merely records the last-read position.
        user_notes.pop(self.last_read, None)

    else:  # extension == 'pdr'
        self.timestamp = os.path.getmtime(self.path)
        with open(self.path, 'rb') as f:
            stream = StringIO(f.read())
        data = StreamSlicer(stream)

        self.last_read = int(unpack('>I', data[5:9])[0])
        entries, = unpack('>I', data[9:13])
        current_entry = 0
        e_base = 0x0d
        self.pdf_page_offset = 0
        while current_entry < entries:
            # Use label as page number
            pdf_location, = unpack('>I', data[e_base + 1:e_base + 5])
            label_len, = unpack('>H', data[e_base + 5:e_base + 7])
            location = int(data[e_base + 7:e_base + 7 + label_len])
            user_notes[location] = dict(
                id=self.id,
                displayed_location=location,
                type='Bookmark',
                text=None)
            # The offset computed from the last entry is the one kept.
            self.pdf_page_offset = pdf_location - location
            e_base += (7 + label_len)
            current_entry += 1

        self.last_read_location = self.last_read - self.pdf_page_offset

    self.user_notes = user_notes