Python StreamSlicer Examples

Programming Language: Python

Namespace/Package Name: calibre.ebooks.metadata.mobi

Class/Type: StreamSlicer

Examples at hotexamples.com: 3

Python StreamSlicer - 3 examples found. These are the top rated real world Python examples of calibre.ebooks.metadata.mobi.StreamSlicer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

StreamSlicer(3)

Frequently Used Methods

StreamSlicer (3)

Example #1

Show file

File: bookmark.py Project: smdx023/calibre

    def get_book_length(self):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                    '.%s' % self.book_format)

        self.book_length = 0
        if self.bookmark_extension == 'mbp':
            # Read the book len from the header
            try:
                with lopen(book_fs, 'rb') as f:
                    self.stream = io.BytesIO(f.read())
                    self.data = StreamSlicer(self.stream)
                    self.nrecs, = unpack('>H', self.data[76:78])
                    record0 = self.record(0)
                    self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
            except:
                pass
        elif self.bookmark_extension == 'tan':
            # Read bookLength from metadata
            from calibre.ebooks.metadata.topaz import MetadataUpdater
            try:
                with lopen(book_fs, 'rb') as f:
                    mu = MetadataUpdater(f)
                    self.book_length = mu.book_length
            except:
                pass
        else:
            print("unsupported bookmark_extension: %s" %
                  self.bookmark_extension)

Example #2

Show file

File: bookmark.py Project: pombreda/calibre-1

 def record(self, n):
     from calibre.ebooks.metadata.mobi import StreamSlicer
     if n >= self.nrecs:
         raise ValueError('non-existent record %r' % n)
     offoff = 78 + (8 * n)
     start, = unpack('>I', self.data[offoff + 0:offoff + 4])
     stop = None
     if n < (self.nrecs - 1):
         stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
     return StreamSlicer(self.stream, start, stop)

Example #3

Show file

File: bookmark.py Project: pombreda/calibre-1

    def get_bookmark_data(self):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo + 4])[0])
                self.last_read_location = self.last_read / MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl + 4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % self.path
                current_entry = 1
                sig = data[eo:eo + 4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo + 4:eo + 8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo + 8:eo + 12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo + 0x34:eo + 0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo + 8:eo + 8 +
                                    rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location / MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(
                            id=self.id,
                            displayed_location=displayed_location,
                            type=entry_type,
                            text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo + 4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo + 0x10:eo + 0x14])

                    if end_loc in user_notes and \
                       (user_notes[end_loc]['type'] == 'Highlight' or \
                        user_notes[end_loc]['type'] == 'Note'):
                        # Switch location to start (0x08:0x0c)
                        start, = unpack('>I', data[eo + 8:eo + 12])
                        user_notes[start] = user_notes[end_loc]
                        '''
                        print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
                                                                    end_loc,
                                                                    end_loc/MAGIC_MOBI_CONSTANT + 1,
                                                                    start,
                                                                    start//MAGIC_MOBI_CONSTANT + 1)
                        '''
                        user_notes[start][
                            'displayed_location'] = start / MAGIC_MOBI_CONSTANT + 1
                        user_notes.pop(end_loc)
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            # print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
                            displayed_location = end_loc / MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(
                                id=self.id,
                                displayed_location=displayed_location,
                                type='Bookmark',
                                text=None)
                    rec_len, = unpack('>I', data[eo + 4:eo + 8])
                    eo += rec_len + 8
                    sig = data[eo:eo + 4]

        elif self.bookmark_extension == 'tan':
            from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

            def get_topaz_highlight(displayed_location):
                # Parse My Clippings.txt for a matching highlight
                # Search looks for book title match, highlight match, and location match
                # Author is not matched
                # This will find the first instance of a clipping only
                book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                            '.%s' % self.book_format)
                with open(book_fs, 'rb') as f2:
                    stream = StringIO(f2.read())
                    mi = get_topaz_metadata(stream)
                my_clippings = self.path
                split = my_clippings.find('documents') + len('documents/')
                my_clippings = my_clippings[:split] + "My Clippings.txt"
                try:
                    with open(my_clippings, 'r') as f2:
                        marker_found = 0
                        text = ''
                        search_str1 = '%s' % (mi.title)
                        search_str2 = '- Highlight Loc. %d' % (
                            displayed_location)
                        for line in f2:
                            if marker_found == 0:
                                if line.startswith(search_str1):
                                    marker_found = 1
                            elif marker_found == 1:
                                if line.startswith(search_str2):
                                    marker_found = 2
                            elif marker_found == 2:
                                if line.startswith('=========='):
                                    break
                                text += line.strip()
                        else:
                            raise Exception('error')
                except:
                    text = '(Unable to extract highlight text from My Clippings.txt)'
                return text

            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read / MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base + 2:e_base + 6])
                    text = None
                    text_len, = unpack('>I', data[e_base + 0xA:e_base + 0xE])
                    e_type, = unpack('>B', data[e_base + 1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location /
                                                   MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base + 0x10:e_base + 0x10 + text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    displayed_location = location / MAGIC_TOPAZ_CONSTANT + 1
                    user_notes[location] = dict(
                        id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break

        elif self.bookmark_extension == 'pdr':
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                self.pdf_page_offset = 0
                while current_entry < entries:
                    '''
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs implementation
                        displayed_location = location
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                    '''
                    # Use label as page number
                    pdf_location, = unpack('>I', data[e_base + 1:e_base + 5])
                    label_len, = unpack('>H', data[e_base + 5:e_base + 7])
                    location = int(data[e_base + 7:e_base + 7 + label_len])
                    displayed_location = location
                    e_type = 'Bookmark'
                    text = None
                    user_notes[location] = dict(
                        id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    self.pdf_page_offset = pdf_location - location
                    e_base += (7 + label_len)
                    current_entry += 1

                self.last_read_location = self.last_read - self.pdf_page_offset

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes