Python StreamSlicer Exemples, calibre.ebooks.metadata.mobi.StreamSlicer Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : bookmark.py Projet : smdx023/calibre

    def get_book_length(self):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                    '.%s' % self.book_format)

        self.book_length = 0
        if self.bookmark_extension == 'mbp':
            # Read the book len from the header
            try:
                with lopen(book_fs, 'rb') as f:
                    self.stream = io.BytesIO(f.read())
                    self.data = StreamSlicer(self.stream)
                    self.nrecs, = unpack('>H', self.data[76:78])
                    record0 = self.record(0)
                    self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
            except:
                pass
        elif self.bookmark_extension == 'tan':
            # Read bookLength from metadata
            from calibre.ebooks.metadata.topaz import MetadataUpdater
            try:
                with lopen(book_fs, 'rb') as f:
                    mu = MetadataUpdater(f)
                    self.book_length = mu.book_length
            except:
                pass
        else:
            print("unsupported bookmark_extension: %s" %
                  self.bookmark_extension)

Exemple #2

0

Afficher le fichier

Fichier : bookmark.py Projet : pombreda/calibre-1

 def record(self, n):
     from calibre.ebooks.metadata.mobi import StreamSlicer
     if n >= self.nrecs:
         raise ValueError('non-existent record %r' % n)
     offoff = 78 + (8 * n)
     start, = unpack('>I', self.data[offoff + 0:offoff + 4])
     stop = None
     if n < (self.nrecs - 1):
         stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
     return StreamSlicer(self.stream, start, stop)

Exemple #3

0

Afficher le fichier

Fichier : bookmark.py Projet : pombreda/calibre-1

    def get_bookmark_data(self):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo + 4])[0])
                self.last_read_location = self.last_read / MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl + 4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % self.path
                current_entry = 1
                sig = data[eo:eo + 4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo + 4:eo + 8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo + 8:eo + 12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo + 0x34:eo + 0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo + 8:eo + 8 +
                                    rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location / MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(
                            id=self.id,
                            displayed_location=displayed_location,
                            type=entry_type,
                            text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo + 4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo + 0x10:eo + 0x14])

                    if end_loc in user_notes and \
                       (user_notes[end_loc]['type'] == 'Highlight' or \
                        user_notes[end_loc]['type'] == 'Note'):
                        # Switch location to start (0x08:0x0c)
                        start, = unpack('>I', data[eo + 8:eo + 12])
                        user_notes[start] = user_notes[end_loc]
                        '''
                        print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
                                                                    end_loc,
                                                                    end_loc/MAGIC_MOBI_CONSTANT + 1,
                                                                    start,
                                                                    start//MAGIC_MOBI_CONSTANT + 1)
                        '''
                        user_notes[start][
                            'displayed_location'] = start / MAGIC_MOBI_CONSTANT + 1
                        user_notes.pop(end_loc)
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            # print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
                            displayed_location = end_loc / MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(
                                id=self.id,
                                displayed_location=displayed_location,
                                type='Bookmark',
                                text=None)
                    rec_len, = unpack('>I', data[eo + 4:eo + 8])
                    eo += rec_len + 8
                    sig = data[eo:eo + 4]

        elif self.bookmark_extension == 'tan':
            from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

            def get_topaz_highlight(displayed_location):
                # Parse My Clippings.txt for a matching highlight
                # Search looks for book title match, highlight match, and location match
                # Author is not matched
                # This will find the first instance of a clipping only
                book_fs = self.path.replace('.%s' % self.bookmark_extension,
                                            '.%s' % self.book_format)
                with open(book_fs, 'rb') as f2:
                    stream = StringIO(f2.read())
                    mi = get_topaz_metadata(stream)
                my_clippings = self.path
                split = my_clippings.find('documents') + len('documents/')
                my_clippings = my_clippings[:split] + "My Clippings.txt"
                try:
                    with open(my_clippings, 'r') as f2:
                        marker_found = 0
                        text = ''
                        search_str1 = '%s' % (mi.title)
                        search_str2 = '- Highlight Loc. %d' % (
                            displayed_location)
                        for line in f2:
                            if marker_found == 0:
                                if line.startswith(search_str1):
                                    marker_found = 1
                            elif marker_found == 1:
                                if line.startswith(search_str2):
                                    marker_found = 2
                            elif marker_found == 2:
                                if line.startswith('=========='):
                                    break
                                text += line.strip()
                        else:
                            raise Exception('error')
                except:
                    text = '(Unable to extract highlight text from My Clippings.txt)'
                return text

            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read / MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base + 2:e_base + 6])
                    text = None
                    text_len, = unpack('>I', data[e_base + 0xA:e_base + 0xE])
                    e_type, = unpack('>B', data[e_base + 1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location /
                                                   MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base + 0x10:e_base + 0x10 + text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    displayed_location = location / MAGIC_TOPAZ_CONSTANT + 1
                    user_notes[location] = dict(
                        id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break

        elif self.bookmark_extension == 'pdr':
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path, 'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                self.pdf_page_offset = 0
                while current_entry < entries:
                    '''
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs implementation
                        displayed_location = location
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                    '''
                    # Use label as page number
                    pdf_location, = unpack('>I', data[e_base + 1:e_base + 5])
                    label_len, = unpack('>H', data[e_base + 5:e_base + 7])
                    location = int(data[e_base + 7:e_base + 7 + label_len])
                    displayed_location = location
                    e_type = 'Bookmark'
                    text = None
                    user_notes[location] = dict(
                        id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    self.pdf_page_offset = pdf_location - location
                    e_base += (7 + label_len)
                    current_entry += 1

                self.last_read_location = self.last_read - self.pdf_page_offset

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes