Ejemplo n.º 1
0
    def dump_record(self, r, dat):
        """Describe one text record and decode the start of its TBS bytes.

        :param r: Text record; ``r.idx`` and ``r.trailing_data`` are read.
        :param dat: Mapping providing ``'geom'`` (items 0 and 1 are reported
            as the record start and end) plus the ``'starts'``, ``'ends'``
            and ``'complete'`` collections of index entries.
        :return: ``(tbs_type, lines)``. ``tbs_type`` is the bitwise OR of the
            flag keys of the first decoded TBS sequence (0 when the record
            carries no indexing bytes); ``lines`` is the list of report lines.
        """
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d' %
                   (r.idx, dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
                    '(%d ends, %d complete, %d starts)') %
                   (len(s) + len(e) + len(c), len(e), len(c), len(s)))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        ans.append('TBS bytes: %s' % format_bytes(byts))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if not entries:
                continue
            ans.append('\t%s:' % typ)
            ans.extend(
                ('\t\tIndex Entry: %s (Parent index: %s, '
                 'Depth: %d, Offset: %d, Size: %d) [%s]') %
                (x.index, x.parent_index, x.depth, x.offset, x.size, x.label)
                for x in entries)

        def bin4(num):
            # Binary digits of num, left-padded with zeros to at least 4.
            # NOTE(review): as_bytes is defined outside this block; if it
            # returns bytes on Python 3 the '%s' output below will carry a
            # b'' prefix -- confirm that is intended.
            return as_bytes(bin(num)[2:].rjust(4, '0'))

        def repr_extra(x):
            # Render the flag mapping passed in (previously the argument was
            # ignored and the enclosing ``extra`` variable was read instead).
            return str({bin4(k): v for k, v in iteritems(x)})

        tbs_type = 0
        is_periodical = self.doc_type in (257, 258, 259)
        if byts:
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
            byts = byts[consumed:]
            # The TBS type is the union of every flag bit that was present.
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)' % (tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d' % outermost_index)
            ans.append('Unknown extra start bytes: %s' % repr_extra(extra))
            if is_periodical:  # Hierarchical periodical
                try:
                    byts, a = self.interpret_periodical(
                        tbs_type, byts, dat['geom'][0])
                except Exception:
                    # Best effort: report the failure and keep dumping, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    import traceback
                    traceback.print_exc()
                    a = []
                    print('Failed to decode TBS bytes for record: %d' % r.idx)
                ans += a
            if byts:
                ans.append('Remaining bytes: %s' %
                           ' '.join(hex(b)[2:] for b in byts))

        ans.append('')
        return tbs_type, ans
Ejemplo n.º 2
0
    def dump_record(self, r, dat):
        """Return the TBS type and a textual dump for a single text record.

        :param r: Text record object; its ``idx`` and ``trailing_data``
            attributes are used.
        :param dat: Mapping with the keys ``'geom'`` (start/end pair of the
            record), ``'starts'``, ``'ends'`` and ``'complete'`` (collections
            of index entries).
        :return: A ``(tbs_type, lines)`` tuple: the OR of all flag keys found
            in the first TBS sequence (0 if there are no TBS bytes) and the
            list of generated report lines.
        """
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
            dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
            '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
                c, s))))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        ans.append('TBS bytes: %s'%format_bytes(byts))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if entries:
                ans.append('\t%s:'%typ)
                for x in entries:
                    ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                            'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                        x.index, x.parent_index, x.depth, x.offset, x.size, x.label))

        def bin4(num):
            # Zero-padded (minimum four digits) binary form of num.
            ans = bin(num)[2:]
            return as_bytes('0'*(4-len(ans)) + ans)

        def repr_extra(x):
            # Use the mapping that was passed in; the earlier version ignored
            # its argument and silently depended on the outer ``extra``.
            return str({bin4(k):v for k, v in iteritems(x)})

        tbs_type = 0
        is_periodical = self.doc_type in (257, 258, 259)
        if byts:
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
            byts = byts[consumed:]
            # Combine every flag bit seen into a single type value.
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d'%outermost_index)
            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
            if is_periodical:  # Hierarchical periodical
                try:
                    byts, a = self.interpret_periodical(tbs_type, byts,
                        dat['geom'][0])
                except Exception:
                    # Decoding is best effort; only ordinary errors are
                    # swallowed so Ctrl-C and interpreter exit still work.
                    import traceback
                    traceback.print_exc()
                    a = []
                    print('Failed to decode TBS bytes for record: %d'%r.idx)
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
                ans.append('Remaining bytes: %s'%' '.join(sbyts))

        ans.append('')
        return tbs_type, ans
Ejemplo n.º 3
0
        def read_section_transitions(byts, psi=None):  # {{{
            """Decode TBS sequences until ``byts`` is exhausted, logging each.

            Every decoded sequence is described by appending text to the
            enclosing ``ans`` list.

            :param byts: Bytes still to be decoded with ``decode_tbs``.
            :param psi: Index entry of the previous section; when ``None`` the
                entry returned by ``self.get_index(1)`` is used.
            :return: Whatever is left of ``byts`` once the loop finishes.
            :raises ValueError: If flag 0b0010 is present, or if flag 0b1000
                is accompanied by any other flag.
            """
            # Without an explicit previous section, assume it is section 1.
            section = self.get_index(1) if psi is None else psi
            remaining = byts

            while remaining:
                article, flags, used = decode_tbs(remaining)
                remaining = remaining[used:]

                if flags.get(0b0010) is not None:
                    raise ValueError(
                        'Dont know how to interpret flag 0b0010 while reading'
                        ' section transitions')

                if flags.get(0b1000) is not None:
                    if len(flags) > 1:
                        raise ValueError(
                            'Dont know how to interpret flags %r while reading'
                            ' section transitions' % flags)
                    # The value is relative to the *next* section, which then
                    # becomes the current one for later sequences.
                    following = self.get_index(section.index + 1)
                    ans.append(
                        'Last article in this record of section %d (relative'
                        ' to next section index [%d]): %d [%d absolute index]'
                        % (section.index, following.index, article,
                           article + following.index))
                    section = following
                    continue

                ans.append(
                    'First article in this record of section %d (relative to'
                    ' its parent section): %d [%d absolute index]'
                    % (section.index, article, article + section.index))

                count = flags.get(0b0100)
                if count is not None:
                    ans.append(
                        'Number of articles in this record of section %d: %d'
                        % (section.index, count))
                else:
                    ans.append(
                        'The section %d has at most one article in this record'
                        % section.index)

                offset = flags.get(0b0001)
                if offset is None:
                    continue
                if offset == 0:
                    ans.append('This record is spanned by the article:%d'
                               % (article + section.index))
                else:
                    ans.append(
                        '->Offset to start of next section (%d) from start of'
                        ' record: %d [%d absolute offset]'
                        % (section.index + 1, offset, offset + record_offset))
            return remaining
Ejemplo n.º 4
0
        def read_section_transitions(byts, psi=None):  # {{{
            """Walk the remaining TBS bytes and describe section transitions.

            Each sequence returned by ``decode_tbs`` produces one or more
            lines appended to the enclosing ``ans`` list.

            :param byts: Undecoded TBS bytes.
            :param psi: Previous section's index entry; defaults to the entry
                obtained from ``self.get_index(1)``.
            :return: The bytes left over after the loop ends.
            :raises ValueError: For flag 0b0010, or for flag 0b1000 combined
                with other flags.
            """
            if psi is None:
                # Assume previous section is 1
                psi = self.get_index(1)

            while byts:
                rel_idx, flags, nbytes = decode_tbs(byts)
                byts = byts[nbytes:]

                if flags.get(0b0010) is not None:
                    raise ValueError('Dont know how to interpret flag 0b0010 '
                                     'while reading section transitions')

                if flags.get(0b1000) is None:
                    # Ordinary sequence: value is relative to current section.
                    ans.append(
                        'First article in this record of section %d '
                        '(relative to its parent section): %d '
                        '[%d absolute index]' % (
                            psi.index, rel_idx, rel_idx + psi.index))

                    article_count = flags.get(0b0100)
                    if article_count is None:
                        summary = ('The section %d has at most one '
                                   'article in this record') % psi.index
                    else:
                        summary = ('Number of articles in this record '
                                   'of section %d: %d') % (
                                       psi.index, article_count)
                    ans.append(summary)

                    next_offset = flags.get(0b0001)
                    if next_offset is not None:
                        if next_offset == 0:
                            line = ('This record is spanned by the '
                                    'article:%d') % (rel_idx + psi.index)
                        else:
                            line = ('->Offset to start of next section (%d) '
                                    'from start of record: %d '
                                    '[%d absolute offset]') % (
                                        psi.index + 1, next_offset,
                                        next_offset + record_offset)
                        ans.append(line)
                else:
                    # Flag 0b1000 must stand alone; it closes the current
                    # section and moves on to the next one.
                    if len(flags) > 1:
                        raise ValueError(
                            'Dont know how to interpret flags %r '
                            'while reading section transitions' % flags)
                    upcoming = self.get_index(psi.index + 1)
                    ans.append(
                        'Last article in this record of section %d '
                        '(relative to next section index [%d]): %d '
                        '[%d absolute index]' % (
                            psi.index, upcoming.index, rel_idx,
                            rel_idx + upcoming.index))
                    psi = upcoming
            return byts
Ejemplo n.º 5
0
 def read_starting_section(byts):  # {{{
     """Decode the first TBS sequence as the record's starting section.

     A description is appended to the enclosing ``ans`` list.

     :param byts: Bytes to decode with ``decode_tbs``.
     :return: ``(section, rest)`` -- the index entry returned by
         ``self.get_index`` and the bytes following the decoded sequence.
     :raises ValueError: If more than one flag is set, if flag 0b0010 or
         0b1000 is set, or if flag 0b0001 carries a non-zero value.
     """
     original_bytes = byts
     section_idx, flags, used = decode_tbs(byts)
     rest = byts[used:]

     unsupported = len(flags) > 1 or 0b0010 in flags or 0b1000 in flags
     if unsupported:
         raise ValueError(
             'Dont know how to interpret flags %r when reading'
             ' starting section' % flags)

     section = self.get_index(section_idx)
     ans.append(
         'The section at the start of this record is: %s' % section.index)

     if 0b0100 in flags:
         ans.append(
             'The number of articles from the section %d in this record: %s'
             % (section.index, flags[0b0100]))
     elif 0b0001 in flags:
         eof = flags[0b0001]
         if eof != 0:
             raise ValueError(
                 'Unknown eof value %s when reading starting section.'
                 ' All bytes: %r' % (eof, original_bytes))
         # The doubled space between the two literals is kept on purpose so
         # the emitted text is unchanged.
         ans.append('??This record has more than one article from '
                    ' the section: %s' % section.index)
     return section, rest
Ejemplo n.º 6
0
 def read_starting_section(byts):  # {{{
     """Interpret the leading TBS sequence as the section this record starts in.

     Report lines are appended to the enclosing ``ans`` list.

     :param byts: Undecoded TBS bytes.
     :return: Tuple of the section's index entry (from ``self.get_index``)
         and the bytes remaining after the decoded sequence.
     :raises ValueError: When the flags are not understood (several flags,
         0b0010, 0b1000) or when the 0b0001 flag value is not zero.
     """
     orig = byts
     si, extra, consumed = decode_tbs(byts)
     byts = byts[consumed:]

     if len(extra) > 1 or any(flag in extra for flag in (0b0010, 0b1000)):
         message = ('Dont know how to interpret flags %r '
                    'when reading starting section') % extra
         raise ValueError(message)

     si = self.get_index(si)
     ans.append('The section at the start of this record '
                'is: %s' % si.index)

     if 0b0100 in extra:
         num = extra[0b0100]
         ans.append('The number of articles from the section %d '
                    'in this record: %s' % (si.index, num))
         return si, byts

     if 0b0001 not in extra:
         return si, byts

     eof = extra[0b0001]
     if eof != 0:
         message = ('Unknown eof value %s when reading starting '
                    'section. All bytes: %r') % (eof, orig)
         raise ValueError(message)
     # Two consecutive spaces before "the section" reproduce the emitted text.
     ans.append('??This record has more than one article from  '
                'the section: %s' % si.index)
     return si, byts
Ejemplo n.º 7
0
    def read_tbs(self):
        """Fill ``self.indexing_data`` with a per-record dump of the TBS bytes.

        For every text record the stored ``'indexing'`` trailing bytes are
        decoded into sequences and compared with the bytes recomputed by the
        helpers in ``calibre.ebooks.mobi.writer8.tbs``; a warning is printed
        and recorded when they differ.

        Reads ``self.ncx_index`` and ``self.text_records``. Returns ``None``.
        """
        from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
                collect_indexing_data, encode_strands_as_sequences,
                sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
        entry_map = []
        for index in self.ncx_index:
            # Drop the last field, pad with None, and pass the first 12
            # values positionally to Entry.
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))

        indexing_data = collect_indexing_data(entry_map, list(map(len,
            self.text_records)))
        self.indexing_data = [DOC + '\n' +textwrap.dedent('''\
                Index Entry lines are of the form:
                depth:index_number [action] parent (index_num-parent) Geometry

                Where Geometry is the start and end of the index entry w.r.t
                the start of the text record.

                ''')]

        # Try the default calculation first; fall back to tbs_type 5 when
        # the writer reports a negative strand index.
        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5

        for i, strands in enumerate(indexing_data):
            rec = self.text_records[i]
            tbs_bytes = rec.trailing_data.get('indexing', b'')
            desc = ['Record #%d'%i]
            for s, strand in enumerate(strands):
                desc.append('Strand %d'%s)
                for entries in itervalues(strand):
                    for e in entries:
                        desc.append(
                        ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)'%(
                            e.depth * ('  ') + '- ', e.index, e.action, e.parent,
                            e.index-(e.parent or 0), e.start-i*RECORD_SIZE,
                            e.start+e.length-i*RECORD_SIZE))
            desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
            # The first sequence is decoded with flag_size=3, later ones
            # with flag_size=4.
            flag_sz = 3
            sequences = []
            otbs = tbs_bytes
            while tbs_bytes:
                try:
                    val, extra, consumed = decode_tbs(tbs_bytes, flag_size=flag_sz)
                except Exception:
                    # Undecodable tail: stop here, it is reported below as
                    # "Remaining bytes". KeyboardInterrupt/SystemExit are no
                    # longer swallowed.
                    break
                flag_sz = 4
                tbs_bytes = tbs_bytes[consumed:]
                extra = {bin(k):v for k, v in iteritems(extra)}
                sequences.append((val, extra))
            for j, seq in enumerate(sequences):
                desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
            if tbs_bytes:
                desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
            calculated_sequences = encode_strands_as_sequences(strands,
                    tbs_type=tbs_type)
            try:
                calculated_bytes = sequences_to_bytes(calculated_sequences)
            except Exception:
                # Sentinel value; the comparison below then reports a
                # mismatch for any record.
                calculated_bytes = b'failed to calculate tbs bytes'
            if calculated_bytes != otbs:
                print('WARNING: TBS mismatch for record %d'%i)
                desc.append('WARNING: TBS mismatch!')
                desc.append('Calculated sequences: %r'%calculated_sequences)
            desc.append('')
            self.indexing_data.append('\n'.join(desc))
Ejemplo n.º 8
0
    def read_tbs(self):
        """Populate ``self.indexing_data`` with a readable TBS report.

        The stored ``'indexing'`` trailing bytes of each text record are
        decoded into sequences and checked against the bytes recomputed via
        ``calibre.ebooks.mobi.writer8.tbs``; mismatches are printed and noted
        in the report.

        Reads ``self.ncx_index`` and ``self.text_records``. Returns ``None``.
        """
        from calibre.ebooks.mobi.writer8.tbs import (
            Entry, DOC, collect_indexing_data, encode_strands_as_sequences,
            sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
        entry_map = []
        for index in self.ncx_index:
            # Drop the last field, pad with None, and hand the first 12
            # values to Entry positionally.
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))

        indexing_data = collect_indexing_data(
            entry_map, list(map(len, self.text_records)))
        self.indexing_data = [
            DOC + '\n' + textwrap.dedent('''\
                Index Entry lines are of the form:
                depth:index_number [action] parent (index_num-parent) Geometry

                Where Geometry is the start and end of the index entry w.r.t
                the start of the text record.

                ''')
        ]

        # Default calculation first; retry with tbs_type 5 if the writer
        # signals a negative strand index.
        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5

        for i, strands in enumerate(indexing_data):
            rec = self.text_records[i]
            tbs_bytes = rec.trailing_data.get('indexing', b'')
            desc = ['Record #%d' % i]
            for s, strand in enumerate(strands):
                desc.append('Strand %d' % s)
                # .values() exists on both Python 2 and 3; .itervalues()
                # raises AttributeError on Python 3.
                for entries in strand.values():
                    for e in entries:
                        desc.append(
                            ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)' %
                            (e.depth * ('  ') + '- ', e.index, e.action,
                             e.parent, e.index -
                             (e.parent or 0), e.start - i * RECORD_SIZE,
                             e.start + e.length - i * RECORD_SIZE))
            desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
            # First sequence uses flag_size=3, the following ones 4.
            flag_sz = 3
            sequences = []
            otbs = tbs_bytes
            while tbs_bytes:
                try:
                    val, extra, consumed = decode_tbs(tbs_bytes,
                                                      flag_size=flag_sz)
                except Exception:
                    # Stop at the first undecodable sequence; the leftover
                    # bytes are reported below. Interpreter-level exceptions
                    # (KeyboardInterrupt, SystemExit) now propagate.
                    break
                flag_sz = 4
                tbs_bytes = tbs_bytes[consumed:]
                # .items() replaces the Python 2-only .iteritems().
                extra = {bin(k): v for k, v in extra.items()}
                sequences.append((val, extra))
            for j, seq in enumerate(sequences):
                desc.append('Sequence #%d: %r %r' % (j, seq[0], seq[1]))
            if tbs_bytes:
                desc.append('Remaining bytes: %s' % format_bytes(tbs_bytes))
            calculated_sequences = encode_strands_as_sequences(
                strands, tbs_type=tbs_type)
            try:
                calculated_bytes = sequences_to_bytes(calculated_sequences)
            except Exception:
                # Sentinel value so the comparison below flags a mismatch.
                calculated_bytes = b'failed to calculate tbs bytes'
            if calculated_bytes != otbs:
                print('WARNING: TBS mismatch for record %d' % i)
                desc.append('WARNING: TBS mismatch!')
                desc.append('Calculated sequences: %r' % calculated_sequences)
            desc.append('')
            self.indexing_data.append('\n'.join(desc))