def dump_record(self, r, dat):
    """Build a textual report for one text record and its TBS bytes.

    :param r: Text record; ``r.idx`` and ``r.trailing_data`` are read.
    :param dat: Mapping providing ``'geom'`` (start and end of the record)
        and the ``'starts'``, ``'ends'`` and ``'complete'`` sequences of
        index entries for this record.
    :return: ``(tbs_type, lines)``. ``tbs_type`` is the bitwise OR of the
        flag keys decoded from the first TBS entry (0 when the record has
        no indexing bytes); ``lines`` is the list of report strings.
    """
    ans = []
    ans.append('\nRecord #%d: Starts at: %d Ends at: %d' % (
        r.idx, dat['geom'][0], dat['geom'][1]))
    s, e, c = dat['starts'], dat['ends'], dat['complete']
    ans.append(('\tContains: %d index entries '
                '(%d ends, %d complete, %d starts)') % tuple(
                    map(len, (s + e + c, e, c, s))))
    byts = bytearray(r.trailing_data.get('indexing', b''))
    ans.append('TBS bytes: %s' % format_bytes(byts))
    for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
        if entries:
            ans.append('\t%s:' % typ)
            for x in entries:
                ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                            'Depth: %d, Offset: %d, Size: %d) [%s]') % (
                                x.index, x.parent_index, x.depth,
                                x.offset, x.size, x.label))

    def bin4(num):
        # Binary digits of num, left-padded with zeros to at least 4 chars
        return as_bytes(bin(num)[2:].zfill(4))

    def repr_extra(x):
        # Render the mapping that was passed in (previously the argument
        # was ignored and the enclosing ``extra`` was read instead)
        return str({bin4(k): v for k, v in iteritems(x)})

    tbs_type = 0
    is_periodical = self.doc_type in (257, 258, 259)
    if byts:
        # The first TBS entry is decoded with a 3 bit flag size
        outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
        byts = byts[consumed:]
        for k in extra:
            tbs_type |= k
        ans.append('\nTBS: %d (%s)' % (tbs_type, bin4(tbs_type)))
        ans.append('Outermost index: %d' % outermost_index)
        ans.append('Unknown extra start bytes: %s' % repr_extra(extra))
        if is_periodical:  # Hierarchical periodical
            try:
                byts, a = self.interpret_periodical(
                    tbs_type, byts, dat['geom'][0])
            except Exception:
                # Best effort: report the failure and keep dumping, but
                # let KeyboardInterrupt/SystemExit propagate
                import traceback
                traceback.print_exc()
                a = []
                print('Failed to decode TBS bytes for record: %d' % r.idx)
            ans += a
        if byts:
            sbyts = tuple(hex(b)[2:] for b in byts)
            ans.append('Remaining bytes: %s' % ' '.join(sbyts))

    ans.append('')
    return tbs_type, ans
def dump_record(self, r, dat):
    """Build a textual report for one text record and its TBS bytes.

    :param r: Text record; ``r.idx`` and ``r.trailing_data`` are read.
    :param dat: Mapping providing ``'geom'`` (start and end of the record)
        and the ``'starts'``, ``'ends'`` and ``'complete'`` sequences of
        index entries for this record.
    :return: ``(tbs_type, lines)``. ``tbs_type`` is the bitwise OR of the
        flag keys decoded from the first TBS entry (0 when the record has
        no indexing bytes); ``lines`` is the list of report strings.
    """
    ans = []
    ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
        dat['geom'][0], dat['geom'][1]))
    s, e, c = dat['starts'], dat['ends'], dat['complete']
    ans.append(('\tContains: %d index entries '
        '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
            c, s))))
    byts = bytearray(r.trailing_data.get('indexing', b''))
    ans.append('TBS bytes: %s'%format_bytes(byts))
    for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
        if entries:
            ans.append('\t%s:'%typ)
            for x in entries:
                ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                    'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                        x.index, x.parent_index, x.depth, x.offset,
                        x.size, x.label))

    def bin4(num):
        # Binary digits of num, left-padded with zeros to at least 4 chars
        return as_bytes(bin(num)[2:].zfill(4))

    def repr_extra(x):
        # Render the mapping that was passed in (previously the argument
        # was ignored and the enclosing ``extra`` was read instead)
        return str({bin4(k):v for k, v in iteritems(x)})

    tbs_type = 0
    is_periodical = self.doc_type in (257, 258, 259)
    if byts:
        # The first TBS entry is decoded with a 3 bit flag size
        outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
        byts = byts[consumed:]
        for k in extra:
            tbs_type |= k
        ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
        ans.append('Outermost index: %d'%outermost_index)
        ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
        if is_periodical:  # Hierarchical periodical
            try:
                byts, a = self.interpret_periodical(tbs_type, byts,
                    dat['geom'][0])
            except Exception:
                # Best effort: report the failure and keep dumping, but
                # let KeyboardInterrupt/SystemExit propagate
                import traceback
                traceback.print_exc()
                a = []
                print('Failed to decode TBS bytes for record: %d'%r.idx)
            ans += a
        if byts:
            sbyts = tuple(hex(b)[2:] for b in byts)
            ans.append('Remaining bytes: %s'%' '.join(sbyts))

    ans.append('')
    return tbs_type, ans
def read_section_transitions(byts, psi=None):  # {{{
    """Decode ``byts`` entry by entry, describing each one in ``ans``.

    Relies on ``self``, ``ans`` and ``record_offset`` from the enclosing
    scope.

    :param byts: TBS bytes still to be decoded.
    :param psi: Index entry of the previous section; when omitted it is
        obtained from ``self.get_index(1)``.
    :return: What is left of ``byts`` once the loop stops, i.e. once
        everything has been consumed.
    :raises ValueError: If flag 0b0010 is set, or if flag 0b1000 is set
        together with any other flag.
    """
    section = psi
    if section is None:
        # Assume previous section is 1
        section = self.get_index(1)

    pending = byts
    while pending:
        ai, extra, consumed = decode_tbs(pending)
        pending = pending[consumed:]

        if extra.get(0b0010, None) is not None:
            raise ValueError(
                'Dont know how to interpret flag 0b0010'
                ' while reading section transitions')

        if extra.get(0b1000, None) is not None:
            # 0b1000 is only understood when it is the sole flag
            if len(extra) > 1:
                raise ValueError(
                    'Dont know how to interpret flags'
                    ' %r while reading section transitions' % extra)
            following = self.get_index(section.index + 1)
            last_article_fmt = (
                'Last article in this record of section %d'
                ' (relative to next section index [%d]): '
                '%d [%d absolute index]')
            ans.append(last_article_fmt % (
                section.index, following.index, ai, ai + following.index))
            section = following
            continue

        first_article_fmt = (
            'First article in this record of section %d'
            ' (relative to its parent section): '
            '%d [%d absolute index]')
        ans.append(first_article_fmt % (
            section.index, ai, ai + section.index))

        count = extra.get(0b0100, None)
        if count is not None:
            msg = ('Number of articles in this record of '
                   'section %d: %d') % (section.index, count)
        else:
            msg = ('The section %d has at most one article'
                   ' in this record') % section.index
        ans.append(msg)

        offset = extra.get(0b0001, None)
        if offset is None:
            continue
        if offset == 0:
            ans.append('This record is spanned by the article:'
                       '%d' % (ai + section.index))
        else:
            ans.append(
                '->Offset to start of next section (%d) from start'
                ' of record: %d [%d absolute offset]' % (
                    section.index + 1, offset, offset + record_offset))
    return pending
def read_section_transitions(byts, psi=None):  # {{{
    """Decode ``byts`` entry by entry, describing each one in ``ans``.

    Relies on ``self``, ``ans`` and ``record_offset`` from the enclosing
    scope.

    :param byts: TBS bytes still to be decoded.
    :param psi: Index entry of the previous section; when omitted it is
        obtained from ``self.get_index(1)``.
    :return: What is left of ``byts`` once the loop stops, i.e. once
        everything has been consumed.
    :raises ValueError: If flag 0b0010 is set, or if flag 0b1000 is set
        together with any other flag.
    """
    section = psi
    if section is None:
        # Assume previous section is 1
        section = self.get_index(1)

    pending = byts
    while pending:
        ai, extra, consumed = decode_tbs(pending)
        pending = pending[consumed:]

        if extra.get(0b0010, None) is not None:
            raise ValueError('Dont know how to interpret flag 0b0010'
                    ' while reading section transitions')

        if extra.get(0b1000, None) is not None:
            # 0b1000 is only understood when it is the sole flag
            if len(extra) > 1:
                raise ValueError('Dont know how to interpret flags'
                        ' %r while reading section transitions'%extra)
            following = self.get_index(section.index+1)
            last_article_fmt = (
                'Last article in this record of section %d'
                ' (relative to next section index [%d]): '
                '%d [%d absolute index]')
            ans.append(last_article_fmt%(section.index, following.index,
                ai, ai+following.index))
            section = following
            continue

        first_article_fmt = (
            'First article in this record of section %d'
            ' (relative to its parent section): '
            '%d [%d absolute index]')
        ans.append(first_article_fmt%(section.index, ai, ai+section.index))

        count = extra.get(0b0100, None)
        if count is not None:
            msg = ('Number of articles in this record of '
                'section %d: %d')%(section.index, count)
        else:
            msg = ('The section %d has at most one article'
                ' in this record')%section.index
        ans.append(msg)

        offset = extra.get(0b0001, None)
        if offset is None:
            continue
        if offset == 0:
            ans.append('This record is spanned by the article:'
                    '%d'%(ai+section.index))
        else:
            ans.append('->Offset to start of next section (%d) from start'
                    ' of record: %d [%d absolute offset]'%(
                        section.index+1, offset, offset+record_offset))
    return pending
def read_starting_section(byts):  # {{{
    """Decode the first entry of ``byts`` as the record's starting section.

    Relies on ``self`` and ``ans`` from the enclosing scope; a description
    of the section is appended to ``ans``.

    :param byts: TBS bytes positioned at the starting-section entry.
    :return: ``(section, rest)`` where ``section`` is the result of
        ``self.get_index`` for the decoded value and ``rest`` is ``byts``
        with the decoded entry removed.
    :raises ValueError: If more than one flag is present, if flag 0b0010
        or 0b1000 is present, or if flag 0b0001 carries a non-zero value.
    """
    all_bytes = byts  # kept intact for the error message below
    raw_index, extra, consumed = decode_tbs(byts)
    rest = byts[consumed:]

    if len(extra) > 1 or any(flag in extra for flag in (0b0010, 0b1000)):
        raise ValueError(
            'Dont know how to interpret flags %r'
            ' when reading starting section' % extra)

    section = self.get_index(raw_index)
    ans.append('The section at the start of this record is:'
               ' %s' % section.index)

    if 0b0100 in extra:
        article_count = extra[0b0100]
        ans.append(
            'The number of articles from the section %d'
            ' in this record: %s' % (section.index, article_count))
        return section, rest

    if 0b0001 in extra:
        eof = extra[0b0001]
        if eof != 0:
            raise ValueError(
                'Unknown eof value %s when reading'
                ' starting section. All bytes: %r' % (eof, all_bytes))
        ans.append('??This record has more than one article from '
                   ' the section: %s' % section.index)

    return section, rest
def read_tbs(self):
    """Recompute the TBS of every text record and report on each one.

    Builds ``Entry`` objects from ``self.ncx_index``, collects indexing
    data per text record, and stores a list of report strings in
    ``self.indexing_data``. For each record the report lists its strands,
    the TBS bytes found in the record, the sequences decoded from them and
    a warning when the freshly calculated bytes differ from the stored
    ones.

    TBS calculation is first attempted with the default type (recorded
    here as 8); if that raises ``NegativeStrandIndex`` it is redone with
    ``tbs_type=5``.
    """
    from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
            collect_indexing_data, encode_strands_as_sequences,
            sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
    entry_map = []
    for index in self.ncx_index:
        # Drop the last field and pad with None so Entry always gets 12
        # positional values
        vals = list(index)[:-1] + [None, None, None, None]
        entry_map.append(Entry(*(vals[:12])))

    indexing_data = collect_indexing_data(
        entry_map, [len(rec) for rec in self.text_records])
    # NOTE(review): the line breaks inside this literal were reconstructed
    # from its wording - confirm against the intended layout.
    self.indexing_data = [DOC + '\n' +textwrap.dedent('''\
            Index Entry lines are of the form:
            depth:index_number [action] parent (index_num-parent) Geometry

            Where Geometry is the start and end of the index entry w.r.t
            the start of the text record.

            ''')]

    tbs_type = 8
    try:
        calculate_all_tbs(indexing_data)
    except NegativeStrandIndex:
        calculate_all_tbs(indexing_data, tbs_type=5)
        tbs_type = 5

    for i, strands in enumerate(indexing_data):
        rec = self.text_records[i]
        tbs_bytes = rec.trailing_data.get('indexing', b'')
        desc = ['Record #%d'%i]
        for s, strand in enumerate(strands):
            desc.append('Strand %d'%s)
            for entries in itervalues(strand):
                for e in entries:
                    desc.append(
                    ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)'%(
                        e.depth * (' ') + '- ', e.index, e.action, e.parent,
                        e.index-(e.parent or 0), e.start-i*RECORD_SIZE,
                        e.start+e.length-i*RECORD_SIZE))
        desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
        # The first entry is decoded with a 3 bit flag size, later ones
        # with 4 bits
        flag_sz = 3
        sequences = []
        otbs = tbs_bytes
        while tbs_bytes:
            try:
                val, extra, consumed = decode_tbs(tbs_bytes, flag_size=flag_sz)
            except Exception:
                # Undecodable tail: stop and report it as remaining bytes
                break
            flag_sz = 4
            tbs_bytes = tbs_bytes[consumed:]
            extra = {bin(k):v for k, v in iteritems(extra)}
            sequences.append((val, extra))
        for j, seq in enumerate(sequences):
            desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
        if tbs_bytes:
            desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
        calculated_sequences = encode_strands_as_sequences(strands,
                tbs_type=tbs_type)
        try:
            calculated_bytes = sequences_to_bytes(calculated_sequences)
        except Exception:
            # Sentinel that cannot equal real TBS bytes, so the mismatch
            # warning below fires
            calculated_bytes = b'failed to calculate tbs bytes'
        if calculated_bytes != otbs:
            print('WARNING: TBS mismatch for record %d'%i)
            desc.append('WARNING: TBS mismatch!')
            desc.append('Calculated sequences: %r'%calculated_sequences)
        desc.append('')
        self.indexing_data.append('\n'.join(desc))
def read_tbs(self):
    """Recompute the TBS of every text record and report on each one.

    Builds ``Entry`` objects from ``self.ncx_index``, collects indexing
    data per text record, and stores a list of report strings in
    ``self.indexing_data``. For each record the report lists its strands,
    the TBS bytes found in the record, the sequences decoded from them and
    a warning when the freshly calculated bytes differ from the stored
    ones.

    TBS calculation is first attempted with the default type (recorded
    here as 8); if that raises ``NegativeStrandIndex`` it is redone with
    ``tbs_type=5``.
    """
    from calibre.ebooks.mobi.writer8.tbs import (
        Entry, DOC, collect_indexing_data, encode_strands_as_sequences,
        sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
    entry_map = []
    for index in self.ncx_index:
        # Drop the last field and pad with None so Entry always gets 12
        # positional values
        vals = list(index)[:-1] + [None, None, None, None]
        entry_map.append(Entry(*(vals[:12])))

    indexing_data = collect_indexing_data(
        entry_map, [len(rec) for rec in self.text_records])
    # NOTE(review): the line breaks inside this literal were reconstructed
    # from its wording - confirm against the intended layout.
    self.indexing_data = [
        DOC + '\n' + textwrap.dedent('''\
            Index Entry lines are of the form:
            depth:index_number [action] parent (index_num-parent) Geometry

            Where Geometry is the start and end of the index entry w.r.t
            the start of the text record.

            ''')
    ]

    tbs_type = 8
    try:
        calculate_all_tbs(indexing_data)
    except NegativeStrandIndex:
        calculate_all_tbs(indexing_data, tbs_type=5)
        tbs_type = 5

    for i, strands in enumerate(indexing_data):
        rec = self.text_records[i]
        tbs_bytes = rec.trailing_data.get('indexing', b'')
        desc = ['Record #%d' % i]
        for s, strand in enumerate(strands):
            desc.append('Strand %d' % s)
            # .values() instead of the Python 2-only .itervalues()
            for entries in strand.values():
                for e in entries:
                    desc.append(
                        ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)' %
                        (e.depth * (' ') + '- ', e.index, e.action,
                         e.parent, e.index - (e.parent or 0),
                         e.start - i * RECORD_SIZE,
                         e.start + e.length - i * RECORD_SIZE))
        desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
        # The first entry is decoded with a 3 bit flag size, later ones
        # with 4 bits
        flag_sz = 3
        sequences = []
        otbs = tbs_bytes
        while tbs_bytes:
            try:
                val, extra, consumed = decode_tbs(tbs_bytes,
                                                  flag_size=flag_sz)
            except Exception:
                # Undecodable tail: stop and report it as remaining bytes
                break
            flag_sz = 4
            tbs_bytes = tbs_bytes[consumed:]
            # .items() instead of the Python 2-only .iteritems()
            extra = {bin(k): v for k, v in extra.items()}
            sequences.append((val, extra))
        for j, seq in enumerate(sequences):
            desc.append('Sequence #%d: %r %r' % (j, seq[0], seq[1]))
        if tbs_bytes:
            desc.append('Remaining bytes: %s' % format_bytes(tbs_bytes))
        calculated_sequences = encode_strands_as_sequences(
            strands, tbs_type=tbs_type)
        try:
            calculated_bytes = sequences_to_bytes(calculated_sequences)
        except Exception:
            # Sentinel that cannot equal real TBS bytes, so the mismatch
            # warning below fires
            calculated_bytes = b'failed to calculate tbs bytes'
        if calculated_bytes != otbs:
            print('WARNING: TBS mismatch for record %d' % i)
            desc.append('WARNING: TBS mismatch!')
            desc.append('Calculated sequences: %r' % calculated_sequences)
        desc.append('')
        self.indexing_data.append('\n'.join(desc))