def write_content(self, oeb_book, out_stream, metadata=None):
    """Write ``oeb_book`` to ``out_stream`` as a PDB file with ident 'zTXTGPlm'.

    The text records returned by ``self._generate_text`` are deflated one
    after another through a single shared zlib compressor and replaced in
    place by their compressed form. A running CRC-32 over the compressed
    records is passed to ``self._header_record``. Output order is: PDB
    header, header record, compressed text records.

    ``metadata`` is accepted for interface compatibility and is not used.
    """
    # Title precedence: explicit option, then the book's first title entry,
    # then a translated placeholder.
    if self.opts.title:
        title = self.opts.title
    elif oeb_book.metadata.title != []:
        title = oeb_book.metadata.title[0].value
    else:
        title = _('Unknown')

    txt_records, txt_length = self._generate_text(oeb_book)

    checksum = 0
    record_sizes = []
    deflater = zlib.compressobj(9)
    self.log.info('Compressing data...')
    for index, raw in enumerate(txt_records):
        self.log.debug('\tCompressing record %i' % index)
        # Flush after every record so each stored record carries all of the
        # output produced for it; the compressor object is reused afterwards.
        packed = deflater.compress(raw)
        packed = packed + deflater.flush(zlib.Z_FULL_FLUSH)
        txt_records[index] = packed
        record_sizes.append(len(packed))
        checksum = zlib.crc32(packed, checksum) & 0xffffffff

    header_record = self._header_record(txt_length, len(txt_records), checksum)
    # The header record is section 0, so its length leads the list.
    section_lengths = [len(header_record)] + record_sizes

    out_stream.seek(0)
    builder = PdbHeaderBuilder('zTXTGPlm', title)
    builder.build_header(section_lengths, out_stream)

    out_stream.write(header_record)
    for packed in txt_records:
        out_stream.write(packed)
def write_content(self, oeb_book, out_stream, metadata=None):
    """Write ``oeb_book`` to ``out_stream`` as an eReader PDB file.

    The book is converted to PML, encoded as cp1252 (unencodable characters
    replaced), and split into the record groups listed below. The PDB header
    is built from the record lengths and the title taken from the first
    NUL-terminated field of the metadata record, then every record is
    written in order.

    :param oeb_book: book whose content and manifest are serialised.
    :param out_stream: writable stream receiving the PDB data.
    :param metadata: passed through to ``self._metadata`` to build the
        metadata record.
    """
    pmlmlizer = PMLMLizer(self.log)
    pml = unicode_type(pmlmlizer.extract_content(oeb_book, self.opts)).encode(
        'cp1252', 'replace')

    text, text_sizes = self._text(pml)
    chapter_index = self._index_item(
        br'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
    chapter_index += self._index_item(
        br'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
    chapter_index += self._index_item(br'(?s)\\x(?P<text>.+?)\\x', pml)
    link_index = self._index_item(br'(?s)\\Q="(?P<text>.+?)"', pml)
    images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
    metadata = [self._metadata(metadata)]
    hr = [
        self._header_record(len(text), len(chapter_index), len(link_index),
                            len(images))
    ]

    # Record order as generated by Dropbook.
    #  1. eReader Header
    #  2. Compressed text
    #  3. Small font page index
    #  4. Large font page index
    #  5. Chapter index
    #  6. Links index
    #  7. Images
    #  8. (Extrapolation: there should be one more record type here though
    #     yet uncovered what it might be).
    #  9. Metadata
    # 10. Sidebar records
    # 11. Footnote records
    # 12. Text block size record
    # 13. "MeTaInFo\x00" word record
    #
    # Image entries are two-part items (both parts form one record), so the
    # records are kept as three ordered groups instead of testing every
    # record for membership in ``images``.
    leading = hr + text + chapter_index + link_index
    trailing = metadata + [text_sizes] + [b'MeTaInFo\x00']

    lengths = [len(record) for record in leading]
    lengths += [len(image[0]) + len(image[1]) for image in images]
    lengths += [len(record) for record in trailing]

    # The title is the first NUL-terminated field of the metadata record.
    # The separator must have the same type as the record: partitioning a
    # bytes record with a text separator raises TypeError.
    meta_record = metadata[0]
    separator = b'\x00' if isinstance(meta_record, bytes) else '\x00'
    pdbHeaderBuilder = PdbHeaderBuilder(IDENTITY,
                                        meta_record.partition(separator)[0])
    pdbHeaderBuilder.build_header(lengths, out_stream)

    for record in leading:
        out_stream.write(record)
    for image in images:
        out_stream.write(image[0])
        out_stream.write(image[1])
    for record in trailing:
        out_stream.write(record)
def set_metadata(stream, mi):
    """Merge ``mi`` into the metadata record of the PDB file in ``stream``.

    Nothing is changed unless record 0 is 132 bytes long and the header's
    compression value is 2 or 10. If the header reports no metadata record,
    an empty one plus a ``MeTaInFo`` marker record are appended and record 0
    is patched to point at them. The file's current metadata is read with
    ``get_metadata``, updated from ``mi``, and stored as a cp1252 record of
    five NUL-terminated fields (title, authors, empty, publisher, isbn).
    The stream is then truncated and the whole file rewritten.

    :param stream: file-like object holding the PDB file; it is read,
        truncated and rewritten, so it must be seekable and writable.
    :param mi: metadata object passed to ``smart_update``.
    """
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported.
    # The check is on the record's length; comparing the record itself to
    # an int is never equal and would make this function a no-op.
    if len(pheader.section_data(0)) != 132:
        return

    sections = [
        pheader.section_data(x) for x in range(pheader.section_count())
    ]
    hr = HeaderRecord(sections[0])

    if hr.compression not in (2, 10):
        return

    metadata_index = hr.metadata_offset

    # Create a metadata record for the file if one does not already exist
    if not hr.has_metadata:
        sections += [b'', b'MeTaInFo\x00']
        last_data = len(sections) - 1

        # Patch a mutable copy: the record read from the file does not
        # support slice assignment.
        record0 = sections[0]
        header = bytearray(record0)
        for i in range(0, 132, 2):
            val, = struct.unpack_from('>H', record0, i)
            if val >= hr.last_data_offset:
                header[i:i + 2] = struct.pack('>H', last_data)
        header[24:26] = struct.pack('>H', 1)  # Set has metadata
        header[44:46] = struct.pack('>H', last_data - 1)  # Set location of metadata
        header[52:54] = struct.pack('>H', last_data)  # Ensure last data offset is updated
        sections[0] = bytes(header)

        # ``hr`` was parsed before the patch, so use the new location
        # rather than its stale ``metadata_offset``.
        metadata_index = last_data - 1

    # Merge the metadata into the file
    file_mi = get_metadata(stream, False)
    file_mi.smart_update(mi)
    sections[metadata_index] = (
        '%s\x00%s\x00%s\x00%s\x00%s\x00' %
        (file_mi.title, authors_to_string(file_mi.authors), '',
         file_mi.publisher, file_mi.isbn)).encode('cp1252', 'replace')

    # Rebuild the PDB wrapper because the offsets have changed due to the
    # new metadata.
    pheader_builder = PdbHeaderBuilder(pheader.ident, pheader.title)
    stream.seek(0)
    stream.truncate(0)
    pheader_builder.build_header([len(x) for x in sections], stream)

    # Write the data back to the file
    for item in sections:
        stream.write(item)
def set_metadata(stream, mi):
    """Merge ``mi`` into the metadata record of the PDB file in ``stream``.

    Nothing is changed unless record 0 is 132 bytes long and the header's
    compression value is 2 or 10. If the header reports no metadata record,
    an empty one plus a ``MeTaInFo`` marker record are appended and record 0
    is patched to point at them. The file's current metadata is read with
    ``get_metadata``, updated from ``mi``, and stored as a record of five
    NUL-terminated fields (title, authors, empty, publisher, isbn). The
    stream is then truncated and the whole file rewritten.

    :param stream: file-like object holding the PDB file; it is read,
        truncated and rewritten, so it must be seekable and writable.
    :param mi: metadata object passed to ``smart_update``.
    """
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported.
    # The check is on the record's length; comparing the record itself to
    # an int is never equal and would make this function a no-op.
    if len(pheader.section_data(0)) != 132:
        return

    sections = [pheader.section_data(x) for x in range(pheader.section_count())]
    hr = HeaderRecord(sections[0])

    if hr.compression not in (2, 10):
        return

    metadata_index = hr.metadata_offset

    # Create a metadata record for the file if one does not already exist
    if not hr.has_metadata:
        # Byte-string placeholders, matching the type of the records read
        # from the file.
        sections += [b'', b'MeTaInFo\x00']
        last_data = len(sections) - 1

        # Patch a mutable copy: the record read from the file does not
        # support slice assignment.
        record0 = sections[0]
        header = bytearray(record0)
        for i in range(0, 132, 2):
            val, = struct.unpack_from('>H', record0, i)
            if val >= hr.last_data_offset:
                header[i:i + 2] = struct.pack('>H', last_data)
        header[24:26] = struct.pack('>H', 1)  # Set has metadata
        header[44:46] = struct.pack('>H', last_data - 1)  # Set location of metadata
        header[52:54] = struct.pack('>H', last_data)  # Ensure last data offset is updated
        sections[0] = bytes(header)

        # ``hr`` was parsed before the patch, so use the new location
        # rather than its stale ``metadata_offset``.
        metadata_index = last_data - 1

    # Merge the metadata into the file
    file_mi = get_metadata(stream, False)
    file_mi.smart_update(mi)
    record = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \
        (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)
    # Text must be encoded before it is written next to the binary records;
    # a value that is already a byte string is stored unchanged.
    if not isinstance(record, bytes):
        record = record.encode('cp1252', 'replace')
    sections[metadata_index] = record

    # Rebuild the PDB wrapper because the offsets have changed due to the
    # new metadata.
    pheader_builder = PdbHeaderBuilder(pheader.ident, pheader.title)
    stream.seek(0)
    stream.truncate(0)
    pheader_builder.build_header([len(x) for x in sections], stream)

    # Write the data back to the file
    for item in sections:
        stream.write(item)
def write_content(self, oeb_book, out_stream, metadata=None):
    """Write ``oeb_book`` to ``out_stream`` as an eReader PDB file.

    The book is converted to PML, encoded as cp1252 (unencodable characters
    replaced), and split into the record groups listed below. The PDB header
    is built from the record lengths and the title taken from the first
    NUL-terminated field of the metadata record, then every record is
    written in order.

    :param oeb_book: book whose content and manifest are serialised.
    :param out_stream: writable stream receiving the PDB data.
    :param metadata: passed through to ``self._metadata`` to build the
        metadata record.
    """
    pmlmlizer = PMLMLizer(self.log)
    pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')

    text, text_sizes = self._text(pml)
    chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
    chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
    chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
    link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
    images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
    metadata = [self._metadata(metadata)]
    hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]

    # Record order as generated by Dropbook.
    #  1. eReader Header
    #  2. Compressed text
    #  3. Small font page index
    #  4. Large font page index
    #  5. Chapter index
    #  6. Links index
    #  7. Images
    #  8. (Extrapolation: there should be one more record type here though
    #     yet uncovered what it might be).
    #  9. Metadata
    # 10. Sidebar records
    # 11. Footnote records
    # 12. Text block size record
    # 13. "MeTaInFo\x00" word record
    #
    # Image entries are two-part items (both parts form one record), so the
    # records are kept as three ordered groups instead of testing every
    # record for membership in ``images``.
    leading = hr + text + chapter_index + link_index
    trailing = metadata + [text_sizes] + ['MeTaInFo\x00']

    lengths = [len(record) for record in leading]
    lengths += [len(image[0]) + len(image[1]) for image in images]
    lengths += [len(record) for record in trailing]

    # The title is the first NUL-terminated field of the metadata record.
    pdbHeaderBuilder = PdbHeaderBuilder(IDENTITY, metadata[0].partition('\x00')[0])
    pdbHeaderBuilder.build_header(lengths, out_stream)

    for record in leading:
        out_stream.write(record)
    for image in images:
        out_stream.write(image[0])
        out_stream.write(image[1])
    for record in trailing:
        out_stream.write(record)
def write_content(self, oeb_book, out_stream, metadata=None):
    """Write ``oeb_book`` to ``out_stream`` as a PDB file with ident 'TEXtREAd'.

    The text records returned by ``self._generate_text`` are compressed
    with ``compress_doc`` and replaced in place by their compressed form.
    The header record is built from the uncompressed text length and the
    record count. Output order is: PDB header, header record, compressed
    text records.

    ``metadata`` is accepted for interface compatibility and is not used.
    """
    from calibre.ebooks.compression.palmdoc import compress_doc

    # Title precedence: explicit option, then the book's first title entry,
    # then a translated placeholder.
    if self.opts.title:
        title = self.opts.title
    elif oeb_book.metadata.title != []:
        title = oeb_book.metadata.title[0].value
    else:
        title = _('Unknown')

    txt_records, txt_length = self._generate_text(oeb_book)
    header_record = self._header_record(txt_length, len(txt_records))

    # The header record is section 0, so its length leads the list.
    section_lengths = [len(header_record)]
    self.log.info('Compressing data...')
    for i, record in enumerate(txt_records):
        self.log.debug('\tCompressing record %i' % i)
        compressed = compress_doc(record)
        txt_records[i] = compressed
        section_lengths.append(len(compressed))

    out_stream.seek(0)
    hb = PdbHeaderBuilder('TEXtREAd', title)
    hb.build_header(section_lengths, out_stream)

    out_stream.write(header_record)
    for record in txt_records:
        out_stream.write(record)