Exemple #1
0
    def __init__(self, mf):
        """Set up the KF8 view of ``mf`` and run the parsing pipeline.

        ``mf`` must expose ``mobi_header``, ``mobi8_header``, ``kf8_type``,
        ``records`` and ``decompress8``.  After collecting the text records
        and the resource ranges, the remaining state is filled in by
        ``extract_resources``, ``read_fdst``, ``read_indices``,
        ``build_files`` and ``read_tbs``, called in that order.
        """
        self.mf = mf
        mobi6_header = mf.mobi_header
        kf8_header = mf.mobi8_header

        # Each entry is (first resource record, last resource record,
        # first image index) taken from one header.
        self.resource_ranges = [(
            kf8_header.first_resource_record,
            kf8_header.last_resource_record,
            kf8_header.first_image_index,
        )]

        if mf.kf8_type == 'joint':
            # In a joint file the KF8 records are addressed relative to the
            # index stored in the first header's EXTH block, and the first
            # header's own resource range is consulted before the KF8 one.
            kf8_base = mobi6_header.exth.kf8_header_index
            self.resource_ranges.insert(0, (
                mobi6_header.first_resource_record,
                mobi6_header.last_resource_record,
                mobi6_header.first_image_index,
            ))
        else:
            kf8_base = 0

        # Text records start one record past the base (record 0 of the
        # section is skipped) and run for number_of_text_records entries.
        text_start = kf8_base + 1
        text_stop = text_start + kf8_header.number_of_text_records
        self.text_records = [
            TextRecord(position, record, kf8_header.extra_data_flags,
                       mf.decompress8)
            for position, record in enumerate(mf.records[text_start:text_stop])
        ]

        self.raw_text = b''.join(rec.raw for rec in self.text_records)
        self.header = self.mf.mobi8_header

        # The steps below run in this fixed order.
        self.extract_resources(mf.records)
        self.read_fdst()
        self.read_indices()
        self.build_files()
        self.read_tbs()
Exemple #2
0
    def __init__(self, mf):
        """Set up the KF8 view of ``mf`` and run the parsing pipeline.

        ``mf`` must expose ``mobi_header``, ``mobi8_header``, ``kf8_type``,
        ``records`` and ``decompress8``.  The resource records and text
        records are sliced out of ``mf.records`` first; the remaining state
        is filled in by ``extract_resources``, ``read_fdst``,
        ``read_indices``, ``build_files`` and ``read_tbs``, in that order.
        """
        self.mf = mf
        mobi6_header = mf.mobi_header
        kf8_header = mf.mobi8_header

        # By default the KF8 section starts at record 0 and resources may
        # extend to the end of the record list.
        kf8_base = 0
        resource_stop = len(mf.records)
        if mf.kf8_type == 'joint':
            # Joint file: the KF8 section starts at the index stored in the
            # first header's EXTH block, and the resource slice stops one
            # record before that index.
            kf8_base = mobi6_header.exth.kf8_header_index
            resource_stop = kf8_base - 1

        resource_start = mobi6_header.first_non_book_record
        self.resource_records = mf.records[resource_start:resource_stop]

        # Text records start one record past the base (record 0 of the
        # section is skipped) and run for number_of_text_records entries.
        text_start = kf8_base + 1
        text_stop = text_start + kf8_header.number_of_text_records
        self.text_records = [
            TextRecord(position, record, kf8_header.extra_data_flags,
                       mf.decompress8)
            for position, record in enumerate(mf.records[text_start:text_stop])
        ]

        self.raw_text = b''.join(rec.raw for rec in self.text_records)
        self.header = self.mf.mobi8_header

        # The steps below run in this fixed order.
        self.extract_resources()
        self.read_fdst()
        self.read_indices()
        self.build_files()
        self.read_tbs()
Exemple #3
0
    def __init__(self, mf):
        """Build the MOBI 6 view of the already-parsed container ``mf``.

        Copies ``raw``, ``palmdb``, ``record_headers``, ``records``,
        ``mobi_header`` and ``huffman_record_nums`` from ``mf`` onto this
        object, then:

        * parses the primary index (header, CNCX blocks, index record) when
          the header's ``primary_index_record`` is not ``NULL_INDEX``;
        * parses the secondary index when ``secondary_index_record`` is not
          ``NULL_INDEX``;
        * wraps the text records in ``TextRecord`` objects;
        * sorts the resource records into ``image_records``,
          ``font_records`` and ``binary_records``;
        * builds ``tbs_indexing`` when a primary index record was found
          (the attribute is not set otherwise).
        """
        for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                'huffman_record_nums',):
            setattr(self, x, getattr(mf, x))

        # Primary index: one header record, followed by index_count index
        # records, followed by num_of_cncx_blocks CNCX records.
        self.index_header = self.index_record = None
        self.indexing_record_nums = set()
        pir = getattr(self.mobi_header, 'primary_index_record', NULL_INDEX)
        if pir != NULL_INDEX:
            self.index_header = IndexHeader(self.records[pir])
            numi = self.index_header.index_count
            cncx_start = pir + 1 + numi
            cncx_end = cncx_start + self.index_header.num_of_cncx_blocks
            self.cncx = CNCX(self.records[cncx_start:cncx_end],
                    self.index_header.index_encoding)
            self.index_record = IndexRecord(self.records[pir+1:cncx_start],
                    self.index_header, self.cncx)
            self.indexing_record_nums = set(range(pir, cncx_end))

        # Secondary index: one header record followed by index_count index
        # records.
        # NOTE(review): self.cncx is only assigned in the primary-index
        # branch above; unless the class defines it elsewhere, a file with a
        # secondary index but no primary index fails here with
        # AttributeError -- confirm whether that combination can occur.
        self.secondary_index_record = self.secondary_index_header = None
        sir = self.mobi_header.secondary_index_record
        if sir != NULL_INDEX:
            self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
            numi = self.secondary_index_header.index_count
            self.indexing_record_nums.add(sir)
            self.secondary_index_record = IndexRecord(
                    self.records[sir+1:sir+1+numi], self.secondary_index_header, self.cncx)
            self.indexing_record_nums |= set(range(sir+1, sir+1+numi))

        # Text records are records 1..ntr, clamped to the records that
        # actually exist.
        ntr = self.mobi_header.number_of_text_records
        fii = self.mobi_header.first_image_index
        self.text_records = [TextRecord(r, self.records[r],
            self.mobi_header.extra_data_flags, mf.decompress6) for r in range(1,
            min(len(self.records), ntr+1))]

        self.image_records, self.binary_records = [], []
        self.font_records = []

        # Four-byte record prefixes that are never passed to the image
        # sniffer.
        non_image_magics = {b'FLIS', b'FCIS', b'SRCS',
                b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
                b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT', b'CMET'}

        # image_index counts every resource record that is not an index or
        # huffman record (images or not), so the first such record is 1.
        image_index = 0
        first = self.mobi_header.first_resource_record
        last = min(self.mobi_header.last_resource_record, len(self.records))
        for i in range(first, last):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = self.records[i]
            magic = r.raw[:4]
            fmt = None
            if i >= fii and magic not in non_image_magics:
                # Best-effort sniff: a record whose format cannot be
                # determined is filed as font/binary below.  Only ordinary
                # errors are suppressed, so KeyboardInterrupt and SystemExit
                # still propagate.
                try:
                    fmt = what(None, r.raw)
                except Exception:
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
            elif magic == b'FONT':
                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, self.mobi_header.type_raw)