Code example #1
File: apnx.py  Project: AEliu/calibre
    def get_pages_fast(self, mobi_file_path):
        '''
        2300 characters of uncompressed text per page. This is
        not meant to map 1 to 1 to a print book but to be a
        close enough measure.

        A test book was chosen and the characters were counted
        on one page. This number was rounded to 2240, then 60
        characters of markup were added to the total giving
        2300.

        Uncompressed text length is used because it's easily
        accessible in MOBI files (part of the header). Also,
        it's faster to work off of the length than to
        decompress and parse the actual text.
        '''
        text_length = 0
        pages = []
        count = 0

        with lopen(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        while count < text_length:
            pages.append(count)
            count += 2300

        return pages
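
Note: a minimal, self-contained sketch of the same heuristic, with the
calibre-specific PDB header parsing replaced by a plain text_length argument.
The function name and signature are illustrative, not part of calibre's API.

def fast_page_offsets(text_length, chars_per_page=2300):
    # Assume a page starts every 2300 characters of uncompressed text,
    # mirroring get_pages_fast() above.
    return list(range(0, text_length, chars_per_page))

# A 10,000-character book yields five page-start offsets.
assert fast_page_offsets(10000) == [0, 2300, 4600, 6900, 9200]
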
Code example #2
File: ereader.py  Project: JimmXinu/calibre
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
Code example #3
File: ereader.py  Project: jimman2003/calibre
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)

                mdata = mdata.decode('cp1252', 'replace').split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except Exception:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
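
Note: the index positions used above imply a fixed field order inside the
NUL-separated metadata record. Below is a small self-contained sketch of that
layout; the helper name is illustrative, and the empty third field mirrors
what set_metadata writes in example #8 below.

def parse_ereader_metadata(record):
    # Decode the cp1252, NUL-separated record returned by
    # section_data(hr.metadata_offset) and pick out the used fields.
    fields = record.decode('cp1252', 'replace').split('\x00')
    return {
        'title': fields[0],
        'author': fields[1],
        'publisher': fields[3],
        'isbn': fields[4],
    }

sample = b'A Title\x00An Author\x00\x00A Publisher\x001234567890\x00'
print(parse_ereader_metadata(sample))
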
Code example #4
    def get_pages_fast(self, mobi_file_path):
        '''
        2300 characters of uncompressed text per page. This is
        not meant to map 1 to 1 to a print book but to be a
        close enough measure.

        A test book was chosen and the characters were counted
        on one page. This number was rounded to 2240, then 60
        characters of markup were added to the total giving
        2300.

        Uncompressed text length is used because it's easily
        accessible in MOBI files (part of the header). Also,
        it's faster to work off of the length than to
        decompress and parse the actual text.
        '''
        text_length = 0
        pages = []
        count = 0

        with lopen(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        while count < text_length:
            pages.append(count)
            count += 2300

        return pages
Code example #5
    def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
        '''
        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count, otherwise a count will be estimated
        using either the fast or accurate algorithm.
        '''
        import uuid
        apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin': '',
                     'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''}

        with open(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
            if ident != 'BOOKMOBI':
                # Check that this is really a MOBI file.
                raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
            apnx_meta['acr'] = str(PdbHeaderReader(mf).name())

        # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
        with open(mobi_file_path, 'rb') as mf:
            mh = MetadataHeader(mf, default_log)
            if mh.mobi_version == 8:
                apnx_meta['format'] = 'MOBI_8'
            else:
                apnx_meta['format'] = 'MOBI_7'
            if mh.exth is None or not mh.exth.cdetype:
                apnx_meta['cdetype'] = 'EBOK'
            else:
                apnx_meta['cdetype'] = str(mh.exth.cdetype)
            if mh.exth is None or not mh.exth.uuid:
                apnx_meta['asin'] = ''
            else:
                apnx_meta['asin'] = str(mh.exth.uuid)

        # Get the pages depending on the chosen parser
        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        else:
            if accurate:
                try:
                    pages = self.get_pages_accurate(mobi_file_path)
                except:
                    # Fall back to the fast parser if we can't
                    # use the accurate one. Typically this is
                    # due to the file having DRM.
                    pages = self.get_pages_fast(mobi_file_path)
            else:
                pages = self.get_pages_fast(mobi_file_path)

        if not pages:
            raise Exception(_('Could not generate page mapping.'))

        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages, apnx_meta)

        # Write the APNX.
        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)
            fsync(apnxf)
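
Note: a hedged usage sketch for write_apnx(). It assumes the method lives on
calibre's APNXBuilder class and that the import path below matches your
calibre version; the file names are placeholders.

from calibre.devices.kindle.apnx import APNXBuilder

builder = APNXBuilder()
# Estimate pages (accurate parser, with automatic fallback to the fast one)
# and write the page map next to the book.
builder.write_apnx('book.mobi', 'book.mobi.apnx', accurate=True)
# Or force a fixed page count, e.g. taken from a custom column.
builder.write_apnx('book.mobi', 'book.mobi.apnx', page_count=312)
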
Code example #6
File: plucker.py  Project: MarioJC/calibre
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in xrange(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
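
Note: the CharSet branch above appears to store an IANA MIBenum value, which
MIBNUM_TO_NAME turns into a Python codec name for decoding the title and
author. The table below is an illustrative subset, not calibre's actual
mapping, which is much larger.

MIBNUM_TO_NAME_SKETCH = {
    3: 'ascii',      # US-ASCII
    4: 'latin-1',    # ISO-8859-1
    106: 'utf-8',    # UTF-8
    2252: 'cp1252',  # windows-1252
}

def pick_encoding(mibenum, default='latin-1'):
    # Unknown MIBenum values fall back to the default, as in get_metadata().
    return MIBNUM_TO_NAME_SKETCH.get(mibenum, default)

assert pick_encoding(106) == 'utf-8'
assert pick_encoding(9999) == 'latin-1'
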
Code example #7
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
Code example #8
def set_metadata(stream, mi):
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) != 132:
        return

    sections = [
        pheader.section_data(x) for x in range(0, pheader.section_count())
    ]
    hr = HeaderRecord(sections[0])

    if hr.compression not in (2, 10):
        return

    # Create a metadata record for the file if one does not already exist
    if not hr.has_metadata:
        sections += [b'', b'MeTaInFo\x00']
        last_data = len(sections) - 1

        for i in range(0, 132, 2):
            val, = struct.unpack('>H', sections[0][i:i + 2])
            if val >= hr.last_data_offset:
                sections[0][i:i + 2] = struct.pack('>H', last_data)

        sections[0][24:26] = struct.pack('>H', 1)  # Set has metadata
        sections[0][44:46] = struct.pack('>H', last_data -
                                         1)  # Set location of metadata
        sections[0][52:54] = struct.pack(
            '>H', last_data)  # Ensure last data offset is updated

    # Merge the metadata into the file
    file_mi = get_metadata(stream, False)
    file_mi.smart_update(mi)
    sections[hr.metadata_offset] = (
        '%s\x00%s\x00%s\x00%s\x00%s\x00' %
        (file_mi.title, authors_to_string(file_mi.authors), '',
         file_mi.publisher, file_mi.isbn)).encode('cp1252', 'replace')

    # Rebuild the PDB wrapper because the offsets have changed due to the
    # new metadata.
    pheader_builder = PdbHeaderBuilder(pheader.ident, pheader.title)
    stream.seek(0)
    stream.truncate(0)
    pheader_builder.build_header([len(x) for x in sections], stream)

    # Write the data back to the file
    for item in sections:
        stream.write(item)
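
Note: a small sketch of the record that set_metadata writes back: five
cp1252-encoded, NUL-terminated fields in a fixed order. The helper name and
the always-empty third field mirror the code above; this is not a calibre API.

def build_ereader_metadata_record(title, authors, publisher, isbn):
    fields = (title, authors, '', publisher, isbn)
    return ('%s\x00%s\x00%s\x00%s\x00%s\x00' % fields).encode('cp1252', 'replace')

rec = build_ereader_metadata_record('A Title', 'An Author', 'A Publisher', '1234567890')
assert rec.split(b'\x00')[3] == b'A Publisher'
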
Code example #9
File: __init__.py  Project: tamsuiboy/calibre-haodoo
    def get_metadata(self, stream, ftype):
        header = PdbHeaderReader(stream)
        if header.ident not in (UPDB_IDENT, BPDB_IDENT):
            stream.seek(0)
            return super(HaoDooPdb, self).get_metadata(stream, ftype)
        reader = Reader(header, stream, None, None)

        return reader.get_metadata()
Code example #10
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.pdb.header import PdbHeaderReader
        from calibre.ebooks.azw4.reader import Reader

        header = PdbHeaderReader(stream)
        reader = Reader(header, stream, log, options)
        opf = reader.extract_content(os.getcwd())

        return opf
Code example #11
File: __init__.py  Project: tamsuiboy/calibre-haodoo
    def convert(self, stream, options, file_ext, log, accelerators):
        header = PdbHeaderReader(stream)
        if header.ident not in (UPDB_IDENT, BPDB_IDENT):
            return super(HaoDooPdb, self).convert(stream, options, file_ext,
                                                  log, accelerators)
        reader = Reader(header, stream, log, options)
        opf = reader.extract_content(os.getcwd())

        return opf
Code example #12
File: ereader.py  Project: JimmXinu/calibre
def set_metadata(stream, mi):
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) != 132:
        return

    sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
    hr = HeaderRecord(sections[0])

    if hr.compression not in (2, 10):
        return

    # Create a metadata record for the file if one does not already exist
    if not hr.has_metadata:
        sections += ['', 'MeTaInFo\x00']
        last_data = len(sections) - 1

        for i in range(0, 132, 2):
            val, = struct.unpack('>H', sections[0][i:i + 2])
            if val >= hr.last_data_offset:
                sections[0][i:i + 2] = struct.pack('>H', last_data)

        sections[0][24:26] = struct.pack('>H', 1)  # Set has metadata
        sections[0][44:46] = struct.pack('>H', last_data - 1)  # Set location of metadata
        sections[0][52:54] = struct.pack('>H', last_data)  # Ensure last data offset is updated

    # Merge the metadata into the file
    file_mi = get_metadata(stream, False)
    file_mi.smart_update(mi)
    sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \
        (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)

    # Rebuild the PDB wrapper because the offsets have changed due to the
    # new metadata.
    pheader_builder = PdbHeaderBuilder(pheader.ident, pheader.title)
    stream.seek(0)
    stream.truncate(0)
    pheader_builder.build_header([len(x) for x in sections], stream)

    # Write the data back to the file
    for item in sections:
        stream.write(item)
Code example #13
File: __init__.py  Project: alexasahis/calibre-haodoo
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    reader = Reader(pheader, stream, None, None)

    return reader.get_metadata()
Code example #14
def set_metadata(stream, mi):
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    MetadataWriter = MWRITER.get(pheader.ident, None)

    if MetadataWriter:
        MetadataWriter(stream, mi)

    stream.seek(0)
    stream.write(re.sub('[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00')
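
Note: the final write above regenerates the 32-byte name field at the start
of the PDB file: the title is reduced to a safe character set, padded or
truncated to 31 bytes, and NUL-terminated. A standalone sketch of that
transformation (the helper name is illustrative):

import re

def pdb_name_field(title):
    safe = re.sub('[^-A-Za-z0-9 ]+', '_', title)
    return safe.ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00'

name = pdb_name_field('War & Peace: A Novel')
assert len(name) == 32 and name.endswith(b'\x00')
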
Code example #15
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """

    pheader = PdbHeaderReader(stream)

    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
        return MetaInformation(pheader.title, [_('Unknown')])

    return MetadataReader(stream, extract_cover)
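
Note: MREADER is an ident-keyed dispatch table. A self-contained sketch of
the same pattern follows; the placeholder reader and the table contents are
illustrative, not calibre's real registry.

def read_mobi_metadata(stream, extract_cover=True):
    return {'format': 'MOBI'}        # placeholder reader

MREADER_SKETCH = {
    'BOOKMOBI': read_mobi_metadata,  # MOBI books; other idents map likewise
}

def dispatch(ident, stream, extract_cover=True):
    reader = MREADER_SKETCH.get(ident)
    if reader is None:
        return None                  # caller falls back to a bare title
    return reader(stream, extract_cover)
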
Code example #16
def main(args=sys.argv):
    if len(args) < 2:
        print('Error: requires input file.')
        return 1

    f = open(sys.argv[1], 'rb')

    pheader = PdbHeaderReader(f)

    pdb_header_info(pheader)
    ereader_header_info(pheader)
    section_lengths(pheader)

    return 0
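
Note: a minimal sketch of what a section-length dump can look like, built
only from PdbHeaderReader members that appear elsewhere on this page (ident,
title, num_sections, section_data). The output format is illustrative, not
what calibre's section_lengths() prints.

def dump_section_lengths(pheader):
    print('Identity:', pheader.ident)
    print('Title:   ', pheader.title)
    for i in range(pheader.num_sections):
        print('Section %d: %d bytes' % (i, len(pheader.section_data(i))))
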
Code example #17
    def get_pages_exact(self, mobi_file_path, page_count):
        '''
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.
        '''
        pages = []
        count = 0

        with lopen(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        chars_per_page = int(text_length // page_count)
        while count < text_length:
            pages.append(count)
            count += chars_per_page

        if len(pages) > page_count:
            # Rounding created extra page entries
            pages = pages[:page_count]

        return pages
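
Note: a worked example of the arithmetic above. Integer division can leave a
remainder, which produces one extra page-start offset; the final slice trims
it away.

text_length, page_count = 100000, 300
chars_per_page = text_length // page_count           # 333

offsets = list(range(0, text_length, chars_per_page))
print(len(offsets))                                   # 301: one entry too many
print(len(offsets[:page_count]))                      # 300 after the trim
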
Code example #18
File: apnx.py  Project: AEliu/calibre
    def get_pages_exact(self, mobi_file_path, page_count):
        '''
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.
        '''
        pages = []
        count = 0

        with lopen(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        chars_per_page = int(text_length / page_count)
        while count < text_length:
            pages.append(count)
            count += chars_per_page

        if len(pages) > page_count:
            # Rounding created extra page entries
            pages = pages[:page_count]

        return pages
Code example #19
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """

    pheader = PdbHeaderReader(stream)

    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
        t = pheader.title
        if isinstance(t, bytes):
            t = t.decode('utf-8', 'replace')
        return MetaInformation(t, [_('Unknown')])

    return MetadataReader(stream, extract_cover)
Code example #20
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.pdb.header import PdbHeaderReader
        from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader

        header = PdbHeaderReader(stream)
        Reader = get_reader(header.ident)

        if Reader is None:
            raise PDBError('No reader available for format within container.\n Identity is %s. Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown'))))

        log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))

        reader = Reader(header, stream, log, options)
        opf = reader.extract_content(os.getcwdu())

        return opf
Code example #21
File: apnx.py  Project: syn-gowthamsrungarapu/calibre
    def write_apnx(self,
                   mobi_file_path,
                   apnx_path,
                   accurate=True,
                   page_count=0):
        '''
        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count, otherwise a count will be estimated
        using either the fast or accurate algorithm.
        '''
        # Check that this is really a MOBI file.
        with open(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
        if ident != 'BOOKMOBI':
            raise Exception(
                _('Not a valid MOBI file. Reports identity of %s') % ident)

        # Get the pages depending on the chosen parser
        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        else:
            if accurate:
                try:
                    pages = self.get_pages_accurate(mobi_file_path)
                except:
                    # Fall back to the fast parser if we can't
                    # use the accurate one. Typically this is
                    # due to the file having DRM.
                    pages = self.get_pages_fast(mobi_file_path)
            else:
                pages = self.get_pages_fast(mobi_file_path)

        if not pages:
            raise Exception(_('Could not generate page mapping.'))

        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages)

        # Write the APNX.
        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)