예제 #1
0
 def decompress_text(self, number):
     if self.header_record.compression == 1:
         return self.section_data(number)
     if self.header_record.compression == 2 or self.header_record.compression == 258:
         from ebook_converter.ebooks.compression.palmdoc import decompress_doc
         return decompress_doc(self.section_data(number))
     return b''
예제 #2
0
 def decompress_phtml(self, data):
     if self.header_record.compression == 2:
         if self.owner_id:
             raise NotImplementedError
         return zlib.decompress(data)
     elif self.header_record.compression == 1:
         from ebook_converter.ebooks.compression.palmdoc import decompress_doc
         return decompress_doc(data)
예제 #3
0
 def decompress_text(self, number):
     if self.header_record.compression == 2:
         from ebook_converter.ebooks.compression.palmdoc import decompress_doc
         return decompress_doc(self.section_data(number)).decode(
             'cp1252' if self.encoding is None else self.encoding,
             'replace')
     if self.header_record.compression == 10:
         return zlib.decompress(self.section_data(number)).decode(
             'cp1252' if self.encoding is None else self.encoding,
             'replace')
예제 #4
0
 def decompress_text(self, number):
     from ebook_converter.ebooks.compression.palmdoc import decompress_doc
     data = bytearray(self.section_data(number))
     data = bytes(bytearray(x ^ 0xA5 for x in data))
     return decompress_doc(data).decode(self.encoding or 'cp1252',
                                        'replace')
예제 #5
0
    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text recored into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        images = set()
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70)
                        images.add(uid)
                        self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error('Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' % col)
                            w, h = identify(open('%s.jpg' % col, 'rb'))[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                im = image_from_data(open('%s.jpg' % col, 'rb').read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    with open('%s.jpg' % uid) as out:
                        out.write(canvas.export(compression_quality=70))
                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

        # Run the HTML through the html processing plugin.
        from ebook_converter.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata recored. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                home_html = self.uid_text_secion_number.items()[0][0]
        except:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb