Beispiel #1
0
    def fb2mlize_images(self):
        '''
        This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        images = []
        for item in self.oeb_book.manifest:
            # Don't write the image if it's not referenced in the document's text.
            if item.href not in self.image_hrefs:
                continue
            if item.media_type in OEB_RASTER_IMAGES:
                try:
                    if item.media_type != 'image/jpeg':
                        im = Image()
                        im.load(item.data)
                        im.set_compression_quality(70)
                        imdata = im.export('jpg')
                        raw_data = b64encode(imdata)
                    else:
                        raw_data = b64encode(item.data)
                    # Don't put the encoded image on a single line.
                    data = ''
                    col = 1
                    for char in raw_data:
                        if col == 72:
                            data += '\n'
                            col = 1
                        col += 1
                        data += char
                    images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
                except Exception as e:
                    self.log.error('Error: Could not include file %s because '
                        '%s.' % (item.href, e))
        return ''.join(images)
Beispiel #2
0
    def fb2mlize_images(self):
        '''
        This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        images = []
        for item in self.oeb_book.manifest:
            # Don't write the image if it's not referenced in the document's text.
            if item.href not in self.image_hrefs:
                continue
            if item.media_type in OEB_RASTER_IMAGES:
                try:
                    if item.media_type != 'image/jpeg':
                        im = Image()
                        im.load(item.data)
                        im.set_compression_quality(70)
                        imdata = im.export('jpg')
                        raw_data = b64encode(imdata)
                    else:
                        raw_data = b64encode(item.data)
                    # Don't put the encoded image on a single line.
                    data = ''
                    col = 1
                    for char in raw_data:
                        if col == 72:
                            data += '\n'
                            col = 1
                        col += 1
                        data += char
                    images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
                except Exception as e:
                    self.log.error('Error: Could not include file %s because '
                        '%s.' % (item.href, e))
        return ''.join(images)
Beispiel #3
0
    def rescale_image(self, data):
        orig_w, orig_h, ifmt = identify_data(data)
        orig_data = data  # save it in case compression fails
        if self.scale_news_images is not None:
            wmax, hmax = self.scale_news_images
            scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
            if scale:
                data = thumbnail(data, new_w, new_h,
                                 compression_quality=95)[-1]
                orig_w = new_w
                orig_h = new_h
        if self.compress_news_images_max_size is None:
            if self.compress_news_images_auto_size is None:  # not compressing
                return data
            else:
                maxsizeb = (orig_w *
                            orig_h) / self.compress_news_images_auto_size
        else:
            maxsizeb = self.compress_news_images_max_size * 1024
        scaled_data = data  # save it in case compression fails
        if len(scaled_data) <= maxsizeb:  # no compression required
            return scaled_data

        img = Image()
        quality = 95
        img.load(data)
        while len(data) >= maxsizeb and quality >= 5:
            quality -= 5
            img.set_compression_quality(quality)
            data = img.export('jpg')

        if len(data) >= len(scaled_data):  # compression failed
            return orig_data if len(orig_data) <= len(
                scaled_data) else scaled_data

        if len(data) >= len(orig_data):  # no improvement
            return orig_data

        return data
Beispiel #4
0
    def rescale_image(self, data):
        orig_w, orig_h, ifmt = identify_data(data)
        orig_data = data  # save it in case compression fails
        if self.scale_news_images is not None:
            wmax, hmax = self.scale_news_images
            scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
            if scale:
                data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
                orig_w = new_w
                orig_h = new_h
        if self.compress_news_images_max_size is None:
            if self.compress_news_images_auto_size is None:  # not compressing
                return data
            else:
                maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
        else:
            maxsizeb = self.compress_news_images_max_size * 1024
        scaled_data = data  # save it in case compression fails
        if len(scaled_data) <= maxsizeb:  # no compression required
            return scaled_data

        img = Image()
        quality = 95
        img.load(data)
        while len(data) >= maxsizeb and quality >= 5:
            quality -= 5
            img.set_compression_quality(quality)
            data = img.export('jpg')

        if len(data) >= len(scaled_data):  # compression failed
            return orig_data if len(orig_data) <= len(scaled_data) else scaled_data

        if len(data) >= len(orig_data):  # no improvement
            return orig_data

        return data
Beispiel #5
0
    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text recored into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' %
                               (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(
                            section_data.data,
                            section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(
                            d, section_data.header.paragraph_offsets).decode(
                                self.get_text_uid_encoding(section_header.uid),
                                'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        image_sizes = {}
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        with TemporaryFile(suffix='.palm') as itn:
                            with open(itn, 'wb') as itf:
                                itf.write(idata)
                            im = Image()
                            im.read(itn)
                            image_sizes[uid] = im.size
                            im.set_compression_quality(70)
                            im.save('%s.jpg' % uid)
                            self.log.debug(
                                'Wrote image with uid %s to images/%s.jpg' %
                                (uid, uid))
                    except Exception as e:
                        self.log.error(
                            'Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error(
                        'Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in image_sizes:
                                raise Exception('Image with uid: %s missing.' %
                                                col)
                            im = Image()
                            im.read('%s.jpg' % col)
                            w, h = im.size
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    canvas = create_canvas(width, height)
                    y_off = 0
                    for row in section_data.layout:
                        x_off = 0
                        largest_height = 0
                        for col in row:
                            im = Image()
                            im.read('%s.jpg' % col)
                            canvas.compose(im, x_off, y_off)
                            w, h = im.size
                            x_off += w
                            if largest_height < h:
                                largest_height = h
                        y_off += largest_height
                    canvas.set_compression_quality(70)
                    canvas.save('%s.jpg' % uid)
                    self.log.debug(
                        'Wrote composite image with uid %s to images/%s.jpg' %
                        (uid, uid))
                except Exception as e:
                    self.log.error(
                        'Failed to write composite image with uid %s: %s' %
                        (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata recored. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                home_html = self.uid_text_secion_number.items()[0][0]
        except:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'),
                                 self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb
Beispiel #6
0
    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text recored into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        image_sizes = {}
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        with TemporaryFile(suffix='.palm') as itn:
                            with open(itn, 'wb') as itf:
                                itf.write(idata)
                            im = Image()
                            im.read(itn)
                            image_sizes[uid] = im.size
                            im.set_compression_quality(70)
                            im.save('%s.jpg' % uid)
                            self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error('Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in image_sizes:
                                raise Exception('Image with uid: %s missing.' % col)
                            im = Image()
                            im.read('%s.jpg' % col)
                            w, h = im.size
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    canvas = create_canvas(width, height)
                    y_off = 0
                    for row in section_data.layout:
                        x_off = 0
                        largest_height = 0
                        for col in row:
                            im = Image()
                            im.read('%s.jpg' % col)
                            canvas.compose(im, x_off, y_off)
                            w, h = im.size
                            x_off += w
                            if largest_height < h:
                                largest_height = h
                        y_off += largest_height
                    canvas.set_compression_quality(70)
                    canvas.save('%s.jpg' % uid)
                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata recored. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                home_html = self.uid_text_secion_number.items()[0][0]
        except:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb