def fb2mlize_images(self):
    '''
    Build the FB2 <binary> elements for the raster images used by the text.

    This function uses the self.image_hrefs dictionary mapping. It is
    populated by the dump_text function.

    Every manifest item whose href is in self.image_hrefs and whose media
    type is in OEB_RASTER_IMAGES is emitted as base64 text inside a
    <binary> element. Items that are not already image/jpeg are re-exported
    as JPEG at compression quality 70 first. An item that cannot be
    processed is logged through self.log.error and skipped.

    Returns the concatenation of all generated <binary> elements (an empty
    string when there are none).
    '''
    from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

    # Number of base64 characters per output line. This matches the
    # wrapping the previous character-by-character loop produced.
    line_len = 71

    images = []
    for item in self.oeb_book.manifest:
        # Don't write the image if it's not referenced in the document's text.
        if item.href not in self.image_hrefs:
            continue
        if item.media_type not in OEB_RASTER_IMAGES:
            continue
        try:
            if item.media_type != 'image/jpeg':
                im = Image()
                im.load(item.data)
                im.set_compression_quality(70)
                imdata = im.export('jpg')
                raw_data = b64encode(imdata)
            else:
                raw_data = b64encode(item.data)

            # b64encode returns bytes on Python 3. Iterating bytes yields
            # ints, which cannot be concatenated to text, so convert to a
            # text string first. Base64 output is pure ASCII.
            if not isinstance(raw_data, str):
                raw_data = raw_data.decode('ascii')

            # Don't put the encoded image on a single line.
            data = '\n'.join(
                raw_data[start:start + line_len]
                for start in range(0, len(raw_data), line_len))

            images.append(
                '<binary id="%s" content-type="image/jpeg">%s\n</binary>'
                % (self.image_hrefs[item.href], data))
        except Exception as e:
            # Best effort: a single bad image must not abort the whole book.
            self.log.error('Error: Could not include file %s because '
                           '%s.' % (item.href, e))
    return ''.join(images)
def rescale_image(self, data):
    """Return image data scaled and/or recompressed per the news-image options.

    If self.scale_news_images is set, the image is fitted into that
    (width, height) box via fit_image/thumbnail. If a size budget is
    configured (self.compress_news_images_max_size, or failing that
    self.compress_news_images_auto_size), the image is re-exported as JPEG
    at decreasing quality until it fits the budget or the quality ladder is
    exhausted. Whichever of the original, scaled or recompressed data is
    appropriate is returned; recompressed data is never returned when it is
    not smaller than both of the others.
    """
    width, height, _fmt = identify_data(data)
    original = data  # kept so we can fall back if nothing smaller is produced

    if self.scale_news_images is not None:
        max_w, max_h = self.scale_news_images
        needs_scaling, fit_w, fit_h = fit_image(width, height, max_w, max_h)
        if needs_scaling:
            data = thumbnail(data, fit_w, fit_h, compression_quality=95)[-1]
            width, height = fit_w, fit_h

    # Work out the byte budget; an explicit maximum wins over the automatic one.
    if self.compress_news_images_max_size is not None:
        budget = self.compress_news_images_max_size * 1024
    elif self.compress_news_images_auto_size is not None:
        budget = (width * height) / self.compress_news_images_auto_size
    else:
        # Neither option is set: compression is disabled.
        return data

    scaled = data  # kept so we can fall back if compression does not help
    if len(scaled) <= budget:
        # Already within budget, nothing to compress.
        return scaled

    img = Image()
    img.load(data)
    # Step the quality down 90, 85, ..., 5, 0 until the data fits the budget.
    for quality in range(90, -5, -5):
        if len(data) < budget:
            break
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) >= len(scaled):
        # Compression failed: hand back the smaller of the two earlier
        # versions (the original wins a tie).
        return min((original, scaled), key=len)
    # Compression beat the scaled data; make sure it also beats the original.
    return original if len(data) >= len(original) else data
def rescale_image(self, data):
    """Scale and/or JPEG-recompress image data according to the news-image options.

    Scaling happens when self.scale_news_images is set and fit_image reports
    that the image must shrink. Compression happens when either
    self.compress_news_images_max_size or
    self.compress_news_images_auto_size is set and the (possibly scaled)
    data exceeds the resulting byte limit. The result is the recompressed
    data only if it is strictly smaller than both the scaled and the
    original data; otherwise the smaller of those two is returned.
    """
    img_w, img_h, _ = identify_data(data)
    source_bytes = data  # untouched input, used as a fallback

    limits = self.scale_news_images
    if limits is not None:
        box_w, box_h = limits
        shrink, target_w, target_h = fit_image(img_w, img_h, box_w, box_h)
        if shrink:
            data = thumbnail(data, target_w, target_h,
                             compression_quality=95)[-1]
            img_w = target_w
            img_h = target_h

    no_fixed_cap = self.compress_news_images_max_size is None
    if no_fixed_cap and self.compress_news_images_auto_size is None:
        # No compression configured at all.
        return data
    if no_fixed_cap:
        limit = (img_w * img_h) / self.compress_news_images_auto_size
    else:
        limit = self.compress_news_images_max_size * 1024

    resized = data  # post-scaling bytes, used as a fallback
    if len(resized) <= limit:
        # Small enough already.
        return resized

    img = Image()
    quality = 95
    img.load(data)
    # Lower the quality in steps of 5 until the export fits or we run out.
    while quality >= 5 and len(data) >= limit:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) < len(resized):
        # Recompression helped versus the scaled data; only use it if it
        # also improves on the original input.
        if len(data) < len(source_bytes):
            return data
        return source_bytes

    # Recompression did not help: return the smaller earlier version,
    # preferring the original on a tie.
    if len(source_bytes) <= len(resized):
        return source_bytes
    return resized
def extract_content(self, output_dir):
    '''
    Write all text and image records to output_dir and convert them to OEB.

    Each text record becomes <uid>.html in output_dir, each image record
    becomes images/<uid>.jpg, and each composite image is assembled from
    the already written .jpg parts. The record chosen as home.html is then
    passed to the HTML input plugin.

    output_dir: directory that receives the .html files and the images/
                sub-directory.

    Returns whatever the HTML input plugin's convert() returns.
    Raises Exception('Could not determine home.html') when no home record
    can be found.
    '''
    # Each text record is independent (unless the continuation
    # value is set in the previous record). Put each converted
    # text record into a separate file. We will reference the
    # home.html file as the first file and let the HTML input
    # plugin assemble the order based on hyperlinks.
    with CurrentDir(output_dir):
        for uid, num in self.uid_text_secion_number.items():
            self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
            with open('%s.html' % uid, 'wb') as htmlf:
                html = u'<html><body>'
                section_header, section_data = self.sections[num]
                # NOTE(review): only the compressed branch decodes the
                # result of process_phtml; the uncompressed branch appends
                # it as-is. Confirm against process_phtml's return type
                # whether both branches should decode.
                if section_header.type == DATATYPE_PHTML:
                    html += self.process_phtml(
                        section_data.data,
                        section_data.header.paragraph_offsets)
                elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                    d = self.decompress_phtml(section_data.data)
                    html += self.process_phtml(
                        d, section_data.header.paragraph_offsets).decode(
                            self.get_text_uid_encoding(section_header.uid),
                            'replace')
                html += '</body></html>'
                htmlf.write(html.encode('utf-8'))

    # Images.
    # Cache the image sizes in case they are used by a composite image.
    image_sizes = {}
    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    with CurrentDir(images_dir):
        # Single images.
        for uid, num in self.uid_image_section_number.items():
            section_header, section_data = self.sections[num]
            if section_data:
                # idata stays None for an unrecognised section type or
                # compression value; the write below then fails and is
                # reported by the except clause.
                idata = None
                if section_header.type == DATATYPE_TBMP:
                    idata = section_data
                elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                    if self.header_record.compression == 1:
                        idata = decompress_doc(section_data)
                    elif self.header_record.compression == 2:
                        idata = zlib.decompress(section_data)
                try:
                    with TemporaryFile(suffix='.palm') as itn:
                        with open(itn, 'wb') as itf:
                            itf.write(idata)
                        im = Image()
                        im.read(itn)
                        image_sizes[uid] = im.size
                        im.set_compression_quality(70)
                        im.save('%s.jpg' % uid)
                        self.log.debug(
                            'Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    # Best effort: one bad image must not abort extraction.
                    self.log.error(
                        'Failed to write image with uid %s: %s' % (uid, e))
            else:
                self.log.error(
                    'Failed to write image with uid %s: No data.' % uid)

        # Composite images.
        # We're going to use the already compressed .jpg images here.
        for uid, num in self.uid_composite_image_section_number.items():
            try:
                section_header, section_data = self.sections[num]
                # Get the final width and height: the widest row and the
                # sum of each row's tallest part.
                width = 0
                height = 0
                for row in section_data.layout:
                    row_width = 0
                    col_height = 0
                    for col in row:
                        if col not in image_sizes:
                            raise Exception('Image with uid: %s missing.' % col)
                        im = Image()
                        im.read('%s.jpg' % col)
                        w, h = im.size
                        row_width += w
                        if col_height < h:
                            col_height = h
                    if width < row_width:
                        width = row_width
                    height += col_height
                # Create a new image the total size of all image
                # parts. Put the parts into the new image.
                canvas = create_canvas(width, height)
                y_off = 0
                for row in section_data.layout:
                    x_off = 0
                    largest_height = 0
                    for col in row:
                        im = Image()
                        im.read('%s.jpg' % col)
                        canvas.compose(im, x_off, y_off)
                        w, h = im.size
                        x_off += w
                        if largest_height < h:
                            largest_height = h
                    y_off += largest_height
                canvas.set_compression_quality(70)
                canvas.save('%s.jpg' % uid)
                self.log.debug(
                    'Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
            except Exception as e:
                self.log.error(
                    'Failed to write composite image with uid %s: %s' % (uid, e))

    # Run the HTML through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(self.options, opt.option.name, opt.recommended_value)
    self.options.input_encoding = 'utf-8'
    odi = self.options.debug_pipeline
    self.options.debug_pipeline = None
    try:
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # First key of the mapping; works for both lists of items
                # and dictionary views, unlike indexing .items().
                home_html = next(iter(self.uid_text_secion_number))
        except Exception:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        # NOTE(review): the path is relative to the current working
        # directory rather than joined with output_dir -- presumably the
        # caller runs with output_dir as the cwd; confirm.
        with open('%s.html' % home_html, 'rb') as home_file:
            oeb = html_input.convert(home_file, self.options, 'html',
                                     self.log, {})
    finally:
        # Always restore the caller's debug_pipeline setting, even when
        # the conversion fails.
        self.options.debug_pipeline = odi
    return oeb
def extract_content(self, output_dir):
    '''
    Write all text and image records to output_dir and convert them to OEB.

    Each text record becomes <uid>.html in output_dir, each image record
    becomes images/<uid>.jpg, and each composite image is assembled from
    the already written .jpg parts. The record chosen as home.html is then
    passed to the HTML input plugin.

    output_dir: directory that receives the .html files and the images/
                sub-directory.

    Returns whatever the HTML input plugin's convert() returns.
    Raises Exception('Could not determine home.html') when no home record
    can be found.
    '''
    # Each text record is independent (unless the continuation
    # value is set in the previous record). Put each converted
    # text record into a separate file. We will reference the
    # home.html file as the first file and let the HTML input
    # plugin assemble the order based on hyperlinks.
    with CurrentDir(output_dir):
        for uid, num in self.uid_text_secion_number.items():
            self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
            with open('%s.html' % uid, 'wb') as htmlf:
                html = u'<html><body>'
                section_header, section_data = self.sections[num]
                # NOTE(review): only the compressed branch decodes the
                # result of process_phtml; the uncompressed branch appends
                # it as-is. Confirm against process_phtml's return type
                # whether both branches should decode.
                if section_header.type == DATATYPE_PHTML:
                    html += self.process_phtml(
                        section_data.data,
                        section_data.header.paragraph_offsets)
                elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                    d = self.decompress_phtml(section_data.data)
                    html += self.process_phtml(
                        d, section_data.header.paragraph_offsets).decode(
                            self.get_text_uid_encoding(section_header.uid),
                            'replace')
                html += '</body></html>'
                htmlf.write(html.encode('utf-8'))

    # Images.
    # Cache the image sizes in case they are used by a composite image.
    image_sizes = {}
    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    with CurrentDir(images_dir):
        # Single images.
        for uid, num in self.uid_image_section_number.items():
            section_header, section_data = self.sections[num]
            if section_data:
                # idata stays None for an unrecognised section type or
                # compression value; the write below then fails and is
                # reported by the except clause.
                idata = None
                if section_header.type == DATATYPE_TBMP:
                    idata = section_data
                elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                    if self.header_record.compression == 1:
                        idata = decompress_doc(section_data)
                    elif self.header_record.compression == 2:
                        idata = zlib.decompress(section_data)
                try:
                    with TemporaryFile(suffix='.palm') as itn:
                        with open(itn, 'wb') as itf:
                            itf.write(idata)
                        im = Image()
                        im.read(itn)
                        image_sizes[uid] = im.size
                        im.set_compression_quality(70)
                        im.save('%s.jpg' % uid)
                        self.log.debug(
                            'Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    # Best effort: one bad image must not abort extraction.
                    self.log.error(
                        'Failed to write image with uid %s: %s' % (uid, e))
            else:
                self.log.error(
                    'Failed to write image with uid %s: No data.' % uid)

        # Composite images.
        # We're going to use the already compressed .jpg images here.
        for uid, num in self.uid_composite_image_section_number.items():
            try:
                section_header, section_data = self.sections[num]
                # Get the final width and height: the widest row and the
                # sum of each row's tallest part.
                width = 0
                height = 0
                for row in section_data.layout:
                    row_width = 0
                    col_height = 0
                    for col in row:
                        if col not in image_sizes:
                            raise Exception('Image with uid: %s missing.' % col)
                        im = Image()
                        im.read('%s.jpg' % col)
                        w, h = im.size
                        row_width += w
                        if col_height < h:
                            col_height = h
                    if width < row_width:
                        width = row_width
                    height += col_height
                # Create a new image the total size of all image
                # parts. Put the parts into the new image.
                canvas = create_canvas(width, height)
                y_off = 0
                for row in section_data.layout:
                    x_off = 0
                    largest_height = 0
                    for col in row:
                        im = Image()
                        im.read('%s.jpg' % col)
                        canvas.compose(im, x_off, y_off)
                        w, h = im.size
                        x_off += w
                        if largest_height < h:
                            largest_height = h
                    y_off += largest_height
                canvas.set_compression_quality(70)
                canvas.save('%s.jpg' % uid)
                self.log.debug(
                    'Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
            except Exception as e:
                self.log.error(
                    'Failed to write composite image with uid %s: %s' % (uid, e))

    # Run the HTML through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(self.options, opt.option.name, opt.recommended_value)
    self.options.input_encoding = 'utf-8'
    odi = self.options.debug_pipeline
    self.options.debug_pipeline = None
    try:
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # First key of the mapping; works for both lists of items
                # and dictionary views, unlike indexing .items().
                home_html = next(iter(self.uid_text_secion_number))
        except Exception:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        # NOTE(review): the path is relative to the current working
        # directory rather than joined with output_dir -- presumably the
        # caller runs with output_dir as the cwd; confirm.
        with open('%s.html' % home_html, 'rb') as home_file:
            oeb = html_input.convert(home_file, self.options, 'html',
                                     self.log, {})
    finally:
        # Always restore the caller's debug_pipeline setting, even when
        # the conversion fails.
        self.options.debug_pipeline = odi
    return oeb