def rescale_image(data, scale_news_images, compress_news_images_max_size, compress_news_images_auto_size):
    '''
    Optionally downscale a news image and then recompress it until it fits a
    byte budget.

    :param data: Encoded image bytes.
    :param scale_news_images: ``None`` or a ``(max_width, max_height)`` pair.
        The image is scaled only when it exceeds one of the two limits.
    :param compress_news_images_max_size: ``None`` or the byte budget in KB.
    :param compress_news_images_auto_size: ``None`` or a divisor; the budget
        becomes ``width * height / divisor``. Only consulted when
        ``compress_news_images_max_size`` is ``None``.
    :return: The smallest acceptable encoding: the recompressed data, the
        scaled data or the untouched input.
    '''
    untouched = data  # fallback when recompression does not help
    img = image_from_data(data)
    width, height = img.width(), img.height()

    if scale_news_images is not None:
        wmax, hmax = scale_news_images
        if width > wmax or height > hmax:
            width, height, data = scale_image(img, wmax, hmax, compression_quality=95)

    # Work out the byte budget, or bail out when compression is disabled
    if compress_news_images_max_size is not None:
        budget = compress_news_images_max_size * 1024
    elif compress_news_images_auto_size is not None:
        budget = (width * height)/compress_news_images_auto_size
    else:
        return data

    if len(data) <= budget:
        # Already small enough
        return data

    scaled = data  # second fallback: scaled but not recompressed
    quality = 90
    # Step the quality down by 5 until the budget is met or quality bottoms out
    while quality >= 5 and len(data) >= budget:
        data = image_to_data(image_from_data(scaled), compression_quality=quality)
        quality -= 5

    if len(data) >= len(scaled):
        # Recompression made things no smaller; pick the smaller fallback
        if len(untouched) <= len(scaled):
            return untouched
        return scaled

    if len(data) >= len(untouched):
        # Still no better than what we were handed
        return untouched

    return data
def test_qt(self):
    '''
    Smoke test for the Qt installation: image format plugins, image
    round-trips, headless application start-up, font discovery, SSL support
    and QtWebKit.
    '''
    from PyQt5.Qt import QImageReader, QNetworkAccessManager, QFontDatabase
    from calibre.utils.img import image_from_data, image_to_data, test

    # Images are deliberately decoded before any QApplication exists. This
    # needs QCoreApplication.libraryPaths() to point at the Qt plugins. In
    # the frozen build that holds because QT_PLUGIN_PATH is set; elsewhere the
    # paths hard-coded into the Qt installation are expected to work, and if
    # they do not it is a distro problem.
    fmts = {unicode(f) for f in QImageReader.supportedImageFormats()}
    testf = {'jpg', 'png', 'svg', 'ico', 'gif'}
    self.assertEqual(
        testf.intersection(fmts), testf,
        "Qt doesn't seem to be able to load some of its image plugins. Available plugins: %s" % fmts)

    blank = I('blank.png', allow_user_override=False, data=True)
    img = image_from_data(blank)
    image_from_data(P('catalog/mastheadImage.gif', allow_user_override=False, data=True))
    # Round-trip the blank image through each output format
    for fmt in ('png', 'bmp', 'jpeg'):
        encoded = image_to_data(img, fmt=fmt)
        image_from_data(encoded)

    # Run the imaging tests
    test()

    from calibre.gui2 import Application
    os.environ.pop('DISPLAY', None)
    app = Application([], headless=islinux)
    family_count = len(QFontDatabase().families())
    self.assertGreaterEqual(
        family_count, 5,
        'The QPA headless plugin is not able to locate enough system fonts via fontconfig')

    na = QNetworkAccessManager()
    self.assertTrue(hasattr(na, 'sslErrors'), 'Qt not compiled with openssl')

    from PyQt5.QtWebKitWidgets import QWebView
    QWebView()
    del QWebView
    del na
    del app
def test_image_formats():
    '''
    Check that images can be decoded and encoded without a QApplication.

    Must be run before a QApplication is constructed.
    '''
    from calibre.utils.img import image_from_data, image_to_data

    blank = I('blank.png', allow_user_override=False, data=True)
    img = image_from_data(blank)
    image_from_data(P('catalog/mastheadImage.gif', allow_user_override=False, data=True))
    # Encode the blank image in each format and make sure it decodes again
    for fmt in ('png', 'bmp', 'jpeg'):
        encoded = image_to_data(img, fmt=fmt)
        image_from_data(encoded)
def test_qt(self):
    '''
    Smoke test for the Qt installation: image format plugins, image
    round-trips, headless application start-up, font discovery, cover
    generation, SSL support and QtWebKit.
    '''
    from PyQt5.QtGui import QImageReader, QFontDatabase
    from PyQt5.QtNetwork import QNetworkAccessManager
    from calibre.utils.img import image_from_data, image_to_data, test

    # Images are deliberately decoded before any QApplication exists. This
    # needs QCoreApplication.libraryPaths() to point at the Qt plugins. In
    # the frozen build that holds because QT_PLUGIN_PATH is set; elsewhere the
    # paths hard-coded into the Qt installation are expected to work, and if
    # they do not it is a distro problem.
    fmts = {f.data().decode('utf-8') for f in QImageReader.supportedImageFormats()}  # no2to3
    testf = {'jpg', 'png', 'svg', 'ico', 'gif'}
    self.assertEqual(
        testf.intersection(fmts), testf,
        "Qt doesn't seem to be able to load some of its image plugins. Available plugins: %s" % fmts)

    blank = P('images/blank.png', allow_user_override=False, data=True)
    img = image_from_data(blank)
    image_from_data(P('catalog/mastheadImage.gif', allow_user_override=False, data=True))
    # Round-trip the blank image through each output format
    for fmt in ('png', 'bmp', 'jpeg'):
        encoded = image_to_data(img, fmt=fmt)
        image_from_data(encoded)

    # Run the imaging tests
    test()

    from calibre.gui2 import Application
    os.environ.pop('DISPLAY', None)
    has_headless = isosx or islinux
    app = Application([], headless=has_headless)
    family_count = len(QFontDatabase().families())
    self.assertGreaterEqual(
        family_count, 5,
        'The QPA headless plugin is not able to locate enough system fonts via fontconfig')
    if has_headless:
        from calibre.ebooks.covers import create_cover
        create_cover('xxx', ['yyy'])

    na = QNetworkAccessManager()
    self.assertTrue(hasattr(na, 'sslErrors'), 'Qt not compiled with openssl')

    from PyQt5.QtWebKitWidgets import QWebView
    if iswindows:
        # Only checks that QtWin is importable; the bare name keeps it "used"
        from PyQt5.Qt import QtWin
        QtWin
    QWebView()
    del QWebView
    del na
    del app
def test_image_formats():
    '''
    Check that images can be decoded and encoded without a QApplication, then
    run the imaging self-tests.

    Must be run before a QApplication is constructed.
    '''
    from calibre.utils.img import image_from_data, image_to_data, test

    blank = I('blank.png', allow_user_override=False, data=True)
    img = image_from_data(blank)
    image_from_data(P('catalog/mastheadImage.gif', allow_user_override=False, data=True))
    # Encode the blank image in each format and make sure it decodes again
    for fmt in ('png', 'bmp', 'jpeg'):
        encoded = image_to_data(img, fmt=fmt)
        image_from_data(encoded)

    # Run the imaging tests
    test()
def create_cover(report, icons=(), cols=5, size=120, padding=16):
    '''
    Compose a grid of icons onto a single canvas and return the exported image.

    :param report: Object providing ``name_map`` and ``path``; icons found in
        ``report.name_map`` are loaded from ``report.path``. A falsy value
        makes every icon come from the built-in resources.
    :param icons: Icon names to draw; when empty, ``default_cover_icons(cols)``
        supplies them.
    :param cols: Number of icons per row.
    :param size: Edge length in pixels of the square cell each icon is fitted to.
    :param padding: Spacing in pixels between cells.
    :return: Whatever ``Canvas.export()`` returns.
    '''
    if not icons:
        icons = tuple(default_cover_icons(cols))
    cell = size + padding
    rows = int(math.ceil(len(icons) / cols))
    with Canvas(cols * cell, rows * cell, bgcolor='#eee') as canvas:
        # y starts one row "above" the canvas; the first icon moves it onto it
        x, y = 0, -size - padding // 2
        for idx, icon in enumerate(icons):
            if idx % cols:
                x += cell
            else:
                # First column: start a new row
                y += cell
                x = padding // 2
            if report and icon in report.name_map:
                ipath = os.path.join(report.path, report.name_map[icon])
            else:
                ipath = I(icon, allow_user_override=False)
            with lopen(ipath, 'rb') as f:
                raw = f.read()
                img = image_from_data(raw)
            scaled, nwidth, nheight = fit_image(img.width(), img.height(), size, size)
            # fit_image already preserved the aspect ratio, so scale exactly
            img = img.scaled(
                nwidth, nheight,
                Qt.AspectRatioMode.IgnoreAspectRatio,
                Qt.TransformationMode.SmoothTransformation)
            # Center the icon horizontally inside its cell
            offset = (size - nwidth) // 2
            canvas.compose(img, x + offset, y)
    return canvas.export()
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Convert an image to JPEG and try to bring its byte size down to
    ``maxsizeb``.

    When ``dimen`` is ``None`` the image is passed through
    ``save_cover_data_to`` (replaces transparent pixels with white and
    converts to JPEG). Otherwise a thumbnail is generated: ``dimen`` is either
    a single number used for both width and height, or a ``(width, height)``
    sequence.

    The size is reduced first by lowering the JPEG quality from 90 in steps
    of 5, then by repeatedly shrinking the image. If both passes run out of
    steps the last attempt is returned even if it is still too large.

    Returns the image as a bytestring.
    '''
    if dimen is None:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data)
    else:
        width, height = dimen if hasattr(dimen, '__len__') else (dimen, dimen)
        data = scale_image(data, width=width, height=height, compression_quality=90)[-1]

    if len(data) <= maxsizeb:
        return data

    # Pass 1: re-encode the same pixels at progressively lower quality
    source = data
    quality = 90
    while quality >= 5 and len(data) > maxsizeb:
        decoded = image_from_data(source)
        data = image_to_data(decoded, compression_quality=quality)
        quality -= 5
    if len(data) <= maxsizeb:
        return data

    # Pass 2: shrink the previous result by an ever smaller factor, encoding
    # at whatever quality pass 1 finished on
    scale = 0.9
    while scale >= 0.05 and len(data) > maxsizeb:
        img = image_from_data(data)
        new_w = int(scale * img.width())
        new_h = int(scale * img.height())
        img = resize_image(img, new_w, new_h)
        data = image_to_data(img, compression_quality=quality)
        scale -= 0.05
    return data
def _icon_path_is_inside(path, base):
    ''' Return True only if ``path`` lies strictly below the directory ``base``. '''
    # Compare against base + separator: a bare startswith(base) would also
    # accept sibling directories such as "<base>_other/..."
    return path.startswith(os.path.join(base, ''))


def icon(ctx, rd, which):
    '''
    Serve an icon, optionally scaled to fit a square of ``sz`` pixels.

    :param ctx: Unused here.
    :param rd: Request data; ``rd.query['sz']`` is either ``'full'`` or an
        integer size (default and fallback: 48) and ``rd.tdir`` is the
        directory holding the scaled-icon cache.
    :param which: Slash separated icon name from the request. A leading
        underscore selects the ``tb_icons`` folder in the config directory,
        anything else the built-in ``images`` resources.
    :return: An open binary file object for the icon data.
    :raises HTTPNotFound: If the name is empty, escapes the icon directory,
        contains a colon, or the icon cannot be opened.
    '''
    sz = rd.query.get('sz')
    if sz != 'full':
        try:
            sz = int(rd.query.get('sz', 48))
        except Exception:
            sz = 48
    if which in {'', '_'}:
        raise HTTPNotFound()
    if which.startswith('_'):
        base = os.path.abspath(os.path.join(config_dir, 'tb_icons'))
        path = os.path.abspath(os.path.join(base, *which[1:].split('/')))
        if not _icon_path_is_inside(path, base) or ':' in which:
            raise HTTPNotFound('Naughty, naughty!')
    else:
        base = os.path.abspath(P('images', allow_user_override=False))
        path = os.path.abspath(os.path.join(base, *which.split('/')))
        if not _icon_path_is_inside(path, base) or ':' in which:
            raise HTTPNotFound('Naughty, naughty!')
        # Resolve the validated relative name through P() again, this time
        # with its default override handling
        path = os.path.relpath(path, base).replace(os.sep, '/')
        path = P('images/' + path)
    if sz == 'full':
        try:
            return share_open(path, 'rb')
        except EnvironmentError:
            raise HTTPNotFound()
    with lock:
        tdir = os.path.join(rd.tdir, 'icons')
        cached = os.path.join(tdir, '%d-%s.png' % (sz, which))
        # Fast path: a previously scaled copy
        try:
            return share_open(cached, 'rb')
        except EnvironmentError:
            pass
        try:
            src = share_open(path, 'rb')
        except EnvironmentError:
            raise HTTPNotFound()
        with src:
            idata = src.read()
            img = image_from_data(idata)
        scaled, width, height = fit_image(img.width(), img.height(), sz, sz)
        if scaled:
            idata = scale_image(img, width, height, as_png=True)[-1]
        try:
            ans = share_open(cached, 'w+b')
        except EnvironmentError:
            # Most likely the cache directory does not exist yet
            try:
                os.mkdir(tdir)
            except EnvironmentError:
                pass
            ans = share_open(cached, 'w+b')
        ans.write(idata)
        # Rewind so the caller reads the icon from the start
        ans.seek(0)
        return ans
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Produce a JPEG version of an image whose byte size is, where achievable,
    no larger than ``maxsizeb``.

    With ``dimen`` set, a thumbnail is made: a scalar means
    ``width = height = dimen`` and a sequence is unpacked as
    ``width, height``. Without it the image goes through
    ``save_cover_data_to`` (transparent pixels become white, format becomes
    JPEG).

    Oversized results are first re-encoded at decreasing quality (90 down to
    5), then repeatedly resized. The final attempt is returned even when it
    still exceeds ``maxsizeb``.

    Returns the image as a bytestring.
    '''
    if dimen is not None:
        if hasattr(dimen, '__len__'):
            thumb_w, thumb_h = dimen
        else:
            thumb_w = thumb_h = dimen
        result = scale_image(data, width=thumb_w, height=thumb_h, compression_quality=90)
        data = result[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data)

    def too_big(raw):
        return len(raw) > maxsizeb

    if not too_big(data):
        return data

    # Lower the quality, always starting from the same uncompressed source
    baseline = data
    quality = 90
    while too_big(data) and quality >= 5:
        data = image_to_data(image_from_data(baseline), compression_quality=quality)
        quality -= 5
    if not too_big(data):
        return data

    # Quality alone was not enough: shrink the dimensions step by step
    scale = 0.9
    while too_big(data) and scale >= 0.05:
        img = image_from_data(data)
        w, h = img.width(), img.height()
        img = resize_image(img, int(scale*w), int(scale*h))
        data = image_to_data(img, compression_quality=quality)
        scale -= 0.05
    return data
def render(self):
    '''
    Load the page image, write a thumbnail for the first page, split or flag
    landscape pages, then hand over to ``process_pages``.

    Landscape pages (wider than tall) are either marked for rotation
    (``opts.landscape``) or cut into two halves, ordered according to
    ``opts.right2left``.
    '''
    from calibre.utils.img import image_from_data, scale_image, crop_image

    with lopen(self.path_to_page, 'rb') as src:
        img = image_from_data(src.read())
    width, height = img.width(), img.height()

    if self.num == 0:
        # First image so create a thumbnail from it
        thumb_path = os.path.join(self.dest, 'thumbnail.png')
        with lopen(thumb_path, 'wb') as dest:
            dest.write(scale_image(img, as_png=True)[-1])

    self.pages = [img]
    if width > height:
        if self.opts.landscape:
            self.rotate = True
        else:
            half = int(width/2)
            left = crop_image(img, 0, 0, half, height)
            right = crop_image(img, half, 0, width - half, height)
            halves = [left, right]
            if self.opts.right2left:
                halves.reverse()
            self.pages = halves
    self.process_pages()
def process_result(log, result):
    '''
    Validate and normalize one downloaded cover.

    :param log: Logger; ``log.exception`` is called when the cover is rejected.
    :param result: ``(plugin, data)`` pair with the source plugin and the raw
        image bytes.
    :return: ``(plugin, width, height, fmt, data)`` with ``data`` passed
        through ``save_cover_data_to``, or ``None`` if the cover is unreadable
        or smaller than 50 pixels in either dimension.
    '''
    plugin, data = result
    try:
        if getattr(plugin, 'auto_trim_covers', False):
            original = image_from_data(data)
            trimmed = remove_borders_from_image(original)
            # Re-encode only when trimming produced a different image object
            if trimmed is not original:
                data = image_to_data(trimmed)
        fmt, width, height = identify(data)
        if width < 0 or height < 0:
            raise ValueError('Could not read cover image dimensions')
        if width < 50 or height < 50:
            raise ValueError('Image too small')
        data = save_cover_data_to(data)
    except Exception:
        log.exception('Invalid cover from', plugin.name)
        return None
    else:
        return (plugin, width, height, fmt, data)
def render(self):
    '''
    Load the page image, write a thumbnail for the first page, split or flag
    landscape pages, then hand over to ``process_pages``.

    Landscape pages (wider than tall) are either marked for rotation
    (``opts.landscape``) or cut into two halves, ordered according to
    ``opts.right2left``.
    '''
    from calibre.utils.img import image_from_data, scale_image, crop_image

    with lopen(self.path_to_page, 'rb') as src:
        img = image_from_data(src.read())
    width, height = img.width(), img.height()

    if self.num == 0:
        # First image so create a thumbnail from it
        thumb_path = os.path.join(self.dest, 'thumbnail.png')
        with lopen(thumb_path, 'wb') as dest:
            dest.write(scale_image(img, as_png=True)[-1])

    self.pages = [img]
    if width > height:
        if self.opts.landscape:
            self.rotate = True
        else:
            half = width // 2
            left = crop_image(img, 0, 0, half, height)
            right = crop_image(img, half, 0, width - half, height)
            if self.opts.right2left:
                self.pages = [right, left]
            else:
                self.pages = [left, right]
    self.process_pages()
def HandleImage(self, imageData, imagePath):
    '''
    Write an image to ``imagePath``, shrinking it first if it is larger than
    the target screen.

    :param imageData: Encoded image bytes.
    :param imagePath: Destination path; the text after the last ``.`` is used
        as the output format.

    The screen size comes from the output profile (``screen_size`` when
    ``opts.snb_full_screen`` is set, ``comic_screen_size`` otherwise) and
    falls back to 540x700 when there are no options. The aspect ratio is
    preserved when shrinking.
    '''
    from calibre.utils.img import image_from_data, resize_image, image_to_data
    img = image_from_data(imageData)
    x, y = img.width(), img.height()
    if self.opts:
        if self.opts.snb_full_screen:
            SCREEN_X, SCREEN_Y = self.opts.output_profile.screen_size
        else:
            SCREEN_X, SCREEN_Y = self.opts.output_profile.comic_screen_size
    else:
        SCREEN_X = 540
        SCREEN_Y = 700
    # Handle big image only
    if x > SCREEN_X or y > SCREEN_Y:
        xScale = float(x) / SCREEN_X
        yScale = float(y) / SCREEN_Y
        scale = max(xScale, yScale)
        # TODO : intelligent image rotation
        # img = img.rotate(90)
        # x,y = y,x
        # scale is a float, so the quotients are floats too. Pixel sizes must
        # be integers, and at least 1 so extreme aspect ratios do not
        # collapse the image to a zero-sized one.
        new_x = max(1, int(x // scale))
        new_y = max(1, int(y // scale))
        img = resize_image(img, new_x, new_y)
    with lopen(imagePath, 'wb') as f:
        f.write(image_to_data(img, fmt=imagePath.rpartition('.')[-1]))
def HandleImage(self, imageData, imagePath):
    '''
    Write an image to ``imagePath``, shrinking it first if it is larger than
    the target screen.

    :param imageData: Encoded image bytes.
    :param imagePath: Destination path; the text after the last ``.`` is used
        as the output format.

    The screen size comes from the output profile (``screen_size`` when
    ``opts.snb_full_screen`` is set, ``comic_screen_size`` otherwise) and
    falls back to 540x700 when there are no options. The aspect ratio is
    preserved when shrinking.
    '''
    from calibre.utils.img import image_from_data, resize_image, image_to_data
    img = image_from_data(imageData)
    x, y = img.width(), img.height()
    if self.opts:
        if self.opts.snb_full_screen:
            SCREEN_X, SCREEN_Y = self.opts.output_profile.screen_size
        else:
            SCREEN_X, SCREEN_Y = self.opts.output_profile.comic_screen_size
    else:
        SCREEN_X = 540
        SCREEN_Y = 700
    # Handle big image only
    if x > SCREEN_X or y > SCREEN_Y:
        xScale = float(x) / SCREEN_X
        yScale = float(y) / SCREEN_Y
        scale = max(xScale, yScale)
        # TODO : intelligent image rotation
        # img = img.rotate(90)
        # x,y = y,x
        # The division yields floats. Pixel sizes must be integers, and at
        # least 1 so extreme aspect ratios do not collapse the image to a
        # zero-sized one.
        new_x = max(1, int(x / scale))
        new_y = max(1, int(y / scale))
        img = resize_image(img, new_x, new_y)
    with lopen(imagePath, 'wb') as f:
        f.write(image_to_data(img, fmt=imagePath.rpartition('.')[-1]))
def create_cover(report, icons=(), cols=5, size=120, padding=16):
    '''
    Compose a grid of icons onto a single canvas and return the exported image.

    :param report: Object providing ``name_map`` and ``path``; icons found in
        ``report.name_map`` are loaded from ``report.path``. A falsy value
        makes every icon come from the built-in resources.
    :param icons: Icon names to draw; when empty, ``default_cover_icons(cols)``
        supplies them.
    :param cols: Number of icons per row.
    :param size: Edge length in pixels of the square cell each icon is fitted to.
    :param padding: Spacing in pixels between cells.
    :return: Whatever ``Canvas.export()`` returns.
    '''
    if not icons:
        icons = tuple(default_cover_icons(cols))
    cell = size + padding
    rows = int(math.ceil(len(icons) / cols))
    with Canvas(cols * cell, rows * cell, bgcolor='#eee') as canvas:
        # y starts one row "above" the canvas; the first icon moves it onto it
        x, y = 0, -size - padding // 2
        for idx, icon in enumerate(icons):
            if idx % cols:
                x += cell
            else:
                # First column: start a new row
                y += cell
                x = padding // 2
            if report and icon in report.name_map:
                ipath = os.path.join(report.path, report.name_map[icon])
            else:
                ipath = I(icon, allow_user_override=False)
            with lopen(ipath, 'rb') as f:
                raw = f.read()
                img = image_from_data(raw)
            scaled, nwidth, nheight = fit_image(img.width(), img.height(), size, size)
            # fit_image already preserved the aspect ratio, so scale exactly
            img = img.scaled(nwidth, nheight, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
            # Center the icon horizontally inside its cell
            offset = (size - nwidth) // 2
            canvas.compose(img, x + offset, y)
    return canvas.export()
def do_all(self):
    '''
    Apply every bulk edit requested in ``self.args`` to the books in
    ``self.ids``.

    The edits run in a fixed order: title/author changes, cover actions,
    format changes, assorted fields, tags and finally search & replace.
    Writes go through ``self.db.new_api`` except for tag changes, which use
    ``self.db.bulk_modify_tags``.
    '''
    cache = self.db.new_api
    args = self.args

    # Title and authors
    if args.do_swap_ta:
        title_map = cache.all_field_for('title', self.ids)
        authors_map = cache.all_field_for('authors', self.ids)

        def new_title(authors):
            # The old authors become the title, title-cased only on request
            ans = authors_to_string(authors)
            return titlecase(ans) if args.do_title_case else ans
        new_title_map = {bid:new_title(authors) for bid, authors in authors_map.iteritems()}
        new_authors_map = {bid:string_to_authors(title) for bid, title in title_map.iteritems()}
        cache.set_field('authors', new_authors_map)
        cache.set_field('title', new_title_map)

    # When swapping, title casing was already handled by new_title() above
    if args.do_title_case and not args.do_swap_ta:
        title_map = cache.all_field_for('title', self.ids)
        cache.set_field('title', {bid:titlecase(title) for bid, title in title_map.iteritems()})

    if args.do_title_sort:
        lang_map = cache.all_field_for('languages', self.ids)
        title_map = cache.all_field_for('title', self.ids)

        def get_sort(book_id):
            # Language priority: the language being set in this edit, then
            # the book's first language, then 'eng'
            if args.languages:
                lang = args.languages[0]
            else:
                try:
                    lang = lang_map[book_id][0]
                except (KeyError, IndexError, TypeError, AttributeError):
                    lang = 'eng'
            return title_sort(title_map[book_id], lang=lang)

        cache.set_field('sort', {bid:get_sort(bid) for bid in self.ids})

    if args.au:
        authors = string_to_authors(args.au)
        cache.set_field('authors', {bid:authors for bid in self.ids})

    if args.do_auto_author:
        aus_map = cache.author_sort_strings_for_books(self.ids)
        cache.set_field('author_sort', {book_id:' & '.join(aus_map[book_id]) for book_id in aus_map})

    if args.aus and args.do_aus:
        cache.set_field('author_sort', {bid:args.aus for bid in self.ids})

    # Covers
    if args.cover_action == 'remove':
        cache.set_cover({bid:None for bid in self.ids})
    elif args.cover_action == 'generate':
        from calibre.ebooks.covers import generate_cover
        for book_id in self.ids:
            mi = self.db.get_metadata(book_id, index_is_id=True)
            cdata = generate_cover(mi, prefs=args.generate_cover_settings)
            cache.set_cover({book_id:cdata})
    elif args.cover_action == 'fromfmt':
        for book_id in self.ids:
            fmts = cache.formats(book_id, verify_formats=False)
            if fmts:
                covers = []
                for fmt in fmts:
                    fmtf = cache.format(book_id, fmt, as_file=True)
                    if fmtf is None:
                        continue
                    cdata, area = get_cover_data(fmtf, fmt)
                    if cdata:
                        covers.append((cdata, area))
                # Sort ascending by area so the largest cover ends up last
                covers.sort(key=lambda x: x[1])
                if covers:
                    cache.set_cover({book_id:covers[-1][0]})
    elif args.cover_action == 'trim':
        from calibre.utils.img import remove_borders, image_to_data, image_from_data
        for book_id in self.ids:
            cdata = cache.cover(book_id)
            if cdata:
                img = image_from_data(cdata)
                nimg = remove_borders(img)
                # Only rewrite the cover when trimming returned a new image
                if nimg is not img:
                    cdata = image_to_data(nimg)
                    cache.set_cover({book_id:cdata})
    elif args.cover_action == 'clone':
        cdata = None
        # Use the first book that has a cover as the source ...
        for book_id in self.ids:
            cdata = cache.cover(book_id)
            if cdata:
                break
        # ... and copy it to every other book (book_id is the source here)
        if cdata:
            cache.set_cover({bid:cdata for bid in self.ids if bid != book_id})

    # Formats
    if args.do_remove_format:
        cache.remove_formats({bid:(args.remove_format,) for bid in self.ids})

    if args.restore_original:
        for book_id in self.ids:
            formats = cache.formats(book_id)
            originals = tuple(x.upper() for x in formats if x.upper().startswith('ORIGINAL_'))
            for ofmt in originals:
                cache.restore_original_format(book_id, ofmt)

    # Various fields
    # -1 means "leave the rating alone"; the value is doubled before storing
    if args.rating != -1:
        cache.set_field('rating', {bid:args.rating*2 for bid in self.ids})

    if args.clear_pub:
        cache.set_field('publisher', {bid:'' for bid in self.ids})

    if args.pub:
        cache.set_field('publisher', {bid:args.pub for bid in self.ids})

    if args.clear_series:
        cache.set_field('series', {bid:'' for bid in self.ids})

    if args.pubdate is not None:
        cache.set_field('pubdate', {bid:args.pubdate for bid in self.ids})

    if args.adddate is not None:
        cache.set_field('timestamp', {bid:args.adddate for bid in self.ids})

    if args.do_series:
        # sval is a plain start number when restarting, otherwise the value
        # returned by get_next_series_num_for(..., current_indices=True),
        # which is used below as a mapping of existing indices
        sval = args.series_start_value if args.do_series_restart else cache.get_next_series_num_for(args.series, current_indices=True)
        cache.set_field('series', {bid:args.series for bid in self.ids})
        if not args.series:
            cache.set_field('series_index', {bid:1.0 for bid in self.ids})
        else:
            def next_series_num(bid, i):
                if args.do_series_restart:
                    return sval + (i * args.series_increment)
                next_num = _get_next_series_num_for_list(sorted(sval.itervalues()), unwrap=False)
                # Record the number so the next book is numbered after it
                sval[bid] = next_num
                return next_num

            smap = {bid:next_series_num(bid, i) for i, bid in enumerate(self.ids)}
            if args.do_autonumber:
                cache.set_field('series_index', smap)
            elif tweaks['series_index_auto_increment'] != 'no_change':
                cache.set_field('series_index', {bid:1.0 for bid in self.ids})

    # null is a sentinel from the enclosing module meaning "comments untouched"
    if args.comments is not null:
        cache.set_field('comments', {bid:args.comments for bid in self.ids})

    if args.do_remove_conv:
        cache.delete_conversion_options(self.ids)

    if args.clear_languages:
        cache.set_field('languages', {bid:() for bid in self.ids})
    elif args.languages:
        cache.set_field('languages', {bid:args.languages for bid in self.ids})

    if args.remove_all:
        cache.set_field('tags', {bid:() for bid in self.ids})
    if args.add or args.remove:
        self.db.bulk_modify_tags(self.ids, add=args.add, remove=args.remove)

    # Search & replace
    if self.do_sr:
        for book_id in self.ids:
            self.s_r_func(book_id)
        # NOTE(review): presumably s_r_func queues its changes in
        # self.sr_calls -- confirm; they are applied here field by field
        if self.sr_calls:
            for field, book_id_val_map in self.sr_calls.iteritems():
                self.refresh_books.update(self.db.new_api.set_field(field, book_id_val_map))
def prepare_masthead_image(path_to_image, out_path, mi_width, mi_height):
    '''
    Read an image, blend it onto a canvas of the masthead size and write the
    result to ``out_path``.

    :param path_to_image: Path of the source image.
    :param out_path: Path the processed image is written to.
    :param mi_width: Canvas width passed to ``blend_on_canvas``.
    :param mi_height: Canvas height passed to ``blend_on_canvas``.
    '''
    with lopen(path_to_image, 'rb') as src:
        raw = src.read()
        source_img = image_from_data(raw)
    masthead = blend_on_canvas(source_img, mi_width, mi_height)
    with lopen(out_path, 'wb') as dest:
        encoded = image_to_data(masthead)
        dest.write(encoded)
def process_images(self, soup, baseurl):
    '''
    Download (or decode) every ``<img>`` in ``soup``, store it in the
    ``images`` folder under ``self.current_dir`` and point the tag's ``src``
    at the local file.

    :param soup: Parsed document whose ``img`` tags are rewritten in place.
    :param baseurl: URL used to resolve relative image URLs.

    Images already present in ``self.imagemap`` are reused. SVG data is
    written unchanged. Other images are validated by decoding them, converted
    to PNG (GIF sources) or JPEG (anything else that is not PNG/JPEG) and,
    when ``self.compress_news_images`` is set, JPEGs are passed through
    ``self.rescale_image``. Images that fail to download or decode are
    skipped and their tags left untouched.
    '''
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    c = 0
    for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):  # noqa
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            try:
                data = b64decode(iurl.partition(',')[-1])
            except Exception:
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urlsplit(iurl).scheme:
                iurl = urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
                # "in" works on both Python 2 and 3; dict.has_key() is
                # Python 2 only
                if iurl in self.imagemap:
                    tag['src'] = self.imagemap[iurl]
                    continue
            try:
                data = self.fetch_url(iurl)
                # Compare against a bytes literal: on Python 3 fetched bytes
                # never equal a str, so the check would silently never match
                if data == b'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue
        c += 1
        fname = ascii_filename('img' + str(c))
        # NOTE(review): on Python 3 this turns fname into bytes, which then
        # cannot be concatenated with the str extensions below -- confirm
        # whether the encode is still wanted
        if isinstance(fname, unicode_type):
            fname = fname.encode('ascii', 'replace')
        data = self.preprocess_image_ext(
            data, iurl) if self.preprocess_image_ext is not None else data
        if data is None:
            continue
        itype = what(None, data)
        if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
            # SVG image
            imgpath = os.path.join(diskpath, fname + '.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as x:
                x.write(data)
            tag['src'] = imgpath
        else:
            try:
                # Ensure image is valid
                img = image_from_data(data)
                if itype not in {'png', 'jpg', 'jpeg'}:
                    itype = 'png' if itype == 'gif' else 'jpeg'
                    data = image_to_data(img, fmt=itype)
                if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                    try:
                        data = self.rescale_image(data)
                    except Exception:
                        self.log.exception('failed to compress image ' + iurl)
                # Moon+ apparently cannot handle .jpeg files
                if itype == 'jpeg':
                    itype = 'jpg'
                imgpath = os.path.join(diskpath, fname + '.' + itype)
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    x.write(data)
                tag['src'] = imgpath
            except Exception:
                traceback.print_exc()
                continue
def extract_content(self, output_dir):
    '''
    Write the document's text and image records into ``output_dir`` and
    convert the result to OEB via the HTML input plugin.

    :param output_dir: Directory that receives one ``<uid>.html`` per text
        record and an ``images/`` folder with one ``<uid>.jpg`` per image.
    :return: The OEB object returned by the HTML input plugin.
    :raises Exception: If the uid of the home.html record cannot be determined.

    Failures to write an individual image are logged and do not abort the
    extraction.
    '''
    # Each text record is independent (unless the continuation
    # value is set in the previous record). Put each converted
    # text record into a separate file. We will reference the
    # home.html file as the first file and let the HTML input
    # plugin assemble the order based on hyperlinks.
    with CurrentDir(output_dir):
        for uid, num in self.uid_text_secion_number.items():
            self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
            with open('%s.html' % uid, 'wb') as htmlf:
                html = u'<html><body>'
                section_header, section_data = self.sections[num]
                if section_header.type == DATATYPE_PHTML:
                    html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                    d = self.decompress_phtml(section_data.data)
                    html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                html += '</body></html>'
                htmlf.write(html.encode('utf-8'))

    # Images.
    # Remember which images were written in case they are used by a
    # composite image.
    images = set()
    if not os.path.exists(os.path.join(output_dir, 'images/')):
        os.makedirs(os.path.join(output_dir, 'images/'))
    with CurrentDir(os.path.join(output_dir, 'images/')):
        # Single images.
        for uid, num in self.uid_image_section_number.items():
            section_header, section_data = self.sections[num]
            if section_data:
                idata = None
                if section_header.type == DATATYPE_TBMP:
                    idata = section_data
                elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                    if self.header_record.compression == 1:
                        idata = decompress_doc(section_data)
                    elif self.header_record.compression == 2:
                        idata = zlib.decompress(section_data)
                try:
                    save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70)
                    images.add(uid)
                    self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write image with uid %s: %s' % (uid, e))
            else:
                self.log.error('Failed to write image with uid %s: No data.' % uid)
        # Composite images.
        # We're going to use the already compressed .jpg images here.
        for uid, num in self.uid_composite_image_section_number.items():
            try:
                section_header, section_data = self.sections[num]
                # Get the final width and height.
                width = 0
                height = 0
                for row in section_data.layout:
                    row_width = 0
                    col_height = 0
                    for col in row:
                        if col not in images:
                            raise Exception('Image with uid: %s missing.' % col)
                        with lopen('%s.jpg' % col, 'rb') as part:
                            w, h = identify(part)[1:]
                        row_width += w
                        if col_height < h:
                            col_height = h
                    if width < row_width:
                        width = row_width
                    height += col_height
                # Create a new image the total size of all image
                # parts. Put the parts into the new image.
                with Canvas(width, height) as canvas:
                    y_off = 0
                    for row in section_data.layout:
                        x_off = 0
                        largest_height = 0
                        for col in row:
                            with lopen('%s.jpg' % col, 'rb') as part:
                                im = image_from_data(part.read())
                            canvas.compose(im, x_off, y_off)
                            w, h = im.width(), im.height()
                            x_off += w
                            if largest_height < h:
                                largest_height = h
                        y_off += largest_height
                # Must be opened for binary writing; the default read mode
                # made every composite image fail to save
                with lopen('%s.jpg' % uid, 'wb') as out:
                    out.write(canvas.export(compression_quality=70))
                self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
            except Exception as e:
                self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

    # Run the HTML through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(self.options, opt.option.name, opt.recommended_value)
    self.options.input_encoding = 'utf-8'
    odi = self.options.debug_pipeline
    self.options.debug_pipeline = None
    try:
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # next(iter(...)) works whether items() is a list or a view
                home_html = next(iter(self.uid_text_secion_number.items()))[0]
        except Exception:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
    finally:
        # Restore the caller's setting even if the conversion fails
        self.options.debug_pipeline = odi

    return oeb
def test_qt(self):
    '''
    Smoke test for the Qt installation: image format plugins, image
    round-trips, application start-up without DISPLAY, font discovery, cover
    generation, SSL support and a QtWebEngine page that evaluates JavaScript
    and prints to PDF.
    '''
    from PyQt5.QtCore import QTimer
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEnginePage
    from PyQt5.QtGui import QImageReader, QFontDatabase
    from PyQt5.QtNetwork import QNetworkAccessManager
    from calibre.utils.img import image_from_data, image_to_data, test

    # Images are deliberately decoded before any QApplication exists. This
    # needs QCoreApplication.libraryPaths() to point at the Qt plugins. In
    # the frozen build that holds because Qt is patched to know the layout of
    # the calibre application package; elsewhere the paths hard-coded into
    # the Qt installation are expected to work, and if they do not it is a
    # distro problem.
    fmts = {f.data().decode('utf-8') for f in QImageReader.supportedImageFormats()}  # no2to3
    testf = {'jpg', 'png', 'svg', 'ico', 'gif'}
    self.assertEqual(
        testf.intersection(fmts), testf,
        "Qt doesn't seem to be able to load some of its image plugins. Available plugins: %s" % fmts)

    blank = P('images/blank.png', allow_user_override=False, data=True)
    img = image_from_data(blank)
    image_from_data(P('catalog/mastheadImage.gif', allow_user_override=False, data=True))
    # Round-trip the blank image through each output format
    for fmt in ('png', 'bmp', 'jpeg'):
        encoded = image_to_data(img, fmt=fmt)
        image_from_data(encoded)

    # Run the imaging tests
    test()

    from calibre.gui2 import ensure_app, destroy_app
    # Run without DISPLAY; the previous value is put back in the finally clause
    display_env_var = os.environ.pop('DISPLAY', None)
    try:
        ensure_app()
        family_count = len(QFontDatabase().families())
        self.assertGreaterEqual(
            family_count, 5,
            'The QPA headless plugin is not able to locate enough system fonts via fontconfig')
        from calibre.ebooks.covers import create_cover
        create_cover('xxx', ['yyy'])

        na = QNetworkAccessManager()
        self.assertTrue(hasattr(na, 'sslErrors'), 'Qt not compiled with openssl')
        if iswindows:
            # Only checks that QtWin is importable; the bare name keeps it "used"
            from PyQt5.Qt import QtWin
            QtWin

        p = QWebEnginePage()

        # Each callback stores its result on itself and stops the event loop
        # once the other callback has delivered too
        def js_done(result):
            js_done.result = result
            if hasattr(pdf_done, 'result'):
                QApplication.instance().quit()

        def pdf_done(result):
            pdf_done.result = result
            if hasattr(js_done, 'result'):
                QApplication.instance().quit()

        p.runJavaScript('1 + 1', js_done)
        p.printToPdf(pdf_done)
        # Safety net: leave the event loop after 5 seconds regardless
        QTimer.singleShot(5000, lambda: QApplication.instance().quit())
        QApplication.instance().exec_()

        # The results are not checked on macOS outside of CI
        if not (ismacos and not is_ci):
            self.assertEqual(js_done.result, 2, 'Simple JS computation failed')
            self.assertIn(b'Skia/PDF', bytes(pdf_done.result), 'Print to PDF failed')
        del p
        del na
        destroy_app()
        del QWebEnginePage
    finally:
        if display_env_var is not None:
            os.environ['DISPLAY'] = display_env_var
def extract_content(self, output_dir):
    '''
    Write the document's text and image records into ``output_dir`` and
    convert the result to OEB via the HTML input plugin.

    :param output_dir: Directory that receives one ``<uid>.html`` per text
        record and an ``images/`` folder with one ``<uid>.jpg`` per image.
    :return: The OEB object returned by the HTML input plugin.
    :raises Exception: If the uid of the home.html record cannot be determined.

    Failures to write an individual image are logged and do not abort the
    extraction.
    '''
    # Each text record is independent (unless the continuation
    # value is set in the previous record). Put each converted
    # text record into a separate file. We will reference the
    # home.html file as the first file and let the HTML input
    # plugin assemble the order based on hyperlinks.
    with CurrentDir(output_dir):
        for uid, num in self.uid_text_secion_number.items():
            self.log.debug('Writing record with uid: %s as %s.html' %
                           (uid, uid))
            with open('%s.html' % uid, 'wb') as htmlf:
                html = u'<html><body>'
                section_header, section_data = self.sections[num]
                if section_header.type == DATATYPE_PHTML:
                    html += self.process_phtml(
                        section_data.data,
                        section_data.header.paragraph_offsets)
                elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                    d = self.decompress_phtml(section_data.data)
                    html += self.process_phtml(
                        d, section_data.header.paragraph_offsets).decode(
                            self.get_text_uid_encoding(section_header.uid),
                            'replace')
                html += '</body></html>'
                htmlf.write(html.encode('utf-8'))

    # Images.
    # Remember which images were written in case they are used by a
    # composite image.
    images = set()
    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    with CurrentDir(images_dir):
        # Single images.
        for uid, num in self.uid_image_section_number.items():
            section_header, section_data = self.sections[num]
            if section_data:
                idata = None
                if section_header.type == DATATYPE_TBMP:
                    idata = section_data
                elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                    if self.header_record.compression == 1:
                        idata = decompress_doc(section_data)
                    elif self.header_record.compression == 2:
                        idata = zlib.decompress(section_data)
                try:
                    save_cover_data_to(idata, '%s.jpg' % uid,
                                       compression_quality=70)
                    images.add(uid)
                    self.log.debug(
                        'Wrote image with uid %s to images/%s.jpg' %
                        (uid, uid))
                except Exception as e:
                    self.log.error(
                        'Failed to write image with uid %s: %s' % (uid, e))
            else:
                self.log.error(
                    'Failed to write image with uid %s: No data.' % uid)
        # Composite images.
        # We're going to use the already compressed .jpg images here.
        for uid, num in self.uid_composite_image_section_number.items():
            try:
                section_header, section_data = self.sections[num]
                # Get the final width and height.
                width = 0
                height = 0
                for row in section_data.layout:
                    row_width = 0
                    col_height = 0
                    for col in row:
                        if col not in images:
                            raise Exception(
                                'Image with uid: %s missing.' % col)
                        with lopen('%s.jpg' % col, 'rb') as part:
                            w, h = identify(part)[1:]
                        row_width += w
                        if col_height < h:
                            col_height = h
                    if width < row_width:
                        width = row_width
                    height += col_height
                # Create a new image the total size of all image
                # parts. Put the parts into the new image.
                with Canvas(width, height) as canvas:
                    y_off = 0
                    for row in section_data.layout:
                        x_off = 0
                        largest_height = 0
                        for col in row:
                            with lopen('%s.jpg' % col, 'rb') as part:
                                im = image_from_data(part.read())
                            canvas.compose(im, x_off, y_off)
                            w, h = im.width(), im.height()
                            x_off += w
                            if largest_height < h:
                                largest_height = h
                        y_off += largest_height
                # Must be opened for binary writing; the default read mode
                # made every composite image fail to save
                with lopen('%s.jpg' % uid, 'wb') as out:
                    out.write(canvas.export(compression_quality=70))
                self.log.debug(
                    'Wrote composite image with uid %s to images/%s.jpg' %
                    (uid, uid))
            except Exception as e:
                self.log.error(
                    'Failed to write composite image with uid %s: %s' %
                    (uid, e))

    # Run the HTML through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(self.options, opt.option.name, opt.recommended_value)
    self.options.input_encoding = 'utf-8'
    odi = self.options.debug_pipeline
    self.options.debug_pipeline = None
    try:
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # next(iter(...)) works whether items() is a list or a view
                home_html = next(iter(self.uid_text_secion_number.items()))[0]
        except Exception:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'),
                                 self.options, 'html', self.log, {})
    finally:
        # Restore the caller's setting even if the conversion fails
        self.options.debug_pipeline = odi

    return oeb
def process_images(self, soup, baseurl):
    """Save every ``<img>`` referenced by *soup* locally and repoint its ``src``.

    Images are written into an ``images`` directory below
    ``self.current_dir`` (created if missing) as ``img<N>.<ext>``.

    * ``data:image/`` URIs are decoded as base64 from the text after the
      first comma.
    * Any other URL is optionally rewritten by ``self.image_url_processor``,
      made absolute against *baseurl* when it has no scheme, and then either
      resolved from ``self.imagemap`` (under ``self.imagemap_lock``) or
      downloaded with ``self.fetch_url``.
    * SVG payloads are stored unchanged. Everything else is decoded with
      ``image_from_data``; formats other than PNG/JPEG are re-encoded
      (GIF -> PNG, anything else -> JPEG), and JPEG data is passed through
      ``self.rescale_image`` when ``self.compress_news_images`` is set.

    Images that cannot be decoded, fetched or processed are skipped and the
    tag is left untouched.

    :param soup: parsed document exposing ``findAll``; tags must support
        ``tag['src']`` item access and assignment.
    :param baseurl: URL used to resolve relative image URLs.
    """
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    c = 0
    # NOTE(review): tag.has_key() is kept deliberately. Presumably soup is a
    # BeautifulSoup 3 tree, where ``'src' in tag`` would test the tag's
    # children rather than its attributes -- confirm before modernising.
    for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            try:
                data = b64decode(iurl.partition(',')[-1])
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit.
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urlparse.urlsplit(iurl).scheme:
                iurl = urlparse.urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
                # ``in`` replaces the deprecated dict.has_key().
                if iurl in self.imagemap:
                    tag['src'] = self.imagemap[iurl]
                    continue
            try:
                data = self.fetch_url(iurl)
                if data == b'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue
        c += 1
        fname = ascii_filename('img' + str(c))
        if isinstance(fname, unicode):
            fname = fname.encode('ascii', 'replace')
        itype = what(None, data)
        if itype is None and b'<svg' in data[:1024]:
            # SVG image: store the markup as-is.
            imgpath = os.path.join(diskpath, fname + '.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as x:
                x.write(data)
            tag['src'] = imgpath
        else:
            try:
                # Ensure image is valid
                img = image_from_data(data)
                if itype not in {'png', 'jpg', 'jpeg'}:
                    itype = 'png' if itype == 'gif' else 'jpeg'
                    data = image_to_data(img, fmt=itype)
                if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                    try:
                        data = self.rescale_image(data)
                    except Exception:
                        # Best effort: keep the uncompressed data.
                        self.log.exception('failed to compress image ' + iurl)
                # Moon+ apparently cannot handle .jpeg files
                if itype == 'jpeg':
                    itype = 'jpg'
                imgpath = os.path.join(diskpath, fname + '.' + itype)
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    x.write(data)
                tag['src'] = imgpath
            except Exception:
                traceback.print_exc()
                continue
def process_images(self, soup, baseurl):
    """Localise the images referenced by *soup*.

    For each ``<img>`` tag carrying a ``src`` attribute the image bytes are
    obtained (``data:`` URIs are read through ``urlopen``; other URLs go
    through ``self.image_url_processor`` if callable, are joined with
    *baseurl* when scheme-less, and are then taken from ``self.imagemap`` or
    downloaded via ``self.fetch_url``), optionally filtered by
    ``self.preprocess_image_ext``, written to ``<current_dir>/images`` and
    the tag's ``src`` is replaced with the path of the written file.

    Images that fail to load, fetch or convert are skipped.
    """
    images_dir = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(images_dir):
        os.mkdir(images_dir)

    counter = 0
    for img_tag in soup.findAll('img', src=True):
        src = img_tag['src']

        if src.startswith('data:'):
            try:
                payload = urlopen(src).read()
            except Exception:
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                src = self.image_url_processor(baseurl, src)
            if not urlsplit(src).scheme:
                src = urljoin(baseurl, src, False)

            # Reuse an image that has already been saved for this URL.
            with self.imagemap_lock:
                already_mapped = src in self.imagemap
                if already_mapped:
                    img_tag['src'] = self.imagemap[src]
            if already_mapped:
                continue

            try:
                payload = self.fetch_url(src)
                if payload == b'GIF89a\x01':
                    # Empty GIF files make PIL error out, so drop them.
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', src)
                continue

        counter += 1
        basename = ascii_filename('img' + str(counter))

        if self.preprocess_image_ext is not None:
            payload = self.preprocess_image_ext(payload, src)
        if payload is None:
            continue

        kind = what(None, payload)
        if kind == 'svg' or (kind is None and b'<svg' in payload[:1024]):
            # SVG markup is written out untouched.
            local_path = os.path.join(images_dir, basename + '.svg')
            with self.imagemap_lock:
                self.imagemap[src] = local_path
            with open(local_path, 'wb') as out:
                out.write(payload)
            img_tag['src'] = local_path
            continue

        from calibre.utils.img import image_from_data, image_to_data
        try:
            # Decoding doubles as a validity check.
            decoded = image_from_data(payload)
            if kind not in ('png', 'jpg', 'jpeg'):
                if kind == 'gif':
                    kind = 'png'
                else:
                    kind = 'jpeg'
                payload = image_to_data(decoded, fmt=kind)
            if self.compress_news_images and kind in ('jpg', 'jpeg'):
                try:
                    payload = self.rescale_image(payload)
                except Exception:
                    self.log.exception('failed to compress image ' + src)
            # Moon+ apparently cannot handle .jpeg files
            if kind == 'jpeg':
                kind = 'jpg'
            local_path = os.path.join(images_dir, basename + '.' + kind)
            with self.imagemap_lock:
                self.imagemap[src] = local_path
            with open(local_path, 'wb') as out:
                out.write(payload)
            img_tag['src'] = local_path
        except Exception:
            traceback.print_exc()