def read_cover(stream, zin, mi, opfmeta, extract_cover):
    """Locate the cover image of an ODF document and record it on ``mi``.

    Search for a draw:image inside a draw:frame. When ``opfmeta`` is true a
    frame whose name is 'opf.cover' (compared case-insensitively) is used
    outright. Otherwise, or as a fallback, the first readable image is used
    if it has a suitable size (heuristic borrowed from docx).

    :param stream: passed to ``odLoad`` to load the document
    :param zin: archive object; ``zin.read(href)`` returns the image bytes
        and raises ``KeyError`` for a missing member
    :param mi: metadata object; ``cover`` and ``odf_cover_frame`` are set on
        it when a cover is found
    :param opfmeta: whether to honour a frame named 'opf.cover'
    :param extract_cover: when true, also store ``(fmt, raw)`` in
        ``mi.cover_data``
    """
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if not img:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            width, height, fmt = identify_data(raw)
        except Exception:
            # Not an image we can identify, ignore it
            continue
        imgnum += 1
        # Guard against frames without a name attribute before lowercasing
        frame_name = frm.getAttribute('name') if opfmeta else None
        if frame_name and frame_name.lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frame_name  # could have upper case
            break
        # NOTE(review): if this module does not enable true division, the
        # ratio below is floored under Python 2 -- confirm.
        if (cover_href is None and imgnum == 1 and width > 0 and
                0.8 <= height / width <= 1.8 and height * width >= 12000):
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data
def read_cover(stream, zin, mi, opfmeta, extract_cover):
    """Locate the cover image of an ODF document and record it on ``mi``.

    Search for a draw:image inside a draw:frame. When ``opfmeta`` is true a
    frame whose name is 'opf.cover' (compared case-insensitively) is used
    outright. Otherwise, or as a fallback, the first readable image is used
    if it has a suitable size (heuristic borrowed from docx).

    :param stream: passed to ``odLoad`` to load the document
    :param zin: archive object; ``zin.read(href)`` returns the image bytes
        and raises ``KeyError`` for a missing member
    :param mi: metadata object; ``cover`` and ``odf_cover_frame`` are set on
        it when a cover is found
    :param opfmeta: whether to honour a frame named 'opf.cover'
    :param extract_cover: when true, also store ``(fmt, raw)`` in
        ``mi.cover_data``
    """
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if not img:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            width, height, fmt = identify_data(raw)
        except Exception:
            # Not an image we can identify, ignore it
            continue
        imgnum += 1
        # Guard against frames without a name attribute before lowercasing
        frame_name = frm.getAttribute('name') if opfmeta else None
        if frame_name and frame_name.lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frame_name  # could have upper case
            break
        # NOTE(review): if this module does not enable true division, the
        # ratio below is floored under Python 2 -- confirm.
        if (cover_href is None and imgnum == 1 and width > 0 and
                0.8 <= height / width <= 1.8 and height * width >= 12000):
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data
def read_image(self, href):
    """Return the ``Image`` record for ``href``, creating it on first use.

    Returns ``None`` when the manifest has no item for ``href`` or the
    item's data is not ``bytes``. Image data that cannot be identified is
    replaced with the bundled blank image before being registered.
    """
    if href in self.images:
        return self.images[href]

    item = self.oeb.manifest.hrefs.get(href)
    if item is None:
        return
    if not isinstance(item.data, bytes):
        return

    try:
        dimensions = identify_data(item.data)
    except Exception:
        self.log.warning('Replacing corrupted image with blank: %s' % href)
        item.data = I('blank.png', data=True, allow_user_override=False)
        dimensions = identify_data(item.data)
    width, height, fmt = dimensions

    fname = 'media/' + self.create_filename(href, fmt)
    rid = self.document_relationships.add_image(fname)
    self.images[href] = Image(rid, fname, width, height, fmt, item)
    # The raw bytes are no longer needed once the record exists
    item.unload_data_from_memory()
    return self.images[href]
def workaround_ade_quirks(self, container, name):
    """Add explicit pixel sizes to floating images in the file ``name``.

    Every ``img`` element whose class is exactly "float-right-img" or
    "float-left-img" and which has no ``style`` attribute gets a style
    holding the width and height read from the image data.
    """
    floating = ('//*[local-name() = "img" and (@class = "float-right-img"'
                ' or @class = "float-left-img")]')
    # ADE blows up floating images if their sizes are not specified
    for img in container.parsed(name).xpath(floating):
        if 'style' in img.attrib:
            continue
        imgname = container.href_to_name(img.get('src'), name)
        width, height, _fmt = identify_data(container.raw_data(imgname))
        img.set('style', 'width: %dpx; height: %dpx' % (width, height))
def find_imgtype(data):
    """Return the image type of ``data``.

    ``what`` is tried first; if it returns ``None`` the third element of
    ``identify_data``'s result is used, and 'unknown' is returned if that
    call raises.
    """
    detected = what(None, data)
    if detected is not None:
        return detected
    try:
        return identify_data(data)[2]
    except Exception:
        return 'unknown'
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for
    KFX files.

    :param stream: file-like object holding the metadata.kfx contents
    :param read_cover: when true, decode the base64 cover entry and store it
        in ``cover_data`` if it is an identifiable, non-empty image
    :return: a ``Metadata`` object
    """
    container = Container(stream.read())
    m = extract_metadata(container.decode())

    def has(key):
        return m[key] and m[key][0]

    def get(key, single=True):
        values = m[key]
        if not single:
            return [clean_xml_chars(v) for v in values]
        return clean_xml_chars(values[0]) if values else ''

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # "Last, First" is flipped to "First Last" unless the tweak asks
        # for author names to be copied verbatim
        if tweaks['author_sort_copy_method'] == 'copy':
            return name
        match = auth_pat.match(name.strip())
        if match is None:
            return name
        return match.group(2) + ' ' + match.group(1)

    mi = Metadata(title, [fix_author(a) for a in authors])

    if has('author'):
        mi.author_sort = get('author')

    # ASIN takes precedence over content_id
    for key in ('ASIN', 'content_id'):
        if has(key):
            mi.set_identifier('mobi-asin', get(key))
            break

    if has('languages'):
        canonical = (canonicalize_lang(x) for x in get('languages', False))
        langs = [lang for lang in canonical if lang]
        if langs:
            mi.languages = langs

    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass

    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher

    if read_cover and m[COVER_KEY]:
        try:
            cover = base64.standard_b64decode(m[COVER_KEY])
            width, height, fmt = identify_data(cover)
        except Exception:
            width, height, fmt = 0, 0, None
        if fmt and width and height:
            mi.cover_data = (fmt, cover)

    return mi
def inspect_cover(self, href):
    """Return ``(width, height)`` of the manifest image matching ``href``.

    ``(None, None)`` is returned when no manifest item matches or the
    dimensions of every matching item fail to be read (the failure is
    logged).
    """
    from calibre.ebooks.oeb.base import urlnormalize
    # Normalise once instead of on every loop iteration
    target = urlnormalize(href)
    for x in self.oeb.manifest:
        if x.href == target:
            try:
                raw = x.data
                return identify_data(raw)[:2]
            except Exception:
                # Exception, not a bare except, so that KeyboardInterrupt
                # and SystemExit are not swallowed
                self.log.exception('Failed to read image dimensions')
    return None, None
def workaround_ade_quirks(self, container, name):
    """Add explicit pixel sizes to floating images in the file ``name``.

    Every ``img`` element whose class is exactly "float-right-img" or
    "float-left-img" and which has no ``style`` attribute gets a style
    holding the width and height read from the image data.
    """
    floating = ('//*[local-name() = "img" and (@class = "float-right-img"'
                ' or @class = "float-left-img")]')
    # ADE blows up floating images if their sizes are not specified
    for img in container.parsed(name).xpath(floating):
        if 'style' in img.attrib:
            continue
        imgname = container.href_to_name(img.get('src'), name)
        width, height, _fmt = identify_data(container.raw_data(imgname))
        img.set('style', 'width: %dpx; height: %dpx' % (width, height))
def read_image(self, href):
    """Return the ``Image`` record for ``href``, creating it on first use.

    Returns ``None`` when the manifest has no item for ``href`` or the
    item's data is not ``bytes``.
    """
    if href in self.images:
        return self.images[href]

    item = self.oeb.manifest.hrefs.get(href)
    if item is None:
        return
    if not isinstance(item.data, bytes):
        return

    width, height, fmt = identify_data(item.data)
    fname = 'media/' + self.create_filename(href, fmt)
    rid = self.document_relationships.add_image(fname)
    self.images[href] = Image(rid, fname, width, height, fmt, item)
    # The raw bytes are no longer needed once the record exists
    item.unload_data_from_memory()
    return self.images[href]
def get_cover(docx):
    """Return ``(fmt, raw)`` for the first cover-like image in ``docx``.

    An image qualifies when its height/width ratio lies in [0.8, 1.8] and
    its area is at least 160000 pixels. Images that cannot be read or
    identified are skipped. Returns ``None`` when nothing qualifies.
    """
    doc = docx.document
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        # A zero width would raise ZeroDivisionError outside the try above.
        # NOTE(review): if this module does not enable true division, the
        # ratio is floored under Python 2 -- confirm.
        if (width > 0 and 0.8 <= height / width <= 1.8 and
                height * width >= 160000):
            return (fmt, raw)
    return None
def load_image(self, data):
    """Load the image bytes ``data`` into this widget.

    The format reported by ``identify_data`` is passed to ``QImage`` when
    available; otherwise ``QImage`` is left to detect it. ``is_valid``
    reflects whether the resulting image is non-null, and
    ``image_changed`` is emitted with the new image.
    """
    self.is_valid = False
    try:
        fmt = identify_data(data)[-1].encode('ascii')
    except Exception:
        fmt = b''
    self.original_image_format = fmt.decode('ascii').lower()
    self.selection_state.reset()
    self.original_image_data = data

    if fmt:
        image = QImage.fromData(data, format=fmt)
    else:
        image = QImage.fromData(data)
    self.current_image = image
    self.original_image = image

    self.is_valid = not image.isNull()
    self.update()
    self.image_changed.emit(self.current_image)
def extract_resources(self):
    """Write font and image resource records to disk.

    Creates the "fonts" and "images" directories in the current directory
    and writes one file per font/image record. Returns a list with one
    entry per resource section: the written href, or ``None`` for records
    that are ignored.
    """
    ignored_types = (
        b"FLIS", b"FCIS", b"SRCS", b"\xe9\x8e\r\n", b"RESC", b"BOUN",
        b"FDST", b"DATP", b"AUDI", b"VIDE",
    )
    for dirname in ("fonts", "images"):
        os.mkdir(dirname)

    resource_map = []
    for fname_idx, sec in enumerate(self.resource_sections, 1):
        data = sec[0]
        typ = data[:4]
        href = None
        if typ == b"FONT":
            font = read_font_record(data)
            href = "fonts/%05d.%s" % (fname_idx, font["ext"])
            if font["err"]:
                self.log.warn("Reading font record %d failed: %s" % (fname_idx, font["err"]))
                if font["headers"]:
                    self.log.debug("Font record headers: %s" % font["headers"])
            with open(href.replace("/", os.sep), "wb") as f:
                f.write(font["font_data"] or font["raw_data"])
            if font["encrypted"]:
                self.encrypted_fonts.append(href)
        elif typ not in ignored_types:
            # Anything that is neither ignored nor a font is an image
            imgtype = what(None, data)
            if imgtype is None:
                from calibre.utils.magick.draw import identify_data
                try:
                    imgtype = identify_data(data)[2]
                except Exception:
                    imgtype = "unknown"
            href = "images/%05d.%s" % (fname_idx, imgtype)
            with open(href.replace("/", os.sep), "wb") as f:
                f.write(data)
        resource_map.append(href)
    return resource_map
def load_image(self, data):
    """Load the image bytes ``data`` into this widget.

    The format reported by ``identify_data`` is passed to ``QImage`` when
    available; otherwise ``QImage`` is left to detect it. ``is_valid``
    reflects whether the resulting image is non-null, and
    ``image_changed`` is emitted with the new image.
    """
    self.is_valid = False
    try:
        fmt = identify_data(data)[-1].encode('ascii')
    except Exception:
        fmt = b''
    self.original_image_format = fmt.decode('ascii').lower()
    self.selection_state.reset()
    self.original_image_data = data

    if fmt:
        image = QImage.fromData(data, format=fmt)
    else:
        image = QImage.fromData(data)
    self.current_image = image
    self.original_image = image

    self.is_valid = not image.isNull()
    self.update()
    self.image_changed.emit(self.current_image)
def extract_resources(self):
    """Write font and image resource records to disk.

    Creates the 'fonts' and 'images' directories in the current directory
    and writes one file per font/image record. RESC records are parsed
    into ``self.resc_data``. Returns a list with one entry per resource
    section: the written href, or ``None`` for records that produce no
    file (ignored types, RESC, and the placeholder GIF).
    """
    from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
    ignored_types = (
        b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN', b'FDST',
        b'DATP', b'AUDI', b'VIDE',
    )
    for dirname in ('fonts', 'images'):
        os.mkdir(dirname)

    resource_map = []
    for fname_idx, sec in enumerate(self.resource_sections, 1):
        data = sec[0]
        typ = data[:4]
        href = None
        if typ == b'RESC':
            self.resc_data = read_resc_record(data)
        elif typ == b'FONT':
            font = read_font_record(data)
            href = "fonts/%05d.%s" % (fname_idx, font['ext'])
            if font['err']:
                self.log.warn('Reading font record %d failed: %s' % (fname_idx, font['err']))
                if font['headers']:
                    self.log.debug('Font record headers: %s' % font['headers'])
            with open(href.replace('/', os.sep), 'wb') as f:
                f.write(font['font_data'] or font['raw_data'])
            if font['encrypted']:
                self.encrypted_fonts.append(href)
        elif typ not in ignored_types:
            is_placeholder = (len(data) == len(PLACEHOLDER_GIF) and
                              data == PLACEHOLDER_GIF)
            if not is_placeholder:
                imgtype = what(None, data)
                if imgtype is None:
                    from calibre.utils.magick.draw import identify_data
                    try:
                        imgtype = identify_data(data)[2]
                    except Exception:
                        imgtype = 'unknown'
                href = 'images/%05d.%s' % (fname_idx, imgtype)
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(data)
        # One entry per section, even when no file was written
        resource_map.append(href)
    return resource_map
def test(src, url, sz=None):
    """Fetch ``url`` and check the response against the resource ``src``.

    With ``sz`` unset the body must equal the resource bytes; otherwise
    only the width of the returned image is compared to ``sz``. A second,
    conditional request using the returned ETag must yield NOT_MODIFIED
    with an empty body.
    """
    expected = P(src, data=True)
    conn.request('GET', url)
    resp = conn.getresponse()
    self.ae(resp.status, httplib.OK)
    body = resp.read()
    if sz is not None:
        self.ae(sz, identify_data(body)[0])
    else:
        self.ae(body, expected)
    test_response(resp)

    etag = resp.getheader('ETag')
    conn.request('GET', url, headers={'If-None-Match': etag})
    resp = conn.getresponse()
    self.ae(resp.status, httplib.NOT_MODIFIED)
    self.ae(b'', resp.read())
def get_cover(docx):
    """Return ``(fmt, raw)`` for the first cover-like image in ``docx``.

    Only the first blip/imagedata inside each w:drawing or w:pict element
    is considered. An image qualifies when its height/width ratio lies in
    [0.8, 1.8] and its area is at least 160000 pixels. Images that cannot
    be read or identified are skipped. Returns ``None`` when nothing
    qualifies.
    """
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]'
    )
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        # A zero width would raise ZeroDivisionError outside the try above.
        # NOTE(review): if this module does not enable true division, the
        # ratio is floored under Python 2 -- confirm.
        if (width > 0 and 0.8 <= height / width <= 1.8 and
                height * width >= 160000):
            return (fmt, raw)
    return None
def add_image(self, img, block, stylizer):
    """Register the image referenced by ``img`` and add it to ``block``.

    Returns the relationship id of the image, or ``None`` when ``img`` has
    no ``src`` or the manifest item is missing or its data is not
    ``bytes``.
    """
    src = img.get('src')
    if not src:
        return
    href = self.abshref(src)

    if href not in self.images:
        item = self.oeb.manifest.hrefs.get(href)
        if item is None:
            return
        if not isinstance(item.data, bytes):
            return
        width, height, fmt = identify_data(item.data)
        fname = 'media/' + self.create_filename(href, fmt)
        rid = self.document_relationships.add_image(fname)
        self.images[href] = Image(rid, fname, width, height, fmt, item)
        # The raw bytes are no longer needed once the record exists
        item.unload_data_from_memory()

    block.add_image(self.create_image_markup(img, stylizer, href))
    return self.images[href].rid
def get_cover(docx):
    """Return ``(fmt, raw)`` for the first cover-like image in ``docx``.

    Only the first blip/imagedata inside each w:drawing or w:pict element
    is considered. An image qualifies when its height/width ratio lies in
    [0.8, 1.8] and its area is at least 160000 pixels. Images that cannot
    be read or identified are skipped. Returns ``None`` when nothing
    qualifies.
    """
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]'
    )
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, "r:embed") or get(image, "r:id")
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        # A zero width would raise ZeroDivisionError outside the try above.
        # NOTE(review): if this module does not enable true division, the
        # ratio is floored under Python 2 -- confirm.
        if (width > 0 and 0.8 <= height / width <= 1.8 and
                height * width >= 160000):
            return (fmt, raw)
    return None
def extract_resources(self):
    """Write font and image resource records to disk.

    Creates the 'fonts' and 'images' directories in the current directory
    and writes one file per font/image record. RESC records are parsed
    into ``self.resc_data``. Returns a list with exactly one entry per
    resource section: the written href, or ``None`` for records that
    produce no file (ignored types, RESC, and the placeholder GIF).
    """
    from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
    resource_map = []
    for x in ('fonts', 'images'):
        os.mkdir(x)

    for i, sec in enumerate(self.resource_sections):
        fname_idx = i+1
        data = sec[0]
        typ = data[:4]
        href = None
        if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
                   b'FDST', b'DATP', b'AUDI', b'VIDE'}:
            pass  # Ignore these records
        elif typ == b'RESC':
            self.resc_data = read_resc_record(data)
        elif typ == b'FONT':
            font = read_font_record(data)
            href = "fonts/%05d.%s" % (fname_idx, font['ext'])
            if font['err']:
                self.log.warn('Reading font record %d failed: %s'%(
                    fname_idx, font['err']))
                if font['headers']:
                    self.log.debug('Font record headers: %s'%font['headers'])
            with open(href.replace('/', os.sep), 'wb') as f:
                f.write(font['font_data'] if font['font_data'] else
                        font['raw_data'])
            if font['encrypted']:
                self.encrypted_fonts.append(href)
        elif not (len(data) == len(PLACEHOLDER_GIF) and data == PLACEHOLDER_GIF):
            imgtype = what(None, data)
            if imgtype is None:
                from calibre.utils.magick.draw import identify_data
                try:
                    imgtype = identify_data(data)[2]
                except Exception:
                    imgtype = 'unknown'
            href = 'images/%05d.%s'%(fname_idx, imgtype)
            with open(href.replace('/', os.sep), 'wb') as f:
                f.write(data)

        # Always record an entry, including for placeholder GIFs, which
        # write no file. Skipping the append (as a ``continue`` would)
        # shifts every later entry, so the list no longer lines up with
        # self.resource_sections the way it does for the other record
        # types that produce no file.
        resource_map.append(href)

    return resource_map
def process_image(self, data):
    """Return ``data`` converted for MOBI output.

    When ``self.process_images`` is false the data is returned untouched.
    Otherwise it is passed through ``mobify_image`` or ``rescale_image``
    depending on the ``mobi_keep_original_images`` option. If that fails
    for a PNG, the image is run through ``optimize_png`` and the
    conversion is retried once; failures for other formats are re-raised.
    """
    if not self.process_images:
        return data
    func = mobify_image if self.opts.mobi_keep_original_images else rescale_image
    try:
        return func(data)
    except Exception:
        from calibre.utils.magick.draw import identify_data
        if 'png' != identify_data(data)[-1].lower():
            raise
        with PersistentTemporaryFile(suffix='.png') as pt:
            pt.write(data)
        try:
            from calibre.utils.img import optimize_png
            optimize_png(pt.name)
            # Close the handle explicitly instead of relying on garbage
            # collection to do so before the file is removed below
            with open(pt.name, 'rb') as f:
                data = f.read()
        finally:
            os.remove(pt.name)
        return func(data)
def _parse_cover_data(root, imgid, mi, ctx):
    """Set ``mi.cover_data`` from the fb:binary element with id ``imgid``.

    Nothing happens when no such element exists or it has no text. A
    warning is printed when neither the declared content-type nor the type
    guessed from ``imgid`` maps to a known file extension.
    """
    from calibre.ebooks.fb2 import base64_decode
    matches = ctx.XPath('//fb:binary[@id="%s"]'%imgid)(root)
    if not matches:
        return
    binary = matches[0]

    mimetype = binary.get('content-type', 'image/jpeg')
    extensions = guess_all_extensions(mimetype)
    if not extensions and mimetype.startswith('image/'):
        # Unknown image subtype: fall back to guessing from the id
        guessed = guess_type(imgid)[0]
        if guessed and guessed.startswith('image/'):
            extensions = guess_all_extensions(guessed)

    if not extensions:
        prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
        return

    pic_data = binary.text
    if not pic_data:
        return
    cdata = base64_decode(pic_data.strip())
    fmt = identify_data(cdata)[-1]
    mi.cover_data = (fmt, cdata)
def image_to_hexstring(self, data):
    """Convert image ``data`` to a line-wrapped hexadecimal string.

    The image is first re-encoded through ``save_cover_data_to`` with the
    name 'cover.jpg'. Returns ``(hex_string, width, height)`` where
    ``hex_string`` holds two lowercase hex digits per byte, broken into
    lines of 128 characters.
    """
    from binascii import hexlify

    data = save_cover_data_to(data, 'cover.jpg', return_data=True)
    width, height = identify_data(data)[:2]

    # hexlify replaces the per-byte hex(ord(char)) loop, which built the
    # string quadratically and failed on Python 3 where iterating bytes
    # yields ints
    raw_hex = hexlify(data)
    if not isinstance(raw_hex, str):
        raw_hex = raw_hex.decode('ascii')

    # Images must be broken up so that they are no longer than 129 chars
    # per line
    line_len = 128
    hex_string = '\n'.join(
        raw_hex[i:i + line_len] for i in range(0, len(raw_hex), line_len))
    return (hex_string, width, height)
def _parse_cover_data(root, imgid, mi, ctx):
    """Set ``mi.cover_data`` from the fb:binary element with id ``imgid``.

    Nothing happens when no such element exists or it has no text. A
    warning is printed when neither the declared content-type nor the type
    guessed from ``imgid`` maps to a known file extension.
    """
    from calibre.ebooks.fb2 import base64_decode
    matches = ctx.XPath('//fb:binary[@id="%s"]' % imgid)(root)
    if not matches:
        return
    binary = matches[0]

    mimetype = binary.get('content-type', 'image/jpeg')
    extensions = guess_all_extensions(mimetype)
    if not extensions and mimetype.startswith('image/'):
        # Unknown image subtype: fall back to guessing from the id
        guessed = guess_type(imgid)[0]
        if guessed and guessed.startswith('image/'):
            extensions = guess_all_extensions(guessed)

    if not extensions:
        prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
        return

    pic_data = binary.text
    if not pic_data:
        return
    cdata = base64_decode(pic_data.strip())
    fmt = identify_data(cdata)[-1]
    mi.cover_data = (fmt, cdata)
def get_metadata(stream):
    """Read metadata from the DOCX file in ``stream``.

    The metadata comes from the ``DOCX`` container. The cover is the first
    jpeg/jpg/png/gif member of the zip whose height/width ratio lies in
    [0.8, 1.8] and whose area is at least 12000 pixels; members that
    cannot be identified are skipped.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Release the container even if reading the metadata fails
        c.close()
    stream.seek(0)
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in {'jpeg', 'jpg', 'png', 'gif'}:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                continue
            # NOTE(review): if this module does not enable true division,
            # the ratio is floored under Python 2 -- confirm.
            if (width > 0 and 0.8 <= height / width <= 1.8 and
                    height * width >= 12000):
                cdata = (fmt, raw)
                # Only the first suitable image is used, stop scanning
                break
        if cdata is not None:
            mi.cover_data = cdata

    return mi
def get_metadata(stream):
    """Read metadata from the DOCX file in ``stream``.

    The metadata comes from the ``DOCX`` container. The cover is the first
    jpeg/jpg/png/gif member of the zip whose height/width ratio lies in
    [0.8, 1.8] and whose area is at least 12000 pixels; members that
    cannot be identified are skipped.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Release the container even if reading the metadata fails
        c.close()
    stream.seek(0)
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in {'jpeg', 'jpg', 'png', 'gif'}:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                continue
            # NOTE(review): if this module does not enable true division,
            # the ratio is floored under Python 2 -- confirm.
            if (width > 0 and 0.8 <= height / width <= 1.8 and
                    height * width >= 12000):
                cdata = (fmt, raw)
                # Only the first suitable image is used, stop scanning
                break
        if cdata is not None:
            mi.cover_data = cdata

    return mi
def rescale_image(self, data):
    """Scale and/or compress the image ``data`` per the recipe settings.

    The image is first shrunk to fit ``self.scale_news_images`` (if set).
    It is then re-exported as JPEG at decreasing quality, in steps of 5
    starting below 95, until it is smaller than the size limit or the
    quality drops below 5. The limit is
    ``compress_news_images_max_size`` KB if set, otherwise the pixel area
    divided by ``compress_news_images_auto_size``; with neither set no
    compression is done. The smallest suitable of the original, scaled
    and compressed data is returned.
    """
    width, height, _fmt = identify_data(data)
    original = data  # save it in case compression fails

    if self.scale_news_images is not None:
        max_w, max_h = self.scale_news_images
        resized, fit_w, fit_h = fit_image(width, height, max_w, max_h)
        if resized:
            data = thumbnail(data, fit_w, fit_h, compression_quality=95)[-1]
            width, height = fit_w, fit_h

    if self.compress_news_images_max_size is not None:
        limit = self.compress_news_images_max_size * 1024
    elif self.compress_news_images_auto_size is not None:
        limit = (width * height) / self.compress_news_images_auto_size
    else:  # not compressing
        return data

    scaled = data  # save it in case compression fails
    if len(scaled) <= limit:  # no compression required
        return scaled

    img = Image()
    quality = 95
    img.load(data)
    while len(data) >= limit and quality >= 5:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) >= len(scaled):  # compression failed
        if len(original) <= len(scaled):
            return original
        return scaled

    if len(data) >= len(original):  # no improvement
        return original

    return data
def get_cover_data(stream, ext):  # {{{
    """Return ``(cover_bytes, area)`` for the book in ``stream``.

    The 'read_file_metadata' pref is forced on while the metadata is read
    and restored afterwards. ``stream`` is closed by this function. Both
    values are ``None`` when no cover is found or anything fails.

    :param stream: file-like object usable as a context manager
    :param ext: passed through to ``get_metadata`` along with ``stream``
    """
    from calibre.ebooks.metadata.meta import get_metadata
    old = prefs['read_file_metadata']
    if not old:
        prefs['read_file_metadata'] = True
    cdata = area = None

    try:
        with stream:
            mi = get_metadata(stream, ext)
        if mi.cover and os.access(mi.cover, os.R_OK):
            # Image data is binary: read it in binary mode and close the
            # file deterministically
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
        elif mi.cover_data[1] is not None:
            cdata = mi.cover_data[1]
        if cdata:
            width, height, fmt = identify_data(cdata)
            area = width*height
    except Exception:
        cdata = area = None
    finally:
        # Restore the pref even for exceptions that are not caught above
        if old != prefs['read_file_metadata']:
            prefs['read_file_metadata'] = old

    return cdata, area
def rescale_image(self, data):
    """Scale and/or compress the image ``data`` per the recipe settings.

    The image is first shrunk to fit ``self.scale_news_images`` (if set).
    It is then re-exported as JPEG at decreasing quality, in steps of 5
    starting below 95, until it is smaller than the size limit or the
    quality drops below 5. The limit is
    ``compress_news_images_max_size`` KB if set, otherwise the pixel area
    divided by ``compress_news_images_auto_size``; with neither set no
    compression is done. The smallest suitable of the original, scaled
    and compressed data is returned.
    """
    width, height, _fmt = identify_data(data)
    original = data  # save it in case compression fails

    if self.scale_news_images is not None:
        max_w, max_h = self.scale_news_images
        resized, fit_w, fit_h = fit_image(width, height, max_w, max_h)
        if resized:
            data = thumbnail(data, fit_w, fit_h, compression_quality=95)[-1]
            width, height = fit_w, fit_h

    if self.compress_news_images_max_size is not None:
        limit = self.compress_news_images_max_size * 1024
    elif self.compress_news_images_auto_size is not None:
        limit = (width * height)/self.compress_news_images_auto_size
    else:  # not compressing
        return data

    scaled = data  # save it in case compression fails
    if len(scaled) <= limit:  # no compression required
        return scaled

    img = Image()
    quality = 95
    img.load(data)
    while len(data) >= limit and quality >= 5:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')

    if len(data) >= len(scaled):  # compression failed
        if len(original) <= len(scaled):
            return original
        return scaled

    if len(data) >= len(original):  # no improvement
        return original

    return data
def get_metadata(stream):
    """Read metadata from the zip-based document in ``stream``.

    ``docProps/core.xml`` and ``docProps/app.xml`` (matched
    case-insensitively) are parsed into the returned ``Metadata`` object.
    The cover is the first jpeg/jpg/png/gif member whose height/width
    ratio lies in [0.8, 1.8] and whose area is at least 12000 pixels;
    members that cannot be identified are skipped.
    """
    with ZipFile(stream, 'r') as zf:

        mi = Metadata(_('Unknown'))
        cdata = None

        for zi in zf.infolist():
            fname = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if fname == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif fname == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    continue
                # NOTE(review): if this module does not enable true
                # division, the ratio is floored under Python 2 -- confirm.
                if (width > 0 and 0.8 <= height/width <= 1.8 and
                        height*width >= 12000):
                    cdata = (fmt, raw)
            # No break once a cover is found: later members may still be
            # the docProps files
        if cdata is not None:
            mi.cover_data = cdata

    return mi
def get_metadata(stream):
    """Read metadata from the zip-based document in ``stream``.

    ``docProps/core.xml`` and ``docProps/app.xml`` (matched
    case-insensitively) are parsed into the returned ``Metadata`` object.
    The cover is the first jpeg/jpg/png/gif member whose height/width
    ratio lies in [0.8, 1.8] and whose area is at least 12000 pixels;
    members that cannot be identified are skipped.
    """
    with ZipFile(stream, 'r') as zf:

        mi = Metadata(_('Unknown'))
        cdata = None

        for zi in zf.infolist():
            fname = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if fname == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif fname == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    continue
                # NOTE(review): if this module does not enable true
                # division, the ratio is floored under Python 2 -- confirm.
                if (width > 0 and 0.8 <= height / width <= 1.8 and
                        height * width >= 12000):
                    cdata = (fmt, raw)
            # No break once a cover is found: later members may still be
            # the docProps files
        if cdata is not None:
            mi.cover_data = cdata

    return mi
def create_epub_cover(container, cover_path, existing_image, options=None):
    """Create a titlepage for an EPUB and register it as the book's cover.

    :param container: the book container that is modified in place
    :param cover_path: path to the cover image file; its extension names
        the new raster cover, and it is copied into the container when
        ``existing_image`` is not given
    :param existing_image: name of an image already in the container to
        use as the raster cover, or a false value
    :param options: optional mapping with ``keep_aspect`` and ``no_svg``
        keys; when ``None`` the 'epub_output' conversion defaults are used
    :return: ``(raster_cover, titlepage)`` container names
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition('.')[-1].lower()
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        manifest_id = {v: k for k, v in container.manifest_id_map.iteritems()}[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)

        with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
            shutil.copyfileobj(src, dest)

    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)

    if no_svg:
        # The doubled percent sign survives the % formatting applied to
        # the template further down
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        # Fallback dimensions used when the real ones cannot be read
        width, height = 600, 800
        try:
            if existing_image:
                width, height = identify_data(
                    container.raw_data(existing_image, decode=False))[:2]
            else:
                width, height = identify(cover_path)[:2]
        except Exception:
            # Exception, not a bare except, so that KeyboardInterrupt and
            # SystemExit are not swallowed
            container.log.exception("Failed to get width and height of cover")
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
        templ = templ.replace('__viewbox__', '0 0 %d %d' % (width, height))
        templ = templ.replace('__width__', str(width))
        templ = templ.replace('__height__', str(height))

    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'), container.opf_name)
    raw = templ % container.name_to_href(raster_cover, titlepage).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # We have to make sure the raster cover item has id="cover" for the
    # Nook firmware. Anything else currently using that id is renamed.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    # The titlepage becomes the first spine item
    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    guide = container.opf_get_or_create('guide')
    container.insert_into_xml(guide, guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name)))
    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def __init__(self, mf):
    """Split the records of the MOBI file ``mf`` into typed collections.

    Copies the raw data and headers from ``mf``, parses the primary and
    secondary index records when present, and sorts the remaining records
    into text, image, font and binary records.
    """
    for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
              'huffman_record_nums',):
        setattr(self, x, getattr(mf, x))

    self.index_header = self.index_record = None
    self.indexing_record_nums = set()
    pir = self.mobi_header.primary_index_record
    if pir != NULL_INDEX:
        self.index_header = IndexHeader(self.records[pir])
        numi = self.index_header.index_count
        self.cncx = CNCX(self.records[
            pir+1+numi:pir+1+numi+self.index_header.num_of_cncx_blocks],
            self.index_header.index_encoding)
        self.index_record = IndexRecord(self.records[pir+1:pir+1+numi],
                                        self.index_header, self.cncx)
        self.indexing_record_nums = set(xrange(
            pir, pir+1+numi+self.index_header.num_of_cncx_blocks))

    self.secondary_index_record = self.secondary_index_header = None
    sir = self.mobi_header.secondary_index_record
    if sir != NULL_INDEX:
        self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
        numi = self.secondary_index_header.index_count
        self.indexing_record_nums.add(sir)
        # NOTE(review): self.cncx is only assigned in the primary index
        # branch above; a file with a secondary but no primary index would
        # fail here -- confirm whether that can occur.
        self.secondary_index_record = IndexRecord(
            self.records[sir+1:sir+1+numi], self.secondary_index_header,
            self.cncx)
        self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))

    ntr = self.mobi_header.number_of_text_records
    fntbr = self.mobi_header.first_non_book_record
    fii = self.mobi_header.first_image_index
    if fntbr == NULL_INDEX:
        fntbr = len(self.records)
    self.text_records = [
        TextRecord(r, self.records[r], self.mobi_header.extra_data_flags,
                   mf.decompress6)
        for r in xrange(1, min(len(self.records), ntr+1))]
    self.image_records, self.binary_records = [], []
    self.font_records = []

    # Record types that are never images; built once, not per record
    non_image_types = frozenset((
        b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
        b'FDST', b'DATP', b'AUDI', b'VIDE', b'FONT'))
    image_index = 0
    for i in xrange(fntbr, len(self.records)):
        if i in self.indexing_record_nums or i in self.huffman_record_nums:
            continue
        image_index += 1
        r = self.records[i]
        fmt = None
        if i >= fii and r.raw[:4] not in non_image_types:
            try:
                width, height, fmt = identify_data(r.raw)
            except Exception:
                # Not identifiable as an image, classified below.
                # Exception, not a bare except, so that KeyboardInterrupt
                # and SystemExit are not swallowed.
                pass
        if fmt is not None:
            self.image_records.append(ImageRecord(image_index, r, fmt))
        elif r.raw[:4] == b'FONT':
            self.font_records.append(FontRecord(i, r))
        else:
            self.binary_records.append(BinaryRecord(i, r))

    if self.index_record is not None:
        self.tbs_indexing = TBSIndexing(
            self.text_records, self.index_record.indices,
            self.mobi_header.type_raw)
def test_get(self):  # {{{
    'Test /get'
    with self.create_server() as server:
        db = server.handler.router.ctx.library_broker.get(None)
        conn = server.connect()

        def get(what, book_id, library_id=None, q=''):
            # Issue GET /get/<what>/<book_id>[/<library_id>][?<q>] and
            # return the response object together with its body
            q = ('?' + q) if q else q
            conn.request(
                'GET', '/get/%s/%s' % (what, book_id) +
                (('/' + library_id) if library_id else '') + q)
            r = conn.getresponse()
            return r, r.read()

        # Test various invalid parameters
        def bad(*args):
            r, data = get(*args)
            self.ae(r.status, httplib.NOT_FOUND)
        bad('xxx', 1)
        bad('fmt1', 10)
        bad('fmt1', 1, 'zzzz')
        bad('fmt1', 'xx')

        # Test simple fetching of format without metadata update
        r, data = get('fmt1', 1, db.server_library_id)
        self.ae(data, db.format(1, 'fmt1'))
        self.assertIsNotNone(r.getheader('Content-Disposition'))
        self.ae(r.getheader('Used-Cache'), 'no')
        r, data = get('fmt1', 1)
        self.ae(data, db.format(1, 'fmt1'))
        self.ae(r.getheader('Used-Cache'), 'yes')

        # Test fetching of format with metadata update
        raw = P('quick_start/eng.epub', data=True)
        r, data = get('epub', 1)
        self.ae(r.status, httplib.OK)
        etag = r.getheader('ETag')
        self.assertIsNotNone(etag)
        self.ae(r.getheader('Used-Cache'), 'no')
        self.assertTrue(data.startswith(b'PK'))
        self.assertGreaterEqual(len(data), len(raw))
        db.set_field('title', {1: 'changed'})
        r, data = get('epub', 1)
        self.assertNotEqual(r.getheader('ETag'), etag)
        etag = r.getheader('ETag')
        self.ae(r.getheader('Used-Cache'), 'no')
        mi = get_metadata(BytesIO(data), extract_cover=False)
        self.ae(mi.title, 'changed')
        r, data = get('epub', 1)
        self.ae(r.getheader('Used-Cache'), 'yes')

        # Test plugboards
        import calibre.library.save_to_disk as c
        orig, c.DEBUG = c.DEBUG, False
        try:
            db.set_pref(
                'plugboards', {
                    u'epub': {
                        u'content_server': [[u'changed, {title}', u'title']]
                    }
                })
            # this is needed as the cache is not invalidated for plugboard changes
            db.set_field('title', {1: 'again'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed, again')
        finally:
            c.DEBUG = orig

        # Test the serving of covers
        def change_cover(count, book_id=2):
            cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
            # Change the cover of the requested book; the id used to be
            # hard-coded to 2, ignoring the book_id argument
            db.set_cover({book_id: I('lt.png', data=True)})
            t = time.time() + 1 + count
            # Ensure mtime changes, needed on OS X where HFS+ has a 1s
            # mtime resolution
            os.utime(cpath, (t, t))

        r, data = get('cover', 1)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(1))
        self.ae(r.getheader('Used-Cache'), 'no')
        self.ae(r.getheader('Content-Type'), 'image/jpeg')
        r, data = get('cover', 1)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(1))
        self.ae(r.getheader('Used-Cache'), 'yes')
        r, data = get('cover', 3)
        self.ae(r.status, httplib.NOT_FOUND)
        r, data = get('thumb', 1)
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (60, 60, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')
        r, data = get('thumb', 1)
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Used-Cache'), 'yes')
        r, data = get('thumb', 1, q='sz=100')
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (100, 100, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')
        r, data = get('thumb', 1, q='sz=100x100')
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Used-Cache'), 'yes')
        change_cover(1, 1)
        r, data = get('thumb', 1, q='sz=100')
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (100, 100, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')

        # Test file sharing in cache
        test_share_open()
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
        f, fdata = share_open(path, 'rb'), data
        # Now force an update
        change_cover(1)
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
        f2, f2data = share_open(path, 'rb'), data
        # Do it again
        change_cover(2)
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        # The files opened earlier must still yield their original data
        self.ae(f.read(), fdata)
        self.ae(f2.read(), f2data)

        # Test serving of metadata as opf
        r, data = get('opf', 1)
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Content-Type'), 'application/oebps-package+xml; charset=UTF-8')
        self.assertIsNotNone(r.getheader('Last-Modified'))
        opf = OPF(BytesIO(data), populate_spine=False, try_to_guess_cover=False)
        self.ae(db.field_for('title', 1), opf.title)
        self.ae(db.field_for('authors', 1), tuple(opf.authors))
        conn.request('GET', '/get/opf/1', headers={'Accept-Encoding': 'gzip'})
        r = conn.getresponse()
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Content-Encoding'), 'gzip')
        raw = r.read()
        self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)

        # Test serving metadata as json
        r, data = get('json', 1)
        self.ae(r.status, httplib.OK)
        self.ae(db.field_for('title', 1), json.loads(data)['title'])
        conn.request('GET', '/get/json/1', headers={'Accept-Encoding': 'gzip'})
        r = conn.getresponse()
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Content-Encoding'), 'gzip')
        raw = r.read()
        self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)
def mobimlize_elem(self, elem, stylizer, bstate, istates, ignore_valign=False):
    """Convert ``elem`` and, recursively, its children into MOBI markup.

    A new inline state is derived from the element's computed style, pushed
    onto ``istates`` for the duration of the call and popped again at the
    end. Text is emitted through ``self.mobimlize_content``.

    :param elem: Element to convert. Its text, tail, tag and attributes may
        be modified in place.
    :param stylizer: Object whose ``style(elem)`` returns the computed style.
    :param bstate: Block level state; mutated in place.
    :param istates: Stack (list) of inline states; the last entry is the
        state inherited from the parent element.
    :param ignore_valign: When true the vertical-align (sup/sub) handling is
        skipped. The method passes True when it re-enters itself from that
        handling.
    """
    # Only string-tagged elements in the XHTML namespace are converted.
    if not isinstance(elem.tag, basestring) or namespace(elem.tag) != XHTML_NS:
        return
    style = stylizer.style(elem)
    # <mbp:frame-set/> does not exist lalalala
    if style["display"] in ("none", "oeb-page-head", "oeb-page-foot") or style["visibility"] == "hidden":
        id_ = elem.get("id", None)
        if id_:
            # Keep anchors so people can use display:none
            # to generate hidden TOCs
            # clear() wipes the element, so the id and tail are put back and
            # the element is turned into an <a>.
            tail = elem.tail
            elem.clear()
            elem.text = None
            elem.set("id", id_)
            elem.tail = tail
            elem.tag = XHTML("a")
        else:
            return
    tag = barename(elem.tag)
    # Start from a shallow copy of the inherited inline state.
    istate = copy.copy(istates[-1])
    istate.rendered = False
    istate.list_num = 0
    if tag == "ol" and "start" in elem.attrib:
        # Stored as start - 1; a non-integer start attribute is ignored.
        try:
            istate.list_num = int(elem.attrib["start"]) - 1
        except:
            pass
    istates.append(istate)
    left = 0
    # Normalise the display value used for the block/inline decision:
    # table cells count as inline, every other table* value as block.
    display = style["display"]
    if display == "table-cell":
        display = "inline"
    elif display.startswith("table"):
        display = "block"
    # Floats and <br> are never treated as blocks.
    isblock = not display.startswith("inline") and style["display"] != "none"
    isblock = isblock and style["float"] == "none"
    isblock = isblock and tag != "br"
    if isblock:
        bstate.para = None
        istate.halign = style["text-align"]
        rawti = style._get("text-indent")
        istate.indent = style["text-indent"]
        if hasattr(rawti, "strip") and "%" in rawti:
            # We have a percentage text indent, these can come out looking
            # too large if the user chooses a wide output profile like
            # tablet
            istate.indent = min(style._unit_convert(rawti, base=500), istate.indent)
        if style["margin-left"] == "auto" and style["margin-right"] == "auto":
            istate.halign = "center"
        margin = asfloat(style["margin-left"])
        padding = asfloat(style["padding-left"])
        # The left margin/padding of <body> itself is not added.
        if tag != "body":
            left = margin + padding
        istate.left += left
        # Keep the larger of the pending vertical margin and this element's
        # top margin; top padding absorbs the pending margin into vpadding.
        vmargin = asfloat(style["margin-top"])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style["padding-top"])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    elif not istate.href:
        # Inline element outside a link: horizontal margin/padding is
        # emulated with non-breaking spaces, three per font-size of space.
        margin = asfloat(style["margin-left"])
        padding = asfloat(style["padding-left"])
        lspace = margin + padding
        if lspace > 0:
            spaces = int(round((lspace * 3) / style["font-size"]))
            elem.text = (u"\xa0" * spaces) + (elem.text or "")
        margin = asfloat(style["margin-right"])
        padding = asfloat(style["padding-right"])
        rspace = margin + padding
        if rspace > 0:
            spaces = int(round((rspace * 3) / style["font-size"]))
            if len(elem) == 0:
                elem.text = (elem.text or "") + (u"\xa0" * spaces)
            else:
                # With children present the padding is appended to the text
                # of the last child.
                last = elem[-1]
                last.text = (last.text or "") + (u"\xa0" * spaces)
    if bstate.content and style["page-break-before"] in PAGE_BREAKS:
        bstate.pbreak = True
    # Copy the character formatting from the computed style.
    istate.fsize = self.mobimlize_font(style["font-size"])
    istate.italic = True if style["font-style"] == "italic" else False
    weight = style["font-weight"]
    istate.bold = weight in ("bold", "bolder") or asfloat(weight) > 400
    istate.preserve = style["white-space"] == "pre"
    istate.pre_wrap = style["white-space"] == "pre-wrap"
    istate.bgcolor = style["background-color"]
    istate.fgcolor = style["color"]
    istate.strikethrough = style.effective_text_decoration == "line-through"
    istate.underline = style.effective_text_decoration == "underline"
    # Reduce the font family to monospace / sans-serif / serif by substring
    # matching; values without a lower() method count as serif.
    ff = style["font-family"].lower() if hasattr(style["font-family"], "lower") else ""
    if "monospace" in ff or "courier" in ff or ff.endswith(" mono"):
        istate.family = "monospace"
    elif "sans-serif" in ff or "sansserif" in ff or "verdana" in ff or "arial" in ff or "helvetica" in ff:
        istate.family = "sans-serif"
    else:
        istate.family = "serif"
    # Both id and name attributes are recorded as anchor ids.
    if "id" in elem.attrib:
        istate.ids.add(elem.attrib["id"])
    if "name" in elem.attrib:
        istate.ids.add(elem.attrib["name"])
    if tag == "a" and "href" in elem.attrib:
        istate.href = elem.attrib["href"]
    istate.attrib.clear()
    if tag == "img" and "src" in elem.attrib:
        istate.attrib["src"] = elem.attrib["src"]
        istate.attrib["align"] = "baseline"
        cssdict = style.cssdict()
        valign = cssdict.get("vertical-align", None)
        if valign in ("top", "bottom", "middle"):
            istate.attrib["align"] = valign
        for prop in ("width", "height"):
            if cssdict[prop] != "auto":
                value = style[prop]
                # A size equal to the profile's own dimension becomes 100%.
                if value == getattr(self.profile, prop):
                    result = "100%"
                else:
                    # Amazon's renderer does not support
                    # img sizes in units other than px
                    # See #7520 for test case
                    # presumably value is in points (72 per inch) -- confirm
                    try:
                        pixs = int(round(float(value) / (72.0 / self.profile.dpi)))
                    except:
                        # Unconvertible value: leave this attribute unset.
                        continue
                    result = str(pixs)
                istate.attrib[prop] = result
        if "width" not in istate.attrib or "height" not in istate.attrib:
            # At least one dimension is missing: read the real image size
            # from the manifest item.
            href = self.current_spine_item.abshref(elem.attrib["src"])
            try:
                item = self.oeb.manifest.hrefs[urlnormalize(href)]
            except:
                self.oeb.logger.warn("Failed to find image:", href)
            else:
                try:
                    width, height = identify_data(item.data)[:2]
                except:
                    self.oeb.logger.warn("Invalid image:", href)
                else:
                    if "width" not in istate.attrib and "height" not in istate.attrib:
                        istate.attrib["width"] = str(width)
                        istate.attrib["height"] = str(height)
                    else:
                        # Only one dimension given: derive the other from the
                        # image's aspect ratio. If the given one is not an
                        # integer string (e.g. "100%") the image's own size
                        # is used instead.
                        ar = float(width) / float(height)
                        if "width" not in istate.attrib:
                            try:
                                width = int(istate.attrib["height"]) * ar
                            except:
                                pass
                            istate.attrib["width"] = str(int(width))
                        else:
                            try:
                                height = int(istate.attrib["width"]) / ar
                            except:
                                pass
                            istate.attrib["height"] = str(int(height))
                item.unload_data_from_memory()
    elif tag == "hr" and asfloat(style["width"]) > 0:
        # Rule width is expressed as a percentage of the profile width.
        prop = style["width"] / self.profile.width
        istate.attrib["width"] = "%d%%" % int(round(prop * 100))
    # NOTE(review): `display` was rewritten above ("table-cell" -> "inline",
    # other "table*" -> "block"), so the three comparisons below look like
    # they can never be true -- confirm whether style["display"] was meant.
    elif display == "table":
        tag = "table"
    elif display == "table-row":
        tag = "tr"
    elif display == "table-cell":
        tag = "td"
    if tag in TABLE_TAGS and self.ignore_tables:
        tag = "span" if tag == "td" else "div"
    if tag in ("table", "td", "tr"):
        # Turn CSS background colour and borders into element attributes so
        # the attribute copy below picks them up.
        col = style.backgroundColor
        if col:
            elem.set("bgcolor", col)
        css = style.cssdict()
        if "border" in css or "border-width" in css:
            elem.set("border", "1")
    if tag in TABLE_TAGS:
        for attr in ("rowspan", "colspan", "width", "border", "scope", "bgcolor"):
            if attr in elem.attrib:
                istate.attrib[attr] = elem.attrib[attr]
    if tag == "q":
        # Wrap the quotation in curly double quotes.
        t = elem.text
        if not t:
            t = ""
        elem.text = u"\u201c" + t
        t = elem.tail
        if not t:
            t = ""
        elem.tail = u"\u201d" + t
    text = None
    if elem.text:
        if istate.preserve or istate.pre_wrap:
            text = elem.text
        elif (
            len(elem) > 0
            and isspace(elem.text)
            and hasattr(elem[0].tag, "rpartition")
            and elem[0].tag.rpartition("}")[-1] not in INLINE_TAGS
        ):
            # Whitespace-only text in front of a first child whose local tag
            # name is not in INLINE_TAGS is dropped.
            text = None
        else:
            text = COLLAPSE.sub(" ", elem.text)
    valign = style["vertical-align"]
    not_baseline = valign in ("super", "sub", "text-top", "text-bottom", "top", "bottom") or (
        isinstance(valign, (float, int)) and abs(valign) != 0
    )
    issup = valign in ("super", "text-top", "top") or (isinstance(valign, (float, int)) and valign > 0)
    vtag = "sup" if issup else "sub"
    if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
        # Render the element into a scratch tree first, then graft the
        # result into <sup>/<sub><small> inside the current paragraph.
        nroot = etree.Element(XHTML("html"), nsmap=MOBI_NSMAP)
        vbstate = BlockState(etree.SubElement(nroot, XHTML("body")))
        vbstate.para = etree.SubElement(vbstate.body, XHTML("p"))
        self.mobimlize_elem(elem, stylizer, vbstate, istates, ignore_valign=True)
        # Drop the state pushed by this call; the stack is never left empty.
        if len(istates) > 0:
            istates.pop()
        if len(istates) == 0:
            istates.append(FormatState())
        at_start = bstate.para is None
        if at_start:
            self.mobimlize_content("span", "", bstate, istates)
        parent = bstate.para if bstate.inline is None else bstate.inline
        if parent is not None:
            vtag = etree.SubElement(parent, XHTML(vtag))
            vtag = etree.SubElement(vtag, XHTML("small"))
            # Add anchors
            for child in vbstate.body:
                if child is not vbstate.para:
                    vtag.append(child)
                else:
                    break
            if vbstate.para is not None:
                if vbstate.para.text:
                    vtag.text = vbstate.para.text
                for child in vbstate.para:
                    vtag.append(child)
            return
    if tag == "blockquote":
        # mobi_ignore_margins is forced off while the blockquote's content
        # is processed and restored afterwards.
        old_mim = self.opts.mobi_ignore_margins
        self.opts.mobi_ignore_margins = False
    if (
        text
        or tag in CONTENT_TAGS
        or tag in NESTABLE_TAGS
        or (
            # We have an id but no text and no children, the id should still
            # be added.
            istate.ids
            and tag in ("a", "span", "i", "b", "u")
            and len(elem) == 0
        )
    ):
        if tag == "li" and len(istates) > 1 and "value" in elem.attrib:
            # <li value=N>: store N - 1 on the state below this element's
            # own; a non-integer value is ignored.
            try:
                value = int(elem.attrib["value"])
                istates[-2].list_num = value - 1
            except:
                pass
        self.mobimlize_content(tag, text, bstate, istates)
    for child in elem:
        self.mobimlize_elem(child, stylizer, bstate, istates)
        # The child's tail text belongs to this element.
        tail = None
        if child.tail:
            if istate.preserve or istate.pre_wrap:
                tail = child.tail
            elif bstate.para is None and isspace(child.tail):
                tail = None
            else:
                tail = COLLAPSE.sub(" ", child.tail)
        if tail:
            self.mobimlize_content(tag, tail, bstate, istates)
    if tag == "blockquote":
        self.opts.mobi_ignore_margins = old_mim
    if bstate.content and style["page-break-after"] in PAGE_BREAKS:
        bstate.pbreak = True
    if isblock:
        para = bstate.para
        # A paragraph holding nothing but a single non-breaking space.
        if para is not None and para.text == u"\xa0" and len(para) < 1:
            if style.height > 2:
                para.getparent().replace(para, etree.Element(XHTML("br")))
            else:
                # This is too small to be rendered effectively, drop it
                para.getparent().remove(para)
        bstate.para = None
        bstate.istate = None
        # Bottom margin/padding, handled the same way as the top ones.
        vmargin = asfloat(style["margin-bottom"])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style["padding-bottom"])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    if bstate.nested and bstate.nested[-1].tag == elem.tag:
        bstate.nested.pop()
    istates.pop()
def process_images(self, soup, baseurl):
    """Save the images referenced by ``soup`` to disk and relink them.

    Every ``<img>`` tag with a ``src`` attribute is handled: ``data:image/``
    URIs are base64 decoded, everything else is resolved against ``baseurl``
    and fetched with ``self.fetch_url``. The bytes are written into an
    ``images`` directory below ``self.current_dir``, the tag's ``src`` is
    pointed at the written file and the URL is remembered in
    ``self.imagemap`` so it is reused instead of fetched again. Images that
    cannot be decoded, fetched or processed are skipped.

    :param soup: Parsed document; its ``<img>`` tags are modified in place.
    :param baseurl: URL used to resolve relative image URLs.
    """
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)

    def is_image_with_src(node):
        # has_key() is the attribute test; kept as in the original.
        return node.name.lower() == 'img' and node.has_key('src')

    saved = 0
    for tag in soup.findAll(is_image_with_src):
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            # Embedded image: the payload follows the first comma.
            try:
                data = b64decode(iurl.partition(',')[-1])
            except:
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urlparse.urlsplit(iurl).scheme:
                iurl = urlparse.urljoin(baseurl, iurl, False)
            # Reuse the file written for an earlier occurrence of this URL.
            with self.imagemap_lock:
                already_saved = self.imagemap.has_key(iurl)
                if already_saved:
                    tag['src'] = self.imagemap[iurl]
            if already_saved:
                continue
            try:
                data = self.fetch_url(iurl)
                if data == 'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue

        saved += 1
        fname = ascii_filename('img%d' % saved)
        if isinstance(fname, unicode):
            fname = fname.encode('ascii', 'replace')
        itype = what(None, data)

        if itype is None and b'<svg' in data[:1024]:
            # SVG image: stored as-is.
            imgpath = os.path.join(diskpath, fname + '.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as out:
                out.write(data)
            tag['src'] = imgpath
            continue

        try:
            if itype not in ('png', 'jpg', 'jpeg'):
                # Anything else is re-encoded: GIF to PNG, the rest to JPEG.
                itype = 'png' if itype == 'gif' else 'jpg'
                im = Image()
                im.load(data)
                data = im.export(itype)
            if self.compress_news_images and itype in ('jpg', 'jpeg'):
                try:
                    data = self.rescale_image(data)
                except:
                    self.log.exception('failed to compress image ' + iurl)
                    identify_data(data)
            else:
                identify_data(data)
            # Moon+ apparently cannot handle .jpeg files
            if itype == 'jpeg':
                itype = 'jpg'
            imgpath = os.path.join(diskpath, fname + '.' + itype)
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as out:
                out.write(data)
            tag['src'] = imgpath
        except:
            # Any failure while processing one image leaves its tag as-is.
            traceback.print_exc()
def create_epub_cover(container, cover_path, existing_image, options=None):
    """Add a cover title page to the EPUB held by ``container``.

    A ``titlepage.xhtml`` item is generated from one of the ``CoverManager``
    templates, inserted as the first spine item and referenced from the
    guide; a ``<meta name="cover">`` entry pointing at the raster image is
    added to the metadata.

    :param container: Book container that is modified in place.
    :param cover_path: Path of the cover image file. Its extension names the
        new manifest item and the file is copied into the container when
        ``existing_image`` is false.
    :param existing_image: Name of an image already in the container to use
        as the cover, or a false value to import ``cover_path``.
    :param options: Optional mapping read for ``keep_aspect`` and ``no_svg``.
        When ``None`` the ``epub_output`` defaults are read for
        ``preserve_cover_aspect_ratio`` and ``no_svg_cover`` instead.
    :return: ``(raster_cover, titlepage)``, the container names of the cover
        image and of the generated title page.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    # Evaluated on both paths, although only a newly imported image uses it.
    extension = cover_path.rpartition('.')[-1].lower()

    if not existing_image:
        raster_cover_item = container.generate_item('cover.' + extension, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)
        with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
            shutil.copyfileobj(src, dest)
    else:
        raster_cover = existing_image
        # Invert the id -> name map to find the manifest id of the image.
        name_to_id = {name: item_id for item_id, name in container.manifest_id_map.iteritems()}
        manifest_id = name_to_id[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]

    if options is not None:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    else:
        defaults = load_defaults('epub_output')
        keep_aspect = defaults.get('preserve_cover_aspect_ratio', False)
        no_svg = defaults.get('no_svg_cover', False)

    if no_svg:
        # The doubled %% survives the %-formatting applied further down.
        template = CoverManager.NONSVG_TEMPLATE.replace('__style__', 'style="height: 100%%"')
    else:
        # Fallback size, used when the image cannot be measured.
        width, height = 600, 800
        try:
            if existing_image:
                image_data = container.raw_data(existing_image, decode=False)
                width, height = identify_data(image_data)[:2]
            else:
                width, height = identify(cover_path)[:2]
        except:
            container.log.exception("Failed to get width and height of cover")
        if keep_aspect:
            aspect = 'xMidYMid meet'
        else:
            aspect = 'none'
        template = CoverManager.SVG_TEMPLATE.replace('__ar__', aspect)
        substitutions = (
            ('__viewbox__', '0 0 %d %d' % (width, height)),
            ('__width__', str(width)),
            ('__height__', str(height)),
        )
        for placeholder, value in substitutions:
            template = template.replace(placeholder, value)

    titlepage_item = container.generate_item('titlepage.xhtml', id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'), container.opf_name)
    cover_href = container.name_to_href(raster_cover).encode('utf-8')
    raw = template % cover_href
    with container.open(titlepage, 'wb') as out:
        out.write(raw)

    # The raster cover item must have id="cover" (needed by the Nook
    # firmware), so whatever currently uses that id is renamed first.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for attr, query in (('id', '//*[@id="cover"]'), ('idref', '//*[@idref="cover"]')):
            for item in container.opf_xpath(query):
                item.set(attr, newid)
        raster_cover_item.set('id', 'cover')

    # The title page becomes the first item of the spine.
    spine = container.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, itemref, index=0)

    guide = container.opf_get_or_create('guide')
    reference = guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name))
    container.insert_into_xml(guide, reference)

    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def mobimlize_elem(self, elem, stylizer, bstate, istates, ignore_valign=False):
    """Convert ``elem`` and, recursively, its children into MOBI markup.

    A new inline state is derived from the element's computed style, pushed
    onto ``istates`` for the duration of the call and popped again at the
    end. Text is emitted through ``self.mobimlize_content``.

    :param elem: Element to convert. Its text, tail, tag and attributes may
        be modified in place.
    :param stylizer: Object whose ``style(elem)`` returns the computed style.
    :param bstate: Block level state; mutated in place.
    :param istates: Stack (list) of inline states; the last entry is the
        state inherited from the parent element.
    :param ignore_valign: When true the vertical-align (sup/sub) handling is
        skipped. The method passes True when it re-enters itself from that
        handling.
    """
    # Only string-tagged elements in the XHTML namespace are converted.
    if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
        return
    style = stylizer.style(elem)
    # <mbp:frame-set/> does not exist lalalala
    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
        id_ = elem.get('id', None)
        if id_:
            # Keep anchors so people can use display:none
            # to generate hidden TOCs
            # clear() wipes the element, so the id and tail are put back and
            # the element is turned into an <a>.
            tail = elem.tail
            elem.clear()
            elem.text = None
            elem.set('id', id_)
            elem.tail = tail
            elem.tag = XHTML('a')
        else:
            return
    tag = barename(elem.tag)
    # Start from a shallow copy of the inherited inline state.
    istate = copy.copy(istates[-1])
    istate.rendered = False
    istate.list_num = 0
    if tag == 'ol' and 'start' in elem.attrib:
        # Stored as start - 1; a non-integer start attribute is ignored.
        try:
            istate.list_num = int(elem.attrib['start'])-1
        except:
            pass
    istates.append(istate)
    left = 0
    # Normalise the display value used for the block/inline decision:
    # table cells count as inline, every other table* value as block.
    display = style['display']
    if display == 'table-cell':
        display = 'inline'
    elif display.startswith('table'):
        display = 'block'
    # Floats and <br> are never treated as blocks.
    isblock = (not display.startswith('inline') and style['display'] != 'none')
    isblock = isblock and style['float'] == 'none'
    isblock = isblock and tag != 'br'
    if isblock:
        bstate.para = None
        istate.halign = style['text-align']
        istate.indent = style['text-indent']
        if style['margin-left'] == 'auto' \
                and style['margin-right'] == 'auto':
            istate.halign = 'center'
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        # The left margin/padding of <body> itself is not added.
        if tag != 'body':
            left = margin + padding
        istate.left += left
        # Keep the larger of the pending vertical margin and this element's
        # top margin; top padding absorbs the pending margin into vpadding.
        vmargin = asfloat(style['margin-top'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-top'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    elif not istate.href:
        # Inline element outside a link: horizontal margin/padding is
        # emulated with non-breaking spaces, three per font-size of space.
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        lspace = margin + padding
        if lspace > 0:
            spaces = int(round((lspace * 3) / style['font-size']))
            elem.text = (u'\xa0' * spaces) + (elem.text or '')
        margin = asfloat(style['margin-right'])
        padding = asfloat(style['padding-right'])
        rspace = margin + padding
        if rspace > 0:
            spaces = int(round((rspace * 3) / style['font-size']))
            if len(elem) == 0:
                elem.text = (elem.text or '') + (u'\xa0' * spaces)
            else:
                # With children present the padding is appended to the text
                # of the last child.
                last = elem[-1]
                last.text = (last.text or '') + (u'\xa0' * spaces)
    if bstate.content and style['page-break-before'] in PAGE_BREAKS:
        bstate.pbreak = True
    # Copy the character formatting from the computed style.
    istate.fsize = self.mobimlize_font(style['font-size'])
    istate.italic = True if style['font-style'] == 'italic' else False
    weight = style['font-weight']
    istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
    # Both "pre" and "pre-wrap" keep whitespace untouched here.
    istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
    istate.bgcolor = style['background-color']
    istate.fgcolor = style['color']
    istate.strikethrough = style.effective_text_decoration == 'line-through'
    istate.underline = style.effective_text_decoration == 'underline'
    # Reduce the font family to monospace / sans-serif / serif by substring
    # matching; a false font-family value counts as serif.
    ff = style['font-family'].lower() if style['font-family'] else ''
    if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
        istate.family = 'monospace'
    elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
            'arial' in ff or 'helvetica' in ff):
        istate.family = 'sans-serif'
    else:
        istate.family = 'serif'
    # Both id and name attributes are recorded as anchor ids.
    if 'id' in elem.attrib:
        istate.ids.add(elem.attrib['id'])
    if 'name' in elem.attrib:
        istate.ids.add(elem.attrib['name'])
    if tag == 'a' and 'href' in elem.attrib:
        istate.href = elem.attrib['href']
    istate.attrib.clear()
    if tag == 'img' and 'src' in elem.attrib:
        istate.attrib['src'] = elem.attrib['src']
        istate.attrib['align'] = 'baseline'
        cssdict = style.cssdict()
        valign = cssdict.get('vertical-align', None)
        if valign in ('top', 'bottom', 'middle'):
            istate.attrib['align'] = valign
        for prop in ('width', 'height'):
            if cssdict[prop] != 'auto':
                value = style[prop]
                # A size equal to the profile's own dimension becomes 100%.
                if value == getattr(self.profile, prop):
                    result = '100%'
                else:
                    # Amazon's renderer does not support
                    # img sizes in units other than px
                    # See #7520 for test case
                    # presumably value is in points (72 per inch) -- confirm
                    try:
                        pixs = int(round(float(value) / \
                            (72./self.profile.dpi)))
                    except:
                        # Unconvertible value: leave this attribute unset.
                        continue
                    result = str(pixs)
                istate.attrib[prop] = result
        if 'width' not in istate.attrib or 'height' not in istate.attrib:
            # At least one dimension is missing: read the real image size
            # from the manifest item.
            href = self.current_spine_item.abshref(elem.attrib['src'])
            try:
                item = self.oeb.manifest.hrefs[urlnormalize(href)]
            except:
                self.oeb.logger.warn('Failed to find image:', href)
            else:
                try:
                    width, height = identify_data(item.data)[:2]
                except:
                    self.oeb.logger.warn('Invalid image:', href)
                else:
                    if 'width' not in istate.attrib and 'height' not in \
                            istate.attrib:
                        istate.attrib['width'] = str(width)
                        istate.attrib['height'] = str(height)
                    else:
                        # Only one dimension given: derive the other from the
                        # image's aspect ratio. If the given one is not an
                        # integer string (e.g. '100%') the image's own size
                        # is used instead.
                        ar = float(width)/float(height)
                        if 'width' not in istate.attrib:
                            try:
                                width = int(istate.attrib['height'])*ar
                            except:
                                pass
                            istate.attrib['width'] = str(int(width))
                        else:
                            try:
                                height = int(istate.attrib['width'])/ar
                            except:
                                pass
                            istate.attrib['height'] = str(int(height))
                item.unload_data_from_memory()
    elif tag == 'hr' and asfloat(style['width']) > 0:
        # Rule width is expressed as a percentage of the profile width.
        prop = style['width'] / self.profile.width
        istate.attrib['width'] = "%d%%" % int(round(prop * 100))
    # NOTE(review): `display` was rewritten above ('table-cell' -> 'inline',
    # other 'table*' -> 'block'), so the three comparisons below look like
    # they can never be true -- confirm whether style['display'] was meant.
    elif display == 'table':
        tag = 'table'
    elif display == 'table-row':
        tag = 'tr'
    elif display == 'table-cell':
        tag = 'td'
    if tag in TABLE_TAGS and self.ignore_tables:
        tag = 'span' if tag == 'td' else 'div'
    if tag in ('table', 'td', 'tr'):
        # Turn CSS background colour and borders into element attributes so
        # the attribute copy below picks them up.
        col = style.backgroundColor
        if col:
            elem.set('bgcolor', col)
        css = style.cssdict()
        if 'border' in css or 'border-width' in css:
            elem.set('border', '1')
    if tag in TABLE_TAGS:
        for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                'bgcolor'):
            if attr in elem.attrib:
                istate.attrib[attr] = elem.attrib[attr]
    if tag == 'q':
        # Wrap the quotation in curly double quotes.
        t = elem.text
        if not t:
            t = ''
        elem.text = u'\u201c' + t
        t = elem.tail
        if not t:
            t = ''
        elem.tail = u'\u201d' + t
    text = None
    if elem.text:
        if istate.preserve:
            text = elem.text
        elif len(elem) > 0 and isspace(elem.text):
            # Whitespace-only text in front of the first child is dropped.
            text = None
        else:
            text = COLLAPSE.sub(' ', elem.text)
    valign = style['vertical-align']
    not_baseline = valign in ('super', 'sub', 'text-top',
            'text-bottom', 'top', 'bottom') or (
            isinstance(valign, (float, int)) and abs(valign) != 0)
    issup = valign in ('super', 'text-top', 'top') or (
        isinstance(valign, (float, int)) and valign > 0)
    vtag = 'sup' if issup else 'sub'
    if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
        # Render the element into a scratch tree first, then graft the
        # result into <sup>/<sub><small> inside the current paragraph.
        nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
        vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
        vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
        self.mobimlize_elem(elem, stylizer, vbstate, istates,
                ignore_valign=True)
        # Drop the state pushed by this call; the stack is never left empty.
        if len(istates) > 0:
            istates.pop()
        if len(istates) == 0:
            istates.append(FormatState())
        at_start = bstate.para is None
        if at_start:
            self.mobimlize_content('span', '', bstate, istates)
        parent = bstate.para if bstate.inline is None else bstate.inline
        if parent is not None:
            vtag = etree.SubElement(parent, XHTML(vtag))
            vtag = etree.SubElement(vtag, XHTML('small'))
            # Add anchors
            for child in vbstate.body:
                if child is not vbstate.para:
                    vtag.append(child)
                else:
                    break
            # Only the children of the scratch paragraph are moved over.
            if vbstate.para is not None:
                for child in vbstate.para:
                    vtag.append(child)
            return
    if tag == 'blockquote':
        # mobi_ignore_margins is forced off while the blockquote's content
        # is processed and restored afterwards.
        old_mim = self.opts.mobi_ignore_margins
        self.opts.mobi_ignore_margins = False
    if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
        # We have an id but no text and no children, the id should still
        # be added.
        istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
        len(elem)==0)):
        self.mobimlize_content(tag, text, bstate, istates)
    for child in elem:
        self.mobimlize_elem(child, stylizer, bstate, istates)
        # The child's tail text belongs to this element.
        tail = None
        if child.tail:
            if istate.preserve:
                tail = child.tail
            elif bstate.para is None and isspace(child.tail):
                tail = None
            else:
                tail = COLLAPSE.sub(' ', child.tail)
        if tail:
            self.mobimlize_content(tag, tail, bstate, istates)
    if tag == 'blockquote':
        self.opts.mobi_ignore_margins = old_mim
    if bstate.content and style['page-break-after'] in PAGE_BREAKS:
        bstate.pbreak = True
    if isblock:
        para = bstate.para
        # A paragraph holding nothing but a single non-breaking space.
        if para is not None and para.text == u'\xa0' and len(para) < 1:
            if style.height > 2:
                para.getparent().replace(para, etree.Element(XHTML('br')))
            else:
                # This is too small to be rendered effectively, drop it
                para.getparent().remove(para)
        bstate.para = None
        bstate.istate = None
        # Bottom margin/padding, handled the same way as the top ones.
        vmargin = asfloat(style['margin-bottom'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-bottom'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    if bstate.nested and bstate.nested[-1].tag == elem.tag:
        bstate.nested.pop()
    istates.pop()
def test_get(self):  # {{{
    'Test /get'
    with self.create_server() as server:
        db = server.handler.router.ctx.library_broker.get(None)
        conn = server.connect()

        # Request /get/<what>/<book_id>[/<library_id>][?<q>] and return the
        # response object together with the fully read body.
        def get(what, book_id, library_id=None, q=''):
            q = ('?' + q) if q else q
            conn.request('GET', '/get/%s/%s' % (what, book_id) + (('/' + library_id) if library_id else '') + q)
            r = conn.getresponse()
            return r, r.read()

        # Test various invalid parameters
        def bad(*args):
            r, data = get(*args)
            self.ae(r.status, httplib.NOT_FOUND)
        bad('xxx', 1)
        bad('fmt1', 10)
        bad('fmt1', 1, 'zzzz')
        bad('fmt1', 'xx')

        # Test simple fetching of format without metadata update
        r, data = get('fmt1', 1, db.server_library_id)
        self.ae(data, db.format(1, 'fmt1'))
        self.assertIsNotNone(r.getheader('Content-Disposition'))
        self.ae(r.getheader('Used-Cache'), 'no')
        # The identical request again is expected to come from the cache.
        r, data = get('fmt1', 1)
        self.ae(data, db.format(1, 'fmt1'))
        self.ae(r.getheader('Used-Cache'), 'yes')

        # Test fetching of format with metadata update
        raw = P('quick_start/eng.epub', data=True)
        r, data = get('epub', 1)
        self.ae(r.status, httplib.OK)
        etag = r.getheader('ETag')
        self.assertIsNotNone(etag)
        self.ae(r.getheader('Used-Cache'), 'no')
        self.assertTrue(data.startswith(b'PK'))
        self.assertGreaterEqual(len(data), len(raw))
        # Changing the title must change the ETag and bypass the cache.
        db.set_field('title', {1:'changed'})
        r, data = get('epub', 1)
        self.assertNotEqual(r.getheader('ETag'), etag)
        etag = r.getheader('ETag')
        self.ae(r.getheader('Used-Cache'), 'no')
        mi = get_metadata(BytesIO(data), extract_cover=False)
        self.ae(mi.title, 'changed')
        r, data = get('epub', 1)
        self.ae(r.getheader('Used-Cache'), 'yes')

        # Test plugboards
        import calibre.library.save_to_disk as c
        # DEBUG is switched off for this section and restored in finally.
        orig, c.DEBUG = c.DEBUG, False
        try:
            db.set_pref('plugboards', {u'epub': {u'content_server': [[u'changed, {title}', u'title']]}})
            # this is needed as the cache is not invalidated for plugboard changes
            db.set_field('title', {1:'again'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed, again')
        finally:
            c.DEBUG = orig

        # Test the serving of covers
        # NOTE(review): set_cover() always targets book 2 while the path and
        # utime() follow book_id, so change_cover(1, 1) below replaces the
        # cover of book 2 but bumps the mtime of book 1's cover file --
        # confirm this is intended.
        def change_cover(count, book_id=2):
            cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
            db.set_cover({2:I('lt.png', data=True)})
            t = time.time() + 1 + count
            # Ensure mtime changes, needed on OS X where HFS+ has a 1s
            # mtime resolution
            os.utime(cpath, (t, t))

        r, data = get('cover', 1)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(1))
        self.ae(r.getheader('Used-Cache'), 'no')
        self.ae(r.getheader('Content-Type'), 'image/jpeg')
        r, data = get('cover', 1)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(1))
        self.ae(r.getheader('Used-Cache'), 'yes')
        r, data = get('cover', 3)
        self.ae(r.status, httplib.NOT_FOUND)
        # Thumbnails: without a size parameter 60x60 is expected.
        r, data = get('thumb', 1)
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (60, 60, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')
        r, data = get('thumb', 1)
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Used-Cache'), 'yes')
        r, data = get('thumb', 1, q='sz=100')
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (100, 100, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')
        # sz=100x100 is expected to hit the entry cached for sz=100.
        r, data = get('thumb', 1, q='sz=100x100')
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Used-Cache'), 'yes')
        change_cover(1, 1)
        r, data = get('thumb', 1, q='sz=100')
        self.ae(r.status, httplib.OK)
        self.ae(identify_data(data), (100, 100, 'jpeg'))
        self.ae(r.getheader('Used-Cache'), 'no')

        # Test file sharing in cache
        test_share_open()
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        # The Tempfile header carries the hex encoded path of the cached
        # file; it is kept open while the cover is updated below.
        path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
        f, fdata = share_open(path, 'rb'), data
        # Now force an update
        change_cover(1)
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
        f2, f2data = share_open(path, 'rb'), data
        # Do it again
        change_cover(2)
        r, data = get('cover', 2)
        self.ae(r.status, httplib.OK)
        self.ae(data, db.cover(2))
        self.ae(r.getheader('Used-Cache'), 'no')
        # The two files opened earlier must still return what was served
        # when they were opened.
        self.ae(f.read(), fdata)
        self.ae(f2.read(), f2data)

        # Test serving of metadata as opf
        r, data = get('opf', 1)
        self.ae(r.status, httplib.OK)
        self.ae(r.getheader('Content-Type'), 'application/oebps-package+xml; charset=UTF-8')
        self.assertIsNotNone(r.getheader('Last-Modified'))
        opf = OPF(BytesIO(data), populate_spine=False, try_to_guess_cover=False)
        self.ae(db.field_for('title', 1), opf.title)
        self.ae(db.field_for('authors', 1), tuple(opf.authors))
        conn.request('GET', '/get/opf/1', headers={'Accept-Encoding':'gzip'})
        r = conn.getresponse()
        self.ae(r.status, httplib.OK), self.ae(r.getheader('Content-Encoding'), 'gzip')
        raw = r.read()
        # wbits of 16 + MAX_WBITS makes zlib decode a gzip stream.
        self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)

        # Test serving metadata as json
        r, data = get('json', 1)
        self.ae(r.status, httplib.OK)
        self.ae(db.field_for('title', 1), json.loads(data)['title'])
        conn.request('GET', '/get/json/1', headers={'Accept-Encoding':'gzip'})
        r = conn.getresponse()
        self.ae(r.status, httplib.OK), self.ae(r.getheader('Content-Encoding'), 'gzip')
        raw = r.read()
        self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)
def mobimlize_elem(self, elem, stylizer, bstate, istates, ignore_valign=False):
    """Convert ``elem`` and, recursively, its children into MOBI markup.

    A new inline state is derived from the element's computed style, pushed
    onto ``istates`` for the duration of the call and popped again at the
    end. Text is emitted through ``self.mobimlize_content``.

    :param elem: Element to convert. Its text, tail, tag and attributes may
        be modified in place.
    :param stylizer: Object whose ``style(elem)`` returns the computed style.
    :param bstate: Block level state; mutated in place.
    :param istates: Stack (list) of inline states; the last entry is the
        state inherited from the parent element.
    :param ignore_valign: When true the vertical-align (sup/sub) handling is
        skipped. The method passes True when it re-enters itself from that
        handling.
    """
    # Only string-tagged elements in the XHTML namespace are converted.
    if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
        return
    style = stylizer.style(elem)
    # <mbp:frame-set/> does not exist lalalala
    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
        id_ = elem.get('id', None)
        if id_:
            # Keep anchors so people can use display:none
            # to generate hidden TOCs
            # clear() wipes the element, so the id and tail are put back and
            # the element is turned into an <a>.
            tail = elem.tail
            elem.clear()
            elem.text = None
            elem.set('id', id_)
            elem.tail = tail
            elem.tag = XHTML('a')
        else:
            return
    tag = barename(elem.tag)
    # Start from a shallow copy of the inherited inline state.
    istate = copy.copy(istates[-1])
    istate.rendered = False
    istate.list_num = 0
    if tag == 'ol' and 'start' in elem.attrib:
        # Stored as start - 1; a non-integer start attribute is ignored.
        try:
            istate.list_num = int(elem.attrib['start']) - 1
        except:
            pass
    istates.append(istate)
    left = 0
    # Normalise the display value used for the block/inline decision:
    # table cells count as inline, every other table* value as block.
    display = style['display']
    if display == 'table-cell':
        display = 'inline'
    elif display.startswith('table'):
        display = 'block'
    # Floats and <br> are never treated as blocks.
    isblock = (not display.startswith('inline') and style['display'] != 'none')
    isblock = isblock and style['float'] == 'none'
    isblock = isblock and tag != 'br'
    if isblock:
        bstate.para = None
        istate.halign = style['text-align']
        istate.indent = style['text-indent']
        if style['margin-left'] == 'auto' \
                and style['margin-right'] == 'auto':
            istate.halign = 'center'
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        # The left margin/padding of <body> itself is not added.
        if tag != 'body':
            left = margin + padding
        istate.left += left
        # Keep the larger of the pending vertical margin and this element's
        # top margin; top padding absorbs the pending margin into vpadding.
        vmargin = asfloat(style['margin-top'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-top'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    elif not istate.href:
        # Inline element outside a link: horizontal margin/padding is
        # emulated with non-breaking spaces, three per font-size of space.
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        lspace = margin + padding
        if lspace > 0:
            spaces = int(round((lspace * 3) / style['font-size']))
            elem.text = (u'\xa0' * spaces) + (elem.text or '')
        margin = asfloat(style['margin-right'])
        padding = asfloat(style['padding-right'])
        rspace = margin + padding
        if rspace > 0:
            spaces = int(round((rspace * 3) / style['font-size']))
            if len(elem) == 0:
                elem.text = (elem.text or '') + (u'\xa0' * spaces)
            else:
                # With children present the padding is appended to the text
                # of the last child.
                last = elem[-1]
                last.text = (last.text or '') + (u'\xa0' * spaces)
    if bstate.content and style['page-break-before'] in PAGE_BREAKS:
        bstate.pbreak = True
    # Copy the character formatting from the computed style.
    istate.fsize = self.mobimlize_font(style['font-size'])
    istate.italic = True if style['font-style'] == 'italic' else False
    weight = style['font-weight']
    istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
    # Both "pre" and "pre-wrap" keep whitespace untouched here.
    istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
    istate.bgcolor = style['background-color']
    istate.fgcolor = style['color']
    istate.strikethrough = style.effective_text_decoration == 'line-through'
    istate.underline = style.effective_text_decoration == 'underline'
    # Reduce the font family to monospace / sans-serif / serif by substring
    # matching; a false font-family value counts as serif.
    ff = style['font-family'].lower() if style['font-family'] else ''
    if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
        istate.family = 'monospace'
    elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
            'arial' in ff or 'helvetica' in ff):
        istate.family = 'sans-serif'
    else:
        istate.family = 'serif'
    # Both id and name attributes are recorded as anchor ids.
    if 'id' in elem.attrib:
        istate.ids.add(elem.attrib['id'])
    if 'name' in elem.attrib:
        istate.ids.add(elem.attrib['name'])
    if tag == 'a' and 'href' in elem.attrib:
        istate.href = elem.attrib['href']
    istate.attrib.clear()
    if tag == 'img' and 'src' in elem.attrib:
        istate.attrib['src'] = elem.attrib['src']
        istate.attrib['align'] = 'baseline'
        cssdict = style.cssdict()
        valign = cssdict.get('vertical-align', None)
        if valign in ('top', 'bottom', 'middle'):
            istate.attrib['align'] = valign
        for prop in ('width', 'height'):
            if cssdict[prop] != 'auto':
                value = style[prop]
                # A size equal to the profile's own dimension becomes 100%.
                if value == getattr(self.profile, prop):
                    result = '100%'
                else:
                    # Amazon's renderer does not support
                    # img sizes in units other than px
                    # See #7520 for test case
                    # presumably value is in points (72 per inch) -- confirm
                    try:
                        pixs = int(
                            round(float(value) / (72. / self.profile.dpi)))
                    except:
                        # Unconvertible value: leave this attribute unset.
                        continue
                    result = str(pixs)
                istate.attrib[prop] = result
        if 'width' not in istate.attrib or 'height' not in istate.attrib:
            # At least one dimension is missing: read the real image size
            # from the manifest item.
            href = self.current_spine_item.abshref(elem.attrib['src'])
            try:
                item = self.oeb.manifest.hrefs[urlnormalize(href)]
            except:
                self.oeb.logger.warn('Failed to find image:', href)
            else:
                try:
                    width, height = identify_data(item.data)[:2]
                except:
                    self.oeb.logger.warn('Invalid image:', href)
                else:
                    if 'width' not in istate.attrib and 'height' not in \
                            istate.attrib:
                        istate.attrib['width'] = str(width)
                        istate.attrib['height'] = str(height)
                    else:
                        # Only one dimension given: derive the other from the
                        # image's aspect ratio. If the given one is not an
                        # integer string (e.g. '100%') the image's own size
                        # is used instead.
                        ar = float(width) / float(height)
                        if 'width' not in istate.attrib:
                            try:
                                width = int(istate.attrib['height']) * ar
                            except:
                                pass
                            istate.attrib['width'] = str(int(width))
                        else:
                            try:
                                height = int(istate.attrib['width']) / ar
                            except:
                                pass
                            istate.attrib['height'] = str(int(height))
                item.unload_data_from_memory()
    elif tag == 'hr' and asfloat(style['width']) > 0:
        # Rule width is expressed as a percentage of the profile width.
        prop = style['width'] / self.profile.width
        istate.attrib['width'] = "%d%%" % int(round(prop * 100))
    # NOTE(review): `display` was rewritten above ('table-cell' -> 'inline',
    # other 'table*' -> 'block'), so the three comparisons below look like
    # they can never be true -- confirm whether style['display'] was meant.
    elif display == 'table':
        tag = 'table'
    elif display == 'table-row':
        tag = 'tr'
    elif display == 'table-cell':
        tag = 'td'
    if tag in TABLE_TAGS and self.ignore_tables:
        tag = 'span' if tag == 'td' else 'div'
    if tag in ('table', 'td', 'tr'):
        # Turn CSS background colour and borders into element attributes so
        # the attribute copy below picks them up.
        col = style.backgroundColor
        if col:
            elem.set('bgcolor', col)
        css = style.cssdict()
        if 'border' in css or 'border-width' in css:
            elem.set('border', '1')
    if tag in TABLE_TAGS:
        for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                     'bgcolor'):
            if attr in elem.attrib:
                istate.attrib[attr] = elem.attrib[attr]
    if tag == 'q':
        # Wrap the quotation in curly double quotes.
        t = elem.text
        if not t:
            t = ''
        elem.text = u'\u201c' + t
        t = elem.tail
        if not t:
            t = ''
        elem.tail = u'\u201d' + t
    text = None
    if elem.text:
        if istate.preserve:
            text = elem.text
        elif (len(elem) > 0 and isspace(elem.text) and
              hasattr(elem[0].tag, 'rpartition') and
              elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
            # Whitespace-only text in front of a first child whose local tag
            # name is not in INLINE_TAGS is dropped.
            text = None
        else:
            text = COLLAPSE.sub(' ', elem.text)
    valign = style['vertical-align']
    not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom',
                              'top', 'bottom') or (isinstance(
                                  valign, (float, int)) and abs(valign) != 0)
    issup = valign in ('super', 'text-top',
                       'top') or (isinstance(valign, (float, int)) and valign > 0)
    vtag = 'sup' if issup else 'sub'
    if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
        # Render the element into a scratch tree first, then graft the
        # result into <sup>/<sub><small> inside the current paragraph.
        nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
        vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
        vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
        self.mobimlize_elem(elem, stylizer, vbstate, istates,
                            ignore_valign=True)
        # Drop the state pushed by this call; the stack is never left empty.
        if len(istates) > 0:
            istates.pop()
        if len(istates) == 0:
            istates.append(FormatState())
        at_start = bstate.para is None
        if at_start:
            self.mobimlize_content('span', '', bstate, istates)
        parent = bstate.para if bstate.inline is None else bstate.inline
        if parent is not None:
            vtag = etree.SubElement(parent, XHTML(vtag))
            vtag = etree.SubElement(vtag, XHTML('small'))
            # Add anchors
            for child in vbstate.body:
                if child is not vbstate.para:
                    vtag.append(child)
                else:
                    break
            if vbstate.para is not None:
                # Leading text of the scratch paragraph is carried over too.
                if vbstate.para.text:
                    vtag.text = vbstate.para.text
                for child in vbstate.para:
                    vtag.append(child)
            return
    if tag == 'blockquote':
        # mobi_ignore_margins is forced off while the blockquote's content
        # is processed and restored afterwards.
        old_mim = self.opts.mobi_ignore_margins
        self.opts.mobi_ignore_margins = False
    if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
            # We have an id but no text and no children, the id should still
            # be added.
            istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
            len(elem) == 0)):
        self.mobimlize_content(tag, text, bstate, istates)
    for child in elem:
        self.mobimlize_elem(child, stylizer, bstate, istates)
        # The child's tail text belongs to this element.
        tail = None
        if child.tail:
            if istate.preserve:
                tail = child.tail
            elif bstate.para is None and isspace(child.tail):
                tail = None
            else:
                tail = COLLAPSE.sub(' ', child.tail)
        if tail:
            self.mobimlize_content(tag, tail, bstate, istates)
    if tag == 'blockquote':
        self.opts.mobi_ignore_margins = old_mim
    if bstate.content and style['page-break-after'] in PAGE_BREAKS:
        bstate.pbreak = True
    if isblock:
        para = bstate.para
        # A paragraph holding nothing but a single non-breaking space.
        if para is not None and para.text == u'\xa0' and len(para) < 1:
            if style.height > 2:
                para.getparent().replace(para, etree.Element(XHTML('br')))
            else:
                # This is too small to be rendered effectively, drop it
                para.getparent().remove(para)
        bstate.para = None
        bstate.istate = None
        # Bottom margin/padding, handled the same way as the top ones.
        vmargin = asfloat(style['margin-bottom'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-bottom'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    if bstate.nested and bstate.nested[-1].tag == elem.tag:
        bstate.nested.pop()
    istates.pop()
def process_images(self, soup, baseurl):
    """Save each ``<img>`` in ``soup`` to disk and rewrite its ``src``.

    Images are written to an ``images`` directory under
    ``self.current_dir`` (created if it does not exist) and every
    processed tag has its ``src`` replaced by the local file path.

    * ``data:image/`` URIs are base64-decoded in place.
    * Other URLs are passed through ``self.image_url_processor`` (when it
      is callable), resolved against ``baseurl`` if they have no scheme,
      and fetched with ``self.fetch_url`` unless ``self.imagemap``
      already holds a saved path for them.
    * SVG data is written unchanged; PNG/JPEG data is checked with
      ``identify_data`` and written unchanged; anything else is
      re-encoded (GIF -> PNG, everything else -> JPG).

    An image that cannot be decoded, fetched or converted is reported
    and skipped; its tag is left untouched.

    :param soup: parsed document exposing ``findAll``; its ``img`` tags
        are modified in place.
    :param baseurl: URL used to resolve relative image URLs.
    """
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    c = 0
    for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            # Embedded image: the payload is everything after the first comma
            try:
                data = b64decode(iurl.partition(',')[-1])
            except Exception:
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urlparse.urlsplit(iurl).scheme:
                iurl = urlparse.urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
                if self.imagemap.has_key(iurl):
                    # Already saved earlier, reuse the file on disk
                    tag['src'] = self.imagemap[iurl]
                    continue
            try:
                data = self.fetch_url(iurl)
                if data == 'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue
        c += 1
        fname = ascii_filename('img'+str(c))
        if isinstance(fname, unicode):
            fname = fname.encode('ascii', 'replace')
        itype = imghdr.what(None, data)
        if itype is None and b'<svg' in data[:1024]:
            # SVG image
            imgpath = os.path.join(diskpath, fname+'.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as x:
                x.write(data)
            tag['src'] = imgpath
        else:
            try:
                if itype not in {'png', 'jpg', 'jpeg'}:
                    # Re-encode into a format we keep as-is. This must be an
                    # assignment: with a comparison here itype stays
                    # unchanged (possibly None), so the export format and
                    # file extension below are wrong.
                    itype = 'png' if itype == 'gif' else 'jpg'
                    im = Image()
                    im.load(data)
                    data = im.export(itype)
                else:
                    # Presumably raises on corrupt data so that invalid
                    # images are skipped -- confirm against identify_data
                    identify_data(data)
                imgpath = os.path.join(diskpath, fname+'.'+itype)
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    x.write(data)
                tag['src'] = imgpath
            except Exception:
                # Best effort: report the failure and move on to the next image
                traceback.print_exc()
                continue