def existsExternalImageUrl(url):
    """Check whether an external URL exists by issuing an HTTP HEAD request.

    :param url: URL to probe. Only ``http://`` and ``https://`` URLs are
        requested; anything else (including ``None`` or an empty string)
        is reported as non-existent without any network access.
    :returns: ``True`` if the HEAD request succeeded, ``False`` otherwise.
    """

    # Cheap guard first: nothing else is needed for non-HTTP references.
    if not url or not url.startswith(('http://', 'https://')):
        return False

    class HeadRequest(Request):
        """Request variant that asks for HEAD instead of the default method."""

        def get_method(self):
            return "HEAD"

    try:
        # Close the response right away instead of leaving the connection
        # open until the object happens to be garbage collected.
        urlopen(HeadRequest(url)).close()
        return True
    except Exception as e:
        # Deliberately best effort: whatever goes wrong while probing the
        # URL is logged and reported as "does not exist".
        LOG.warn('External(?) image reference not found (%s)' % e)
        return False
def existsExternalImageUrl(url):
    """Check whether an external URL exists by issuing an HTTP HEAD request.

    :param url: URL to probe. Only ``http://`` and ``https://`` URLs are
        requested; anything else (including ``None`` or an empty string)
        is reported as non-existent without any network access.
    :returns: ``True`` if the HEAD request succeeded, ``False`` otherwise.
    """

    # Cheap guard first: nothing else is needed for non-HTTP references.
    if not url or not url.startswith(('http://', 'https://')):
        return False

    class HeadRequest(Request):
        """Request variant that asks for HEAD instead of the default method."""

        def get_method(self):
            return "HEAD"

    try:
        # Close the response right away instead of leaving the connection
        # open until the object happens to be garbage collected.
        urlopen(HeadRequest(url)).close()
        return True
    except Exception as e:
        # Deliberately best effort: whatever goes wrong while probing the
        # URL is logged and reported as "does not exist".
        LOG.warn('External(?) image reference not found (%s)' % e)
        return False
def makeImagesLocal(root, params):
    """Export all referenced images into ``params['destdir']``.

    Every ``<img>`` below the document nodes matched by
    ``div.mode-flat.level-0`` / ``div.mode-nested.level-1`` is handled:

    * images carrying ``internal-resource`` in their ``class`` attribute or
      having a ``processed`` attribute are skipped;
    * if ``resolveImage()`` returns ``None`` the ``src`` is fetched through
      ``urllib2`` and stored under a temporary name inside ``destdir``;
    * otherwise the data of the resolved object is exported under the id of
      the object (made unique with a ``-<n>.<format>`` suffix if the name is
      already taken), together with its ``preview`` scale if available, and
      the ``<img>`` is wrapped into a ``<div class="image-container">``
      that optionally carries a caption built from title and description.

    The ``src`` attribute of each handled image is rewritten to the
    basename of the exported file and one section per exported file is
    written to ``<destdir>/images.ini``.

    :param root: lxml node of the document; it is modified in place.
    :param params: mapping; ``params['context']`` is used to look up the
        ``reference_catalog`` tool and ``params['destdir']`` is the export
        directory.
    :returns: ``None``
    """

    ref_catalog = getToolByName(params['context'], 'reference_catalog')
    destdir = params['destdir']
    ini_filename = os.path.join(destdir, 'images.ini')
    images_seen = dict()

    # The context manager guarantees that images.ini is flushed and closed,
    # also when processing an image raises.
    with open(ini_filename, 'w') as fp_ini:
        for document_node in CSSSelector('div.mode-flat.level-0,div.mode-nested.level-1')(root):
            document_obj = ref_catalog.lookupObject(document_node.get('uid'))

            for img in document_node.xpath(xpath_query(['img'])):
                # 'internal' images are marked with class="internal resource"
                # in order to prevent image fetching later on
                if 'internal-resource' in (img.get('class') or '') or img.get('processed'):
                    continue

                scale = ''
                src = img.get('src')
                LOG.info('Introspecting image: %s' % src)
                img_obj = resolveImage(document_obj, src)

                if img_obj is None:
                    # like some external image URL
                    LOG.info(' Remote image fetching: %s' % src)
                    try:
                        response = urllib2.urlopen(str(src))
                        try:
                            img_data = response.read()
                        finally:
                            response.close()
                        img_basename = src.split('/')[-1]
                    except (ValueError, urllib2.URLError) as e:
                        LOG.warn('No image found: %s - removed from output (reason: %s)' % (src, e))
                        img.getparent().remove(img)
                        continue

                    # NOTE(review): tempfile.mktemp() is race-prone; kept
                    # because the name is only used as a unique prefix
                    # inside the export directory.
                    tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename
                    with open(tmpname, 'wb') as fp_img:
                        fp_img.write(img_data)

                    # write supplementary information to an .ini file per image
                    img_id = os.path.basename(tmpname)
                    fp_ini.write('[%s]\n' % img_id)
                    fp_ini.write('id = %s\n' % img_id)
                    fp_ini.write('filename = %s\n' % tmpname)
                    fp_ini.write('url = %s\n' % str(src))
                    fp_ini.write('scale = %s\n' % '')
                    img.attrib['src'] = img_id
                    img.attrib['originalscale'] = ''
                    images_seen[src] = img_id
                    LOG.info(' Assigned new id: %s' % img_id)
                    continue

                # resolved did find a local image
                LOG.info(' Local processing: %s' % src)

                # determine image scale from 'src' attribute; done before the
                # cache lookup so that repeated references to the same 'src'
                # get the same 'originalscale' as the first one
                src_parts = src.split('/')
                if '@@images' in src_parts:
                    scale = src_parts[-1]
                elif src_parts[-1].startswith('image_'):
                    scale = src_parts[-1][6:]

                img_filename = images_seen.get(src)
                if not img_filename:
                    img_data = None
                    # first attribute that exists wins
                    for attr in ['data', '_data']:
                        try:
                            img_data = str(getattr(img_obj, attr))
                            break
                        except AttributeError:
                            pass

                    if img_data is None:
                        LOG.warn('No image found: %s - removed from output' % src)
                        # lxml elements have no extract(); remove the node the
                        # same way the remote branch above does
                        img.getparent().remove(img)
                        continue

                    tmpname = tempfile.mktemp(dir=destdir)
                    with open(tmpname, 'wb') as fp_img:
                        fp_img.write(img_data)

                    # determine graphic format using PIL
                    img_format = PIL.Image.open(tmpname).format.lower()

                    # generate unique and speaking image names
                    img_id = img_obj.getId()
                    dest_img_name = os.path.join(destdir, img_id)
                    if not os.path.exists(dest_img_name):
                        os.rename(tmpname, dest_img_name)
                    else:
                        # name already taken: append a running number
                        base_id = os.path.splitext(img_obj.getId())[0]
                        count = 0
                        while True:
                            img_id = '%s-%d.%s' % (base_id, count, img_format)
                            dest_img_name = os.path.join(destdir, img_id)
                            if not os.path.exists(dest_img_name):
                                os.rename(tmpname, dest_img_name)
                                break
                            count += 1
                    LOG.info(' Exported to: %s' % dest_img_name)

                    # now also export the preview scale as well
                    # (needed for EPUB export/conversion)
                    preview_filename = os.path.join(os.path.dirname(dest_img_name),
                                                    'preview_' + os.path.basename(dest_img_name))
                    try:
                        image_field = img_obj.Schema().getField('image')
                        preview_img = image_field.getScale(img_obj, scale='preview')
                        if preview_img == '':  # no scales created?
                            image_field.createScales(img_obj)
                            preview_img = image_field.getScale(img_obj, scale='preview')
                    except AttributeError:  # Schema (for News Item images)
                        preview_img = None

                    if preview_img and safe_hasattr(preview_img, 'data'):
                        with open(preview_filename, 'wb') as fp_preview:
                            fp_preview.write(str(preview_img.data))
                        LOG.info(' Exported preview scale to: %s' % preview_filename)

                    # Fetch both values before writing anything so that a
                    # partial failure cannot produce a duplicate 'title' key.
                    # NOTE(review): the literal 's' fallback is kept from the
                    # original code; it looks like a mangled placeholder.
                    try:
                        ini_title = img_obj.Title()
                        ini_description = img_obj.Description()
                    except AttributeError:
                        ini_title = ini_description = 's'

                    fp_ini.write('[%s]\n' % os.path.basename(dest_img_name))
                    fp_ini.write('filename = %s\n' % dest_img_name)
                    fp_ini.write('id = %s\n' % img_id)
                    fp_ini.write('title = %s\n' % ini_title)
                    fp_ini.write('description = %s\n' % ini_description)
                    fp_ini.write('scale = %s\n' % scale)
                    images_seen[src] = os.path.basename(dest_img_name)
                    img_filename = dest_img_name

                img.attrib['src'] = os.path.basename(img_filename)
                LOG.info(' Assigned new id: %s' % img.get('src'))
                img.attrib['originalscale'] = scale
                img.attrib['style'] = 'width: 100%'  # need for PrinceXML8
                img.attrib['processed'] = '1'

                # image scaling
                # add content-info debug information
                # don't add scale as style since the outer image-container
                # has the style set
                try:
                    pdf_scale = img_obj.getField('pdfScale').get(img_obj)
                except AttributeError:
                    pdf_scale = 100
                img.attrib['scale'] = str(pdf_scale)

                # now move <img> tag into a dedicated <div>
                div = lxml.html.Element('div')
                div.attrib['class'] = 'image-container'
                div.attrib['style'] = 'width: %d%%' % pdf_scale
                div.attrib['scale'] = str(pdf_scale)
                new_img = lxml.html.Element('img')
                new_img.attrib.update(img.attrib.items())
                div.insert(0, new_img)

                try:
                    displayInline_field = img_obj.getField('displayInline')
                except AttributeError:
                    displayInline_field = False

                # a caption is only generated if the field exists and is false
                if displayInline_field and not displayInline_field.get(img_obj):
                    # image caption
                    img_caption_position = img_obj.getField('captionPosition').get(img_obj)
                    img_caption = lxml.html.Element('div')
                    img_caption.attrib['class'] = 'image-caption'

                    # exclude from image enumeration
                    exclude_field = img_obj.getField('excludeFromImageEnumeration')
                    if exclude_field and not exclude_field.get(img_obj):
                        # add description
                        span = lxml.html.Element('span')
                        description = unicode(img_obj.Description(), 'utf-8')
                        if description:
                            span.text = description
                            class_ = 'image-caption-description image-caption-with-description'
                        else:
                            class_ = 'image-caption-description image-caption-without-description'
                        span.attrib['class'] = class_
                        img_caption.insert(0, span)
                        if not description:
                            warn = lxml.html.Element('span')
                            warn.attrib['class'] = 'warning-no-description'
                            warn.text = u'image has no description'
                            img_caption.append(warn)

                        # add title; the CSS class depends on the *title*
                        # (the original code tested the description here)
                        span = lxml.html.Element('span')
                        title = unicode(img_obj.Title(), 'utf-8')
                        if title:
                            span.text = title
                            class_ = 'image-caption-title image-caption-with-title'
                        else:
                            class_ = 'image-caption-title image-caption-without-title'
                        span.attrib['class'] = class_
                        img_caption.insert(0, span)
                        if not title:
                            warn = lxml.html.Element('span')
                            warn.attrib['class'] = 'warning-no-title'
                            warn.text = u'image has no title'
                            img_caption.append(warn)

                    # add title and description to container
                    if img_caption_position == 'top':
                        div.insert(0, img_caption)
                    else:
                        div.append(img_caption)

                # keep the text following the original <img> attached
                div.tail = img.tail
                img.getparent().replace(img, div)
def makeImagesLocal(root, params):
    """Export all referenced images into ``params['destdir']``.

    Every ``<img>`` below the document nodes matched by
    ``div.mode-flat.level-0`` / ``div.mode-nested.level-1`` is handled:

    * images carrying ``internal-resource`` in their ``class`` attribute or
      having a ``processed`` attribute are skipped;
    * if ``resolveImage()`` returns ``None`` the ``src`` is fetched through
      ``urllib2`` and stored under a temporary name inside ``destdir``;
    * otherwise the data of the resolved object is exported under the id of
      the object (made unique with a ``-<n>.<format>`` suffix if the name is
      already taken), together with its ``preview`` scale if available, and
      the ``<img>`` is wrapped into a ``<div class="image-container">``
      that optionally carries a caption built from title and description.

    The ``src`` attribute of each handled image is rewritten to the
    basename of the exported file and one section per exported file is
    written to ``<destdir>/images.ini``.

    :param root: lxml node of the document; it is modified in place.
    :param params: mapping; ``params['context']`` is used to look up the
        ``reference_catalog`` tool and ``params['destdir']`` is the export
        directory.
    :returns: ``None``
    """

    ref_catalog = getToolByName(params['context'], 'reference_catalog')
    destdir = params['destdir']
    ini_filename = os.path.join(destdir, 'images.ini')
    images_seen = dict()

    # The context manager guarantees that images.ini is flushed and closed,
    # also when processing an image raises.
    with open(ini_filename, 'w') as fp_ini:
        for document_node in CSSSelector(
                'div.mode-flat.level-0,div.mode-nested.level-1')(root):
            document_obj = ref_catalog.lookupObject(document_node.get('uid'))

            for img in document_node.xpath(xpath_query(['img'])):
                # 'internal' images are marked with class="internal resource"
                # in order to prevent image fetching later on
                if 'internal-resource' in (img.get('class') or '') or img.get('processed'):
                    continue

                scale = ''
                src = img.get('src')
                LOG.info('Introspecting image: %s' % src)
                img_obj = resolveImage(document_obj, src)

                if img_obj is None:
                    # like some external image URL
                    LOG.info(' Remote image fetching: %s' % src)
                    try:
                        response = urllib2.urlopen(str(src))
                        try:
                            img_data = response.read()
                        finally:
                            response.close()
                        img_basename = src.split('/')[-1]
                    except (ValueError, urllib2.URLError) as e:
                        LOG.warn(
                            'No image found: %s - removed from output (reason: %s)' % (src, e))
                        img.getparent().remove(img)
                        continue

                    # NOTE(review): tempfile.mktemp() is race-prone; kept
                    # because the name is only used as a unique prefix
                    # inside the export directory.
                    tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename
                    with open(tmpname, 'wb') as fp_img:
                        fp_img.write(img_data)

                    # write supplementary information to an .ini file per image
                    img_id = os.path.basename(tmpname)
                    fp_ini.write('[%s]\n' % img_id)
                    fp_ini.write('id = %s\n' % img_id)
                    fp_ini.write('filename = %s\n' % tmpname)
                    fp_ini.write('url = %s\n' % str(src))
                    fp_ini.write('scale = %s\n' % '')
                    img.attrib['src'] = img_id
                    img.attrib['originalscale'] = ''
                    images_seen[src] = img_id
                    LOG.info(' Assigned new id: %s' % img_id)
                    continue

                # resolved did find a local image
                LOG.info(' Local processing: %s' % src)

                # determine image scale from 'src' attribute; done before the
                # cache lookup so that repeated references to the same 'src'
                # get the same 'originalscale' as the first one
                src_parts = src.split('/')
                if '@@images' in src_parts:
                    scale = src_parts[-1]
                elif src_parts[-1].startswith('image_'):
                    scale = src_parts[-1][6:]

                img_filename = images_seen.get(src)
                if not img_filename:
                    img_data = None
                    # first attribute that exists wins
                    for attr in ['data', '_data']:
                        try:
                            img_data = str(getattr(img_obj, attr))
                            break
                        except AttributeError:
                            pass

                    if img_data is None:
                        LOG.warn('No image found: %s - removed from output' % src)
                        # lxml elements have no extract(); remove the node the
                        # same way the remote branch above does
                        img.getparent().remove(img)
                        continue

                    tmpname = tempfile.mktemp(dir=destdir)
                    with open(tmpname, 'wb') as fp_img:
                        fp_img.write(img_data)

                    # determine graphic format using PIL
                    img_format = PIL.Image.open(tmpname).format.lower()

                    # generate unique and speaking image names
                    img_id = img_obj.getId()
                    dest_img_name = os.path.join(destdir, img_id)
                    if not os.path.exists(dest_img_name):
                        os.rename(tmpname, dest_img_name)
                    else:
                        # name already taken: append a running number
                        base_id = os.path.splitext(img_obj.getId())[0]
                        count = 0
                        while True:
                            img_id = '%s-%d.%s' % (base_id, count, img_format)
                            dest_img_name = os.path.join(destdir, img_id)
                            if not os.path.exists(dest_img_name):
                                os.rename(tmpname, dest_img_name)
                                break
                            count += 1
                    LOG.info(' Exported to: %s' % dest_img_name)

                    # now also export the preview scale as well
                    # (needed for EPUB export/conversion)
                    preview_filename = os.path.join(
                        os.path.dirname(dest_img_name),
                        'preview_' + os.path.basename(dest_img_name))
                    try:
                        image_field = img_obj.Schema().getField('image')
                        preview_img = image_field.getScale(img_obj, scale='preview')
                        if preview_img == '':  # no scales created?
                            image_field.createScales(img_obj)
                            preview_img = image_field.getScale(img_obj, scale='preview')
                    except AttributeError:  # Schema (for News Item images)
                        preview_img = None

                    if preview_img and safe_hasattr(preview_img, 'data'):
                        with open(preview_filename, 'wb') as fp_preview:
                            fp_preview.write(str(preview_img.data))
                        LOG.info(' Exported preview scale to: %s' % preview_filename)

                    # Fetch both values before writing anything so that a
                    # partial failure cannot produce a duplicate 'title' key.
                    # NOTE(review): the literal 's' fallback is kept from the
                    # original code; it looks like a mangled placeholder.
                    try:
                        ini_title = img_obj.Title()
                        ini_description = img_obj.Description()
                    except AttributeError:
                        ini_title = ini_description = 's'

                    fp_ini.write('[%s]\n' % os.path.basename(dest_img_name))
                    fp_ini.write('filename = %s\n' % dest_img_name)
                    fp_ini.write('id = %s\n' % img_id)
                    fp_ini.write('title = %s\n' % ini_title)
                    fp_ini.write('description = %s\n' % ini_description)
                    fp_ini.write('scale = %s\n' % scale)
                    images_seen[src] = os.path.basename(dest_img_name)
                    img_filename = dest_img_name

                img.attrib['src'] = os.path.basename(img_filename)
                LOG.info(' Assigned new id: %s' % img.get('src'))
                img.attrib['originalscale'] = scale
                img.attrib['style'] = 'width: 100%'  # need for PrinceXML8
                img.attrib['processed'] = '1'

                # image scaling
                # add content-info debug information
                # don't add scale as style since the outer image-container
                # has the style set
                try:
                    pdf_scale = img_obj.getField('pdfScale').get(img_obj)
                except AttributeError:
                    pdf_scale = 100
                img.attrib['scale'] = str(pdf_scale)

                # now move <img> tag into a dedicated <div>
                div = lxml.html.Element('div')
                div.attrib['class'] = 'image-container'
                div.attrib['style'] = 'width: %d%%' % pdf_scale
                div.attrib['scale'] = str(pdf_scale)
                new_img = lxml.html.Element('img')
                new_img.attrib.update(img.attrib.items())
                div.insert(0, new_img)

                try:
                    displayInline_field = img_obj.getField('displayInline')
                except AttributeError:
                    displayInline_field = False

                # a caption is only generated if the field exists and is false
                if displayInline_field and not displayInline_field.get(img_obj):
                    # image caption
                    img_caption_position = img_obj.getField('captionPosition').get(
                        img_obj)
                    img_caption = lxml.html.Element('div')
                    img_caption.attrib['class'] = 'image-caption'

                    # exclude from image enumeration
                    exclude_field = img_obj.getField('excludeFromImageEnumeration')
                    if exclude_field and not exclude_field.get(img_obj):
                        # add description
                        span = lxml.html.Element('span')
                        description = unicode(img_obj.Description(), 'utf-8')
                        if description:
                            span.text = description
                            class_ = 'image-caption-description image-caption-with-description'
                        else:
                            class_ = 'image-caption-description image-caption-without-description'
                        span.attrib['class'] = class_
                        img_caption.insert(0, span)
                        if not description:
                            warn = lxml.html.Element('span')
                            warn.attrib['class'] = 'warning-no-description'
                            warn.text = u'image has no description'
                            img_caption.append(warn)

                        # add title; the CSS class depends on the *title*
                        # (the original code tested the description here)
                        span = lxml.html.Element('span')
                        title = unicode(img_obj.Title(), 'utf-8')
                        if title:
                            span.text = title
                            class_ = 'image-caption-title image-caption-with-title'
                        else:
                            class_ = 'image-caption-title image-caption-without-title'
                        span.attrib['class'] = class_
                        img_caption.insert(0, span)
                        if not title:
                            warn = lxml.html.Element('span')
                            warn.attrib['class'] = 'warning-no-title'
                            warn.text = u'image has no title'
                            img_caption.append(warn)

                    # add title and description to container
                    if img_caption_position == 'top':
                        div.insert(0, img_caption)
                    else:
                        div.append(img_caption)

                # keep the text following the original <img> attached
                div.tail = img.tail
                img.getparent().replace(img, div)