def __call__(self, *args, **kw):
    """ Delegate to __call2__() and log any failure before re-raising it.

    All positional and keyword arguments are forwarded unchanged; the
    return value of __call2__() is handed back to the caller.
    """
    try:
        result = self.__call2__(*args, **kw)
    except:
        # deliberately bare: whatever was raised is logged together with
        # its traceback and then propagated untouched
        LOG.error('Conversion failed', exc_info=True)
        raise
    return result
def __call__(self, html, input_encoding=None, output_encoding=unicode, return_body=False):
    """ Run all configured transformations over an HTML string.

    html            -- the markup; must be unicode unless 'input_encoding'
                       is given
    input_encoding  -- encoding used to decode a non-unicode 'html'
    output_encoding -- handed to lxml.html.tostring() as 'encoding'
    return_body     -- if true, serialize only the content of the first
                       <body> node instead of the whole tree

    Returns the serialized and stripped markup (u'' for empty input).
    Raises TypeError for non-unicode input without 'input_encoding' and
    ValueError if a name in self.transformation_names is not registered
    in TRANSFORMATIONS.
    """
    if not isinstance(html, unicode):
        if not input_encoding:
            raise TypeError('Input data must be unicode')
        html = unicode(html, input_encoding)

    html = html.strip()
    if not html:
        return u''

    root = lxml.html.fromstring(html)

    for name in self.transformation_names:
        method = TRANSFORMATIONS.get(name)
        if method is None:
            raise ValueError('No transformation "%s" registered' % name)

        params = dict(
            context=self.context,
            request=getattr(self.context, 'REQUEST', None),
            destdir=self.destdir,
        )

        ts = time.time()
        argspec = inspect.getargspec(method)
        if isinstance(argspec, tuple):
            args = argspec[0]  # Python 2.4
        else:
            args = argspec.args
        # transformations may optionally accept a 'params' argument
        if 'params' in args:
            method(root, params)
        else:
            method(root)
        LOG.info('Transformation %-30s: %3.6f seconds' % (name, time.time() - ts))

    if return_body:
        body = root.xpath('//body')[0]
        # body.text is None when <body> starts directly with an element;
        # concatenating None with a string would raise a TypeError
        html_new = (body.text or u'') + u''.join(
            [lxml.html.tostring(b, encoding=output_encoding) for b in body])
    else:
        html_new = lxml.html.tostring(root, encoding=output_encoding)

    # strip a single enclosing <div>...</div> from the serialized result
    if html_new.startswith('<div>') and html_new.endswith('</div>'):
        html_new = html_new[5:-6].strip()

    return html_new.strip()
def existsExternalImageUrl(url): """ Check if the external URL exists (by issuing a HTTP request. """ class HeadRequest(Request): def get_method(self): return "HEAD" if not url.startswith('http'): return False try: urlopen(HeadRequest(url)) return True except Exception, e: LOG.warn('External(?) image reference not found (%s)' % e) return False
def __call__(self, html, input_encoding=None, output_encoding=unicode, return_body=False):
    """ Run all configured transformations over an HTML string.

    html            -- the markup; must be unicode unless 'input_encoding'
                       is given
    input_encoding  -- encoding used to decode a non-unicode 'html'
    output_encoding -- handed to lxml.html.tostring() as 'encoding'
    return_body     -- if true, serialize only the content of the first
                       <body> node instead of the whole tree

    Returns the serialized and stripped markup (u'' for empty input).
    Raises TypeError for non-unicode input without 'input_encoding' and
    ValueError if a name in self.transformation_names is not registered
    in TRANSFORMATIONS.
    """
    if not isinstance(html, unicode):
        if not input_encoding:
            raise TypeError('Input data must be unicode')
        html = unicode(html, input_encoding)

    html = html.strip()
    if not html:
        return u''

    root = lxml.html.fromstring(html)

    for name in self.transformation_names:
        method = TRANSFORMATIONS.get(name)
        if method is None:
            raise ValueError('No transformation "%s" registered' % name)

        params = dict(
            context=self.context,
            request=getattr(self.context, 'REQUEST', None),
            destdir=self.destdir,
        )

        ts = time.time()
        argspec = inspect.getargspec(method)
        if isinstance(argspec, tuple):
            args = argspec[0]  # Python 2.4
        else:
            args = argspec.args
        # transformations may optionally accept a 'params' argument
        if 'params' in args:
            method(root, params)
        else:
            method(root)
        LOG.info('Transformation %-30s: %3.6f seconds' % (name, time.time()-ts))

    if return_body:
        body = root.xpath('//body')[0]
        # body.text is None when <body> starts directly with an element;
        # concatenating None with a string would raise a TypeError
        html_new = (body.text or u'') + u''.join(
            [lxml.html.tostring(b, encoding=output_encoding) for b in body])
    else:
        html_new = lxml.html.tostring(root, encoding=output_encoding)

    # strip a single enclosing <div>...</div> from the serialized result
    if html_new.startswith('<div>') and html_new.endswith('</div>'):
        html_new = html_new[5:-6].strip()

    return html_new.strip()
def makeImagesLocal(root, params): """ deal with internal and external image references """ ref_catalog = getToolByName(params['context'], 'reference_catalog') destdir = params['destdir'] ini_filename = os.path.join(destdir, 'images.ini') fp_ini = file(ini_filename, 'w') images_seen = dict() for document_node in CSSSelector('div.mode-flat.level-0,div.mode-nested.level-1')(root): document_obj = ref_catalog.lookupObject(document_node.get('uid')) for img in document_node.xpath(xpath_query(['img'])): # 'internal' images are marked with class="internal resource" # in order to prevent image fetching later on if 'internal-resource' in (img.get('class') or '') or img.get('processed'): continue scale = '' src = img.get('src') LOG.info('Introspecting image: %s' % src) img_obj = resolveImage(document_obj, src) if img_obj is None: # like some external image URL LOG.info(' Remote image fetching: %s' % src) try: response = urllib2.urlopen(str(src)) img_data = response.read() img_basename = src.split('/')[-1] except (ValueError, urllib2.URLError), e: LOG.warn('No image found: %s - removed from output (reason: %s)' % (src, e)) img.getparent().remove(img) continue tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename file(tmpname, 'wb').write(img_data) # write supplementary information to an .ini file per image img_id = os.path.basename(tmpname) print >>fp_ini, '[%s]' % img_id print >>fp_ini, 'id = %s' % img_id print >>fp_ini, 'filename = %s' % tmpname print >>fp_ini, 'url = %s' % str(src) print >>fp_ini, 'scale = %s' % '' img.attrib['src'] = img_id img.attrib['originalscale'] = '' images_seen[src] = img_id LOG.info(' Assigned new id: %s' % img_id) continue # resolved did find a local image LOG.info(' Local processing: %s' % src) img_filename = images_seen.get(src) if not img_filename: img_data = None for attr in ['data', '_data']: try: img_data = str(getattr(img_obj, attr)) continue except AttributeError: pass if img_data is None: LOG.warn('No image found: %s - removed from 
output' % src) img.extract() continue tmpname = tempfile.mktemp(dir=destdir) file(tmpname, 'wb').write(img_data) # determine graphic format using PIL pil_image = PIL.Image.open(tmpname) format = pil_image.format.lower() # generate unique and speaking image names img_id = img_obj.getId() dest_img_name = os.path.join(destdir, img_id) if not os.path.exists(dest_img_name): os.rename(tmpname, dest_img_name) else: running = True count = 0 while running: img_id = os.path.splitext(img_obj.getId())[0] img_id = '%s-%d.%s' % (img_id, count, format) dest_img_name = os.path.join(params['destdir'], img_id) if not os.path.exists(dest_img_name): os.rename(tmpname, dest_img_name) tmpname = dest_img_name running = False del pil_image else: count += 1 LOG.info(' Exported to: %s' % dest_img_name) # now also export the preview scale as well # (needed for EPUB export/conversion) preview_filename = os.path.join(os.path.dirname(dest_img_name), 'preview_' + os.path.basename(dest_img_name)) try: preview_img = img_obj.Schema().getField('image').getScale(img_obj, scale='preview') if preview_img == '': # no scales created? 
img_obj.Schema().getField('image').createScales(img_obj) preview_img = img_obj.Schema().getField('image').getScale(img_obj, scale='preview') except AttributeError: # Schema (for News Item images) preview_img = None if preview_img and safe_hasattr(preview_img, 'data'): file(preview_filename, 'wb').write(str(preview_img.data)) LOG.info(' Exported preview scale to: %s' % preview_filename) # determine image scale from 'src' attribute src_parts = src.split('/') if '@@images' in src_parts: scale = src_parts[-1] elif src_parts[-1].startswith('image_'): scale = src_parts[-1][6:] print >>fp_ini, '[%s]' % os.path.basename(dest_img_name) print >>fp_ini, 'filename = %s' % dest_img_name print >>fp_ini, 'id = %s' % img_id try: print >>fp_ini, 'title = %s' % img_obj.Title() print >>fp_ini, 'description = %s' % img_obj.Description() except AttributeError: print >>fp_ini, 'title = s' print >>fp_ini, 'description = s' print >>fp_ini, 'scale = %s' % scale images_seen[src] = os.path.basename(dest_img_name) img_filename = dest_img_name img.attrib['src'] = os.path.basename(img_filename) LOG.info(' Assigned new id: %s' % img.get('src')) img.attrib['originalscale'] = scale img.attrib['style'] = 'width: 100%' # need for PrinceXML8 img.attrib['processed'] = '1' # image scaling # add content-info debug information # don't add scale as style since the outer image-container # has the style set try: pdf_scale = img_obj.getField('pdfScale').get(img_obj) except AttributeError: pdf_scale = 100 img.attrib['scale'] = str(pdf_scale) # now move <img> tag into a dedicated <div> div = lxml.html.Element('div') div.attrib['class'] = 'image-container' div.attrib['style'] = 'width: %d%%' % pdf_scale div.attrib['scale'] = str(pdf_scale) new_img = lxml.html.Element('img') new_img.attrib.update(img.attrib.items()) div.insert(0, new_img) try: displayInline_field = img_obj.getField('displayInline') except AttributeError: displayInline_field = False if displayInline_field and not displayInline_field.get(img_obj): 
# image caption img_caption_position = img_obj.getField('captionPosition').get(img_obj) img_caption = lxml.html.Element('div') img_caption.attrib['class'] = 'image-caption' # exclude from image enumeration exclude_field = img_obj.getField('excludeFromImageEnumeration') if exclude_field and not exclude_field.get(img_obj): # add description span = lxml.html.Element('span') description = unicode(img_obj.Description(), 'utf-8') class_ = description and 'image-caption-description image-caption-with-description' or \ 'image-caption-description image-caption-without-description' if description: span.text = description span.attrib['class'] = class_ img_caption.insert(0, span) if not description: warn = lxml.html.Element('span') warn.attrib['class'] = 'warning-no-description' warn.text = u'image has no description' img_caption.append(warn) # add title span = lxml.html.Element('span') title = unicode(img_obj.Title(), 'utf-8') class_ = description and 'image-caption-title image-caption-with-title' or \ 'image-caption-title image-caption-without-title' if title: span.text = title span.attrib['class'] = class_ img_caption.insert(0, span) if not title: warn = lxml.html.Element('span') warn.attrib['class'] = 'warning-no-title' warn.text = u'image has no title' img_caption.append(warn) # add title and description to container if img_caption_position == 'top': div.insert(0, img_caption) else: div.append(img_caption) div.tail = img.tail img.getparent().replace(img, div)
def makeImagesLocal(root, params): """ deal with internal and external image references """ ref_catalog = getToolByName(params['context'], 'reference_catalog') destdir = params['destdir'] ini_filename = os.path.join(destdir, 'images.ini') fp_ini = file(ini_filename, 'w') images_seen = dict() for document_node in CSSSelector( 'div.mode-flat.level-0,div.mode-nested.level-1')(root): document_obj = ref_catalog.lookupObject(document_node.get('uid')) for img in document_node.xpath(xpath_query(['img'])): # 'internal' images are marked with class="internal resource" # in order to prevent image fetching later on if 'internal-resource' in (img.get('class') or '') or img.get('processed'): continue scale = '' src = img.get('src') LOG.info('Introspecting image: %s' % src) img_obj = resolveImage(document_obj, src) if img_obj is None: # like some external image URL LOG.info(' Remote image fetching: %s' % src) try: response = urllib2.urlopen(str(src)) img_data = response.read() img_basename = src.split('/')[-1] except (ValueError, urllib2.URLError), e: LOG.warn( 'No image found: %s - removed from output (reason: %s)' % (src, e)) img.getparent().remove(img) continue tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename file(tmpname, 'wb').write(img_data) # write supplementary information to an .ini file per image img_id = os.path.basename(tmpname) print >> fp_ini, '[%s]' % img_id print >> fp_ini, 'id = %s' % img_id print >> fp_ini, 'filename = %s' % tmpname print >> fp_ini, 'url = %s' % str(src) print >> fp_ini, 'scale = %s' % '' img.attrib['src'] = img_id img.attrib['originalscale'] = '' images_seen[src] = img_id LOG.info(' Assigned new id: %s' % img_id) continue # resolved did find a local image LOG.info(' Local processing: %s' % src) img_filename = images_seen.get(src) if not img_filename: img_data = None for attr in ['data', '_data']: try: img_data = str(getattr(img_obj, attr)) continue except AttributeError: pass if img_data is None: LOG.warn('No image found: %s - 
removed from output' % src) img.extract() continue tmpname = tempfile.mktemp(dir=destdir) file(tmpname, 'wb').write(img_data) # determine graphic format using PIL pil_image = PIL.Image.open(tmpname) format = pil_image.format.lower() # generate unique and speaking image names img_id = img_obj.getId() dest_img_name = os.path.join(destdir, img_id) if not os.path.exists(dest_img_name): os.rename(tmpname, dest_img_name) else: running = True count = 0 while running: img_id = os.path.splitext(img_obj.getId())[0] img_id = '%s-%d.%s' % (img_id, count, format) dest_img_name = os.path.join(params['destdir'], img_id) if not os.path.exists(dest_img_name): os.rename(tmpname, dest_img_name) tmpname = dest_img_name running = False del pil_image else: count += 1 LOG.info(' Exported to: %s' % dest_img_name) # now also export the preview scale as well # (needed for EPUB export/conversion) preview_filename = os.path.join( os.path.dirname(dest_img_name), 'preview_' + os.path.basename(dest_img_name)) try: preview_img = img_obj.Schema().getField('image').getScale( img_obj, scale='preview') if preview_img == '': # no scales created? 
img_obj.Schema().getField('image').createScales( img_obj) preview_img = img_obj.Schema().getField( 'image').getScale(img_obj, scale='preview') except AttributeError: # Schema (for News Item images) preview_img = None if preview_img and safe_hasattr(preview_img, 'data'): file(preview_filename, 'wb').write(str(preview_img.data)) LOG.info(' Exported preview scale to: %s' % preview_filename) # determine image scale from 'src' attribute src_parts = src.split('/') if '@@images' in src_parts: scale = src_parts[-1] elif src_parts[-1].startswith('image_'): scale = src_parts[-1][6:] print >> fp_ini, '[%s]' % os.path.basename(dest_img_name) print >> fp_ini, 'filename = %s' % dest_img_name print >> fp_ini, 'id = %s' % img_id try: print >> fp_ini, 'title = %s' % img_obj.Title() print >> fp_ini, 'description = %s' % img_obj.Description() except AttributeError: print >> fp_ini, 'title = s' print >> fp_ini, 'description = s' print >> fp_ini, 'scale = %s' % scale images_seen[src] = os.path.basename(dest_img_name) img_filename = dest_img_name img.attrib['src'] = os.path.basename(img_filename) LOG.info(' Assigned new id: %s' % img.get('src')) img.attrib['originalscale'] = scale img.attrib['style'] = 'width: 100%' # need for PrinceXML8 img.attrib['processed'] = '1' # image scaling # add content-info debug information # don't add scale as style since the outer image-container # has the style set try: pdf_scale = img_obj.getField('pdfScale').get(img_obj) except AttributeError: pdf_scale = 100 img.attrib['scale'] = str(pdf_scale) # now move <img> tag into a dedicated <div> div = lxml.html.Element('div') div.attrib['class'] = 'image-container' div.attrib['style'] = 'width: %d%%' % pdf_scale div.attrib['scale'] = str(pdf_scale) new_img = lxml.html.Element('img') new_img.attrib.update(img.attrib.items()) div.insert(0, new_img) try: displayInline_field = img_obj.getField('displayInline') except AttributeError: displayInline_field = False if displayInline_field and not 
displayInline_field.get(img_obj): # image caption img_caption_position = img_obj.getField('captionPosition').get( img_obj) img_caption = lxml.html.Element('div') img_caption.attrib['class'] = 'image-caption' # exclude from image enumeration exclude_field = img_obj.getField('excludeFromImageEnumeration') if exclude_field and not exclude_field.get(img_obj): # add description span = lxml.html.Element('span') description = unicode(img_obj.Description(), 'utf-8') class_ = description and 'image-caption-description image-caption-with-description' or \ 'image-caption-description image-caption-without-description' if description: span.text = description span.attrib['class'] = class_ img_caption.insert(0, span) if not description: warn = lxml.html.Element('span') warn.attrib['class'] = 'warning-no-description' warn.text = u'image has no description' img_caption.append(warn) # add title span = lxml.html.Element('span') title = unicode(img_obj.Title(), 'utf-8') class_ = description and 'image-caption-title image-caption-with-title' or \ 'image-caption-title image-caption-without-title' if title: span.text = title span.attrib['class'] = class_ img_caption.insert(0, span) if not title: warn = lxml.html.Element('span') warn.attrib['class'] = 'warning-no-title' warn.text = u'image has no title' img_caption.append(warn) # add title and description to container if img_caption_position == 'top': div.insert(0, img_caption) else: div.append(img_caption) div.tail = img.tail img.getparent().replace(img, div)
def __call2__(self, *args, **kw):
    """ Render the context as HTML into a fresh working directory and
        convert it.

        URL parameters (merged with the keyword arguments, request form
        values win):

        'language' - 'de', 'en'....used to override the language of the
                     document
        'converter' - name of one of the converters registered with
                      zopyx.convert2 (default: 'pdf-pisa' for local
                      conversion, 'pdf-prince' for server conversion)
        'resource' - the name of a registered resource (directory)
        'template' - the name of a custom template name within the
                     chosen 'resource'
        'custom_stylesheet' - path of an object to be traversed and
                     injected as custom CSS
        'data' - arbitrary application data passed to the template
        'no-split', 'zip_output', 'no_conversion' - flags, see code

        Returns the working directory if 'no_conversion' is given,
        otherwise the filename of the conversion result.

        Raises ValueError for an inaccessible custom stylesheet or an
        unknown/missing resource, IOError for a missing template and
        RuntimeError if @@asHTML is unavailable or the local conversion
        fails.
    """

    # Output directory
    tmpdir_prefix = os.path.join(tempfile.gettempdir(), 'produce-and-publish')
    if not os.path.exists(tmpdir_prefix):
        os.makedirs(tmpdir_prefix)
    destdir = tempfile.mkdtemp(dir=tmpdir_prefix, prefix=self.context.getId() + '-')

    # debug/logging
    params = kw.copy()
    params.update(self.request.form)
    LOG.info('new job (%s, %s) - outdir: %s' % (args, params, destdir))

    # get hold of the language (hyphenation support)
    language = getLanguageForObject(self.context)
    if params.get('language'):
        language = params.get('language')

    # Check for CSS injection
    custom_css = None
    custom_stylesheet = params.get('custom_stylesheet')
    if custom_stylesheet:
        # test the traversal result *before* converting it to a string:
        # str(None) is 'None' and would never trigger the error below
        css_obj = self.context.restrictedTraverse(custom_stylesheet, None)
        if css_obj is None:
            raise ValueError(
                'Could not access custom CSS at %s' % custom_stylesheet)
        custom_css = str(css_obj)

    # check for resource parameter
    resource = params.get('resource')
    if resource:
        resources_directory = resources_registry.get(resource)
        if not resources_directory:
            raise ValueError('No resource "%s" configured' % resource)
        if not os.path.exists(resources_directory):
            raise ValueError(
                'Resource directory for resource "%s" does not exist' % resource)
        self.copyResources(resources_directory, destdir)

        # look up custom template in resources directory
        template_name = params.get('template', 'pdf_template')
        if not template_name.endswith('.pt'):
            template_name += '.pt'
        template_filename = os.path.join(resources_directory, template_name)
        if not os.path.exists(template_filename):
            raise IOError('No template found (%s)' % template_filename)
        template = ViewPageTemplateFile2(template_filename)
    else:
        template = self.template

    # the HTML body is provided by the @@asHTML view of the context
    html_view = self.context.restrictedTraverse('@@asHTML', None)
    if not html_view:
        raise RuntimeError(
            'Object at does not provide @@asHTML view (%s, %s)' %
            (self.context.absolute_url(1), self.context.portal_type))
    html_fragment = html_view()

    # arbitrary application data
    data = params.get('data', None)

    # Now render the complete HTML document
    html = template(self,
                    language=language,
                    request=self.request,
                    body=html_fragment,
                    custom_css=custom_css,
                    data=data,
                    )

    # and apply transformations
    html = self.transformHtml(html, destdir)

    # NOTE(review): as written this replaces '& ' with itself (no-op);
    # it presumably was meant to escape bare ampersands - please confirm
    html = html.replace('& ', '& ')

    # and store it in a dedicated working directory
    dest_filename = os.path.join(destdir, 'index.html')
    fp = codecs.open(dest_filename, 'wb', encoding='utf-8')
    try:
        fp.write(html)
    finally:
        fp.close()

    # split HTML document into parts and store them on the filesystem
    # (making only sense for folderish content)
    if IATFolder.providedBy(self.context) and 'no-split' not in params:
        splitter.split_html(dest_filename, destdir)

    # copy over global styles etc.
    resources_dir = os.path.join(cwd, 'resources')
    self.copyResources(resources_dir, destdir)

    # copy over language dependent hyphenation data
    if language:
        hyphen_file = os.path.join(resources_dir, 'hyphenation', language + '.hyp')
        if os.path.exists(hyphen_file):
            shutil.copy(hyphen_file, destdir)

        hyphen_css_file = os.path.join(resources_dir, 'languages', language + '.css')
        if os.path.exists(hyphen_css_file):
            shutil.copy(hyphen_css_file, destdir)

    # now copy over resources (of a derived view)
    self.copyResources(getattr(self, 'local_resources', ''), destdir)

    if ZIP_OUTPUT or 'zip_output' in params:
        # NOTE(review): tempfile.mktemp() is race-prone; the archive is
        # only written and logged here, not returned
        archivename = tempfile.mktemp(suffix='.zip')
        zf = zipfile.ZipFile(archivename, "w", zipfile.ZIP_DEFLATED)
        try:
            for dirpath, dirs, files in os.walk(destdir):
                # NOTE: ignore empty directories
                for fn in files:
                    absfn = os.path.join(dirpath, fn)
                    # archive member name relative to destdir
                    zfn = absfn[len(destdir) + len(os.sep):]
                    zf.write(absfn, zfn)
        finally:
            zf.close()
        LOG.info('ZIP file written to %s' % archivename)

    if 'no_conversion' in params:
        return destdir

    if LOCAL_CONVERSION:
        from zopyx.convert2 import Converter
        c = Converter(dest_filename)
        result = c(params.get('converter', 'pdf-pisa'))
        if result['status'] != 0:
            raise RuntimeError('Error during PDF conversion (%r)' % result)
        pdf_file = result['output_filename']
        LOG.info('Output file: %s' % pdf_file)
        return pdf_file
    else:
        # Produce & Publish server integration
        from zopyx.smartprintng.client.zip_client import Proxy2
        proxy = Proxy2(URL)
        # NOTE(review): the converter is read from the request here but
        # from 'params' for local conversion - confirm this is intended
        result = proxy.convertZIP2(destdir, self.request.get('converter', 'pdf-prince'))
        LOG.info('Output file: %s' % result['output_filename'])
        return result['output_filename']
def __call2__(self, *args, **kw):
    """ Render the context as HTML into a fresh working directory and
        convert it.

        URL parameters (merged with the keyword arguments, request form
        values win):

        'language' - 'de', 'en'....used to override the language of the
                     document
        'converter' - name of one of the converters registered with
                      zopyx.convert2 (default: 'pdf-pisa' for local
                      conversion, 'pdf-prince' for server conversion)
        'resource' - the name of a registered resource (directory)
        'template' - the name of a custom template name within the
                     chosen 'resource'
        'custom_stylesheet' - path of an object to be traversed and
                     injected as custom CSS
        'data' - arbitrary application data passed to the template
        'no-split', 'zip_output', 'no_conversion' - flags, see code

        Returns the working directory if 'no_conversion' is given,
        otherwise the filename of the conversion result.

        Raises ValueError for an inaccessible custom stylesheet or an
        unknown/missing resource, IOError for a missing template and
        RuntimeError if @@asHTML is unavailable or the local conversion
        fails.
    """

    # Output directory
    tmpdir_prefix = os.path.join(tempfile.gettempdir(),
                                 'produce-and-publish')
    if not os.path.exists(tmpdir_prefix):
        os.makedirs(tmpdir_prefix)
    destdir = tempfile.mkdtemp(dir=tmpdir_prefix,
                               prefix=self.context.getId() + '-')

    # debug/logging
    params = kw.copy()
    params.update(self.request.form)
    LOG.info('new job (%s, %s) - outdir: %s' % (args, params, destdir))

    # get hold of the language (hyphenation support)
    language = getLanguageForObject(self.context)
    if params.get('language'):
        language = params.get('language')

    # Check for CSS injection
    custom_css = None
    custom_stylesheet = params.get('custom_stylesheet')
    if custom_stylesheet:
        # test the traversal result *before* converting it to a string:
        # str(None) is 'None' and would never trigger the error below
        css_obj = self.context.restrictedTraverse(custom_stylesheet, None)
        if css_obj is None:
            raise ValueError('Could not access custom CSS at %s' %
                             custom_stylesheet)
        custom_css = str(css_obj)

    # check for resource parameter
    resource = params.get('resource')
    if resource:
        resources_directory = resources_registry.get(resource)
        if not resources_directory:
            raise ValueError('No resource "%s" configured' % resource)
        if not os.path.exists(resources_directory):
            raise ValueError(
                'Resource directory for resource "%s" does not exist' %
                resource)
        self.copyResources(resources_directory, destdir)

        # look up custom template in resources directory
        template_name = params.get('template', 'pdf_template')
        if not template_name.endswith('.pt'):
            template_name += '.pt'
        template_filename = os.path.join(resources_directory,
                                         template_name)
        if not os.path.exists(template_filename):
            raise IOError('No template found (%s)' % template_filename)
        template = ViewPageTemplateFile2(template_filename)
    else:
        template = self.template

    # call the dedicated @@asHTML on the top-level node. For a leaf document
    # this will return either a HTML fragment for a single document or @@asHTML
    # might be defined as an aggregator for a bunch of documents (e.g. if the
    # top-level is a folderish object)
    html_view = self.context.restrictedTraverse('@@asHTML', None)
    if not html_view:
        raise RuntimeError(
            'Object at does not provide @@asHTML view (%s, %s)' %
            (self.context.absolute_url(1), self.context.portal_type))
    html_fragment = html_view()

    # arbitrary application data
    data = params.get('data', None)

    # Now render the complete HTML document
    html = template(
        self,
        language=language,
        request=self.request,
        body=html_fragment,
        custom_css=custom_css,
        data=data,
    )

    # and apply transformations
    html = self.transformHtml(html, destdir)

    # NOTE(review): as written this replaces '& ' with itself (no-op);
    # it presumably was meant to escape bare ampersands - please confirm
    html = html.replace('& ', '& ')

    # and store it in a dedicated working directory
    dest_filename = os.path.join(destdir, 'index.html')
    fp = codecs.open(dest_filename, 'wb', encoding='utf-8')
    try:
        fp.write(html)
    finally:
        fp.close()

    # split HTML document into parts and store them on the filesystem
    # (making only sense for folderish content)
    if IATFolder.providedBy(self.context) and 'no-split' not in params:
        splitter.split_html(dest_filename, destdir)

    # copy over global styles etc.
    resources_dir = os.path.join(cwd, 'resources')
    self.copyResources(resources_dir, destdir)

    # copy over language dependent hyphenation data
    if language:
        hyphen_file = os.path.join(resources_dir, 'hyphenation',
                                   language + '.hyp')
        if os.path.exists(hyphen_file):
            shutil.copy(hyphen_file, destdir)

        hyphen_css_file = os.path.join(resources_dir, 'languages',
                                       language + '.css')
        if os.path.exists(hyphen_css_file):
            shutil.copy(hyphen_css_file, destdir)

    # now copy over resources (of a derived view)
    self.copyResources(getattr(self, 'local_resources', ''), destdir)

    if ZIP_OUTPUT or 'zip_output' in params:
        # NOTE(review): tempfile.mktemp() is race-prone; the archive is
        # only written and logged here, not returned
        archivename = tempfile.mktemp(suffix='.zip')
        zf = zipfile.ZipFile(archivename, "w", zipfile.ZIP_DEFLATED)
        try:
            for dirpath, dirs, files in os.walk(destdir):
                # NOTE: ignore empty directories
                for fn in files:
                    absfn = os.path.join(dirpath, fn)
                    # archive member name relative to destdir
                    zfn = absfn[len(destdir) + len(os.sep):]
                    zf.write(absfn, zfn)
        finally:
            zf.close()
        LOG.info('ZIP file written to %s' % archivename)

    if 'no_conversion' in params:
        return destdir

    if LOCAL_CONVERSION:
        from zopyx.convert2 import Converter
        c = Converter(dest_filename)
        result = c(params.get('converter', 'pdf-pisa'))
        if result['status'] != 0:
            raise RuntimeError('Error during PDF conversion (%r)' % result)
        pdf_file = result['output_filename']
        LOG.info('Output file: %s' % pdf_file)
        return pdf_file
    else:
        # Produce & Publish server integration
        from zopyx.smartprintng.client.zip_client import Proxy2
        proxy = Proxy2(URL)
        # NOTE(review): the converter is read from the request here but
        # from 'params' for local conversion - confirm this is intended
        result = proxy.convertZIP2(
            destdir, self.request.get('converter', 'pdf-prince'))
        LOG.info('Output file: %s' % result['output_filename'])
        return result['output_filename']