def convert(self, data, cache, **kwargs):
    """Transform an XML document with the command configured for its doctype.

    The input is written to a temporary directory, then its doctype (or,
    failing that, its DTD) is looked up in ``self.config['dtds']`` and the
    matching entry is run through ``invokeCommand``. When nothing matches,
    ``self.config['default_transform']`` is used if it is set; otherwise
    ``data`` is stored unchanged.

    :param data: the source document.
    :param cache: receives the result through ``setData`` and
        ``setSubObjects``.
    :param kwargs: only ``filename`` is read (default ``'unknown.xml'``).
    :return: ``cache``.
    """
    base_name = sansext(kwargs.get("filename") or 'unknown.xml')
    dtds = self.config['dtds']
    tmpdir, fullname = self.initialize_tmpdir(data, filename=base_name)
    try:
        try:
            doctype = get_doctype(data)
        except DTException:
            try:
                doctype = get_dtd(data)
            except DTException:
                log('Unable to get doctype nor dtd in %s' % data)
                doctype = None
        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if doctype and doctype in dtds:
            data = self.invokeCommand(fullname, dtds[doctype])
        elif self.config['default_transform']:
            data = self.invokeCommand(fullname,
                                      self.config['default_transform'])
        cache.setData(data)
        path, images = self.subObjects(tmpdir)
        objects = {}
        if images:
            self.fixImages(path, images, objects)
        cache.setSubObjects(objects)
        return cache
    finally:
        # Remove the temporary directory whether or not a step failed.
        self.cleanDir(tmpdir)
def ZopeFind(self, *args, **kwargs):
    """Run ``Folder.ZopeFind`` without failing on an unloadable transform.

    A ``MissingBinary`` raised during the search is logged and swallowed;
    in that case the method returns ``None``.
    """
    try:
        found = Folder.ZopeFind(self, *args, **kwargs)
    except MissingBinary:
        log('ZopeFind: catched MissingBinary exception')
        return None
    return found
def convert(self, data, cache, **kwargs):
    """Run the doctype-specific command on ``data`` and fill ``cache``.

    The doctype is detected with ``get_doctype`` and, if that fails, with
    ``get_dtd``. The command registered for it in ``self.config['dtds']``
    is invoked on the temporary copy of the input; without a match the
    ``default_transform`` setting is used when it is non-empty. Images
    reported by ``subObjects`` are handed to ``fixImages`` and stored as
    sub-objects. Returns ``cache``.
    """
    stem = sansext(kwargs.get("filename") or 'unknown.xml')
    known_dtds = self.config['dtds']
    workdir, source_path = self.initialize_tmpdir(data, filename=stem)
    try:
        try:
            detected = get_doctype(data)
        except DTException:
            try:
                detected = get_dtd(data)
            except DTException:
                log('Unable to get doctype nor dtd in %s' % data)
                detected = None

        if detected and detected in known_dtds:
            data = self.invokeCommand(source_path, known_dtds[detected])
        else:
            fallback = self.config['default_transform']
            if fallback:
                data = self.invokeCommand(source_path, fallback)
        cache.setData(data)

        image_dir, image_names = self.subObjects(workdir)
        sub_objects = {}
        if image_names:
            self.fixImages(image_dir, image_names, sub_objects)
        cache.setSubObjects(sub_objects)
        return cache
    finally:
        # The working directory is discarded on success and on failure.
        self.cleanDir(workdir)
def _load_transform(self):
    """Import ``self.module`` and return the transform it registers.

    * If the import fails, a ``BrokenTransform`` is returned, the title is
      set to ``'BROKEN'`` and the error is logged. ``_v_transform`` is not
      touched in this case.
    * If the module has no ``register`` attribute, ``TransformException``
      is raised.
    * If ``register()`` raises, a ``BrokenTransform`` is used instead and
      the error is logged; otherwise the title is cleared.

    In the last two outcomes the transform is stored on
    ``self._v_transform`` before being returned.
    """
    try:
        module = import_from_name(self.module)
    except ImportError as err:
        broken = BrokenTransform(self.id, self.module, err)
        message = ("Cannot register transform %s (ImportError), using "
                   "BrokenTransform: Error\n %s" % (self.id, err))
        self.title = 'BROKEN'
        log(message, severity=ERROR)
        return broken

    if not hasattr(module, 'register'):
        raise TransformException(
            "Invalid transform module %s: no register function "
            "defined" % self.module)

    try:
        loaded = module.register()
    except Exception as err:
        # Any failure while registering degrades to a broken placeholder.
        loaded = BrokenTransform(self.id, self.module, err)
        message = ("Cannot register transform %s, using BrokenTransform: "
                   "Error\n %s" % (self.id, err))
        self.title = 'BROKEN'
        log(message, severity=ERROR)
    else:
        self.title = ''

    self._v_transform = loaded
    return loaded
def set_parameters(self, REQUEST=None, **kwargs):
    """Set the transform's parameters.

    Values are taken from ``kwargs`` or, when no keyword is given, from
    ``REQUEST.form``. Parameters unknown to ``get_parameter_value`` are
    logged and ignored; the others are validated with the ``VALIDATORS``
    entry named by the parameter's metadata and stored in ``self._config``.

    When ``inputs`` or ``output`` is supplied, the transform is unmapped
    from and remapped into the ``portal_transforms`` tool. If ``REQUEST``
    is given, the response is redirected to the tool's ``manage_main``.
    """
    if not kwargs:
        kwargs = REQUEST.form
    self.preprocess_param(kwargs)
    for param, value in kwargs.items():
        try:
            self.get_parameter_value(param)
        except KeyError:
            log("Warning: ignored parameter %r" % param)
            continue
        meta = self.get_parameter_infos(param)
        self._config[param] = VALIDATORS[meta[0]](value)

    tr_tool = getToolByName(self, "portal_transforms")
    # need to remap transform if necessary (i.e. configurable inputs / output)
    # Membership tests use ``in``: dict.has_key() was removed in Python 3.
    if "inputs" in kwargs or "output" in kwargs:
        tr_tool._unmapTransform(self)
        if not hasattr(self, "_v_transform"):
            self._load_transform()
        self.inputs = kwargs.get("inputs", self._v_transform.inputs)
        self.output = kwargs.get("output", self._v_transform.output)
        tr_tool._mapTransform(self)
    # track output encoding
    if "output_encoding" in kwargs:
        self.output_encoding = kwargs["output_encoding"]
    if REQUEST is not None:
        REQUEST["RESPONSE"].redirect(tr_tool.absolute_url() + "/manage_main")
def getShortPathName(binary):
    """Return the short path name of ``binary`` when ``WIN32`` is set.

    When ``WIN32`` is false, or when ``win32api.GetShortPathName`` fails
    (the failure is logged), ``binary`` is returned unchanged.
    """
    if not WIN32:
        return binary
    try:
        return win32api.GetShortPathName(binary)
    except win32api.error:
        log("Failed to GetShortPathName for '%s'" % binary)
        return binary
def convert(self, orig, data, **kwargs):
    """Scrub ``orig`` with ``scrubHTML`` and store the result in ``data``.

    When the ``disable_transform`` setting is true, ``orig`` is stored
    unchanged. Otherwise the body of the document is scrubbed twice, the
    second pass working on the output of the first. If ``IllegalHTML`` is
    raised, an error message built from ``msg_pat`` is stored instead and
    scrubbing stops.

    :param orig: the HTML source.
    :param data: receives the result through ``setData``.
    :return: ``data``.
    """
    # note if we need an upgrade.
    if 'disable_transform' not in self.config:
        # The message goes first and the level is passed as ``severity``,
        # like every other ``log`` call in this code base.
        log('PortalTransforms safe_html transform needs '
            'to be updated. Please re-install the PortalTransforms '
            'product to fix.', severity=logging.ERROR)

    # if we have a config that we don't want to delete
    # we need a disable option
    if self.config.get('disable_transform'):
        data.setData(orig)
        return data

    for _ in range(2):
        try:
            safe = scrubHTML(
                bodyfinder(orig),
                valid=self.config.get('valid_tags', {}),
                nasty=self.config.get('nasty_tags', {}),
                remove_javascript=self.config.get(
                    'remove_javascript', True),
                raise_error=False)
        except IllegalHTML as inst:
            data.setData(msg_pat % ("Error", str(inst)))
            break
        else:
            data.setData(safe)
            orig = safe
    # Return the stream on the normal path too, as the disabled path does.
    return data
def _unmapTransform(self, transform):
    """Remove ``transform`` from the internal mimetype map.

    For every mimetype of every input of the transform, the list stored in
    ``self._mtmap[input][output]`` is searched for a transform with the same
    name; the first match is removed. Pairs absent from the map are skipped;
    a list without a match is reported as a warning.
    """
    registry = getToolByName(self, 'mimetypes_registry')
    for input_name in transform.inputs:
        for mti in registry.lookup(input_name):
            for mt in mti.mimetypes:
                try:
                    outputs_map = self._mtmap[mt]
                except KeyError:
                    continue
                target = registry.lookup(transform.output)
                for out_mt in target[0].mimetypes:
                    try:
                        registered = outputs_map[out_mt]
                    except KeyError:
                        continue
                    for position, candidate in enumerate(registered):
                        if transform.name() == candidate.name():
                            registered.pop(position)
                            break
                    else:
                        # Loop ended without ``break``: nothing matched.
                        log('Can\'t find transform %s from %s to %s' %
                            (transform.name(), mti, mt), severity=WARNING)
def set_parameters(self, REQUEST=None, **kwargs):
    """Store validated parameter values and remap the transform if needed.

    Without keyword arguments the values are read from ``REQUEST.form``.
    Unknown parameters are logged and skipped. Supplying ``inputs`` or
    ``output`` triggers an unmap/remap in the ``portal_transforms`` tool;
    ``output_encoding`` is recorded on the instance. With a ``REQUEST`` the
    response is redirected to the tool's ``manage_main`` page.
    """
    if not kwargs:
        kwargs = REQUEST.form
    self.preprocess_param(kwargs)

    for name, raw_value in kwargs.items():
        try:
            self.get_parameter_value(name)
        except KeyError:
            log('Warning: ignored parameter %r' % name)
        else:
            validator_key = self.get_parameter_infos(name)[0]
            self._config[name] = VALIDATORS[validator_key](raw_value)

    tool = getToolByName(self, 'portal_transforms')

    # Configurable inputs / output change where the transform sits in the
    # tool's map, so it has to be taken out and put back.
    needs_remap = 'inputs' in kwargs or 'output' in kwargs
    if needs_remap:
        tool._unmapTransform(self)
        if not hasattr(self, '_v_transform'):
            self._load_transform()
        self.inputs = kwargs.get('inputs', self._v_transform.inputs)
        self.output = kwargs.get('output', self._v_transform.output)
        tool._mapTransform(self)

    # track output encoding
    if 'output_encoding' in kwargs:
        self.output_encoding = kwargs['output_encoding']

    if REQUEST is not None:
        target_url = tool.absolute_url() + '/manage_main'
        REQUEST['RESPONSE'].redirect(target_url)
def convert(self, orig, data, **kwargs):
    """Scrub ``orig`` with ``scrubHTML``, repairing broken HTML if needed.

    When the ``disable_transform`` setting is true, ``orig`` is stored
    unchanged. Otherwise ``scrubHTML`` is applied twice in a row. If parsing
    fails on the first pass, the source is re-serialised through lxml and,
    if that is not enough and ``soupfromstring`` is available, through
    BeautifulSoup, then scrubbing is retried. A failure on the second pass,
    or after every repair attempt has been used, is re-raised.

    :param orig: the HTML source.
    :param data: receives the result through ``setData``.
    :param kwargs: ``encoding`` is passed to the lxml parser when repairing.
    :return: ``data``.
    """
    # note if we need an upgrade.
    if 'disable_transform' not in self.config:
        # The message goes first and the level is passed as ``severity``,
        # like every other ``log`` call in this code base.
        log('PortalTransforms safe_html transform needs to be '
            'updated. Please re-install the PortalTransforms product to fix.',
            severity=ERROR)

    # if we have a config that we don't want to delete
    # we need a disable option
    if self.config.get('disable_transform'):
        data.setData(orig)
        return data

    repaired = 0
    while True:
        try:
            # Do 2 passes. This provides more reliable filtering of certain
            # malicious HTML (cf upstream commit svn10522).
            for repeat in range(2):
                orig = scrubHTML(
                    orig,
                    valid=self.config.get('valid_tags', {}),
                    nasty=self.config.get('nasty_tags', {}),
                    remove_javascript=self.config.get(
                        'remove_javascript', True),
                    raise_error=False,
                    default_encoding=self.config.get(
                        'default_encoding', 'utf-8'))
        except IllegalHTML as inst:
            data.setData(msg_pat % ("Error", str(inst)))
            break
        except (HTMLParseError, UnicodeDecodeError):
            if repeat:
                raise  # try to repair only on first pass
            # ouch !
            # HTMLParser is not able to parse very dirty HTML string
            if not repaired:
                # try to repair any broken html with help of lxml
                encoding = kwargs.get('encoding')
                # The recover parameter is True by default in the lxml API;
                # it is passed explicitly to make the code below easier
                # to read.
                try:
                    lparser = LHTMLParser(encoding=encoding, recover=True,
                                          remove_comments=True)
                except LookupError:
                    # Provided encoding is not known by parser so discard it
                    lparser = LHTMLParser(recover=True,
                                          remove_comments=True)
                repaired_html_tree = etree.HTML(orig, parser=lparser)
            elif repaired > (soupfromstring is not None):
                # Neither lxml nor BeautifulSoup worked so give up !
                raise
            else:
                # Can BeautifulSoup perform miracles ?
                # This function may raise HTMLParseError.
                # So consider this parsing as last chance
                # to get parsable html.
                repaired_html_tree = soupfromstring(orig)
            orig = tostring(repaired_html_tree,
                            include_meta_content_type=True,
                            method='xml')
            repaired += 1
        else:
            # Both passes succeeded: store the result and leave the retry
            # loop, which would otherwise never terminate.
            data.setData(orig)
            break
    return data
def convert(self, orig, data, **kwargs):
    """Report that a broken transform was called and return empty data.

    The warning is both logged and printed; ``data`` is set to the empty
    string and returned. ``orig`` is not used.
    """
    details = (self.id, self.module, self.error)
    warning = "Calling convert on BROKEN transform %s (%s). Error: %s" % details
    log(warning, severity=WARNING)
    print(warning)
    data.setData('')
    return data
def objectItems(self, *args, **kwargs):
    """Run ``Folder.objectItems`` without failing on an unloadable transform.

    A ``MissingBinary`` raised during the call is logged and an empty list
    is returned instead.
    """
    try:
        items = Folder.objectItems(self, *args, **kwargs)
    except MissingBinary:
        log('objectItems: catched MissingBinary exception')
        items = []
    return items
def convert(self, orig, data, **kwargs):
    """Report that a broken transform was called and return empty data.

    The warning is both logged and printed; ``data`` is set to the empty
    string and returned. ``orig`` is not used.
    """
    # do the format
    msg = "Calling convert on BROKEN transform %s (%s). Error: %s" % \
          (self.id, self.module, self.error)
    log(msg, severity=WARNING)
    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(msg)
    data.setData('')
    return data
def _load_transform(self):
    """Import ``self.module``, falling back to a ``BrokenTransform``.

    If the import fails, the title is set to ``"BROKEN"``, the error is
    logged and a ``BrokenTransform`` wrapping the error is returned.
    """
    try:
        m = import_from_name(self.module)
    # ``as`` form: accepted by Python 2.6+ and required by Python 3.
    except ImportError as err:
        transform = BrokenTransform(self.id, self.module, err)
        msg = "Cannot register transform %s (ImportError), using BrokenTransform: Error\n %s" % (self.id, err)
        self.title = "BROKEN"
        log(msg, severity=ERROR)
        return transform
    # NOTE(review): in the code visible here a successful import falls
    # through and returns None without using ``m`` - confirm the
    # registration step is not missing.
def _load_transform(self):
    """Import ``self.module``, falling back to a ``BrokenTransform``.

    If the import fails, the title is set to ``'BROKEN'``, the error is
    logged and a ``BrokenTransform`` wrapping the error is returned.
    """
    try:
        m = import_from_name(self.module)
    # ``as`` form: accepted by Python 2.6+ and required by Python 3.
    except ImportError as err:
        transform = BrokenTransform(self.id, self.module, err)
        msg = ("Cannot register transform %s (ImportError), using "
               "BrokenTransform: Error\n %s" % (self.id, err))
        self.title = 'BROKEN'
        log(msg, severity=ERROR)
        return transform
    # NOTE(review): in the code visible here a successful import falls
    # through and returns None without using ``m`` - confirm the
    # registration step is not missing.
def invokeCommand(self, input_name):
    """Run the configured binary on ``input_name`` and return its output.

    The command line is built from the ``binary_path`` and ``command_line``
    settings, then formatted with ``input_name``. The command's standard
    output is returned; when it is empty, a fixed error message naming the
    command is returned instead.
    """
    template = '%(binary_path)s %(command_line)s' % self.config
    command_line = template % input_name
    stdin_pipe, stdout_pipe, stderr_pipe = popen3(command_line)
    stdin_pipe.close()
    # stderr is intentionally not read: reading it first was meant to avoid
    # hanging on stdout, but it still hung on Windows, so a fixed message
    # stands in for the real error output.
    error_data = 'error while running "%s"' % command_line
    stderr_pipe.close()
    data = stdout_pipe.read()
    stdout_pipe.close()
    # NOTE(review): error_data is never empty, so this logs an "Error"
    # line whenever the command produced output - confirm this is intended.
    if data or not error_data:
        log('Error while running "%s":\n %s' % (command_line, error_data))
    else:
        data = error_data
    return data
def _unmapTransform(self, transform):
    """Remove ``transform`` from the internal mimetype map.

    For every mimetype of every input of the transform, the list stored in
    ``self._mtmap[input][output]`` is searched for a transform with the same
    name; the first match is removed. Input/output pairs that are not in the
    map are skipped; a list without a match is reported at debug level.
    """
    registry = getToolByName(self, 'mimetypes_registry')
    for input_name in transform.inputs:
        for mti in registry.lookup(input_name):
            for mt in mti.mimetypes:
                mt_in = self._mtmap.get(mt, {})
                output = transform.output
                mto = registry.lookup(output)
                for mt2 in mto[0].mimetypes:
                    try:
                        registered = mt_in[mt2]
                    except KeyError:
                        # Nothing is mapped for this pair, so there is
                        # nothing to unmap; do not abort the whole unmap.
                        continue
                    for position, candidate in enumerate(registered):
                        if transform.name() == candidate.name():
                            registered.pop(position)
                            break
                    else:
                        # Loop ended without ``break``: nothing matched.
                        log('Can\'t find transform %s from %s to %s' % (
                            transform.name(), mti, mt), severity=DEBUG)
### should raise an ImportError as well (dumb, I know)
from logging import DEBUG, ERROR

from Products.PortalTransforms.utils import log
from Products.PortalTransforms.libtransforms.utils import MissingBinary

# Names of the transform modules to load.
modules = (
    'html_to_odt',
    'odt_to_doc',
    'odt_to_pdf',
)

g = globals()
# Transforms whose module was imported and registered successfully.
transforms = []
for m in modules:
    # ``except ... as e`` is accepted by Python 2.6+ and required by
    # Python 3; the comma form is a syntax error there.
    try:
        ns = __import__(m, g, g, None)
        transforms.append(ns.register())
    except ImportError as e:
        msg = "Problem importing module %s : %s" % (m, e)
        log(msg, severity=ERROR)
    except MissingBinary as e:
        # A missing external binary only disables that transform.
        log(str(e), severity=DEBUG)
    except Exception as e:
        import traceback
        traceback.print_exc()
        log("Raised error %s for %s" % (e, m), severity=ERROR)


def initialize(engine):
    """Register every successfully loaded transform with ``engine``."""
    for transform in transforms:
        engine.registerTransform(transform)
def convertTo(self, target_mimetype, orig, data=None, object=None,
              usedby=None, context=None, **kwargs):
    """Convert orig to a given mimetype

    * orig is an encoded string

    * data an optional IDataStream object. If None a new datastream will be
      created and returned

    * optional object argument is the object on which is bound the data.
      If present that object will be used by the engine to bound cached data.

    * additional arguments (kwargs) will be passed to the transformations.
      Some usual arguments are : filename, mimetype, encoding

    return an object implementing IDataStream or None if no path has been
    found.
    """
    target_mimetype = str(target_mimetype)

    if object is not None:
        cache = Cache(object, context=context)
        data = cache.getCache(target_mimetype)
        if data is not None:
            time, data = data
            if self.max_sec_in_cache == 0 or time < self.max_sec_in_cache:
                return data

    if data is None:
        data = self._wrap(target_mimetype)

    registry = getToolByName(self, 'mimetypes_registry')

    if not getattr(aq_base(registry), 'classify', None):
        # avoid problems when importing a site with an old mimetype
        # registry
        return None

    orig_mt = registry.classify(orig,
                                mimetype=kwargs.get('mimetype'),
                                filename=kwargs.get('filename'))
    orig_mt = str(orig_mt)
    if not orig_mt:
        # Arguments follow the order of the placeholders: filename first,
        # then mimetype.
        log('Unable to guess input mime type (filename=%s, mimetype=%s)' %
            (kwargs.get('filename'), kwargs.get('mimetype')),
            severity=WARNING)
        return None

    target_mt = registry.lookup(target_mimetype)
    if target_mt:
        target_mt = target_mt[0]
    else:
        log('Unable to match target mime type %s' % str(target_mimetype),
            severity=WARNING)
        return None

    ## fastpath
    # If orig_mt and target_mt are the same, we only allow
    # a one-hop transform, a.k.a. filter.
    # XXX disabled filtering for now
    if orig_mt == str(target_mt):
        data.setData(orig)
        md = data.getMetadata()
        md['mimetype'] = str(orig_mt)
        if object is not None:
            cache.setCache(str(target_mimetype), data)
        return data

    ## get a path to output mime type
    requirements = self.getRequirementListByMimetype(
        str(orig_mt), str(target_mt))
    path = self._findPath(orig_mt, target_mt, list(requirements))
    if not path and requirements:
        # Fall back to an unconstrained search when the required transforms
        # cannot all be part of the path.
        log('Unable to satisfy requirements %s' % ', '.join(requirements),
            severity=WARNING)
        path = self._findPath(orig_mt, target_mt)

    if not path:
        log('NO PATH FROM %s TO %s : %s' %
            (orig_mt, target_mimetype, path), severity=WARNING)
        return None

    if len(path) > 1:
        ## create a chain on the fly (sly)
        transform = chain()
        for t in path:
            transform.registerTransform(t)
    else:
        transform = path[0]

    result = transform.convert(orig, data, context=context,
                               usedby=usedby, **kwargs)
    self._setMetaData(result, transform)

    # set cache if possible
    if object is not None and result.isCacheable():
        cache.setCache(str(target_mimetype), result)

    # return IDataStream object
    return result
from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder
from Products.PortalTransforms.interfaces import ITransform
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.utils import bin_search
from Products.PortalTransforms.libtransforms.utils import sansext
from Products.PortalTransforms.utils import log

# The textile library is optional: its absence is logged and recorded in
# HAS_TEXTILE.
HAS_TEXTILE = True
try:
    import textile as textile_transformer
except ImportError:
    HAS_TEXTILE = False
    log('textile_to_html: Could not import textile.')


class textile:
    """Transform declared from ``text/x-web-textile`` to ``text/html``."""

    implements(ITransform)

    __name__ = "textile_to_html"
    inputs = ("text/x-web-textile",)
    output = "text/html"

    def name(self):
        """Return the transform's name."""
        return self.__name__
def reload(self):
    """Reload the module defining the transformation class.

    The reload is logged, the module named by ``self.module`` is imported
    and reloaded, then ``_tr_init`` is called.
    """
    module_name = self.module
    log("Reloading transform %s" % module_name)
    reload(import_from_name(self.module))
    self._tr_init()
Author: - Tom Lazar <*****@*****.**> at the archipelago sprint 2006 - Juraj Hájovský <*****@*****.**> 2015 """ from zope.interface import implements from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.utils import log try: import misaka as m except ImportError: HAS_MARKDOWN = False log("markdown_to_html: Could not import misaka.") else: HAS_MARKDOWN = True class markdown: implements(ITransform) __name__ = "markdown_to_html" inputs = ("text/x-web-markdown",) output = "text/html" def name(self): return self.__name__ def convert(self, orig, data, **kwargs):
""" Uses the http://www.freewisdom.org/projects/python-markdown/ module Author: Tom Lazar <*****@*****.**> at the archipelago sprint 2006 """ from zope.interface import implements from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.utils import log try: import markdown as markdown_transformer except ImportError: HAS_MARKDOWN = False log('markdown_to_html: Could not import python-markdown.') else: HAS_MARKDOWN = True class markdown: implements(ITransform) __name__ = "markdown_to_html" inputs = ("text/x-web-markdown", ) output = "text/html" def name(self): return self.__name__ def convert(self, orig, data, **kwargs):
handy work author: Tom Lazar <*****@*****.**> at the archipelago sprint 2006 """ from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.utils import log from zope.interface import implementer HAS_TEXTILE = True try: import textile as textile_transformer except ImportError: HAS_TEXTILE = False log('textile_to_html: Could not import textile.') @implementer(ITransform) class textile(object): __name__ = "textile_to_html" inputs = ("text/x-web-textile", ) output = "text/html" def name(self): return self.__name__ def convert(self, orig, data, **kwargs): if HAS_TEXTILE: html = textile_transformer.textile(orig,
Author: Tom Lazar <*****@*****.**> at the archipelago sprint 2006 """ from Products.CMFPlone.utils import safe_unicode from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.utils import log from Products.PortalTransforms.utils import safe_nativestring from zope.interface import implementer try: import markdown as markdown_transformer except ImportError: HAS_MARKDOWN = False log('markdown_to_html: Could not import python-markdown.') else: HAS_MARKDOWN = True @implementer(ITransform) class markdown(object): __name__ = "markdown_to_html" inputs = ("text/x-web-markdown",) output = "text/html" def __init__(self, name=None, enabled_extensions=('markdown.extensions.fenced_code', 'markdown.extensions.nl2br', ), **kwargs): self.config = { 'enabled_extensions': enabled_extensions, }
def convertTo(self, target_mimetype, orig, data=None, object=None,
              usedby=None, context=None, **kwargs):
    """Convert orig to a given mimetype

    * orig is an encoded string

    * data an optional IDataStream object. If None a new datastream will be
      created and returned

    * optional object argument is the object on which is bound the data.
      If present that object will be used by the engine to bound cached data.

    * additional arguments (kwargs) will be passed to the transformations.
      Some usual arguments are : filename, mimetype, encoding

    return an object implementing IDataStream or None if no path has been
    found.
    """
    target_mimetype = str(target_mimetype)

    if object is not None:
        cache = Cache(object, context=context)
        data = cache.getCache(target_mimetype)
        if data is not None:
            time, data = data
            if self.max_sec_in_cache == 0 or time < self.max_sec_in_cache:
                return data

    if data is None:
        data = self._wrap(target_mimetype)

    registry = getToolByName(self, 'mimetypes_registry')

    if not getattr(aq_base(registry), 'classify', None):
        # avoid problems when importing a site with an old mimetype
        # registry
        return None

    orig_mt = registry.classify(orig,
                                mimetype=kwargs.get('mimetype'),
                                filename=kwargs.get('filename'))
    orig_mt = str(orig_mt)
    if not orig_mt:
        # Arguments follow the order of the placeholders: filename first,
        # then mimetype.
        log('Unable to guess input mime type (filename=%s, mimetype=%s)' %
            (kwargs.get('filename'), kwargs.get('mimetype')),
            severity=DEBUG)
        return None

    target_mt = registry.lookup(target_mimetype)
    if target_mt:
        target_mt = target_mt[0]
    else:
        log('Unable to match target mime type %s' % str(target_mimetype),
            severity=DEBUG)
        return None

    # fastpath
    # If orig_mt and target_mt are the same, we only allow
    # a one-hop transform, a.k.a. filter.
    # XXX disabled filtering for now
    if orig_mt == str(target_mt):
        data.setData(orig)
        md = data.getMetadata()
        md['mimetype'] = str(orig_mt)
        if object is not None:
            cache.setCache(str(target_mimetype), data)
        return data

    # get a path to output mime type
    requirements = self._policies.get(str(target_mt), [])
    path = self._findPath(orig_mt, target_mt, list(requirements))
    if not path and requirements:
        # Fall back to an unconstrained search when the required transforms
        # cannot all be part of the path.
        log('Unable to satisfy requirements %s' % ', '.join(requirements),
            severity=DEBUG)
        path = self._findPath(orig_mt, target_mt)

    if not path:
        log('NO PATH FROM %s TO %s : %s' %
            (orig_mt, target_mimetype, path), severity=DEBUG)
        return None

    if len(path) > 1:
        # create a chain on the fly (sly)
        transform = chain()
        for t in path:
            transform.registerTransform(t)
    else:
        transform = path[0]

    result = transform.convert(orig, data, context=context,
                               usedby=usedby, **kwargs)
    self._setMetaData(result, transform)

    # set cache if possible
    if object is not None and result.isCacheable():
        cache.setCache(str(target_mimetype), result)

    # return IDataStream object
    return result
def reload(self):
    """Reload the module defining the transformation class.

    The reload is logged. Modules whose name starts with ``erp5.`` are not
    reloaded; any other module is imported and reloaded. ``_tr_init`` is
    called in both cases.
    """
    log('Reloading transform %s' % self.module)
    is_erp5_module = self.module.startswith('erp5.')
    if not is_erp5_module:
        module = import_from_name(self.module)
        reload(module)
    self._tr_init()