class TransformTool(UniqueObject, ActionProviderBase, Folder): id = 'portal_transforms' meta_type = id.title().replace('_', ' ') isPrincipiaFolderish = 1 # Show up in the ZMI __implements__ = iengine implements(IPortalTransformsTool) meta_types = all_meta_types = ( { 'name' : 'Transform', 'action' : 'manage_addTransformForm'}, { 'name' : 'TransformsChain', 'action' : 'manage_addTransformsChainForm'}, ) manage_addTransformForm = PageTemplateFile('addTransform', _www) manage_addTransformsChainForm = PageTemplateFile('addTransformsChain', _www) manage_cacheForm = PageTemplateFile('setCacheTime', _www) manage_editTransformationPolicyForm = PageTemplateFile('editTransformationPolicy', _www) manage_reloadAllTransforms = PageTemplateFile('reloadAllTransforms', _www) manage_options = ((Folder.manage_options[0],) + Folder.manage_options[2:] + ( { 'label' : 'Caches', 'action' : 'manage_cacheForm'}, { 'label' : 'Policy', 'action' : 'manage_editTransformationPolicyForm'}, { 'label' : 'Reload transforms', 'action' : 'manage_reloadAllTransforms'}, ) ) security = ClassSecurityInfo() def __init__(self, policies=None, max_sec_in_cache=3600): self._mtmap = PersistentMapping() self._policies = policies or PersistentMapping() self.max_sec_in_cache = max_sec_in_cache self._new_style_pt = 1 # mimetype oriented conversions (iengine interface) ######################## def unregisterTransform(self, name): """ unregister a transform name is the name of a registered transform """ self._unmapTransform(getattr(self, name)) if name in self.objectIds(): self._delObject(name) def convertTo(self, target_mimetype, orig, data=None, object=None, usedby=None, context=None, **kwargs): """Convert orig to a given mimetype * orig is an encoded string * data an optional idatastream object. If None a new datastream will be created and returned * optional object argument is the object on which is bound the data. If present that object will be used by the engine to bound cached data. * additional arguments (kwargs) will be passed to the transformations. Some usual arguments are : filename, mimetype, encoding return an object implementing idatastream or None if no path has been found. """ target_mimetype = str(target_mimetype) if object is not None: cache = Cache(object) data = cache.getCache(target_mimetype) if data is not None: time, data = data if self.max_sec_in_cache == 0 or time < self.max_sec_in_cache: return data if data is None: data = self._wrap(target_mimetype) registry = getToolByName(self, 'mimetypes_registry') if not getattr(aq_base(registry), 'classify', None): # avoid problems when importing a site with an old mimetype registry # XXX return None or orig? return None orig_mt = registry.classify(orig, mimetype=kwargs.get('mimetype'), filename=kwargs.get('filename')) orig_mt = str(orig_mt) if not orig_mt: log('Unable to guess input mime type (filename=%s, mimetype=%s)' %( kwargs.get('mimetype'), kwargs.get('filename')), severity=DEBUG) return None target_mt = registry.lookup(target_mimetype) if target_mt: target_mt = target_mt[0] else: log('Unable to match target mime type %s'% str(target_mimetype), severity=DEBUG) return None ## fastpath # If orig_mt and target_mt are the same, we only allow # a one-hop transform, a.k.a. filter. # XXX disabled filtering for now filter_only = False if orig_mt == str(target_mt): filter_only = True data.setData(orig) md = data.getMetadata() md['mimetype'] = str(orig_mt) if object is not None: cache.setCache(str(target_mimetype), data) return data ## get a path to output mime type requirements = self._policies.get(str(target_mt), []) path = self._findPath(orig_mt, target_mt, list(requirements)) if not path and requirements: log('Unable to satisfy requirements %s' % ', '.join(requirements), severity=DEBUG) path = self._findPath(orig_mt, target_mt) if not path: log('NO PATH FROM %s TO %s : %s' % (orig_mt, target_mimetype, path), severity=DEBUG) return None #XXX raise TransformError if len(path) > 1: ## create a chain on the fly (sly) transform = chain() for t in path: transform.registerTransform(t) else: transform = path[0] result = transform.convert(orig, data, context=context, usedby=usedby, **kwargs) assert(idatastream.isImplementedBy(result), 'result doesn\'t implemented idatastream') self._setMetaData(result, transform) # set cache if possible if object is not None and result.isCacheable(): cache.setCache(str(target_mimetype), result) # return idatastream object return result security.declarePublic('convertToData') def convertToData(self, target_mimetype, orig, data=None, object=None, usedby=None, context=None, **kwargs): """Convert to a given mimetype and return the raw data ignoring subobjects. see convertTo for more information """ data =self.convertTo(target_mimetype, orig, data, object, usedby, context, **kwargs) if data: return data.getData() return None security.declarePublic('convert') def convert(self, name, orig, data=None, context=None, **kwargs): """run a tranform of a given name on data * name is the name of a registered transform see convertTo docstring for more info """ if not data: data = self._wrap(name) try: transform = getattr(self, name) except AttributeError: raise Exception('No such transform "%s"' % name) data = transform.convert(orig, data, context=context, **kwargs) self._setMetaData(data, transform) return data def __call__(self, name, orig, data=None, context=None, **kwargs): """run a transform by its name, returning the raw data product * name is the name of a registered transform. return an encoded string. see convert docstring for more info on additional arguments. """ data = self.convert(name, orig, data, context, **kwargs) return data.getData() # utilities ############################################################### def _setMetaData(self, datastream, transform): """set metadata on datastream according to the given transform (mime type and optionaly encoding) """ md = datastream.getMetadata() if hasattr(transform, 'output_encoding'): md['encoding'] = transform.output_encoding md['mimetype'] = transform.output def _wrap(self, name): """wrap a data object in an icache""" return datastream(name) def _unwrap(self, data): """unwrap data from an icache""" if idatastream.isImplementedBy(data): data = data.getData() return data def _mapTransform(self, transform): """map transform to internal structures""" registry = getToolByName(self, 'mimetypes_registry') inputs = getattr(transform, 'inputs', None) if not inputs: raise TransformException('Bad transform %s : no input MIME type' % (transform)) for i in inputs: mts = registry.lookup(i) if not mts: msg = 'Input MIME type %r for transform %s is not registered '\ 'in the MIME types registry' % (i, transform.name()) raise TransformException(msg) for mti in mts: for mt in mti.mimetypes: mt_in = self._mtmap.setdefault(mt, PersistentMapping()) output = getattr(transform, 'output', None) if not output: msg = 'Bad transform %s : no output MIME type' raise TransformException(msg % transform.name()) mto = registry.lookup(output) if not mto: msg = 'Output MIME type %r for transform %s is not '\ 'registered in the MIME types registry' % \ (output, transform.name()) raise TransformException(msg) if len(mto) > 1: msg = 'Wildcarding not allowed in transform\'s output '\ 'MIME type' raise TransformException(msg) for mt2 in mto[0].mimetypes: try: if not transform in mt_in[mt2]: mt_in[mt2].append(transform) except KeyError: mt_in[mt2] = PersistentList([transform]) def _unmapTransform(self, transform): """unmap transform from internal structures""" registry = getToolByName(self, 'mimetypes_registry') for i in transform.inputs: for mti in registry.lookup(i): for mt in mti.mimetypes: mt_in = self._mtmap.get(mt, {}) output = transform.output mto = registry.lookup(output) for mt2 in mto[0].mimetypes: l = mt_in[mt2] for i in range(len(l)): if transform.name() == l[i].name(): l.pop(i) break else: log('Can\'t find transform %s from %s to %s' % ( transform.name(), mti, mt), severity=DEBUG) def _findPath(self, orig, target, required_transforms=()): """return the shortest path for transformation from orig mimetype to target mimetype """ path = [] if not self._mtmap: return None # naive algorithm : # find all possible paths with required transforms # take the shortest # # it should be enough since we should not have so much possible paths shortest, winner = 9999, None for path in self._getPaths(str(orig), str(target), required_transforms): if len(path) < shortest: winner = path shortest = len(path) return winner def _getPaths(self, orig, target, requirements, path=None, result=None): """return a all path for transformation from orig mimetype to target mimetype """ if path is None: result = [] path = [] requirements = list(requirements) outputs = self._mtmap.get(orig) if outputs is None: return result path.append(None) for o_mt, transforms in outputs.items(): for transform in transforms: required = 0 name = transform.name() if name in requirements: requirements.remove(name) required = 1 if transform in path: # avoid infinite loop... continue path[-1] = transform if o_mt == target: if not requirements: result.append(path[:]) else: self._getPaths(o_mt, target, requirements, path, result) if required: requirements.append(name) path.pop() return result security.declarePrivate('manage_afterAdd') def manage_afterAdd(self, item, container): """ overload manage_afterAdd to finish initialization when the transform tool is added """ Folder.manage_afterAdd(self, item, container) transforms.initialize(self) # XXX required? #try: # # first initialization # transforms.initialize(self) #except: # # may fail on copy # pass security.declareProtected(ManagePortal, 'manage_addTransform') def manage_addTransform(self, id, module, REQUEST=None): """ add a new transform to the tool """ transform = Transform(id, module) self._setObject(id, transform) self._mapTransform(transform) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') security.declareProtected(ManagePortal, 'manage_addTransform') def manage_addTransformsChain(self, id, description, REQUEST=None): """ add a new transform to the tool """ transform = TransformsChain(id, description) self._setObject(id, transform) self._mapTransform(transform) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') security.declareProtected(ManagePortal, 'manage_addTransform') def manage_setCacheValidityTime(self, seconds, REQUEST=None): """set the lifetime of cached data in seconds""" self.max_sec_in_cache = int(seconds) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') security.declareProtected(ManagePortal, 'reloadTransforms') def reloadTransforms(self, ids=()): """ reload transforms with the given ids if no ids, reload all registered transforms return a list of (transform_id, transform_module) describing reloaded transforms """ if not ids: ids = self.objectIds() reloaded = [] for id in ids: o = getattr(self, id) o.reload() reloaded.append((id, o.module)) return reloaded # Policy handling methods ################################################# def manage_addPolicy(self, output_mimetype, required_transforms, REQUEST=None): """ add a policy for a given output mime types""" registry = getToolByName(self, 'mimetypes_registry') if not registry.lookup(output_mimetype): raise TransformException('Unknown MIME type') if self._policies.has_key(output_mimetype): msg = 'A policy for output %s is yet defined' % output_mimetype raise TransformException(msg) required_transforms = tuple(required_transforms) self._policies[output_mimetype] = required_transforms if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_editTransformationPolicyForm') def manage_delPolicies(self, outputs, REQUEST=None): """ remove policies for given output mime types""" for mimetype in outputs: del self._policies[mimetype] if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_editTransformationPolicyForm') def listPolicies(self): """ return the list of defined policies a policy is a 2-uple (output_mime_type, [list of required transforms]) """ # XXXFIXME: backward compat, should be removed latter if not hasattr(self, '_policies'): self._policies = PersistentMapping() return self._policies.items() # mimetype oriented conversions (iengine interface) ######################## def registerTransform(self, transform): """register a new transform transform isn't a Zope Transform (the wrapper) but the wrapped transform the persistence wrapper will be created here """ # needed when call from transform.transforms.initialize which # register non zope transform module = str(transform.__module__) transform = Transform(transform.name(), module, transform) if not itransform.isImplementedBy(transform): raise TransformException('%s does not implement itransform' % transform) name = transform.name() __traceback_info__ = (name, transform) if name not in self.objectIds(): self._setObject(name, transform) self._mapTransform(transform) security.declareProtected(ManagePortal, 'ZopeFind') def ZopeFind(self, *args, **kwargs): """Don't break ZopeFind feature when a transform can't be loaded """ try: return Folder.ZopeFind(self, *args, **kwargs) except MissingBinary: log('ZopeFind: catched MissingBinary exception') security.declareProtected(View, 'objectItems') def objectItems(self, *args, **kwargs): """Don't break ZopeFind feature when a transform can't be loaded """ try: return Folder.objectItems(self, *args, **kwargs) except MissingBinary: log('objectItems: catched MissingBinary exception') return [] # available mimetypes #################################################### def listAvailableTextInputs(self): """ Returns a list of mimetypes that can be used as input for textfields by building a list of the inputs beginning with "text/" of all transforms. """ available_types = [] candidate_transforms = [object[1] for object in self.objectItems()] for candidate in candidate_transforms: for input in candidate.inputs: if input.startswith("text/") and input not in available_types: available_types.append(input) return available_types
class MimeTypesRegistry(UniqueObject, ActionProviderBase, Folder): """Mimetype registry that deals with a) registering types b) wildcarding of rfc-2046 types c) classifying data into a given type """ __implements__ = (IMimetypesRegistry, ISourceAdapter) implements(IMimetypesRegistryTool) id = 'mimetypes_registry' meta_type = 'MimeTypes Registry' isPrincipiaFolderish = 1 # Show up in the ZMI meta_types = all_meta_types = ( { 'name' : 'MimeType', 'action' : 'manage_addMimeTypeForm'}, ) manage_options = ( ( { 'label' : 'MimeTypes', 'action' : 'manage_main'},) + Folder.manage_options[2:] ) manage_addMimeTypeForm = PageTemplateFile('addMimeType', _www) manage_main = PageTemplateFile('listMimeTypes', _www) manage_editMimeTypeForm = PageTemplateFile('editMimeType', _www) security = ClassSecurityInfo() # FIXME __allow_access_to_unprotected_subobjects__ = 1 def __init__(self,): self.encodings_map = encodings_map.copy() self.suffix_map = suffix_map.copy() # Major key -> minor IMimetype objects self._mimetypes = PersistentMapping() # ext -> IMimetype mapping self.extensions = PersistentMapping() # glob -> (regex, mimetype) mapping self.globs = OOBTree() self.manage_addProperty('defaultMimetype', 'text/plain', 'string') self.manage_addProperty('unicodePolicies', 'strict ignore replace', 'tokens') self.manage_addProperty('unicodePolicy', 'unicodePolicies', 'selection') self.manage_addProperty('fallbackEncoding', 'latin1', 'string') # initialize mime types initialize(self) self._new_style_mtr = 1 security.declareProtected(ManagePortal, 'register') def register(self, mimetype): """ Register a new mimetype mimetype must implement IMimetype """ mimetype = aq_base(mimetype) assert IMimetype.isImplementedBy(mimetype) for t in mimetype.mimetypes: self.register_mimetype(t, mimetype) for extension in mimetype.extensions: self.register_extension(extension, mimetype) for glob in mimetype.globs: self.register_glob(glob, mimetype) security.declareProtected(ManagePortal, 'register_mimetype') def register_mimetype(self, mt, mimetype): major, minor = split(mt) if not major or not minor or minor == '*': raise MimeTypeException('Can\'t register mime type %s' % mt) group = self._mimetypes.setdefault(major, PersistentMapping()) if group.has_key(minor): if group.get(minor) != mimetype: log('Warning: redefining mime type %s (%s)' % ( mt, mimetype.__class__)) group[minor] = mimetype security.declareProtected(ManagePortal, 'register_extension') def register_extension(self, extension, mimetype): """ Associate a file's extension to a IMimetype extension is a string representing a file extension (not prefixed by a dot) mimetype must implement IMimetype """ mimetype = aq_base(mimetype) if self.extensions.has_key(extension): if self.extensions.get(extension) != mimetype: log('Warning: redefining extension %s from %s to %s' % ( extension, self.extensions[extension], mimetype)) # we don't validate fmt yet, but its ["txt", "html"] self.extensions[extension] = mimetype security.declareProtected(ManagePortal, 'register_glob') def register_glob(self, glob, mimetype): """ Associate a glob to a IMimetype glob is a shell-like glob that will be translated to a regex to match against whole filename. mimetype must implement IMimetype. """ globs = getattr(self, 'globs', None) if globs is None: self.globs = globs = OOBTree() mimetype = aq_base(mimetype) existing = globs.get(glob) if existing is not None: regex, mt = existing if mt != mimetype: log('Warning: redefining glob %s from %s to %s' % ( glob, mt, mimetype)) # we don't validate fmt yet, but its ["txt", "html"] pattern = re.compile(fnmatch.translate(glob)) globs[glob] = (pattern, mimetype) security.declareProtected(ManagePortal, 'unregister') def unregister(self, mimetype): """ Unregister a new mimetype mimetype must implement IMimetype """ assert IMimetype.isImplementedBy(mimetype) for t in mimetype.mimetypes: major, minor = split(t) group = self._mimetypes.get(major, {}) if group.get(minor) == mimetype: del group[minor] for e in mimetype.extensions: if self.extensions.get(e) == mimetype: del self.extensions[e] globs = getattr(self, 'globs', None) if globs is not None: for glob in mimetype.globs: existing = globs.get(glob) if existing is None: continue regex, mt = existing if mt == mimetype: del globs[glob] security.declarePublic('mimetypes') def mimetypes(self): """Return all defined mime types, each one implements at least IMimetype """ res = {} for g in self._mimetypes.values(): for mt in g.values(): res[mt] =1 return [aq_base(mtitem) for mtitem in res.keys()] security.declarePublic('list_mimetypes') def list_mimetypes(self): """Return all defined mime types, as string""" return [str(mt) for mt in self.mimetypes()] security.declarePublic('lookup') def lookup(self, mimetypestring): """Lookup for IMimetypes object matching mimetypestring mimetypestring may have an empty minor part or containing a wildcard (*) mimetypestring may and IMimetype object (in this case it will be returned unchanged Return a list of mimetypes objects associated with the RFC-2046 name return an empty list if no one is known. """ if IMimetype.isImplementedBy(mimetypestring): return (aq_base(mimetypestring), ) __traceback_info__ = (repr(mimetypestring), str(mimetypestring)) major, minor = split(str(mimetypestring)) group = self._mimetypes.get(major, {}) if not minor or minor == '*': res = group.values() else: res = group.get(minor) if res: res = (res,) else: return () return tuple([aq_base(mtitem) for mtitem in res]) security.declarePublic('lookupExtension') def lookupExtension(self, filename): """Lookup for IMimetypes object matching filename Filename maybe a file name like 'content.txt' or an extension like 'rest' Return an IMimetype object associated with the file's extension or None """ if filename.find('.') != -1: base, ext = os.path.splitext(filename) ext = ext[1:] # remove the dot while self.suffix_map.has_key(ext): base, ext = os.path.splitext(base + self.suffix_map[ext]) ext = ext[1:] # remove the dot else: ext = filename base = None # XXX This code below make no sense and may break because base # isn't defined. if self.encodings_map.has_key(ext) and base: encoding = self.encodings_map[ext] base, ext = os.path.splitext(base) ext = ext[1:] # remove the dot else: encoding = None return aq_base(self.extensions.get(ext)) security.declarePublic('globFilename') def globFilename(self, filename): """Lookup for IMimetypes object matching filename Filename must be a complete filename with extension. Return an IMimetype object associated with the glob's or None """ globs = getattr(self, 'globs', None) if globs is None: return None for key in globs.keys(): glob, mimetype = globs[key] if glob.match(filename): return aq_base(mimetype) return None security.declarePublic('lookupGlob') def lookupGlob(self, glob): globs = getattr(self, 'globs', None) if globs is None: return None return aq_base(globs.get(glob)) def _classifiers(self): return [mt for mt in self.mimetypes() if IClassifier.isImplementedBy(mt)] security.declarePublic('classify') def classify(self, data, mimetype=None, filename=None): """Classify works as follows: 1) you tell me the rfc-2046 name and I give you an IMimetype object 2) the filename includes an extension from which we can guess the mimetype 3) we can optionally introspect the data 4) default to self.defaultMimetype if no data was provided else to application/octet-stream of no filename was provided, else to text/plain Return an IMimetype object or None """ mt = None if mimetype: mt = self.lookup(mimetype) if mt: mt = mt[0] elif filename: mt = self.lookupExtension(filename) if mt is None: mt = self.globFilename(filename) if data and not mt: for c in self._classifiers(): if c.classify(data): mt = c break if not mt: mstr = magic.guessMime(data) if mstr: mt = self.lookup(mstr)[0] if not mt: if not data: mtlist = self.lookup(self.defaultMimetype) elif filename: mtlist = self.lookup('application/octet-stream') else: failed = 'text/x-unknown-content-type' filename = filename or '' data = data or '' ct, enc = guess_content_type(filename, data, None) if ct == failed: ct = 'text/plain' mtlist = self.lookup(ct) if len(mtlist)>0: mt = mtlist[0] else: return None # Remove acquisition wrappers return aq_base(mt) def __call__(self, data, **kwargs): """ Return a triple (data, filename, mimetypeobject) given some raw data and optional paramters method from the isourceAdapter interface """ mimetype = kwargs.get('mimetype', None) filename = kwargs.get('filename', None) encoding = kwargs.get('encoding', None) mt = None if hasattr(data, 'filename'): filename = os.path.basename(data.filename) elif hasattr(data, 'name'): filename = os.path.basename(data.name) if hasattr(data, 'read'): _data = data.read() if hasattr(data, 'seek'): data.seek(0) data = _data # We need to figure out if data is binary and skip encoding if # it is mt = self.classify(data, mimetype=mimetype, filename=filename) if not mt.binary and not type(data) is UnicodeType: # if no encoding specified, try to guess it from data if encoding is None: encoding = self.guess_encoding(data) # ugly workaround for # https://sourceforge.net/tracker/?func=detail&aid=1068001&group_id=75272&atid=543430 # covered by # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=843590&group_id=5470 # dont remove this code unless python is fixed. if encoding is "macintosh": encoding = 'mac_roman' try: try: data = unicode(data, encoding, self.unicodePolicy) except (ValueError, LookupError): # wrong unicodePolicy data = unicode(data, encoding) except: data = unicode(data, self.fallbackEncoding) return (data, filename, aq_base(mt)) security.declarePublic('guess_encoding') def guess_encoding(self, data): """ Try to guess encoding from a text value if no encoding guessed, used the default charset from site properties (Zope) with a fallback to UTF-8 (should never happen with correct site_properties, but always raise Attribute error without Zope) """ if type(data) is type(u''): # data maybe unicode but with another encoding specified data = data.encode('UTF-8') encoding = guess_encoding(data) if encoding is None: try: site_props = self.portal_properties.site_properties encoding = site_props.getProperty('default_charset', 'UTF-8') except: encoding = 'UTF-8' return encoding security.declareProtected(ManagePortal, 'manage_delObjects') def manage_delObjects(self, ids, REQUEST=None): """ delete the selected mime types """ for id in ids: self.unregister(self.lookup(id)[0]) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') security.declareProtected(ManagePortal, 'manage_addMimeType') def manage_addMimeType(self, id, mimetypes, extensions, icon_path, binary=0, globs=None, REQUEST=None): """add a mime type to the tool""" mt = MimeTypeItem(id, mimetypes, extensions=extensions, binary=binary, icon_path=icon_path, globs=globs) self.register(mt) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') security.declareProtected(ManagePortal, 'manage_editMimeType') def manage_editMimeType(self, name, new_name, mimetypes, extensions, icon_path, binary=0, globs=None, REQUEST=None): """Edit a mime type by name """ mt = self.lookup(name)[0] self.unregister(mt) mt.edit(new_name, mimetypes, extensions, icon_path=icon_path, binary=binary, globs=globs) self.register(mt) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')