def do_convert(self, filename=None):
    """Run the transform on the input file and compare with the reference.

    The content of ``self.input`` is converted with ``self.transform`` and
    the resulting data is compared with the content of the expected-output
    file.  When ``filename`` is None and ``self.output + '.nofilename'``
    exists, that file is used as the reference instead of ``self.output``.

    If the reference file cannot be opened, it is created from the
    conversion result (so it can be reviewed by hand) and the test fails.

    If ``self.normalize`` is not None it is applied to both the expected
    and the obtained data before comparison.  Finally the number of
    sub-objects of the result is compared with ``self.subobjects``.
    """
    if filename is None and exists(self.output + '.nofilename'):
        expected_path = self.output + '.nofilename'
    else:
        expected_path = self.output

    # try/finally guarantees the handle is released even if read() raises.
    source = open(self.input)
    try:
        orig = source.read()
    finally:
        source.close()

    data = datastream(self.transform.name())
    res_data = self.transform.convert(orig, data, filename=filename)
    self.assert_(idatastream.isImplementedBy(res_data))
    got = res_data.getData()

    try:
        reference = open(expected_path)
    except IOError:
        import sys
        sys.stderr.write('No output file found.\n')
        # Report the path that is really written, which may be the
        # '.nofilename' variant rather than self.output.
        sys.stderr.write('File %s created, check it !\n' % expected_path)
        created = open(expected_path, 'w')
        try:
            created.write(got)
        finally:
            created.close()
        # A freshly generated reference proves nothing: always fail here.
        self.assert_(0, 'No output file found, %s has been created'
                        % expected_path)

    try:
        expected = reference.read()
    finally:
        reference.close()

    if self.normalize is not None:
        expected = self.normalize(expected)
        got = self.normalize(got)

    self.assertEquals(got, expected,
                      '[%s]\n\n!=\n\n[%s]\n\nIN %s(%s)' % (
        got, expected, self.transform.name(), self.input))
    self.assertEquals(self.subobjects,
                      len(res_data.getSubObjects()),
                      '%s\n\n!=\n\n%s\n\nIN %s(%s)' % (
        self.subobjects, len(res_data.getSubObjects()),
        self.transform.name(), self.input))
def transform(self, instance, mt, **kwargs):
    """Return the raw content converted to the mimetype ``mt``.

    For example ``object.foo.transform('text/plain')`` should give back a
    plain text rendition of the raw content.

    Returns None when there is no data, or when the data could not be
    transformed and is binary.  Non-binary data that could not be
    transformed is returned raw.  An optional ``encoding`` keyword
    overrides the portal encoding used for the returned string.
    """
    source_encoding = self.original_encoding
    raw = self.getRaw(source_encoding, instance)
    if not raw:
        return None

    # By specification there is no acquisition context on ZODB transaction
    # commit.  In that case hand back the untransformed raw value (needed
    # when used with APE).  This also avoids breaking when the transform is
    # applied to a stale instance from the catalog during a catalog rebuild.
    if not hasattr(instance, 'aq_parent'):
        return raw

    tool = getToolByName(instance, 'portal_transforms')
    stream = tool.convertTo(mt, raw, object=self, usedby=self.id,
                            context=instance,
                            mimetype=self.mimetype,
                            filename=self.filename)

    if stream:
        assert idatastream.isImplementedBy(stream)
        converted = stream.getData()
        instance.addSubObjects(stream.getSubObjects())
        site_encoding = (kwargs.get('encoding')
                         or self.portalEncoding(instance))
        # Prefer the encoding reported by the transform, then the original
        # encoding, and only then the portal encoding.
        stream_encoding = (stream.getMetadata().get('encoding')
                           or source_encoding
                           or site_encoding)
        if site_encoding != stream_encoding:
            converted = unicode(converted, stream_encoding).encode(site_encoding)
        return converted

    # The data could not be transformed: fall back to the raw data unless
    # it is binary.
    # FIXME: is this really the behaviour we want ?
    if self.isBinary():
        return None

    site_encoding = kwargs.get('encoding') or self.portalEncoding(instance)
    if site_encoding != source_encoding:
        return self.getRaw(site_encoding)
    return raw
def convertTo(self, target_mimetype, orig, data=None, object=None,
              usedby=None, context=None, **kwargs):
    """Convert orig to a given mimetype

    * orig is an encoded string

    * data is an optional idatastream object. If None a new datastream
      will be created and returned

    * the optional object argument is the object the data is bound to.
      If present, the engine uses that object to store cached data.

    * additional arguments (kwargs) are passed to the transformations.
      Some usual arguments are : filename, mimetype, encoding

    Return an object implementing idatastream, or None if the mime type
    registry is unusable, the input or target mime type cannot be
    resolved, or no transformation path has been found.

    Raises AssertionError if the transform result does not implement
    idatastream.
    """
    target_mimetype = str(target_mimetype)

    if object is not None:
        cache = Cache(object)
        data = cache.getCache(target_mimetype)
        if data is not None:
            cached_time, data = data
            if (self.max_sec_in_cache == 0
                    or cached_time < self.max_sec_in_cache):
                return data
            # NOTE(review): when the cached entry is rejected above, `data`
            # still refers to the cached datastream, so it is reused as the
            # output stream below instead of a fresh one.  A caller-supplied
            # `data` is also discarded whenever `object` is given.  Confirm
            # this is intended.

    if data is None:
        data = self._wrap(target_mimetype)

    registry = getToolByName(self, 'mimetypes_registry')

    if not getattr(aq_base(registry), 'classify', None):
        # avoid problems when importing a site with an old mimetype registry
        # XXX return None or orig?
        return None

    guessed_mt = registry.classify(orig,
                                   mimetype=kwargs.get('mimetype'),
                                   filename=kwargs.get('filename'))
    # str(None) is the non-empty string 'None', so a failed classification
    # has to be detected before the conversion to str.
    if guessed_mt is None:
        orig_mt = ''
    else:
        orig_mt = str(guessed_mt)
    if not orig_mt:
        log('Unable to guess input mime type (filename=%s, mimetype=%s)' % (
            kwargs.get('filename'), kwargs.get('mimetype')), severity=DEBUG)
        return None

    target_mt = registry.lookup(target_mimetype)
    if target_mt:
        target_mt = target_mt[0]
    else:
        log('Unable to match target mime type %s' % str(target_mimetype),
            severity=DEBUG)
        return None

    ## fastpath
    # If orig_mt and target_mt are the same, we only allow
    # a one-hop transform, a.k.a. filter.
    # XXX disabled filtering for now: the data is returned unchanged.
    if orig_mt == str(target_mt):
        data.setData(orig)
        md = data.getMetadata()
        md['mimetype'] = str(orig_mt)
        if object is not None:
            cache.setCache(str(target_mimetype), data)
        return data

    ## get a path to output mime type
    requirements = self._policies.get(str(target_mt), [])
    path = self._findPath(orig_mt, target_mt, list(requirements))
    if not path and requirements:
        log('Unable to satisfy requirements %s' % ', '.join(requirements),
            severity=DEBUG)
        # retry without the policy requirements
        path = self._findPath(orig_mt, target_mt)

    if not path:
        log('NO PATH FROM %s TO %s : %s' % (orig_mt, target_mimetype, path),
            severity=DEBUG)
        return None  # XXX raise TransformError

    if len(path) > 1:
        ## create a chain on the fly (sly)
        transform = chain()
        for t in path:
            transform.registerTransform(t)
    else:
        transform = path[0]

    result = transform.convert(orig, data, context=context,
                               usedby=usedby, **kwargs)
    # No parentheses around the pair: `assert (cond, msg)` tests a non-empty
    # tuple and therefore can never fail.
    assert idatastream.isImplementedBy(result), \
        'result doesn\'t implemented idatastream'
    self._setMetaData(result, transform)

    # set cache if possible
    if object is not None and result.isCacheable():
        cache.setCache(str(target_mimetype), result)

    # return idatastream object
    return result
def _unwrap(self, data):
    """Return the payload of ``data`` if it is an idatastream.

    Anything that does not implement idatastream is returned unchanged.
    """
    if not idatastream.isImplementedBy(data):
        return data
    return data.getData()