def test_get_cached(self, client_env):
    # a document converted once can be fetched again via its cache key
    converter = Client(cache_dir=client_env.cache_dir)
    converted_path, key, _metadata = converter.convert(client_env.src_doc)
    assert key == '396199333edbf40ad43e62a1c1397793_1_1'
    path_from_cache = converter.get_cached(key)
    # the cached copy has the same content and lives below the cache dir
    assert filecmp.cmp(converted_path, path_from_cache, shallow=False)
    assert client_env.cache_dir in path_from_cache
def get_cached(self, cache_key):
    """Look up a cached document by its cache key.

    Delegates to a `Client` created with this object's `cache_dir`
    and returns whatever `Client.get_cached` returns for `cache_key`:
    the stored document representation if it exists in the cache,
    `None` otherwise.
    """
    return Client(cache_dir=self.cache_dir).get_cached(cache_key)
def test_get_cached(self):
    # a document converted once can be fetched again via its cache key
    converter = Client(cache_dir=self.cachedir)
    converted_path, key, _metadata = converter.convert(self.src_doc)
    # remember the result directory first, so it is known for cleanup
    # even if one of the assertions below fails
    self.resultdir = os.path.dirname(converted_path)
    assert key == '164dfcf01584bd0e3595b62fb53cf12c_1_1'
    path_from_cache = converter.get_cached(key)
    # the cached copy has the same content and lives below the cache dir
    assert filecmp.cmp(converted_path, path_from_cache, shallow=False)
    assert self.cachedir in path_from_cache
def test_get_cached_no_manager(self, client_env):
    # a client without cache_dir (hence without cache manager) must
    # answer cache lookups with None
    uncached_client = Client(cache_dir=None)
    key = '396199333edbf40ad43e62a1c1397793_1_1'
    assert uncached_client.get_cached(key) is None
def test_get_cached_no_file(self, client_env):
    # looking up a key that was never cached yields None
    caching_client = Client(cache_dir=client_env.cache_dir)
    key = '396199333edbf40ad43e62a1c1397793_1_1'
    assert caching_client.get_cached(key) is None
def test_get_cached_no_file(self):
    # looking up a key that was never cached yields None
    caching_client = Client(cache_dir=self.cachedir)
    key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
    assert caching_client.get_cached(key) is None
def test_get_cached_no_cache_dir(self):
    # a client created without any cache dir answers lookups with None
    plain_client = Client()
    key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
    assert plain_client.get_cached(key) is None
class Document(commandtransform):
    """A document that can be converted via ulif.openoffice client.

    `name` - basename of file

    `data` - (binary) data of file, the file contents

    `cache_dir` - optional cache directory passed on to the `Client`
                  used for conversions.
    """

    def __init__(self, name, data, cache_dir=None):
        commandtransform.__init__(self, name)
        name = self.name()
        # `initialize_tmpdir` yields the working directory and the full
        # path of the source document.
        self.tmpdir, self.fullname = self.initialize_tmpdir(
            data, filename=name)
        self.cache_dir = cache_dir
        self.client = Client(cache_dir=cache_dir)

    def __del__(self):
        """Remove the temporary directory and loop on all base destructors.

        This method is protected against diamond inheritance: each
        direct base class (identified by its string representation) is
        handled only once.
        """
        # `tmpdir` does not exist if `__init__` failed before setting
        # it; a destructor must not raise in that case.
        tmpdir = getattr(self, 'tmpdir', None)
        if tmpdir and isdir(tmpdir):
            self.cleanDir(tmpdir)
        seen_bases = set()
        for base in self.__class__.__bases__:
            basekey = str(base)
            if basekey in seen_bases:
                continue
            seen_bases.add(basekey)
            if hasattr(base, '__del__'):
                base.__del__(self)

    @staticmethod
    def _read_file(path):
        """Return the contents of the file at `path`, closing it afterwards.

        NOTE(review): the file is opened in text mode ('r'), as the
        original code did. Under Python 3, binary results (PDF) would
        presumably need 'rb' -- confirm the targeted Python version
        before changing it, as the return type would change.
        """
        with open(path, 'r') as fd:
            return fd.read()

    @classmethod
    def subObjects(cls, path):
        """Overwritten from base.

        Return `path` (with a trailing path separator) and a list of
        basenames of allowed files found in `path`.

        Allowed files are such with filename extension '.png', '.jpg',
        '.gif', '.css'. The ``.css`` filename extension is not allowed
        in the original method.
        """
        allowed = ('png', 'jpg', 'gif', 'css')
        filenames = []
        for filename in os.listdir(path):
            # Greedy match: `ext` is the part after the last dot; names
            # without a non-empty stem and extension do not match.
            result = re.match(r"^.+\.(?P<ext>.+)$", filename)
            if result is not None and result.group('ext') in allowed:
                filenames.append(filename)
        path = os.path.join(path, '')
        return path, filenames

    def convert(self, cache_key=None):
        """Convert the document to HTML.

        Returns the main document content as string and a cache_key
        for quick later retrieval.

        Additional documents (images, etc.) which are result of the
        conversion are placed in the `tmpdir` of this `Document`. The
        former `tmpdir` is removed and replaced by the directory
        containing the result.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        Raises `IOError` if conversion fails.
        """
        name = self.name()
        src_path = os.path.join(self.tmpdir, name)
        # Lookup cached doc by cache key (fast)
        resultpath = self.client.get_cached(cache_key)
        if resultpath is None:
            # Lookup cached doc by source (expensive)
            resultpath, cache_key = self.client.get_cached_by_source(
                src_path, OPTIONS_HTML)
        if resultpath is not None:
            # Cache hit: work on a copy of the cached result. `newdir`
            # becomes our `tmpdir` below, presumably so that cleaning
            # it up later cannot touch the cache itself.
            newdir = copy_to_secure_location(resultpath)
            resultpath = os.path.join(newdir, os.path.basename(resultpath))
        else:
            # Convert to HTML, new doc will be in resultpath
            resultpath, cache_key, metadata = self.client.convert(
                src_path, OPTIONS_HTML)
            if metadata['error']:
                descr = metadata.get('error-descr', 'Descr. not avail.')
                raise IOError('Could not convert: %s [%s]' % (name, descr))
            newdir = os.path.dirname(resultpath)
        html = self._read_file(resultpath)
        # Replace the source directory by the result directory.
        self.cleanDir(self.tmpdir)
        self.tmpdir = newdir
        return html, cache_key

    def convertToPDF(self, cache_key=None):
        """Convert the document to PDF.

        Returns the generated document contents as string and a cache
        key. The cache_key might be None if no cache_dir was set
        before.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        Raises `IOError` if conversion fails.
        """
        # Lookup cached doc by cache key (fast)
        pdffilepath = self.client.get_cached(cache_key)
        if pdffilepath is not None:
            return self._read_file(pdffilepath), cache_key
        name = self.name()
        src_path = os.path.join(self.tmpdir, name)
        # Lookup cached doc by source (expensive)
        pdffilepath, cache_key = self.client.get_cached_by_source(
            src_path, OPTIONS_PDF)
        if pdffilepath is not None:
            return self._read_file(pdffilepath), cache_key
        pdffilepath, cache_key, metadata = self.client.convert(
            src_path, OPTIONS_PDF)
        if metadata['error']:
            descr = metadata.get('error-descr', 'Descr. not avail.')
            raise IOError('Could not convert: %s [%s]' % (name, descr))
        pdf = self._read_file(pdffilepath)
        # Remove temporary dir...
        # NOTE(review): `tmpdir` is pointed at the result directory
        # before cleaning, so the previous `tmpdir` (holding the source
        # document) is not cleaned here -- confirm this is intended.
        self.tmpdir = os.path.dirname(pdffilepath)
        self.cleanDir(self.tmpdir)
        return pdf, cache_key