def test_prepare_cache_dir(self, tmpdir):
    # _prepare_cache_dir creates the configured cache dir on demand.
    manager = CacheManager(str(tmpdir.join("cache")))
    target = tmpdir.join("new_dir")
    manager.cache_dir = str(target)
    manager._prepare_cache_dir()
    assert target.isdir() is True
def test_get_bucket_path(self, tmpdir):
    # a hash value maps to a deterministic bucket path below cache_dir
    manager = CacheManager(str(tmpdir.join("cache")))
    tmpdir.join("src.txt").write("source1\n")
    digest = manager.get_hash(str(tmpdir / "src.txt"))
    expected = tmpdir / "cache" / "73" / "737b337e605199de28b3b64c674f9422"
    assert manager._get_bucket_path(digest) == expected
def test_get_cached_file_by_src_failed(self, cache_env):
    # asking for an uncached source yields (None, None)
    manager = CacheManager(str(cache_env))
    path, cache_key = manager.get_cached_file_by_source(
        str(cache_env / "src1.txt"))
    assert path is None
    assert cache_key is None
def test_prepare_cache_dir_broken(self, tmpdir):
    # we fail loudly if the cache dir path is blocked by a file
    manager = CacheManager(str(tmpdir))
    tmpdir.join("not-a-dir.txt").write("foo")  # a file, not a dir
    manager.cache_dir = str(tmpdir / "not-a-dir.txt")
    with pytest.raises(IOError):
        manager._prepare_cache_dir()
def test_get_all_sources(self):
    # getAllSources() yields all registered source docs, skipping
    # stray files and dirs that do not belong to the cache layout.
    cm = CacheManager(self.workdir)
    result1 = cm.getAllSources()
    # an empty cache yields an empty generator
    self.assertTrue(isinstance(result1, types.GeneratorType))
    self.assertEqual(list(result1), [])
    cm.registerDoc(
        self.src_path1, self.result_path1, suffix=None)
    cm.registerDoc(
        self.src_path2, self.result_path2, suffix='foo')
    result2 = list(cm.getAllSources())
    self.assertTrue(len(result2) == 2)
    # a stray plain file in the cache dir is ignored.
    # FIX: the original opened the file in 'wb' mode and wrote a str,
    # which raises TypeError on Python 3; write text in text mode and
    # use a context manager so the handle is closed.
    with open(os.path.join(self.workdir, 'crapfile'), 'w') as fd:
        fd.write('crap')
    result3 = list(cm.getAllSources())
    self.assertFalse('crap' in result3)
    # a stray directory is ignored as well
    os.mkdir(os.path.join(self.workdir, 'crapdir'))
    result4 = list(cm.getAllSources())
    self.assertFalse('crapdir' in result4)
    # hash-level dirs without valid buckets are ignored
    os.makedirs(os.path.join(self.workdir, '66', 'invalid_hashdir'))
    result5 = list(cm.getAllSources())
    self.assertFalse('66' in result5)
    return
def test_get_cached_file(self, cache_env):
    # we can get a file cached before.
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                str(cache_env / "result1.txt"))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_cached_file_by_src(self):
    # a cached file can be looked up by its source path alone
    manager = CacheManager(self.workdir)
    # register without an explicit cache key
    registered = manager.register_doc(self.src_path1, self.result_path1)
    path, found_key = manager.get_cached_file_by_source(self.src_path1)
    assert filecmp.cmp(path, self.result_path1, shallow=False)
    assert found_key == '737b337e605199de28b3b64c674f9422_1_1'
def test_get_bucket_path_from_path(self):
    # a source path maps to a bucket path without touching the disk
    manager = CacheManager(self.workdir)
    bucket_path = manager._getBucketPathFromPath(self.src_path1)
    tail = os.path.join('73', '737b337e605199de28b3b64c674f9422')
    self.assertEqual(os.listdir(self.workdir), [])  # nothing created
    self.assertTrue(bucket_path.endswith(tail))
    return
def test_get_bucket_from_path(self):
    # getBucketFromPath returns a Bucket and creates one cache entry
    entries_before = len(os.listdir(self.workdir))
    manager = CacheManager(self.workdir)
    bucket = manager.getBucketFromPath(self.src_path1)
    entries_after = len(os.listdir(self.workdir))
    self.assertTrue(isinstance(bucket, Bucket))
    self.assertTrue(entries_after == entries_before + 1)
    return
def test_get_hash(self):
    # getHash computes stable MD5 digests for source files
    manager = CacheManager(self.workdir)
    self.assertEqual(
        manager.getHash(self.src_path1),
        '737b337e605199de28b3b64c674f9422')
    self.assertEqual(
        manager.getHash(self.src_path2),
        'd5aa51d7fb180729089d2de904f7dffe')
    # calling without a path is a TypeError
    self.assertRaises(TypeError, manager.getHash)
    return
def register_fakedoc_in_cache(self, src, options):
    """Register a fake doc in cache.

    The resulting cache_key is based on the path to the `src`
    document and the `options` given.
    """
    cm = CacheManager(self.cachedir)
    fake_result_path = os.path.join(self.workdir, 'result.html')
    # FIX: use a context manager so the file is closed (and its
    # contents flushed to disk) before the cache manager copies it;
    # the original leaked the handle from a bare open().write().
    with open(fake_result_path, 'w') as fd:
        fd.write('A fake result.')
    marker = get_marker(options)
    cache_key = cm.register_doc(src, fake_result_path, repr_key=marker)
    return cache_key
def test_get_cached_file_w_key_from_file(self, cache_env):
    # we can get a cached file, stored under a key, which is a file
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                str(cache_env / "result1.txt"),
                                repr_key=StringIO('foo'))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_cached_file(self, cache_env):
    # we can get a file cached before.
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(
        str(cache_env / "src1.txt"),
        str(cache_env / "result1.txt"))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_cached_file_by_src(self, cache_env):
    # we can get a cached file by source file and options
    cm = CacheManager(str(cache_env / "cache"))
    # without a cache key
    my_id = cm.register_doc(str(cache_env / "src1.txt"),
                            str(cache_env / "result1.txt"))
    path, key = cm.get_cached_file_by_source(str(cache_env / "src1.txt"))
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, "r") as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
    assert key == '737b337e605199de28b3b64c674f9422_1_1'
    assert my_id == key
def test_markerhandling(self, tmpdir):
    # cache keys can be composed and dissolved again.
    manager = CacheManager(str(tmpdir))
    assert manager._compose_cache_key(
        'somefakedhash', 3) == "somefakedhash_3"
    digest, bucket_marker = manager._dissolve_cache_key("somefakedhash_3")
    assert digest == "somefakedhash"
    assert bucket_marker == "3"
    # malformed or missing keys dissolve to (None, None)
    assert manager._dissolve_cache_key("asd") == (None, None)
    assert manager._dissolve_cache_key(None) == (None, None)
def test_get_cached_file_w_key_from_file(self, cache_env):
    # we can get a cached file, stored under a key, which is a file
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(
        str(cache_env / "src1.txt"),
        str(cache_env / "result1.txt"),
        repr_key=StringIO('foo'))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_hash(self, cache_env, samples_dir):
    # we can compute a hash for a source file (text or binary).
    manager = CacheManager(str(cache_env))
    expectations = [
        (str(cache_env / "src1.txt"),
         '737b337e605199de28b3b64c674f9422'),
        (str(cache_env / "src2.txt"),
         'd5aa51d7fb180729089d2de904f7dffe'),
        (str(samples_dir / "testdoc1.doc"),
         '443a07e0e92b7dc6b21f8be6a388f05f'),
    ]
    for src, digest in expectations:
        assert manager.get_hash(src) == digest
    # a path argument is required
    with pytest.raises(TypeError):
        manager.get_hash()
def test_markerhandling(self):
    # markers can be composed and dissolved again.
    manager = CacheManager(self.workdir)
    self.assertEqual(
        manager._composeMarker('somefakedhash', 3), 'somefakedhash_3')
    digest, bucket_marker = manager._dissolveMarker('somefakedhash_3')
    self.assertEqual(digest, 'somefakedhash')
    self.assertEqual(bucket_marker, '3')
    # invalid markers dissolve to (None, None)
    self.assertEqual(manager._dissolveMarker('asd'), (None, None))
    self.assertEqual(manager._dissolveMarker(object()), (None, None))
    return
def test_get_cached(self):
    # we can get cached docs
    cm = CacheManager(self.cachedir)
    fake_result_path = os.path.join(self.src_dir, 'result.txt')
    # FIX: the original wrote a str to a file opened in 'wb' mode,
    # which raises TypeError on Python 3; write text in text mode
    # with a context manager (matches the fixed sibling test).
    with open(fake_result_path, 'w') as fd:
        fd.write('The Result\n')
    key = cm.register_doc(self.src_path, fake_result_path, 'somekey')
    assert key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
    result_path = self.proxy.get_cached(key)
    assert result_path is not None
    # cache hands out its own copy, not the original path
    assert result_path != fake_result_path
    assert filecmp.cmp(result_path, fake_result_path, shallow=False)
def test_get_hash(self):
    # get_hash copes with text and binary sources alike
    manager = CacheManager(self.workdir)
    binary_src = os.path.join(
        # a binary stream not convertible to utf-8
        os.path.dirname(__file__), 'input', 'testdoc1.doc')
    self.assertEqual(
        manager.get_hash(self.src_path1),
        '737b337e605199de28b3b64c674f9422')
    self.assertEqual(
        manager.get_hash(self.src_path2),
        'd5aa51d7fb180729089d2de904f7dffe')
    self.assertEqual(
        manager.get_hash(binary_src),
        '443a07e0e92b7dc6b21f8be6a388f05f')
    # a path argument is required
    self.assertRaises(TypeError, manager.get_hash)
    return
def test_get_cached(self):
    # a registered doc can be retrieved from cache via its key
    manager = CacheManager(self.cachedir)
    fake_result = os.path.join(self.src_dir, 'result.txt')
    with open(fake_result, 'w') as fd:
        fd.write('The Result\n')
    key = manager.register_doc(self.src_path, fake_result, 'somekey')
    assert key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
    cached_path = self.proxy.get_cached(key)
    assert cached_path is not None
    # cache hands out its own copy, not the original path
    assert cached_path != fake_result
    assert filecmp.cmp(cached_path, fake_result, shallow=False)
def test_get_bucket_path_from_hash(self):
    # a hash maps to a bucket path; bogus hashes map to None
    manager = CacheManager(self.workdir)
    digest = manager.getHash(self.src_path1)
    bucket_path = manager._getBucketPathFromHash(digest)
    tail = os.path.join('73', '737b337e605199de28b3b64c674f9422')
    self.assertEqual(os.listdir(self.workdir), [])  # nothing created
    self.assertTrue(bucket_path.endswith(tail))
    self.assertEqual(manager._getBucketPathFromHash('nonsense'), None)
    return
def test_get_cached_file_by_src(self, cache_env):
    # we can get a cached file by source file and options
    cm = CacheManager(str(cache_env / "cache"))
    # without a cache key
    my_id = cm.register_doc(
        str(cache_env / "src1.txt"),
        str(cache_env / "result1.txt"))
    path, key = cm.get_cached_file_by_source(
        str(cache_env / "src1.txt"))
    # FIX: close the file handle via a context manager instead of
    # leaking it with a bare open(...).read().
    with open(path, "r") as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
    assert key == '737b337e605199de28b3b64c674f9422_1_1'
    assert my_id == key
def test_register_doc(self):
    # registerDoc returns stable markers: same source gives the
    # same digest regardless of result/suffix.
    manager = CacheManager(self.workdir)
    marker1 = manager.registerDoc(
        self.src_path1, self.result_path1, suffix=None)
    marker2 = manager.registerDoc(
        self.src_path1, self.result_path1, suffix=None)
    marker3 = manager.registerDoc(
        self.src_path1, self.result_path2, suffix='foo')
    marker4 = manager.registerDoc(
        self.src_path2, self.result_path2, suffix='foo')
    self.assertEqual(
        [marker1, marker2, marker3, marker4],
        ['737b337e605199de28b3b64c674f9422_1',
         '737b337e605199de28b3b64c674f9422_1',
         '737b337e605199de28b3b64c674f9422_1',
         'd5aa51d7fb180729089d2de904f7dffe_1'])
    return
def test_convert_to_pdf_cached_wo_cache_key(self):
    # We can get a cached doc also without a cache key (but
    # it is expensive)
    self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
    pdf1, cache_key1 = self.doc.convertToPDF()  # store doc in cache
    # modify the cached result to distinguish it from a freshly
    # converted doc
    from ulif.openoffice.cachemanager import CacheManager
    cm = CacheManager(self.workdir)
    cached_path = cm.get_cached_file(cache_key1)
    # FIX: the original wrote a str to a file opened in 'wb' mode,
    # which raises TypeError on Python 3; write text in text mode
    # with a context manager so the handle is closed.
    with open(cached_path, 'w') as fd:
        fd.write('My Fake Result')
    # now re-get the document. We should get the cached copy
    self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
    pdf2, cache_key2 = self.doc.convertToPDF()
    self.assertEqual(pdf2, 'My Fake Result')
    self.assertEqual(cache_key2, cache_key1)
def test_init_fails_loudly(self, tmpdir):
    # If we get a file as cache dir (instead of a directory), we
    # fail loudly...
    not_a_dir = tmpdir.join("some_file.txt")
    not_a_dir.write("this-is-not-a-dir")
    with pytest.raises(IOError):
        CacheManager(str(not_a_dir))
def test_get_cached_file(self):
    # getCachedFile honors both source path and suffix: only the
    # exact (source, suffix) combination registered before is found.
    cm = CacheManager(self.workdir)
    path = cm.getCachedFile(self.src_path1)
    self.assertTrue(path is None)
    # the miss above must not create anything in the cache dir
    self.assertEqual(os.listdir(self.workdir), [])
    cm.registerDoc(self.src_path1, self.result_path1, suffix=None)
    path1 = cm.getCachedFile(self.src_path1)
    path2 = cm.getCachedFile(self.src_path1, suffix='bar')
    path3 = cm.getCachedFile(self.src_path1, suffix='foo')
    # registered without suffix: only the suffix-less lookup hits
    self.assertTrue(path1 is not None)
    self.assertTrue(path2 is None)
    self.assertTrue(path3 is None)
    cm.registerDoc(self.src_path2, self.result_path1, suffix='foo')
    path1 = cm.getCachedFile(self.src_path2)
    path2 = cm.getCachedFile(self.src_path2, suffix='bar')
    path3 = cm.getCachedFile(self.src_path2, suffix='foo')
    # registered with suffix 'foo': only that suffix lookup hits
    self.assertTrue(path1 is None)
    self.assertTrue(path2 is None)
    self.assertTrue(path3 is not None)
    return
def convert_doc(src_doc, options, cache_dir):
    """Convert `src_doc` according to the other parameters.

    `src_doc` is the path to the source document. `options` is a dict
    of options for processing, passed to the processors. `cache_dir`
    may be ``None`` in which no caching is requested during
    processing.

    Generates a converted representation of `src_doc` by calling
    :class:`ulif.openoffice.processor.MetaProcessor` with `options` as
    parameters.

    Afterwards the conversion result is stored in cache (if
    allowed/possible) for speedup of upcoming requests.

    Returns a triple:

      ``(<PATH>, <CACHE_KEY>, <METADATA>)``

    where ``<PATH>`` is the path to the resulting document,
    ``<CACHE_KEY>`` an identifier (string) to retrieve a generated doc
    from cache on future requests, and ``<METADATA>`` is a dict of
    values returned during request (and set by the document
    processors, notably setting the `error` keyword).

    If errors happen or caching is disabled, ``<CACHE_KEY>`` is
    ``None``.
    """
    result_path = None
    cache_key = None
    repr_key = get_marker(options)  # create unique marker out of options
    metadata = dict(error=False)

    # Generate result. Work on a copy because the processors remove
    # their input document.
    input_copy_dir = tempfile.mkdtemp()
    input_copy = os.path.join(input_copy_dir, os.path.basename(src_doc))
    shutil.copy2(src_doc, input_copy)
    try:
        proc = MetaProcessor(options=options)  # removes original doc
        result_path, metadata = proc.process(input_copy)
    except Exception:
        shutil.rmtree(input_copy_dir)
        # FIX: bare `raise` re-raises with the original traceback
        # intact; the former `raise exc` rewrote it to point here.
        raise
    error_state = metadata.get('error', False)
    if cache_dir and not error_state and result_path is not None:
        # cache away generated doc
        cache_key = CacheManager(cache_dir).register_doc(
            src_doc, result_path, repr_key)
    return result_path, cache_key, metadata
def test_get_cached_file_by_src_w_key(self):
    # lookups by source honor the repr key: only the same
    # (source, key) pair registered before yields a hit.
    cm = CacheManager(self.workdir)
    my_id = cm.register_doc(self.src_path1, self.result_path1, 'mykey')
    result1, key1 = cm.get_cached_file_by_source(self.src_path1, 'mykey')
    assert filecmp.cmp(result1, self.result_path1, shallow=False)
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    # an unknown key misses
    result2, key2 = cm.get_cached_file_by_source(
        self.src_path1, 'otherkey')
    assert result2 is None
    assert key2 is None
    # registering under the second key bumps the trailing counter
    cm.register_doc(self.src_path1, self.result_path2, 'otherkey')
    result3, key3 = cm.get_cached_file_by_source(
        self.src_path1, 'otherkey')
    assert filecmp.cmp(result3, self.result_path2, shallow=False)
    assert key3 == '737b337e605199de28b3b64c674f9422_1_2'
def test_get_cached_file_from_marker(self):
    # getCachedFileFromMarker resolves markers back to cached files;
    # unknown or malformed markers yield None.
    cm = CacheManager(self.workdir)
    path1 = cm.getCachedFileFromMarker('not-a-marker')
    # a valid-looking marker, but nothing registered yet
    path2 = cm.getCachedFileFromMarker('737b337e605199de28b3b64c674f9422_1')
    path3 = cm.getCachedFileFromMarker('not-a-marker-with_underscore')
    marker1 = cm.registerDoc(
        self.src_path1, self.result_path1, suffix=None)
    path4 = cm.getCachedFileFromMarker(marker1)
    marker2 = cm.registerDoc(
        self.src_path1, self.result_path2, suffix='foo')
    # docs registered with a suffix need the same suffix on lookup
    path5 = cm.getCachedFileFromMarker(marker2, suffix='foo')
    self.assertTrue(path1 is None)
    self.assertTrue(path2 is None)
    self.assertTrue(path3 is None)
    self.assertTrue(path4 is not None)
    self.assertTrue(path5 is not None)
    return
def test_prepare_cache_dir(self):
    # prepareCacheDir creates missing dirs, tolerates None and fails
    # loudly on paths blocked by regular files.
    new_cache_dir = os.path.join(self.workdir, 'newcache')
    broken_cache_dir = os.path.join(self.workdir, 'broken')
    # FIX: the original wrote a str to a file opened in 'wb' mode,
    # which raises TypeError on Python 3; write text in text mode
    # with a context manager so the handle is closed.
    with open(broken_cache_dir, 'w') as fd:
        fd.write('broken')
    cm = CacheManager(self.workdir)
    cm.cache_dir = None
    self.assertEqual(cm.prepareCacheDir(), None)
    cm.cache_dir = new_cache_dir
    cm.prepareCacheDir()
    self.assertTrue(os.path.isdir(new_cache_dir))
    cm.cache_dir = broken_cache_dir
    self.assertRaises(IOError, cm.prepareCacheDir)
    return
def test_get_cached_file(self):
    # get_cached_file returns cached results for valid cache keys and
    # None otherwise; repr_key may be a string or a file-like object.
    cm = CacheManager(self.workdir)
    path = cm.get_cached_file(self.src_path1)
    self.assertTrue(path is None)
    # the miss above must not create anything in the cache dir
    self.assertEqual(os.listdir(self.workdir), [])
    my_id1 = cm.register_doc(self.src_path1, self.result_path1)
    path1 = cm.get_cached_file(my_id1)
    self.assertTrue(path1 is not None)
    my_id2 = cm.register_doc(
        self.src_path2, self.result_path1, repr_key='foo')
    path1 = cm.get_cached_file(my_id2)
    self.assertTrue(path1 is not None)
    my_id3 = cm.register_doc(
        self.src_path2, self.result_path1, repr_key=StringIO('foo'))
    path1 = cm.get_cached_file(my_id3)
    self.assertTrue(path1 is not None)
    # equal repr_key content yields equal cache keys
    self.assertEqual(my_id2, my_id3)
    self.assertEqual(cm.get_cached_file('nonsense_really'), None)
    return
def test_register_doc(self, cache_env):
    # registering returns stable cache keys; a new repr_key bumps the
    # trailing counter, a new source gets its own digest.
    cm = CacheManager(str(cache_env / "cache"))
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    assert cm.register_doc(
        src1, result1) == '737b337e605199de28b3b64c674f9422_1_1'
    # re-registering the identical doc yields the identical key
    assert cm.register_doc(
        src1, result1) == '737b337e605199de28b3b64c674f9422_1_1'
    assert cm.register_doc(
        src1, result2,
        repr_key="foo") == '737b337e605199de28b3b64c674f9422_1_2'
    assert cm.register_doc(
        src2, result2,
        repr_key="foo") == 'd5aa51d7fb180729089d2de904f7dffe_1_1'
    # repr_key may also be a file-like object
    assert cm.register_doc(
        src2, result2,
        repr_key=StringIO("bar")) == 'd5aa51d7fb180729089d2de904f7dffe_1_2'
def test_keys(self, cache_env):
    # we can get all cache keys
    cm = CacheManager(str(cache_env / "cache"))
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    key1 = cm.register_doc(src1, result1, 'foo')
    assert list(cm.keys()) == ['737b337e605199de28b3b64c674f9422_1_1']
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    # a second repr key for the same source bumps the counter
    key2 = cm.register_doc(src1, result2, 'bar')
    assert sorted(list(cm.keys())) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        ]
    assert key2 == '737b337e605199de28b3b64c674f9422_1_2'
    # a different source gets its own digest
    key3 = cm.register_doc(src2, result1, 'baz')
    assert sorted(list(cm.keys())) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        'd5aa51d7fb180729089d2de904f7dffe_1_1',
        ]
    assert key3 == 'd5aa51d7fb180729089d2de904f7dffe_1_1'
def test_keys_custom_level(self, cache_env):
    # we can get all cache keys, even if a custom cache level is set
    # (and keys are stored in different location).
    cm = CacheManager(str(cache_env / "cache"), level=3)
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    key1 = cm.register_doc(src1, result1, 'foo')
    assert list(cm.keys()) == ['737b337e605199de28b3b64c674f9422_1_1']
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    # a second repr key for the same source bumps the counter
    key2 = cm.register_doc(src1, result2, 'bar')
    assert sorted(list(cm.keys())) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        ]
    assert key2 == '737b337e605199de28b3b64c674f9422_1_2'
    # a different source gets its own digest
    key3 = cm.register_doc(src2, result1, 'baz')
    assert sorted(list(cm.keys())) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        'd5aa51d7fb180729089d2de904f7dffe_1_1',
        ]
    assert key3 == 'd5aa51d7fb180729089d2de904f7dffe_1_1'
def test_get_cached_file_by_src_w_key(self, cache_env):
    # lookups by source honor the repr key: only the same
    # (source, key) pair registered before yields a hit.
    cm = CacheManager(str(cache_env / "cache"))
    src = cache_env / "src1.txt"
    result1 = cache_env / "result1.txt"
    result2 = cache_env / "result2.txt"
    my_id1 = cm.register_doc(str(src), str(result1), 'mykey')
    path1, key1 = cm.get_cached_file_by_source(str(src), 'mykey')
    assert filecmp.cmp(path1, str(result1), shallow=False)
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    assert key1 == my_id1
    # yet not existent cache file
    path2, key2 = cm.get_cached_file_by_source(str(src), 'otherkey')
    assert path2 is None
    assert key2 is None
    # store and retrieve 2nd cache file
    my_id3 = cm.register_doc(str(src), str(result2), 'otherkey')
    path3, key3 = cm.get_cached_file_by_source(str(src), 'otherkey')
    assert filecmp.cmp(path3, str(result2), shallow=False)
    assert key3 == '737b337e605199de28b3b64c674f9422_1_2'
    assert key3 == my_id3
    return
def test_register_doc(self):
    # register_doc returns stable byte-string markers; a new
    # repr_key bumps the trailing counter.
    cm = CacheManager(self.workdir)
    marker1 = cm.register_doc(
        self.src_path1, self.result_path1)
    marker2 = cm.register_doc(
        self.src_path1, self.result_path1)
    marker3 = cm.register_doc(
        self.src_path1, self.result_path2, repr_key=b'foo')
    marker4 = cm.register_doc(
        self.src_path2, self.result_path2, repr_key=b'foo')
    # NOTE(review): StringIO(b'bar') fails on Python 3 (bytes into a
    # text buffer); presumably this test targets Python 2 or StringIO
    # is aliased to BytesIO at import time — confirm.
    marker5 = cm.register_doc(
        self.src_path2, self.result_path2, repr_key=StringIO(b'bar'))
    self.assertEqual(marker1, b'737b337e605199de28b3b64c674f9422_1_1')
    self.assertEqual(marker2, b'737b337e605199de28b3b64c674f9422_1_1')
    self.assertEqual(marker3, b'737b337e605199de28b3b64c674f9422_1_2')
    self.assertEqual(marker4, b'd5aa51d7fb180729089d2de904f7dffe_1_1')
    self.assertEqual(marker5, b'd5aa51d7fb180729089d2de904f7dffe_1_2')
    return
def test_keys_custom_level(self):
    # we can get all cache keys also with custom level set
    # (keys live deeper in the dir tree, but enumeration still works)
    cm = CacheManager(self.workdir, level=3)
    key1 = cm.register_doc(self.src_path1, self.result_path1, 'foo')
    self.assertEqual(
        list(cm.keys()), ['737b337e605199de28b3b64c674f9422_1_1']
        )
    # a second repr key for the same source bumps the counter
    key2 = cm.register_doc(self.src_path1, self.result_path2, 'bar')
    self.assertEqual(
        sorted(list(cm.keys())),
        ['737b337e605199de28b3b64c674f9422_1_1',
         '737b337e605199de28b3b64c674f9422_1_2',
         ]
        )
    # a different source gets its own digest
    key3 = cm.register_doc(self.src_path2, self.result_path1, 'baz')
    self.assertEqual(
        sorted(list(cm.keys())),
        ['737b337e605199de28b3b64c674f9422_1_1',
         '737b337e605199de28b3b64c674f9422_1_2',
         'd5aa51d7fb180729089d2de904f7dffe_1_1',
         ]
        )
    return
def test_get_cached_file_empty(self, cache_env):
    # while cache is empty we get `None` when asking for cached files.
    manager = CacheManager(str(cache_env / "cache"))
    assert manager.get_cached_file(str(cache_env / "src1.txt")) is None
def test_get_cached_file_invalid_cache_key(self, tmpdir):
    # invalid/unused cache keys return `None` as cached file.
    manager = CacheManager(str(tmpdir))
    result = manager.get_cached_file("not-existing")
    assert result is None
def test_get_cached_file_by_src_failed(self, cache_env):
    # uncached sources give (None, None)
    manager = CacheManager(str(cache_env))
    path, cache_key = manager.get_cached_file_by_source(
        str(cache_env / "src1.txt"))
    assert path is None
    assert cache_key is None
class Client(object):
    """A client to trigger document conversions.
    """
    def __init__(self, cache_dir=None):
        # a CacheManager is only set up when a cache dir is given;
        # otherwise all cache-related methods degrade gracefully.
        self.cache_dir = cache_dir
        self.cache_manager = None
        if self.cache_dir is not None:
            self.cache_manager = CacheManager(self.cache_dir)

    def convert(self, src_doc_path, options={}):
        """Convert `src_doc_path` according to `options`.

        Calls :func:`convert_doc` internally and returns the result
        given by this function.
        """
        # NOTE(review): the mutable default ``options={}`` is shared
        # across calls; safe only while callees do not mutate it.
        return convert_doc(src_doc_path, options, self.cache_dir)

    def get_cached(self, cache_key):
        """Get the document from cache stored under `cache_key`.

        Returns ``None`` if no such file can be found or no cache dir
        was set at all.

        .. warning:: The returned path (if any) is part of cache! Do
                     not remove or change the file. Copy it to another
                     location instead.

        .. versionadded:: 1.1
        """
        if self.cache_manager is not None:
            return self.cache_manager.get_cached_file(cache_key)
        return None

    def get_cached_by_source(self, src_doc_path, options={}):
        """Get the document from cache by source doc and options.

        Find a cached document, which was created from the given
        `src_doc_path` and `options`.

        Returns the path to the document and a cache key you are
        encouraged to use for future access.

        Please note that this method is much more expensive than
        :meth:`get_cached`. Use it only if the `cache_key` returned
        upon registering a doc is absolutely not available any more.

        Returns ``(None, None)`` if no such file can be found or no
        cache dir was set at all.

        .. warning:: The returned path (if any) is part of cache! Do
                     not remove or change the file. Copy it to another
                     location instead.

        .. versionadded:: 1.1
        """
        # options are condensed into a repr key, the same way
        # convert() / convert_doc() do it on registration.
        repr_key = get_marker(options)
        if self.cache_manager is not None:
            return self.cache_manager.get_cached_file_by_source(
                src_doc_path, repr_key)
        return None, None
class RESTfulDocConverter(object):
    """A WSGI app that caches and converts office documents via
    LibreOffice.

    It acts as a RESTful document store that supports HTTP actions to
    add/modify/retrieve converted documents.

    Accepted arguments:

    - `cache_dir`: Path to a directory, where cached files can be
      stored. The directory is created if it does not exist.
    """
    # cf: https://routes.readthedocs.io/en/latest/restful.html
    # http://www.ianbicking.org/blog/2010/03/12/a-webob-app-example/
    map = Mapper()
    map.resource('doc', 'docs')

    #: A cache manager instance.
    cache_manager = None

    template_dir = os.path.join(os.path.dirname(__file__), 'templates')

    def __init__(self, cache_dir=None):
        # a CacheManager is only set up when a cache dir is given
        self.cache_dir = cache_dir
        self.cache_manager = None
        if self.cache_dir is not None:
            self.cache_manager = CacheManager(self.cache_dir)

    def _url(self, req, *args, **kw):
        """Generate an URL pointing to some REST service.

        `req` is the current request. Arguments and keywords are
        passed on to the generated :class:`routes.util.URLGenerator`
        instance. So you can use it like the `url` method described in
        the `routes` docs, except that you have to pass in the `req`
        parameter first.
        """
        url = URLGenerator(self.map, req.environ)
        return url(*args, **kw)

    @wsgify
    def __call__(self, req):
        # dispatch the request to the handler named by the route match
        results = self.map.routematch(environ=req.environ)
        if not results:
            return exc.HTTPNotFound()
        match, route = results
        return getattr(self, match['action'])(req)

    def index(self, req):
        # get index of all docs
        # NOTE(review): `mydocs` is not defined in this class;
        # presumably a module-level registry — confirm it exists.
        return Response(str(mydocs.keys()))

    def create(self, req):
        # post a new doc
        # collect processor options from form params, dropping the
        # form-control fields themselves
        options = dict([(name, val) for name, val in req.params.items()
                        if name not in ('CREATE', 'doc', 'docid')])
        if 'out_fmt' in req.params.keys():
            # map the public form name to the processor option name
            options['oocp-out-fmt'] = options['out_fmt']
            del options['out_fmt']
        if 'CREATE' in req.params.keys():
            if options.get('oocp-out-fmt', 'html') == 'pdf':
                # PDF output needs a shortened processor pipeline
                options['meta-procord'] = 'unzip,oocp,zip'
        doc = req.POST['doc']
        # write doc to filesystem
        tmp_dir = tempfile.mkdtemp()
        src_path = os.path.join(tmp_dir, doc.filename)
        with open(src_path, 'wb') as f:
            # copy the upload in 8 KiB chunks
            for chunk in iter(lambda: doc.file.read(8 * 1024), b''):
                f.write(chunk)
        # do the conversion
        result_path, id_tag, metadata = convert_doc(
            src_path, options, self.cache_dir)
        # deliver the created file
        resp = make_response(result_path)
        if id_tag is not None:
            # we can only signal new resources if cache is enabled
            resp.status = '201 Created'
            resp.location = self._url(req, 'doc', id=id_tag,
                                      qualified=True)
        return resp

    def new(self, req):
        # get a form to create a new doc
        # NOTE(review): the template file handle is not closed here
        template = open(
            os.path.join(self.template_dir, 'form_new.tpl')).read()
        template = template.format(target_url=self._url(req, 'docs'))
        return Response(template)

    def update(self, req):
        # put/update an existing doc
        pass  # pragma: no cover

    def delete(self, req):
        # delete a doc
        pass  # pragma: no cover

    def edit(self, req):
        # edit a doc
        pass  # pragma: no cover

    def show(self, req):
        # show a doc: the trailing path segment is the cache key
        doc_id = req.path.split('/')[-1]
        result_path = self.cache_manager.get_cached_file(doc_id)
        if result_path is None:
            return exc.HTTPNotFound()
        return make_response(result_path)
def __init__(self, cache_dir=None):
    # a cache manager is set up only when a cache dir is configured
    self.cache_dir = cache_dir
    if cache_dir is None:
        self.cache_manager = None
    else:
        self.cache_manager = CacheManager(cache_dir)
def test_compose_marker(self, tmpdir):
    # cache keys have the form "<hash>_<bucket_marker>"
    manager = CacheManager(str(tmpdir))
    composed = manager._compose_cache_key(
        'some_hash_digest', 'bucket_marker')
    assert composed == 'some_hash_digest_bucket_marker'
def test_init_creates_dir(self, tmpdir): # a cache dir is created if neccessary cache_dir = tmpdir / "cache" assert cache_dir.exists() is False CacheManager(str(cache_dir)) assert cache_dir.isdir() is True
def test_prepare_cache_dir_none(self, tmpdir):
    # a cache manager copes with cache_dir set to None
    manager = CacheManager(str(tmpdir))
    manager.cache_dir = None
    manager._prepare_cache_dir()
    assert manager.cache_dir is None
def test_init(self, tmpdir):
    # by default cache managers use a bucket level (depth) of 1
    manager = CacheManager(str(tmpdir.join("cache")))
    assert manager.level == 1
    assert manager.cache_dir == tmpdir.join("cache")
def test_init_level(self, tmpdir):
    # the level (depth) is settable at creation time
    manager = CacheManager(str(tmpdir.join("cache")), level=3)
    assert manager.level == 3