def test_md5(self):
    from ambry.run import get_runconfig, RunConfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.cache.filesystem import make_metadata

    rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"),
                        RunConfig.USER_CONFIG))

    fn = self.make_test_file()
    md5 = md5_for_file(fn)

    # The md5 the cache reports must match the hash of the original file,
    # for both the plain filesystem cache and the compressing cache.
    cache = new_cache(rc.filesystem("fscache"))
    cache.put(fn, "foo1")
    abs_path = cache.path("foo1")
    self.assertEquals(md5, cache.md5("foo1"))

    cache = new_cache(rc.filesystem("compressioncache"))
    cache.put(fn, "foo2", metadata=make_metadata(fn))
    abs_path = cache.path("foo2")
    self.assertEquals(md5, cache.md5("foo2"))

    os.remove(fn)

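# A minimal sketch, not ambry's implementation, of the chunked file hashing
# that md5_for_file() and cache.md5() above are expected to agree on. The
# helper name and chunk size are illustrative assumptions.
def _md5_chunked_sketch(path, chunk_size=8192):
    import hashlib

    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        # Read fixed-size chunks so large bundle files need not fit in memory.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()
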
def test_caches(self):
    '''Basic test of put(), get() and has() for all cache types'''
    from ambry.run import get_runconfig, RunConfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.bundle import DbBundle

    #self.start_server() # For the rest-cache

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)
    md5 = md5_for_file(fn)

    print "MD5 {} = {}".format(fn, md5)

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_ACCOUNTS))

    for i, fsname in enumerate(['fscache', 'limitedcache', 'compressioncache',
                                'cached-s3', 'cached-compressed-s3']):

        config = rc.filesystem(fsname)
        cache = new_cache(config)
        print '---', fsname, cache

        identity = self.bundle.identity
        relpath = identity.cache_key

        r = cache.put(fn, relpath, identity.to_meta(md5=md5))
        r = cache.get(relpath)

        # Remote caches return a URL; local caches return a path that must exist.
        if not r.startswith('http'):
            self.assertTrue(os.path.exists(r), 'Not a url: {}: {}'.format(r, str(cache)))

        self.assertTrue(cache.has(relpath, md5=md5))

        cache.remove(relpath, propagate=True)

        self.assertFalse(os.path.exists(r), str(cache))
        self.assertFalse(cache.has(relpath))

    cache = new_cache(rc.filesystem('s3cache-noupstream'))
    r = cache.put(fn, 'a')

def test_caches(self):
    '''Basic test of put(), get() and has() for all cache types'''
    from ambry.run import get_runconfig, RunConfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.bundle import DbBundle

    self.start_server() # For the rest-cache

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)
    md5 = md5_for_file(fn)

    print "MD5 {} = {}".format(fn, md5)

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_CONFIG))

    for i, fsname in enumerate(['fscache', 'limitedcache', 'compressioncache',
                                'cached-s3', 'cached-compressed-s3']):

        config = rc.filesystem(fsname)
        cache = new_cache(config)
        print '---', fsname, cache

        identity = self.bundle.identity
        relpath = identity.cache_key

        r = cache.put(fn, relpath, identity.to_meta(md5=md5))
        r = cache.get(relpath)

        if not r.startswith('http'):
            self.assertTrue(os.path.exists(r), str(cache))

        self.assertTrue(cache.has(relpath, md5=md5))

        cache.remove(relpath, propagate=True)

        self.assertFalse(os.path.exists(r), str(cache))
        self.assertFalse(cache.has(relpath))

    cache = new_cache(rc.filesystem('s3cache-noupstream'))
    r = cache.put(fn, 'a')

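# A dict-backed sketch, not ambry's implementation, of the put()/get()/
# has()/remove() contract the loops above check against every backend.
# Real caches copy files and return filesystem paths or URLs; this
# stand-in only keeps bytes in memory to illustrate the call sequence.
# All names here are illustrative assumptions.
class _CacheContractSketch(object):

    def __init__(self):
        self._store = {}  # relpath -> (contents, metadata)

    def put(self, source_path, relpath, metadata=None):
        with open(source_path, 'rb') as f:
            self._store[relpath] = (f.read(), metadata or {})
        return relpath

    def get(self, relpath):
        # A real cache returns a local path, or a URL for remote caches.
        return relpath if relpath in self._store else None

    def has(self, relpath, md5=None):
        return relpath in self._store

    def remove(self, relpath, propagate=False):
        # propagate=True would forward the delete to upstream caches.
        self._store.pop(relpath, None)
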
def test_caches(self): """Basic test of put(), get() and has() for all cache types""" from ambry.run import get_runconfig from ambry.cache import new_cache from ambry.util import md5_for_file from ambry.bundle import DbBundle self.start_server() # For the rest-cache # fn = '/tmp/1mbfile' # with open(fn, 'wb') as f: # f.write('.'*(1024)) fn = self.bundle.database.path # Opening the file might run the database updates in # database.sqlite._on_connect_update_schema, which can affect the md5. b = DbBundle(fn) md5 = md5_for_file(fn) print "MD5 {} = {}".format(fn, md5) rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG)) for i, fsname in enumerate( ["fscache", "limitedcache", "compressioncache", "cached-s3", "cached-compressed-s3"] ): #'compressioncache', config = rc.filesystem(fsname) cache = new_cache(config) print "---", fsname, cache identity = self.bundle.identity relpath = identity.cache_key r = cache.put(fn, relpath, identity.to_meta(md5=md5)) r = cache.get(relpath) if not r.startswith("http"): self.assertTrue(os.path.exists(r), str(cache)) self.assertTrue(cache.has(relpath, md5=md5)) cache.remove(relpath, propagate=True) self.assertFalse(os.path.exists(r), str(cache)) self.assertFalse(cache.has(relpath)) cache = new_cache(rc.filesystem("s3cache-noupstream")) r = cache.put(fn, "a")
def test_s3(self):
    from ambry.run import get_runconfig, RunConfig
    from ambry.cache import new_cache
    from ambry.bundle import DbBundle

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_CONFIG))

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)
    identity = b.identity

    fsname = 'cached-compressed-s3'

    config = rc.filesystem(fsname)
    cache = new_cache(config)

    r = cache.put(fn, b.identity.cache_key, b.identity.to_meta(md5=b.database.md5))

    for p in b.partitions:
        # put() takes a relative path string; use the partition's cache key.
        r = cache.put(p.database.path, p.identity.cache_key,
                      p.identity.to_meta(md5=p.database.md5))

    r = cache.get(b.identity.cache_key)

def test_files(self):
    '''Test some of the server's file functions.'''
    from ambry.cache import new_cache
    from ambry.bundle import DbBundle

    fs = new_cache(self.server_rc.filesystem('rrc-fs'))
    fs.clean()

    remote = new_cache(self.server_rc.filesystem('rrc'))

    config = self.start_server()
    l = new_library(config)

    l.put_bundle(self.bundle)
    l.push()

    ident = self.bundle.identity
    ck = ident.cache_key

    # The remote is tied to the REST server, so it has the bundle, but
    # the new filesystem cache does not.
    self.assertFalse(fs.has(ck))
    self.assertTrue(remote.has(ck))

    # But if we tie them together, the FS cache should have it.
    fs.upstream = remote

    self.assertTrue(fs.has(ck))

    path = fs.get(ck)

    b = DbBundle(path)

    self.assertEquals(ck, b.identity.cache_key)

    # It should have been copied, so the fs should still have it after
    # disconnecting.
    fs.upstream = None

    self.assertTrue(fs.has(ck))

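# A sketch, under assumption, of the upstream fall-through behavior
# test_files() exercises: has() consults the local layer first and then
# the upstream, and get() copies the object down so it survives after the
# upstream is detached. Illustrative only; the real classes live in
# ambry.cache.
class _ChainedCacheSketch(object):

    def __init__(self, upstream=None):
        self._local = {}  # relpath -> contents, standing in for files
        self.upstream = upstream

    def has(self, relpath):
        if relpath in self._local:
            return True
        return self.upstream is not None and self.upstream.has(relpath)

    def get(self, relpath):
        if relpath not in self._local and self.upstream is not None:
            # Copy down from the upstream so later has()/get() calls
            # succeed even after the upstream is detached.
            self._local[relpath] = self.upstream.get(relpath)
        return self._local[relpath]
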
def test_basic(self):
    from ambry.cache import new_cache

    c = new_cache(self.rc.filesystem('google'))

    print c
    print c.list()

def test_s3(self):
    #ambry.util.get_logger('ambry.filesystem').setLevel(logging.DEBUG)

    # Set up the test directory and make some test files.
    from ambry.cache import new_cache

    root = self.rc.group('filesystem').root
    os.makedirs(root)

    testfile = os.path.join(root, 'testfile')

    with open(testfile, 'w+') as f:
        for i in range(1024):
            f.write('.' * 1023)
            f.write('\n')

    cache = new_cache(self.rc.filesystem('s3'))

    repo_dir = cache.cache_dir

    print "Repo Dir: {}".format(repo_dir)

    for i in range(0, 10):
        logger.info("Putting " + str(i))
        cache.put(testfile, 'many' + str(i))

    # The local layer is size-limited, so early entries are evicted from
    # disk; get() copies an entry back down and evicts others in turn.
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many1')))
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

    p = cache.get('many1')
    self.assertTrue(p is not None)
    self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many1')))
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

    p = cache.get('many2')
    self.assertTrue(p is not None)
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
    self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many7')))

    p = cache.get('many3')
    self.assertTrue(p is not None)
    self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many3')))
    self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many7')))

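# A minimal sketch, assuming least-recently-used eviction, of the bounded
# local-cache behavior the asserts above rely on: put() evicts the oldest
# entries once the cache is over its limit, and get() refreshes an entry
# so something else is evicted next. Illustrative only; ambry's limited
# cache tracks bytes on disk rather than entry counts.
from collections import OrderedDict

class _LRUSketch(object):

    def __init__(self, max_entries):
        self.max_entries = max_entries
        self._entries = OrderedDict()

    def put(self, key, value):
        self._entries.pop(key, None)
        self._entries[key] = value
        while len(self._entries) > self.max_entries:
            self._entries.popitem(last=False)  # evict least recently used

    def get(self, key):
        value = self._entries.pop(key)
        self._entries[key] = value  # refresh: now most recently used
        return value
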
def test_compression(self):
    from ambry.run import get_runconfig, RunConfig
    from ambry.cache import new_cache
    from ambry.util import temp_file_name, md5_for_file, copy_file_or_flo
    from ambry.util.sgzip import GzipFile

    rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"),
                        RunConfig.USER_CONFIG))

    comp_cache = new_cache(rc.filesystem("compressioncache"))

    test_file_name = "test_file"

    fn = temp_file_name()
    print "orig file ", fn
    with open(fn, "wb") as f:
        for i in range(1000):
            f.write("{:03d}:".format(i))

    cf = comp_cache.put(fn, test_file_name)

    # Read the compressed copy back through a gzip wrapper and write it,
    # decompressed, into an uncompressed cache.
    with open(cf, "rb") as stream:
        stream = GzipFile(stream)

        uncomp_cache = new_cache(rc.filesystem("fscache"))
        uncomp_stream = uncomp_cache.put_stream("decomp")

        copy_file_or_flo(stream, uncomp_stream)

        uncomp_stream.close()

    dcf = uncomp_cache.get("decomp")

    # The round trip through compression must preserve the contents.
    self.assertEquals(md5_for_file(fn), md5_for_file(dcf))

    os.remove(fn)

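# A stdlib-only sketch of the same round trip test_compression() performs
# with ambry.util.sgzip.GzipFile: compress a file, decompress it again,
# and compare the result with the original. The function name and file
# paths are illustrative assumptions.
def _gzip_roundtrip_sketch(src_path, gz_path, out_path):
    import gzip
    import shutil

    with open(src_path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)  # compress

    with gzip.open(gz_path, 'rb') as f_in, open(out_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)  # decompress
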
def test_load(self):
    from ambry.run import get_runconfig, RunConfig
    from ambry.client.rest import RemoteLibrary
    from ambry.cache import new_cache
    from ambry.util import md5_for_file

    config = self.start_server()
    l = new_library(config)
    rl = RemoteLibrary(self.server_url)

    #
    # Check that the library can list datasets that are inserted externally
    #

    l.put_bundle(self.bundle)

    s = set([i.fqname for i in rl.list().values()])

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', s)

    dsident = rl.dataset('diEGPXmDC8001')

    s = set([i.fqname for i in dsident.partitions.values()])

    self.assertEquals(4, len(s))

    self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
    self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
    self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

    #
    # Upload the dataset to S3, clear the library, then load it back in
    #

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_ACCOUNTS))

    cache = new_cache(rc.filesystem('cached-compressed-s3'))

    fn = self.bundle.database.path
    identity = self.bundle.identity
    relpath = identity.cache_key

    r = cache.put(fn, relpath, identity.to_meta(file=fn))

    self.assertTrue(bool(cache.has(relpath)))

    # Clear the library.
    l.purge()

    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list().values()]))

    # Load from S3, directly into the local library
    identity.add_md5(md5_for_file(fn))

    l.load(identity.cache_key, identity.md5)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Do it one more time, using the remote library
    l.purge()

    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list().values()]))

    rl.load_dataset(identity)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Check that we can get the record from the library
    self.assertEquals(identity.vid, rl.resolve(identity.vid).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.vname).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.cache_key).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.sname).vid)