def test_simple_install(self): from ambry.client.rest import RemoteLibrary from ambry.cache.remote import RestReadCache config = self.start_server() # Create the library so we can get the same remote config l = new_library(config) s3 = l.upstream.last_upstream() s3.clean() print "S3 cache ", str(s3) if not s3.has(self.bundle.identity.cache_key): print 'Uploading: ', self.bundle.identity.cache_key s3.put(self.bundle.database.path,self.bundle.identity.cache_key) self.web_exists(s3,self.bundle.identity.cache_key) for p in self.bundle.partitions: if not s3.has(p.identity.cache_key): print 'Uploading: ', p.identity.cache_key s3.put(p.database.path,p.identity.cache_key) self.web_exists(s3,p.identity.cache_key) else: print 'Has : ', p.identity.cache_key # # Kick the remote library to load the dataset # rl = RemoteLibrary(self.server_url) ident = self.bundle.identity ident.add_md5(file=self.bundle.database.path) rl.load_dataset(ident) self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', set([i.fqname for i in rl.list().values()])) return # Try variants of find. r = api.find(self.bundle.identity.name) self.assertEquals(self.bundle.identity.name, r[0].name) r = api.find(QueryCommand().identity(name = self.bundle.identity.name)) self.assertEquals(self.bundle.identity.name, r[0].name) for partition in self.bundle.partitions: r = api.find((QueryCommand().partition(name = partition.identity.name)).to_dict()) self.assertEquals(partition.identity.name, r[0].name)
def sync_remotes(self, clean=False):
    """Sync the datasets listed by each configured remote into this library.

    For every URL in ``self.remotes`` a ``RemoteLibrary`` is queried for its
    listing. Each listed identity that has no file record of type
    ``Dataset.LOCATION.REMOTE`` referencing its ``vid`` is passed to
    ``self.sync_remote_dataset``; identities that already have such a
    record are skipped.

    :param clean: when true, first delete the ``Dataset`` rows whose location
        is ``Dataset.LOCATION.REMOTE`` and the file records of that type, so
        that everything is synced again.
    :return: None
    """
    from ambry.client.rest import RemoteLibrary

    if clean:
        stale_datasets = self.database.session.query(Dataset).filter(
            Dataset.location == Dataset.LOCATION.REMOTE)
        stale_datasets.delete()

        stale_files = self.files.query.type(Dataset.LOCATION.REMOTE)
        stale_files.delete()

    # Nothing to do when no remotes are configured.
    if not self.remotes:
        return

    for url in self.remotes:
        self.logger.info("Remote sync: {}".format(url))

        remote = RemoteLibrary(url)

        for ident in remote.list().values():
            # A fresh query is built for each identity, as in the lookup
            # that decides whether the dataset was already recorded.
            already_recorded = self.files.query.type(
                Dataset.LOCATION.REMOTE).ref(ident.vid).one_maybe

            if not already_recorded:
                self.sync_remote_dataset(url, ident)
                self.logger.info("Remote {} sync: {}".format(url, ident.fqname))
def test_load(self):
    """Exercise listing, purging and re-loading a dataset through the server.

    Steps:
      1. Put the test bundle into the local library and check that the
         remote library lists the dataset and its partitions.
      2. Upload the bundle file to the 'cached-compressed-s3' cache.
      3. Purge the library, load the bundle back through the local library,
         and check that it is listed again.
      4. Purge again, load through the remote library, and check that the
         dataset can be resolved by vid, vname, cache key and sname.
    """
    from ambry.run import get_runconfig, RunConfig
    from ambry.client.rest import RemoteLibrary
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.identity import Identity

    dataset_fqname = 'source-dataset-subset-variation-0.0.1~diEGPXmDC8001'

    config = self.start_server()
    l = new_library(config)

    rl = RemoteLibrary(self.server_url)

    def remote_fqnames():
        # The remote listing is a mapping; the identities are its values.
        # Iterating the mapping itself would yield its keys, which is why
        # every listing check goes through this one helper.
        return set(i.fqname for i in rl.list().values())

    #
    # Check that the library can list datasets that are inserted externally
    #

    l.put_bundle(self.bundle)

    self.assertIn(dataset_fqname, remote_fqnames())

    dsident = rl.dataset('diEGPXmDC8001')

    s = set(i.fqname for i in dsident.partitions.values())

    self.assertEqual(4, len(s))

    self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
    self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
    self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

    #
    # Upload the dataset to S3, clear the library, then load it back in
    #

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_ACCOUNTS))
    cache = new_cache(rc.filesystem('cached-compressed-s3'))

    fn = self.bundle.database.path
    identity = self.bundle.identity
    relpath = identity.cache_key

    cache.put(fn, relpath, identity.to_meta(file=fn))

    self.assertTrue(bool(cache.has(relpath)))

    # Clear the library.

    l.purge()
    # Previously this check iterated rl.list() directly (the mapping's keys),
    # unlike every other listing check in this test.
    self.assertNotIn(dataset_fqname, remote_fqnames())

    # Load from S3, directly in to the local library

    identity.add_md5(md5_for_file(fn))

    l.load(identity.cache_key, identity.md5)

    self.assertIn(dataset_fqname, remote_fqnames())

    # Do it one more time, using the remote library

    l.purge()
    self.assertNotIn(dataset_fqname, remote_fqnames())

    rl.load_dataset(identity)

    self.assertIn(dataset_fqname, remote_fqnames())

    # Check that we can get the record from the library

    self.assertEqual(identity.vid, rl.resolve(identity.vid).vid)
    self.assertEqual(identity.vid, rl.resolve(identity.vname).vid)
    self.assertEqual(identity.vid, rl.resolve(identity.cache_key).vid)
    self.assertEqual(identity.vid, rl.resolve(identity.sname).vid)