def test_resolve(self):
    """Check that a stored bundle resolves by every reference form.

    The bundle is put into a freshly purged local library; the dataset is
    then resolved by vid, vname, cache key and sname, and each partition
    by vid and cache key.  The same checks are repeated through a
    ``RemoteLibrary`` pointed at the test server.
    """
    from ambry.client.rest import RemoteLibrary

    l = self.get_library()
    l.purge()

    # Single-argument print() behaves the same as the print statement.
    print(l.info)

    l.put_bundle(self.bundle)
    ident = self.bundle.identity

    def check_resolves(library):
        # The dataset must resolve from each of its reference forms.
        for ref in (ident.vid, ident.vname, ident.cache_key, ident.sname):
            self.assertEqual(ident.vid, library.resolve(ref).vid)

        # Each partition must resolve to its parent dataset, with the
        # partition itself attached to the result.
        for p in self.bundle.partitions:
            print('-- %s' % (p.identity.cache_key,))
            for ref in (p.identity.vid, p.identity.cache_key):
                dsid = library.resolve(ref)
                # Fail with a clear message instead of an AttributeError
                # on the attribute accesses below.
                self.assertIsNotNone(dsid)
                self.assertEqual(ident.vid, dsid.vid)
                self.assertEqual(p.identity.vid, dsid.partition.vid)

    # Local Library
    check_resolves(l)

    # Remote Library
    self.start_server()
    rl = RemoteLibrary(self.server_url)
    check_resolves(rl)

    print(rl.resolve('source/dataset-subset-variation-0.0.1/geot1.geodb'))
def resolve_ref_one(self, ref, location=Dataset.LOCATION.LIBRARY):
    """Resolve ``ref`` to a single identity, locally first and then remotely.

    Returns a tuple ``(ip, ident)``.  ``ip`` is the first element returned
    by the local resolver, or ``Identity.classify(ref)`` when there is no
    local resolver.  ``ident`` is the candidate with the highest
    ``on.revision``, or ``None`` when nothing resolved.

    The remote libraries in ``self.urls`` are queried only when the local
    lookup produced nothing or ``ip.version`` is a
    ``semantic_version.Spec``.  A remote that raises ``ConnectionError``
    while resolving is skipped.  Identities found remotely are marked with
    ``Dataset.LOCATION.REMOTE`` and carry the URL they came from.
    """
    from requests.exceptions import ConnectionError
    from ambry.client.rest import RemoteLibrary
    import semantic_version
    from ..identity import Identity

    candidates = []

    if self.local_resolver:
        ip, local_ident = self.local_resolver.resolve_ref_one(ref, location)
        if local_ident:
            candidates.append(local_ident)
    else:
        ip = Identity.classify(ref)

    # A local hit is enough unless the request is a semantic version
    # spec, in which case a remote may hold a newer matching version.
    ask_remotes = (not candidates
                   or isinstance(ip.version, semantic_version.Spec))

    if ask_remotes and self.urls:
        for url in self.urls:
            remote = RemoteLibrary(url)

            try:
                remote_ident = remote.resolve(ref, location)
            except ConnectionError:
                # Unreachable remote: try the next one.
                continue

            if not remote_ident:
                continue

            remote_ident.locations.set(Dataset.LOCATION.REMOTE)
            remote_ident.url = url
            candidates.append(remote_ident)

    if not candidates:
        return ip, None

    # The individual resolutions already honour semantic versioning, so
    # the highest revision is the best match; put it at the front.
    candidates.sort(key=lambda candidate: candidate.on.revision, reverse=True)

    return ip, candidates[0]
def test_load(self):
    """Check that a dataset can be listed remotely and reloaded after a purge.

    The bundle is put into the library behind the test server and checked
    through ``RemoteLibrary.list`` / ``RemoteLibrary.dataset``.  The bundle
    database is then stored in the 'cached-compressed-s3' cache, the
    library is purged, and the dataset is loaded back twice: once through
    the local library and once through the remote library.
    """
    from ambry.run import get_runconfig, RunConfig
    from ambry.client.rest import RemoteLibrary
    from ambry.cache import new_cache
    from ambry.util import md5_for_file

    dataset_fqname = 'source-dataset-subset-variation-0.0.1~diEGPXmDC8001'

    config = self.start_server()
    l = new_library(config)

    rl = RemoteLibrary(self.server_url)

    def remote_fqnames():
        # Always read the listing through .values(); iterating the
        # listing directly would inspect its keys rather than the
        # identity objects that carry fqname.
        return set(i.fqname for i in rl.list().values())

    #
    # Check that the library can list datasets that are inserted externally
    #

    l.put_bundle(self.bundle)

    self.assertIn(dataset_fqname, remote_fqnames())

    dsident = rl.dataset('diEGPXmDC8001')

    partition_fqnames = set(i.fqname for i in dsident.partitions.values())

    self.assertEqual(4, len(partition_fqnames))

    self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', partition_fqnames)
    self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', partition_fqnames)
    self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', partition_fqnames)

    #
    # Upload the dataset to S3, clear the library, then load it back in
    #

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_ACCOUNTS))
    cache = new_cache(rc.filesystem('cached-compressed-s3'))

    fn = self.bundle.database.path
    identity = self.bundle.identity
    relpath = identity.cache_key

    cache.put(fn, relpath, identity.to_meta(file=fn))

    self.assertTrue(bool(cache.has(relpath)))

    # Clear the library.
    l.purge()
    self.assertNotIn(dataset_fqname, remote_fqnames())

    # Load from S3, directly in to the local library
    identity.add_md5(md5_for_file(fn))

    l.load(identity.cache_key, identity.md5)

    self.assertIn(dataset_fqname, remote_fqnames())

    # Do it one more time, using the remote library
    l.purge()
    self.assertNotIn(dataset_fqname, remote_fqnames())

    rl.load_dataset(identity)

    self.assertIn(dataset_fqname, remote_fqnames())

    # Check that we can get the record from the library
    for ref in (identity.vid, identity.vname, identity.cache_key, identity.sname):
        self.assertEqual(identity.vid, rl.resolve(ref).vid)
def test_joint_resolve(self):
    '''Test resolving from either a remote or local library, from the
    local interface.

    The bundle is put into the server's library and pushed upstream, then
    resolved through ``RemoteResolver`` configured as remote-only,
    local-only and combined, both while the server is running and after
    it has been stopped.
    '''
    from ambry.client.rest import RemoteLibrary
    from ambry.library.query import RemoteResolver

    self.start_server()
    config = self.server_library_config()

    # Create the library so we can get the same remote config
    l = new_library(config)
    s3 = l.upstream.last_upstream()

    # Single-argument print() behaves the same as the print statement.
    print(l.info)

    db = l.database
    db.enable_delete = True
    db.clean()

    l.put_bundle(self.bundle)

    # This might not do anything if the files already are in s3
    def push_cb(action, metadata, time):
        print('%s %s' % (action, metadata['fqname']))

    l.push(cb=push_cb)

    # Check they are on the web
    self.web_exists(s3, self.bundle.identity.cache_key)
    for p in self.bundle.partitions:
        self.web_exists(s3, p.identity.cache_key)

    # Check the basic resolvers
    ident = self.bundle.identity
    self.assertEqual(ident.vid, l.resolve(ident.vid).vid)

    rl = RemoteLibrary(self.server_url)
    self.assertEqual(ident.vid, rl.resolve(ident.vname).vid)

    # That's the basics, now test the primary use case with the remote
    # resolver.  Each resolver is queried once and both attributes are
    # checked on that single result.

    # Remote resolver only
    rr = RemoteResolver(local_resolver=None, remote_urls=[self.server_url])
    resolved = rr.resolve_ref_one(ident.vid)[1]
    self.assertEqual(ident.vid, resolved.vid)
    # NOTE(review): hard-coded URL; presumably equal to self.server_url --
    # confirm before replacing the literal.
    self.assertEqual('http://localhost:7979', resolved.url)

    # Local Resolver only
    rr = RemoteResolver(local_resolver=l.database.resolver, remote_urls=None)
    resolved = rr.resolve_ref_one(ident.vid)[1]
    self.assertEqual(ident.vid, resolved.vid)
    self.assertIsNone(resolved.url)

    self.stop_server()

    # Remote resolver only; with the server stopped nothing resolves
    rr = RemoteResolver(local_resolver=None, remote_urls=[self.server_url])
    self.assertIsNone(rr.resolve_ref_one(ident.vid)[1])

    # Combined; the local result is still available without the server
    rr = RemoteResolver(local_resolver=l.database.resolver, remote_urls=[self.server_url])
    resolved = rr.resolve_ref_one(ident.vid)[1]
    self.assertEqual(ident.vid, resolved.vid)
    self.assertIsNone(resolved.url)