Example #1
    def test_simple_install(self):
        from ambry.client.rest import RemoteLibrary
        from ambry.cache.remote import RestReadCache
        from ambry.library import new_library  # used below; assumed module path
        
        config = self.start_server()

        # Create the library so we can get the same remote config
        l = new_library(config)

        # Use the library's last upstream (the S3 cache) as the put target.
        s3 = l.upstream.last_upstream()

        s3.clean()

        print "S3 cache ", str(s3)

        if not s3.has(self.bundle.identity.cache_key):
            print 'Uploading: ', self.bundle.identity.cache_key
            s3.put(self.bundle.database.path, self.bundle.identity.cache_key)
            self.web_exists(s3, self.bundle.identity.cache_key)

        for p in self.bundle.partitions:
            if not s3.has(p.identity.cache_key):
                print 'Uploading: ', p.identity.cache_key
                s3.put(p.database.path, p.identity.cache_key)
                self.web_exists(s3, p.identity.cache_key)
            else:
                print 'Has      : ', p.identity.cache_key

        #
        # Kick the remote library to load the dataset
        #
        rl = RemoteLibrary(self.server_url)
        ident = self.bundle.identity
        ident.add_md5(file=self.bundle.database.path)
        rl.load_dataset(ident)
        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      set([i.fqname for i in rl.list().values()]))


        # The find() checks below are disabled by this early return; they
        # also rely on an `api` client and a QueryCommand import that this
        # test no longer sets up.
        return

        # Try variants of find.
        r = api.find(self.bundle.identity.name)
        self.assertEquals(self.bundle.identity.name, r[0].name)
        
        r = api.find(QueryCommand().identity(name=self.bundle.identity.name))
        self.assertEquals(self.bundle.identity.name, r[0].name)

        for partition in self.bundle.partitions:
            r = api.find(QueryCommand().partition(name=partition.identity.name).to_dict())
            self.assertEquals(partition.identity.name, r[0].name)
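Distilled from Example #1, the upload-then-load flow can be read as one small helper. This is a minimal sketch, not part of the original test: `bundle`, `s3`, and `server_url` stand in for the fixtures the test builds itself, and only calls that appear above are used.

from ambry.client.rest import RemoteLibrary

def push_and_load(bundle, s3, server_url):
    # Upload the bundle database if the cache does not already hold it.
    key = bundle.identity.cache_key
    if not s3.has(key):
        s3.put(bundle.database.path, key)

    # Upload any partitions the cache is missing.
    for p in bundle.partitions:
        if not s3.has(p.identity.cache_key):
            s3.put(p.database.path, p.identity.cache_key)

    # Kick the remote library to load the dataset from the cache.
    ident = bundle.identity
    ident.add_md5(file=bundle.database.path)
    RemoteLibrary(server_url).load_dataset(ident)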
Example #2
    def sync_remotes(self, clean=False):

        from ambry.client.rest import RemoteLibrary
        from ambry.orm import Dataset  # needed for the LOCATION constants below; assumed module path

        if clean:
            self.database.session.query(Dataset).filter(Dataset.location == Dataset.LOCATION.REMOTE).delete()
            self.files.query.type(Dataset.LOCATION.REMOTE).delete()

        if not self.remotes:
            return

        for url in self.remotes:

            self.logger.info("Remote sync: {}".format(url))
            rl = RemoteLibrary(url)
            for ident in rl.list().values():

                if self.files.query.type(Dataset.LOCATION.REMOTE).ref(ident.vid).one_maybe:
                    continue

                self.sync_remote_dataset(url, ident)

                self.logger.info("Remote {} sync: {}".format(url, ident.fqname))
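As a hedged usage sketch, a caller would drive sync_remotes() roughly as below. `new_library` and `config` are assumed to come from the same setup as the other examples here; they are not part of this snippet.

# Build a library from the run config, then resync every remote it
# lists, discarding stale remote records first.
l = new_library(config)
l.sync_remotes(clean=True)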
Example #3
    def test_load(self):

        from ambry.run import get_runconfig, RunConfig
        from ambry.client.rest import RemoteLibrary
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.identity import Identity
        from ambry.library import new_library  # used below; assumed module path
        import os  # needed for os.path.join below

        config = self.start_server()
        l = new_library(config)

        rl = RemoteLibrary(self.server_url)

        #
        # Check that the library can list datasets that are inserted externally
        #

        l.put_bundle(self.bundle)

        s = set([i.fqname for i in rl.list().values()])

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', s)

        dsident = rl.dataset('diEGPXmDC8001')

        s = set([i.fqname for i in dsident.partitions.values()])

        self.assertEquals(4, len(s))

        # Note: only three of the four partitions are checked by name.
        self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
        self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
        self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

        #
        # Upload the dataset to S3, clear the library, then load it back in
        #

        rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_ACCOUNTS))
        cache = new_cache(rc.filesystem('cached-compressed-s3'))

        fn = self.bundle.database.path
        identity = self.bundle.identity
        relpath = identity.cache_key

        r = cache.put(fn, relpath, identity.to_meta(file=fn))

        self.assertTrue(bool(cache.has(relpath)))

        # Clear the library.

        l.purge()
        self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                         set([i.fqname for i in rl.list().values()]))

        # Load from S3, directly into the local library

        identity.add_md5(md5_for_file(fn))

        l.load(identity.cache_key, identity.md5)

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      set([i.fqname for i in rl.list().values()]))

        # Do it one more time, using the remote library

        l.purge()
        self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                         set([i.fqname for i in rl.list().values()]))

        rl.load_dataset(identity)

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      set([i.fqname for i in rl.list().values()]))

        # Check that we can get the record from the library

        self.assertEquals(identity.vid, rl.resolve(identity.vid).vid)
        self.assertEquals(identity.vid, rl.resolve(identity.vname).vid)
        self.assertEquals(identity.vid, rl.resolve(identity.cache_key).vid)
        self.assertEquals(identity.vid, rl.resolve(identity.sname).vid)
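The closing assertions show that resolve() accepts a vid, vname, cache_key, or sname and returns the same versioned identity for each. A compact sketch of that lookup, assuming a running server at `server_url` and a known `identity` (both stand-ins for the test fixtures above):

from ambry.client.rest import RemoteLibrary

rl = RemoteLibrary(server_url)

# Every identifier form should resolve back to the same versioned id.
for ref in (identity.vid, identity.vname, identity.cache_key, identity.sname):
    assert rl.resolve(ref).vid == identity.vid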