def get_library(path=None, root=None, db=None):
    """Return the default library for this installation."""
    import ambry.library as _l

    rc = config(path=path, root=root, db=db)

    return _l.new_library(rc)
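# Illustrative usage of get_library() (a sketch, not part of the codebase):
# resolve a reference through the default library and fetch the bundle.
# resolve() and get() are used the same way in the tests below; the function
# name here is hypothetical.
def example_get_bundle(ref):
    l = get_library()

    ident = l.resolve(ref)  # resolve() returns None for an unknown reference

    if ident is None:
        return None

    return l.get(ident.vid)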
def test_push(self):
    from ambry.identity import Identity
    from functools import partial

    config = self.server_library_config()

    # Create the library so we can get the same remote config
    l = new_library(config)

    s3 = l.upstream.last_upstream()

    remote = self.remotes.values()[0]

    print l.info

    db = l.database
    db.enable_delete = True

    try:
        db.drop()
        db.create()
    except:
        pass

    s3 = l.upstream.last_upstream()
    s3.clean()

    l.put_bundle(self.bundle)

    def push_cb(expect, action, metadata, time):
        import json
        self.assertIn(action, expect)
        identity = Identity.from_dict(json.loads(metadata['identity']))
        print action, identity.cache_key

    def throw_cb(action, metadata, time):
        raise Exception("Push should not run")

    l.push(remote, cb=partial(push_cb, ('Pushed', 'Pushing')))

    # All should be pushed already, so the callback should not run
    l.push(remote, cb=throw_cb)

    # Resetting the library, but not s3, should already have all records
    db = l.database
    db.enable_delete = True
    db.drop()
    db.create()

    l.put_bundle(self.bundle)

    l.push(remote, cb=partial(push_cb, ('Has',)))

    self.web_exists(s3, self.bundle.identity.cache_key)

    for p in self.bundle.partitions:
        self.web_exists(s3, p.identity.cache_key)

    l.sync_upstream()
def test_adhoc(self):
    from ambry.library import new_library

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    print l.resolve('211sandiego.org-calls-p1ye2014-orig-calls')
def get_library(self, name='default'):
    """Return the same library that the server uses."""
    from ambry.library import new_library

    config = self.server_rc.library(name)

    l = new_library(config, reset=True)

    return l
def get_library(self, name='default'):
    """Return a new library, clearing out the database before the test run."""
    from ambry.library import new_library

    config = self.rc.library(name)

    l = new_library(config, reset=True)

    return l
def source_command(args, rc):
    from ..library import new_library
    from . import global_logger

    l = new_library(rc.library(args.name))
    l.logger = global_logger

    st = l.source

    globals()['source_' + args.subcommand](args, l, st, rc)
def test_library_get(self):
    from ambry.library import new_library

    vid = self.bundle.identity.vid

    config = self.server_library_config()

    # Create the library so we can get the same remote config
    server_l = new_library(config)
    server_l.put_bundle(self.bundle)

    # Local only; no connection to server
    local_l = new_library(self.server_rc.library("local"))

    # A library that connects to the server
    remote_l = new_library(self.server_rc.library("reader"))
    remote_l.purge()

    self.assertTrue(len(remote_l.list()) == 0)

    self.assertEquals(vid, server_l.resolve(vid).vid)
    self.assertIsNone(local_l.resolve(vid))
    self.assertIsNone(remote_l.resolve(vid))

    self.start_server()

    self.assertEquals(vid, remote_l.resolve(vid).vid)

    b = remote_l.get(vid)

    print b.identity.fqname

    for p in self.bundle.partitions:
        b = remote_l.get(p.identity.vid)
        self.assertEquals(p.identity.fqname, b.partition.identity.fqname)

    self.assertEqual(1, len(remote_l.list()))

    # Test out syncing.
    remote_l.sync_remotes()
def run_command(args, rc):
    from ambry.library import new_library
    from ambry.cli import global_logger

    try:
        l = new_library(rc)
        l.logger = global_logger
    except Exception:
        l = None

    args.subcommand(args, l, rc)  # Note the calls to sp.set_defaults(subcommand=...)
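# A sketch of the argparse wiring that the comment in run_command refers to:
# each subparser stores its handler with set_defaults(subcommand=...), so
# run_command can dispatch through args.subcommand. The parser layout and the
# run_build handler are assumptions for illustration, not the real CLI setup.
def make_run_parser(cmd):
    sp = cmd.add_parser('run', help='Run a bundle operation')

    asp = sp.add_subparsers(title='run commands')

    bsp = asp.add_parser('build', help='Build a bundle')
    bsp.set_defaults(subcommand=run_build)  # run_build(args, l, rc) is a hypothetical handler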
def get_library(self, name='default'):
    """Return a new library, clearing out the database before the test run."""
    from ambry.library import new_library

    # Create the database
    self.copy_or_build_bundle()

    config = self.rc.library(name)

    l = new_library(config, reset=True)

    return l
def test_accounts(self):
    """Test library, database and environment accounts."""
    l = self.library()
    l.drop()
    l.create()

    lcsp = LibraryConfigSyncProxy(l)
    lcsp.sync()

    # db = self.library().database
    # for v in l.database.root_dataset.config.library:
    #     print v

    l.filesystem.downloads('foo', 'bar')
    l.filesystem.build('foo', 'bar')

    for k, v in l.accounts.items():
        act = l.account(k)

        if k in ('ambry', 'google_spreadsheets'):
            continue

        if act.major_type != 'ambry':
            self.assertTrue(bool(act.decrypt_secret()))

        self.assertTrue(bool(act.account_id))

    for remote in l.remotes:
        self.assertTrue(bool(remote.url))

    os.environ['AMBRY_DB'] = l.database.dsn = 'sqlite://'
    os.environ['AMBRY_ACCOUNT_PASSWORD'] = l._account_password

    self.assertEqual(l.database.dsn, os.getenv('AMBRY_DB'))

    l = new_library()

    try:
        for k, v in l.accounts.items():
            act = l.account(k)

            if k in ('ambry', 'google_spreadsheets'):
                continue

            if act.major_type != 'ambry':
                self.assertTrue(bool(act.decrypt_secret()))

            self.assertTrue(bool(act.account_id))

        for remote in l.remotes:
            self.assertTrue(bool(remote.url))
    finally:
        l.close()
def test_remote_sync(self):
    from ambry.library import new_library

    vid = self.bundle.identity.vid

    config = self.server_library_config()

    # Create the library so we can get the same remote config
    server_l = new_library(config)
    server_l.put_bundle(self.bundle)

    # A library that connects to the server
    remote_l = new_library(self.server_rc.library("reader"))
    remote_l.purge()

    self.assertTrue(len(remote_l.list()) == 0)

    self.assertEquals(vid, server_l.resolve(vid).vid)
    self.assertIsNone(remote_l.resolve(vid))

    self.start_server()

    remote_l.sync_remotes()

    # print server_l.info
    # print remote_l.info

    r = remote_l.resolve(vid)
    self.assertEquals(vid, r.vid)

    b = remote_l.get(vid)

    for p in self.bundle.partitions:
        print "Check ", p.identity
        b = remote_l.get(p.identity.vid)
        self.assertEquals(p.identity.fqname, b.partition.identity.fqname)

    self.assertEqual(1, len(remote_l.list()))
def production_run(config, reloader=False):
    lf = lambda: new_library(config, True)

    l = lf()
    l.database.create()

    logger.info("starting production server for library '{}' on http://{}:{}".format(l.name, l.host, l.port))

    install(LibraryPlugin(lf))

    return run(host=l.host, port=l.port, reloader=reloader)
def test_simple_install(self):
    from ambry.client.rest import RemoteLibrary
    from ambry.cache.remote import RestReadCache

    config = self.start_server()

    # Create the library so we can get the same remote config
    l = new_library(config)

    s3 = l.upstream.last_upstream()
    s3.clean()

    print "S3 cache ", str(s3)

    if not s3.has(self.bundle.identity.cache_key):
        print 'Uploading: ', self.bundle.identity.cache_key
        s3.put(self.bundle.database.path, self.bundle.identity.cache_key)

    self.web_exists(s3, self.bundle.identity.cache_key)

    for p in self.bundle.partitions:
        if not s3.has(p.identity.cache_key):
            print 'Uploading: ', p.identity.cache_key
            s3.put(p.database.path, p.identity.cache_key)
            self.web_exists(s3, p.identity.cache_key)
        else:
            print 'Has : ', p.identity.cache_key

    #
    # Kick the remote library to load the dataset
    #
    rl = RemoteLibrary(self.server_url)

    ident = self.bundle.identity
    ident.add_md5(file=self.bundle.database.path)

    rl.load_dataset(ident)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    return

    # The remainder is disabled (unreachable after the return above); it
    # exercised variants of find() against an older `api` client.
    r = api.find(self.bundle.identity.name)
    self.assertEquals(self.bundle.identity.name, r[0].name)

    r = api.find(QueryCommand().identity(name=self.bundle.identity.name))
    self.assertEquals(self.bundle.identity.name, r[0].name)

    for partition in self.bundle.partitions:
        r = api.find((QueryCommand().partition(name=partition.identity.name)).to_dict())
        self.assertEquals(partition.identity.name, r[0].name)
def get_library(self, name='default'):
    """Return a new library, clearing out the database before the test run."""
    from ambry.library import new_library

    config = self.rc.library(name)

    l = new_library(config, reset=True)

    l.database.enable_delete = True
    l.database.drop()
    l.database.create()

    return l
def x_test_remote(self):
    from ambry.run import RunConfig
    from ambry.library import new_library

    rc = get_runconfig((os.path.join(self.bundle_dir, 'server-test-config.yaml'),
                        RunConfig.USER_CONFIG))

    config = rc.library('default')

    library = new_library(config)

    print library.upstream
    print library.upstream.last_upstream()
    print library.cache
    print library.cache.last_upstream()
def local_run(config, reloader=False):
    global stoppable_wsgi_server_run
    stoppable_wsgi_server_run = None

    debug()

    lf = lambda: new_library(config, True)

    l = lf()
    l.database.create()

    logger.info("starting local server for library '{}' on http://{}:{}".format(l.name, l.host, l.port))

    install(LibraryPlugin(lf))

    return run(host=l.host, port=l.port, reloader=reloader)
def local_debug_run(config):
    debug()

    port = config["port"] if config["port"] else 7979
    host = config["host"] if config["host"] else "localhost"

    logger.info("starting debug server on http://{}:{}".format(host, port))

    lf = lambda: new_library(config, True)

    l = lf()
    l.database.create()

    install(LibraryPlugin(lf))

    return run(host=host, port=port, reloader=True, server="stoppable")
def test_files(self):
    """Test some of the server's file functions."""
    from ambry.cache import new_cache
    from ambry.bundle import DbBundle

    fs = new_cache(self.server_rc.filesystem('rrc-fs'))
    fs.clean()

    remote = new_cache(self.server_rc.filesystem('rrc'))

    config = self.start_server()

    l = new_library(config)

    l.put_bundle(self.bundle)
    l.push()

    ident = self.bundle.identity
    ck = ident.cache_key

    # The remote is tied to the REST server, so it has the bundle,
    # but the new filesystem cache does not.
    self.assertFalse(fs.has(ck))
    self.assertTrue(remote.has(ck))

    # But if we tie them together, the FS cache should have it
    fs.upstream = remote

    self.assertTrue(fs.has(ck))

    path = fs.get(ck)

    b = DbBundle(path)

    self.assertEquals(ck, b.identity.cache_key)

    # It should have been copied, so the fs should still have it
    # after disconnecting.
    fs.upstream = None

    self.assertTrue(fs.has(ck))
def build(bundle_dir):
    from ambry.library import new_library

    # Import the bundle file from the directory
    bundle_class = load_bundle(bundle_dir)
    bundle = bundle_class(bundle_dir)

    l = new_library(rc.library(args.library_name))

    if l.get(bundle.identity.vid) and not args.force:
        prt("{} Bundle is already in library", bundle.identity.name)
        return
    elif bundle.is_built and not args.force and not args.clean:
        prt("{} Bundle is already built", bundle.identity.name)
        return
    else:
        if args.dryrun:
            prt("{} Would build but in dry run ", bundle.identity.name)
            return

        repo.bundle = bundle

        if args.clean:
            bundle.clean()

        # Re-create after cleaning is important for something ...
        bundle = bundle_class(bundle_dir)

        prt("{} Building ", bundle.identity.name)

        if not bundle.run_prepare():
            fatal("{} Prepare failed", bundle.identity.name)

        if not bundle.run_build():
            fatal("{} Build failed", bundle.identity.name)

    if args.install and not args.dryrun:
        if not bundle.run_install(force=True):
            fatal('{} Install failed', bundle.identity.name)
def x_test_search(self):
    from ambry.library import new_library

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    # for ds in l.datasets():
    #     print ds.vid

    l.search.index_datasets()

    for r in l.search.search_datasets("title:zip"):
        ds = l.dataset(r)
        print r, ds.vname, ds.data.get('title')

    for r in l.search.search_partitions("doc:0E06"):
        print r
def test_run(config): """Run method to be called from unit tests.""" from bottle import run, debug # @UnresolvedImport debug() lf = lambda: new_library(config, True) l = lf() l.database.create() global_logger.info( "Starting test server on http://{}:{}".format(l.host, l.port)) global_logger.info("Library at: {}".format(l.database.dsn)) install(LibraryPlugin(lf)) return run(host=l.host, port=l.port, reloader=False, server='stoppable')
def test_search(self):
    from ambry.library import new_library

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    print l.search

    # for ds in l.datasets():
    #     print ds.vid

    l.search.index_datasets()

    for r in l.search.search_datasets("title:zip"):
        ds = l.dataset(r)
        print r, ds.vname, ds.data.get('title')

    for r in l.search.search_partitions("doc:0E06"):
        print r
def x_test_search_parse(self):
    from ambry.library import new_library
    from ambry.library.search import SearchTermParser

    stp = SearchTermParser()

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    e = lambda x: l.search.make_query_from_terms(stp.parse(x))

    print e('births ')
    print e('births source cdph')
    print e('births with mother source cdph')
    print e('births with mother in California by tracts')
    print e('births with mother with birth in California by tracts')
def test_run(config): """Run method to be called from unit tests""" from bottle import run, debug # @UnresolvedImport debug() port = config["port"] if config["port"] else 7979 host = config["host"] if config["host"] else "localhost" lf = lambda: new_library(config, True) l = lf() l.database.create() logger.info("Starting test server on http://{}:{}".format(host, port)) logger.info("Library at: {}".format(l.database.dsn)) install(LibraryPlugin(lf)) return run(host=host, port=port, reloader=False, server="stoppable")
def __init__(self, content_type='html', blueprints=None):
    from jinja2 import Environment, PackageLoader
    from ambry.library import new_library

    self.library = new_library()

    self.doc_cache = self.library.doc_cache

    self.css_files = ['css/style.css', 'css/pygments.css']

    self.env = Environment(loader=PackageLoader('ambry.ui', 'templates'))

    self.extracts = []

    # Set to true to get Render to return json instead
    self.content_type = content_type

    self.blueprints = blueprints
def x_test_remote_library_partitions(self):
    self.start_server()

    l = self.get_library()

    r = l.put(self.bundle)

    r = l.get(self.bundle.identity.name)
    self.assertEquals(self.bundle.identity.name, r.identity.name)

    for partition in self.bundle.partitions:
        r = l.put(partition)

        # Get the partition with a name
        r = l.get(partition.identity.name)
        self.assertTrue(r is not False)
        self.assertEquals(partition.identity.name, r.partition.identity.name)
        self.assertEquals(self.bundle.identity.name, r.identity.name)

    # Copy all of the newly added files to the server.
    l.push()

    l2 = new_library('clean')
    l2.purge()

    r = l2.get('b1DxuZ001')

    self.assertTrue(r is not None and r is not False)

    print r

    self.assertTrue(r.partition is not None and r.partition is not False)
    self.assertEquals(r.partition.identity.id_, 'b1DxuZ001')

    self.assertTrue(os.path.exists(r.partition.database.path))
def x_test_dump(self):
    import time
    import logging

    l = new_library(self.server_rc.library('default-remote'), reset=True)

    l.clean()

    self.start_server()

    l.run_dumper_thread()
    l.run_dumper_thread()

    self.assertFalse(l.database.needs_dump())

    l.put(self.bundle)

    self.assertTrue(l.database.needs_dump())

    l.run_dumper_thread()
    time.sleep(6)

    self.assertFalse(l.database.needs_dump())

    l.run_dumper_thread()
    l.put(self.bundle)
    l.run_dumper_thread()
    time.sleep(7)

    print l.database.needs_dump()

    self.assertFalse(l.database.needs_dump())

    self.assertEquals(self.bundle.identity.name, l.get(self.bundle.identity.name).identity.name)

    l.clean()

    self.assertEqual(None, l.get(self.bundle.identity.name))

    l.restore()

    self.assertEquals(self.bundle.identity.name, l.get(self.bundle.identity.name).identity.name)
def get_library(self, name='default'):
    """Return a new library, clearing out the database before the test run."""
    config = self.rc.library(name)

    return new_library(config, reset=True)
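# How a get_library() helper like the one above is typically used in a test
# (an illustrative sketch; the test name is hypothetical, but put_bundle()
# and resolve() appear in the real tests in this file).
def test_example_roundtrip(self):
    l = self.get_library()

    l.put_bundle(self.bundle)

    ident = l.resolve(self.bundle.identity.vid)
    self.assertEquals(self.bundle.identity.vid, ident.vid)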
def test_load(self):
    from ambry.run import get_runconfig, RunConfig
    from ambry.client.rest import RemoteLibrary
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.identity import Identity

    config = self.start_server()
    l = new_library(config)
    rl = RemoteLibrary(self.server_url)

    #
    # Check that the library can list datasets that are inserted externally
    #
    l.put_bundle(self.bundle)

    s = set([i.fqname for i in rl.list().values()])

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', s)

    dsident = rl.dataset('diEGPXmDC8001')

    s = set([i.fqname for i in dsident.partitions.values()])

    self.assertEquals(4, len(s))

    self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
    self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
    self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

    #
    # Upload the dataset to S3, clear the library, then load it back in
    #
    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                        RunConfig.USER_ACCOUNTS))
    cache = new_cache(rc.filesystem('cached-compressed-s3'))

    fn = self.bundle.database.path
    identity = self.bundle.identity
    relpath = identity.cache_key

    r = cache.put(fn, relpath, identity.to_meta(file=fn))

    self.assertTrue(bool(cache.has(relpath)))

    # Clear the library.
    l.purge()

    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list()]))

    # Load from S3, directly in to the local library
    identity.add_md5(md5_for_file(fn))

    l.load(identity.cache_key, identity.md5)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Do it one more time, using the remote library
    l.purge()

    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list().values()]))

    rl.load_dataset(identity)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Check that we can get the record from the library
    self.assertEquals(identity.vid, rl.resolve(identity.vid).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.vname).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.cache_key).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.sname).vid)
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Custom worker for bundle operations.

    :param inqueue:
    :param outqueue:
    :param initializer:
    :param initargs:
    :param maxtasks:
    :return:
    """
    from ambry.library import new_library
    from ambry.run import get_runconfig
    import traceback

    assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)

    put = outqueue.put
    get = inqueue.get

    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    try:
        task = get()
    except (EOFError, IOError):
        debug('worker got EOFError or IOError -- exiting')
        return

    if task is None:
        debug('worker got sentinel -- exiting')
        return

    job, i, func, args, kwds = task  # func = mapstar = map(*args)

    # Since there is only one source build per process, we know the structure
    # of the args beforehand.
    mp_func = args[0][0]
    mp_args = list(args[0][1][0])

    library = new_library(get_runconfig())
    library.database.close()  # Maybe it is still open after the fork.
    library.init_debug()

    bundle_vid = mp_args[0]

    try:
        b = library.bundle(bundle_vid)
        library.logger = b.logger  # So the library logs to the same file as the bundle.

        b = b.cast_to_subclass()
        b.multi = True  # In the parent it is a number; in the child it just needs to be true to get the right logger template
        b.is_subprocess = True
        b.limited_run = bool(int(os.getenv('AMBRY_LIMITED_RUN', 0)))

        assert b._progress is None  # Don't want to share connections across processes

        mp_args[0] = b

        result = (True, [mp_func(*mp_args)])
    except Exception as e:
        tb = traceback.format_exc()
        b.error('Subprocess {} raised an exception: {}'.format(os.getpid(), e.message), False)
        b.error(tb, False)
        result = (False, e)

    assert result

    b.progress.close()
    library.close()

    try:
        put((job, i, result))
    except Exception as e:
        wrapped = MaybeEncodingError(e, result[1])
        debug("Possible encoding error while sending result: %s" % (wrapped))
        put((job, i, (False, wrapped)))
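# For reference: the task consumed by worker() follows multiprocessing.Pool's
# internal protocol. Assuming the parent side maps func over argument tuples
# with a chunk size of one, the queued task looks roughly like
#
#   (job, i, mapstar, ((func, [(bundle_vid, ...)]),), {})
#
# so worker() finds the real function at args[0][0] and its argument tuple at
# args[0][1][0]. This shape is inferred from the unpacking above, not a
# documented contract.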
def remote_command(args, rc):
    from ambry.library import new_library

    l = new_library(rc.library(args.library_name))

    globals()['remote_' + args.subcommand](args, l, rc)
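# remote_command (and source_command above) dispatch by name: the handler
# 'remote_<subcommand>' is looked up in the module globals. A hypothetical
# handler matching that calling convention, using l.remotes and remote.url
# as in test_accounts above:
def remote_list(args, l, rc):
    for remote in l.remotes:
        print remote.url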
def _get_library(cls, config):
    from ambry.library import new_library

    return new_library(config if config else cls.get_rc())
def library(name='default'):
    """Return the named library for this installation (default: 'default')."""
    return _l.new_library(config().library(name))
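# Example use of the accessor above (illustrative): library() parallels
# get_library(), but selects a named library section from the run config.
#
#   l = library()          # the 'default' library
#   r = library('reader')  # a differently configured library, as in the tests above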