def __init__(self, app_config, import_name, static_path=None, static_url_path=None,
             static_folder='static', template_folder='templates', instance_path=None,
             instance_relative_config=False):
    """Flask application constructor.

    Wires up CSRF protection, the login manager and a filesystem-backed
    page cache, then finishes normal Flask initialization and applies
    ``app_config`` on top of the Flask defaults.
    """
    from flask.ext.cache import Cache
    from ambry.library import Library
    from ambry.run import get_runconfig

    # Deferred setup (secret keys, session interface, ...) happens lazily;
    # __call__ checks this flag on the first request.
    self._initialized = False

    self.csrf = CsrfProtect()
    self.login_manager = LoginManager()

    super(Application, self).__init__(import_name, static_path, static_url_path,
                                      static_folder, template_folder, instance_path,
                                      instance_relative_config)

    self.config.update(app_config)

    # Read-only library handle, used only to locate the UI cache directory.
    l = Library(get_runconfig(), read_only=True, echo=False)

    self.cache = Cache(config={
        'CACHE_TYPE': 'filesystem',
        'CACHE_DIR': l.filesystem.cache('ui')
    })
    self.cache.init_app(self)
def test_md5(self):
    """The md5 a cache reports for a stored file matches the md5 of the
    source file, for both the plain and the compressing filesystem cache."""
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.cache.filesystem import make_metadata

    run_config = get_runconfig(
        (os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

    test_file = self.make_test_file()
    expected_md5 = md5_for_file(test_file)

    # Plain filesystem cache.
    fs_cache = new_cache(run_config.filesystem("fscache"))
    fs_cache.put(test_file, "foo1")
    _ = fs_cache.path("foo1")
    self.assertEquals(expected_md5, fs_cache.md5("foo1"))

    # Compressing cache: must still report the *uncompressed* file's md5.
    comp_cache = new_cache(run_config.filesystem("compressioncache"))
    comp_cache.put(test_file, "foo2", metadata=make_metadata(test_file))
    _ = comp_cache.path("foo2")
    self.assertEquals(expected_md5, comp_cache.md5("foo2"))

    os.remove(test_file)
def __init__(self, config=None, search=None, echo=None, read_only=False):
    """Open the library database and filesystem layout.

    :param config: run configuration; defaults to :func:`get_runconfig`.
    :param search: optional search backend name/config; enables ``_search``.
    :param echo: SQLAlchemy echo flag, passed through to the Database.
    :param read_only: allow optimizations that assume no bundle building.
    :raises DatabaseMissingError: when the library database can't be opened.
    """
    from sqlalchemy.exc import OperationalError
    from ambry.orm.exc import DatabaseMissingError

    if config:
        self._config = config
    else:
        self._config = get_runconfig()

    self.logger = logger
    self.read_only = read_only  # allow optimizations that assume we aren't building bundles.
    self._echo = echo

    # BUG FIX: use the resolved configuration. Passing the raw `config`
    # argument handed LibraryFilesystem None whenever no config was supplied,
    # defeating the get_runconfig() fallback above.
    self._fs = LibraryFilesystem(self._config)

    self._db = Database(self._fs.database_dsn, echo=echo)
    self._account_password = self.config.accounts.password

    self._warehouse = None  # Will be populated in the warehouse property.

    try:
        self._db.open()
    except OperationalError as e:
        raise DatabaseMissingError("Failed to open database '{}': {} ".format(self._db.dsn, e))

    self.processes = None  # Number of multiprocessing proccors. Default to all of them

    if search:
        self._search = Search(self, search)
    else:
        self._search = None
def test_s3(self):
    """Push the test bundle and all of its partitions through the
    cached, compressed S3 cache, then read the bundle back."""
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.bundle import DbBundle

    rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)

    identity = b.identity

    fsname = "cached-compressed-s3"

    config = rc.filesystem(fsname)
    cache = new_cache(config)

    # Store the bundle database under its cache key, with md5 metadata.
    r = cache.put(fn, b.identity.cache_key, b.identity.to_meta(md5=b.database.md5))

    # Store every partition database the same way.
    for p in b.partitions:
        r = cache.put(p.database.path, p.identity, p.identity.to_meta(md5=p.database.md5))

    r = cache.get(b.identity.cache_key)
def test_s3(self):
    """Round-trip the test bundle and its partitions through the cached,
    compressed S3 cache."""
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.bundle import DbBundle

    run_config = get_runconfig(
        (os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_CONFIG))

    db_path = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    bundle = DbBundle(db_path)
    identity = bundle.identity

    fsname = 'cached-compressed-s3'
    s3_config = run_config.filesystem(fsname)
    s3_cache = new_cache(s3_config)

    # Upload the bundle database, then each partition database.
    result = s3_cache.put(db_path, bundle.identity.cache_key,
                          bundle.identity.to_meta(md5=bundle.database.md5))

    for partition in bundle.partitions:
        result = s3_cache.put(partition.database.path, partition.identity,
                              partition.identity.to_meta(md5=partition.database.md5))

    # And read the bundle back out of the cache.
    result = s3_cache.get(bundle.identity.cache_key)
def setUp(self):
    """Load the test configs, build the test bundle, and read the test
    manifest contents into memory."""
    import bundles.testbundle.bundle
    from ambry.run import RunConfig
    import manifests, configs

    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
    self.config_dir = os.path.dirname(configs.__file__)

    self.rc = get_runconfig(
        (os.path.join(self.config_dir, 'test.yaml'),
         os.path.join(self.bundle_dir, 'bundle.yaml'),
         RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()

    self.bundle = Bundle()

    #print "Deleting: {}".format(self.rc.group('filesystem').root)
    #ambry.util.rm_rf(self.rc.group('filesystem').root)

    self.m = os.path.join(os.path.dirname(manifests.__file__), 'test.ambry')

    with open(self.m) as f:
        self.m_contents = f.read()
def setUp(self):
    """Load the test configs, wipe the whole test tree, and build the
    test bundle for a pristine run."""
    import bundles.testbundle.bundle
    from ambry.run import RunConfig
    # BUG FIX: `manifests` was never imported here, but is referenced below
    # for the test manifest path (the sibling setUp implementations in this
    # file import it alongside `configs`).
    import manifests, configs
    from shutil import rmtree

    super(Test, self).setUp()

    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
    self.config_dir = os.path.dirname(configs.__file__)

    self.rc = get_runconfig((os.path.join(self.config_dir, 'test.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    # Delete the whole test tree every run.
    test_folder = self.rc.group('filesystem').root
    if os.path.exists(test_folder):
        rmtree(test_folder)

    self.mf = os.path.join(os.path.dirname(manifests.__file__), 'test.ambry')

    self.bundle = Bundle()
    self.waho = None
def test_adhoc(self): from ambry.library import new_library config = get_runconfig().library('default') l = new_library(config, reset=True) print l.resolve('211sandiego.org-calls-p1ye2014-orig-calls')
def setUp(self):
    """Point the CLI tests at a fixed scratch directory and its config file."""
    import os
    from ambry.run import get_runconfig

    #self.test_dir = tempfile.mkdtemp(prefix='test_cli_')
    # Fixed path instead of a fresh mkdtemp, so runs are inspectable after
    # the fact and re-use the same location.
    self.test_dir = '/tmp/test_cli'

    self.config_file = os.path.join(self.test_dir, 'config.yaml')

    self.rc = get_runconfig((self.config_file, RunConfig.USER_ACCOUNTS))
def test_caches(self):
    '''Basic test of put(), get() and has() for all cache types'''
    from functools import partial
    from ambry.run import get_runconfig, RunConfig
    from ambry.filesystem import Filesystem
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.bundle import DbBundle

    #self.start_server() # For the rest-cache

    #fn = '/tmp/1mbfile'
    #with open(fn, 'wb') as f:
    #    f.write('.'*(1024))

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)

    md5 = md5_for_file(fn)

    print "MD5 {} = {}".format(fn, md5)

    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_ACCOUNTS))

    for i, fsname in enumerate(['fscache', 'limitedcache', 'compressioncache', 'cached-s3', 'cached-compressed-s3']):

        config = rc.filesystem(fsname)
        cache = new_cache(config)
        print '---', fsname, cache

        identity = self.bundle.identity
        relpath = identity.cache_key

        # put() then get() must round-trip. Remote caches may return a URL
        # from get(), in which case the local-path existence check is skipped.
        r = cache.put(fn, relpath, identity.to_meta(md5=md5))
        r = cache.get(relpath)

        if not r.startswith('http'):
            self.assertTrue(os.path.exists(r), 'Not a url: {}: {}'.format(r, str(cache)))

        self.assertTrue(cache.has(relpath, md5=md5))

        # remove() with propagate=True should delete through all layers.
        cache.remove(relpath, propagate=True)

        self.assertFalse(os.path.exists(r), str(cache))
        self.assertFalse(cache.has(relpath))

    # An S3 cache with no upstream should still accept a put().
    cache = new_cache(rc.filesystem('s3cache-noupstream'))
    r = cache.put(fn, 'a')
def test_caches(self):
    '''Basic test of put(), get() and has() for all cache types'''
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.bundle import DbBundle

    self.start_server()  # For the rest-cache

    #fn = '/tmp/1mbfile'
    #with open(fn, 'wb') as f:
    #    f.write('.'*(1024))

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)

    md5 = md5_for_file(fn)

    print "MD5 {} = {}".format(fn, md5)

    rc = get_runconfig(
        (os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_CONFIG))

    for i, fsname in enumerate([
            'fscache', 'limitedcache', 'compressioncache',
            'cached-s3', 'cached-compressed-s3' ]):  #'compressioncache',

        config = rc.filesystem(fsname)
        cache = new_cache(config)
        print '---', fsname, cache

        identity = self.bundle.identity
        relpath = identity.cache_key

        # put() then get() must round-trip. Remote caches may return a URL
        # from get(), in which case the local-path existence check is skipped.
        r = cache.put(fn, relpath, identity.to_meta(md5=md5))
        r = cache.get(relpath)

        if not r.startswith('http'):
            self.assertTrue(os.path.exists(r), str(cache))

        self.assertTrue(cache.has(relpath, md5=md5))

        # remove() with propagate=True should delete through all layers.
        cache.remove(relpath, propagate=True)

        self.assertFalse(os.path.exists(r), str(cache))
        self.assertFalse(cache.has(relpath))

    # An S3 cache with no upstream should still accept a put().
    cache = new_cache(rc.filesystem('s3cache-noupstream'))
    r = cache.put(fn, 'a')
def setUp(self):
    """Load the client test configuration and build the test bundle."""
    import testbundle.bundle

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    config_paths = (os.path.join(self.bundle_dir, 'client-test-config.yaml'),
                    os.path.join(self.bundle_dir, 'bundle.yaml'),
                    RunConfig.USER_CONFIG)
    self.rc = get_runconfig(config_paths)

    self.copy_or_build_bundle()
    self.bundle = Bundle()
def reset(self):
    """Wipe and recreate the scratch directory, then rebuild the run
    config and library."""
    from ambry.run import get_runconfig

    if os.path.exists(self.test_dir):
        shutil.rmtree(self.test_dir)
    os.makedirs(self.test_dir)

    self.config_file = self.new_config_file()
    self.rc = get_runconfig((self.config_file, RunConfig.USER_ACCOUNTS))
    self.library = self.get_library()
def setUp(self):
    """Reset the server scratch area and load both the client-side and
    server-side run configurations for the test bundle."""
    import bundles.testbundle.bundle

    rm_rf('/tmp/server')

    self.copy_or_build_bundle()
    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'client-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.server_rc = get_runconfig((os.path.join(self.bundle_dir, 'server-test-config.yaml'),
                                    RunConfig.USER_ACCOUNTS))

    self.bundle = Bundle()
    # Re-derive bundle_dir from the built bundle itself.
    self.bundle_dir = self.bundle.bundle_dir
def setUp(self):
    """Load the client test configuration and build the test bundle."""
    import testbundle.bundle

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'client-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_CONFIG))

    self.copy_or_build_bundle()

    self.bundle = Bundle()
def new_library(config=None):
    """Construct a Library from *config* (the default run config when
    omitted) and publish it through the module-level ``global_library``."""
    if config is None:
        config = get_runconfig()

    lib = Library(config)

    global global_library
    global_library = lib

    return lib
def test_runconfig(self):
    """Check the the RunConfig expands the library configuration"""
    from ambry.run import get_runconfig, RunConfig

    rc = get_runconfig(
        (os.path.join(self.bundle_dir, 'test-run-config.yaml'),
         RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

    l = rc.library('library1')

    # Named references in the config should be expanded into full
    # sub-dictionaries, recursively through the upstream chain.
    self.assertEquals('database1', l['database']['_name'])
    self.assertEquals('filesystem1', l['filesystem']['_name'])
    self.assertEquals('filesystem2', l['filesystem']['upstream']['_name'])
    self.assertEquals('filesystem3', l['filesystem']['upstream']['upstream']['_name'])
def setUp(self):
    """Build the geo test bundle and load its run configuration."""
    import os
    from ambry.run import get_runconfig, RunConfig

    self.copy_or_build_bundle()
    self.bundle = Bundle()
    self.bundle_dir = self.bundle.bundle_dir

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'geo-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))
def x_test_remote(self):
    """Disabled (x_ prefix): print the upstream/cache chain of a library
    built from the server test configuration."""
    from ambry.run import RunConfig
    from ambry.library import new_library

    rc = get_runconfig((os.path.join(self.bundle_dir, 'server-test-config.yaml'), RunConfig.USER_CONFIG))

    config = rc.library('default')

    library = new_library(config)

    print library.upstream
    print library.upstream.last_upstream()
    print library.cache
    print library.cache.last_upstream()
def setUp(self):
    """Build the geo test bundle and load its run configuration."""
    import os
    from ambry.run import get_runconfig, RunConfig

    self.copy_or_build_bundle()
    self.bundle = Bundle()
    self.bundle_dir = self.bundle.bundle_dir

    config_paths = (os.path.join(self.bundle_dir, 'geo-test-config.yaml'),
                    os.path.join(self.bundle_dir, 'bundle.yaml'),
                    RunConfig.USER_ACCOUNTS)
    self.rc = get_runconfig(config_paths)
def test_caches(self):
    """Basic test of put(), get() and has() for all cache types"""
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.bundle import DbBundle

    self.start_server()  # For the rest-cache

    # fn = '/tmp/1mbfile'
    # with open(fn, 'wb') as f:
    #     f.write('.'*(1024))

    fn = self.bundle.database.path

    # Opening the file might run the database updates in
    # database.sqlite._on_connect_update_schema, which can affect the md5.
    b = DbBundle(fn)

    md5 = md5_for_file(fn)

    print "MD5 {} = {}".format(fn, md5)

    rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

    for i, fsname in enumerate(
            ["fscache", "limitedcache", "compressioncache", "cached-s3", "cached-compressed-s3"]
    ):  #'compressioncache',

        config = rc.filesystem(fsname)
        cache = new_cache(config)
        print "---", fsname, cache

        identity = self.bundle.identity
        relpath = identity.cache_key

        # put() then get() must round-trip. Remote caches may return a URL
        # from get(), in which case the local-path existence check is skipped.
        r = cache.put(fn, relpath, identity.to_meta(md5=md5))
        r = cache.get(relpath)

        if not r.startswith("http"):
            self.assertTrue(os.path.exists(r), str(cache))

        self.assertTrue(cache.has(relpath, md5=md5))

        # remove() with propagate=True should delete through all layers.
        cache.remove(relpath, propagate=True)

        self.assertFalse(os.path.exists(r), str(cache))
        self.assertFalse(cache.has(relpath))

    # An S3 cache with no upstream should still accept a put().
    cache = new_cache(rc.filesystem("s3cache-noupstream"))
    r = cache.put(fn, "a")
def test_number_service(self):
    """Exercise the remote number server; the digit length of returned
    numbers depends on the key's assignment class."""
    ## For this test, setup these access keys in the
    ## Redis Server:
    ##
    ## redis-cli set assignment_class:test-ac-authoritative authoritative
    ## redis-cli set assignment_class:test-ac-registered registered
    ## redis-cli set assignment_class:fe78d179-8e61-4cc5-ba7b-263d8d3602b9 unregistered
    from ambry.identity import NumberServer
    from ambry.run import get_runconfig
    from ambry.dbexceptions import ConfigurationError

    rc = get_runconfig()

    try:
        ng = rc.service('numbers')
    except ConfigurationError:
        # No numbers service configured; skip the test silently.
        return

    # You'll need to run a local service at this address
    host = "numbers"
    port = 7977

    unregistered_key = 'fe78d179-8e61-4cc5-ba7b-263d8d3602b9'

    # Registered keys get 6-digit numbers.
    ns = NumberServer(host=host, port=port, key='test-ac-registered')
    n = ns.next()
    self.assertEqual(6, len(str(n)))

    # Next request is authoritative, so no need to sleep here.
    # Authoritative keys get 4-digit numbers.
    ns = NumberServer(host=host, port=port, key='test-ac-authoritative')
    n = ns.next()
    self.assertEqual(4, len(str(n)))

    ns.sleep()  # Avoid being rate limited

    # Override to use a local numbers server:
    # Unregistered keys get 8-digit numbers.
    ns = NumberServer(host=host, port=port, key=unregistered_key)
    n = ns.next()
    self.assertEqual(8, len(str(n)))

    # find() must be stable: the same name resolves to the same number.
    n1 = ns.find('foobar')

    self.assertEquals(str(n1), str(ns.find('foobar')))
    self.assertEquals(str(n1), str(ns.find('foobar')))
def setUp(self):
    """Load the library test configuration, build the bundle, and clear
    the filesystem root from any previous run."""
    import testbundle.bundle

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'library-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()

    self.bundle = Bundle()

    print "Deleting: {}".format(self.rc.group('filesystem').root)
    Test.rm_rf(self.rc.group('filesystem').root)
def __call__(self, environ, start_response):
    """WSGI entry point; performs one-time lazy initialization (secret
    keys, CSRF, session interface, login manager) before delegating to
    the normal Flask request handling."""
    if not self._initialized:
        from ambry.library import Library
        from ambry.run import get_runconfig

        rc = get_runconfig()
        l = Library(rc, read_only=True, echo=False)

        secret_key = None

        # Secret key priority: environment, then library config, then random.
        if os.getenv('AMBRY_UI_SECRET'):
            app.logger.info("Using secret_key from env")
            secret_key = os.getenv('AMBRY_UI_SECRET')

        if not secret_key and l.ui_config.secret:
            app.logger.info("Using secret_key from library")
            secret_key = l.ui_config.secret

        if not secret_key:
            from uuid import uuid4
            app.logger.warn("SECRET_KEY was not set. Setting to a random value")
            secret_key = str(uuid4())  # Must be the same for all worker processes.

        if not self.config['WTF_CSRF_SECRET_KEY']:
            self.config['WTF_CSRF_SECRET_KEY'] = secret_key

        self.config['SECRET_KEY'] = secret_key

        # BUG FIX: a stray trailing comma made `title` a 1-tuple, so the
        # default website title rendered as a tuple repr.
        title = os.getenv('AMBRY_UI_TITLE', "Ambry Data Library")

        if l.ui_config.website_title:
            title = l.ui_config.website_title

        self.config['website_title'] = title

        self.secret_key = secret_key
        self.csrf.init_app(self)

        self.session_interface = ItsdangerousSessionInterface()

        self.login_manager.init_app(app)

        Bootstrap(app)

        self._initialized = True

    return super(Application, self).__call__(environ, start_response)
def reset(self):
    """Wipe and recreate the scratch directory, then rebuild the run
    config and library from a fresh config file."""
    from ambry.run import get_runconfig
    import os, tempfile, shutil

    if os.path.exists(self.test_dir):
        shutil.rmtree(self.test_dir)
    os.makedirs(self.test_dir)

    self.config_file = self.new_config_file()

    self.rc = get_runconfig((self.config_file, RunConfig.USER_ACCOUNTS))

    self.library = self.get_library()
def __init__(self):
    """Open a read-only Library, attach a Renderer to it, and route
    debug logging to the library's log directory."""
    from ambry.library import Library
    from render import Renderer
    from ambry.run import get_runconfig
    import logging

    run_config = get_runconfig()

    self.library = Library(run_config, read_only=True, echo=False)
    self.renderer = Renderer(self.library)

    log_path = self.library.filesystem.logs()
    logging.basicConfig(filename=log_path, level=logging.DEBUG)
def __call__(self, environ, start_response):
    """WSGI entry point; performs one-time lazy initialization (secret
    keys, CSRF, session interface, login manager) before delegating to
    the normal Flask request handling."""
    if not self._initialized:
        from ambry.library import Library
        from ambry.run import get_runconfig

        rc = get_runconfig()
        l = Library(rc, read_only=True, echo=False)

        secret_key = None

        # Secret key priority: environment, then library config, then random.
        if os.getenv('AMBRY_UI_SECRET'):
            app.logger.info("Using secret_key from env")
            secret_key = os.getenv('AMBRY_UI_SECRET')

        if not secret_key and l.ui_config.secret:
            app.logger.info("Using secret_key from library")
            secret_key = l.ui_config.secret

        if not secret_key:
            from uuid import uuid4
            app.logger.warn("SECRET_KEY was not set. Setting to a random value")
            secret_key = str(uuid4())  # Must be the same for all worker processes.

        if not self.config['WTF_CSRF_SECRET_KEY']:
            self.config['WTF_CSRF_SECRET_KEY'] = secret_key

        self.config['SECRET_KEY'] = secret_key

        # BUG FIX: a stray trailing comma made `title` a 1-tuple, so the
        # default website title rendered as a tuple repr.
        title = os.getenv('AMBRY_UI_TITLE', "Ambry Data Library")

        if l.ui_config.website_title:
            title = l.ui_config.website_title

        self.config['website_title'] = title

        self.secret_key = secret_key
        self.csrf.init_app(self)

        self.session_interface = ItsdangerousSessionInterface()

        self.login_manager.init_app(app)

        Bootstrap(app)

        self._initialized = True

    return super(Application, self).__call__(environ, start_response)
def setUp(self):
    """Load the warehouse test configuration, build the bundle, and wipe
    the filesystem root from prior runs."""
    import testbundle.bundle
    from ambry.run import RunConfig

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'warehouse-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()

    self.bundle = Bundle()

    print "Deleting: {}".format(self.rc.group('filesystem').root_dir)
    ambry.util.rm_rf(self.rc.group('filesystem').root_dir)
def setUp(self):
    """Load the library test configuration, build the test bundle, and
    clear the filesystem root left over from any previous run."""
    super(Test, self).setUp()

    import test.bundles.testbundle.bundle

    self.bundle_dir = os.path.dirname(test.bundles.testbundle.bundle.__file__)

    config_paths = (os.path.join(self.bundle_dir, 'library-test-config.yaml'),
                    os.path.join(self.bundle_dir, 'bundle.yaml'),
                    RunConfig.USER_ACCOUNTS)
    self.rc = get_runconfig(config_paths)

    self.copy_or_build_bundle()
    self.bundle = Bundle()

    Test.rm_rf(self.rc.group('filesystem').root)
def setUp(self):
    """Load the library test configuration, build the test bundle, and
    clear the filesystem root left over from any previous run."""
    super(Test, self).setUp()

    # BUG FIX: this import appeared commented out, leaving `bundles`
    # potentially undefined on the next line. Importing here is harmless
    # if the module was already imported at file level.
    import bundles.testbundle.bundle

    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

    self.rc = get_runconfig(
        (os.path.join(self.bundle_dir, 'library-test-config.yaml'),
         os.path.join(self.bundle_dir, 'bundle.yaml'),
         RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()
    self.bundle = Bundle()

    Test.rm_rf(self.rc.group('filesystem').root)
def x_test_search(self):
    """Disabled (x_ prefix): index the default library and run ad-hoc
    dataset and partition searches, printing the hits."""
    from ambry.library import new_library

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    #for ds in l.datasets(): print ds.vid

    l.search.index_datasets()

    for r in l.search.search_datasets("title:zip"):
        ds = l.dataset(r)
        print r, ds.vname, ds.data.get('title')

    for r in l.search.search_partitions("doc:0E06"):
        print r
def test_runconfig(self):
    """Check that the RunConfig expands the library configuration,
    including the recursive upstream chain and its account."""
    from ambry.run import get_runconfig, RunConfig

    run_config = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                                RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

    lib_config = run_config.library('library1')

    self.assertEquals('database1', lib_config['database']['_name'])
    self.assertEquals('filesystem1', lib_config['filesystem']['_name'])
    self.assertEquals('filesystem2', lib_config['filesystem']['upstream']['_name'])
    self.assertEquals('filesystem3', lib_config['filesystem']['upstream']['upstream']['_name'])
    self.assertEquals('devtest.sandiegodata.org',
                      lib_config['filesystem']['upstream']['upstream']['account']['_name'])
def x_test_search_parse(self):
    """Disabled (x_ prefix): show how SearchTermParser terms translate
    into library search queries."""
    from ambry.library import new_library
    from ambry.library.search import SearchTermParser

    stp = SearchTermParser()

    config = get_runconfig().library('default')
    l = new_library(config, reset=True)

    # Shorthand: parse a raw term string and build the query object from it.
    e = lambda x: l.search.make_query_from_terms(stp.parse(x))

    print e('births ')
    print e('births source cdph')
    print e('births with mother source cdph')
    print e('births with mother in California by tracts')
    print e('births with mother with birth in California by tracts')
def test_search(self):
    """Index the default library and run ad-hoc dataset and partition
    searches, printing the hits."""
    from ambry.library import new_library

    config = get_runconfig().library('default')

    l = new_library(config, reset=True)

    print l.search

    #for ds in l.datasets(): print ds.vid

    l.search.index_datasets()

    for r in l.search.search_datasets("title:zip"):
        ds = l.dataset(r)
        print r, ds.vname, ds.data.get('title')

    for r in l.search.search_partitions("doc:0E06"):
        print r
def __init__(self, app_config, import_name, static_path=None, static_url_path=None,
             static_folder='static', template_folder='templates', instance_path=None,
             instance_relative_config=False):
    """Flask application constructor.

    Wires up CSRF protection, the login manager and a filesystem-backed
    page cache, then finishes normal Flask initialization and applies
    ``app_config`` on top of the Flask defaults.
    """
    from flask.ext.cache import Cache
    from ambry.library import Library
    from ambry.run import get_runconfig

    # Deferred setup (secret keys, session interface, ...) happens lazily;
    # __call__ checks this flag on the first request.
    self._initialized = False

    self.csrf = CsrfProtect()
    self.login_manager = LoginManager()

    super(Application, self).__init__(import_name, static_path, static_url_path,
                                      static_folder, template_folder, instance_path,
                                      instance_relative_config)

    self.config.update(app_config)

    # Read-only library handle, used only to locate the UI cache directory.
    l = Library(get_runconfig(), read_only=True, echo=False)

    self.cache = Cache(config={'CACHE_TYPE': 'filesystem',
                               'CACHE_DIR': l.filesystem.cache('ui')})
    self.cache.init_app(self)
def setUp(self):
    """Build the source test environment: load configs, reset the
    filesystem root, copy the test bundle into the source repo, and
    import its bundle.py as the bundle under test."""
    import testbundle.bundle, shutil, os

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    self.rc = get_runconfig(
        (os.path.join(self.bundle_dir, 'source-test-config.yaml'),
         os.path.join(self.bundle_dir, 'bundle.yaml'),
         RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()
    bundle = Bundle()

    self.source_save_dir = str(
        self.rc.group('filesystem').root) + '-source'

    self.setup_source_dir()

    print "Deleting: {}".format(self.rc.group('filesystem').root)
    ambry.util.rm_rf(self.rc.group('filesystem').root)

    bdir = os.path.join(self.rc.sourcerepo.dir, 'testbundle')

    # Don't copy build products or VCS metadata into the source repo.
    pats = shutil.ignore_patterns('build', 'build-save', '*.pyc', '.git', '.gitignore',
                                  '.ignore', '__init__.py')

    print "Copying test dir tree to ", bdir
    shutil.copytree(bundle.bundle_dir, bdir, ignore=pats)

    # Import the bundle file from the directory
    from ambry.run import import_file
    import imp

    rp = os.path.realpath(os.path.join(bdir, 'bundle.py'))

    mod = import_file(rp)

    dir_ = os.path.dirname(rp)
    self.bundle = mod.Bundle(dir_)

    print self.bundle.bundle_dir
def setUp(self):
    """Build the source test environment: load configs, reset the
    filesystem root, copy the test bundle into the source repo, and
    import its bundle.py as the bundle under test."""
    import testbundle.bundle
    import shutil
    import os

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'source-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()
    bundle = Bundle()

    self.source_save_dir = str(self.rc.group('filesystem').root) + '-source'

    self.setup_source_dir()

    print "Deleting: {}".format(self.rc.group('filesystem').root)
    ambry.util.rm_rf(self.rc.group('filesystem').root)

    bdir = os.path.join(self.rc.sourcerepo.dir, 'testbundle')

    # Don't copy build products or VCS metadata into the source repo.
    pats = shutil.ignore_patterns('build', 'build-save', '*.pyc', '.git', '.gitignore',
                                  '.ignore', '__init__.py')

    print "Copying test dir tree to ", bdir
    shutil.copytree(bundle.bundle_dir, bdir, ignore=pats)

    # Import the bundle file from the directory
    from ambry.run import import_file

    rp = os.path.realpath(os.path.join(bdir, 'bundle.py'))

    mod = import_file(rp)

    dir_ = os.path.dirname(rp)
    self.bundle = mod.Bundle(dir_)

    print self.bundle.bundle_dir
def setUp(self):
    """Load the test configs, build the test bundle, and read the test
    manifest contents into memory."""
    import bundles.testbundle.bundle
    from ambry.run import RunConfig
    import manifests, configs

    self.bundle_dir = os.path.dirname(
        bundles.testbundle.bundle.__file__)
    self.config_dir = os.path.dirname(configs.__file__)

    self.rc = get_runconfig((os.path.join(self.config_dir, 'test.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.copy_or_build_bundle()

    self.bundle = Bundle()

    #print "Deleting: {}".format(self.rc.group('filesystem').root)
    #ambry.util.rm_rf(self.rc.group('filesystem').root)

    self.m = os.path.join(os.path.dirname(manifests.__file__), 'test.ambry')

    with open(self.m) as f:
        self.m_contents = f.read()
def test_compression(self):
    """Store a file in the compressing cache, decompress it through the
    plain cache, and verify the md5 round-trips unchanged."""
    from ambry.run import get_runconfig
    from ambry.cache import new_cache
    from ambry.util import temp_file_name, md5_for_file, copy_file_or_flo

    rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

    comp_cache = new_cache(rc.filesystem("compressioncache"))

    test_file_name = "test_file"

    # Build a small, predictable source file: "000:001:002:..."
    fn = temp_file_name()
    print "orig file ", fn
    with open(fn, "wb") as f:
        for i in range(1000):
            f.write("{:03d}:".format(i))

    cf = comp_cache.put(fn, test_file_name)

    # Read the compressed copy back through a gzip wrapper and stream the
    # decompressed bytes into the uncompressed cache.
    with open(cf) as stream:
        from ambry.util.sgzip import GzipFile
        stream = GzipFile(stream)

        uncomp_cache = new_cache(rc.filesystem("fscache"))

        uncomp_stream = uncomp_cache.put_stream("decomp")

        copy_file_or_flo(stream, uncomp_stream)

        uncomp_stream.close()

    dcf = uncomp_cache.get("decomp")

    # The decompressed copy must be byte-identical to the original.
    self.assertEquals(md5_for_file(fn), md5_for_file(dcf))

    os.remove(fn)
def __init__(self, config=None, search=None, echo=None, read_only=False):
    """Open the library database and filesystem layout.

    :param config: run configuration; defaults to :func:`get_runconfig`.
    :param search: optional search backend name/config; enables ``_search``.
    :param echo: SQLAlchemy echo flag, passed through to the Database.
    :param read_only: allow optimizations that assume no bundle building.
    :raises DatabaseMissingError: when the library database can't be opened.
    """
    from sqlalchemy.exc import OperationalError
    from ambry.orm.exc import DatabaseMissingError

    if config:
        self._config = config
    else:
        self._config = get_runconfig()

    self.logger = logger
    self.read_only = read_only  # allow optimizations that assume we aren't building bundles.
    self._echo = echo

    # BUG FIX: use the resolved configuration. Passing the raw `config`
    # argument handed LibraryFilesystem None whenever no config was supplied,
    # defeating the get_runconfig() fallback above.
    self._fs = LibraryFilesystem(self._config)

    self._db = Database(self._fs.database_dsn, echo=echo)
    self._account_password = self.config.accounts.password

    self._warehouse = None  # Will be populated in the warehouse property.

    try:
        self._db.open()
    except OperationalError as e:
        raise DatabaseMissingError(
            "Failed to open database '{}': {} ".format(self._db.dsn, e))

    self.processes = None  # Number of multiprocessing proccors. Default to all of them

    if search:
        self._search = Search(self, search)
    else:
        self._search = None
def __init__(self):
    """Hold a read-only handle on the Ambry library, built from the
    default run configuration."""
    from ambry.library import Library
    from ambry.run import get_runconfig

    run_config = get_runconfig()
    self.library = Library(run_config, read_only=True, echo=False)
# This is the key that can be distributed publically. It is only to # keep bots and spiders from sucking up a bunch of numbers. rds.set("assignment_class:" + unregistered_key, 'unregistered') install(RedisPlugin(pool)) print host, port return run(host=host, port=port, reloader=reloader, server='paste') if __name__ == '__main__': import argparse from ambry.run import get_runconfig from ..util import print_yaml import uuid rc = get_runconfig() d = rc.servers( 'numbers', { 'host': 'localhost', 'port': 8080, 'unregistered_key': str( uuid.uuid4())}) try: d = d.to_dict() except: pass d['redis'] = d.get('redis', {'host': 'localhost', 'port': 6379}) parser = argparse.ArgumentParser(prog='python -mambry.server.numbers', description='Run an Ambry numbers server')
def config():
    """Return the default run_config object for this installation."""
    from ambry.run import get_runconfig

    run_config = get_runconfig()
    return run_config
def main(argsv=None, ext_logger=None):
    """Entry point for the ``ambry`` command line tool.

    Builds the argument parser (one subparser per command group), resolves
    the run-config path from ``-c``/``--single-config``/the environment,
    loads the run configuration, and dispatches to the selected command
    function.

    :param argsv: unused; arguments are read from sys.argv by argparse.
    :param ext_logger: optional externally-configured logger to use instead
        of building one from the command/subcommand names.
    """
    import ambry._meta
    import os
    import sys

    parser = argparse.ArgumentParser(
        prog='ambry',
        description='Ambry {}. Management interface for ambry, libraries and repositories. '.format(
            ambry._meta.__version__))

    parser.add_argument(
        '-l', '--library', dest='library_name', default="default",
        # FIX: corrected "secton" typo in the user-visible help text.
        help="Name of library, from the library section of the config")
    parser.add_argument(
        '-c', '--config', default=os.getenv(AMBRY_CONFIG_ENV_VAR), action='append',
        help="Path to a run config file. Alternatively, set the AMBRY_CONFIG env var")
    parser.add_argument(
        '--single-config', default=False, action="store_true",
        help="Load only the config file specified")
    parser.add_argument(
        '-E', '--exceptions', default=False, action="store_true",
        help="Show full exception trace on all exceptions")

    cmd = parser.add_subparsers(title='commands', help='command help')

    from .library import library_parser, library_command
    from .warehouse import warehouse_command, warehouse_parser
    from .remote import remote_parser, remote_command
    from test import test_parser, test_command
    from config import config_parser, config_command
    from ckan import ckan_parser, ckan_command
    from source import source_command, source_parser
    from bundle import bundle_command, bundle_parser
    from root import root_command, root_parser
    from ..dbexceptions import ConfigurationError

    # Register every command group's subparser.
    library_parser(cmd)
    warehouse_parser(cmd)
    ckan_parser(cmd)
    source_parser(cmd)
    remote_parser(cmd)
    test_parser(cmd)
    config_parser(cmd)
    bundle_parser(cmd)
    root_parser(cmd)

    args = parser.parse_args()

    # Resolve which config file(s) get_runconfig should load.
    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception("--single_config can only be specified with one -c")
        else:
            rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    funcs = {
        'bundle': bundle_command,
        'library': library_command,
        'warehouse': warehouse_command,
        'remote': remote_command,
        'test': test_command,
        'ckan': ckan_command,
        'source': source_command,
        'config': config_command,
        'root': root_command,
    }

    global global_logger

    if ext_logger:
        global_logger = ext_logger
    else:
        name = "{}.{}".format(args.command, args.subcommand)
        global_logger = get_logger(name, template="%(levelname)s: %(message)s")

    global_logger.setLevel(logging.INFO)

    f = funcs.get(args.command, False)

    # `config install` runs before any config exists, so skip loading it.
    if args.command == 'config' and args.subcommand == 'install':
        rc = None
    else:
        try:
            rc = get_runconfig(rc_path)
        except ConfigurationError:
            # BUG FIX: the message had a mismatched quote/semicolon:
            # "Run 'ambry config install; to create one".
            fatal(
                "Could not find configuration file at {}\nRun 'ambry config install' to create one",
                rc_path)

    global global_run_config
    global_run_config = rc

    if not f:
        fatal("Error: No command: " + args.command)
    else:
        try:
            f(args, rc)
        except KeyboardInterrupt:
            prt('\nExiting...')
            pass
        except ConfigurationError as e:
            if args.exceptions:
                raise
            fatal("{}: {}".format(str(e.__class__.__name__), str(e)))
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """
    Custom worker for bundle operations.

    Pulls a single task from *inqueue*, rebuilds the library and bundle in
    this process, runs the task function, and puts the (job, i, result)
    triple on *outqueue*.

    :param inqueue: task queue (multiprocessing SimpleQueue-like).
    :param outqueue: result queue.
    :param initializer: optional callable run once before taking a task.
    :param initargs: arguments for *initializer*.
    :param maxtasks: unused here beyond validation; kept for Pool API parity.
    :return: None
    """
    from ambry.library import new_library
    from ambry.run import get_runconfig
    import traceback

    assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)
    put = outqueue.put
    get = inqueue.get

    # Close the ends of the pipes this worker doesn't use (Pool convention).
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    try:
        task = get()
    except (EOFError, IOError):
        debug('worker got EOFError or IOError -- exiting')
        return

    if task is None:
        debug('worker got sentinel -- exiting')
        return

    job, i, func, args, kwds = task

    # func = mapstar = map(*args)
    # Since there is only one source build per process, we know the structure
    # of the args beforehand.
    mp_func = args[0][0]
    mp_args = list(args[0][1][0])

    library = new_library(get_runconfig())
    library.database.close()  # Maybe it is still open after the fork.
    library.init_debug()

    bundle_vid = mp_args[0]

    # BUG FIX: `b` was referenced in the exception handler and after the
    # try block even when `library.bundle()` itself raised, so a failure
    # before assignment produced a NameError that masked the real error.
    b = None

    try:
        b = library.bundle(bundle_vid)
        library.logger = b.logger  # So library logs to the same file as the bundle.
        b = b.cast_to_subclass()
        b.multi = True  # In parent it is a number, in child, just needs to be true to get the right logger template
        b.is_subprocess = True
        b.limited_run = bool(int(os.getenv('AMBRY_LIMITED_RUN', 0)))

        assert b._progress == None  # Don't want to share connections across processes

        mp_args[0] = b

        result = (True, [mp_func(*mp_args)])

    except Exception as e:
        tb = traceback.format_exc()
        if b is not None:
            b.error('Subprocess {} raised an exception: {}'.format(os.getpid(), e.message), False)
            b.error(tb, False)
        result = (False, e)

    assert result

    if b is not None:
        b.progress.close()
    library.close()

    try:
        put((job, i, result))
    except Exception as e:
        wrapped = MaybeEncodingError(e, result[1])
        debug("Possible encoding error while sending result: %s" % (wrapped))
        put((job, i, (False, wrapped)))
HINT: Add `ckan` section to the ~/.ambry/accounts.yaml. Example:
    ckan:
        host: http://demo.ckan.org  # host with the ckan instance
        organization: org1  # default organization
        apikey: <apikey>  # your api key
'''


class UnpublishedAccessError(Exception):
    # NOTE(review): appears to signal access to content that has not been
    # published to CKAN -- confirm against the call sites.
    pass


logger = get_logger(__name__)

# Module-level CKAN client setup: read the `ckan` account section from the
# run config and only build a RemoteCKAN client when host, organization and
# apikey are all present; otherwise leave `ckan` as None.
rc = get_runconfig()
if rc.accounts:
    CKAN_CONFIG = rc.accounts.get('ckan')
else:
    CKAN_CONFIG = None

if CKAN_CONFIG and set(['host', 'organization', 'apikey']).issubset(list(CKAN_CONFIG.keys())):
    ckan = ckanapi.RemoteCKAN(
        CKAN_CONFIG.host,
        apikey=CKAN_CONFIG.apikey,
        user_agent='ambry/1.0 (+http://ambry.io)')
else:
    ckan = None
def test_load(self):
    """Round-trip a bundle through an S3 cache: publish it, purge the
    library, and load it back via both the local and remote libraries."""
    from ambry.run import get_runconfig, RunConfig
    from ambry.client.rest import RemoteLibrary
    from ambry.cache import new_cache
    from ambry.util import md5_for_file
    from ambry.identity import Identity

    config = self.start_server()
    l = new_library(config)
    rl = RemoteLibrary(self.server_url)

    #
    # Check that the library can list datasets that are inserted externally
    #
    l.put_bundle(self.bundle)

    s = set([i.fqname for i in rl.list().values()])
    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', s)

    dsident = rl.dataset('diEGPXmDC8001')
    s = set([i.fqname for i in dsident.partitions.values()])
    self.assertEquals(4, len(s))
    self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
    self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
    self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

    #
    # Upload the dataset to S3, clear the library, then load it back in
    #
    rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_ACCOUNTS))
    cache = new_cache(rc.filesystem('cached-compressed-s3'))

    fn = self.bundle.database.path
    identity = self.bundle.identity
    relpath = identity.cache_key

    cache.put(fn, relpath, identity.to_meta(file=fn))  # Return value was unused

    self.assertTrue(bool(cache.has(relpath)))

    # Clear the library.
    l.purge()
    # BUG FIX: iterate rl.list().values() (identity objects), not the bare
    # dict, whose iteration yields keys that have no .fqname attribute.
    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list().values()]))

    # Load from S3, directly in to the local library
    identity.add_md5(md5_for_file(fn))
    l.load(identity.cache_key, identity.md5)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Do it one more time, using the remote library
    l.purge()
    self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                     set([i.fqname for i in rl.list().values()]))

    rl.load_dataset(identity)

    self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                  set([i.fqname for i in rl.list().values()]))

    # Check that we can get the record from the library
    self.assertEquals(identity.vid, rl.resolve(identity.vid).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.vname).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.cache_key).vid)
    self.assertEquals(identity.vid, rl.resolve(identity.sname).vid)
def main(argsv=None, ext_logger=None):
    """
    Entry point for the ambry command-line interface.

    :param argsv: argument list, without the program name; defaults to
        ``sys.argv[1:]`` when None
    :param ext_logger: optional logger to use instead of the CLI's own
    :return: None
    """
    import sys

    from ..dbexceptions import ConfigurationError
    from ambry.valuetype.exceptions import TooManyCastingErrors

    global global_logger

    # For failures in importing CLI modules. Re-set later.
    global_logger = get_logger(__name__, template='%(levelname)s: %(message)s')

    # BUG FIX: argsv defaulted to None but was used with `in` below, which
    # raised a TypeError. Default to the real command line instead.
    if argsv is None:
        argsv = sys.argv[1:]

    extras = get_extra_commands()

    # We haven't parsed the args yet, so we have to check for the exceptions arg a different way
    commands = get_commands(extras, exceptions=('-E' in argsv or '--exceptions' in argsv))

    parser = get_parser(commands)

    # BUG FIX: parse the argument list we were given; parse_args() with no
    # arguments silently ignored the argsv parameter and read sys.argv.
    args = parser.parse_args(argsv)

    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception('--single_config can only be specified with one -c')
        else:
            rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    if ext_logger:
        global_logger = ext_logger
    else:
        name = '{}.{}'.format(args.command, args.subcommand)
        global_logger = get_logger(name, template='%(levelname)s: %(message)s')

    global_logger.setLevel(logging.INFO)

    # BUG FIX: commands.get() returns None for an unknown command, so the
    # old `run_command, _ = commands.get(...)` raised a TypeError before
    # the `run_command is None` check below could ever report it.
    command_entry = commands.get(args.command)
    run_command = command_entry[0] if command_entry else None

    if args.command == 'config' and args.subcommand == 'install':
        # The install subcommand creates the config, so don't require one.
        rc = None
    else:
        try:
            rc = get_runconfig(rc_path)
        except ConfigurationError:
            fatal("Could not find configuration file \nRun 'ambry config install; to create one ")

    global global_run_config
    global_run_config = rc

    if args.test_library:
        # Point the filesystem root at the test tree for isolated runs.
        rc.group('filesystem')['root'] = rc.group('filesystem')['test']

    if run_command is None:
        fatal('Error: No command: ' + args.command)
    else:
        try:
            run_command(args, rc)
        except KeyboardInterrupt:
            prt('\nExiting...')
        except ConfigurationError as e:
            if args.exceptions:
                raise
            fatal('{}: {}'.format(str(e.__class__.__name__), str(e)))
        except TooManyCastingErrors:
            fatal("Casting errors")