Esempio n. 1
0
    def __init__(self, app_config, import_name,
                 static_path=None, static_url_path=None,
                 static_folder='static', template_folder='templates',
                 instance_path=None, instance_relative_config=False):
        """Create the application and attach a filesystem-backed cache.

        The CSRF and login helpers are created first, the parent initializer
        receives the static/template/instance arguments positionally, and
        ``app_config`` is merged into ``self.config``.  The cache directory is
        obtained from a read-only ``Library``.
        """
        from flask.ext.cache import Cache
        from ambry.library import Library
        from ambry.run import get_runconfig

        # Starts out False; this initializer never sets it to True.
        self._initialized = False
        self.csrf = CsrfProtect()
        self.login_manager = LoginManager()

        parent_args = (import_name, static_path, static_url_path,
                       static_folder, template_folder, instance_path,
                       instance_relative_config)
        super(Application, self).__init__(*parent_args)

        self.config.update(app_config)

        library = Library(get_runconfig(), read_only=True, echo=False)

        cache_settings = {
            'CACHE_TYPE': 'filesystem',
            'CACHE_DIR': library.filesystem.cache('ui'),
        }
        self.cache = Cache(config=cache_settings)
        self.cache.init_app(self)
Esempio n. 2
0
    def test_md5(self):
        """Check that caches report the same MD5 as the source file.

        A test file is stored in the "fscache" cache and in the
        "compressioncache" cache; in both cases ``cache.md5()`` must equal the
        digest computed directly from the file.
        """
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.cache.filesystem import make_metadata

        rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

        fn = self.make_test_file()

        try:
            md5 = md5_for_file(fn)

            cache = new_cache(rc.filesystem("fscache"))
            cache.put(fn, "foo1")
            # The returned path is not needed; the call is kept in case
            # resolving the path has side effects on the cache.
            cache.path("foo1")
            self.assertEqual(md5, cache.md5("foo1"))

            cache = new_cache(rc.filesystem("compressioncache"))
            cache.put(fn, "foo2", metadata=make_metadata(fn))
            cache.path("foo2")
            self.assertEqual(md5, cache.md5("foo2"))
        finally:
            # Remove the test file even when an assertion above fails.
            os.remove(fn)
Esempio n. 3
0
    def __init__(self, config=None, search=None, echo=None, read_only=False):
        """Set up the library's filesystem, database and optional search.

        :param config: run configuration; when falsy, ``get_runconfig()`` is
            used instead.
        :param search: when truthy, passed to ``Search`` to build the search
            object; otherwise ``self._search`` is None.
        :param echo: forwarded to ``Database``.
        :param read_only: stored on the instance.
        :raises DatabaseMissingError: when opening the database raises
            SQLAlchemy's ``OperationalError``.
        """
        from sqlalchemy.exc import OperationalError
        from ambry.orm.exc import DatabaseMissingError

        if config:
            self._config = config
        else:
            self._config = get_runconfig()

        self.logger = logger

        self.read_only = read_only  # allow optimizations that assume we aren't building bundles.

        self._echo = echo

        # Use the resolved configuration, not the raw argument, so that the
        # get_runconfig() fallback also applies to the filesystem.
        self._fs = LibraryFilesystem(self._config)

        self._db = Database(self._fs.database_dsn, echo=echo)

        # NOTE(review): `self.config` is defined outside this method;
        # presumably a property returning `self._config` -- confirm.
        self._account_password = self.config.accounts.password

        self._warehouse = None  # Will be populated in the warehouse property.

        try:
            self._db.open()
        except OperationalError as e:
            raise DatabaseMissingError("Failed to open database '{}': {} ".format(self._db.dsn, e))

        self.processes = None  # Number of multiprocessing processors. Default to all of them

        if search:
            self._search = Search(self, search)
        else:
            self._search = None
Esempio n. 4
0
    def test_s3(self):
        """Put the test bundle and its partitions into the
        "cached-compressed-s3" cache, then fetch the bundle back."""
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.bundle import DbBundle

        config_path = os.path.join(self.bundle_dir, "test-run-config.yaml")
        rc = get_runconfig((config_path, RunConfig.USER_CONFIG))

        db_path = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        bundle = DbBundle(db_path)
        identity = bundle.identity

        cache = new_cache(rc.filesystem("cached-compressed-s3"))

        # Results are assigned but never checked; this test only exercises
        # the calls.
        r = cache.put(db_path,
                      bundle.identity.cache_key,
                      bundle.identity.to_meta(md5=bundle.database.md5))

        for part in bundle.partitions:
            r = cache.put(part.database.path,
                          part.identity,
                          part.identity.to_meta(md5=part.database.md5))

        r = cache.get(bundle.identity.cache_key)
Esempio n. 5
0
    def test_s3(self):
        """Exercise put() and get() against the 'cached-compressed-s3' cache.

        The bundle database and each partition database are uploaded with
        metadata carrying their md5; nothing is asserted on the results.
        """
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.bundle import DbBundle

        run_config_sources = (
            os.path.join(self.bundle_dir, 'test-run-config.yaml'),
            RunConfig.USER_CONFIG,
        )
        rc = get_runconfig(run_config_sources)

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)
        identity = b.identity

        fsname = 'cached-compressed-s3'
        cache = new_cache(rc.filesystem(fsname))

        bundle_meta = b.identity.to_meta(md5=b.database.md5) \
            if False else None
        r = cache.put(fn, b.identity.cache_key,
                      b.identity.to_meta(md5=b.database.md5))

        for p in b.partitions:
            r = cache.put(p.database.path, p.identity,
                          p.identity.to_meta(md5=p.database.md5))

        r = cache.get(b.identity.cache_key)
Esempio n. 6
0
    def setUp(self):
        """Load the test run configuration, the test bundle and the manifest
        file contents."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests
        import configs

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        config_sources = (
            os.path.join(self.config_dir, 'test.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_sources)

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        # The manifest lives next to the `manifests` package.
        manifest_dir = os.path.dirname(manifests.__file__)
        self.m = os.path.join(manifest_dir, 'test.ambry')

        with open(self.m) as manifest_file:
            self.m_contents = manifest_file.read()
Esempio n. 7
0
    def setUp(self):
        """Load the test run configuration and start from an empty test tree."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig

        import configs
        from shutil import rmtree

        super(Test, self).setUp()

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        test_yaml = os.path.join(self.config_dir, 'test.yaml')
        bundle_yaml = os.path.join(self.bundle_dir, 'bundle.yaml')
        self.rc = get_runconfig((test_yaml, bundle_yaml,
                                 RunConfig.USER_ACCOUNTS))

        # Delete the whole test tree every run.
        test_folder = self.rc.group('filesystem').root
        if os.path.exists(test_folder):
            rmtree(test_folder)

        # NOTE(review): `manifests` is not imported in this method;
        # presumably it is imported at module level -- confirm.
        manifest_dir = os.path.dirname(manifests.__file__)
        self.mf = os.path.join(manifest_dir, 'test.ambry')

        self.bundle = Bundle()
        self.waho = None
Esempio n. 8
0
    def test_adhoc(self):
        from ambry.library import new_library

        config = get_runconfig().library('default')

        l = new_library(config, reset=True)

        print l.resolve('211sandiego.org-calls-p1ye2014-orig-calls')
Esempio n. 9
0
    def setUp(self):
        """Point the test at a fixed directory and load its run configuration."""
        import os
        from ambry.run import get_runconfig

        # A fixed location is used instead of a fresh temporary directory.
        self.test_dir = '/tmp/test_cli'
        self.config_file = os.path.join(self.test_dir, 'config.yaml')

        # NOTE(review): `RunConfig` is not imported in this method;
        # presumably it is available at module level -- confirm.
        config_sources = (self.config_file, RunConfig.USER_ACCOUNTS)
        self.rc = get_runconfig(config_sources)
Esempio n. 10
0
    def test_caches(self):
        '''Basic test of put(), get() and has() for all cache types'''
        from functools import partial
        from ambry.run import  get_runconfig, RunConfig
        from ambry.filesystem import Filesystem
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.bundle import DbBundle

        #self.start_server() # For the rest-cache

        #fn = '/tmp/1mbfile'
        #with open(fn, 'wb') as f:
        #    f.write('.'*(1024))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)

        md5 = md5_for_file(fn)

        print "MD5 {}  = {}".format(fn, md5)

        rc = get_runconfig((os.path.join(self.bundle_dir,'test-run-config.yaml'),
                            RunConfig.USER_ACCOUNTS))

        for i, fsname in enumerate(['fscache', 'limitedcache', 'compressioncache',
                                    'cached-s3', 'cached-compressed-s3']):

            config = rc.filesystem(fsname)
            cache = new_cache(config)
            print '---', fsname, cache
            identity = self.bundle.identity

            relpath = identity.cache_key

            r = cache.put(fn, relpath,identity.to_meta(md5=md5))

            r = cache.get(relpath)

            if not r.startswith('http'):
                self.assertTrue(os.path.exists(r), 'Not a url: {}: {}'.format(r,str(cache)))

            self.assertTrue(cache.has(relpath, md5=md5))

            cache.remove(relpath, propagate=True)

            self.assertFalse(os.path.exists(r), str(cache))
            self.assertFalse(cache.has(relpath))


        cache = new_cache(rc.filesystem('s3cache-noupstream'))
        r = cache.put(fn, 'a')
Esempio n. 11
0
    def test_caches(self):
        '''Basic test of put(), get() and has() for all cache types'''
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.bundle import DbBundle

        self.start_server()  # For the rest-cache

        #fn = '/tmp/1mbfile'
        #with open(fn, 'wb') as f:
        #    f.write('.'*(1024))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)

        md5 = md5_for_file(fn)

        print "MD5 {}  = {}".format(fn, md5)

        rc = get_runconfig(
            (os.path.join(self.bundle_dir,
                          'test-run-config.yaml'), RunConfig.USER_CONFIG))

        for i, fsname in enumerate([
                'fscache', 'limitedcache', 'compressioncache', 'cached-s3',
                'cached-compressed-s3'
        ]):  #'compressioncache',

            config = rc.filesystem(fsname)
            cache = new_cache(config)
            print '---', fsname, cache
            identity = self.bundle.identity

            relpath = identity.cache_key

            r = cache.put(fn, relpath, identity.to_meta(md5=md5))
            r = cache.get(relpath)

            if not r.startswith('http'):
                self.assertTrue(os.path.exists(r), str(cache))

            self.assertTrue(cache.has(relpath, md5=md5))

            cache.remove(relpath, propagate=True)

            self.assertFalse(os.path.exists(r), str(cache))
            self.assertFalse(cache.has(relpath))

        cache = new_cache(rc.filesystem('s3cache-noupstream'))
        r = cache.put(fn, 'a')
Esempio n. 12
0
    def setUp(self):
        """Load the client test configuration and prepare the test bundle."""
        import testbundle.bundle

        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

        client_config = os.path.join(self.bundle_dir,
                                     'client-test-config.yaml')
        bundle_config = os.path.join(self.bundle_dir, 'bundle.yaml')
        self.rc = get_runconfig(
            (client_config, bundle_config, RunConfig.USER_CONFIG))

        self.copy_or_build_bundle()

        self.bundle = Bundle()
Esempio n. 13
0
    def reset(self):
        """Recreate an empty test directory, then rebuild the config file,
        run configuration and library."""
        from ambry.run import get_runconfig

        # Remove any previous tree so the directory is created empty.
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)

        self.config_file = self.new_config_file()

        config_sources = (self.config_file, RunConfig.USER_ACCOUNTS)
        self.rc = get_runconfig(config_sources)

        self.library = self.get_library()
Esempio n. 14
0
    def setUp(self):
        """Clear '/tmp/server', build the bundle and load the client and
        server run configurations."""
        import bundles.testbundle.bundle

        rm_rf('/tmp/server')

        self.copy_or_build_bundle()

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

        client_sources = (
            os.path.join(self.bundle_dir, 'client-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(client_sources)

        server_sources = (
            os.path.join(self.bundle_dir, 'server-test-config.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.server_rc = get_runconfig(server_sources)

        self.bundle = Bundle()
        # From here on bundle_dir is the directory reported by the bundle.
        self.bundle_dir = self.bundle.bundle_dir
Esempio n. 15
0
    def setUp(self):
        """Read the client test run configuration, then build the bundle."""
        import testbundle.bundle

        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

        # Later entries are the bundle's own config and the user config.
        config_sources = (
            os.path.join(self.bundle_dir, 'client-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_CONFIG,
        )
        self.rc = get_runconfig(config_sources)

        self.copy_or_build_bundle()

        self.bundle = Bundle()
Esempio n. 16
0
def new_library(config=None):
    """Create a Library, store it in the module-level ``global_library``
    and return it.

    When ``config`` is None the result of ``get_runconfig()`` is used.
    """
    global global_library

    effective_config = get_runconfig() if config is None else config

    library = Library(effective_config)
    global_library = library

    return library
Esempio n. 17
0
def new_library(config=None):
    """Build a new Library and publish it as ``global_library``.

    :param config: configuration passed to ``Library``; ``get_runconfig()``
        supplies it when None is given.
    :return: the newly created Library.
    """
    global global_library

    if config is None:
        config = get_runconfig()

    # The global is only rebound after Library() has returned.
    global_library = Library(config)

    return global_library
Esempio n. 18
0
    def test_runconfig(self):
        """Check that the RunConfig expands the library configuration.

        The 'library1' entry must carry the named database and a chain of
        three filesystems linked through 'upstream'.
        """
        from ambry.run import get_runconfig, RunConfig

        rc = get_runconfig(
            (os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

        l = rc.library('library1')

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('database1', l['database']['_name'])
        self.assertEqual('filesystem1', l['filesystem']['_name'])
        self.assertEqual('filesystem2', l['filesystem']['upstream']['_name'])
        self.assertEqual('filesystem3', l['filesystem']['upstream']['upstream']['_name'])
Esempio n. 19
0
    def setUp(self):
        """Build the bundle and load the geo test run configuration from the
        bundle's own directory."""
        import os
        from ambry.run import get_runconfig, RunConfig

        self.copy_or_build_bundle()

        self.bundle = Bundle()
        self.bundle_dir = self.bundle.bundle_dir

        geo_config = os.path.join(self.bundle_dir, 'geo-test-config.yaml')
        bundle_config = os.path.join(self.bundle_dir, 'bundle.yaml')
        self.rc = get_runconfig(
            (geo_config, bundle_config, RunConfig.USER_ACCOUNTS))
Esempio n. 20
0
    def x_test_remote(self):
        from ambry.run import RunConfig
        from ambry.library import new_library
        
        rc = get_runconfig((os.path.join(self.bundle_dir,'server-test-config.yaml'),RunConfig.USER_CONFIG))

        config = rc.library('default')
        library =  new_library(config)

        print library.upstream
        print library.upstream.last_upstream()
        print library.cache
        print library.cache.last_upstream()  
Esempio n. 21
0
    def setUp(self):
        """Prepare the bundle, then read the geo test configuration."""
        import os
        from ambry.run import get_runconfig, RunConfig

        self.copy_or_build_bundle()

        self.bundle = Bundle()
        self.bundle_dir = self.bundle.bundle_dir

        # Both YAML files are looked up in the bundle's directory.
        config_sources = (
            os.path.join(self.bundle_dir, 'geo-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_sources)
Esempio n. 22
0
    def test_caches(self):
        """Basic test of put(), get() and has() for all cache types"""
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.bundle import DbBundle

        self.start_server()  # For the rest-cache

        # fn = '/tmp/1mbfile'
        # with open(fn, 'wb') as f:
        #    f.write('.'*(1024))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)

        md5 = md5_for_file(fn)

        print "MD5 {}  = {}".format(fn, md5)

        rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

        for i, fsname in enumerate(
            ["fscache", "limitedcache", "compressioncache", "cached-s3", "cached-compressed-s3"]
        ):  #'compressioncache',

            config = rc.filesystem(fsname)
            cache = new_cache(config)
            print "---", fsname, cache
            identity = self.bundle.identity

            relpath = identity.cache_key

            r = cache.put(fn, relpath, identity.to_meta(md5=md5))
            r = cache.get(relpath)

            if not r.startswith("http"):
                self.assertTrue(os.path.exists(r), str(cache))

            self.assertTrue(cache.has(relpath, md5=md5))

            cache.remove(relpath, propagate=True)

            self.assertFalse(os.path.exists(r), str(cache))
            self.assertFalse(cache.has(relpath))

        cache = new_cache(rc.filesystem("s3cache-noupstream"))
        r = cache.put(fn, "a")
Esempio n. 23
0
    def test_number_service(self):
        """Check the length of numbers issued for each assignment class.

        For this test, set up these access keys in the Redis server:

            redis-cli set assignment_class:test-ac-authoritative authoritative
            redis-cli set assignment_class:test-ac-registered registered
            redis-cli set assignment_class:fe78d179-8e61-4cc5-ba7b-263d8d3602b9 unregistered

        The test returns early, without asserting anything, when the run
        configuration has no 'numbers' service.
        """
        from ambry.identity import NumberServer
        from ambry.run import get_runconfig
        from ambry.dbexceptions import ConfigurationError

        rc = get_runconfig()

        try:
            # Only used as a probe; the returned value is not needed.
            rc.service('numbers')
        except ConfigurationError:
            return

        # You'll need to run a local service at this address
        host = "numbers"
        port = 7977
        unregistered_key = 'fe78d179-8e61-4cc5-ba7b-263d8d3602b9'

        ns = NumberServer(host=host, port=port, key='test-ac-registered')

        n = ns.next()
        self.assertEqual(6, len(str(n)))

        # Next request is authoritative, so no need to sleep here.
        ns = NumberServer(host=host, port=port, key='test-ac-authoritative')

        n = ns.next()
        self.assertEqual(4, len(str(n)))

        ns.sleep()  # Avoid being rate limited

        ns = NumberServer(host=host, port=port, key=unregistered_key)
        n = ns.next()
        self.assertEqual(8, len(str(n)))

        n1 = ns.find('foobar')

        # Repeated lookups of the same name must return the same number.
        # assertEqual is used throughout instead of the deprecated
        # assertEquals alias.
        self.assertEqual(str(n1), str(ns.find('foobar')))
        self.assertEqual(str(n1), str(ns.find('foobar')))
Esempio n. 24
0
    def setUp(self):
        import testbundle.bundle
        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)
        self.rc = get_runconfig((os.path.join(self.bundle_dir,'library-test-config.yaml'),
                                 os.path.join(self.bundle_dir,'bundle.yaml'),
                                 RunConfig.USER_ACCOUNTS)
                                 )

        self.copy_or_build_bundle()

        self.bundle = Bundle()    

        print "Deleting: {}".format(self.rc.group('filesystem').root)
        Test.rm_rf(self.rc.group('filesystem').root)
Esempio n. 25
0
    def __call__(self, environ, start_response):
        """Handle a WSGI request, finishing one-time setup on the first call.

        On the first call the secret key is resolved (the ``AMBRY_UI_SECRET``
        environment variable, then the library's UI config, then a random
        UUID), the website title is chosen, and the CSRF, session, login and
        Bootstrap extensions are initialized.  Every call is then delegated to
        the parent class.

        :param environ: WSGI environment, passed through to the parent.
        :param start_response: WSGI callable, passed through to the parent.
        :return: whatever the parent ``__call__`` returns.
        """
        if not self._initialized:
            from ambry.library import Library
            from ambry.run import get_runconfig

            rc = get_runconfig()
            l = Library(rc, read_only=True, echo=False)

            # NOTE(review): `app` below is a name defined outside this method;
            # presumably the module-level instance of this class -- confirm.
            secret_key = None

            if os.getenv('AMBRY_UI_SECRET'):
                app.logger.info("Using secret_key from env")
                secret_key = os.getenv('AMBRY_UI_SECRET')

            if not secret_key and l.ui_config.secret:
                app.logger.info("Using secret_key from library")
                secret_key = l.ui_config.secret

            if not secret_key:
                from uuid import uuid4
                app.logger.warn(
                    "SECRET_KEY was not set. Setting to a random value")
                # Must be the same for all worker processes.
                secret_key = str(uuid4())

            # .get() so that a configuration without this key falls back to
            # the secret key instead of raising KeyError.
            if not self.config.get('WTF_CSRF_SECRET_KEY'):
                self.config['WTF_CSRF_SECRET_KEY'] = secret_key

            self.config['SECRET_KEY'] = secret_key

            # No trailing comma here: with one, the title would be a 1-tuple
            # rather than a string.
            title = os.getenv('AMBRY_UI_TITLE', "Ambry Data Library")

            if l.ui_config.website_title:
                title = l.ui_config.website_title

            self.config['website_title'] = title

            self.secret_key = secret_key

            self.csrf.init_app(self)

            self.session_interface = ItsdangerousSessionInterface()

            self.login_manager.init_app(app)
            Bootstrap(app)

            self._initialized = True

        return super(Application, self).__call__(environ, start_response)
Esempio n. 26
0
    def reset(self):
        """Wipe and recreate the test directory, then build a fresh config
        file, run configuration and library."""
        from ambry.run import get_runconfig
        import os
        import tempfile
        import shutil

        already_there = os.path.exists(self.test_dir)
        if already_there:
            shutil.rmtree(self.test_dir)

        os.makedirs(self.test_dir)

        self.config_file = self.new_config_file()

        # The generated file is combined with the user accounts config.
        self.rc = get_runconfig(
            (self.config_file, RunConfig.USER_ACCOUNTS))

        self.library = self.get_library()
Esempio n. 27
0
    def __init__(self):
        """Open a read-only library, create its renderer and send logging to
        the library's log file."""
        from ambry.library import Library
        from render import Renderer
        from ambry.run import get_runconfig

        run_config = get_runconfig()
        self.library = Library(run_config, read_only=True, echo=False)
        self.renderer = Renderer(self.library)

        import logging

        # Root logger writes at DEBUG level to the path the library reports.
        log_path = self.library.filesystem.logs()
        logging.basicConfig(filename=log_path, level=logging.DEBUG)
Esempio n. 28
0
    def __call__(self, environ, start_response):
        """Serve a WSGI request; the first call also completes app setup.

        Setup resolves the secret key from, in order, the ``AMBRY_UI_SECRET``
        environment variable, the library's UI config, or a random UUID;
        picks the website title; and initializes the CSRF, session, login and
        Bootstrap extensions.  All calls end by delegating to the parent.

        :param environ: WSGI environment, passed through to the parent.
        :param start_response: WSGI callable, passed through to the parent.
        :return: whatever the parent ``__call__`` returns.
        """
        if not self._initialized:
            from ambry.library import Library
            from ambry.run import get_runconfig

            rc = get_runconfig()
            l = Library(rc, read_only=True, echo=False)

            # NOTE(review): `app` is not defined in this method; presumably
            # it is the module-level application instance -- confirm.
            secret_key = None

            if os.getenv('AMBRY_UI_SECRET'):
                app.logger.info("Using secret_key from env")
                secret_key = os.getenv('AMBRY_UI_SECRET')

            if not secret_key and l.ui_config.secret:
                app.logger.info("Using secret_key from library")
                secret_key = l.ui_config.secret

            if not secret_key:
                from uuid import uuid4
                app.logger.warn("SECRET_KEY was not set. Setting to a random value")
                secret_key = str(uuid4())  # Must be the same for all worker processes.

            # Use .get(): a missing key should fall back to the secret key
            # rather than raise KeyError.
            if not self.config.get('WTF_CSRF_SECRET_KEY'):
                self.config['WTF_CSRF_SECRET_KEY'] = secret_key

            self.config['SECRET_KEY'] = secret_key

            # The original trailing comma turned this into a 1-tuple; the
            # title must be a plain string.
            title = os.getenv('AMBRY_UI_TITLE', "Ambry Data Library")

            if l.ui_config.website_title:
                title = l.ui_config.website_title

            self.config['website_title'] = title

            self.secret_key = secret_key

            self.csrf.init_app(self)

            self.session_interface = ItsdangerousSessionInterface()

            self.login_manager.init_app(app)
            Bootstrap(app)

            self._initialized = True

        return super(Application, self).__call__(environ, start_response)
Esempio n. 29
0
    def setUp(self):
        import testbundle.bundle
        from ambry.run import RunConfig

        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)
        self.rc = get_runconfig((os.path.join(self.bundle_dir,'warehouse-test-config.yaml'),
                                 os.path.join(self.bundle_dir,'bundle.yaml'),
                                 RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        self.bundle = Bundle()    

        print "Deleting: {}".format(self.rc.group('filesystem').root_dir)
        ambry.util.rm_rf(self.rc.group('filesystem').root_dir)
Esempio n. 30
0
    def setUp(self):
        """Run the parent setUp, load the library test configuration, build
        the bundle and remove the filesystem root."""
        super(Test, self).setUp()

        import test.bundles.testbundle.bundle

        bundle_module = test.bundles.testbundle.bundle
        self.bundle_dir = os.path.dirname(bundle_module.__file__)

        library_config = os.path.join(self.bundle_dir,
                                      'library-test-config.yaml')
        bundle_config = os.path.join(self.bundle_dir, 'bundle.yaml')
        self.rc = get_runconfig(
            (library_config, bundle_config, RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        Test.rm_rf(self.rc.group('filesystem').root)
Esempio n. 31
0
    def setUp(self):
        """Prepare the library tests: parent setUp, run configuration, bundle
        build, then removal of the filesystem root."""
        super(Test, self).setUp()

        import bundles.testbundle.bundle

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

        # Test-specific settings first, then the bundle's own config.
        config_sources = (
            os.path.join(self.bundle_dir, 'library-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_sources)

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        Test.rm_rf(self.rc.group('filesystem').root)
Esempio n. 32
0
    def x_test_search(self):

        from ambry.library import new_library

        config = get_runconfig().library('default')

        l = new_library(config, reset=True)

        #for ds in l.datasets():  print ds.vid

        l.search.index_datasets()

        for r in l.search.search_datasets("title:zip"):
            ds = l.dataset(r)
            print r, ds.vname, ds.data.get('title')

        for r in l.search.search_partitions("doc:0E06"):
            print r
Esempio n. 33
0
    def test_runconfig(self):
        """Check that the RunConfig expands the library configuration.

        The 'library1' entry must reference the named database, a chain of
        three filesystems linked through 'upstream', and the account attached
        to the last filesystem.
        """
        from ambry.run import get_runconfig, RunConfig

        rc = get_runconfig((os.path.join(self.bundle_dir,
                                         'test-run-config.yaml'),
                            RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

        l = rc.library('library1')

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('database1', l['database']['_name'])
        self.assertEqual('filesystem1', l['filesystem']['_name'])
        self.assertEqual('filesystem2', l['filesystem']['upstream']['_name'])
        self.assertEqual('filesystem3',
                         l['filesystem']['upstream']['upstream']['_name'])
        self.assertEqual(
            'devtest.sandiegodata.org',
            l['filesystem']['upstream']['upstream']['account']['_name'])
Esempio n. 34
0
    def x_test_search_parse(self):

        from ambry.library import new_library

        from ambry.library.search import SearchTermParser

        stp = SearchTermParser()

        config = get_runconfig().library('default')

        l = new_library(config, reset=True)

        e = lambda x: l.search.make_query_from_terms(stp.parse(x))

        print e('births ')
        print e('births source cdph')
        print e('births with mother source cdph')
        print e('births with mother in California by tracts')
        print e('births with mother with birth in California by tracts')
Esempio n. 35
0
    def test_search(self):
        from ambry.library import new_library

        config = get_runconfig().library('default')

        l = new_library(config, reset=True)

        print l.search

        #for ds in l.datasets():  print ds.vid

        l.search.index_datasets()

        for r in  l.search.search_datasets("title:zip"):
            ds = l.dataset(r)
            print r, ds.vname, ds.data.get('title')

        for r in l.search.search_partitions("doc:0E06"):
            print r
Esempio n. 36
0
    def x_test_search_parse(self):

        from ambry.library import new_library

        from ambry.library.search import SearchTermParser

        stp = SearchTermParser()

        config = get_runconfig().library('default')

        l = new_library(config, reset=True)


        e = lambda x: l.search.make_query_from_terms(stp.parse(x))

        print e('births ')
        print e('births source cdph')
        print e('births with mother source cdph')
        print e('births with mother in California by tracts')
        print e('births with mother with birth in California by tracts')
Esempio n. 37
0
    def __init__(self,
                 app_config,
                 import_name,
                 static_path=None,
                 static_url_path=None,
                 static_folder='static',
                 template_folder='templates',
                 instance_path=None,
                 instance_relative_config=False):
        """Initialize the application, its config and its filesystem cache.

        ``app_config`` is merged into ``self.config`` after the parent
        initializer has run; the remaining arguments go to the parent
        unchanged.  The cache stores its files in the directory returned by
        the library filesystem's ``cache('ui')``.
        """
        from flask.ext.cache import Cache
        from ambry.library import Library
        from ambry.run import get_runconfig

        self._initialized = False
        self.csrf = CsrfProtect()
        self.login_manager = LoginManager()

        super(Application, self).__init__(
            import_name,
            static_path,
            static_url_path,
            static_folder,
            template_folder,
            instance_path,
            instance_relative_config)

        self.config.update(app_config)

        # A read-only library is opened just to locate the cache directory.
        ui_library = Library(get_runconfig(), read_only=True, echo=False)
        cache_dir = ui_library.filesystem.cache('ui')

        self.cache = Cache(config={'CACHE_TYPE': 'filesystem',
                                   'CACHE_DIR': cache_dir})
        self.cache.init_app(self)
Esempio n. 38
0
    def setUp(self):
        import testbundle.bundle, shutil, os

        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)
        self.rc = get_runconfig(
            (os.path.join(self.bundle_dir, 'source-test-config.yaml'),
             os.path.join(self.bundle_dir,
                          'bundle.yaml'), RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        bundle = Bundle()

        self.source_save_dir = str(
            self.rc.group('filesystem').root) + '-source'

        self.setup_source_dir()

        print "Deleting: {}".format(self.rc.group('filesystem').root)
        ambry.util.rm_rf(self.rc.group('filesystem').root)

        bdir = os.path.join(self.rc.sourcerepo.dir, 'testbundle')

        pats = shutil.ignore_patterns('build', 'build-save', '*.pyc', '.git',
                                      '.gitignore', '.ignore', '__init__.py')

        print "Copying test dir tree to ", bdir
        shutil.copytree(bundle.bundle_dir, bdir, ignore=pats)

        # Import the bundle file from the directory
        from ambry.run import import_file
        import imp

        rp = os.path.realpath(os.path.join(bdir, 'bundle.py'))
        mod = import_file(rp)

        dir_ = os.path.dirname(rp)
        self.bundle = mod.Bundle(dir_)

        print self.bundle.bundle_dir
Esempio n. 39
0
    def setUp(self):
        import testbundle.bundle
        import shutil
        import os

        self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)
        self.rc = get_runconfig((os.path.join(self.bundle_dir, 'source-test-config.yaml'),
                                 os.path.join(self.bundle_dir, 'bundle.yaml'),
                                 RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        bundle = Bundle()

        self.source_save_dir = str(self.rc.group('filesystem').root) + '-source'

        self.setup_source_dir()

        print "Deleting: {}".format(self.rc.group('filesystem').root)
        ambry.util.rm_rf(self.rc.group('filesystem').root)

        bdir = os.path.join(self.rc.sourcerepo.dir, 'testbundle')

        pats = shutil.ignore_patterns('build', 'build-save', '*.pyc', '.git', '.gitignore', '.ignore', '__init__.py')

        print "Copying test dir tree to ", bdir
        shutil.copytree(bundle.bundle_dir, bdir, ignore=pats)

        # Import the bundle file from the directory
        from ambry.run import import_file

        rp = os.path.realpath(os.path.join(bdir, 'bundle.py'))
        mod = import_file(rp)

        dir_ = os.path.dirname(rp)
        self.bundle = mod.Bundle(dir_)

        print self.bundle.bundle_dir
Esempio n. 40
0
    def setUp(self):
        """Read the test configuration, build the bundle and cache the text
        of the 'test.ambry' manifest."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests
        import configs

        bundle_module = bundles.testbundle.bundle
        self.bundle_dir = os.path.dirname(bundle_module.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        test_yaml = os.path.join(self.config_dir, 'test.yaml')
        bundle_yaml = os.path.join(self.bundle_dir, 'bundle.yaml')
        self.rc = get_runconfig(
            (test_yaml, bundle_yaml, RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        self.m = os.path.join(os.path.dirname(manifests.__file__),
                              'test.ambry')

        # Keep the manifest text for later use by the tests.
        with open(self.m) as handle:
            self.m_contents = handle.read()
Esempio n. 41
0
    def test_compression(self):
        from ambry.run import get_runconfig
        from ambry.cache import new_cache
        from ambry.util import temp_file_name, md5_for_file, copy_file_or_flo

        rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

        comp_cache = new_cache(rc.filesystem("compressioncache"))

        test_file_name = "test_file"

        fn = temp_file_name()
        print "orig file ", fn
        with open(fn, "wb") as f:
            for i in range(1000):
                f.write("{:03d}:".format(i))

        cf = comp_cache.put(fn, test_file_name)

        with open(cf) as stream:
            from ambry.util.sgzip import GzipFile

            stream = GzipFile(stream)

            uncomp_cache = new_cache(rc.filesystem("fscache"))

            uncomp_stream = uncomp_cache.put_stream("decomp")

            copy_file_or_flo(stream, uncomp_stream)

        uncomp_stream.close()

        dcf = uncomp_cache.get("decomp")

        self.assertEquals(md5_for_file(fn), md5_for_file(dcf))

        os.remove(fn)
Esempio n. 42
0
    def __init__(self, config=None, search=None, echo=None, read_only=False):
        """Set up the library's filesystem and open its database.

        :param config: Run configuration object. When falsy, the result of
            ``get_runconfig()`` is used instead.
        :param search: Passed to ``Search(self, search)`` when truthy;
            otherwise no search object is created.
        :param echo: Stored on ``self._echo`` and passed to ``Database`` as
            ``echo``.
        :param read_only: Stored on ``self.read_only``; allows optimizations
            that assume bundles are not being built.
        :raises DatabaseMissingError: if opening the database raises
            SQLAlchemy's ``OperationalError``.
        """
        from sqlalchemy.exc import OperationalError
        from ambry.orm.exc import DatabaseMissingError

        self._config = config if config else get_runconfig()

        self.logger = logger

        self.read_only = read_only  # allow optimizations that assume we aren't building bundles.

        self._echo = echo

        # Use the resolved configuration, so that the default applies to the
        # filesystem too when the caller passed no config.
        self._fs = LibraryFilesystem(self._config)

        self._db = Database(self._fs.database_dsn, echo=echo)

        self._account_password = self.config.accounts.password

        self._warehouse = None  # Will be populated in the warehouse property.

        try:
            self._db.open()
        except OperationalError as e:
            raise DatabaseMissingError(
                "Failed to open database '{}': {} ".format(self._db.dsn, e))

        self.processes = None  # Number of multiprocessing processes. Default to all of them

        self._search = Search(self, search) if search else None
Esempio n. 43
0
    def __init__(self):
        """Attach a read-only ``Library`` built from the default run config."""
        from ambry.library import Library
        from ambry.run import get_runconfig

        self.library = Library(get_runconfig(), read_only=True, echo=False)
Esempio n. 44
0
    # This is the key that can be distributed publically. It is only to
    # keep bots and spiders from sucking up a bunch of numbers.
    rds.set("assignment_class:" + unregistered_key, 'unregistered')

    install(RedisPlugin(pool))

    print host, port

    return run(host=host, port=port, reloader=reloader, server='paste')

if __name__ == '__main__':
    # Script entry: read the 'numbers' server settings from the run config
    # and build the command line parser.
    import argparse
    from ambry.run import get_runconfig
    from ..util import print_yaml
    import uuid
    rc = get_runconfig()

    # Second argument is the fallback settings dict handed to rc.servers().
    d = rc.servers(
        'numbers', {
            'host': 'localhost', 'port': 8080, 'unregistered_key': str(
                uuid.uuid4())})

    # A plain dict has no to_dict(); presumably that is what comes back when
    # the fallback above is used. Only that case is tolerated, so that
    # KeyboardInterrupt, SystemExit and real errors are no longer swallowed.
    try:
        d = d.to_dict()
    except AttributeError:
        pass

    d['redis'] = d.get('redis', {'host': 'localhost', 'port': 6379})

    parser = argparse.ArgumentParser(prog='python -mambry.server.numbers',
                                     description='Run an Ambry numbers server')
Esempio n. 45
0
def config():
    """Load and return this installation's default run_config object."""
    from ambry.run import get_runconfig as load_runconfig

    run_config = load_runconfig()
    return run_config
Esempio n. 46
0
def main(argsv=None, ext_logger=None):
    """Run the ``ambry`` command line interface.

    Builds the top-level argument parser, registers each sub-command's
    parser, resolves the run configuration path, installs the module-global
    logger and run config, and dispatches to the selected command function.

    :param argsv: Accepted for interface compatibility.
        NOTE(review): it is not forwarded to ``parse_args()``, so arguments
        are always read from ``sys.argv``.
    :param ext_logger: Logger to install as the module-global logger. When
        falsy, a logger named ``<command>.<subcommand>`` is created.
    :raises Exception: if ``--single-config`` is given without exactly one
        configuration path.
    """
    import ambry._meta
    import os

    global global_logger
    global global_run_config

    parser = argparse.ArgumentParser(
        prog='ambry',
        description='Ambry {}. Management interface for ambry, libraries and repositories. '.format(
            ambry._meta.__version__))

    parser.add_argument(
        '-l',
        '--library',
        dest='library_name',
        default="default",
        help="Name of library, from the library secton of the config")
    # The default must not be the raw environment string: with
    # action='append' argparse calls .append() on a copy of the default, so
    # a string default breaks as soon as -c is also given. The environment
    # fallback is applied after parsing instead.
    parser.add_argument(
        '-c',
        '--config',
        default=None,
        action='append',
        help="Path to a run config file. Alternatively, set the AMBRY_CONFIG env var")
    parser.add_argument(
        '--single-config',
        default=False,
        action="store_true",
        help="Load only the config file specified")
    parser.add_argument(
        '-E',
        '--exceptions',
        default=False,
        action="store_true",
        help="Show full exception trace on all exceptions")

    cmd = parser.add_subparsers(title='commands', help='command help')

    from .library import library_parser, library_command
    from .warehouse import warehouse_command, warehouse_parser
    from .remote import remote_parser, remote_command
    from test import test_parser, test_command
    from config import config_parser, config_command
    from ckan import ckan_parser, ckan_command
    from source import source_command, source_parser
    from bundle import bundle_command, bundle_parser
    from root import root_command, root_parser
    from ..dbexceptions import ConfigurationError

    library_parser(cmd)
    warehouse_parser(cmd)
    ckan_parser(cmd)
    source_parser(cmd)
    remote_parser(cmd)
    test_parser(cmd)
    config_parser(cmd)
    bundle_parser(cmd)
    root_parser(cmd)

    args = parser.parse_args()

    # Environment fallback: used only when no -c option was given, and
    # stored as a one-element list so it is handled exactly like a single -c.
    if args.config is None:
        env_config = os.getenv(AMBRY_CONFIG_ENV_VAR)
        if env_config:
            args.config = [env_config]

    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception(
                "--single_config can only be specified with one -c")
        rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    funcs = {
        'bundle': bundle_command,
        'library': library_command,
        'warehouse': warehouse_command,
        'remote': remote_command,
        'test': test_command,
        'ckan': ckan_command,
        'source': source_command,
        'config': config_command,
        'root': root_command,
    }

    if ext_logger:
        global_logger = ext_logger
    else:
        name = "{}.{}".format(args.command, args.subcommand)
        global_logger = get_logger(name, template="%(levelname)s: %(message)s")

    global_logger.setLevel(logging.INFO)

    f = funcs.get(args.command, False)

    # 'config install' creates the configuration, so none can be loaded yet.
    if args.command == 'config' and args.subcommand == 'install':
        rc = None
    else:
        try:
            rc = get_runconfig(rc_path)

        except ConfigurationError:
            fatal(
                "Could not find configuration file at {}\nRun 'ambry config install; to create one ",
                rc_path)

        global_run_config = rc

    if not f:
        fatal("Error: No command: " + args.command)
    else:
        try:
            f(args, rc)
        except KeyboardInterrupt:
            prt('\nExiting...')
        except ConfigurationError as e:
            if args.exceptions:
                raise
            fatal("{}: {}".format(str(e.__class__.__name__), str(e)))
Esempio n. 47
0
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Custom worker for bundle operations.

    Reads a single task from ``inqueue``, opens a new library, looks up the
    bundle whose id is the task's first argument, replaces that argument with
    the bundle object and calls the task function. ``(job, i, result)`` is
    then put on ``outqueue``, where ``result`` is ``(True, [value])`` on
    success and ``(False, exception)`` on failure.

    :param inqueue: Queue-like object; the task is read with ``get()``.
    :param outqueue: Queue-like object; the result is written with ``put()``.
    :param initializer: Optional callable run before the task is read.
    :param initargs: Positional arguments for ``initializer``.
    :param maxtasks: Must be ``None`` or a positive int. It is only
        validated here; the worker handles one task and returns.
    :return: None
    """
    from ambry.library import new_library
    from ambry.run import get_runconfig
    import traceback

    assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)

    put = outqueue.put
    get = inqueue.get

    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    try:
        task = get()
    except (EOFError, IOError):
        debug('worker got EOFError or IOError -- exiting')
        return

    if task is None:
        debug('worker got sentinel -- exiting')
        return

    job, i, func, args, kwds = task

    # func = mapstar = map(*args)

    # Since there is only one source build per process, we know the structure
    # of the args beforehand.
    mp_func = args[0][0]
    mp_args = list(args[0][1][0])

    library = new_library(get_runconfig())
    library.database.close()  # Maybe it is still open after the fork.
    library.init_debug()

    bundle_vid = mp_args[0]

    # Stays None when the bundle lookup itself fails, so that the error
    # handling below never touches an unbound name.
    b = None

    try:

        b = library.bundle(bundle_vid)
        library.logger = b.logger  # So library logs to the same file as the bundle.

        b = b.cast_to_subclass()
        b.multi = True  # In parent it is a number, in child, just needs to be true to get the right logger template
        b.is_subprocess = True
        b.limited_run = bool(int(os.getenv('AMBRY_LIMITED_RUN', 0)))

        assert b._progress is None  # Don't want to share connections across processes

        mp_args[0] = b
        result = (True, [mp_func(*mp_args)])

    except Exception as e:
        tb = traceback.format_exc()
        message = 'Subprocess {} raised an exception: {}'.format(
            os.getpid(), e.message)

        if b is not None:
            b.error(message, False)
            b.error(tb, False)
        else:
            # No bundle to log through; report via the module debug logger
            # and still hand the failure back to the parent.
            debug(message)
            debug(tb)

        result = (False, e)

    if b is not None:
        b.progress.close()
    library.close()

    try:
        put((job, i, result))
    except Exception as e:
        wrapped = MaybeEncodingError(e, result[1])
        debug("Possible encoding error while sending result: %s" % (wrapped))
        put((job, i, (False, wrapped)))
Esempio n. 48
0
HINT:
Add `ckan` section to the ~/.ambry/accounts.yaml. Example:

ckan:
    host: http://demo.ckan.org  # host with the ckan instance
    organization: org1  # default organization
    apikey: <apikey>  # your api key
'''


class UnpublishedAccessError(Exception):
    """Exception subclass that adds no behaviour of its own.

    NOTE(review): no raise site is visible in this part of the file;
    presumably it signals access to something unpublished -- confirm.
    """

logger = get_logger(__name__)

rc = get_runconfig()

# The CKAN settings are the 'ckan' entry of the accounts config, when the
# run config has an accounts section at all.
CKAN_CONFIG = rc.accounts.get('ckan') if rc.accounts else None


# A client is created only when host, organization and apikey are all
# present; otherwise ``ckan`` is None.
if not CKAN_CONFIG or not {'host', 'organization', 'apikey'}.issubset(CKAN_CONFIG.keys()):
    ckan = None
else:
    ckan = ckanapi.RemoteCKAN(
        CKAN_CONFIG.host,
        apikey=CKAN_CONFIG.apikey,
        user_agent='ambry/1.0 (+http://ambry.io)')
Esempio n. 49
0
    def test_load(self):
        """Check dataset listing, purge and reload through the remote library.

        Puts the test bundle into a local library and checks that the remote
        library lists it and its partitions. Then stores the bundle file in
        the ``cached-compressed-s3`` cache, purges the library, and reloads
        the dataset twice (once through the local library, once through the
        remote one), checking the remote listing after every step.
        """
        from ambry.run import get_runconfig, RunConfig
        from ambry.client.rest import RemoteLibrary
        from ambry.cache import new_cache
        from ambry.util import md5_for_file

        dataset_fqname = 'source-dataset-subset-variation-0.0.1~diEGPXmDC8001'

        config = self.start_server()
        l = new_library(config)

        rl = RemoteLibrary(self.server_url)

        def remote_fqnames():
            # One shared form for every listing check. The original
            # post-purge check iterated rl.list() itself, without .values().
            return set(i.fqname for i in rl.list().values())

        #
        # Check that the library can list datasets that are inserted externally
        #

        l.put_bundle(self.bundle)

        self.assertIn(dataset_fqname, remote_fqnames())

        dsident = rl.dataset('diEGPXmDC8001')

        s = set(i.fqname for i in dsident.partitions.values())

        self.assertEqual(4, len(s))

        self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
        self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
        self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

        #
        # Upload the dataset to S3, clear the library, then load it back in
        #

        rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_ACCOUNTS))
        cache = new_cache(rc.filesystem('cached-compressed-s3'))

        fn = self.bundle.database.path
        identity = self.bundle.identity
        relpath = identity.cache_key

        cache.put(fn, relpath, identity.to_meta(file=fn))

        self.assertTrue(bool(cache.has(relpath)))

        # clear the library.

        l.purge()
        self.assertNotIn(dataset_fqname, remote_fqnames())

        # Load from  S3, directly in to the local library

        identity.add_md5(md5_for_file(fn))

        l.load(identity.cache_key, identity.md5)

        self.assertIn(dataset_fqname, remote_fqnames())

        # Do it one more time, using the remote library

        l.purge()
        self.assertNotIn(dataset_fqname, remote_fqnames())

        rl.load_dataset(identity)

        self.assertIn(dataset_fqname, remote_fqnames())

        # Check that we can get the record from the library

        self.assertEqual(identity.vid, rl.resolve(identity.vid).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.vname).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.cache_key).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.sname).vid)
Esempio n. 50
0
def main(argsv=None, ext_logger=None):
    """Run the ``ambry`` command line interface.

    Collects the available commands, parses the command line, resolves the
    run configuration path, installs the module-global logger and run
    config, and dispatches to the selected command.

    :param argsv: Argument list, inspected only for ``-E`` /
        ``--exceptions`` before parsing. When ``None``, ``sys.argv[1:]`` is
        inspected instead.
        NOTE(review): it is not forwarded to ``parse_args()``, which always
        reads ``sys.argv``.
    :param ext_logger: Logger to install as the module-global logger. When
        falsy, a logger named ``<command>.<subcommand>`` is created.
    :raises Exception: if ``--single-config`` is given without exactly one
        configuration path.
    """
    import sys

    from ..dbexceptions import ConfigurationError
    from ambry.valuetype.exceptions import TooManyCastingErrors

    global global_logger
    global global_run_config

    # For failures in importing CLI modules. Re-set later.
    global_logger = get_logger(__name__, template='%(levelname)s: %(message)s')

    extras = get_extra_commands()

    # We haven't parsed the args yet, so we have to check for the exceptions
    # arg a different way. argsv defaults to None, which does not support
    # ``in``; fall back to the process arguments in that case.
    raw_args = sys.argv[1:] if argsv is None else argsv
    show_exceptions = '-E' in raw_args or '--exceptions' in raw_args
    commands = get_commands(extras, exceptions=show_exceptions)

    parser = get_parser(commands)

    args = parser.parse_args()

    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception('--single_config can only be specified with one -c')
        rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    if ext_logger:
        global_logger = ext_logger
    else:
        name = '{}.{}'.format(args.command, args.subcommand)
        global_logger = get_logger(name, template='%(levelname)s: %(message)s')

    global_logger.setLevel(logging.INFO)

    # Entries are unpacked as (run_command, <second item>). An unknown
    # command yields no entry, which must reach the "No command" report
    # below instead of failing on tuple unpacking.
    entry = commands.get(args.command, None)
    run_command = entry[0] if entry else None

    # 'config install' creates the configuration, so none can be loaded yet.
    if args.command == 'config' and args.subcommand == 'install':
        rc = None
    else:
        try:
            rc = get_runconfig(rc_path)

        except ConfigurationError:
            fatal("Could not find configuration file \nRun 'ambry config install; to create one ")

        global_run_config = rc

    # There is nothing to redirect when no run config was loaded.
    if args.test_library and rc is not None:
        rc.group('filesystem')['root'] = rc.group('filesystem')['test']

    if run_command is None:
        fatal('Error: No command: ' + args.command)
    else:
        try:
            run_command(args, rc)
        except KeyboardInterrupt:
            prt('\nExiting...')
        except ConfigurationError as e:
            if args.exceptions:
                raise
            fatal('{}: {}'.format(str(e.__class__.__name__), str(e)))
        except TooManyCastingErrors:
            fatal("Casting errors")