def __init__(self, omega, bucket=None, defaults=None, celeryconf=None):
    """
    Set up the runtime and create its celery application

    :param omega: the omega instance, stored as self.omega
    :param bucket: the bucket, stored as self.bucket
    :param defaults: the settings object, if not given settings() is used
    :param celeryconf: the celery configuration dictionary, if not given
       defaults.OMEGA_CELERY_CONFIG is used
    """
    from omegaml.util import settings
    self.omega = omega
    self.bucket = bucket
    if not defaults:
        defaults = settings()
    # the forced setting wins, the client check only runs if it is not set
    self.pure_python = getattr(defaults, 'OMEGA_FORCE_PYTHON_CLIENT', False)
    if not self.pure_python:
        self.pure_python = self._client_is_pure_python()
    # initialize celery as a runtime
    task_packages = defaults.OMEGA_CELERY_IMPORTS
    if not celeryconf:
        celeryconf = defaults.OMEGA_CELERY_CONFIG
    # ensure we use the current value
    # NOTE(review): this writes into the dictionary itself, i.e. into the
    # caller's celeryconf or into defaults.OMEGA_CELERY_CONFIG
    celeryconf['CELERY_ALWAYS_EAGER'] = bool(defaults.OMEGA_LOCAL_RUNTIME)
    self.celeryapp = celery_app = Celery('omegaml')
    celery_app.config_from_object(celeryconf)
    # needed to get it to actually load the tasks
    # https://stackoverflow.com/a/35735471
    celery_app.autodiscover_tasks(task_packages, force=True)
    celery_app.finalize()
    # temporary requirements, use .require() to set
    self._require_kwargs = {'task': {}, 'routing': {}}
    # fixed default arguments, use .require(always=True) to set
    self._task_default_kwargs = {'task': {}, 'routing': {}}
    # default routing label
    self._default_label = celery_app.conf.get('CELERY_DEFAULT_QUEUE')
def test_user_extensions_config(self):
    """
    Test that OMEGA_USER_EXTENSIONS from the user config are merged in

    The user config api call is patched to return one additional store
    backend. The Omega instance created from it must know the new backend
    and still contain every backend of the real base config.
    """
    from omegaml.util import settings
    from omegaml import _base_config as _real_base_config
    user_config = {
        'objects': [{
            'data': {
                "OMEGA_USER_EXTENSIONS": {
                    "OMEGA_STORE_BACKENDS": {
                        "test.backend": 'omegaml.backends.npndarray.NumpyNDArrayBackend'
                    }
                }
            }
        }]
    }
    with patch('omegaml.client.userconf.get_user_config_from_api') as get_config:
        get_config.return_value = user_config
        with patch('omegaml._base_config', new=BareObj) as defaults:
            from omegaml.client.userconf import get_omega_from_apikey
            # link callbacks used by get_omega_from_apikey
            _real_base_config.update_from_obj(_real_base_config, attrs=defaults)
            defaults.update_from_dict = _real_base_config.update_from_dict
            defaults.load_user_extensions = _real_base_config.load_user_extensions
            defaults.load_framework_support = lambda *args, **kwargs: None
            defaults.OMEGA_MY_OWN_SETTING = 'foo'
            om = get_omega_from_apikey('foo', 'bar')
            self.assertIsNotNone(om.defaults.OMEGA_USER_EXTENSIONS)
            self.assertIn('test.backend', om.defaults.OMEGA_STORE_BACKENDS)
            # all default backends must still be there, i.e. the config
            # was updated, not replaced
            for backend_kind in _real_base_config.OMEGA_STORE_BACKENDS:
                self.assertIn(backend_kind, om.defaults.OMEGA_STORE_BACKENDS)
    # restore defaults
    defaults = settings(reload=True)
def __init__(self, modelname, runtime=None):
    """
    Store the model name and runtime, then apply the configured mixins

    :param modelname: the name of the model, stored as self.modelname
    :param runtime: the runtime, stored as self.runtime
    """
    self.modelname = modelname
    self.runtime = runtime
    # the forced setting wins, the client check only runs if it is not set
    self.pure_python = getattr(settings(), 'OMEGA_FORCE_PYTHON_CLIENT', False)
    if not self.pure_python:
        self.pure_python = self._client_is_pure_python()
    self.apply_mixins()
def __init__(self, defaults=None, mongo_url=None, celeryconf=None, bucket=None, **kwargs):
    """
    Initialize the client API

    Without arguments create the client API according to the user's
    configuration in :code:`~/omegaml/config.yml`. Arguments override the
    user's configuration.

    :param defaults: the DefaultsContext, if not given settings() is used
    :param mongo_url: the fully qualified URI to the mongo database,
       of format :code:`mongodb://user:password@host:port/database`,
       if not given defaults.OMEGA_MONGO_URL is used
    :param celeryconf: the celery configuration dictionary, passed to the
       runtime
    :param bucket: the bucket, passed to the models, datasets and jobs
       stores and to the runtime
    :param kwargs: accepted, not used by this method
    """
    from omegaml.util import settings
    # avoid circular imports
    from omegaml.notebook.jobs import OmegaJobs
    from omegaml.runtimes import OmegaRuntime
    from omegaml.store import OmegaStore
    # celery and mongo configuration
    self.defaults = defaults or settings()
    self.mongo_url = mongo_url or self.defaults.OMEGA_MONGO_URL
    self.bucket = bucket
    # setup storage locations, all on the same database and defaults
    common = dict(mongo_url=self.mongo_url, defaults=self.defaults)
    self.models = OmegaStore(bucket=bucket, prefix='models/', **common)
    self.datasets = OmegaStore(bucket=bucket, prefix='data/', **common)
    self._jobdata = OmegaStore(bucket=bucket, prefix='jobs/', **common)
    # NOTE(review): unlike the other stores, scripts is created without
    # the bucket -- confirm this is intended
    self.scripts = OmegaStore(prefix='scripts/', **common)
    # runtimes environments
    self.runtime = OmegaRuntime(self, bucket=bucket, defaults=self.defaults,
                                celeryconf=celeryconf)
    self.jobs = OmegaJobs(store=self._jobdata)
def apply_mixins(self):
    """
    extend this instance by every mixin in settings().OMEGA_RUNTIME_MIXINS
    """
    from omegaml import settings
    mixins = settings().OMEGA_RUNTIME_MIXINS
    for mixin_cls in mixins:
        extend_instance(self, mixin_cls)
def setUp(self):
    """
    Register the pip package backend if missing and create the packages dir
    """
    self.defaults = settings()
    backends = self.defaults.OMEGA_STORE_BACKENDS
    kind = PythonPipSourcedPackageData.KIND
    self.backend = 'omegaml.backends.package.PythonPipSourcedPackageData'
    # only register the backend if no backend is registered for this kind
    if kind not in backends:
        backends[kind] = self.backend
    self.om = Omega()
    delete_database()
    package_backend = self.om.scripts.get_backend_bykind(kind)
    self.pkgsdir = package_backend.packages_path
    mkdirs(self.pkgsdir)
class Metadata(Document):
    """
    Metadata stores information about objects in OmegaStore
    """
    # fields
    #: this is the name of the data
    name = StringField(unique_with=['bucket', 'prefix'])
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data
    kind = StringField(choices=MDREGISTRY.KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    # the actual db is defined in settings, OMEGA_MONGO_URL
    meta = {
        'db_alias': 'omega',
        'indexes': [
            # index on the fields that identify an entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __eq__(self, other):
        """
        Two metadata objects are equal if their objid is equal

        Objects without an objid attribute (e.g. None) are not comparable,
        NotImplemented is returned so that Python falls back to its default
        comparison instead of raising an AttributeError.
        """
        # NOTE(review): on Python 3 a class defining __eq__ without
        # __hash__ is unhashable -- confirm this is intended
        if not hasattr(other, 'objid'):
            return NotImplemented
        return self.objid == other.objid

    def __unicode__(self):
        """
        Return Metadata(field=value,...) for the identifying fields
        """
        fields = ('name', 'bucket', 'prefix', 'created', 'kind')
        kwargs = ('%s=%s' % (k, getattr(self, k))
                  for k in self._fields.keys() if k in fields)
        return u"Metadata(%s)" % ','.join(kwargs)
def make_Metadata(db_alias='omega', collection=None):
    """
    Create a context specific Metadata class

    The class takes the database from the given alias at the time of use.

    :param db_alias: the mongoengine db alias used for the document and
       its gridfile
    :param collection: the collection name of the gridfile, if not given
       settings().OMEGA_MONGO_COLLECTION is used
    :return: the Metadata class
    """
    from omegaml.documents import Metadata as Metadata_base
    collection = collection or settings().OMEGA_MONGO_COLLECTION

    class Metadata(Metadata_base, Document):
        # override db_alias in gridfile
        gridfile = FileField(db_alias=db_alias,
                             collection_name=collection)
        # the actual db is defined at runtime
        meta = {
            'db_alias': db_alias,
            'strict': False,
            'indexes': [
                # index on the fields that identify an entry
                {
                    'fields': ['bucket', 'prefix', 'name'],
                },
                'created',  # most recent is last, i.e. [-1]
            ]
        }

        def __new__(cls, *args, **kwargs):
            # undo the Metadata.__new__ protection
            newcls = super(Metadata, cls).__real_new__(cls)
            return newcls

        def __eq__(self, other):
            # objects without an objid (e.g. None) are not comparable,
            # let Python fall back instead of raising an AttributeError
            if not hasattr(other, 'objid'):
                return NotImplemented
            return self.objid == other.objid

        def __unicode__(self):
            fields = ('name', 'bucket', 'prefix', 'created', 'kind')
            kwargs = ('%s=%s' % (k, getattr(self, k))
                      for k in self._fields.keys() if k in fields)
            return u"Metadata(%s)" % ','.join(kwargs)

        def save(self, *args, **kwargs):
            # raised explicitly instead of using assert so that the check
            # is not stripped when running with python -O
            if self.name is None:
                raise AssertionError("a dataset name is needed before saving")
            self.modified = datetime.datetime.now()
            # continue after Metadata_base in the method resolution order
            return super(Metadata_base, self).save(*args, **kwargs)

        def to_json(self, **kwargs):
            # use LEGACY_JSON_OPTIONS unless the caller passes json_options
            kwargs.setdefault('json_options', LEGACY_JSON_OPTIONS)
            return super().to_json(**kwargs)

        def to_dict(self):
            return self.to_mongo().to_dict()

    return Metadata
def get_omega_for_task(cls, auth=None):
    """
    Get Omega instance configured for user in auth

    If auth is passed, the instance is created by get_omega_from_apikey
    for the given user. If auth is (None, None, 'default') the omegaml
    default instance is returned, provided the setting
    OMEGA_ALLOW_TASK_DEFAULT_AUTH is true. To raise an error instead set
    OMEGA_ALLOW_TASK_DEFAULT_AUTH=False.

    auth may be given as a tuple or as a list. A list is treated the same
    as the equivalent tuple, including for the default authentication.

    :param auth: the authentication as (userid, apikey, qualifier)
    :return: the Omega instance configured for the user
    :raises ValueError: if auth is missing, cannot be parsed or the default
       authentication is not allowed
    """
    from omegaml.util import settings
    default_auth = (None, None, 'default')
    if isinstance(auth, list):
        # a serialized tuple can arrive as a list, which never compares
        # equal to the default_auth tuple -- normalize before comparing
        auth = tuple(auth)
    defaults = settings()
    auth_provided = auth is not None and auth != default_auth
    if auth_provided:
        if not isinstance(auth, tuple):
            raise ValueError(
                'cannot parse authentication as {}'.format(auth))
        # we get a serialized tuple, recreate auth object
        # -- this is a hack to easily support python 2/3 client/server mix
        userid, apikey, qualifier = auth
        # by default assume worker is in cluster
        # TODO refactor this setting to eedefaults
        view = defaults.OMEGA_WORKER_INCLUSTER
        om = cls.get_omega_from_apikey(userid, apikey,
                                       qualifier=qualifier, view=view)
    elif auth == default_auth:
        # we provide the default implementation as per configuration
        from omegaml import _omega
        om = _omega._om
        if not defaults.OMEGA_ALLOW_TASK_DEFAULT_AUTH:
            raise ValueError(
                'Default task authentication is not allowed, got {}'.format(auth))
    else:
        raise ValueError(
            'missing authentication tuple as (userid, apikey, qualifier), got {}'.format(auth))
    return om
def __init__(self, mongo_url=None, backend=None, broker=None, celeryconf=None, defaults=None):
    """
    Initialize the client API

    Without arguments create the client API according to the user's
    configuration in :code:`~/omegaml/config.yml`. Arguments override the
    user's configuration.

    The stores and the runtime receive the resolved values, i.e. the
    argument if given, else the respective setting in defaults.

    :param mongo_url: the fully qualified URI to the mongo database,
       of format :code:`mongodb://user:password@host:port/database`,
       if not given defaults.OMEGA_MONGO_URL is used
    :param backend: the celery result backend URI, if not given
       defaults.OMEGA_RESULT_BACKEND is used
    :param broker: the celery broker URI, if not given
       defaults.OMEGA_BROKER is used
    :param celeryconf: the celery configuration dictionary
    :param defaults: the settings object, if not given settings() is used
    """
    from omegaml.util import settings
    # avoid circular imports
    from omegaml.notebook.jobs import OmegaJobs
    from omegaml.runtimes import OmegaRuntime
    from omegaml.store import OmegaStore
    # celery and mongo configuration
    self.defaults = defaults or settings()
    self.mongo_url = mongo_url or self.defaults.OMEGA_MONGO_URL
    self.broker = broker or self.defaults.OMEGA_BROKER
    self.backend = backend or self.defaults.OMEGA_RESULT_BACKEND
    self.celeryconf = celeryconf
    # setup storage locations
    # -- pass the resolved url, not the raw argument which may be None
    self.models = OmegaStore(mongo_url=self.mongo_url, prefix='models/',
                             defaults=self.defaults)
    self.datasets = OmegaStore(mongo_url=self.mongo_url, prefix='data/',
                               defaults=self.defaults)
    self._jobdata = OmegaStore(mongo_url=self.mongo_url, prefix='jobs/',
                               defaults=self.defaults)
    # runtimes environments
    # -- pass the resolved backend and broker so that the configured
    #    defaults take effect in the runtime
    self.runtime = OmegaRuntime(self, backend=self.backend,
                                broker=self.broker, celeryconf=celeryconf,
                                defaults=self.defaults)
    self.jobs = OmegaJobs(store=self._jobdata)
class Metadata:
    """
    Metadata stores information about objects in OmegaStore
    """
    # NOTE THIS CLASS IS HERE FOR DOCUMENTATION PURPOSE ONLY.
    #
    # If you use this class to save a document, it will raise a NameError
    #
    # The actual Metadata class is created in make_Metadata().
    # Rationale: if mongoengine creates Metadata here, the class is bound
    # to a specific MongoClient instance. make_Metadata binds the class to
    # the instance that exists at the time of creation.
    # Open to better ways.

    # fields
    #: this is the name of the data
    name = StringField(unique_with=['bucket', 'prefix'])
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data
    kind = StringField(choices=MDREGISTRY.KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    #: modified datetime
    modified = DateTimeField(default=datetime.datetime.now)
def __init__(self, omega, defaults=None, celeryconf=None):
    """
    Set up the runtime and create its celery application

    :param omega: the omega instance, stored as self.omega
    :param defaults: the settings object, if not given settings() is used
    :param celeryconf: the celery configuration dictionary, if not given
       defaults.OMEGA_CELERY_CONFIG is used
    """
    self.omega = omega
    if not defaults:
        defaults = settings()
    # the forced setting wins, the client check only runs if it is not set
    self.pure_python = getattr(defaults, 'OMEGA_FORCE_PYTHON_CLIENT', False)
    if not self.pure_python:
        self.pure_python = self._client_is_pure_python()
    # initialize celery as a runtime
    task_packages = defaults.OMEGA_CELERY_IMPORTS
    if not celeryconf:
        celeryconf = defaults.OMEGA_CELERY_CONFIG
    self.celeryapp = celery_app = Celery('omegaml')
    celery_app.config_from_object(celeryconf)
    # needed to get it to actually load the tasks (???)
    # https://stackoverflow.com/a/35735471
    celery_app.autodiscover_tasks(task_packages, force=True)
    celery_app.finalize()
    # temporary requirements, use .require() to set
    self._require_kwargs = dict()
    # fixed default arguments, use .require(always=True) to set
    self._task_default_kwargs = dict()
def __init__(self, omega, backend=None, broker=None, celerykwargs=None, celeryconf=None, defaults=None):
    """
    Set up the runtime and create its celery application

    :param omega: the omega instance, stored as self.omega
    :param backend: the celery result backend, defaults to 'amqp'
    :param broker: the celery broker URI, defaults to
       'amqp://guest@localhost//'
    :param celerykwargs: kwargs to create the Celery instance, if not given
       defaults.OMEGA_CELERY_CONFIG is used. backend, broker and include
       are always set by this method. The dictionary passed in is not
       modified.
    :param celeryconf: the celery configuration dictionary, if not given
       defaults.OMEGA_CELERY_CONFIG is used
    :param defaults: the settings object, if not given settings() is used
    """
    self.backend = backend or 'amqp'
    self.broker = broker or 'amqp://guest@localhost//'
    self.omega = omega
    defaults = defaults or settings()
    # initialize celery as a runtime
    taskpkgs = defaults.OMEGA_CELERY_IMPORTS
    # work on a copy so that neither the caller's dictionary nor the
    # shared defaults.OMEGA_CELERY_CONFIG is modified by the update below
    celerykwargs = dict(celerykwargs or defaults.OMEGA_CELERY_CONFIG)
    celerykwargs.update({
        'backend': self.backend,
        'broker': self.broker,
        'include': taskpkgs,
    })
    celeryconf = celeryconf or defaults.OMEGA_CELERY_CONFIG
    self.celeryapp = Celery('omegaml', **celerykwargs)
    self.celeryapp.conf.update(celeryconf)
    # needed to get it to actually load the tasks (???)
    # https://stackoverflow.com/a/35735471
    self.celeryapp.autodiscover_tasks(taskpkgs, force=True)
    self.celeryapp.finalize()
def setUp(self):
    """
    Register the package backend and mixin if missing, reset the directories
    """
    package_dir = os.path.dirname(sys.modules['omegaml'].__file__)
    self.basepath = os.path.join(package_dir, 'example')
    self.defaults = settings()
    backends = self.defaults.OMEGA_STORE_BACKENDS
    mixins = self.defaults.OMEGA_STORE_MIXINS
    self.backend = 'omegaml.backends.package.PythonPackageData'
    self.mixin = 'omegaml.mixins.store.package.PythonPackageMixin'
    # only register what is not registered yet
    if PythonPackageData.KIND not in backends:
        backends[PythonPackageData.KIND] = self.backend
    if self.mixin not in mixins:
        mixins.append(self.mixin)
    self.om = Omega()
    delete_database()
    package_backend = self.om.scripts.get_backend_bykind(PythonPackageData.KIND)
    self.pkgsdir = package_backend.packages_path
    self.distdir = os.path.join(self.basepath, 'demo', 'helloworld', 'dist')
    # start with an empty dist directory
    rmtree(self.distdir, ignore_errors=True)
    mkdirs(self.distdir)
    mkdirs(self.pkgsdir)
class Metadata(Metadata_base, Document):
    """
    Metadata document bound to db_alias
    """
    # override db_alias in gridfile
    gridfile = FileField(
        db_alias=db_alias,
        collection_name=collection or settings().OMEGA_MONGO_COLLECTION)
    # the actual db is defined at runtime
    meta = {
        'db_alias': db_alias,
        'strict': False,
        'indexes': [
            # index on the fields that identify an entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __new__(cls, *args, **kwargs):
        # undo the Metadata.__new__ protection
        newcls = super(Metadata, cls).__real_new__(cls)
        return newcls

    def __eq__(self, other):
        # objects without an objid (e.g. None) are not comparable,
        # let Python fall back instead of raising an AttributeError
        if not hasattr(other, 'objid'):
            return NotImplemented
        return self.objid == other.objid

    def __unicode__(self):
        fields = ('name', 'bucket', 'prefix', 'created', 'kind')
        kwargs = ('%s=%s' % (k, getattr(self, k))
                  for k in self._fields.keys() if k in fields)
        return u"Metadata(%s)" % ','.join(kwargs)

    def save(self, *args, **kwargs):
        # raised explicitly instead of using assert so that the check
        # is not stripped when running with python -O
        if self.name is None:
            raise AssertionError("a dataset name is needed before saving")
        self.modified = datetime.datetime.now()
        # continue after Metadata_base in the method resolution order
        return super(Metadata_base, self).save(*args, **kwargs)
def test_config_from_apikey(self):
    """
    Test an Omega instance can be created from user specific configs
    """
    import omegaml as om
    from omegaml.util import settings
    defaults = settings()
    setup = om.setup
    # a changed default must be picked up by om.setup()
    with patch.object(defaults, 'OMEGA_MONGO_URL'):
        defaults.OMEGA_MONGO_URL = 'foo'
        om.setup()
        self.assertEqual(om.datasets.mongo_url, 'foo')
    # reset om.datasets to restored defaults
    setup()
    self.assertNotEqual(om.datasets.mongo_url, 'foo')
    # now test we can change the default through config
    # we patch the actual api call to avoid having to set up the user db
    # the objective here is to test get_omega_from_apikey
    user_config = {
        'objects': [
            {
                'data': {
                    'OMEGA_MONGO_URL': 'updated-foo'
                }
            }
        ]
    }
    with patch('omegaml.client.userconf.get_user_config_from_api') as get_config:
        get_config.return_value = user_config
        with patch.object(defaults, 'OMEGA_MONGO_URL'):
            from omegaml.client.userconf import get_omega_from_apikey
            defaults.OMEGA_MONGO_URL = 'foo'
            om.setup()
            self.assertEqual(om.datasets.mongo_url, 'foo')
            om = get_omega_from_apikey('foo', 'bar')
            self.assertEqual(om.datasets.mongo_url, 'updated-foo')
    setup()
    self.assertNotEqual(om.datasets.mongo_url, 'foo')
def om(self):
    """
    Return a new Omega instance created from freshly reloaded settings
    """
    return Omega(defaults=settings(reload=True))
def test_config_from_apikey(self):
    """
    Test an Omega instance can be created from user specific configs
    """
    import omegaml as om
    from omegaml.util import settings
    from omegaml import _base_config as _real_base_config

    def link_callbacks(target):
        # link callbacks used by get_omega_from_apikey
        _real_base_config.update_from_obj(_real_base_config, attrs=target)
        target.update_from_dict = _real_base_config.update_from_dict
        target.load_user_extensions = lambda *args, **kwargs: None
        target.load_framework_support = lambda *args, **kwargs: None

    # a setting added to the patched base config must be picked up
    with patch('omegaml._base_config', new=BareObj) as defaults:
        link_callbacks(defaults)
        setup = om.setup
        defaults.MY_OWN_SETTING = 'foo'
        settings(reload=True)
        om = om.setup()
        self.assertEqual(om.defaults.MY_OWN_SETTING, 'foo')
    # reset om.datasets to restored defaults
    om = setup()
    self.assertNotEqual(om.datasets.mongo_url, 'foo')
    # now test we can change the default through config
    # we patch the actual api call to avoid having to set up the user db
    # the objective here is to test get_omega_from_apikey
    user_config = {
        'objects': [{
            'data': {
                'OMEGA_MONGO_URL': 'updated-foo',
                'OMEGA_MY_OWN_SETTING': 'updated-foo',
                'OMEGA_CELERY_CONFIG': {
                    'TEST_SETTING': 'pickle',
                }
            }
        }]
    }
    with patch('omegaml.client.userconf.get_user_config_from_api') as get_config:
        get_config.return_value = user_config
        with patch('omegaml._base_config', new=BareObj) as defaults:
            from omegaml.client.userconf import get_omega_from_apikey
            link_callbacks(defaults)
            defaults.OMEGA_MY_OWN_SETTING = 'foo'
            om = get_omega_from_apikey('foo', 'bar')
            self.assertEqual(om.defaults.OMEGA_MY_OWN_SETTING, 'updated-foo')
            self.assertEqual(om.datasets.mongo_url, 'updated-foo')
            self.assertEqual(
                om.defaults.OMEGA_CELERY_CONFIG['TEST_SETTING'], 'pickle')
            # all default values must still be there, i.e. the
            # OMEGA_CELERY_CONFIG was updated, not replaced
            for celery_key in _real_base_config.OMEGA_CELERY_CONFIG:
                self.assertIn(celery_key, om.defaults.OMEGA_CELERY_CONFIG)
    # restore defaults
    defaults = settings(reload=True)
    om = setup()
    self.assertNotEqual(om.datasets.mongo_url, 'foo')
from __future__ import absolute_import ''' make sure Celery is correctly configured see http://chriskief.com/2013/11/15/celery-3-1-with-django-django-celery-rabbitmq-and-macports/ ''' import os from celery import Celery from omegaml.util import settings # get rid of celery's Django compatibility mode os.environ['DJANGO_SETTINGS_MODULE'] = '' defaults = settings() app = Celery('omegaml') app.config_from_object(defaults.OMEGA_CELERY_CONFIG) app.autodiscover_tasks(defaults.OMEGA_CELERY_IMPORTS, related_name='tasks')
class Metadata(Document):
    """
    Metadata stores information about objects in OmegaStore
    """
    # default kinds of data
    PANDAS_DFROWS = 'pandas.dfrows'  # dataframe
    PANDAS_SEROWS = 'pandas.serows'  # series
    PANDAS_HDF = 'pandas.hdf'
    PYTHON_DATA = 'python.data'
    PANDAS_DFGROUP = 'pandas.dfgroup'
    SKLEARN_JOBLIB = 'sklearn.joblib'
    OMEGAML_JOBS = 'script.ipynb'
    SPARK_MLLIB = 'spark.mllib'
    OMEGAML_RUNNING_JOBS = 'job.run'
    #: the list of accepted data types. extend using OmegaStore.register_backend
    KINDS = [
        PANDAS_DFROWS,
        PANDAS_SEROWS,
        PANDAS_HDF,
        PYTHON_DATA,
        SKLEARN_JOBLIB,
        PANDAS_DFGROUP,
        OMEGAML_JOBS,
        OMEGAML_RUNNING_JOBS,
        SPARK_MLLIB,
    ]
    # fields
    #: this is the name of the data
    name = StringField()
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data
    kind = StringField(choices=KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    # the actual db is defined in settings, OMEGA_MONGO_URL
    meta = {
        'db_alias': 'omega',
        'indexes': [
            # index on the fields that identify an entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __unicode__(self):
        """
        Return Metadata(field=value,...) for bucket, prefix, created, kind
        """
        shown = ('bucket', 'prefix', 'created', 'kind')
        parts = []
        for field in self._fields.keys():
            if field in shown:
                parts.append('%s=%s' % (field, getattr(self, field)))
        return u"Metadata(%s)" % ','.join(parts)