Beispiel #1
0
    def __init__(self, omega, bucket=None, defaults=None, celeryconf=None):
        """
        Set up the runtime and its Celery application

        :param omega: the omega instance this runtime is attached to
        :param bucket: the bucket, stored as given
        :param defaults: the settings object, falls back to settings()
        :param celeryconf: the celery configuration dictionary, falls back
           to defaults.OMEGA_CELERY_CONFIG
        """
        from omegaml.util import settings

        self.omega = omega
        if not defaults:
            defaults = settings()
        self.bucket = bucket
        # an explicit OMEGA_FORCE_PYTHON_CLIENT wins, otherwise ask the client
        self.pure_python = getattr(defaults, 'OMEGA_FORCE_PYTHON_CLIENT', False)
        if not self.pure_python:
            self.pure_python = self._client_is_pure_python()
        # celery serves as the runtime
        task_packages = defaults.OMEGA_CELERY_IMPORTS
        if not celeryconf:
            celeryconf = defaults.OMEGA_CELERY_CONFIG
        # always reflect the current OMEGA_LOCAL_RUNTIME value
        # NOTE: this writes into the configuration mapping in place
        celeryconf['CELERY_ALWAYS_EAGER'] = bool(defaults.OMEGA_LOCAL_RUNTIME)
        self.celeryapp = celery_app = Celery('omegaml')
        celery_app.config_from_object(celeryconf)
        # forcing autodiscovery is needed to get the tasks to actually load
        # https://stackoverflow.com/a/35735471
        celery_app.autodiscover_tasks(task_packages, force=True)
        celery_app.finalize()
        # temporary requirements, use .require() to set
        self._require_kwargs = {'task': {}, 'routing': {}}
        # fixed default arguments, use .require(always=True) to set
        self._task_default_kwargs = {'task': {}, 'routing': {}}
        # default routing label
        self._default_label = celery_app.conf.get('CELERY_DEFAULT_QUEUE')
Beispiel #2
0
 def test_user_extensions_config(self):
     """
     Test that OMEGA_USER_EXTENSIONS from the user config extend the defaults

     The user config API call is patched to return an extension that adds
     a 'test.backend' entry to OMEGA_STORE_BACKENDS. The test checks that
     the entry is present and that every backend key of the real base
     config is still there.
     """
     from omegaml.util import settings
     from omegaml import _base_config as _real_base_config
     # we patch the api call to control the user config that is returned
     with patch('omegaml.client.userconf.get_user_config_from_api') as mock:
         mock.return_value = {
             'objects': [{
                 'data': {
                     "OMEGA_USER_EXTENSIONS": {
                         "OMEGA_STORE_BACKENDS": {
                             "test.backend":
                             'omegaml.backends.npndarray.NumpyNDArrayBackend'
                         }
                     }
                 }
             }]
         }
         with patch('omegaml._base_config', new=BareObj) as defaults:
             from omegaml.client.userconf import get_omega_from_apikey
             # link callbacks used by get_omega_from_api_key
             _real_base_config.update_from_obj(_real_base_config,
                                               attrs=defaults)
             defaults.update_from_dict = _real_base_config.update_from_dict
             defaults.load_user_extensions = _real_base_config.load_user_extensions
             defaults.load_framework_support = lambda *args, **kwargs: None
             defaults.OMEGA_MY_OWN_SETTING = 'foo'
             om = get_omega_from_apikey('foo', 'bar')
             self.assertIsNotNone(om.defaults.OMEGA_USER_EXTENSIONS)
             self.assertIn('test.backend', om.defaults.OMEGA_STORE_BACKENDS)
             # test that all default values are still there, i.e. config was updated, not replaced
             # only the keys are compared, so iterate the mapping directly
             for real_k in _real_base_config.OMEGA_STORE_BACKENDS:
                 self.assertIn(real_k, om.defaults.OMEGA_STORE_BACKENDS)
         # restore defaults, the returned settings object is not needed here
         settings(reload=True)
Beispiel #3
0
 def __init__(self, modelname, runtime=None):
     """
     Store the model name and runtime, then apply the runtime mixins

     :param modelname: the name of the model, stored as given
     :param runtime: the runtime, stored as given
     """
     self.modelname = modelname
     self.runtime = runtime
     # an explicit OMEGA_FORCE_PYTHON_CLIENT wins, otherwise ask the client
     self.pure_python = getattr(settings(), 'OMEGA_FORCE_PYTHON_CLIENT', False)
     if not self.pure_python:
         self.pure_python = self._client_is_pure_python()
     self.apply_mixins()
Beispiel #4
0
    def __init__(self, defaults=None, mongo_url=None, celeryconf=None, bucket=None,
                 **kwargs):
        """
        Initialize the client API

        Without arguments create the client API according to the user's
        configuration in :code:`~/omegaml/config.yml`.

        Arguments override the user's configuration.

        :param defaults: the DefaultsContext, falls back to settings()
        :param mongo_url: the fully qualified URI to the mongo database,
           of format :code:`mongodb://user:password@host:port/database`
        :param celeryconf: the celery configuration dictionary
        :param bucket: the bucket, passed to the models, datasets and jobs
           stores and to the runtime
        :param kwargs: accepted, not used by this method
        """
        from omegaml.util import settings
        # avoid circular imports
        from omegaml.notebook.jobs import OmegaJobs
        from omegaml.runtimes import OmegaRuntime
        from omegaml.store import OmegaStore
        # celery and mongo configuration
        self.defaults = defaults if defaults else settings()
        self.mongo_url = mongo_url if mongo_url else self.defaults.OMEGA_MONGO_URL
        self.bucket = bucket
        # arguments shared by all storage locations
        store_kwargs = dict(mongo_url=self.mongo_url, defaults=self.defaults)
        # setup storage locations
        self.models = OmegaStore(bucket=bucket, prefix='models/', **store_kwargs)
        self.datasets = OmegaStore(bucket=bucket, prefix='data/', **store_kwargs)
        self._jobdata = OmegaStore(bucket=bucket, prefix='jobs/', **store_kwargs)
        # the scripts store is created without a bucket argument
        self.scripts = OmegaStore(prefix='scripts/', **store_kwargs)
        # runtimes environments
        self.runtime = OmegaRuntime(self,
                                    bucket=bucket,
                                    defaults=self.defaults,
                                    celeryconf=celeryconf)
        self.jobs = OmegaJobs(store=self._jobdata)
Beispiel #5
0
 def apply_mixins(self):
     """
     extend this instance with every mixin in defaults.OMEGA_RUNTIME_MIXINS
     """
     from omegaml import settings
     runtime_mixins = settings().OMEGA_RUNTIME_MIXINS
     for mixin_spec in runtime_mixins:
         extend_instance(self, mixin_spec)
Beispiel #6
0
 def setUp(self):
     """
     Register the pip package backend, create Omega and the packages path
     """
     self.defaults = settings()
     kind = PythonPipSourcedPackageData.KIND
     self.backend = 'omegaml.backends.package.PythonPipSourcedPackageData'
     # register the backend only if no backend is set for this kind yet
     store_backends = self.defaults.OMEGA_STORE_BACKENDS
     if kind not in store_backends:
         store_backends[kind] = self.backend
     self.om = Omega()
     delete_database()
     package_backend = self.om.scripts.get_backend_bykind(kind)
     self.pkgsdir = package_backend.packages_path
     mkdirs(self.pkgsdir)
Beispiel #7
0
    class Metadata(Document):
        """
        Metadata stores information about objects in OmegaStore

        The document and its gridfile are declared with the db alias
        'omega', see meta and gridfile below.
        """

        # fields
        #: this is the name of the data
        name = StringField(unique_with=['bucket', 'prefix'])
        #: bucket
        bucket = StringField()
        #: prefix
        prefix = StringField()
        #: kind of data, restricted to MDREGISTRY.KINDS
        kind = StringField(choices=MDREGISTRY.KINDS)
        #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
        gridfile = FileField(
            db_alias='omega',
            collection_name=settings().OMEGA_MONGO_COLLECTION)
        #: for PANDAS_DFROWS this is the collection
        collection = StringField()
        #: for PYTHON_DATA this is the actual document
        objid = ObjectIdField()
        #: omegaml technical attributes, e.g. column indices
        kind_meta = DictField()
        #: customer-defined other meta attributes
        attributes = DictField()
        #: s3file attributes
        s3file = DictField()
        #: location URI
        uri = StringField()
        #: created datetime
        created = DateTimeField(default=datetime.datetime.now)
        # the actual db is defined in settings, OMEGA_MONGO_URL
        meta = {
            'db_alias': 'omega',
            'indexes': [
                # unique entry
                {
                    'fields': ['bucket', 'prefix', 'name'],
                },
                'created',  # most recent is last, i.e. [-1]
            ]
        }

        def __eq__(self, other):
            """
            Two Metadata objects are equal if their objid is equal

            NOTE(review): other is expected to have an objid attribute,
            there is no fallback for other types of objects.
            """
            return self.objid == other.objid

        def __unicode__(self):
            """
            Return "Metadata(k=v,...)" for name, bucket, prefix, created, kind

            The order of the entries follows self._fields.
            """
            fields = ('name', 'bucket', 'prefix', 'created', 'kind')
            kwargs = ('%s=%s' % (k, getattr(self, k))
                      for k in self._fields.keys() if k in fields)
            return u"Metadata(%s)" % ','.join(kwargs)
Beispiel #8
0
def make_Metadata(db_alias='omega', collection=None):
    """
    Create a context specific Metadata class

    The class takes the database from the given alias at the time of use.

    :param db_alias: the db alias used for the document and its gridfile
    :param collection: the gridfile collection name, falls back to
       settings().OMEGA_MONGO_COLLECTION
    :return: the Metadata class
    """
    from omegaml.documents import Metadata as Metadata_base
    if not collection:
        collection = settings().OMEGA_MONGO_COLLECTION

    class Metadata(Metadata_base, Document):
        # the gridfile is redeclared so that it uses the given db_alias
        gridfile = FileField(db_alias=db_alias, collection_name=collection)
        # the actual db is defined at runtime
        meta = {
            'db_alias': db_alias,
            'strict': False,
            'indexes': [
                # unique entry
                {'fields': ['bucket', 'prefix', 'name']},
                # most recent is last, i.e. [-1]
                'created',
            ],
        }

        def __new__(cls, *args, **kwargs):
            # undo the Metadata.__new__ protection
            return super(Metadata, cls).__real_new__(cls)

        def __eq__(self, other):
            # equality is decided by the objid only
            return self.objid == other.objid

        def __unicode__(self):
            # render the selected fields in the order given by self._fields
            shown = ('name', 'bucket', 'prefix', 'created', 'kind')
            parts = []
            for key in self._fields.keys():
                if key in shown:
                    parts.append('%s=%s' % (key, getattr(self, key)))
            return u"Metadata(%s)" % ','.join(parts)

        def save(self, *args, **kwargs):
            # every save requires a name and updates the modified timestamp
            assert self.name is not None, "a dataset name is needed before saving"
            self.modified = datetime.datetime.now()
            # resolve save() starting after Metadata_base in the MRO
            return super(Metadata_base, self).save(*args, **kwargs)

        def to_json(self, **kwargs):
            # use LEGACY_JSON_OPTIONS unless the caller passes json_options
            kwargs.setdefault('json_options', LEGACY_JSON_OPTIONS)
            return super().to_json(**kwargs)

        def to_dict(self):
            # convert by way of the mongo representation
            as_mongo = self.to_mongo()
            return as_mongo.to_dict()

    return Metadata
Beispiel #9
0
    def get_omega_for_task(cls, auth=None):
        """
        Get Omega instance configured for user in auth

        If auth is passed as a list or tuple of (userid, apikey, qualifier),
        the Omega instance is created by cls.get_omega_from_apikey().

        If auth is the tuple (None, None, 'default') the default instance
        configured by the omegaml module is returned. This requires the
        setting OMEGA_ALLOW_TASK_DEFAULT_AUTH to be true, otherwise an
        error is raised.

        :param auth: the authentication as (userid, apikey, qualifier)
        :return: the Omega instance configured for the user
        :raises ValueError: if auth is missing, cannot be parsed, or
           default authentication is requested but not allowed
        """
        from omegaml.util import settings

        default_auth = (None, None, 'default')
        defaults = settings()

        # user specific authentication
        if auth is not None and auth != default_auth:
            if not isinstance(auth, (list, tuple)):
                raise ValueError(
                    'cannot parse authentication as {}'.format(auth))
            # we get a serialized tuple, recreate auth object
            # -- this is a hack to easily support python 2/3 client/server mix
            userid, apikey, qualifier = auth
            # by default assume worker is in cluster
            # TODO refactor this setting to eedefaults
            in_cluster = defaults.OMEGA_WORKER_INCLUSTER
            return cls.get_omega_from_apikey(userid,
                                             apikey,
                                             qualifier=qualifier,
                                             view=in_cluster)

        # default authentication
        if auth == default_auth:
            # we provide the default implementation as per configuration
            from omegaml import _omega
            default_om = _omega._om
            if not defaults.OMEGA_ALLOW_TASK_DEFAULT_AUTH:
                raise ValueError(
                    'Default task authentication is not allowed, got {}'.
                    format(auth))
            return default_om

        # no authentication at all
        raise ValueError(
            'missing authentication tuple as (userid, apikey, qualifier), got {}'
            .format(auth))
Beispiel #10
0
    def __init__(self,
                 mongo_url=None,
                 backend=None,
                 broker=None,
                 celeryconf=None,
                 defaults=None):
        """
        Initialize the client API

        Without arguments create the client API according to the user's
        configuration in :code:`~/omegaml/config.yml`.

        Arguments override the user's configuration.

        :param mongo_url: the fully qualified URI to the mongo database,
           of format :code:`mongodb://user:password@host:port/database`,
           falls back to defaults.OMEGA_MONGO_URL
        :param backend: the celery result backend URI
        :param broker: the celery broker URI
        :param celeryconf: the celery configuration dictionary
        :param defaults: the settings object, falls back to settings()
        """
        from omegaml.util import settings
        # avoid circular imports
        from omegaml.notebook.jobs import OmegaJobs
        from omegaml.runtimes import OmegaRuntime
        from omegaml.store import OmegaStore
        # celery and mongo configuration
        self.defaults = defaults or settings()
        self.mongo_url = mongo_url or self.defaults.OMEGA_MONGO_URL
        self.broker = broker or self.defaults.OMEGA_BROKER
        self.backend = backend or self.defaults.OMEGA_RESULT_BACKEND
        self.celeryconf = celeryconf
        # setup storage locations
        # -- pass the resolved url so that the stores always use the same
        #    database as reported by self.mongo_url
        self.models = OmegaStore(mongo_url=self.mongo_url,
                                 prefix='models/',
                                 defaults=self.defaults)
        self.datasets = OmegaStore(mongo_url=self.mongo_url,
                                   prefix='data/',
                                   defaults=self.defaults)
        self._jobdata = OmegaStore(mongo_url=self.mongo_url,
                                   prefix='jobs/',
                                   defaults=self.defaults)
        # runtimes environments
        # NOTE(review): the runtime receives backend and broker as passed in,
        # not the resolved self.backend and self.broker -- confirm this is
        # intended
        self.runtime = OmegaRuntime(self,
                                    backend=backend,
                                    broker=broker,
                                    celeryconf=celeryconf,
                                    defaults=self.defaults)
        self.jobs = OmegaJobs(store=self._jobdata)
Beispiel #11
0
class Metadata:
    """
    Metadata stores information about objects in OmegaStore
    """

    # NOTE THIS IS ONLY HERE FOR DOCUMENTATION PURPOSE.
    #
    # If you use this class to save a document, it will raise a NameError
    #
    # The actual Metadata class is created in make_Metadata() below.
    # Rationale: If we let mongoengine create Metadata here the class
    # is bound to a specific MongoClient instance. Using make_Metadata
    # binds the class to the specific instance that exists at the time
    # of creation. Open to better ways.

    # fields
    #: this is the name of the data
    name = StringField(unique_with=['bucket', 'prefix'])
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data, restricted to MDREGISTRY.KINDS
    kind = StringField(choices=MDREGISTRY.KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    #: modified datetime
    modified = DateTimeField(default=datetime.datetime.now)
Beispiel #12
0
 def __init__(self, omega, defaults=None, celeryconf=None):
     """
     Set up the runtime and its Celery application

     :param omega: the omega instance this runtime is attached to
     :param defaults: the settings object, falls back to settings()
     :param celeryconf: the celery configuration dictionary, falls back
        to defaults.OMEGA_CELERY_CONFIG
     """
     self.omega = omega
     if not defaults:
         defaults = settings()
     # an explicit OMEGA_FORCE_PYTHON_CLIENT wins, otherwise ask the client
     self.pure_python = getattr(defaults, 'OMEGA_FORCE_PYTHON_CLIENT', False)
     if not self.pure_python:
         self.pure_python = self._client_is_pure_python()
     # celery serves as the runtime
     task_packages = defaults.OMEGA_CELERY_IMPORTS
     if not celeryconf:
         celeryconf = defaults.OMEGA_CELERY_CONFIG
     self.celeryapp = celery_app = Celery('omegaml')
     celery_app.config_from_object(celeryconf)
     # forcing autodiscovery is needed to get the tasks to actually load
     # https://stackoverflow.com/a/35735471
     celery_app.autodiscover_tasks(task_packages, force=True)
     celery_app.finalize()
     # temporary requirements, use .require() to set
     self._require_kwargs = dict()
     # fixed default arguments, use .require(always=True) to set
     self._task_default_kwargs = dict()
Beispiel #13
0
 def __init__(self, omega, backend=None,
              broker=None, celerykwargs=None, celeryconf=None, defaults=None):
     """
     Set up the runtime and its Celery application

     :param omega: the omega instance this runtime is attached to
     :param backend: the celery result backend, defaults to 'amqp'
     :param broker: the celery broker URI, defaults to
        'amqp://guest@localhost//'
     :param celerykwargs: kwargs to create the Celery instance, falls back
        to defaults.OMEGA_CELERY_CONFIG. The mapping is copied, it is not
        modified.
     :param celeryconf: the celery configuration dictionary, falls back to
        defaults.OMEGA_CELERY_CONFIG
     :param defaults: the settings object, falls back to settings()
     """
     self.backend = backend or 'amqp'
     self.broker = broker or 'amqp://guest@localhost//'
     self.omega = omega
     defaults = defaults or settings()
     # initialize celery as a runtimes
     taskpkgs = defaults.OMEGA_CELERY_IMPORTS
     # work on a copy -- updating in place would write backend, broker and
     # include into the caller's dict or into the shared
     # defaults.OMEGA_CELERY_CONFIG, where they leak into celeryconf and
     # into every runtime created afterwards
     celerykwargs = dict(celerykwargs or defaults.OMEGA_CELERY_CONFIG)
     celerykwargs.update({'backend': self.backend,
                          'broker': self.broker,
                          'include': taskpkgs,
                          })
     celeryconf = celeryconf or defaults.OMEGA_CELERY_CONFIG
     self.celeryapp = Celery('omegaml', **celerykwargs)
     self.celeryapp.conf.update(celeryconf)
     # forcing autodiscovery is needed to get the tasks to actually load
     # https://stackoverflow.com/a/35735471
     self.celeryapp.autodiscover_tasks(taskpkgs, force=True)
     self.celeryapp.finalize()
Beispiel #14
0
 def setUp(self):
     """
     Register the package backend and mixin, create Omega and the work paths
     """
     # the example path is relative to the installed omegaml package
     package_dir = os.path.dirname(sys.modules['omegaml'].__file__)
     self.basepath = os.path.join(package_dir, 'example')
     self.defaults = settings()
     store_backends = self.defaults.OMEGA_STORE_BACKENDS
     store_mixins = self.defaults.OMEGA_STORE_MIXINS
     self.backend = 'omegaml.backends.package.PythonPackageData'
     self.mixin = 'omegaml.mixins.store.package.PythonPackageMixin'
     # register backend and mixin only if not set already
     kind = PythonPackageData.KIND
     if kind not in store_backends:
         store_backends[kind] = self.backend
     if self.mixin not in store_mixins:
         store_mixins.append(self.mixin)
     self.om = Omega()
     delete_database()
     package_backend = self.om.scripts.get_backend_bykind(kind)
     self.pkgsdir = package_backend.packages_path
     self.distdir = os.path.join(self.basepath, 'demo', 'helloworld', 'dist')
     # start from an empty dist directory
     rmtree(self.distdir, ignore_errors=True)
     for path in (self.distdir, self.pkgsdir):
         mkdirs(path)
Beispiel #15
0
    class Metadata(Metadata_base, Document):
        # the gridfile is redeclared so that it uses the given db_alias,
        # the collection falls back to settings().OMEGA_MONGO_COLLECTION
        gridfile = FileField(
            db_alias=db_alias,
            collection_name=collection or settings().OMEGA_MONGO_COLLECTION)
        # the actual db is defined at runtime
        meta = {
            'db_alias': db_alias,
            'strict': False,
            'indexes': [
                # unique entry
                {'fields': ['bucket', 'prefix', 'name']},
                # most recent is last, i.e. [-1]
                'created',
            ],
        }

        def __new__(cls, *args, **kwargs):
            # undo the Metadata.__new__ protection
            return super(Metadata, cls).__real_new__(cls)

        def __eq__(self, other):
            # equality is decided by the objid only
            return self.objid == other.objid

        def __unicode__(self):
            # render the selected fields in the order given by self._fields
            shown = ('name', 'bucket', 'prefix', 'created', 'kind')
            parts = []
            for key in self._fields.keys():
                if key in shown:
                    parts.append('%s=%s' % (key, getattr(self, key)))
            return u"Metadata(%s)" % ','.join(parts)

        def save(self, *args, **kwargs):
            # every save requires a name and updates the modified timestamp
            assert self.name is not None, "a dataset name is needed before saving"
            self.modified = datetime.datetime.now()
            # resolve save() starting after Metadata_base in the MRO
            return super(Metadata_base, self).save(*args, **kwargs)
Beispiel #16
0
 def test_config_from_apikey(self):
     """
     Test an Omega instance can be created from user specific configs

     First checks that om.setup() picks up a patched OMEGA_MONGO_URL,
     then that get_omega_from_apikey() applies the OMEGA_MONGO_URL
     returned by the (patched) user config api call.
     """
     import omegaml as om
     from omegaml.util import settings
     # check we get default without patching
     defaults = settings()
     # keep a reference to setup, om is rebound to an instance further below
     setup = om.setup
     # the patch restores OMEGA_MONGO_URL on exit, the mock itself is unused
     with patch.object(defaults, 'OMEGA_MONGO_URL') as mock:
         defaults.OMEGA_MONGO_URL = 'foo'
         om.setup()
         self.assertEqual(om.datasets.mongo_url, 'foo')
     # reset om.datasets to restored defaults
     setup()
     self.assertNotEqual(om.datasets.mongo_url, 'foo')
     # now test we can change the default through config
     # we patch the actual api call to avoid having to set up the user db
     # the objective here is to test get_omega_from_apikey
     with patch('omegaml.client.userconf.get_user_config_from_api') as mock:
         mock.return_value = {
             'objects': [
                {
                     'data': {
                         'OMEGA_MONGO_URL': 'updated-foo'
                     }
                 }
             ]
         }
         # NOTE(review): this rebinds the name mock, the return value set
         # above stays on the api call mock
         with patch.object(defaults, 'OMEGA_MONGO_URL') as mock:
             from omegaml.client.userconf import get_omega_from_apikey
             defaults.OMEGA_MONGO_URL = 'foo'
             om.setup()
             self.assertEqual(om.datasets.mongo_url, 'foo')
             # from here on om is the returned instance, not the module
             om = get_omega_from_apikey('foo', 'bar')
             self.assertEqual(om.datasets.mongo_url, 'updated-foo')
     setup()
     # NOTE(review): om still refers to the instance returned by
     # get_omega_from_apikey, so this does not check the result of the
     # setup() call above -- confirm this is intended
     self.assertNotEqual(om.datasets.mongo_url, 'foo')
Beispiel #17
0
 def om(self):
     """Return a new Omega instance created from reloaded settings"""
     return Omega(defaults=settings(reload=True))
Beispiel #18
0
 def test_config_from_apikey(self):
     """
     Test an Omega instance can be created from user specific configs

     Both parts of the test replace omegaml._base_config by BareObj. The
     first part checks that a custom setting shows up in om.defaults, the
     second part checks that get_omega_from_apikey() applies the values
     returned by the (patched) user config api call.
     """
     import omegaml as om
     from omegaml.util import settings
     # keep the real base config, it is used to fill the patched one
     from omegaml import _base_config as _real_base_config
     with patch('omegaml._base_config', new=BareObj) as defaults:
         # link callbacks used by get_omega_from_api_key
         # -- presumably update_from_obj copies the real settings to defaults
         _real_base_config.update_from_obj(_real_base_config,
                                           attrs=defaults)
         defaults.update_from_dict = _real_base_config.update_from_dict
         defaults.load_user_extensions = lambda *args, **kwargs: None
         defaults.load_framework_support = lambda *args, **kwargs: None
         # keep a reference to setup, om is rebound to an instance below
         setup = om.setup
         defaults.MY_OWN_SETTING = 'foo'
         settings(reload=True)
         om = om.setup()
         self.assertEqual(om.defaults.MY_OWN_SETTING, 'foo')
         # reset om.datasets to restored defaults
         # NOTE(review): the patch is still active at this point
         om = setup()
         self.assertNotEqual(om.datasets.mongo_url, 'foo')
     # now test we can change the default through config
     # we patch the actual api call to avoid having to set up the user db
     # the objective here is to test get_omega_from_apikey
     with patch('omegaml.client.userconf.get_user_config_from_api') as mock:
         mock.return_value = {
             'objects': [{
                 'data': {
                     'OMEGA_MONGO_URL': 'updated-foo',
                     'OMEGA_MY_OWN_SETTING': 'updated-foo',
                     'OMEGA_CELERY_CONFIG': {
                         'TEST_SETTING': 'pickle',
                     }
                 }
             }]
         }
         with patch('omegaml._base_config', new=BareObj) as defaults:
             from omegaml.client.userconf import get_omega_from_apikey
             # link callbacks used by get_omega_from_api_key
             _real_base_config.update_from_obj(_real_base_config,
                                               attrs=defaults)
             defaults.update_from_dict = _real_base_config.update_from_dict
             defaults.load_user_extensions = lambda *args, **kwargs: None
             defaults.load_framework_support = lambda *args, **kwargs: None
             # the user config is expected to override this value
             defaults.OMEGA_MY_OWN_SETTING = 'foo'
             om = get_omega_from_apikey('foo', 'bar')
             self.assertEqual(om.defaults.OMEGA_MY_OWN_SETTING,
                              'updated-foo')
             self.assertEqual(om.datasets.mongo_url, 'updated-foo')
             self.assertEqual(
                 om.defaults.OMEGA_CELERY_CONFIG['TEST_SETTING'], 'pickle')
             # test that all default values are still there, i.e. the OMEGA_CELERY_CONFIG was updated, not replaced
             for real_k, real_v in _real_base_config.OMEGA_CELERY_CONFIG.items(
             ):
                 self.assertIn(real_k, om.defaults.OMEGA_CELERY_CONFIG)
     # restore defaults
     defaults = settings(reload=True)
     om = setup()
     self.assertNotEqual(om.datasets.mongo_url, 'foo')
Beispiel #19
0
"""
Make sure Celery is correctly configured

see http://chriskief.com/2013/11/15/celery-3-1-with-django-django-celery-rabbitmq-and-macports/
"""
# the docstring has to be the first statement to be the module docstring,
# the __future__ import may follow it
from __future__ import absolute_import

import os

from celery import Celery
from omegaml.util import settings

# get rid of celery's Django compatibility mode
# -- this is set before the Celery app is created
os.environ['DJANGO_SETTINGS_MODULE'] = ''

# create the app from the omegaml settings
defaults = settings()
app = Celery('omegaml')
app.config_from_object(defaults.OMEGA_CELERY_CONFIG)
# load the 'tasks' module of every package in OMEGA_CELERY_IMPORTS
app.autodiscover_tasks(defaults.OMEGA_CELERY_IMPORTS, related_name='tasks')
Beispiel #20
0
class Metadata(Document):
    """
    Metadata stores information about objects in OmegaStore

    The document and its gridfile are declared with the db alias
    'omega', see meta and gridfile below.
    """
    # default kinds of data
    PANDAS_DFROWS = 'pandas.dfrows'  # dataframe
    PANDAS_SEROWS = 'pandas.serows'  # series
    PANDAS_HDF = 'pandas.hdf'
    PYTHON_DATA = 'python.data'
    PANDAS_DFGROUP = 'pandas.dfgroup'
    SKLEARN_JOBLIB = 'sklearn.joblib'
    OMEGAML_JOBS = 'script.ipynb'
    SPARK_MLLIB = 'spark.mllib'
    OMEGAML_RUNNING_JOBS = 'job.run'
    #: the list of accepted data types. extend using OmegaStore.register_backend
    KINDS = [
        PANDAS_DFROWS, PANDAS_SEROWS, PANDAS_HDF, PYTHON_DATA, SKLEARN_JOBLIB,
        PANDAS_DFGROUP, OMEGAML_JOBS, OMEGAML_RUNNING_JOBS, SPARK_MLLIB]
    # fields
    #: this is the name of the data
    name = StringField()
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data, restricted to KINDS
    kind = StringField(choices=KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    # the actual db is defined in settings, OMEGA_MONGO_URL
    meta = {
        'db_alias': 'omega',
        'indexes': [
            # unique entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __unicode__(self):
        """
        Return "Metadata(k=v,...)" for bucket, prefix, created and kind

        The name is not part of the output. The order of the entries
        follows self._fields.
        """
        kwargs = ('%s=%s' % (k, getattr(self, k))
                  for k in self._fields.keys() if k in ('bucket', 'prefix', 'created', 'kind'))
        return u"Metadata(%s)" % ','.join(kwargs)