Esempio n. 1
0
    def test_task_mapreduce_script(self):
        """ map multiple model tasks, reduce via a callback script

        Fits a model on the runtime, then runs predict tasks inside a
        mapreduce context with the 'callback' demo script as the reduce
        step; the callback appends its output to the 'callback_results'
        dataset.
        """
        om = Omega()
        df = pd.DataFrame({'x': range(1, 10), 'y': range(5, 14)})
        lr = LinearRegression()
        om.datasets.put(df, 'sample')
        om.models.put(lr, 'regmodel')
        om.runtime.model('regmodel').fit('sample[x]', 'sample[y]').get()

        # NOTE(review): om is re-created here, presumably to start from a
        # fresh client state -- confirm this is intentional
        om = Omega()
        # install the callback demo package shipped with omegaml as a script
        basepath = os.path.join(
            os.path.dirname(sys.modules['omegaml'].__file__), 'example')
        pkgpath = os.path.abspath(os.path.join(basepath, 'demo', 'callback'))
        pkgsrc = 'pkg://{}'.format(pkgpath)
        om.scripts.put(pkgsrc, 'callback')
        with om.runtime.mapreduce() as ctr:
            # two tasks to map
            ctr.model('regmodel').predict('sample[x]')
            ctr.model('regmodel').predict('sample[x]')
            # one task to reduce
            ctr.script('callback').run(as_callback=True)
            result = ctr.run()

        result.get()
        # two predictions of 9 rows each => 18 rows collected by the callback
        self.assertEqual(len(om.datasets.get('callback_results')), 18)

        with om.runtime.mapreduce() as ctr:
            # one task to map
            ctr.model('regmodel').predict('sample[x]')
            # one task to reduce
            ctr.script('callback').run(as_callback=True)
            result = ctr.run()

        result.get()
        # one more prediction of 9 rows appended => 27 rows in total
        self.assertEqual(len(om.datasets.get('callback_results')), 27)
Esempio n. 2
0
 def setUp(self):
     """ register the MLFlow backends on a clean omega store """
     om = self.om = Omega()
     self.clean()
     # both the model and the registry backend are required by the tests
     for backend in (MLFlowModelBackend, MLFlowRegistryBackend):
         om.models.register_backend(backend.KIND, backend)
Esempio n. 3
0
 def test_promotion_to_other_db_works(self):
     """ promote() copies models and datasets to another database

     Verifies that the promoted object is an independent copy: changes
     made to the original after promotion must not affect the copy.
     """
     om = self.om
     other = Omega(mongo_url=om.mongo_url + '_promotest')
     # drop leftovers from previous runs
     # -- fix: the original used list comprehensions purely for their side
     #    effects; plain for loops state the intent
     for name in other.models.list(include_temp=True):
         other.models.drop(name, force=True)
     for name in other.datasets.list(include_temp=True):
         other.datasets.drop(name, force=True)
     reg = LinearRegression()
     reg.coef_ = 10
     # try models
     om.models.put(reg, 'mymodel')
     self.assertIn('mymodel', om.models.list())
     self.assertNotIn('mymodel', other.models.list())
     om.models.promote('mymodel', other.models)
     self.assertIn('mymodel', other.models.list())
     # ensure changes only in original
     reg.coef_ = 15
     om.models.put(reg, 'mymodel')
     self.assertNotEqual(om.models.get('mymodel').coef_, other.models.get('mymodel').coef_)
     # try datasets
     om.datasets.put(['foo'], 'foo')
     # -- ensure only in original
     self.assertIn('foo', om.datasets.list())
     self.assertNotIn('foo', other.datasets.list())
     # -- promote to other
     om.datasets.promote('foo', other.datasets)
     self.assertIn('foo', other.datasets.list())
     self.assertEqual(om.datasets.get('foo'), other.datasets.get('foo'))
     # change original ensure copy not changed
     om.datasets.put(['foo'], 'foo', append=True)
     self.assertNotEqual(om.datasets.get('foo'), other.datasets.get('foo'))
Esempio n. 4
0
 def test_ping(self):
     """ ping the runtime worker; args and kwargs are echoed back """
     om = Omega()
     response = om.runtime.ping('foo', fox='bar')
     # the worker reports identification plus the call arguments
     for key in ('message', 'worker'):
         self.assertIn(key, response)
     self.assertEqual(response['args'], ('foo',))
     self.assertEqual(response['kwargs'], {'fox': 'bar'})
Esempio n. 5
0
 def setUp(self):
     """ enable tensorflow eager mode and register the TF dataset backend """
     import os
     # request eager execution via env var before tensorflow is configured
     os.environ['TF_EAGER'] = '1'
     tf_perhaps_eager_execution()
     self.om = Omega()
     self.om.models.register_backend(TFDatasetBackend.KIND, TFDatasetBackend)
     self.clean()
Esempio n. 6
0
 def setUp(self):
     """ provide a 'sample' dataset of two repeated 0..9 ranges """
     TestCase.setUp(self)
     values = list(range(0, 10)) * 2
     df = self.df = pd.DataFrame({'x': values, 'y': values})
     om = self.om = Omega()
     # replace any previously stored sample data
     om.datasets.put(df, 'sample', append=False)
     self.coll = om.datasets.collection('sample')
Esempio n. 7
0
 def test_partial_fit_chunked(self):
     """ partial_fit on chunked data ('#') improves a previously fitted model """
     # create some data
     x = np.array(list(range(0, 100)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     # generate a large dataset
     for i in range(100):
         om.datasets.put(df, 'data', append=(i > 0))
     # create a model locally, store (unfitted) in Omega
     # -- ignore warnings on y shape
     import warnings
     warnings.filterwarnings("ignore", category=DataConversionWarning)
     lr = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
     om.models.put(lr, 'mymodel2')
     # have Omega fit the model to get a start, then predict
     result = om.runtime.model('mymodel2').fit(df[['x']], df[['y']])
     result.get()
     # the initial fit's mse must exceed the improvement threshold
     result = om.runtime.model('mymodel2').predict('data[x]')
     pred1 = result.get()
     mse = mean_squared_error(om.datasets.get('data[y]'), pred1)
     self.assertGreater(mse, 40)
     # fit mini batches add better training data, update model
     result = om.runtime.model('mymodel2').partial_fit('data[x]#', 'data[y]#')
     # -- fix: wait for partial_fit to complete before predicting; the
     #    original discarded the AsyncResult without calling .get()
     result.get()
     result = om.runtime.model('mymodel2').predict('data[x]')
     pred1 = result.get()
     mse_2 = mean_squared_error(om.datasets.get('data[y]'), pred1)
     self.assertLess(mse_2, mse)
Esempio n. 8
0
 def test_predict_hdf_dataframe(self):
     """ runtime prediction on hdf-stored series matches local prediction """
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df['x']
     Y = df['y']
     # put into Omega -- assume a client with pandas, scikit learn
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.pure_python = True
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax', as_hdf=True)
     om.datasets.put(Y, 'datay', as_hdf=True)
     # fit the model locally to obtain a reference prediction
     lr = LinearRegression()
     lr.fit(reshaped(X), reshaped(Y))
     pred = lr.predict(reshaped(X))
     om.models.put(lr, 'mymodel2')
     # -- using data provided locally
     #    note this is the same as
     #        om.datasets.put(X, 'foo')
     #        om.runtimes.model('mymodel2').predict('foo')
     result = om.runtime.model('mymodel2').predict('datax')
     pred2 = result.get()
     # -- fix: the original asserted the identical condition twice
     #    ("(1)" and "(2)"); a single assertion is sufficient
     self.assertTrue(
         (pred == pred2).all(), "runtimes prediction is different(1)")
Esempio n. 9
0
 def test_predict(self):
     """ predict via the runtime, with stored and locally provided data

     The runtime prediction must match a locally fitted model's
     prediction for both input variants.
     """
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x, 'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a model locally, fit it, store in Omega
     lr = LinearRegression()
     lr.fit(X, Y)
     pred = lr.predict(X)
     om.models.put(lr, 'mymodel')
     self.assertIn('mymodel', om.models.list('*'))
     # have Omega predict it
     # -- using data already in Omega
     result = om.runtime.model('mymodel').predict('datax')
     pred1 = result.get()
     # -- using data provided locally
     #    note this is the same as
     #        om.datasets.put(X, 'foo')
     #        om.runtimes.model('mymodel').predict('foo')
     result = om.runtime.model('mymodel').predict(X)
     pred2 = result.get()
     self.assertTrue((pred == pred1).all(),
                     "runtimes prediction is different(1)")
     self.assertTrue((pred == pred2).all(),
                     "runtimes prediction is different(2)")
Esempio n. 10
0
    def setUp(self):
        """ register the tensorflow saved-model backend on a clean store """
        from omegaml.backends.tensorflow.tfsavedmodel import \
            TensorflowSavedModelBackend as saved_model_backend

        om = self.om = Omega()
        om.models.register_backend(saved_model_backend.KIND, saved_model_backend)
        self.clean()
Esempio n. 11
0
 def test_fit_pipeline(self):
     """ fit and predict an sklearn Pipeline via the runtime

     The runtime prediction must match a locally fitted pipeline's
     prediction.
     """
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a pipeline locally, store (unfitted) in Omega
     p = Pipeline([
         ('lr', LinearRegression()),
     ])
     om.models.put(p, 'mymodel2')
     self.assertIn('mymodel2', om.models.list('*'))
     # predict locally for comparison
     p.fit(reshaped(X), reshaped(Y))
     pred = p.predict(reshaped(X))
     # have Omega fit the model then predict
     result = om.runtime.model('mymodel2').fit('datax', 'datay')
     result.get()
     result = om.runtime.model('mymodel2').predict('datax')
     pred1 = result.get()
     self.assertTrue(
         (pred == pred1).all(), "runtimes prediction is different(1)")
Esempio n. 12
0
 def test_default_bucket_fails(self):
     """ requesting a model stored in another bucket fails via the REST api

     A model stored in the default bucket is not visible when the request
     carries a different 'bucket' header, and becomes visible again once
     the header is removed.
     """
     # put a model in the default bucket
     om = Omega()
     X = np.arange(10).reshape(-1, 1)
     y = X * 2
     # train model locally
     clf = LinearRegression()
     clf.fit(X, y)
     result = clf.predict(X)
     # store model in om
     om.models.put(clf, 'regression')
     resp = self.client.put('/api/v1/model/regression/predict',
                            json={
                                'columns': ['v'],
                                'data': [dict(v=[5])],
                            },
                            auth=self.auth,
                            headers=self._headers)
     # we expect an error because the model does not exist in the default bucket
     self.assertEqual(resp.status_code, 500)
     # see if we can get it to predict with the correct bucket (all other tests do this)
     # -- note we simply remove the 'bucket' header which reverts to the default
     resp = self.client.put('/api/v1/model/regression/predict',
                            json={
                                'columns': ['v'],
                                'data': dict(v=[5])
                            },
                            auth=self.auth)
     self.assertEqual(resp.status_code, 200)
Esempio n. 13
0
 def test_promotion_to_other_db_works(self):
     """ promote() copies a model and a dataset to another database """
     om = self.om
     other = Omega(mongo_url=om.mongo_url + '_test')
     reg = LinearRegression()
     om.models.put(reg, 'mymodel')
     om.models.promote('mymodel', other.models)
     # -- fix: the original ran promote without verifying any outcome;
     #    assert the promoted objects exist in the target database
     self.assertIn('mymodel', other.models.list())
     om.datasets.put(['foo'], 'foo')
     om.datasets.promote('foo', other.datasets)
     self.assertIn('foo', other.datasets.list())
Esempio n. 14
0
 def test_predict_multiple_samples(self):
     """ predict accepts multiple samples in one call """
     om = Omega()
     reg = LinearRegression()
     df = pd.DataFrame({'x': range(10)})
     df['y'] = df['x'] * 2 + 3
     reg.fit(df[['x']], df['y'])
     om.models.put(reg, 'regmodel')
     result = om.runtime.model('regmodel').predict([[5], [6]]).get()
     # -- fix: the original never asserted on the result; the model was
     #    trained on exact linear data y = 2x + 3 => 13 and 15
     flat = np.asarray(result).ravel()
     self.assertEqual(len(flat), 2)
     self.assertAlmostEqual(float(flat[0]), 13.0, places=5)
     self.assertAlmostEqual(float(flat[1]), 15.0, places=5)
Esempio n. 15
0
    def setUp(self):
        """ register the TF estimator backend on a clean store, enable eager mode """
        from omegaml.backends.tensorflow.tfestimatormodel import \
            TFEstimatorModelBackend as estimator_backend

        om = self.om = Omega()
        om.models.register_backend(estimator_backend.KIND, estimator_backend)
        self.clean()
        tf_perhaps_eager_execution()
Esempio n. 16
0
 def setUp(self):
     """ register the keras backends on a clean store, enable eager mode """
     om = self.om = Omega()
     # both the plain keras and the saved-model backend are required
     for backend in (TensorflowKerasBackend, TensorflowKerasSavedModelBackend):
         om.models.register_backend(backend.KIND, backend)
     self.clean()
     tf_perhaps_eager_execution()
Esempio n. 17
0
 def setUp(self):
     """ prepare the default and 'restore' buckets plus an empty export archive """
     self.om = om = Omega()
     # 'restore' is a second bucket used as the import/restore target
     self.om_restore = self.om['restore']
     for omx in (self.om, self.om_restore):
         self._apply_store_mixin(omx)
     self.clean()
     self.clean(bucket='restore')
     # start each test from an empty export archive
     with OmegaExportArchive('/tmp/test', None) as arc:
         arc.clear()
Esempio n. 18
0
 def setUp(self):
     """ configure the pip-sourced package backend on a clean database """
     self.defaults = settings()
     OMEGA_STORE_BACKENDS = self.defaults.OMEGA_STORE_BACKENDS
     self.backend = backend = 'omegaml.backends.package.PythonPipSourcedPackageData'
     # register the backend only if not already configured in defaults
     if PythonPipSourcedPackageData.KIND not in OMEGA_STORE_BACKENDS:
         OMEGA_STORE_BACKENDS[PythonPipSourcedPackageData.KIND] = backend
     self.om = Omega()
     delete_database()
     # ensure the local package install directory exists
     self.pkgsdir = self.om.scripts.get_backend_bykind(
         PythonPipSourcedPackageData.KIND).packages_path
     mkdirs(self.pkgsdir)
Esempio n. 19
0
 def test_mapreduce_getall(self):
     """ getall() returns the results of every task run in a mapreduce context """
     om = Omega()
     code = """print('hello')"""
     om.jobs.create(code, 'myjob')
     # --mapreduce
     with om.runtime.mapreduce() as crt:
         # -- fix: 'myjob' was a needless f-string with no placeholders,
         #    and the loop variable was unused
         for _ in range(5):
             om.runtime.job('myjob').run()
         result = crt.run()
     results = result.getall()
     self.assertEqual(len(results), 5)
Esempio n. 20
0
    def setUp(self):
        """ register the keras backends on a clean store, enable eager mode """
        from omegaml.backends.tensorflow.tfkeras import TensorflowKerasBackend
        from omegaml.backends.tensorflow.tfkerassavedmodel import TensorflowKerasSavedModelBackend

        om = self.om = Omega()
        # both the plain keras and the saved-model backend are required
        for backend in (TensorflowKerasBackend, TensorflowKerasSavedModelBackend):
            om.models.register_backend(backend.KIND, backend)
        self.clean()
        tf_perhaps_eager_execution()
Esempio n. 21
0
 def test_defaults_repr(self):
     """ repr(om.defaults) must not raise RecursionError """
     # ensure accessing defaults in shell does not cause RecursionError
     om = Omega()
     try:
         repr(om.defaults)
     except RecursionError:
         # -- fix: fail explicitly instead of tracking a not_raised flag;
         #    the original also left the exception variable and the repr
         #    result unused
         self.fail('repr(om.defaults) raised RecursionError')
Esempio n. 22
0
 def test_fit(self):
     """ fit and predict via the runtime, with stored and local data

     Checks that an unfitted model raises NotFittedError, that fitting
     records metaX/metaY references in the model metadata, and that the
     runtime predictions match a locally fitted model.
     """
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a model locally, store (unfitted) in Omega
     lr = LinearRegression()
     om.models.put(lr, 'mymodel2')
     self.assertIn('mymodel2', om.models.list('*'))
     # predict locally for comparison
     lr.fit(X, Y)
     pred = lr.predict(X)
     # try predicting without fitting
     with self.assertRaises(NotFittedError):
         result = om.runtime.model('mymodel2').predict('datax')
         result.get()
     # have Omega fit the model then predict
     result = om.runtime.model('mymodel2').fit('datax', 'datay')
     result.get()
     # check the new model version metadata includes the datax/y references
     meta = om.models.metadata('mymodel2')
     self.assertIn('metaX', meta.attributes)
     self.assertIn('metaY', meta.attributes)
     # -- using data already in Omega
     result = om.runtime.model('mymodel2').predict('datax')
     pred1 = result.get()
     # -- using data provided locally
     #    note this is the same as
     #        om.datasets.put(X, 'foo')
     #        om.runtimes.model('mymodel2').predict('foo')
     result = om.runtime.model('mymodel2').fit(X, Y)
     result = om.runtime.model('mymodel2').predict(X)
     pred2 = result.get()
     # -- check the local data provided to fit was stored as intended
     #    (locally passed data lands in temporary _fitX/_fitY collections)
     meta = om.models.metadata('mymodel2')
     self.assertIn('metaX', meta.attributes)
     self.assertIn('metaY', meta.attributes)
     self.assertIn('_fitX', meta.attributes.get('metaX').get('collection'))
     self.assertIn('_fitY', meta.attributes.get('metaY').get('collection'))
     self.assertTrue(
         (pred == pred1).all(), "runtimes prediction is different(1)")
     self.assertTrue(
         (pred == pred2).all(), "runtimes prediction is different(2)")
Esempio n. 23
0
            def consumer(q, url):
                """ consume the 'test' stream, writing windows to a dataset sink """
                # note the stream decorator blocks the consumer and runs the decorated
                # function asynchronously upon the window criteria is satisfied
                om = Omega(mongo_url=url)

                # window of size 2; keep=True retains processed data, results
                # are written to the 'consumer' dataset via the sink
                @streaming('test',
                           size=2,
                           url=url,
                           keep=True,
                           queue=q,
                           sink=DatasetSink(om, 'consumer'))
                def myprocess(window):
                    # emit a marker plus the window's data for later assertions
                    return {'myprocess': True, 'data': window.data}
Esempio n. 24
0
    def test_task_sequence(self):
        """ run ping, fit and predict as one sequential task chain

        The sequence's final result is the prediction, which must match
        the training targets for exact linear data.
        """
        om = Omega()
        sample = pd.DataFrame({'x': range(1, 10), 'y': range(5, 14)})
        om.datasets.put(sample, 'sample')
        om.models.put(LinearRegression(), 'regmodel')
        with om.runtime.sequence() as chain:
            chain.ping(wait=False)
            chain.model('regmodel').fit('sample[x]', 'sample[y]')
            chain.model('regmodel').predict('sample[x]')
            outcome = chain.run()

        predicted = outcome.get()
        assert_array_almost_equal(sample['y'].values, predicted[:, 0])
Esempio n. 25
0
 def test_partial_fit(self):
     """ partial_fit with successive mini batches steadily reduces the mse

     Fits on a small initial slice, then feeds sliding-window batches via
     partial_fit, asserting the mse decreases at every step.
     """
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     # only the first two rows are used for the initial fit
     X = df[['x']][0:2]
     Y = df[['y']][0:2]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(df[['x']], 'datax-full')
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a model locally, store (unfitted) in Omega
     # -- ignore warnings on y shape
     import warnings
     warnings.filterwarnings("ignore", category=DataConversionWarning)
     lr = SGDRegressor()
     om.models.put(lr, 'mymodel2')
     # have Omega fit the model to get a start, then predict
     result = om.runtime.model('mymodel2').fit('datax', 'datay')
     result.get()
     # check the new model version metadata includes the datax/y references
     result = om.runtime.model('mymodel2').predict('datax-full')
     pred1 = result.get()
     # the initial two-row fit must be inaccurate on the full data
     mse = mean_squared_error(df.y, pred1)
     self.assertGreater(mse, 90)
     # fit mini batches add better training data, update model
     batch_size = 2
     for i, start in enumerate(range(0, len(df))):
         previous_mse = mse
         # sliding window of batch_size rows as the next training batch
         X = df[['x']][start:start + batch_size]
         Y = df[['y']][start:start + batch_size]
         om.datasets.put(X, 'datax-update', append=False)
         om.datasets.put(Y, 'datay-update', append=False)
         result = om.runtime.model('mymodel2').partial_fit(
             'datax-update', 'datay-update')
         result.get()
         # check the new model version metadata includes the datax/y
         # references
         result = om.runtime.model('mymodel2').predict('datax-full')
         pred1 = result.get()
         mse = mean_squared_error(df.y, pred1)
         self.assertLess(mse, previous_mse)
     # mse == 0 is most accurate the best
     self.assertLess(mse, 1.0)
Esempio n. 26
0
def _get_omega_from_config(configfile, qualifier=None):
    """Build an Omega instance from a yaml config file.

    Reads *configfile*, selects the section for *qualifier* (defaults to
    'default'), applies it to the current defaults and returns an Omega
    configured accordingly.
    """
    from omegaml import Omega
    from omegaml import settings, _base_config
    defaults = settings()
    with open(configfile, 'r') as fin:
        all_configs = yaml.safe_load(fin)
    qualifier = qualifier or 'default'
    # the default section may be implicit, i.e. the file root is the config
    if qualifier == 'default':
        selected = all_configs.get(qualifier, all_configs)
    else:
        selected = all_configs[qualifier]
    _base_config.update_from_dict(selected, attrs=defaults)
    settings(reload=True)
    return Omega(defaults=defaults)
Esempio n. 27
0
def get_omega_from_apikey(userid,
                          apikey,
                          api_url=None,
                          requested_userid=None,
                          qualifier=None,
                          view=False):
    """
    setup an Omega instance from userid and apikey

    :param userid: the userid
    :param apikey: the apikey
    :param api_url: the api URL
    :param requested_userid: the userid to request config for. in this case userid
      and apikey must for a staff user for the request to succeed
    :param qualifier: the database qualifier requested. defaults to 'default'
    :param view: passed through to the user config api request
    :returns: the Omega instance configured for the given user
    """
    from omegaml import Omega
    from omegaml import settings, _base_config

    defaults = settings()
    qualifier = qualifier or 'default'
    api_url = api_url or defaults.OMEGA_RESTAPI_URL
    # a http(s) url or a test run retrieves the user config from the rest api
    if api_url.startswith('http') or any('test' in v for v in sys.argv):
        api_auth = OmegaRestApiAuth(userid, apikey)
        configs = get_user_config_from_api(api_auth,
                                           api_url=api_url,
                                           requested_userid=requested_userid,
                                           view=view)
        configs = configs['objects'][0]['data']
    elif api_url == 'local':
        # 'local' takes all OMEGA_* settings from the current defaults
        configs = {
            k: getattr(defaults, k)
            for k in dir(defaults) if k.startswith('OMEGA')
        }
    else:
        raise ValueError('invalid api_url {}'.format(api_url))
    # the default qualifier's config may be implicit, i.e. the root dict
    if qualifier == 'default':
        config = configs.get(qualifier, configs)
    else:
        config = configs[qualifier]
    _base_config.update_from_dict(config)
    settings(reload=True)
    om = Omega(defaults=defaults)
    return om
Esempio n. 28
0
 def test_gridsearch(self):
     """ gridsearch via the runtime stores a GridSearchCV model reference """
     X, y = make_classification()
     logreg = LogisticRegression(solver='liblinear')
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     # run celery tasks synchronously in-process
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.models.put(logreg, 'logreg')
     params = {'C': [0.1, 0.5, 1.0]}
     # gridsearch on runtimes
     om.runtime.model('logreg').gridsearch(X, y, parameters=params).get()
     meta = om.models.metadata('logreg')
     # check gridsearch was saved
     self.assertIn('gridsearch', meta.attributes)
     self.assertEqual(len(meta.attributes['gridsearch']), 1)
     self.assertIn('gsModel', meta.attributes['gridsearch'][0])
     # check we can get back the gridsearch model
     gs_model = om.models.get(meta.attributes['gridsearch'][0]['gsModel'])
     self.assertIsInstance(gs_model, GridSearchCV)
Esempio n. 29
0
 def test_task_callback(self):
     """ a callback script runs after each task, appending to its results dataset """
     om = Omega()
     # install the callback demo package shipped with omegaml as a script
     basepath = os.path.join(
         os.path.dirname(sys.modules['omegaml'].__file__), 'example')
     pkgpath = os.path.abspath(os.path.join(basepath, 'demo', 'callback'))
     pkgsrc = 'pkg://{}'.format(pkgpath)
     om.scripts.put(pkgsrc, 'callback')
     df = pd.DataFrame({'x': range(1, 10), 'y': range(5, 14)})
     lr = LinearRegression()
     lr.fit(df[['x']], df['y'])
     om.datasets.put(df, 'sample')
     om.models.put(lr, 'regmodel')
     # each predict with the callback adds one entry to 'callback_results'
     result = (om.runtime.callback('callback').model('regmodel').predict(
         'sample[x]').get())
     self.assertEqual(len(om.datasets.get('callback_results')), 1)
     result = (om.runtime.callback('callback').model('regmodel').predict(
         'sample[x]').get())
     self.assertEqual(len(om.datasets.get('callback_results')), 2)
Esempio n. 30
0
 def test_logging_mode(self):
     """ test task python output can be logged for all requests """
     om = Omega()
     om.logger.reset()
     # -- request logging
     om.runtime.mode(local=True, logging=True)
     om.runtime.ping(fox='bar')
     # NOTE(review): the expected record counts (4 resp. 3 below) are tied
     # to the current task logging implementation -- confirm if they change
     self.assertEqual(len(om.logger.dataset.get(level='INFO')), 4)
     # -- switch off logging
     om.logger.reset()
     om.runtime.mode(local=True, logging=False)
     om.runtime.ping(fox='bar')
     self.assertEqual(len(om.logger.dataset.get(level='INFO')), 0)
     # -- request specific logger
     om.logger.reset()
     om.runtime.mode(local=True, logging=('celery', 'DEBUG'))
     om.runtime.ping(fox='bar')
     self.assertEqual(len(om.logger.dataset.get(level='DEBUG')), 3)