Exemple #1
0
 def test_fit_pipeline(self):
     """Fit a stored, unfitted pipeline via the runtime and compare its
     predictions with the same pipeline fitted locally."""
     # sample data following y = 2 * x
     values = np.array(list(range(0, 10)))
     frame = pd.DataFrame({'x': values, 'y': values * 2})
     X, Y = frame[['x']], frame[['y']]
     # set up Omega with celery's always-eager flag switched on
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     # store both datasets, then read each one back
     for name, data in (('datax', X), ('datay', Y)):
         om.datasets.put(data, name)
     for name in ('datax', 'datay'):
         om.datasets.get(name)
     # the pipeline is stored before it is fitted locally
     pipeline = Pipeline([('lr', LinearRegression())])
     om.models.put(pipeline, 'mymodel2')
     self.assertIn('mymodel2', om.models.list('*'))
     # local reference prediction
     pipeline.fit(reshaped(X), reshaped(Y))
     expected = pipeline.predict(reshaped(X))
     # fit through the runtime, wait for completion, then predict
     om.runtime.model('mymodel2').fit('datax', 'datay').get()
     actual = om.runtime.model('mymodel2').predict('datax').get()
     self.assertTrue(
         (expected == actual).all(), "runtimes prediction is different(1)")
Exemple #2
0
 def fit_transform(self,
                   modelname,
                   Xname,
                   Yname=None,
                   rName=None,
                   pure_python=True,
                   **kwargs):
     """
     Run ``fit_transform`` of a stored model on stored data and re-store
     the model.

     :param modelname: name of the model in the model store
     :param Xname: name of the X dataset in the data store
     :param Yname: optional name of the Y dataset in the data store
     :param rName: optional dataset name under which the result is stored
     :param pure_python: if True, the result is converted with ``.tolist()``
     :param kwargs: passed through to ``model.fit_transform``
     :return: the metadata returned by storing the result if ``rName`` is
         given, otherwise the metadata returned by re-storing the model
     """
     model = self.model_store.get(modelname)
     X, metaX = self.data_store.get(Xname), self.data_store.metadata(Xname)
     Y, metaY = None, None
     if Yname:
         Y, metaY = (self.data_store.get(Yname),
                     self.data_store.metadata(Yname))
     result = model.fit_transform(reshaped(X), reshaped(Y), **kwargs)
     # store information required for retraining
     model_attrs = {
         'metaX': metaX.to_mongo(),
         'metaY': metaY.to_mongo() if metaY is not None else None
     }
     try:
         import sklearn
         model_attrs['scikit-learn'] = sklearn.__version__
     except Exception:
         # best effort only: the version is informational. Catching
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         model_attrs['scikit-learn'] = 'unknown'
     meta = self.model_store.put(model, modelname, attributes=model_attrs)
     if pure_python:
         result = result.tolist()
     if rName:
         # the result dataset's metadata replaces the model metadata
         meta = self.data_store.put(result, rName)
     # the caller always receives metadata, never the raw transform output
     result = meta
     return result
Exemple #3
0
 def test_predict_hdf_dataframe(self):
     """Predict through the runtime on data stored with ``as_hdf=True`` and
     compare with the prediction computed locally."""
     # create some data following y = 2 * x
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df['x']
     Y = df['y']
     # put into Omega -- assume a client with pandas, scikit learn
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.pure_python = True
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax', as_hdf=True)
     om.datasets.put(Y, 'datay', as_hdf=True)
     # fit and predict locally, then store the fitted model
     lr = LinearRegression()
     lr.fit(reshaped(X), reshaped(Y))
     pred = lr.predict(reshaped(X))
     om.models.put(lr, 'mymodel2')
     # predict through the runtime, referring to the stored dataset by name
     result = om.runtime.model('mymodel2').predict('datax')
     pred2 = result.get()
     # one comparison is enough; the original repeated the identical check
     self.assertTrue(
         (pred == pred2).all(), "runtimes prediction is different(1)")
Exemple #4
0
 def partial_fit(self,
                 modelname,
                 Xname,
                 Yname=None,
                 pure_python=True,
                 **kwargs):
     """
     Run ``partial_fit`` of a stored model on stored data and re-store the
     model.

     :param modelname: name of the model in the model store
     :param Xname: name of the X dataset in the data store
     :param Yname: optional name of the Y dataset in the data store
     :param pure_python: accepted but not used by this method
     :param kwargs: passed through ``maybe_chunked`` alongside
         ``model.partial_fit``
     :return: the metadata returned by re-storing the model
     """
     model = self.model_store.get(modelname)
     X, metaX = self.data_store.get(Xname), self.data_store.metadata(Xname)
     Y, metaY = None, None
     if Yname:
         Y, metaY = (self.data_store.get(Yname),
                     self.data_store.metadata(Yname))
     process(
         maybe_chunked(model.partial_fit,
                       lambda X, Y: as_args(reshaped(X), reshaped(Y)), X, Y,
                       **kwargs))
     # store information required for retraining
     model_attrs = {
         'metaX': metaX.to_mongo(),
         'metaY': metaY.to_mongo() if metaY is not None else None,
     }
     try:
         import sklearn
         model_attrs['scikit-learn'] = sklearn.__version__
     except Exception:
         # best effort only: the version is informational. Catching
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         model_attrs['scikit-learn'] = 'unknown'
     meta = self.model_store.put(model, modelname, attributes=model_attrs)
     return meta
Exemple #5
0
    def score(self,
              modelname,
              Xname,
              Yname=None,
              rName=None,
              pure_python=True,
              **kwargs):
        """
        Score a stored model on stored data and log the score as a metric.

        :param modelname: name of the model in the model store
        :param Xname: name of the X dataset in the data store
        :param Yname: name of the Y dataset in the data store
        :param rName: optional name under which the score is put into the
            model store
        :param pure_python: accepted but not used by this method
        :param kwargs: passed through ``maybe_chunked`` alongside
            ``model.score``
        :return: the value returned by ``process``
        """
        model = self.model_store.get(modelname)
        X = self.data_store.get(Xname)
        Y = self.data_store.get(Yname)

        def store(result):
            # without a result name the value is handed back unchanged
            if not rName:
                return result
            return self.model_store.put(result, rName)

        def to_args(X, Y):
            return as_args(reshaped(X), reshaped(Y))

        scoring = maybe_chunked(model.score, to_args, X, Y, **kwargs)
        result = process(scoring, fn=store, keep_last=True)

        # record the outcome with the tracking experiment
        with self.tracking as exp:
            exp.log_metric('score', result)
        return result
Exemple #6
0
        def test_save_mlflow_saved_model_file(self):
            """ test deploying model saved by MLFlow, by file """
            import mlflow

            # train a small regression model on y = 2x + 3 and save it with
            # MLflow into a clean directory
            model_path = os.path.join(omegaml.defaults.OMEGA_TMP, 'mymodel')
            model = LinearRegression()
            X = pd.Series(range(0, 10))
            Y = pd.Series(X) * 2 + 3
            model.fit(reshaped(X), reshaped(Y))
            rmtree(model_path, ignore_errors=True)
            mlflow.sklearn.save_model(model, model_path)

            om = self.om
            # test multiple ways of storing: the MLmodel file inside the
            # saved directory, and the directory itself
            for fn in ('mlflow://' + os.path.join(model_path, 'MLmodel'),
                       'mlflow://' + model_path):
                # start from a clean slate so each reference is stored anew
                om.models.drop('mymodel', force=True)
                # the mlflow:// reference is given without an explicit kind
                meta = om.models.put(fn, 'mymodel')
                # checked once; the original repeated this exact assertion
                self.assertEqual(meta.kind, MLFlowModelBackend.KIND)
                model_ = om.models.get('mymodel')
                self.assertIsInstance(model_, mlflow.pyfunc.PyFuncModel)
                # direct and runtime predictions must agree
                yhat_direct = model_.predict(reshaped(X))
                yhat_rt = om.runtime.model('mymodel').predict(X).get()
                assert_array_equal(yhat_rt, yhat_direct)
Exemple #7
0
        def test_save_mlflow_model_run(self):
            """ test deploying an MLModel from a tracking server URI """
            import mlflow

            mlflow.set_tracking_uri(self.mlflow_tracking_db)
            mlflow.set_registry_uri(self.mlflow_registry_db)

            with mlflow.start_run():
                model = LinearRegression()
                X = pd.Series(range(0, 10))
                Y = pd.Series(X) * 2 + 3
                model.fit(reshaped(X), reshaped(Y))
                # log inside the run context so the model belongs to this
                # run; logging after the block exits would not use the run
                # started here
                mlflow.sklearn.log_model(sk_model=model,
                                         artifact_path='sklearn-model',
                                         registered_model_name='sklearn-model')

            # simulate a new session on another device (tracking URI comes from repo)
            mlflow.set_tracking_uri(None)

            om = self.om
            # use the tracking URI to store the model as a reference to a MLFlow tracking server
            meta = om.models.put('mlflow+models://sklearn-model/1',
                                 'sklearn-model')
            self.assertEqual(meta.kind, MLFlowRegistryBackend.KIND)
            # simulate a new mlflow session
            model_ = om.models.get('sklearn-model')
            self.assertIsInstance(model_, mlflow.pyfunc.PyFuncModel)
            # direct and runtime predictions must agree
            yhat_direct = model_.predict(reshaped(X))
            yhat_rt = om.runtime.model('sklearn-model').predict(X).get()
            assert_array_equal(yhat_rt, yhat_direct)
Exemple #8
0
 def score(
         self, modelname, Xname, Yname, rName=None, pure_python=True,
         **kwargs):
     """
     Score a stored model on stored data.

     :param modelname: name of the model in the model store
     :param Xname: name of the X dataset in the data store
     :param Yname: name of the Y dataset in the data store
     :param rName: optional name under which the score is put into the
         model store
     :param pure_python: accepted but not used by this method
     :param kwargs: passed through to ``model.score``
     :return: the score, or the metadata returned by storing it when
         ``rName`` is given
     """
     estimator = self.model_store.get(modelname)
     features = self.data_store.get(Xname)
     targets = self.data_store.get(Yname)
     outcome = estimator.score(reshaped(features), reshaped(targets),
                               **kwargs)
     # without a result name the plain score is returned
     if not rName:
         return outcome
     return self.model_store.put(outcome, rName)
Exemple #9
0
        def test_inferred_model_flavor(self):
            """ test deploying an arbitrary model by inferring MLFlow flavor """
            import mlflow

            # train a small regression model on y = 2x + 3
            features = pd.Series(range(0, 10))
            targets = pd.Series(features) * 2 + 3
            regressor = LinearRegression()
            regressor.fit(reshaped(features), reshaped(targets))
            # store the plain estimator, asking explicitly for the mlflow kind
            om = self.om
            meta = om.models.put(regressor, 'mymodel', kind='mlflow.model')
            self.assertEqual(meta.kind, MLFlowModelBackend.KIND)
            # the model read back is expected to be a pyfunc model
            restored = om.models.get('mymodel')
            self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
Exemple #10
0
 def decision_function(self, modelname, Xname, rName=None, pure_python=True, **kwargs):
     """
     Call ``decision_function`` of a stored model on stored data.

     :param modelname: name of the model in the model store
     :param Xname: name of the X dataset in the data store
     :param rName: optional dataset name under which the result is stored
     :param pure_python: if True, the result is converted with ``.tolist()``
     :param kwargs: passed through to ``model.decision_function``
     :return: the result, or the metadata returned by storing it when
         ``rName`` is given
     """
     estimator = self.model_store.get(modelname)
     features = self.data_store.get(Xname)
     scores = estimator.decision_function(reshaped(features), **kwargs)
     if pure_python:
         scores = scores.tolist()
     # without a result name the values themselves are returned
     if not rName:
         return scores
     return self.data_store.put(scores, rName)
Exemple #11
0
    def fit_transform(self,
                      modelname,
                      Xname,
                      Yname=None,
                      rName=None,
                      pure_python=True,
                      **kwargs):
        """
        Run ``fit_transform`` of a stored model on stored data and re-store
        the model.

        :param modelname: name of the model in the model store
        :param Xname: name of the X dataset in the data store
        :param Yname: optional name of the Y dataset in the data store
        :param rName: optional dataset name under which the result is stored
        :param pure_python: if True, ``store`` converts the result with
            ``.tolist()``
        :param kwargs: passed through ``maybe_chunked`` alongside
            ``model.fit_transform``
        :return: the value returned by ``process`` if ``rName`` is given,
            otherwise the metadata returned by re-storing the model
        """
        model = self.model_store.get(modelname)
        X, metaX = self.data_store.get(Xname), self.data_store.metadata(Xname)
        Y, metaY = None, None
        if Yname:
            Y, metaY = (self.data_store.get(Yname),
                        self.data_store.metadata(Yname))

        def store(result):
            # post-processing handed to ``process`` as ``fn``
            if pure_python:
                result = result.tolist()
            if rName:
                meta = self.data_store.put(result, rName)
                result = meta
            return result

        result = process(maybe_chunked(
            model.fit_transform,
            lambda X, Y: as_args(reshaped(X), reshaped(Y)), X, Y, **kwargs),
                         fn=store,
                         keep_last=True)

        # store information required for retraining
        model_attrs = {
            'metaX': metaX.to_mongo(),
            'metaY': metaY.to_mongo() if metaY is not None else None
        }
        try:
            import sklearn
            model_attrs['scikit-learn'] = sklearn.__version__
        except Exception:
            # best effort only: the version is informational. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            model_attrs['scikit-learn'] = 'unknown'
        model_meta = self.model_store.put(model,
                                          modelname,
                                          attributes=model_attrs)
        return result if rName else model_meta
Exemple #12
0
 def predict(
         self, modelname, Xname, rName=None, pure_python=True, **kwargs):
     """
     Predict with a stored model on stored data.

     :param modelname: name of the model in the model store
     :param Xname: name of the dataset in the data store
     :param rName: optional dataset name under which the result is stored
     :param pure_python: if True, the result is converted with ``.tolist()``
     :param kwargs: passed through to ``model.predict``
     :return: the prediction, or the metadata returned by storing it when
         ``rName`` is given
     """
     features = self.data_store.get(Xname)
     estimator = self.model_store.get(modelname)
     predicted = estimator.predict(reshaped(features), **kwargs)
     if pure_python:
         predicted = predicted.tolist()
     # without a result name the prediction itself is returned
     if not rName:
         return predicted
     return self.data_store.put(predicted, rName)
Exemple #13
0
        def test_save_mlflow_saved_model_path(self):
            """ test deploying a model saved by MLflow, from path """
            import mlflow

            # train a small regression model on y = 2x + 3
            features = pd.Series(range(0, 10))
            targets = pd.Series(features) * 2 + 3
            regressor = LinearRegression()
            regressor.fit(reshaped(features), reshaped(targets))
            # save it with MLflow into a clean directory
            model_path = os.path.join(omegaml.defaults.OMEGA_TMP, 'mymodel')
            rmtree(model_path, ignore_errors=True)
            mlflow.sklearn.save_model(regressor, model_path)

            # a bare path could be some other kind of file, so the kind is
            # given explicitly
            om = self.om
            meta = om.models.put(model_path, 'mymodel', kind='mlflow.model')
            self.assertEqual(meta.kind, MLFlowModelBackend.KIND)
            restored = om.models.get('mymodel')
            self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
            # direct and runtime predictions must agree
            yhat_direct = restored.predict(reshaped(features))
            yhat_rt = om.runtime.model('mymodel').predict(features).get()
            assert_array_equal(yhat_rt, yhat_direct)
Exemple #14
0
        def test_save_mlflow_pyfunc_model(self):
            """ test deploying a custom MLFlow PythonModel"""
            import mlflow

            class MyModel(mlflow.pyfunc.PythonModel):
                # identity model: predict hands the input data straight back
                def predict(self, context, data):
                    return data

            inputs = pd.Series(range(10))
            om = self.om
            # stored without an explicit kind
            meta = om.models.put(MyModel(), 'mymodel')
            self.assertEqual(meta.kind, MLFlowModelBackend.KIND)
            restored = om.models.get('mymodel')
            self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
            # the runtime result is compared with the reshaped direct result
            yhat_direct = restored.predict(inputs)
            yhat_rt = om.runtime.model('mymodel').predict(inputs).get()
            assert_array_equal(yhat_rt, reshaped(yhat_direct))
Exemple #15
0
    def predict_proba(self,
                      modelname,
                      Xname,
                      rName=None,
                      pure_python=True,
                      **kwargs):
        """
        Call ``predict_proba`` of a stored model on stored data.

        :param modelname: name of the model in the model store
        :param Xname: name of the dataset in the data store
        :param rName: optional dataset name under which the result is stored
        :param pure_python: if True, ``store`` converts the result with
            ``.tolist()``
        :param kwargs: passed through ``maybe_chunked`` alongside
            ``model.predict_proba``
        :return: the value returned by ``process``
        """
        data = self.data_store.get(Xname)
        model = self.model_store.get(modelname)

        def store(result):
            # post-processing handed to ``process`` as ``fn``
            if pure_python:
                result = result.tolist()
            if rName:
                meta = self.data_store.put(result, rName)
                result = meta
            return result

        result = process(maybe_chunked(model.predict_proba,
                                       lambda data: as_args(reshaped(data)),
                                       data, **kwargs),
                         fn=store,
                         keep_last=True)
        # the result was computed but never returned, so callers got None
        return result
Exemple #16
0
    def decision_function(self,
                          modelname,
                          Xname,
                          rName=None,
                          pure_python=True,
                          **kwargs):
        """
        Call ``decision_function`` of a stored model on stored data.

        :param modelname: name of the model in the model store
        :param Xname: name of the X dataset in the data store
        :param rName: optional dataset name under which the result is stored
        :param pure_python: if True, ``store`` converts the result with
            ``.tolist()``
        :param kwargs: passed through ``maybe_chunked`` alongside
            ``model.decision_function``
        :return: the value returned by ``process``
        """
        model = self.model_store.get(modelname)
        X = self.data_store.get(Xname)

        def store(result):
            # post-processing handed to ``process`` as ``fn``
            converted = result.tolist() if pure_python else result
            if not rName:
                return converted
            return self.data_store.put(converted, rName)

        def to_args(X):
            return as_args(reshaped(X))

        evaluation = maybe_chunked(model.decision_function, to_args, X,
                                   **kwargs)
        return process(evaluation, fn=store, keep_last=True)