Example #1
    def test_model_log(self):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            model_path = tmp.path("linear.pkl")
            with open(model_path, "wb") as f:
                pickle.dump(self._linear_lr, f)
            tracking_dir = os.path.abspath(tmp.path("mlruns"))
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            tracking.start_run()
            try:
                pyfunc.log_model(artifact_path="linear",
                                 data_path=model_path,
                                 loader_module=os.path.basename(__file__)[:-3],
                                 code_path=[__file__])

                run_id = tracking.active_run().info.run_uuid
                path = tracking._get_model_log_dir("linear", run_id)
                m = Model.load(os.path.join(path, "MLmodel"))
                print(m.__dict__)
                x = pyfunc.load_pyfunc("linear", run_id=run_id)
                xpred = x.predict(self._X)
                np.testing.assert_array_equal(self._linear_lr_predict, xpred)
            finally:
                tracking.end_run()
                tracking.set_tracking_uri(None)
                # Remove the log directory so that pytest does not pick it up as new tests...
                shutil.rmtree(tracking_dir)
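Example #1 passes the test module itself as the pyfunc loader (loader_module=os.path.basename(__file__)[:-3], with code_path=[__file__] so the file is copied next to the model), so that module must expose the loader hook that pyfunc.load_pyfunc imports and calls. A minimal sketch of such a hook, assuming the pickled scikit-learn model written above and the load_pyfunc(data_path) hook name used by early MLflow releases (later versions renamed it to _load_pyfunc):

import pickle


def load_pyfunc(data_path):
    # data_path is the copy of the data_path artifact logged with the model;
    # here it is assumed to be the pickled estimator written by the test above.
    with open(data_path, "rb") as f:
        return pickle.load(f)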
Example #2
    def test_model_log(self):
        with TempDir(chdr=True, remove_on_exit=True):
            tracking.start_run()
            try:
                sklearn.log_model(sk_model=self._linear_lr,
                                  artifact_path="linear")
                x = sklearn.load_model(
                    "linear", run_id=tracking.active_run().info.run_uuid)
                xpred = x.predict(self._X)
                np.testing.assert_array_equal(self._linear_lr_predict, xpred)
            finally:
                tracking.end_run()
Example #3
def log_model(artifact_path, **kwargs):
    """Export the model in python-function form and log it with current mlflow tracking service.

    Model is exported by calling @save_model and logs the result with @tracking.log_output_files
    """
    with TempDir() as tmp:
        local_path = tmp.path(artifact_path)
        run_id = tracking.active_run().info.run_uuid
        if 'model' in kwargs:
            raise Exception("Unused argument 'model'. log_model creates a new model object")

        save_model(dst_path=local_path, model=Model(artifact_path=artifact_path, run_id=run_id),
                   **kwargs)
        tracking.log_artifacts(local_path, artifact_path)
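The function above shows the save-then-upload pattern used throughout these examples: the flavor-specific save_model writes the whole model into a local temporary directory, and only that finished directory is pushed to the tracking store with tracking.log_artifacts, so a partially written model never reaches the artifact store. A minimal usage sketch under an active run, mirroring Example #1 (the file name and loader module below are illustrative placeholders, not part of the original code):

tracking.start_run()
try:
    log_model(artifact_path="linear",
              data_path="linear.pkl",      # assumed: a model pickled beforehand
              loader_module="my_loader")   # assumed: a module exposing the pyfunc loader hook
finally:
    tracking.end_run()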
Example #4
def test_model_log(model, data, predicted):
    x, y = data
    old_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                mlflow.keras.log_model(model, artifact_path="keras_model")

                # Load model
                model_loaded = mlflow.keras.load_model(
                    "keras_model", run_id=tracking.active_run().info.run_uuid)
                assert all(model_loaded.predict(x) == predicted)

                # Loading pyfunc model
                pyfunc_loaded = mlflow.pyfunc.load_pyfunc(
                    "keras_model", run_id=tracking.active_run().info.run_uuid)
                assert all(pyfunc_loaded.predict(x).values == predicted)
            finally:
                tracking.end_run()
    tracking.set_tracking_uri(old_uri)
Example #5
def test_model_log(tmpdir):
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    X = iris.data  # use all four iris features
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    model_path = tmpdir.mkdir("model")
    assembler = VectorAssembler(inputCols=iris.feature_names,
                                outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Score the training data with the fitted pipeline
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    old_tracking_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        try:
            tracking_dir = os.path.abspath(str(tmpdir.mkdir("mlruns")))
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            if should_start_run:
                tracking.start_run()
            sparkm.log_model(artifact_path="model", spark_model=model)
            run_id = tracking.active_run().info.run_uuid
            x = pyfunc.load_pyfunc("model", run_id=run_id)
            preds2 = x.predict(pandas_df)
            assert preds1 == preds2
            reloaded_model = sparkm.load_model("model", run_id=run_id)
            preds_df_1 = reloaded_model.transform(spark_df)
            preds3 = [
                x.prediction
                for x in preds_df_1.select("prediction").collect()
            ]
            assert preds1 == preds3
        finally:
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_uri)
            shutil.rmtree(tracking_dir)
Example #6
    def test_model_log(self):
        old_uri = tracking.get_tracking_uri()
        # should_start_run tests whether or not calling log_model() automatically starts a run.
        for should_start_run in [False, True]:
            with TempDir(chdr=True, remove_on_exit=True) as tmp:
                try:
                    tracking.set_tracking_uri("test")
                    if should_start_run:
                        tracking.start_run()
                    sklearn.log_model(sk_model=self._linear_lr, artifact_path="linear")
                    x = sklearn.load_model("linear", run_id=tracking.active_run().info.run_uuid)
                    xpred = x.predict(self._X)
                    np.testing.assert_array_equal(self._linear_lr_predict, xpred)
                finally:
                    tracking.end_run()
                    tracking.set_tracking_uri(old_uri)
Example #7
    def test_model_log(self):
        old_uri = tracking.get_tracking_uri()
        # should_start_run tests whether or not calling log_model() automatically starts a run.
        for should_start_run in [False, True]:
            with TempDir(chdr=True, remove_on_exit=True) as tmp:
                try:
                    tracking.set_tracking_uri("test")
                    if should_start_run:
                        tracking.start_run()
                    mlflow.h2o.log_model(self.gbm, artifact_path="gbm")

                    # Load model
                    gbm_loaded = mlflow.h2o.load_model("gbm",
                                                       run_id=tracking.active_run().info.run_uuid)
                    assert all(gbm_loaded.predict(self.test).as_data_frame() == self.predicted)
                finally:
                    tracking.end_run()
                    tracking.set_tracking_uri(old_uri)
Example #8
def test_model_log(tmpdir):
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    feature_names = ["0", "1", "2", "3"]
    pandas_df = pd.DataFrame(iris.data,
                             columns=feature_names)  # to make spark_udf work
    pandas_df['label'] = pd.Series(iris.target)
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    assembler = VectorAssembler(inputCols=feature_names, outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Score the training data with the fitted pipeline
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    old_tracking_uri = tracking.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =",
                  dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.mkdir("mlruns")))
                tracking.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    tracking.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                if dfs_tmp_dir:
                    sparkm.log_model(artifact_path=artifact_path,
                                     spark_model=model,
                                     dfs_tmpdir=dfs_tmp_dir)
                else:
                    sparkm.log_model(artifact_path=artifact_path,
                                     spark_model=model)
                run_id = tracking.active_run().info.run_uuid
                # test pyfunc
                x = pyfunc.load_pyfunc(artifact_path, run_id=run_id)
                preds2 = x.predict(pandas_df)
                assert preds1 == preds2
                # test load model
                reloaded_model = sparkm.load_model(artifact_path,
                                                   run_id=run_id)
                preds_df_1 = reloaded_model.transform(spark_df)
                preds3 = [
                    x.prediction
                    for x in preds_df_1.select("prediction").collect()
                ]
                assert preds1 == preds3
                # test spark_udf
                preds4 = score_model_as_udf(artifact_path, run_id, pandas_df)
                assert preds1 == preds4
                # make sure we did not leave any temp files behind
                x = dfs_tmp_dir or sparkm.DFS_TMP
                assert os.path.exists(x)
                assert not os.listdir(x)
                shutil.rmtree(x)
            finally:
                tracking.end_run()
                tracking.set_tracking_uri(old_tracking_uri)
                shutil.rmtree(tracking_dir)
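The last example scores the logged model through a score_model_as_udf helper that is not shown here. A plausible sketch of that helper, assuming it wraps pyfunc.spark_udf from the same MLflow era (the name and argument order follow the call above; the body is an assumption, not the original implementation):

def score_model_as_udf(model_path, run_id, pandas_df):
    spark = pyspark.sql.SparkSession.builder.getOrCreate()
    spark_df = spark.createDataFrame(pandas_df)
    # Wrap the logged model as a Spark UDF and apply it to the feature columns;
    # the label column is dropped so only the four feature columns are passed in.
    feature_cols = [c for c in pandas_df.columns if c != "label"]
    model_udf = pyfunc.spark_udf(spark, model_path, run_id=run_id)
    scored = spark_df.withColumn("prediction", model_udf(*feature_cols))
    return [row.prediction for row in scored.select("prediction").collect()]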