def dbnd_on_pre_init_context(ctx):
    """Switch MLflow to a composite tracking URI that also targets Databand.

    The hook is a no-op unless ``mlflow_tracking.databand_tracking`` is
    enabled and ``core.databand_url`` is set. Otherwise it stores the current
    MLflow tracking URI in the module-level ``_original_mlflow_tracking_uri``
    and installs a composite URI built from the Databand URL and the store
    tracking should be duplicated to.

    :param ctx: context being initialised; not read by this hook.
    :raises DatabandConfigError: when the store to duplicate to is already a
        composite URI.
    """
    global _original_mlflow_tracking_uri

    from mlflow import get_tracking_uri, set_tracking_uri

    tracking_enabled = config.getboolean("mlflow_tracking", "databand_tracking")
    if not tracking_enabled:
        return

    databand_url = config.get("core", "databand_url")
    if not databand_url:
        logger.info(
            "Although 'databand_tracking' was set in 'mlflow_tracking', "
            "dbnd will not use it since 'core.databand_url' was not set."
        )
        return

    # Without an explicit target, duplicate to whatever MLflow currently uses.
    secondary_store_uri = (
        config.get("mlflow_tracking", "duplicate_tracking_to") or get_tracking_uri()
    )

    # A composite URI here means the dbnd store was already set via MLflow config.
    if is_composite_uri(secondary_store_uri):
        raise DatabandConfigError(
            "Config conflict: MLFlow and DBND configs both define dbnd store uri"
        )

    composite_uri = build_composite_uri(databand_url, secondary_store_uri)
    _original_mlflow_tracking_uri = get_tracking_uri()
    set_tracking_uri(composite_uri)
def __init__(self, _tracking_uri=get_pod_uri("mlflow", "5001")):
    """Point MLflow at the tracking server and initialise the client.

    :param _tracking_uri: URI of the MLflow tracking server. The default is
        computed once, when this function is defined, by
        ``get_pod_uri("mlflow", "5001")``.

    A ``UserWarning`` is emitted when the URI reported by MLflow after the
    update differs from the requested one.
    """
    import warnings

    mlflow.set_tracking_uri(_tracking_uri)
    active_uri = mlflow.get_tracking_uri()
    print("Tracking Model Metadata on MLFlow Server @ " + active_uri)

    if active_uri != _tracking_uri:
        # Instantiating ``Warning(...)`` alone is a no-op; the warning has to
        # be issued through the ``warnings`` machinery to reach the user.
        warnings.warn(
            "MLManager doesn't seem to be communicating with the right server endpoint. "
            "Try instantiating this class again!"
        )

    MlflowClient.__init__(self, _tracking_uri)
    self.active_run = None
    self.active_experiment = None
def test_create_new_or_continue_experiment(self, with_uri, name_from_meta):
    """Check the experiment name and tracking URI chosen for a project dir.

    :param with_uri: when truthy, ``MLFLOW_TRACKING_URI`` is set in the
        environment before the experiment is created.
    :param name_from_meta: when truthy, the project directory is created with
        a ``nucleus7_project.json`` file that declares the project name.
    """
    root_dir = self.get_temp_dir()
    projects_dir = os.path.join(root_dir, "projects_dir")
    os.mkdir(projects_dir)
    project_dir = os.path.join(projects_dir, "experiment_test")
    uri_dir = os.path.join(root_dir, "env_uri")

    if name_from_meta:
        os.mkdir(project_dir)
        meta_path = os.path.join(project_dir, "nucleus7_project.json")
        with open(meta_path, "w") as meta_file:
            json.dump({"PROJECT_NAME": "experiment_test_meta"}, meta_file)

    if with_uri:
        os.environ["MLFLOW_TRACKING_URI"] = uri_dir

    mlflow_utils.create_new_or_continue_experiment(project_dir)

    # Read the experiment id from the active run, starting one if necessary.
    if mlflow.active_run():
        experiment_id = mlflow.active_run().info.experiment_id
    else:
        with mlflow.start_run():
            experiment_id = mlflow.active_run().info.experiment_id

    client = mlflow.tracking.MlflowClient()
    actual_name = client.get_experiment(experiment_id).name

    expected_name = "experiment_test_meta" if name_from_meta else "experiment_test"
    expected_uri = uri_dir if with_uri else os.path.join(projects_dir, 'mlruns')

    self.assertEqual(expected_uri, mlflow.get_tracking_uri())
    self.assertEqual(expected_name, actual_name)
def test_kedro_mlflow_config_setup_tracking_priority(mocker, tmp_path, config_dir):
    """Check that the configured tracking URI wins over the credentials file.

    The credentials file declares ``mlruns2`` while the config object is given
    ``mlruns1``; after ``setup`` MLflow must point at ``mlruns1``.

    Args:
        mocker: pytest-mock fixture used to patch the kedro project check.
        tmp_path: temporary directory used as the project root.
        config_dir: fixture that is not referenced in the body (presumably it
            prepares the ``conf`` folders -- confirm against the fixture).
    """
    # Make kedro_mlflow accept any path as the root of a kedro project.
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    credentials = {"my_mlflow_creds": {"mlflow_tracking_uri": "mlruns2"}}
    credentials_file = tmp_path / "conf/base/credentials.yml"
    credentials_file.write_text(yaml.dump(credentials))

    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )
    context = load_context(tmp_path)
    config.setup(context)

    expected_uri = (tmp_path / "mlruns1").as_uri()
    assert mlflow.get_tracking_uri() == expected_uri
def process_log_func(exp_name):
    """Log one small run to ``exp_name`` from the current process.

    The run records the process and parent-process ids as parameters, a random
    integer metric and a text artifact written under ``/tmp``. The experiment
    id is printed afterwards when the experiment can be looked up by name.
    """
    import mlflow
    import random

    mlflow.set_experiment(exp_name)
    # Re-apply the tracking URI that is currently in effect.
    mlflow.set_tracking_uri(mlflow.get_tracking_uri())

    my_pid = os.getpid()
    parent_pid = os.getppid()

    # Create the artifact file that will be attached to the run.
    artifact_path = "/tmp/features-" + str(my_pid) + ".txt"
    features = "rooms, zipcode, median_price, school_rating, transport"
    with open(artifact_path, 'w') as artifact_file:
        artifact_file.write(features)

    print("Running in PID: {}".format(my_pid))
    with mlflow.start_run():
        pid_key = "PID-" + str(my_pid)
        ppid_key = "PPID-" + str(parent_pid)
        mlflow.log_param(pid_key, my_pid)
        mlflow.log_param(ppid_key, parent_pid)
        mlflow.log_metric("metric", random.randint(1, 10))
        mlflow.log_artifact(artifact_path)

    experiment = mlflow.get_experiment_by_name(exp_name)
    if experiment:
        print("In pid: {}, experiment_id: {}".format(my_pid, experiment.experiment_id))
def train_model(X_train: np.ndarray, y_train: np.ndarray) -> LinearRegression:
    """Fit a linear regression model and log it to MLflow.

    The logged model is also registered under the name ``regressor`` unless
    the tracking URI uses the ``file`` scheme.

    Args:
        X_train: Training data of independent features.
        y_train: Training data for price.

    Returns:
        The fitted model.
    """
    model = LinearRegression()
    model.fit(X_train, y_train)

    uri_scheme = urlparse(mlflow.get_tracking_uri()).scheme

    # The model registry does not work with the file store, so the model is
    # only registered for other backends. Other registry workflows are
    # described at https://mlflow.org/docs/latest/model-registry.html#api-workflow
    log_options = {}
    if uri_scheme != "file":
        log_options["registered_model_name"] = "regressor"
    mlflow.sklearn.log_model(model, "model", **log_options)

    return model
def _run_entry_point(command, work_dir, experiment_id, run_id):
    """
    Launch an entry point command in a subprocess and wrap it in a
    SubmittedRun that can be used to query the run's status.

    :param command: Entry point command to run
    :param work_dir: Working directory in which to run the command
    :param experiment_id: MLflow experiment ID passed on through the
                          subprocess environment.
    :param run_id: MLflow run ID associated with the entry point execution.
    """
    env = dict(os.environ)
    env.update(get_run_env_vars(run_id, experiment_id))
    env.update(get_databricks_env_vars(tracking_uri=mlflow.get_tracking_uri()))

    _logger.info("=== Running command '%s' in run with ID '%s' === ", command, run_id)

    # os.name == "nt" means Windows, where the command goes through cmd;
    # everywhere else it is handed to bash.
    if os.name == "nt":
        argv = ["cmd", "/c", command]
    else:
        argv = ["bash", "-c", command]

    process = subprocess.Popen(argv, close_fds=True, cwd=work_dir, env=env)
    return LocalSubmittedRun(run_id, process)
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Check that the tracking URI of the config wins over the credentials file.

    The credentials file declares ``mlruns2`` while the config object is given
    ``mlruns1``; after ``setup`` MLflow must point at ``mlruns1`` inside the
    project folder.

    Args:
        kedro_project_with_mlflow_conf: path-like fixture pointing at the
            root of the kedro project used by the test.
    """
    credentials_file = kedro_project_with_mlflow_conf / "conf/base/credentials.yml"
    credentials_file.write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    try:
        config = KedroMlflowConfig(
            server=dict(
                mlflow_tracking_uri="mlruns1",
                credentials="my_mlflow_creds",
            ),
        )

        bootstrap_project(kedro_project_with_mlflow_conf)
        with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
            context = session.load_context()
            config.setup(context)

        assert (
            mlflow.get_tracking_uri()
            == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
        )
    finally:
        # Reset the credentials even when the test fails, so a failure here
        # cannot interfere with other tests sharing the project folder.
        credentials_file.write_text("")
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator,
                                     should_start_run, use_dfs_tmpdir):
    """Log a Spark ML estimator model, reload it and compare predictions.

    :param tmpdir: pytest temporary directory holding the tracking store.
    :param spark_model_estimator: fixture exposing ``model``, ``spark_df`` and
        the expected ``predictions``.
    :param should_start_run: start a run explicitly before logging.
    :param use_dfs_tmpdir: when truthy, ``dfs_tmpdir=None`` is passed to the
        Spark flavor; otherwise a ``test`` folder under ``tmpdir`` is passed.
    """
    old_tracking_uri = mlflow.get_tracking_uri()
    dfs_tmpdir = None if use_dfs_tmpdir else tmpdir.join("test").strpath

    try:
        tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
        mlflow.set_tracking_uri("file://%s" % tracking_dir)
        if should_start_run:
            mlflow.start_run()

        artifact_path = "model"
        sparkm.log_model(
            artifact_path=artifact_path,
            spark_model=spark_model_estimator.model,
            dfs_tmpdir=dfs_tmpdir,
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id,
            artifact_path=artifact_path,
        )

        reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmpdir)
        prediction_rows = (
            reloaded_model.transform(spark_model_estimator.spark_df)
            .select("prediction")
            .collect()
        )
        preds = [row.prediction for row in prediction_rows]
        assert spark_model_estimator.predictions == preds
    finally:
        # Close the run and restore the previous tracking URI.
        mlflow.end_run()
        mlflow.set_tracking_uri(old_tracking_uri)
def train_model(model, X_train, y_train, name, config):
    """Train a single model inside an MLflow run.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), Input data for train.
        y_train: ndarray(number, ), result data for train.
        name: String, name of model; used in the output file names.
        config: Dict, training parameters; ``batch`` and ``epochs`` are read.
    """
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    print("Current tracking uri: {}".format(mlflow.get_tracking_uri()))

    tags = {"usuario": "Anonymous"}
    mlflow.set_experiment("traffic_flow-saes")

    with mlflow.start_run() as run:
        mlflow.set_tags(tags)
        mlflow.keras.autolog()

        model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        hist = model.fit(
            X_train,
            y_train,
            batch_size=config["batch"],
            epochs=config["epochs"],
            validation_split=0.05,
        )

        # Save the trained model and its loss history next to each other.
        model.save('model/' + name + '.h5')
        history_df = pd.DataFrame.from_dict(hist.history)
        history_df.to_csv('model/' + name + ' loss.csv', encoding='utf-8', index=False)

        mlflow.log_param("Run_id", run.info.run_id)
def test_model_log(prophet_model):
    """Log a Prophet model, reload it and verify forecasts and pyfunc metadata.

    The scenario runs twice: once relying on ``log_model`` to start a run and
    once with a run started explicitly beforehand.
    """
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in (False, True):
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "prophet"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["pystan", "prophet"])

                model_info = mlflow.prophet.log_model(
                    pr_model=prophet_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                model_uri = f"runs:/{mlflow.active_run().info.run_id}/{artifact_path}"
                assert model_info.model_uri == model_uri

                # The reloaded model must forecast exactly like the original.
                reloaded_prophet_model = mlflow.prophet.load_model(model_uri=model_uri)
                expected_forecast = generate_forecast(prophet_model.model, FORECAST_HORIZON)
                reloaded_forecast = generate_forecast(
                    reloaded_prophet_model, FORECAST_HORIZON
                )
                np.testing.assert_array_equal(expected_forecast, reloaded_forecast)

                # The logged model must carry a pyfunc flavor with an env file.
                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                pyfunc_flavor = model_config.flavors[pyfunc.FLAVOR_NAME]
                assert pyfunc.ENV in pyfunc_flavor
                env_path = pyfunc_flavor[pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
def mlflow_tracking_in_task_example(check_time=datetime.datetime.now()):
    # type: ( datetime.datetime)-> str
    """Exercise the MLflow tracking API: params, metrics, artifacts and lookup.

    A run is opened as a context manager so that it is always closed, even
    when one of the logging calls raises.

    :param check_time: not read by the body. Note that the default is
        evaluated once, when the function is defined.
    """
    logger.info("Running MLFlow tracking integration check!")
    logger.info("MLFlow tracking URI: {}".format(get_tracking_uri()))

    with start_run():
        # params
        log_param("param1", randint(0, 100))
        log_param("param2", randint(0, 100))

        # metrics (each one is logged twice)
        log_metric("foo1", random())
        log_metric("foo1", random() + 1)
        log_metric("foo2", random())
        log_metric("foo2", random() + 1)

        # artifacts
        if not os.path.exists("outputs"):
            os.makedirs("outputs")
        with open("outputs/test1.txt", "w") as f1, open("outputs/test2.txt", "w") as f2:
            f1.write("hello")
            f2.write("world!")
        log_artifacts("outputs")

        # Get run metadata & data from the tracking server
        service = MlflowClient()
        run_id = active_run().info.run_id
        run = service.get_run(run_id)
        logger.info("Metadata & data for run with UUID %s: %s", run_id, run)

    logger.info("MLFlow tracking integration check completed!")
def test_model_log(h2o_iris_model):
    """Log an H2O model, reload it and compare predictions with the original.

    The scenario runs twice: once relying on ``log_model`` to start a run and
    once with a run started explicitly beforehand.

    :param h2o_iris_model: fixture exposing ``model`` and ``inference_data``.
    """
    h2o_model = h2o_iris_model.model
    old_uri = mlflow.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                mlflow.h2o.log_model(h2o_model=h2o_model, artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id,
                    artifact_path=artifact_path,
                )

                # Load model
                h2o_model_loaded = mlflow.h2o.load_model(model_uri=model_uri)

                loaded_preds = h2o_model_loaded.predict(
                    h2o_iris_model.inference_data
                ).as_data_frame()
                expected_preds = h2o_model.predict(
                    h2o_iris_model.inference_data
                ).as_data_frame()
                # ``all(df1 == df2)`` iterates the column labels of the
                # comparison frame, so it passes whatever the values are.
                # Compare the frames themselves instead.
                # NOTE(review): assumes as_data_frame() returns pandas
                # DataFrames -- confirm for the h2o version in use.
                assert loaded_preds.equals(expected_preds)
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Check that the configured tracking URI wins over the credentials file.

    The credentials file declares ``mlruns2`` while the config object is given
    ``mlruns1``; after ``setup`` MLflow must point at ``mlruns1`` inside the
    project folder.

    Args:
        kedro_project_with_mlflow_conf: path-like fixture pointing at the
            root of the kedro project used by the test.
    """
    project_path = kedro_project_with_mlflow_conf

    credentials = {"my_mlflow_creds": {"mlflow_tracking_uri": "mlruns2"}}
    (project_path / "conf/base/credentials.yml").write_text(yaml.dump(credentials))

    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )

    # Make the project importable and configured before opening a session.
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()

    expected_uri = (project_path / "mlruns1").as_uri()
    assert mlflow.get_tracking_uri() == expected_uri
def test_docker_project_tracking_uri_propagation(
        ProfileConfigProvider, tmpdir, tracking_uri,
        expected_command_segment, docker_example_base_image):
    # pylint: disable=unused-argument
    """Run the docker test project with a given tracking URI.

    :param ProfileConfigProvider: patched provider class; its return value is
        replaced by a mock that yields a fixed ``DatabricksConfig``.
    :param tmpdir: pytest temporary directory holding the local ``mlruns``.
    :param tracking_uri: tracking URI to set; ``None`` selects the local
        tracking directory.
    :param expected_command_segment: not read by the body.
    :param docker_example_base_image: not read by the body.
    """
    provider = mock.MagicMock()
    provider.get_config.return_value = DatabricksConfig(
        "host", "user", "pass", None, insecure=True)
    ProfileConfigProvider.return_value = provider

    # Create and mock local tracking directory
    local_tracking_dir = os.path.join(tmpdir.strpath, "mlruns")
    if tracking_uri is None:
        tracking_uri = local_tracking_dir

    old_uri = mlflow.get_tracking_uri()
    try:
        mlflow.set_tracking_uri(tracking_uri)
        store_patch = mock.patch("mlflow.tracking._tracking_service.utils._get_store")
        with store_patch as _get_store_mock:
            _get_store_mock.return_value = file_store.FileStore(local_tracking_dir)
            mlflow.projects.run(
                TEST_DOCKER_PROJECT_DIR,
                experiment_id=file_store.FileStore.DEFAULT_EXPERIMENT_ID,
            )
    finally:
        mlflow.set_tracking_uri(old_uri)
def set_tracking_uri():
    """Point MLflow at the local tracking server and print the URI in effect."""
    server_uri = "http://127.0.0.1:5000"

    # This does not affect the currently active run (if one exists),
    # but takes effect for successive runs.
    mlflow.set_tracking_uri(server_uri)

    # Read the value back as a check.
    tracking_uri = mlflow.get_tracking_uri()
    print(f"\nCurrent tracking uri: {tracking_uri}")
def train_workflow(num_topics):
    """Create the news-topics experiment and train the LDA model inside a run.

    :param num_topics: forwarded to ``lda.train`` as ``num_topics``.
    """
    current_uri = mlflow.get_tracking_uri()
    print(current_uri)

    # NOTE(review): the run below is not explicitly attached to the experiment
    # created here -- confirm whether that is intended.
    mlflow.create_experiment(name="topic_modeling_news_topics_5")

    with mlflow.start_run():
        lda.train(num_topics=num_topics)
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    """Log a Spark ML estimator model under several setups and reload it.

    Every combination of "run started explicitly or not" and "default or
    custom DFS temp dir" is exercised; after each one the DFS temp dir and the
    tracking directory are removed.
    """
    old_tracking_uri = mlflow.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        for dfs_tmp_dir in (None, os.path.join(str(tmpdir), "test")):
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
                mlflow.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    mlflow.start_run()

                # Each combination logs under its own artifact path.
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                    artifact_path=artifact_path,
                    spark_model=spark_model_estimator.model,
                    dfs_tmpdir=dfs_tmp_dir,
                )
                run_id = mlflow.active_run().info.run_id
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=run_id, artifact_path=artifact_path
                )

                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                prediction_rows = (
                    reloaded_model.transform(spark_model_estimator.spark_df)
                    .select("prediction")
                    .collect()
                )
                preds = [row.prediction for row in prediction_rows]
                assert spark_model_estimator.predictions == preds
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_tracking_uri)
                used_dfs_dir = dfs_tmp_dir or sparkm.DFS_TMP
                shutil.rmtree(used_dfs_dir)
                shutil.rmtree(tracking_dir)
def test_model_log(self):
    """Log the linear sklearn model, reload it and verify predictions.

    Also checks that the logged model carries a pyfunc flavor whose
    environment file exists.
    """
    old_uri = mlflow.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "linear"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["sklearn"])

                sklearn.log_model(
                    sk_model=self._linear_lr,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                reloaded = sklearn.load_model(
                    artifact_path, run_id=mlflow.active_run().info.run_uuid)

                model_path = _get_model_log_dir(
                    artifact_path, mlflow.active_run().info.run_uuid)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                pyfunc_flavor = model_config.flavors[pyfunc.FLAVOR_NAME]
                assert pyfunc.ENV in pyfunc_flavor
                env_path = pyfunc_flavor[pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))

                reloaded_preds = reloaded.predict(self._X)
                np.testing.assert_array_equal(self._linear_lr_predict, reloaded_preds)
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
def _ensure_tracking_uri(self, meta, tracking_uri=None):
    """Resolve the tracking URI, record it on ``meta`` and activate it.

    The first truthy value wins, in this order: the ``tracking_uri`` argument,
    ``mlflow.get_tracking_uri()``, ``meta.kind_meta['tracking_uri']`` and the
    ``MLFLOW_TRACKING_URI`` environment variable. The result is written back
    to ``meta.kind_meta`` before it is validated.

    :param meta: object whose ``kind_meta`` mapping is read and updated.
    :param tracking_uri: explicit tracking URI, if any.
    :raises AssertionError: when no tracking URI could be resolved.
    """
    resolved_uri = tracking_uri
    if not resolved_uri:
        resolved_uri = mlflow.get_tracking_uri()
    if not resolved_uri:
        resolved_uri = meta.kind_meta.get('tracking_uri')
    if not resolved_uri:
        resolved_uri = os.environ.get('MLFLOW_TRACKING_URI')

    meta.kind_meta['tracking_uri'] = resolved_uri
    assert resolved_uri, "pass tracking_uri= or set kind_meta.tracking_uri or set env MLFLOW_TRACKING_URI"

    # Issue one small request through a client built for the resolved URI
    # before making it the active tracking URI.
    client = mlflow.tracking.MlflowClient(resolved_uri)
    client.list_registered_models(max_results=1)
    mlflow.set_tracking_uri(resolved_uri)
def tune_fn():
    """Run the GBDT hyper-parameter search with Optuna and MLflow logging.

    Reads ``experiment_name`` and ``objective`` from the enclosing scope. The
    search maximises ``auroc`` over 65 samples; the ``mlflow`` entry of the
    config carries the experiment name and the current tracking URI.
    """
    mlflow.set_experiment(experiment_name=experiment_name)

    # Only the Optuna searcher is passed to tune.run; the previously built,
    # never-used AxSearch instance has been dropped.
    optuna_search = OptunaSearch(metric="auroc", mode="max")

    search_space = {
        "num_leaves": tune.randint(5, 95),
        "learning_rate": tune.loguniform(1e-4, 1.0),
        "n_estimators": tune.randint(100, 100000),
        "subsample": tune.loguniform(0.01, 1.0),
        "subsample_freq": tune.randint(1, 5),
        "objective": "binary",
        "reg_alpha": tune.loguniform(1e-4, 1.0),
        "reg_lambda": tune.loguniform(1e-4, 1.0),
        "tree_learner": "feature",
        "feature_sel": 0,
        "mlflow": {
            "experiment_name": experiment_name,
            "tracking_uri": mlflow.get_tracking_uri(),
        },
    }

    tune.run(
        objective,
        name="mlflow_gbdt",
        num_samples=65,
        config=search_space,
        search_alg=optuna_search,
    )
def test_tune_exp_default_trainable(
    dataset, pipeline_config, trainer_config, monkeypatch
):
    """Run a one-sample ``TuneExperiment`` with the default trainable.

    Checks the generated experiment name and run identifier, that exactly one
    trial was executed, and that an MLflow experiment with that name exists.
    """
    # avoid logging to wandb
    monkeypatch.setenv("WANDB_MODE", "dryrun")

    # Turn two settings into search spaces.
    pipeline_config["features"]["word"]["embedding_dim"] = tune.choice([2, 4])
    trainer_config["optimizer"]["lr"] = tune.loguniform(0.001, 0.01)

    experiment = TuneExperiment(
        pipeline_config=pipeline_config,
        trainer_config=trainer_config,
        train_dataset=dataset,
        valid_dataset=dataset,
        num_samples=1,
    )

    assert experiment._name.startswith("HPO on")
    assert experiment.name == experiment._name
    assert experiment._run_identifier == "_default_trainable"

    analysis = tune.run(experiment)
    assert len(analysis.trials) == 1

    # Re-apply the tracking URI currently in effect before the lookup.
    mlflow.set_tracking_uri(mlflow.get_tracking_uri())
    assert mlflow.get_experiment_by_name(experiment._name)
def get_store(uri=None):
    """Build a ``FileStore`` for ``uri``.

    :param uri: location handed to ``FileStore``; when ``None`` the current
        MLflow tracking URI is used.
    :return: the ``FileStore`` instance.
    """
    from mlflow import get_tracking_uri
    from mlflow.store.file_store import FileStore

    store_uri = get_tracking_uri() if uri is None else uri
    return FileStore(store_uri)
def setUp(self):
    """Prepare a temporary tracking store with one pre-existing experiment."""
    tracking_dir = tempfile.mkdtemp()
    self.dirpath = tracking_dir

    import mlflow

    mlflow.set_tracking_uri(tracking_dir)
    mlflow.create_experiment(name="existing_experiment")

    self.mlflow_util = MLflowLoggerUtil()
    # Store the URI as MLflow reports it back.
    self.tracking_uri = mlflow.get_tracking_uri()
def test_model_log(tmpdir):
    """Log a Spark pipeline model and check every way of loading it back.

    For each combination of "run started explicitly or not" and "default or
    custom DFS temp dir", predictions are compared across the original model,
    the pyfunc loader, the Spark loader and the Spark UDF scorer.
    """
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])

    iris = datasets.load_iris()
    feature_names = ["0", "1", "2", "3"]
    pandas_df = pd.DataFrame(iris.data, columns=feature_names)  # to make spark_udf work
    pandas_df['label'] = pd.Series(iris.target)

    spark_session = (
        pyspark.sql.SparkSession.builder
        .config(key="spark_session.python.worker.reuse", value=True)
        .master("local-cluster[2, 1, 1024]")
        .getOrCreate()
    )
    spark_df = spark_session.createDataFrame(pandas_df)

    assembler = VectorAssembler(inputCols=feature_names, outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])

    # Fit the model and keep its predictions as the reference.
    model = pipeline.fit(spark_df)
    reference_rows = model.transform(spark_df).select("prediction").collect()
    preds1 = [row.prediction for row in reference_rows]

    old_tracking_uri = mlflow.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        for dfs_tmp_dir in (None, os.path.join(str(tmpdir), "test")):
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.mkdir("mlruns")))
                mlflow.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(artifact_path=artifact_path, spark_model=model,
                                 dfs_tmpdir=dfs_tmp_dir)
                run_id = active_run().info.run_uuid

                # test pyfunc
                pyfunc_model = pyfunc.load_pyfunc(artifact_path, run_id=run_id)
                preds2 = pyfunc_model.predict(pandas_df)
                assert preds1 == preds2

                # test load model
                reloaded_model = sparkm.load_model(artifact_path, run_id=run_id,
                                                   dfs_tmpdir=dfs_tmp_dir)
                reloaded_rows = (
                    reloaded_model.transform(spark_df).select("prediction").collect()
                )
                preds3 = [row.prediction for row in reloaded_rows]
                assert preds1 == preds3

                # test spark_udf
                preds4 = score_model_as_udf(artifact_path, run_id, pandas_df)
                assert preds1 == preds4

                # We expect not to delete the DFS tempdir.
                used_dfs_dir = dfs_tmp_dir or sparkm.DFS_TMP
                assert os.path.exists(used_dfs_dir)
                assert os.listdir(used_dfs_dir)
                shutil.rmtree(used_dfs_dir)
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_tracking_uri)
                shutil.rmtree(tracking_dir)
def run_test():
    """Log ``l1``/``alpha`` parameter combinations to a fixed MLflow run.

    For every pair the same run id is resumed, the parameters are logged as
    strings, and the artifact and tracking URIs are queried (the results are
    discarded).
    """
    l1_values = [0.75, 1]
    alpha_values = [0, 0.5]
    for l1, alpha in itertools.product(l1_values, alpha_values):
        with mlflow.start_run(run_id='91878d6666994fa8b7205f4a83171e8a',
                              experiment_id=2,
                              run_name='ipython'):
            parameters = {
                key: str(value) for key, value in (('l1', l1), ('alpha', alpha))
            }
            mlflow.log_params(parameters)
            mlflow.get_artifact_uri()
            mlflow.get_tracking_uri()
def create_common_manifest(duration):
    """Build the manifest section shared by exports.

    :param duration: value stored under the ``duration`` key.
    :return: dict with a single ``info`` entry holding the MLflow version,
        the current tracking URI, the export time and the duration.
    """
    info = {}
    info["mlflow_version"] = mlflow.__version__
    info["mlflow_tracking_uri"] = mlflow.get_tracking_uri()
    info["export_time"] = get_now_nice()
    info["duration"] = duration
    return {"info": info}
def get_db_schema():
    """Return the ``CREATE TABLE`` statements of the tracking database.

    The schema is reflected from the database behind the current MLflow
    tracking URI. Statements follow ``sorted_tables`` order, with trailing
    whitespace stripped from every line.
    """
    engine = sqlalchemy.create_engine(mlflow.get_tracking_uri())
    reflected_metadata = MetaData(bind=engine)
    reflected_metadata.reflect()

    # Render each table's DDL as text, as described in
    # https://docs.sqlalchemy.org/en/13/faq/metadata_schema.html#how-can-i-get-the-create-table-drop-table-output-as-a-string
    ddl_lines = [
        line.rstrip()
        for table in reflected_metadata.sorted_tables
        for line in str(CreateTable(table)).splitlines()
    ]
    return "\n".join(ddl_lines)
def test_log_saved_model(self):
    """Check model logging on the sklearn iris dataset.

    A small ``DNNRegressor`` is trained on the first two features, logged via
    ``self.helper`` and its pyfunc predictions are compared with the
    estimator's own predictions.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features.
    y = iris.target

    training_features = {}
    for idx in range(2):
        # TensorFlow is fickle about feature names, so we remove offending characters
        cleaned_name = iris.feature_names[idx]
        for offending_char in (" ", "(", ")"):
            cleaned_name = cleaned_name.replace(offending_char, "")
        iris.feature_names[idx] = cleaned_name
        training_features[cleaned_name] = iris.data[:, idx:idx + 1]

    feature_names = iris.feature_names[:2]
    # Creating TensorFlow-specific numeric columns for input.
    tf_feat_cols = [tf.feature_column.numeric_column(col) for col in feature_names]

    # Creating input training function.
    input_train = tf.estimator.inputs.numpy_input_fn(
        training_features, y, shuffle=False, batch_size=1)
    # Creating Deep Neural Network Regressor.
    estimator = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols, hidden_units=[1])
    # Training and creating expected predictions on training dataset.
    estimator.train(input_train, steps=10)

    # Saving the estimator's prediction on the training data; assume the DNNRegressor
    # produces a single output column named 'predictions'
    pred_col = "predictions"
    estimator_preds = [sample[pred_col] for sample in estimator.predict(input_train)]
    estimator_preds_df = pd.DataFrame({pred_col: estimator_preds})

    old_tracking_uri = mlflow.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                # Creating dict of features names (str) to placeholders (tensors)
                feature_spec = {
                    name: tf.placeholder("float", name=name, shape=[150])
                    for name in feature_names
                }
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                input_df = pandas.DataFrame(data=X, columns=feature_names)
                pyfunc_preds_df = self.helper(feature_spec, tmp, estimator, input_df)

                # Asserting that the loaded model predictions are as expected.
                assert estimator_preds_df.equals(pyfunc_preds_df)
            finally:
                # Restoring the old logging location.
                mlflow.end_run()
                mlflow.set_tracking_uri(old_tracking_uri)
def config(self) -> dict:
    """The config dictionary used by the `TuneExperiment.trainable` function.

    It bundles the pipeline and trainer configs, the dataset and vocab paths,
    the experiment name and the MLflow tracking URI in effect at call time.
    """
    trainable_config = {}
    trainable_config["pipeline_config"] = self._pipeline_config
    trainable_config["trainer_config"] = self._trainer_config
    trainable_config["train_dataset_path"] = self._train_dataset_path
    trainable_config["valid_dataset_path"] = self._valid_dataset_path
    trainable_config["mlflow_tracking_uri"] = mlflow.get_tracking_uri()
    trainable_config["vocab_path"] = self._vocab_path
    trainable_config["name"] = self._name
    return trainable_config