def get_latest_mlrun(params):
    """Get latest mlflow run.

    :param params: gdl parameters dictionary; reads
        ``params['global']['mlflow_uri']`` and
        ``params['global']['mlflow_experiment_name']``
    :return: mlflow run object
    """
    tracking_uri = params['global']['mlflow_uri']
    mlflow.set_tracking_uri(tracking_uri)
    mlexp = mlflow.get_experiment_by_name(
        params['global']['mlflow_experiment_name'])
    exp_id = mlexp.experiment_id
    try:
        # Fluent API path: ask for a single run id, ordered by the
        # 'release' tag in descending order.
        run_ids = ([
            x.run_id for x in mlflow.list_run_infos(
                exp_id, max_results=1, order_by=["tag.release DESC"])
        ])
    except AttributeError:
        # Older mlflow versions expose no mlflow.list_run_infos; fall back to
        # the tracking client and keep only the first returned run info.
        # NOTE(review): run_view_type=3 presumably means ViewType.ALL — confirm.
        mlflow_client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)
        run_ids = [
            x.run_id
            for x in mlflow_client.list_run_infos(exp_id, run_view_type=3)[0:1]
        ]
    mlrun = mlflow.get_run(run_ids[0])
    return mlrun
def mlflow_run(mlflow_resources):
    """Return the run ids of all active runs in the Digits-sklearn-genetic-opt experiment."""
    _, tracking_client = mlflow_resources
    experiment = tracking_client.get_experiment_by_name("Digits-sklearn-genetic-opt")
    run_infos = mlflow.list_run_infos(
        experiment.experiment_id, run_view_type=ViewType.ACTIVE_ONLY)
    return [info.run_id for info in run_infos]
def load_artifacts():
    """Load the trained model and preprocessing artifacts of the latest run into module globals."""
    global model
    global avg_price
    global dummies
    # Fetch the first run (the most recent one) of experiment "1"
    run_id = mlflow.list_run_infos("1")[0].run_id
    # Download that run's 'process/' artifacts locally under /tmp/
    mlflow_client.download_artifacts(run_id, "process/", "/tmp/")
    model = mlflow.sklearn.load_model("runs:/{}/model".format(run_id))
    avg_price = pd.read_csv("/tmp/process/avg_price.csv")
    # dummies_cols.txt holds a comma-separated list of dummy column names
    with open("/tmp/process/dummies_cols.txt", "r") as f:
        dummies = f.read().split(",")
def test_listRuns_shouldReturn_NoRuns(tmpdir):
    """With whylogs enabled but nothing logged, mlflow sees 10 runs and whylogs sees none."""
    import whylogs
    import mlflow

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow()

    # Start and immediately close ten empty runs.
    for _ in range(10):
        with mlflow.start_run():
            pass

    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 0
    whylogs.mlflow.disable_mlflow()
def test_listRuns_shouldReturn_CorrectRunCount(tmpdir):
    """Ten runs total; whylogs logs on the five even-indexed ones only."""
    import whylogs
    import mlflow

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow()

    for idx in range(10):
        with mlflow.start_run():
            # Only even-indexed runs produce a whylogs profile.
            if idx % 2 == 0:
                mlflow.whylogs.log(features={"a": 1})

    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 5
    assert len(whylogs.mlflow.get_experiment_profiles("0")) == 5
    whylogs.mlflow.disable_mlflow()
def test_listRuns_shouldReturn_NoRuns(tmpdir, mlflow_config_path):
    """Whylogs enabled from a config-file session, nothing logged: 10 mlflow runs, 0 whylogs runs."""
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)
    # Build the whylogs session straight from the on-disk config.
    session = session_from_config(load_config(mlflow_config_path))

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    for _ in range(10):
        with mlflow.start_run():
            pass

    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 0
    whylogs.mlflow.disable_mlflow()
def test_listRuns_shouldReturn_CorrectRunCount(tmpdir, mlflow_config_path):
    """Ten runs are created and whylogs logs on the even-indexed half.

    Expects mlflow to list all 10 runs while whylogs reports exactly the 5
    runs (and 5 profiles) that actually logged features.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)
    config = load_config(mlflow_config_path)
    session = session_from_config(config)

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)
    for i in range(0, 10):
        with mlflow.start_run():
            # Only even-indexed runs log a profile -> 5 whylogs runs expected.
            # (Removed stray debug output `print("WEIRD")` left in the loop.)
            if i % 2 == 0:
                mlflow.whylogs.log(features={"a": 1})

    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 5
    assert len(whylogs.mlflow.get_experiment_profiles("0")) == 5
    whylogs.mlflow.disable_mlflow()
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

if __name__ == "__main__":

    def print_run_infos(run_infos):
        """Print one line per RunInfo: its run_id and lifecycle_stage."""
        for info in run_infos:
            print("- run_id: {}, lifecycle_stage: {}".format(info.run_id, info.lifecycle_stage))

    # Create two runs
    with mlflow.start_run() as run1:
        mlflow.log_metric("click_rate", 1.55)
    with mlflow.start_run() as run2:
        mlflow.log_metric("click_rate", 2.50)

    # Delete the last run
    tracking_client = MlflowClient()
    tracking_client.delete_run(run2.info.run_id)

    # Get all runs under the default experiment (whose id is 0)
    print("Active runs:")
    print_run_infos(mlflow.list_run_infos("0", run_view_type=ViewType.ACTIVE_ONLY))

    print("Deleted runs:")
    print_run_infos(mlflow.list_run_infos("0", run_view_type=ViewType.DELETED_ONLY))

    print("All runs:")
    print_run_infos(
        mlflow.list_run_infos("0",
                              run_view_type=ViewType.ALL,
                              order_by=["metric.click_rate DESC"]))
logged_model = f'{run.info.artifact_uri}/house-price-pipelineModel' # Load model model = mlflow.spark.load_model(logged_model) # Use the model to predict prices on the new data # Make the predictions on the input data df_with_predictions = model.transform(df_in) print("Below we print the first 20 rows of predictions against the unlabeled data") df_with_predictions.show() # Clean up resources if args.run_cleanup_at_eoj.lower() == 'y': # First delete all runs from mlflow.entities import ViewType def delete_all_runs(run_infos): for r in run_infos: client.delete_run(r.run_id) print("Deleting all runs...") client = mlflow.tracking.MlflowClient() delete_all_runs(mlflow.list_run_infos(run.info.experiment_id, run_view_type=ViewType.ALL)) # ViewType can be ACTIVE_ONLY, DELETED_ONLY, ALL print("... all runs deleted.") # Now clean up all other resources used in the workshop (database, local and dbfs file paths) print(lr_wkshp_helpers.cleanup(args.user_name, spark))
import mlflow


def print_run_infos(run_infos):
    """Print run_id, status and lifecycle_stage for each RunInfo given."""
    for info in run_infos:
        line = "- run_id: {}, status: {}, lifecycle_stage: {}".format(
            info.run_id, info.status, info.lifecycle_stage)
        print(line)


if __name__ == '__main__':
    exp = mlflow.get_experiment_by_name("issue_3932")
    if not exp:
        print("Experiment name: {} doesn't exist".format("issue_3932"))
    else:
        print_run_infos(mlflow.list_run_infos(experiment_id=exp.experiment_id))