# A reasonable value for parallelism is the square root of max_evals. spark_trials = SparkTrials(parallelism=10) # Run fmin within an MLflow run context so that each hyperparameter configuration is logged as a child run of a parent # run called "xgboost_models" . with mlflow.start_run(run_name=run_name): best_params = fmin( fn=train_model, space=search_space, algo=tpe.suggest, max_evals=20, trials=spark_trials, rstate=np.random.RandomState(SEED) ) # COMMAND ---------- # MAGIC %md # MAGIC ### And the best run 😎 # COMMAND ---------- best_run = mlflow.search_runs(order_by=['metrics.auc DESC'], run_view_type=ViewType.ACTIVE_ONLY, ).iloc[0] print(f'AUC of Best Run: {round(best_run["metrics.auc"], 3)}') model_uri = "runs:/" + best_run["run_id"] + "/model" print(f'model_uri: {model_uri}')
} with mlflow.start_run(run_name='lightgbm_bankruptcy'): best_params = fmin(fn=train_model, space=hyperparameters, algo=tpe.suggest, max_evals=50, trials=Trials(), rstate=np.random.RandomState(1)) # - # ## Real Model import mlflow df = mlflow.search_runs(filter_string="metric.f1_macro > 0.8") df df.sort_values(by='metrics.f1_macro').iloc[0] params = df.sort_values(by='metrics.f1_macro').iloc[0, 7:16].to_dict() params params["params.objective"] X_test_prepared = X_test_final[chosen_features['predictor']] X_test_prepared = X_test_prepared[chosen_features['predictor']] X_test_final_prepared = X_test_final_prepared[chosen_features['predictor']] import lightgbm
import mlflow

if __name__ == '__main__':
    mlflow.set_experiment("Nested Runs")
    exp = mlflow.get_experiment_by_name("Nested Runs")

    # Create one parent run with three child runs directly beneath it.
    # Child N logs the parameter "childN" and is named "CHILD_RUNN".
    with mlflow.start_run(experiment_id=exp.experiment_id, run_name='PARENT_RUN') as parent_run:
        mlflow.log_param("parent", "yes")
        for child_index in (1, 2, 3):
            with mlflow.start_run(experiment_id=exp.experiment_id,
                                  run_name=f'CHILD_RUN{child_index}',
                                  nested=True):
                mlflow.log_param(f"child{child_index}", "yes")

    # Search all child runs with a parent id
    query = "tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
    results = mlflow.search_runs(filter_string=query)
    print(results)

    # The children log "child1"/"child2"/"child3", so there is no "params.child"
    # column; pick up whichever child parameter columns are actually present
    # instead of hard-coding a name that raises KeyError.
    child_param_columns = sorted(
        column for column in results.columns if column.startswith("params.child"))
    print(results[["run_id", *child_param_columns, "tags.mlflow.runName"]])
for values in itertools.product(*params_dict.values()) ] for run_ind, params in enumerate(params_list): logger.info( f'================== Run {run_ind+1}/{len(params_list)} ==================' ) # Recalculating number of epochs, so that training time will be the same for all the models # baseline - 2000 epochs for 200 labels: 12000 total_batches batch_size_train = 16 batches_per_epoch = params['data_args.n_labels'] // batch_size_train params['t.epochs'] = round(BASELINE_TOTAL_BATCHES / batches_per_epoch) params['early_stopping'] = dict(patience=params['t.epochs'], monitor='cross_entropy') existing_runs = mlflow.search_runs( filter_string=f"params.run_hash = '{calculate_hash(params)}'", run_view_type=mlflow.tracking.client.ViewType.ACTIVE_ONLY, experiment_ids=['4']) if len(existing_runs) > 0: logger.info('Skipping existing run.') continue params_list = [('--' + k, str(v)) for k, v in params.items()] params_list = list(itertools.chain(*params_list)) call([ 'python3', f'{SRC_PATH}/models/fix_match/main.py', '--run_hash', calculate_hash(params) ] + sys.argv[1:] + params_list)
# COMMAND ---------- df = spark.sql(f'select * from {DATABASE_NAME}.sensor_readings_historical_bronze_sample').toPandas() df['device_operation_status_pred'] = model.predict(df) preds_stats = df.groupby(['device_operational_status', 'device_operation_status_pred']).count()['id'].reset_index() display(preds_stats) # COMMAND ---------- # MAGIC %md # Step 2: Use MLFlow APIs # MAGIC MLFlow APIs allow you to programatically search for runs, filter, and do analysts of your experiments. # MAGIC Let's looks at all of the runs where macro_avg of f1 is > 0.5 # COMMAND ---------- all_runs = mlflow.search_runs(experiment_ids=[experiment_id]) display(all_runs) # COMMAND ---------- best_runs= mlflow.search_runs( experiment_ids=[experiment_id], filter_string='metrics.macro_avg__f1_score > 0.5', order_by=['metrics.macro_avg__f1_score desc'] ) print(f'Found {best_runs.shape[0]} OK runs') # COMMAND ---------- # This allows us to grab the best run out of MLFlow very easily best_run = best_runs.iloc[0]
import mlflow
import shutil

# Look up every run marked as deleted in experiments 0-4, then remove each
# run's directory under ./mlruns/<experiment_id>/<run_id> from disk.
deleted_runs = mlflow.search_runs(
    experiment_ids=['0', '1', '2', '3', '4'],
    run_view_type=mlflow.entities.ViewType.DELETED_ONLY,
)

for run_id, exp_id in zip(deleted_runs['run_id'], deleted_runs['experiment_id']):
    print(f'Deleting run {run_id} in experiment {exp_id}')
    run_directory = f'./mlruns/{exp_id}/{run_id}'
    shutil.rmtree(run_directory)
def test_mlflow_callback(tmpdir):
    """Train a small Ludwig model with ``MlflowCallback`` and verify what lands in MLflow.

    The test checks that exactly one FINISHED run exists in the experiment, that
    the trainer setup/teardown hooks each fired once, that ``config.yaml`` was
    logged and equals the training config, and that the logged pyfunc model
    reloads with a "ludwig" flavor and reproduces the original predictions.

    Args:
        tmpdir: directory used for the generated CSVs, the file-based MLflow
            store and the downloaded artifacts.
    """
    epochs = 2
    batch_size = 8
    num_examples = 32

    input_features = [sequence_feature(reduce_output="sum")]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        TRAINER: {"epochs": epochs, "batch_size": batch_size},
    }

    # The same generated file is reused (copied) as validation and test data.
    data_csv = generate_data(
        input_features, output_features, os.path.join(tmpdir, "train.csv"), num_examples=num_examples
    )
    val_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "validation.csv"))
    test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

    # Point MLflow at a file store inside tmpdir so the test is self-contained.
    mlflow_uri = f"file://{tmpdir}/mlruns"
    mlflow.set_tracking_uri(mlflow_uri)
    client = MlflowClient(tracking_uri=mlflow_uri)

    exp_name = "mlflow_test"
    callback = MlflowCallback()
    # Wrap the real callback in a Mock so hook call counts can be asserted
    # while the real implementation still runs.
    wrapped_callback = mock.Mock(wraps=callback)

    model = LudwigModel(config, callbacks=[wrapped_callback], backend=FakeRemoteBackend())
    model.train(training_set=data_csv, validation_set=val_csv, test_set=test_csv, experiment_name=exp_name)
    expected_df, _ = model.predict(test_csv)

    # Check mlflow artifacts
    assert callback.experiment_id is not None
    assert callback.run is not None

    experiment = mlflow.get_experiment_by_name(exp_name)
    assert experiment.experiment_id == callback.experiment_id

    df = mlflow.search_runs([experiment.experiment_id])
    assert len(df) == 1

    run_id = df.run_id[0]
    assert run_id == callback.run.info.run_id

    run = mlflow.get_run(run_id)
    assert run.info.status == "FINISHED"
    assert wrapped_callback.on_trainer_train_setup.call_count == 1
    assert wrapped_callback.on_trainer_train_teardown.call_count == 1

    artifacts = [f.path for f in client.list_artifacts(callback.run.info.run_id, "")]
    local_dir = f"{tmpdir}/local_artifacts"
    os.makedirs(local_dir)

    # The logged config must round-trip through YAML unchanged.
    assert "config.yaml" in artifacts
    local_config_path = client.download_artifacts(callback.run.info.run_id, "config.yaml", local_dir)

    with open(local_config_path) as f:
        config_artifact = yaml.safe_load(f)
    assert config_artifact == config

    model_path = f"runs:/{callback.run.info.run_id}/model"
    loaded_model = mlflow.pyfunc.load_model(model_path)

    assert "ludwig" in loaded_model.metadata.flavors
    flavor = loaded_model.metadata.flavors["ludwig"]

    def compare_features(key):
        # Feature lists in the trained model config and in the flavor schema
        # must match pairwise on name and type.
        assert len(model.config[key]) == len(flavor["ludwig_schema"][key])
        for feature, schema_feature in zip(model.config[key], flavor["ludwig_schema"][key]):
            assert feature["name"] == schema_feature["name"]
            assert feature["type"] == schema_feature["type"]

    compare_features("input_features")
    compare_features("output_features")

    # The reloaded pyfunc model must give the same predictions as the in-memory model.
    test_df = pd.read_csv(test_csv)
    pred_df = loaded_model.predict(test_df)
    assert pred_df.equals(expected_df)
# get workspace ws = Workspace.from_config() # get root of git repo prefix = Path(__file__).parent # azure ml settings experiment_name = "sklearn-diabetes-example" # setup mlflow tracking mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri()) mlflow.set_experiment(experiment_name) # get latest completed run of the training runs_df = mlflow.search_runs() runs_df = runs_df.loc[runs_df["status"] == "FINISHED"] runs_df = runs_df.sort_values(by="end_time", ascending=False) print(runs_df.head()) run_id = runs_df.at[0, "run_id"] # create deployment configuration aks_config = AksWebservice.deploy_configuration( compute_target_name="aks-cpu-deploy", cpu_cores=2, memory_gb=5, tags={ "data": "diabetes", "method": "sklearn" }, description="Predict using webservice",
def tune_models():
    """Print the learner and ``f_test`` metric of the three runs with the highest ``f_test``."""
    # run_view_type=1 is kept verbatim; presumably ViewType.ACTIVE_ONLY — TODO confirm.
    all_runs = mlflow.search_runs(run_view_type=1)
    ranked_runs = all_runs.sort_values(by='metrics.f_test', ascending=False)
    top_runs = ranked_runs.head(3)
    report_columns = ['params.learner', 'metrics.f_test']
    print(top_runs[report_columns])
'monetary': ['mean', 'min', 'max', 'count'] }) rfm_resumen.to_csv('/dbfs/FileStore/rfm_resumen.csv') mlflow.log_artifact('/dbfs/FileStore/rfm_resumen.csv') # COMMAND ---------- rfm_resumen # COMMAND ---------- #Registro del modelo - aca se puede parametrizar el nombre y despues usarlo para filtrar y asi tener varios modelos # al mismo tiempo run_id = mlflow.search_runs().iloc[0].run_id model_name = "rfm-test" model_version = mlflow.register_model(f"runs:/{run_id}", model_name) mlflow.end_run() # COMMAND ---------- #Stage del modelo en producción from mlflow.tracking import MlflowClient client = MlflowClient() client.transition_model_version_stage( name=model_name, version=6,
"""SELECT experiment_id, run_id, metrics.auc as AUC, metrics.F1 as F1, artifact_uri FROM vw_client WHERE status='FINISHED' ORDER BY metrics.f1 desc """) display(df_model_selector) # COMMAND ---------- # DBTITLE 1,Using MLFlow client API best_models = mlflow.search_runs( filter_string= f'tags.model="{model_name}" and attributes.status = "FINISHED" and metrics.F1 > 0', order_by=['metrics.F1 DESC'], max_results=1) model_uri = best_models.iloc[0].artifact_uri print(f'AUC of Best Run: {best_models["metrics.auc"]}') # COMMAND ---------- # MAGIC %md # MAGIC ## MLOps: Select best model and push to Registry # COMMAND ---------- # DBTITLE 1,Instantiate MLFlow client for API calls client = mlflow.tracking.MlflowClient()
import warnings
import mlflow

warnings.filterwarnings('ignore')

# Work from the directory of the running script so relative paths resolve there.
PROJECT_DIR = sys.path[0]
os.chdir(PROJECT_DIR)

experiment_name = 'rented_bikes'
mlflow.set_experiment(experiment_name)

PORT = 5001  # REST API serving port
CONTAINER_NAME = "mlflow_example_model_serving"

# Ask for the single run with the lowest cross-validated RMSE.
best_run_df = mlflow.search_runs(order_by=['metrics.RMSE_CV ASC'], max_results=1)
if len(best_run_df.index) == 0:
    raise Exception(f"Found no runs for experiment '{experiment_name}'")

best_run = mlflow.get_run(best_run_df.at[0, 'run_id'])
best_model_uri = f"{best_run.info.artifact_uri}/model"
# best_model = mlflow.sklearn.load_model(best_model_uri)

# print best run info
print("Best run info:")
print(f"Run id: {best_run.info.run_id}")
print(f"Run parameters: {best_run.data.params}")
print("Run score: RMSE_CV = {:.4f}".format(best_run.data.metrics['RMSE_CV']))
print(f"Run model URI: {best_model_uri}")

# remove current container if exists
def app_main():
    """Render the Streamlit machine-learning analysis app.

    The sidebar drives five independent sections: choosing a CSV data source,
    generating an exploratory profile report, training a regression or
    classification model, viewing the training log, and browsing or applying
    models previously logged to MLflow.
    """
    st.title("Machine learning analysis platform")

    # Defaults used whenever no file ends up selected. Setting them up front
    # keeps the later sections from hitting a NameError when the data-source
    # box is ticked but the folder contains no CSV file.
    file_selected_path = None
    nrows = 100

    if st.sidebar.checkbox('Define Data Source'):
        filesFolder = st.sidebar.text_input('folder', value="data")
        dataList = list_files(filesFolder, 'csv')
        if len(dataList) == 0:
            st.warning('No data set available')
        else:
            file_selected = st.sidebar.selectbox('Select a document', dataList)
            file_selected_path = concat_file_path(filesFolder, file_selected)
            # -1 is the sentinel for "read every row".
            nrows = st.sidebar.number_input('Number of lines', value=-1)
            n_rows_str = 'All' if nrows == -1 else str(nrows)
            # f-string so the path and row count are actually interpolated.
            st.info(
                f'Selected file:{file_selected_path},The number of rows read is{n_rows_str}'
            )
    else:
        st.warning('The currently selected file is empty, please select:')

    if st.sidebar.checkbox('Exploratory Analysis'):
        if file_selected_path is not None:
            if st.sidebar.button('Report Generation'):
                df = load_csv(file_selected_path, nrows)
                pr = ProfileReport(df, explorative=True)
                st_profile_report(pr)
        else:
            st.info('No file selected, analysis cannot be performed')

    if st.sidebar.checkbox('Modeling'):
        if file_selected_path is not None:
            task = st.sidebar.selectbox('Select Task', ML_LIST)
            if task == 'Regression':
                model = st.sidebar.selectbox('Select Model', RG_LIST)
            elif task == 'Classification':
                # NOTE(review): classification offers the regression model list
                # (RG_LIST); looks like a copy-paste slip — confirm whether a
                # classifier list should be used here.
                model = st.sidebar.selectbox('Select Model', RG_LIST)
            df = load_csv(file_selected_path, nrows)
            try:
                cols = df.columns.to_list()
                target_col = st.sidebar.selectbox('Select Prediction Object', cols)
            except Exception:
                st.sidebar.warning('The data format cannot be read correctly')
                target_col = None

            if target_col is not None and st.sidebar.button('Training Model'):
                if task == 'Regression':
                    st.success('Data preprocessing...')
                    pc_rg.setup(df,
                                target=target_col,
                                log_experiment=True,
                                experiment_name='ml_',
                                log_plots=True,
                                silent=True,
                                verbose=False,
                                profile=True)
                    st.success('Data preprocessing is complete')
                    st.success('Training model. . .')
                    pc_rg.create_model(model, verbose=False)
                    st.success('The model training is complete. . .')
                    #pc_rg.finalize_model(model)
                    st.success('Model has been created')
                elif task == 'Classification':
                    st.success('Data preprocessing. . .')
                    pc_cl.setup(df,
                                target=target_col,
                                fix_imbalance=True,
                                log_experiment=True,
                                experiment_name='ml_',
                                log_plots=True,
                                silent=True,
                                verbose=False,
                                profile=True)
                    st.success('Data preprocessing is complete.')
                    st.success('Training model. . .')
                    pc_cl.create_model(model, verbose=False)
                    st.success('The model training is complete. . .')
                    #pc_cl.finalize_model(model)
                    st.success('Model has been created')

    if st.sidebar.checkbox('View System Log'):
        n_lines = st.sidebar.slider(label='Number of lines', min_value=3, max_value=50)
        if st.sidebar.button("Check View"):
            logs = get_model_training_logs(n_lines=n_lines)
            st.text('System log')
            st.write(logs)

    # Best effort: if the tracking store cannot be queried, behave as if there
    # are no runs. Catch Exception (not a bare except) so Ctrl+C and
    # SystemExit still propagate.
    try:
        # NOTE(review): experiment_ids is normally a list of ids; the scalar 0
        # is kept as-is because changing it may change which experiment is read.
        allOfRuns = mlflow.search_runs(experiment_ids=0)
    except Exception:
        allOfRuns = []

    if len(allOfRuns) != 0:
        if st.sidebar.checkbox('Preview model'):
            ml_logs = 'http://kubernetes.docker.internal:5000/  -->Open mlflow, enter the command line: mlflow ui'
            st.markdown(ml_logs)
            st.dataframe(allOfRuns)
        if st.sidebar.checkbox('Choose a model'):
            selected_run_id = st.sidebar.selectbox(
                'Choose from saved models',
                allOfRuns[allOfRuns['tags.Source'] == 'create_model']['run_id'].tolist())
            selected_run_info = allOfRuns[(
                allOfRuns['run_id'] == selected_run_id)].iloc[0, :]
            st.code(selected_run_info)
            if st.sidebar.button('Forecast data'):
                if file_selected_path is None:
                    # Without a data file there is nothing to predict on.
                    st.info('No file selected, analysis cannot be performed')
                else:
                    model_uri = 'runs:/' + selected_run_id + '/model/'
                    model_loaded = mlflow.sklearn.load_model(model_uri)
                    # pandas rejects a negative nrows; map the "all rows"
                    # sentinel to None (no limit).
                    rows_to_read = None if nrows == -1 else nrows
                    df = pd.read_csv(file_selected_path, nrows=rows_to_read)
                    #st.success('Model prediction. . .')
                    pred = model_loaded.predict(df)
                    pred_df = pd.DataFrame(pred, columns=['Predictive Data'])
                    st.dataframe(pred_df)
                    pred_df.plot()
                    st.pyplot()
    else:
        st.sidebar.warning('Did not find a trained model')
import pandas as pd import mlflow pd.set_option("display.width", 160) pd.set_option("display.precision", 3) EXPERIMENT_NAME = "ww_bench_v1" experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME) experiment_id = experiment.experiment_id runs = mlflow.search_runs(experiment_ids=experiment_id) columns = [ "dataset", "fit_time", "predict_train_time", "predict_test_time", "log_loss_train", "log_loss_test", "roc_auc_train", "roc_auc_test", ] runs_avg = (runs[[ col for col in runs.columns if col.startswith("metrics") or col.startswith("params") ]].rename( { key: key.replace("metrics.", "").replace("params.", "") for key in runs.keys() }, axis="columns",
mlflow.log_metric("recall", metrics.recall(1.0)) mlflow.log_metric("f1", metrics.fMeasure(1.0)) mlflow.spark.log_model(pipelineTrained, "turbine_gbt") mlflow.set_tag("model", "turbine_gbt") # COMMAND ---------- # MAGIC %md ## Save to the model registry # MAGIC Get the model having the best metrics.AUROC from the registry # COMMAND ---------- best_models = mlflow.search_runs( filter_string= 'tags.model="turbine_gbt" and attributes.status = "FINISHED" and metrics.f1 > 0', order_by=['metrics.f1 DESC'], max_results=1) model_uri = best_models.iloc[0].artifact_uri + "/turbine_gbt" model_registered = mlflow.register_model(model_uri, f"turbine_failure_model_{dbName}") # print(model_uri) # COMMAND ---------- # DBTITLE 1,Flag version as staging/production ready client = mlflow.tracking.MlflowClient() print("registering model version " + model_registered.version + " as production model") client.transition_model_version_stage(name=f"turbine_failure_model_{dbName}", version=model_registered.version,
def get_runs(self, search_query: str = "") -> DataFrame:
    """Search MLflow runs after pointing MLflow at this object's tracking URI.

    Args:
        search_query: filter string passed to ``mlflow.search_runs``; the
            default empty string applies no filter.

    Returns:
        Whatever ``mlflow.search_runs`` returns for that filter.
    """
    tracking_uri = self.__get_uri()
    mlflow.set_tracking_uri(tracking_uri)
    matching_runs = mlflow.search_runs(filter_string=search_query)
    return matching_runs
import mlflow
import os
import argparse
import pandas as pd

# Documentation for search API: https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.search_runs

parser = argparse.ArgumentParser()
parser.add_argument('-expfolder', type=str, default='mlruns')
args = parser.parse_args()

# The all-digit entries of the tracking folder are passed to search_runs as
# experiment ids (not run ids), so name them accordingly.
experiment_ids = [entry for entry in os.listdir(args.expfolder) if entry.isdigit()]
mlflow.set_tracking_uri(args.expfolder)

# search_runs already returns a pandas DataFrame by default, so it can be
# written out directly without re-wrapping it.
runs = mlflow.search_runs(experiment_ids=experiment_ids, filter_string="")
runs.to_csv('flexy_deepmpc_2020_9_8.csv')
runs.to_pickle('flexy_deepmpc_2020_9_8.pkl')
def test_search_runs():
    """Log a run, then check that the most recently started run can be fetched by id."""
    start_run_and_log_data()
    # start_time is a run attribute, not a logged parameter: "param.start_time"
    # would order by a parameter of that name instead of the run's start time.
    runs = mlflow.search_runs(experiment_ids=["0"], order_by=["attributes.start_time DESC"])
    # Fail with a clear assertion rather than a KeyError if nothing was logged.
    assert len(runs) > 0
    mlflow.get_run(runs["run_id"][0])
def mlflow_last_run_add_param(param_dict, save_model=False):
    """Attach extra params, tags, metrics, artifacts and notes to the most recently ended run.

    The run is chosen as the first row of ``mlflow.search_runs`` ordered by end
    time descending, and is re-opened with ``mlflow.start_run(run_id=...)``.
    Each key of ``param_dict`` is logged according to ``mlflow_dict`` (tag,
    param, artifact, ...); keys starting with ``metrics_`` are treated as dicts
    of metrics. A markdown summary is stored in the ``mlflow.note.content`` tag.

    Args:
        param_dict: values to log. It is mutated in place: ``kwargs`` is
            flattened into top-level keys and removed, ``data_*`` keys are
            derived from ``data_props``, and ``history_obj`` is popped when the
            model is saved.
        save_model: when true and ``param_dict['model_type'] == 'TensorFlow'``,
            the model taken from ``history_obj`` is recompiled and saved under
            the run's artifact URI.

    Returns:
        A dict with ``model_id`` (the run id), ``model_name`` and
        ``saved_model_path`` (``None`` when no model was saved).

    Raises:
        KeyError: for a ``param_dict`` key that is neither in ``mlflow_dict``
            nor prefixed with ``metrics_``, or when ``model_name`` is missing.
        Exception: if ``mlflow_dict`` maps a key to an unknown logging type.
    """
    last_experiment_id, last_run_id, _ = mlflow.search_runs(order_by=['attribute.end_time DESC'])[['experiment_id', 'run_id', 'end_time']].iloc[0]

    # How each known key is recorded in MLflow; an empty list means "do not log".
    mlflow_dict = {'layer_df': ['notes'],
                   'model_name': ['tag', 'param'],
                   'max_epochs': ['param'],
                   'actual_epochs': ['param'],
                   'early_stopped': ['param'],
                   'loss': ['param', 'tag'],
                   'data_dataset': ['tag', 'param'],
                   'data_y_col': ['tag', 'param'],
                   'data_window_input_width': ['tag', 'param'],
                   'data_window_pred_width': ['tag', 'param'],
                   'data_split_method': ['tag', 'param'],
                   'data_normalize_method': ['tag', 'param'],
                   'data_lagged_cols': ['param'],
                   'data_hash_first_level': ['tag', 'param'],
                   'data_hash_second_level': ['tag', 'param'],
                   'data_hash': ['tag', 'param'],
                   'look_ups': ['artifact'],
                   'final_data_file': ['artifact'],
                   'data_statistics': ['artifact'],
                   'data_props': [],
                   'data_time_period': ['tag', 'param'],
                   'data_train_sample_size': ['param'],
                   'model_type': ['param'],
                   'history_obj': []}

    notes = ''

    if "model_name" in param_dict:
        notes = notes + f'# {param_dict["model_name"]}\n'

    # Promote every kwargs entry to a top-level key that is logged as a param.
    if "kwargs" in param_dict:
        for key, value in param_dict['kwargs'].items():
            param_dict[key] = value
            mlflow_dict[key] = ['param']
        param_dict.pop('kwargs')

    if "data_props" in param_dict:
        # Flatten the nested data description into individual data_* entries.
        param_dict['data_dataset'] = param_dict['data_props']['first_step']['dataset']
        param_dict['data_y_col'] = param_dict['data_props']['first_step']['dataset_y_col']
        param_dict['data_window_input_width'] = param_dict['data_props']['first_step']['window_input_width']
        param_dict['data_window_pred_width'] = param_dict['data_props']['first_step']['window_pred_width']
        param_dict['data_split_method'] = param_dict['data_props']['second_step']['split_method']
        param_dict['data_normalize_method'] = param_dict['data_props']['second_step']['normalize_method']
        #param_dict['data_lagged_cols'] = str(param_dict['data_props']['second_step']['lagged_col_dict'])
        param_dict['data_hash_first_level'] = param_dict['data_props']['first_step_data_hash']
        param_dict['data_hash_second_level'] = param_dict['data_props']['second_step_data_hash']
        param_dict['data_hash'] = param_dict['data_hash_first_level'] + '_' + param_dict['data_hash_second_level']
        param_dict['data_time_period'] = param_dict['data_props']['iter_step']
        param_dict['data_train_sample_size'] = param_dict["data_props"]["statistics"]['train']['samples']

        # Files live in <cache_dir>/<first-level hash>/ and are named by the
        # second-level hash plus the time period.
        cache_folder = get_project_directories(key='cache_dir')
        cache_folder = os.path.join(cache_folder, param_dict['data_hash_first_level'])

        look_up_file = os.path.join(cache_folder, f'{param_dict["data_hash_second_level"]}_{param_dict["data_time_period"]}_look_up.json')
        final_data_file = os.path.join(cache_folder, f'{param_dict["data_hash_second_level"]}_{param_dict["data_time_period"]}_data_schema.json')
        statistics_file = os.path.join(cache_folder, f'{param_dict["data_hash_second_level"]}_{param_dict["data_time_period"]}_data_statistics.txt')

        # Each file is written only if it does not exist yet.
        if not os.path.exists(statistics_file):
            with open(statistics_file, "w") as outfile:
                txt = tabulate.tabulate(pd.DataFrame(param_dict["data_props"]["statistics"]), headers='keys', tablefmt='simple')
                outfile.write(txt)

        if not os.path.exists(look_up_file):
            with open(look_up_file, "w") as outfile:
                json.dump(param_dict['data_props']['look_ups'], outfile)

        if not os.path.exists(final_data_file):
            # The idx values are converted with .tolist() before json.dump
            # (presumably numpy arrays — TODO confirm).
            for key, value in param_dict['data_props']['final_data']['idx'].items():
                param_dict['data_props']['final_data']['idx'][key] = value.tolist()
            with open(final_data_file, "w") as outfile:
                json.dump(param_dict['data_props']['final_data'], outfile)

        # Store the file paths so the logging loop below uploads them as artifacts.
        param_dict['look_ups'] = look_up_file
        param_dict['final_data_file'] = final_data_file
        param_dict['data_statistics'] = statistics_file

        notes = notes + f'**Time:** {param_dict["data_time_period"]}\n**Dataset:** {param_dict["data_dataset"]}\n'

    # Re-open the selected run so everything below is logged to it.
    with mlflow.start_run(run_id=last_run_id) as run:
        if save_model and param_dict['model_type'] == 'TensorFlow':
            path_saved_model = os.path.join(run.info.artifact_uri, 'tf_model')
            # Popping history_obj also keeps it out of the logging loop below.
            model = param_dict.pop('history_obj').model
            model.compile(loss=tf.losses.MeanAbsoluteError(),
                          optimizer=tf.optimizers.Adam(),
                          metrics=[tf.metrics.MeanAbsoluteError(),
                                   tf.metrics.MeanAbsolutePercentageError(),
                                   tf.metrics.MeanSquaredError(),
                                   tf.metrics.MeanSquaredLogarithmicError()])
            model.save(path_saved_model, save_traces=False)
        else:
            path_saved_model = None

        for key, value in param_dict.items():
            # Keys prefixed with "metrics_" hold a dict of metric name -> value.
            if key[:8] == 'metrics_':
                type_list = ['metric_dict']
            else:
                type_list = mlflow_dict[key]

            for i_type in type_list:
                if i_type == 'tag':
                    mlflow.set_tag(key, value)
                elif i_type == 'param':
                    mlflow.log_param(key, value)
                elif i_type == 'notes':
                    # Handled separately when the notes text is assembled below.
                    pass
                elif i_type == 'metric':
                    mlflow.log_metric(key, value)
                elif i_type == 'metric_dict':
                    # Logged as "<key without 'metrics_'>_<metric name>".
                    for metric_key, metric_value in value.items():
                        mlflow.log_metric(key[8:] + '_' + metric_key, metric_value)
                elif i_type == 'artifact':
                    mlflow.log_artifact(value)
                else:
                    raise Exception(f'Unknown mlflow type {i_type}')

        # Assemble the markdown notes: layer table, test metrics, data statistics.
        if "layer_df" in param_dict:
            notes = notes + (param_dict["layer_df"].to_markdown(index=False) if len(param_dict["layer_df"]) > 0 else '\nNo layers.')
        if "metrics_test" in param_dict:
            metrics_test = pd.DataFrame.from_dict({key: [value] for key, value in param_dict["metrics_test"].items()}, orient='columns')
            notes = notes + '\n\n<br>\n## Model Performance Metrics:\n' + (metrics_test.to_markdown(index=False) if len(param_dict["metrics_test"]) > 0 else 'Empty test metrics.')
        if "data_props" in param_dict:
            notes = notes + '\n\n<br>\n## Data Statistics:\n' + pd.DataFrame(param_dict["data_props"]["statistics"]).to_markdown(index=True)

        mlflow.set_tag("mlflow.note.content", notes)

    return {'model_id': last_run_id,'model_name': param_dict['model_name'], 'saved_model_path': path_saved_model}
#---------------------- #data predect データをpandasで準備する #---------------------- #mlflowとは、機械学習のライフサイクル(前処理→学習→デプロイ)を管理するオープンソースなプラットフォーム #Tracking: ロギング #ロギング(英:logging)とは #「取るぜ取るぜ~ログ取るぜ~」のこと。 #実際にロギングを行ってみる ☛ pycaret の後に実施 with mlflow.start_run(): mlflow.log_param('param1', 1) # パラメータ mlflow.log_metric('metric1', 0.1) # スコア mlflow.log_artifact(filename) # その他、モデルやデータなど mlflow.search_runs() # experiment内のロギング内容を取得できる #------------------------------ #anaconda prompt を開く Open the anaconda prompt #URIで設定したディレクトリまで移動する。 #この時、 mlruns ディレクトリが配下になるようにする( mlruns ディレクトリが存在しない場合、 mlruns ディレクトリが作成される)。 #mlflow ui でローカルサーバが起動する。 #Go to the directory set in URI. #Move to the directory set in the URI. At this time, make sure that the mlruns directory is under it (if the mlruns directory does not exist, the mlruns directory is created). #The local server is started by mlflow ui. #𝑐𝑑./ℎ𝑜𝑔𝑒/ ls mlruns
# Return ROC glm_model_new = _fit_crossvalidator(loan_stats_new, features + ["net"], target="bad_loan") lr_summary_new = glm_model_new.stages[len(glm_model_new.stages) - 1].summary display(lr_summary_new.roc) # COMMAND ---------- print("ML Pipeline accuracy: %s" % lr_summary_new.accuracy) # COMMAND ---------- # MAGIC %md ## 2. Find runs that used the original data version # MAGIC # MAGIC Model accuracy improved from ~80% to ~95% after the feature engineering step. You might therefore wonder: what if you retrained all models built off of the original dataset against the feature-engineered dataset? Would there be similar improvements in model performance? # MAGIC # MAGIC To identify other runs launched against the original dataset, use MLflow's `mlflow.search_runs` API: # COMMAND ---------- mlflow.search_runs( filter_string="params.data_path='{path}' and params.data_version='{version}'" .format(path=data_path, version=0)) # COMMAND ---------- # MAGIC %md # MAGIC ## 3. Load back and reproduce runs against a snapshot of data # MAGIC Finally, you can load back a specific version of the data for use in model re-training. To do this, simply update the widgets above with a data version of 1 (corresponding to the feature-engineered data) and rerun section 1) of this notebook.
def launch(
    environment: str,
    job: str,
    trace: bool,
    kill_on_sigterm: bool,
    existing_runs: str,
    tags: List[str],
    parameters: List[str],
):
    """Launch a deployed job on the given environment and record the outcome in MLflow.

    The latest deployment is found through an MLflow run search; the job id is
    read from that deployment's artifacts, already-active job runs are handled
    according to ``existing_runs``, the job is started, and the result is
    stored as tags on a nested MLflow run.

    Args:
        environment: name of the environment to launch on.
        job: name of the job to look up in the latest deployment.
        trace: if true, follow the job run until it finishes.
        kill_on_sigterm: when tracing, cancel the job run on Ctrl+C.
        existing_runs: how to treat active runs of the job: ``"pass"``,
            ``"wait"`` or ``"cancel"``. Any other value is ignored.
        tags: extra tags, parsed with ``parse_multiple``, used to filter the
            deployment run.
        parameters: launch parameters, parsed with ``parse_multiple``, that
            override the job defaults when non-empty.

    Raises:
        EnvironmentError: if no deployment run matches the filter.
        Exception: if the job is missing from the deployment, or the traced
            job run ends with status ``"ERROR"``.
    """
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    override_parameters = parse_multiple(parameters)

    filter_string = generate_filter_string(environment, additional_tags)

    runs = mlflow.search_runs(filter_string=filter_string, max_results=1)

    if runs.empty:
        raise EnvironmentError(
            f"""
        No runs provided per given set of filters:
            {filter_string}
        Please check experiment UI to verify current status of deployments.
        """
        )

    run_info = runs.iloc[0].to_dict()

    dbx_echo("Successfully found deployment per given job name")

    deployment_run_id = run_info["run_id"]

    # Re-open the deployment run, then log this launch as a nested run under it.
    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):

            artifact_base_uri = deployment_run.info.artifact_uri
            deployments = _load_deployments(api_client, artifact_base_uri)
            job_id = deployments.get(job)

            if not job_id:
                # The original applied "% job" to an already-formatted f-string,
                # which raises TypeError and hides this message.
                raise Exception(
                    f"Job with name {job} not found in the latest deployment"
                )

            jobs_service = JobsService(api_client)
            active_runs = jobs_service.list_runs(job_id, active_only=True).get(
                "runs", []
            )

            for run in active_runs:
                # The three policies are mutually exclusive.
                if existing_runs == "pass":
                    dbx_echo("Passing the existing runs status check")
                elif existing_runs == "wait":
                    dbx_echo(
                        f'Waiting for job run with id {run["run_id"]} to be finished'
                    )
                    _wait_run(api_client, run)
                elif existing_runs == "cancel":
                    dbx_echo(f'Cancelling run with id {run["run_id"]}')
                    _cancel_run(api_client, run)

            if override_parameters:
                # Flatten {k: v, ...} into [k, v, ...] for python_params.
                _prepared_parameters = [
                    item for pair in override_parameters.items() for item in pair
                ]
                dbx_echo(
                    f"Default launch parameters are overridden with the following: {_prepared_parameters}"
                )
                run_data = jobs_service.run_now(
                    job_id, python_params=_prepared_parameters
                )
            else:
                run_data = jobs_service.run_now(job_id)

            if trace:
                dbx_echo("Tracing job run")
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the job run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked job failed during execution. "
                        "Please check Databricks UI for job logs"
                    )
                dbx_echo("Launch command finished")
            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Job successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data["run_id"],
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)
def close_log(self) -> None:
    """Finds all **running** runs and ends them.

    Runs reported as ``RUNNING`` by ``mlflow.search_runs()`` are terminated by
    id through the tracking client; any run still active in this process is
    then closed with ``mlflow.end_run()``.
    """
    all_runs = mlflow.search_runs()
    # Iterating a DataFrame yields column names, not rows, so select the ids of
    # the RUNNING rows explicitly.
    running_run_ids = all_runs.loc[all_runs["status"] == "RUNNING", "run_id"]

    client = mlflow.tracking.MlflowClient()
    for run_id in running_run_ids:
        # end_run() only affects the current active run, so terminate each
        # listed run by its id.
        client.set_terminated(run_id)

    # Also unwind whatever runs are still active locally, as the original
    # repeated end_run() calls did.
    while mlflow.active_run() is not None:
        mlflow.end_run()
required=False, help="The address of server to connect to if using " "Ray Client.") args, _ = parser.parse_known_args() if args.server_address: import ray ray.util.connect(args.server_address) if args.server_address and not args.tracking_uri: raise RuntimeError("If running this example with Ray Client, " "the tracking URI for your tracking server should" "be explicitly passed in.") if args.smoke_test: mlflow_tracking_uri = os.path.join(tempfile.gettempdir(), "mlruns") else: mlflow_tracking_uri = args.tracking_uri tune_function(mlflow_tracking_uri, finish_fast=args.smoke_test) if not args.smoke_test: df = mlflow.search_runs( [mlflow.get_experiment_by_name("example").experiment_id]) print(df) tune_decorated(mlflow_tracking_uri, finish_fast=args.smoke_test) if not args.smoke_test: df = mlflow.search_runs( [mlflow.get_experiment_by_name("mixin_example").experiment_id]) print(df)
# Fit an extra-trees classifier and report accuracy on the train and test splits.
clf = ExtraTreesClassifier(max_features=14, n_estimators=160, max_depth=10)
clf.fit(X_train, y_train)
print('Accuracy on train:', clf.score(X_train, y_train))
print('Accuracy on test:', clf.score(X_test, y_test))
# Predict on X_val and write the predictions to CSV.
y_pred = clf.predict(X_val)
pd.DataFrame(list(y_pred)).to_csv('Extra_trees_1.csv')
# Pair each feature column (everything except 'is_pass') with its importance
# and plot the pairs sorted by ascending score.
fimp = pd.DataFrame(
    zip(pr_df_train.drop('is_pass', axis=1).columns, clf.feature_importances_))
fimp.columns = ['features', 'Score']
sfimp = fimp.sort_values(by='Score')
plt.barh(sfimp['features'], sfimp['Score'])

# In[52]:

y_pred_proba = clf.predict_proba(X_val)
y_pred_proba

# In[56]:

# One column per class probability.
pd.DataFrame(list(y_pred_proba), columns=['Extra_P_0', 'Extra_P_1'])

# In[62]:

mlflow.search_runs()
width, height = config["width"], config["height"] for step in range(config.get("steps", 100)): # Iterative training function - can be any arbitrary training procedure intermediate_score = evaluation_fn(step, width, height) # Feed the score back back to Tune. tune.report(iterations=step, mean_loss=intermediate_score) time.sleep(0.1) if __name__ == "__main__": client = MlflowClient() experiment_id = client.create_experiment("test") trials = tune.run( easy_objective, name="mlflow", num_samples=5, loggers=DEFAULT_LOGGERS + (MLFLowLogger, ), config={ "logger_config": { "mlflow_experiment_id": experiment_id, }, "width": tune.sample_from(lambda spec: 10 + int(90 * random.random())), "height": tune.sample_from(lambda spec: int(100 * random.random())) }) df = mlflow.search_runs([experiment_id]) print(df)
import random
import mlflow

if __name__ == "__main__":
    mlflow.set_tracking_uri("http://localhost:5000")

    # Reuse the "Pycon2021" experiment when it exists, otherwise create it.
    existing_experiment = mlflow.get_experiment_by_name("Pycon2021")
    if existing_experiment:
        experiment_id = existing_experiment.experiment_id
    else:
        experiment_id = mlflow.create_experiment("Pycon2021")

    # Search all runs in experiment_id, highest metric "m" first
    ordered_runs = mlflow.search_runs([experiment_id], order_by=["metrics.m DESC"])
    print(ordered_runs[["metrics.m", "tags.s.release", "run_id"]])
    print("--")

    # Search the experiment_id using a filter_string with tag
    # that has a case insensitive pattern
    release_candidate_filter = "tags.s.release ILIKE '%rc%'"
    release_candidate_runs = mlflow.search_runs(
        [experiment_id], filter_string=release_candidate_filter)
    print(release_candidate_runs[["metrics.m", "tags.s.release", "run_id"]])
# MAGIC # MAGIC A random classifier would have an AUC of 0.5, and higher AUC values are better. For more information, see [Receiver Operating Characteristic Curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve). # COMMAND ---------- # MAGIC %md #### Registering the model in the MLflow Model Registry # MAGIC # MAGIC By registering this model in the Model Registry, you can easily reference the model from anywhere within Databricks. # MAGIC # MAGIC The following section shows how to do this programmatically, but you can also register a model using the UI by following the steps in [Register a model in the Model Registry # MAGIC ](https://docs.databricks.com/applications/mlflow/model-registry.html#register-a-model-in-the-model-registry). # COMMAND ---------- run_id = mlflow.search_runs( filter_string='tags.mlflow.runName = "untuned_random_forest"' ).iloc[0].run_id # COMMAND ---------- model_name = "patricia_wine_quality" model_version = mlflow.register_model(f"runs:/{run_id}/random_forest_model", model_name) # COMMAND ---------- # MAGIC %md You should now see the wine-quality model in the Models page. To display the Models page, click the Models icon in the left sidebar. # MAGIC # MAGIC Next, transition this model to production and load it into this notebook from the model registry. # COMMAND ----------
tags = { "model" : "ptg", "inputs" : field } train_ptgmodel(parameters, possible_inputs[field], tags, log = False) # # Evaluate mlflow results # In[18]: # Select the run of the experiment df_runs = mlflow.search_runs(experiment_ids="0") print("Number of runs done : ", len(df_runs)) # In[19]: # Quick sorting to get the best models based on the RMSE metric df_runs.sort_values(["metrics.rmse"], ascending = True, inplace = True) df_runs.head() # In[20]: # Get the best one
client.update_registered_model(model_name, description='For ranking') # COMMAND ---------- client.transition_model_version_stage(model_name, 1, 'Staging') client.get_model_version(model_name, 1) # COMMAND ---------- from mlflow.tracking import MlflowClient client = MlflowClient() experiment_id = '3181692072700047' runs_df = mlflow.search_runs(experiment_id) display(runs_df) # COMMAND ---------- df_client = spark.read.format("mlflow-experiment").load("3181692072700047") df_client.createOrReplaceTempView("airbnb_model") df_model_selector = (spark.sql(""" select experiment_id, run_id, end_time, metrics.training_r2_score as R2, metrics.training_rmse as RMSE, metrics.training_mae as MAE, CONCAT(artifact_uri,'/log-model') as artifact_uri FROM airbnb_model WHERE status = 'FINISHED' and metrics.training_rmse IS NOT NULL ORDER BY RMSE limit 1