def main():
    """Grid-search an SVC on the iris data with sklearn autologging enabled.

    After fitting, print the data logged for the last active run (the parent
    run), then a table of the runs tagged with that run as their parent.
    """
    mlflow.sklearn.autolog()

    parameters = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    iris = datasets.load_iris()
    search = GridSearchCV(svm.SVC(), parameters)
    search.fit(iris.data, iris.target)
    parent_run_id = mlflow.last_active_run().info.run_id

    # Parent run: dump every category returned by fetch_logged_data.
    print("========== parent run ==========")
    logged = fetch_logged_data(parent_run_id)
    for key, data in logged.items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)

    # Child runs: select them through the parent-run-id tag.
    child_filter = "tags.mlflow.parentRunId = '{}'".format(parent_run_id)
    child_runs = mlflow.search_runs(filter_string=child_filter)

    # One column per searched hyper-parameter, plus the mean test score.
    columns = ["run_id"]
    columns += ["params.{}".format(name) for name in parameters]
    columns.append("metrics.mean_test_score")

    print("\n========== child runs ==========\n")
    pd.set_option("display.max_columns", None)  # prevent truncating columns
    print(child_runs[columns])
def test_last_active_run_returns_most_recently_ended_active_run():
    """Check that ``mlflow.last_active_run()`` returns the run that just ended.

    A metric and a param are logged inside the run; after the run has ended,
    the returned run must carry the same run id and the logged data.
    """
    # The context manager ends the run even if one of the logging calls
    # raises, so a failure here cannot leave an active run behind.
    with mlflow.start_run() as run:
        run_id = run.info.run_id
        mlflow.log_metric("a", 1.0)
        mlflow.log_param("b", 2)

    last_active_run = mlflow.last_active_run()
    assert last_active_run.info.run_id == run_id
    assert last_active_run.data.metrics == {"a": 1.0}
    # The param was logged as the int 2 and is expected back as the string "2".
    assert last_active_run.data.params == {"b": "2"}
def test_last_active_run_retrieves_autologged_run():
    """Check that ``mlflow.last_active_run()`` yields a run after an autologged fit.

    Autologging is switched on, a one-tree random forest is fitted and used for
    a prediction, and the last active run must then exist and have a run id.
    """
    from sklearn.ensemble import RandomForestRegressor

    mlflow.autolog()

    # Smallest possible training set: a single sample with two features.
    features, targets = [[1, 2]], [[3]]
    forest = RandomForestRegressor(n_estimators=1, max_depth=1, max_features=1)
    forest.fit(features, targets)
    forest.predict([[2, 1]])

    autolog_run = mlflow.last_active_run()
    assert autolog_run is not None
    assert autolog_run.info.run_id is not None
def main():
    """Fit a linear regression with sklearn autologging on and print what was logged."""
    # enable autologging
    mlflow.sklearn.autolog()

    # prepare training data: y = 1 * x0 + 2 * x1 + 3
    X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    coefficients = np.array([1, 2])
    y = np.dot(X, coefficients) + 3

    # train a model
    LinearRegression().fit(X, y)

    run_id = mlflow.last_active_run().info.run_id
    print("Logged data and model in run {}".format(run_id))

    # show logged data, one section per category
    logged = fetch_logged_data(run_id)
    for key, data in logged.items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)
def main():
    """Train a LightGBM classifier on iris with autologging on and print what was logged."""
    # prepare example dataset
    features, labels = load_iris(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(features, labels)

    # enable auto logging
    # this includes lightgbm.sklearn estimators
    mlflow.lightgbm.autolog()

    classifier = lgb.LGBMClassifier(n_estimators=20, reg_lambda=1.0)
    classifier.fit(X_train, y_train, eval_set=[(X_test, y_test)])
    predictions = classifier.predict(X_test)
    # NOTE(review): the score is not used again in this function; the call is
    # kept exactly as in the original.
    f1 = f1_score(y_test, predictions, average="micro")

    run_id = mlflow.last_active_run().info.run_id
    print("Logged data and model in run {}".format(run_id))

    # show logged data, one section per category
    logged = fetch_logged_data(run_id)
    for key, data in logged.items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)
def main():
    """Train an XGBoost regressor on the diabetes data with autologging on and print what was logged."""
    # prepare example dataset
    features, targets = load_diabetes(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(features, targets)

    # enable auto logging
    # this includes xgboost.sklearn estimators
    mlflow.xgboost.autolog()

    booster_params = {"n_estimators": 20, "reg_lambda": 1, "gamma": 0, "max_depth": 3}
    regressor = xgb.XGBRegressor(**booster_params)
    regressor.fit(X_train, y_train, eval_set=[(X_test, y_test)])
    predictions = regressor.predict(X_test)
    # NOTE(review): the error value is not used again in this function; the
    # call is kept exactly as in the original.
    mse = mean_squared_error(y_test, predictions)

    run_id = mlflow.last_active_run().info.run_id
    print("Logged data and model in run {}".format(run_id))

    # show logged data, one section per category
    logged = fetch_logged_data(run_id)
    for key, data in logged.items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)
def test_last_active_run_returns_currently_active_run():
    """Check that ``mlflow.last_active_run()`` returns the run that is still active."""
    # The context manager ends the run even if the lookup below raises (for
    # example on a None result), so a failure cannot leave an active run behind.
    with mlflow.start_run() as run:
        run_id = run.info.run_id
        last_active_run_id = mlflow.last_active_run().info.run_id

    assert run_id == last_active_run_id
# Load the iris data and pull out the pieces used below.
iris_dataset = load_iris()
data = iris_dataset["data"]
target = iris_dataset["target"]
target_names = iris_dataset["target_names"]

# Instantiate model
model = GradientBoostingClassifier()

# Shuffle, then use the first 100 samples for training and the rest for validation.
data, target = shuffle(data, target)
train_x, val_x = data[:100], data[100:]
train_y, val_y = target[:100], target[100:]

# Train the model; the run id is read right after fitting, before evaluation.
model.fit(train_x, train_y)
run_id = mlflow.last_active_run().info.run_id

# Report the validation error and the run that was used.
print("MSE:", mean_squared_error(model.predict(val_x), val_y))
print("Target names: ", target_names)
print("run_id: {}".format(run_id))

# Register the model stored under the run's "model" artifact path.
registered_model_name = "RayMLflowIntegration"
model_uri = "runs:/{}/model".format(run_id)
mv = mlflow.register_model(model_uri, registered_model_name)
print("Name: {}".format(mv.name))
print("Version: {}".format(mv.version))