def test_log_metric():
    with start_run() as active_run, mock.patch("time.time") as time_mock:
        time_mock.side_effect = [123 for _ in range(100)]
        run_id = active_run.info.run_id
        kiwi.log_metric("name_1", 25)
        kiwi.log_metric("name_2", -3)
        kiwi.log_metric("name_1", 30, 5)
        kiwi.log_metric("name_1", 40, -2)
        kiwi.log_metric("nested/nested/name", 40)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 3
    expected_pairs = {"name_1": 30, "name_2": -3, "nested/nested/name": 40}
    for key, value in finished_run.data.metrics.items():
        assert expected_pairs[key] == value
    client = tracking.MlflowClient()
    metric_history_name1 = client.get_metric_history(run_id, "name_1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history_name1]) == set([
        (25, 123 * 1000, 0),
        (30, 123 * 1000, 5),
        (40, 123 * 1000, -2),
    ])
    metric_history_name2 = client.get_metric_history(run_id, "name_2")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history_name2]) == set([
        (-3, 123 * 1000, 0),
    ])
def test_log_metric_validation():
    with start_run() as active_run:
        run_id = active_run.info.run_id
        with pytest.raises(MlflowException) as e:
            kiwi.log_metric("name_1", "apple")
    assert e.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
    finished_run = tracking.MlflowClient().get_run(run_id)
    assert len(finished_run.data.metrics) == 0
def test_start_and_end_run():
    # Use the start_run() and end_run() APIs without a `with` block, verify they work.
    active_run = start_run()
    kiwi.log_metric("name_1", 25)
    end_run()
    finished_run = tracking.MlflowClient().get_run(active_run.info.run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 1
    assert finished_run.data.metrics["name_1"] == 25
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Log the best model at the end of the training run.
    """
    if not self._best_model:
        raise Exception("Failed to build any model")
    kiwi.log_metric(self.train_loss, self._best_train_loss, step=self._next_step)
    kiwi.log_metric(self.val_loss, self._best_val_loss, step=self._next_step)
    kiwi.keras.log_model(self._best_model, "model")
def objective(args):
    # Define model
    model = RankedNetworkCNNModule(args['learning_rate'],
                                   dataset.get_embeddings(),
                                   hidden_dim=args['hidden'],
                                   output_labels=2)

    # Evaluation on held-out test-set
    with torch.no_grad():
        model.eval()
        results = pd.DataFrame(columns=['labels', 'predictions'])
        for batch_idx, batch in enumerate(test_loader):
            y_hat = model(batch['a'], batch['b'])
            results = results.append(pd.DataFrame({
                'labels': batch['label'].flatten(),
                'predictions': y_hat.detach().argmax(axis=1)
            }), ignore_index=True)

    # Summarize the results with a confusion matrix
    confusion_matrix(y_pred=results['predictions'].values,
                     y_true=results['labels'].values,
                     classes=[0, 1])
    cm = ConfusionMatrix(actual_vector=results['labels'].values,
                         predict_vector=results['predictions'].values)

    output_test_results = "cm.txt"
    cm.save_stat(output_test_results)

    output_test_predictions_file = "test_predictions.txt"
    np.savetxt(output_test_predictions_file, results['predictions'].values, delimiter=",")

    kiwi.log_metric(key="test_acc", value=cm.Overall_ACC)
    kiwi.log_metric(key="test_f1_micro", value=cm.F1_Micro)
    kiwi.log_metric(key="test_f1_macro", value=cm.F1_Macro)
    kiwi.log_metric(key="test_ci_pm", value=cm.CI95[1] - cm.Overall_ACC)

    kiwi.log_artifact(output_test_predictions_file)
    kiwi.log_artifact(output_test_results + ".pycm")

    return cm.Overall_ACC
def test_metric_timestamp():
    with kiwi.start_run() as active_run:
        kiwi.log_metric("name_1", 25)
        kiwi.log_metric("name_1", 30)
        run_id = active_run.info.run_uuid
    # Check that metric timestamps are between run start and finish
    client = kiwi.tracking.MlflowClient()
    history = client.get_metric_history(run_id, "name_1")
    finished_run = client.get_run(run_id)
    assert len(history) == 2
    assert all([
        m.timestamp >= finished_run.info.start_time and m.timestamp <= finished_run.info.end_time
        for m in history
    ])
def on_train_end(self, *args, **kwargs):
    """
    Log the best model with MLflow and evaluate it on the train and validation data so that
    the metrics stored with MLflow reflect the logged model.
    """
    self._model.set_weights(self._best_weights)
    x, y = self._train
    train_res = self._model.evaluate(x=x, y=y)
    for name, value in zip(self._model.metrics_names, train_res):
        kiwi.log_metric("train_{}".format(name), value)
    x, y = self._valid
    valid_res = self._model.evaluate(x=x, y=y)
    for name, value in zip(self._model.metrics_names, valid_res):
        kiwi.log_metric("valid_{}".format(name), value)
    log_model(keras_model=self._model, **self._pyfunc_params)
def on_epoch_end(self, epoch, logs=None):
    """
    Log Keras metrics with MLflow. Update the best model if the model improved on the
    validation data.
    """
    if not logs:
        return
    for name, value in logs.items():
        if name.startswith("val_"):
            name = "valid_" + name[4:]
        else:
            name = "train_" + name
        kiwi.log_metric(name, value)
    val_loss = logs["val_loss"]
    if val_loss < self._best_val_loss:
        # Save the "best" weights
        self._best_val_loss = val_loss
        self._best_weights = [x.copy() for x in self._model.get_weights()]
def test_search_runs_multiple_experiments():
    experiment_ids = [
        kiwi.create_experiment("exp__{}".format(exp_id)) for exp_id in range(1, 4)
    ]
    for eid in experiment_ids:
        with kiwi.start_run(experiment_id=eid):
            kiwi.log_metric("m0", 1)
            kiwi.log_metric("m_{}".format(eid), 2)

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m0 > 0", ViewType.ALL)) == 3

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_1 > 0", ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_2 = 2", ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_3 < 4", ViewType.ALL)) == 1
def train_als(ratings_data, split_prop, max_iter, reg_param, rank, cold_start_strategy):
    seed = 42

    spark = pyspark.sql.SparkSession.builder.getOrCreate()
    ratings_df = spark.read.parquet(ratings_data)
    (training_df, test_df) = ratings_df.randomSplit([split_prop, 1 - split_prop], seed=seed)
    training_df.cache()
    test_df.cache()

    kiwi.log_metric("training_nrows", training_df.count())
    kiwi.log_metric("test_nrows", test_df.count())

    print('Training: {0}, test: {1}'.format(training_df.count(), test_df.count()))

    als = (ALS()
           .setUserCol("userId")
           .setItemCol("movieId")
           .setRatingCol("rating")
           .setPredictionCol("predictions")
           .setMaxIter(max_iter)
           .setSeed(seed)
           .setRegParam(reg_param)
           .setColdStartStrategy(cold_start_strategy)
           .setRank(rank))

    als_model = Pipeline(stages=[als]).fit(training_df)

    reg_eval = RegressionEvaluator(predictionCol="predictions", labelCol="rating",
                                   metricName="mse")

    predicted_test_df = als_model.transform(test_df)

    test_mse = reg_eval.evaluate(predicted_test_df)
    train_mse = reg_eval.evaluate(als_model.transform(training_df))

    print('The model had a MSE on the test set of {0}'.format(test_mse))
    print('The model had a MSE on the (train) set of {0}'.format(train_mse))
    kiwi.log_metric("test_mse", test_mse)
    kiwi.log_metric("train_mse", train_mse)
    kiwi.spark.log_model(als_model, "als-model")
def train_random_forest(ntrees):
    with kiwi.start_run():
        rf = H2ORandomForestEstimator(ntrees=ntrees)
        train_cols = [n for n in wine.col_names if n != "quality"]
        rf.train(train_cols, "quality", training_frame=train, validation_frame=test)

        kiwi.log_param("ntrees", ntrees)

        kiwi.log_metric("rmse", rf.rmse())
        kiwi.log_metric("r2", rf.r2())
        kiwi.log_metric("mae", rf.mae())

        kiwi.h2o.log_model(rf, "model")
def test_search_runs():
    kiwi.set_experiment("exp-for-search")
    # Create two runs and verify that the current active experiment is the one we just set
    logged_runs = {}
    with kiwi.start_run() as active_run:
        logged_runs["first"] = active_run.info.run_id
        kiwi.log_metric("m1", 0.001)
        kiwi.log_metric("m2", 0.002)
        kiwi.log_metric("m1", 0.002)
        kiwi.log_param("p1", "a")
        kiwi.set_tag("t1", "first-tag-val")
    with kiwi.start_run() as active_run:
        logged_runs["second"] = active_run.info.run_id
        kiwi.log_metric("m1", 0.008)
        kiwi.log_param("p2", "aa")
        kiwi.set_tag("t2", "second-tag-val")

    def verify_runs(runs, expected_set):
        assert set([r.info.run_id for r in runs]) == set([logged_runs[r] for r in expected_set])

    experiment_id = MlflowClient().get_experiment_by_name("exp-for-search").experiment_id

    # 2 runs in this experiment
    assert len(MlflowClient().list_run_infos(experiment_id, ViewType.ACTIVE_ONLY)) == 2

    # 2 runs that have metric "m1" > 0.0001
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.0001")
    verify_runs(runs, ["first", "second"])

    # 1 run that has metric "m1" > 0.002
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.002")
    verify_runs(runs, ["second"])

    # no runs with metric "m1" > 0.1
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.1")
    verify_runs(runs, [])

    # 1 run with metric "m2" > 0
    runs = MlflowClient().search_runs([experiment_id], "metrics.m2 > 0")
    verify_runs(runs, ["first"])

    # 1 run each with param "p1" and "p2"
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'", ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 != 'a'", ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 = 'aa'", ViewType.ALL)
    verify_runs(runs, ["second"])

    # 1 run each with tag "t1" and "t2"
    runs = MlflowClient().search_runs([experiment_id], "tags.t1 = 'first-tag-val'", ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "tags.t2 != 'qwerty'", ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id], "tags.t2 = 'second-tag-val'", ViewType.ALL)
    verify_runs(runs, ["second"])

    # delete the "first" run; it should still match in the ALL and DELETED_ONLY views,
    # but not in ACTIVE_ONLY
    MlflowClient().delete_run(logged_runs["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'", ViewType.ALL)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'", ViewType.DELETED_ONLY)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'", ViewType.ACTIVE_ONLY)
    verify_runs(runs, [])
def call_tracking_apis():
    kiwi.log_metric("some_key", 3)
    with tempfile.NamedTemporaryFile("w") as temp_file:
        temp_file.write("Temporary content.")
        kiwi.log_artifact(temp_file.name)
def call_tracking_apis():
    kiwi.log_metric("some_key", 3)
def eval_and_log_metrics(prefix, actual, pred, epoch):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    kiwi.log_metric("{}_rmse".format(prefix), rmse, step=epoch)
    return rmse
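# Illustrative usage sketch (not part of the original source): it assumes an active MLflow run
# and numpy-compatible arrays; `y_val` and `val_preds_by_epoch` are hypothetical names. It logs
# one "val_rmse" point per epoch via eval_and_log_metrics so the metric appears as a time series.
def _example_eval_loop(y_val, val_preds_by_epoch):
    for epoch, preds in enumerate(val_preds_by_epoch):
        eval_and_log_metrics("val", y_val, preds, epoch)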
def main(expected_env_name):
    actual_conda_env = os.environ.get("CONDA_DEFAULT_ENV", None)
    assert actual_conda_env == expected_env_name, \
        "Script expected to be run from conda env %s but was actually run from env %s" \
        % (expected_env_name, actual_conda_env)
    kiwi.log_metric("CPU usage", psutil.cpu_percent())
import os
from random import random, randint

from kiwi import log_metric, log_param, log_artifacts

if __name__ == "__main__":
    print("Running mlflow_tracking.py")

    log_param("param1", randint(0, 100))

    log_metric("foo", random())
    log_metric("foo", random() + 1)
    log_metric("foo", random() + 2)

    if not os.path.exists("outputs"):
        os.makedirs("outputs")
    with open("outputs/test.txt", "w") as f:
        f.write("hello world!")

    log_artifacts("outputs")
def log_scalar(name, value, step):
    """Log a scalar value to both MLflow and TensorBoard"""
    writer.add_scalar(name, value, step)
    kiwi.log_metric(name, value)
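# Illustrative sketch only (not in the original file): `writer` above is assumed to be a
# TensorBoard SummaryWriter created elsewhere. This shows how log_scalar might be called from
# a training loop to record the same series in both MLflow and TensorBoard; `losses` is a
# hypothetical list of per-step loss values.
def _example_log_training_loss(losses):
    for step, loss in enumerate(losses):
        log_scalar("train_loss", loss, step)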
def log_metrics(metrics):
    for k, values in metrics.items():
        for v in values:
            kiwi.log_metric(k, v)
metrics = {
    'MAE': [rand()],
    'R2': [rand()],
    'RMSE': [rand()],
}
log_params(parameters)
log_metrics(metrics)

# Runs with multiple values for a single metric so that we can QA the time-series metric plot
for i in range(3):
    with kiwi.start_run():
        for j in range(10):
            sign = random.choice([-1, 1])
            kiwi.log_metric(
                "myReallyLongTimeSeriesMetricName-abcdefghijklmnopqrstuvwxyz",
                random.random() * sign)
            kiwi.log_metric("Another Timeseries Metric", rand() * sign)
            kiwi.log_metric("Yet Another Timeseries Metric", rand() * sign)
            if i == 0:
                kiwi.log_metric("Special Timeseries Metric", rand() * sign)
        kiwi.log_metric("Bar chart metric", rand())

# Big parameter values
with kiwi.start_run(run_name='ipython'):
    parameters = {
        'this is a pretty long parameter name': 'NA10921-test_file_2018-08-10.txt',
    }
    metrics = {'grower': [i**1.2 for i in range(10)]}
    log_params(parameters)
def train_keras(ratings_data, als_model_uri, hidden_units):
    np.random.seed(0)
    tf.set_random_seed(42)  # For reproducibility

    spark = pyspark.sql.SparkSession.builder.getOrCreate()
    als_model = kiwi.spark.load_model(als_model_uri).stages[0]

    ratings_df = spark.read.parquet(ratings_data)
    (training_df, test_df) = ratings_df.randomSplit([0.8, 0.2], seed=42)
    training_df.cache()
    test_df.cache()

    kiwi.log_metric("training_nrows", training_df.count())
    kiwi.log_metric("test_nrows", test_df.count())

    print('Training: {0}, test: {1}'.format(training_df.count(), test_df.count()))

    user_factors = als_model.userFactors.selectExpr("id as userId", "features as uFeatures")
    item_factors = als_model.itemFactors.selectExpr("id as movieId", "features as iFeatures")
    joined_train_df = training_df.join(item_factors, on="movieId").join(user_factors, on="userId")
    joined_test_df = test_df.join(item_factors, on="movieId").join(user_factors, on="userId")

    # We'll combine the movies and ratings vectors into a single vector of length 24.
    # We will then explode this features vector into a set of columns.
    def concat_arrays(*args):
        return list(chain(*args))

    concat_arrays_udf = udf(concat_arrays, ArrayType(FloatType()))

    concat_train_df = (joined_train_df.select(
        'userId', 'movieId',
        concat_arrays_udf(col("iFeatures"), col("uFeatures")).alias("features"),
        col('rating').cast("float")))
    concat_test_df = (joined_test_df.select(
        'userId', 'movieId',
        concat_arrays_udf(col("iFeatures"), col("uFeatures")).alias("features"),
        col('rating').cast("float")))

    pandas_df = concat_train_df.toPandas()
    pandas_test_df = concat_test_df.toPandas()

    # This syntax will create a new DataFrame where elements of the 'features' vector
    # are each in their own column. This is what we'll train our neural network on.
    x_test = pd.DataFrame(pandas_test_df.features.values.tolist(), index=pandas_test_df.index)
    x_train = pd.DataFrame(pandas_df.features.values.tolist(), index=pandas_df.index)

    # Show matrix for example.
    print("Training matrix:")
    print(x_train)

    # Create our Keras model with two fully connected hidden layers.
    model = Sequential()
    model.add(Dense(30, input_dim=24, activation='relu'))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(1, activation='linear'))

    model.compile(loss="mse", optimizer=keras.optimizers.Adam(lr=.0001))

    filepath = '/tmp/ALS_checkpoint_weights.hdf5'
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=2, mode='auto')

    model.fit(x_train, pandas_df["rating"], validation_split=.2, verbose=2, epochs=3,
              batch_size=128, shuffle=False, callbacks=[early_stopping])

    train_mse = model.evaluate(x_train, pandas_df["rating"], verbose=2)
    test_mse = model.evaluate(x_test, pandas_test_df["rating"], verbose=2)
    kiwi.log_metric("test_mse", test_mse)
    kiwi.log_metric("train_mse", train_mse)

    print('The model had a MSE on the test set of {0}'.format(test_mse))
    kiwi.keras.log_model(model, "keras-model")
import numpy as np
from sklearn.linear_model import LogisticRegression

import kiwi
import kiwi.sklearn

if __name__ == "__main__":
    X = np.array([-2, -1, 0, 1, 2, 1]).reshape(-1, 1)
    y = np.array([0, 0, 1, 1, 1, 0])
    lr = LogisticRegression()
    lr.fit(X, y)
    score = lr.score(X, y)
    print("Score: %s" % score)
    kiwi.log_metric("score", score)
    kiwi.sklearn.log_model(lr, "model")
    print("Model saved in run %s" % kiwi.active_run().info.run_uuid)
def log_metrics(self, metrics, step):
    # metrics is a dictionary of metric names and values
    for k, v in metrics.items():
        kiwi.log_metric(key=k, value=v, step=step)
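# Hypothetical call sketch (not from the original source): `logger` stands for an instance of
# whatever class defines log_metrics above, and an MLflow run is assumed to be active; `loss`
# and `accuracy` are illustrative metric values.
def _example_report_step(logger, step, loss, accuracy):
    logger.log_metrics({"loss": loss, "accuracy": accuracy}, step=step)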
with kiwi.start_run():
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    kiwi.log_param("alpha", alpha)
    kiwi.log_param("l1_ratio", l1_ratio)
    kiwi.log_metric("rmse", rmse)
    kiwi.log_metric("r2", r2)
    kiwi.log_metric("mae", mae)

    tracking_url_type_store = urlparse(kiwi.get_tracking_uri()).scheme

    # Model registry does not work with file store
    if tracking_url_type_store != "file":
        # Register the model
        # There are other ways to use the Model Registry, which depends on the use case,
        # please refer to the doc for more information:
        # https://mlflow.org/docs/latest/model-registry.html#api-workflow
        kiwi.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel")
import os
import shutil
import sys
import random
import tempfile

import kiwi
from kiwi import log_metric, log_param, log_artifacts, get_artifact_uri, active_run, \
    get_tracking_uri, log_artifact

if __name__ == "__main__":
    print("Running {} with tracking URI {}".format(sys.argv[0], get_tracking_uri()))
    log_param("param1", 5)
    log_metric("foo", 5)
    log_metric("foo", 6)
    log_metric("foo", 7)
    log_metric("random_int", random.randint(0, 100))
    run_id = active_run().info.run_id
    # Get run metadata & data from the tracking server
    service = kiwi.tracking.MlflowClient()
    run = service.get_run(run_id)
    print("Metadata & data for run with UUID %s: %s" % (run_id, run))
    local_dir = tempfile.mkdtemp()
    message = "test artifact written during run %s within artifact URI %s\n" \
              % (active_run().info.run_id, get_artifact_uri())
    try:
        file_path = os.path.join(local_dir, "some_output_file.txt")
        with open(file_path, "w") as handle:
            handle.write(message)
        log_artifacts(local_dir, "some_subdir")
    finally:
        # Clean up the temporary directory once its contents have been logged
        shutil.rmtree(local_dir)