def main(argv):
    """Train, evaluate, export and reload a DNN regressor on Boston housing data.

    Builds a ``tf.estimator.DNNRegressor``, trains it, logs its hyper-parameters
    and test-set mean squared error with MLflow, exports it as a SavedModel,
    logs the exported model as an MLflow artifact, then loads it back as a
    PyFunc and prints its predictions next to the true labels.

    Args:
        argv: Unused. NOTE(review): presumably supplied by the script launcher
            (e.g. ``tf.app.run``) - confirm against the caller.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    num_features = x_train.shape[1]

    # All input columns are fed to the model as one dense "features" vector.
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(num_features,))]
    feat_spec = {
        "features": tf.placeholder("float", name="features", shape=[None, num_features])
    }

    hidden_units = [50, 20]
    steps = 1000
    regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units, feature_columns=feat_cols)

    # Training repeats the data indefinitely (num_epochs=None); `steps` bounds it.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"features": x_train}, y_train, num_epochs=None, shuffle=True)

    with tracking.start_run():
        mlflow.log_param("Hidden Units", hidden_units)
        mlflow.log_param("Steps", steps)
        regressor.train(train_input_fn, steps=steps)

        # Evaluate with a single ordered pass over the test set so every example
        # is scored exactly once, instead of re-sampling a shuffled, endlessly
        # repeated stream for a fixed number of steps.
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=1, shuffle=False)
        # Compute mean squared error; evaluation stops when the input is exhausted.
        mse = regressor.evaluate(test_input_fn)
        mlflow.log_metric("Mean Square Error", mse['average_loss'])

        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feat_spec)
        temp = tempfile.mkdtemp()
        try:
            # The export path is decoded from bytes before being used as a str path.
            saved_estimator_path = regressor.export_savedmodel(temp, receiver_fn).decode("utf-8")

            # Logging the saved model
            tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                                       signature_def_key="predict",
                                       artifact_path="model")

            # Reloading the model
            pyfunc = tensorflow.load_pyfunc(saved_estimator_path)

            # Every column is deliberately named "features".
            # NOTE(review): presumably the pyfunc wrapper groups same-named
            # columns into the single "features" input tensor - confirm.
            df = pd.DataFrame(data=x_test, columns=["features"] * num_features)

            # Predicting on the loaded Python Function
            predict_df = pyfunc.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            # Always remove the temporary export directory, even on failure.
            shutil.rmtree(temp)
def test_dnn():
    """End-to-end check of the DNN regression app run through MLflow.

    Points the tracking URI at a temporary artifacts directory, launches the
    dataset download project and then the ``apps/dnn-regression`` project,
    loads the first entry found in the estimator output directory as a PyFunc,
    and asserts that predicting on the test parquet file yields a
    ``predictions`` column whose first value is a ``numpy.float32``.
    The original tracking URI is restored afterwards, even on failure.
    """
    previous_uri = tracking.get_tracking_uri()
    try:
        with TempDir(chdr=False, remove_on_exit=True) as workdir:
            data_dir, model_dir, artifact_dir = (
                workdir.path(name) for name in ("diamonds", "estimator", "artifacts")
            )
            for directory in (data_dir, model_dir, artifact_dir):
                os.mkdir(directory)
            tracking.set_tracking_uri(artifact_dir)

            def launch(uri, parameters):
                # Both projects are launched with identical settings; the
                # experiment id is looked up afresh for each launch.
                return run(
                    uri,
                    entry_point="main",
                    version=None,
                    parameters=parameters,
                    experiment_id=tracking._get_experiment_id(),
                    mode="local",
                    cluster_spec=None,
                    git_username=None,
                    git_password=None,
                    use_conda=True,
                    storage_dir=None,
                )

            # Download the diamonds dataset via mlflow run
            launch(".", {"dest-dir": data_dir})

            # Run the main dnn app via mlflow
            test_parquet = os.path.join(data_dir, "test_diamonds.parquet")
            launch(
                "apps/dnn-regression",
                {
                    "model-dir": model_dir,
                    "train": os.path.join(data_dir, "train_diamonds.parquet"),
                    "test": test_parquet,
                    "hidden-units": "30,30",
                    "label-col": "price",
                    "steps": 5000,
                    "batch-size": 128,
                },
            )

            # Loading the saved model as a pyfunc.
            exported_name = os.listdir(model_dir)[0]
            pyfunc = tensorflow.load_pyfunc(os.path.join(model_dir, exported_name))

            frame = pandas.read_parquet(test_parquet)
            predict_df = pyfunc.predict(frame)

            assert 'predictions' in predict_df
            first_prediction = predict_df['predictions'][0][0]
            assert isinstance(first_prediction, numpy.float32)
    finally:
        tracking.set_tracking_uri(previous_uri)