def main(argv):
    """Train, evaluate, export and reload a DNN regressor on the Boston housing data.

    Builds a ``tf.estimator.DNNRegressor``, trains it inside an MLflow run, logs the
    hyper-parameters and the test-set mean squared error, exports the estimator as a
    SavedModel, logs that model with MLflow, and finally loads it back as a PyFunc to
    print its predictions next to the original labels.

    :param argv: Command-line arguments; not used by this function.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    # There are 13 features we are using for inference; they are fed to the model as a
    # single dense vector under the key "features".
    num_features = x_train.shape[1]
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(num_features,))]
    feat_spec = {
        "features": tf.placeholder("float", name="features", shape=[None, num_features])
    }
    hidden_units = [50, 20]
    steps = 1000
    regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units, feature_columns=feat_cols)
    # Training repeats and shuffles the data indefinitely; `steps` bounds the training.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"features": x_train}, y_train, num_epochs=None, shuffle=True)
    with tracking.start_run():
        mlflow.log_param("Hidden Units", hidden_units)
        mlflow.log_param("Steps", steps)
        regressor.train(train_input_fn, steps=steps)
        # Evaluate on exactly one ordered pass over the test set so the reported error
        # covers every test example once, rather than `steps` batches of shuffled,
        # endlessly repeated data.
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=1, shuffle=False)
        # Compute mean squared error.
        eval_metrics = regressor.evaluate(test_input_fn)
        mlflow.log_metric("Mean Square Error", eval_metrics['average_loss'])
        # Building a receiver function for exporting.
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feat_spec)
        temp = tempfile.mkdtemp()
        try:
            saved_estimator_path = regressor.export_savedmodel(temp, receiver_fn).decode("utf-8")
            # Logging the saved model.
            tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                                       signature_def_key="predict",
                                       artifact_path="model")
            # Reloading the model.
            loaded_model = tensorflow.load_pyfunc(saved_estimator_path)
            df = pd.DataFrame(data=x_test, columns=["features"] * num_features)
            # Predicting on the loaded Python Function.
            predict_df = loaded_model.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            # Always remove the temporary export directory, even if export or
            # prediction fails.
            shutil.rmtree(temp)
def test_log_saved_model(self):
    """Export the DNN, log it with MLflow, reload it as a pyfunc and compare predictions.

    The tracking URI is pointed at a directory inside a temporary folder for the
    duration of the test and restored afterwards.
    """
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        # Keep the MLflow logs inside the temp folder so they are deleted after the test.
        previous_tracking_uri = tracking.get_tracking_uri()
        mlruns_dir = os.path.abspath(tmp.path("mlruns"))
        tracking.set_tracking_uri("file://%s" % mlruns_dir)
        tracking.start_run()
        try:
            # Map each feature name (str) to a placeholder tensor.
            feature_spec = {
                name: tf.placeholder("float", name=name, shape=[150])
                for name in self._feature_names
            }
            # Receiver function used when saving the model.
            receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)
            export_root = tmp.path("model")
            os.makedirs(export_root)
            os.makedirs(tmp.path("hello"))
            # Save the TensorFlow model; the export call returns the final path as bytes.
            saved_model_path = self._dnn.export_savedmodel(
                export_root, receiver_fn).decode("utf-8")
            # Log the TensorFlow model that was just saved.
            tensorflow.log_saved_model(saved_model_dir=saved_model_path,
                                       signature_def_key="predict",
                                       artifact_path=tmp.path("hello"))
            # Load the saved model back as a pyfunc and predict on the dataset.
            loaded_model = pyfunc.load_pyfunc(saved_model_path)
            features_df = pandas.DataFrame(data=self._X, columns=self._feature_names)
            predictions_df = loaded_model.predict(features_df)
            expected = [entry['predictions'] for entry in self._dnn_predict]
            actual = [row for _, row in predictions_df.iterrows()]
            # The reloaded model must reproduce the estimator's own predictions.
            np.testing.assert_array_equal(expected, actual)
        finally:
            # Restore the previous logging location.
            tracking.end_run()
            tracking.set_tracking_uri(previous_tracking_uri)
def helper(self, feature_spec, tmp, estimator, df):
    """Export, log, reload and run predictions with an estimator, for testing purposes.

    :param feature_spec: Mapping passed to
        ``tf.estimator.export.build_raw_serving_input_receiver_fn``.
    :param tmp: Object whose ``path(name)`` method returns a path to use for the export.
    :param estimator: Estimator exposing ``export_savedmodel``.
    :param df: Input handed to the reloaded pyfunc's ``predict``.
    :return: Whatever the reloaded pyfunc's ``predict`` returns for ``df``.
    """
    serving_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)
    export_root = tmp.path("model")
    os.makedirs(export_root)
    # Save the TensorFlow model; the export call returns the final path as bytes.
    exported_bytes = estimator.export_savedmodel(export_root, serving_receiver)
    exported_path = exported_bytes.decode("utf-8")
    # Log the TensorFlow model that was just saved.
    tensorflow.log_saved_model(saved_model_dir=exported_path,
                               signature_def_key="predict",
                               artifact_path="hello")
    # Load the saved model back as a pyfunc and predict on the dataset.
    reloaded = pyfunc.load_pyfunc(exported_path)
    return reloaded.predict(df)
def train(model_dir, training_pandas_data, test_pandas_data, label_col, feat_cols, hidden_units,
          steps, batch_size, training_data_path, test_data_path):
    """Train a DNNRegressor, log its test RMSE to MLflow and log the exported model.

    :param model_dir: Directory the SavedModel is exported into.
    :param training_pandas_data: Training table, indexed by column name.
    :param test_pandas_data: Test table, indexed by column name.
    :param label_col: Name of the label column.
    :param feat_cols: Names of the feature columns.
    :param hidden_units: Hidden layer sizes passed to the regressor.
    :param steps: Number of training steps passed to ``regressor.train``.
    :param batch_size: Batch size for both input functions.
    :param training_data_path: Training data location; only printed.
    :param test_data_path: Test data location; only printed.
    """
    # Echo the run configuration.
    print("training-data-path: " + training_data_path)
    print("test-data-path: " + test_data_path)
    for units in hidden_units:
        print("hidden-units: ", units)
    print("steps: ", steps)
    print("batch_size: ", batch_size)
    print("label-col: " + label_col)
    for col in feat_cols:
        print("feat-cols: " + col)

    # Labels for the training and testing sets.
    training_labels = training_pandas_data[label_col].values
    test_labels = test_pandas_data[label_col].values

    # Column name -> column values, for training and for testing.
    training_features = {col: training_pandas_data[col].values for col in feat_cols}
    test_features = {col: test_pandas_data[col].values for col in feat_cols}
    # tf.feature_columns describing the model inputs.
    tf_feat_cols = [tf.feature_column.numeric_column(col) for col in feat_cols]
    # Column name -> placeholder tensor, for the serving receiver function.
    feature_spec = {col: tf.placeholder("float", name=col, shape=[None]) for col in feat_cols}

    # Receiver function for loading the model for serving.
    receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)

    # Input functions for the training and testing sets.
    input_train = tf.estimator.inputs.numpy_input_fn(
        training_features, training_labels, shuffle=True, batch_size=batch_size)
    input_test = tf.estimator.inputs.numpy_input_fn(
        test_features, test_labels, shuffle=False, batch_size=batch_size)

    regressor = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols,
                                          hidden_units=hidden_units)
    regressor.train(input_fn=input_train, steps=steps)

    # Evaluate on the test input function; RMSE is the square root of the average loss.
    test_eval = regressor.evaluate(input_fn=input_test)
    test_rmse = test_eval["average_loss"] ** 0.5
    print("Test RMSE:", test_rmse)

    mlflow.log_param("Number of data points", len(training_labels))
    mlflow.log_metric("RMSE for test set", test_rmse)

    # Export the TensorFlow model (the export path is returned as bytes), then log it.
    exported_bytes = regressor.export_savedmodel(model_dir, receiver_fn)
    saved_estimator_path = exported_bytes.decode("utf-8")
    tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                               signature_def_key="predict",
                               artifact_path="model")

    run_id = mlflow.tracking.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def train(model_dir, training_pandas_data, test_pandas_data, label_col, feat_cols, hidden_units,
          steps, batch_size, training_data_path, test_data_path):
    """Train a DNNRegressor, log RMSE and R2 metrics to MLflow and log the exported model.

    :param model_dir: Directory the SavedModel is exported into.
    :param training_pandas_data: Training table, indexed by column name.
    :param test_pandas_data: Test table, indexed by column name.
    :param label_col: Name of the label column.
    :param feat_cols: Names of the feature columns.
    :param hidden_units: Hidden layer sizes passed to the regressor.
    :param steps: Number of training steps passed to ``regressor.train``.
    :param batch_size: Batch size for all input functions.
    :param training_data_path: Training data location; only printed.
    :param test_data_path: Test data location; only printed.
    """
    print("train: " + training_data_path)
    print("test: " + test_data_path)
    for hu in hidden_units:
        print("hidden-units: ", hu)
    print("steps: ", steps)
    print("batch_size: ", batch_size)
    print("label-col: " + label_col)
    for feat in feat_cols:
        print("feat-cols: " + feat)

    # Split data into training labels and testing labels.
    training_labels = training_pandas_data[label_col].values
    test_labels = test_pandas_data[label_col].values

    training_features = {}  # Dictionary of column names to column values for training
    test_features = {}  # Dictionary of column names to column values for testing
    tf_feat_cols = []  # List of tf.feature_columns for input functions
    feature_spec = {}  # Dictionary of column name -> placeholder tensor for receiver functions

    # Create TensorFlow columns based on passed in feature columns.
    for feat in feat_cols:
        training_features[feat] = training_pandas_data[feat].values
        test_features[feat] = test_pandas_data[feat].values
        tf_feat_cols.append(tf.feature_column.numeric_column(feat))
        feature_spec[feat] = tf.placeholder("float", name=feat, shape=[None])

    # Create receiver function for loading the model for serving.
    receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)

    # Create input functions for both the training and testing sets.
    input_train = tf.estimator.inputs.numpy_input_fn(
        training_features, training_labels, shuffle=True, batch_size=batch_size)
    input_test = tf.estimator.inputs.numpy_input_fn(
        test_features, test_labels, shuffle=False, batch_size=batch_size)
    # Separate, non-shuffled view of the training set for prediction. The shuffled
    # `input_train` yields rows in random order, so predictions made through it would
    # not line up with `training_labels` and the training R2 would be meaningless.
    input_train_ordered = tf.estimator.inputs.numpy_input_fn(
        training_features, training_labels, shuffle=False, batch_size=batch_size)

    # Creating DNNRegressor.
    regressor = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols,
                                          hidden_units=hidden_units)

    # Training regressor on training input function.
    regressor.train(input_fn=input_train, steps=steps)

    # Evaluating model on the test data and calculating the RMSE of the testing set.
    test_eval = regressor.evaluate(input_fn=input_test)
    test_rmse = test_eval["average_loss"] ** 0.5

    # Putting the predictions in lists we can use to calculate r^2 scores with. Both
    # prediction passes use ordered input so row i of the predictions matches label i.
    train_pred = [p['predictions'][0] for p in regressor.predict(input_fn=input_train_ordered)]
    r2_train = r2_score(training_labels, train_pred)

    test_pred = [p['predictions'][0] for p in regressor.predict(input_fn=input_test)]
    r2_test = r2_score(test_labels, test_pred)

    print("Test RMSE:", test_rmse)
    print("Training Set R2", r2_train)
    print("Test Set R2", r2_test)

    mlflow.log_param("num_train_points", len(training_labels))

    # Logging the RMSE and r2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_train)
    mlflow.log_metric("Test R2", r2_test)

    # Saving TensorFlow model; the export call returns the final path as bytes.
    saved_estimator_path = regressor.export_savedmodel(model_dir, receiver_fn).decode("utf-8")

    # Logging the TensorFlow model just saved.
    tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                               signature_def_key="predict",
                               artifact_path="model")