Esempio n. 1
0
def main(argv):
    """Train a DNN regressor on Boston housing data and round-trip it through MLflow.

    Builds, trains and evaluates a ``tf.estimator.DNNRegressor``, exports it as a
    SavedModel for inference, logs the exported model with MLflow, and loads the
    fitted model back as a PyFunc to make predictions on the test set.

    Args:
        argv: Command-line arguments; not read by this function.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    num_features = x_train.shape[1]
    # All input features are packed into a single multi-dimensional numeric column.
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(num_features,))]
    feat_spec = {"features": tf.placeholder("float", name="features", shape=[None, num_features])}
    hidden_units = [50, 20]
    steps = 1000
    regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units, feature_columns=feat_cols)
    # num_epochs=None repeats the training data indefinitely; train() is bounded by `steps`.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"features": x_train}, y_train, num_epochs=None, shuffle=True)
    with tracking.start_run():
        mlflow.log_param("Hidden Units", hidden_units)
        mlflow.log_param("Steps", steps)
        regressor.train(train_input_fn, steps=steps)
        # Evaluate with exactly one unshuffled pass over the test set so the logged
        # metric is deterministic and every test sample is counted once.
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=1, shuffle=False)
        # Compute mean squared error
        mse = regressor.evaluate(test_input_fn)
        mlflow.log_metric("Mean Square Error", mse['average_loss'])
        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feat_spec)
        temp = tempfile.mkdtemp()
        try:
            # export_savedmodel returns the export directory as bytes.
            saved_estimator_path = regressor.export_savedmodel(temp, receiver_fn).decode("utf-8")
            # Logging the saved model
            tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                                       signature_def_key="predict",
                                       artifact_path="model")
            # Reloading the model
            loaded_model = tensorflow.load_pyfunc(saved_estimator_path)
            # Every column is named "features" so the frame matches the single model input.
            df = pd.DataFrame(data=x_test, columns=["features"] * num_features)
            # Predicting on the loaded Python Function
            predict_df = loaded_model.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            # Always remove the temporary export directory.
            shutil.rmtree(temp)
Esempio n. 2
0
 def test_log_saved_model(self):
     """
     Exports ``self._dnn`` as a SavedModel, logs it with MLflow, reloads it as a pyfunc and
     asserts that the reloaded model's predictions on ``self._X`` equal ``self._dnn_predict``.

     Tracking is redirected to a temporary directory for the duration of the test; the
     previous tracking URI is restored even if starting the run fails.
     """
     with TempDir(chdr=False, remove_on_exit=True) as tmp:
         # Setting the logging such that it is in the temp folder and deleted after the test.
         old_tracking_dir = tracking.get_tracking_uri()
         tracking_dir = os.path.abspath(tmp.path("mlruns"))
         tracking.set_tracking_uri("file://%s" % tracking_dir)
         try:
             tracking.start_run()
             try:
                 # Creating dict of features names (str) to placeholders (tensors).
                 # NOTE(review): shape [150] presumably matches the number of rows in
                 # self._X -- confirm against the fixture.
                 feature_spec = {
                     name: tf.placeholder("float", name=name, shape=[150])
                     for name in self._feature_names
                 }
                 # Creating receiver function for model saving.
                 receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
                     feature_spec)
                 saved_model_path = tmp.path("model")
                 os.makedirs(saved_model_path)
                 os.makedirs(tmp.path("hello"))
                 # Saving Tensorflow model; the export path is returned as bytes.
                 saved_model_path = self._dnn.export_savedmodel(
                     saved_model_path, receiver_fn).decode("utf-8")
                 # Logging the Tensorflow model just saved.
                 tensorflow.log_saved_model(saved_model_dir=saved_model_path,
                                            signature_def_key="predict",
                                            artifact_path=tmp.path("hello"))
                 # Loading the saved Tensorflow model as a pyfunc.
                 x = pyfunc.load_pyfunc(saved_model_path)
                 # Predicting on self._X using the pyfunc.
                 xpred = x.predict(
                     pandas.DataFrame(data=self._X,
                                      columns=self._feature_names))
                 saved = [s['predictions'] for s in self._dnn_predict]
                 loaded = [row for _, row in xpred.iterrows()]
                 # Asserting that the loaded model predictions are as expected.
                 np.testing.assert_array_equal(saved, loaded)
             finally:
                 # Only end the run once it has actually been started.
                 tracking.end_run()
         finally:
             # Restoring the old logging location.
             tracking.set_tracking_uri(old_tracking_dir)
Esempio n. 3
0
 def helper(self, feature_spec, tmp, estimator, df):
     """
     Exports ``estimator`` as a SavedModel under ``tmp``, logs it with MLflow, loads it back
     as a pyfunc and returns that pyfunc's predictions for ``df``. Used for testing purposes.
     """
     serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)

     # Directory inside the temp folder that receives the export.
     export_root = tmp.path("model")
     os.makedirs(export_root)

     # Export the TensorFlow model; the resulting path comes back as bytes.
     exported_bytes = estimator.export_savedmodel(export_root, serving_input_fn)
     export_dir = exported_bytes.decode("utf-8")

     # Log the freshly exported model with MLflow.
     tensorflow.log_saved_model(
         saved_model_dir=export_dir,
         signature_def_key="predict",
         artifact_path="hello",
     )

     # Reload the export as a pyfunc and predict on the given data.
     reloaded = pyfunc.load_pyfunc(export_dir)
     return reloaded.predict(df)
Esempio n. 4
0
def train(model_dir, training_pandas_data, test_pandas_data, label_col,
          feat_cols, hidden_units, steps, batch_size, training_data_path,
          test_data_path):
    """Train a DNNRegressor on pandas data, log results to MLflow and export the model.

    Args:
        model_dir: Directory the SavedModel is exported to.
        training_pandas_data: Training data, indexable by column name.
        test_pandas_data: Test data, indexable by column name.
        label_col: Name of the label column.
        feat_cols: Names of the feature columns.
        hidden_units: Hidden layer sizes passed to the regressor.
        steps: Number of training steps requested.
        batch_size: Batch size for both input functions.
        training_data_path: Path of the training data; only printed.
        test_data_path: Path of the test data; only printed.
    """
    # Echo the run configuration.
    print("training-data-path:    " + training_data_path)
    print("test-data-path:        " + test_data_path)
    for units in hidden_units:
        print("hidden-units:         ", units)
    print("steps:                ", steps)
    print("batch_size:           ", batch_size)
    print("label-col:             " + label_col)
    for column in feat_cols:
        print("feat-cols:             " + column)

    # Label arrays for the two splits.
    y_train = training_pandas_data[label_col].values
    y_test = test_pandas_data[label_col].values

    # Column name -> column values, for training and for testing.
    x_train = {column: training_pandas_data[column].values for column in feat_cols}
    x_test = {column: test_pandas_data[column].values for column in feat_cols}
    # One numeric tf.feature_column per requested feature.
    numeric_columns = [tf.feature_column.numeric_column(column) for column in feat_cols]
    # Column name -> placeholder tensor, consumed by the serving receiver function.
    placeholders = {
        column: tf.placeholder("float", name=column, shape=[None])
        for column in feat_cols
    }

    # Receiver function used when the model is loaded for serving.
    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        placeholders)

    # Input functions: shuffled for training, ordered for testing.
    train_fn = tf.estimator.inputs.numpy_input_fn(
        x_train, y_train, shuffle=True, batch_size=batch_size)
    test_fn = tf.estimator.inputs.numpy_input_fn(
        x_test, y_test, shuffle=False, batch_size=batch_size)

    model = tf.estimator.DNNRegressor(feature_columns=numeric_columns,
                                      hidden_units=hidden_units)

    # Fit on the training input function.
    model.train(input_fn=train_fn, steps=steps)

    # Evaluate on the test input function and derive the RMSE from the average loss.
    eval_result = model.evaluate(input_fn=test_fn)
    rmse = eval_result["average_loss"] ** 0.5

    print("Test RMSE:", rmse)

    # Log the training-set size and the test RMSE.
    mlflow.log_param("Number of data points", len(y_train))
    mlflow.log_metric("RMSE for test set", rmse)

    # Export the trained model; the export path is returned as bytes.
    export_dir = model.export_savedmodel(model_dir, serving_input_fn).decode("utf-8")

    # Log the exported SavedModel as an MLflow artifact.
    tensorflow.log_saved_model(
        saved_model_dir=export_dir,
        signature_def_key="predict",
        artifact_path="model",
    )

    active_run_id = mlflow.tracking.active_run().info.run_uuid
    print("Run with id %s finished" % active_run_id)
Esempio n. 5
0
def train(model_dir, training_pandas_data, test_pandas_data, label_col,
          feat_cols, hidden_units, steps, batch_size, training_data_path,
          test_data_path):
    """Train a DNNRegressor, log RMSE and R^2 scores to MLflow, and export the model.

    Args:
        model_dir: Directory the SavedModel is exported to.
        training_pandas_data: Training data, indexable by column name.
        test_pandas_data: Test data, indexable by column name.
        label_col: Name of the label column.
        feat_cols: Names of the feature columns.
        hidden_units: Hidden layer sizes passed to the regressor.
        steps: Number of training steps requested.
        batch_size: Batch size for all input functions.
        training_data_path: Path of the training data; only printed.
        test_data_path: Path of the test data; only printed.
    """
    print("train:                 " + training_data_path)
    print("test:                  " + test_data_path)
    for hu in hidden_units:
        print("hidden-units:         ", hu)
    print("steps:                ", steps)
    print("batch_size:           ", batch_size)
    print("label-col:             " + label_col)
    for feat in feat_cols:
        print("feat-cols:             " + feat)

    # Split data into training labels and testing labels.
    training_labels = training_pandas_data[label_col].values
    test_labels = test_pandas_data[label_col].values

    training_features = {}  # Column name -> column values for training
    test_features = {}  # Column name -> column values for testing
    tf_feat_cols = []  # List of tf.feature_columns for input functions
    feature_spec = {}  # Column name -> placeholder tensor for receiver functions
    # Create TensorFlow columns based on passed in feature columns
    for feat in feat_cols:
        training_features[feat] = training_pandas_data[feat].values
        test_features[feat] = test_pandas_data[feat].values
        tf_feat_cols.append(tf.feature_column.numeric_column(feat))
        feature_spec[feat] = tf.placeholder("float", name=feat, shape=[None])

    # Create receiver function for loading the model for serving.
    receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_spec)

    # Create input functions for both the training and testing sets.
    input_train = tf.estimator.inputs.numpy_input_fn(training_features,
                                                     training_labels,
                                                     shuffle=True,
                                                     batch_size=batch_size)
    input_test = tf.estimator.inputs.numpy_input_fn(test_features,
                                                    test_labels,
                                                    shuffle=False,
                                                    batch_size=batch_size)
    # Unshuffled view of the training set, used only for prediction. Predictions
    # must come back in the same order as `training_labels`; the shuffled
    # `input_train` would misalign them and make the R^2 score meaningless.
    input_train_ordered = tf.estimator.inputs.numpy_input_fn(
        training_features,
        training_labels,
        shuffle=False,
        batch_size=batch_size)

    # Creating DNNRegressor
    regressor = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols,
                                          hidden_units=hidden_units)

    # Training regressor on training input function
    regressor.train(input_fn=input_train, steps=steps)

    # Evaluating model on the test data
    test_eval = regressor.evaluate(input_fn=input_test)

    # Calculating the RMSE of the testing set.
    test_rmse = test_eval["average_loss"]**0.5

    # Collecting predictions (in input order) to calculate r^2 scores with.
    train_pred = [
        p['predictions'][0]
        for p in regressor.predict(input_fn=input_train_ordered)
    ]
    r2_train = r2_score(training_labels, train_pred)

    test_pred = [
        p['predictions'][0] for p in regressor.predict(input_fn=input_test)
    ]
    r2_test = r2_score(test_labels, test_pred)

    print("Test RMSE:", test_rmse)
    print("Training Set R2", r2_train)
    print("Test Set R2", r2_test)

    mlflow.log_param("num_train_points", len(training_labels))

    # Logging the RMSE and r2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_train)
    mlflow.log_metric("Test R2", r2_test)

    # Saving TensorFlow model; the export path is returned as bytes.
    saved_estimator_path = regressor.export_savedmodel(
        model_dir, receiver_fn).decode("utf-8")

    # Logging the TensorFlow model just saved.
    tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                               signature_def_key="predict",
                               artifact_path="model")