def score(data_conf, model_conf, evaluation=False, **kwargs):

    try:
        print()
        print("-----------------------------------")
        print("           Model Serving           ")
        print("-----------------------------------")
        print()

        # ==============================
        # 1.0 Data Loading
        # ==============================

        # USING IRIS DATASET:
        # iris = load_iris()  # The Iris dataset is available through the scikit-learn API
        # idx = list(range(len(iris.target)))
        # np.random.shuffle(idx)  # We shuffle it (important if we want to split into train and test sets)
        # X = iris.data[idx]
        # y = iris.target[idx]

        # Load data into a Pandas DataFrame and then into a PySpark dataframe
        # data_pd = pd.DataFrame(data=np.column_stack((X, y)),
        #                        columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label'])
        # data_df = spark.createDataFrame(data_pd)

        #if not evaluation: table_in = data_conf[env]['input_to_score']  # for scoring new data
        #if evaluation: table_in = data_conf[env]['input_test']  # for performance evaluation on historical data
        #data_df = spark.table(table_in)

        data_df = spark.read.format("delta").load(
            "/mnt/delta/{0}".format('test_data_spark_rf'))
        data_df.show(5)

        print("Step 1.0 completed: Loaded dataset in Spark")

    except Exception as e:
        print("Errored on 1.0: data loading")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===================
        # 1.1 Model serving
        # ===================

        # Load model from the MLflow model registry
        # https://www.mlflow.org/docs/latest/model-registry.html
        if env == 'PROD':
            mlflow_model_stage = 'Production'
        else:
            mlflow_model_stage = 'Staging'

        # Detecting the model dictionary among available models in the MLflow model registry
        client = MlflowClient()
        for mv in client.search_model_versions(
                "name='{0}'".format(mlflow_model_name)):
            if dict(mv)['current_stage'] == mlflow_model_stage:
                model_dict = dict(mv)
                break

        print('Model extracted run_id: ', model_dict['run_id'])
        print('Model extracted version number: ', model_dict['version'])
        print('Model extracted stage: ', model_dict['current_stage'])

        def get_local_path_from_dbfs(dbfs_path):
            '''
            Returns the local view of a DBFS path, i.e. replaces
            "dbfs:" by "/dbfs", for use with local-file APIs.
            '''
            return "/dbfs" + dbfs_path.lstrip("dbfs:")

        mlflow_path = model_dict['source']
        print("mlflow_path: ", mlflow_path)

        # De-serialize the model
        #model = PipelineModel.load("/tmp/rf_model_test")
        model = mlflow.spark.load_model(mlflow_path)

        # Make predictions
        predictions = model.transform(data_df)

        # Select example rows to display
        predictions.select("prediction", "indexedLabel", "features").show(5)

        # Saving the result of the scoring
        if not evaluation: table_out = data_conf[env]['output_to_score']
        if evaluation: table_out = data_conf[env]['output_test']
        #predictions.write.format("ORC").saveAsTable(table_out, mode='overwrite')

        print("Step 1.1 completed: model loading and data scoring")  # note: the write to storage is currently commented out
        print()

    except Exception as e:
        print("Errored on step 1.1: model serving")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
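# The persistence of the predictions above is currently commented out (the old ORC/Hive
# write). A minimal sketch of persisting them to Delta instead, mirroring the pattern
# used by the sklearn variant of this pipeline — the output path name is an assumption:

def save_predictions_to_delta(predictions, path="/mnt/delta/prediction_spark_rf"):
    """Hypothetical helper: persist scored rows to a Delta path (path is an assumption)."""
    predictions.select("prediction", "indexedLabel") \
        .write.format("delta").mode("overwrite").save(path)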
def evaluate(data_conf, model_conf, scoring=True, **kwargs):

    try:
        # ===========================
        # E.1 Scoring of test data
        # ===========================

        if scoring:  # switch, in case we want to skip scoring (if the score was already computed earlier)
            score(data_conf, model_conf, evaluation=True)  # the score function is applied on the test dataset for performance evaluation

    except Exception as e:
        print("Errored on step E.1: scoring of test data")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===========================
        # E.2 Metrics & Visualization
        # ===========================

        # Load model from the MLflow model registry
        # https://www.mlflow.org/docs/latest/model-registry.html
        if env == 'PROD':
            mlflow_model_stage = 'Production'
        else:
            mlflow_model_stage = 'Staging'

        # Detecting the model dictionary among available models in the MLflow model registry
        client = MlflowClient()
        for mv in client.search_model_versions(
                "name='{0}'".format(mlflow_model_name)):
            if dict(mv)['current_stage'] == mlflow_model_stage:
                model_dict = dict(mv)
                break

        print('Model extracted run_id: ', model_dict['run_id'])
        print('Model extracted version number: ', model_dict['version'])
        print('Model extracted stage: ', model_dict['current_stage'])

        # MLflow logging of metrics for the trained model
        mlflow.end_run()  # in case an MLflow run was started before this point
        mlflow.start_run(run_id=model_dict['run_id'])  # re-open the training run so the metrics are logged alongside it

        # Loading dataset
        table_in = data_conf[env]['output_test']
        predictions = spark.table(table_in)

        # Select (prediction, true label) and compute test error
        evaluator = MulticlassClassificationEvaluator(
            labelCol="indexedLabel",
            predictionCol="prediction",
            metricName="accuracy")
        accuracy = evaluator.evaluate(predictions)
        print("Accuracy = %g" % (accuracy))

        # Extracting the test set to Pandas
        pred_pd = predictions.toPandas()
        y_test = pred_pd['indexedLabel'].values
        y_pred = pred_pd['prediction'].values

        # Accuracy and confusion matrix
        accuracy = accuracy_score(y_test, y_pred)
        print('Accuracy = ', accuracy)

        print('Confusion matrix:')
        Classes = ['setosa', 'versicolor', 'virginica']
        C = confusion_matrix(y_test, y_pred)
        C_normalized = C / C.astype(float).sum()  # np.float is deprecated; built-in float is equivalent here
        C_normalized_pd = pd.DataFrame(C_normalized,
                                       columns=Classes,
                                       index=Classes)
        print(C_normalized_pd)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        cax = ax.matshow(C, cmap='Blues')
        plt.title('Confusion matrix of the classifier')
        fig.colorbar(cax)
        ax.set_xticklabels([''] + Classes)
        ax.set_yticklabels([''] + Classes)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.show()
        fig.savefig('/dbfs/mnt/delta/confusion_matrix_spark_rf.png')

        # Tracking performance metrics
        mlflow.log_metric("Accuracy", accuracy)
        mlflow.log_artifact("/dbfs/mnt/delta/confusion_matrix_spark_rf.png")

        print("Step E.2 completed: metrics & visualisation")
        print()

    except Exception as e:
        print("Errored on step E.2: metrics & visualisation")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
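# Note: C / C.astype(float).sum() above normalizes the confusion matrix by the grand
# total, so all cells sum to 1. If per-class rates are wanted instead, each row can be
# normalized by its own sum; a minimal sketch (C is a NumPy array as above):

def row_normalize_confusion_matrix(C):
    """Hypothetical alternative: each row sums to 1 (per-true-class recall view)."""
    return C / C.sum(axis=1, keepdims=True)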
)

# COMMAND ----------

# MAGIC %md ### Fetch the new model version ID using MLflow Model Registry search
# MAGIC
# MAGIC The `MlflowClient.search_model_versions()` function searches for model versions by model name, MLflow run ID, or artifact source location. All model versions satisfying a particular filter query are returned.
# MAGIC
# MAGIC The following cell uses this search function to fetch the version ID of the new model, which is assumed to be the largest (i.e., most recent) version ID.

# COMMAND ----------

from mlflow.tracking.client import MlflowClient

client = MlflowClient()
model_version_infos = client.search_model_versions("name = '%s'" % model_name)
# Version numbers are returned as strings, so cast to int before taking the max
new_model_version = max([int(model_version_info.version) for model_version_info in model_version_infos])

# COMMAND ----------

# MAGIC %md Wait for the new model version to become ready (a sketch of the `wait_until_ready` helper appears at the end of this section).

# COMMAND ----------

wait_until_ready(model_name, new_model_version)

# COMMAND ----------

# MAGIC %md ## Add a description to the new model version

# COMMAND ----------
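# MAGIC %md `wait_until_ready` is assumed to be defined in an earlier helper cell of this notebook. For reference, a minimal sketch of such a helper, polling the registry until the version status is READY (the retry count and sleep interval are arbitrary choices):

# COMMAND ----------

# Sketch only -- the notebook's own helper may differ:
#
# from mlflow.entities.model_registry.model_version_status import ModelVersionStatus
# import time
#
# def wait_until_ready(model_name, model_version):
#     client = MlflowClient()
#     for _ in range(20):
#         model_version_details = client.get_model_version(name=model_name, version=model_version)
#         status = ModelVersionStatus.from_string(model_version_details.status)
#         print("Model status: %s" % ModelVersionStatus.to_string(status))
#         if status == ModelVersionStatus.READY:
#             break
#         time.sleep(5)

# COMMAND ----------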
input_data, headers=headers)
print("Predicted : ", http_res.text)

# COMMAND ----------

# MAGIC %md ### Cleanup

# COMMAND ----------

# Delete the AML webservice
svc.delete()

# Loop over the registered model versions in MLflow
models = client.search_model_versions("name='{}'".format(model_name))
for model in models:
    try:
        # A version in an active stage cannot be deleted directly,
        # hence the transition to 'Archived' first
        client.transition_model_version_stage(name=model_name,
                                              version=model.version,
                                              stage='Archived')
    except:
        pass
    # Delete this version of the model
    client.delete_model_version(model_name, model.version)

# Delete the registered model
client.delete_registered_model(model_name)

# COMMAND ----------
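# A quick sanity check that the cleanup worked: fetching the deleted model should now
# fail, since get_registered_model raises an MlflowException for unknown names. Sketch:
#
# try:
#     client.get_registered_model(model_name)
#     print("model still exists")
# except Exception:
#     print("model deleted")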
def train(data_conf, model_conf, **kwargs):

    try:
        print("-----------------------------------")
        print("Starting Cashflow DL Model Training")
        print("-----------------------------------")
        print()

        # ==============================
        # 0. Main parameters definitions
        # ==============================

        # Size of X and y arrays definition
        N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
            data_conf['number_of_predicted_days'])  # 365, 92
        print('Number of days used for prediction (X): ', N_days_X)
        print('Number of days predicted (y): ', N_days_y)
        print()

        # Date range definition
        start_date, end_date = data_conf['start_date'], data_conf['end_date']
        start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, end_date_plusOneDay, end_date_minus_6month = dates_definitions(
            start_date, end_date, N_days_X, N_days_y)
        print('Date range: ', start_date, end_date)
        print()

        model_name = model_conf['model_name']

    except Exception as e:
        print("Errored on initialization")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ========================================
        # T.1 Pre-processing before model training
        # ========================================

        # Loading dataset
        table_in = data_conf[environment]['table_to_train_on']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in)).cache()
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))

        # Cleaning of the time series
        ts_balance = ts_balance.withColumn(
            'balance', ts_balance.balance.cast("array<float>"))

        ts_balance = ts_balance.withColumn(
            'keep_ts',
            F.udf(lambda x, y: time_series_cleaning(x, y), "int")(
                'balance', F.lit(20)))  # a time series needs at least 20 transactions to be used in the training

        ts_balance = ts_balance.where('keep_ts == 1')

        # Creating the dataset on which we train (and test and validate) the model
        ts_balance_model = ts_balance.sample(
            False, 0.7, seed=0)  # 0.7 here, but in a real case it would be 0.1 at best, or 0.05
        print('ts_balance_model.count()', ts_balance_model.count())

        # Pre-processing before model training
        ts_balance_model = pre_processing(ts_balance_model,
                                          end_date,
                                          spark,
                                          serving=False)
        ts_balance_model.show(3)
        print('ts_balance_model.rdd.getNumPartitions()',
              ts_balance_model.rdd.getNumPartitions())

        # Saving prepared dataset
        table_out = 'cashflow_training_step1'
        #ts_balance_model.write.format("parquet").mode("overwrite").save("/mnt/test/{0}.parquet".format(table_out))
        ts_balance_model.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_out))

    except Exception as e:
        print("Errored on step T.1: pre-processing before model training")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ========================================
        # T.2 Generating TRAIN, VAL, TEST datasets
        # ========================================

        # Loading dataset
        table_model = 'cashflow_training_step1'
        #ts_balance_model = spark.read.parquet("/mnt/test/{0}.parquet".format(table_model)).cache()
        ts_balance_model = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_model)).cache()

        ts_balance_model.show(3)
        print('ts_balance_model.count()', ts_balance_model.count())
        print('ts_balance_model.rdd.getNumPartitions()',
              ts_balance_model.rdd.getNumPartitions())

        train_set, val_set, test_set = ts_balance_model.randomSplit(
            [0.6, 0.2, 0.2], seed=12345)
        train_set.show(3)
        print(
            'train_set.rdd.getNumPartitions(), val_set.rdd.getNumPartitions(), test_set.rdd.getNumPartitions()',
            train_set.rdd.getNumPartitions(), val_set.rdd.getNumPartitions(),
            test_set.rdd.getNumPartitions())

        # Saving prepared datasets (train, val, test sets) to Delta
        table_train = 'cashflow_train'
        table_val = 'cashflow_val'
        table_test = data_conf[environment]['table_test_for_performance']  # 'cashflow_test'

        train_set.select('X', 'y').write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_train))
        val_set.select('X', 'y').write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_val))
        test_set.select('primaryaccountholder', 'transactiondate', 'balance')\
            .write.format("delta").mode("overwrite").save("/mnt/delta/{0}".format(table_test))

    except Exception as e:
        print("Errored on step T.2: pre-processing")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ==============================
        # T.3 MODEL DEFINITION AND TRAIN
        # ==============================

        table_train = 'cashflow_train'
        table_val = 'cashflow_val'

        #table_train = spark.read.parquet("/mnt/test/{0}.parquet".format(table_train))
        table_train = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_train))
        #table_val = spark.read.parquet("/mnt/test/{0}.parquet".format(table_val))
        table_val = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_val))

        table_train_count = table_train.count()
        table_val_count = table_val.count()

        from pyspark.sql.functions import col
        from petastorm.spark import SparkDatasetConverter, make_spark_converter

        # Set a cache directory on DBFS FUSE for intermediate data
        spark.conf.set(SparkDatasetConverter.PARENT_CACHE_DIR_URL_CONF,
                       "file:///dbfs/tmp/petastorm/cache")

        converter_train = make_spark_converter(table_train)
        converter_val = make_spark_converter(table_val)

        print(f"train: {len(converter_train)}, val: {len(converter_val)}")

        def get_compiled_model(N_days_X, N_days_y, model_conf):  #lr=0.001
            #model = get_model(lr=lr)
            model = define_1dcnn_model(N_days_X, N_days_y, model_conf)

            hyperparameters = model_conf['hyperParameters']
            opt = tf.keras.optimizers.Adam()

            # Model compilation
            model.compile(optimizer=opt, loss=hyperparameters['loss'])

            return model

        # Enable auto-logging to MLflow to capture TensorBoard metrics
        mlflow.tensorflow.autolog(every_n_iter=1)

        model_name = model_conf['model_name']
        mlflow_model_name = model_name
        model_dir = "/tmp/" + model_name
        try:
            dbutils.fs.rm(model_dir, recurse=True)
        except OSError:
            pass

        with mlflow.start_run():

            NUM_EPOCHS = model_conf['hyperParameters']['epochs']  # 5
            BATCH_SIZE = model_conf['hyperParameters']['batch_size']  # 500

            def train_and_evaluate(N_days_X, N_days_y, model_conf):  #lr=0.001
                model = get_compiled_model(N_days_X, N_days_y, model_conf)  #lr

                with converter_train.make_tf_dataset(batch_size=BATCH_SIZE) as train_dataset, \
                     converter_val.make_tf_dataset(batch_size=BATCH_SIZE) as val_dataset:

                    #train_dataset = train_dataset.map(lambda x: (x.features, x.label_index))
                    train_dataset = train_dataset.map(
                        lambda x: (tf.reshape(x.X, [-1, N_days_X, 1]),
                                   tf.reshape(x.y, [-1, N_days_y])))
                    steps_per_epoch = len(converter_train) // BATCH_SIZE

                    #val_dataset = val_dataset.map(lambda x: (x.features, x.label_index))
                    val_dataset = val_dataset.map(
                        lambda x: (tf.reshape(x.X, [-1, N_days_X, 1]),
                                   tf.reshape(x.y, [-1, N_days_y])))
                    validation_steps = max(1, len(converter_val) // BATCH_SIZE)

                    print(f"steps_per_epoch: {steps_per_epoch}, validation_steps: {validation_steps}")

                    hist = model.fit(train_dataset,
                                     steps_per_epoch=steps_per_epoch,
                                     epochs=NUM_EPOCHS,
                                     validation_data=val_dataset,
                                     validation_steps=validation_steps,
                                     verbose=2)
                    return model, hist

            model, hist = train_and_evaluate(N_days_X, N_days_y, model_conf)
            print(hist.history['val_loss'][-1])

            # MLflow logging
            #mlflow.log_artifact(cwd + "data.json")
            #mlflow.log_artifact(cwd + "config.json")
            mlflow.log_param("model_name", str(model_name))
            mlflow.log_param("N_days_X", N_days_X)
            mlflow.log_param("N_days_y", N_days_y)
            mlflow.log_param("start_date", start_date)
            mlflow.log_param("end_date", end_date)
            mlflow.log_param("num_epochs", str(NUM_EPOCHS))
            mlflow.log_param("batch_size", str(BATCH_SIZE))
            #mlflow.log_param("steps_per_epoch", str(steps_per_epoch))  #validation_steps

            # Saving using tf.keras.models.save_model (SavedModel format)
            tf.keras.models.save_model(model, filepath=model_dir + '/model')
            #model.save(filepath=model_dir + 'model', save_format="h5")  # H5 format (TODO, and look at how to register it)

            # Saving using mlflow.tensorflow.save_model (this does NOT log nor register the model); it does not overwrite...
            #mlflow.tensorflow.save_model(tf_saved_model_dir=model_dir + '/model',
            #                             tf_meta_graph_tags=[tf.compat.v1.saved_model.tag_constants.SERVING],
            #                             tf_signature_def_key='serving_default',
            #                             path='model')

            # Logging the already-saved model
            mlflow.tensorflow.log_model(
                tf_saved_model_dir=model_dir + '/model',
                tf_meta_graph_tags=[
                    tf.compat.v1.saved_model.tag_constants.SERVING
                ],
                tf_signature_def_key='serving_default',
                registered_model_name=model_name,
                artifact_path='model')

            # Getting the version number of the newly registered MLflow model (useful for next steps)
            mlflow_model_version = 0
            client_current_model = MlflowClient()
            for mv in client_current_model.search_model_versions(
                    "name='{0}'".format(mlflow_model_name)):
                #if int(dict(mv)['version']) == mlflow_model_version:
                if int(dict(mv)['version']) >= mlflow_model_version:  # finding the last version registered
                    mlflow_model_version = int(dict(mv)['version'])
                    model_dict = dict(mv)

            # Update 2020-07-17: to grab the latest model version, we could also do this (TO BE TESTED!!!):
            #model_version_infos = client_current_model.search_model_versions(f"name = '{model_name}'")
            #mlflow_model_version = max([model_version_info.version for model_version_info in model_version_infos])

            # Wait until the model is ready
            def wait_until_model_ready(model_name, model_version):
                client = MlflowClient()
                for _ in range(20):
                    model_version_details = client.get_model_version(
                        name=model_name,
                        version=model_version,
                    )
                    status = ModelVersionStatus.from_string(
                        model_version_details.status)
                    print("Model status: %s" % ModelVersionStatus.to_string(status))
                    if status == ModelVersionStatus.READY:
                        break
                    tm.sleep(5)

            wait_until_model_ready(mlflow_model_name, mlflow_model_version)

            # Transition the registered model stage from "None" to "Staging"
            client_current_model.transition_model_version_stage(
                name=mlflow_model_name,
                version=mlflow_model_version,
                stage="Staging",
            )

            # Copy the model files from the driver node to DBFS (so that they can be accessed, e.g., after the current cluster terminates)
            dbutils.fs.cp("file:/tmp/{0}/model".format(model_name),
                          "dbfs:/mnt/test/{0}/model".format(model_name),
                          recurse=True)
            print('Model copied here: ',
                  "dbfs:/mnt/test/{0}/model/".format(model_name))

        #mlflow.end_run()

    except Exception as e:
        print("Errored on step T.3: model definition and train")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
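# A note on the "TO BE TESTED" alternative above: search_model_versions() returns
# version numbers as strings, so a plain max() would compare lexicographically
# ("9" > "10"). A minimal sketch of a safe helper (the function name is ours, not
# part of this repo):

def get_latest_model_version(client, model_name):
    """Return the highest registered version number for model_name, as an int."""
    model_version_infos = client.search_model_versions("name = '{0}'".format(model_name))
    return max(int(info.version) for info in model_version_infos)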
def evaluate(data_conf, model_conf, scoring=True, **kwargs):

    try:
        print("-------------------------------------")
        print("Starting Cashflow DL Model Evaluation")
        print("-------------------------------------")
        print()

        # ==============================
        # 0. Main parameters definitions
        # ==============================

        # Size of X and y arrays definition
        N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
            data_conf['number_of_predicted_days'])  # 365, 92
        print('Number of days used for prediction (X): ', N_days_X)
        print('Number of days predicted (y): ', N_days_y)
        print()

        # Date range definition
        start_date, end_date = data_conf['start_date'], data_conf['end_date']
        start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, end_date_plusOneDay, end_date_minus_6month = dates_definitions(
            start_date, end_date, N_days_X, N_days_y)
        print('Date range: ', start_date, end_date)
        print()

        model_name = model_conf['model_name']

    except Exception as e:
        print("Errored on initialization")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===========================
        # E.1 Scoring of test data
        # ===========================

        #if kwargs['do_we_score'] is True:  # switch, in case we want to skip scoring (if the score was already computed earlier)
        if scoring:  # switch, in case we want to skip scoring (if the score was already computed earlier)
            score(data_conf, model_conf, evaluation=True)  # the score function is applied on the test dataset for performance evaluation

    except Exception as e:
        print("Errored on step E.1: scoring of test data")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===========================
        # E.2 Metrics & Visualization
        # ===========================

        # Load model from the MLflow model registry
        # https://www.mlflow.org/docs/latest/model-registry.html
        #mlflow_model_name = 'cashflow-poc'
        mlflow_model_name = model_conf['model_name']
        if environment == 'prod':
            mlflow_model_stage = 'Production'
        else:
            mlflow_model_stage = 'Staging'

        # Detecting the model dictionary among available models in the MLflow model registry
        client = MlflowClient()
        for mv in client.search_model_versions(
                "name='{0}'".format(mlflow_model_name)):
            if dict(mv)['current_stage'] == mlflow_model_stage:
                model_dict = dict(mv)
                break

        print('Model extracted run_id: ', model_dict['run_id'])
        print('Model extracted version number: ', model_dict['version'])
        print('Model extracted stage: ', model_dict['current_stage'])

        # MLflow logging of metrics for the trained model
        mlflow.end_run()  # in case an MLflow run was started before this point
        mlflow.start_run(run_id=model_dict['run_id'])  # re-open the training run so the evaluation metrics are logged alongside it
        # Loading dataset
        table_in = data_conf[environment]['table_scored']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in)).cache()
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))

        # Extracting the test set to Pandas
        ts_balance_pd = ts_balance.select(
            'balance', 'X', 'y', 'y_pred', 'y_pred_rescaled_retrended').toPandas()

        # Extraction of metrics
        R2_all_3month, R2_array_3month, R2_all_1month, R2_array_1month = metric_extraction(
            ts_balance_pd, N_days_y)

        # Visualization of predictions
        #fig1, fig2 = visualization_prediction(ts_balance_pd, start_date, end_date, N_days_X, N_days_y, R2_array_1month, R2_array_3month, serving=False)
        fig1, fig2 = visualization_time_series_pred_only(ts_balance_pd,
                                                         start_date,
                                                         end_date,
                                                         N_days_X,
                                                         N_days_y,
                                                         R2_array_1month,
                                                         R2_array_3month,
                                                         serving=False)
        fig1.savefig('/dbfs/mnt/delta/performance.png')
        fig2.savefig('/dbfs/mnt/delta/performance_R2.png')
        mlflow.log_artifact('/dbfs/mnt/delta/performance.png')
        mlflow.log_artifact('/dbfs/mnt/delta/performance_R2.png')

        # Saving the metrics
        print('Test R2 metric (3-month window): {}'.format(R2_all_3month))
        print('Test R2 metric (1-month window): {}'.format(R2_all_1month))
        mlflow.log_metric("R2_all_3month", R2_all_3month)
        mlflow.log_metric("R2_all_1month", R2_all_1month)

        with open("/dbfs/mnt/delta/evaluation.json", "w+") as f:
            json.dump({
                'R2_3month': R2_all_3month,
                'R2_1month': R2_all_1month
            }, f)
        mlflow.log_artifact("/dbfs/mnt/delta/evaluation.json")

        mlflow.end_run()

        ts_balance.unpersist()
        print("Step E.2 completed: metrics & visualisation")

    except Exception as e:
        print("Errored on step E.2: metrics & visualisation")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
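# metric_extraction() is defined elsewhere in this repo. Conceptually it compares the
# predicted and true balances over 1-month and 3-month windows with an R^2 score; a
# minimal sketch of that idea using scikit-learn (the helper name, column handling and
# window logic are our assumptions, not the actual implementation):

from sklearn.metrics import r2_score

def r2_over_window(ts_balance_pd, n_days):
    """Hypothetical helper: pooled R^2 over the first n_days of each prediction."""
    y_true = np.concatenate([np.asarray(y)[:n_days] for y in ts_balance_pd['y']])
    y_pred = np.concatenate([np.asarray(p)[:n_days] for p in ts_balance_pd['y_pred']])
    return r2_score(y_true, y_pred)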
def score(data_conf, model_conf, evaluation=False, **kwargs):

    try:
        print("----------------------------------")
        print("Starting Cashflow DL Model Scoring")
        print("----------------------------------")
        print("")

        # ==============================
        # 0. Main parameters definitions
        # ==============================

        # Size of X and y arrays definition
        N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
            data_conf['number_of_predicted_days'])  # 365, 92
        print('Number of days used for prediction (X): {0}'.format(N_days_X))
        print('Number of days predicted (y): {0}'.format(N_days_y))
        print('')

        # Date range definition
        start_date, end_date = data_conf['start_date'], data_conf['end_date']
        start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, end_date_plusOneDay, end_date_minus_6month = dates_definitions(
            start_date, end_date, N_days_X, N_days_y)
        print('Date range: [{0}, {1}]'.format(start_date, end_date))
        print('')

        model_name = model_conf['model_name']
        #print("Step 0 completed (main parameters definition)")

    except Exception as e:
        print("Errored on initialization")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ==================================
        # S.1 Pre-processing before serving
        # ==================================

        start_time_S1 = time.time()

        # Loading dataset
        table_in = data_conf[environment]['table_to_score']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in)).cache()
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))
        print('Reading table {0}'.format(table_in))
        #print('Size of table: ', ts_balance.count())
        #print('ts_balance.rdd.getNumPartitions()', ts_balance.rdd.getNumPartitions())

        if not evaluation:
            ts_balance = pre_processing(ts_balance, end_date, spark, serving=True)
        if evaluation:
            ts_balance = pre_processing(ts_balance, end_date, spark, serving=False)
        ts_balance.show(3)

        # Saving prepared dataset
        table_out = data_conf[environment]['cashflow_s1_out_scoring']
        #ts_balance.write.format("parquet").mode("overwrite").save("/mnt/test/{0}.parquet".format(table_out))
        ts_balance.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_out))

        ts_balance.unpersist()
        spark.catalog.clearCache()

        end_time_S1 = time.time()
        print("Step S.1 completed: pre-processing before serving")
        print("Time spent: ", end_time_S1 - start_time_S1)

    except Exception as e:
        print("Errored on step S.1: pre-processing before serving")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===================
        # S.2 Model serving
        # ===================

        start_time_S2 = time.time()

        # Loading dataset
        table_in = data_conf[environment]['cashflow_s1_out_scoring']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in))
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))
        ts_balance.cache()
        print('Number of partitions: ', ts_balance.rdd.getNumPartitions())

        # Load model from the MLflow model registry
        # https://www.mlflow.org/docs/latest/model-registry.html
        mlflow_model_name = model_conf['model_name']
        if environment == 'prod':
            mlflow_model_stage = 'Production'
        else:
            mlflow_model_stage = 'Staging'

        # Detecting the model dictionary among available models in the MLflow model registry
        client = MlflowClient()
        for mv in client.search_model_versions(
                "name='{0}'".format(mlflow_model_name)):
            if dict(mv)['current_stage'] == mlflow_model_stage:
                model_dict = dict(mv)
                break

        print('Model extracted run_id: ', model_dict['run_id'])
        print('Model extracted version number: ', model_dict['version'])
        print('Model extracted stage: ', model_dict['current_stage'])

        def get_local_path_from_dbfs(dbfs_path):
            '''
            Returns the local view of a DBFS path, i.e. replaces
            "dbfs:" by "/dbfs", for use with local-file APIs.
            '''
            # Note: os.path.join("/dbfs", dbfs_path.lstrip("dbfs:")) does not work
            # because os.path.join discards "/dbfs" when the second argument is an
            # absolute path. Also, str.lstrip strips a *set of characters* rather
            # than a prefix, so slicing off the prefix is safer.
            return "/dbfs" + dbfs_path[len("dbfs:"):]

        mlflow_path = get_local_path_from_dbfs(model_dict['source']) + '/tfmodel'
        print("mlflow_path: ", mlflow_path)

        # Detect the name of the pb model file
        files = [
            f for f in os.listdir('/dbfs/mnt/test/{0}/model/'.format(model_name))
        ]
        print(files)
        export_dir_saved = "/dbfs/mnt/test/{0}/model/".format(model_name)  #+files[0]  # TODO!!! GET THE MODEL FROM MLFLOW !!!!
        print(export_dir_saved)

        #def rdd_scoring(numpy_array):
        #    predictor_fn = tf.contrib.predictor.from_saved_model(export_dir=export_dir_saved)
        #    return predictor_fn({'input': numpy_array.reshape(-1, N_days_X, 1)})

        #@F.udf("array<float>")
        #def udf_scoring(x):
        #    predictor_fn = tf.contrib.predictor.from_saved_model(export_dir=mlflow_path)  #export_dir_saved)
        #    return np.around(predictor_fn({'input': np.array(x).reshape(-1, N_days_X, 1)})['output'][0].tolist(), decimals=3).tolist()

        @F.pandas_udf("array<float>")
        def pandas_udf_scoring(x):
            #predictor_fn = tf.contrib.predictor.from_saved_model(export_dir=export_dir_saved)  #mlflow_path)
            #return Series([np.around(predictor_fn({'input': np.array(v).reshape(-1, N_days_X, 1)})['output'][0], decimals=3) for v in x])
            new_model = tf.keras.models.load_model(export_dir_saved)
            #new_model = mlflow.tensorflow.load_model(mlflow_path)
            return Series([
                np.around(new_model.predict(
                    np.array(v).reshape(-1, N_days_X, 1)).reshape(N_days_y),
                          decimals=3) for v in x
            ])

        ts_balance = ts_balance.withColumn('y_pred', pandas_udf_scoring('X'))
        #ts_balance = ts_balance.withColumn('y_pred', udf_scoring('X'))

        print('ts_balance.rdd.getNumPartitions()',
              ts_balance.rdd.getNumPartitions())
        ts_balance.show(3)
        #print('Size of table: ', ts_balance.count())

        # Saving prepared dataset
        table_out = data_conf[environment]['cashflow_s2_out_scoring']
        #ts_balance.write.format("parquet").mode("overwrite").save("/mnt/test/{0}.parquet".format(table_out))
        ts_balance.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_out))

        ts_balance.unpersist()
        spark.catalog.clearCache()

        end_time_S2 = time.time()
        print("Step S.2 completed: model serving")
        print("Time spent: ", end_time_S2 - start_time_S2)

    except Exception as e:
        print("Errored on step S.2: model serving")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===================
        # S.3 Post-processing
        # ===================

        start_time_S3 = time.time()

        # Loading dataset
        table_in = data_conf[environment]['cashflow_s2_out_scoring']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in)).cache()
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))

        ts_balance = post_processing(ts_balance)
        ts_balance.show(3)

        # Saving prepared dataset
        table_out = data_conf[environment]['table_scored']
        #ts_balance.write.format("parquet").mode("overwrite").save("/mnt/test/{0}.parquet".format(table_out))
        ts_balance.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_out))
        ts_balance.unpersist()

        end_time_S3 = time.time()
        print("Step S.3 completed: post-processing")
        print("Time spent: ", end_time_S3 - start_time_S3)

    except Exception as e:
        print("Errored on step S.3: post-processing")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
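# The train/score/evaluate functions above all consume data_conf and model_conf
# dictionaries. A minimal sketch of what they might look like, inferred from the keys
# accessed in this module — values are placeholders, not real settings:

example_data_conf = {
    'number_of_historical_days': '365',
    'number_of_predicted_days': '92',
    'start_date': '2019-01-01',
    'end_date': '2019-12-31',
    'dev': {  # one block per environment ('dev', 'test', 'prod')
        'table_to_train_on': 'cashflow_train_input',
        'table_to_score': 'cashflow_score_input',
        'cashflow_s1_out_scoring': 'cashflow_s1_out',
        'cashflow_s2_out_scoring': 'cashflow_s2_out',
        'table_scored': 'cashflow_scored',
        'table_test_for_performance': 'cashflow_test',
    },
}

example_model_conf = {
    'model_name': 'cashflow-poc',
    'hyperParameters': {'epochs': 5, 'batch_size': 500, 'loss': 'mse'},
}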
# If run in production, the last model currently in "Staging" in the MLflow model registry is transitioned to "Production"
# ----------------------------------------------------------------------------------------------------------------------

from mlflow.tracking.client import MlflowClient

# Define the environment (dev, test or prod)
environment = dbutils.widgets.getArgument("environment")

if environment == 'prod':

    # Detect the last model currently in "Staging" in the MLflow model registry
    mlflow_model_name = 'super_test'
    mlflow_model_stage = 'Staging'

    client = MlflowClient()
    for mv in client.search_model_versions(
            "name='{0}'".format(mlflow_model_name)):
        if dict(mv)['current_stage'] == mlflow_model_stage:
            model_dict = dict(mv)

    print('Model extracted run_id: ', model_dict['run_id'])
    print('Model extracted version number: ', model_dict['version'])
    print('Model extracted stage: ', model_dict['current_stage'])

    # Transition the registered model stage from "Staging" to "Production"
    client.transition_model_version_stage(
        name=mlflow_model_name,
        version=model_dict['version'],
        stage="Production",
    )

    print()
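# dbutils.widgets.getArgument("environment") assumes the 'environment' widget already
# exists (e.g., supplied as a Databricks job parameter). For interactive testing, the
# widget can be created first; a minimal sketch:
#
# dbutils.widgets.dropdown("environment", "dev", ["dev", "test", "prod"])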
def score(data_conf, model_conf, evaluation=False, **kwargs):

    try:
        print()
        print("-----------------------------------")
        print("           Model Serving           ")
        print("-----------------------------------")
        print()

        # ==============================
        # 1.0 Data Loading
        # ==============================

        #if not evaluation: table_in = data_conf[env]['input_to_score']  # for scoring new data
        #if evaluation: table_in = data_conf[env]['input_test']  # for performance evaluation on historical data
        #data_df = spark.table(table_in)

        data_df = spark.read.format("delta").load(
            "/mnt/delta/{0}".format('test_data_sklearn_rf'))
        data_pd = data_df.toPandas()

        # Feature selection
        feature_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
        target = 'label'

        x_test = data_pd[feature_cols].values
        y_test = data_pd[target].values

        # Creation of train and test datasets
        #x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.7, stratify=y)  # stratify=y ensures that the same proportion of labels is in both train and test sets!

        print("Step 1.0 completed: Loaded dataset in Spark")

    except Exception as e:
        print("Errored on 1.0: data loading")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ===================
        # 1.1 Model serving
        # ===================

        # Load model from the MLflow model registry
        # https://www.mlflow.org/docs/latest/model-registry.html
        if env == 'PROD':
            mlflow_model_stage = 'Production'
        else:
            mlflow_model_stage = 'Staging'
        print(mlflow_model_stage)

        # Detecting the model dictionary among available models in the MLflow model registry
        client = MlflowClient()
        for mv in client.search_model_versions("name='{0}'".format(mlflow_model_name)):
            if dict(mv)['current_stage'] == mlflow_model_stage:
                model_dict = dict(mv)
                break

        print('Model extracted run_id: ', model_dict['run_id'])
        print('Model extracted version number: ', model_dict['version'])
        print('Model extracted stage: ', model_dict['current_stage'])

        # def get_local_path_from_dbfs(dbfs_path):
        #     '''
        #     Returns the local view of a DBFS path, i.e. replaces
        #     "dbfs:" by "/dbfs", for use with local-file APIs.
        #     '''
        #     return "/dbfs" + dbfs_path.lstrip("dbfs:")

        mlflow_path = model_dict['source']
        print("mlflow_path: ", mlflow_path)

        # De-serialize the model
        #model = mlflow.sklearn.load_model(mlflow_path)  # works, but uses the mlflow.sklearn API (not generic)
        model = mlflow.pyfunc.load_model(mlflow_path)  # load the model as a PyFuncModel (flavor-agnostic)

        # Make predictions
        #y_pred = model.predict(x_test)
        y_pred = model.predict(pd.DataFrame(x_test))  # a PyFuncModel expects a Pandas DataFrame as input

        # Saving the result of the scoring
        if not evaluation: table_out = data_conf[env]['output_to_score']
        if evaluation: table_out = data_conf[env]['output_test']
        #predictions.write.format("ORC").saveAsTable(table_out, mode='overwrite')

        pred_pd = pd.DataFrame(data=np.column_stack((y_test, y_pred)),
                               columns=['y_test', 'y_pred'])
        pred_df = spark.createDataFrame(pred_pd)
        pred_df.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format('prediction_sklearn_rf'))

        # Select example rows to display
        pred_df.show(5)

        print("Step 1.1 completed: model loading, data scoring and writing to Delta")
        print()

    except Exception as e:
        print("Errored on step 1.1: model serving")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e