import sys
import time

import cdsw
import pandas as pd


def get_predictions(freq, ts_range):
    """Read all stored model metrics and return them as a dataframe.

    `ts_range` is a Mongo-style window ({'$gt': ..., '$lte': ...});
    `model_deployment_crn` is assumed to be defined at module level.
    """
    freq = int(freq)
    # Hourly timestamps across the requested window (currently unused below).
    ts = pd.date_range(ts_range['$gt'], ts_range['$lte'], freq='H')
    current_timestamp_ms = int(round(time.time() * 1000))
    print(ts_range['$gt'], file=sys.stderr)
    # Querying only the requested window would look like:
    # known_metrics = cdsw.read_metrics(model_deployment_crn,
    #                                   ts_range['$gt'].timestamp(),
    #                                   ts_range['$lte'].timestamp())
    known_metrics = cdsw.read_metrics(model_deployment_crn, 0, current_timestamp_ms)
    print(known_metrics, file=sys.stderr)
    # pd.json_normalize replaces the deprecated pd.io.json.json_normalize.
    df = pd.json_normalize(known_metrics["metrics"])
    return df
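# A hypothetical invocation of get_predictions(); the window values are
# illustrative, and model_deployment_crn is assumed to be set at module level.
ts_range = {"$gt": pd.Timestamp("2023-01-01"), "$lte": pd.Timestamp("2023-01-02")}
predictions_df = get_predictions("1", ts_range)
print(predictions_df.tail())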
def query_model_metrics(self, **kwargs):
    """
    Use the cdsw.read_metrics() functionality to query saved model metrics
    from the PostgreSQL database, and return the details in a formatted
    dataframe.

    Queries metrics for the model deployment saved in
    self.latest_deployment_details. Optionally, additional keyword arguments
    can be passed to indicate a start/end timestamp.
    """
    ipt = {}
    ipt["model_deployment_crn"] = self.latest_deployment_details[
        "latest_deployment_crn"]
    if kwargs:
        ipt.update(kwargs)
    return self.format_model_metrics_query(cdsw.read_metrics(**ipt))
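# format_model_metrics_query() is not shown here; a minimal sketch, assuming
# it follows the normalization pattern used elsewhere in these examples:
import pandas as pd

def format_model_metrics_query(self, raw_metrics):
    # Flatten the nested metrics records and convert the epoch-ms timestamps.
    df = pd.json_normalize(raw_metrics["metrics"])
    df["startTimeStampMs"] = pd.to_datetime(df["startTimeStampMs"], unit="ms")
    df["endTimeStampMs"] = pd.to_datetime(df["endTimeStampMs"], unit="ms")
    return df

# Example call (hypothetical):
# metrics_df = monitor.query_model_metrics(start_timestamp_ms=0,
#                                          end_timestamp_ms=now_ms)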
import time

import cdsw

# read_metrics() is queried by model deployment CRN and time window, not by
# metric name; individual values such as "mean_sq_err" live inside the
# returned records.
model_deployment_crn = "GET THIS FROM MODEL SCREEN IN CML"
metrics = cdsw.read_metrics(model_deployment_crn, 0, int(round(time.time() * 1000)))
print([m["metrics"].get("mean_sq_err") for m in metrics["metrics"]])
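# For a "mean_sq_err" value to appear in read_metrics() output, something must
# have tracked it first; a hedged sketch using the aggregate-metric call shown
# in the other examples (the 0.04 value is purely illustrative):
now_ms = int(round(time.time() * 1000))
cdsw.track_aggregate_metrics({"mean_sq_err": 0.04}, now_ms, now_ms,
                             model_deployment_crn=model_deployment_crn)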
import os

import cdsw
import pandas as pd

# HOST, USERNAME, model_id and the CMLBootstrap import are assumed to be
# defined earlier in the script.
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)
latest_model = cml.get_model({
    "id": model_id,
    "latestModelDeployment": True,
    "latestModelBuild": True
})
Model_CRN = latest_model["crn"]
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]

# Read in the model metrics dict.
model_metrics = cdsw.read_metrics(model_crn=Model_CRN,
                                  model_deployment_crn=Deployment_CRN)

# This is a handy way to unravel the dict into a big pandas dataframe.
metrics_df = pd.json_normalize(model_metrics["metrics"])  # [metric_start_index:]
metrics_df.tail().T

# Do some conversions & calculations.
metrics_df["startTimeStampMs"] = pd.to_datetime(metrics_df["startTimeStampMs"], unit="ms")
metrics_df["endTimeStampMs"] = pd.to_datetime(metrics_df["endTimeStampMs"], unit="ms")
# Elapsed processing time in milliseconds.
metrics_df["processing_time"] = (
    metrics_df["endTimeStampMs"] - metrics_df["startTimeStampMs"]).dt.total_seconds() * 1000
import time

import cdsw
import pandas as pd
from scipy.stats import chisquare

# Define our unique model deployment id
model_deployment_crn = "crn:cdp:ml:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:workspace:ec3efe6f-c4f5-4593-857b-a80698e4857e/d5c3fbbe-d604-4f3b-b98a-227ecbd741b4"

# Define our training distribution
training_distribution_percent = pd.DataFrame({
    "Excellent": [0.50],
    "Poor": [0.50]
})
training_distribution_percent

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                                  start_timestamp_ms=0,
                                  end_timestamp_ms=current_timestamp_ms)
df = pd.json_normalize(known_metrics["metrics"])

# Do some conversions & calculations
df["startTimeStampMs"] = pd.to_datetime(df["startTimeStampMs"], unit="ms")
df["endTimeStampMs"] = pd.to_datetime(df["endTimeStampMs"], unit="ms")
# Elapsed processing time in milliseconds.
df["processing_time"] = (df["endTimeStampMs"] - df["startTimeStampMs"]).dt.total_seconds() * 1000

# Per-prediction rows carry a "metrics.prediction" value; aggregate rows do not.
non_agg_metrics = df.dropna(subset=["metrics.prediction"])
non_agg_metrics.tail()

# Visualize the processing time
non_agg_metrics.plot(kind="line", x="predictionUuid", y="processing_time")
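# The chisquare import and the 50/50 training distribution above set up a
# drift check that is not shown; a minimal sketch of it (column names follow
# the normalization used above, the rest is an assumption):
observed = (non_agg_metrics["metrics.prediction"]
            .value_counts()
            .reindex(training_distribution_percent.columns, fill_value=0))
expected = training_distribution_percent.iloc[0] * observed.sum()
# A small p-value suggests the live prediction mix has drifted from training.
print(chisquare(f_obs=observed, f_exp=expected))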
import cdsw, time, os
import random
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import chisquare

# Define our unique model deployment id
model_deployment_crn = "GET THIS FROM MODEL SCREEN IN CML"

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn, 0, current_timestamp_ms)
df = pd.json_normalize(known_metrics["metrics"])
df

# Do some conversions & calculations
df["startTimeStampMs"] = pd.to_datetime(df["startTimeStampMs"], unit="ms")
df["endTimeStampMs"] = pd.to_datetime(df["endTimeStampMs"], unit="ms")
# Elapsed processing time in milliseconds.
df["processing_time"] = (df["endTimeStampMs"] - df["startTimeStampMs"]).dt.total_seconds() * 1000

# df.plot(kind='line', x='endTimeStampMs', y='metrics.MonthlyCharges', color='red')

# Write an aggregate metric over a zero-length window ending now; the trailing
# keyword argument ties it to this deployment.
cdsw.track_aggregate_metrics({"accuracy": random.random()},
                             current_timestamp_ms,
                             current_timestamp_ms,
                             model_deployment_crn=model_deployment_crn)
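# Rows written by track_aggregate_metrics() carry no predictionUuid, so after
# re-reading the metrics they can be split out (a sketch, assuming the same
# dataframe shape as above):
known_metrics = cdsw.read_metrics(model_deployment_crn, 0, int(round(time.time() * 1000)))
df = pd.json_normalize(known_metrics["metrics"])
agg_metrics = df.dropna(subset=["metrics.accuracy"])
agg_metrics.tail()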
# score_x, test_size, uuids, predictions, model_access_key, api_key,
# model_deployment_crn and start_timestamp_ms are assumed to be set up
# earlier in this example.
for i in range(len(score_x)):
    output = cdsw.call_model(model_access_key,
                             {"petal_length": score_x[i][0]}, api_key)
    # Record the UUID of each prediction for correlation with ground truth.
    uuids.append(output["response"]["uuid"])
    predictions.append(output["response"]["prediction"])
    print(output)

# Record the current time.
end_timestamp_ms = int(round(time.time() * 1000))

# We can now use the read_metrics function to read the metrics we just
# generated into the current session, by querying by time window.
data = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                         start_timestamp_ms=start_timestamp_ms,
                         end_timestamp_ms=end_timestamp_ms)
data = data["metrics"]

# Now, ground truth is known and we want to track the true value
# corresponding to each prediction above.
score_y = iris.data[:test_size, 3].reshape(-1, 1)  # Observed petal width

# Track the true values alongside the corresponding predictions using
# track_delayed_metrics. At the same time, calculate the mean absolute
# prediction error.
mean_absolute_error = 0
n = len(score_y)
for i in range(n):
    ground_truth = score_y[i][0]
    cdsw.track_delayed_metrics({"actual_result": ground_truth}, uuids[i])
    mean_absolute_error += abs(ground_truth - predictions[i]) / n
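# A minimal follow-up sketch: re-read the window and confirm the delayed
# "actual_result" values were attached to each prediction record (column
# names follow the pd.json_normalize convention used in the other examples).
import pandas as pd

data = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                         start_timestamp_ms=start_timestamp_ms,
                         end_timestamp_ms=int(round(time.time() * 1000)))
df = pd.json_normalize(data["metrics"])
print(df[["predictionUuid", "metrics.prediction", "metrics.actual_result"]].tail())
print("mean absolute error:", mean_absolute_error)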