Example #1
# In CML the predict function is decorated with @cdsw.model_metrics so that
# the tracked metrics are associated with each individual prediction.
@cdsw.model_metrics
def predict(args):

    # Track the raw input of the call.
    cdsw.track_metric("input", args)
    petal_length = float(args.get('petal_length'))

    result = model.predict([[petal_length]])
    # Track the prediction itself.
    cdsw.track_metric("predict_result", result[0][0])
    modified_result = result + 1
    # Record an aggregate metric for the call window; start_timestamp_ms,
    # end_timestamp_ms and Deployment_CRN are assumed to be defined elsewhere
    # in the script.
    cdsw.track_aggregate_metrics({"modified_result": modified_result[0][0]},
                                 start_timestamp_ms,
                                 end_timestamp_ms,
                                 model_deployment_crn=Deployment_CRN)
    return result[0][0]
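# --- Calling sketch (an assumption, not part of the original example) ---
# Shows how the decorated endpoint above might be exercised from a CML session
# so that each call returns a tracking uuid that can later be paired with
# ground truth via cdsw.track_delayed_metrics. "model_access_key" is a
# hypothetical placeholder copied from the model's deployment screen, and
# cdsw.call_model is assumed to be available in the session.
import cdsw

model_access_key = "GET THIS FROM THE MODEL SCREEN IN CML"  # hypothetical placeholder
response = cdsw.call_model(model_access_key, {"petal_length": 2.1})
prediction = response["response"]["prediction"]  # value returned by predict()
tracking_uuid = response["response"]["uuid"]     # links this call to its tracked metrics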
Example #2
        "response_label":
        response["response"]["prediction"]["probability"] >= 0.5,
        "timestamp_ms":
        int(round(time.time() * 1000))
    })

# The "ground truth" loop adds the updated actual label value for every
# prediction, plus an accuracy measure every 100 calls to the model.
for index, vals in enumerate(response_labels_sample):
    if index % 50 == 0:
        print("Updated {} records".format(index))
    cdsw.track_delayed_metrics({"final_label": vals['final_label']},
                               vals['uuid'])
    if (index % 100 == 0):
        start_timestamp_ms = vals['timestamp_ms']
        final_labels = []
        response_labels = []
    final_labels.append(vals['final_label'])
    response_labels.append(vals['response_label'])
    if (index % 100 == 99):
        print("Adding accuracy metric")
        end_timestamp_ms = vals['timestamp_ms']
        accuracy = classification_report(final_labels,
                                         response_labels,
                                         output_dict=True)["accuracy"]
        cdsw.track_aggregate_metrics({"accuracy": accuracy},
                                     start_timestamp_ms,
                                     end_timestamp_ms,
                                     model_deployment_crn=Deployment_CRN)

print("done")
# Check the accuracy of the model responses.
df_week_1_accuracy = classification_report(week_1_lables,
                                           response_lables_week_1,
                                           output_dict=True)["accuracy"]

# Show the info for cdsw.track_aggregate_metrics
help(cdsw.track_aggregate_metrics)

# Add the accuracy metric, along with start and end timestamps, to the model
# metrics. The window runs from three weeks ago to two weeks ago.
w1_start_timestamp_ms = int(round(
    time.time() * 1000)) - 7 * 24 * 60 * 60 * 1000 * 3
w1_end_timestamp_ms = int(round(
    time.time() * 1000)) - 7 * 24 * 60 * 60 * 1000 * 2
cdsw.track_aggregate_metrics({"accuracy": df_week_1_accuracy},
                             w1_start_timestamp_ms,
                             w1_end_timestamp_ms,
                             model_deployment_crn=Deployment_CRN)

### Week 2
# Same as week 1, now with rows 501 - 1000

df_week_2 = df_sample.iloc[501:1000, :]

df_week_2 = df_week_2.apply(lambda x: flip_churn(x, 0.2), axis=1)
df_week_2.groupby('Churn')['Churn'].count()

df_week_2_clean = df_week_2.\
  replace({'SeniorCitizen': {"1": 'Yes', "0": 'No'}}).\
  replace(r'^\s$', np.nan, regex=True).\
  dropna()
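# --- Sketch of the flip_churn helper used above (an assumption; its real
# definition is not shown in this example). It presumably flips a row's Churn
# label with the given probability to simulate label drift between "weeks".
import random

def flip_churn(row, probability):
    # Invert the Churn value of this row with the given probability.
    if random.random() < probability:
        row['Churn'] = 'No' if row['Churn'] == 'Yes' else 'Yes'
    return row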
Example #4
import json
import random
import time

import cdsw
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import chisquare

# Define our unique model deployment ID
model_deployment_crn = "GET THIS FROM MODEL SCREEN IN CML"

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn, 0,
                                  current_timestamp_ms)

df = pd.json_normalize(known_metrics["metrics"])
df

# Convert the timestamps to datetimes and derive a processing-time column.
df['startTimeStampMs'] = pd.to_datetime(df['startTimeStampMs'], unit='ms')
df['endTimeStampMs'] = pd.to_datetime(df['endTimeStampMs'], unit='ms')
df["processing_time"] = (df["endTimeStampMs"] -
                         df["startTimeStampMs"]).dt.microseconds * 1000

#df.plot(kind='line',x='endTimeStampMs',y='metrics.MonthlyCharges',color='red')
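# A minimal plotting sketch fleshing out the commented-out line above; it
# assumes the normalized frame actually contains a "metrics.MonthlyCharges"
# column, which depends on what was tracked for this deployment.
df.plot(kind='line', x='endTimeStampMs', y='metrics.MonthlyCharges', color='red')
plt.xlabel("endTimeStampMs")
plt.ylabel("MonthlyCharges")
plt.show()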

# Track a random test value as an aggregate "accuracy" metric, using the same
# start and end timestamp for the window.
cdsw.track_aggregate_metrics({"accuracy": random.random()},
                             current_timestamp_ms,
                             current_timestamp_ms,
                             model_deployment_crn=model_deployment_crn)
Example #5
# We can now use the read_metrics function to read the metrics we just
# generated back into the current session by querying over a time window.
data = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                         start_timestamp_ms=start_timestamp_ms,
                         end_timestamp_ms=end_timestamp_ms)
data = data['metrics']

# Now, ground truth is known and we want to track the true value
# corresponding to each prediction above.
score_y = iris.data[:test_size, 3].reshape(-1, 1) # Observed petal width

# Track the true values alongside the corresponding predictions using
# track_delayed_metrics. At the same time, calculate the mean absolute
# prediction error.
mean_absolute_error = 0
n = len(score_y)
for i in range(n):
    ground_truth = score_y[i][0]  # observed petal width for this prediction
    cdsw.track_delayed_metrics({"actual_result": ground_truth}, uuids[i])

    absolute_error = np.abs(ground_truth - predictions[i])
    mean_absolute_error += absolute_error / n

# Use the track_aggregate_metrics function to record the mean absolute
# error within the time window where we made the model calls above.
cdsw.track_aggregate_metrics(
    {"mean_absolute_error": mean_absolute_error}, 
    start_timestamp_ms, 
    end_timestamp_ms, 
    model_deployment_crn="crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:e4bc3658-32bb-4dc3-a22d-828150329c76/857bd78e-ec51-48dc-be2b-5582e4a8dbe1"
)
Example #6

# The closing of the (truncated) training_distribution_percent definition; it
# holds the expected share of "Poor" and "Excellent" predictions from the
# training data and is reused in the chi-square test below.
})
training_distribution_percent

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                                  start_timestamp_ms=0,
                                  end_timestamp_ms=current_timestamp_ms)

df = pd.json_normalize(known_metrics["metrics"])
df.tail()

# Test whether the current prediction distribution differs from the training data set

prediction_dist_series = df.groupby(
    df["metrics.prediction"]).describe()["metrics.Alcohol"]["count"]
prediction_dist_series
x2, pv = chisquare([(training_distribution_percent["Poor"] * len(df))[0], \
                    (training_distribution_percent["Excellent"] * len(df))[0]],\
                   [prediction_dist_series[0], prediction_dist_series[1]])

print(x2, pv)

# Put the test statistics back into MLOps for tracking.
cdsw.track_aggregate_metrics({"chisq_x2": x2, "chisq_p": pv},
                             current_timestamp_ms,
                             current_timestamp_ms,
                             model_deployment_crn=model_deployment_crn)
Example #7
# Read the tracked metrics for the call window back into the session.
data = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                         start_timestamp_ms=start_timestamp_ms,
                         end_timestamp_ms=end_timestamp_ms,
                         dev=dev)
## Print out Model Serving Metrics
print(data)
data = data['metrics']

# Now, ground truth is known and we want to track the true value
# corresponding to each prediction above.
score_y = iris.data[:test_size, 3].reshape(-1, 1)  # Observed petal width

# Track the true values alongside the corresponding predictions using
# track_delayed_metrics. At the same time, calculate the mean absolute
# prediction error.
mean_absolute_error = 0
n = len(score_y)
for i in range(n):
    ground_truth = score_y[i][0]  # observed petal width for this prediction
    cdsw.track_delayed_metrics({"actual_result": ground_truth},
                               uuids[i],
                               dev=dev)

    absolute_error = np.abs(ground_truth - predictions[i])
    mean_absolute_error += absolute_error / n

# Use the track_aggregate_metrics function to record the mean absolute
# error within the time window where we made the model calls above.
cdsw.track_aggregate_metrics({"mean_absolute_error": mean_absolute_error},
                             start_timestamp_ms,
                             end_timestamp_ms,
                             model_deployment_crn=model_deployment_crn,
                             dev=dev)