import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable, falling back to the Hive warehouse
# location from hive-site.xml when it is not already defined for the project.
try:
    storage = os.environ["STORAGE"]
except KeyError:
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            storage = (
                prop.find("value").text.split("/")[0]
                + "//"
                + prop.find("value").text.split("/")[2]
            )
    storage_environment_params = {"STORAGE": storage}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = storage
import os
import time
import datetime
from cmlbootstrap import CMLBootstrap
import numpy as np

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Get User Details
user_details = cml.get_user({})
user_obj = {
    "id": user_details["id"],
    "username": "******",
    "name": user_details["name"],
    "type": user_details["type"],
    "html_url": user_details["html_url"],
    "url": user_details["url"],
}

# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]
import os
import cdsw
from cmlbootstrap import CMLBootstrap

## Set the model ID
# Get the model id from the model you deployed in step 5. These are unique to each
# model on CML.
model_id = "88"

# Get the various Model CRN details
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

latest_model = cml.get_model({
    "id": model_id,
    "latestModelDeployment": True,
    "latestModelBuild": True
})

Model_CRN = latest_model["crn"]
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]

# Read in the model metrics dict.
model_metrics = cdsw.read_metrics(
    model_crn=Model_CRN, model_deployment_crn=Deployment_CRN
)

# This is a handy way to unravel the dict into a big pandas dataframe.
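# A minimal sketch of the "unravel" step described above, assuming the dict returned
# by cdsw.read_metrics() keeps the per-prediction records under a "metrics" key
# (adjust the key if your CML version returns a different shape).
import pandas as pd

metrics_df = pd.json_normalize(model_metrics["metrics"])
print(metrics_df.dtypes)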
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the S3 bucket variable
try:
    s3_bucket = os.environ["STORAGE"]
except KeyError:
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            s3_bucket = (
                prop.find("value").text.split("/")[0]
                + "//"
                + prop.find("value").text.split("/")[2]
            )
    storage_environment_params = {"STORAGE": s3_bucket}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = s3_bucket
!pip3 install --progress-bar off -r requirements.txt

# Create the directories and upload data
from cmlbootstrap import CMLBootstrap
import os
import xml.etree.ElementTree as ET
import subprocess

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable
try:
    storage = os.environ["STORAGE"]
except KeyError:
    if os.path.exists("/etc/hadoop/conf/hive-site.xml"):
        tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
        root = tree.getroot()
        for prop in root.findall("property"):
            if prop.find("name").text == "hive.metastore.warehouse.dir":
                storage = (
                    prop.find("value").text.split("/")[0]
                    + "//"
                    + prop.find("value").text.split("/")[2]
                )
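# The directory creation / data upload itself is not shown above. A minimal sketch of
# how the STORAGE value is typically consumed, assuming the hdfs CLI is available in
# the session; both paths below are placeholders, not the project's real layout.
subprocess.call(["hdfs", "dfs", "-mkdir", "-p", storage + "/datalake/data/churn"])
subprocess.call(["hdfs", "dfs", "-copyFromLocal",
                 "data/churn.csv",                    # placeholder local file
                 storage + "/datalake/data/churn/"])  # placeholder target directory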
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable. The hive-site.xml lookup is disabled here;
# storage defaults to the current user's home directory instead.
# try:
#     storage = os.environ["STORAGE"]
# except KeyError:
#     tree = ET.parse('/etc/hadoop/conf/hive-site.xml')
#     root = tree.getroot()
#
#     for prop in root.findall('property'):
#         if prop.find('name').text == "hive.metastore.warehouse.dir":
#             storage = prop.find('value').text.split("/")[0] + "//" + prop.find('value').text.split("/")[2]
#
#     storage_environment_params = {"STORAGE": storage}
#     storage_environment = cml.create_environment_variable(storage_environment_params)

os.environ["STORAGE"] = "/user/" + cml.get_user({})["username"]
telco_data_raw = spark.read.csv(
    path, header=True, sep=",", schema=schema, nullValue="NA"
)
df = telco_data_raw.toPandas()

# Get the various Model CRN details
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Get the newly deployed churn model details using the cmlbootstrap API
models = cml.get_models({})
churn_model_details = [
    model
    for model in models
    if model["name"] == "Churn Model API Endpoint"
    and model["creator"]["username"] == USERNAME
    and model["project"]["slug"] == PROJECT_NAME
][0]

latest_model = cml.get_model({
    "id": churn_model_details["id"],
    "latestModelDeployment": True,
    "latestModelBuild": True,
})

Model_CRN = latest_model["crn"]
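# A hedged sketch of what the CRNs fetched above are typically used for next: writing
# an aggregate metric for the current deployment via the cdsw metrics helpers in CML.
# The metric name and value are placeholders; Deployment_CRN is taken from the same
# latestModelDeployment field used in the earlier metrics-reading snippet.
import time
import cdsw

Deployment_CRN = latest_model["latestModelDeployment"]["crn"]
now_ms = int(round(time.time() * 1000))
cdsw.track_aggregate_metrics(
    {"example_metric": 0.95},   # placeholder value
    now_ms - 60 * 1000,         # window start: one minute ago
    now_ms,                     # window end: now
    model_deployment_crn=Deployment_CRN,
)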
# Deploy the Challenger model - prepare yml:
from cmlbootstrap import CMLBootstrap
import datetime
import os
import time

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = "uuc48l0gm0r3n2mib27voxazoos65em0"  # os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Create the YAML file for the model lineage
yaml_text = open("lineage.yml", "r")
yaml_read = yaml_text.read()

challenger_yaml = '''"Challenger {}":
  hive_table_qualified_names:
    - "{}@cm"
  metadata:
    deployment: "this model was deployed programmatically"'''.format(
    run_time_suffix, table_name)
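# Not shown above: the combined YAML presumably has to be written back so the new
# challenger entry is visible at deployment time. A minimal sketch, assuming the
# challenger block is simply appended to the existing lineage.yml content:
with open("lineage.yml", "w") as yaml_out:
    yaml_out.write(yaml_read + "\n" + challenger_yaml)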
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the storage variable to the default location
try:
    s3_bucket = os.environ["STORAGE"]
except KeyError:
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            s3_bucket = (
                prop.find("value").text.split("/")[0]
                + "//"
                + prop.find("value").text.split("/")[2]
            )
    storage_environment_params = {"STORAGE": s3_bucket}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = s3_bucket
import random
import pandas as pd
from cmlbootstrap import CMLBootstrap
import datetime
import os
import time

# Retrieve project info with the CML library
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = "uuc48l0gm0r3n2mib27voxazoos65em0"
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Retrieve the model access keys for the models being compared.
# The champion can either be hardcoded or be the most recent model from the day it was deployed.
# mvav17o0lwb9oogg3jlh8g7wqaw99e6w
champion_ak = "mvav17o0lwb9oogg3jlh8g7wqaw99e6w"

# The challenger is the most recent model deployed today
project_id = cml.get_project({})['id']  # get project ID
deployed_models_df = pd.DataFrame(cml.get_models({}))
challenger_ak = deployed_models_df[deployed_models_df['projectId'] == project_id]\
    .sort_values("createdAt", ascending=False)['accessKey'].iloc[0]


def route_request(args):
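# The original body of route_request() is cut off above. A hedged sketch of what an
# A/B router typically does here: randomly split traffic between the champion and
# challenger access keys and forward the payload to the chosen deployment. The
# modelservice URL pattern is an assumption about the standard CML model endpoint;
# adjust it for your cluster.
import json
import requests

MODEL_ENDPOINT = HOST.replace("//", "//modelservice.", 1) + "/model"


def route_request_example(args, challenger_share=0.5):
    access_key = challenger_ak if random.random() < challenger_share else champion_ak
    response = requests.post(
        MODEL_ENDPOINT,
        data=json.dumps({"accessKey": access_key, "request": args}),
        headers={"Content-Type": "application/json"},
    )
    return response.json()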
### Step 1: Install Requirements
!bash cdsw-build.sh

from cmlbootstrap import CMLBootstrap
import datetime
import os
import time

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
# Passing the API key directly (from a project environment variable) is better
cml = CMLBootstrap(HOST, USERNAME, os.environ["MY_API_KEY"], PROJECT_NAME)

# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

### Step 2: Run 00_bootstrap.py to create the Spark table
exec(open("00_bootstrap.py").read())

### Step 3: Run 01_A_ModelDevelopment.py to develop a first baseline model
exec(open("01_A_ModelDevelopment.py").read())
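# Note: the MY_API_KEY project environment variable used above must already exist.
# A minimal sketch of seeding it with the same helper the setup scripts use
# (cml.create_environment_variable); "<legacy-api-key>" is a placeholder, not a real key.
seed_cml = CMLBootstrap(HOST, USERNAME, os.getenv("CDSW_API_KEY"), PROJECT_NAME)
seed_cml.create_environment_variable({"MY_API_KEY": "<legacy-api-key>"})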