Example no. 1
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(
    ":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split(
    "/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY") 
PROJECT_NAME = os.getenv("CDSW_PROJECT")  

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable
try:
    storage = os.environ["STORAGE"]
except KeyError:
    # Fall back to the Hive warehouse location defined in hive-site.xml
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            storage = prop.find("value").text.split("/")[0] + "//" + prop.find("value").text.split("/")[2]
    storage_environment_params = {"STORAGE": storage}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = storage
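# Once STORAGE is set, later steps can copy data under it. A minimal sketch,
# assuming an hdfs client is on the PATH (the target path below is
# illustrative only, not taken from this example):
import subprocess

subprocess.call(["hdfs", "dfs", "-mkdir", "-p", os.environ["STORAGE"] + "/datalake/data"])
subprocess.call(["hdfs", "dfs", "-copyFromLocal", "local_file.csv", os.environ["STORAGE"] + "/datalake/data/"])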
Example no. 2
import os
import time
import datetime
from cmlbootstrap import CMLBootstrap
import numpy as np
run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")


HOST = os.getenv("CDSW_API_URL").split(
    ":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")


cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)


# Get User Details
user_details = cml.get_user({})
user_obj = {
    "id": user_details["id"],
    "username": user_details["username"],
    "name": user_details["name"],
    "type": user_details["type"],
    "html_url": user_details["html_url"],
    "url": user_details["url"]
}


# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]
Example no. 3
import os
import time
import datetime
from cmlbootstrap import CMLBootstrap
import numpy as np
import cdsw  # CML/CDSW runtime library, used below for read_metrics()
run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv(
    "CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Get User Details
user_details = cml.get_user({})
user_obj = {
    "id": user_details["id"],
    "username": "******",
    "name": user_details["name"],
    "type": user_details["type"],
    "html_url": user_details["html_url"],
    "url": user_details["url"]
}

# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]
## Set the model ID
# Get the model id from the model you deployed in step 5. Model ids are unique
# to each model on CML.

model_id = "88"
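# Instead of hardcoding the id, it can also be looked up by model name once
# `cml` is instantiated below, as Example no. 8 later in this listing does.
# A minimal sketch (the model name is borrowed from that example):
#   models = cml.get_models({})
#   model_id = [m["id"] for m in models if m["name"] == "Churn Model API Endpoint"][0]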

# Get the various Model CRN details
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv(
    "CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[
    6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

latest_model = cml.get_model({
    "id": model_id,
    "latestModelDeployment": True,
    "latestModelBuild": True
})

Model_CRN = latest_model["crn"]
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]

# Read in the model metrics dict.
model_metrics = cdsw.read_metrics(model_crn=Model_CRN,
                                  model_deployment_crn=Deployment_CRN)

# This is a handy way to unravel the dict into a big pandas dataframe.
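# The unraveling step itself is not shown in this snippet. A minimal sketch,
# assuming pandas is installed and that read_metrics() returns a dict whose
# "metrics" key holds a list of per-prediction records (an assumption, not
# confirmed by this example):
import pandas as pd

metrics_df = pd.json_normalize(model_metrics["metrics"])  # one row per tracked metric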
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")


HOST = os.getenv("CDSW_API_URL").split(
    ":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split(
    "/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY") 
PROJECT_NAME = os.getenv("CDSW_PROJECT")  

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# set the S3 bucket variable
try:
    s3_bucket = os.environ["STORAGE"]
except KeyError:
    # Fall back to the Hive warehouse location defined in hive-site.xml
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            s3_bucket = prop.find("value").text.split("/")[0] + "//" + prop.find("value").text.split("/")[2]
    storage_environment_params = {"STORAGE": s3_bucket}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = s3_bucket
Example no. 6
!pip3 install --progress-bar off -r requirements.txt

# Create the directories and upload data
from cmlbootstrap import CMLBootstrap
import os
import xml.etree.ElementTree as ET
import subprocess

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable
try:
    storage = os.environ["STORAGE"]
except KeyError:
    if os.path.exists("/etc/hadoop/conf/hive-site.xml"):
        tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
        root = tree.getroot()
        for prop in root.findall("property"):
            if prop.find("name").text == "hive.metastore.warehouse.dir":
                storage = (
                    prop.find("value").text.split("/")[0]
                    + "//"
                    + prop.find("value").text.split("/")[2]
                )
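        # The snippet is cut off here. A minimal continuation, mirroring
        # Example no. 1 above (assumed, not part of this snippet), would
        # persist the derived value as a project environment variable:
        storage_environment_params = {"STORAGE": storage}
        storage_environment = cml.create_environment_variable(storage_environment_params)
        os.environ["STORAGE"] = storage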
Example no. 7
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Set the setup variables needed by CMLBootstrap
HOST = os.getenv("CDSW_API_URL").split(
    ":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split(
    "/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY") 
PROJECT_NAME = os.getenv("CDSW_PROJECT")  

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Set the STORAGE environment variable
#try : 
#  storage=os.environ["STORAGE"]
#except:
#  tree = ET.parse('/etc/hadoop/conf/hive-site.xml')
#  root = tree.getroot()
#    
#  for prop in root.findall('property'):
#    if prop.find('name').text == "hive.metastore.warehouse.dir":
#        storage = prop.find('value').text.split("/")[0] + "//" + prop.find('value').text.split("/")[2]
#  storage_environment_params = {"STORAGE":storage}
#  storage_environment = cml.create_environment_variable(storage_environment_params)

os.environ["STORAGE"] = "/user/" + cml.get_user({})["username"]
Example no. 8
import os
from cmlbootstrap import CMLBootstrap

# `spark`, `schema`, and `path` are assumed to be defined earlier in the
# original script (a SparkSession, the CSV schema, and the data location).
telco_data_raw = spark.read.csv(path,
                                header=True,
                                sep=",",
                                schema=schema,
                                nullValue="NA")

df = telco_data_raw.toPandas()

# Get the various Model CRN details
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv(
    "CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Get newly deployed churn model details using cmlbootstrapAPI
models = cml.get_models({})
churn_model_details = [
    model for model in models
    if model["name"] == "Churn Model API Endpoint"
    and model["creator"]["username"] == USERNAME
    and model["project"]["slug"] == PROJECT_NAME
][0]
latest_model = cml.get_model({
    "id": churn_model_details["id"],
    "latestModelDeployment": True,
    "latestModelBuild": True,
})

Model_CRN = latest_model["crn"]
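# When reading metrics for this model (see the cdsw.read_metrics call earlier
# in this listing), the deployment CRN is typically pulled the same way; a
# sketch based on that earlier example:
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]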
Example no. 9
#Deploy the Challenger model - prepare yml:

from cmlbootstrap import CMLBootstrap
import datetime
import os, time

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv(
    "CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[
    6]  # args.username  # "vdibia"
API_KEY = "uuc48l0gm0r3n2mib27voxazoos65em0"  #os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Create the YAML file for the model lineage
yaml_text = open("lineage.yml", "r")
yaml_read = yaml_text.read()

# table_name is assumed to be defined earlier in the original script.
challenger_yaml = '''"Challenger {}":
  hive_table_qualified_names:
    - "{}@cm"
  metadata:
    deployment: "this model was deployed programmatically"'''.format(
    run_time_suffix, table_name)
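# The snippet stops after building the YAML string. One plausible continuation
# (a sketch, not shown in this example) appends the new Challenger entry to the
# lineage file that was read above:
with open("lineage.yml", "w") as lineage_file:
    lineage_file.write(yaml_read + "\n" + challenger_yaml + "\n")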
Example no. 10
import os
import xml.etree.ElementTree as ET
import datetime
from cmlbootstrap import CMLBootstrap

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")


HOST = os.getenv("CDSW_API_URL").split(
    ":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split(
    "/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY") 
PROJECT_NAME = os.getenv("CDSW_PROJECT")  

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# set the storage variable to the default location
try:
    s3_bucket = os.environ["STORAGE"]
except KeyError:
    # Fall back to the Hive warehouse location defined in hive-site.xml
    tree = ET.parse("/etc/hadoop/conf/hive-site.xml")
    root = tree.getroot()
    for prop in root.findall("property"):
        if prop.find("name").text == "hive.metastore.warehouse.dir":
            s3_bucket = prop.find("value").text.split("/")[0] + "//" + prop.find("value").text.split("/")[2]
    storage_environment_params = {"STORAGE": s3_bucket}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = s3_bucket
Example no. 11
import random

import pandas as pd
from cmlbootstrap import CMLBootstrap
import datetime
import os, time

#Retrieve project info with CML library
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv(
    "CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[
    6]  # args.username  # "vdibia"
API_KEY = "uuc48l0gm0r3n2mib27voxazoos65em0"
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate API Wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Retrieve the model access keys for the two models being compared

# Champion: either hardcoded (as here) or the most recent model from the day
# it was deployed
champion_ak = "mvav17o0lwb9oogg3jlh8g7wqaw99e6w"

# Challenger: the most recent model deployed today
project_id = cml.get_project({})['id']  #get project ID
deployed_models_df = pd.DataFrame(cml.get_models({}))
challenger_ak = deployed_models_df[deployed_models_df['projectId'] == project_id]\
    .sort_values("createdAt", ascending=False)['accessKey'].iloc[0]


def route_request(args):
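    # The function body is cut off in this example. A minimal sketch of an A/B
    # routing step (an assumption, not taken from the original): pick the
    # Champion or the Challenger access key at random and POST the request to
    # the CML model service endpoint (URL pattern assumed here).
    import requests  # not imported in the original snippet

    access_key = random.choice([champion_ak, challenger_ak])
    response = requests.post(
        "https://modelservice." + os.getenv("CDSW_DOMAIN") + "/model",
        json={"accessKey": access_key, "request": args},
    )
    return response.json()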
Example no. 12
### Step 1: Install Requirements
!bash cdsw-build.sh

from cmlbootstrap import CMLBootstrap
import datetime
import os, time

HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]  # args.username  # "vdibia"
API_KEY = os.getenv("CDSW_API_KEY") 
PROJECT_NAME = os.getenv("CDSW_PROJECT") 

# Instantiate API Wrapper
# Passing API key directly is better
cml = CMLBootstrap(HOST, USERNAME, os.environ["MY_API_KEY"], PROJECT_NAME)

# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]

run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

### Step 2: Run 00_bootstrap.py to create Spark table

exec(open("00_boostrap.py").read())

### Step 3: Run 01_ModelDevelopment.ipynb to develop a first baseline model

exec(open("01_A_ModelDevelopment.py").read())