import datetime
import os

import numpy as np
from cmlbootstrap import CMLBootstrap

# Timestamp suffix used to keep created resources unique
run_time_suffix = datetime.datetime.now()
run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")

# Read the workspace connection details from the environment
HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
USERNAME = os.getenv("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

# Instantiate the API wrapper
cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

# Get User Details
user_details = cml.get_user({})
user_obj = {
    "id": user_details["id"],
    "username": "******",
    "name": user_details["name"],
    "type": user_details["type"],
    "html_url": user_details["html_url"],
    "url": user_details["url"]
}

# Get Project Details
project_details = cml.get_project({})
project_id = project_details["id"]

# Create Job
create_jobs_params = {
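# The dictionary opened above collects the parameters for the job to create.
# A minimal sketch of submitting such parameters through the wrapper is shown
# below, assuming the cmlbootstrap wrapper exposes create_job() and that the
# returned metadata carries an "id" field; the call is left commented out
# because the parameter fields themselves are project-specific.
#
# new_job = cml.create_job(create_jobs_params)
# new_job_id = new_job["id"]
# print("Created job with id", new_job_id)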
":")[0] + "://" + os.getenv("CDSW_DOMAIN") USERNAME = os.getenv("CDSW_PROJECT_URL").split( "/")[6] # args.username # "vdibia" API_KEY = os.getenv("CDSW_API_KEY") PROJECT_NAME = os.getenv("CDSW_PROJECT") # Instantiate API Wrapper cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME) # Set the STORAGE environment variable #try : # storage=os.environ["STORAGE"] #except: # tree = ET.parse('/etc/hadoop/conf/hive-site.xml') # root = tree.getroot() # # for prop in root.findall('property'): # if prop.find('name').text == "hive.metastore.warehouse.dir": # storage = prop.find('value').text.split("/")[0] + "//" + prop.find('value').text.split("/")[2] # storage_environment_params = {"STORAGE":storage} # storage_environment = cml.create_environment_variable(storage_environment_params) os.environ["STORAGE"] = "/user/" + cml.get_user({})["username"] # Upload the data to the cloud storage !hdfs dfs -mkdir -p $STORAGE/datalake !hdfs dfs -mkdir -p $STORAGE/datalake/data !hdfs dfs -mkdir -p $STORAGE/datalake/data/churn !hdfs dfs -copyFromLocal /home/cdsw/raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv $STORAGE/datalake/data/churn/WA_Fn-UseC_-Telco-Customer-Churn-.csv