def get_auth():
    """Authenticate via the Azure CLI to access the workspace."""
    try:
        auth = AzureCliAuthentication()
        auth.get_authentication_header()
    except AuthenticationException:
        print("Authentication Error Occurred")
    return auth


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-sid", "--subscription_id", help="Subscription ID")
    parser.add_argument("-rg", "--resource_group", help="Resource Group")
    parser.add_argument("-wn", "--workspace_name", help="Workspace Name")
    args = parser.parse_args()

    ws = Workspace(subscription_id=args.subscription_id,
                   resource_group=args.resource_group,
                   workspace_name=args.workspace_name,
                   auth=get_auth())

    print("Workspace Details")
    print(ws.get_details())
    print("Successfully authenticated and set up the workspace")
    ws.write_config()
    print("Saved config file")
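# A minimal sketch (not part of the original script) of how the config file written
# above can be reused later: Workspace.from_config() reads the saved config.json,
# so the subscription, resource group, and workspace name do not have to be passed again.
from azureml.core import Workspace

ws = Workspace.from_config()
print("Reloaded workspace:", ws.name)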
def setAutomatedMLWorkspace(create_workspace=False,
                            create_resource_group=False,
                            workspace_region=None, *,
                            subscription_id=None,
                            resource_group=None,
                            workspace_name=None,
                            auth=None):
    """Set the configuration file for AutomatedML actions with the EconML library.

    If ``create_workspace`` is set true, a new workspace is created for the user.

    Parameters
    ----------
    create_workspace: Boolean, optional, default False
        If set to true, a new workspace will be created if the specified workspace
        does not exist.

    create_resource_group: Boolean, optional, default False
        If set to true, a new resource_group will be created if the specified
        resource_group does not exist.

    workspace_region: String, optional
        Region of the workspace; only necessary if ``create_workspace`` is set to true
        and a new workspace is being created.

    auth: azureml.core.authentication.AbstractAuthentication, optional
        If set, EconML will use the auth object for handling Azure authentication.
        Otherwise, EconML will use interactive authentication, opening an
        authentication portal in the browser.

    subscription_id: String, required
        Azure subscription ID of the workspace to be created or set.

    resource_group: String, required
        Name of the resource group of the workspace to be created or set.

    workspace_name: String, required
        Name of the workspace to be created or set.
    """
    try:
        ws = Workspace(subscription_id=subscription_id,
                       resource_group=resource_group,
                       workspace_name=workspace_name,
                       auth=auth)
        # write the details of the workspace to a configuration file in the notebook library
        ws.write_config()
        print("Workspace configuration has succeeded.")
    except ProjectSystemException:
        if create_workspace:
            if create_resource_group:
                print("Workspace not accessible. Creating a new workspace and "
                      "resource group.")
                ws = Workspace.create(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group,
                                      location=workspace_region,
                                      create_resource_group=create_resource_group,
                                      sku='basic',
                                      auth=auth,
                                      exist_ok=True)
                ws.get_details()
            else:
                print("Workspace not accessible. Set create_resource_group = True "
                      "and run again to create a new workspace and resource group.")
        else:
            print("Workspace not accessible. Set create_workspace = True "
                  "to create a new workspace.")
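# A hedged usage sketch (not from the original source): the subscription, resource
# group, and workspace values below are placeholders, and the call assumes this
# helper is importable from the EconML automated-ML utilities.
setAutomatedMLWorkspace(create_workspace=True,
                        create_resource_group=True,
                        workspace_region="eastus",
                        subscription_id="<subscription-id>",
                        resource_group="<resource-group>",
                        workspace_name="<workspace-name>")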
import azureml.core
from azureml.core import Workspace
from azureml.core.run import Run
from azureml.core.experiment import Experiment

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

# COMMAND ----------

ws = Workspace(workspace_name=workspace,
               subscription_id=subscription_id,
               resource_group=resource_grp)
ws.get_details()

print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

# COMMAND ----------

# MAGIC %md #2. Add model to Azure Machine Learning Service

# COMMAND ----------

import os
import urllib
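# COMMAND ----------

# The original notebook cell is truncated after the imports above. As an illustrative
# sketch only (the URL and file name are hypothetical, not from the source), a model
# file downloaded with urllib could be registered in the workspace like this:
import urllib.request
from azureml.core.model import Model

model_url = "https://example.com/models/income_model.mml"   # hypothetical URL
model_path = os.path.join("/tmp", "income_model.mml")
urllib.request.urlretrieve(model_url, model_path)

model = Model.register(workspace=ws,
                       model_path=model_path,
                       model_name="income_model")  # hypothetical name
print("Registered", model.name, "version", model.version)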
def trigger_training_job():

    # Define vars <change the vars>.
    # In a production situation, don't put secrets in source code, but pass them as secret variables;
    # see https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
    # workspace = "<Name of your workspace>"
    # subscription_id = "<Subscription id>"
    # resource_grp = "<Name of your resource group where aml service is created>"
    # domain = "westeurope.azuredatabricks.net"  # change location in case databricks instance is not in westeurope
    # DBR_PAT_TOKEN = bytes("<<your Databricks Personal Access Token>>", encoding='utf-8')  # adding b'
    workspace = "aiml-ws1"
    subscription_id = "05c034fe-a6e2-42b7-bdfe-519a3b3a40cf"
    resource_grp = "vikram-aiml"
    domain = "eastus.azuredatabricks.net"  # change location in case databricks instance is not in westeurope
    DBR_PAT_TOKEN = bytes("dapi2dd4008fef79f1f64392ca27b7a2888e", encoding='utf-8')  # adding b'

    notebookRemote = "/3_IncomeNotebookDevops"
    experiment_name = "experiment_model_release"
    model_name_run = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_dbrmod.mml"  # in case you want to change the name, keep the .mml extension
    model_name = "databricksmodel.mml"  # in case you want to change the name, keep the .mml extension

    #
    # Step 1: Create job and attach it to cluster
    #
    # In this step, secrets are added as parameters (spn_tenant, spn_clientid, spn_clientsecret).
    # Never do this in a production situation; use a secret scope backed by key vault instead.
    # See https://docs.azuredatabricks.net/user-guide/secrets/secret-scopes.html#azure-key-vault-backed-scopes
    response = requests.post(
        'https://%s/api/2.0/jobs/create' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={
            "name": "Run AzureDevopsNotebook Job",
            "new_cluster": {
                "spark_version": "4.0.x-scala2.11",
                "node_type_id": "Standard_D3_v2",
                "spark_env_vars": {
                    'PYSPARK_PYTHON': '/databricks/python3/bin/python3',
                },
                "autoscale": {
                    "min_workers": 1,
                    "max_workers": 2
                }
            },
            "libraries": [{
                "pypi": {
                    "package": "azureml-sdk[databricks]"
                }
            }],
            "notebook_task": {
                "notebook_path": notebookRemote,
                "base_parameters": [{
                    "key": "subscription_id",
                    "value": subscription_id
                }, {
                    "key": "resource_group",
                    "value": resource_grp
                }, {
                    "key": "workspace_name",
                    "value": workspace
                }, {
                    "key": "model_name",
                    "value": model_name_run
                }]
            }
        })
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    #
    # Step 2: Start job
    #
    databricks_job_id = response.json()['job_id']

    response = requests.post(
        'https://%s/api/2.0/jobs/run-now' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={"job_id": databricks_job_id})
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(3)

    print(response.json()['run_id'])

    #
    # Step 3: Wait until job is finished
    #
    databricks_run_id = response.json()['run_id']
    scriptRun = 1
    count = 0
    while scriptRun == 1:
        response = requests.get(
            'https://%s/api/2.0/jobs/runs/get?run_id=%s' % (domain, databricks_run_id),
            headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        )

        state = response.json()['state']
        life_cycle_state = state['life_cycle_state']
        print(state)

        if life_cycle_state in ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]:
            result_state = state['result_state']
            if result_state == "SUCCESS":
                print("run ok")
                scriptRun = 0
                #exit(0)
            else:
                exit(4)
        elif count > 180:
            print("time out occurred after 90 minutes of polling")
            exit(5)
        else:
            count += 1
            time.sleep(30)  # wait 30 seconds before next status update

    #
    # Step 4: Retrieve model from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_zip_run = mdl + ".zip"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' % (domain, model_zip_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(6)

    model_output = base64.b64decode(response.json()['data'])

    # download model in deploy folder
    os.chdir("deploy")
    with open(model_zip_run, "wb") as outfile:
        outfile.write(model_output)
    print("Downloaded model {} to Project root directory".format(model_name))

    #
    # Step 5: Put model to Azure ML Service
    #
    cli_auth = AzureCliAuthentication()

    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)
    ws.get_details()

    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()
    run.upload_file("outputs/" + model_zip_run, model_zip_run)
    run.complete()
    run_id = run.id
    print("run id:", run_id)

    # unzip file to model_name_run
    shutil.unpack_archive(model_zip_run, model_name_run)

    model = Model.register(
        model_path=model_name_run,  # this points to a local file
        model_name=model_name,  # this is the name the model is registered as
        tags={
            "area": "spar",
            "type": "regression",
            "run_id": run_id
        },
        description="Medium blog test model",
        workspace=ws,
    )
    print("Model registered: {} \nModel Description: {} \nModel Version: {}".format(
        model.name, model.description, model.version))

    # Step 6: Finally, write the registered model details to conf/model.json
    model_json = {}
    model_json["model_name"] = model.name
    model_json["model_version"] = model.version
    model_json["run_id"] = run_id
    model_json["model_name_run"] = model_name_run

    with open("../conf/model.json", "w") as outfile:
        json.dump(model_json, outfile)
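# A hedged sketch (not part of the original pipeline) of how a later release step
# could read the conf/model.json written above and look up the registered model;
# it assumes a `ws` Workspace object obtained as in the script, and uses the same
# relative path as the writer.
import json
from azureml.core.model import Model

with open("../conf/model.json") as f:
    model_json = json.load(f)

model = Model(ws, name=model_json["model_name"], version=model_json["model_version"])
print("Retrieved model", model.name, "version", model.version)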
except Exception as e:
    print("Workspace not accessible. Attempting to create it now.....")
    print("Error: ", str(e))
    try:
        amlsWorkspace = Workspace.create(name=workspace_name,
                                         subscription_id=subscription_id,
                                         resource_group=resource_group,
                                         location=workspace_region,
                                         default_cpu_compute_target=Workspace.DEFAULT_CPU_CLUSTER_CONFIGURATION,
                                         default_gpu_compute_target=Workspace.DEFAULT_GPU_CLUSTER_CONFIGURATION,
                                         create_resource_group=True,
                                         exist_ok=True)
    except Exception as ie:
        print("Creation of workspace failed.")
        print("Error: ", str(ie))
        amlsWorkspace = None  # make sure the variable is defined even when creation fails

# Report on whatever happened, but more importantly, write the config if there is one.
if amlsWorkspace:
    print("Workspace details:")
    details = amlsWorkspace.get_details()
    for key in details.keys():
        print(key, details[key])

    # write the details of the workspace to a configuration file in the notebook library
    print("Workspace details saved...")
    amlsWorkspace.write_config()
else:
    print("Unable to retrieve or create workspace.")
"""Authentication to access workspace""" try: auth = AzureCliAuthentication() auth.get_authentication_header() except AuthenticationException: logger.info("Authentication Error Occured") return auth if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-sid", "--subscription_id", help="Subscription ID") parser.add_argument("-rg", "--resource_group", help="Resource Group") parser.add_argument("-wn", "--workspace_name", help="Workspace Name") args = parser.parse_args() workspace = Workspace(subscription_id=args.subscription_id, resource_group=args.resource_group, workspace_name=args.workspace_name, auth=get_auth()) logger.info("Workspace Details") logger.info(workspace.get_details()) logger.info("Success of Authentication and Workspace Setup") workspace.write_config() logger.info("Saved config file")
def trigger_training_job():

    # Define vars <change the vars>.
    # In a production situation, don't put secrets in source code, but pass them as secret variables;
    # see https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
    workspace = sys.argv[1]
    subscription_id = sys.argv[2]
    resource_grp = sys.argv[3]
    domain = sys.argv[4]
    dbr_pat_token_raw = sys.argv[5]

    DBR_PAT_TOKEN = bytes(dbr_pat_token_raw, encoding='utf-8')  # adding b'
    notebookRemote = "/3_IncomeNotebookDevops"
    experiment_name = "experiment_model_release"
    model_name_run = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_dbrmod.mml"  # in case you want to change the name, keep the .mml extension
    model_name = "databricksmodel.mml"  # in case you want to change the name, keep the .mml extension
    db_compute_name = "dbr-amls-comp"

    #
    # Step 1: Run notebook using Databricks Compute in AML SDK
    #
    cli_auth = AzureCliAuthentication()

    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)
    ws.get_details()

    #
    # Step 2: Create job and attach it to cluster
    #
    # In this step, secrets are added as parameters (spn_tenant, spn_clientid, spn_clientsecret).
    # Never do this in a production situation; use a secret scope backed by key vault instead.
    # See https://docs.azuredatabricks.net/user-guide/secrets/secret-scopes.html#azure-key-vault-backed-scopes
    response = requests.post(
        'https://%s/api/2.0/jobs/create' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={
            "name": "Run AzureDevopsNotebook Job",
            "new_cluster": {
                "spark_version": "6.6.x-scala2.11",
                "node_type_id": "Standard_D3_v2",
                "spark_env_vars": {
                    'PYSPARK_PYTHON': '/databricks/python3/bin/python3',
                },
                "autoscale": {
                    "min_workers": 1,
                    "max_workers": 2
                }
            },
            "notebook_task": {
                "notebook_path": notebookRemote,
                "base_parameters": [{
                    "key": "subscription_id",
                    "value": subscription_id
                }, {
                    "key": "resource_group",
                    "value": resource_grp
                }, {
                    "key": "workspace_name",
                    "value": workspace
                }, {
                    "key": "model_name",
                    "value": model_name_run
                }]
            }
        })
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    #
    # Step 3: Start job
    #
    databricks_job_id = response.json()['job_id']

    response = requests.post(
        'https://%s/api/2.0/jobs/run-now' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={"job_id": databricks_job_id})
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(3)

    print(response.json()['run_id'])

    #
    # Step 4: Wait until job is finished
    #
    databricks_run_id = response.json()['run_id']
    scriptRun = 1
    count = 0
    while scriptRun == 1:
        response = requests.get(
            'https://%s/api/2.0/jobs/runs/get?run_id=%s' % (domain, databricks_run_id),
            headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        )

        state = response.json()['state']
        life_cycle_state = state['life_cycle_state']
        print(state)

        if life_cycle_state in ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]:
            result_state = state['result_state']
            if result_state == "SUCCESS":
                print("run ok")
                scriptRun = 0
                #exit(0)
            else:
                exit(4)
        elif count > 180:
            print("time out occurred after 90 minutes of polling")
            exit(5)
        else:
            count += 1
            time.sleep(30)  # wait 30 seconds before next status update

    #
    # Step 5: Retrieve model from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_zip_run = mdl + ".zip"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' % (domain, model_zip_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(1)

    model_output = base64.b64decode(response.json()['data'])

    # download model in deploy folder
    os.chdir("deploy")
    with open(model_zip_run, "wb") as outfile:
        outfile.write(model_output)
    print("Downloaded model {} to Project root directory".format(model_name))

    #
    # Step 6: Retrieve model metrics from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_metrics_json_run = mdl + "_metrics.json"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' % (domain, model_metrics_json_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    model_metrics_output = json.loads(base64.b64decode(response.json()['data']))

    #
    # Step 7: Put model and metrics to Azure ML Service
    #
    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()
    run.upload_file("outputs/" + model_zip_run, model_zip_run)
    #run.log("pipeline_run", pipeline_run.id)
    run.log("au_roc", model_metrics_output["Area_Under_ROC"])
    run.log("au_prc", model_metrics_output["Area_Under_PR"])
    run.log("truePositive", model_metrics_output["True_Positives"])
    run.log("falsePositive", model_metrics_output["False_Positives"])
    run.log("trueNegative", model_metrics_output["True_Negatives"])
    run.log("falseNegative", model_metrics_output["False_Negatives"])
    run.complete()
    run_id = run.id
    print("run id:", run_id)

    # Register the model as a zip file; it will be unzipped in the init of score.py.
    # If no zip file is used and the entire directory is uploaded, more than 125 layers
    # are created during the docker image build, docker's max depth is exceeded, and the build fails.
    model = Model.register(
        model_path=model_zip_run,  # this points to a local file
        model_name=model_name,  # this is the name the model is registered as
        tags={
            "area": "spar",
            "type": "regression",
            "run_id": run_id
        },
        description="Medium blog test model",
        workspace=ws,
    )
    print("Model registered: {} \nModel Description: {} \nModel Version: {}".format(
        model.name, model.description, model.version))

    # Step 8: Finally, write the registered model details to conf/model.json
    model_json = {}
    model_json["model_name"] = model.name
    model_json["model_version"] = model.version
    model_json["run_id"] = run_id
    model_json["model_name_run"] = model_name_run

    with open("../conf/model.json", "w") as outfile:
        json.dump(model_json, outfile)
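# A hedged sketch (not part of the original script) of how the metrics logged above
# can be read back from the experiment run, e.g. for a quality gate later in the
# release pipeline; it assumes `ws`, `experiment_name`, and `run_id` as defined above.
from azureml.core import Experiment
from azureml.core.run import Run

experiment = Experiment(ws, experiment_name)
run = Run(experiment, run_id)
metrics = run.get_metrics()
print("au_roc:", metrics.get("au_roc"))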
print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') except: print('need to create new Workspace.') print('Creating new Workspace.') ws = Workspace.create( name=myworkspace, subscription_id=subscription_id, resource_group=myresourcegroup, #create_resource_group=True, location=location) ws_details = ws.get_details() ws_details # ============================================================================= # # retrieve an existing datastore in the workspace by name # datastore = Datastore.get(ws, datastore_name) # # create a TabularDataset from a delimited file behind a public web url # web_path ='https://dprepdata.blob.core.windows.net/demo/Titanic.csv' # titanic_ds1 = Dataset.Tabular.from_delimited_files(path=web_path) # # # preview the first 3 rows of titanic_ds # titanic_ds1.take(3).to_pandas_dataframe() # titanic_ds1 = titanic_ds1.register(workspace = ws, # name = 'titanic_ds1', # description = 'titanic training data') # =============================================================================
def trigger_training_job():

    # get the parameter values
    workspace = sys.argv[1]
    subscription_id = sys.argv[2]
    resource_grp = sys.argv[3]
    domain = sys.argv[4]
    DBR_PAT_TOKEN = bytes(sys.argv[5], encoding='utf-8')  # adding b'
    stor2_name = sys.argv[6]
    stor2_container = sys.argv[7]
    secret_scope = sys.argv[8]

    train_dataset = "p_train.csv"
    test_dataset = "p_test.csv"
    notebook_remote_path = "/lgb_eq_sec"
    experiment_name = "experiment_model_release"
    model_name_run = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_dbrmod.mml"  # in case you want to change the name, keep the .mml extension
    model_name = "databricksmodel.mml"

    #
    # Step 1: Create job and attach it to cluster
    #
    response = requests.post(
        'https://%s/api/2.0/jobs/create' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={
            "name": "Run AzureDevopsNotebook Job",
            "existing_cluster_id": "0626-030203-fie285",
            "notebook_task": {
                "notebook_path": notebook_remote_path,
                "base_parameters": [{
                    "key": "model_name",
                    "value": model_name_run
                }, {
                    "key": "stor2_name",
                    "value": stor2_name
                }, {
                    "key": "stor2_container",
                    "value": stor2_container
                }, {
                    "key": "stor2_train_file",
                    "value": train_dataset
                }, {
                    "key": "stor2_test_file",
                    "value": test_dataset
                }, {
                    "key": "secret_scope",
                    "value": secret_scope
                }]
            }
        })
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(2)

    #
    # Step 2: Start job
    #
    databricks_job_id = response.json()['job_id']

    response = requests.post(
        'https://%s/api/2.0/jobs/run-now' % domain,
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        json={"job_id": databricks_job_id})
    if response.status_code != 200:
        print("Error launching cluster: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(3)

    print(response.json()['run_id'])

    #
    # Step 3: Wait until job is finished
    #
    databricks_run_id = response.json()['run_id']
    script_run_state = 1
    count = 0
    while script_run_state == 1:
        response = requests.get(
            'https://%s/api/2.0/jobs/runs/get?run_id=%s' % (domain, databricks_run_id),
            headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN},
        )

        state = response.json()['state']
        life_cycle_state = state['life_cycle_state']
        print(state)

        if life_cycle_state in ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]:
            result_state = state['result_state']
            if result_state == "SUCCESS":
                print("run successful")
                script_run_state = 0
                #exit(0)
            else:
                exit(4)
        elif count > 100:  # 180
            print("time out occurred after 50 minutes of polling")
            exit(5)
        else:
            count += 1
            time.sleep(30)  # wait 30 seconds before next status update

    #
    # Step 4: Retrieve model from dbfs
    #
    mdl, ext = model_name_run.split(".")
    model_zip_run = mdl + ".zip"

    response = requests.get(
        'https://%s/api/2.0/dbfs/read?path=/%s' % (domain, model_zip_run),
        headers={'Authorization': b"Bearer " + DBR_PAT_TOKEN})
    if response.status_code != 200:
        print("Error copying dbfs results: %s: %s" %
              (response.json()["error_code"], response.json()["message"]))
        exit(6)

    model_output = base64.b64decode(response.json()['data'])

    # download model in deploy folder
    os.chdir("deploy")
    with open(model_zip_run, "wb") as outfile:
        outfile.write(model_output)
    print("Downloaded model {} to Project root directory".format(model_name))

    #
    # Step 5: Put model to Azure ML Service
    #
    cli_auth = AzureCliAuthentication()

    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)
    ws.get_details()

    # start a training run by defining an experiment
    myexperiment = Experiment(ws, experiment_name)
    run = myexperiment.start_logging()
    run.upload_file("outputs/" + model_zip_run, model_zip_run)
    run.complete()
    run_id = run.id
    print("run id:", run_id)

    # unzip file to model_name_run
    shutil.unpack_archive(model_zip_run, model_name_run)

    model = Model.register(
        model_path=model_name_run,  # this points to a local file
        model_name=model_name,  # the name the model is registered as
        tags={
            "area": "spar",
            "type": "regression",
            "run_id": run_id
        },
        description="LightGBM model from Kaggle 1st place, for Earthquake prediction",
        workspace=ws,
    )
    print("Model registered: {} \nModel Description: {} \nModel Version: {}".format(
        model.name, model.description, model.version))

    # Step 6: Finally, write the registered model details to conf/model.json
    model_json = {}
    model_json["model_name"] = model.name
    model_json["model_version"] = model.version
    model_json["run_id"] = run_id
    model_json["model_name_run"] = model_name_run

    with open("../conf/model.json", "w") as outfile:
        json.dump(model_json, outfile)
# MAGIC %md ## Setup MLflow backend to Azure Machine Learning

# COMMAND ----------

import azureml
from azureml.core import Workspace

workspace_name = "mlops-demo"
workspace_location = "westeurope"
resource_group = "mlops-rg"
subscription_id = "6ee947fa-0d77-4915-bf68-4a83a8bec2a4"

workspace = Workspace(subscription_id, resource_group, workspace_name)
ws_details = workspace.get_details()
print(f'Logged into {ws_details["name"]}')

# COMMAND ----------

# ws_details = workspace.get_details()
# print(f'Logged into {ws_details["name"]}')

# COMMAND ----------

import mlflow

print(f"original MLflow tracking url: {mlflow.get_tracking_uri()}")

# change to AML
mlflow.set_tracking_uri(workspace.get_mlflow_tracking_uri())
print(f"new MLflow tracking url: {mlflow.get_tracking_uri()}")
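# COMMAND ----------

# A hedged sketch (not in the original notebook): once the tracking URI points at the
# Azure ML workspace, runs logged through the standard MLflow API appear as experiment
# runs in that workspace. The experiment name below is an illustrative assumption.
mlflow.set_experiment("mlops-demo-experiment")  # hypothetical experiment name

with mlflow.start_run():
    mlflow.log_param("alpha", 0.5)
    mlflow.log_metric("rmse", 0.42)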