import os

from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def link_databricks_workspace():
    """Check whether a Databricks compute target is attached to AML; attach it if not."""
    databricks_compute_name = os.environ.get("AML_DATABRICKS_COMPUTE_NAME")
    databricks_workspace_name = os.environ.get("AML_WORKSPACE")
    databricks_resource_group = os.environ.get("AML_RESOURCE_GROUP")
    databricks_access_token = os.environ.get("AML_DATABRICKS_ACCESS_TOKEN")
    aml_workspace_name = os.environ.get("AML_WORKSPACE_NAME")

    # DatabricksCompute and ComputeTarget.attach need a Workspace object, not the
    # workspace name string; this lookup assumes the default subscription context.
    aml_workspace = Workspace.get(name=aml_workspace_name)

    try:
        # For automation, use a Service Principal instead of interactive auth.
        databricks_compute = DatabricksCompute(workspace=aml_workspace,
                                               name=databricks_compute_name)
        print('Compute target already exists')
    except ComputeTargetException:
        print('Compute not found')
        print('databricks_compute_name {}'.format(databricks_compute_name))
        print('databricks_workspace_name {}'.format(databricks_workspace_name))
        # Insecure -- do not log the token:
        # print('databricks_access_token {}'.format(databricks_access_token))

        # Create the attach configuration
        attach_config = DatabricksCompute.attach_configuration(
            resource_group=databricks_resource_group,
            workspace_name=databricks_workspace_name,
            access_token=databricks_access_token)
        databricks_compute = ComputeTarget.attach(aml_workspace,
                                                  databricks_compute_name,
                                                  attach_config)
        databricks_compute.wait_for_completion(True)
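# A minimal sketch of the environment variables link_databricks_workspace()
# reads, for local testing only. All values below are placeholders, not real
# resource names or tokens; in a pipeline, supply these as secret variables.
import os

os.environ["AML_DATABRICKS_COMPUTE_NAME"] = "dbr-compute"       # placeholder
os.environ["AML_WORKSPACE"] = "my-databricks-workspace"         # placeholder
os.environ["AML_RESOURCE_GROUP"] = "my-resource-group"          # placeholder
os.environ["AML_DATABRICKS_ACCESS_TOKEN"] = "<databricks PAT>"  # placeholder
os.environ["AML_WORKSPACE_NAME"] = "my-aml-workspace"           # placeholder

link_databricks_workspace()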
import azureml.core
from azureml.core import Experiment, Workspace
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def trigger_env_prep():
    # Define vars <change the vars>.
    # In production, don't put secrets in source code; use secret variables instead, see
    # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
    workspace = "<Name of your workspace>"
    subscription_id = "<Subscription id>"
    resource_grp = "<Name of your resource group where aml service is created>"

    domain = "westeurope.azuredatabricks.net"  # change if your Databricks instance is not in westeurope
    databricks_name = "<<Your Databricks Name>>"
    dbr_pat_token_raw = "<<your Databricks Personal Access Token>>"

    DBR_PAT_TOKEN = bytes(dbr_pat_token_raw, encoding='utf-8')  # convert to bytes (b'...') for the REST call
    databricks_grp = resource_grp
    dataset = "AdultCensusIncome.csv"
    notebook = "3_IncomeNotebookDevops.py"
    experiment_name = "experiment_model_release"
    db_compute_name = "dbr-amls-comp"

    # Print the AML SDK version
    print("Azure ML SDK Version: ", azureml.core.VERSION)

    # Authenticate via the Azure CLI and connect to the AML workspace
    cli_auth = AzureCliAuthentication()

    ws = Workspace(workspace_name=workspace,
                   subscription_id=subscription_id,
                   resource_group=resource_grp,
                   auth=cli_auth)
    print(ws.name, ws.resource_group, ws.location, sep='\t')

    # Create a new experiment
    print("Starting to create new experiment")
    Experiment(workspace=ws, name=experiment_name)

    # Upload the notebook to Databricks (upload_notebook is a helper defined elsewhere)
    print("Upload notebook to databricks")
    upload_notebook(domain, DBR_PAT_TOKEN, notebook)

    print("Add databricks env to Azure ML Service Compute")
    # Attach the Databricks workspace as an AML compute target
    try:
        databricks_compute = DatabricksCompute(workspace=ws, name=db_compute_name)
        print('Compute target {} already exists'.format(db_compute_name))
    except ComputeTargetException:
        print('Compute not found, will attach a new one using the parameters below')
        config = DatabricksCompute.attach_configuration(
            resource_group=databricks_grp,
            workspace_name=databricks_name,
            access_token=dbr_pat_token_raw)
        databricks_compute = ComputeTarget.attach(ws, db_compute_name, config)
        databricks_compute.wait_for_completion(True)
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def main(args, workspace):
    # Connect Databricks to AzureML
    print("Connecting Databricks to AzureML")
    try:
        databricks_compute = DatabricksCompute(workspace=workspace, name=args.compute_name)
        print(f"Compute target {databricks_compute.name} already exists")
    except ComputeTargetException as exception:
        print(f"Databricks compute target not found: {exception}")
        print("Attaching Databricks to Azure ML")
        databricks_config = DatabricksCompute.attach_configuration(
            resource_group=args.db_resource_group,
            workspace_name=args.db_workspace_name,
            access_token=args.db_access_token)
        databricks_compute = ComputeTarget.attach(
            workspace=workspace,
            name=args.compute_name,
            attach_configuration=databricks_config)
        databricks_compute.wait_for_completion(show_output=True)
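# A hedged sketch of how main(args, workspace) might be wired up. The argument
# names match the attributes the function reads; the exact CLI layout of the
# original script is an assumption.
import argparse

from azureml.core import Workspace

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Attach Databricks compute to AML")
    parser.add_argument("--compute_name", required=True)
    parser.add_argument("--db_resource_group", required=True)
    parser.add_argument("--db_workspace_name", required=True)
    parser.add_argument("--db_access_token", required=True)
    args = parser.parse_args()

    # Assumes a config.json in the working directory identifies the AML workspace
    ws = Workspace.from_config()
    main(args, ws)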
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def get_compute(workspace: Workspace,
                dbcomputename: str,
                resource_group: str,
                dbworkspace: str,
                dbaccesstoken: str):
    try:
        databricks_compute = DatabricksCompute(workspace=workspace, name=dbcomputename)
        print('Compute target {} already exists'.format(dbcomputename))
    except ComputeTargetException:
        print('Compute not found, will attach a new one using the parameters below')
        print('db_compute_name {}'.format(dbcomputename))
        print('db_resource_group {}'.format(resource_group))
        print('db_workspace_name {}'.format(dbworkspace))

        config = DatabricksCompute.attach_configuration(
            resource_group=resource_group,
            workspace_name=dbworkspace,
            access_token=dbaccesstoken)
        databricks_compute = ComputeTarget.attach(workspace, dbcomputename, config)
        databricks_compute.wait_for_completion(True)
    return databricks_compute
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.compute_target import ComputeTargetException


def get_db_compute(ws: Workspace) -> DatabricksCompute:
    # db_compute_name, db_rg, db_workspace_name and db_access_token are
    # expected to be defined at module scope (e.g. loaded from configuration).
    db_compute = None
    try:
        db_compute = ComputeTarget(ws, db_compute_name)
    except ComputeTargetException:
        attach_config = DatabricksCompute.attach_configuration(
            resource_group=db_rg,
            workspace_name=db_workspace_name,
            access_token=db_access_token)
        db_compute = ComputeTarget.attach(ws, db_compute_name, attach_config)
        db_compute.wait_for_completion(True)
    return db_compute
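# A minimal sketch, assuming the module-level settings get_db_compute() relies
# on come from environment variables. The variable names come from the function
# above; the environment variable names are placeholders.
import os

db_compute_name = os.environ.get("DB_COMPUTE_NAME", "databricks-compute")  # placeholder
db_rg = os.environ.get("DB_RESOURCE_GROUP")
db_workspace_name = os.environ.get("DB_WORKSPACE_NAME")
db_access_token = os.environ.get("DB_ACCESS_TOKEN")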
from azureml.core.compute import ComputeTarget, DatabricksCompute


def attach_databricks(name, access_token, compute_resource_id,
                      workspace_name=None, resource_group_name=None):
    # get_workspace_or_default is a CLI helper assumed to be defined elsewhere
    workspace = get_workspace_or_default(workspace_name, resource_group_name)

    print('Attaching compute resource...')
    attach_config = DatabricksCompute.attach_configuration(
        resource_id=compute_resource_id,
        access_token=access_token)
    ComputeTarget.attach(workspace, name, attach_config)
    print('Resource attach submitted successfully.')
    print('To see if your compute target is ready to use, run:')
    print('  az ml computetarget show -n {}'.format(name))
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute


def create_databricks_compute(
    workspace: Workspace,
    databricks_workspace_name: str,
    compute_name: str,
    access_token: str,
):
    # Attaches an existing Databricks workspace, assumed to be in the same
    # resource group as the AML workspace, as a compute target.
    compute_config = DatabricksCompute.attach_configuration(
        resource_group=workspace.resource_group,
        workspace_name=databricks_workspace_name,
        access_token=access_token)
    compute_target = ComputeTarget.attach(workspace, compute_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
    return compute_target
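# Example call with placeholder names; it assumes the AML workspace and the
# Databricks workspace share a resource group, as the function's use of
# workspace.resource_group implies.
from azureml.core import Workspace

ws = Workspace.from_config()
compute_target = create_databricks_compute(
    workspace=ws,
    databricks_workspace_name="my-databricks-workspace",  # placeholder
    compute_name="databricks-compute",                    # placeholder
    access_token="<databricks PAT>",                      # placeholder
)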
# Unmanaged compute target
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute

# Load the workspace from the saved config file
ws = Workspace.from_config()

# Specify a name for the compute (unique within the workspace)
compute_name = 'db_cluster'

# Define configuration for existing Azure Databricks cluster
db_workspace_name = 'db_workspace'
db_resource_group = 'db_resource_group'
db_access_token = '1234-abc-5678-defg-90...'
db_config = DatabricksCompute.attach_configuration(resource_group=db_resource_group,
                                                   workspace_name=db_workspace_name,
                                                   access_token=db_access_token)

# Attach the compute
databricks_compute = ComputeTarget.attach(ws, compute_name, db_config)
databricks_compute.wait_for_completion(True)

# Check for existing compute
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_name = "aml-cluster"

# Check if the compute target exists; the body of this try/except was truncated
# in the original, so the completion below follows the standard pattern used above.
try:
    aml_compute = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Compute target not found.')
print("Argument 2: %s" % args.model_name) print("Argument 3: %s" % args.build_number) print("Argument 4: %s" % args.image_name) print("Argument 5: %s" % args.path) print('creating AzureCliAuthentication...') cli_auth = AzureCliAuthentication() print('done creating AzureCliAuthentication!') print('get workspace...') ws = Workspace.from_config(path=args.path, auth=cli_auth) print('done getting workspace!') print("looking for existing compute target.") # aml_compute = AmlCompute(ws, args.aml_compute_target) aml_compute = DatabricksCompute(ws, args.aml_compute_target) print("found existing compute target.") # Create a new runconfig object run_amlcompute = RunConfiguration() # # Use the cpu_cluster you created above. run_amlcompute.target = args.aml_compute_target # # Enable Docker # run_amlcompute.environment.docker.enabled = True # # Set Docker base image to the default CPU-based image # run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE # # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
# print("found existing compute target.") # except ComputeTargetException: # print("creating new compute target") # provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2", # min_nodes = 1, # max_nodes = 1) # aml_compute = ComputeTarget.create(ws, args.aml_compute_target, provisioning_config) # aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # print("Aml Compute attached") # ---------------------- try: aml_compute = DatabricksCompute(ws, args.aml_compute_target) print("found existing compute target.") except: print("Attaching new ADB compute target") # See below for param description # https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.databrickscompute?view=azure-ml-py#attach-configuration-resource-group-none--workspace-name-none--resource-id-none--access-token---- db_workspace_name = 'test-aml-adb-workspace' db_resource_group = 'test-aml-adb' db_access_token = '...........................' provisioning_config = DatabricksCompute.attach_configuration(resource_group=db_resource_group, workspace_name=db_workspace_name, access_token=db_access_token) databricks_compute = ComputeTarget.attach(ws, args.aml_compute_target, provisioning_config)
import os

from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import DatabricksStep


def resolve_db_cluster_id():
    return os.environ["DB_CLUSTER_ID"]


# The other resolve_* helpers (resolve_dependencies, resolve_subscription_id,
# resolve_rg, etc.) are assumed to be defined elsewhere in this module.
my_env = CondaDependencies.create(conda_packages=resolve_dependencies())
with open("myenv.yml", "w") as f:
    f.write(my_env.serialize_to_string())

ws = Workspace(resolve_subscription_id(), resolve_rg(), resolve_ml_workspace_name())

config = DatabricksCompute.attach_configuration(
    resource_group=resolve_rg(),
    workspace_name=resolve_db_workspace_name(),
    access_token=resolve_db_access_token())
databricks_compute = ComputeTarget.attach(ws, resolve_compute_name(), config)
databricks_compute.wait_for_completion(True)

# Run the script on an existing Databricks cluster rather than provisioning a new one
dbPythonInLocalMachineStep = DatabricksStep(
    name="DBPythonInLocalMachine",
    python_script_name=resolve_script_name(),
    source_directory=resolve_source_directory(),
    run_name='DB_Worst_Regression_Run',
    compute_target=databricks_compute,
    existing_cluster_id=resolve_db_cluster_id(),
    allow_reuse=True)

steps = [dbPythonInLocalMachineStep]
pipeline = Pipeline(workspace=ws, steps=steps)
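# A minimal sketch of submitting the pipeline built above as an experiment run;
# the experiment name is a placeholder.
from azureml.core import Experiment

experiment = Experiment(workspace=ws, name="databricks-pipeline")  # placeholder name
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)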