def create_aml_compute_target_batchai(cfg, ws):
    """Return the Batch AI compute target named in ``cfg``, creating it if absent.

    An existing target with the configured name is reused as-is; its settings
    are not compared against ``cfg``.

    Args:
        cfg: Configuration object read through attribute access. The function
            uses ``cfg.ClusterProperties`` (``cluster_name``, ``vm_size``,
            ``vm_priority``, ``scaling_method``, ``minimumNodeCount``,
            ``maximumNodeCount``) and ``cfg.AMLConfig.location``.
        ws: Workspace in which the target is looked up or created.

    Returns:
        The compute target object.
    """
    try:
        target = ComputeTarget(workspace=ws, name=cfg.ClusterProperties.cluster_name)
    except ComputeTargetException:
        log.info('Creating Batch AI compute target "{}" in workspace "{}".'.format(
            cfg.ClusterProperties.cluster_name, ws.name))

        # Provisioning settings for the new cluster; autoscaling is on only
        # when the configured scaling method is exactly 'auto_scale'.
        provisioning = BatchAiCompute.provisioning_configuration(
            vm_size=cfg.ClusterProperties.vm_size,
            vm_priority=cfg.ClusterProperties.vm_priority,
            autoscale_enabled=cfg.ClusterProperties.scaling_method == 'auto_scale',
            cluster_min_nodes=cfg.ClusterProperties.minimumNodeCount,
            cluster_max_nodes=cfg.ClusterProperties.maximumNodeCount,
            location=cfg.AMLConfig.location)

        log.info('Launching creation of the Batch AI compute target "{}" under the AML workspace "{}"'.format(
            cfg.ClusterProperties.cluster_name, ws.name))
        target = ComputeTarget.create(
            workspace=ws,
            name=cfg.ClusterProperties.cluster_name,
            provisioning_configuration=provisioning)
        target.wait_for_completion(show_output=True)
        log.info(target.status.serialize())
    else:
        log.info('Found existing compute target. Using it. NOT VALIDATING IF YOU CHANGED THE CLUSTER CONFIG...')

    return target
def create_amlcompute_cluster(self, pet_cluster_name, min_nodes, max_nodes, vm_size):
    """Fetch the named AmlCompute cluster, provisioning it when it is missing.

    The requested node bounds are recorded on the instance, and the resulting
    target is stored in ``self.pet_compute_target``.

    Args:
        pet_cluster_name: Name of the cluster to look up or create.
        min_nodes: Minimum node count used when creating the cluster.
        max_nodes: Maximum node count used when creating the cluster.
        vm_size: VM size used when creating the cluster.

    Returns:
        The compute target stored in ``self.pet_compute_target``.
    """
    self.min_nodes, self.max_nodes = min_nodes, max_nodes

    try:
        # Reuse the cluster when the workspace already has one with this name.
        self.pet_compute_target = ComputeTarget(workspace=self.ws,
                                                name=pet_cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        # New clusters are placed in the virtual network configured on self.
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            vnet_name=self.vnet_name,
            vnet_resourcegroup_name=self.rg_name,
            subnet_name=self.subnet_name)
        self.pet_compute_target = ComputeTarget.create(
            self.ws, pet_cluster_name, cluster_config)
        self.pet_compute_target.wait_for_completion(show_output=True)
    else:
        print('Found existing compute target.')

    return self.pet_compute_target
def get_compute(workspace, cluster_name, vm_size='STANDARD_NC6', max_nodes=4):
    """
    Look up a compute cluster by name, creating it when the lookup fails.

    :param workspace: The Azure ML workspace to search or create in.
    :param cluster_name: Name of the cluster to find or create.
    :param vm_size: VM size for a newly created cluster (ignored when the
        cluster already exists).
    :param max_nodes: Maximum node count for a newly created cluster.
    :returns: A ComputeTarget object.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                  max_nodes=max_nodes),
        )
        # Block until provisioning finishes or 20 minutes elapse.
        cluster.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)
    else:
        print('Found existing compute target')
    return cluster
def create_cluster(ws, cluster_name, vm_size, max_nodes):
    """Get or create an AmlCompute cluster, prompting for any missing setting.

    Each of ``cluster_name``, ``vm_size`` and ``max_nodes`` that is ``None`` is
    requested interactively on standard input; otherwise the supplied value is
    echoed and used.

    Args:
        ws: Workspace in which the cluster is looked up or created.
        cluster_name: Cluster name, or ``None`` to prompt for it.
        vm_size: VM size for a new cluster, or ``None`` to prompt for it.
        max_nodes: Maximum node count for a new cluster, or ``None`` to prompt.

    Returns:
        The compute target for the cluster.

    Raises:
        ValueError: If the interactively entered node count is not an integer.
    """
    if cluster_name is None:
        cluster_name = input("name of the cluster : ")
    else:
        print("using cluster : ", cluster_name)

    if vm_size is None:
        vm_size = input("size of your VM : ")
    else:
        print("using vm size of : ", vm_size)

    if max_nodes is None:
        # input() always returns text; the node count must be numeric.
        max_nodes = int(input("maximum amount of nodes on the cluster : "))
    else:
        print("maximum amount of nodes is : ", max_nodes)

    from azureml.core.compute import ComputeTarget, AmlCompute
    from azureml.core.compute_target import ComputeTargetException

    try:
        # Reuse the cluster when one with this name already exists.
        cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print("Found existing cluster")
    except ComputeTargetException:
        print("Creating new cluster")
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=0,
            max_nodes=max_nodes)
        cluster = ComputeTarget.create(ws, cluster_name, compute_config)

    # Wait for the cluster to be ready, streaming the provisioning log.
    cluster.wait_for_completion(show_output=True)
    return cluster
def get_credentials(self):
    """Retrieve the credentials for the RemoteCompute target.

    Posts to the target's ``/listKeys`` endpoint and returns the decoded JSON
    body of the response.

    :return: The credentials for the RemoteCompute target.
    :rtype: dict
    :raises azureml.exceptions.ComputeTargetException: If the service answers
        with an HTTP error status.
    """
    list_keys_url = self._mlc_endpoint + '/listKeys'
    request_headers = self._auth.get_authentication_header()
    ComputeTarget._add_request_tracking_headers(request_headers)
    query = {'api-version': MLC_WORKSPACE_API_VERSION}

    response = ClientBase._execute_func(get_requests_session().post,
                                        list_keys_url,
                                        params=query,
                                        headers=request_headers)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Surface the full response so the failure can be diagnosed.
        message = ('Received bad response from MLC:\n'
                   'Response Code: {}\n'
                   'Headers: {}\n'
                   'Content: {}').format(response.status_code,
                                         response.headers,
                                         response.content)
        raise ComputeTargetException(message)

    payload = response.content
    if isinstance(payload, bytes):
        payload = payload.decode('utf-8')
    return json.loads(payload)
def attach_remote(
        name,
        address,
        ssh_port,
        username,
        password='',
        private_key_file='',
        private_key_passphrase='',
        workspace_name=None,
        resource_group_name=None,
):
    """Submit a request to attach a remote machine as a compute target.

    The workspace is resolved through ``get_workspace_or_default``. The
    function prints progress messages and does not wait for the attach
    operation to finish.

    Args:
        name: Name to give the attached compute target.
        address: Address of the remote machine.
        ssh_port: SSH port of the remote machine.
        username: Login user name.
        password: Login password (default empty).
        private_key_file: Path to a private key file (default empty).
        private_key_passphrase: Passphrase for the private key (default empty).
        workspace_name: Workspace name passed to ``get_workspace_or_default``.
        resource_group_name: Resource group passed to
            ``get_workspace_or_default``.
    """
    target_workspace = get_workspace_or_default(
        workspace_name=workspace_name, resource_group=resource_group_name)

    print('Attaching compute resource...')
    remote_config = RemoteCompute.attach_configuration(
        username=username,
        address=address,
        ssh_port=ssh_port,
        password=password,
        private_key_file=private_key_file,
        private_key_passphrase=private_key_passphrase,
    )
    ComputeTarget.attach(target_workspace, name, remote_config)

    for line in (
            'Resource attach submitted successfully.',
            'To see if your compute target is ready to use, run:',
            ' az ml computetarget show -n {}'.format(name),
    ):
        print(line)
def create_cluster(batchai_cluster_name,
                   vm_size="STANDARD_D2_V2",
                   cluster_min_nodes=0,
                   cluster_max_nodes=2,
                   autoscale_enabled=True):
    """Get or create a Batch AI cluster in the workspace from local config.

    When a compute target with the given name exists it is returned as-is,
    even if it is not a Batch AI cluster (a warning is printed in that case).
    Otherwise a new Batch AI cluster is provisioned.

    Args:
        batchai_cluster_name: Name of the cluster to look up or create.
        vm_size: VM size for a new cluster.
        cluster_min_nodes: Minimum node count for a new cluster.
        cluster_max_nodes: Maximum node count for a new cluster.
        autoscale_enabled: Whether a new cluster autoscales.

    Returns:
        The compute target.
    """
    ws = Workspace.from_config()

    try:
        target = ComputeTarget(workspace=ws, name=batchai_cluster_name)
    except ComputeTargetException:
        print('creating a new compute target...')
        batchai_config = BatchAiCompute.provisioning_configuration(
            vm_size=vm_size,
            autoscale_enabled=autoscale_enabled,
            cluster_min_nodes=cluster_min_nodes,
            cluster_max_nodes=cluster_max_nodes)
        target = ComputeTarget.create(ws, batchai_cluster_name, batchai_config)
        # Block until the cluster is provisioned or 20 minutes elapse.
        target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)
        print(target.status.serialize())
    else:
        if type(target) is BatchAiCompute:
            message = 'found compute target {}, just use it.'
        else:
            message = ('{} exists but it is not a Batch AI cluster. '
                       'Please choose a different name.')
        print(message.format(batchai_cluster_name))

    return target
def get_or_create_compute(workspace, cpu_cluster_name, compute_vm_size, max_nodes):
    """Return the named compute cluster, creating an AmlCompute one if needed.

    Args:
        workspace: Workspace in which the cluster is looked up or created.
        cpu_cluster_name: Name of the cluster.
        compute_vm_size: VM size for a newly created cluster.
        max_nodes: Maximum node count for a newly created cluster (the
            minimum is fixed at 0).

    Returns:
        The compute target, after waiting for it to complete provisioning.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cpu_cluster_name)
    except ComputeTargetException:
        new_cluster_config = AmlCompute.provisioning_configuration(
            vm_size=compute_vm_size,
            min_nodes=0,
            max_nodes=max_nodes,
        )
        cluster = ComputeTarget.create(workspace, cpu_cluster_name,
                                       new_cluster_config)

    cluster.wait_for_completion(show_output=True)
    return cluster
def get_db_compute(ws: Workspace) -> DatabricksCompute:
    """Return the Databricks compute target, attaching it if it is missing.

    The target name and the Databricks connection settings come from the
    module-level names ``db_compute_name``, ``db_rg``, ``db_workspace_name``
    and ``db_access_token``.

    Args:
        ws: Workspace in which the compute target is looked up or attached.

    Returns:
        The Databricks compute target.
    """
    try:
        return ComputeTarget(ws, db_compute_name)
    except ComputeTargetException:
        # Not registered yet: attach the Databricks workspace and wait for it.
        databricks_config = DatabricksCompute.attach_configuration(
            resource_group=db_rg,
            workspace_name=db_workspace_name,
            access_token=db_access_token,
        )
        attached = ComputeTarget.attach(ws, db_compute_name, databricks_config)
        attached.wait_for_completion(True)
        return attached
def get_or_create_amlcompute(
    workspace,
    compute_name,
    vm_size="",
    min_nodes=0,
    max_nodes=None,
    idle_seconds_before_scaledown=None,
    verbose=False,
):
    """
    Get or create AmlCompute as the compute target.

    If a cluster of the same name is found it is reused and, when it currently
    lists fewer nodes than ``max_nodes``, updated to ``max_nodes``. Otherwise a
    new cluster is created.

    Args:
        workspace (Workspace): workspace
        compute_name (str): name
        vm_size (str, optional): vm size
        min_nodes (int, optional): minimum number of nodes in cluster
        max_nodes (int, optional): maximum number of nodes in cluster; when
            None an existing cluster is never rescaled
        idle_seconds_before_scaledown (int, optional): how long to wait
            before the cluster autoscales down
        verbose (bool, optional): if true, print logs

    Returns:
        Compute target
    """
    try:
        compute_target = ComputeTarget(workspace=workspace, name=compute_name)
        # Report success only after the lookup has actually succeeded.
        if verbose:
            print("Found compute target: {}".format(compute_name))

        # Comparing against None would raise TypeError, so skip rescaling
        # when no maximum was requested.
        if max_nodes is not None and len(compute_target.list_nodes()) < max_nodes:
            if verbose:
                print("Rescaling to {} nodes".format(max_nodes))
            compute_target.update(max_nodes=max_nodes)
            compute_target.wait_for_completion(show_output=verbose)

    except ComputeTargetException:
        if verbose:
            print("Creating new compute target: {}".format(compute_name))

        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_seconds_before_scaledown,
        )
        compute_target = ComputeTarget.create(workspace, compute_name, compute_config)
        compute_target.wait_for_completion(show_output=verbose)

    return compute_target
def attach_databricks(name, access_token, compute_resource_id,
                      workspace_name=None, resource_group_name=None):
    """Submit a request to attach a Databricks resource as a compute target.

    The workspace is resolved through ``get_workspace_or_default``. The
    function prints progress messages and does not wait for the attach
    operation to finish.

    Args:
        name: Name to give the attached compute target.
        access_token: Databricks access token.
        compute_resource_id: Resource id of the Databricks resource.
        workspace_name: Workspace name passed to ``get_workspace_or_default``.
        resource_group_name: Resource group passed to
            ``get_workspace_or_default``.
    """
    target_workspace = get_workspace_or_default(workspace_name, resource_group_name)

    print('Attaching compute resource...')
    databricks_config = DatabricksCompute.attach_configuration(
        resource_id=compute_resource_id,
        access_token=access_token,
    )
    ComputeTarget.attach(target_workspace, name, databricks_config)

    for line in (
            'Resource attach submitted successfully.',
            'To see if your compute target is ready to use, run:',
            ' az ml computetarget show -n {}'.format(name),
    ):
        print(line)
def get_compute_target(self, compute_name, vm_size=None):
    """Return the named compute target from ``self.workspace``, creating it if absent.

    A newly created target is an AmlCompute cluster with 1 to 2 nodes.

    Args:
        compute_name: Name of the compute target.
        vm_size: VM size used only when a new cluster is created.

    Returns:
        The compute target.
    """
    try:
        target = ComputeTarget(workspace=self.workspace, name=compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=1,
            max_nodes=2,
        )
        target = ComputeTarget.create(self.workspace, compute_name,
                                      cluster_config)
        # Give provisioning up to 20 minutes.
        target.wait_for_completion(show_output=True, timeout_in_minutes=20)
    else:
        print('Found existing compute target')
    return target
def _attach_compute_internal(name, compute_resource_id, compute_type,
                             workspace_name=None, resource_group_name=None):
    """Submit a request to attach an existing resource as a compute target.

    The attach configuration is built by calling
    ``compute_type.attach_configuration`` with the resource id. The function
    prints progress messages and does not wait for the attach to finish.

    Args:
        name: Name to give the attached compute target.
        compute_resource_id: Resource id of the resource to attach.
        compute_type: Object providing ``attach_configuration(resource_id=...)``.
        workspace_name: Workspace name passed to ``get_workspace_or_default``.
        resource_group_name: Resource group passed to
            ``get_workspace_or_default``.
    """
    target_workspace = get_workspace_or_default(
        workspace_name=workspace_name, resource_group=resource_group_name)

    print('Attaching compute resource...')
    resource_config = compute_type.attach_configuration(
        resource_id=compute_resource_id)
    ComputeTarget.attach(target_workspace, name, resource_config)

    for line in (
            'Resource attach submitted successfully.',
            'To see if your compute target is ready to use, run:',
            ' az ml computetarget show -n {}'.format(name),
    ):
        print(line)
def create_gpu_target(workspace, name):
    """Return the named compute target, creating a GPU cluster if it is missing.

    A newly created cluster uses ``Standard_NC6`` VMs with at most 8 nodes.

    Args:
        workspace: Workspace in which the target is looked up or created.
        name: Name of the compute target.

    Returns:
        The compute target.
    """
    try:
        gpu_target = ComputeTarget(workspace=workspace, name=name)
    except ComputeTargetException:
        gpu_config = AmlCompute.provisioning_configuration(
            vm_size="Standard_NC6",  # GPU: Tesla K80; NC12 has two of them
            max_nodes=8,
        )
        gpu_target = ComputeTarget.create(
            workspace=workspace,
            name=name,
            provisioning_configuration=gpu_config,
        )
        gpu_target.wait_for_completion(show_output=True)
    else:
        print("Found existing compute target, use it.")
    return gpu_target
def get_aks(workspace: Workspace, compute_name: str):
    """Return the named AKS compute target, provisioning one if necessary.

    A new dev/test AKS cluster is created whenever the workspace has no
    compute target with this name or the one it has is not exactly an
    ``AksCompute``.

    Args:
        workspace: Workspace in which the target is looked up or created.
        compute_name: Name of the AKS compute target.

    Returns:
        The AKS compute target.

    Raises:
        ComputeTargetException: Re-raised after being printed.
    """
    try:
        target = workspace.compute_targets.get(compute_name)
        already_there = target is not None and type(target) is AksCompute
        if already_there:
            print('Found existing compute target ' + compute_name
                  + ' so using it.')
            return target

        dev_test_config = AksCompute.provisioning_configuration(
            cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST)
        print("No Azure Kubernetes Service cluster found, "
              "creating one now...")
        target = ComputeTarget.create(
            workspace=workspace,
            name=compute_name,
            provisioning_configuration=dev_test_config)
        # Block until provisioning has finished.
        target.wait_for_completion(show_output=True)
        return target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        raise
def get_compute_object(ws, compute_name, size="STANDARD_NC6", min_nodes=1, max_nodes=4):
    """
    get_compute_object - Return the named compute target, creating it if absent.

    :param Workspace ws: AMLS Workspace object.
    :param str compute_name: AMLS compute name.
    :param str size: VM size used when a new cluster is created.
    :param int min_nodes: Minimum node count for a new cluster.
    :param int max_nodes: Maximum node count for a new cluster.

    :returns: AMLS compute target
    :rtype: azureml.core.compute.ComputeTarget
    """
    # Already registered in the workspace: hand it back untouched.
    if compute_name in ws.compute_targets:
        return ws.compute_targets[compute_name]

    cluster_config = AmlCompute.provisioning_configuration(
        vm_size=size,
        min_nodes=min_nodes,
        max_nodes=max_nodes,
    )
    new_target = ComputeTarget.create(ws, compute_name, cluster_config)
    new_target.wait_for_completion(show_output=True)
    return new_target
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the named compute target, creating an AmlCompute cluster if absent.

    Cluster priority and node bounds for a new cluster are read from the
    environment (after loading a ``.env`` file via ``load_dotenv``):
    ``AML_CLUSTER_PRIORITY`` (default ``'lowpriority'``),
    ``AML_CLUSTER_MIN_NODES`` (default 0) and ``AML_CLUSTER_MAX_NODES``
    (default 4).

    Args:
        workspace: Workspace in which the target is looked up or created.
        compute_name: Name of the compute target.
        vm_size: VM size used when a new cluster is created.

    Returns:
        The compute target. An existing target with this name is returned
        even when it is not an ``AmlCompute``.

    Raises:
        SystemExit: With exit status 1 when provisioning fails with a
            ``ComputeTargetException``.
    """
    load_dotenv()
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name
                      + ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'lowpriority'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)),
                idle_seconds_before_scaledown="300")
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A bare exit() reports success (status 0); signal the failure so
        # calling scripts and pipelines can detect it.
        raise SystemExit(1)
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the named compute target, creating an AmlCompute cluster if absent.

    Priority and node bounds for a new cluster are taken from an ``Env``
    instance (``vm_priority``, ``min_nodes``, ``max_nodes``).

    Args:
        workspace: Workspace in which the target is looked up or created.
        compute_name: Name of the compute target.
        vm_size: VM size used when a new cluster is created.

    Returns:
        The compute target. An existing target with this name is returned
        even when it is not an ``AmlCompute``.

    On a ``ComputeTargetException`` the error is printed and the process
    exits with status 1.
    """
    try:
        if compute_name not in workspace.compute_targets:
            env = Env()
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=env.vm_priority,
                min_nodes=env.min_nodes,
                max_nodes=env.max_nodes,
                idle_seconds_before_scaledown="300",
                # VNet support can be enabled by also passing
                # vnet_resourcegroup_name, vnet_name and subnet_name here.
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  cluster_config)
            # Give provisioning up to 10 minutes.
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        else:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name
                      + ' so using it.')
        return compute_target
    except ComputeTargetException as error:
        print(error)
        print('An error occurred trying to provision compute.')
        exit(1)
def create_compute_target(workspace, name, config):
    """Create a compute target and verify that its deployment succeeded.

    Args:
        workspace: Workspace in which the compute target is created.
        name: Name of the compute target.
        config: Provisioning configuration for the compute target.

    Returns:
        The created compute target.

    Raises:
        AMLConfigurationException: If creation raises a
            ``ComputeTargetException``.
        AMLComputeException: If the provisioning state after creation is
            anything other than ``"Succeeded"``.
    """
    print("::debug::Creating compute target")
    try:
        compute_target = ComputeTarget.create(
            workspace=workspace,
            name=name,
            provisioning_configuration=config,
        )
        compute_target.wait_for_completion(show_output=True)
    except ComputeTargetException as exception:
        print(f"::error::Could not create compute target with specified parameters: {exception}")
        raise AMLConfigurationException(
            "Could not create compute target with specified parameters. Please review the provided parameters."
        )

    print("::debug::Checking state of compute target")
    if compute_target.provisioning_state == "Succeeded":
        return compute_target

    # Deployment finished in a non-successful state: report and abort.
    print(
        f"::error::Deployment of compute target '{compute_target.name}' failed with state '{compute_target.provisioning_state}'. Please delete the compute target manually and retry."
    )
    raise AMLComputeException(
        f"Deployment of compute target '{compute_target.name}' failed with state '{compute_target.provisioning_state}'. Please delete the compute target manually and retry."
    )
def create_compute(ws, gpus):
    """Create an Azure compute cluster sized for the requested GPU count.

    Supported counts are 1, 2 and 4, which select the ``Standard_NC6_Promo``,
    ``Standard_NC12_Promo`` and ``Standard_NC24_Promo`` VM sizes. The cluster
    scales between 0 and 10 nodes. Creation is submitted without waiting for
    it to complete.

    Args:
        ws: Workspace in which the cluster is created.
        gpus: Number of GPUs per node (1, 2 or 4).

    Returns:
        The compute target returned by ``ComputeTarget.create``, or ``None``
        when ``gpus`` is not a supported value (a message is printed and
        nothing is created).
    """
    # (gpu count, cluster name, Azure VM size)
    supported = (
        (1, "gpu-cluster-NC6", 'Standard_NC6_Promo'),
        (2, "gpu-cluster-NC12", 'Standard_NC12_Promo'),
        (4, "gpu-cluster-NC24", 'Standard_NC24_Promo'),
    )
    match = next(
        ((cluster, size) for count, cluster, size in supported if gpus == count),
        None,
    )
    if match is None:
        print(gpus, 'is not a valid number of GPUs. No compute was created')
        return None

    compute_name, vm_size = match

    # Define the cluster and its minimum and maximum node counts.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=0,
        max_nodes=10)

    # Hand the new target back so callers can wait on or use it.
    return ComputeTarget.create(ws, compute_name, provisioning_config)
def ConnectToAzure():
    """
    Connect to the Azure workspace, compute target, datastore and experiment.

    The four handles are stored in the module-level globals ``az_workspace``,
    ``az_computetarget``, ``az_datastore`` and ``az_experiment``, and the name
    of each is printed as it is obtained.
    """
    global az_workspace, az_computetarget, az_datastore, az_experiment

    # Workspace: loaded from a local config.json (it can be downloaded from
    # the Azure portal while browsing the workspace).
    az_workspace = Workspace.from_config()
    print("Workspace:", az_workspace.name)

    # Training compute: the target must belong to the workspace, and compute
    # targets are limited by the workspace region.
    az_computetarget = ComputeTarget(workspace=az_workspace,
                                     name="AzPytrch-NC6")
    print("Compute Target:", az_computetarget.name)

    # Datastore holding the training images: the workspace default.
    az_datastore = Datastore.get_default(az_workspace)
    print("Datastore:", az_datastore.name)

    # Experiment under which runs are recorded.
    az_experiment = Experiment(workspace=az_workspace, name='616_Final')
    print("Experiment:", az_experiment.name)
def main():
    """Submit the graph2seq training script as an Azure ML experiment.

    Connection details are read from ``authentication.json`` under the key
    ``args.auth_cluster``; the Docker image is built from the local
    ``Dockerfile``. After submission the function blocks, streaming the run
    output until the run completes.
    """
    with open("authentication.json") as auth_file:
        cluster_auth = json.load(auth_file)[args.auth_cluster]

    # AzureML subscription details (available from the Azure Portal).
    subscription = cluster_auth["subID"]        # used for billing
    resource_group = cluster_auth["resGroup"]
    workspace_name = cluster_auth["wsName"]     # compute clusters + experiments
    cluster_name = cluster_auth["computeName"]

    ws = Workspace.get(workspace_name,
                       subscription_id=subscription,
                       resource_group=resource_group)
    compute_target = ComputeTarget(ws, cluster_name)

    # Dataset location. With RichPath this is prefixed with azure:// (for
    # example "azure://example1234/data/") and needs a SAS key placed in
    # azureinfo.json; otherwise it is where the AzureML Datastore is mounted.
    data_prefix = cluster_auth["dataPath"]

    # Positional script arguments are passed as keys with empty values.
    script_params = OrderedDict([
        (data_prefix + args.train_file_name, ""),
        (data_prefix + args.validate_file_name, ""),
        (data_prefix + args.test_file_name, ""),
        ("./model.pkl.gz", ""),
        ("--max-num-epochs", args.max_epochs),
        ("--aml", ""),
        ("--azure-info", "azureinfo.json"),
        ("--quiet", ""),
    ])
    if args.predicting_statement:
        # We are trying to predict statements.
        script_params["--predicting-statement"] = ""

    with open("Dockerfile") as dockerfile:
        docker = DockerSection()
        docker.base_image = None
        docker.base_dockerfile = dockerfile.read()
        docker.enabled = True

    environment = Environment(name="pytorchenv")
    environment.docker = docker
    environment.python.user_managed_dependencies = True

    estimator = Estimator(
        source_directory=".",
        script_params=script_params,
        compute_target=compute_target,
        entry_script="ptgnn/implementations/graph2seq/trainandtest.py",
        environment_definition=environment,
        use_docker=True,
    )

    # Submit the experiment.
    experiment = Experiment(workspace=ws, name=args.exp_name)
    run = experiment.submit(config=estimator, tags=args.tags)
    print(
        "Experiment Started. Remember you can exit out of this program but the experiment will still run on Azure!"
    )
    run.wait_for_completion(show_output=True)
def shutdownComputeInstances():
    """Stop every compute instance in the configured Azure ML workspace.

    Service-principal credentials and the workspace coordinates are read from
    the environment variables ``subscriptionID``, ``tenantID``, ``clientID``,
    ``secret``, ``resourceGroupName`` and ``amlWorkspaceName``.

    Raises:
        KeyError: If any of those environment variables is missing.
    """
    # Service principal details come from the app settings.
    subscription_id = os.environ["subscriptionID"]
    tenant_id = os.environ["tenantID"]
    client_id = os.environ["clientID"]
    client_secret = os.environ["secret"]
    resource_group = os.environ["resourceGroupName"]
    workspace_name = os.environ["amlWorkspaceName"]

    # Authenticate to the workspace as the service principal.
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=client_id,
        service_principal_password=client_secret)
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name,
                   auth=sp_auth)

    # Only targets whose type is 'ComputeInstance' are stopped.
    for target in ComputeTarget.list(ws):
        if target.type != 'ComputeInstance':
            continue
        logging.info("stop compute instance")
        target.stop()
def create_aml_compute(self, ws):
    """Get or create the AML Compute cluster configured through the environment.

    Settings are read from ``AML_COMPUTE_CLUSTER_NAME`` (default
    ``"cpucluster"``), ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 0),
    ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 4) and
    ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

    Args:
        ws: Workspace in which the cluster is looked up or created.

    Returns:
        The compute target. An existing target with the configured name is
        returned even when it is not an ``AmlCompute``.

    Raises:
        ValueError: If a node-count environment variable is not an integer.
    """
    print("Creating new AML Compute")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Environment values are strings; node counts must be integers.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
    print(" AML Compute " + compute_name + " min nodes "
          + str(compute_min_nodes) + " compute max nodes "
          + str(compute_max_nodes))

    # This example uses a CPU VM. For a GPU VM, set the SKU to STANDARD_NC6.
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. just use it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        print("Starting to create ACI Compute cluster")
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # With no min node count given, the wait uses the cluster's scale
        # settings; give up after 20 minutes.
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)
    return compute_target
def get_or_create(self, compute_name, min_nodes=2, max_nodes=4, vm_size='Standard_D2_v2'):
    """Return the named Batch AI compute target, creating it when needed.

    An existing target is returned only when it is exactly a
    ``BatchAiCompute``; in every other case a new autoscaling Batch AI
    cluster is provisioned under that name.

    Args:
        compute_name: Name of the compute target.
        min_nodes: Minimum node count for a new cluster.
        max_nodes: Maximum node count for a new cluster.
        vm_size: VM size for a new cluster.

    Returns:
        The compute target.
    """
    if compute_name in self.__workspace.compute_targets():
        existing = self.__workspace.compute_targets()[compute_name]
        if existing and type(existing) is BatchAiCompute:
            print('Found compute target: ' + compute_name)
            return existing

    # Nothing usable was found, so provision a new cluster.
    print('Creating a new compute target...')
    batchai_config = BatchAiCompute.provisioning_configuration(
        vm_size=vm_size,
        cluster_min_nodes=min_nodes,
        cluster_max_nodes=max_nodes,
        autoscale_enabled=True)
    created = ComputeTarget.create(self.__workspace, compute_name,
                                   batchai_config)

    # With no min node count given, the wait uses the cluster's scale
    # settings; give up after 20 minutes.
    created.wait_for_completion(show_output=True,
                                min_node_count=None,
                                timeout_in_minutes=20)

    # The 'status' property gives a more detailed view of the cluster.
    print(created.status.serialize())
    return created
def get_or_create_compute(ws, compute_name="amlcpucluster"):
    """Get or create an AmlCompute cluster configured through the environment.

    Environment variables override the defaults:
    ``AML_COMPUTE_CLUSTER_NAME`` (default: the ``compute_name`` argument),
    ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 0),
    ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 2) and
    ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

    Args:
        ws: Workspace in which the cluster is looked up or created.
        compute_name: Cluster name used when the environment does not set one.

    Returns:
        The compute target. An existing target with the chosen name is
        returned even when it is not an ``AmlCompute``.

    Raises:
        ValueError: If a node-count environment variable is not an integer.
    """
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", compute_name)
    # Environment values are strings; node counts must be integers.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 2))

    # This example uses a CPU VM. For a GPU VM, set the SKU to STANDARD_NC6.
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    compute_target = None
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. just use it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # With no min node count given, the wait uses the cluster's scale
        # settings; give up after 20 minutes.
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # The 'status' property gives a more detailed view of the cluster.
        print(compute_target.status.serialize())

    return compute_target
def getComputeAML(ws, name="amlcluster"):
    """Return the named AmlCompute cluster, creating it when the name is unused.

    Settings for a new cluster come from ``AZML_COMPUTE_MIN_NODES`` (default
    0), ``AZML_COMPUTE_MAX_NODES`` (default 3) and ``AZML_COMPUTE_VMSIZE``
    (default ``"Standard_D3_v2"``); it scales down after 3600 idle seconds.

    Args:
        ws: Workspace in which the cluster is looked up or created.
        name: Name of the cluster.

    Returns:
        The compute target, or ``None`` when a target with this name exists
        but is not exactly an ``AmlCompute``.
    """
    if name in ws.compute_targets:
        existing = ws.compute_targets[name]
        if existing and type(existing) is AmlCompute:
            print(f"### Found existing cluster '{name}' so will use it")
            return existing
        # The name is taken by something that is not an AmlCompute cluster.
        return None

    min_node_count = int(os.environ.get('AZML_COMPUTE_MIN_NODES', "0"))
    max_node_count = int(os.environ.get('AZML_COMPUTE_MAX_NODES', "3"))
    sku = os.environ.get('AZML_COMPUTE_VMSIZE', "Standard_D3_v2")

    print(f"### Creating cluster '{name}' this could take time...")
    cluster_config = AmlCompute.provisioning_configuration(
        vm_size=sku,
        min_nodes=min_node_count,
        max_nodes=max_node_count,
        idle_seconds_before_scaledown=3600)
    created = ComputeTarget.create(ws, name, cluster_config)

    # With no min node count given, the wait uses the cluster's scale
    # settings; give up after 20 minutes.
    created.wait_for_completion(show_output=True,
                                min_node_count=None,
                                timeout_in_minutes=20)

    # get_status() gives a more detailed view of the cluster.
    print(created.get_status().serialize())
    return created
def executeAction(self, parameters_file=None, ws=None, azure_credentials=None, azure_computeTarget=None):
    """Validate the Azure credentials and look up an existing compute target.

    Args:
        parameters_file: Accepted for interface compatibility; not used here.
        ws: Workspace in which the compute target is looked up.
        azure_credentials: JSON text holding at least ``tenantId``,
            ``clientId`` and ``clientSecret``.
        azure_computeTarget: Name of the compute target to look up.

    Returns:
        The compute target found in the workspace.

    Raises:
        AMLConfigurationException: If ``azure_credentials`` is missing or is
            not valid JSON.
    """
    try:
        azure_credentials = json.loads(azure_credentials)
    except (JSONDecodeError, TypeError) as error:
        # TypeError covers the default azure_credentials=None, which
        # json.loads rejects before it ever tries to parse anything.
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-compute/blob/master/README.md"
        ) from error

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    required_parameters_provided(
        parameters=azure_credentials,
        keys=["tenantId", "clientId", "clientSecret"],
        message="Required parameter(s) not found in your azure credentials saved in AZURE_CREDENTIALS secret for logging in to the workspace. Please provide a value for the following key(s): "
    )

    compute_target = ComputeTarget(workspace=ws, name=azure_computeTarget)
    print(
        f"::debug::Found compute target with same name. Not updating the compute target: {compute_target.serialize()}"
    )
    print(
        "::debug::Successfully finished Azure Machine Learning Compute Action"
    )
    return compute_target
def createCompute(ws, args):
    """Get or create the AmlCompute cluster described by ``args``.

    Args:
        ws: Workspace in which the cluster is looked up or created.
        args: Object providing ``clusterName``, ``minNodes``, ``maxNodes``,
            ``clusterSku`` and ``verbose``.

    Returns:
        The compute target. An existing target with this name is returned
        even when it is not an ``AmlCompute``.
    """
    cluster_name = args.clusterName

    if cluster_name not in ws.compute_targets:
        print("Compute target {0} not found.".format(cluster_name))
        min_node_count = args.minNodes
        max_node_count = args.maxNodes
        sku = args.clusterSku

        print("Creating a new compute target {0}.".format(cluster_name))
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_node_count,
            max_nodes=max_node_count)
        compute_target = ComputeTarget.create(ws, cluster_name, cluster_config)

        # With no min node count given, the wait uses the cluster's scale
        # settings; give up after 20 minutes.
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        if args.verbose:
            # get_status() gives a more detailed view of the cluster.
            print(compute_target.get_status().serialize())
    else:
        compute_target = ws.compute_targets[cluster_name]
        if compute_target and type(compute_target) is AmlCompute:
            print("Found compute target : {0}".format(cluster_name))

    return compute_target
def _setup_compute(self):
    """
    Set up the compute target in the Azure ML workspace.

    Reuses the target named ``self.comp_name`` when the workspace already has
    one; otherwise creates an AmlCompute cluster from ``self.comp_vm_size``,
    ``self.comp_min_nodes`` and ``self.comp_max_nodes``. The result is stored
    in ``self.compute_target``.

    :returns: compute_target
    :rtype: ComputeTarget
    """
    if self.comp_name not in self.ws.compute_targets:
        print("creating a new compute target...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=self.comp_vm_size,
            min_nodes=self.comp_min_nodes,
            max_nodes=self.comp_max_nodes,
        )
        self.compute_target = ComputeTarget.create(
            self.ws, self.comp_name, cluster_config)
        # Give provisioning up to 20 minutes, then report the cluster status.
        self.compute_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
        print(self.compute_target.get_status().serialize())
    else:
        self.compute_target = self.ws.compute_targets[self.comp_name]
        if self.compute_target and type(self.compute_target) is AmlCompute:
            print("Found compute target: " + self.comp_name)

    return self.compute_target