def getComputeClusterResource(
    ws: Workspace,
    compute_name: str,
    vm_size: str = "Standard_DS1_v2",
    min_nodes=0,
    max_nodes=1,
):
    """Return the AmlCompute cluster named ``compute_name``, creating it if needed.

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :param compute_name: Name of the cluster to look up or create.
    :param vm_size: VM size used only when a new cluster is provisioned.
    :param min_nodes: Minimum node count for a newly provisioned cluster.
    :param max_nodes: Maximum node count for a newly provisioned cluster.
    :returns: The existing target when its exact type is ``AmlCompute``,
        otherwise a newly created compute target.
    """
    if compute_name in ws.compute_targets:
        existing = ws.compute_targets[compute_name]
        # Only an exact AmlCompute instance is reused; anything else falls
        # through to the creation path below.
        if existing and type(existing) is AmlCompute:
            return existing

    # Create new compute resource:
    cluster = ComputeTarget.create(
        ws,
        compute_name,
        AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
        ),
    )
    cluster.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
    return cluster
def exec_ComputeTargetSetup(
        self, Parameters: ComputeTargetSetupParameter) -> ExecResult:
    """Attach to, or create, the compute cluster named ``Parameters.ClusterName``.

    Everything printed while this runs is captured by temporarily replacing
    ``sys.stdout`` and is returned as the second field of the ``ExecResult``.
    The selected target is stored on ``self.compute_target``.

    :param Parameters: Object providing the ``ClusterName`` to look up/create.
    :returns: ``ExecResult(True, output)`` when the target exists or was created
        and ``wait_for_completion`` returned; ``ExecResult(False, output)`` when
        any step raised (the exception text is part of ``output``).
    """
    execResult = False
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        try:
            self.compute_target = self.ws.compute_targets[
                Parameters.ClusterName]
            print('Found existing compute target.')
        except KeyError:
            print('Creating a new compute target...')
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='Standard_NC6',
                idle_seconds_before_scaledown=1800,
                min_nodes=0,
                max_nodes=4)
            self.compute_target = ComputeTarget.create(
                self.ws, Parameters.ClusterName, compute_config)
        self.compute_target.wait_for_completion(show_output=True,
                                                min_node_count=None,
                                                timeout_in_minutes=20)
        # Report success only once the wait has actually returned.
        execResult = True
    except Exception as ex:
        # Failures on the creation path are reported the same way as failures
        # on the lookup path: as text in the captured output.
        print(ex)
    finally:
        # Always undo the redirection, otherwise every later print in the
        # process would keep going into the StringIO buffer.
        sys.stdout = old_stdout
    return ExecResult(execResult, mystdout.getvalue())
def prepare_remote_compute(ws):
    """Return the compute target configured through environment variables.

    Reads ``AML_COMPUTE_CLUSTER_NAME`` (default ``"cpucluster"``),
    ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 1),
    ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 4) and
    ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :returns: The target already registered under that name, or a newly
        provisioned AmlCompute cluster.
    :raises ValueError: If a node-count variable is set to a non-integer value.
    """
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Environment values are strings whenever the variable is set, so convert
    # explicitly; the defaults are already ints and pass through unchanged.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. Using it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # create the cluster
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings for
        # the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use
        # get_status()
        print(compute_target.get_status().serialize())

    return compute_target
def _create_cluster(
    workspace,
    cluster_name=_CLUSTER_NAME,
    vm_size=_CLUSTER_VM_SIZE,
    min_nodes=_CLUSTER_MIN_NODES,
    max_nodes=_CLUSTER_MAX_NODES,
):
    """Look up the named compute target, provisioning it when lookup fails.

    Args:
        workspace: Workspace passed to the ``ComputeTarget`` calls.
        cluster_name: Name of the target to find or create.
        vm_size: VM size used when a new cluster is provisioned.
        min_nodes: Minimum node count for a new cluster.
        max_nodes: Maximum node count for a new cluster.

    Returns:
        The existing or newly created compute target.
    """
    log = logging.getLogger(__name__)

    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        # create the cluster and wait for provisioning to finish
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
            ),
        )
        cluster.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # get_status() gives a detailed status for the current AmlCompute.
    log.debug(cluster.get_status().serialize())
    return cluster
def get_compute_object(ws, compute_name, size="STANDARD_NC6", min_nodes=1, max_nodes=4):
    """
    get_compute_object - Fetch an AMLS compute target, creating it when absent.

    :param Workspace ws: AMLS Workspace object.
    :param str compute_name: AMLS compute name.
    :param str size: VM size used when a new cluster is provisioned.
    :param int min_nodes: Minimum node count for a new cluster.
    :param int max_nodes: Maximum node count for a new cluster.

    :returns: AMLS compute target
    :rtype: azureml.core.compute.ComputeTarget
    """
    if compute_name not in ws.compute_targets:
        # Create the cluster and wait until provisioning completes
        new_cluster = ComputeTarget.create(
            ws,
            compute_name,
            AmlCompute.provisioning_configuration(
                vm_size=size,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
            ),
        )
        new_cluster.wait_for_completion(show_output=True)
        return new_cluster

    return ws.compute_targets[compute_name]
def get_aml_compute(workspace):
    """Return the AmlCompute target ``example_vm_name``, creating it if missing.

    :param workspace: Workspace in which the target is looked up or created.
    :returns: The existing or newly provisioned compute target.
    """
    # TODO: Set desired name for compute target
    target_name = "example_vm_name"

    try:
        cluster = AmlCompute(workspace, target_name)
    except ComputeTargetException:
        print("creating new compute target")
        # TODO: Configure desired VM, see:
        # https://docs.microsoft.com/nl-nl/azure/virtual-machines/sizes-general
        settings = {
            "vm_size": "STANDARD_D1_V2",
            "min_nodes": 0,
            "max_nodes": 1,
            "vnet_resourcegroup_name": "",
            "vnet_name": "-vn",
            "subnet_name": "default",
            "idle_seconds_before_scaledown": 1800,
            "vm_priority": 'lowpriority',
        }
        cluster = ComputeTarget.create(
            workspace,
            target_name,
            AmlCompute.provisioning_configuration(**settings),
        )
        cluster.wait_for_completion(
            show_output=True,
            min_node_count=None,
            timeout_in_minutes=20,
        )
    else:
        print("found existing compute target.")

    print("Azure Machine Learning Compute attached")
    return cluster
def create_amlcompute_cluster(self, pet_cluster_name, min_nodes, max_nodes, vm_size):
    """Find or create the AmlCompute cluster and keep it on ``self``.

    Stores ``min_nodes``/``max_nodes`` on the instance and the resulting
    target in ``self.pet_compute_target``. A new cluster is placed in the
    virtual network described by ``self.vnet_name``, ``self.rg_name`` and
    ``self.subnet_name``.

    :param pet_cluster_name: Name of the cluster to find or create.
    :param min_nodes: Minimum node count for a new cluster.
    :param max_nodes: Maximum node count for a new cluster.
    :param vm_size: VM size for a new cluster.
    :returns: The existing or newly created compute target.
    """
    self.min_nodes, self.max_nodes = min_nodes, max_nodes

    # Verify that the cluster doesn't exist already
    try:
        self.pet_compute_target = ComputeTarget(workspace=self.ws,
                                                name=pet_cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            vnet_name=self.vnet_name,
            vnet_resourcegroup_name=self.rg_name,
            subnet_name=self.subnet_name,
        )
        # create the cluster, then wait for provisioning to complete
        self.pet_compute_target = ComputeTarget.create(
            self.ws, pet_cluster_name, cluster_config)
        self.pet_compute_target.wait_for_completion(show_output=True)
    else:
        print('Found existing compute target.')

    return self.pet_compute_target
def ComputeCompute():
    """Ensure the AML compute cluster described by the JSON request exists.

    Reads ``subscription_id``, ``resource_group``, ``workspace_name``,
    ``location``, ``cluster_name``, ``vm_size``, ``min_nodes`` and
    ``max_nodes`` from ``request.json``.

    :returns: ``"Found existing AML compute context."`` when the cluster can
        already be loaded, otherwise ``"Compute successfully created"`` after
        the new cluster has been provisioned.
    :raises KeyError: If one of the keys listed above is missing.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    # 'location' is not used below; it is still read so that a request
    # lacking it keeps failing exactly as before.
    location = request.json['location']
    cluster_name = request.json['cluster_name']
    vm_size = request.json['vm_size']
    min_nodes = request.json['min_nodes']
    max_nodes = request.json['max_nodes']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    try:
        AmlCompute(ws, cluster_name)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Any ordinary failure to load the cluster is still
        # treated as "does not exist yet".
        print('need to create new Compute.')
    else:
        print('Found existing AML compute context.')
        return "Found existing AML compute context."

    print('Creating new AML compute context.')
    aml_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                       min_nodes=min_nodes,
                                                       max_nodes=max_nodes)
    aml_compute = AmlCompute.create(ws,
                                    name=cluster_name,
                                    provisioning_configuration=aml_config)
    aml_compute.wait_for_completion(show_output=True)
    return "Compute successfully created"
def getComputeAML(ws, name="amlcluster"):
    """Return the compute target called ``name``, provisioning it when absent.

    A new cluster is sized from ``AZML_COMPUTE_MIN_NODES`` (default 0),
    ``AZML_COMPUTE_MAX_NODES`` (default 3) and ``AZML_COMPUTE_VMSIZE``
    (default ``Standard_D3_v2``), with a 3600 second idle scale-down.

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :param name: Name of the cluster to find or create.
    :returns: The existing or newly created compute target.
    """
    # Azure ML compute configuration
    if name not in ws.compute_targets:
        min_count = int(os.environ.get('AZML_COMPUTE_MIN_NODES', "0"))
        max_count = int(os.environ.get('AZML_COMPUTE_MAX_NODES', "3"))
        sku = os.environ.get('AZML_COMPUTE_VMSIZE', "Standard_D3_v2")

        print(f"### Creating cluster '{name}' this could take time...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_count,
            max_nodes=max_count,
            idle_seconds_before_scaledown=3600,
        )

        # create the cluster
        cluster = ComputeTarget.create(ws, name, cluster_config)

        # No min node count is given, so the wait uses the scale settings of
        # the cluster; the timeout is 20 minutes.
        cluster.wait_for_completion(
            show_output=True,
            min_node_count=None,
            timeout_in_minutes=20,
        )

        # get_status() gives a more detailed view of the AmlCompute status
        print(cluster.get_status().serialize())
        return cluster

    cluster = ws.compute_targets[name]
    if cluster and type(cluster) is AmlCompute:
        print(f"### Found existing cluster '{name}' so will use it")
    return cluster
def create_aml_cluster(workspace, parameters):
    """Build an AmlCompute provisioning configuration and create the cluster.

    :param workspace: Workspace handed to ``create_compute_target``.
    :param parameters: Mapping of settings. Keys read here: ``vm_size``,
        ``vm_priority``, ``min_nodes``, ``max_nodes``,
        ``idle_seconds_before_scaledown``, ``remote_login_port_public_access``,
        ``vnet_resource_group_name``, ``vnet_name``, ``subnet_name``,
        ``admin_username``, ``admin_user_password``, ``admin_user_ssh_key``
        and ``name``.
    :returns: Whatever ``create_compute_target`` returns.
    """
    print("::debug::Creating aml cluster configuration")
    aml_config = AmlCompute.provisioning_configuration(
        vm_size=parameters.get("vm_size", None),
        vm_priority=parameters.get("vm_priority", "dedicated"),
        min_nodes=parameters.get("min_nodes", 0),
        max_nodes=parameters.get("max_nodes", 4),
        idle_seconds_before_scaledown=parameters.get("idle_seconds_before_scaledown", None),
        tags={"Created": "GitHub Action: Azure/aml-compute"},
        # Fixed typo: this text previously read "GitHubb Action".
        description="AML Cluster created by Azure/aml-compute GitHub Action",
        remote_login_port_public_access=parameters.get("remote_login_port_public_access", "NotSpecified")
    )

    print("::debug::Adding VNET settings to configuration if all required settings were provided")
    vnet_resource_group_name = parameters.get("vnet_resource_group_name", None)
    vnet_name = parameters.get("vnet_name", None)
    subnet_name = parameters.get("subnet_name", None)
    # VNET settings are applied only as a complete (all truthy) triple.
    if vnet_resource_group_name and vnet_name and subnet_name:
        aml_config.vnet_resourcegroup_name = vnet_resource_group_name
        aml_config.vnet_name = vnet_name
        aml_config.subnet_name = subnet_name

    print("::debug::Adding credentials to configuration if all required settings were provided")
    admin_username = parameters.get("admin_username", None)
    admin_user_password = parameters.get("admin_user_password", None)
    admin_user_ssh_key = parameters.get("admin_user_ssh_key", None)
    # A password takes precedence over an SSH key when both are supplied.
    if admin_username and admin_user_password:
        aml_config.admin_username = admin_username
        aml_config.admin_user_password = admin_user_password
    elif admin_username and admin_user_ssh_key:
        aml_config.admin_username = admin_username
        aml_config.admin_user_ssh_key = admin_user_ssh_key

    print("::debug::Creating compute target")
    aml_cluster = create_compute_target(
        workspace=workspace,
        name=parameters.get("name", None),
        config=aml_config
    )
    return aml_cluster
def createCompute(ws, args):
    """Return the compute target ``args.clusterName``, creating it when absent.

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :param args: Object providing ``clusterName``, ``minNodes``, ``maxNodes``,
        ``clusterSku`` and ``verbose``.
    :returns: The existing or newly created compute target.
    """
    cluster_name = args.clusterName

    if cluster_name not in ws.compute_targets:
        print("Compute target {0} not found.".format(cluster_name))
        min_count = args.minNodes
        max_count = args.maxNodes
        sku = args.clusterSku

        print("Creating a new compute target {0}.".format(cluster_name))
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_count,
            max_nodes=max_count,
        )

        # create the cluster
        cluster = ComputeTarget.create(ws, cluster_name, cluster_config)

        # No min node count is given, so the wait uses the scale settings of
        # the cluster; the timeout is 20 minutes.
        cluster.wait_for_completion(
            show_output=True,
            min_node_count=None,
            timeout_in_minutes=20,
        )

        if args.verbose:
            # get_status() gives a more detailed view of the AmlCompute status
            print(cluster.get_status().serialize())
    else:
        cluster = ws.compute_targets[cluster_name]
        if cluster and type(cluster) is AmlCompute:
            print("Found compute target : {0}".format(cluster_name))

    return cluster
def get_compute(workspace, cluster_name, vm_size='STANDARD_NC6', max_nodes=4):
    """
    Find a compute cluster by name, or provision it.

    When constructing the target for ``cluster_name`` raises
    ``ComputeTargetException``, a new cluster is created and this call waits
    for its provisioning (timeout argument: 20 minutes).

    :param workspace: The Azure ML workspace to use.
    :param cluster_name: Name of the cluster to find or create.
    :param vm_size: Type/size of VM to create on AzureML, if no cluster was found.
    :param max_nodes: Max number of nodes to give to a newly created cluster.
    :returns: A ComputeTarget object.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                  max_nodes=max_nodes),
        )
        cluster.wait_for_completion(
            show_output=True,
            min_node_count=None,
            timeout_in_minutes=20,
        )
    else:
        print('Found existing compute target')

    return cluster
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when absent.

    A new cluster takes its priority and node limits from an ``Env()``
    instance (``vm_priority``, ``min_nodes``, ``max_nodes``).

    :param workspace: Workspace whose ``compute_targets`` mapping is searched.
    :param compute_name: Name of the target to find or create.
    :param vm_size: VM size for a newly created cluster.
    :returns: The existing or newly created compute target.

    On ``ComputeTargetException`` the error is printed and the process exits
    with status 1.
    """
    try:
        if compute_name not in workspace.compute_targets:
            settings = Env()
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=settings.vm_priority,
                min_nodes=settings.min_nodes,
                max_nodes=settings.max_nodes,
                idle_seconds_before_scaledown="300"
                # For VNet support, additionally pass:
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name
            )
            cluster = ComputeTarget.create(workspace, compute_name,
                                           cluster_config)
            cluster.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=10,
            )
            return cluster

        cluster = workspace.compute_targets[compute_name]
        if cluster and type(cluster) is AmlCompute:
            print('Found existing compute target ' + compute_name
                  + ' so using it.')
        return cluster
    except ComputeTargetException as err:
        print(err)
        print('An error occurred trying to provision compute.')
        exit(1)
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when absent.

    A new cluster is configured from ``AML_CLUSTER_PRIORITY`` (default
    ``dedicated``), ``AML_CLUSTER_MIN_NODES`` (default 0) and
    ``AML_CLUSTER_MAX_NODES`` (default 2).

    :param workspace: Workspace whose ``compute_targets`` mapping is searched.
    :param compute_name: Name of the target to find or create.
    :param vm_size: VM size for a newly created cluster.
    :returns: The existing or newly created compute target.

    On ``ComputeTargetException`` the error is printed and the process exits
    with a non-zero status.
    """
    # Load the environment variables from .env in case this script
    # is called outside an existing process
    load_dotenv()
    # Verify that cluster does not exist already
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name
                      + ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'dedicated'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 2)),
                idle_seconds_before_scaledown="120"
                # For VNet support, additionally pass:
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A bare exit() terminates with status 0, which a calling shell or
        # pipeline reads as success; signal the failure explicitly.
        exit(1)
def create_cluster(ws, cluster_name, vm_size, max_nodes):
    """Find or create a compute cluster, prompting for any missing setting.

    Each argument passed as ``None`` is requested interactively via
    ``input()``; otherwise the supplied value is echoed.

    :param ws: Workspace in which the cluster is looked up or created.
    :param cluster_name: Cluster name, or ``None`` to prompt.
    :param vm_size: VM size for a new cluster, or ``None`` to prompt.
    :param max_nodes: Maximum node count, or ``None`` to prompt.
    :returns: The existing or newly created cluster.
    :raises ValueError: If the node count typed at the prompt is not an integer.
    """
    if cluster_name is None:
        cluster_name = input("name of the cluster : ")
    else:
        print("using cluster : ", cluster_name)

    if vm_size is None:
        vm_size = input("size of your VM : ")
    else:
        print("using vm size of : ", vm_size)

    if max_nodes is None:
        # input() always yields a string; a node count has to be an integer.
        max_nodes = int(input("maximum amount of nodes on the cluster : "))
    else:
        print("maximum amount of nodes is : ", max_nodes)

    from azureml.core.compute import ComputeTarget, AmlCompute
    from azureml.core.compute_target import ComputeTargetException

    # Verify that cluster does not exist already
    try:
        cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print("Found existing cluster")
    except ComputeTargetException:
        print("Creating new cluster")

        # Specify the configuration for the new cluster
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=0,
            max_nodes=max_nodes)

        # Create the cluster with the specified name and configuration
        cluster = ComputeTarget.create(ws, cluster_name, compute_config)

    # Wait for the cluster to complete, show the output log
    cluster.wait_for_completion(show_output=True)
    return cluster
def _setup_compute(self):
    """
    Select the compute target named ``self.comp_name`` in the workspace.

    An already registered target is reused; otherwise a new AmlCompute
    cluster is provisioned from ``self.comp_vm_size``,
    ``self.comp_min_nodes`` and ``self.comp_max_nodes``. The result is also
    stored in ``self.compute_target``.

    :returns: compute_target
    :rtype: ComputeTarget
    """
    if self.comp_name not in self.ws.compute_targets:
        print("creating a new compute target...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=self.comp_vm_size,
            min_nodes=self.comp_min_nodes,
            max_nodes=self.comp_max_nodes,
        )
        self.compute_target = ComputeTarget.create(self.ws, self.comp_name,
                                                   cluster_config)
        self.compute_target.wait_for_completion(
            show_output=True,
            min_node_count=None,
            timeout_in_minutes=20,
        )
        print(self.compute_target.get_status().serialize())
    else:
        self.compute_target = self.ws.compute_targets[self.comp_name]
        if self.compute_target and type(self.compute_target) is AmlCompute:
            print("Found compute target: " + self.comp_name)

    return self.compute_target
def _create_cluster(workspace, cluster_name, vm_size, min_nodes, max_nodes):
    """Finds the named AzureML cluster or creates it.

    Args:
        workspace: Workspace passed to the ``ComputeTarget`` calls.
        cluster_name (string): The name you wish to assign the cluster.
        vm_size (string): The type of sku to use for your vm.
        min_nodes (int): Minimum number of nodes in cluster.
                         Use 0 if you don't want to incur costs when it isn't being used.
        max_nodes (int): Maximum number of nodes in cluster.

    Returns:
        The existing or newly created compute target.
    """
    log = logging.getLogger(__name__)

    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        # create the cluster and wait for provisioning to finish
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
            ),
        )
        cluster.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # Log the serialized form of the target at debug level.
    log.debug(cluster.serialize())
    return cluster
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when absent.

    A new cluster is configured from ``AML_CLUSTER_PRIORITY`` (default
    ``lowpriority``), ``AML_CLUSTER_MIN_NODES`` (default 0) and
    ``AML_CLUSTER_MAX_NODES`` (default 4); ``load_dotenv()`` is called first.

    :param workspace: Workspace whose ``compute_targets`` mapping is searched.
    :param compute_name: Name of the target to find or create.
    :param vm_size: VM size for a newly created cluster.
    :returns: The existing or newly created compute target.

    On ``ComputeTargetException`` the error is printed and the process exits
    with a non-zero status.
    """
    load_dotenv()
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name
                      + ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'lowpriority'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)),
                idle_seconds_before_scaledown="300")
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A bare exit() terminates with status 0, which a calling shell or
        # pipeline reads as success; signal the failure explicitly.
        exit(1)
def _create_or_update_cluster(self, min_nodes, max_nodes, idle_timeout_secs):
    """Update the cluster ``self.cluster_name`` if it loads, else create it.

    The resulting object is stored in ``self.cluster``. After waiting for
    completion, ``self.cluster_nodes`` is checked against ``min_nodes``; if
    too few nodes are present the check is repeated once after 30 seconds.

    :param min_nodes: Minimum node count to set and to verify afterwards.
    :param max_nodes: Maximum node count to set.
    :param idle_timeout_secs: Idle seconds before scale-down.
    :raises RuntimeError: If fewer than ``min_nodes`` nodes are present after
        the second check.
    """
    try:
        self.cluster = AmlCompute(workspace=self.workspace,
                                  name=self.cluster_name)
        highlighted = colored(self.cluster_name, "green")
        print(f'Updating existing cluster "{highlighted}"')
        self.cluster.update(
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
        )
    except ComputeTargetException:
        highlighted = colored(self.cluster_name, "green")
        print(f'Creating new cluster "{highlighted}"')
        new_config = AmlCompute.provisioning_configuration(
            vm_size=self.vm_type,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
            admin_username=self.admin_username,
            admin_user_ssh_key=self.ssh_key,
            remote_login_port_public_access="Enabled",
        )
        self.cluster = AmlCompute.create(self.workspace, self.cluster_name,
                                         new_config)

    self.cluster.wait_for_completion()

    if len(self.cluster_nodes) >= min_nodes:
        return

    # Not enough nodes yet: allow one 30 second grace period, then re-check.
    sleep(30)
    if len(self.cluster_nodes) < min_nodes:
        raise RuntimeError("Failed to provision sufficient nodes")
def create_aml_compute(self, ws):
    """Return the compute target configured through environment variables.

    Reads ``AML_COMPUTE_CLUSTER_NAME`` (default ``"cpucluster"``),
    ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 0),
    ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 4) and
    ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :returns: The target already registered under that name, or a newly
        provisioned AmlCompute cluster.
    :raises ValueError: If a node-count variable is set to a non-integer value.
    """
    # choose a name for your cluster
    print("Creating new AML Compute")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Environment values are strings whenever the variable is set, so convert
    # explicitly; the defaults are already ints and pass through unchanged.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
    print(" AML Compute " + compute_name + " min nodes "
          + str(compute_min_nodes) + " compute max nodes "
          + str(compute_max_nodes))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. just use it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # create the cluster
        print("Starting to create ACI Compute cluster")
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings for
        # the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

    return compute_target
def create_compute(ws, gpus):
    '''Creates an azure compute cluster sized for the given number of GPUs.

    Supported values of ``gpus`` are 1, 2 and 4. Any other value prints a
    message and nothing is created.

    :param ws: Workspace in which the cluster is created.
    :param gpus: Number of GPUs wanted.
    :returns: The compute target returned by ``ComputeTarget.create`` (this
        function does not wait for provisioning), or ``None`` when ``gpus``
        is not a supported value.
    '''
    # (number of GPUs, cluster name, Azure VM size)
    # An alternative for one GPU is "gpu-cluster-NC4as" with
    # 'Standard_NC4as_T4_v3'.
    gpu_clusters = (
        (1, "gpu-cluster-NC6", 'Standard_NC6_Promo'),
        (2, "gpu-cluster-NC12", 'Standard_NC12_Promo'),
        (4, "gpu-cluster-NC24", 'Standard_NC24_Promo'),
    )

    for gpu_count, compute_name, vm_size in gpu_clusters:
        if gpus == gpu_count:
            break
    else:
        print(gpus, 'is not a valid number of GPUs. No compute was created')
        return None

    # define the cluster and the max and min number of nodes
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=0,
        max_nodes=10)

    # create the cluster and hand it back instead of discarding it
    return ComputeTarget.create(ws, compute_name, provisioning_config)
def create_aml_cluster(workspace, parameters):
    """Assemble an AmlCompute provisioning configuration and create the cluster.

    :param workspace: Workspace handed to ``create_compute_target``.
    :param parameters: Mapping of settings. Keys read here: ``vm_size``,
        ``vm_priority``, ``min_nodes``, ``max_nodes``,
        ``idle_seconds_before_scaledown``, ``remote_login_port_public_access``,
        ``vnet_resource_group_name``, ``vnet_name``, ``subnet_name``,
        ``identity_type``, ``identity_id`` and ``name``.
    :returns: Whatever ``create_compute_target`` returns.

    Admin credentials come from the environment variables ``ADMIN_USER_NAME``,
    ``ADMIN_USER_PASSWORD`` and ``ADMIN_USER_SSH_KEY``. The default cluster
    name is derived from ``GITHUB_REPOSITORY``.
    """
    print("::debug::Creating aml cluster configuration")
    base_settings = {
        "vm_size": parameters.get("vm_size", "Standard_DS3_v2"),
        "vm_priority": parameters.get("vm_priority", "dedicated"),
        "min_nodes": parameters.get("min_nodes", 0),
        "max_nodes": parameters.get("max_nodes", 4),
        "idle_seconds_before_scaledown": parameters.get(
            "idle_seconds_before_scaledown", None),
        "tags": {"Created": "GitHub Action: Azure/aml-compute"},
        "description": "AML Cluster created by Azure/aml-compute GitHub Action",
        "remote_login_port_public_access": parameters.get(
            "remote_login_port_public_access", "NotSpecified"),
    }
    aml_config = AmlCompute.provisioning_configuration(**base_settings)

    print(
        "::debug::Adding VNET settings to configuration if all required settings were provided"
    )
    vnet_keys = ("vnet_resource_group_name", "vnet_name", "subnet_name")
    # VNET settings are applied only when all three are present (not None).
    if all(parameters.get(key, None) is not None for key in vnet_keys):
        aml_config.vnet_resourcegroup_name = parameters.get(
            "vnet_resource_group_name", None)
        aml_config.vnet_name = parameters.get("vnet_name", None)
        aml_config.subnet_name = parameters.get("subnet_name", None)

    print(
        "::debug::Adding credentials to configuration if all required settings were provided"
    )
    admin_user = os.environ.get("ADMIN_USER_NAME", None)
    admin_password = os.environ.get("ADMIN_USER_PASSWORD", None)
    admin_ssh_key = os.environ.get("ADMIN_USER_SSH_KEY", None)
    # A password takes precedence over an SSH key when both are set.
    if admin_user is not None and admin_password is not None:
        aml_config.admin_username = admin_user
        aml_config.admin_user_password = admin_password
    elif admin_user is not None and admin_ssh_key is not None:
        aml_config.admin_username = admin_user
        aml_config.admin_user_ssh_key = admin_ssh_key

    print(
        "::debug::Adding identity settings to configuration if all required settings were provided"
    )
    identity_type = parameters.get("identity_type", None)
    identity_id = parameters.get("identity_id", None)
    if identity_type == "UserAssigned" and identity_id is not None:
        aml_config.identity_type = identity_type
        aml_config.identity_id = identity_id

    print("::debug::Creating compute target")
    # Default compute target name: last path segment of GITHUB_REPOSITORY,
    # cut to at most 16 characters.
    repository_name = str(
        os.environ.get("GITHUB_REPOSITORY")).split("/")[-1][:16]
    return create_compute_target(
        workspace=workspace,
        name=parameters.get("name", repository_name),
        config=aml_config,
    )
def get_compute(ws: Workspace, compute_target: str) -> ComputeTarget:
    """Return the compute target named ``compute_target``, creating it if absent.

    :param ws: Workspace whose ``compute_targets`` mapping is searched.
    :param compute_target: Name of the target to find or create.
    :returns: The entry of ``ws.compute_targets`` stored under that name.
    """
    if compute_target in ws.compute_targets:
        return ws.compute_targets[compute_target]

    # Not registered yet: provision a STANDARD_NC6 cluster (1 to 4 nodes) and
    # wait for it, then read it back from the workspace.
    new_cluster = ComputeTarget.create(
        ws,
        compute_target,
        AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            min_nodes=1,
            max_nodes=4,
        ),
    )
    new_cluster.wait_for_completion(show_output=True)
    return ws.compute_targets[compute_target]
def main(name, vm_size, nodes):
    """Ensure a compute cluster called ``name`` exists in the configured workspace.

    The workspace is loaded with ``Workspace.from_config()``. If the cluster
    cannot be loaded, a new one is created with one minimum node and ``nodes``
    maximum nodes. The call then waits for the cluster to complete.

    :param name: Name of the cluster to find or create.
    :param vm_size: VM size for a newly created cluster.
    :param nodes: Maximum node count for a newly created cluster.
    """
    ws = Workspace.from_config()
    try:
        compute_cluster = ComputeTarget(ws, name)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Any ordinary failure to load the cluster is still
        # treated as "does not exist yet".
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=1,
            max_nodes=nodes)
        compute_cluster = ComputeTarget.create(ws, name, compute_config)
    compute_cluster.wait_for_completion(show_output=True)
def get_or_create_compute(workspace, cpu_cluster_name, compute_vm_size, max_nodes):
    """Return the named compute cluster, creating it when lookup fails.

    :param workspace: Workspace in which the cluster is looked up or created.
    :param cpu_cluster_name: Name of the cluster to find or create.
    :param compute_vm_size: VM size for a newly created cluster.
    :param max_nodes: Maximum node count for a newly created cluster
        (the minimum is 0).
    :returns: The compute target, after ``wait_for_completion`` has returned.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cpu_cluster_name)
    except ComputeTargetException:
        cluster = ComputeTarget.create(
            workspace,
            cpu_cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=compute_vm_size,
                min_nodes=0,
                max_nodes=max_nodes,
            ),
        )

    cluster.wait_for_completion(show_output=True)
    return cluster
def __create_compute_target(self):
    """Return the compute target to use, creating it when it is not registered.

    Settings are read from ``self.kwargs``: ``ct_name`` (default: a random
    ``dask-ct-<6 digits>`` name), ``vm_size``, ``min_nodes``, ``max_nodes``
    and ``idle_time``. VNet settings are passed only when both ``self.vnet``
    and ``self.subnet`` are set.

    :returns: The existing or newly created compute target.
    :raises Exception: Any error from the lookup/creation is logged and
        re-raised.
    """
    import random

    fallback_name = f"dask-ct-{random.randint(100000, 999999)}"
    ct_name = self.kwargs.get("ct_name", fallback_name)
    vm_name = self.kwargs.get("vm_size", "STANDARD_DS3_V2")
    min_nodes = int(self.kwargs.get("min_nodes", "0"))
    max_nodes = int(self.kwargs.get("max_nodes", "100"))
    idle_time = int(self.kwargs.get("idle_time", "300"))

    vnet_rg = vnet_name = subnet_name = None

    if self.admin_username is None:
        self.admin_username = "******"
    ssh_key_pub, self.admin_ssh_key = self.__get_ssh_keys()

    if self.vnet and self.subnet:
        vnet_name, subnet_name = self.vnet, self.subnet
        # Fall back to the workspace's resource group when no VNet resource
        # group was given.
        vnet_rg = self.vnet_resource_group or self.workspace.resource_group

    try:
        if ct_name in self.workspace.compute_targets:
            self.__print_message(
                "Using existing compute target: {}".format(ct_name)
            )
            ct = self.workspace.compute_targets[ct_name]
        else:
            config = AmlCompute.provisioning_configuration(
                vm_size=vm_name,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                vnet_resourcegroup_name=vnet_rg,
                vnet_name=vnet_name,
                subnet_name=subnet_name,
                idle_seconds_before_scaledown=idle_time,
                admin_username=self.admin_username,
                admin_user_ssh_key=ssh_key_pub,
                remote_login_port_public_access="Enabled",
            )
            self.__print_message("Creating new compute targe: {}".format(ct_name))
            ct = ComputeTarget.create(self.workspace, ct_name, config)
            ct.wait_for_completion(show_output=self.show_output)
    except Exception as e:
        logger.exception("Cannot create/get compute target. {}".format(e))
        raise e

    return ct
def test_create_compute_target_invalid_name():
    """
    Unit test: create_compute_target must raise AMLConfigurationException
    when it is called with an empty name.
    """
    placeholder_workspace = object()
    empty_name = ""
    provisioning = AmlCompute.provisioning_configuration(max_nodes=1)

    with pytest.raises(AMLConfigurationException):
        assert create_compute_target(
            workspace=placeholder_workspace,
            name=empty_name,
            config=provisioning,
        )
def get_or_create_amlcompute(
    workspace,
    compute_name,
    vm_size="",
    min_nodes=0,
    max_nodes=None,
    idle_seconds_before_scaledown=None,
    verbose=False,
):
    """
    Get or create AmlCompute as the compute target. If a cluster of the same
    name is found, attach it and rescale accordingly. Otherwise, create a new
    cluster.

    Args:
        workspace (Workspace): workspace
        compute_name (str): name
        vm_size (str, optional): vm size, used only when creating
        min_nodes (int, optional): minimum number of nodes, used only when
            creating
        max_nodes (None, optional): maximum number of nodes in cluster. For an
            existing cluster, ``None`` means "do not rescale".
        idle_seconds_before_scaledown (None, optional): how long to wait
            before the cluster autoscales down, used only when creating
        verbose (bool, optional): if true, print logs

    Returns:
        Compute target
    """
    try:
        compute_target = ComputeTarget(workspace=workspace, name=compute_name)
    except ComputeTargetException:
        if verbose:
            print("Creating new compute target: {}".format(compute_name))

        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_seconds_before_scaledown,
        )
        compute_target = ComputeTarget.create(workspace, compute_name,
                                              compute_config)
        compute_target.wait_for_completion(show_output=verbose)
        return compute_target

    # Only report "found" once the lookup has really succeeded; previously the
    # message was printed before the lookup and so appeared even on creation.
    if verbose:
        print("Found compute target: {}".format(compute_name))

    # With the default max_nodes=None the comparison below would raise
    # TypeError, so an unspecified maximum leaves the cluster untouched.
    # The rescale sits outside the try block so that a failing update
    # propagates instead of being misread as "target does not exist".
    if max_nodes is not None and len(compute_target.list_nodes()) < max_nodes:
        if verbose:
            print("Rescaling to {} nodes".format(max_nodes))
        compute_target.update(max_nodes=max_nodes)
        compute_target.wait_for_completion(show_output=verbose)

    return compute_target
def choose_compute_target(workspace, name):
    """Return the AmlCompute target called ``name``, creating it if it cannot be loaded.

    A new cluster uses ``STANDARD_D2_V2`` VMs with 1 to 4 nodes and this call
    waits for its provisioning. The chosen target is printed before returning.

    :param workspace: Workspace in which the target is looked up or created.
    :param name: Name of the compute target.
    :returns: The existing or newly created compute target.
    """
    try:
        aml_compute = AmlCompute(workspace, name)
        print("Found existing compute target: {}".format(name))
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Any ordinary failure to load the target is still
        # treated as "does not exist yet".
        print("Creating new compute target: {}".format(name))
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2",
            min_nodes=1,
            max_nodes=4)
        aml_compute = ComputeTarget.create(workspace, name,
                                           provisioning_config)
        aml_compute.wait_for_completion(show_output=True)

    print(aml_compute)
    return aml_compute
def get_compute_target(self, compute_name, vm_size=None):
    """Return the compute target ``compute_name`` from ``self.workspace``.

    When constructing the target raises ``ComputeTargetException``, a new
    cluster with 1 to 2 nodes is created and this call waits for its
    provisioning (timeout argument: 20 minutes).

    :param compute_name: Name of the target to find or create.
    :param vm_size: VM size passed on when a new cluster is created.
    :returns: The existing or newly created compute target.
    """
    try:
        cluster = ComputeTarget(workspace=self.workspace, name=compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster = ComputeTarget.create(
            self.workspace,
            compute_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=1,
                max_nodes=2,
            ),
        )
        cluster.wait_for_completion(show_output=True, timeout_in_minutes=20)
    else:
        print('Found existing compute target')

    return cluster