コード例 #1
0
def create_aml_compute_target_batchai(cfg, ws):
    """Return the compute target named in the config, creating it if missing.

    Args:
        cfg: Configuration object built from the program's JSON input. Its
            ``ClusterProperties`` and ``AMLConfig`` attributes are read.
        ws: Azure ML workspace in which the compute target is looked up or
            created.

    Returns:
        The compute target that was found, or the newly created one.
    """
    cluster = cfg.ClusterProperties
    try:
        target = ComputeTarget(workspace=ws, name=cluster.cluster_name)
    except ComputeTargetException:
        # The lookup failed, so a new Batch AI cluster is provisioned.
        log.info('Creating Batch AI compute target "{}" in workspace "{}".'.format(
            cluster.cluster_name, ws.name))
        provisioning = BatchAiCompute.provisioning_configuration(
            vm_size=cluster.vm_size,
            vm_priority=cluster.vm_priority,
            autoscale_enabled=(cluster.scaling_method == 'auto_scale'),
            cluster_min_nodes=cluster.minimumNodeCount,
            cluster_max_nodes=cluster.maximumNodeCount,
            location=cfg.AMLConfig.location)
        log.info('Launching creation of the Batch AI compute target "{}" under the AML workspace "{}"'.format(
            cluster.cluster_name, ws.name))
        target = ComputeTarget.create(
            workspace=ws,
            name=cluster.cluster_name,
            provisioning_configuration=provisioning)
        target.wait_for_completion(show_output=True)
        log.info(target.status.serialize())
    else:
        # The existing cluster is reused as-is; its settings are not compared
        # against the config.
        log.info('Found existing compute target. Using it. NOT VALIDATING IF YOU CHANGED THE CLUSTER CONFIG...')
    return target
コード例 #2
0
    def create_amlcompute_cluster(self, pet_cluster_name, min_nodes, max_nodes,
                                  vm_size):
        """Find or create the AmlCompute cluster and store it on the instance.

        The node bounds are recorded on ``self``. When no compute target named
        ``pet_cluster_name`` can be retrieved, a new one is created using the
        instance's vnet, resource group and subnet names.

        :param pet_cluster_name: Name of the cluster to find or create.
        :param min_nodes: Minimum node count for a newly created cluster.
        :param max_nodes: Maximum node count for a newly created cluster.
        :param vm_size: VM size for a newly created cluster.
        :returns: The compute target stored in ``self.pet_compute_target``.
        """
        self.min_nodes, self.max_nodes = min_nodes, max_nodes

        try:
            found = ComputeTarget(workspace=self.ws, name=pet_cluster_name)
        except ComputeTargetException:
            print('Creating a new compute target...')
            provisioning = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                vnet_name=self.vnet_name,
                vnet_resourcegroup_name=self.rg_name,
                subnet_name=self.subnet_name)
            self.pet_compute_target = ComputeTarget.create(
                self.ws, pet_cluster_name, provisioning)
            self.pet_compute_target.wait_for_completion(show_output=True)
        else:
            self.pet_compute_target = found
            print('Found existing compute target.')

        return self.pet_compute_target
コード例 #3
0
def get_compute(workspace, cluster_name, vm_size='STANDARD_NC6', max_nodes=4):
    """
    Look up a compute cluster by name, creating it when the lookup fails.

    :param workspace:       The Azure ML workspace to use.
    :param cluster_name:    Name of the cluster to find or create.
    :param vm_size:         VM size used only when a new cluster is created.
    :param max_nodes:       Maximum node count used only when a new cluster
                            is created.
    :returns:               The found or newly created compute target.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning = AmlCompute.provisioning_configuration(
            vm_size=vm_size, max_nodes=max_nodes)
        cluster = ComputeTarget.create(workspace, cluster_name, provisioning)
        # Wait up to 20 minutes for the new cluster.
        cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
    else:
        print('Found existing compute target')

    return cluster
コード例 #4
0
def create_cluster(ws, cluster_name, vm_size, max_nodes):
    """Find or create an AmlCompute cluster, prompting for missing settings.

    Any of ``cluster_name``, ``vm_size`` or ``max_nodes`` that is ``None`` is
    read interactively from standard input; otherwise the supplied value is
    echoed.

    Args:
        ws: Azure ML workspace in which the cluster is looked up or created.
        cluster_name: Name of the cluster, or ``None`` to prompt for it.
        vm_size: VM size for a new cluster, or ``None`` to prompt for it.
        max_nodes: Maximum node count for a new cluster, or ``None`` to
            prompt for it.

    Returns:
        The found or newly created compute target.

    Raises:
        ValueError: If the maximum node count cannot be converted to ``int``.
    """
    if cluster_name is None:
        cluster_name = input("name of the cluster : ")
    else:
        print("using cluster : ", cluster_name)
    if vm_size is None:
        vm_size = input("size of your VM : ")
    else:
        print("using vm size of : ", vm_size)
    if max_nodes is None:
        max_nodes = input("maximum amount of nodes on the cluster : ")
    else:
        print("maximum amount of nodes is : ", max_nodes)
    # input() yields a string; the node count must be an integer before it is
    # handed to the provisioning configuration.
    max_nodes = int(max_nodes)

    from azureml.core.compute import ComputeTarget, AmlCompute
    from azureml.core.compute_target import ComputeTargetException

    # Verify that cluster does not exist already
    try:
        cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print("Found existing cluster")
    except ComputeTargetException:
        print("Creating new cluster")
        # Specify the configuration for the new cluster
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=max_nodes)

        # Create the cluster with the specified name and configuration
        cluster = ComputeTarget.create(ws, cluster_name, compute_config)

        # Wait for the cluster to complete, show the output log
        cluster.wait_for_completion(show_output=True)
    return cluster
コード例 #5
0
    def get_credentials(self):
        """Retrieve the credentials for the RemoteCompute target.

        Posts to the ``/listKeys`` path under the target's MLC endpoint and
        parses the response body as JSON.

        :return: The credentials for the RemoteCompute target.
        :rtype: dict
        :raises azureml.exceptions.ComputeTargetException: If the response
            carries an HTTP error status.
        """
        list_keys_url = self._mlc_endpoint + '/listKeys'
        request_headers = self._auth.get_authentication_header()
        ComputeTarget._add_request_tracking_headers(request_headers)
        query = {'api-version': MLC_WORKSPACE_API_VERSION}
        response = ClientBase._execute_func(
            get_requests_session().post,
            list_keys_url,
            params=query,
            headers=request_headers)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            error_template = ('Received bad response from MLC:\n'
                              'Response Code: {}\n'
                              'Headers: {}\n'
                              'Content: {}')
            raise ComputeTargetException(error_template.format(
                response.status_code, response.headers, response.content))

        body = response.content
        # The body may arrive as raw bytes; decode before JSON parsing.
        text = body.decode('utf-8') if isinstance(body, bytes) else body
        return json.loads(text)
コード例 #6
0
def attach_remote(
    name,
    address,
    ssh_port,
    username,
    password='',
    private_key_file='',
    private_key_passphrase='',
    workspace_name=None,
    resource_group_name=None,
):
    """Submit a request to attach a remote machine as a compute target.

    The workspace is resolved through ``get_workspace_or_default``. The
    function prints progress messages and returns nothing; it does not wait
    for the attach operation to finish.

    Args:
        name: Name to give the attached compute target.
        address: Address of the remote machine.
        ssh_port: SSH port of the remote machine.
        username: Login user name.
        password: Login password (optional).
        private_key_file: Path to a private key file (optional).
        private_key_passphrase: Passphrase for the private key (optional).
        workspace_name: Workspace name passed to the workspace lookup.
        resource_group_name: Resource group passed to the workspace lookup.
    """
    target_workspace = get_workspace_or_default(
        workspace_name=workspace_name, resource_group=resource_group_name)

    print('Attaching compute resource...')
    remote_config = RemoteCompute.attach_configuration(
        username=username,
        address=address,
        ssh_port=ssh_port,
        password=password,
        private_key_file=private_key_file,
        private_key_passphrase=private_key_passphrase)
    ComputeTarget.attach(target_workspace, name, remote_config)

    follow_up = (
        'Resource attach submitted successfully.',
        'To see if your compute target is ready to use, run:',
        '  az ml computetarget show -n {}'.format(name),
    )
    for line in follow_up:
        print(line)
コード例 #7
0
def create_cluster(batchai_cluster_name,
                   vm_size="STANDARD_D2_V2",
                   cluster_min_nodes=0,
                   cluster_max_nodes=2,
                   autoscale_enabled=True):
    """Find or create a Batch AI cluster in the workspace from local config.

    An existing compute target with the given name is returned even when it
    is not a Batch AI cluster; in that case only a warning is printed.

    Args:
        batchai_cluster_name: Name of the compute target to find or create.
        vm_size: VM size for a newly created cluster.
        cluster_min_nodes: Minimum node count for a newly created cluster.
        cluster_max_nodes: Maximum node count for a newly created cluster.
        autoscale_enabled: Autoscale flag for a newly created cluster.

    Returns:
        The found or newly created compute target.
    """
    workspace = Workspace.from_config()
    try:
        cluster = ComputeTarget(workspace=workspace, name=batchai_cluster_name)
    except ComputeTargetException:
        print('creating a new compute target...')
        provisioning = BatchAiCompute.provisioning_configuration(
            vm_size=vm_size,
            autoscale_enabled=autoscale_enabled,
            cluster_min_nodes=cluster_min_nodes,
            cluster_max_nodes=cluster_max_nodes)
        cluster = ComputeTarget.create(workspace, batchai_cluster_name,
                                       provisioning)
        cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
        print(cluster.status.serialize())
    else:
        if type(cluster) is BatchAiCompute:
            notice = 'found compute target {}, just use it.'
        else:
            notice = '{} exists but it is not a Batch AI cluster. Please choose a different name.'
        print(notice.format(batchai_cluster_name))
    return cluster
コード例 #8
0
def get_or_create_compute(workspace, cpu_cluster_name, compute_vm_size,
                          max_nodes):
    """Return the named compute target, creating an AmlCompute cluster if needed.

    ``wait_for_completion`` is called on the target in both the found and the
    created case.

    Args:
        workspace: Azure ML workspace to search or create in.
        cpu_cluster_name: Name of the compute target.
        compute_vm_size: VM size used when a new cluster is created.
        max_nodes: Maximum node count used when a new cluster is created.

    Returns:
        The found or newly created compute target.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cpu_cluster_name)
    except ComputeTargetException:
        provisioning = AmlCompute.provisioning_configuration(
            vm_size=compute_vm_size, min_nodes=0, max_nodes=max_nodes)
        cluster = ComputeTarget.create(
            workspace, cpu_cluster_name, provisioning)

    cluster.wait_for_completion(show_output=True)
    return cluster
コード例 #9
0
def get_db_compute(ws: Workspace) -> DatabricksCompute:
    """Return the Databricks compute target, attaching it when not yet present.

    The target name, resource group, Databricks workspace name and access
    token come from module-level settings.

    Args:
        ws: Azure ML workspace to look the compute target up in.

    Returns:
        The found or newly attached compute target.
    """
    try:
        return ComputeTarget(ws, db_compute_name)
    except ComputeTargetException:
        # Lookup failed: attach the Databricks workspace as a new target.
        databricks_config = DatabricksCompute.attach_configuration(
            resource_group=db_rg,
            workspace_name=db_workspace_name,
            access_token=db_access_token)
        attached = ComputeTarget.attach(ws, db_compute_name, databricks_config)
        attached.wait_for_completion(True)
        return attached
コード例 #10
0
def get_or_create_amlcompute(
    workspace,
    compute_name,
    vm_size="",
    min_nodes=0,
    max_nodes=None,
    idle_seconds_before_scaledown=None,
    verbose=False,
):
    """
        Get or create AmlCompute as the compute target. If a cluster of the same name is found,
        reuse it and, when ``max_nodes`` is given and exceeds the current node count, rescale it.
        Otherwise, create a new cluster.

    Args:
        workspace (Workspace): workspace
        compute_name (str): name
        vm_size (str, optional): vm size
        min_nodes (int, optional): minimum number of nodes in cluster
        max_nodes (int, optional): maximum number of nodes in cluster; when None an
            existing cluster is returned without rescaling
        idle_seconds_before_scaledown (None, optional): how long to wait before the cluster
            autoscales down
        verbose (bool, optional): if true, print logs
    Returns:
        Compute target
    """
    try:
        compute_target = ComputeTarget(workspace=workspace, name=compute_name)
        # Report the hit only after the lookup has actually succeeded.
        if verbose:
            print("Found compute target: {}".format(compute_name))

        # With no requested maximum there is nothing to compare against;
        # comparing an int with None would raise TypeError.
        if max_nodes is not None and len(
                compute_target.list_nodes()) < max_nodes:
            if verbose:
                print("Rescaling to {} nodes".format(max_nodes))
            compute_target.update(max_nodes=max_nodes)
            compute_target.wait_for_completion(show_output=verbose)

    except ComputeTargetException:
        if verbose:
            print("Creating new compute target: {}".format(compute_name))

        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_seconds_before_scaledown,
        )
        compute_target = ComputeTarget.create(workspace, compute_name,
                                              compute_config)
        compute_target.wait_for_completion(show_output=verbose)

    return compute_target
コード例 #11
0
def attach_databricks(name,
                      access_token,
                      compute_resource_id,
                      workspace_name=None,
                      resource_group_name=None):
    """Submit a request to attach a Databricks resource as a compute target.

    Prints progress messages and returns nothing; it does not wait for the
    attach operation to finish.

    Args:
        name: Name to give the attached compute target.
        access_token: Databricks access token.
        compute_resource_id: Resource id of the Databricks resource.
        workspace_name: Workspace name passed to the workspace lookup.
        resource_group_name: Resource group passed to the workspace lookup.
    """
    target_workspace = get_workspace_or_default(workspace_name,
                                                resource_group_name)

    print('Attaching compute resource...')
    databricks_config = DatabricksCompute.attach_configuration(
        resource_id=compute_resource_id, access_token=access_token)
    ComputeTarget.attach(target_workspace, name, databricks_config)

    follow_up = (
        'Resource attach submitted successfully.',
        'To see if your compute target is ready to use, run:',
        '  az ml computetarget show -n {}'.format(name),
    )
    for line in follow_up:
        print(line)
コード例 #12
0
 def get_compute_target(self, compute_name, vm_size=None):
     """Return the named compute target, creating a 1-2 node cluster if missing.

     :param compute_name: Name of the compute target to find or create.
     :param vm_size: VM size used only when a new cluster is created.
     :returns: The found or newly created compute target.
     """
     try:
         target = ComputeTarget(workspace=self.workspace, name=compute_name)
     except ComputeTargetException:
         print('Creating a new compute target...')
         provisioning = AmlCompute.provisioning_configuration(
             vm_size=vm_size, min_nodes=1, max_nodes=2)
         target = ComputeTarget.create(self.workspace, compute_name,
                                       provisioning)
         target.wait_for_completion(show_output=True, timeout_in_minutes=20)
     else:
         print('Found existing compute target')
     return target
コード例 #13
0
def _attach_compute_internal(name,
                             compute_resource_id,
                             compute_type,
                             workspace_name=None,
                             resource_group_name=None):
    """Submit a request to attach an existing resource as a compute target.

    Prints progress messages and returns nothing; it does not wait for the
    attach operation to finish.

    Args:
        name: Name to give the attached compute target.
        compute_resource_id: Resource id of the resource to attach.
        compute_type: Object whose ``attach_configuration`` builds the attach
            configuration from the resource id.
        workspace_name: Workspace name passed to the workspace lookup.
        resource_group_name: Resource group passed to the workspace lookup.
    """
    target_workspace = get_workspace_or_default(
        workspace_name=workspace_name, resource_group=resource_group_name)

    print('Attaching compute resource...')
    configuration = compute_type.attach_configuration(
        resource_id=compute_resource_id)
    ComputeTarget.attach(target_workspace, name, configuration)

    follow_up = (
        'Resource attach submitted successfully.',
        'To see if your compute target is ready to use, run:',
        '  az ml computetarget show -n {}'.format(name),
    )
    for line in follow_up:
        print(line)
コード例 #14
0
ファイル: rutils.py プロジェクト: tdard/computer-vision
def create_gpu_target(workspace, name):
    """Return the named compute target, creating a Standard_NC6 cluster if missing.

    ``wait_for_completion`` is called on the target in both the found and the
    created case.

    Args:
        workspace: Azure ML workspace to search or create in.
        name: Name of the compute target.

    Returns:
        The found or newly created compute target.
    """
    try:
        gpu_target = ComputeTarget(workspace=workspace, name=name)
    except ComputeTargetException:
        # GPU: Tesla K80 . NC12 has 2 of them
        gpu_config = AmlCompute.provisioning_configuration(
            vm_size="Standard_NC6", max_nodes=8)
        gpu_target = ComputeTarget.create(
            workspace=workspace,
            name=name,
            provisioning_configuration=gpu_config)
    else:
        print("Found existing compute target, use it.")

    gpu_target.wait_for_completion(show_output=True)
    return gpu_target
コード例 #15
0
def get_aks(workspace: Workspace, compute_name: str):
    """Return the named AKS compute target, creating a dev/test cluster if needed.

    A new cluster is created when the workspace has no target with this name
    or when the existing target is not exactly an ``AksCompute``.

    Args:
        workspace: Azure ML workspace to search or create in.
        compute_name: Name of the AKS compute target.

    Returns:
        The found or newly created compute target.

    Raises:
        ComputeTargetException: Re-raised after being printed.
    """
    try:
        existing = workspace.compute_targets.get(compute_name)
        if existing is not None and type(existing) is AksCompute:
            print('Found existing compute target ' + compute_name +
                  ' so using it.')  # noqa: E127
            return existing

        aks_config = AksCompute.provisioning_configuration(
            cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST)
        print("No Azure Kubernetes Service cluster found, creating one now...")

        created = ComputeTarget.create(
            workspace=workspace,
            name=compute_name,
            provisioning_configuration=aks_config)
        created.wait_for_completion(show_output=True)
        return created
    except ComputeTargetException as error:
        print(error)
        print('An error occurred trying to provision compute.')
        raise
コード例 #16
0
    def get_compute_object(ws,
                           compute_name,
                           size="STANDARD_NC6",
                           min_nodes=1,
                           max_nodes=4):
        """
        get_compute_object - Return the named compute target, creating it if absent.

        :param Workspace ws: AMLS Workspace object.
        :param str compute_name: AMLS compute name.
        :param str size: VM size used when a new cluster is created.
        :param int min_nodes: Minimum node count for a new cluster.
        :param int max_nodes: Maximum node count for a new cluster.

        :returns: The compute target registered under ``compute_name``, or the
            newly created one.
        :rtype: azureml.core.compute.ComputeTarget
        """
        # Guard clause: reuse whatever is already registered under this name.
        if compute_name in ws.compute_targets:
            return ws.compute_targets[compute_name]

        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=size, min_nodes=min_nodes, max_nodes=max_nodes)
        new_target = ComputeTarget.create(ws, compute_name, cluster_config)
        new_target.wait_for_completion(show_output=True)
        return new_target
コード例 #17
0
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the named compute target, creating an AmlCompute cluster if absent.

    Settings for a new cluster (priority, min/max nodes) are read from the
    environment after ``load_dotenv()`` has been called. An existing target
    is returned whatever its type; the "found" message is printed only when
    it is exactly an ``AmlCompute``.

    Args:
        workspace: Azure ML workspace to search or create in.
        compute_name: Name of the compute target.
        vm_size: VM size used when a new cluster is created.

    Returns:
        The found or newly created compute target.

    Raises:
        SystemExit: With status 1 when a ``ComputeTargetException`` occurs.
    """
    import sys

    load_dotenv()
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'lowpriority'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)),
                idle_seconds_before_scaledown="300")
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A provisioning failure must end with a non-zero status so callers
        # and CI pipelines can detect it; a bare exit() reports success.
        sys.exit(1)
コード例 #18
0
ファイル: attach_compute.py プロジェクト: R0M0/rmMLOpsPython
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the named compute target, creating an AmlCompute cluster if absent.

    Settings for a new cluster (priority, min/max nodes) are taken from an
    ``Env`` instance. An existing target is returned whatever its type; the
    "found" message is printed only when it is exactly an ``AmlCompute``.

    Args:
        workspace: Azure ML workspace to search or create in.
        compute_name: Name of the compute target.
        vm_size: VM size used when a new cluster is created.

    Returns:
        The found or newly created compute target.

    Raises:
        SystemExit: With status 1 when a ``ComputeTargetException`` occurs.
    """
    try:
        if compute_name not in workspace.compute_targets:
            settings = Env()
            # For VNet support, additionally pass vnet_resourcegroup_name,
            # vnet_name and subnet_name to provisioning_configuration.
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=settings.vm_priority,
                min_nodes=settings.min_nodes,
                max_nodes=settings.max_nodes,
                idle_seconds_before_scaledown="300")
            target = ComputeTarget.create(workspace, compute_name,
                                          cluster_config)
            target.wait_for_completion(
                show_output=True, min_node_count=None, timeout_in_minutes=10)
        else:
            target = workspace.compute_targets[compute_name]
            if target and type(target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        return target
    except ComputeTargetException as error:
        print(error)
        print('An error occurred trying to provision compute.')
        exit(1)
コード例 #19
0
ファイル: utils.py プロジェクト: ashishonce/amlworkspace
def create_compute_target(workspace, name, config):
    """Create a compute target and verify that provisioning succeeded.

    Progress and errors are printed with ``::debug::`` / ``::error::``
    prefixes.

    Args:
        workspace: Azure ML workspace in which to create the target.
        name: Name of the compute target.
        config: Provisioning configuration passed to ``ComputeTarget.create``.

    Returns:
        The created compute target, whose provisioning state is "Succeeded".

    Raises:
        AMLConfigurationException: If creation raises a
            ``ComputeTargetException``; the original error is chained as the
            cause.
        AMLComputeException: If the final provisioning state is not
            "Succeeded".
    """
    print("::debug::Creating compute target")
    try:
        compute_target = ComputeTarget.create(
            workspace=workspace, name=name, provisioning_configuration=config)
        compute_target.wait_for_completion(show_output=True)
    except ComputeTargetException as exception:
        print(
            f"::error::Could not create compute target with specified parameters: {exception}"
        )
        # Chain the SDK error so the root cause stays in the traceback.
        raise AMLConfigurationException(
            "Could not create compute target with specified parameters. Please review the provided parameters."
        ) from exception

    print("::debug::Checking state of compute target")
    if compute_target.provisioning_state != "Succeeded":
        failure_message = (
            f"Deployment of compute target '{compute_target.name}' failed with state '{compute_target.provisioning_state}'. Please delete the compute target manually and retry."
        )
        print(f"::error::{failure_message}")
        raise AMLComputeException(failure_message)
    return compute_target
コード例 #20
0
def create_compute(ws, gpus):
    """Create an Azure ML GPU compute cluster sized for ``gpus`` GPUs per node.

    Supported values are 1, 2 and 4, which select the NC6, NC12 and NC24
    promo VM sizes respectively. The cluster scales between 0 and 10 nodes.

    Args:
        ws: Azure ML workspace in which to create the cluster.
        gpus: Number of GPUs per node (1, 2 or 4).

    Returns:
        The compute target returned by ``ComputeTarget.create``, or ``None``
        when ``gpus`` is not a supported value (a message is printed and
        nothing is created).
    """
    # GPU count -> (cluster name, Azure VM size)
    clusters_by_gpu_count = {
        1: ("gpu-cluster-NC6", 'Standard_NC6_Promo'),
        2: ("gpu-cluster-NC12", 'Standard_NC12_Promo'),
        4: ("gpu-cluster-NC24", 'Standard_NC24_Promo'),
    }
    try:
        compute_name, vm_size = clusters_by_gpu_count[gpus]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which are simply invalid counts.
        print(gpus, 'is not a valid number of GPUs.  No compute was created')
        return None

    # define the cluster and the max and min number of nodes
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size, min_nodes=0, max_nodes=10)
    # Hand the new cluster back to the caller instead of discarding it.
    return ComputeTarget.create(ws, compute_name, provisioning_config)
コード例 #21
0
def ConnectToAzure():
    """
        Connect to the Azure workspace, compute target, datastore and experiment.

        Each handle is stored in a module-level global (``az_workspace``,
        ``az_computetarget``, ``az_datastore``, ``az_experiment``) and its
        name is printed.
    """
    global az_workspace, az_computetarget, az_datastore, az_experiment

    # Workspace: loaded via Workspace.from_config(); per the original notes a
    # config.json (generated from the Azure portal) is expected in ./azureml.
    az_workspace = Workspace.from_config()
    print("Workspace:", az_workspace.name)

    # Training compute: per the original notes the target must belong to the
    # workspace and is limited to the workspace region.
    az_computetarget = ComputeTarget(workspace=az_workspace,
                                     name="AzPytrch-NC6")
    print("Compute Target:", az_computetarget.name)

    # Datastore holding the training images: the workspace default.
    az_datastore = Datastore.get_default(az_workspace)
    print("Datastore:", az_datastore.name)

    # Experiment under which runs are recorded.
    az_experiment = Experiment(workspace=az_workspace, name='616_Final')
    print("Experiment:", az_experiment.name)
コード例 #22
0
ファイル: runOnAzure.py プロジェクト: georgiosN/NeuroLog
def main():
    """Submit the graph2seq train-and-test script to AzureML and wait for it.

    Connection details are read from ``authentication.json`` under the key
    given by ``args.auth_cluster``; the Docker image is built from the local
    ``Dockerfile``. Command-line options come from the module-level ``args``.
    """
    with open("authentication.json") as auth_file:
        cluster_auth = json.load(auth_file)[args.auth_cluster]

    # AzureML subscription details (taken from the Azure Portal)
    subscription_id = cluster_auth["subID"]  # used for billing
    resource_group = cluster_auth["resGroup"]
    workspace_name = cluster_auth["wsName"]  # compute clusters + experiments
    compute_name = cluster_auth["computeName"]

    # Workspace and compute target
    ws = Workspace.get(workspace_name,
                       subscription_id=subscription_id,
                       resource_group=resource_group)
    compute_target = ComputeTarget(ws, compute_name)

    # Path to the dataset. With RichPath this is prefixed with azure://
    # (e.g. "azure://example1234/data/", together with a generated SAS key
    # placed into azureinfo.json); otherwise it is where the AzureML
    # Datastore will be mounted.
    datapath_prefix = cluster_auth["dataPath"]
    script_folder = "."

    script_params = OrderedDict()
    script_params[datapath_prefix + args.train_file_name] = ""
    script_params[datapath_prefix + args.validate_file_name] = ""
    script_params[datapath_prefix + args.test_file_name] = ""
    script_params["./model.pkl.gz"] = ""
    script_params["--max-num-epochs"] = args.max_epochs
    script_params["--aml"] = ""
    script_params["--azure-info"] = "azureinfo.json"
    script_params["--quiet"] = ""
    # Optional flag: predict statements
    if args.predicting_statement:
        script_params["--predicting-statement"] = ""

    with open("Dockerfile") as dockerfile:
        dockerfile_text = dockerfile.read()
    docker = DockerSection()
    docker.base_image = None
    docker.base_dockerfile = dockerfile_text
    docker.enabled = True

    environment = Environment(name="pytorchenv")
    environment.docker = docker
    environment.python.user_managed_dependencies = True

    estimator = Estimator(
        source_directory=script_folder,
        script_params=script_params,
        compute_target=compute_target,
        entry_script="ptgnn/implementations/graph2seq/trainandtest.py",
        environment_definition=environment,
        use_docker=True,
    )

    # Submit the experiment
    experiment = Experiment(workspace=ws, name=args.exp_name)
    run = experiment.submit(config=estimator, tags=args.tags)
    print(
        "Experiment Started. Remember you can exit out of this program but the experiment will still run on Azure!"
    )
    run.wait_for_completion(show_output=True)
コード例 #23
0
def shutdownComputeInstances():
    """Stop every compute instance in the configured Azure ML workspace.

    Service-principal credentials and the workspace coordinates are read from
    environment variables; a missing variable raises ``KeyError``.
    """
    # Service principal details from app settings, read in a fixed order.
    setting_names = ("subscriptionID", "tenantID", "clientID", "secret",
                     "resourceGroupName", "amlWorkspaceName")
    (subscription_id, tenant_id, client_id, sp_secret,
     resource_group, workspace_name) = (os.environ[key]
                                        for key in setting_names)

    # Authenticate to the AML workspace with the service principal
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=client_id,
        service_principal_password=sp_secret)

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name,
                   auth=sp_auth)

    # Walk the workspace compute and stop only compute instances
    for compute in ComputeTarget.list(ws):
        if compute.type != 'ComputeInstance':
            continue
        logging.info("stop compute instance")
        compute.stop()
コード例 #24
0
    def create_aml_compute(self, ws):
        """Return the configured AmlCompute cluster, creating it if absent.

        The cluster name, node bounds and VM size are read from the
        ``AML_COMPUTE_CLUSTER_NAME``, ``AML_COMPUTE_CLUSTER_MIN_NODES``,
        ``AML_COMPUTE_CLUSTER_MAX_NODES`` and ``AML_COMPUTE_CLUSTER_SKU``
        environment variables, with defaults "cpucluster", 0, 4 and
        "STANDARD_D2_V2". An existing target is returned whatever its type.

        :param ws: Azure ML workspace to search or create in.
        :returns: The found or newly created compute target.
        :raises ValueError: If a node-count variable is not an integer.
        """
        print("Creating new AML Compute")
        compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
        # Environment values are strings when set; node counts must be ints.
        compute_min_nodes = int(
            os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
        compute_max_nodes = int(
            os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
        print(" AML Compute " + compute_name + " min nodes " +
              str(compute_min_nodes) + " compute max nodes " +
              str(compute_max_nodes))
        # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
        vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

        if compute_name in ws.compute_targets:
            compute_target = ws.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('found compute target. just use it. ' + compute_name)
        else:
            print('creating a new compute target...')
            provisioning_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=compute_min_nodes,
                max_nodes=compute_max_nodes)
            # create the cluster
            print("Starting to create ACI Compute cluster")
            compute_target = ComputeTarget.create(ws, compute_name,
                                                  provisioning_config)
            # min_node_count=None leaves the node count to wait for
            # unspecified; the wait is capped at 20 minutes.
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)
        return compute_target
コード例 #25
0
    def get_or_create(self,
                      compute_name,
                      min_nodes=2,
                      max_nodes=4,
                      vm_size='Standard_D2_v2'):
        """Return the named Batch AI compute target, creating it if needed.

        A new autoscaling cluster is created when the workspace has no target
        with this name, or when the existing one is falsy or not exactly a
        ``BatchAiCompute``.

        :param compute_name: Name of the compute target.
        :param min_nodes: Minimum node count for a new cluster.
        :param max_nodes: Maximum node count for a new cluster.
        :param vm_size: VM size for a new cluster.
        :returns: The found or newly created compute target.
        """
        if compute_name in self.__workspace.compute_targets():
            candidate = self.__workspace.compute_targets()[compute_name]
            usable = bool(candidate) and type(candidate) is BatchAiCompute
            if usable:
                print('Found compute target: ' + compute_name)
                return candidate

        # Nothing reusable was found: provision a fresh cluster.
        print('Creating a new compute target...')
        batchai_config = BatchAiCompute.provisioning_configuration(
            vm_size=vm_size,
            cluster_min_nodes=min_nodes,
            cluster_max_nodes=max_nodes,
            autoscale_enabled=True)
        new_target = ComputeTarget.create(self.__workspace, compute_name,
                                          batchai_config)

        # Wait up to 20 minutes, without requesting a specific node count.
        new_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

        # Print the detailed cluster status.
        print(new_target.status.serialize())

        return new_target
コード例 #26
0
def get_or_create_compute(ws, compute_name="amlcpucluster"):
    """Return the compute target for ``ws``, provisioning an AmlCompute cluster if absent.

    Settings are read from environment variables when present:

    * ``AML_COMPUTE_CLUSTER_NAME`` overrides ``compute_name``.
    * ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 0) and
      ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 2) bound the cluster size.
    * ``AML_COMPUTE_CLUSTER_SKU`` (default ``STANDARD_D2_V2``) selects the VM size.

    Args:
        ws: Workspace object exposing a ``compute_targets`` mapping.
        compute_name: Cluster name used when the environment does not set one.

    Returns:
        The target already registered under the name (returned whatever its
        type), or the newly created compute target.

    Raises:
        ValueError: If a node-count environment variable is not an integer.
            Only raised when a new cluster has to be created.
    """
    # choose a name for your cluster
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", compute_name)

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. just use it. ' + compute_name)
        return compute_target

    # Environment values are always strings, so convert explicitly: the node
    # counts are then ints whether they come from the environment or from the
    # defaults.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 2))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name,
                                          provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # With min_node_count=None the cluster's own scale settings are used.
    compute_target.wait_for_completion(show_output=True,
                                       min_node_count=None,
                                       timeout_in_minutes=20)

    # The 'status' property gives a more detailed view of the AmlCompute state.
    print(compute_target.status.serialize())
    return compute_target
コード例 #27
0
def getComputeAML(ws, name="amlcluster"):
    """Fetch the AmlCompute cluster ``name`` from ``ws``, creating it when the name is unknown.

    When a cluster has to be created, its size comes from the environment:
    ``AZML_COMPUTE_MIN_NODES`` (default 0), ``AZML_COMPUTE_MAX_NODES``
    (default 3) and ``AZML_COMPUTE_VMSIZE`` (default ``Standard_D3_v2``).

    Args:
        ws: Workspace object exposing a ``compute_targets`` mapping.
        name: Name of the cluster to look up or create.

    Returns:
        The existing AmlCompute target, or the newly created target, or
        ``None`` when something is registered under ``name`` but it is not
        exactly an ``AmlCompute``.
    """
    if name not in ws.compute_targets:
        min_count = int(os.environ.get('AZML_COMPUTE_MIN_NODES', "0"))
        max_count = int(os.environ.get('AZML_COMPUTE_MAX_NODES', "3"))
        sku = os.environ.get('AZML_COMPUTE_VMSIZE', "Standard_D3_v2")

        print(f"### Creating cluster '{name}' this could take time...")
        cluster_spec = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_count,
            max_nodes=max_count,
            idle_seconds_before_scaledown=3600)

        # Kick off provisioning, then block until it finishes. With
        # min_node_count=None the cluster's own scale settings are used.
        cluster = ComputeTarget.create(ws, name, cluster_spec)
        cluster.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

        # get_status() gives a more detailed view of the AmlCompute state.
        print(cluster.get_status().serialize())
        return cluster

    existing = ws.compute_targets[name]
    if not existing or type(existing) is not AmlCompute:
        # The name is taken by something that is not an AmlCompute cluster:
        # nothing is created and the caller gets None.
        return None

    print(f"### Found existing cluster '{name}' so will use it")
    return existing
コード例 #28
0
    def executeAction(self,
                      parameters_file=None,
                      ws=None,
                      azure_credentials=None,
                      azure_computeTarget=None):
        """Validate the Azure credentials and return the named compute target.

        Args:
            parameters_file: Accepted but not used by this method.
            ws: Workspace passed to ``ComputeTarget`` for the lookup.
            azure_credentials: JSON text expected to contain the keys
                ``tenantId``, ``clientId`` and ``clientSecret``.
            azure_computeTarget: Name of the compute target to retrieve.

        Returns:
            The ``ComputeTarget`` constructed for ``azure_computeTarget`` in ``ws``.

        Raises:
            AMLConfigurationException: If ``azure_credentials`` is missing
                (e.g. left at its ``None`` default) or is not valid JSON.
        """
        try:
            azure_credentials = json.loads(azure_credentials)
        except (JSONDecodeError, TypeError) as err:
            # TypeError covers values json.loads cannot accept at all, such as
            # the default None, so they get the same guidance as malformed JSON.
            print(
                "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
            )
            raise AMLConfigurationException(
                "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-compute/blob/master/README.md"
            ) from err

        # Checking provided parameters
        # NOTE(review): required_parameters_provided is defined elsewhere;
        # presumably it raises when one of the keys is missing - confirm.
        print("::debug::Checking provided parameters")
        required_parameters_provided(
            parameters=azure_credentials,
            keys=["tenantId", "clientId", "clientSecret"],
            message=
            "Required parameter(s) not found in your azure credentials saved in AZURE_CREDENTIALS secret for logging in to the workspace. Please provide a value for the following key(s): "
        )

        # Look up the compute target by name; nothing is created or updated here.
        compute_target = ComputeTarget(workspace=ws, name=azure_computeTarget)

        print(
            f"::debug::Found compute target with same name. Not updating the compute target: {compute_target.serialize()}"
        )
        print(
            "::debug::Successfully finished Azure Machine Learning Compute Action"
        )
        return compute_target
コード例 #29
0
def createCompute(ws, args):
    """Return the compute target named ``args.clusterName``, creating it when absent.

    Args:
        ws: Workspace object exposing a ``compute_targets`` mapping.
        args: Object carrying ``clusterName`` and, for the creation path,
            ``minNodes``, ``maxNodes``, ``clusterSku`` and ``verbose``.

    Returns:
        The target already registered under the name (returned whatever its
        type), or the newly created compute target.
    """
    cluster_name = args.clusterName

    if cluster_name not in ws.compute_targets:
        print("Compute target {0} not found.".format(cluster_name))
        node_floor = args.minNodes
        node_ceiling = args.maxNodes
        sku = args.clusterSku
        print("Creating a new compute target {0}.".format(cluster_name))
        cluster_spec = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=node_floor,
            max_nodes=node_ceiling)

        # Start provisioning, then block until it finishes. With
        # min_node_count=None the cluster's own scale settings are used.
        cluster = ComputeTarget.create(ws, cluster_name, cluster_spec)
        cluster.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

        if args.verbose:
            # get_status() gives a more detailed view of the AmlCompute state.
            print(cluster.get_status().serialize())
        return cluster

    cluster = ws.compute_targets[cluster_name]
    if cluster and type(cluster) is AmlCompute:
        print("Found compute target       : {0}".format(cluster_name))
    return cluster
コード例 #30
0
    def _setup_compute(self):
        """
        Resolve ``self.compute_target`` for the workspace ``self.ws``.

        If ``self.comp_name`` is already among the workspace's compute
        targets, that target is stored and returned as-is. Otherwise an
        AmlCompute cluster is provisioned from ``self.comp_vm_size``,
        ``self.comp_min_nodes`` and ``self.comp_max_nodes``.

        :returns: the compute target stored on ``self.compute_target``
        :rtype: ComputeTarget
        """
        if self.comp_name not in self.ws.compute_targets:
            print("creating a new compute target...")
            cluster_spec = AmlCompute.provisioning_configuration(
                vm_size=self.comp_vm_size,
                min_nodes=self.comp_min_nodes,
                max_nodes=self.comp_max_nodes)

            self.compute_target = ComputeTarget.create(
                self.ws, self.comp_name, cluster_spec)
            # Block until provisioning finishes (up to 20 minutes).
            self.compute_target.wait_for_completion(show_output=True,
                                                    min_node_count=None,
                                                    timeout_in_minutes=20)

            print(self.compute_target.get_status().serialize())
            return self.compute_target

        self.compute_target = self.ws.compute_targets[self.comp_name]
        # The type check only controls the log line; the target is returned
        # either way.
        if self.compute_target and type(self.compute_target) is AmlCompute:
            print("Found compute target: " + self.comp_name)
        return self.compute_target