예제 #1
0
def get_aml_compute(workspace):
    """Return the AmlCompute target "example_vm_name", creating it when lookup fails.

    The lookup is attempted first; if it raises ``ComputeTargetException`` a
    new low-priority cluster is provisioned with the hard-coded settings
    below and awaited for up to 20 minutes.

    :param workspace: Workspace passed to the lookup and to the creation call.
    :returns: The existing or newly created compute target.
    """
    # TODO: Set desired name for compute target
    target_name = "example_vm_name"
    try:
        compute = AmlCompute(workspace, target_name)
    except ComputeTargetException:
        print("creating new compute target")

        # TODO: Configure desired VM, see: https://docs.microsoft.com/nl-nl/azure/virtual-machines/sizes-general
        cluster_settings = dict(
            vm_size="STANDARD_D1_V2",
            min_nodes=0,
            max_nodes=1,
            vnet_resourcegroup_name="",
            vnet_name="-vn",
            subnet_name="default",
            idle_seconds_before_scaledown=1800,
            vm_priority='lowpriority',
        )
        config = AmlCompute.provisioning_configuration(**cluster_settings)
        compute = ComputeTarget.create(workspace, target_name, config)
        compute.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
    else:
        print("found existing compute target.")
    print("Azure Machine Learning Compute attached")
    return compute
    def _create_or_update_cluster(self, min_nodes, max_nodes,
                                  idle_timeout_secs):
        """Apply scale settings to the named cluster, creating it if needed.

        An existing cluster (``self.cluster_name`` in ``self.workspace``) is
        updated in place. If the lookup or the update raises
        ``ComputeTargetException``, a new cluster is created instead. The
        result is stored on ``self.cluster`` and awaited.

        :param min_nodes: Minimum node count for the cluster.
        :param max_nodes: Maximum node count for the cluster.
        :param idle_timeout_secs: Idle seconds before scale-down.
        :raises RuntimeError: If fewer than ``min_nodes`` nodes are visible
            in ``self.cluster_nodes`` after one 30-second grace period.
        """
        try:
            self.cluster = AmlCompute(workspace=self.workspace,
                                      name=self.cluster_name)
            shown_name = colored(self.cluster_name, "green")
            print('Updating existing cluster "{}"'.format(shown_name))
            self.cluster.update(
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                idle_seconds_before_scaledown=idle_timeout_secs,
            )
        except ComputeTargetException:
            shown_name = colored(self.cluster_name, "green")
            print('Creating new cluster "{}"'.format(shown_name))
            new_config = AmlCompute.provisioning_configuration(
                vm_size=self.vm_type,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                idle_seconds_before_scaledown=idle_timeout_secs,
                admin_username=self.admin_username,
                admin_user_ssh_key=self.ssh_key,
                remote_login_port_public_access="Enabled",
            )
            self.cluster = AmlCompute.create(self.workspace,
                                             self.cluster_name, new_config)

        self.cluster.wait_for_completion()

        # Enough nodes already: nothing more to verify.
        if len(self.cluster_nodes) >= min_nodes:
            return
        # Give the cluster one more chance before declaring failure.
        sleep(30)
        if len(self.cluster_nodes) < min_nodes:
            raise RuntimeError("Failed to provision sufficient nodes")
예제 #3
0
def ComputeDelete():
    """Delete the compute target described by the request's JSON payload.

    Reads ``subscription_id``, ``resource_group``, ``workspace_name``,
    ``location``, ``Cluster_type`` and ``cluster_name`` from ``request.json``.
    A ``Cluster_type`` of ``'Training'`` is looked up as ``AmlCompute``;
    anything else is looked up as ``AksCompute``.

    :returns: ``"compute deleted"`` on success, otherwise the text of the
        exception raised during lookup or deletion.
    """
    payload = request.json
    subscription_id = payload['subscription_id']
    resource_group = payload['resource_group']
    workspace_name = payload['workspace_name']
    # Not used below; still read so a payload without it fails at this point.
    location = payload['location']
    cluster_kind = payload['Cluster_type']
    cluster_name = payload['cluster_name']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))

    try:
        is_training = cluster_kind == 'Training'
        compute_cls = AmlCompute if is_training else AksCompute
        target = compute_cls(ws, cluster_name)
        if is_training:
            print('Found existing AML compute context.')
        else:
            print('Found existing AKS compute context.')
        target.delete()
        print('compute deleted')
        return "compute deleted"
    except Exception as e:
        error_statement = str(e)
        print("Error statement: ", error_statement)
        return error_statement
예제 #4
0
def _deploy_azuremlcompute_clusters(workspace,
                                    default_cpu_compute_target=None,
                                    default_gpu_compute_target=None,
                                    show_output=True):
    cpu_compute_object = gpu_compute_object = None

    # Start creation of both computes
    if default_cpu_compute_target:
        cpu_compute_object = AmlCompute.create(
            workspace, Workspace.DEFAULT_CPU_CLUSTER_NAME,
            default_cpu_compute_target)
        if show_output:
            print("Deploying Compute Target with name {}".format(
                cpu_compute_object.name))
    if default_gpu_compute_target:
        gpu_compute_object = AmlCompute.create(
            workspace, Workspace.DEFAULT_GPU_CLUSTER_NAME,
            default_gpu_compute_target)
        if show_output:
            print("Deploying Compute Target with name {}".format(
                gpu_compute_object.name))

    # Wait for both computes to finish
    remaining_timeout_minutes = 10
    # The time when both computes started creating
    start_time = time.time()
    for compute_object in [cpu_compute_object, gpu_compute_object]:
        if compute_object:
            # The time since we've started checking this specific compute
            compute_start_time = time.time()
            compute_object.wait_for_completion(
                show_output=False,
                timeout_in_minutes=remaining_timeout_minutes)
            compute_time_taken = time.time() - compute_start_time

            time_taken = round(time.time() - start_time, 2)
            remaining_timeout_minutes = remaining_timeout_minutes - \
                (compute_time_taken / 60)

            provision_status = compute_object.get_status()
            if not provision_status or provision_status.provisioning_state != "Succeeded":
                errors = getattr(provision_status, "errors", [])
                if remaining_timeout_minutes <= 0:
                    errors.append("Creation has exceeded timeout")
                raise ValueError(
                    "Compute creation failed for {} with errors: {}".format(
                        compute_object.name, errors))
            if show_output:
                print("Deployed Compute Target with name {}. Took {} seconds".
                      format(compute_object.name, time_taken))
예제 #5
0
def choose_compute_target(workspace, name):
    """Return the AmlCompute target ``name``, creating it if lookup fails.

    When the lookup raises, a ``STANDARD_D2_V2`` cluster with 1-4 nodes is
    created under the same name and awaited.

    :param workspace: Workspace used for both lookup and creation.
    :param name: Name of the compute target.
    :returns: The existing or newly created compute target.
    """
    try:
        aml_compute = AmlCompute(workspace, name)
        print("Found existing compute target: {}".format(name))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not mistaken for "target not found".
        print("Creating new compute target: {}".format(name))

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2",
            min_nodes=1,
            max_nodes=4)
        aml_compute = ComputeTarget.create(workspace, name, provisioning_config)
        aml_compute.wait_for_completion(show_output=True)
    print(aml_compute)
    return aml_compute
예제 #6
0
    def create_aml_compute(self, ws):
        """Return the compute target named by the environment, creating it if absent.

        Settings come from ``AML_COMPUTE_CLUSTER_NAME`` (default
        ``"cpucluster"``), ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 0),
        ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 4) and
        ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

        :param ws: Workspace whose ``compute_targets`` mapping is consulted.
        :returns: The entry already registered under the name (whatever its
            type), or the newly created target.
        :raises ValueError: If a node-count variable is set to a non-integer.
        """
        # choose a name for your cluster
        print("Creating new AML Compute")
        compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
        # Values read from the environment are strings; convert them so the
        # provisioning configuration always receives integer node counts.
        compute_min_nodes = int(
            os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
        compute_max_nodes = int(
            os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
        print(" AML Compute " + compute_name + " min nodes " +
              str(compute_min_nodes) + " compute max nodes " +
              str(compute_max_nodes))
        # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
        vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

        if compute_name in ws.compute_targets:
            compute_target = ws.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('found compute target. just use it. ' + compute_name)
        else:
            print('creating a new compute target...')
            provisioning_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=compute_min_nodes,
                max_nodes=compute_max_nodes)
            # create the cluster
            print("Starting to create ACI Compute cluster")
            compute_target = ComputeTarget.create(ws, compute_name,
                                                  provisioning_config)
            # can poll for a minimum number of nodes and for a specific timeout.
            # if no min node count is provided it will use the scale settings for the cluster
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)
        return compute_target
def create_compute(ws, gpus):
    '''Create an Azure compute cluster chosen by the requested GPU count.

    Supported values of ``gpus`` are 1, 2 and 4; each maps to a fixed cluster
    name and VM size. The cluster scales between 0 and 10 nodes.

    :param ws: Workspace in which the cluster is created.
    :param gpus: Requested GPU count selecting the cluster name and VM size.
    :returns: The object returned by ``ComputeTarget.create``, or ``None``
        (after printing a message) when ``gpus`` is not a supported value.
        Creation is not awaited here.
    '''
    # (gpu count, cluster name, Azure VM size)
    # An alternative single-GPU option is "gpu-cluster-NC4as" /
    # 'Standard_NC4as_T4_v3'.
    cluster_specs = (
        (1, "gpu-cluster-NC6", 'Standard_NC6_Promo'),
        (2, "gpu-cluster-NC12", 'Standard_NC12_Promo'),
        (4, "gpu-cluster-NC24", 'Standard_NC24_Promo'),
    )
    spec = next((entry for entry in cluster_specs if gpus == entry[0]), None)
    if spec is None:
        print(gpus, 'is not a valid number of GPUs.  No compute was created')
        return None
    _, compute_name, vm_size = spec

    # define the cluster and the max and min number of nodes
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size, min_nodes=0, max_nodes=10)
    # create the cluster and hand it back so callers can wait on or use it
    return ComputeTarget.create(ws, compute_name, provisioning_config)
예제 #8
0
def ComputeExist():
    """Report whether the compute named in the request's JSON payload exists.

    Reads ``subscription_id``, ``resource_group``, ``workspace_name``,
    ``location``, ``Cluster_type`` and ``cluster_name`` from ``request.json``.
    A ``Cluster_type`` of ``'Training'`` is looked up as ``AmlCompute``;
    anything else is looked up as ``AksCompute``.

    :returns: ``"compute exist"`` if the lookup succeeds, otherwise
        ``"compute not exist"``.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    # Not used below; still read so a payload without it fails at this point.
    location = request.json['location']
    Cluster_type = request.json['Cluster_type']
    cluster_name = request.json['cluster_name']
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')
    try:
        # Only the success or failure of the lookup matters; the returned
        # object is not needed.
        if Cluster_type == 'Training':
            AmlCompute(ws, cluster_name)
        else:
            AksCompute(ws, cluster_name)
        print('Found existing AML compute context.')
        return "compute exist"
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not reported as "compute not exist".
        print('need to create new Compute.')
        return "compute not exist"
예제 #9
0
    def create_amlcompute_cluster(self, pet_cluster_name, min_nodes, max_nodes,
                                  vm_size):
        """Bind ``self.pet_compute_target`` to the named cluster, creating it if needed.

        The node limits are recorded on ``self`` first. If looking the
        cluster up raises ``ComputeTargetException``, a new cluster is
        provisioned inside the virtual network described by
        ``self.vnet_name``, ``self.rg_name`` and ``self.subnet_name`` and
        awaited.

        :param pet_cluster_name: Name of the compute target.
        :param min_nodes: Minimum node count (used only on creation).
        :param max_nodes: Maximum node count (used only on creation).
        :param vm_size: VM size (used only on creation).
        :returns: ``self.pet_compute_target``.
        """
        self.min_nodes, self.max_nodes = min_nodes, max_nodes

        # Reuse the cluster when it is already registered in the workspace.
        try:
            self.pet_compute_target = ComputeTarget(workspace=self.ws,
                                                    name=pet_cluster_name)
            print('Found existing compute target.')
        except ComputeTargetException:
            print('Creating a new compute target...')
            network_settings = {
                "vnet_name": self.vnet_name,
                "vnet_resourcegroup_name": self.rg_name,
                "subnet_name": self.subnet_name,
            }
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                min_nodes=min_nodes,
                max_nodes=max_nodes,
                **network_settings)

            # Create the cluster and block until provisioning finishes.
            created = ComputeTarget.create(self.ws, pet_cluster_name,
                                           cluster_config)
            self.pet_compute_target = created
            self.pet_compute_target.wait_for_completion(show_output=True)

        return self.pet_compute_target
예제 #10
0
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it if it is absent.

    Creation settings come from the environment (after ``load_dotenv()``):
    ``AML_CLUSTER_PRIORITY`` (default ``'dedicated'``),
    ``AML_CLUSTER_MIN_NODES`` (default 0) and ``AML_CLUSTER_MAX_NODES``
    (default 2). The idle scale-down delay is 120 seconds.

    :param workspace: Workspace whose ``compute_targets`` mapping is consulted.
    :param compute_name: Name of the compute target.
    :param vm_size: VM size used only when a new cluster is created.
    :returns: The entry already registered under the name (whatever its
        type), or the newly created target.
    :raises SystemExit: With status 1 if a ``ComputeTargetException`` occurs.
    """
    # Load the environment variables from .env in case this script
    # is called outside an existing process
    load_dotenv()
    # Verify that cluster does not exist already
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'dedicated'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 2)),
                # Passed as a number of seconds, not a string.
                idle_seconds_before_scaledown=120,
                #    #Uncomment the below lines for VNet support
                #    vnet_resourcegroup_name=vnet_resourcegroup_name,
                #    vnet_name=vnet_name,
                #    subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A provisioning failure must not look like success to the caller's
        # shell or CI job, so exit with a non-zero status.
        raise SystemExit(1)
예제 #11
0
    def getComputeClusterResource(
        ws: Workspace,
        compute_name: str,
        vm_size: str = "Standard_DS1_v2",
        min_nodes=0,
        max_nodes=1,
    ):
        """Return the AmlCompute target ``compute_name``, creating it otherwise.

        An entry already registered under the name is returned only when it
        is truthy and its type is exactly ``AmlCompute``; in every other case
        a new cluster is created under that name and awaited for up to 20
        minutes.

        :param ws: Workspace whose ``compute_targets`` mapping is consulted.
        :param compute_name: Name of the compute target.
        :param vm_size: VM size for a newly created cluster.
        :param min_nodes: Minimum node count for a newly created cluster.
        :param max_nodes: Maximum node count for a newly created cluster.
        :returns: The existing or newly created compute target.
        """
        name_registered = compute_name in ws.compute_targets
        if name_registered:
            existing = ws.compute_targets[compute_name]
            if existing and type(existing) is AmlCompute:
                return existing

        # No reusable target: provision a new one.
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes)
        created = ComputeTarget.create(ws, compute_name, cluster_config)
        created.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
        return created
예제 #12
0
    def exec_ComputeTargetSetup(
            self, Parameters: ComputeTargetSetupParameter) -> ExecResult:
        """Attach ``self.compute_target`` to the named cluster, capturing printed output.

        ``sys.stdout`` is redirected to an in-memory buffer for the duration
        of the call. The cluster ``Parameters.ClusterName`` is taken from
        ``self.ws.compute_targets``; on ``KeyError`` a new ``Standard_NC6``
        cluster (0-4 nodes, 1800 s idle scale-down) is created. Either way
        the target is awaited for up to 20 minutes.

        :param Parameters: Object providing the ``ClusterName`` attribute.
        :returns: ``ExecResult`` built from a success flag and the captured
            output text.
        """
        execResult = False
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            self.compute_target = self.ws.compute_targets[
                Parameters.ClusterName]
            print('Found existing compute target.')
            self.compute_target.wait_for_completion(show_output=True,
                                                    min_node_count=None,
                                                    timeout_in_minutes=20)
            execResult = True
        except KeyError:
            print('Creating a new compute target...')
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='Standard_NC6',
                idle_seconds_before_scaledown=1800,
                min_nodes=0,
                max_nodes=4)
            self.compute_target = ComputeTarget.create(self.ws,
                                                       Parameters.ClusterName,
                                                       compute_config)
            execResult = True
            self.compute_target.wait_for_completion(show_output=True,
                                                    min_node_count=None,
                                                    timeout_in_minutes=20)
        except Exception as ex:
            print(ex)
        finally:
            # Always undo the redirection. An error raised while creating the
            # cluster inside the KeyError handler is not caught by the
            # sibling ``except Exception`` and would otherwise leave stdout
            # pointing at the buffer for the rest of the process.
            sys.stdout = old_stdout

        return ExecResult(execResult, mystdout.getvalue())
예제 #13
0
def create_aml_cluster(workspace, parameters):
    """Build an AmlCompute configuration from ``parameters`` and create the cluster.

    VNet settings are applied only when ``vnet_resource_group_name``,
    ``vnet_name`` and ``subnet_name`` are all truthy. Admin credentials are
    applied only when ``admin_username`` is paired with
    ``admin_user_password`` or, failing that, with ``admin_user_ssh_key``.

    :param workspace: Workspace passed through to ``create_compute_target``.
    :param parameters: Mapping of cluster settings, read with ``.get``.
    :returns: Whatever ``create_compute_target`` returns.
    """
    print("::debug::Creating aml cluster configuration")
    aml_config = AmlCompute.provisioning_configuration(
        vm_size=parameters.get("vm_size"),
        vm_priority=parameters.get("vm_priority", "dedicated"),
        min_nodes=parameters.get("min_nodes", 0),
        max_nodes=parameters.get("max_nodes", 4),
        idle_seconds_before_scaledown=parameters.get("idle_seconds_before_scaledown"),
        tags={"Created": "GitHub Action: Azure/aml-compute"},
        description="AML Cluster created by Azure/aml-compute GitHub Action",
        remote_login_port_public_access=parameters.get("remote_login_port_public_access", "NotSpecified")
    )

    print("::debug::Adding VNET settings to configuration if all required settings were provided")
    vnet_resource_group = parameters.get("vnet_resource_group_name")
    vnet_name = parameters.get("vnet_name")
    subnet_name = parameters.get("subnet_name")
    if vnet_resource_group and vnet_name and subnet_name:
        aml_config.vnet_resourcegroup_name = vnet_resource_group
        aml_config.vnet_name = vnet_name
        aml_config.subnet_name = subnet_name

    print("::debug::Adding credentials to configuration if all required settings were provided")
    admin_username = parameters.get("admin_username")
    admin_password = parameters.get("admin_user_password")
    admin_ssh_key = parameters.get("admin_user_ssh_key")
    # A password takes precedence over an SSH key when both are supplied.
    if admin_username and admin_password:
        aml_config.admin_username = admin_username
        aml_config.admin_user_password = admin_password
    elif admin_username and admin_ssh_key:
        aml_config.admin_username = admin_username
        aml_config.admin_user_ssh_key = admin_ssh_key

    print("::debug::Creating compute target")
    aml_cluster = create_compute_target(
        workspace=workspace,
        name=parameters.get("name"),
        config=aml_config
    )
    return aml_cluster
예제 #14
0
    def __check_compute_target(self, compute_target, use_gpu: bool):
        """Verify that the target's VM size offers the requested kind of compute.

        The VM size is read from ``compute_target`` when it is an
        ``AmlCompute``, or from the ``ComputeTarget`` looked up by name when
        it is a string; otherwise it stays empty. The workspace's supported
        VM sizes are fetched once and cached on the instance. The first
        supported size whose name matches case-insensitively is inspected; an
        ``IndexError`` results when nothing matches.

        :param compute_target: An ``AmlCompute`` object or a target name.
        :param use_gpu: Whether GPU compute was requested.
        :raises errors.TrainingComputeException: If GPU compute was requested
            and the size reports 0 GPUs, or CPU compute was requested and the
            size reports 0 vCPUs.
        """
        target_vm_size = ''
        if isinstance(compute_target, AmlCompute):
            target_vm_size = compute_target.vm_size
        elif isinstance(compute_target, str):
            looked_up = ComputeTarget(workspace=self.__workspace,
                                      name=compute_target)
            target_vm_size = looked_up.vm_size

        # Fetch the supported sizes lazily and keep them for later calls.
        if self.__vm_size_list is None:
            self.__vm_size_list = AmlCompute.supported_vmsizes(
                self.__workspace)

        matching_sizes = [
            entry for entry in self.__vm_size_list
            if str.upper(entry['name']) == str.upper(target_vm_size)
        ]
        vm_description = matching_sizes[0]

        if use_gpu:
            if vm_description['gpus'] == 0:
                raise errors.TrainingComputeException(
                    f'gpu_compute was specified, but the target does not have GPUs: {vm_description} '
                )
        elif vm_description['vCPUs'] == 0:
            raise errors.TrainingComputeException(
                f'cpu_compute was specified, but the target does not have CPUs: {vm_description} '
            )
예제 #15
0
def prepare_remote_compute(ws):
    """Return the compute target named by the environment, creating it if absent.

    Settings come from ``AML_COMPUTE_CLUSTER_NAME`` (default
    ``"cpucluster"``), ``AML_COMPUTE_CLUSTER_MIN_NODES`` (default 1),
    ``AML_COMPUTE_CLUSTER_MAX_NODES`` (default 4) and
    ``AML_COMPUTE_CLUSTER_SKU`` (default ``"STANDARD_D2_V2"``).

    :param ws: Workspace whose ``compute_targets`` mapping is consulted.
    :returns: The entry already registered under the name (whatever its
        type), or the newly created target.
    :raises ValueError: If a node-count variable is set to a non-integer.
    """
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Values read from the environment are strings; convert them so the
    # provisioning configuration always receives integer node counts.
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. Using it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)
        # create the cluster
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use get_status()
        print(compute_target.get_status().serialize())

    return compute_target
예제 #16
0
def _create_cluster(
    workspace,
    cluster_name=_CLUSTER_NAME,
    vm_size=_CLUSTER_VM_SIZE,
    min_nodes=_CLUSTER_MIN_NODES,
    max_nodes=_CLUSTER_MAX_NODES,
):
    """Return the compute target ``cluster_name``, creating it if lookup fails.

    :param workspace: Workspace used for both lookup and creation.
    :param cluster_name: Name of the compute target.
    :param vm_size: VM size for a newly created cluster.
    :param min_nodes: Minimum node count for a newly created cluster.
    :param max_nodes: Maximum node count for a newly created cluster.
    :returns: The existing or newly created compute target.
    """
    log = logging.getLogger(__name__)
    try:
        target = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        new_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes)

        # Create the cluster and block until provisioning finishes.
        target = ComputeTarget.create(workspace, cluster_name, new_config)
        target.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # Record the detailed status of the target at debug level.
    status = target.get_status()
    log.debug(status.serialize())

    return target
예제 #17
0
def main():
    """Submit ``train.py`` as the 'surfrider_rcnn' experiment on the 'gandalf' compute.

    The workspace is loaded from the local configuration, the 'surfrider'
    datastore is mounted and passed to the script as ``--datastore``, and a
    GPU-enabled estimator with the pip packages listed below is submitted.
    """
    # Workspace, compute target and datastore
    workspace = Workspace.from_config()
    gpu_compute = AmlCompute(workspace=workspace, name='gandalf')
    surfrider_store = Datastore.get(workspace, datastore_name='surfrider')

    # Arguments handed to the training script
    training_args = {"--datastore": surfrider_store.as_mount()}

    # Python packages required by the training script
    required_packages = [
        'opencv-python>=4.1',
        'tensorpack==0.9.8',
        'tensorflow-gpu>=1.3,<2.0',
        'tqdm>=4.36.1',
        'cython>=0.29.13',
        'scipy>=1.3.1',
        'ffmpeg-python',
        'wget',
    ]

    # Create and run the experiment
    estimator = Estimator(
        source_directory='./',
        script_params=training_args,
        compute_target=gpu_compute,
        entry_script='train.py',
        use_gpu=True,
        pip_packages=required_packages,
    )
    experiment = Experiment(workspace, 'surfrider_rcnn')
    submitted_run = experiment.submit(estimator)
예제 #18
0
def createCompute(ws, args):
    """Return the compute target ``args.clusterName``, creating it if absent.

    :param ws: Workspace whose ``compute_targets`` mapping is consulted.
    :param args: Object providing ``clusterName`` and, when a cluster has to
        be created, ``minNodes``, ``maxNodes``, ``clusterSku`` and ``verbose``.
    :returns: The entry already registered under the name (whatever its
        type), or the newly created target.
    """
    cluster_name = args.clusterName

    # Reuse whatever is already registered under this name.
    if cluster_name in ws.compute_targets:
        existing = ws.compute_targets[cluster_name]
        if existing and type(existing) is AmlCompute:
            print("Found compute target       : {0}".format(cluster_name))
        return existing

    print("Compute target {0} not found.".format(cluster_name))
    node_floor = args.minNodes
    node_ceiling = args.maxNodes
    sku = args.clusterSku
    print("Creating a new compute target {0}.".format(cluster_name))
    cluster_config = AmlCompute.provisioning_configuration(
        vm_size=sku, min_nodes=node_floor, max_nodes=node_ceiling)

    # Create the cluster, then poll until it is ready. With no explicit
    # min_node_count the cluster's own scale settings are used.
    created = ComputeTarget.create(ws, cluster_name, cluster_config)
    created.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    if args.verbose:
        # Detailed view of the current AmlCompute status.
        print(created.get_status().serialize())
    return created
예제 #19
0
def _create_cluster(workspace, cluster_name, vm_size, min_nodes, max_nodes):
    """Return the AzureML cluster ``cluster_name``, creating it if lookup fails.

    Args:
        workspace: Workspace used for both lookup and creation.
        cluster_name (string): The name you wish to assign the cluster.
        vm_size (string): The type of sku to use for your vm.
        min_nodes (int): Minimum number of nodes in cluster.
                                    Use 0 if you don't want to incur costs when it isn't being used.
        max_nodes (int): Maximum number of nodes in cluster.

    Returns:
        The existing or newly created compute target.
    """
    log = logging.getLogger(__name__)
    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        new_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes
        )

        # Create the cluster and block until provisioning finishes.
        cluster = ComputeTarget.create(workspace, cluster_name, new_config)
        cluster.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # Record the serialized compute target at debug level.
    serialized = cluster.serialize()
    log.debug(serialized)

    return cluster
예제 #20
0
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it if it is absent.

    Creation settings come from the environment (after ``load_dotenv()``):
    ``AML_CLUSTER_PRIORITY`` (default ``'lowpriority'``),
    ``AML_CLUSTER_MIN_NODES`` (default 0) and ``AML_CLUSTER_MAX_NODES``
    (default 4). The idle scale-down delay is 300 seconds.

    :param workspace: Workspace whose ``compute_targets`` mapping is consulted.
    :param compute_name: Name of the compute target.
    :param vm_size: VM size used only when a new cluster is created.
    :returns: The entry already registered under the name (whatever its
        type), or the newly created target.
    :raises SystemExit: With status 1 if a ``ComputeTargetException`` occurs.
    """
    load_dotenv()
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'lowpriority'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)),
                # Passed as a number of seconds, not a string.
                idle_seconds_before_scaledown=300)
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # A provisioning failure must not look like success to the caller's
        # shell or CI job, so exit with a non-zero status.
        raise SystemExit(1)
예제 #21
0
def getComputeAML(ws, name="amlcluster"):
    """Return the AmlCompute cluster ``name``, creating it when it is not registered.

    Creation settings come from ``AZML_COMPUTE_MIN_NODES`` (default 0),
    ``AZML_COMPUTE_MAX_NODES`` (default 3) and ``AZML_COMPUTE_VMSIZE``
    (default ``"Standard_D3_v2"``), with a 3600-second idle scale-down.

    :param ws: Workspace whose ``compute_targets`` mapping is consulted.
    :param name: Name of the compute target.
    :returns: The existing or newly created cluster. ``None`` when an entry
        is registered under ``name`` but is falsy or not exactly of type
        ``AmlCompute``.
    """
    if name not in ws.compute_targets:
        min_node_count = int(os.environ.get('AZML_COMPUTE_MIN_NODES', "0"))
        max_node_count = int(os.environ.get('AZML_COMPUTE_MAX_NODES', "3"))
        sku = os.environ.get('AZML_COMPUTE_VMSIZE', "Standard_D3_v2")

        print(f"### Creating cluster '{name}' this could take time...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_node_count,
            max_nodes=max_node_count,
            idle_seconds_before_scaledown=3600)

        # Create the cluster, then poll until it is ready. With no explicit
        # min_node_count the cluster's own scale settings are used.
        created = ComputeTarget.create(ws, name, cluster_config)
        created.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

        # Detailed view of the current AmlCompute status.
        print(created.get_status().serialize())
        return created

    existing = ws.compute_targets[name]
    if existing and type(existing) is AmlCompute:
        print(f"### Found existing cluster '{name}' so will use it")
        return existing
    # Registered under this name but not a usable AmlCompute entry.
    return None
예제 #22
0
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it if it is absent.

    Creation settings (``vm_priority``, ``min_nodes``, ``max_nodes``) are
    read from an ``Env()`` instance.

    :param workspace: Workspace whose ``compute_targets`` mapping is consulted.
    :param compute_name: Name of the compute target.
    :param vm_size: VM size used only when a new cluster is created.
    :returns: The entry already registered under the name (whatever its
        type), or the newly created target.
    :raises SystemExit: With status 1 if a ``ComputeTargetException`` occurs.
    """
    try:
        if compute_name not in workspace.compute_targets:
            settings = Env()
            cluster_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=settings.vm_priority,
                min_nodes=settings.min_nodes,
                max_nodes=settings.max_nodes,
                idle_seconds_before_scaledown="300"
                #    #Uncomment the below lines for VNet support
                #    vnet_resourcegroup_name=vnet_resourcegroup_name,
                #    vnet_name=vnet_name,
                #    subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  cluster_config)
            compute_target.wait_for_completion(
                show_output=True, min_node_count=None, timeout_in_minutes=10)
        else:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        return compute_target
    except ComputeTargetException as err:
        print(err)
        print('An error occurred trying to provision compute.')
        exit(1)
예제 #23
0
def supported_vm_sizes(ws):
    """
    List the VM sizes reported by ``AmlCompute.supported_vmsizes`` for a workspace.

    :param ws: azureml Workspace instance
    :return: list
    """
    available_sizes = AmlCompute.supported_vmsizes(workspace=ws)
    return list(available_sizes)
예제 #24
0
def get_compute(workspace, cluster_name, vm_size='STANDARD_NC6', max_nodes=4):
    """
    Find a compute cluster by name, or create it when the lookup fails.

    :param workspace:       The Azure ML workspace to use.
    :param cluster_name:    Name of the cluster to find or create.
    :param vm_size:         Type/size of VM to create on AzureML, if no cluster
                            was found.
    :param max_nodes:       Max number of nodes to give to a new cluster.
    :returns:               A ComputeTarget object.
    """
    try:
        existing_target = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        new_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, max_nodes=max_nodes)

        new_target = ComputeTarget.create(workspace, cluster_name, new_config)

        # Block (up to 20 minutes) until provisioning finishes.
        new_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
        return new_target

    print('Found existing compute target')
    return existing_target
예제 #25
0
def create_cluster(ws, cluster_name, vm_size, max_nodes):
    """
    Return the named compute cluster, creating it when it does not exist.

    Any of ``cluster_name``, ``vm_size`` or ``max_nodes`` passed as ``None``
    is requested interactively through ``input``; otherwise the supplied
    value is echoed.

    :param ws:            azureml Workspace the cluster belongs to.
    :param cluster_name:  Name of the cluster, or None to prompt for it.
    :param vm_size:       VM size for a new cluster, or None to prompt.
    :param max_nodes:     Maximum node count for a new cluster, or None to
                          prompt (the answer is converted to ``int``).
    :return:              The existing or newly created compute target.
    :raises ValueError:   If the node count typed at the prompt is not an
                          integer.
    """
    if cluster_name is None:
        cluster_name = input("name of the cluster : ")
    else:
        print("using cluster : ", cluster_name)
    if vm_size is None:
        vm_size = input("size of your VM : ")
    else:
        print("using vm size of : ", vm_size)
    if max_nodes is None:
        # input() always returns a string; the provisioning configuration
        # expects an integer node count, so convert before using it.
        max_nodes = int(input("maximum amount of nodes on the cluster : "))
    else:
        print("maximum amount of nodes is : ", max_nodes)

    from azureml.core.compute import ComputeTarget, AmlCompute
    from azureml.core.compute_target import ComputeTargetException

    # Verify that cluster does not exist already
    try:
        cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print("Found existing cluster")
    except ComputeTargetException:
        print("Creating new cluster")
        # Specify the configuration for the new cluster
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=max_nodes)

        # Create the cluster with the specified name and configuration
        cluster = ComputeTarget.create(ws, cluster_name, compute_config)

        # Wait for the cluster to complete, show the output log
        cluster.wait_for_completion(show_output=True)
    return cluster
예제 #26
0
    def get_compute_object(ws,
                           compute_name,
                           size="STANDARD_NC6",
                           min_nodes=1,
                           max_nodes=4):
        """
        get_compute_object - Retrieves an AMLS compute object, creating it
        when the workspace has no compute target with the given name.

        :param Workspace ws: AMLS Workspace object.
        :param str compute_name: AMLS compute name.
        :param str size: VM size, used only when a new cluster is created.
        :param int min_nodes: Minimum node count for a new cluster.
        :param int max_nodes: Maximum node count for a new cluster.

        :returns: AMLS compute target
        :rtype: azureml.core.compute.ComputeTarget
        """
        # Read the mapping a single time so the membership test and the
        # lookup see the same snapshot (the property is presumably rebuilt
        # from the service on every access -- confirm against the SDK docs).
        known_targets = ws.compute_targets
        if compute_name in known_targets:
            return known_targets[compute_name]

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=size, min_nodes=min_nodes, max_nodes=max_nodes)
        # Create the cluster and block until provisioning has finished.
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)
        compute_target.wait_for_completion(show_output=True)
        return compute_target
예제 #27
0
    def _setup_compute(self):
        """
        Resolve ``self.compute_target`` for the workspace in ``self.ws``.

        When ``self.comp_name`` is already among the workspace compute
        targets, that target is reused. Otherwise an AmlCompute cluster is
        provisioned from ``self.comp_vm_size``, ``self.comp_min_nodes`` and
        ``self.comp_max_nodes``, waited on for up to 20 minutes, and its
        serialized status is printed.

        :returns: compute_target
        :rtype: ComputeTarget
        """
        if self.comp_name not in self.ws.compute_targets:
            print("creating a new compute target...")
            provisioning = AmlCompute.provisioning_configuration(
                vm_size=self.comp_vm_size,
                min_nodes=self.comp_min_nodes,
                max_nodes=self.comp_max_nodes,
            )
            self.compute_target = ComputeTarget.create(
                self.ws, self.comp_name, provisioning)
            self.compute_target.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=20,
            )
            print(self.compute_target.get_status().serialize())
            return self.compute_target

        # Existing target: reuse it, announcing it only when it is exactly
        # an AmlCompute instance.
        self.compute_target = self.ws.compute_targets[self.comp_name]
        if self.compute_target:
            if type(self.compute_target) is AmlCompute:
                print("Found compute target: " + self.comp_name)
        return self.compute_target
예제 #28
0
def create_aml_cluster(workspace, parameters):
    """
    Build an AmlCompute provisioning configuration and create the target.

    Sizing options come from ``parameters`` (with defaults). VNET, admin
    credential and identity settings are attached only when every value that
    group needs is present. The target name defaults to the last path segment
    of the ``GITHUB_REPOSITORY`` environment variable, cut to 16 characters.

    :param workspace:   Workspace handed on to ``create_compute_target``.
    :param parameters:  Mapping of settings, read with ``.get``.
    :return:            Whatever ``create_compute_target`` returns.
    """
    print("::debug::Creating aml cluster configuration")
    aml_config = AmlCompute.provisioning_configuration(
        vm_size=parameters.get("vm_size", "Standard_DS3_v2"),
        vm_priority=parameters.get("vm_priority", "dedicated"),
        min_nodes=parameters.get("min_nodes", 0),
        max_nodes=parameters.get("max_nodes", 4),
        idle_seconds_before_scaledown=parameters.get(
            "idle_seconds_before_scaledown", None),
        tags={"Created": "GitHub Action: Azure/aml-compute"},
        description="AML Cluster created by Azure/aml-compute GitHub Action",
        remote_login_port_public_access=parameters.get(
            "remote_login_port_public_access", "NotSpecified"),
    )

    print(
        "::debug::Adding VNET settings to configuration if all required settings were provided"
    )
    vnet_resource_group = parameters.get("vnet_resource_group_name", None)
    vnet_name = parameters.get("vnet_name", None)
    subnet_name = parameters.get("subnet_name", None)
    vnet_values = (vnet_resource_group, vnet_name, subnet_name)
    if all(value is not None for value in vnet_values):
        aml_config.vnet_resourcegroup_name = vnet_resource_group
        aml_config.vnet_name = vnet_name
        aml_config.subnet_name = subnet_name

    print(
        "::debug::Adding credentials to configuration if all required settings were provided"
    )
    admin_name = os.environ.get("ADMIN_USER_NAME", None)
    admin_password = os.environ.get("ADMIN_USER_PASSWORD", None)
    admin_ssh_key = os.environ.get("ADMIN_USER_SSH_KEY", None)
    if admin_name is not None:
        # A password takes precedence over an SSH key when both are set.
        if admin_password is not None:
            aml_config.admin_username = admin_name
            aml_config.admin_user_password = admin_password
        elif admin_ssh_key is not None:
            aml_config.admin_username = admin_name
            aml_config.admin_user_ssh_key = admin_ssh_key

    print(
        "::debug::Adding identity settings to configuration if all required settings were provided"
    )
    identity_type = parameters.get("identity_type", None)
    identity_id = parameters.get("identity_id", None)
    if identity_type == "UserAssigned" and identity_id is not None:
        aml_config.identity_type = identity_type
        aml_config.identity_id = identity_id

    print("::debug::Creating compute target")
    # Default compute target name
    repository = str(os.environ.get("GITHUB_REPOSITORY"))
    repository_name = repository.split("/")[-1][:16]
    target_name = parameters.get("name", repository_name)
    return create_compute_target(workspace=workspace,
                                 name=target_name,
                                 config=aml_config)
예제 #29
0
파일: pipeline.py 프로젝트: naivelogic/seer
def get_compute(ws: Workspace, compute_target: str) -> ComputeTarget:
    """
    Return the workspace compute target named ``compute_target``.

    When the name is not among ``ws.compute_targets``, a STANDARD_NC6
    AmlCompute cluster (1 to 4 nodes) is created and waited on first; the
    target is then read back from the workspace.

    :param ws: Workspace to search and, if needed, provision in.
    :param compute_target: Name of the compute target.
    :return: The entry of ``ws.compute_targets`` for that name.
    """
    if compute_target in ws.compute_targets:
        return ws.compute_targets[compute_target]

    provisioning = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        min_nodes=1,
        max_nodes=4,
    )
    new_cluster = ComputeTarget.create(ws, compute_target, provisioning)
    new_cluster.wait_for_completion(show_output=True)

    # Read the target back from the workspace rather than returning the
    # object produced by create().
    return ws.compute_targets[compute_target]
예제 #30
0
    def get_aml_ws_sizes(self, aml_ws_name):
        """
        Return the VM sizes AmlCompute supports for the named workspace.

        The workspace is resolved with ``get_aml_ws(self.config, aml_ws_name)``.

        :param aml_ws_name: Name of the Azure ML workspace to query.
        :return: Result of ``AmlCompute.supported_vmsizes`` for it.
        """
        # TODO: make this an xt cmd: xt list sizes
        from azureml.core.compute import AmlCompute

        workspace = get_aml_ws(self.config, aml_ws_name)
        return AmlCompute.supported_vmsizes(workspace=workspace)