# Beispiel #1
def _deploy_azuremlcompute_clusters(workspace,
                                    default_cpu_compute_target=None,
                                    default_gpu_compute_target=None,
                                    show_output=True):
    """Create the workspace's default CPU and GPU AmlCompute clusters.

    Both creations are kicked off before either is awaited, so the two
    provisioning operations overlap; each is then waited on against a shared
    10-minute budget.

    :param workspace: Workspace in which to create the compute targets.
    :param default_cpu_compute_target: Provisioning configuration for the
        default CPU cluster; skipped when falsy.
    :param default_gpu_compute_target: Provisioning configuration for the
        default GPU cluster; skipped when falsy.
    :param show_output: Print progress messages when True.
    :raises ValueError: If a compute finishes in a state other than
        "Succeeded", including when creation exceeded the timeout budget.
    """
    cpu_compute_object = gpu_compute_object = None

    # Start creation of both computes so they provision in parallel
    if default_cpu_compute_target:
        cpu_compute_object = AmlCompute.create(
            workspace, Workspace.DEFAULT_CPU_CLUSTER_NAME,
            default_cpu_compute_target)
        if show_output:
            print("Deploying Compute Target with name {}".format(
                cpu_compute_object.name))
    if default_gpu_compute_target:
        gpu_compute_object = AmlCompute.create(
            workspace, Workspace.DEFAULT_GPU_CLUSTER_NAME,
            default_gpu_compute_target)
        if show_output:
            print("Deploying Compute Target with name {}".format(
                gpu_compute_object.name))

    # Wait for both computes to finish, drawing down a shared timeout budget
    remaining_timeout_minutes = 10
    # The time when both computes started creating
    start_time = time.time()
    for compute_object in [cpu_compute_object, gpu_compute_object]:
        if compute_object:
            # The time since we've started checking this specific compute
            compute_start_time = time.time()
            compute_object.wait_for_completion(
                show_output=False,
                timeout_in_minutes=remaining_timeout_minutes)
            compute_time_taken = time.time() - compute_start_time

            time_taken = round(time.time() - start_time, 2)
            # Whatever this compute consumed comes out of the budget for the
            # next one. NOTE(review): this can go non-positive before the
            # second wait_for_completion call — confirm the SDK treats that
            # as "already expired" rather than raising.
            remaining_timeout_minutes = remaining_timeout_minutes - \
                (compute_time_taken / 60)

            provision_status = compute_object.get_status()
            if not provision_status or provision_status.provisioning_state != "Succeeded":
                # get_status() may return None, and the errors attribute may
                # itself be None or a tuple; copy into a fresh list so the
                # append below cannot fail or mutate SDK-owned state.
                errors = list(getattr(provision_status, "errors", None) or [])
                if remaining_timeout_minutes <= 0:
                    errors.append("Creation has exceeded timeout")
                raise ValueError(
                    "Compute creation failed for {} with errors: {}".format(
                        compute_object.name, errors))
            if show_output:
                print("Deployed Compute Target with name {}. Took {} seconds".
                      format(compute_object.name, time_taken))
    def _create_or_update_cluster(self, min_nodes, max_nodes,
                                  idle_timeout_secs):
        """Ensure self.cluster exists with the requested scaling settings.

        Attaches to and updates an existing cluster; on
        ComputeTargetException falls back to provisioning a fresh one.
        Blocks until provisioning finishes, then verifies that at least
        min_nodes nodes actually came up (with one 30 s grace period).
        """
        # Scaling settings shared by both the update and create paths.
        scaling = dict(min_nodes=min_nodes,
                       max_nodes=max_nodes,
                       idle_seconds_before_scaledown=idle_timeout_secs)

        try:
            # EAFP: constructing the handle raises when the cluster is absent.
            self.cluster = AmlCompute(workspace=self.workspace,
                                      name=self.cluster_name)
            print('Updating existing cluster "{}"'.format(
                colored(self.cluster_name, "green")))
            self.cluster.update(**scaling)
        except ComputeTargetException:
            print('Creating new cluster "{}"'.format(
                colored(self.cluster_name, "green")))
            self.cluster = AmlCompute.create(
                self.workspace,
                self.cluster_name,
                AmlCompute.provisioning_configuration(
                    vm_size=self.vm_type,
                    admin_username=self.admin_username,
                    admin_user_ssh_key=self.ssh_key,
                    remote_login_port_public_access="Enabled",
                    **scaling))

        self.cluster.wait_for_completion()

        # Node count may lag behind provisioning; allow one 30 s grace wait
        # before giving up.
        for waited in (False, True):
            if len(self.cluster_nodes) >= min_nodes:
                break
            if waited:
                raise RuntimeError("Failed to provision sufficient nodes")
            sleep(30)
def ComputeCompute():
    """Endpoint body: ensure an AmlCompute cluster exists in a workspace.

    Reads connection and cluster parameters from the JSON request body
    (presumably a Flask ``request`` — confirm against the route decorator,
    which is not visible here), attaches to the named workspace, and either
    reports an existing compute target or provisions a new one with the
    requested node range.

    :returns: A status message string.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    # Read (and thus required in the payload) but not used below.
    location = request.json['location']
    cluster_name = request.json['cluster_name']
    vm_size = request.json['vm_size']
    min_nodes = request.json['min_nodes']
    max_nodes = request.json['max_nodes']
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    try:
        aml_compute = AmlCompute(ws, cluster_name)
        print('Found existing AML compute context.')
        return "Found existing AML compute context."
    except ComputeTargetException:
        # Narrowed from a bare except: only "compute not found" should
        # trigger provisioning; other failures (auth, network) now surface.
        print('need to create new Compute.')
        print('Creating new AML compute context.')
        aml_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                           min_nodes=min_nodes,
                                                           max_nodes=max_nodes)
        aml_compute = AmlCompute.create(ws,
                                        name=cluster_name,
                                        provisioning_configuration=aml_config)
        aml_compute.wait_for_completion(show_output=True)
        return "Compute successfully created"
# Beispiel #4
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Attach to an existing workspace by name/subscription/resource group.
# NOTE(review): relies on ambient Azure credentials (CLI/SDK login).
ws = Workspace.get(name='akws',
                   subscription_id='8b3748c0-bb0b-4913-ab5b-c462062118fe',
                   resource_group='akrg')

# NOTE(review): despite the "cpu" naming, the provisioning config below
# requests Standard_NC6, which is a GPU VM size — confirm intent.
cpu_cluster_name = 'tdsp-cluster'

# EAFP: constructing the AmlCompute handle raises ComputeTargetException
# when the cluster does not exist yet; only then is it created.
try:
    cpu_cluster = AmlCompute(workspace=ws, name=cpu_cluster_name)
    print('Cluster already exists.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_NC6', max_nodes=4)
    cpu_cluster = AmlCompute.create(ws, cpu_cluster_name, compute_config)

# Block until provisioning finishes (no-op if the cluster already existed).
cpu_cluster.wait_for_completion(show_output=True)
# Beispiel #5
# Show full (untruncated) cell contents. -1 was deprecated in pandas 1.0
# and rejected by later versions; None is the supported "no limit" value.
pd.set_option('display.max_colwidth', None)
outputDf = pd.DataFrame(data=output, index=[''])
# NOTE(review): bare expression is notebook residue — it has no effect
# when run as a script.
outputDf.T

from azureml.core.compute import AmlCompute

aml_name = 'cpu-cluster'
# EAFP: attach to the compute target if it exists, otherwise provision it.
try:
    aml_compute = AmlCompute(ws, aml_name)
    print('Found existing AML compute context.')
except ComputeTargetException:
    # Narrowed from a bare except so genuine failures (auth, network) are
    # no longer silently treated as "cluster missing".
    print('Creating new AML compute context.')
    aml_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_v2", min_nodes=1, max_nodes=4)
    aml_compute = AmlCompute.create(ws,
                                    name=aml_name,
                                    provisioning_configuration=aml_config)
    aml_compute.wait_for_completion(show_output=True)

#writefile get_data.py

from sklearn import datasets
from sklearn.model_selection import train_test_split
from scipy import sparse
import numpy as np

# Originally intended as the body of get_data(); left at module level here.
train_values_path = (
    'C:\\Users\\datacore\\OneDrive\\Desktop\\Capstone Project\\train_values_wJZrCmI.csv'
)
boston = pd.read_csv(train_values_path)
# Features only: everything except the target column.
X = boston.drop(labels=['poverty_probability'], axis=1)
# Beispiel #6
# Create Batch AI Cluster
compute_target_name = 'myazbai'

# EAFP: the AmlCompute constructor raises ComputeTargetException when the
# target does not exist yet; only then is a new cluster provisioned.
try:
    batch_ai_compute = AmlCompute(workspace=ws, name=compute_target_name)
    print('found existing Azure Batch AI cluster:', batch_ai_compute.name)
except ComputeTargetException:
    print('creating new Azure Batch AI cluster...')
    # Autoscaling cluster of 0-4 dedicated Standard_NC6 nodes; idle nodes
    # are released after 300 seconds.
    batch_ai_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_NC6",
        vm_priority="dedicated",
        min_nodes=0,
        max_nodes=4,
        idle_seconds_before_scaledown=300)
    batch_ai_compute = AmlCompute.create(
        ws,
        name=compute_target_name,
        provisioning_configuration=batch_ai_config)
    batch_ai_compute.wait_for_completion(show_output=True)

# Stage the data-fetch script into a scratch project folder (presumably
# uploaded with the remote run — confirm against the AutoMLConfig usage).
project_folder = './tmp/automl-remote-batchai'
if not os.path.exists(project_folder):
    os.makedirs(project_folder)

shutil.copy('./scripts/get_data.py', project_folder)

print("Training the model...")
# configure Auto ML
automl_config = AutoMLConfig(task='classification',
                             debug_log='automl_errors.log',
                             primary_metric='AUC_weighted',
                             iteration_timeout_minutes=2,
from azureml.core.conda_dependencies import CondaDependencies

# Create and register an environment with scikit-learn available.
my_env = Environment("My_new_env")
conda_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
my_env.python.conda_dependencies = conda_dep

# NOTE(review): Environment.register normally takes a workspace argument
# (e.g. my_env.register(ws)) — confirm against the SDK version in use.
my_env.register()

#creating the cluster
from azureml.core.compute import AmlCompute

# Fixed syntax error: the original closed the call after vm_size and left
# "max_nodes = 2)" dangling on the continuation line.
cluster_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D11_V2",
                                                       max_nodes=2)

cluster = AmlCompute.create(ws, "My_cluster", cluster_config)

cluster.wait_for_completion()

#fetching the data
input_ds = ws.datasets.get("Loan Application")

#for ScriptRunning

from azureml.core import ScriptRunConfig, Experiment

# Fixed: the call was left unterminated and "compute_taret" misspelled the
# compute_target keyword, which would have raised a TypeError.
script_run = ScriptRunConfig(source_directory=".",
                             script="hyperdrive_script.py",
                             arguments=["--input_data", input_ds.as_named_input("raw_data")],
                             environment=my_env,
                             compute_target=cluster)
experiment_name = 'azureautoml'
project_folder = 'remote_automl'
nodes = 4

dsvm_name = 'dsvmaml'
# EAFP: attach to the compute target if it exists, otherwise provision it.
try:
    dsvm_compute = AmlCompute(ws, dsvm_name)
    print('found existing dsvm.')
except ComputeTargetException:
    # Narrowed from a bare except: only "target not found" should trigger
    # creation; other failures (auth, network) now propagate.
    print('creating new dsvm.')
    # Standard_NC6 is a 1-GPU VM size; autoscale between 0 and `nodes`.
    dsvm_config = AmlCompute.provisioning_configuration(vm_size="Standard_NC6",
                                                        max_nodes=nodes,
                                                        min_nodes=0)
    dsvm_compute = AmlCompute.create(ws,
                                     name=dsvm_name,
                                     provisioning_configuration=dsvm_config)
    dsvm_compute.wait_for_completion(show_output=True)

automl_settings = {
    "name": "AutoML_Demo_Experiment_{0}".format(time.time()),
    "iteration_timeout_minutes": 60,
    "iterations": 20,
    "n_cross_validations": 5,
    "primary_metric": 'accuracy',
    "preprocess": True,
    "verbosity": logging.INFO,
    # Run as many parallel iterations as the cluster has nodes.
    "max_concurrent_iterations": nodes
}

## note here that the project folder gets uploaded to our DSVM.