def main(name, vm_size, nodes):
    ws = Workspace.from_config()

    try:
        compute_cluster = ComputeTarget(ws, name)
    except ComputeTargetException:
        compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                               min_nodes=1,
                                                               max_nodes=nodes)

        compute_cluster = ComputeTarget.create(ws, name, compute_config)
        compute_cluster.wait_for_completion(show_output=True)
Example #2
def main(workspace):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name="aml-intTest"
    )

    # Loading script parameters
    print("Loading script parameters")
    script_params = {
        "--kernel": "linear",
        "--penalty": 0.9
    }

    # Creating experiment config
    print("Creating experiment config")
    estimator = Estimator(
        source_directory="./tests/train/train_with_python_config",
        entry_script="train.py",
        script_params=script_params,
        compute_target=compute_target,
        conda_dependencies_file="environment.yml"
    )
    return estimator
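
A minimal usage sketch (not part of the original example), assuming the experiment name "int-test"; it submits the Estimator returned by main() and streams the run output.

from azureml.core import Experiment, Workspace

workspace = Workspace.from_config()
estimator = main(workspace)
# Submit the estimator as a run of a hypothetical experiment and stream logs.
run = Experiment(workspace, "int-test").submit(estimator)
run.wait_for_completion(show_output=True)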
Example #3
def prepare_remote_compute(ws):
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1))
    compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

    # This example uses a CPU VM. To use a GPU VM, set the SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and isinstance(compute_target, AmlCompute):
            print('Found existing compute target, using it: ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)
        # create the cluster
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use get_status()
        print(compute_target.get_status().serialize())

    return compute_target
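
A short sketch (not from the original example) of driving prepare_remote_compute through its environment variables; the values shown are assumptions.

import os
from azureml.core import Workspace

# Hypothetical overrides; the function falls back to the defaults above when these are unset.
os.environ["AML_COMPUTE_CLUSTER_NAME"] = "cpucluster"
os.environ["AML_COMPUTE_CLUSTER_SKU"] = "STANDARD_D2_V2"

ws = Workspace.from_config()
compute_target = prepare_remote_compute(ws)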
Example #4
def main(workspace,inputs):

    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name=inputs["compute"]
    )
    # create a new runconfig object
    run_config = RunConfiguration()

    # enable Docker 
    run_config.environment.docker.enabled = True

    # set Docker base image to the default CPU-based image
    run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

    # use conda_dependencies.yml to create a conda environment in the Docker image for execution
    run_config.environment.python.user_managed_dependencies = False

    # specify CondaDependencies obj
    run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

    # For this step, we use yet another source_directory
    step = PythonScriptStep(name=inputs["step_name"],
                            script_name=inputs["train_script"], 
                            compute_target=compute_target, 
                            source_directory=inputs["source_directory"],
                            runconfig=run_config,
                            allow_reuse=True)
    return step
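
A minimal sketch (not from the original example) of wrapping the returned PythonScriptStep in a pipeline and submitting it; the inputs dictionary and experiment name are assumptions.

from azureml.core import Experiment, Workspace
from azureml.pipeline.core import Pipeline

workspace = Workspace.from_config()
# Hypothetical values matching the keys the function reads from `inputs`.
inputs = {
    "compute": "cpu-cluster",
    "step_name": "train-step",
    "train_script": "train.py",
    "source_directory": "./src",
}
pipeline = Pipeline(workspace=workspace, steps=[main(workspace, inputs)])
run = Experiment(workspace, "pipeline-demo").submit(pipeline)
run.wait_for_completion(show_output=True)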
Example #5
def main(workspace):
    # Load compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name="mycluster"
    )

    # Load script parameters
    print("Loading script parameters")
    script_params = {
        "--kernel": "linear",
        "--penalty": 1.0
    }

    # Create experiment config
    print("Creating experiment config")
    estimator = Estimator(
        source_directory="code/train",
        entry_script="train.py",
        script_params=script_params,
        compute_target=compute_target,
        pip_packages=["azureml-dataprep[pandas,fuse]", "scikit-learn", "pandas", "matplotlib"]
    )
    return estimator
Example #6
def main(workspace,inputs):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name=inputs["compute"]
    )

    step = PythonScriptStep(name=inputs["step_name"],
                            script_name=inputs["train_script"], 
                            compute_target= compute_target, 
                            source_directory=inputs["source_directory"],
                            allow_reuse=True)
    
    return step
Example #7
def main(workspace, inputs):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(workspace=workspace, name=inputs["compute"])

    # Loading script parameters
    print("Loading script parameters")
    script_params = {"--kernel": "linear", "--penalty": 0.9}

    # Creating experiment config
    print("Creating experiment config")
    estimator = Estimator(source_directory=inputs["source_directory"],
                          entry_script=inputs["train_script"],
                          script_params=script_params,
                          compute_target=compute_target,
                          conda_dependencies_file="environment.yml")
    return estimator
Example #8
def _load_compute_target(workspace, backend_config):
    '''
    Returns the ComputeTarget object associated with the user's workspace
    and the given compute target name.
    :param workspace: AzureML Workspace object
    :param backend_config: dictionary containing the target compute name
    :return ComputeTarget: AzureML ComputeTarget object
    '''
    target_name = backend_config[COMPUTE]
    try:
        compute = ComputeTarget(workspace=workspace, name=target_name)
        # pylint: disable = abstract-class-instantiated
        _logger.info(
            _CONSOLE_MSG.format(
                "Found existing cluster {}, using it.".format(target_name)))
    except ComputeTargetException as e:
        raise ComputeTargetException(e)
    return compute
Example #9
def prepare_remote_compute(ws,
                           compute_name,
                           compute_min_nodes=0,
                           compute_max_nodes=4,
                           compute_vm_size='STANDARD_D2_V2'):
    """
    :param ws: azureml Workspace instance
    :param compute_name: String with name for compute target
    :param compute_min_nodes: minimum number of nodes
    :param compute_max_nodes: maximum number of nodes
    :param compute_vm_size: vm size for compute target
    :return:
    """

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and isinstance(compute_target, AmlCompute):
            print('Found compute target: ' + compute_name + ' of size: ' +
                  compute_target.vm_size + '. Using it.')
            print(
                'For a different size, create a new target with a different name!'
            )
            # TODO: Handle case if compute_name exists, but is not active!
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=compute_vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)
        # create the cluster
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use get_status()
        print(compute_target.get_status().serialize())

    return compute_target
Example #10
def main(experiment, environment, dataset):
    workspace = Workspace.from_config()
    experiment = Experiment(workspace, experiment)
    compute_target = ComputeTarget(workspace, environment)
    
    # Use the root of the solution as source folder for the run.
    root_folder = Path(__file__).parent.parent

    # Provide each of the datasets to the estimator as a named input.
    # You can access these from within the training script.
    datasets = [Dataset.get_by_name(workspace, ds).as_named_input(ds) for ds in dataset]

    estimator = SKLearn(
        source_directory=root_folder,
        entry_script='customer_churn/train.py',
        conda_dependencies_file='conda_dependencies.yml',
        compute_target=compute_target,
        inputs=datasets
    )

    run = experiment.submit(estimator)

    run.wait_for_completion(show_output=True)
    service.wait_for_deployment(show_output=True)
    return service


Example #11
# Authentication via service principal
ws = Workspace.get(
    args.ws,
    ServicePrincipalAuthentication(
        tenant_id=os.getenv('tenant_id'),
        service_principal_id=os.getenv('service_principal_id'),
        service_principal_password=os.getenv('service_principal_password')),
    subscription_id=os.getenv('subscription_id'),
    resource_group=args.rg)

model = Model(ws, "new_model")
deployment_target = ComputeTarget(ws, args.aksname)

img = create_image_config("score.py", "scoringenv.yml")

servicename = args.servicename
try:
    service = Webservice(ws, servicename)
except Exception as e:
    print(e)
    service = None
if service:
    print("Updating existing service with new image...")
    try:
        # create new image
        service = update_service(service, [model], img, ws)
    except Exception as e:
Example #12
def main():
    # Get our configs
    with open("ptgnn/authentication.json") as jsonFile:
        authData = json.load(jsonFile)[args.auth_cluster]

    # Copy the convertCorpus script here. Done so we don't upload the corpus to Azure, or keep a copy of the script in here.
    # (It's weird, I know. It works and has a purpose though)
    convertCorpusLocation = Path("../convertCorpusForML.py")
    convertCorpusAzureLocation = Path("./convertCorpusForML.py")
    shutil.copy(convertCorpusLocation, convertCorpusAzureLocation)

    # Grab the authentication data from the JSON file
    subID = authData["subID"]  # Get from Azure Portal; used for billing
    resGroup = authData["resGroup"]  # Name for the resource group
    wsName = authData["wsName"]  # Name for the workspace, which is the collection of compute clusters + experiments
    computeName = authData["computeName"]  # Name for compute cluster
    datastoreName = authData["datastoreName"]

    # Get the workspace, the compute target and the datastore
    ws = Workspace.get(wsName, subscription_id=subID, resource_group=resGroup)
    computeTarget = ComputeTarget(ws, computeName)
    datastore = Datastore(ws, name=datastoreName)

    # Download the entire corpus to the compute target. Save the DataReference obj here
    # as_mount is also possible, but slows things down due to network opening of files
    corpus_location = datastore.path(args.aml_location).as_download()
    output_location = "./"
    # The files that will be uploaded for usage by our script (everything in the azure folder)
    source_directory = "."

    # params for the script
    params = {
        "--corpus_location": corpus_location,
        "--output_folder": output_location,
        "--aml": "",
        "--training_percent": args.training_percent,
        "--validation_percent": args.validation_percent,
        "-c": ""
    }
    if args.log_num is not None:
        params["-l"] = args.log_num
        tags = {
            "logs": str(args.log_num)
        }
    else:
        tags = {
            "logs": "MAX"
        }
    if args.statement_generation:
        params["-s"] = ""
        tags["generationType"] = "Statement"
    else:
        tags["generationType"] = "Severity"
    # Set up the estimator object. Note the inputs element, it tells azure that corpus_location in params
    # will be a DataReference Object.
    est = Estimator(source_directory=source_directory,
                    compute_target=computeTarget,
                    entry_script='convertCorpusForML.py',
                    script_params=params,
                    inputs=[corpus_location],
                    conda_packages=["pip"],
                    pip_packages=["azureml-core", "tqdm", "numpy", "protobuf"],
                    use_docker=True,
                    use_gpu=False)
    # Start the experiment
    run = Experiment(ws, args.exp_name).submit(config=est, tags=tags)
    # remove the copy of convertCorpus (Remember, don't question this)
    convertCorpusAzureLocation.unlink()
    # print out the portal URL
    # print("Portal URL: ", run.get_portal_url())
    # this will stream everything that the compute target does.
    print("Experiment Started. Remember you can exit out of this program but the experiment will still run on Azure!")
    run.wait_for_completion(show_output=True)
Example #13
from azureml.core import Workspace, Datastore, Dataset, ScriptRunConfig, ComputeTarget, Experiment
from azureml.data.datapath import DataPath
from azureml.train.sklearn import SKLearn
from azureml.train.estimator import Estimator

# multi-tenant login with my account
from azureml.core.authentication import InteractiveLoginAuthentication
int_auth = InteractiveLoginAuthentication(tenant_id='your_tenant_id')
ws = Workspace.from_config(auth=int_auth)
print(ws.name)

dataset = Dataset.get_by_name(workspace=ws, name = 'demo_wines_live')

#point to compute target
comp = ComputeTarget(ws, name = 'compute-instance-demo')

#estimator with SKlearn by default + azureml-sdk package
est = SKLearn(
                source_directory='./scripts',
                entry_script='train.py',
                compute_target=comp,
                inputs = [dataset.as_named_input('train')], #readable from the script
                pip_packages=['azureml-sdk', 'pyarrow>=0.12.0']
)

exp = Experiment(workspace=ws, name = 'submitted_wine')
run = exp.submit(est)
run.wait_for_completion(show_output=True)

#%%
%%writefile ./scripts/train.py
Example #14
def create_experiment_config(workspace):
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets of first step
    print("Setting up datasets")
    data_prep_input = Dataset.get_by_name(workspace=workspace,
                                          name=data_prep_settings.get(
                                              "dataset_input_name",
                                              None)).as_named_input(
                                                  data_prep_settings.get(
                                                      "dataset_input_name",
                                                      None)).as_mount()
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get(
                                "datastore_output_name",
                                "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(workspace=workspace,
                                             name=data_prep_settings.get(
                                                 "compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []),
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ###############################################
    ### Creating data model train Pipeline Step ###
    ###############################################

    # Load settings
    print("Loading settings")
    model_train_step_path = os.path.join("steps", "model_train")
    with open(os.path.join(model_train_step_path, "step.json")) as f:
        model_train_settings = json.load(f)
    hyperparameter_sampling_settings = model_train_settings.get(
        "hyperparameter_sampling", {})

    # Setup datasets of first step
    print("Setting up datasets")
    model_train_input = data_prep_output.as_named_input(
        name=model_train_settings.get("dataset_input_name", None))
    model_train_output = PipelineData(
        name=model_train_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=model_train_settings.get(
                                "datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #model_train_output.register(
    #    name=model_train_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    model_train_dependencies = CondaDependencies.create(
        pip_packages=model_train_settings.get("pip_packages", []),
        conda_packages=model_train_settings.get("conda_packages", []),
        python_version=model_train_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    model_train_run_config = RunConfiguration(
        conda_dependencies=model_train_dependencies,
        framework=model_train_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    model_train_compute_target = ComputeTarget(workspace=workspace,
                                               name=model_train_settings.get(
                                                   "compute_target_name",
                                                   None))

    # Create distributed training backend
    print("Creating distributed training backend")
    distributed_training_backend = get_distributed_backend(
        backend_name=model_train_settings.get("distributed_backend", None))

    # Create Estimator for Training
    print("Creating Estimator for training")
    model_train_estimator = Estimator(
        source_directory=model_train_step_path,
        entry_script=model_train_settings.get("script_name", None),
        environment_variables=model_train_settings.get("parameters", None),
        compute_target=model_train_compute_target,
        node_count=model_train_settings.get("node_count", None),
        distributed_training=distributed_training_backend,
        conda_packages=model_train_settings.get("conda_packages", None),
        pip_packages=model_train_settings.get("pip_packages", None),
    )

    try:
        # Create parameter sampling
        print("Creating Parameter Sampling")
        parameter_dict = {}
        parameters = hyperparameter_sampling_settings.get(
            "parameters",
            {}) if "parameters" in hyperparameter_sampling_settings else {}
        for parameter_name, parameter_details in parameters.items():
            parameter_distr = get_parameter_distribution(
                distribution=parameter_details.get("distribution", None),
                **parameter_details.get("settings", {}))
            parameter_dict[f"--{parameter_name}"] = parameter_distr
        model_train_ps = get_parameter_sampling(
            sampling_method=hyperparameter_sampling_settings.get(
                "method", None),
            parameter_dict=parameter_dict)

        # Get Policy definition
        policy_settings = hyperparameter_sampling_settings.get("policy", {})
        kwargs = {
            key: value
            for key, value in policy_settings.items() if key not in
            ["policy_method", "evaluation_interval", "delay_evaluation"]
        }

        # Create termination policy
        print("Creating early termination policy")
        model_train_policy = get_policy(
            policy_method=policy_settings.get("method", ""),
            evaluation_interval=policy_settings.get("evaluation_interval",
                                                    None),
            delay_evaluation=policy_settings.get("delay_evaluation", None),
            **kwargs)

        # Create HyperDriveConfig
        print("Creating HyperDriveConfig")
        model_train_hyperdrive_config = HyperDriveConfig(
            estimator=model_train_estimator,
            hyperparameter_sampling=model_train_ps,
            policy=model_train_policy,
            primary_metric_name=hyperparameter_sampling_settings.get(
                "primary_metric", None),
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE
            if "min" in hyperparameter_sampling_settings.get(
                "primary_metric_goal", None) else PrimaryMetricGoal.MAXIMIZE,
            max_total_runs=hyperparameter_sampling_settings.get(
                "max_total_runs", 1),
            max_concurrent_runs=hyperparameter_sampling_settings.get(
                "max_concurrent_runs", 1),
            max_duration_minutes=hyperparameter_sampling_settings.get(
                "max_duration_minutes", None))

        # Create HyperDriveStep
        print("Creating HyperDriveStep")
        model_train = HyperDriveStep(
            name=model_train_settings.get("step_name", None),
            hyperdrive_config=model_train_hyperdrive_config,
            estimator_entry_script_arguments=model_train_settings.get(
                "arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))
    except Exception:
        print("Not all required parameters specified for HyperDrive step")

        # Create EstimatorStep
        print("Creating EstimatorStep")
        model_train = EstimatorStep(
            name=model_train_settings.get("step_name", None),
            estimator=model_train_estimator,
            estimator_entry_script_arguments=model_train_settings.get(
                "arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            compute_target=model_train_compute_target,
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[model_train],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
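
A short sketch (not from the original example) of submitting and optionally publishing the validated pipeline; the experiment and publish names are assumptions.

from azureml.core import Experiment, Workspace

workspace = Workspace.from_config()
pipeline = create_experiment_config(workspace)
run = Experiment(workspace, "training-pipeline").submit(pipeline)
run.wait_for_completion(show_output=True)
# Optionally publish the pipeline so it can be triggered later via REST or on a schedule.
published_pipeline = pipeline.publish(name="training-pipeline",
                                      description="Training Pipeline")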
Example #15
print("Azure ML SDK Version: ", azureml.core.VERSION)

ws = Workspace.from_config()
print("Resource group: ", ws.resource_group)
print("Location: ", ws.location)
print("Workspace name: ", ws.name)

from azureml.core.webservice import Webservice

for web_svc in Webservice.list(ws):
    print("Deleting web service", web_svc.name, "...")
    web_svc.delete()

from azureml.core import ComputeTarget

for target in ComputeTarget.list(ws):
    print("Deleting compute target", target.name, "...")
    target.delete()

from azureml.core import Image

for img in Image.list(ws):
    print("Deleting image", img.id, "...")
    img.delete()

from azureml.core.model import Model

for model in Model.list(ws):
    print("Deleting model", model.id, "...")
    model.delete()
Example #16
#Find workspace using connection parameters
aml_workspace = Workspace.get(subscription_id=args.subscription_id,
                              resource_group=args.resource_group,
                              name=args.base_name + "ws")

# Load yaml and store it as a dictionary
with open("variables.yml", "r") as f:
    yaml_loaded = yaml.safe_load(f)['variables']

variables = {}
for d in yaml_loaded:
    variables[d['name']] = d['value']

# Check if compute cluster exists. If not, create one.
try:
    compute_target = ComputeTarget(aml_workspace,
                                   variables["AML_COMPUTE_CLUSTER_CPU_SKU"])
    print('Found existing cluster, using it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=variables['AML_COMPUTE_CLUSTER_SIZE'],
        vm_priority=variables['AML_CLUSTER_PRIORITY'],
        min_nodes=variables['AML_CLUSTER_MIN_NODES'],
        max_nodes=variables['AML_CLUSTER_MAX_NODES'],
        idle_seconds_before_scaledown=300)
    cpu_cluster = ComputeTarget.create(
        aml_workspace, variables["AML_COMPUTE_CLUSTER_CPU_SKU"],
        compute_config)

#create environment from conda_dependencies.yml for runconfig
environment = Environment(name="myenv")
conda_dep = CondaDependencies(
Example #17
auth = ServicePrincipalAuthentication(
    tenant_id=auth_config["tenant_id"],
    service_principal_id=auth_config["service_principal_id"],
    service_principal_password=os.environ["SP_SECRET"],
)

ws = Workspace(
    subscription_id=auth_config["subscription_id"],
    resource_group=auth_config["resource_group"],
    workspace_name=auth_config["workspace_name"],
    auth=auth,
)


# Usually, the cluster already exists, so we just fetch it
compute_target = next(
    (m for m in ComputeTarget.list(ws) if m.name == compute["name"]), None
)

# Specify the compute environment and register it for use in scoring
env = Environment("component-condition")
env.docker.enabled = True
cd = CondaDependencies.create(
    conda_packages=[
        "tensorflow=2.0.0",
        "pandas",
        "numpy",
        "matplotlib"
        ],
    pip_packages=[
        "azureml-mlflow==1.5.0",
        "azureml-defaults==1.5.0"
Example #18
    #try:
    #    config_path = os.path.join(this_script_dir, "config.json")
    #    workspace = Workspace.from_config(config_path, auth=interactive_auth)
    #except Exception as ex:
    print(f"Cannot get a workspace: {ex}")
    exit()

print('Workspace name: ' + workspace.name,
      'Azure region: ' + workspace.location,
      'Subscription id: ' + workspace.subscription_id,
      'Resource group: ' + workspace.resource_group,
      sep='\n')

# Getting an Azure ML Compute Target
try:
    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2', max_nodes=1)

    # create the cluster
    compute_target = ComputeTarget.create(workspace, cluster_name,
                                          compute_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it uses the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True,
                                       min_node_count=None,
                                       timeout_in_minutes=20)
def create_experiment_config(workspace):
    ########################################
    ### Creating data load Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_load_step_path = os.path.join("steps", "data_load")
    with open(os.path.join(data_load_step_path, "step.json")) as f:
        data_load_settings = json.load(f)

    # Setup of pipeline parameter
    print("Setting up pipeline parameters")
    data_load_environment = PipelineParameter(name="environment",
                                              default_value="golden")
    data_load_start_date = PipelineParameter(name="start_date",
                                             default_value="2019-01-01")
    data_load_end_date = PipelineParameter(name="end_date",
                                           default_value="2019-01-31")
    data_load_system = PipelineParameter(name="system", default_value="PAX 1")
    data_load_platform = PipelineParameter(name="platform",
                                           default_value="Atlantis")

    # Loading compute target
    print("Loading ComputeTarget")
    data_load_compute_target = ComputeTarget(workspace=workspace,
                                             name=data_load_settings.get(
                                                 "compute_target_name", None))

    # Create Databricks step
    print("Creating Step")
    data_load = DatabricksStep(
        name=data_load_settings.get("step_name", None),
        existing_cluster_id=data_load_settings.get("existing_cluster_id",
                                                   None),
        inputs=[],
        outputs=[],
        compute_target=data_load_compute_target,
        notebook_path=data_load_settings.get("notebook_path", None),
        notebook_params={
            "environment": data_load_environment,
            "start_date": data_load_start_date,
            "end_date": data_load_end_date,
            "system": data_load_system,
            "platform": data_load_platform
        },
        run_name=data_load_settings.get("step_name", None),
        allow_reuse=data_load_settings.get("allow_reuse", True),
        version=data_load_settings.get("version", None),
    )

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[data_load],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
def create_experiment_config(workspace):
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets - Create PipelineParameter for dynamic pipeline input
    print("Setting up datasets with dynamic input")
    data_prep_input_path = DataPath(
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get(
                                "datastore_input_name", "workspaceblobstore")),
        path_on_datastore=
        "golden/Atlantis/PAX1/15-Mar-2020-23-37-50-279971/PAX1.parquet/")
    data_prep_input_path_pipeline_parameter = PipelineParameter(
        name="input_path", default_value=data_prep_input_path)
    data_prep_input = (data_prep_input_path_pipeline_parameter,
                       DataPathComputeBinding(mode="mount"))
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get(
                                "datastore_output_name",
                                "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(workspace=workspace,
                                             name=data_prep_settings.get(
                                                 "compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []) +
        ["--input-datapath", data_prep_input],
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ############################################
    ### Creating inference Parallel Run Step ###
    ############################################

    # Load settings
    print("Loading settings")
    batch_inference_step_path = os.path.join("steps", "batch_inference")
    with open(os.path.join(batch_inference_step_path, "step.json")) as f:
        batch_inference_settings = json.load(f)

    # Setup datasets of first step
    print("Setting up datasets")
    batch_inference_input = data_prep_output.as_named_input(
        name=batch_inference_settings.get("dataset_input_name", None))
    batch_inference_output = PipelineData(
        name=batch_inference_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=batch_inference_settings.get(
                                "datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #batch_inference_output.register(
    #    name=batch_inference_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    batch_inference_dependencies = CondaDependencies.create(
        pip_packages=batch_inference_settings.get("pip_packages", []),
        conda_packages=batch_inference_settings.get("conda_packages", []),
        python_version=batch_inference_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=batch_inference_dependencies,
        framework=batch_inference_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    batch_inference_compute_target = ComputeTarget(
        workspace=workspace,
        name=batch_inference_settings.get("compute_target_name", None))

    # Create python step
    print("Creating Step")
    batch_inference = PythonScriptStep(
        name=batch_inference_settings.get("step_name", None),
        script_name=batch_inference_settings.get("script_name", None),
        arguments=batch_inference_settings.get("arguments", []),
        compute_target=batch_inference_compute_target,
        runconfig=data_prep_run_config,
        inputs=[batch_inference_input],
        outputs=[batch_inference_output],
        params=batch_inference_settings.get("parameters", []),
        source_directory=batch_inference_step_path,
        allow_reuse=batch_inference_settings.get("allow_reuse", True),
        version=batch_inference_settings.get("version", None),
    )

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[batch_inference],
        description="Batch Inference Pipeline",
    )

    return pipeline