import os

from azureml.core import Environment
from termcolor import colored, cprint


def create_or_update_environment(workspace, name, docker_image, docker_args=None):
    try:
        environment = Environment.get(workspace, name)
        cprint('Using existing environment "{}"'.format(colored(name, "white")), "green")
    except Exception:  # MS - please implement an exception type for env not found
        cprint('Creating new environment "{}"'.format(colored(name, "white")), "yellow")
        environment = Environment(name=name)
    environment.docker.enabled = True
    environment.python.user_managed_dependencies = True
    # Handle a Dockerfile vs. an image spec accordingly
    if os.path.exists(docker_image):
        environment.docker.base_dockerfile = docker_image
        environment.docker.base_image = None
    else:
        environment.docker.base_dockerfile = None
        environment.docker.base_image = docker_image
    environment.docker.shm_size = "100g"
    if docker_args is not None:
        environment.docker.arguments = docker_args
    environment = environment.register(workspace=workspace)
    return environment
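A minimal usage sketch for the function above, assuming a workspace loaded from a local config.json; the environment names and image tag are placeholders:

from azureml.core import Workspace

ws = Workspace.from_config()

# A path that exists locally is treated as a Dockerfile; anything else as an image tag.
env_from_dockerfile = create_or_update_environment(ws, "my-training-env", "Dockerfile")
env_from_image = create_or_update_environment(
    ws, "my-inference-env", "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04")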
def get_or_create_python_environment(azure_config: AzureConfig,
                                     source_config: SourceConfig,
                                     environment_name: str = "",
                                     register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files that are specified in `source_config`. If an environment with the same Conda definition
    already exists, it is retrieved; otherwise it is created afresh.
    :param azure_config: Azure-related configuration to use for model scale-out behaviour.
    :param source_config: Configuration for model execution, such as name and execution mode.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If it
    is not found, create one from the Conda files provided. This parameter is meant to be used when running
    inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace. If
    False, it will only be created, but not registered. Use this for unit testing.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from the extra index that clash with names of
        # packages on PyPI.
        conda_dependencies.set_pip_option(f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of that,
    # and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([merged_yaml, azure_config.docker_shm_size, base_image, str(env_variables)])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(), name=env_name_to_find, version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(f"Python environment '{unique_env_name}' does not yet exist, creating and registering it.")
    env = Environment(name=unique_env_name)
    env.docker.enabled = True
    env.docker.shm_size = azure_config.docker_shm_size
    env.python.conda_dependencies = conda_dependencies
    env.docker.base_image = base_image
    env.environment_variables = env_variables
    if register_environment:
        env.register(azure_config.get_workspace())
    return env
def get_custom_env(env_name):
    # Both calls raise on failure, so the original try/except blocks that only
    # re-raised the exception are dropped.
    ws = _establish_connection_to_aml_workspace()
    return Environment.get(ws, env_name)
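`_establish_connection_to_aml_workspace` is not defined in this snippet; a plausible sketch, assuming workspace details live in the standard config.json written by `Workspace.write_config()`:

from azureml.core import Workspace


def _establish_connection_to_aml_workspace() -> Workspace:
    # Hypothetical helper: raises if config.json is missing or credentials are invalid.
    return Workspace.from_config()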
def get_training_environment(ws: Workspace, name: str, pip_file: str,
                             use_gpu: bool = False, include_prerelease: bool = False,
                             environment_type: str = None):
    '''
    Creates a training environment, based on the required pip packages, the need for a GPU,
    and a given environment type.
    Args:
        ws (Workspace): the AzureML workspace that will be used to register the environment
        name (str): the name for the environment that will be registered
        pip_file (str): path to a pip requirements file listing the packages to install
        use_gpu (bool): indicates whether a GPU is required
        include_prerelease (bool): indicates whether pip packages can be installed in prerelease mode
        environment_type (str): either the name of an existing environment that will be taken
            as base, or one of these values: tensorflow, sklearn, pytorch
    Returns:
        a registered environment, ready to use
    '''
    from azureml.core import Environment
    from azureml.core.conda_dependencies import CondaDependencies

    print('Getting environment for type', environment_type)
    base_environment = environment_type
    if environment_type == 'tensorflow':
        base_environment = 'AzureML-TensorFlow-2.0-GPU' if use_gpu else 'AzureML-TensorFlow-2.0-CPU'
    elif environment_type == 'sklearn':
        base_environment = 'AzureML-Scikit-learn-0.20.3'
    elif environment_type == 'pytorch':
        base_environment = 'AzureML-PyTorch-1.5-GPU' if use_gpu else 'AzureML-PyTorch-1.5-CPU'

    pip_packages = __get_package_list_from_requirements(pip_file)

    if base_environment is not None:
        print('Taking', base_environment, 'as base environment')
        training_env = Environment.get(ws, base_environment)
        training_env.name = name
        for pippkg in pip_packages:
            training_env.python.conda_dependencies.add_pip_package(pippkg)
    else:
        print('Creating new environment')
        training_env = Environment(name=name)
        training_env.python.conda_dependencies = CondaDependencies.create(pip_packages=pip_packages)

    if include_prerelease:
        training_env.python.conda_dependencies.set_pip_option("--pre")
    training_env.docker.enabled = True
    _ = training_env.register(workspace=ws)
    return training_env
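The helper `__get_package_list_from_requirements` is referenced but not shown; a minimal sketch, assuming a plain pip requirements file with one package per line:

def __get_package_list_from_requirements(pip_file: str) -> list:
    # Hypothetical parser: skip blank lines and comments, keep version pins as written.
    with open(pip_file) as f:
        return [line.strip() for line in f
                if line.strip() and not line.strip().startswith("#")]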
def submit_run(aml_interface):
    experiment = Experiment(aml_interface.workspace, AML_EXPERIMENT_NAME)
    src_dir = __here__
    run_config = ScriptRunConfig(source_directory=src_dir, script='train.py')
    run_config.run_config.target = aml_interface.get_compute_target(
        AML_COMPUTE_NAME, 'STANDARD_D2_V2')
    aml_run_env = Environment.get(aml_interface.workspace, AML_ENV_NAME)
    run_config.run_config.environment = aml_run_env
    print("Submitting Run")
    run = experiment.submit(config=run_config)
    run.wait_for_completion(show_output=True)
    print(run.get_metrics())
def _create_inference_config(inference_env_name):
    try:
        ws = _establish_connection_to_aml_workspace()
    except Exception:
        print("failed to connect to workspace")
        raise
    try:
        environment = Environment.get(workspace=ws, name=inference_env_name)
        inference_config = InferenceConfig(entry_script="score.py",
                                           environment=environment,
                                           source_directory=r'deployment')
        return inference_config
    except Exception:
        print("failed to create inference config")
        raise
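The `score.py` entry script itself is not shown. AzureML scoring scripts expose `init()` (run once at container start) and `run()` (run per request); the model file name and JSON payload shape below are assumptions:

# Hypothetical deployment/score.py sketch
import json
import os

import joblib

model = None


def init():
    # AZUREML_MODEL_DIR points at the registered model's folder inside the container.
    global model
    model = joblib.load(os.path.join(os.environ["AZUREML_MODEL_DIR"], "model.pkl"))


def run(raw_data):
    data = json.loads(raw_data)["data"]
    return model.predict(data).tolist()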
def PrepareAzureScript():
    """ Create Script Run Config """
    # Use an Azure curated environment to create the docker container
    # curated_env_name = 'AzureML-PyTorch-1.6-GPU'
    curated_env_name = 'AzureML-Pytorch1.7-Cuda11-OpenMpi4.1.0-py36'
    pytorch_env = Environment.get(workspace=az_workspace, name=curated_env_name)
    pytorch_env = pytorch_env.clone(new_name='pytorch-1.6-gpu')

    # OR
    # build the conda environment used on the local machine (from a python terminal) to create the docker container:
    # export a yml file with 'conda env export -n [name of environment] -f [filename.yml]'
    # and place the yml file in the ./azureml directory
    # pytorch_env = Environment.from_conda_specification(
    #     name='AzurePytorch',
    #     file_path='./.azureml/AzurePytorch.yml'
    # )

    # arguments can be passed to the training script;
    # they have to be parsed in the training script:
    # import argparse
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--data-folder", type=str, dest="data_folder", help="data folder mounting point", default="")
    # parser.add_argument("--num-epochs", type=int, dest="num_epochs", help="Number of epochs", default="")
    # args = parser.parse_args()
    # data_path = args.data_folder
    args = [
        '--data-folder',
        az_dataset.as_named_input('data').as_mount(),
        '--num-epochs', 50000
    ]

    # Script Run Config defines the wrapper for the python scripts and will be used to create the Docker container
    project_folder = "./scripts"  # local location of the scripts; they will be built into the docker container

    global az_config
    global az_computetarget
    az_config = ScriptRunConfig(
        source_directory=project_folder,
        script='model.py',
        compute_target=az_computetarget,
        environment=pytorch_env,
        arguments=args,
    )
def submit_run(aml_interface):
    experiment = Experiment(aml_interface.workspace, AML_EXPERIMENT_NAME)
    src_dir = __here__
    run_config = ScriptRunConfig(
        source_directory=src_dir,
        script='train.py',
    )
    run_config.run_config.target = aml_interface.get_compute_target(
        AML_COMPUTE_NAME, 'STANDARD_D1_V2')
    aml_run_env = Environment.get(aml_interface.workspace, AML_ENVIRONMENT_NAME)
    run_config.run_config.environment = aml_run_env
    logger.info(f"Submitting Run to {AML_COMPUTE_NAME}@{AML_EXPERIMENT_NAME}")
    run = experiment.submit(config=run_config)
    run.wait_for_completion(show_output=True)
    logger.info(run.get_metrics())
    logger.info(f"Finished Run on {AML_COMPUTE_NAME}@{AML_EXPERIMENT_NAME}")
from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE


def create_azureml_env(ws, env_name, conda_yml):
    """
    Create an Azure ML environment based on a default AML docker image and a yaml file
    that specifies Conda and Pip dependencies.
    Azure ML will create a new custom docker image for the env.
    """
    try:
        amlenv = Environment.get(ws, name=env_name)
        print('found existing env {}'.format(amlenv.name))
    except Exception:
        print('create new env {}'.format(env_name))
        amlenv = Environment.from_conda_specification(name=env_name, file_path=conda_yml)
        amlenv.docker.enabled = True
        amlenv.docker.base_image = DEFAULT_CPU_IMAGE
        amlenv.python.user_managed_dependencies = False
        amlenv.register(ws)
    return amlenv
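A usage sketch for the function above; the environment name and conda file path are placeholders:

from azureml.core import Workspace

ws = Workspace.from_config()
# The first call registers the env from environment.yml; later calls retrieve it by name.
env = create_azureml_env(ws, env_name="my-cpu-env", conda_yml="environment.yml")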
def get_or_create_detector_environment(ws: Workspace, force_creation=False) -> Environment:
    def _create_environment(ws, environment_name):
        env = Environment(name=environment_name)
        env.docker.enabled = True
        env.docker.base_image = None
        with open(os.path.join(SRC_PATH, "Dockerfile.detector"), "r") as dockerfile:
            env.docker.base_dockerfile = dockerfile.read()
        env.python.user_managed_dependencies = True
        env.register(workspace=ws)
        return env

    if not force_creation:
        try:
            return Environment.get(ws, ENVIRONMENT_NAME_DETECTOR)
        except Exception:
            pass
    return _create_environment(ws, ENVIRONMENT_NAME_DETECTOR)
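`SRC_PATH` and `ENVIRONMENT_NAME_DETECTOR` come from the surrounding module and are not shown; plausible placeholder definitions, given only as assumptions:

import os

# Hypothetical module-level constants assumed by the function above.
SRC_PATH = os.path.join(os.path.dirname(__file__), "src")
ENVIRONMENT_NAME_DETECTOR = "detector-env"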
def create_conda_environment(workspace, name, conda_dependencies, pip_dependencies):
    """
    Create an environment or retrieve it by its name from the workspace.
    Pip installs Python packages, whereas conda installs packages which may contain
    software written in any language.
    e.g. TensorFlow, Scikit-Learn -> conda; Matplotlib -> pip
    """
    if name in Environment.list(workspace):
        env = Environment.get(workspace=workspace, name=name)
        print("The environment '{}' already existed for the workspace".format(name))
    else:
        env = Environment(name=name)
        env.docker.enabled = True
        env.python.conda_dependencies = CondaDependencies.create(
            conda_packages=conda_dependencies,
            pip_packages=pip_dependencies,
        )
        env.register(workspace=workspace)
    return env
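A usage sketch with illustrative package choices, following the conda-vs-pip split described in the docstring:

from azureml.core import Workspace

ws = Workspace.from_config()
env = create_conda_environment(
    ws,
    name="sklearn-env",
    conda_dependencies=["scikit-learn", "pandas"],  # compiled/native packages via conda
    pip_dependencies=["matplotlib"],                # pure-Python packages via pip
)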
def submit():
    # define workspace
    ws = Workspace.from_config()

    # create compute if it does not already exist
    cluster_name = "goazurego"
    try:
        target = ComputeTarget(workspace=ws, name=cluster_name)
        print(f"Found existing cluster - {cluster_name}.")
    except ComputeTargetException:
        # create a configuration
        compute_config = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2", max_nodes=2, min_nodes=0)
        target = ComputeTarget.create(ws, cluster_name, compute_config)
        target.wait_for_completion(show_output=True)

    # use the curated tensorflow 1.15 environment
    environment_name = "AzureML-TensorFlow-1.15-Inference-CPU"
    tf_env = Environment.get(workspace=ws, name=environment_name)

    # create script run configuration; passing compute_target here makes a
    # separate run_config.target assignment unnecessary
    src = ScriptRunConfig(source_directory=".",
                          script="train.py",
                          compute_target=target,
                          environment=tf_env)

    # create an experiment
    experiment_name = "pycon-experiment"
    experiment = Experiment(workspace=ws, name=experiment_name)

    # run experiment
    run = experiment.submit(config=src)
    run.wait_for_completion(show_output=True)
    return True
def main(name: str, version: int, run_async: bool, local: bool, force: bool):
    """If the environment is already built, either exit or pull it locally"""
    environ = Environment.get(WS, name=name, version=version)
    if is_environment_built(environ=environ) and not force:
        if local:
            print(
                f"'{name}' is already built - pulling locally. Use '--force' to force a rebuild on the local context"
            )
            pull_image(**parse_image_details(environ))
        else:
            print(f"'{name}' is already built. Use '--force' to force a rebuild")
    else:
        if local:
            environ.build_local(WS)
        else:
            build = environ.build(WS)
            if not run_async:
                sleep(5)
                build.wait_for_completion(show_output=True)
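`is_environment_built`, `parse_image_details`, and `pull_image` are project helpers that are not shown; plausible sketches, where the image-detail field names and the docker CLI call are assumptions rather than the library API:

import subprocess


def is_environment_built(environ) -> bool:
    # Assumes the build state is exposed via get_image_details(); treat any
    # failure to resolve an image as "not built".
    try:
        details = environ.get_image_details(WS)
        return bool(details.get("imageExistsInRegistry"))
    except Exception:
        return False


def parse_image_details(environ) -> dict:
    details = environ.get_image_details(WS)
    return {"image_name": details["dockerImage"]["name"],
            "registry": details["dockerImage"]["registry"]["address"]}


def pull_image(image_name: str, registry: str) -> None:
    # Plain docker pull of the environment's built image.
    subprocess.run(["docker", "pull", f"{registry}/{image_name}"], check=True)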
def create_or_update_mlapp_env(workspace, requirements_path, wheel_path, env_name):
    """
    Usage:
        ws = init_workspace()
        create_or_update_mlapp_env(
            workspace=ws,
            requirements_path='../../../requirements.txt',
            wheel_path='./../../dist/mlapp-2.0.0-py3-none-any.whl',
            env_name='mlapp')
    """
    # Build the environment from the requirements.txt file. from_pip_requirements
    # is a static factory, so an existing registered environment is simply
    # superseded when the new definition is registered below.
    new_env = Environment.from_pip_requirements(name=env_name, file_path=requirements_path)

    # settings for environment
    new_env.docker.enabled = True
    new_env.python.user_managed_dependencies = False

    # add private package; exist_ok=True keeps re-runs from failing when the
    # wheel was already uploaded
    whl_url = Environment.add_private_pip_wheel(workspace, wheel_path, exist_ok=True)
    new_env.python.conda_dependencies.add_pip_package(whl_url)

    # build and register environment
    new_env = new_env.register(workspace)
    build_env_run = new_env.build(workspace)
    build_env_run.wait_for_completion(show_output=False)
    print(build_env_run.log_url)
    print(build_env_run.status)
def _create_running_config(ws: Workspace, path_to_dataset_in_datastore,
                           compute_target_name, env_name, model_save_path):
    # The original wrapped this body in a try/except that only re-raised, so
    # the wrapper is dropped.
    module_root_folder_path = r'online_training'
    compute_target = _get_compute_target(ws, compute_target_name)
    default_data_store = ws.get_default_datastore()
    dataset = Dataset.File.from_files(path=(default_data_store, path_to_dataset_in_datastore))
    module_env = Environment.get(workspace=ws, name=env_name)
    running_config = ScriptRunConfig(
        source_directory=module_root_folder_path,
        script='train.py',
        compute_target=compute_target,
        environment=module_env,
        arguments=[
            "--data_path", dataset.as_mount(),
            "--batch_size", 32,
            "--model_path", model_save_path
        ])
    print("running config created!")
    return running_config
import json
from pathlib import Path


def show_available_environment(workspace):
    list_env = Environment.list(workspace=workspace)
    for env in list_env:
        if env.startswith("AzureML"):
            print("Name", env)
            print("packages",
                  list_env[env].python.conda_dependencies.serialize_to_string())


with open(Path(__file__).parent / 'configuration.json', 'r') as config_file:
    config = json.load(config_file)

name = config['environment_base']
env = Environment.get(workspace=ws, name=name)

# From a Conda specification file
# myenv = Environment.from_conda_specification(name="myenv", file_path="path-to-conda-specification-file")

# From a pip requirements file
# myenv = Environment.from_pip_requirements(name="myenv", file_path="path-to-pip-requirements-file")

# From an existing Conda environment
# myenv = Environment.from_existing_conda_environment(name="myenv", conda_environment_name="mycondaenv")

# From an existing yml file
# myenv = Environment.load_from_directory(path="path-to-source-directory")
def get_mlapp_environment(workspace, env_name, version=None):
    return Environment.get(workspace=workspace, name=env_name, version=version)
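A usage sketch; pass an explicit version to pin a registered revision, or leave `version=None` for the latest:

env = get_mlapp_environment(ws, "mlapp", version=1)  # specific registered version
latest = get_mlapp_environment(ws, "mlapp")          # latest registered version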
# Authenticate with AzureML
auth = ServicePrincipalAuthentication(
    tenant_id=auth_config["tenant_id"],
    service_principal_id=auth_config["service_principal_id"],
    service_principal_password=os.environ["SP_SECRET"],
)
ws = Workspace(
    subscription_id=auth_config["subscription_id"],
    resource_group=auth_config["resource_group"],
    workspace_name=auth_config["workspace_name"],
    auth=auth,
)

env = Environment.get(workspace=ws, name="component-condition")
env.docker.enabled = True
inf_config = InferenceConfig(entry_script="./score.py", environment=env)
model = Model(ws, name=conf["metadata"]["model_name"])
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=2,
    description="Webservice to predict non-compliant car components.",
    enable_app_insights=True)
svc = Model.deploy(
    workspace=ws,
    name="compcondition",
    models=[model],
    inference_config=inf_config,
    deployment_config=deployment_config,
)
compute_name = "gpu-V100-2" # script arguments arguments = [ "--deepspeed", "--deepspeed_config", "ds_config.json", "--deepspeed_mpi", "--global_rank", "$AZ_BATCHAI_TASK_INDEX", "--with_aml_log", True, ] # Use the DeepSpeed Curated Environment env = Environment.get(ws, name="AzureML-DeepSpeed-0.3-GPU") # create job config mpi_config = MpiConfiguration(node_count=2, process_count_per_node=2) src = ScriptRunConfig( source_directory=script_dir, script=script_name, arguments=arguments, environment=env, compute_target=compute_name, distributed_job_config=mpi_config, ) # submit job run = Experiment(ws, experiment_name).submit(src)
ray_environment_dockerfile_path = "./Docker/Dockerfile-cpu"

# Build CPU image for Ray
ray_cpu_env = Environment.from_dockerfile(
    name=ray_environment_name, dockerfile=ray_environment_dockerfile_path)
ray_cpu_env.register(workspace=ws)
ray_cpu_build_details = ray_cpu_env.build(workspace=ws)

while ray_cpu_build_details.status not in ["Succeeded", "Failed"]:
    print(
        f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}"
    )
    time.sleep(10)

command = ["python distribute_automl.py"]
env = Environment.get(workspace=ws, name=ray_environment_name)
compute_target = ws.compute_targets["cpucluster"]

aml_run_config = RunConfiguration(communicator="OpenMpi")
aml_run_config.target = compute_target
aml_run_config.docker = DockerConfiguration(use_docker=True)
aml_run_config.environment = env
aml_run_config.node_count = 2

config = ScriptRunConfig(
    source_directory="ray/",
    command=command,
    run_config=aml_run_config,
)

exp = Experiment(ws, "distribute-automl")
run = exp.submit(config)
print(run.get_portal_url())  # link to ml.azure.com
from typing import Tuple

import click
from azureml.core import (ComputeTarget, Dataset, Environment,
                          RunConfiguration, Workspace)
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.experiment import Experiment
from azureml.pipeline.core import (Pipeline, PipelineData, PipelineParameter,
                                   PublishedPipeline)
from azureml.pipeline.steps import DatabricksStep, PythonScriptStep

CLI_AUTH = AzureCliAuthentication()
# noinspection PyTypeChecker
WS = Workspace.from_config(auth=CLI_AUTH)
RC = RunConfiguration()
RC.environment = Environment.get(WS, "lightgbm")


# noinspection PyTypeChecker
def create_databricks_step(
        input_dataset: Dataset,
        compute: ComputeTarget,
        debug_run: bool) -> Tuple[DatabricksStep, PipelineData]:
    output_data = PipelineData(name="ParquetFiles",
                               datastore=WS.get_default_datastore(),
                               is_directory=True)
    node_size = 'Standard_DS4_v2'
    spark_version = '7.3.x-cpu-ml-scala2.12'
    db_step = DatabricksStep(
        name='Convert to Parquet',
# A cheap machine with CPU
from azureml.core import Environment, Workspace, ScriptRunConfig, Experiment
from utils.agent import createAmlCompute

EXPERIMENT_NAME = 'road-segmentation-build'
ENV_NAME = 'AzureML-TensorFlow-2.3-CPU'
CLUSTER_NAME = 'CPU-cluster'
VM_SIZE = 'Standard_D1_v2'

ws = Workspace.from_config()

# Create an experiment
experiment = Experiment(ws, EXPERIMENT_NAME)

# Create an environment
tf_env = Environment.get(ws, ENV_NAME)

# Create compute target
compute_target = createAmlCompute(ws, CLUSTER_NAME, VM_SIZE)

# Create run configuration params
script_run_params = dict(source_directory='./build',
                         script='build.py',
                         arguments=['--path_model', 'models/new'],
                         compute_target=compute_target,
                         environment=tf_env)

src = ScriptRunConfig(**script_run_params)
run = experiment.submit(src)
run.wait_for_completion()
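`createAmlCompute` comes from the project's `utils.agent` module and is not shown; a plausible sketch, assuming the standard get-or-create pattern used elsewhere in this collection:

from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException


def createAmlCompute(ws, cluster_name, vm_size):
    # Hypothetical helper: reuse the cluster if it exists, otherwise
    # provision a small autoscaling one.
    try:
        return ComputeTarget(workspace=ws, name=cluster_name)
    except ComputeTargetException:
        config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=2)
        target = ComputeTarget.create(ws, cluster_name, config)
        target.wait_for_completion(show_output=True)
        return target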
# register
env.register(workspace=ws)

# view registered environments
from azureml.core import Environment

env_names = Environment.list(workspace=ws)
for env_name in env_names:
    print('Name:', env_name)

# retrieve environment
from azureml.core import Environment
from azureml.train.estimator import Estimator

training_env = Environment.get(workspace=ws, name='training_environment')
estimator = Estimator(source_directory='experiment_folder',
                      entry_script='training_script.py',
                      compute_target='local',
                      environment_definition=training_env)

# create a managed compute target
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute

# Load the workspace from the saved config file
ws = Workspace.from_config()

# Specify a name for the compute (unique within the workspace)
compute_name = 'aml-cluster'
dataset_version = 1
arguments = [
    "--remote_debug",
    "--remote_debug_connection_timeout", 300,
    "--remote_debug_client_ip", ip,
    "--remote_debug_port", 5678,
    "--version", dataset_version,
]
env = Environment.get(workspace=ws, name="ds_envs")
src = ScriptRunConfig(
    source_directory=get_project_root() / "ds_envs" / "cloud",
    script="train.py",
    arguments=arguments,
    compute_target="local",
    environment=env,
)
experiment_name = "my_experiment"
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=src)
run.wait_for_completion(show_output=True)
ws = Workspace.from_config()

# get root of git repo
prefix = Path(__file__).parent

# training script
source_dir = str(prefix.joinpath("src"))
script_name = "train.py"

# azure ml settings
experiment_name = "tensorflow-mnist-distributed"
compute_name = "gpu-8x-a100"

# environment
env = Environment.get(
    workspace=ws,
    name="AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu").clone(
        "tensorflow-2.4-gpu")

# Experiment configuration
node_count = 2  # number of nodes
process_count_per_node = 8  # number of GPUs per node

# create distributed config
distr_config = MpiConfiguration(process_count_per_node=process_count_per_node,
                                node_count=node_count)

# create arguments
args = ["--epochs", 5]

# create job config
src = ScriptRunConfig(
from azureml.core import Workspace, Environment, Webservice
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice
from azureml.exceptions import WebserviceException

from src.utils import update_metadata, load_metadata

# Initialise
METADATA = load_metadata()
SERVICE_DESCRIPTION = 'Heart failure predictor web service'
SERVICE_NAME = 'heartfailure-prediction'
CPU_CORES = 1
MEMORY_GB = 1

# Get environment
workspace = Workspace.from_config()
environment = Environment.get(workspace=workspace, name=METADATA['env_name'])

# Deploy container
inference_config = InferenceConfig(
    entry_script='./src/score.py',
    environment=environment,
)
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=CPU_CORES,
    memory_gb=MEMORY_GB,
    description=SERVICE_DESCRIPTION)

# Deploy as web service
try:
    # Remove any existing service under the same name.
    Webservice(workspace, SERVICE_NAME).delete()
except WebserviceException:
    pass
model = register_model(**registration_params)
print('Name:', model.name)
print('Version:', model.version)

service_name = 'sahulat-service'
try:
    Webservice(ws, service_name).delete()
except WebserviceException:
    pass

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment.get(workspace=ws, name="myenv")

from azureml.core.model import InferenceConfig

with open('src/score.py') as f:
    print(f.read())

inference_config = InferenceConfig(entry_script='src/score.py', environment=myenv)
aci_deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

from azureml.core.webservice import LocalWebservice

local_deployment_config = LocalWebservice.deploy_configuration(port=6789)

service = Model.deploy(workspace=ws,
                       name=service_name,
# Can poll for a minimum number of nodes and for a specific timeout.
# If no min node count is provided it will use the scale settings for the cluster
compute_target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)

# For a more detailed view of current cluster status, use the 'status' property
print(compute_target.status.serialize())

aml_run_config = RunConfiguration()
# `compute_target` as defined in "Azure Machine Learning compute" section above
aml_run_config.target = compute_target

USE_CURATED_ENV = True
if USE_CURATED_ENV:
    curated_environment = Environment.get(workspace=ws, name="AzureML-Tutorial")
    aml_run_config.environment = curated_environment
else:
    aml_run_config.environment.python.user_managed_dependencies = False
    # Add some packages relied on by data prep step
    aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['pandas', 'scikit-learn', 'seaborn', 'tqdm'],
        pip_packages=[
            'azureml-sdk', 'azureml-dataprep[fuse,pandas]', 'seaborn', 'tqdm'
        ],
        pin_sdk_version=False)

web_path = 'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'
my_dataset = Dataset.Tabular.from_delimited_files(
    path=web_path, set_column_types={'Survived': DataType.to_bool()})
# Create a Python environment for the experiment
fraud_env = Environment("fraud-pipeline-env")
fraud_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
fraud_env.docker.enabled = True  # Use a docker container

# Create a set of package dependencies
fraud_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'],
    pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
fraud_env.python.conda_dependencies = fraud_packages

# Register the environment (just in case you want to use it again)
fraud_env.register(workspace=ws)
registered_env = Environment.get(ws, 'fraud-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print("Run configuration created.")

# Get the training dataset
fraud_ds = ws.datasets.get("creditcard")
def main():
    train_file = r"EdwardFry_Microsoft_issueDataset.csv"
    ws = Workspace.from_config()  # Loads config.json

    # Default datastore
    def_data_store = ws.get_default_datastore()

    # Get the blob storage associated with the workspace
    def_blob_store = Datastore(ws, "workspaceblobstore")

    # Get file storage associated with the workspace
    def_file_store = Datastore(ws, "workspacefilestore")

    # Set data input and output
    xyz_phishing_dataset = Dataset.File.from_files([(def_blob_store, train_file)])
    output_data1 = OutputFileDatasetConfig(
        destination=(def_blob_store, 'outputdataset/{run-id}'))
    output_data_dataset = output_data1.register_on_complete(name='prepared_output_data')

    # Set compute
    compute_name = "aml-compute"
    vm_size = "STANDARD_NC6"
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)
    else:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,  # STANDARD_NC6 is GPU-enabled
            min_nodes=0,
            max_nodes=4)
        # create the compute target
        compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
        # Can poll for a minimum number of nodes and for a specific timeout.
        # If no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)
        # For a more detailed view of current cluster status, use the 'status' property
        print(compute_target.status.serialize())

    aml_run_config = RunConfiguration()
    # `compute_target` as defined in "Azure Machine Learning compute" section above
    aml_run_config.target = compute_target

    USE_CURATED_ENV = True
    if USE_CURATED_ENV:
        curated_environment = Environment.get(workspace=ws, name="AzureML-Tutorial")
        aml_run_config.environment = curated_environment
    else:
        aml_run_config.environment.python.user_managed_dependencies = False
        # Add some packages relied on by data prep step
        aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
            conda_packages=['pandas', 'scikit-learn'],
            pip_packages=['azureml-sdk', 'azureml-dataprep[fuse,pandas]'],
            pin_sdk_version=False)

    dataprep_source_dir = "./dataprep_src"
    entry_point = "prepare.py"
    # `my_dataset` as defined above
    ds_input = xyz_phishing_dataset.as_named_input('input1')

    # `output_data1`, `compute_target`, `aml_run_config` as defined above
    data_prep_step = PythonScriptStep(script_name=entry_point,
                                      source_directory=dataprep_source_dir,
                                      arguments=["--input", ds_input.as_download(),
                                                 "--output", output_data1],
                                      compute_target=compute_target,
                                      runconfig=aml_run_config,
                                      allow_reuse=True)

    train_source_dir = "./train_src"
    train_entry_point = "train.py"
    training_results = OutputFileDatasetConfig(name="training_results",
                                               destination=def_blob_store)
    train_step = PythonScriptStep(script_name=train_entry_point,
                                  source_directory=train_source_dir,
                                  arguments=["--prepped_data", output_data1.as_input(),
                                             "--training_results", training_results],
                                  compute_target=compute_target,
                                  runconfig=aml_run_config,
                                  allow_reuse=True)

    # list of steps to run (`compare_step` definition not shown)
    compare_models = [data_prep_step, train_step, compare_step]

    # Build the pipeline
    pipeline1 = Pipeline(workspace=ws, steps=compare_models)

    # dataset_consuming_step = PythonScriptStep(
    #     script_name="iris_train.py",
    #     inputs=[iris_tabular_dataset.as_named_input("iris_data")],
    #     compute_target=compute_target,
    #     source_directory=project_folder
    # )
# run_context = Run.get_context()
# iris_dataset = run_context.input_datasets['iris_data']
# dataframe = iris_dataset.to_pandas_dataframe()

## Within a PythonScriptStep
# ws = Run.get_context().experiment.workspace

# step = PythonScriptStep(name="Hello World",
#                         script_name="hello_world.py",
#                         compute_target=aml_compute,
#                         source_directory=source_directory,
#                         allow_reuse=False,
#                         hash_paths=['hello_world.ipynb'])

# Submit the pipeline to be run
pipeline_run1 = Experiment(ws, 'Compare_Models_Exp').submit(pipeline1)
pipeline_run1.wait_for_completion()