Example #1
def get_environment(
    ws,
    environment_name,
    docker_image="todrabas/aml_rapids:latest",
    python_interpreter="/opt/conda/envs/rapids/bin/python",
    conda_packages=["matplotlib"],
):
    if environment_name not in ws.environments:
        env = Environment(name=environment_name)
        env.docker.enabled = True
        env.docker.base_image = docker_image

        env.python.interpreter_path = python_interpreter
        env.python.user_managed_dependencies = True

        conda_dep = CondaDependencies()

        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)

        env.python.conda_dependencies = conda_dep
        env.register(workspace=ws)
    else:
        env = ws.environments[environment_name]

    return env
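A minimal usage sketch for the helper above, assuming a workspace config file is available locally; the environment name and extra conda package are placeholders.

from azureml.core import Workspace

ws = Workspace.from_config()  # load the workspace from config.json

# Register the RAPIDS-based environment on first use, or fetch it if it already exists
rapids_env = get_environment(ws, "rapids-env", conda_packages=["matplotlib", "dask"])
print(rapids_env.name)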
Example #2
def createOrGetEnvironment(ws, login_config, app_config):
    environment_name = login_config["aml_compute"]["environment_name"]
    python_interpreter = login_config["aml_compute"]["python_interpreter"]
    conda_packages = login_config["aml_compute"]["conda_packages"]

    ### CREATE OR RETRIEVE THE ENVIRONMENT
    if environment_name not in ws.environments:
        logger.info(f"Creating {environment_name} environment...")
        env = Environment(name=environment_name)
        env.docker.enabled = login_config["aml_compute"]["docker_enabled"]
        env.docker.base_image = None
        env.docker.base_dockerfile = f'FROM {app_config["base_dockerfile"]}'
        env.python.interpreter_path = python_interpreter
        env.python.user_managed_dependencies = True
        conda_dep = CondaDependencies()

        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)

        env.python.conda_dependencies = conda_dep
        env.register(workspace=ws)
    else:
        logger.info(f"    Environment {environment_name} found...")
        env = ws.environments[environment_name]

    return env
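The helper above reads its settings from two nested dictionaries. A hedged sketch of their expected shape, inferred from the keys the function accesses (all values are placeholders):

login_config = {
    "aml_compute": {
        "environment_name": "rapids-env",
        "python_interpreter": "/opt/conda/envs/rapids/bin/python",
        "conda_packages": ["matplotlib"],
        "docker_enabled": True,
    }
}
app_config = {"base_dockerfile": "todrabas/aml_rapids:latest"}  # base image used in the generated FROM line

env = createOrGetEnvironment(ws, login_config, app_config)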
Example #3
def get_or_create_python_environment(azure_config: AzureConfig,
                                     source_config: SourceConfig,
                                     environment_name: str = "",
                                     register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files specified in `source_config`. If an environment with this Conda definition already
    exists, it is retrieved; otherwise it is created afresh.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided. This parameter is meant to be used when running
    inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace. If
    False, it will only be created, but not registered. Use this for unit testing.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi
        conda_dependencies.set_pip_option(f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of that,
    # and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([merged_yaml, azure_config.docker_shm_size, base_image, str(env_variables)])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(), name=env_name_to_find, version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(f"Python environment '{unique_env_name}' does not yet exist, creating and registering it.")
    env = Environment(name=unique_env_name)
    env.docker.enabled = True
    env.docker.shm_size = azure_config.docker_shm_size
    env.python.conda_dependencies = conda_dependencies
    env.docker.base_image = base_image
    env.environment_variables = env_variables
    if register_environment:
        env.register(azure_config.get_workspace())
    return env
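A hedged call sketch; azure_config and source_config are assumed to be already constructed InnerEye configuration objects and are not shown here.

# Resolve (or create and register) the environment for an AzureML submission
env = get_or_create_python_environment(
    azure_config=azure_config,     # InnerEye AzureConfig instance (assumed)
    source_config=source_config,   # InnerEye SourceConfig instance (assumed)
    register_environment=True,
)
logging.info(f"Resolved environment: {env.name}")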
Example #4
def _create_environment(ws, environment_name):
    env = Environment(workspace=ws, name=environment_name)
    env.docker.enabled = True
    env.docker.base_image = None
    env.docker.base_dockerfile = open(
        os.path.join(SRC_PATH, "Dockerfile.detector"), "r").read()
    env.python.user_managed_dependencies = True
    env.register(workspace=ws)
    return env
def create_or_update_environment(workspace,
                                 name,
                                 docker_image,
                                 docker_args=None):

    try:
        environment = Environment.get(workspace, name)
        cprint(
            'Using existing environment "{}"'.format(colored(name, "white")),
            "green")
    except Exception:  # MS - please implement an exception type for env not found
        cprint('Creating new environment "{}"'.format(colored(name, "white")),
               "yellow")
        environment = Environment(name=name)

    environment.docker.enabled = True
    environment.python.user_managed_dependencies = True

    # Handle dockerfile vs image spec accordingly
    if os.path.exists(docker_image):
        environment.docker.base_dockerfile = docker_image
        environment.docker.base_image = None
    else:
        environment.docker.base_dockerfile = None
        environment.docker.base_image = docker_image

    environment.docker.shm_size = "100g"
    if docker_args is not None:
        environment.docker.arguments = docker_args

    environment = environment.register(workspace=workspace)

    return environment
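Two hedged usage sketches for the helper above, showing the Dockerfile-vs-image branch; the paths, names and registry are placeholders.

# A local path is treated as a Dockerfile to build from
detector_env = create_or_update_environment(
    workspace=ws,
    name="detector-env",
    docker_image="src/Dockerfile.detector",
)

# Anything else is treated as a prebuilt image reference
prebuilt_env = create_or_update_environment(
    workspace=ws,
    name="detector-env-prebuilt",
    docker_image="myregistry.azurecr.io/detector:latest",
    docker_args=["--ipc=host"],
)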
Example #6
def create_conda_environment(workspace, name, conda_dependencies,
                             pip_dependencies):
    """
    Create an environment, or retrieve it by name from the workspace.
    Pip installs Python packages, whereas conda installs packages that may contain software written in any language,
    e.g. TensorFlow, scikit-learn -> conda; matplotlib -> pip.
    """
    if name in Environment.list(workspace):
        env = Environment.get(workspace=workspace, name=name)
        print("The environment '{}' already existed for the workspace".format(
            name))
    else:
        env = Environment(name=name)
        env.docker.enabled = True
        env.python.conda_dependencies = CondaDependencies.create(
            conda_packages=conda_dependencies,
            pip_packages=pip_dependencies,
        )
        env.register(workspace=workspace)
    return env
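A minimal call sketch following the docstring's pip-vs-conda split; the environment name and package lists are illustrative.

env = create_conda_environment(
    workspace=ws,
    name="sklearn-train-env",
    conda_dependencies=["scikit-learn", "pandas"],          # resolved by conda
    pip_dependencies=["matplotlib", "azureml-defaults"],    # resolved by pip
)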
def get_training_environment(ws: Workspace,
                             name: str,
                             pip_file: str,
                             use_gpu: bool = False,
                             include_prerelease: bool = False,
                             environment_type: str = None):
    '''
    Creates a training environment, based on the required pip packages, the need for a GPU and a given environment type.
    Args:
        ws (Workspace): the AzureML workspace that will be used to register the environment
        name (str): the name for the environment that will be registered
        pip_file (str): path to the pip requirements file that lists the training packages
        use_gpu (bool): indicates whether a GPU is required
        include_prerelease (bool): indicates whether the pip packages can be installed in prerelease mode
        environment_type (str): either the name of an existing environment to take as base, or one of these values: tensorflow, sklearn, pytorch.
    Returns:
        a registered environment, ready to use
    '''
    from azureml.train.estimator import Estimator
    from azureml.core import Environment, ScriptRunConfig
    from azureml.core.runconfig import RunConfiguration
    from azureml.core.runconfig import CondaDependencies

    print('Getting environment for type', environment_type)
    base_environment = environment_type
    if (environment_type == 'tensorflow'):
        # Using Tensorflow Estimator
        base_environment = 'AzureML-TensorFlow-2.0-GPU' if use_gpu else 'AzureML-TensorFlow-2.0-CPU'
    elif (environment_type == 'sklearn'):
        base_environment = 'AzureML-Scikit-learn-0.20.3'
    elif (environment_type == 'pytorch'):
        base_environment = 'AzureML-PyTorch-1.5-GPU' if use_gpu else 'AzureML-PyTorch-1.5-CPU'

    pip_packages = __get_package_list_from_requirements(pip_file)

    if base_environment is not None:
        print('Taking', base_environment, 'as base environment')
        training_env = Environment.get(ws, base_environment)
        training_env.name = name
        for pippkg in pip_packages:
            training_env.python.conda_dependencies.add_pip_package(pippkg)

    else:
        print('Creating new environment')
        training_env = Environment(name=name)
        training_env.python.conda_dependencies = CondaDependencies.create(
            pip_packages=pip_packages)

    if (include_prerelease):
        training_env.python.conda_dependencies.set_pip_option("--pre")

    training_env.docker.enabled = True
    _ = training_env.register(workspace=ws)
    return training_env
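A hedged usage sketch, assuming a local requirements.txt holds the training dependencies and ws is an existing Workspace.

training_env = get_training_environment(
    ws,
    name="pytorch-train-env",        # placeholder name
    pip_file="requirements.txt",     # placeholder requirements file
    use_gpu=True,
    environment_type="pytorch",      # start from the curated PyTorch environment
)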
    # ----PYTHON ENV------
    #-------------------------
    packages = CondaDependencies.create(
        conda_packages=["cudatoolkit=10.0"],
        pip_packages=[
            'azureml-sdk', 'PyYAML', 'azure-storage-blob', 'matplotlib',
            'seaborn', 'tensorflow', 'Keras', 'tensorflow-hub', 'joblib',
            'tqdm', 'Pillow', 'azureml-dataprep[pandas,fuse]>=1.1.14'
        ])

    diagnoz_env = Environment("diagnoz-pipeline-env")
    diagnoz_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diagnoz_env.docker.enabled = True  # Use a docker container
    diagnoz_env.docker.base_image = DEFAULT_GPU_IMAGE
    diagnoz_env.python.conda_dependencies = packages
    diagnoz_env.register(workspace=ws)

    # Runconfigs
    pipeline_run_config = RunConfiguration()
    pipeline_run_config.target = compute_target
    pipeline_run_config.environment = diagnoz_env
    print("Run configuration created.")

    shutil.rmtree(script_folder, ignore_errors=True)
    os.makedirs(script_folder, exist_ok=True)

    #copy all necessary scripts
    files = FilesProviders.get_path_files(
        "../", [os.path.basename(__file__), "__init__.py"])

    for f in files:
Example #9
conda_dep.add_pip_package("transformers")
conda_dep.add_pip_package("matplotlib")
conda_dep.add_pip_package("apex==0.9.10dev")
conda_dep.add_pip_package("pandas")
conda_dep.add_pip_package("pillow")
conda_dep.add_pip_package("requests")
conda_dep.add_pip_package("scikit-learn")
conda_dep.add_pip_package("tqdm")
bert_env.docker.enabled = True
# bert_env.from_conda_specification('bert', './environment.yml')
# bert_env.from_existing_conda_environment('bert_aml_env', 'bert')
bert_env.python.conda_dependencies = conda_dep

bert_env.register(ws)

# Choose a name for your GPU cluster
gpu_cluster_name = "gpu-compute-bert"

# Verify that cluster does not exist already
try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6', max_nodes=4)
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)

gpu_cluster.wait_for_completion(show_output=True)
Example #10
pipeline_cluster.wait_for_completion(show_output=True)

# Create a Python environment for the experiment
fraud_env = Environment("fraud-pipeline-env")
fraud_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
fraud_env.docker.enabled = True  # Use a docker container

# Create a set of package dependencies
fraud_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'], pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
fraud_env.python.conda_dependencies = fraud_packages

# Register the environment (just in case you want to use it again)
fraud_env.register(workspace=ws)
registered_env = Environment.get(ws, 'fraud-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print("Run configuration created.")

# Get the training dataset
fraud_ds = ws.datasets.get("creditcard")
Example #11
def main():
    parser = argparse.ArgumentParser(
        description="NGC Set Up on AzureML Compute Cluster")
    parser.add_argument("--config_file",
                        type=str,
                        help="location of config file")
    args = parser.parse_args()
    config_file = args.config_file

    print(config_file)
    configdata = ngccontent.get_config(config_file)
    subscription_id = configdata["azureml_user"]["subscription_id"]
    resource_group = configdata["azureml_user"]["resource_group"]
    workspace_name = configdata["azureml_user"]["workspace_name"]

    ws = Workspace(workspace_name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

    verify = f'''
    Subscription ID: {subscription_id}
    Resource Group: {resource_group}
    Workspace: {workspace_name}'''
    print(verify)

    ### vnet settings
    vnet_rg = ws.resource_group
    vnet_name = configdata["aml_compute"]["vnet_name"]
    subnet_name = configdata["aml_compute"]["subnet_name"]

    ### azure ml names
    ct_name = configdata["aml_compute"]["ct_name"]
    exp_name = configdata["aml_compute"]["exp_name"]

    ### trust but verify
    verify = f'''
    vNET RG: {vnet_rg}
    vNET name: {vnet_name}
    vNET subnet name: {subnet_name}
    Compute target: {ct_name}
    Experiment name: {exp_name}'''
    print(verify)

    if configdata["aml_compute"]["vm_name"] in configdata[
            "supported_vm_sizes"].keys():
        vm_name = configdata["aml_compute"]["vm_name"]
        gpus_per_node = configdata["supported_vm_sizes"][vm_name]

        print(
            "Setting up compute target {ct_name} with vm_size: {vm_name} with {gpus_per_node} GPUs"
            .format(ct_name=ct_name,
                    vm_name=vm_name,
                    gpus_per_node=gpus_per_node))

        if ct_name not in ws.compute_targets:
            config = AmlCompute.provisioning_configuration(
                vm_size=vm_name,
                min_nodes=configdata["aml_compute"]["min_nodes"],
                max_nodes=configdata["aml_compute"]["max_nodes"],
                vnet_resourcegroup_name=vnet_rg,
                vnet_name=vnet_name,
                subnet_name=subnet_name,
                idle_seconds_before_scaledown=configdata["aml_compute"]
                ["idle_seconds_before_scaledown"],
                remote_login_port_public_access='Enabled')
            ct = ComputeTarget.create(ws, ct_name, config)
            ct.wait_for_completion(show_output=True)
        else:
            print("Loading Pre-existing Compute Target {ct_name}".format(
                ct_name=ct_name))
            ct = ws.compute_targets[ct_name]
    else:
        vm_name = configdata["aml_compute"]["vm_name"]
        print("Unsupported vm_size {vm_size}".format(vm_size=vm_name))
        print("The specified vm size must be one of ...")
        for azure_gpu_vm_size in configdata["supported_vm_sizes"].keys():
            print("... " + azure_gpu_vm_size)
        raise Exception(
            "{vm_size} does not support Pascal or above GPUs".format(
                vm_size=vm_name))

    environment_name = configdata["aml_compute"]["environment_name"]
    python_interpreter = configdata["aml_compute"]["python_interpreter"]
    conda_packages = configdata["aml_compute"]["conda_packages"]
    from azureml.core import ContainerRegistry

    if environment_name not in ws.environments:
        env = Environment(name=environment_name)
        env.docker.enabled = configdata["aml_compute"]["docker_enabled"]
        env.docker.base_image = None
        env.docker.base_dockerfile = "FROM {dockerfile}".format(
            dockerfile=configdata["ngc_content"]["base_dockerfile"])
        env.python.interpreter_path = python_interpreter
        env.python.user_managed_dependencies = True
        conda_dep = CondaDependencies()

        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)

        env.python.conda_dependencies = conda_dep
        env.register(workspace=ws)
    else:
        env = ws.environments[environment_name]

    amlcluster = AzureMLComputeCluster(
        workspace=ws,
        compute_target=ct,
        initial_node_count=1,
        experiment_name=configdata["aml_compute"]["exp_name"],
        environment_definition=env,
        use_gpu=True,
        n_gpus_per_node=1,
        jupyter=True,
        jupyter_port=configdata["aml_compute"]["jupyter_port"],
        dashboard_port=9001,
        scheduler_port=9002,
        scheduler_idle_timeout=1200,
        worker_death_timeout=30,
        additional_ports=[],
        datastores=[],
        telemetry_opt_out=True,
        asynchronous=False)

    print(amlcluster.jupyter_link)
    amlcluster.jupyter_link
    print('Exiting script')
deps = CondaDependencies.create(pip_packages=['azureml-defaults'])
env.python.conda_dependencies = deps


#override w custom img
env.docker.base_image = 'my-base-image'
env.docker.base_image_registry.address = 'myregistry.azurecr.io'


#override how AZ auto handles pkg dependencies etc
env.python.user_managed_dependencies=True
env.python.interpreter_path = '/opt/miniconda/bin/python'


#register
env.register(workspace=ws)

#view registered
from azureml.core import Environment

env_names = Environment.list(workspace=ws)
for env_name in env_names:
    print('Name:',env_name)

#retrieve enviro
from azureml.core import Environment
from azureml.train.estimator import Estimator

training_env = Environment.get(workspace=ws, name='training_environment')
estimator = Estimator(source_directory='experiment_folder',
                      entry_script='training_script.py',
def get_environment(
    workspace: Workspace,
    environment_name: str,
    conda_dependencies_file: str = None,
    create_new: bool = False,
    enable_docker: bool = None,
    docker_image: str = None,
    dockerfile: str = None,
    use_gpu: bool = False,
):
    try:
        e = Env()
        environments = Environment.list(workspace=workspace)
        restored_environment = None
        for env in environments:
            if env == environment_name:
                restored_environment = environments[environment_name]

        if restored_environment is None or create_new:

            # Environment has to be created
            if conda_dependencies_file is not None:
                new_env = Environment.from_conda_specification(
                    environment_name,
                    os.path.join(e.sources_directory_train, conda_dependencies_file),  # NOQA: E501
                )  # NOQA: E501
                restored_environment = new_env
            else:
                restored_environment = Environment(environment_name)

            if enable_docker is not None:
                restored_environment.docker.enabled = enable_docker

                if docker_image is not None:
                    restored_environment.docker.base_image = docker_image
                    # In case of own image
                    # don't append AML managed dependencies
                    restored_environment.python.\
                        user_managed_dependencies = True
                elif dockerfile is not None:
                    # Alternatively, load from a file.
                    with open(dockerfile, "r") as f:
                        dockerfile = f.read()
                        restored_environment.docker.\
                            base_dockerfile = dockerfile
                    # In case of own Dockerfile
                    # don't append AML managed dependencies
                    restored_environment.python.\
                        user_managed_dependencies = True
                else:
                    restored_environment.docker.\
                        base_image = DEFAULT_GPU_IMAGE if use_gpu else DEFAULT_CPU_IMAGE  # NOQA: E501

            restored_environment.register(workspace)

        if restored_environment is not None:
            print(restored_environment)
        return restored_environment
    except Exception as e:
        print(e)
        exit(1)
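A hedged usage sketch; Env() is assumed to come from the project's ml_service utilities, and the conda file name is a placeholder resolved relative to e.sources_directory_train.

environment = get_environment(
    workspace=ws,
    environment_name="train-env",
    conda_dependencies_file="conda_dependencies.yml",
    create_new=False,
    enable_docker=True,
    use_gpu=False,
)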
Example #14
# Create a Python environment for the experiment

model_env = Environment(env_name)
model_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
model_env.docker.enabled = True # Use a docker container

# Create a set of package dependencies
packages = CondaDependencies.create(conda_packages=['scikit-learn','pandas'],
                                             pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
model_env.python.conda_dependencies = packages

# Register the environment (just in case you want to use it again)
model_env.register(workspace=ws)

registered_env = Environment.get(ws, env_name)

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above. 
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print ("Run configuration created.")

Example #15
        "requests==2.21.0",
        "sklearn",
        "pandas",
        "numpy",
        "pillow==6.0.0",
        "tensorflow-gpu==1.15",
        "keras",
        "matplotlib",
        "seaborn",
    ])

env = Environment("prednet")
env.python.conda_dependencies = conda_dependencies
env.docker.enabled = True
env.docker.base_image = DEFAULT_GPU_IMAGE
env.register(ws)

# Runconfigs
runconfig = RunConfiguration()
runconfig.environment = env
print("PipelineData object created")

create_pipelines = PythonScriptStep(
    name="create pipelines",
    script_name="pipelines_slave.py",
    compute_target=cpu_compute_target,
    arguments=[
        "--cpu_compute_name", cpu_compute_name, "--gpu_compute_name",
        gpu_compute_name
    ],
    source_directory=script_folder,
freezer_environment = Environment("sktime_freezer_environment")
cd = CondaDependencies.create(
    conda_packages=["numpy", "cython", "pandas", "scikit-learn"],
    pip_packages=[
        "azureml-defaults",
        "inference-schema[numpy-support]",
        "joblib==0.13.*",
        "azureml-dataprep[pandas, fuse]",
        "sktime",
    ],
)
freezer_environment.docker.enabled = True
freezer_environment.docker.base_image = DEFAULT_CPU_IMAGE
freezer_environment.python.conda_dependencies = cd
freezer_environment.register(workspace=ws)
logger.info("Environment registered")

try:
    cpu_cluster = ComputeTarget(workspace=ws, name="freezertrain")
    logger.info("Found existing compute target")
except ComputeTargetException:
    logger.info("Creating a new compute target...")
    cpu_cluster = ComputeTarget.create(
        ws,
        "freezertrain",
        AmlCompute.provisioning_configuration(vm_size="STANDARD_DS3_V2", max_nodes=8),
    )

    cpu_cluster.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)
Example #17
def get_environment(workspace,
                    name,
                    pip_requirements=None,
                    conda_specification=None,
                    conda_env=None,
                    docker_image=None,
                    docker_file=None,
                    override=False,
                    inference_stack=None):
    """
    Get an Azure ML environment from PIP or Conda. From:
    - pip_requirements
    - conda_specification
    - conda_env
    at most one can be provided. If none is provided, it is assumed that the
    requirements are taken care of by the user.

    From:
    - docker_image
    - docker_file
    at most one can be provided. If none is provided, the base Azure image is
    used.

    :params workspace:              The Azure ML workspace to look for existing
                                    environments.
    :params name:                   Name for this environment
    :params pip_requirements:       Path to the pip requirements file
    :params conda_specification:    Path to the conda specification file
    :params conda_env:              Name of the conda environment to use
    :params docker_image:           Base the image off an existing docker image
    :params docker_file:            Base the image off a Dockerfile.
    :params override:               Create a new environment with this name,
                                    regardless of if one already exists.
    :params inference_stack:        Add a stack that enables this environment
                                    for inference. "latest" is a valid option.
                                    Set to None to not add this.
    :returns:                       Azure ML environment or None in case of
                                    failure
    """
    if not override:
        try:
            env = Environment.get(workspace, name)

            print("Existing environment found, using that")
            return env
        except:
            print("No environment with that name found, creating new one")

    # Validate at most one of pip_requirements, conda_specification, conda_env
    # is provided
    if sum([
            1 for x in [pip_requirements, conda_specification, conda_env]
            if x is not None
    ]) > 1:
        print("Provide at most 1 of pip_requirements, conda_specification, "
              "conda_env")
        return None

    # Validate that at most one of docker_image, docker_file is
    # provided
    if sum([1 for x in [docker_image, docker_file] if x is not None]) > 1:
        print("Provide at most 1 of docker_image, docker_file")
        return None

    if pip_requirements is not None:
        env = Environment.from_pip_requirements(name, pip_requirements)
    elif conda_specification is not None:
        env = Environment.from_conda_specification(name, conda_specification)
    elif conda_env is not None:
        env = Environment.from_existing_conda_environment(name, conda_env)
    else:
        env = Environment(name)
        env.python.user_managed_dependencies = True

    if docker_file is not None:
        env.docker.enabled = True
        env.docker.base_image = None
        env.docker.base_dockerfile = docker_file
    elif docker_image is not None:
        env.docker.enabled = True
        env.docker.base_image = docker_image

    if inference_stack is not None:
        env.inferencing_stack_version = inference_stack

    # Register environment
    env.register(workspace=workspace)

    return env
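Two hedged call sketches illustrating the "at most one of each group" rule from the docstring; file names and Dockerfile contents are placeholders.

# From a pip requirements file, on the default Azure base image
pip_env = get_environment(ws, "scoring-env", pip_requirements="requirements.txt")

# From a conda specification plus a custom Dockerfile, enabled for inference
conda_env = get_environment(
    ws,
    "inference-env",
    conda_specification="environment.yml",
    docker_file="FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04\n",  # placeholder Dockerfile contents
    inference_stack="latest",
)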
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

#created an env
my_env = Environment("My_new_env")
conda_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
my_env.python.conda_dependencies = conda_dep

my_env.register(workspace=ws)

#creating the cluster
from azureml.core.compute import AmlCompute

cluster_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D11_V2",
                                                       max_nodes=2)

cluster = AmlCompute.create(ws, "My_cluster", cluster_config)

cluster.wait_for_completion()

#fetching the data
input_ds = ws.datasets.get("Loan Application")

#for ScriptRunning

from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory = ".",
def main():
    e = Env()

    print('********************')
    print(e.source_directory)

    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)

    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    #Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)

    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )

    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv',
                   'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path=
            'diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        #Create a tabular dataset from the path on the datastore (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)

    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name,
                                                    compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=[
            'scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip'
        ],
        pip_packages=[
            'azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow'
        ])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above.
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env

    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary Data Reference) for the model folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, Run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data',
                                     diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(
        name="Train and Register Model",
        source_directory='./aml_pipeline',
        script_name="train_diabetes.py",
        arguments=['--training-folder', prepped_data_folder],
        inputs=[prepped_data_folder],
        compute_target=pipeline_cluster,
        runconfig=pipeline_run_config,
        allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    published_pipeline

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
Example #20
env = Environment("component-condition")
env.docker.enabled = True
cd = CondaDependencies.create(
    conda_packages=[
        "tensorflow=2.0.0",
        "pandas",
        "numpy",
        "matplotlib"
        ],
    pip_packages=[
        "azureml-mlflow==1.5.0",
        "azureml-defaults==1.5.0"
    ]
)
env.python.conda_dependencies = cd
env.register(workspace=ws)
print("Registered environment component-condition")

# Specify the run configuration
run_config = RunConfiguration()
run_config.environment.docker.enabled = True
run_config.environment.python.conda_dependencies = cd

# Pipeline definition
inputdata = DataReference(
    datastore=Datastore.get(ws, "trainingdata"),
    data_reference_name="data"
)

train_model = PythonScriptStep(
    script_name="./train.py",
Example #21
from azureml.core import Workspace, Experiment, ScriptRunConfig

ws = Workspace.from_config()

experiment = Experiment(ws, "training-exp-02")

from azureml.core import Environment
from azureml.core.environment import CondaDependencies

#creating Custom Environment
my_env = Environment(name="MyEnvironment2")

#creating the Dependencies
my_env_dep = CondaDependencies.create(conda_packages=['scikit-learn'])

my_env.python.conda_dependencies = my_env_dep

#registering the Environment
my_env.register(ws)

script_config = ScriptRunConfig(source_directory=".",
                                script="02-training_script.py",
                                environment=my_env)

new_run = experiment.submit(config=script_config)

new_run.wait_for_completion()
Example #22
def main():
    e = Env()
    
    from azureml.core.authentication import InteractiveLoginAuthentication

    interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ.get("AZURE_TENANT_ID"))
    subscription=os.environ.get("CSUBSCRIPTION")
    workspace_name=e.workspace_name
    resource_group=e.resource_group

    aml_workspace = Workspace.get(
        name = workspace_name,
        subscription_id = subscription,
        resource_group=resource_group,
        auth=interactive_auth
    )

    from ml_service.util.attach_compute import get_compute

    # Get Azure machine learning cluster
    # If not present then get_compute will create a compute based on environment variables

    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    print("SDK version: ", azureml.core.VERSION)

    ## Variable names that can be passed in as parameter values
    from azureml.pipeline.core.graph import PipelineParameter
    from azureml.core import Datastore

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")
    #model_path = PipelineParameter(
    #    name="model_path", default_value=e.model_path)    

    if (e.datastore_name):
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    # Get the datastore whether it is the default or named store
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset_name = e.dataset_name

    # Create a reusable Azure ML environment
    from ml_service.util.manage_environment import get_environment
    from azureml.core import Environment

    # RUN Configuration
    ## Must have this process to work with AzureML-SDK 1.0.85
    from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
    from azureml.core.conda_dependencies import CondaDependencies

    try:
        app_env = Environment(name="smartschedule_env")
        app_env.register(workspace=aml_workspace)
    except Exception:
        print("Environment registration failed")
    
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    aml_run_config.environment.environment_variables["DATASTORE_NAME"] = e.datastore_name  # NOQA: E501

    # Use the aml_compute you created above. 
    aml_run_config.target = aml_compute

    # Enable Docker
    aml_run_config.environment.docker.enabled = True

    # Set Docker base image to the default CPU-based image
    aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    #aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

    # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
    aml_run_config.environment.python.user_managed_dependencies = False

    app_conda_deps=CondaDependencies.create(
        conda_packages=['pandas','scikit-learn', 'libgcc','pyodbc', 'sqlalchemy', 'py-xgboost==0.90'], 
        pip_packages=['azureml-sdk[automl,explain,contrib,interpret]==1.4.0', 'xgboost==0.90', 'azureml-dataprep==1.4.6', 'pyarrow', 'azureml-defaults==1.4.0', 'azureml-train-automl-runtime==1.4.0'], pin_sdk_version=False)

    # Specify CondaDependencies obj, add necessary packages
    aml_run_config.environment.python.conda_dependencies = app_conda_deps

    print ("Run configuration created.")
    from azure.common.credentials import ServicePrincipalCredentials
    #from azure.keyvault import KeyVaultClient, KeyVaultAuthentication

    from azure.keyvault.secrets import SecretClient
    from azure.identity import DefaultAzureCredential
    import pandas as pd
    #import sqlalchemy as sql
    import pyodbc

    def get_data(sql_string, columns):
        credentials = None
        credential = DefaultAzureCredential()

        secret_client = SecretClient("https://smrtschd-aml-kv.vault.azure.net", credential=credential)    
        secret = secret_client.get_secret("database-connection")

        #client = KeyVaultClient(KeyVaultAuthentication(auth_callback))
        #secret_bundle = client.get_secret("https://smrtschd-aml-kv.vault.azure.net", "database-connection", "")

        server = 'starlims-sql.database.windows.net'
        database = 'QM12_DATA_AUTOMATION'
        username = '******'
        password = secret.value
        driver= '{ODBC Driver 17 for SQL Server}'
        conn = pyodbc.connect('Driver='+driver+';'+
                            'Server='+server+';'+
                            'Database='+database+';'+
                            'PORT=1433;'+
                            'UID='+username+';'+
                            'PWD='+password+'; MARS_Connection=Yes'
        )

        try:
            SQL_Query = pd.read_sql_query(sql_string, conn)

            df = pd.DataFrame(SQL_Query, columns=columns)
            return df
        except Exception as e:
            print(e)
            raise

    sql_str = "SELECT " \
            "  Dept " \
            ", Method " \
            ", Servgrp " \
            ", Runno " \
            ", TestNo " \
            ", Testcode " \
            ", Total_Duration_Min " \
            ", Total_Duration_Hr " \
            ", Usrnam " \
            ", Eqid " \
            ", Eqtype " \
        "FROM dbo.Draft " \
        "order by TESTCODE, RUNNO, dept, method;"

    columns = ["Dept", "Method", "Servgrp", "Runno", "TestNo", "Testcode", "Total_Duration_Min", "Total_Duration_Hr", "Usrnam", "Eqid","Eqtype"]

    from azureml.core import Dataset
    from sklearn.model_selection import train_test_split

    if (e.train_dataset_name not in aml_workspace.datasets):

        
        df = get_data(sql_str, columns)

        train_df, test_df=train_test_split(df, test_size=0.2)

        MY_DIR = "data"

        CHECK_FOLDER = os.path.isdir(MY_DIR)

        if not CHECK_FOLDER:
            os.makedirs(MY_DIR)
        else:
            print("Folder ", MY_DIR, " is already created")

        #files = ["data/analyst_tests.csv"]
        files = ["data/train_data.csv","data/test_data.csv"]

        def_file_store = Datastore(aml_workspace, "workspacefilestore")

        dtfrm = df.to_csv(files[0], header=True, index=False)

        train_dataframe=train_df.to_csv(files[0], header=True, index=False)
        test_dataframe=test_df.to_csv(files[1], header=True, index=False)
        datastore.upload_files(
            files=files,
            target_path='data/',
            overwrite=True
        )

        from azureml.data.data_reference import DataReference

        blob_input_data_test=DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertest",
            path_on_datastore="data/test_data.csv"
        )
        test_data=Dataset.Tabular.from_delimited_files(blob_input_data_test)
        test_data.register(aml_workspace, e.test_dataset_name, create_new_version=True)

        blob_input_data_train=DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertrain",
            path_on_datastore="data/train_data.csv"
        )
        train_data=Dataset.Tabular.from_delimited_files(blob_input_data_train)
        train_data.register(aml_workspace, e.train_dataset_name, create_new_version=True)

    else:
        from azureml.data.data_reference import DataReference
        print("getting from the datastore instead of uploading")

        train_data=Dataset.get_by_name(aml_workspace, name=e.train_dataset_name)
        test_data=Dataset.get_by_name(aml_workspace, name=e.test_dataset_name)

    # Peek at the first few rows of the training dataset.
    tdf = train_data.to_pandas_dataframe().head(5)

    print(tdf.shape)
    print(tdf)

    # display the first five rows of the data
    # create a variable that can be used for other purposes
    df=train_data.to_pandas_dataframe().head()

    label_column="Total_Duration_Min"

    import random
    import string

    def randomString(stringLength=15):
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    from azureml.core import Experiment

    experiment = Experiment(aml_workspace, "SmartScheduler_Pipeline")


    import logging

    aml_name = 'smart_scheduler_' + randomString(5)
    print(aml_name)

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.ticker import StrMethodFormatter

    print(df.head(5))
    print(df.shape)
    print(df.dtypes)

    #df.hist(column='Dept')
    list(df.columns.values)

    # Remove Features that are not necessary.
    #df.hist(column="Servgrp", bins=4)
    train_data=train_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])
    test_data=test_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])

    print(train_data.to_pandas_dataframe())
    print(test_data.to_pandas_dataframe())

    from azureml.automl.core.featurization import FeaturizationConfig

    # some of the columns could be change to one hot encoding especially if the categorical column
    featurization_config=FeaturizationConfig()
    featurization_config.blocked_transformers=['LabelEncoder']
    featurization_config.add_column_purpose('Dept', 'CategoricalHash')
    featurization_config.add_transformer_params('HashOneHotEncoder',['Method'], {"number_of_bits":3})
    featurization_config.add_column_purpose('Servgrp', 'CategoricalHash')
    featurization_config.add_column_purpose('Testcode', 'Numeric')
    featurization_config.add_column_purpose('Usrnam', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqid', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqtype', 'CategoricalHash')

    from azureml.pipeline.core import Pipeline, PipelineData
    from azureml.pipeline.steps import PythonScriptStep

    #train_model_folder = './scripts/trainmodel'

    automl_settings = {
        "iteration_timeout_minutes": 5,
        "iterations": 5,
        "enable_early_stopping": True,
        "primary_metric": 'spearman_correlation',
        "verbosity": logging.INFO,
        "n_cross_validation":5
    }

    automl_config = AutoMLConfig(task="regression",
                    debug_log='automated_ml_errors.log',
                    #path = train_model_folder,
                    training_data=train_data,
                    featurization=featurization_config,
                    blacklist_models=['XGBoostRegressor'],
                    label_column_name=label_column,
                    compute_target=aml_compute,
                    **automl_settings)

    from azureml.pipeline.steps import AutoMLStep
    from azureml.pipeline.core import TrainingOutput

    metrics_output_name = 'metrics_output'
    best_model_output_name='best_model_output'

    metrics_data = PipelineData(name = 'metrics_data',
                    datastore = datastore,
                    pipeline_output_name=metrics_output_name,
                    training_output=TrainingOutput(type='Metrics'))

    model_data = PipelineData(name='model_data',
                datastore=datastore,
                pipeline_output_name=best_model_output_name,
                training_output=TrainingOutput(type='Model'))

    trainWithAutomlStep = AutoMLStep(
                        name=aml_name,
                        automl_config=automl_config,
                        passthru_automl_config=False,
                        outputs=[metrics_data, model_data],
                        allow_reuse=True
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name='./evaluate/evaluate_model.py',
        #  e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel
        ]
    )

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name='register/register_model2.py', #e.register_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param,
            "--model_path", model_data,
            "--ds_name", e.train_dataset_name
        ],
        runconfig=aml_run_config,
        allow_reuse=False
    )

    if ((e.run_evaluation).lower() == 'true'):
        print("Include evaluation step before register step.")
        evaluate_step.run_after(trainWithAutomlStep)
        register_step.run_after(evaluate_step)
        pipeline_steps = [ trainWithAutomlStep, evaluate_step, register_step ]
    else:
        print("Exclude the evaluation step and run register step")
        register_step.run_after(trainWithAutomlStep)
        pipeline_steps = [ trainWithAutomlStep, register_step ]

    print( "this is the value for execute pipeline: {}".format(e.execute_pipeline))

    if( (e.execute_pipeline).lower() =='true' ):
        # Execute the pipe normally during testing and debugging
        print("Pipeline submitted for execution.")
        pipeline = Pipeline(workspace = aml_workspace, steps=pipeline_steps)
        pipeline_run = experiment.submit(pipeline)
        pipeline_run.wait_for_completion()
        print("Pipeline is built.")
    else:
        # Generates pipeline that will be called in ML Ops
        train_pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        train_pipeline._set_experiment_name
        train_pipeline.validate()
        published_pipeline = train_pipeline.publish(
            name=e.pipeline_name,
            description="Model training/retraining pipeline",
            version=e.build_id
        )
        print(f'Published pipeline: {published_pipeline.name}')
        print(f'for build {published_pipeline.version}')