def create_run_config(cpu_cluster, docker_proc_type, conda_env_file):
    """
    AzureML requires the run environment to be set up prior to submission.
    This configures a Docker-based persistent compute target. Even though
    it is called persistent compute, AzureML handles startup and shutdown
    of the compute environment.

    Args:
        cpu_cluster      (str) : Names the cluster for the test
                                 In the case of unit tests, any of
                                 the following:
                                 - Reco_cpu_test
                                 - Reco_gpu_test
        docker_proc_type (str) : processor type, cpu or gpu
        conda_env_file   (str) : filename of the conda environment
                                 specification used to set up the env
    Returns:
          run_amlcompute : AzureML run config
    """

    # runconfig with max_run_duration_seconds did not work, check why:
    # run_amlcompute = RunConfiguration(max_run_duration_seconds=60*30)
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = cpu_cluster
    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = docker_proc_type

    # Use the conda environment file to create a conda environment in
    # the Docker image for execution
    # False means AzureML will build the environment from the conda file
    # True means the user will manually configure the environment
    run_amlcompute.environment.python.user_managed_dependencies = False
    run_amlcompute.environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path=conda_env_file)
    return run_amlcompute
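A minimal usage sketch (not part of the original example): the cluster name, conda file, script path, and experiment name are placeholders, and the default CPU image is passed for `docker_proc_type` because the function assigns that argument to `docker.base_image`.

# Hedged usage sketch: cluster, conda file, script, and experiment names are placeholders.
from azureml.core import Experiment, ScriptRunConfig, Workspace
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

ws = Workspace.from_config()
run_config = create_run_config(cpu_cluster="Reco_cpu_test",
                               docker_proc_type=DEFAULT_CPU_IMAGE,
                               conda_env_file="reco_cpu.yaml")
src = ScriptRunConfig(source_directory=".",
                      script="tests/run_tests.py",
                      run_config=run_config)
run = Experiment(workspace=ws, name="unit-tests").submit(src)
run.wait_for_completion(show_output=True)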
Example 2
def _create_default_run_configs(project_directory, compute_target_dict):
    """
    Creates a local.runconfig and docker.runconfig for a project.
    :return: None
    """
    from azureml.core.runconfig import RunConfiguration
    # Mock a project object: RunConfiguration requires a Project object but
    # only uses its project_directory field.
    project_object = empty_function
    project_object.project_directory = project_directory

    # Creating a local runconfig.
    local_run_config = RunConfiguration()
    local_run_config.save(name="local", path=project_directory)

    # Creating a docker runconfig.
    docker_run_config = RunConfiguration()
    docker_run_config.environment.docker.enabled = True
    docker_run_config.save(name="docker", path=project_directory)

    for compute_target_name, compute_target in compute_target_dict.items():
        # Creating a compute runconfig.
        compute_config = RunConfiguration()
        if compute_target.type == 'HDInsight':
            compute_config.framework = "PySpark"
        else:
            compute_config.framework = "Python"
            compute_config.environment.docker.enabled = True
        compute_config.target = compute_target_name
        compute_config.save(name=compute_target_name, path=project_directory)
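A minimal call sketch (not part of the original example), assuming `ws` is an existing Workspace whose attached compute targets should each get a .runconfig file in the current project directory.

# Hedged usage sketch: assumes an existing Workspace object `ws`.
_create_default_run_configs(project_directory=".",
                            compute_target_dict=ws.compute_targets)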
Example 3
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The name of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(
        azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(
        source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(
            azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(
        azure_config, source_config, environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(
            node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {
            dataset.name: dataset
            for dataset in dataset_consumptions
        }
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(
        WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(
            script_run_config)  # type: ignore
    return script_run_config
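A hedged call sketch (not part of the original source): it assumes `azure_config` and `source_config` were already populated by InnerEye's own setup code; the dataset and experiment names are placeholders.

# Hedged usage sketch: dataset and experiment names are placeholders.
from azureml.core import Experiment

script_run_config = create_run_config(azure_config,
                                      source_config,
                                      all_azure_dataset_ids=["my_dataset"],
                                      all_dataset_mountpoints=[])
run = Experiment(workspace=azure_config.get_workspace(),
                 name="innereye-training").submit(script_run_config)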
Example 4
    def __get_run_config(self,
                         compute_target,
                         channels=None,
                         conda_packages=None,
                         pip_packages=None):
        # Create a new run configuration for the Python framework in memory
        # (rather than loading an existing .runconfig file)
        run_config = RunConfiguration(framework="python")

        # Set compute target to the Linux DSVM
        run_config.target = compute_target.name

        # Do not use Docker in the remote VM
        run_config.environment.docker.enabled = False

        # Ask the system to provision a new conda environment based on the dependencies below
        run_config.environment.python.user_managed_dependencies = False

        # Prepare the Docker and conda environment automatically when used the first time.
        run_config.auto_prepare_environment = True

        # specify dependencies obj
        conda_dependencies = CondaDependencies.create(
            conda_packages=conda_packages, pip_packages=pip_packages)
        if channels:
            for channel in channels:
                conda_dependencies.add_channel(channel)

        run_config.environment.python.conda_dependencies = conda_dependencies

        return run_config
Example 5
def fetch_run_config(compute_target, base_image, sp_username, sp_tenant,
                     sp_password):
    """ Generates a Run Configuration based on the pipeline parameters,
    specifying such things as the Compute Target and Conda Dependencies. 
    """

    # Inits configuration for Python
    run_config = RunConfiguration(framework="python")

    # Specifies compute target
    run_config.target = compute_target

    # Configures Docker/Image/Environment Variable parameters
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = base_image
    run_config.environment.environment_variables = {
        "SP_USERNAME": sp_username,
        "SP_TENANT": sp_tenant,
        "SP_PASSWORD": sp_password
    }

    # Specifies the Conda file location (auto-injected when preparing the staging area)
    run_config.environment.python.conda_dependencies = CondaDependencies(
        os.path.join("snapshot", "inputs", "environment.yml"))

    # Returns configuration
    return run_config
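A hedged call sketch: the compute target, base image, and service-principal values below are placeholders and would normally come from pipeline variables or a key vault rather than being hard-coded.

# Hedged usage sketch: all argument values are placeholders.
import os

run_config = fetch_run_config(compute_target="cpu-cluster",
                              base_image="mcr.microsoft.com/azureml/base:latest",
                              sp_username=os.environ.get("SP_USERNAME", ""),
                              sp_tenant=os.environ.get("SP_TENANT", ""),
                              sp_password=os.environ.get("SP_PASSWORD", ""))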
Example 6
def _write_compute_run_config(source_directory, compute_target_object, compute_yaml):
    """
    :param source_directory:
    :type source_directory: str
    :param compute_target_object:
    :type compute_target_object: azureml.core.compute_target.AbstractComputeTarget
    :param compute_yaml:
    :type compute_yaml: dict
    :return:
    """
    from azureml.core.compute_target import _BatchAITarget
    # Writing the target.compute file.
    run_config_dir_name = get_run_config_dir_name(source_directory)
    file_path = os.path.join(source_directory, run_config_dir_name,
                             compute_target_object.name + COMPUTECONTEXT_EXTENSION)
    with open(file_path, 'w') as outfile:
        ruamel.yaml.dump(compute_yaml, outfile, default_flow_style=False)

    # This creates a run config and writes it in the aml_config/<compute_target_name>.runconfig file
    run_config_object = RunConfiguration()
    run_config_object.target = compute_target_object

    if compute_target_object.type == _BatchAITarget._BATCH_AI_TYPE:
        run_config_object.environment.docker.enabled = True

    run_config_object.framework = compute_target_object._default_framework

    run_config_object.save(name=compute_target_object.name, path=source_directory)
Example 7
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
    string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
Example 8
    def get_run_config(self, config):

        environment_config = config.get("environment")
        environment = self.get_environment(environment_config)

        cluster_name = config.get("cluster")
        cluster = ComputeTarget(workspace=self.workspace, name=cluster_name)

        pipeline_run_config = RunConfiguration()
        pipeline_run_config.target = cluster
        pipeline_run_config.environment = environment

        return pipeline_run_config
Example 9
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env:
        aml_run_config.environment = env
    else:
        aml_run_config.environment = create_env_from_requirements()

    return aml_run_config
Example 10
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env is not None:
        aml_run_config.environment = env
    else:
        # Enable Docker
        aml_run_config.environment.docker.enabled = True

        # Set Docker base image to the default CPU-based image
        aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

        # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
        aml_run_config.environment.python.user_managed_dependencies = False

    return aml_run_config
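A hedged call sketch for the `env` branch, assuming a conda specification file exists locally; the environment name, file path, and cluster name are placeholders.

# Hedged usage sketch: environment name, file path, and cluster name are placeholders.
from azureml.core import Environment

env = Environment.from_conda_specification(name="pipeline-env",
                                           file_path="environment.yml")
aml_run_config = create_runconfig("cpu-cluster", env=env)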
Example 11
def mi_run_config(ws, compute):
    whl_url = Environment.add_private_pip_wheel(workspace=ws,
                                                file_path=d.WHL_VINX_AZURE_ML,
                                                exist_ok=True)
    run_config = RunConfiguration()
    run_config.target = compute
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = None
    run_config.environment.docker.base_dockerfile = 'FROM mcr.microsoft.com/azureml/base:latest\nRUN apt-get update && apt-get -y install freetds-dev freetds-bin vim gcc'
    run_config.environment.python.user_managed_dependencies = False
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=[
            'tqdm', 'cython', 'matplotlib', 'scikit-learn', 'fbprophet'
        ],
        pip_packages=[
            'azureml-sdk', 'pandas', 'lightgbm', 'scipy==1.4.1', 'statsmodels',
            'mlxtend', 'optuna', 'xgboost', 'CatBoost', 'tensorflow', 'keras',
            'jpholiday', 'joblib', 'pymssql==2.1.1'
        ],
        pin_sdk_version=False)
    run_config.environment.python.conda_dependencies.add_pip_package(whl_url)

    return run_config
Example 12
def get_run_config(aml_compute, conda_dep):
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = aml_compute

    dockerfile = r"""
    FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04

    RUN apt-get update && \
        apt-get install -y sudo curl apt-transport-https && \
        apt-get update && \
        sudo su && \
        curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
        curl https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
        sudo apt-get update && \
        sudo ACCEPT_EULA=Y apt-get install -y msodbcsql17 && \
        sudo apt-get install -y unixodbc-dev
    """

    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = None
    run_amlcompute.environment.docker.base_dockerfile = dockerfile
    run_amlcompute.environment.python.conda_dependencies = conda_dep

    return run_amlcompute
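A hedged call sketch that builds the CondaDependencies object this helper expects; the package list and cluster name are illustrative only.

# Hedged usage sketch: package list and cluster name are illustrative only.
from azureml.core.conda_dependencies import CondaDependencies

conda_dep = CondaDependencies.create(conda_packages=["pandas"],
                                     pip_packages=["pyodbc", "azureml-sdk"])
run_config = get_run_config(aml_compute="cpu-cluster", conda_dep=conda_dep)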
Example 13
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core.compute import ComputeTarget

# refers to an existing compute resource attached to the workspace!
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
    
        
#<run_hdi>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


# use pyspark framework
run_hdi = RunConfiguration(framework="pyspark")

# Set compute target to the HDI cluster
run_hdi.target = hdi_compute.name

# specify CondaDependencies object to ask the system to install numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
run_hdi.environment.python.conda_dependencies = cd
#</run_hdi>
print(run_hdi)
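A hedged follow-up sketch (not in the original snippet) that submits a PySpark script with `run_hdi`; the script and experiment names are placeholders.

# Hedged follow-up sketch: script and experiment names are placeholders.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory='.', script='train_spark.py', run_config=run_hdi)
run = Experiment(workspace=ws, name='hdi-experiment').submit(src)
run.wait_for_completion(show_output=True)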
Example 14
def run(workspace, config, args):
    compute_target_name = config['train']['compute_target_name']
    data_folder = config['train']['data_folder']

    try:
        compute_target = ComputeTarget(workspace=workspace,
                                       name=compute_target_name)
        print('found existing:', compute_target.name)
    except ComputeTargetException:
        print('creating new.')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=config['train']['vm_size'], min_nodes=0, max_nodes=1)
        compute_target = ComputeTarget.create(workspace, compute_target_name,
                                              compute_config)
        compute_target.wait_for_completion(show_output=True)

    # ds = Datastore.register_azure_blob_container(
    #     workspace,
    #     datastore_name=config['train']['datastore_name'],
    #     account_name=config['train']['account_name'],
    #     account_key=config['train']['account_key'],
    #     container_name=config['train']['container_name'],
    #     overwrite=True)
    #
    # # # Upload local "data" folder (incl. files) as "tfdata" folder
    # ds.upload(
    #     src_dir=config['train']['local_directory'],
    #     target_path=data_folder,
    #     overwrite=True)

    ds = Datastore.get(workspace,
                       datastore_name=config['train']['datastore_name'])

    # generate data reference configuration
    dr_conf = DataReferenceConfiguration(
        datastore_name=ds.name, path_on_datastore=data_folder, mode='mount'
    )  # set 'download' if you copy all files instead of mounting

    run_config = RunConfiguration(framework="python",
                                  conda_dependencies=CondaDependencies.create(
                                      conda_packages=ast.literal_eval(
                                          config['train']['conda_packages'])))
    run_config.target = compute_target.name
    run_config.data_references = {ds.name: dr_conf}
    run_config.environment.docker.enabled = True
    # run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    src = ScriptRunConfig(
        source_directory='./script',
        script='train.py',
        run_config=run_config,
        arguments=[
            '--datadir',
            str(ds.as_mount()), '--step', args.step, '--train_on',
            args.train_on, '--fold', args.fold, '--epochs', args.epochs,
            '--experiment', args.experiment, '--reference', args.reference,
            '--batchsize', args.batchsize, '--optimizertype',
            args.optimizertype, '--convrnn_filters', args.convrnn_filters,
            '--learning_rate', args.learning_rate, '--pix250m', args.pix250m
        ])
    # exp = Experiment(workspace=ws, name='test20181210-09')
    exp = Experiment(workspace=workspace,
                     name=config['train']['experiment_name'])
    run = exp.submit(config=src)
    run.wait_for_completion(show_output=True)
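A hedged sketch of the `config` dict shape this `run()` expects, based only on the keys read above; every value is a placeholder, and `conda_packages` must be the string form of a list because it is parsed with `ast.literal_eval`.

# Hedged sketch of the expected config shape: all values are placeholders.
config = {
    'train': {
        'compute_target_name': 'gpu-cluster',
        'vm_size': 'STANDARD_NC6',
        'datastore_name': 'workspaceblobstore',
        'data_folder': 'tfdata',
        'conda_packages': "['numpy', 'tensorflow']",  # string, parsed via ast.literal_eval
        'experiment_name': 'crop-classification',
    }
}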
Example 15
        pip_packages=[
            'azureml-sdk', 'PyYAML', 'azure-storage-blob', 'matplotlib',
            'seaborn', 'tensorflow', 'Keras', 'tensorflow-hub', 'joblib',
            'tqdm', 'Pillow', 'azureml-dataprep[pandas,fuse]>=1.1.14'
        ])

    diagnoz_env = Environment("diagnoz-pipeline-env")
    diagnoz_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diagnoz_env.docker.enabled = True  # Use a docker container
    diagnoz_env.docker.base_image = DEFAULT_GPU_IMAGE
    diagnoz_env.python.conda_dependencies = packages
    diagnoz_env.register(workspace=ws)

    # Runconfigs
    pipeline_run_config = RunConfiguration()
    pipeline_run_config.target = compute_target
    pipeline_run_config.environment = diagnoz_env
    print("Run configuration created.")

    shutil.rmtree(script_folder, ignore_errors=True)
    os.makedirs(script_folder, exist_ok=True)

    #copy all necessary scripts
    files = FilesProviders.get_path_files(
        "../", [os.path.basename(__file__), "__init__.py"])

    for f in files:
        shutil.copy(f, script_folder)
    #add generated config file to script folder
    shutil.copy(generated_config_file, script_folder)
Example 16
cli_auth = AzureCliAuthentication()
print('done creating AzureCliAuthentication!')

print('get workspace...')
ws = Workspace.from_config(path=args.path, auth=cli_auth)
print('done getting workspace!')

print("looking for existing compute target.")
aml_compute = AmlCompute(ws, args.aml_compute_target)
print("found existing compute target.")

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# Use the cpu_cluster you created above.
run_amlcompute.target = args.aml_compute_target

# Enable Docker
run_amlcompute.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False

# Auto-prepare the Docker image when used for execution (if it is not already prepared)
run_amlcompute.auto_prepare_environment = True

# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(
Example 17
ws = Workspace.from_config(path='./aml_config/config.json')
print(ws.name)

experiment_name = 'train-on-amlcompute'
experiment = Experiment(workspace = ws, name = experiment_name)

project_folder = './train-on-amlcompute'
os.makedirs(project_folder, exist_ok=True)
shutil.copy('/code/training/train.py', project_folder)

# create a new runconfig object
run_config = RunConfiguration()

# signal that you want to use AmlCompute to execute script.
run_config.target = "amlcompute"

# AmlCompute will be created in the same region as workspace
# Set vm size for AmlCompute
run_config.amlcompute.vm_size = 'STANDARD_D2_V2'

# enable Docker 
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
Example 18
    run_conf.environment.python.user_managed_dependencies = True
    compute_target = compute_name

# Use AzureML compute target:
else:

    # Create compute target if it doesn't already exist:
    try:
        compute_target = ComputeTarget(workspace=ws, name=compute_name)
    except ComputeTargetException:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', min_nodes=0, max_nodes=6)
        compute_target = ComputeTarget.create(ws, compute_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    run_conf.target = compute_target
    run_conf.environment.docker.enabled = True
    run_conf.environment.docker.base_image = DEFAULT_CPU_IMAGE
    run_conf.environment.python.conda_dependencies = \
        CondaDependencies(conda_dependencies_file_path='env.yml')
    run_conf.environment.python.user_managed_dependencies = False
    if cv:
        run_conf.communicator = 'OpenMPI'
        run_conf.mpi = MpiConfiguration()
        run_conf.node_count = cv + 2
exp = Experiment(workspace=ws, name=config['experiment_name'])

use_estimator = True
if use_estimator:
    if cv:
        script_params = {'--cv': cv}
Example 19
print('..3. completed')
print('')
print('')
print('4. Instantiate AML managed compute ref...')
print('.............................................')
amlTrainingComputeRef = AmlCompute(amlWs, args.aml_compute_target)
print('..4. completed')
print('')
print('')

print("5. Instantiate and configure run object for the managed compute...")
print('.............................................')
# Create runconfig object
amlComputeRunConf = RunConfiguration()
# Use the compute provisioned
amlComputeRunConf.target = args.aml_compute_target
# Enable Docker
amlComputeRunConf.environment.docker.enabled = True
# Set Docker base image to the default CPU-based image
amlComputeRunConf.environment.docker.base_image = DEFAULT_CPU_IMAGE
# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
amlComputeRunConf.environment.python.user_managed_dependencies = False
# Auto-prepare the Docker image when used for execution (if it is not already prepared)
amlComputeRunConf.auto_prepare_environment = True
# Specify CondaDependencies obj, add necessary packages
amlComputeRunConf.environment.python.conda_dependencies = CondaDependencies.create(
    pip_packages=['numpy', 'pandas', 'scikit-learn', 'azureml-sdk'])
print("..5. completed")
print('')
print('')
Example 20
experiment = Experiment(workspace=ws, name='automl-diabetes')
aml_compute = AmlCompute(ws, compute_target_name)

# read in the data
print("Getting a reference to default datastore")
datastore = ws.get_default_datastore()

print("Preparing the 'prep data' step")
blob_diabetes_data = DataReference(
    datastore=datastore,
    data_reference_name="diabetes_data",
    path_on_datastore="diabetesdata/diabetes_pima.csv")

# Create a new runconfig object
aml_run_config = RunConfiguration()
aml_run_config.target = aml_compute
aml_run_config.environment.docker.enabled = True
aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
aml_run_config.environment.python.user_managed_dependencies = False
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas', 'scikit-learn', 'numpy'],
    pip_packages=[
        'azureml-sdk', 'azureml-dataprep', 'azureml-dataprep[pandas]',
        'azureml-train-automl'
    ],
    pin_sdk_version=False)

scripts_folder = './scripts'
prepared_data = PipelineData("diabetes_data_prep", datastore=datastore)

prep_data_step = PythonScriptStep(name="Prep diabetes data",
Example 21
print(f"### Will mount datapath '{dataPathRemote}' on remote compute")
dataRef = DataReferenceConfiguration(datastore_name=ds.name,
                                     path_on_datastore=dataPathRemote,
                                     path_on_compute='/tmp',
                                     mode='download',
                                     overwrite=False)

# Create a new RunConfiguration and attach data
runConfig = RunConfiguration()
runConfig.data_references = {
    ds.name: dataRef
}  # This syntax is not documented!

if not os.environ.get('AZML_RUN_LOCAL', 'false') == "true":
    # Set it up for running in Azure ML compute
    runConfig.target = computeTarget
    runConfig.environment.docker.enabled = True
    runConfig.auto_prepare_environment = True
    runConfig.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['scikit-learn==0.20.3', 'pandas', 'matplotlib'])
    print(
        f"### Will execute script {trainingScriptDir}/{trainingScript} on REMOTE compute"
    )
else:
    # OR set up RunConfig to run local, needs a pre-set up Python 3 virtual env
    runConfig.environment.python.user_managed_dependencies = True
    runConfig.environment.python.interpreter_path = os.environ[
        'VIRTUAL_ENV'] + "/bin/python"
    print(
        f"### Will execute script {trainingScriptDir}/{trainingScript} on LOCAL compute"
    )
Example 22
                                      provisioning_config)

# Can poll for a minimum number of nodes and for a specific timeout.
# If no min_node_count is provided, it will use the scale settings for the cluster.
compute_target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)

# In[41]:

#prepare the runtime
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration(framework="python")
run_config.target = compute_target
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE

dependencies = CondaDependencies.create(
    pip_packages=["scikit-learn", "scipy", "numpy"])
run_config.environment.python.conda_dependencies = dependencies

# NOT explainable
# automl_config = AutoMLConfig(task='classification',
#                              debug_log='automl_errors.log',
#                              path=project_folder,
#                              compute_target=compute_target,
#                              run_configuration=run_config,
#                              X = X,  ##use the remote uploaded data
#                              y = y,
Example 23
                         history_name = run_history_name,
                         directory = project_folder)

print(project.project_directory, project.history.name, sep = '\n')

print('copy {} and iris.csv to the project folder.'.format(train_script))
shutil.copy(train_script, os.path.join(project_folder, train_script))
shutil.copy('iris.csv', os.path.join(project_folder, 'iris.csv'))

print('create an ACI run config.')

# create a new runconfig object
run_config = RunConfiguration(project_object = project, run_config_name = 'my-aci-run-config')

# signal that you want to use ACI to execute script.
run_config.target = "containerinstance"

# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.
run_config.container_instance.region = 'eastus'

# set the ACI CPU and Memory 
run_config.container_instance.cpu_cores = 1
run_config.container_instance.memory_gb = 2

# enable Docker 
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE
print('base image is', run_config.environment.docker.base_image)
#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'
Example 24
        vm_size='STANDARD_D2_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)
#</cpu_cluster>

#<run_amlcompute>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# Use the cpu_cluster you created above.
run_amlcompute.target = cpu_cluster

# Enable Docker
run_amlcompute.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False

# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'])
#</run_amlcompute>
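A hedged follow-up sketch that submits a script with `run_amlcompute`; the source directory, script, and experiment names are placeholders.

# Hedged follow-up sketch: script and experiment names are placeholders.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory='.', script='train.py', run_config=run_amlcompute)
run = Experiment(workspace=ws, name='amlcompute-example').submit(src)
run.wait_for_completion(show_output=True)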
Example 25
    project_folder = './sample_projects/automl-remote-attach'

    experiment = Experiment(ws, experiment_name)
    automl_runs = list(experiment.get_runs(type='automl'))

    assert (len(automl_runs) == 1)

    compute_name = 'mydsvmb'

    dsvm_compute = ws.compute_targets[compute_name]

    # create a new RunConfig object
    conda_run_config = RunConfiguration(framework="python")

    # Set compute target to the Linux DSVM
    conda_run_config.target = dsvm_compute

    cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                                  conda_packages=['numpy'])
    conda_run_config.environment.python.conda_dependencies = cd

    automl_settings = {
        "iteration_timeout_minutes": 60,
        "iterations": 100,
        "n_cross_validations": 5,
        "primary_metric": 'AUC_weighted',
        "preprocess": True,
        "max_cores_per_iteration": 2
    }

    automl_config = AutoMLConfig(task='classification',
Example 26
def main():
    train_file = r"EdwardFry_Microsoft_issueDataset.csv"
    ws = Workspace.from_config()

    # Default datastore
    def_data_store = ws.get_default_datastore()

    # Get the blob storage associated with the workspace
    def_blob_store = Datastore(ws, "workspaceblobstore")

    # Get file storage associated with the workspace
    def_file_store = Datastore(ws, "workspacefilestore")

    # Set data input and output
    xyz_phishing_dataset = Dataset.File.from_files([(def_blob_store,
                                                     train_file)])
    output_data1 = OutputFileDatasetConfig(
        destination=(def_data_store, 'outputdataset/{run-id}'))
    output_data_dataset = output_data1.register_on_complete(
        name='prepared_output_data')

    # Set compute
    compute_name = "aml-compute"
    vm_size = "STANDARD_NC6"
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)
    else:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,  # STANDARD_NC6 is GPU-enabled
            min_nodes=0,
            max_nodes=4)
        # create the compute target
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # Can poll for a minimum number of nodes and for a specific timeout.
        # If no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current cluster status, use the 'status' property
        print(compute_target.status.serialize())

    aml_run_config = RunConfiguration()
    # `compute_target` as defined in "Azure Machine Learning compute" section above
    aml_run_config.target = compute_target

    USE_CURATED_ENV = True
    if USE_CURATED_ENV:
        curated_environment = Environment.get(workspace=ws,
                                              name="AzureML-Tutorial")
        aml_run_config.environment = curated_environment
    else:
        aml_run_config.environment.python.user_managed_dependencies = False

        # Add some packages relied on by data prep step
        aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
            conda_packages=['pandas', 'scikit-learn'],
            pip_packages=['azureml-sdk', 'azureml-dataprep[fuse,pandas]'],
            pin_sdk_version=False)

    dataprep_source_dir = "./dataprep_src"
    entry_point = "prepare.py"
    # `xyz_phishing_dataset` as defined above
    ds_input = xyz_phishing_dataset.as_named_input('input1')

    # `output_data1`, `compute_target`, `aml_run_config` as defined above
    data_prep_step = PythonScriptStep(script_name=entry_point,
                                      source_directory=dataprep_source_dir,
                                      arguments=[
                                          "--input",
                                          ds_input.as_download(), "--output",
                                          output_data1
                                      ],
                                      compute_target=compute_target,
                                      runconfig=aml_run_config,
                                      allow_reuse=True)

    train_source_dir = "./train_src"
    train_entry_point = "train.py"

    training_results = OutputFileDatasetConfig(name="training_results",
                                               destination=def_blob_store)

    train_step = PythonScriptStep(script_name=train_entry_point,
                                  source_directory=train_source_dir,
                                  arguments=[
                                      "--prepped_data",
                                      output_data1.as_input(),
                                      "--training_results", training_results
                                  ],
                                  compute_target=compute_target,
                                  runconfig=aml_run_config,
                                  allow_reuse=True)

    # list of steps to run (`compare_step` definition not shown)
    compare_models = [data_prep_step, train_step, compare_step]

    # Build the pipeline
    pipeline1 = Pipeline(workspace=ws, steps=[compare_models])

    #dataset_consuming_step = PythonScriptStep(
    #    script_name="iris_train.py",
    #    inputs=[iris_tabular_dataset.as_named_input("iris_data")],
    #    compute_target=compute_target,
    #    source_directory=project_folder
    #)

    #run_context = Run.get_context()
    #iris_dataset = run_context.input_datasets['iris_data']
    #dataframe = iris_dataset.to_pandas_dataframe()

    ## Within a PythonScriptStep

    #ws = Run.get_context().experiment.workspace

    #step = PythonScriptStep(name="Hello World",
    #                        script_name="hello_world.py",
    #                        compute_target=aml_compute,
    #                        source_directory=source_directory,
    #                        allow_reuse=False,
    #                        hash_paths=['hello_world.ipynb'])

    # Submit the pipeline to be run
    pipeline_run1 = Experiment(ws, 'Compare_Models_Exp').submit(pipeline1)
    pipeline_run1.wait_for_completion()
Example 27
ws = Workspace.from_config(auth=cli_auth)


# Read the New VM Config
with open("aml_config/security_config.json") as f:
    config = json.load(f)
remote_vm_name = config["remote_vm_name"]


# Attach Experiment
experiment_name = "devops-ai-demo"
exp = Experiment(workspace=ws, name=experiment_name)
print(exp.name, exp.workspace.name, sep="\n")

run_config = RunConfiguration()
run_config.target = remote_vm_name

# replace with your path to the python interpreter in the remote VM found earlier
run_config.environment.python.interpreter_path = "/anaconda/envs/myenv/bin/python"
run_config.environment.python.user_managed_dependencies = True


src = ScriptRunConfig(
    source_directory="./code", script="training/train.py", run_config=run_config
)
run = exp.submit(src)

# Shows output of the run on stdout.
run.wait_for_completion(show_output=True, wait_post_processing=True)

# Raise exception if run fails
Example 28
# Create a set of package dependencies
fraud_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'], pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
fraud_env.python.conda_dependencies = fraud_packages

# Register the environment (just in case you want to use it again)
fraud_env.register(workspace=ws)
registered_env = Environment.get(ws, 'fraud-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print("Run configuration created.")

# Get the training dataset
fraud_ds = ws.datasets.get("creditcard")

# Create a PipelineData (temporary Data Reference) for the model folder
model_folder = PipelineData("model_folder",
                            datastore=ws.get_default_datastore())
#pipeline_data = PipelineData('pipeline_data',  datastore=default_ds)

data_ref = DataReference(datastore=default_ds,
Example 29
# ## Pipeline 1st step: Data Preprocessing
#
# We start by defining the run configuration with the dependencies needed by the preprocessing step.
#
# In the cells that follow, we compose the first step of the pipeline.
#

#%%
cd = CondaDependencies()
cd.add_conda_package('pandas')
cd.add_conda_package('matplotlib')
cd.add_conda_package('numpy')
cd.add_conda_package('scikit-learn')

run_config = RunConfiguration(framework="python", conda_dependencies=cd)
run_config.target = cluster
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
run_config.environment.python.user_managed_dependencies = False

#%%
pre_processing = PythonScriptStep(
                            name='preprocess dataset',
                            script_name='preprocess.py',
                            arguments=['--input_path', input_dir,\
                                         '--output_path', processed_dir],
                            inputs=[input_dir],
                            outputs=[processed_dir],
                            compute_target=cluster_name,
                            runconfig=run_config,
                            source_directory=PREPROCESS_DIR
Example 30
exp = Experiment(workspace=ws, name=experiment_name)


#<run_temp_compute>
from azureml.core.compute import ComputeTarget, AmlCompute

# First, list the supported VM families for Azure Machine Learning Compute
print(AmlCompute.supported_vmsizes(workspace=ws))

from azureml.core.runconfig import RunConfiguration
# Create a new runconfig object
run_temp_compute = RunConfiguration()

# Signal that you want to use AmlCompute to execute the script
run_temp_compute.target = "amlcompute"

# AmlCompute is created in the same region as your workspace
# Set the VM size for AmlCompute from the list of supported_vmsizes
run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
#</run_temp_compute>


# Submit the experiment using the run configuration
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_temp_compute)
run = exp.submit(src)
run.wait_for_completion(show_output = True)