def __get_run_config(self,
                         compute_target,
                         channels=None,
                         conda_packages=None,
                         pip_packages=None):
        # Load the "cpu-dsvm.runconfig" file (created by the above attach operation) in memory
        run_config = RunConfiguration(framework="python")

        # Set compute target to the Linux DSVM
        run_config.target = compute_target.name

        # Do not run inside Docker on the compute target
        run_config.environment.docker.enabled = False

        # Ask the system to provision a new environment based on the specified conda dependencies
        run_config.environment.python.user_managed_dependencies = False

        # Prepare the Docker and conda environment automatically when used the first time.
        run_config.auto_prepare_environment = True

        # specify dependencies obj
        conda_dependencies = CondaDependencies.create(
            conda_packages=conda_packages, pip_packages=pip_packages)
        if (channels):
            for channel in channels:
                conda_dependencies.add_channel(channel)

        run_config.environment.python.conda_dependencies = conda_dependencies

        return run_config
def create_run_config(cpu_cluster, docker_proc_type, conda_env_file):
    """
    AzureML requires the run environment to be set up prior to submission.
    This configures Docker-based persistent compute. Even though it
    is called persistent compute, AzureML handles startup/shutdown
    of the compute environment.

    Args:
        cpu_cluster      (str) : Names the cluster for the test
                                 In the case of unit tests, any of
                                 the following:
                                 - Reco_cpu_test
                                 - Reco_gpu_test
        docker_proc_type (str) : Docker base image to use for the processor type (cpu or gpu)
        conda_env_file   (str) : filename which contains info to
                                 set up conda env
    Return:
          run_amlcompute : AzureML run config
    """

    # runconfig with max_run_duration_seconds did not work, check why:
    # run_amlcompute = RunConfiguration(max_run_duration_seconds=60*30)
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = cpu_cluster
    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = docker_proc_type

    # Use conda_dependencies.yml to create a conda environment in
    # the Docker image for execution
    # False means AzureML builds the environment from the conda file
    # True means the user manages the environment manually
    run_amlcompute.environment.python.user_managed_dependencies = False
    run_amlcompute.environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path=conda_env_file)
    return run_amlcompute
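A minimal usage sketch for the helper above; the cluster name comes from the docstring, while the Docker base image string and conda file path are hypothetical placeholders.

run_config = create_run_config(
    cpu_cluster="Reco_cpu_test",                                # cluster name from the docstring
    docker_proc_type="mcr.microsoft.com/azureml/base:latest",   # placeholder base image
    conda_env_file="reco_base.yaml",                            # hypothetical conda env file
)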
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(aml_workspace,
                                  e.aml_env_name,
                                  create_new=False)  # NOQA: E501

    run_config = RunConfiguration()
    run_config.environment = environment

    if (e.datastore_name):
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name  # NOQA: E501
Example #4
def fetch_run_config(compute_target, base_image, sp_username, sp_tenant,
                     sp_password):
    """ Generates a Run Configuration based on the pipeline parameters,
    specifying such things as the Compute Target and Conda Dependencies. 
    """

    # Inits configuration for Python
    run_config = RunConfiguration(framework="python")

    # Specifies compute target
    run_config.target = compute_target

    # Configures Docker/Image/Environment Variable parameters
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = base_image
    run_config.environment.environment_variables = {
        "SP_USERNAME": sp_username,
        "SP_TENANT": sp_tenant,
        "SP_PASSWORD": sp_password
    }

    # Specifies Conda file location (Auto-injected from preparing staging)
    run_config.environment.python.conda_dependencies = CondaDependencies(
        os.path.join("snapshot", "inputs", "environment.yml"))

    # Returns configuration
    return run_config
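A usage sketch, assuming the service principal credentials are read from environment variables (the variable names mirror those used later in these examples) and that the cluster name and base image are placeholders.

run_config = fetch_run_config(
    compute_target="cpu-cluster",                          # placeholder cluster name
    base_image="mcr.microsoft.com/azureml/base:latest",    # placeholder base image
    sp_username=os.environ.get("SP_APP_ID"),               # assumed env var names
    sp_tenant=os.environ.get("TENANT_ID"),
    sp_password=os.environ.get("SP_APP_SECRET"))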
Example #5
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env:
        aml_run_config.environment = env
    else:
        aml_run_config.environment = create_env_from_requirements()

    return aml_run_config
Example #6
    def get_run_config(self, config):

        environment_config = config.get("environment")
        environment = self.get_environment(environment_config)

        cluster_name = config.get("cluster")
        cluster = ComputeTarget(workspace=self.workspace, name=cluster_name)

        pipeline_run_config = RunConfiguration()
        pipeline_run_config.target = cluster
        pipeline_run_config.environment = environment

        return pipeline_run_config
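The shape of the config dict this method expects is not shown in the snippet; below is a hypothetical sketch inferred from the keys it reads (config.get("environment") and config.get("cluster")). "builder" stands for an instance of the surrounding class.

config = {
    "environment": {"name": "train-env", "conda_file": "environment.yml"},  # assumed keys
    "cluster": "cpu-cluster",   # name of an existing ComputeTarget in the workspace
}
pipeline_run_config = builder.get_run_config(config)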
Example #7
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    # Make sure to include `r-essentials`
    #   in h1c4driver/conda_dependencies.yml
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )  # NOQA: E501
    run_config = RunConfiguration()
    run_config.environment = environment

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="h1c4driver/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    # Make sure to include `r-essentials`
    #   in diabetes_regression/conda_dependencies.yml
    environment = Environment.load_from_directory(e.sources_directory_train)
    if (e.collection_uri is not None and e.teamproject_name is not None):
        builduri_base = e.collection_uri + e.teamproject_name
        builduri_base = builduri_base + "/_build/results?buildId="
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="diabetes_regression/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #9
def main():
    print("Beginning ML runs")
    #### This doc should contain meta-data for ML experiment runs ####
    #This sets the compute target to your local machine.
    run_user_managed = RunConfiguration()
    run_user_managed.environment.python.user_managed_dependencies = False
    #Can also adjust for a specific python interpreter if desired. Otherwise, it uses the specific one declared by IDE.

    #This determines what workspace to run experiments in. Should not need any edits.
    try:
        #This provides information to access ML Service workspace. Do not change.
        ws = Workspace(subscription_id="8402fd02-6a15-499c-bb04-1dee338962d6",
                       resource_group="NDSUCapstone2019",
                       workspace_name="Capstone2019MLServerWorksapce")
        # write the details of the workspace to a configuration file.
        ws.write_config()
        print(
            "Workspace configuration succeeded. Skip the workspace creation steps below"
        )
    except Exception:
        print(
            "Workspace not accessible. Change your parameters or create a new workspace below"
        )
    ws = Workspace.from_config()

    #The remainder of this doc should run the models defined in other documents.
    #This experiment runs the REGRESSION model.
    experiment = Experiment(ws, "Regression_Attempt_1")
    automated_ml_config = model(preppedData[0], preppedData[2],
                                "FOLDER HARD CODED", 10, 20,
                                'spearman_correlation', 5, 'regression')
    local_run = experiment.submit(automated_ml_config, show_output=True)
Example #10
    def get_run_cfg(ws, pip_packages, conda_packages, ext_wheels, gpu=True):
        '''
        get_run_cfg - Retrieves the AMLS run configuration.

        :param ws: AMLS workspace used to register private pip wheels
        :param pip_packages: list of pip package names to install
        :param conda_packages: list of conda package names to install
        :param ext_wheels: list of local paths to private pip wheel files
        :param gpu: whether to use the default GPU Docker base image

        :returns: AMLS run configuration
        :rtype: RunConfiguration object
        '''
        conda_dep = CondaDependencies()
        for pip_package in pip_packages:
            conda_dep.add_pip_package(pip_package)
        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)
        for whl_path in ext_wheels:
            whl_url = Environment.add_private_pip_wheel(workspace=ws,
                                                        file_path=whl_path,
                                                        exist_ok=True)
            conda_dep.add_pip_package(whl_url)
        run_cfg = RunConfiguration(conda_dependencies=conda_dep)
        run_cfg.environment.docker.enabled = True
        run_cfg.environment.docker.gpu_support = gpu
        if gpu:
            run_cfg.environment.docker.base_image = DEFAULT_GPU_IMAGE
        else:
            run_cfg.environment.docker.base_image = DEFAULT_CPU_IMAGE
        run_cfg.environment.spark.precache_packages = False
        return run_cfg
Example #11
def is_compute_target_prepared(experiment, source_directory, run_config):
    """Check compute target is prepared.

    Checks whether the compute target, specified in run_config, is already prepared or not for the specified run
    configuration.

    :param experiment:
    :type experiment: azureml.core.experiment.Experiment
    :param source_directory:
    :type source_directory: str
    :param run_config: The run configuration. This can be a run configuration name, as string, or a
        azureml.core.runconfig.RunConfiguration object.
    :type run_config: str or azureml.core.runconfig.RunConfiguration
    :return: True, if the compute target is prepared.
    :rtype: bool
    """
    from azureml._execution import _commands
    from azureml.core.runconfig import RunConfiguration
    from azureml._project.project import Project

    run_config_object = RunConfiguration._get_run_config_object(
        path=source_directory, run_config=run_config)
    project_object = Project(experiment=experiment, directory=source_directory)
    return _commands.prepare_compute_target(project_object,
                                            run_config_object,
                                            check=True)
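A usage sketch, assuming ws is an existing Workspace; the experiment name and the "cpu-dsvm" run configuration name are hypothetical.

exp = Experiment(workspace=ws, name="prepare-check")   # hypothetical experiment name
if not is_compute_target_prepared(exp, source_directory=".", run_config="cpu-dsvm"):
    print("Compute target is not prepared for this run configuration yet")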
def data_ingestion_step(datastore_reference, compute_target):
    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True

    raw_data_dir = PipelineData(
        name='raw_data_dir', 
        pipeline_output_name='raw_data_dir',
        datastore=datastore_reference.datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [raw_data_dir]
    outputs_map = { 'raw_data_dir': raw_data_dir }

    step = PythonScriptStep(
        script_name='data_ingestion.py',
        arguments=['--output_dir', raw_data_dir, ],
        inputs=[datastore_reference],
        outputs=outputs,
        compute_target=compute_target,
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        runconfig=run_config,
        allow_reuse=True
    )

    return step, outputs_map
Example #13
def main(workspace, inputs):

    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name=inputs["compute"]
    )
    # create a new runconfig object
    run_config = RunConfiguration()

    # enable Docker 
    run_config.environment.docker.enabled = True

    # set Docker base image to the default CPU-based image
    run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

    # use conda_dependencies.yml to create a conda environment in the Docker image for execution
    run_config.environment.python.user_managed_dependencies = False

    # specify CondaDependencies obj
    run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

    # For this step, we use yet another source_directory
    step = PythonScriptStep(name=inputs["step_name"],
                            script_name=inputs["train_script"], 
                            compute_target=compute_target, 
                            source_directory=inputs["source_directory"],
                            runconfig=run_config,
                            allow_reuse=True)
    return step
Example #14
def tb():
    import os
    ws = Workspace.from_config()
    print(ws.name)
    proj = Project.attach(ws, 'tbhistory', '/tmp/tb-test')
    shutil.copy('tftb.py', os.path.join(proj.project_directory, 'tftb.py'))
    from azureml.core.compute_target import RemoteTarget
    rt = RemoteTarget(name='dsvm', address='hai2.eastus2.cloudapp.azure.com:5022', username='******', password='******')
    
    proj.attach_legacy_compute_target(rt)
    rc = RunConfiguration.load(proj, "dsvm")
    rc.environment.python.user_managed_dependencies = True
    rc.environment.python.interpreter_path = '/anaconda/envs/tf/bin/python'
    print(rc.target)

    run = Run.submit(proj, rc, 'tftb.py')
    print(run.id)
    #run.wait_for_completion(show_output=True)
    from azureml.contrib.tensorboard import Tensorboard
    
    tb = Tensorboard([run])
    print('starting tensorboard...')
    print(tb.start())
    print('tensorboard started.')
    run.wait_for_completion(show_output=True)
    tb.stop()
Example #15
def submit_job():
    ws = Workspace.from_config()
    proj = Project.attach(ws, 'util', '/tmp/random_proj')
    rc = RunConfiguration(proj, "local")
    shutil.copy('./train-sklearn-one-model.py', '/tmp/random_proj/train-sklearn-one-model.py')
    #run = Run.submit(proj, rc, "train-sklearn-one-model.py", "--alpha 0.9")
    run = Run.submit(proj, rc, "train-sklearn-one-model.py", arguments_list=["--alpha", "0.9"])
    run.wait_for_completion(show_output=True)
Example #16
def run_local_compute_experiment(ws, experiment_name, entry_script, source_directory=os.getcwd()):
    # Edit a run configuration property on the fly.
    run_local = RunConfiguration()
    run_local.environment.python.user_managed_dependencies = True

    exp = Experiment(workspace=ws, name=experiment_name)

    src = ScriptRunConfig(source_directory=source_directory, script=entry_script, run_config=run_local)
    run = exp.submit(src)
    run.wait_for_completion(show_output=True)
Example #17
def test():
    ws = Workspace.from_config()
    proj = Project.attach(ws, 'test_rh', '/tmp/randomproj1')
    rc = RunConfiguration(proj, 'local')
    rc.environment.python.interpreter_path = '/Users/haining/miniconda3/envs/comet/bin/python'
    with open('/tmp/randomproj1/test.py', 'w') as file:
        file.write('import sys; print(sys.version);import os;os.makedirs("./outputs",  exist_ok=True);fs=open("./outputs/f.txt","w");fs.write("hello!");')
    r = Run.submit(proj, rc, 'test.py')
    print(helpers.get_run_history_url(r))
    r.wait_for_completion(show_output=True)
Example #18
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env is not None:
        aml_run_config.environment = env
    else:
        # Enable Docker
        aml_run_config.environment.docker.enabled = True

        # Set Docker base image to the default CPU-based image
        aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

        # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
        aml_run_config.environment.python.user_managed_dependencies = False

    return aml_run_config
Example #19
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name, resource_group,
                                  subscription_id, tenant_id, app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ]))
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="code/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name + "_with_R",
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #20
def get_local_run_configuration() -> RunConfiguration:
    """
    Get Local Run Config

    :return:
    """
    # Editing a run configuration property on the fly.
    run_config_user_managed = RunConfiguration()
    run_config_user_managed.environment.python.user_managed_dependencies = True
    # Choose the specific Python environment of this tutorial by pointing to the Python path
    run_config_user_managed.environment.python.interpreter_path = sys.executable
    return run_config_user_managed
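A usage sketch following the same submit pattern used elsewhere in these examples; the entry script and experiment name are hypothetical.

from azureml.core import Experiment, ScriptRunConfig, Workspace

ws = Workspace.from_config()
src = ScriptRunConfig(source_directory=".",
                      script="train.py",                 # hypothetical entry script
                      run_config=get_local_run_configuration())
run = Experiment(workspace=ws, name="local-debug").submit(src)
run.wait_for_completion(show_output=True)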
Example #21
def _write_compute_run_config(source_directory, compute_target_object, compute_yaml):
    """
    :param source_directory:
    :type source_directory: str
    :param compute_target_object:
    :type compute_target_object: azureml.core.compute_target.AbstractComputeTarget
    :param compute_yaml:
    :type compute_yaml: dict
    :return:
    """
    from azureml.core.compute_target import _BatchAITarget
    # Writing the target.compute file.
    run_config_dir_name = get_run_config_dir_name(source_directory)
    file_path = os.path.join(source_directory, run_config_dir_name,
                             compute_target_object.name + COMPUTECONTEXT_EXTENSION)
    with open(file_path, 'w') as outfile:
        ruamel.yaml.dump(compute_yaml, outfile, default_flow_style=False)

    # This creates a run config and writes it in the aml_config/<compute_target_name>.runconfig file
    run_config_object = RunConfiguration()
    run_config_object.target = compute_target_object

    if compute_target_object.type == _BatchAITarget._BATCH_AI_TYPE:
        run_config_object.environment.docker.enabled = True

    run_config_object.framework = compute_target_object._default_framework

    run_config_object.save(name=compute_target_object.name, path=source_directory)
Example #22
    def _get_run_config_object(self, run_config):
        if isinstance(run_config, str):
            # If it is a string then we don't need to create a copy.
            return RunConfiguration.load(self.project_directory, run_config)
        elif isinstance(run_config, RunConfiguration):
            # TODO: Deep copy of project and auth object too.
            import copy
            return copy.deepcopy(run_config)
        else:
            raise UserErrorException(
                "Unsupported runconfig type {}. run_config can be of str or "
                "azureml.core.runconfig.RunConfiguration type.".format(
                    type(run_config)))
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas',
                        'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core',
                      'azure-storage',
                      'azure-storage-blob'])
    )
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="code/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R",
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #24
def mi_run_config(ws, compute):
    whl_url = Environment.add_private_pip_wheel(workspace=ws,
                                                file_path=d.WHL_VINX_AZURE_ML,
                                                exist_ok=True)
    run_config = RunConfiguration()
    run_config.target = compute
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = None
    run_config.environment.docker.base_dockerfile = 'FROM mcr.microsoft.com/azureml/base:latest\nRUN apt-get update && apt-get -y install freetds-dev freetds-bin vim gcc'
    run_config.environment.python.user_managed_dependencies = False
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=[
            'tqdm', 'cython', 'matplotlib', 'scikit-learn', 'fbprophet'
        ],
        pip_packages=[
            'azureml-sdk', 'pandas', 'lightgbm', 'scipy==1.4.1', 'statsmodels',
            'mlxtend', 'optuna', 'xgboost', 'CatBoost', 'tensorflow', 'keras',
            'jpholiday', 'joblib', 'pymssql==2.1.1'
        ],
        pin_sdk_version=False)
    run_config.environment.python.conda_dependencies.add_pip_package(whl_url)

    return run_config
Example #25
def get_run_config(aml_compute, conda_dep):
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = aml_compute

    dockerfile = r"""
    FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04

    RUN apt-get update && \
        apt-get install -y sudo curl apt-transport-https && \
        apt-get update && \
        sudo su && \
        curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
        curl https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
        sudo apt-get update && \
        sudo ACCEPT_EULA=Y apt-get install -y msodbcsql17 && \
        sudo apt-get install -y unixodbc-dev
    """

    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = None
    run_amlcompute.environment.docker.base_dockerfile = dockerfile
    run_amlcompute.environment.python.conda_dependencies = conda_dep

    return run_amlcompute
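A usage sketch, assuming an existing compute target name and a conda spec built the same way as in the other examples here; the package names are placeholders.

conda_dep = CondaDependencies.create(
    conda_packages=['pandas'],                           # placeholder packages
    pip_packages=['pyodbc'])
run_config = get_run_config(aml_compute="cpu-cluster",   # placeholder compute target name
                            conda_dep=conda_dep)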
def data_ingestion_step(datastore, compute_target):
    '''
    This step will leverage Azure Cognitive Services to search the web for images 
    to create a dataset. This replicates the real-world scenario of data being 
    ingested from a constantly changing source. The same 10 classes in the CIFAR-10 dataset 
    will be used (airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck). 

    :param datastore: The datastore that will be used
    :type datastore: Datastore
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget
    
    :return: The ingestion step, step outputs dictionary (keys: raw_data_dir)
    :rtype: PythonScriptStep, dict
    '''

    run_config = RunConfiguration()
    run_config.environment.environment_variables = {
        'BING_SEARCH_V7_SUBSCRIPTION_KEY': os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY'],
        'BING_SEARCH_V7_ENDPOINT': os.environ['BING_SEARCH_V7_ENDPOINT'],
        'AZURE_REGION': datastore._workspace.location
        }
    run_config.environment.docker.enabled = True

    num_images = PipelineParameter(name='num_images', default_value=25)

    raw_data_dir = PipelineData(
        name='raw_data_dir', 
        pipeline_output_name='raw_data_dir',
        datastore=datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [raw_data_dir]
    outputs_map = { 'raw_data_dir': raw_data_dir }

    step = PythonScriptStep(
        name="Data Ingestion",
        script_name='data_ingestion.py',
        arguments=['--output_dir', raw_data_dir, '--num_images', num_images],
        outputs=outputs,
        compute_target=compute_target,
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        runconfig=run_config,
        allow_reuse=True
    )

    return step, outputs_map
def data_preprocess_step(raw_data_dir, compute_target):

    run_config = RunConfiguration()
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        pip_packages=['pandas'])
    run_config.environment.docker.enabled = True

    train_dir = PipelineData(name='train_dir',
                             pipeline_output_name='train_dir',
                             datastore=raw_data_dir.datastore,
                             output_mode='mount',
                             is_directory=True)

    test_dir = PipelineData(name='test_dir',
                            pipeline_output_name='test_dir',
                            datastore=raw_data_dir.datastore,
                            output_mode='mount',
                            is_directory=True)

    outputs = [train_dir, test_dir]
    outputs_map = {
        'train_dir': train_dir,
        'test_dir': test_dir,
    }

    step = PythonScriptStep(script_name='data_preprocess.py',
                            arguments=[
                                '--raw_data_dir',
                                raw_data_dir,
                                '--train_dir',
                                train_dir,
                                '--test_dir',
                                test_dir,
                            ],
                            inputs=[raw_data_dir],
                            outputs=outputs,
                            compute_target=compute_target,
                            runconfig=run_config,
                            source_directory=os.path.dirname(
                                os.path.abspath(__file__)),
                            allow_reuse=True)

    return step, outputs_map
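A sketch of chaining the two steps above into a single pipeline, assuming ws, datastore, and compute_target are already defined.

ingestion_step, ingestion_outputs = data_ingestion_step(datastore, compute_target)
preprocess_step, preprocess_outputs = data_preprocess_step(
    ingestion_outputs['raw_data_dir'], compute_target)

pipeline = Pipeline(workspace=ws, steps=[ingestion_step, preprocess_step])
pipeline.validate()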
Example #28
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a run configuration environment
    conda_deps_file = "diabetes_regression/training_dependencies.yml"
    conda_deps = CondaDependencies(conda_deps_file)
    run_config = RunConfiguration(conda_dependencies=conda_deps)
    run_config.environment.docker.enabled = True

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="diabetes_regression/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #29
def publish(azureml_workspace, entry_point, name, description, parameters={}):
    luna_config = utils.Init()
    if azureml_workspace:
        run_config = RunConfiguration.load(
            luna_config['azureml']['run_config'])

        arguments = utils.GetPipelineArguments(luna_config['MLproject'],
                                               entry_point, parameters)

        trainStep = PythonScriptStep(
            script_name=luna_config['code'][entry_point],
            arguments=arguments,
            inputs=[],
            outputs=[],
            source_directory=os.getcwd(),
            runconfig=run_config)

        pipeline = Pipeline(workspace=azureml_workspace, steps=[trainStep])
        published_pipeline = pipeline.publish(name=name,
                                              description=description)
        return published_pipeline.endpoint
Example #30
def main():
    print("Beginning ML runs")
    #### This doc should contain meta-data for ML experiment runs ####
    #This sets the compute target to your local machine.
    run_user_managed = RunConfiguration()
    run_user_managed.environment.python.user_managed_dependencies = False
    #Can also adjust for a specific python interpreter if desired. Otherwise, it uses the specific one declared by IDE.

    #This determines what workspace to run experiments in. Should not need any edits.

    ws = Workspace.from_config()

    #The remainder of this doc should run the models defined in other documents.
    #This experiment runs the REGRESSION model.
    experiment = Experiment(ws, "RunFinal1")
    automated_ml_config = model(X_toTrain, Y_toTrain, "FOLDER HARD CODED", 10,
                                30, 'normalized_root_mean_squared_error', 5,
                                'regression')
    local_run = experiment.submit(automated_ml_config, show_output=True)

    ####Code to explore runs####
    def retrieveRunData():
        children = list(local_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        rundata = pd.DataFrame(metricslist).sort_index(axis=1)
        print(rundata)

    retrieveRunData()
    #The following method retrieves the best test run's model, and makes a prediction using the previously defined .csv file.
    for toPredict in to_predict_array:
        retrieveAndPredict('normalized_root_mean_squared_error', 'runFinal1',
                           toPredict, predicted_values_folder)