Example no. 1
    # Grab the requested model
    model_list = Model.list(workspace=ws)
    model = None
    
    # Unpack the generator and look through the list to find your desired model
    model, = (m for m in model_list if m.version==model_version and m.name==model_name)
    print('Model picked: {} \nModel Description: {} \nModel Version: {}'.format(model.name, model.description, model.version))

    from azureml.core.webservice import LocalWebservice, Webservice

    


    os.chdir('./score')
    print("Creating environment")
    local_env = Environment(name=f'{web_service_name}_env')
    local_env.environment_variables = {"STORAGE_CONNECTION": os.getenv("STORAGE_CONNECTION")}
    print(local_env.environment_variables)
    local_env.python.conda_dependencies = CondaDependencies.create(
        pip_packages=[
        'azureml-defaults',
        'azure-storage-blob',
        'pynacl==1.2.1'
        ],
        conda_packages=[
        'numpy',
        'scikit-learn',
        'tensorflow',
        'keras'
        ])
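
A minimal sketch of how the environment built above could be used for a local Docker deployment, continuing the example; the entry script name score.py, the port, and the reuse of ws, model, web_service_name and local_env are assumptions rather than part of the original snippet.

    from azureml.core.model import InferenceConfig, Model
    from azureml.core.webservice import LocalWebservice

    # Pair the environment with a scoring script (score.py is assumed to exist in ./score)
    inference_config = InferenceConfig(entry_script='score.py', environment=local_env)

    # Deploy the registered model into a local Docker container for testing
    deployment_config = LocalWebservice.deploy_configuration(port=8890)
    service = Model.deploy(ws, web_service_name, [model], inference_config, deployment_config)
    service.wait_for_deployment(show_output=True)
    print(service.state)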
    
Example no. 2
    pip_packages=[
        "azure-storage-blob==2.1.0",
        "azureml-sdk",
        "hickle==3.4.3",
        "requests==2.21.0",
        "sklearn",
        "pandas",
        "numpy",
        "pillow==6.0.0",
        "tensorflow-gpu==1.15",
        "keras",
        "matplotlib",
        "seaborn",
    ])

env = Environment("prednet")
env.python.conda_dependencies = conda_dependencies
env.docker.enabled = True
env.register(ws)

# Runconfigs
runconfig = RunConfiguration()
runconfig.environment = env
print("PipelineData object created")

create_pipelines = PythonScriptStep(
    name="create pipelines",
    script_name="pipelines_slave.py",
    compute_target=cpu_compute_target,
    arguments=[
        "--cpu_compute_name", cpu_compute_name, "--gpu_compute_name",
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

#created an env
my_env = Environment("My_new_env")
conda_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
my_env.python.conda_dependencies = conda_dep

my_env.register(workspace=ws)

#creating the cluster
from azureml.core.compute import AmlCompute

cluster_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D11_V2",
                                                       max_nodes=2)

cluster = AmlCompute.create(ws, "My_cluster", cluster_config)

cluster.wait_for_completion()

#fetching the data
input_ds = ws.datasets.get("Loan Application")

#for ScriptRunning

from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory = ".",
# MAGIC         data = json.loads(raw_data)['data']
# MAGIC         data = np.array(data)
# MAGIC         result = model.predict(data)
# MAGIC 
# MAGIC         # you can return any data type as long as it is JSON-serializable
# MAGIC         return result.tolist()
# MAGIC     except Exception as e:
# MAGIC         result = str(e)
# MAGIC         return result

# COMMAND ----------

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment('CitibikeNY-deployment-env')
myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'pip==20.1.1',
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'sklearn'
])

with open('mydbxenv.yml','w') as f:
  f.write(myenv.python.conda_dependencies.serialize_to_string())

# COMMAND ----------

from azureml.core.model import InferenceConfig
Example no. 5
subscription_id = config["subscription_id"]
location = config["location"]

cli_auth = AzureCliAuthentication()

# Get workspace
#ws = Workspace.from_config(auth=cli_auth)
ws = Workspace.get(name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group,
                   auth=cli_auth)

env = Environment.get(workspace=ws, name="AzureML-Minimal")
#print(env)

Environment(name="arimaenv")

# From a Conda specification file
arimaenv = Environment.from_conda_specification(
    name="arimaenv", file_path="./scripts/scoring/conda_dependencies.yml")
print(arimaenv)

arimaenv.register(workspace=ws)

# Creates the environment inside a Docker container.
arimaenv.docker.enabled = True

try:
    with open("./configuration/model.json") as f:
        config = json.load(f)
except:
Example no. 6
try:
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If not, create it
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4,
                                                           idle_seconds_before_scaledown=1800)
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

pipeline_cluster.wait_for_completion(show_output=True)


# Create a Python environment for the experiment

model_env = Environment(env_name)
model_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
model_env.docker.enabled = True # Use a docker container

# Create a set of package dependencies
packages = CondaDependencies.create(conda_packages=['scikit-learn','pandas'],
                                             pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
model_env.python.conda_dependencies = packages

# Register the environment (just in case you want to use it again)
model_env.register(workspace=ws)

registered_env = Environment.get(ws, env_name)
from azureml.core import Workspace, Run, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.experiment import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.core.script_run_config import get_run_config_from_script_run

# load Workspace
ws = Workspace.from_config()

# load Experiment
experiment = Experiment(workspace=ws, name='test-expt')

# Create python environment for Azure machine learning expt
# options for class methods: from_conda_specification, from_pip_requirements,
# from_existing_conda_environment
myenv = Environment(name="test")
# myenv = Environment.from_conda_specification(name="test",
#                                              file_path="./environment.yml")

# Environment: docker section
# docker_config = dict(
#     enabled=True,
#     base_image="base-gpu:openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04",
#     # comment out this environment variable if you don't have it set!
#     environment_variables={'WANDB_API_KEY': os.environ['WANDB_API_KEY']}
# )
# docker_section = DockerSection(**docker_config)

## Environment: docker section
myenv.docker.enabled = True
myenv.docker.base_image = "mcr.microsoft.com/azureml/base-gpu:openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04"
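
A short sketch of submitting a run with the GPU environment above; the script name train.py and the compute target name gpu-cluster are placeholders rather than values from the original snippet.

from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory='.',
                      script='train.py',              # assumed training script
                      compute_target='gpu-cluster',   # assumed compute target name
                      environment=myenv)
run = experiment.submit(src)
run.wait_for_completion(show_output=True)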
Example no. 8
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter, PipelineEndpoint
from azureml.pipeline.core._restclients.aeva.models.error_response import ErrorResponseException
from ml_service.util.env_variables import Env
"""
$ python -m ml_service.pipelines.build_pipeline
"""

if __name__ == "__main__":
    # Environment variables
    env = Env()

    # Setup run config
    ws = Workspace.from_config()

    environment = Environment(name=env.aml_environment_name)
    environment.docker.enabled = True
    environment.docker.base_image = DEFAULT_CPU_IMAGE
    environment.python.user_managed_dependencies = False
    environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path=
        "./environment_setup/conda_dependencies.yml")

    run_config = RunConfiguration()
    run_config.environment = environment

    # Create Pipeline data & parameters
    ds = ws.get_default_datastore()
    data_X = PipelineData('data_X', datastore=ds).as_dataset()
    data_y = PipelineData('data_y', datastore=ds).as_dataset()
    model_dir = PipelineData('model_dir', datastore=ds)
# the training script train/train.py.
#
# Note that the model that is being trained in the example is very basic and
# does not actually use the provided --regularization parameter, nor does it
# provide an actual performance result. These are merely hardcoded to show the
# effect in AzureML.
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset
from workspace import get_workspace

workspace = get_workspace()

experiment_name = 'test_experiment_1'
experiment = Experiment(workspace=ws, name=experiment_name)

myenv = Environment("user-managed-env")
myenv.python.user_managed_dependencies = True

dataset = Dataset.get_by_name(ws, name='images')

args = ['--data-folder', dataset.as_mount(), '--regularization', 0.07]

# No compute target is provided, hence the Run is performed locally
src = ScriptRunConfig(source_directory='model_train',
                      script='train.py',
                      arguments=args,
                      environment=myenv)

run = experiment.submit(config=src)
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
import json
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment
cluster_name = "SuccPlanning-nilanka"
ws = Workspace.from_config()

#get the existing compute cluster
pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)

# Create a Python environment for the experiment
environment = Environment("diabetes-pipeline-env")

# Use a docker container
environment.docker.enabled = True

environment.python.conda_dependencies.add_pip_package("scipy")
environment.python.conda_dependencies.add_pip_package("joblib")
environment.python.conda_dependencies.add_pip_package("numpy")
environment.python.conda_dependencies.add_pip_package("pandas")

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = environment
Example no. 11
        prediction=model.predict(data.reshape(1,-1))
        # Append prediction to results
        resultList.append("{}: {}".format(os.path.basename(f), prediction[0]))
    return resultList


from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.core.runconfig import CondaDependencies

# Add dependencies required by the model
# For scikit-learn models, you need scikit-learn
# For parallel pipeline steps, you need azureml-core and azureml-dataprep[fuse]
cd = CondaDependencies.create(pip_packages=['scikit-learn','azureml-defaults','azureml-core','azureml-dataprep[fuse]'])

batch_env = Environment(name='batch_environment')
batch_env.python.conda_dependencies = cd
batch_env.docker.enabled = True
batch_env.docker.base_image = DEFAULT_CPU_IMAGE
print('Configuration ready.')

# Parallel run: run the batch prediction script, generate predictions from the input, and save the results, all at the same time
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.pipeline.core import PipelineData

default_ds = ws.get_default_datastore()

output_dir = PipelineData(name='inferences', 
                          datastore=default_ds, 
                          output_path_on_compute='diabetes/results')
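
A sketch of how batch_env and output_dir could feed the imported ParallelRunConfig/ParallelRunStep; the entry script, source directory, compute target (inference_cluster) and input dataset (batch_data_set) are assumed names used only for illustration.

parallel_run_config = ParallelRunConfig(
    source_directory='batch_scripts',          # assumed folder containing the scoring script
    entry_script='batch_diabetes.py',          # assumed entry script
    mini_batch_size='5',
    error_threshold=10,
    output_action='append_row',
    environment=batch_env,
    compute_target=inference_cluster,          # assumed existing compute target
    node_count=2)

parallelrun_step = ParallelRunStep(
    name='batch-score-diabetes',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('diabetes_batch')],  # assumed registered file dataset
    output=output_dir,
    arguments=[],
    allow_reuse=True)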
Example no. 12
from azureml.core import Experiment, ScriptRunConfig, Workspace, Environment
from azureml.core.conda_dependencies import CondaDependencies

#connecting to workspace
ws = Workspace.from_config()

sklearn_env = Environment('sklearn-env')

#Ensures the required packages are installed
packages = CondaDependencies.create(conda_packages=['scikit-learn', 'pip'],
                                    pip_packages=['azureml-defaults'])

sklearn_env.python.conda_dependencies = packages

#creating a config file
script = ScriptRunConfig(source_directory="experiments_directory",
                         script="training_experiment.py",
                         arguments=['--reg-rate', 0.1],
                         environment=sklearn_env)

#submit the experiment
exp = Experiment(workspace=ws, name="Training_model_Experiment")
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)
# MAGIC         data = np.array(data)
# MAGIC         result = model.predict(data)
# MAGIC 
# MAGIC         # you can return any data type as long as it is JSON-serializable
# MAGIC         return result.tolist()
# MAGIC       
# MAGIC     except Exception as e:
# MAGIC         result = str(e)
# MAGIC         return result

# COMMAND ----------

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

nycenv = Environment('nycitibike-deployment-env')
nycenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'pip==20.1.1',
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'sklearn'
])

with open('nycenv.yml','w') as f:
  f.write(nycenv.python.conda_dependencies.serialize_to_string())

# COMMAND ----------

from azureml.core.model import InferenceConfig
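
A sketch of how the imported InferenceConfig could combine nycenv with a scoring script and an ACI deployment; the entry script, service name, ws and model are assumptions for illustration.

from azureml.core.webservice import AciWebservice
from azureml.core.model import Model

inference_config = InferenceConfig(entry_script='score.py',   # assumed scoring script
                                   environment=nycenv)

aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws,                           # assumed workspace handle
                       name='nycitibike-service',              # assumed service name
                       models=[model],                         # assumed registered model
                       inference_config=inference_config,
                       deployment_config=aci_config)
service.wait_for_deployment(show_output=True)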
Example no. 14
    deployment_config = AciWebservice.deploy_configuration(
        cpu_cores=0.1,
        memory_gb=0.5,
        tags={"method": "sklearn"},
        description='Predict MoA activation with sklearn')

    # Local deployment via Docker
    #deployment_config = LocalWebservice.deploy_configuration(port=6789)

    ws = Workspace.from_config()
    model = Model(ws, args.model_name, version=args.model_version)

    print(model.name, model.id, model.version, sep='\t')

    # Create the Azure container with a conda-managed environment.
    myenv = Environment(name="envmoa")
    # Enable Docker
    myenv.docker.enabled = True
    # Define the Docker image dependencies.
    myenv.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['scikit-learn'],
        pip_packages=[
            'azureml-defaults', 'numpy', 'pandas', 'scikit-multilearn'
        ])

    inf_config = InferenceConfig(environment=myenv,
                                 source_directory='./src',
                                 entry_script='entry.py')
    service = Model.deploy(ws, "moa-webservice-v7", [model], inf_config,
                           deployment_config)
    service.wait_for_deployment(show_output=True)
Example no. 15
def get_or_create_python_environment(
        azure_config: AzureConfig,
        source_config: SourceConfig,
        environment_name: str = "",
        register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files that are specified in `source_config`. If an environment with this Conda definition already
    exists, it is retrieved; otherwise it is created afresh.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided. This parameter is meant to be used when running
    inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace. If
    False, it will only be created, but not registered. Use this for unit testing.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(
        source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi
        conda_dependencies.set_pip_option(
            f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option(
            "--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC":
        str(source_config.upload_timeout_seconds),
        # Occasionally uploading data during the run takes too long, and makes the job fail. Default is 300.
        "AZUREML_RUN_KILL_SIGNAL_TIMEOUT_SEC":
        "900",
        "MKL_SERVICE_FORCE_INTEL":
        "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of that,
    # and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([
        merged_yaml, azure_config.docker_shm_size, base_image,
        str(env_variables)
    ])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(),
                              name=env_name_to_find,
                              version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(
            f"Python environment '{unique_env_name}' does not yet exist, creating and registering it."
        )
    env = Environment(name=unique_env_name)
    env.docker.enabled = True
    env.docker.shm_size = azure_config.docker_shm_size
    env.python.conda_dependencies = conda_dependencies
    env.docker.base_image = base_image
    env.environment_variables = env_variables
    if register_environment:
        env.register(azure_config.get_workspace())
    return env
Example no. 16
    shutil.copy(f, PROJECT_FOLDER)
files = glob.glob("*.cfg")
for f in files:
    shutil.copy(f, PROJECT_FOLDER)
files = glob.glob("*.txt")
for f in files:
    shutil.copy(f, PROJECT_FOLDER)
shutil.copytree("model_data", os.path.join(PROJECT_FOLDER, 'model_data'))
shutil.copytree("yolo3", os.path.join(PROJECT_FOLDER, 'yolo3'))

cd = CondaDependencies.create(pip_packages=[
    'keras==2.1.5', 'tensorflow==1.6.0', 'pillow', 'matplotlib', 'h5py',
    'tensorboard'
],
                              conda_packages=['python=3.6.11'])
myenv = Environment("yolov3")
myenv.python.conda_dependencies = cd
myenv.python.conda_dependencies.add_pip_package("azureml-sdk")
myenv.python.conda_dependencies.add_channel("conda-forge")
myenv.docker.enabled = True
myenv.docker.base_image = DEFAULT_GPU_IMAGE

# Choose a name for your GPU cluster
CLUSTER_NAME = "gpu-cluster"

# Verify that cluster does not exist already
try:
    aml_cluster = AmlCompute(workspace=ws, name=CLUSTER_NAME)
    print("Found existing cluster, use it.")
except ComputeTargetException:
    print("provisioning new compute target")
Example no. 17
                            is_directory=True)
batch_input = output_data.as_dataset()

detection_data = PipelineData('detection_data',
                            datastore=def_blob_store,
                            output_name='detection_data',
                            is_directory=True)

compute_target = ws.compute_targets['cpu-cluster']

environment_variables = {
    'POSTGRES_PASSWORD': os.environ['POSTGRES_PASSWORD'],
    'POSTGRES_HOSTNAME': 'ackbar-postgres.postgres.database.azure.com',
    'AZURE_STORAGE_CONNECTION_STRING': os.environ['AZURE_STORAGE_CONNECTION_STRING']
}
env = Environment(name='env', environment_variables=environment_variables)
conda = CondaDependencies()
conda.add_conda_package('psycopg2')
# have to use pip to install azure packages...
conda.add_pip_package('azure-storage-blob')
env.python.conda_dependencies = conda
run_config = RunConfiguration()
run_config.environment = env

prepare_step = PythonScriptStep(
    script_name='prepare.py',
    arguments=['--output', batch_input],
    inputs=[],
    outputs=[batch_input],
    compute_target=compute_target,
    source_directory='prepare',
Example no. 18
def main():
    e = Env()
    
    from azureml.core.authentication import InteractiveLoginAuthentication

    myten=os.environ.get("AZURE_TENANT_ID")
    interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ.get("AZURE_TENANT_ID"))
    subscription=os.environ.get("CSUBSCRIPTION")
    workspace_name=e.workspace_name
    resource_group=e.resource_group

    aml_workspace = Workspace.get(
        name = workspace_name,
        subscription_id = subscription,
        resource_group=resource_group,
        auth=interactive_auth
    )

    from ml_service.util.attach_compute import get_compute

    # Get Azure machine learning cluster
    # If not present then get_compute will create a compute based on environment variables

    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    print("SDK version: ", azureml.core.VERSION)

    ## Variable names that can be passed in as parameter values
    from azureml.pipeline.core.graph import PipelineParameter
    from azureml.core import Datastore

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")
    #model_path = PipelineParameter(
    #    name="model_path", default_value=e.model_path)    

    if (e.datastore_name):
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    # Get the datastore whether it is the default or named store
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset_name = e.dataset_name

    # Create a reusable Azure ML environment
    from ml_service.util.manage_environment import get_environment
    from azureml.core import Environment

    # RUN Configuration
    ## This setup is required to work with AzureML-SDK 1.0.85
    from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
    from azureml.core.conda_dependencies import CondaDependencies

    try:
        app_env = Environment(name="smartschedule_env")
        app_env.register(workspace=aml_workspace)
    except Exception as ex:
        print("Could not register the environment:", ex)
    
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    aml_run_config.environment.environment_variables["DATASTORE_NAME"] = e.datastore_name  # NOQA: E501

    # Use the aml_compute you created above. 
    aml_run_config.target = aml_compute

    # Enable Docker
    aml_run_config.environment.docker.enabled = True

    # Set Docker base image to the default CPU-based image
    aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    #aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

    # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
    aml_run_config.environment.python.user_managed_dependencies = False

    app_conda_deps=CondaDependencies.create(
        conda_packages=['pandas','scikit-learn', 'libgcc','pyodbc', 'sqlalchemy', 'py-xgboost==0.90'], 
        pip_packages=['azureml-sdk[automl,explain,contrib,interpret]==1.4.0', 'xgboost==0.90', 'azureml-dataprep==1.4.6', 'pyarrow', 'azureml-defaults==1.4.0', 'azureml-train-automl-runtime==1.4.0'], pin_sdk_version=False)

    # Specify CondaDependencies obj, add necessary packages
    aml_run_config.environment.python.conda_dependencies = app_conda_deps

    print ("Run configuration created.")
    from azure.common.credentials import ServicePrincipalCredentials
    #from azure.keyvault import KeyVaultClient, KeyVaultAuthentication

    from azure.keyvault.secrets import SecretClient
    from azure.identity import DefaultAzureCredential
    import pandas as pd
    #import sqlalchemy as sql
    import pyodbc

    def get_data(sql_string, columns):
        credentials = None
        credential = DefaultAzureCredential()

        secret_client = SecretClient("https://smrtschd-aml-kv.vault.azure.net", credential=credential)    
        secret = secret_client.get_secret("database-connection")

        #client = KeyVaultClient(KeyVaultAuthentication(auth_callback))
        #secret_bundle = client.get_secret("https://smrtschd-aml-kv.vault.azure.net", "database-connection", "")

        server = 'starlims-sql.database.windows.net'
        database = 'QM12_DATA_AUTOMATION'
        username = '******'
        password = secret.value
        driver= '{ODBC Driver 17 for SQL Server}'
        conn = pyodbc.connect('Driver='+driver+';'+
                            'Server='+server+';'+
                            'Database='+database+';'+
                            'PORT=1433;'+
                            'UID='+username+';'+
                            'PWD='+password+'; MARS_Connection=Yes'
        )

        try:
            SQL_Query = pd.read_sql_query(sql_string, conn)

            df = pd.DataFrame(SQL_Query, columns=columns)
            return df
        except Exception as e:
            print(e)
            raise

    sql_str = "SELECT " \
            "  Dept " \
            ", Method " \
            ", Servgrp " \
            ", Runno " \
            ", TestNo " \
            ", Testcode " \
            ", Total_Duration_Min " \
            ", Total_Duration_Hr " \
            ", Usrnam " \
            ", Eqid " \
            ", Eqtype " \
        "FROM dbo.Draft " \
        "order by TESTCODE, RUNNO, dept, method;"

    columns = ["Dept", "Method", "Servgrp", "Runno", "TestNo", "Testcode", "Total_Duration_Min", "Total_Duration_Hr", "Usrnam", "Eqid","Eqtype"]

    from azureml.core import Dataset
    from sklearn.model_selection import train_test_split

    if (e.train_dataset_name not in aml_workspace.datasets):

        
        df = get_data(sql_str, columns)

        train_df, test_df=train_test_split(df, test_size=0.2)

        MY_DIR = "data"

        CHECK_FOLDER = os.path.isdir(MY_DIR)

        if not CHECK_FOLDER:
            os.makedirs(MY_DIR)
        else:
            print("Folder ", MY_DIR, " is already created")

        #files = ["data/analyst_tests.csv"]
        files = ["data/train_data.csv","data/test_data.csv"]

        def_file_store = Datastore(aml_workspace, "workspacefilestore")

        dtfrm = df.to_csv(files[0], header=True, index=False)

        train_dataframe=train_df.to_csv(files[0], header=True, index=False)
        test_dataframe=test_df.to_csv(files[1], header=True, index=False)
        datastore.upload_files(
            files=files,
            target_path='data/',
            overwrite=True
        )

        from azureml.data.data_reference import DataReference

        blob_input_data_test=DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertest",
            path_on_datastore="data/test_data.csv"
        )
        test_data=Dataset.Tabular.from_delimited_files(blob_input_data_test)
        test_data.register(aml_workspace, e.test_dataset_name, create_new_version=True)

        blob_input_data_train=DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertrain",
            path_on_datastore="data/train_data.csv"
        )
        train_data=Dataset.Tabular.from_delimited_files(blob_input_data_train)
        train_data.register(aml_workspace, e.train_dataset_name, create_new_version=True)

    else:
        from azureml.data.data_reference import DataReference
        print("getting from the datastore instead of uploading")

        train_data=Dataset.get_by_name(aml_workspace, name=e.train_dataset_name)
        test_data=Dataset.get_by_name(aml_workspace, name=e.test_dataset_name)

    # Preview the first five records of the training dataset
    tdf=train_data.to_pandas_dataframe().head(5)

    print(tdf.shape)
    print(tdf)

    # display the first five rows of the data
    # create a variable that can be used for other purposes
    df=train_data.to_pandas_dataframe().head()

    label_column="Total_Duration_Min"

    import random
    import string

    def randomString(stringLength=15):
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    from azureml.core import Experiment

    experiment = Experiment(aml_workspace, "SmartScheduler_Pipeline")


    import logging

    aml_name = 'smart_scheduler_' + randomString(5)
    print(aml_name)

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.ticker import StrMethodFormatter

    print(df.head(5))
    print(df.shape)
    print(df.dtypes)

    #df.hist(column='Dept')
    list(df.columns.values)

    # Remove Features that are not necessary.
    #df.hist(column="Servgrp", bins=4)
    train_data=train_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])
    test_data=test_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])

    print(train_data.to_pandas_dataframe())
    print(test_data.to_pandas_dataframe())

    from azureml.automl.core.featurization import FeaturizationConfig

    # Some of the columns could be changed to one-hot encoding, especially the categorical columns
    featurization_config=FeaturizationConfig()
    featurization_config.blocked_transformers=['LabelEncoder']
    featurization_config.add_column_purpose('Dept', 'CategoricalHash')
    featurization_config.add_transformer_params('HashOneHotEncoder',['Method'], {"number_of_bits":3})
    featurization_config.add_column_purpose('Servgrp', 'CategoricalHash')
    featurization_config.add_column_purpose('Testcode', 'Numeric')
    featurization_config.add_column_purpose('Usrnam', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqid', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqtype', 'CategoricalHash')

    from azureml.pipeline.core import Pipeline, PipelineData
    from azureml.pipeline.steps import PythonScriptStep

    #train_model_folder = './scripts/trainmodel'

    automl_settings = {
        "iteration_timeout_minutes": 5,
        "iterations": 5,
        "enable_early_stopping": True,
        "primary_metric": 'spearman_correlation',
        "verbosity": logging.INFO,
        "n_cross_validation":5
    }

    automl_config = AutoMLConfig(task="regression",
                    debug_log='automated_ml_errors.log',
                    #path = train_model_folder,
                    training_data=train_data,
                    featurization=featurization_config,
                    blacklist_models=['XGBoostRegressor'],
                    label_column_name=label_column,
                    compute_target=aml_compute,
                    **automl_settings)

    from azureml.pipeline.steps import AutoMLStep
    from azureml.pipeline.core import TrainingOutput

    metrics_output_name = 'metrics_output'
    best_model_output_name='best_model_output'

    metrics_data = PipelineData(name = 'metrics_data',
                    datastore = datastore,
                    pipeline_output_name=metrics_output_name,
                    training_output=TrainingOutput(type='Metrics'))

    model_data = PipelineData(name='model_data',
                datastore=datastore,
                pipeline_output_name=best_model_output_name,
                training_output=TrainingOutput(type='Model'))

    trainWithAutomlStep = AutoMLStep(
                        name=aml_name,
                        automl_config=automl_config,
                        passthru_automl_config=False,
                        outputs=[metrics_data, model_data],
                        allow_reuse=True
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name='./evaluate/evaluate_model.py',
        #  e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel
        ]
    )

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name='register/register_model2.py', #e.register_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param,
            "--model_path", model_data,
            "--ds_name", e.train_dataset_name
        ],
        runconfig=aml_run_config,
        allow_reuse=False
    )

    if ((e.run_evaluation).lower() == 'true'):
        print("Include evaluation step before register step.")
        evaluate_step.run_after(trainWithAutomlStep)
        register_step.run_after(evaluate_step)
        pipeline_steps = [ trainWithAutomlStep, evaluate_step, register_step ]
    else:
        print("Exclude the evaluation step and run register step")
        register_step.run_after(trainWithAutomlStep)
        pipeline_steps = [ trainWithAutomlStep, register_step ]

    print( "this is the value for execute pipeline: {}".format(e.execute_pipeline))

    if( (e.execute_pipeline).lower() =='true' ):
        # Execute the pipeline normally during testing and debugging
        print("Pipeline submitted for execution.")
        pipeline = Pipeline(workspace = aml_workspace, steps=pipeline_steps)
        pipeline_run = experiment.submit(pipeline)
        pipeline_run.wait_for_completion()
        print("Pipeline is built.")
    else:
        # Generates pipeline that will be called in ML Ops
        train_pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        train_pipeline._set_experiment_name
        train_pipeline.validate()
        published_pipeline = train_pipeline.publish(
            name=e.pipeline_name,
            description="Model training/retraining pipeline",
            version=e.build_id
        )
        print(f'Published pipeline: {published_pipeline.name}')
        print(f'for build {published_pipeline.version}')
Example no. 19
    print("get datasets from datastore")

    input_data_paths = [(blob_datastore, 'mldata')]
    input_dataset = Dataset.File.from_files(path=input_data_paths)

    # ----PYTHON ENV------
    #-------------------------
    packages = CondaDependencies.create(
        conda_packages=["cudatoolkit=10.0"],
        pip_packages=[
            'azureml-sdk', 'PyYAML', 'azure-storage-blob', 'matplotlib',
            'seaborn', 'tensorflow', 'Keras', 'tensorflow-hub', 'joblib',
            'tqdm', 'Pillow', 'azureml-dataprep[pandas,fuse]>=1.1.14'
        ])

    diagnoz_env = Environment("diagnoz-pipeline-env")
    diagnoz_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diagnoz_env.docker.enabled = True  # Use a docker container
    diagnoz_env.docker.base_image = DEFAULT_GPU_IMAGE
    diagnoz_env.python.conda_dependencies = packages
    diagnoz_env.register(workspace=ws)

    # Runconfigs
    pipeline_run_config = RunConfiguration()
    pipeline_run_config.target = compute_target
    pipeline_run_config.environment = diagnoz_env
    print("Run configuration created.")

    shutil.rmtree(script_folder, ignore_errors=True)
    os.makedirs(script_folder, exist_ok=True)
Example no. 20
joblib.dump(value = classi_model, filename = 'outputs/model.pkl')

run.complete()


# 2 process file

# The scikit-learn package is not present in the default environment, so we need to provide the conda dependencies explicitly

from azureml.core import Experiment, ScriptRunConfig, Environment, Workspace
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.from_config()

my_env = Environment("My_env")

my_env.python.conda_dependencies = CondaDependencies.create(conda_packages= ['scikit-learn', 'pip'],
                                                            pip_packages=['azureml-defaults'])


script_config = ScriptRunConfig(source_directory = ".",
                                script = "script_name",
                                environment = my_env)


experiment = Experiment(ws, "my_training_exp")

new_run = experiment.submit(script_config)

new_run.wait_for_completion()
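
A short follow-up sketch, not part of the original snippet, showing how logged metrics and output files could be inspected once the run has finished.

# Inspect logged metrics and output files of the completed run
metrics = new_run.get_metrics()
for name, value in metrics.items():
    print(name, ':', value)

for file_name in new_run.get_file_names():
    print(file_name)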
Example no. 21
    service_principal_password=os.environ["SP_SECRET"],
)

ws = Workspace(
    subscription_id=auth_config["subscription_id"],
    resource_group=auth_config["resource_group"],
    workspace_name=auth_config["workspace_name"],
    auth=auth,
)

# Usually the cluster already exists, so we just fetch it
compute_target = next(
    (m for m in ComputeTarget.list(ws) if m.name == compute["name"]), None)

# Specify the compute environment and register it for use in scoring
env = Environment("component-condition")
env.docker.enabled = True
cd = CondaDependencies.create(
    conda_packages=["tensorflow=2.0.0", "pandas", "numpy", "matplotlib"],
    pip_packages=["azureml-mlflow==1.5.0", "azureml-defaults==1.5.0"],
)
env.python.conda_dependencies = cd
env.register(workspace=ws)
print("Registered environment component-condition")

# Specify the run configuration
run_config = RunConfiguration()
run_config.environment.docker.enabled = True
run_config.environment.python.conda_dependencies = cd

# Pipeline definition
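
A sketch of a single-step pipeline built on the run configuration above; the step name, script name and source directory are placeholders.

from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep

score_step = PythonScriptStep(
    name='score-component-condition',   # assumed step name
    script_name='score.py',             # assumed script
    source_directory='scoring',         # assumed folder
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False)

pipeline = Pipeline(workspace=ws, steps=[score_step])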
Example no. 22
    # Load data
    data = pd.read_csv('diabetes.csv')

    # Count the rows and log the result
    row_count = (len(data))
    print('observations:', row_count)
    mlflow.log_metric('observations', row_count)


from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails


# Create a Python environment for the experiment
mlflow_env = Environment("mlflow-env")

# Ensure the required packages are installed
packages = CondaDependencies.create(conda_packages=['pandas','pip'],
                                    pip_packages=['mlflow','azureml-mlflow'])
mlflow_env.python.conda_dependencies = packages

# Create a script config
script_mlflow = ScriptRunConfig(source_directory=experiment_folder,
                                script='mlflow_diabetes.py',
                                environment=mlflow_env) 

# submit the experiment
experiment = Experiment(workspace=ws, name='diabetes-mlflow-script')
run = experiment.submit(config=script_mlflow)
RunDetails(run).show()
Example no. 23
                         model_name))  #replace the placeholder MODEL-NAME
    print('score_fixed.py saved')

#Get model
model = Model(ws, model_name)

#Create conda Dependencies
conda_packages = ['numpy==1.19.1', "pip==19.2.3"]
pip_packages = [
    'azureml-sdk==1.12.0', 'azureml-defaults==1.12.0',
    'azureml-monitoring==0.1.0a21', 'xgboost==1.1.1', 'scikit-learn==0.23.1',
    'keras==2.3.1', 'tensorflow==2.0.0'
]
conda_deps = CondaDependencies.create(conda_packages=conda_packages,
                                      pip_packages=pip_packages)
myenv = Environment(name='myenv')
myenv.python.conda_dependencies = conda_deps

inf_config = InferenceConfig(entry_script='score_fixed.py', environment=myenv)

aks_config = AksWebservice.deploy_configuration()

service = Model.deploy(workspace=ws,
                       name=aks_service_name,
                       models=[model],
                       inference_config=inf_config,
                       deployment_config=aks_config,
                       deployment_target=aks_target)

service.wait_for_deployment(show_output=True)
print(service.state)
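
A sketch of smoke-testing the deployed AKS service through the SDK; the payload shape depends on score_fixed.py and is an assumption here.

import json

sample_input = json.dumps({'data': [[0.1, 0.2, 0.3]]})   # assumed input format
prediction = service.run(input_data=sample_input)
print(prediction)

# REST endpoint and keys for external clients
print(service.scoring_uri)
print(service.get_keys())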
Example no. 24
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name,
                                                compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import GridParameterSampling, HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.train.hyperdrive import BayesianParameterSampling, uniform
from azureml.widgets import RunDetails

# Create a Python environment for the experiment
sklearn_env = Environment("env02")

# Ensure the required packages are installed (we need scikit-learn, Azure ML defaults, and Azure ML dataprep)
packages = CondaDependencies.create(pip_packages=[
    'lightgbm', 'sklearn', 'scipy', 'numpy', 'azureml-defaults',
    'azureml-dataprep[pandas]'
])
sklearn_env.python.conda_dependencies = packages

# Create a script config
script_config = ScriptRunConfig(source_directory=experiment_folder,
                                script='training03.py',
                                arguments=[
                                    '--max_depth', 5, '--num_leaves', 50,
                                    '--subsample', 0.9, '--learning_rate',
                                    0.01, '--min_data_in_leaf', 50,
def main():
    """
    Run the experiment for training
    """
    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.getenv("TENANT_ID"))
    work_space = Workspace.from_config(auth=interactive_auth)

    # Set up the dataset for training
    datastore = work_space.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist"))

    # Set up the experiment for training
    experiment = Experiment(workspace=work_space, name="keras-lenet-train")
    #     azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000
    config = ScriptRunConfig(
        source_directory=".",
        script="train_keras.py",
        compute_target="cpu-cluster",
        arguments=[
            "--data_folder",
            dataset.as_named_input("input").as_mount(),
            "--log_folder",
            "./logs",
        ],
    )

    # Set up the TensorFlow/Keras environment
    environment = Environment("keras-environment")

    # environment = Environment.from_conda_specification(
    #     name='keras-environment',
    #     file_path='keras-environment.yml'
    # )
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"])
    config.run_config.environment = environment

    # Run the experiment for training
    run = experiment.submit(config)
    aml_url = run.get_portal_url()
    print(
        "Submitted to an Azure Machine Learning compute cluster. Click on the link below"
    )
    print("")
    print(aml_url)

    tboard = Tensorboard([run])
    # If successful, start() returns a string with the URI of the instance.
    tboard.start(start_browser=True)
    run.wait_for_completion(show_output=True)
    # After your job completes, be sure to stop() the streaming otherwise it will continue to run.
    print("Press enter to stop")
    input()
    tboard.stop()

    # Register Model
    metrics = run.get_metrics()
    run.register_model(
        model_name="keras_mnist",
        tags={
            "data": "mnist",
            "model": "classification"
        },
        model_path="outputs/keras_lenet.h5",
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version="2.3.1",
        properties={
            "train_loss": metrics["train_loss"][-1],
            "train_accuracy": metrics["train_accuracy"][-1],
            "val_loss": metrics["val_loss"][-1],
            "val_accuracy": metrics["val_accuracy"][-1],
        },
    )
# script arguments
arguments = [
    "--deepspeed",
    "--deepspeed_config",
    "ds_config.json",
    "--deepspeed_mpi",
    "--global_rank",
    "$AZ_BATCHAI_TASK_INDEX",
    "--with_aml_log",
    True,
]

# create an environment
# Note: We will use the Dockerfile method to create an environment for DeepSpeed.
# In future, we plan to create a Curated environment for DeepSpeed.
env = Environment(name="deepspeed")
env.docker.enabled = True

# indicate how to run Python
env.python.user_managed_dependencies = True
env.python.interpreter_path = "/opt/miniconda/bin/python"

# To install any Python packages you need, simply add RUN pip install package-name to the docker string. E.g. `RUN pip install sklearn`
# Specify docker steps as a string and use the base DeepSpeed Docker image
dockerfile = r"""
FROM deepspeed/base-aml:with-pt-ds-and-deps
RUN pip install azureml-mlflow
RUN echo "Welcome to the DeepSpeed custom environment!"
"""

# Set the base image to None, because the image is defined by the dockerfile string above.
env.docker.base_image = None
env.docker.base_dockerfile = dockerfile
def main():
    e = Env()

    print('********************')
    print(e.source_directory)

    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)

    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    #Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)

    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )

    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv', 'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path='diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        #Create a tabular dataset from the path on the datastore (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)

    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name,
                                                    compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=[
            'scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip'
        ],
        pip_packages=[
            'azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow'
        ])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above.
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env

    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary Data Reference) for the model folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, Run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data',
                                     diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(
        name="Train and Register Model",
        source_directory='./aml_pipeline',
        script_name="train_diabetes.py",
        arguments=['--training-folder', prepped_data_folder],
        inputs=[prepped_data_folder],
        compute_target=pipeline_cluster,
        runconfig=pipeline_run_config,
        allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    published_pipeline

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
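
A sketch of invoking the published pipeline through its REST endpoint; the authentication flow and payload follow the usual Azure ML pattern, and the experiment name reuses the jlg-exp experiment from the code above.

import requests
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "jlg-exp"})
response.raise_for_status()
print("Pipeline run id:", response.json().get("Id"))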
Example no. 28
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# training script
script_dir = prefix.joinpath("code", "train", "fastai", "pets-resnet34")
script_name = "train.py"

# environment file
environment_file = prefix.joinpath("environments", "fastai.dockerfile")

# azure ml settings
environment_name = "fastai-pets-example"
experiment_name = "fastai-pets-example"
compute_target = "gpu-cluster"

# create environment
env = Environment(environment_name)
env.docker.enabled = True
env.docker.base_image = None
env.docker.base_dockerfile = environment_file
env.python.user_managed_dependencies = True

# create job config
src = ScriptRunConfig(
    source_directory=script_dir,
    script=script_name,
    environment=env,
    compute_target=compute_target,
)

# submit job
run = Experiment(ws, experiment_name).submit(src)
Example no. 29
def start(config_file):

    print(config_file)
    configdata = ngccontent.get_config(config_file)
    subscription_id = configdata["azureml_user"]["subscription_id"]
    resource_group = configdata["azureml_user"]["resource_group"]
    workspace_name = configdata["azureml_user"]["workspace_name"]

    ws = Workspace(workspace_name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

    verify = f'''
    Subscription ID: {subscription_id}
    Resource Group: {resource_group}
    Workspace: {workspace_name}'''
    print(verify)

    ### vnet settings
    vnet_rg = ws.resource_group
    vnet_name = configdata["aml_compute"]["vnet_name"]
    subnet_name = configdata["aml_compute"]["subnet_name"]

    ### azure ml names
    ct_name = configdata["aml_compute"]["ct_name"]
    exp_name = configdata["aml_compute"]["exp_name"]

    ### trust but verify
    verify = f'''
    vNET RG: {vnet_rg}
    vNET name: {vnet_name}
    vNET subnet name: {subnet_name}
    Compute target: {ct_name}
    Experiment name: {exp_name}'''
    print(verify)

    vm_name = configdata["aml_compute"]["vm_name"]
    if vm_name in configdata["supported_vm_sizes"].keys():
        gpus_per_node = configdata["supported_vm_sizes"][vm_name]

        print(
            "Setting up compute target {ct_name} with vm_size: {vm_name} with {gpus_per_node} GPUs"
            .format(ct_name=ct_name,
                    vm_name=vm_name,
                    gpus_per_node=gpus_per_node))

        if ct_name not in ws.compute_targets:
            config = AmlCompute.provisioning_configuration(
                vm_size=vm_name,
                min_nodes=configdata["aml_compute"]["min_nodes"],
                max_nodes=configdata["aml_compute"]["max_nodes"],
                vnet_resourcegroup_name=vnet_rg,
                vnet_name=vnet_name,
                subnet_name=subnet_name,
                idle_seconds_before_scaledown=configdata["aml_compute"]
                ["idle_seconds_before_scaledown"],
                remote_login_port_public_access='Enabled')
            ct = ComputeTarget.create(ws, ct_name, config)
            ct.wait_for_completion(show_output=True)
        else:
            print("Loading Pre-existing Compute Target {ct_name}".format(
                ct_name=ct_name))
            ct = ws.compute_targets[ct_name]
    else:
        print("Unsupported vm_size {vm_size}".format(vm_size=vm_name))
        print("The specified vm size must be one of ...")
        for azure_gpu_vm_size in configdata["supported_vm_sizes"].keys():
            print("... " + azure_gpu_vm_size)
        raise Exception(
            "{vm_size} does not support Pascal or above GPUs".format(
                vm_size=vm_name))

    environment_name = configdata["aml_compute"]["environment_name"]
    python_interpreter = configdata["aml_compute"]["python_interpreter"]
    conda_packages = configdata["aml_compute"]["conda_packages"]
    from azureml.core import ContainerRegistry

    if environment_name not in ws.environments:
        env = Environment(name=environment_name)
        env.docker.enabled = configdata["aml_compute"]["docker_enabled"]
        env.docker.base_image = None
        env.docker.base_dockerfile = "FROM {dockerfile}".format(
            dockerfile=configdata["ngc_content"]["base_dockerfile"])
        env.python.interpreter_path = python_interpreter
        env.python.user_managed_dependencies = True
        conda_dep = CondaDependencies()

        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)

        env.python.conda_dependencies = conda_dep
        env.register(workspace=ws)
    else:
        env = ws.environments[environment_name]

    amlcluster = Azuremlcomputecluster.AzureMLComputeCluster(
        workspace=ws,
        compute_target=ct,
        initial_node_count=1,
        experiment_name=configdata["aml_compute"]["exp_name"],
        environment_definition=env,
        use_gpu=True,
        n_gpus_per_node=1,
        jupyter=True,
        jupyter_port=configdata["aml_compute"]["jupyter_port"],
        dashboard_port=9001,
        scheduler_port=9002,
        scheduler_idle_timeout=1200,
        worker_death_timeout=30,
        additional_ports=[],
        datastores=[],
        telemetry_opt_out=True,
        asynchronous=False)

    print(amlcluster.jupyter_link)
    amlcluster.jupyter_link
    print('Exiting script')
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

logger = logging.getLogger()
logger.setLevel("INFO")
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)

ws = Workspace.from_config()

freezer_environment = Environment("sktime_freezer_environment")
cd = CondaDependencies.create(
    conda_packages=["numpy", "cython", "pandas", "scikit-learn"],
    pip_packages=[
        "azureml-defaults",
        "inference-schema[numpy-support]",
        "joblib==0.13.*",
        "azureml-dataprep[pandas, fuse]",
        "sktime",
    ],
)
freezer_environment.docker.enabled = True
freezer_environment.docker.base_image = DEFAULT_CPU_IMAGE
freezer_environment.python.conda_dependencies = cd
freezer_environment.register(workspace=ws)
logger.info("Environment registered")
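
Once registered, the environment can be fetched by name in a later session instead of being rebuilt; a minimal sketch (version pinning is optional).

from azureml.core import Environment, Workspace

ws = Workspace.from_config()

# Retrieve the registered environment by name; pass version=... to pin a specific revision
restored_env = Environment.get(workspace=ws, name="sktime_freezer_environment")
print(restored_env.name, restored_env.version)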