# Grab the requested model
model_list = Model.list(workspace=ws)
model = None
# Unpack the generator and look through the list to find your desired model
model, = (m for m in model_list if m.version == model_version and m.name == model_name)
print('Model picked: {} \nModel Description: {} \nModel Version: {}'.format(
    model.name, model.description, model.version))

from azureml.core.webservice import LocalWebservice, Webservice

os.chdir('./score')
print("Creating environment")
local_env = Environment(name=f'{web_service_name}_env')
local_env.environment_variables = {"STORAGE_CONNECTION": os.getenv("STORAGE_CONNECTION")}
print(local_env.environment_variables)
local_env.python.conda_dependencies = CondaDependencies.create(
    pip_packages=[
        'azureml-defaults',
        'azure-storage-blob',
        'pynacl==1.2.1'
    ],
    conda_packages=[
        'numpy',
        'scikit-learn',
        'tensorflow',
        'keras'
    ])
pip_packages=[ "azure-storage-blob==2.1.0", "azureml-sdk", "hickle==3.4.3", "requests==2.21.0", "sklearn", "pandas", "numpy", "pillow==6.0.0", "tensorflow-gpu==1.15", "keras", "matplotlib", "seaborn", ]) env = Environment("prednet") env.python.conda_dependencies = conda_dependencies env.docker.enabled = True env.register(ws) # Runconfigs runconfig = RunConfiguration() runconfig.environment = env print("PipelineData object created") create_pipelines = PythonScriptStep( name="create pipelines", script_name="pipelines_slave.py", compute_target=cpu_compute_target, arguments=[ "--cpu_compute_name", cpu_compute_name, "--gpu_compute_name",
from azureml.core import Workspace

ws = Workspace.from_config()

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Create an environment
my_env = Environment("My_new_env")
conda_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
my_env.python.conda_dependencies = conda_dep
my_env.register(workspace=ws)

# Create the cluster
from azureml.core.compute import AmlCompute

cluster_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D11_V2",
                                                       max_nodes=2)
cluster = AmlCompute.create(ws, "My_cluster", cluster_config)
cluster.wait_for_completion()

# Fetch the data
input_ds = ws.datasets.get("Loan Application")

# For script running
from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory=".",
# MAGIC         data = json.loads(raw_data)['data']
# MAGIC         data = np.array(data)
# MAGIC         result = model.predict(data)
# MAGIC
# MAGIC         # you can return any data type as long as it is JSON-serializable
# MAGIC         return result.tolist()
# MAGIC     except Exception as e:
# MAGIC         result = str(e)
# MAGIC         return result

# COMMAND ----------

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment('CitibikeNY-deployment-env')
myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'pip==20.1.1',
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'sklearn'
])

with open('mydbxenv.yml', 'w') as f:
    f.write(myenv.python.conda_dependencies.serialize_to_string())

# COMMAND ----------

from azureml.core.model import InferenceConfig
subscription_id = config["subscription_id"]
location = config["location"]

cli_auth = AzureCliAuthentication()

# Get workspace
# ws = Workspace.from_config(auth=cli_auth)
ws = Workspace.get(name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group,
                   auth=cli_auth)

env = Environment.get(workspace=ws, name="AzureML-Minimal")
# print(env)

# From a Conda specification file
arimaenv = Environment.from_conda_specification(
    name="arimaenv",
    file_path="./scripts/scoring/conda_dependencies.yml")
print(arimaenv)

arimaenv.register(workspace=ws)

# Run the environment inside a Docker container.
arimaenv.docker.enabled = True

try:
    with open("./configuration/model.json") as f:
        config = json.load(f)
except:
try:
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If not, create it
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4,
                                                           idle_seconds_before_scaledown=1800)
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    pipeline_cluster.wait_for_completion(show_output=True)

# Create a Python environment for the experiment
model_env = Environment(env_name)
model_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
model_env.docker.enabled = True  # Use a docker container

# Create a set of package dependencies
packages = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'],
                                    pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
model_env.python.conda_dependencies = packages

# Register the environment (just in case you want to use it again)
model_env.register(workspace=ws)
registered_env = Environment.get(ws, env_name)
from azureml.core import Workspace, Run, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.experiment import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.core.script_run_config import get_run_config_from_script_run

# load Workspace
ws = Workspace.from_config()

# load Experiment
experiment = Experiment(workspace=ws, name='test-expt')

# Create python environment for Azure machine learning expt
# options for class methods: from_conda_specification, from_pip_requirements,
# from_existing_conda_environment
myenv = Environment(name="test")
# myenv = Environment.from_conda_specification(name="test",
#                                              file_path="./environment.yml")

# Environment: docker section
# docker_config = dict(
#     enabled=True,
#     base_image="base-gpu:openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04",
#     # comment out this environment variable if you don't have it set!
#     environment_variables={'WANDB_API_KEY': os.environ['WANDB_API_KEY']}
# )
# docker_section = DockerSection(**docker_config)

## Environment: docker section
myenv.docker.enabled = True
myenv.docker.base_image = "mcr.microsoft.com/azureml/base-gpu:openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04"
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter, PipelineEndpoint
from azureml.pipeline.core._restclients.aeva.models.error_response import ErrorResponseException
from ml_service.util.env_variables import Env

"""
$ python -m ml_service.pipelines.build_pipeline
"""

if __name__ == "__main__":
    # Environment variables
    env = Env()

    # Setup run config
    ws = Workspace.from_config()
    environment = Environment(name=env.aml_environment_name)
    environment.docker.enabled = True
    environment.docker.base_image = DEFAULT_CPU_IMAGE
    environment.python.user_managed_dependencies = False
    environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path="./environment_setup/conda_dependencies.yml")
    run_config = RunConfiguration()
    run_config.environment = environment

    # Create Pipeline data & parameters
    ds = ws.get_default_datastore()
    data_X = PipelineData('data_X', datastore=ds).as_dataset()
    data_y = PipelineData('data_y', datastore=ds).as_dataset()
    model_dir = PipelineData('model_dir', datastore=ds)
# the training script train/train.py.
#
# Note that the model that is being trained in the example is very basic and
# does not actually use the provided --regularization parameter, nor does it
# provide an actual performance result. These are merely hardcoded to show the
# effect in AzureML.
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

from workspace import get_workspace

ws = get_workspace()

experiment_name = 'test_experiment_1'
experiment = Experiment(workspace=ws, name=experiment_name)

myenv = Environment("user-managed-env")
myenv.python.user_managed_dependencies = True

dataset = Dataset.get_by_name(ws, name='images')
args = ['--data-folder', dataset.as_mount(), '--regularization', 0.07]

# No compute target is provided, hence the Run is performed locally
src = ScriptRunConfig(source_directory='model_train',
                      script='train.py',
                      arguments=args,
                      environment=myenv)

run = experiment.submit(config=src)
from azureml.core import Environment, Experiment, Workspace
from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
import json
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline

cluster_name = "SuccPlanning-nilanka"
ws = Workspace.from_config()

# Attach the existing cluster
pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)

# Create a Python environment for the experiment
environment = Environment("diabetes-pipeline-env")
environment.docker.enabled = True  # Use a docker container
environment.python.conda_dependencies.add_pip_package("scipy")
environment.python.conda_dependencies.add_pip_package("joblib")
environment.python.conda_dependencies.add_pip_package("numpy")
environment.python.conda_dependencies.add_pip_package("pandas")

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
        prediction = model.predict(data.reshape(1, -1))
        # Append prediction to results
        resultList.append("{}: {}".format(os.path.basename(f), prediction[0]))
    return resultList

from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

# Add dependencies required by the model
# For scikit-learn models, you need scikit-learn
# For parallel pipeline steps, you need azureml-core and azureml-dataprep[fuse]
cd = CondaDependencies.create(pip_packages=['scikit-learn', 'azureml-defaults',
                                            'azureml-core', 'azureml-dataprep[fuse]'])

batch_env = Environment(name='batch_environment')
batch_env.python.conda_dependencies = cd
batch_env.docker.enabled = True
batch_env.docker.base_image = DEFAULT_CPU_IMAGE
print('Configuration ready.')

# Parallel run - run the batch prediction script, generate predictions from the
# input, and save the results, all at the same time
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.pipeline.core import PipelineData

default_ds = ws.get_default_datastore()

output_dir = PipelineData(name='inferences',
                          datastore=default_ds,
                          output_path_on_compute='diabetes/results')
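# A minimal sketch of the ParallelRunConfig/ParallelRunStep that would follow.
# The script folder and name, the `batch_data_set` dataset, and the
# `inference_cluster` compute target are illustrative assumptions, not taken
# from the original snippet.
parallel_run_config = ParallelRunConfig(
    source_directory='batch_scripts',       # hypothetical folder with the scoring script
    entry_script='batch_diabetes.py',       # hypothetical entry script
    mini_batch_size='5',
    error_threshold=10,
    output_action='append_row',
    environment=batch_env,
    compute_target=inference_cluster,       # assumed existing AmlCompute target
    node_count=2)

parallelrun_step = ParallelRunStep(
    name='batch-score-diabetes',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('diabetes_batch')],  # hypothetical FileDataset
    output=output_dir,
    arguments=[],
    allow_reuse=True)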
from azureml.core import Experiment, ScriptRunConfig, Workspace, Environment
from azureml.core.conda_dependencies import CondaDependencies

# Connect to the workspace
ws = Workspace.from_config()

sklearn_env = Environment('sklearn-env')

# Ensure the required packages are installed
packages = CondaDependencies.create(conda_packages=['scikit-learn', 'pip'],
                                    pip_packages=['azureml-defaults'])
sklearn_env.python.conda_dependencies = packages

# Create a script config
script = ScriptRunConfig(source_directory="experiments_directory",
                         script="training_experiment.py",
                         arguments=['--reg-rate', 0.1],
                         environment=sklearn_env)

# Submit the experiment
exp = Experiment(workspace=ws, name="Training_model_Experiment")
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)
# MAGIC         data = np.array(data)
# MAGIC         result = model.predict(data)
# MAGIC
# MAGIC         # you can return any data type as long as it is JSON-serializable
# MAGIC         return result.tolist()
# MAGIC
# MAGIC     except Exception as e:
# MAGIC         result = str(e)
# MAGIC         return result

# COMMAND ----------

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

nycenv = Environment('nycitibike-deployment-env')
nycenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'pip==20.1.1',
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'sklearn'
])

with open('nycenv.yml', 'w') as f:
    f.write(nycenv.python.conda_dependencies.serialize_to_string())

# COMMAND ----------

from azureml.core.model import InferenceConfig
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=0.1,
    memory_gb=0.5,
    tags={"method": "sklearn"},
    description='Predict MoA activation with sklearn')

# Local deployment via docker
# deployment_config = LocalWebservice.deploy_configuration(port=6789)

ws = Workspace.from_config()
model = Model(ws, args.model_name, version=args.model_version)
print(model.name, model.id, model.version, sep='\t')

# Create an Azure container with a conda-managed environment.
myenv = Environment(name="envmoa")
# Enable docker
myenv.docker.enabled = True
# Define the docker dependencies.
myenv.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'],
    pip_packages=[
        'azureml-defaults',
        'numpy',
        'pandas',
        'scikit-multilearn'
    ])

inf_config = InferenceConfig(environment=myenv,
                             source_directory='./src',
                             entry_script='entry.py')

service = Model.deploy(ws, "moa-webservice-v7", [model], inf_config, deployment_config)
service.wait_for_deployment(show_output=True)
def get_or_create_python_environment(
        azure_config: AzureConfig,
        source_config: SourceConfig,
        environment_name: str = "",
        register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files that are specified in `source_config`. If an environment with this Conda definition
    already exists, it is retrieved; otherwise it is created afresh.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param environment_name: If specified, try to retrieve the existing Python environment with this name.
    If that is not found, create one from the Conda files provided. This parameter is meant to be used when
    running inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace.
    If False, it will only be created, but not registered. Use this for unit testing.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(
        source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of
        # packages on pypi
        conda_dependencies.set_pip_option(
            f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option(
            "--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        # Occasionally uploading data during the run takes too long, and makes the job fail. Default is 300.
        "AZUREML_RUN_KILL_SIGNAL_TIMEOUT_SEC": "900",
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of
    # that, and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([
        merged_yaml,
        azure_config.docker_shm_size,
        base_image,
        str(env_variables)
    ])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(),
                              name=env_name_to_find,
                              version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(
            f"Python environment '{unique_env_name}' does not yet exist, creating and registering it.")
        env = Environment(name=unique_env_name)
        env.docker.enabled = True
        env.docker.shm_size = azure_config.docker_shm_size
        env.python.conda_dependencies = conda_dependencies
        env.docker.base_image = base_image
        env.environment_variables = env_variables
        if register_environment:
            env.register(azure_config.get_workspace())
        return env
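# A minimal usage sketch, assuming `azure_config` and `source_config` are already
# populated AzureConfig / SourceConfig instances (both hypothetical here):
from azureml.core import ScriptRunConfig

env = get_or_create_python_environment(azure_config=azure_config,
                                       source_config=source_config)
src = ScriptRunConfig(source_directory=".", script="train.py", environment=env)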
    shutil.copy(f, PROJECT_FOLDER)

files = glob.glob("*.cfg")
for f in files:
    shutil.copy(f, PROJECT_FOLDER)

files = glob.glob("*.txt")
for f in files:
    shutil.copy(f, PROJECT_FOLDER)

shutil.copytree("model_data", os.path.join(PROJECT_FOLDER, 'model_data'))
shutil.copytree("yolo3", os.path.join(PROJECT_FOLDER, 'yolo3'))

cd = CondaDependencies.create(pip_packages=[
    'keras==2.1.5',
    'tensorflow==1.6.0',
    'pillow',
    'matplotlib',
    'h5py',
    'tensorboard'
], conda_packages=['python=3.6.11'])

myenv = Environment("yolov3")
myenv.python.conda_dependencies = cd
myenv.python.conda_dependencies.add_pip_package("azureml-sdk")
myenv.python.conda_dependencies.add_channel("conda-forge")
myenv.docker.enabled = True
myenv.docker.base_image = DEFAULT_GPU_IMAGE

# Choose a name for your GPU cluster
CLUSTER_NAME = "gpu-cluster"

# Verify that the cluster does not exist already
try:
    aml_cluster = AmlCompute(workspace=ws, name=CLUSTER_NAME)
    print("Found existing cluster, use it.")
except ComputeTargetException:
    print("provisioning new compute target")
                          is_directory=True)
batch_input = output_data.as_dataset()

detection_data = PipelineData('detection_data',
                              datastore=def_blob_store,
                              output_name='detection_data',
                              is_directory=True)

compute_target = ws.compute_targets['cpu-cluster']

environment_variables = {
    'POSTGRES_PASSWORD': os.environ['POSTGRES_PASSWORD'],
    'POSTGRES_HOSTNAME': 'ackbar-postgres.postgres.database.azure.com',
    'AZURE_STORAGE_CONNECTION_STRING': os.environ['AZURE_STORAGE_CONNECTION_STRING']
}
env = Environment(name='env')
env.environment_variables = environment_variables
conda = CondaDependencies()
conda.add_conda_package('psycopg2')
# have to use pip to install azure packages...
conda.add_pip_package('azure-storage-blob')
env.python.conda_dependencies = conda

run_config = RunConfiguration()
run_config.environment = env

prepare_step = PythonScriptStep(
    script_name='prepare.py',
    arguments=['--output', batch_input],
    inputs=[],
    outputs=[batch_input],
    compute_target=compute_target,
    source_directory='prepare',
def main():
    e = Env()

    from azureml.core.authentication import InteractiveLoginAuthentication

    myten = os.environ.get("AZURE_TENANT_ID")
    interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ.get("AZURE_TENANT_ID"))
    subscription = os.environ.get("CSUBSCRIPTION")
    workspace_name = e.workspace_name
    resource_group = e.resource_group

    aml_workspace = Workspace.get(
        name=workspace_name,
        subscription_id=subscription,
        resource_group=resource_group,
        auth=interactive_auth
    )

    from ml_service.util.attach_compute import get_compute

    # Get Azure machine learning cluster
    # If not present then get_compute will create a compute based on environment variables
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    print("SDK version: ", azureml.core.VERSION)

    # Variable names that can be passed in as parameter values
    from azureml.pipeline.core.graph import PipelineParameter
    from azureml.core import Datastore

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")
    # model_path = PipelineParameter(
    #     name="model_path", default_value=e.model_path)

    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    # Get the datastore whether it is the default or named store
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset_name = e.dataset_name

    # Create a reusable Azure ML environment
    from ml_service.util.manage_environment import get_environment
    from azureml.core import Environment

    # RUN Configuration
    # Must have this process to work with AzureML-SDK 1.0.85
    from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
    from azureml.core.conda_dependencies import CondaDependencies

    try:
        app_env = Environment(name="smartschedule_env")
        app_env.register(workspace=aml_workspace)
    except:
        print("Environment not found")

    # Create a new runconfig object
    aml_run_config = RunConfiguration()
    aml_run_config.environment.environment_variables["DATASTORE_NAME"] = e.datastore_name  # NOQA: E501

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    # Enable Docker
    aml_run_config.environment.docker.enabled = True

    # Set Docker base image to the default CPU-based image
    aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    # aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

    # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
    aml_run_config.environment.python.user_managed_dependencies = False

    app_conda_deps = CondaDependencies.create(
        conda_packages=['pandas', 'scikit-learn', 'libgcc', 'pyodbc',
                        'sqlalchemy', 'py-xgboost==0.90'],
        pip_packages=['azureml-sdk[automl,explain,contrib,interpret]==1.4.0',
                      'xgboost==0.90',
                      'azureml-dataprep==1.4.6',
                      'pyarrow',
                      'azureml-defaults==1.4.0',
                      'azureml-train-automl-runtime==1.4.0'],
        pin_sdk_version=False)

    # Specify CondaDependencies obj, add necessary packages
    aml_run_config.environment.python.conda_dependencies = app_conda_deps
    print("Run configuration created.")

    from azure.common.credentials import ServicePrincipalCredentials
    # from azure.keyvault import KeyVaultClient, KeyVaultAuthentication
    from azure.keyvault.secrets import SecretClient
    from azure.identity import DefaultAzureCredential
    import pandas as pd
    # import sqlalchemy as sql
    import pyodbc

    def get_data(sql_string, columns):
        credentials = None
        credential = DefaultAzureCredential()
        secret_client = SecretClient("https://smrtschd-aml-kv.vault.azure.net",
                                     credential=credential)
        secret = secret_client.get_secret("database-connection")
        # client = KeyVaultClient(KeyVaultAuthentication(auth_callback))
        # secret_bundle = client.get_secret("https://smrtschd-aml-kv.vault.azure.net", "database-connection", "")

        server = 'starlims-sql.database.windows.net'
        database = 'QM12_DATA_AUTOMATION'
        username = '******'
        password = secret.value
        driver = '{ODBC Driver 17 for SQL Server}'
        conn = pyodbc.connect('Driver=' + driver + ';' +
                              'Server=' + server + ';' +
                              'Database=' + database + ';' +
                              'PORT=1433;' +
                              'UID=' + username + ';' +
                              'PWD=' + password + '; MARS_Connection=Yes')

        try:
            SQL_Query = pd.read_sql_query(sql_string, conn)
            df = pd.DataFrame(SQL_Query, columns=columns)
            return df
        except Exception as e:
            print(e)
            raise

    sql_str = "SELECT " \
              "  Dept " \
              ", Method " \
              ", Servgrp " \
              ", Runno " \
              ", TestNo " \
              ", Testcode " \
              ", Total_Duration_Min " \
              ", Total_Duration_Hr " \
              ", Usrnam " \
              ", Eqid " \
              ", Eqtype " \
              "FROM dbo.Draft " \
              "order by TESTCODE, RUNNO, dept, method;"

    columns = ["Dept", "Method", "Servgrp", "Runno", "TestNo", "Testcode",
               "Total_Duration_Min", "Total_Duration_Hr", "Usrnam", "Eqid", "Eqtype"]

    from azureml.core import Dataset
    from sklearn.model_selection import train_test_split

    if e.train_dataset_name not in aml_workspace.datasets:
        df = get_data(sql_str, columns)
        train_df, test_df = train_test_split(df, test_size=0.2)

        MY_DIR = "data"
        CHECK_FOLDER = os.path.isdir(MY_DIR)
        if not CHECK_FOLDER:
            os.makedirs(MY_DIR)
        else:
            print("Folder ", MY_DIR, " is already created")

        # files = ["data/analyst_tests.csv"]
        files = ["data/train_data.csv", "data/test_data.csv"]
        def_file_store = Datastore(aml_workspace, "workspacefilestore")
        train_dataframe = train_df.to_csv(files[0], header=True, index=False)
        test_dataframe = test_df.to_csv(files[1], header=True, index=False)
        datastore.upload_files(
            files=files,
            target_path='data/',
            overwrite=True
        )

        from azureml.data.data_reference import DataReference

        blob_input_data_test = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertest",
            path_on_datastore="data/test_data.csv"
        )
        test_data = Dataset.Tabular.from_delimited_files(blob_input_data_test)
        test_data.register(aml_workspace, e.test_dataset_name, create_new_version=True)

        blob_input_data_train = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertrain",
            path_on_datastore="data/train_data.csv"
        )
        train_data = Dataset.Tabular.from_delimited_files(blob_input_data_train)
        train_data.register(aml_workspace, e.train_dataset_name, create_new_version=True)
    else:
        from azureml.data.data_reference import DataReference
        print("getting from the datastore instead of uploading")
        train_data = Dataset.get_by_name(aml_workspace, name=e.train_dataset_name)
        test_data = Dataset.get_by_name(aml_workspace, name=e.test_dataset_name)

    # check the training dataset to make sure it has at least 50 records.
    tdf = train_data.to_pandas_dataframe().head(5)
    print(tdf.shape)
    print(tdf)

    # display the first five rows of the data
    # create a variable that can be used for other purposes
    df = train_data.to_pandas_dataframe().head()

    label_column = "Total_Duration_Min"

    import random
    import string

    def randomString(stringLength=15):
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    from azureml.core import Experiment

    experiment = Experiment(aml_workspace, "SmartScheduler_Pipeline")

    import logging

    aml_name = 'smart_scheduler_' + randomString(5)
    print(aml_name)

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.ticker import StrMethodFormatter

    print(df.head(5))
    print(df.shape)
    print(df.dtypes)
    # df.hist(column='Dept')
    list(df.columns.values)

    # Remove features that are not necessary.
    # df.hist(column="Servgrp", bins=4)
    train_data = train_data.drop_columns(["Runno", "TestNo", "Total_Duration_Hr"])
    test_data = test_data.drop_columns(["Runno", "TestNo", "Total_Duration_Hr"])
    print(train_data.to_pandas_dataframe())
    print(test_data.to_pandas_dataframe())

    from azureml.automl.core.featurization import FeaturizationConfig

    # Some of the columns could be changed to one-hot encoding, especially the categorical columns
    featurization_config = FeaturizationConfig()
    featurization_config.blocked_transformers = ['LabelEncoder']
    featurization_config.add_column_purpose('Dept', 'CategoricalHash')
    featurization_config.add_transformer_params('HashOneHotEncoder', ['Method'],
                                                {"number_of_bits": 3})
    featurization_config.add_column_purpose('Servgrp', 'CategoricalHash')
    featurization_config.add_column_purpose('Testcode', 'Numeric')
    featurization_config.add_column_purpose('Usrnam', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqid', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqtype', 'CategoricalHash')

    from azureml.pipeline.core import Pipeline, PipelineData
    from azureml.pipeline.steps import PythonScriptStep

    # train_model_folder = './scripts/trainmodel'
    automl_settings = {
        "iteration_timeout_minutes": 5,
        "iterations": 5,
        "enable_early_stopping": True,
        "primary_metric": 'spearman_correlation',
        "verbosity": logging.INFO,
        "n_cross_validations": 5
    }

    automl_config = AutoMLConfig(task="regression",
                                 debug_log='automated_ml_errors.log',
                                 # path=train_model_folder,
                                 training_data=train_data,
                                 featurization=featurization_config,
                                 blacklist_models=['XGBoostRegressor'],
                                 label_column_name=label_column,
                                 compute_target=aml_compute,
                                 **automl_settings)

    from azureml.pipeline.steps import AutoMLStep
    from azureml.pipeline.core import TrainingOutput

    metrics_output_name = 'metrics_output'
    best_model_output_name = 'best_model_output'

    metrics_data = PipelineData(name='metrics_data',
                                datastore=datastore,
                                pipeline_output_name=metrics_output_name,
                                training_output=TrainingOutput(type='Metrics'))
    model_data = PipelineData(name='model_data',
                              datastore=datastore,
                              pipeline_output_name=best_model_output_name,
                              training_output=TrainingOutput(type='Model'))

    trainWithAutomlStep = AutoMLStep(
        name=aml_name,
        automl_config=automl_config,
        passthru_automl_config=False,
        outputs=[metrics_data, model_data],
        allow_reuse=True
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name='./evaluate/evaluate_model.py',  # e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel
        ]
    )

    register_step = PythonScriptStep(
        name="Register Model",
        script_name='register/register_model2.py',  # e.register_script_path,
        compute_target=aml_compute,
        source_directory='../app',
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param,
            "--model_path", model_data,
            "--ds_name", e.train_dataset_name
        ],
        runconfig=aml_run_config,
        allow_reuse=False
    )

    if (e.run_evaluation).lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(trainWithAutomlStep)
        register_step.run_after(evaluate_step)
        pipeline_steps = [trainWithAutomlStep, evaluate_step, register_step]
    else:
        print("Exclude the evaluation step and run register step")
        register_step.run_after(trainWithAutomlStep)
        pipeline_steps = [trainWithAutomlStep, register_step]

    print("this is the value for execute pipeline: {}".format(e.execute_pipeline))

    if (e.execute_pipeline).lower() == 'true':
        # Execute the pipeline normally during testing and debugging
        print("Pipeline submitted for execution.")
        pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        pipeline_run = experiment.submit(pipeline)
        pipeline_run.wait_for_completion()
        print("Pipeline is built.")
    else:
        # Generate a pipeline that will be called in ML Ops
        train_pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        train_pipeline._set_experiment_name
        train_pipeline.validate()
        published_pipeline = train_pipeline.publish(
            name=e.pipeline_name,
            description="Model training/retraining pipeline",
            version=e.build_id
        )
        print(f'Published pipeline: {published_pipeline.name}')
        print(f'for build {published_pipeline.version}')
print("get datasets from datastore") input_data_paths = [(blob_datastore, 'mldata')] input_dataset = Dataset.File.from_files(path=input_data_paths) # ----PYTHON ENV------ #------------------------- packages = CondaDependencies.create( conda_packages=["cudatoolkit=10.0"], pip_packages=[ 'azureml-sdk', 'PyYAML', 'azure-storage-blob', 'matplotlib', 'seaborn', 'tensorflow', 'Keras', 'tensorflow-hub', 'joblib', 'tqdm', 'Pillow', 'azureml-dataprep[pandas,fuse]>=1.1.14' ]) diagnoz_env = Environment("diagnoz-pipeline-env") diagnoz_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies diagnoz_env.docker.enabled = True # Use a docker container diagnoz_env.docker.base_image = DEFAULT_GPU_IMAGE diagnoz_env.python.conda_dependencies = packages diagnoz_env.register(workspace=ws) # Runconfigs pipeline_run_config = RunConfiguration() pipeline_run_config.target = compute_target pipeline_run_config.environment = diagnoz_env print("Run configuration created.") shutil.rmtree(script_folder, ignore_errors=True) os.makedirs(script_folder, exist_ok=True)
joblib.dump(value=classi_model, filename='outputs/model.pkl')
run.complete()

# 2 process file
# The scikit-learn package is not present in the default environment, so we need
# to provide the conda dependencies explicitly
from azureml.core import Experiment, ScriptRunConfig, Environment, Workspace
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.from_config()

my_env = Environment("My_env")
my_env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pip'],
    pip_packages=['azureml-defaults'])

script_config = ScriptRunConfig(source_directory=".",
                                script="script_name",
                                environment=my_env)

experiment = Experiment(ws, "my_training_exp")
new_run = experiment.submit(script_config)
new_run.wait_for_completion()
    service_principal_password=os.environ["SP_SECRET"],
)

ws = Workspace(
    subscription_id=auth_config["subscription_id"],
    resource_group=auth_config["resource_group"],
    workspace_name=auth_config["workspace_name"],
    auth=auth,
)

# Usually, the cluster already exists, so we just fetch it
compute_target = next(
    (m for m in ComputeTarget.list(ws) if m.name == compute["name"]), None)

# Specify the compute environment and register it for use in scoring
env = Environment("component-condition")
env.docker.enabled = True
cd = CondaDependencies.create(
    conda_packages=["tensorflow=2.0.0", "pandas", "numpy", "matplotlib"],
    pip_packages=["azureml-mlflow==1.5.0", "azureml-defaults==1.5.0"],
)
env.python.conda_dependencies = cd
env.register(workspace=ws)
print("Registered environment component-condition")

# Specify the run configuration
run_config = RunConfiguration()
run_config.environment.docker.enabled = True
run_config.environment.python.conda_dependencies = cd

# Pipeline definition
# Load data
data = pd.read_csv('diabetes.csv')

# Count the rows and log the result
row_count = len(data)
print('observations:', row_count)
mlflow.log_metric('observations', row_count)

from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails

# Create a Python environment for the experiment
mlflow_env = Environment("mlflow-env")

# Ensure the required packages are installed
packages = CondaDependencies.create(conda_packages=['pandas', 'pip'],
                                    pip_packages=['mlflow', 'azureml-mlflow'])
mlflow_env.python.conda_dependencies = packages

# Create a script config
script_mlflow = ScriptRunConfig(source_directory=experiment_folder,
                                script='mlflow_diabetes.py',
                                environment=mlflow_env)

# submit the experiment
experiment = Experiment(workspace=ws, name='diabetes-mlflow-script')
run = experiment.submit(config=script_mlflow)
RunDetails(run).show()
                model_name))  # replace the placeholder MODEL-NAME
print('score_fixed.py saved')

# Get model
model = Model(ws, model_name)

# Create conda dependencies
conda_packages = ['numpy==1.19.1', "pip==19.2.3"]
pip_packages = [
    'azureml-sdk==1.12.0',
    'azureml-defaults==1.12.0',
    'azureml-monitoring==0.1.0a21',
    'xgboost==1.1.1',
    'scikit-learn==0.23.1',
    'keras==2.3.1',
    'tensorflow==2.0.0'
]
conda_deps = CondaDependencies.create(conda_packages=conda_packages,
                                      pip_packages=pip_packages)

myenv = Environment(name='myenv')
myenv.python.conda_dependencies = conda_deps

inf_config = InferenceConfig(entry_script='score_fixed.py', environment=myenv)

aks_config = AksWebservice.deploy_configuration()

service = Model.deploy(workspace=ws,
                       name=aks_service_name,
                       models=[model],
                       inference_config=inf_config,
                       deployment_config=aks_config,
                       deployment_target=aks_target)
service.wait_for_deployment(show_output=True)
print(service.state)
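# A minimal smoke test of the deployed service. The two-row numeric payload is
# a hypothetical example; the real feature layout depends on the model.
import json

sample = json.dumps({"data": [[0.1, 2.3, 4.5], [6.7, 8.9, 0.1]]})
response = service.run(input_data=sample)
print(response)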
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS11_V2', max_nodes=2)
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    training_cluster.wait_for_completion(show_output=True)
except Exception as ex:
    print(ex)

from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import GridParameterSampling, HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.train.hyperdrive import BayesianParameterSampling, uniform
from azureml.widgets import RunDetails

# Create a Python environment for the experiment
sklearn_env = Environment("env02")

# Ensure the required packages are installed (we need scikit-learn, Azure ML defaults, and Azure ML dataprep)
packages = CondaDependencies.create(pip_packages=[
    'lightgbm',
    'sklearn',
    'scipy',
    'numpy',
    'azureml-defaults',
    'azureml-dataprep[pandas]'
])
sklearn_env.python.conda_dependencies = packages

# Create a script config
script_config = ScriptRunConfig(source_directory=experiment_folder,
                                script='training03.py',
                                arguments=[
                                    '--max_depth', 5,
                                    '--num_leaves', 50,
                                    '--subsample', 0.9,
                                    '--learning_rate', 0.01,
                                    '--min_data_in_leaf', 50,
def main(): """ Run the experiment for training """ interactive_auth = InteractiveLoginAuthentication( tenant_id=os.getenv("TENANT_ID")) work_space = Workspace.from_config(auth=interactive_auth) # Set up the dataset for training datastore = work_space.get_default_datastore() dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist")) # Set up the experiment for training experiment = Experiment(workspace=work_space, name="keras-lenet-train") # azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000 config = ScriptRunConfig( source_directory=".", script="train_keras.py", compute_target="cpu-cluster", arguments=[ "--data_folder", dataset.as_named_input("input").as_mount(), "--log_folder", "./logs", ], ) # Set up the Tensoflow/Keras environment environment = Environment("keras-environment") # environment = Environment.from_conda_specification( # name='keras-environment', # file_path='keras-environment.yml' # ) environment.python.conda_dependencies = CondaDependencies.create( python_version="3.7.7", pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"]) config.run_config.environment = environment # Run the experiment for training run = experiment.submit(config) aml_url = run.get_portal_url() print( "Submitted to an Azure Machine Learning compute cluster. Click on the link below" ) print("") print(aml_url) tboard = Tensorboard([run]) # If successful, start() returns a string with the URI of the instance. tboard.start(start_browser=True) run.wait_for_completion(show_output=True) # After your job completes, be sure to stop() the streaming otherwise it will continue to run. print("Press enter to stop") input() tboard.stop() # Register Model metrics = run.get_metrics() run.register_model( model_name="keras_mnist", tags={ "data": "mnist", "model": "classification" }, model_path="outputs/keras_lenet.h5", model_framework=Model.Framework.TENSORFLOW, model_framework_version="2.3.1", properties={ "train_loss": metrics["train_loss"][-1], "train_accuracy": metrics["train_accuracy"][-1], "val_loss": metrics["val_loss"][-1], "val_accuracy": metrics["val_accuracy"][-1], }, )
# script arguments
arguments = [
    "--deepspeed",
    "--deepspeed_config",
    "ds_config.json",
    "--deepspeed_mpi",
    "--global_rank",
    "$AZ_BATCHAI_TASK_INDEX",
    "--with_aml_log",
    True,
]

# create an environment
# Note: We will use the Dockerfile method to create an environment for DeepSpeed.
# In future, we plan to create a Curated environment for DeepSpeed.
env = Environment(name="deepspeed")
env.docker.enabled = True

# indicate how to run Python
env.python.user_managed_dependencies = True
env.python.interpreter_path = "/opt/miniconda/bin/python"

# To install any Python packages you need, simply add RUN pip install package-name
# to the docker string. E.g. `RUN pip install sklearn`
# Specify docker steps as a string and use the base DeepSpeed Docker image
dockerfile = r"""
FROM deepspeed/base-aml:with-pt-ds-and-deps
RUN pip install azureml-mlflow
RUN echo "Welcome to the DeepSpeed custom environment!"
"""

# set base image to None, because the image is defined by dockerfile.
env.docker.base_image = None
env.docker.base_dockerfile = dockerfile
def main():
    e = Env()
    print('********************')
    print(e.source_directory)

    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)
    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    # Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)
    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )

    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv', 'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path='diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        # Create a tabular dataset from the path on the datastore (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)
    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=['scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip'],
        pip_packages=['azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow'])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above.
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env
    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary Data Reference) for the model folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, Run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data', diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(name="Train and Register Model",
                                  source_directory='./aml_pipeline',
                                  script_name="train_diabetes.py",
                                  arguments=['--training-folder', prepped_data_folder],
                                  inputs=[prepped_data_folder],
                                  compute_target=pipeline_cluster,
                                  runconfig=pipeline_run_config,
                                  allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
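    # A minimal sketch of triggering the published pipeline through its REST
    # endpoint. Assumes interactive authentication; the experiment name reuses
    # the one above.
    import requests
    from azureml.core.authentication import InteractiveLoginAuthentication

    interactive_auth = InteractiveLoginAuthentication()
    auth_header = interactive_auth.get_authentication_header()
    response = requests.post(rest_endpoint,
                             headers=auth_header,
                             json={"ExperimentName": "jlg-exp"})
    print("Submitted run id:", response.json()["Id"])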
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# training script
script_dir = prefix.joinpath("code", "train", "fastai", "pets-resnet34")
script_name = "train.py"

# environment file
environment_file = prefix.joinpath("environments", "fastai.dockerfile")

# azure ml settings
environment_name = "fastai-pets-example"
experiment_name = "fastai-pets-example"
compute_target = "gpu-cluster"

# create environment
env = Environment(environment_name)
env.docker.enabled = True
env.docker.base_image = None
env.docker.base_dockerfile = environment_file
env.python.user_managed_dependencies = True

# create job config
src = ScriptRunConfig(
    source_directory=script_dir,
    script=script_name,
    environment=env,
    compute_target=compute_target,
)

# submit job
run = Experiment(ws, experiment_name).submit(src)
def start(config_file):
    print(config_file)
    configdata = ngccontent.get_config(config_file)

    subscription_id = configdata["azureml_user"]["subscription_id"]
    resource_group = configdata["azureml_user"]["resource_group"]
    workspace_name = configdata["azureml_user"]["workspace_name"]

    ws = Workspace(workspace_name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

    verify = f'''
    Subscription ID: {subscription_id}
    Resource Group: {resource_group}
    Workspace: {workspace_name}'''
    print(verify)

    ### vnet settings
    vnet_rg = ws.resource_group
    vnet_name = configdata["aml_compute"]["vnet_name"]
    subnet_name = configdata["aml_compute"]["subnet_name"]

    ### azure ml names
    ct_name = configdata["aml_compute"]["ct_name"]
    exp_name = configdata["aml_compute"]["exp_name"]

    ### trust but verify
    verify = f'''
    vNET RG: {vnet_rg}
    vNET name: {vnet_name}
    vNET subnet name: {subnet_name}
    Compute target: {ct_name}
    Experiment name: {exp_name}'''
    print(verify)

    vm_name = configdata["aml_compute"]["vm_name"]
    if vm_name in configdata["supported_vm_sizes"].keys():
        gpus_per_node = configdata["supported_vm_sizes"][vm_name]
        print("Setting up compute target {ct_name} with vm_size: {vm_name} with {gpus_per_node} GPUs"
              .format(ct_name=ct_name, vm_name=vm_name, gpus_per_node=gpus_per_node))

        if ct_name not in ws.compute_targets:
            config = AmlCompute.provisioning_configuration(
                vm_size=vm_name,
                min_nodes=configdata["aml_compute"]["min_nodes"],
                max_nodes=configdata["aml_compute"]["max_nodes"],
                vnet_resourcegroup_name=vnet_rg,
                vnet_name=vnet_name,
                subnet_name=subnet_name,
                idle_seconds_before_scaledown=configdata["aml_compute"]["idle_seconds_before_scaledown"],
                remote_login_port_public_access='Enabled')
            ct = ComputeTarget.create(ws, ct_name, config)
            ct.wait_for_completion(show_output=True)
        else:
            print("Loading Pre-existing Compute Target {ct_name}".format(ct_name=ct_name))
            ct = ws.compute_targets[ct_name]
    else:
        print("Unsupported vm_size {vm_size}".format(vm_size=vm_name))
        print("The specified vm size must be one of ...")
        for azure_gpu_vm_size in configdata["supported_vm_sizes"].keys():
            print("... " + azure_gpu_vm_size)
        raise Exception(
            "{vm_size} does not support Pascal or above GPUs".format(vm_size=vm_name))

    environment_name = configdata["aml_compute"]["environment_name"]
    python_interpreter = configdata["aml_compute"]["python_interpreter"]
    conda_packages = configdata["aml_compute"]["conda_packages"]

    from azureml.core import ContainerRegistry

    if environment_name not in ws.environments:
        env = Environment(name=environment_name)
        env.docker.enabled = configdata["aml_compute"]["docker_enabled"]
        env.docker.base_image = None
        env.docker.base_dockerfile = "FROM {dockerfile}".format(
            dockerfile=configdata["ngc_content"]["base_dockerfile"])
        env.python.interpreter_path = python_interpreter
        env.python.user_managed_dependencies = True
        conda_dep = CondaDependencies()
        for conda_package in conda_packages:
            conda_dep.add_conda_package(conda_package)
        env.python.conda_dependencies = conda_dep
        env.register(workspace=ws)
    else:
        env = ws.environments[environment_name]

    amlcluster = Azuremlcomputecluster.AzureMLComputeCluster(
        workspace=ws,
        compute_target=ct,
        initial_node_count=1,
        experiment_name=configdata["aml_compute"]["exp_name"],
        environment_definition=env,
        use_gpu=True,
        n_gpus_per_node=1,
        jupyter=True,
        jupyter_port=configdata["aml_compute"]["jupyter_port"],
        dashboard_port=9001,
        scheduler_port=9002,
        scheduler_idle_timeout=1200,
        worker_death_timeout=30,
        additional_ports=[],
        datastores=[],
        telemetry_opt_out=True,
        asynchronous=False)

    print(amlcluster.jupyter_link)
    print('Exiting script')
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

logger = logging.getLogger()
logger.setLevel("INFO")
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)

ws = Workspace.from_config()

freezer_environment = Environment("sktime_freezer_environment")
cd = CondaDependencies.create(
    conda_packages=["numpy", "cython", "pandas", "scikit-learn"],
    pip_packages=[
        "azureml-defaults",
        "inference-schema[numpy-support]",
        "joblib==0.13.*",
        "azureml-dataprep[pandas, fuse]",
        "sktime",
    ],
)
freezer_environment.docker.enabled = True
freezer_environment.docker.base_image = DEFAULT_CPU_IMAGE
freezer_environment.python.conda_dependencies = cd
freezer_environment.register(workspace=ws)
logger.info("Environment registered")
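# A minimal sketch of retrieving the registered environment in a later session,
# so runs can reuse it instead of rebuilding the dependency set:
restored_env = Environment.get(workspace=ws, name="sktime_freezer_environment")
logger.info("Retrieved environment %s", restored_env.name)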