def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("DATABRICKS_COMPUTE_NAME")
    db_cluster_id = os.environ.get("DB_CLUSTER_ID")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        compute_name,
        vm_size)
    if aml_compute is not None:
        print(aml_compute)

    train_step = DatabricksStep(
        name="DBPythonInLocalMachine",
        num_workers=1,
        python_script_name="train_with_r_on_databricks.py",
        source_directory="code/training/R",
        run_name='DB_Python_R_demo',
        existing_cluster_id=db_cluster_id,
        compute_target=aml_compute,
        allow_reuse=False
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name + "_with_R_on_DB",
        description="Model training/retraining pipeline",
        version=build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core', 'azure-storage', 'azure-storage-blob']))
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="code/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name + "_with_R",
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def test_get_workspace():
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")

    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)

    assert aml_workspace.name == workspace_name
def test_get_workspace():
    e = Env()
    workspace_name = e.workspace_name
    resource_group = e.resource_group
    subscription_id = e.subscription_id
    tenant_id = e.tenant_id
    app_id = e.app_id
    app_secret = e.app_secret

    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)

    assert aml_workspace.name == workspace_name
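# `Env` is never defined in these snippets. A minimal sketch of what it might
# look like, assuming it simply wraps os.environ lookups; the attribute names
# mirror the fields used at the call sites, while the environment-variable
# names themselves are assumptions.
import os
from dataclasses import dataclass, field


def _env(name):
    # Thin helper so every field below stays a one-liner
    return os.environ.get(name)


@dataclass
class Env:
    workspace_name: str = field(default_factory=lambda: _env("AML_WORKSPACE_NAME"))
    resource_group: str = field(default_factory=lambda: _env("RESOURCE_GROUP"))
    subscription_id: str = field(default_factory=lambda: _env("SUBSCRIPTION_ID"))
    tenant_id: str = field(default_factory=lambda: _env("TENANT_ID"))
    app_id: str = field(default_factory=lambda: _env("SP_APP_ID"))
    app_secret: str = field(default_factory=lambda: _env("SP_APP_SECRET"))
    vm_size: str = field(default_factory=lambda: _env("AML_COMPUTE_CLUSTER_CPU_SKU"))
    compute_name: str = field(default_factory=lambda: _env("AML_COMPUTE_CLUSTER_NAME"))
    db_cluster_id: str = field(default_factory=lambda: _env("DB_CLUSTER_ID"))
    build_id: str = field(default_factory=lambda: _env("BUILD_BUILDID"))
    pipeline_name: str = field(default_factory=lambda: _env("TRAINING_PIPELINE_NAME"))
    model_name: str = field(default_factory=lambda: _env("MODEL_NAME"))
    sources_directory_train: str = field(default_factory=lambda: _env("SOURCES_DIR_TRAIN"))
    train_script_path: str = field(default_factory=lambda: _env("TRAIN_SCRIPT_PATH"))
    evaluate_script_path: str = field(default_factory=lambda: _env("EVALUATE_SCRIPT_PATH"))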
def test_get_workspace():
    workspace_name = os.environ.get("AML_WORKSPACE_NAME")
    resource_group = os.environ.get("RESOURCE_GROUP")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")

    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)

    assert aml_workspace.name == workspace_name
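# `get_workspace` is called throughout but its definition never appears here.
# A sketch of a plausible implementation using service principal auth; the
# name and signature follow the call sites above, the body is standard
# azureml-sdk usage.
from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication


def get_workspace(name, resource_group, subscription_id,
                  tenant_id, app_id, app_secret):
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=app_id,
        service_principal_password=app_secret)
    # Workspace.get raises if the workspace does not exist
    return Workspace.get(
        name=name,
        auth=sp_auth,
        subscription_id=subscription_id,
        resource_group=resource_group)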
def main():
    load_dotenv()
    workspace_name = os.environ.get("WS_NAME")
    resource_group = os.environ.get("RG_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    model_name = os.environ.get("MODEL_NAME")
    inference_config_file = os.environ.get("INFERENCE_CONFIG")
    deployment_aci_config = os.environ.get("DEPLOYMENT_ACI_CONFIG")
    conda_dep_yml = os.environ.get("CONDA_DEPENDENCIES")
    score_path = os.environ.get("SCORE_PATH")
    score_source_dir = os.environ.get("SCORE_SOURCE_DIR")
    aci_service_name = os.environ.get("SERVICE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)

    inference_config = InferenceConfig(source_directory=score_source_dir,
                                       runtime="python",
                                       entry_script=score_path,
                                       conda_file=conda_dep_yml)

    aciconfig = AciWebservice.deploy_configuration(
        cpu_cores=2,
        memory_gb=4,
        tags={"model": "BERT", "method": "tensorflow"},
        description='Predict StackoverFlow tags with BERT')

    model = aml_workspace.models[model_name]
    aci_service = Model.deploy(aml_workspace,
                               aci_service_name,
                               [model],
                               inference_config,
                               aciconfig,
                               overwrite=True)
    aci_service.wait_for_deployment(True)
    print(aci_service.state)
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        e.workspace_name,
        e.resource_group,
        e.subscription_id,
        e.tenant_id,
        e.app_id,
        e.app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print(aml_compute)

    train_step = DatabricksStep(
        name="DBPythonInLocalMachine",
        num_workers=1,
        python_script_name="train_with_r_on_databricks.py",
        source_directory="code/training/R",
        run_name='DB_Python_R_demo',
        existing_cluster_id=e.db_cluster_id,
        compute_target=aml_compute,
        allow_reuse=False
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R_on_DB",
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(e.workspace_name,
                                  e.resource_group,
                                  e.subscription_id,
                                  e.tenant_id,
                                  e.app_id,
                                  e.app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core', 'azure-storage', 'azure-storage-blob']))
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="code/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R",
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
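# `get_compute(aml_workspace, compute_name, vm_size)` is likewise undefined in
# these snippets. A sketch of the usual get-or-create AmlCompute pattern; the
# min/max node counts are assumptions.
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.exceptions import ComputeTargetException


def get_compute(workspace, compute_name, vm_size):
    try:
        # Reuse the cluster if it already exists in the workspace
        return ComputeTarget(workspace=workspace, name=compute_name)
    except ComputeTargetException:
        config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=0,
            max_nodes=4)
        compute = ComputeTarget.create(workspace, compute_name, config)
        compute.wait_for_completion(show_output=True)
        return compute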
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    MODEL_NAME = os.environ.get('MODEL_NAME')
    model_data_path = os.environ.get("MODEL_DATA_PATH_DATASTORE")

    ws = get_workspace(workspace_name,
                       resource_group,
                       subscription_id,
                       tenant_id,
                       app_id,
                       app_secret)

    # rstrip('h5') drops the trailing 'h5' characters, leaving the dot,
    # so e.g. 'model.h5' becomes 'model.onnx'
    modelName = MODEL_NAME.rstrip('h5') + 'onnx'
    model = Model(workspace=ws, name=modelName)
    print(model)
    model.download()

    ds = ws.get_default_datastore()
    print(ds)
    ds.download(target_path='.', prefix=model_data_path, show_progress=True)
def workspace():
    return get_workspace()
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("RESOURCE_GROUP")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    deploy_script_path = os.environ.get("DEPLOY_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("DEPLOY_PIPELINE_NAME")
    service_name = os.environ.get("DEPLOY_SERVICE_NAME")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    conda_dependencies = CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn'],
        pip_packages=['azureml-core==1.0.72.*',
                      'azureml-sdk==1.0.72.*',
                      'azure-storage',
                      'azure-storage-blob',
                      'azureml-dataprep',
                      'azureml-datadrift==1.0.72.*'],
        pin_sdk_version=False)
    print(conda_dependencies.serialize_to_string())

    run_config = RunConfiguration(framework='Python',
                                  conda_dependencies=conda_dependencies)
    run_config.environment.docker.enabled = True

    model_name = PipelineParameter(name="model_name", default_value=model_name)
    print(model_name)
    release_id = PipelineParameter(name="release_id", default_value="0")
    print(release_id)
    service_name = PipelineParameter(name="service_name", default_value=service_name)
    print(service_name)

    deploy_step = PythonScriptStep(
        name="Deploy Model",
        script_name=deploy_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
            "--service_name", service_name
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Deploy created")

    steps = [deploy_step]
    deploy_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    deploy_pipeline.validate()
    published_pipeline = deploy_pipeline.publish(
        name=pipeline_name,
        description="Model deploy pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
# AIDAtaPipeLine - A series of examples and utilities for Azure Machine Learning Services
# Copyright (C) 2020-2021 The Ocean Cleanup™
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Example of how to get a Datastore object from the workspace
from azureml.core import Datastore
from workspace import get_workspace

workspace = get_workspace()
datastore = Datastore.get(workspace, 'new_images_1')
print(datastore.path())
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    # register_script_path = os.environ.get("REGISTER_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core', 'azure-storage', 'azure-storage-blob']))
    run_config.environment.docker.enabled = True

    model_name = PipelineParameter(name="model_name", default_value=model_name)
    release_id = PipelineParameter(name="release_id", default_value="0")

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=train_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    # Currently, the Evaluate step will automatically register
    # the model if it performs better. This step is based on a
    # previous version of the repo which utilized JSON files to
    # track evaluation results.
    # register_model_step = PythonScriptStep(
    #     name="Register New Trained Model",
    #     script_name=register_script_path,
    #     compute_target=aml_compute,
    #     source_directory=sources_directory_train,
    #     arguments=[
    #         "--release_id", release_id,
    #         "--model_name", model_name,
    #     ],
    #     runconfig=run_config,
    #     allow_reuse=False,
    # )
    # print("Step register model created")

    evaluate_step.run_after(train_step)
    # register_model_step.run_after(evaluate_step)

    steps = [evaluate_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = "AML-RG-" + os.environ.get("BASE_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    aks_name = os.environ.get("AKS_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    experiment_name = os.environ.get("EXPERIMENT_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'keras'],
        pip_packages=['azure', 'azureml-sdk', 'azure-storage',
                      'azure-storage-blob', 'transformers>=2.1.1',
                      'tensorflow>=2.0.0', 'tensorflow-gpu>=2.0.0']))
    run_config.environment.docker.enabled = True

    datastore_name = 'tfworld'
    container_name = 'azure-service-classifier'
    account_name = 'johndatasets'
    sas_token = '?sv=2019-02-02&ss=bfqt&srt=sco&sp=rl&se=2021-06-02T03:40:25Z&st=2020-03-09T19:40:25Z&spr=https&sig=bUwK7AJUj2c%2Fr90Qf8O1sojF0w6wRFgL2c9zMVCWNPA%3D'

    try:
        existing_datastore = Datastore.get(aml_workspace, datastore_name)
    except:  # noqa: E722
        existing_datastore = Datastore \
            .register_azure_blob_container(workspace=aml_workspace,
                                           datastore_name=datastore_name,
                                           container_name=container_name,
                                           account_name=account_name,
                                           sas_token=sas_token)

    azure_dataset = Dataset.File.from_files(path=(existing_datastore, 'data'))
    azure_dataset = azure_dataset.register(
        workspace=aml_workspace,
        name='Azure Services Dataset',
        description='Dataset containing azure related posts on Stackoverflow',
        create_new_version=True)
    azure_dataset.to_path()
    input_data = azure_dataset.as_named_input('input_data1').as_mount(
        '/tmp/data')

    model_name = PipelineParameter(name="model_name", default_value=model_name)
    max_seq_length = PipelineParameter(name="max_seq_length", default_value=128)
    learning_rate = PipelineParameter(name="learning_rate", default_value=3e-5)
    num_epochs = PipelineParameter(name="num_epochs", default_value=3)
    export_dir = PipelineParameter(name="export_dir", default_value="./outputs/exports")
    batch_size = PipelineParameter(name="batch_size", default_value=32)
    steps_per_epoch = PipelineParameter(name="steps_per_epoch", default_value=100)

    # initialize the TensorFlow estimator
    estimator = TensorFlow(source_directory=sources_directory_train,
                           entry_script=train_script_path,
                           compute_target=aml_compute,
                           framework_version='2.0',
                           use_gpu=True,
                           pip_packages=[
                               'transformers==2.0.0',
                               'azureml-dataprep[fuse,pandas]==1.3.0'
                           ])

    train_step = EstimatorStep(
        name="Train Model",
        estimator=estimator,
        estimator_entry_script_arguments=[
            "--data_dir", input_data,
            "--max_seq_length", max_seq_length,
            "--learning_rate", learning_rate,
            "--num_epochs", num_epochs,
            "--export_dir", export_dir,
            "--batch_size", batch_size,
            "--steps_per_epoch", steps_per_epoch
        ],
        compute_target=aml_compute,
        inputs=[input_data],
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--model_name", model_name,
            "--build_id", build_id,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    # Currently, the Evaluate step will automatically register
    # the model if it performs better. This step is based on a
    # previous version of the repo which utilized JSON files to
    # track evaluation results.
    evaluate_step.run_after(train_step)

    steps = [evaluate_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    response = published_pipeline.submit(  # noqa: F841
        workspace=aml_workspace,
        experiment_name=experiment_name)

    # Get AKS cluster for deployment
    aks_compute = get_aks(aml_workspace, aks_name)
    if aks_compute is not None:
        print(aks_compute)
def get_data(server_url, client_id, client_secret):
    access_token = get_acess_token(server_url, client_id, client_secret)
    workspace_data = workspace.get_workspace(server_url, access_token)

    final_data = {}
    firstnext = []
    secondnext = []
    thirdnext = []
    defectS = []
    spikedata = []
    r_burn = []
    s_sprint = []
    defectRej = []
    defectAge = []
    defectConc = []

    for i in range(len(workspace_data)):
        wid = str(workspace_data[i]['workspace_id'])
        wname = str(workspace_data[i]['workspace_name'])
        backlog_data = backlog.get_backlog(server_url, access_token, wid, wname)
        sprint_data = sprints.get_sprints(server_url, access_token, wid, wname)
        release_data = releases.get_releases_items(server_url, access_token, wid, wname)
        release_burn_down.getreleaseforecast(release_data, backlog_data, wname, wid)
        app_data = applications.get_applications(server_url, access_token, wid)
        defectS = defectS + defectStatus.getDefectStatus(app_data, release_data,
                                                         sprint_data, backlog_data)
        data = applications.get_defect(server_url, access_token, wid, wname,
                                       backlog_data, app_data)
        # defectRej = defectRej + applications.defectRejectionRatio(server_url, access_token, wid, wname, app_data, data)
        # defectAge = defectAge + applications.defectAgeing(server_url, access_token, wid, wname, data)
        # defectConc = defectConc + applications.defectConcentration(server_url, access_token, wid, wname, app_data, data)
        sData = sprint_sp.sprint_remaining_sp(server_url, access_token, wid, wname,
                                              release_data, backlog_data,
                                              sprint_data, app_data)
        s_sprint = s_sprint + sData["sdata"]
        firstnext = firstnext + sData["firstnext"]
        secondnext = secondnext + sData["secondnext"]
        thirdnext = thirdnext + sData["thirdnext"]
        r_burn = r_burn + release_burn_down.remaning_sp(server_url, access_token,
                                                        wid, wname, server_url,
                                                        release_data, backlog_data,
                                                        app_data)["r_data"]
        spikedata = spikedata + release_burn_down.spikedata(release_data, backlog_data,
                                                            app_data, wname, wid)

    # try:
    conn = MongoClient(config.Agilemanager_collector['db_host'],
                       config.Agilemanager_collector['db_port'])
    #     print("Connected successfully!!!")
    # except:
    #     print("Could not connect to MongoDB")

    hpamCurrentSprint = get_currentSprint(s_sprint)
    final_data["hpamRelease"] = r_burn
    final_data["hpamSprint"] = s_sprint
    final_data["hpamCurrentSprint"] = hpamCurrentSprint
    final_data["hpamReleaseforecast"] = rBurn.get_data()
    # print(final_data["hpamReleaseforecast"])
    final_data["hpamDefectRejection"] = defectRej
    final_data["hpamDefectAgeing"] = defectAge
    final_data["hpamDefectConcentration"] = defectConc
    final_data["hpamFirstnext"] = firstnext
    final_data["hpamSecondnext"] = secondnext
    final_data["hpamThirdnext"] = thirdnext
    final_data["hpamSpikedata"] = spikedata
    final_data["hpamDefectdata"] = defectS
    final_data["hpamCurrentSprintDefect"] = get_currentSprint(defectS)

    db = conn.dashboard
    print(db)
    # Replace the previous snapshot wholesale before inserting the new one
    my_collection = db.hpamdata
    my_collection.drop()
    my_collection.insert_one(final_data)
def main():
    load_dotenv()
    workspace_name = os.environ.get("WS_NAME")
    resource_group = os.environ.get("RG_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    aks_name = os.environ.get("AKS_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    experiment_name = os.environ.get("EXPERIMENT_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)
    print('Now accessing:')
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        compute_name,
        vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'keras'],
        pip_packages=['azureml-core==1.25.0',
                      'azureml-defaults==1.25.0',
                      'azureml-telemetry==1.25.0',
                      'azureml-train-restclients-hyperdrive==1.25.0',
                      'azureml-train-core==1.25.0',
                      'azureml-dataprep',
                      'tensorflow-gpu==2.0.0',
                      'transformers==2.0.0',
                      'absl-py',
                      'h5py<3.0.0']))
    # run_config.environment.docker.enabled = True

    datastore_name = 'mtcseattle'
    container_name = 'azure-service-classifier'
    account_name = 'mtcseattle'
    sas_token = '?sv=2020-04-08&st=2021-05-26T04%3A39%3A46Z&se=2022-05-27T04%3A39%3A00Z&sr=c&sp=rl&sig=CTFMEu24bo2X06G%2B%2F2aKiiPZBzvlWHELe15rNFqULUk%3D'

    try:
        existing_datastore = Datastore.get(aml_workspace, datastore_name)
    except:  # noqa: E722
        existing_datastore = Datastore \
            .register_azure_blob_container(workspace=aml_workspace,
                                           datastore_name=datastore_name,
                                           container_name=container_name,
                                           account_name=account_name,
                                           sas_token=sas_token,
                                           overwrite=True)

    azure_dataset = Dataset.File.from_files(
        path=(existing_datastore, 'data'))
    azure_dataset = azure_dataset.register(
        workspace=aml_workspace,
        name='Azure Services Dataset',
        description='Dataset containing azure related posts on Stackoverflow',
        create_new_version=True)
    azure_dataset.to_path()
    input_data = azure_dataset.as_named_input('azureservicedata').as_mount(
        '/tmp/data')

    model_name = PipelineParameter(
        name="model_name", default_value=model_name)
    max_seq_length = PipelineParameter(
        name="max_seq_length", default_value=128)
    learning_rate = PipelineParameter(
        name="learning_rate", default_value=3e-5)
    num_epochs = PipelineParameter(
        name="num_epochs", default_value=1)
    export_dir = PipelineParameter(
        name="export_dir", default_value="./outputs/model")
    batch_size = PipelineParameter(
        name="batch_size", default_value=32)
    steps_per_epoch = PipelineParameter(
        name="steps_per_epoch", default_value=1)

    # initialize the PythonScriptStep
    train_step = PythonScriptStep(
        name='Train Model',
        script_name=train_script_path,
        arguments=['--data_dir', input_data,
                   '--max_seq_length', max_seq_length,
                   '--batch_size', batch_size,
                   '--learning_rate', learning_rate,
                   '--steps_per_epoch', steps_per_epoch,
                   '--num_epochs', num_epochs,
                   '--export_dir', export_dir],
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        runconfig=run_config,
        allow_reuse=True)
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--model_name", model_name,
            "--build_id", build_id,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    # Currently, the Evaluate step will automatically register
    # the model if it performs better. This step is based on a
    # previous version of the repo which utilized JSON files to
    # track evaluation results.
    evaluate_step.run_after(train_step)

    steps = [evaluate_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline.",
        version=build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    response = published_pipeline.submit(  # noqa: F841
        workspace=aml_workspace,
        experiment_name=experiment_name)
def main():
    load_dotenv()
    workspace_name = os.environ.get("WORKSPACE_NAME")
    resource_group = os.environ.get("RESOURCE_GROUP_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    generate_report_path = os.environ.get("GENERATE_REPORT_PATH")
    generate_report_name = os.environ.get("GENERATE_REPORT_NAME")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_GPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    ckpt_path = os.environ.get("MODEL_CHECKPOINT_PATH")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    epis_datastore = os.environ.get("EPIS_DATASTORE")
    epis_container = os.environ.get("EPIS_CONTAINER")

    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)
    print(aml_workspace)

    aml_compute = get_compute(
        aml_workspace,
        compute_name,
        vm_size)
    if aml_compute is not None:
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy==1.18.1', 'pandas', 'tensorflow-gpu==2.0.0'],
        pip_packages=['azure', 'azureml-core==1.0.60',
                      'azureml-tensorboard', 'azure-storage==0.36.0',
                      'tqdm==4.41.1', 'opencv-python==4.1.2.30',
                      'easydict==1.9', 'matplotlib==3.1.3'])
    )
    run_config.environment.docker.enabled = True
    run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    model_name = PipelineParameter(
        name="model_name", default_value=model_name)
    release_id = PipelineParameter(
        name="release_id", default_value=build_id)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=train_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
            "--ckpt_path", ckpt_path,
            "--datastore", epis_datastore,
            "--storage_container", epis_container,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
            "--ckpt_path", ckpt_path,
            "--datastore", epis_datastore,
            "--storage_container", epis_container,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    generate_report_step = PythonScriptStep(
        name="Generate Report Model",
        script_name=generate_report_name,
        compute_target=aml_compute,
        source_directory=generate_report_path,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
            "--ckpt_path", ckpt_path,
            "--datastore", epis_datastore,
            "--storage_container", epis_container,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step generate report created")

    evaluate_step.run_after(train_step)
    generate_report_step.run_after(evaluate_step)

    steps = [train_step, evaluate_step, generate_report_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    data_path = os.environ.get("DATA_PATH_DATASTORE")
    model_data_path = os.environ.get("MODEL_DATA_PATH_DATASTORE")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    model_name = PipelineParameter(name="model_name", default_value=model_name)
    release_id = PipelineParameter(name="release_id", default_value="0")

    ds = aml_workspace.get_default_datastore()
    dataref_folder = ds.path(data_path).as_mount()
    model_dataref = ds.path(model_data_path).as_mount()

    # Both folders must already exist on the datastore, and the corresponding
    # env variables must be specified in the variable groups:
    # ds.upload(src_dir='./VOCdevkit', target_path='VOCdevkit',
    #           overwrite=True, show_progress=True)
    # ds.upload(src_dir='./model_data', target_path='VOCmodel_data',
    #           overwrite=True, show_progress=True)

    yoloEstimator = TensorFlow(
        source_directory=sources_directory_train + '/training',
        compute_target=aml_compute,
        entry_script=train_script_path,
        pip_packages=['keras', 'pillow', 'matplotlib', 'onnxmltools',
                      # recent versions of keras2onnx give conversion issues
                      'keras2onnx==1.5.1'],
        use_gpu=True,
        framework_version='1.13')

    train_step = EstimatorStep(
        name="Train & Convert Model",
        estimator=yoloEstimator,
        estimator_entry_script_arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
            "--data_folder", dataref_folder,
            "--model_path", model_dataref
        ],
        runconfig_pipeline_params=None,
        inputs=[dataref_folder, model_dataref],
        compute_target=aml_compute,
        allow_reuse=False)
    print("Step Train & Convert created")

    train_pipeline = Pipeline(workspace=aml_workspace, steps=[train_step])
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    cluster_id = os.environ.get("DATABRICKS_CLUSTER_ID", None)
    # Treat an empty DATABRICKS_CLUSTER_ID the same as an unset one
    if cluster_id is not None and not cluster_id:
        cluster_id = None

    workspace_name = os.environ.get("AML_WORKSPACE_NAME", None)
    resource_group = os.environ.get("RESOURCE_GROUP", None)
    subscription_id = os.environ.get("SUBSCRIPTION_ID", None)
    tenant_id = os.environ.get("TENANT_ID", None)
    app_id = os.environ.get("SP_APP_ID", None)
    app_secret = os.environ.get("SP_APP_SECRET", None)
    experiment_subfolder = os.environ.get("EXPERIMENT_FOLDER",
                                          'aml_service/experiment')
    sources_directory = os.environ.get("SOURCES_DIR", None)
    experiment_folder = os.path.join(sources_directory, experiment_subfolder)
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH", None)
    databricks_workspace_name = os.environ.get("DATABRICKS_WORKSPACE_NAME", None)
    databricks_access_token = os.environ.get("DATABRICKS_ACCESS_TOKEN", None)
    databricks_compute_name_aml = os.environ.get("DATABRICKS_COMPUTE_NAME_AML", None)
    model_dir = os.environ.get("MODEL_DIR", 'dbfs:/model')
    model_name = os.environ.get("MODEL_NAME", 'torchcnn')

    # Turn 'dbfs:/model' into the local mount path '/dbfs/model/<name>.pth'
    path_components = model_dir.split("/", 1)
    model_path = "/dbfs/" + path_components[1] + "/" + model_name + ".pth"
    print("The model path will be %s" % (model_path))

    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    databricks_compute = get_compute(aml_workspace,
                                     databricks_compute_name_aml,
                                     resource_group,
                                     databricks_workspace_name,
                                     databricks_access_token)
    print(databricks_compute)

    step1 = DatabricksStep(name="DBPythonInLocalMachine",
                           num_workers=1,
                           python_script_name=train_script_path,
                           source_directory=sources_directory,
                           run_name='DB_Python_Local_demo',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=['--MODEL_PATH', model_path])

    step2 = DatabricksStep(name="RegisterModel",
                           num_workers=1,
                           python_script_name="register_model.py",
                           source_directory=experiment_folder,
                           run_name='Register_model',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=['--MODEL_PATH', model_path,
                                                 '--TENANT_ID', tenant_id,
                                                 '--APP_ID', app_id,
                                                 '--APP_SECRET', app_secret,
                                                 '--MODEL_NAME', model_name])
    step2.run_after(step1)
    print("Step lists created")

    pipeline = Pipeline(
        workspace=aml_workspace,
        # steps=[step1])
        steps=[step1, step2])
    print("Pipeline is built")

    pipeline.validate()
    print("Pipeline validation complete")

    pipeline_run = pipeline.submit(experiment_name="pipetest")
    print("Pipeline is submitted for execution")

    pipeline_details = pipeline_run.get_details()
    pipeline_run_id = pipeline_details['runId']
    azure_run_url = get_experiment_run_url(subscription_id,
                                           resource_group,
                                           workspace_name,
                                           pipeline_run_id)
    print("To check details of the Pipeline run, go to " + azure_run_url)

    # Poll every 10 seconds until the run leaves the Running/NotStarted states
    pipeline_status = pipeline_run.get_status()
    timer_mod = 0
    while pipeline_status == 'Running' or pipeline_status == 'NotStarted':
        timer_mod = timer_mod + 10
        time.sleep(10)
        if (timer_mod % 30) == 0:
            print("Status: %s. %s seconds have passed." % (pipeline_status,
                                                           timer_mod))
        pipeline_status = pipeline_run.get_status()

    if pipeline_status == 'Failed':
        print("AML Pipeline failed. Check %s for details." % (azure_run_url))
        sys.exit(1)
    else:
        print(pipeline_status)

    print("Pipeline completed")
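# The Databricks snippet above calls a five-argument `get_compute` that
# attaches a Databricks workspace as an AML compute target. A sketch of that
# variant; the signature follows the call site, the attach flow is standard
# azureml-sdk usage.
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.exceptions import ComputeTargetException


def get_compute(workspace, compute_name, resource_group,
                databricks_workspace_name, databricks_access_token):
    try:
        # Reuse the attached compute if it already exists
        return DatabricksCompute(workspace=workspace, name=compute_name)
    except ComputeTargetException:
        attach_config = DatabricksCompute.attach_configuration(
            resource_group=resource_group,
            workspace_name=databricks_workspace_name,
            access_token=databricks_access_token)
        compute = ComputeTarget.attach(workspace, compute_name, attach_config)
        compute.wait_for_completion(show_output=True)
        return compute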
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = os.environ.get("BASE_NAME") + "-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    register_script_path = os.environ.get("REGISTER_SCRIPT_PATH")
    vm_size_cpu = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name_cpu = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name,
                                  resource_group,
                                  subscription_id,
                                  tenant_id,
                                  app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute_cpu = get_compute(aml_workspace, compute_name_cpu, vm_size_cpu)
    if aml_compute_cpu is not None:
        print(aml_compute_cpu)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras']))
    run_config.environment.docker.enabled = True

    model_name = PipelineParameter(name="model_name", default_value=model_name)

    def_blob_store = Datastore(aml_workspace, "workspaceblobstore")
    jsonconfigs = PipelineData("jsonconfigs", datastore=def_blob_store)
    config_suffix = datetime.datetime.now().strftime("%Y%m%d%H")

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=train_script_path,
        compute_target=aml_compute_cpu,
        source_directory=sources_directory_train,
        arguments=[
            "--config_suffix", config_suffix,
            "--json_config", jsonconfigs,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        # inputs=[jsonconfigs],
        outputs=[jsonconfigs],
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=evaluate_script_path,
        compute_target=aml_compute_cpu,
        source_directory=sources_directory_train,
        arguments=[
            "--config_suffix", config_suffix,
            "--json_config", jsonconfigs,
        ],
        runconfig=run_config,
        inputs=[jsonconfigs],
        # outputs=[jsonconfigs],
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_model_step = PythonScriptStep(
        name="Register New Trained Model",
        script_name=register_script_path,
        compute_target=aml_compute_cpu,
        source_directory=sources_directory_train,
        arguments=[
            "--config_suffix", config_suffix,
            "--json_config", jsonconfigs,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        inputs=[jsonconfigs],
        # outputs=[jsonconfigs],
        allow_reuse=False,
    )
    print("Step register model created")

    evaluate_step.run_after(train_step)
    register_model_step.run_after(evaluate_step)

    steps = [register_model_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name="training-pipeline",
        description="Model training/retraining pipeline")

    train_pipeline_json = {}
    train_pipeline_json["rest_endpoint"] = published_pipeline.endpoint
    json_file_path = "ml_service/pipelines/train_pipeline.json"
    with open(json_file_path, "w") as outfile:
        json.dump(train_pipeline_json, outfile)
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(e.workspace_name,
                                  e.resource_group,
                                  e.subscription_id,
                                  e.tenant_id,
                                  e.app_id,
                                  e.app_secret)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core', 'azure-storage', 'azure-storage-blob']))
    run_config.environment.docker.enabled = True

    model_name = PipelineParameter(name="model_name", default_value=e.model_name)
    release_id = PipelineParameter(name="release_id", default_value="0")

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--release_id", release_id,
            "--model_name", model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    evaluate_step.run_after(train_step)
    steps = [evaluate_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')