def get_environment(
    workspace: Workspace,
    environment_name: str,
    conda_dependencies_file: str,
    create_new: bool = False,
    enable_docker: bool = None,
    use_gpu: bool = False,
):
    try:
        e = Env()
        environments = Environment.list(workspace=workspace)
        restored_environment = None
        for env in environments:
            if env == environment_name:
                restored_environment = environments[environment_name]

        if restored_environment is None or create_new:
            new_env = Environment.from_conda_specification(
                environment_name,
                os.path.join(e.sources_directory_train,
                             conda_dependencies_file),
            )
            restored_environment = new_env
            if enable_docker is not None:
                restored_environment.docker.enabled = enable_docker
                restored_environment.docker.base_image = \
                    DEFAULT_GPU_IMAGE if use_gpu else DEFAULT_CPU_IMAGE
            restored_environment.register(workspace)

        if restored_environment is not None:
            print(restored_environment)
        return restored_environment
    except Exception as ex:  # renamed to avoid shadowing the Env() instance
        print(ex)
        exit(1)
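# Illustrative usage sketch (an assumption, not part of the original
# module): how get_environment might be wired up. The environment name
# and conda file below are hypothetical placeholders.
def _demo_get_environment(workspace: Workspace):
    return get_environment(
        workspace,
        "mlops-train-env",                                  # hypothetical
        conda_dependencies_file="conda_dependencies.yml",   # hypothetical
        create_new=False,
        enable_docker=True,
        use_gpu=False,
    )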
def main():
    parser = argparse.ArgumentParser("smoke_test_scoring_service.py")
    parser.add_argument(
        "--type",
        type=str,
        choices=["AKS", "ACI", "Webapp"],
        required=True,
        help="type of service",
    )
    parser.add_argument(
        "--service",
        type=str,
        required=True,
        help="Name of the image to test",
    )
    args = parser.parse_args()

    e = Env()
    if args.type == "Webapp":
        output = call_web_app(args.service, {})
    else:
        output = call_web_service(e, args.type, args.service)

    print("Verifying service output")
    assert "result" in output
    assert len(output["result"]) == output_len
    print("Smoke test successful.")
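# Hypothetical sketch of the call_web_service helper used above (its real
# implementation is not shown in this file). It assumes the target is an
# azureml Webservice registered in the workspace and that Env carries the
# workspace coordinates used elsewhere in this repo; the empty payload is
# a placeholder for the real sample rows.
def _call_web_service_sketch(e, service_type, service_name):
    import json

    from azureml.core import Workspace
    from azureml.core.webservice import Webservice

    ws = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    service = Webservice(ws, service_name)
    # Webservice.run posts to the scoring endpoint and returns the
    # deserialized response.
    return service.run(json.dumps({"data": []}))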
def run_batchscore_pipeline():
    try:
        env = Env()
        args = parse_args()
        aml_workspace = Workspace.get(
            name=env.workspace_name,
            subscription_id=env.subscription_id,
            resource_group=env.resource_group,
        )

        scoringpipeline = get_pipeline(args.pipeline_id, aml_workspace, env)

        experiment = Experiment(workspace=aml_workspace,
                                name=env.experiment_name)

        run = experiment.submit(
            scoringpipeline,
            pipeline_parameters={
                "model_name": env.model_name,
                "model_version": env.model_version,
                "model_tag_name": " ",
                "model_tag_value": " ",
            },
        )

        run.wait_for_completion(show_output=True)

        if run.get_status() == "Finished":
            copy_output(list(run.get_steps())[0].id, env)

    except Exception as ex:
        print("Error: {}".format(ex))
def build_batchscore_pipeline():
    """
    Main method that builds and publishes a scoring pipeline.
    """
    try:
        env = Env()

        # Get Azure machine learning workspace
        aml_workspace = Workspace.get(
            name=env.workspace_name,
            subscription_id=env.subscription_id,
            resource_group=env.resource_group,
        )

        # Get Azure machine learning cluster
        aml_compute_score = get_compute(
            aml_workspace,
            env.compute_name_scoring,
            env.vm_size_scoring,
            for_batch_scoring=True,
        )

        input_dataset, output_location = get_inputds_outputloc(
            aml_workspace, env
        )
        scoring_runconfig, score_copy_runconfig = get_run_configs(
            aml_workspace, aml_compute_score, env
        )

        scoring_pipeline = get_scoring_pipeline(
            input_dataset,
            output_location,
            scoring_runconfig,
            score_copy_runconfig,
            aml_compute_score,
            aml_workspace,
            env,
        )

        published_pipeline = scoring_pipeline.publish(
            name=env.scoring_pipeline_name,
            description="COVID19Articles Batch Scoring Pipeline",
        )
        pipeline_id_string = (
            "##vso[task.setvariable variable=pipeline_id;isOutput=true]{}"
            .format(published_pipeline.id)
        )
        print(pipeline_id_string)
    except Exception as e:
        print(e)
        exit(1)
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment.
    # Make sure to include `r-essentials`
    # in COVID19Articles/conda_dependencies.yml
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="COVID19Articles/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    train_step = DatabricksStep(
        name="DBPythonInLocalMachine",
        num_workers=1,
        python_script_name="train_with_r_on_databricks.py",
        source_directory="COVID19Articles/training/R",
        run_name="DB_Python_R_demo",
        existing_cluster_id=e.db_cluster_id,
        compute_target=aml_compute,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R_on_DB",
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
def get_compute(
    workspace: Workspace,
    compute_name: str,
    vm_size: str,
    for_batch_scoring: bool = False,
):
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print("Found existing compute target "
                      + compute_name + " so using it.")
        else:
            e = Env()
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=(e.vm_priority if not for_batch_scoring
                             else e.vm_priority_scoring),
                min_nodes=(e.min_nodes if not for_batch_scoring
                           else e.min_nodes_scoring),
                max_nodes=(e.max_nodes if not for_batch_scoring
                           else e.max_nodes_scoring),
                idle_seconds_before_scaledown=300,
                # Uncomment the lines below for VNet support:
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name,
            )
            compute_target = ComputeTarget.create(
                workspace, compute_name, compute_config
            )
            compute_target.wait_for_completion(
                show_output=True,
                min_node_count=None,
                timeout_in_minutes=10,
            )
        return compute_target
    except ComputeTargetException as ex:
        print(ex)
        print("An error occurred trying to provision compute.")
        exit(1)
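# Illustrative usage sketch (hypothetical names, not part of the original
# module): provisioning a batch scoring cluster via get_compute.
def _demo_get_compute(workspace: Workspace):
    return get_compute(
        workspace,
        compute_name="score-cluster",   # hypothetical cluster name
        vm_size="STANDARD_DS2_V2",      # hypothetical VM size
        for_batch_scoring=True,
    )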
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        create_sample_data_csv()

        # Use a CSV to read in the data set.
        file_name = "COVID19Articles.csv"

        if not os.path.exists(file_name):
            raise Exception(
                'Could not find CSV dataset at "%s". '
                "If you have bootstrapped your project, "
                "you will need to provide a CSV." % file_name)

        # Upload file to default datastore in workspace
        datastore = Datastore.get(aml_workspace, datastore_name)
        target_path = "training-data/"
        datastore.upload_files(
            files=[file_name],
            target_path=target_path,
            overwrite=True,
            show_progress=False,
        )

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(datastore, path_on_datastore))
        dataset = dataset.register(
            workspace=aml_workspace,
            name=dataset_name,
            description="COVID19Articles training data",
            tags={"format": "CSV"},
            create_new_version=True,
        )

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        "pipeline_data", datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name", model_name_param,
            "--step_output", pipeline_data,
            "--dataset_version", dataset_version_param,
            "--data_file_path", data_file_path_param,
            "--caller_run_id", caller_run_id_param,
            "--dataset_name", dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--model_name", model_name_param,
            "--step_input", pipeline_data,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
print("Step Register created") # Check run_evaluation flag to include or exclude evaluation step. if (e.run_evaluation).lower() == "true": print("Include evaluation step before register step.") evaluate_step.run_after(train_step) register_step.run_after(evaluate_step) steps = [train_step, evaluate_step, register_step] else: print("Exclude evaluation step and directly run register step.") register_step.run_after(train_step) steps = [train_step, register_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline._set_experiment_name train_pipeline.validate() published_pipeline = train_pipeline.publish( name=e.pipeline_name, description="Model training/retraining pipeline", version=e.build_id, ) print(f"Published pipeline: {published_pipeline.name}") print(f"for build {published_pipeline.version}")
import os
import argparse
import shutil

from azureml.core import Workspace
from azureml.core.environment import Environment
from azureml.core.model import Model, InferenceConfig

from COVID19Articles.ml_service.util.env_variables import Env

e = Env()

# Get Azure machine learning workspace
ws = Workspace.get(
    name=e.workspace_name,
    subscription_id=e.subscription_id,
    resource_group=e.resource_group,
)

parser = argparse.ArgumentParser("create scoring image")
parser.add_argument(
    "--output_image_location_file",
    type=str,
    help=("Name of a file to write image location to, "
          "in format REGISTRY.azurecr.io/IMAGE_NAME:IMAGE_VERSION"),
)
args = parser.parse_args()

model = Model(ws, name=e.model_name, version=e.model_version)
sources_dir = e.sources_directory_train
if sources_dir is None:
    sources_dir = "COVID19Articles"
score_script = os.path.join(".", sources_dir, e.score_script)
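# Hypothetical continuation sketch (an assumption, not the original file):
# the snippet above stops after resolving score_script. One plausible next
# step, using the already-imported InferenceConfig and Model.package, is to
# build the scoring image and record its location for downstream Azure
# DevOps jobs. The e.aml_env_name attribute is assumed from the other
# scripts in this repo.
def _demo_package_model():
    inference_config = InferenceConfig(
        entry_script=score_script,
        environment=Environment.get(ws, e.aml_env_name),
    )
    package = Model.package(ws, [model], inference_config)
    package.wait_for_creation(show_output=True)
    # package.location holds REGISTRY.azurecr.io/IMAGE_NAME:IMAGE_VERSION
    if args.output_image_location_file:
        with open(args.output_image_location_file, "w") as out_file:
            out_file.write(package.location)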
def main():
    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--output_pipeline_id_file",
        type=str,
        default="pipeline_id.txt",
        help="Name of a file to write pipeline ID to",
    )
    parser.add_argument(
        "--skip_train_execution",
        action="store_true",
        help=("Do not trigger the execution. "
              "Use this in Azure DevOps when using a server job to trigger"),
    )
    args = parser.parse_args()

    e = Env()

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            "Multiple active pipelines are published for build "
            f"{e.build_id}.")
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            "Unable to find a published pipeline for this build "
            f"{e.build_id}")
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        if args.output_pipeline_id_file is not None:
            with open(args.output_pipeline_id_file, "w") as out_file:
                out_file.write(published_pipeline.id)

        if not args.skip_train_execution:
            pipeline_parameters = {"model_name": e.model_name}
            tags = {"BuildId": e.build_id}
            if e.build_uri is not None:
                tags["BuildUri"] = e.build_uri
            experiment = Experiment(
                workspace=aml_workspace,
                name=e.experiment_name)
            run = experiment.submit(
                published_pipeline,
                tags=tags,
                pipeline_parameters=pipeline_parameters)

            print("Pipeline run initiated ", run.id)