pipeline_path = "Azure_ML/03_pipeline/" # - Crawl Data crawl_data_dir = PipelineData( "extracted_data", is_directory=True, ) crawl_data_step = PythonScriptStep( name="Crawl Data", script_name=pipeline_path + "01_crawl_data/main.py", source_directory='.', compute_target=compute_target, runconfig=run_config, outputs=[crawl_data_dir], arguments=[ "--output-dir", crawl_data_dir, "--sql-name-in", sql_name, "--sql-pw-in", sql_pw, ], allow_reuse=False, ) # - Clean Crawled Data clean_data_dir = PipelineData( "extracted_data", is_directory=True, ) clean_data_step = PythonScriptStep(
input_data = input_dataset.as_named_input('input_dataset').as_mount() data_store = ws.get_default_datastore() prepped_data = PipelineData('prepped_data', datastore=data_store) pipeline_mode_param = PipelineParameter(name="mode", default_value="execute") prep_step = PythonScriptStep( name='Prepare data', source_directory=script_folder, script_name='prep_data.py', compute_target=compute_target, runconfig=pipeline_run_config, # Specify dataset as initial input inputs=[input_data], # Specify PipelineData as output outputs=[prepped_data], # Also pass as data reference to script arguments=[ '--input_data', input_data, '--prepped_data', prepped_data, '--mode', pipeline_mode_param ], allow_reuse=False) # Construct the pipeline pipeline_steps = [prep_step] #pipeline_steps = [step_test] pipeline = Pipeline(workspace=ws, steps=pipeline_steps) print("Pipeline is built.") # Create an experiment and run the pipeline
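# The comment above announces the submission but the call itself is not shown.
# A minimal, hedged sketch of what it typically looks like with the objects
# already in scope (ws, pipeline); the experiment name 'data-prep-pipeline' is
# illustrative, and pipeline_parameters simply overrides the "mode" parameter
# defined earlier.
from azureml.core import Experiment

experiment = Experiment(workspace=ws, name='data-prep-pipeline')
pipeline_run = experiment.submit(pipeline, pipeline_parameters={'mode': 'execute'})
print("Pipeline submitted for execution.")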
# create the cluster CPU_compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config) # can poll for a minimum number of nodes and for a specific timeout. # if no min node count is provided it uses the scale settings for the cluster CPU_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10) # use get_status() to get a detailed status for the current cluster. print(CPU_compute_target.get_status().serialize()) ####################################################################################################### register_step = PythonScriptStep(name = "register_step", script_name= "register/estimator_register.py", runconfig = run_config_user_managed, source_directory = './scripts', compute_target=CPU_compute_target ) ####################################################################################################### pipeline = Pipeline(workspace = ws,steps=[register_step]) #Validate pipeline pipeline.validate() print("Pipeline validation complete") #submit Pipeline run = exp.submit(pipeline,pipeline_parameters={}) print("Pipeline is submitted for execution")
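# A short, hedged sketch of monitoring the run submitted above; `run` is the
# PipelineRun returned by exp.submit(...), and the timeout value is illustrative.
run.wait_for_completion(show_output=True, timeout_seconds=3600)
print("Final status:", run.get_status())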
def main():
    e = Env()

    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name  # NOQA: E501

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)  # NOQA: E501
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")  # NOQA: E501

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        # This call creates an example CSV from sklearn sample data. If you
        # have already bootstrapped your project, you can comment this line
        # out and use your own CSV.
        create_sample_data_csv()

        # Use a CSV to read in the data set.
        file_name = "diabetes.csv"

        if not os.path.exists(file_name):
            raise Exception(
                'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.'  # NOQA: E501
                % file_name)  # NOQA: E501

        # Upload file to default datastore in workspace
        datastore = Datastore.get(aml_workspace, datastore_name)
        target_path = "training-data/"
        datastore.upload_files(
            files=[file_name],
            target_path=target_path,
            overwrite=True,
            show_progress=False,
        )

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(datastore, path_on_datastore))
        dataset = dataset.register(
            workspace=aml_workspace,
            name=dataset_name,
            description="diabetes training data",
            tags={"format": "CSV"},
            create_new_version=True,
        )

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        "pipeline_data",
        datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name", model_name_param,
            "--step_output", pipeline_data,
            "--dataset_version", dataset_version_param,
            "--data_file_path", data_file_path_param,
            "--caller_run_id", caller_run_id_param,
            "--dataset_name", dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--model_name", model_name_param,
            "--step_input", pipeline_data,
        ],  # NOQA: E501
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Check run_evaluation flag to include or exclude evaluation step.
    if (e.run_evaluation).lower() == "true":
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
def main(): """Build pipeline.""" # Environment variables env = Env() # Azure ML workspace aml_workspace = Workspace.get( name=env.workspace_name, subscription_id=env.subscription_id, resource_group=env.resource_group, ) logger.info(f"Azure ML workspace: {aml_workspace}") # Azure ML compute cluster aml_compute = get_compute(aml_workspace, env.compute_name) logger.info(f"Aazure ML compute cluster: {aml_compute}") # Azure ML environment environment = Environment(name=env.aml_env_name) conda_dep = CondaDependencies( conda_dependencies_file_path="./local_development/dev_dependencies.yml" ) environment.python.conda_dependencies = conda_dep run_config = RunConfiguration() run_config.environment = environment # Pipeline Data preparation_pipelinedata = PipelineData("preparation_pipelinedata", is_directory=True).as_dataset() extraction_pipelinedata = PipelineData("extraction_pipelinedata", is_directory=True) training_pipelinedata = PipelineData("training_pipelinedata", is_directory=True) # List of pipeline steps step_list = list() preparation_step = PythonScriptStep( name="preparation-step", compute_target=aml_compute, source_directory=env.sources_directory_train, script_name=env.preparation_step_script_path, outputs=[preparation_pipelinedata], arguments=[ "--input_path", env.input_dir, "--output_path", preparation_pipelinedata, "--datastore_name", env.blob_datastore_name ], runconfig=run_config) step_list.append(preparation_step) parallel_run_config = ParallelRunConfig( source_directory=env.sources_directory_train, entry_script=env.extraction_step_script_path, mini_batch_size=env.mini_batch_size, error_threshold=env.error_threshold, output_action="append_row", environment=environment, compute_target=aml_compute, node_count=env.node_count, run_invocation_timeout=env.run_invocation_timeout, process_count_per_node=env.process_count_per_node, append_row_file_name="extraction_output.txt") extraction_step = ParallelRunStep( name="extraction-step", inputs=[preparation_pipelinedata], output=extraction_pipelinedata, arguments=["--output_dir", extraction_pipelinedata], parallel_run_config=parallel_run_config) step_list.append(extraction_step) training_step = PythonScriptStep( name="traning-step", compute_target=aml_compute, source_directory=env.sources_directory_train, script_name=env.training_step_script_path, inputs=[extraction_pipelinedata], outputs=[training_pipelinedata], arguments=[ "--input_dir", extraction_pipelinedata, "--output_dir", training_pipelinedata ], runconfig=run_config) step_list.append(training_step) # Build pipeline pipeline = Pipeline(workspace=aml_workspace, steps=step_list) pipeline.validate() logger.info(f"Built pipeline {pipeline}") # Publish pipeline published_pipeline = pipeline.publish( env.pipeline_name, description=env.pipeline_name, version=datetime.utcnow().isoformat()) try: pipeline_endpoint = PipelineEndpoint.get( workspace=aml_workspace, name=env.pipeline_endpoint_name) pipeline_endpoint.add_default(published_pipeline) except ErrorResponseException: pipeline_endpoint = PipelineEndpoint.publish( workspace=aml_workspace, name=env.pipeline_endpoint_name, pipeline=published_pipeline, description=env.pipeline_endpoint_name)
def main(): load_dotenv() workspace_name = os.environ.get("AML_WORKSPACE_NAME") resource_group = os.environ.get("RESOURCE_GROUP") subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") app_id = os.environ.get("SP_APP_ID") app_secret = os.environ.get("SP_APP_SECRET") sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") register_script_path = os.environ.get("REGISTER_SCRIPT_PATH") vm_size_cpu = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") compute_name_cpu = os.environ.get("AML_COMPUTE_CLUSTER_NAME") model_name = os.environ.get("MODEL_NAME") # Get Azure machine learning workspace aml_workspace = get_workspace(workspace_name, resource_group, subscription_id, tenant_id, app_id, app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute_cpu = get_compute(aml_workspace, compute_name_cpu, vm_size_cpu) if aml_compute_cpu is not None: print(aml_compute_cpu) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( conda_packages=[ 'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras' ], pip_packages=[ 'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob' ])) run_config.environment.docker.enabled = True model_name = PipelineParameter(name="model_name", default_value=model_name) def_blob_store = Datastore(aml_workspace, "workspaceblobstore") jsonconfigs = PipelineData("jsonconfigs", datastore=def_blob_store) config_suffix = datetime.datetime.now().strftime("%Y%m%d%H") train_step = PythonScriptStep( name="Train Model", script_name=train_script_path, compute_target=aml_compute_cpu, source_directory=sources_directory_train, arguments=[ "--config_suffix", config_suffix, "--json_config", jsonconfigs, "--model_name", model_name, ], runconfig=run_config, # inputs=[jsonconfigs], outputs=[jsonconfigs], allow_reuse=False, ) print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=evaluate_script_path, compute_target=aml_compute_cpu, source_directory=sources_directory_train, arguments=[ "--config_suffix", config_suffix, "--json_config", jsonconfigs, ], runconfig=run_config, inputs=[jsonconfigs], # outputs=[jsonconfigs], allow_reuse=False, ) print("Step Evaluate created") register_model_step = PythonScriptStep( name="Register New Trained Model", script_name=register_script_path, compute_target=aml_compute_cpu, source_directory=sources_directory_train, arguments=[ "--config_suffix", config_suffix, "--json_config", jsonconfigs, "--model_name", model_name, ], runconfig=run_config, inputs=[jsonconfigs], # outputs=[jsonconfigs], allow_reuse=False, ) print("Step register model created") evaluate_step.run_after(train_step) register_model_step.run_after(evaluate_step) steps = [register_model_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( name="training-pipeline", description="Model training/retraining pipeline") train_pipeline_json = {} train_pipeline_json["rest_endpoint"] = published_pipeline.endpoint json_file_path = "ml_service/pipelines/train_pipeline.json" with open(json_file_path, "w") as outfile: json.dump(train_pipeline_json, outfile)
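# The JSON file written above is typically consumed by a later release stage
# that triggers the published pipeline over REST. A hedged sketch of that
# consumer, reusing the service principal values loaded from the environment
# earlier; the experiment name "training-pipeline" is illustrative.
import json
import requests
from azureml.core.authentication import ServicePrincipalAuthentication

with open("ml_service/pipelines/train_pipeline.json") as f:
    rest_endpoint = json.load(f)["rest_endpoint"]

sp_auth = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=app_id,
    service_principal_password=app_secret)
response = requests.post(
    rest_endpoint,
    headers=sp_auth.get_authentication_header(),
    json={"ExperimentName": "training-pipeline"})
print(response.status_code, response.json().get("Id"))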
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create( pip_packages=[ 'numpy', 'pandas', 'scikit-learn==0.20.3', 'sklearn_pandas', 'azureml-sdk' ]) scripts_folder = 'scripts' def_blob_store = ws.get_default_datastore() train_output = PipelineData('train_output', datastore=def_blob_store) print("train_output PipelineData object created") trainStep = PythonScriptStep(name="train", script_name="train.py", arguments=["--model_name", args.model_name], compute_target=aml_compute, runconfig=run_amlcompute, source_directory=scripts_folder, allow_reuse=False) print("trainStep created") evaluate_output = PipelineData('evaluate_output', datastore=def_blob_store) evaluateStep = PythonScriptStep(name="evaluate", script_name="evaluate.py", arguments=[ "--model_name", args.model_name, "--metric_threshold", float(args.metric_threshold), "--image_name", args.image_name, "--output", evaluate_output
datastore = Datastore.get(ws, "xray_datastore") PreProcessingData = PipelineData("PreProcessingData", datastore=datastore) ModelData = PipelineData("ModelData", datastore=datastore) ####################################################################################################### preprocessing_step = PythonScriptStep( name="preprocessing_step", script_name="estimator_data_preprocessing.py", compute_target=GPU_compute_target, runconfig=run_config_user_managed, source_directory='./scripts/data_preprocess', inputs=[ xrayimage_dataset.as_named_input('xrayimage_dataset').as_mount( '/temp/xray_images'), traindata_dataset.as_named_input('traindata_dataset'), validdata_dataset.as_named_input('validdata_dataset'), testdata_dataset.as_named_input('testdata_dataset'), traintarget_dataset.as_named_input('traintarget_dataset'), validtarget_dataset.as_named_input('validtarget_dataset'), testtarget_dataset.as_named_input('testtarget_dataset') ], arguments=['--PreProcessingData', PreProcessingData], outputs=[PreProcessingData], allow_reuse=True) print("preprocessing_step") ####################################################################################################### est = TensorFlow(source_directory='./scripts/train',
env.register(workspace=ws) print("Registered environment component-condition") # Specify the run configuration run_config = RunConfiguration() run_config.environment.docker.enabled = True run_config.environment.python.conda_dependencies = cd # Pipeline definition inputdata = DataReference(datastore=Datastore.get(ws, "trainingdata"), data_reference_name="data") train_model = PythonScriptStep( script_name="./train.py", name="fit-nlp-model", inputs=[inputdata.as_download(path_on_compute="./data")], runconfig=run_config, compute_target=compute_target, ) pipeline = Pipeline( workspace=ws, steps=[train_model], description="Builds Keras model for detecting component defects", ) if __name__ == "__main__": Experiment( ws, "fit-component-defects-model").submit(pipeline).wait_for_completion( show_output=True)
def evaluate_step(datastore, test_dir, model_dir, compute_target):
    '''
    This step evaluates the trained NMT model against the test data produced
    by the previous steps and writes the results to an evaluation directory.

    :param datastore: The datastore that will be used
    :type datastore: Datastore
    :param test_dir: The reference to the directory containing the test data
    :type test_dir: DataReference
    :param model_dir: The reference to the directory containing the NMT model
    :type model_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget
    :return: The evaluate step, step outputs dictionary (keys: eval_dir)
    :rtype: PythonScriptStep, dict
    '''
    run_config = RunConfiguration()
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    run_config.environment.python.user_managed_dependencies = False
    conda_packages = ['pytorch', 'tqdm', 'nltk']
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=conda_packages
    )

    # set parameters of the evaluation step
    input_col = PipelineParameter(name='input_col', default_value='Title')
    output_col = PipelineParameter(name='output_col', default_value='Abstract')
    cuda = PipelineParameter(name='cuda', default_value=True)
    seed = PipelineParameter(name='seed', default_value=0)
    beam_size = PipelineParameter(name='beam_size', default_value=5)
    max_decoding_time_step = PipelineParameter(
        name='max_decoding_time_step', default_value=70)

    eval_dir = PipelineData(
        name='eval_dir',
        pipeline_output_name='eval_dir',
        datastore=datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [eval_dir]
    outputs_map = {
        'eval_dir': eval_dir,
    }

    step = PythonScriptStep(
        name="Evaluate",
        script_name='evaluate.py',
        arguments=[
            '--test_dir', test_dir,
            '--model_dir', model_dir,
            '--input_col', input_col,
            '--output_col', output_col,
            '--cuda', cuda,
            '--seed', seed,
            '--beam_size', beam_size,
            '--max_decoding_time_step', max_decoding_time_step,
            '--eval_dir', eval_dir
        ],
        inputs=[test_dir, model_dir],
        outputs=outputs,
        compute_target=compute_target,
        runconfig=run_config,
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        allow_reuse=True
    )

    return step, outputs_map
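# A hedged sketch of wiring the step factory above into a pipeline. The
# workspace `ws`, the `datastore`, the `test_dir`/`model_dir` DataReferences and
# the compute target are assumed to be created elsewhere in the project, and the
# experiment name is illustrative.
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

step, step_outputs = evaluate_step(datastore, test_dir, model_dir, compute_target)
eval_dir = step_outputs['eval_dir']  # available for chaining into a downstream step

pipeline = Pipeline(workspace=ws, steps=[step])
pipeline.validate()
run = Experiment(ws, 'nmt-evaluation').submit(pipeline)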
# Review and run the cell below to construct the PipelineData objects and the PythonScriptStep pipeline step:
#
# *Open preprocess.py on your local machine and examine the arguments, inputs, and outputs for the script. Note that there is an argument called process_mode to distinguish between processing training data vs. test data. Reviewing the Python script file will give you a good sense of why the script argument names used below are important.*

# In[ ]:

processed_train_data = PipelineData('processed_train_data', datastore=def_blob_store)
print("PipelineData object created")

processTrainDataStep = PythonScriptStep(
    name="process_train_data",
    script_name="preprocess.py",
    arguments=["--process_mode", 'train',
               "--input", raw_train_data,
               "--output", processed_train_data],
    inputs=[raw_train_data],
    outputs=[processed_train_data],
    compute_target=aml_compute,
    runconfig=run_amlcompute,
    source_directory=project_folder
)
print("preprocessStep created")

# ### Create the Train Pipeline Step
# The train pipeline step takes the *processed_train_data* created in the step above as input and generates another PipelineData object to save the *trained_model* as its output. This is an example of how machine learning pipelines can have many steps, and how those steps can use or reuse data sources and intermediate data (a sketch follows the challenge heading below).
#
# *Open train.py on your local machine and examine the arguments, inputs, and outputs for the script.*

### Challenge Task
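# The challenge above asks for the train step described in the preceding text.
# This is a hedged sketch of one possible shape, not the notebook's official
# solution: the "--input"/"--output" flag names are assumptions; check train.py
# for the exact arguments it expects.
trained_model = PipelineData('trained_model', datastore=def_blob_store)

trainStep = PythonScriptStep(
    name="train",
    script_name="train.py",
    arguments=["--input", processed_train_data,
               "--output", trained_model],
    inputs=[processed_train_data],
    outputs=[trained_model],
    compute_target=aml_compute,
    runconfig=run_amlcompute,
    source_directory=project_folder
)
print("trainStep created")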
ws = get_workspace(config) compute_target = get_or_create_compute(ws, **config["compute"]) ### # Define and set up pipeline ### pipeline_param = PipelineParameter(name="my_arg", default_value="default") my_step = PythonScriptStep( name="My Script Step", script_name="scriptstep.py", arguments=[pipeline_param], inputs=[], outputs=[], compute_target=compute_target, source_directory="src", allow_reuse=True, runconfig=RunConfiguration(conda_dependencies=CondaDependencies( conda_dependencies_file_path="environment.yml")), ) pipeline_id, pipeline_endpoint = publish_pipeline(ws, [my_step], "blabla") ### # Trigger pipeline via REST API ### # To trigger the pipeline, a service principal is required: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication token = requests.post(
def main(): load_dotenv() workspace_name = os.environ.get("WS_NAME") resource_group = os.environ.get("RG_NAME") subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") app_id = os.environ.get("SP_APP_ID") app_secret = os.environ.get("SP_APP_SECRET") sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU") compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") aks_name = os.environ.get("AKS_CLUSTER_NAME") model_name = os.environ.get("MODEL_NAME") build_id = os.environ.get("BUILD_BUILDID") pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") experiment_name = os.environ.get("EXPERIMENT_NAME") # Get Azure machine learning workspace aml_workspace = get_workspace( workspace_name, resource_group, subscription_id, tenant_id, app_id, app_secret) print('Now accessing:') print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, compute_name, vm_size) if aml_compute is not None: print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( conda_packages=['numpy', 'pandas', 'scikit-learn', 'keras'], pip_packages=['azureml-core==1.25.0', 'azureml-defaults==1.25.0', 'azureml-telemetry==1.25.0', 'azureml-train-restclients-hyperdrive==1.25.0', 'azureml-train-core==1.25.0', 'azureml-dataprep', 'tensorflow-gpu==2.0.0', 'transformers==2.0.0', 'absl-py', 'azureml-dataprep', 'h5py<3.0.0']) ) # run_config.environment.docker.enabled = True datastore_name = 'mtcseattle' container_name = 'azure-service-classifier' account_name = 'mtcseattle' sas_token = '?sv=2020-04-08&st=2021-05-26T04%3A39%3A46Z&se=2022-05-27T04%3A39%3A00Z&sr=c&sp=rl&sig=CTFMEu24bo2X06G%2B%2F2aKiiPZBzvlWHELe15rNFqULUk%3D' try: existing_datastore = Datastore.get(aml_workspace, datastore_name) except: # noqa: E722 existing_datastore = Datastore \ .register_azure_blob_container(workspace=aml_workspace, datastore_name=datastore_name, container_name=container_name, account_name=account_name, sas_token=sas_token, overwrite=True) azure_dataset = Dataset.File.from_files( path=(existing_datastore, 'data')) azure_dataset = azure_dataset.register( workspace=aml_workspace, name='Azure Services Dataset', description='Dataset containing azure related posts on Stackoverflow', create_new_version=True) azure_dataset.to_path() input_data = azure_dataset.as_named_input('azureservicedata').as_mount( '/tmp/data') model_name = PipelineParameter( name="model_name", default_value=model_name) max_seq_length = PipelineParameter( name="max_seq_length", default_value=128) learning_rate = PipelineParameter( name="learning_rate", default_value=3e-5) num_epochs = PipelineParameter( name="num_epochs", default_value=1) export_dir = PipelineParameter( name="export_dir", default_value="./outputs/model") batch_size = PipelineParameter( name="batch_size", default_value=32) steps_per_epoch = PipelineParameter( name="steps_per_epoch", default_value=1) # initialize the PythonScriptStep train_step = PythonScriptStep( name='Train Model', script_name=train_script_path, arguments=['--data_dir', input_data, '--max_seq_length', max_seq_length, '--batch_size', batch_size, '--learning_rate', learning_rate, '--steps_per_epoch', steps_per_epoch, '--num_epochs', num_epochs, '--export_dir',export_dir], compute_target=aml_compute, source_directory=sources_directory_train, runconfig=run_config, allow_reuse=True) 
print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=evaluate_script_path, compute_target=aml_compute, source_directory=sources_directory_train, arguments=[ "--model_name", model_name, "--build_id", build_id, ], runconfig=run_config, allow_reuse=False, ) print("Step Evaluate created") # Currently, the Evaluate step will automatically register # the model if it performs better. This step is based on a # previous version of the repo which utilized JSON files to # track evaluation results. evaluate_step.run_after(train_step) steps = [evaluate_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( name=pipeline_name, description="Model training/retraining pipeline.", version=build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') response = published_pipeline.submit( # noqa: F841 workspace=aml_workspace, experiment_name=experiment_name)
def main(): e = Env() # Get Azure machine learning workspace aml_workspace = Workspace.get(name=e.workspace_name, subscription_id=e.subscription_id, resource_group=e.resource_group) print("get_workspace:") print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size) if aml_compute is not None: print("aml_compute:") print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( conda_packages=[ 'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras' ], pip_packages=[ 'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob' ])) run_config.environment.docker.enabled = True config_envvar = {} if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name builduri_base = builduri_base + "/_build/results?buildId=" config_envvar["BUILDURI_BASE"] = builduri_base run_config.environment.environment_variables = config_envvar model_name_param = PipelineParameter(name="model_name", default_value=e.model_name) build_id_param = PipelineParameter(name="build_id", default_value=e.build_id) hyperparameter_alpha_param = PipelineParameter(name="hyperparameter_alpha", default_value=0.5) train_step = PythonScriptStep( name="Train Model", script_name=e.train_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ "--build_id", build_id_param, "--model_name", model_name_param, "--alpha", hyperparameter_alpha_param, ], runconfig=run_config, allow_reuse=False, ) print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=e.evaluate_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ "--build_id", build_id_param, "--model_name", model_name_param, ], runconfig=run_config, allow_reuse=False, ) print("Step Evaluate created") register_step = PythonScriptStep( name="Register Model ", script_name=e.register_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ "--build_id", build_id_param, "--model_name", model_name_param, ], runconfig=run_config, allow_reuse=False, ) print("Step Register created") evaluate_step.run_after(train_step) register_step.run_after(evaluate_step) steps = [train_step, evaluate_step, register_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline._set_experiment_name train_pipeline.validate() published_pipeline = train_pipeline.publish( name=e.pipeline_name, description="Model training/retraining pipeline", version=e.build_id) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}')
def main(): load_dotenv() workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" resource_group = os.environ.get("BASE_NAME") + "-AML-RG" subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") app_id = os.environ.get("SP_APP_ID") app_secret = os.environ.get("SP_APP_SECRET") sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") # register_script_path = os.environ.get("REGISTER_SCRIPT_PATH") vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") model_name = os.environ.get("MODEL_NAME") build_id = os.environ.get("BUILD_BUILDID") pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") print(app_secret) # Get Azure machine learning workspace aml_workspace = get_workspace(workspace_name, resource_group, subscription_id, tenant_id, app_id, app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, compute_name, vm_size) if aml_compute is not None: print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( conda_packages=[ 'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras' ], pip_packages=[ 'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob' ])) run_config.environment.docker.enabled = True model_name = PipelineParameter(name="model_name", default_value=model_name) release_id = PipelineParameter(name="release_id", default_value="0") train_step = PythonScriptStep( name="Train Model", script_name=train_script_path, compute_target=aml_compute, source_directory=sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, ], runconfig=run_config, allow_reuse=False, ) print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=evaluate_script_path, compute_target=aml_compute, source_directory=sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, ], runconfig=run_config, allow_reuse=False, ) print("Step Evaluate created") # Currently, the Evaluate step will automatically register # the model if it performs better. This step is based on a # previous version of the repo which utilized JSON files to # track evaluation results. # register_model_step = PythonScriptStep( # name="Register New Trained Model", # script_name=register_script_path, # compute_target=aml_compute, # source_directory=sources_directory_train, # arguments=[ # "--release_id", release_id, # "--model_name", model_name, # ], # runconfig=run_config, # allow_reuse=False, # ) # print("Step register model created") evaluate_step.run_after(train_step) # register_model_step.run_after(evaluate_step) steps = [evaluate_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( name=pipeline_name, description="Model training/retraining pipeline", version=build_id) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}')
path_on_datastore=path_on_datastore) print("DataReference object created") # Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1. # raw_data = PipelineData("raw_video_fames", datastore=def_blob_store) logits_data = PipelineData("logits_from_xception", datastore=def_blob_store) data_metrics = PipelineData("data_metrics", datastore=def_blob_store) data_output = PipelineData("output_data", datastore=def_blob_store) # prepare dataset for training/testing prednet get_logits_from_xception = PythonScriptStep( name='get_logits_from_xception', script_name="get_logits_from_xception.py", arguments=["--data-folder", labeled_data, "--output_data", logits_data], inputs=[labeled_data], outputs=[logits_data], compute_target=gpu_compute_target, source_directory=script_folder, runconfig=gpu_compute_run_config, allow_reuse=True, hash_paths=['.']) print("logit step created") # upload data to default datastore def_blob_store = ws.get_default_datastore() # script_params = { # '--data-folder': def_blob_store.path('256_ObjectCategories_preproc').as_mount(), # '--remote_execution': "" # estimator_entry_script_arguments=[ # '--data-folder', preprocessed_data,
print("Azure Machine Learning Compute attached") # get pointer to default blob store def_blob_store = Datastore(ws, "workspaceblobstore") print("Blobstore's name: {}".format(def_blob_store.name)) # Naming the intermediate data as anomaly data and assigning it to a variable anomaly_data = PipelineData("anomaly_data", datastore=def_blob_store) print("Anomaly data object created") anom_detect = PythonScriptStep( name="anomaly_detection", # script_name="anom_detect.py", script_name="code/anom_detect.py", arguments=["--output_directory", anomaly_data], outputs=[anomaly_data], compute_target=aml_compute, source_directory=project_folder, allow_reuse=True, runconfig=amlcompute_run_config) print("Anomaly Detection Step created.") automl_train = PythonScriptStep( name="automl_train", # script_name="automl_train.py", script_name="code/automl_train.py", arguments=["--input_directory", anomaly_data], inputs=[anomaly_data], compute_target=aml_compute, source_directory=project_folder, allow_reuse=True,
def main(): load_dotenv() workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" resource_group = "AML-RG-" + os.environ.get("BASE_NAME") subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") app_id = os.environ.get("SP_APP_ID") app_secret = os.environ.get("SP_APP_SECRET") sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU") compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") aks_name = os.environ.get("AKS_CLUSTER_NAME") model_name = os.environ.get("MODEL_NAME") build_id = os.environ.get("BUILD_BUILDID") pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") experiment_name = os.environ.get("EXPERIMENT_NAME") # Get Azure machine learning workspace aml_workspace = get_workspace(workspace_name, resource_group, subscription_id, tenant_id, app_id, app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, compute_name, vm_size) if aml_compute is not None: print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( conda_packages=['numpy', 'pandas', 'scikit-learn', 'keras'], pip_packages=[ 'azure', 'azureml-sdk', 'azure-storage', 'azure-storage-blob', 'transformers>=2.1.1', 'tensorflow>=2.0.0', 'tensorflow-gpu>=2.0.0' ])) run_config.environment.docker.enabled = True datastore_name = 'tfworld' container_name = 'azure-service-classifier' account_name = 'johndatasets' sas_token = '?sv=2019-02-02&ss=bfqt&srt=sco&sp=rl&se=2021-06-02T03:40:25Z&st=2020-03-09T19:40:25Z&spr=https&sig=bUwK7AJUj2c%2Fr90Qf8O1sojF0w6wRFgL2c9zMVCWNPA%3D' try: existing_datastore = Datastore.get(aml_workspace, datastore_name) except: # noqa: E722 existing_datastore = Datastore \ .register_azure_blob_container(workspace=aml_workspace, datastore_name=datastore_name, container_name=container_name, account_name=account_name, sas_token=sas_token ) azure_dataset = Dataset.File.from_files(path=(existing_datastore, 'data')) azure_dataset = azure_dataset.register( workspace=aml_workspace, name='Azure Services Dataset', description='Dataset containing azure related posts on Stackoverflow', create_new_version=True) azure_dataset.to_path() input_data = azure_dataset.as_named_input('input_data1').as_mount( '/tmp/data') model_name = PipelineParameter(name="model_name", default_value=model_name) max_seq_length = PipelineParameter(name="max_seq_length", default_value=128) learning_rate = PipelineParameter(name="learning_rate", default_value=3e-5) num_epochs = PipelineParameter(name="num_epochs", default_value=3) export_dir = PipelineParameter(name="export_dir", default_value="./outputs/exports") batch_size = PipelineParameter(name="batch_size", default_value=32) steps_per_epoch = PipelineParameter(name="steps_per_epoch", default_value=100) # initialize the TensorFlow estimator estimator = TensorFlow(source_directory=sources_directory_train, entry_script=train_script_path, compute_target=aml_compute, framework_version='2.0', use_gpu=True, pip_packages=[ 'transformers==2.0.0', 'azureml-dataprep[fuse,pandas]==1.3.0' ]) train_step = EstimatorStep( name="Train Model", estimator=estimator, estimator_entry_script_arguments=[ "--data_dir", input_data, "--max_seq_length", max_seq_length, "--learning_rate", learning_rate, "--num_epochs", num_epochs, "--export_dir", export_dir, "--batch_size", batch_size, "--steps_per_epoch", steps_per_epoch ], 
compute_target=aml_compute, inputs=[input_data], allow_reuse=False, ) print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=evaluate_script_path, compute_target=aml_compute, source_directory=sources_directory_train, arguments=[ "--model_name", model_name, "--build_id", build_id, ], runconfig=run_config, allow_reuse=False, ) print("Step Evaluate created") # Currently, the Evaluate step will automatically register # the model if it performs better. This step is based on a # previous version of the repo which utilized JSON files to # track evaluation results. evaluate_step.run_after(train_step) steps = [evaluate_step] train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( name=pipeline_name, description="Model training/retraining pipeline", version=build_id) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') response = published_pipeline.submit( # noqa: F841 workspace=aml_workspace, experiment_name=experiment_name) # Get AKS cluster for deployment aks_compute = get_aks(aml_workspace, aks_name) if aks_compute is not None: print(aks_compute)
'onnxruntime==1.3.0', 'tf2onnx==1.6.3' ]) scripts_folder = 'scripts' def_blob_store = ws.get_default_datastore() train_output = PipelineData('train_output', datastore=def_blob_store) print("train_output PipelineData object created") trainStep = PythonScriptStep( name="train", script_name="train.py", arguments=["--model_name", args.model_name, "--build_number", args.build_number, "--output", train_output], outputs=[train_output], compute_target=aml_compute, runconfig=run_amlcompute, source_directory=scripts_folder, allow_reuse=False ) print("trainStep created") evaluate_output = PipelineData('evaluate_output', datastore=def_blob_store) evaluateStep = PythonScriptStep( name="evaluate", script_name="evaluate.py", arguments=["--model_name", args.model_name, "--build_number", args.build_number, "--input", train_output,
run_config.environment = env # Step 1: Train Model train_output_dir = PipelineData(name='train_output', pipeline_output_name='train_output', datastore=datastore, output_mode='mount', is_directory=True) train_step = PythonScriptStep(name='Train Model', source_directory='./src', script_name='train.py', compute_target=compute_target, arguments=[ '--data_dir', dataset, '--checkpoint_dir', checkpoint, '--tensorflow_models_dir', tensorflow_models, '--output_dir', train_output_dir ], inputs=[dataset, checkpoint, tensorflow_models], outputs=[train_output_dir], runconfig=run_config) # Step 2: Export Model export_output_dir = PipelineData(name='export_output', pipeline_output_name='export_output', datastore=datastore, output_mode='mount', is_directory=True) export_step = PythonScriptStep(
processed_mnist_data run_config = RunConfiguration() run_config.environment.docker.enabled = True run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE run_config.environment.python.user_managed_dependencies = False run_config.environment.python.conda_dependencies = CondaDependencies.create( pip_packages=['azureml-sdk', 'numpy']) # source directory source_directory = 'DataExtraction' extractDataStep = PythonScriptStep( script_name="extract.py", arguments=["--output_extract", processed_mnist_data], outputs=[processed_mnist_data], compute_target=compute_target_cpu, source_directory=source_directory, runconfig=run_config) print("Data Extraction Step created") from azureml.train.dnn import TensorFlow source_directory = 'Training' est = TensorFlow(source_directory=source_directory, compute_target=compute_target_cpu, entry_script='train.py', use_gpu=False, framework_version='1.13')
def build_pipeline(dataset, ws, config): print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name)) base_dir = '.' def_blob_store = ws.get_default_datastore() # folder for scripts that need to be uploaded to Aml compute target script_folder = './scripts' os.makedirs(script_folder, exist_ok=True) shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder) shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder) shutil.copy(os.path.join(base_dir, 'pipelines_create.py'), script_folder) shutil.copy(os.path.join(base_dir, 'train.py'), script_folder) shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder) shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder) shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder) shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder) shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder) shutil.copy(os.path.join(base_dir, 'config.json'), script_folder) cpu_compute_name = config['cpu_compute'] try: cpu_compute_target = AmlCompute(ws, cpu_compute_name) print("found existing compute target: %s" % cpu_compute_name) except:# ComputeTargetException: print("creating new compute target") provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4, idle_seconds_before_scaledown=1800) cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name, provisioning_config) cpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # use get_status() to get a detailed status for the current cluster. print(cpu_compute_target.get_status().serialize()) # choose a name for your cluster gpu_compute_name = config['gpu_compute'] try: gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name) print("found existing compute target: %s" % gpu_compute_name) except: print('Creating a new compute target...') provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=10, idle_seconds_before_scaledown=1800) # create the cluster gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config) # can poll for a minimum number of nodes and for a specific timeout. # if no min node count is provided it uses the scale settings for the cluster gpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # use get_status() to get a detailed status for the current cluster. try: print(gpu_compute_target.get_status().serialize()) except BaseException as e: print("Could not get status of compute target.") print(e) # conda dependencies for compute targets cpu_cd = CondaDependencies.create(conda_packages=["py-opencv=3.4.2"], pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D', pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3", "requests==2.21.0", "sklearn", "pandas==0.24.2", "azureml-sdk", "numpy==1.16.2", "pillow==6.0.0"]) # Runconfigs cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd) cpu_compute_run_config.environment.docker.enabled = True cpu_compute_run_config.environment.docker.gpu_support = False cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE cpu_compute_run_config.environment.spark.precache_packages = False print("PipelineData object created") # DataReference to where video data is stored. 
video_data = DataReference( datastore=def_blob_store, data_reference_name="video_data", path_on_datastore=os.path.join("prednet", "data", "video", dataset)) print("DataReference object created") # Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1. raw_data = PipelineData("raw_video_fames", datastore=def_blob_store) preprocessed_data = PipelineData("preprocessed_video_frames", datastore=def_blob_store) data_metrics = PipelineData("data_metrics", datastore=def_blob_store) data_output = PipelineData("output_data", datastore=def_blob_store) # prepare dataset for training/testing prednet video_decoding = PythonScriptStep( name='decode_videos', script_name="video_decoding.py", arguments=["--input_data", video_data, "--output_data", raw_data], inputs=[video_data], outputs=[raw_data], compute_target=cpu_compute_target, source_directory=script_folder, runconfig=cpu_compute_run_config, allow_reuse=True, hash_paths=['.'] ) print("video_decode step created") # prepare dataset for training/testing recurrent neural network data_prep = PythonScriptStep( name='prepare_data', script_name="data_preparation.py", arguments=["--input_data", raw_data, "--output_data", preprocessed_data], inputs=[raw_data], outputs=[preprocessed_data], compute_target=cpu_compute_target, source_directory=script_folder, runconfig=cpu_compute_run_config, allow_reuse=True, hash_paths=['.'] ) data_prep.run_after(video_decoding) print("data_prep step created") # configure access to ACR for pulling our custom docker image acr = ContainerRegistry() acr.address = config['acr_address'] acr.username = config['acr_username'] acr.password = config['acr_password'] est = Estimator(source_directory=script_folder, compute_target=gpu_compute_target, entry_script='train.py', use_gpu=True, node_count=1, custom_docker_image = "wopauli_1.8-gpu:1", image_registry_details=acr, user_managed=True ) ps = RandomParameterSampling( { '--batch_size': choice(1, 2, 4, 8), '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"), '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"), #, "48, 96"), '--learning_rate': loguniform(-6, -1), '--lr_decay': loguniform(-9, -1), '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"), '--transfer_learning': choice("True", "False") } ) policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10) hdc = HyperDriveConfig(estimator=est, hyperparameter_sampling=ps, policy=policy, primary_metric_name='val_loss', primary_metric_goal=PrimaryMetricGoal.MINIMIZE, max_total_runs=10, max_concurrent_runs=5, max_duration_minutes=60*6 ) hd_step = HyperDriveStep( name="train_w_hyperdrive", hyperdrive_run_config=hdc, estimator_entry_script_arguments=[ '--data-folder', preprocessed_data, '--remote_execution', '--dataset', dataset ], inputs=[preprocessed_data], metrics_output = data_metrics, allow_reuse=True ) hd_step.run_after(data_prep) registration_step = PythonScriptStep( name='register_model', script_name='model_registration.py', arguments=['--input_dir', data_metrics, '--output_dir', data_output], compute_target=cpu_compute_target, inputs=[data_metrics], outputs=[data_output], source_directory=script_folder, allow_reuse=True, hash_paths=['.'] ) registration_step.run_after(hd_step) pipeline = Pipeline(workspace=ws, steps=[video_decoding, data_prep, hd_step, registration_step]) print ("Pipeline is built") pipeline.validate() print("Simple validation complete") pipeline_name = 'prednet_' + dataset published_pipeline = 
pipeline.publish(name=pipeline_name)

    schedule = Schedule.create(workspace=ws,
                               name=pipeline_name + "_sch",
                               pipeline_id=published_pipeline.id,
                               experiment_name=pipeline_name,
                               datastore=def_blob_store,
                               wait_for_provisioning=True,
                               description="Datastore scheduler for Pipeline " + pipeline_name,
                               path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                               polling_interval=1)

    return pipeline_name
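# Schedules keep firing until they are explicitly disabled. A hedged sketch of
# how the datastore-triggered schedule created above can be found and switched
# off later, e.g. from a cleanup script; `ws` and `pipeline_name` are the same
# values used when the schedule was created.
from azureml.pipeline.core.schedule import Schedule

for sched in Schedule.list(ws):
    print(sched.id, sched.name, sched.status)
    if sched.name == pipeline_name + "_sch":
        sched.disable(wait_for_provisioning=True)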
'azureml-sdk', 'tqdm' ]) # Runconfig gpu_compute_run_config = RunConfiguration(conda_dependencies=gpu_cd) gpu_compute_run_config.environment.docker.enabled = True gpu_compute_run_config.environment.docker.gpu_support = True gpu_compute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE gpu_compute_run_config.environment.spark.precache_packages = False # Training step for Xception train_xception = PythonScriptStep( name='train_Xception', script_name="train_xception.py", arguments=["--data-folder", labeled_data, "--remote_execution"], inputs=[labeled_data], compute_target=gpu_compute_target, source_directory=script_folder, runconfig=gpu_compute_run_config, allow_reuse=True, hash_paths=['.']) print("training step created") # Define Pipeline pipeline = Pipeline(workspace=ws, steps=[train_xception]) print("Pipeline is built") # Validate Pipeline pipeline.validate() print("Validation complete") pipeline_name = 'kd_train_the_teacher'
def main(): e = Env() # Get Azure machine learning workspace aml_workspace = Workspace.get(name=e.workspace_name, subscription_id=e.subscription_id, resource_group=e.resource_group) print("get_workspace:") print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size) if aml_compute is not None: print("aml_compute:") print(aml_compute) # Create a reusable Azure ML environment environment = get_environment(aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501 run_config = RunConfiguration() run_config.environment = environment if (e.datastore_name): datastore_name = e.datastore_name else: datastore_name = aml_workspace.get_default_datastore().name run_config.environment.environment_variables[ "DATASTORE_NAME"] = datastore_name # NOQA: E501 model_name_param = PipelineParameter(name="model_name", default_value=e.model_name) dataset_version_param = PipelineParameter(name="dataset_version", default_value=e.dataset_version) data_file_path_param = PipelineParameter(name="data_file_path", default_value="none") caller_run_id_param = PipelineParameter(name="caller_run_id", default_value="none") # Get dataset name dataset_name = e.dataset_name # # Check to see if dataset exists # if (dataset_name not in aml_workspace.datasets): # # Create dataset from lacemlops sample data # sample_data = load_lacemlops() # df = pd.DataFrame( # data=sample_data.data, # columns=sample_data.feature_names) # df['Y'] = sample_data.target # file_name = 'lacemlops.csv' # df.to_csv(file_name, index=False) # # Upload file to default datastore in workspace # datatstore = Datastore.get(aml_workspace, datastore_name) # target_path = 'training-data/' # datatstore.upload_files( # files=[file_name], # target_path=target_path, # overwrite=True, # show_progress=False) # # Register dataset # path_on_datastore = os.path.join(target_path, file_name) # dataset = Dataset.Tabular.from_delimited_files( # path=(datatstore, path_on_datastore)) # dataset = dataset.register( # workspace=aml_workspace, # name=dataset_name, # description='lacemlops training data', # tags={'format': 'CSV'}, # create_new_version=True) # Create a PipelineData to pass data between steps pipeline_data = PipelineData( 'pipeline_data', datastore=aml_workspace.get_default_datastore()) train_step = PythonScriptStep( name="Train Model", script_name=e.train_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, outputs=[pipeline_data], arguments=[ "--model_name", model_name_param, "--step_output", pipeline_data, "--dataset_version", dataset_version_param, "--data_file_path", data_file_path_param, "--caller_run_id", caller_run_id_param, "--dataset_name", dataset_name, ], runconfig=run_config, allow_reuse=False, ) print("Step Train created") evaluate_step = PythonScriptStep( name="Evaluate Model ", script_name=e.evaluate_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ "--model_name", model_name_param, "--allow_run_cancel", e.allow_run_cancel, ], runconfig=run_config, allow_reuse=False, ) print("Step Evaluate created") register_step = PythonScriptStep( name="Register Model ", script_name=e.register_script_path, compute_target=aml_compute, source_directory=e.sources_directory_train, inputs=[pipeline_data], arguments=[ "--model_name", model_name_param, "--step_input", pipeline_data, ], runconfig=run_config, allow_reuse=False, ) print("Step Register created") # Check run_evaluation flag to include or exclude evaluation step. 
    if (e.run_evaluation).lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main(): load_dotenv() workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" resource_group = os.environ.get("RESOURCE_GROUP") subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") app_id = os.environ.get("SP_APP_ID") app_secret = os.environ.get("SP_APP_SECRET") deploy_script_path = os.environ.get("DEPLOY_SCRIPT_PATH") vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") model_name = os.environ.get("MODEL_NAME") build_id = os.environ.get("BUILD_BUILDID") pipeline_name = os.environ.get("DEPLOY_PIPELINE_NAME") service_name = os.environ.get("DEPLOY_SERVICE_NAME") sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") # Get Azure machine learning workspace aml_workspace = get_workspace(workspace_name, resource_group, subscription_id, tenant_id, app_id, app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, compute_name, vm_size) if aml_compute is not None: print(aml_compute) conda_dependencies = CondaDependencies.create( conda_packages=['numpy', 'pandas', 'scikit-learn'], pip_packages=[ 'azureml-core==1.0.72.*', 'azureml-sdk==1.0.72.*', 'azure-storage', 'azure-storage-blob', 'azureml-dataprep', 'azureml-datadrift==1.0.72.*' ], pin_sdk_version=False) print(conda_dependencies.serialize_to_string()) run_config = RunConfiguration(framework='Python', conda_dependencies=conda_dependencies) run_config.environment.docker.enabled = True model_name = PipelineParameter(name="model_name", default_value=model_name) print(model_name) release_id = PipelineParameter(name="release_id", default_value="0") print(release_id) service_name = PipelineParameter(name="service_name", default_value=service_name) print(service_name) deploy_step = PythonScriptStep( name="Deploy Model", script_name=deploy_script_path, compute_target=aml_compute, source_directory=sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, "--service_name", service_name ], runconfig=run_config, allow_reuse=False, ) print("Step Deploy created") steps = [deploy_step] deploy_pipeline = Pipeline(workspace=aml_workspace, steps=steps) deploy_pipeline.validate() published_pipeline = deploy_pipeline.publish( name=pipeline_name, description="Model deploy pipeline", version=build_id) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}')
def get_pipeline(aml_compute: ComputeTarget, blob_ds: Datastore, batch_env: Environment, tf_env: Environment) -> str: """ Creates pipeline steps Parameters: aml_compute (ComputeTarget): a reference to a compute blob_ds (DataStore): a reference to a datastore batch_env (Environment): a reference to environment object tf_env (Environment): a horovod/tf environment Returns: string: a set of pipeline steps """ # We need something to generate data by the way pipeline_files = PipelineData("pipeline_files", datastore=blob_ds).as_dataset() # Pipeline parameters to use with every run is_debug = PipelineParameter("is_debug", default_value=False) relay_connection_name = PipelineParameter("debug_relay_connection_name", default_value="none") single_step_config = RunConfiguration() single_step_config.environment = batch_env single_step = PythonScriptStep( name=f"single-step", script_name="samples/azure_ml_advanced/steps/single_step.py", source_directory=".", runconfig=single_step_config, arguments=[ "--pipeline-files", pipeline_files, "--is-debug", is_debug, "--debug-relay-connection-name", relay_connection_name, "--debug-port", 5678, "--debug-relay-connection-string-secret", debug_connection_string_secret_name ], inputs=[], outputs=[pipeline_files], compute_target=aml_compute, allow_reuse=False) output_dir = PipelineData("output_dir") parallel_run_config = ParallelRunConfig( entry_script="samples/azure_ml_advanced/steps/parallel_step.py", source_directory=".", mini_batch_size="5", output_action="summary_only", environment=batch_env, compute_target=aml_compute, error_threshold=10, run_invocation_timeout=600, # very important for debugging node_count=2, process_count_per_node=1) parallelrun_step = ParallelRunStep( name="parallel-run-step", parallel_run_config=parallel_run_config, inputs=[pipeline_files], output=output_dir, arguments=[ "--is-debug", is_debug, "--debug-relay-connection-name", relay_connection_name, "--debug-port", 5679, "--debug-relay-connection-string-secret", debug_connection_string_secret_name ], allow_reuse=False) parallelrun_step.run_after(single_step) distr_config = MpiConfiguration(process_count_per_node=1, node_count=2) src = ScriptRunConfig( source_directory=".", script="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py", arguments=[ "--input-ds", pipeline_files, "--is-debug", is_debug, "--debug-relay-connection-name", relay_connection_name, "--debug-port", 5680, "--debug-relay-connection-string-secret", debug_connection_string_secret_name ], compute_target=compute_name, environment=tf_env, distributed_job_config=distr_config, ) mpi_step = PythonScriptStep( name="mpi-step", script_name="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py", arguments=[ "--input-ds", pipeline_files, "--is-debug", is_debug, "--debug-relay-connection-name", relay_connection_name, "--debug-port", 5680, "--debug-relay-connection-string-secret", debug_connection_string_secret_name ], compute_target=aml_compute, inputs=[pipeline_files], outputs=[], runconfig=src.run_config, source_directory=".") mpi_step.run_after(parallelrun_step) print("Pipeline Steps Created") steps = [single_step, parallelrun_step, mpi_step] print(f"Returning {len(steps)} steps") return steps
cd = CondaDependencies.create(pip_packages=["azureml-train-automl"])

# Runconfig
amlcompute_run_config = RunConfiguration(framework="python",
                                         conda_dependencies=cd)
amlcompute_run_config.environment.docker.enabled = False
amlcompute_run_config.environment.docker.gpu_support = False
amlcompute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
amlcompute_run_config.environment.spark.precache_packages = False

def_data_store = ws.get_default_datastore()

automl_step = PythonScriptStep(
    name="automl_step",
    script_name="automl_step.py",
    compute_target=aml_compute_target,
    source_directory='.',  # project_folder
    allow_reuse=True,
    runconfig=amlcompute_run_config)
print("AutoML Training Step created.")

steps = [automl_step]
print("Step list created")

pipeline = Pipeline(workspace=ws, steps=steps)
print("Pipeline is built")

pipeline.validate()
print("Pipeline validation complete")
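# A hedged follow-up (not in the original snippet): submit the validated
# pipeline to an experiment; the experiment name "automl-pipeline" is an
# assumption for illustration.
from azureml.core import Experiment

pipeline_run = Experiment(ws, "automl-pipeline").submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)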
def build_pipeline_steps(automlconfig: AutoMLConfig,
                         data: Dataset,
                         target_column: str,
                         compute_target: ComputeTarget,
                         group_column_names: list,
                         time_column_name: str,
                         deploy: bool,
                         service_name: str = 'grouping-demo') -> StepSequence:
    steps = []

    metrics_output_name = 'metrics_{}'
    best_model_output_name = 'best_model_{}'
    count = 0
    model_names = []

    # get all automl configs by group
    configs = _get_configs(automlconfig, data, target_column,
                           compute_target, group_column_names)

    # build a runconfig for register model
    register_config = RunConfiguration()
    cd = CondaDependencies()
    cd.add_pip_package('azureml-pipeline')
    register_config.environment.python.conda_dependencies = cd

    # create each automl step end-to-end (train, register)
    for group_name, conf in configs.items():
        # create automl metrics output
        metrics_data = PipelineData(
            name='metrics_data_{}'.format(group_name),
            pipeline_output_name=metrics_output_name.format(group_name),
            training_output=TrainingOutput(type='Metrics'))
        # create automl model output
        model_data = PipelineData(
            name='model_data_{}'.format(group_name),
            pipeline_output_name=best_model_output_name.format(group_name),
            training_output=TrainingOutput(
                type='Model',
                metric=conf.user_settings['primary_metric']))

        automl_step = AutoMLStep(
            name='automl_{}'.format(group_name),
            automl_config=conf,
            outputs=[metrics_data, model_data],
            allow_reuse=True)
        steps.append(automl_step)

        # pass the group name as a parameter to the register step ->
        # this will become the name of the model for this group.
        group_name_param = PipelineParameter(
            "group_name_{}".format(count), default_value=group_name)
        count += 1

        reg_model_step = PythonScriptStep(
            'register.py',
            name='register_{}'.format(group_name),
            arguments=["--model_name", group_name_param,
                       "--model_path", model_data],
            inputs=[model_data],
            compute_target=compute_target,
            runconfig=register_config,
            source_directory="register",
            allow_reuse=True
        )
        steps.append(reg_model_step)
        model_names.append(group_name)

    final_steps = steps
    if deploy:
        # modify the conda dependencies to ensure we pick up correct
        # versions of azureml-defaults and azureml-train-automl
        cd = CondaDependencies.create(
            pip_packages=['azureml-defaults', 'azureml-train-automl'])
        automl_deps = CondaDependencies(
            conda_dependencies_file_path='deploy/myenv.yml')
        cd._merge_dependencies(automl_deps)
        cd.save('deploy/myenv.yml')

        # add deployment step
        pp_group_column_names = PipelineParameter(
            "group_column_names",
            default_value="#####".join(list(reversed(group_column_names))))

        pp_model_names = PipelineParameter(
            "model_names", default_value=json.dumps(model_names))

        pp_service_name = PipelineParameter(
            "service_name", default_value=service_name)

        deployment_step = PythonScriptStep(
            'deploy.py',
            name='service_deploy',
            arguments=["--group_column_names", pp_group_column_names,
                       "--model_names", pp_model_names,
                       "--service_name", pp_service_name,
                       "--time_column_name", time_column_name],
            compute_target=compute_target,
            runconfig=RunConfiguration(),
            source_directory="deploy"
        )
        final_steps = StepSequence(steps=[steps, deployment_step])

    return final_steps
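# Hypothetical usage of build_pipeline_steps (everything other than the
# function itself is an assumption): wrap the returned StepSequence in a
# Pipeline and run it against an existing workspace and dataset.
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import Pipeline

ws = Workspace.from_config()
step_sequence = build_pipeline_steps(
    automl_config,           # assumed AutoMLConfig template
    training_dataset,        # assumed registered Dataset
    "Quantity",              # assumed target column
    compute_target,
    group_column_names=["Store", "Brand"],   # assumed grouping columns
    time_column_name="WeekStarting",         # assumed time column
    deploy=True)
pipeline = Pipeline(workspace=ws, steps=step_sequence)
pipeline_run = Experiment(ws, "grouping-demo").submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)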
def get_scoring_pipeline(
    scoring_dataset: Dataset,
    output_loc: PipelineData,
    score_run_config: ParallelRunConfig,
    copy_run_config: RunConfiguration,
    computetarget: ComputeTarget,
    ws: Workspace,
    env: Env,
) -> Pipeline:
    """
    Creates the scoring pipeline.

    :param scoring_dataset: Data to score
    :param output_loc: Location to save the scoring results
    :param score_run_config: Parallel Run configuration to support
        parallelized scoring
    :param copy_run_config: Script Run configuration to support score copying
    :param computetarget: AML Compute target
    :param ws: AML Workspace
    :param env: Environment Variables

    :returns: Scoring pipeline instance
    """
    # To help filter the model, make the model name, model version and a
    # tag/value pair bindable parameters so that they can be passed to
    # the pipeline when invoked either over REST or via the AML SDK.
    model_name_param = PipelineParameter("model_name", default_value=" ")
    model_version_param = PipelineParameter("model_version", default_value=" ")
    model_tag_name_param = PipelineParameter("model_tag_name", default_value=" ")
    model_tag_value_param = PipelineParameter("model_tag_value", default_value=" ")

    scoring_step = ParallelRunStep(
        name="scoringstep",
        inputs=[scoring_dataset],
        output=output_loc,
        arguments=[
            "--model_name", model_name_param,
            "--model_version", model_version_param,
            "--model_tag_name", model_tag_name_param,
            "--model_tag_value", model_tag_value_param,
        ],
        parallel_run_config=score_run_config,
        allow_reuse=False,
    )

    copying_step = PythonScriptStep(
        name="scorecopystep",
        script_name=env.batchscore_copy_script_path,
        source_directory=env.sources_directory_train,
        arguments=[
            "--output_path",
            output_loc,
            "--scoring_output_filename",
            env.scoring_datastore_output_filename
            if env.scoring_datastore_output_filename is not None
            else "",
            "--scoring_datastore",
            env.scoring_datastore_storage_name
            if env.scoring_datastore_storage_name is not None
            else "",
            "--score_container",
            env.scoring_datastore_output_container
            if env.scoring_datastore_output_container is not None
            else "",
            "--scoring_datastore_key",
            env.scoring_datastore_access_key
            if env.scoring_datastore_access_key is not None
            else "",
        ],
        inputs=[output_loc],
        allow_reuse=False,
        compute_target=computetarget,
        runconfig=copy_run_config,
    )
    return Pipeline(workspace=ws, steps=[scoring_step, copying_step])
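# Sketch only (not from the original module): publish the scoring pipeline
# returned above and trigger it with the bindable model parameters. All
# literal names and values below are assumptions.
scoring_pipeline = get_scoring_pipeline(
    scoring_dataset, output_loc, score_run_config, copy_run_config,
    computetarget, ws, env)
published = scoring_pipeline.publish(
    name="batch-scoring-pipeline",
    description="Batch scoring pipeline",
    version="1.0")
pipeline_run = published.submit(
    ws,
    experiment_name="batch-scoring",
    pipeline_parameters={
        "model_name": "diabetes_model",  # assumed registered model name
        "model_version": "1",
        "model_tag_name": " ",
        "model_tag_value": " ",
    })
pipeline_run.wait_for_completion(show_output=True)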
    # (continuation of a CondaDependencies.create(...) call; the opening of
    # the call lies outside this snippet)
    conda_packages=['pandas', 'scikit-learn', 'numpy'],
    pip_packages=[
        'azureml-sdk',
        'azureml-dataprep',
        'azureml-dataprep[pandas]',
        'azureml-train-automl'
    ],
    pin_sdk_version=False)

scripts_folder = './scripts'

prepared_data = PipelineData("diabetes_data_prep", datastore=datastore)

prep_data_step = PythonScriptStep(
    name="Prep diabetes data",
    script_name="prep_data.py",
    arguments=["--input_file", blob_diabetes_data,
               "--output_path", prepared_data],
    inputs=[blob_diabetes_data],
    outputs=[prepared_data],
    compute_target=aml_compute,
    runconfig=aml_run_config,
    source_directory=scripts_folder,
    allow_reuse=True)

print("Preparing the 'split train and test data' step")

output_split_train_x = PipelineData("diabetes_automl_split_train_x",
                                    datastore=datastore)
output_split_train_y = PipelineData("diabetes_automl_split_train_y",
                                    datastore=datastore)
output_split_test_x = PipelineData("diabetes_automl_split_test_x",
                                   datastore=datastore)
output_split_test_y = PipelineData("diabetes_automl_split_test_y",
                                   datastore=datastore)
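# The split step itself is not shown in the snippet above; this is a plausible
# sketch only. The script name "split_data.py" and its argument names are
# assumptions, not part of the original sample.
split_data_step = PythonScriptStep(
    name="Split train and test data",
    script_name="split_data.py",  # assumed script name
    arguments=[
        "--input_path", prepared_data,
        "--output_split_train_x", output_split_train_x,
        "--output_split_train_y", output_split_train_y,
        "--output_split_test_x", output_split_test_x,
        "--output_split_test_y", output_split_test_y,
    ],
    inputs=[prepared_data],
    outputs=[output_split_train_x, output_split_train_y,
             output_split_test_x, output_split_test_y],
    compute_target=aml_compute,
    runconfig=aml_run_config,
    source_directory=scripts_folder,
    allow_reuse=True)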