def publish_automl_pipeline(ws, pipeline, build_id):
    published_pipeline = pipeline.publish(
        name=PIPELINE_NAME, description=build_id, version=build_id)
    try:
        pipeline_endpoint = PipelineEndpoint.get(
            workspace=ws, name=PIPELINE_ENDPOINT_NAME)
        print("pipeline endpoint exists, add a version")
        pipeline_endpoint.add_default(published_pipeline)
    except Exception:
        print("publish a new pipeline endpoint")
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=ws,
            name=PIPELINE_ENDPOINT_NAME,
            pipeline=published_pipeline,
            description='NYCtaxi_automl_training_pipeline_endpoint')

    print(f'Published pipeline: {published_pipeline.name}')
    print(f' version: {published_pipeline.version}')
    print(f'Pipeline endpoint: {pipeline_endpoint.name}')

    print('##vso[task.setvariable variable=PIPELINE_ENDPOINT_NAME;]{}'.format(
        pipeline_endpoint.name))
    print('##vso[task.setvariable variable=PIPELINE_ENDPOINT_DEFAULT_VER;]{}'.format(
        pipeline_endpoint.default_version))
    print('##vso[task.setvariable variable=PUBLISHED_PIPELINE_VERSION;]{}'.format(
        published_pipeline.version))

    return pipeline_endpoint
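# The function above emits the endpoint name and default version as Azure DevOps
# variables via ##vso logging commands. A minimal sketch (not from the original
# source) of how a later job could read those variables from its environment and
# trigger the endpoint's current default version; the environment-variable names
# and the experiment name are assumptions.
import os

from azureml.core import Workspace
from azureml.pipeline.core import PipelineEndpoint

ws = Workspace.from_config()
endpoint_name = os.environ["PIPELINE_ENDPOINT_NAME"]

pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
# Submits a run against the endpoint's default version.
run = pipeline_endpoint.submit(experiment_name="automl-training")
run.wait_for_completion(show_output=True)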
def add_endpoint(ws: Workspace,
                 pipeline: PublishedPipeline,
                 endpoint_name: str) -> PipelineEndpoint:
    endpoint_list = [p.name for p in PipelineEndpoint.list(ws)]
    endpoint = None
    if endpoint_name in endpoint_list:
        # endpoint already exists, so add the pipeline as its new default version
        endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
        endpoint.add_default(pipeline)
    else:
        # endpoint does not exist, so publish a new one
        endpoint = PipelineEndpoint.publish(
            workspace=ws,
            name=endpoint_name,
            pipeline=pipeline,
            description="Seer Pipeline Endpoint")
    return endpoint
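# A minimal usage sketch for add_endpoint; the workspace, step list, and names
# below are assumptions, not part of the original source.
published = Pipeline(workspace=ws, steps=step_list).publish(name="seer-training")
endpoint = add_endpoint(ws, published, endpoint_name="seer-training-endpoint")
print(f"Endpoint {endpoint.name}, default version: {endpoint.default_version}")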
def publish_pipeline(workspace: Workspace,
                     steps: List,
                     name: str,
                     description: str = "") -> Tuple[str, PipelineEndpoint]:
    published_pipeline = Pipeline(workspace=workspace, steps=steps).publish(name)
    try:
        pipeline_endpoint = PipelineEndpoint.get(workspace, name=name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace,
            name=name,
            pipeline=published_pipeline,
            description=description)
    return published_pipeline.id, pipeline_endpoint
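# A minimal usage sketch for publish_pipeline; the step list and names below
# are assumptions, not from the original source.
pipeline_id, endpoint = publish_pipeline(
    workspace=ws,
    steps=[train_step],
    name="training-pipeline",
    description="Training pipeline endpoint")
print(f"Published pipeline id: {pipeline_id}, endpoint: {endpoint.name}")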
print("Azure ML SDK version:", azureml.core.VERSION) parser = argparse.ArgumentParser("publish_to_pipeline_endpoint") parser.add_argument("--pipeline_id", type=str, help="Id of the published pipeline that should get added to the Pipeline Endpoint", required=True) parser.add_argument("--pipeline_endpoint_name", type=str, help="Name of the Pipeline Endpoint that the the pipeline should be added to", required=True) parser.add_argument("--pipeline_endpoint_description", type=str, help="Description for the Pipeline Endpoint", default="Pipeline Endpoint", required=False) args = parser.parse_args() print(f'Arguments: {args}') print('Connecting to workspace') ws = Workspace.from_config() print(f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}') endpoint_name = args.pipeline_endpoint_name pipeline_description = args.pipeline_endpoint_description pipeline_id = args.pipeline_id published_pipeline = PublishedPipeline.get(workspace=ws, id=pipeline_id) # Add tested published pipeline to pipeline endpoint try: pl_endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name) pl_endpoint.add_default(published_pipeline) print(f'Added pipeline {pipeline_id} to Pipeline Endpoint with name {endpoint_name}') except Exception: print(f'Will create new Pipeline Endpoint with name {endpoint_name} with pipeline {pipeline_id}') pl_endpoint = PipelineEndpoint.publish(workspace=ws, name=endpoint_name, pipeline=published_pipeline, description=pipeline_description)
def main(): """Build pipeline.""" # Environment variables env = Env() # Azure ML workspace aml_workspace = Workspace.get( name=env.workspace_name, subscription_id=env.subscription_id, resource_group=env.resource_group, ) logger.info(f"Azure ML workspace: {aml_workspace}") # Azure ML compute cluster aml_compute = get_compute(aml_workspace, env.compute_name) logger.info(f"Aazure ML compute cluster: {aml_compute}") # Azure ML environment environment = Environment(name=env.aml_env_name) conda_dep = CondaDependencies( conda_dependencies_file_path="./local_development/dev_dependencies.yml" ) environment.python.conda_dependencies = conda_dep run_config = RunConfiguration() run_config.environment = environment # Pipeline Data preparation_pipelinedata = PipelineData("preparation_pipelinedata", is_directory=True).as_dataset() extraction_pipelinedata = PipelineData("extraction_pipelinedata", is_directory=True) training_pipelinedata = PipelineData("training_pipelinedata", is_directory=True) # List of pipeline steps step_list = list() preparation_step = PythonScriptStep( name="preparation-step", compute_target=aml_compute, source_directory=env.sources_directory_train, script_name=env.preparation_step_script_path, outputs=[preparation_pipelinedata], arguments=[ "--input_path", env.input_dir, "--output_path", preparation_pipelinedata, "--datastore_name", env.blob_datastore_name ], runconfig=run_config) step_list.append(preparation_step) parallel_run_config = ParallelRunConfig( source_directory=env.sources_directory_train, entry_script=env.extraction_step_script_path, mini_batch_size=env.mini_batch_size, error_threshold=env.error_threshold, output_action="append_row", environment=environment, compute_target=aml_compute, node_count=env.node_count, run_invocation_timeout=env.run_invocation_timeout, process_count_per_node=env.process_count_per_node, append_row_file_name="extraction_output.txt") extraction_step = ParallelRunStep( name="extraction-step", inputs=[preparation_pipelinedata], output=extraction_pipelinedata, arguments=["--output_dir", extraction_pipelinedata], parallel_run_config=parallel_run_config) step_list.append(extraction_step) training_step = PythonScriptStep( name="traning-step", compute_target=aml_compute, source_directory=env.sources_directory_train, script_name=env.training_step_script_path, inputs=[extraction_pipelinedata], outputs=[training_pipelinedata], arguments=[ "--input_dir", extraction_pipelinedata, "--output_dir", training_pipelinedata ], runconfig=run_config) step_list.append(training_step) # Build pipeline pipeline = Pipeline(workspace=aml_workspace, steps=step_list) pipeline.validate() logger.info(f"Built pipeline {pipeline}") # Publish pipeline published_pipeline = pipeline.publish( env.pipeline_name, description=env.pipeline_name, version=datetime.utcnow().isoformat()) try: pipeline_endpoint = PipelineEndpoint.get( workspace=aml_workspace, name=env.pipeline_endpoint_name) pipeline_endpoint.add_default(published_pipeline) except ErrorResponseException: pipeline_endpoint = PipelineEndpoint.publish( workspace=aml_workspace, name=env.pipeline_endpoint_name, pipeline=published_pipeline, description=env.pipeline_endpoint_name)
published_pipeline = pipeline.publish(
    # name=pipeline_name, description=pipeline_description, version={...some version...}
    name=pipeline_name,
    description=pipeline_description,
)
print(f"Newly published pipeline id: {published_pipeline.id}")

try:
    pipeline_endpoint = PipelineEndpoint.get(workspace=workspace, name=pipeline_name)
    pipeline_endpoint.add(published_pipeline)
except Exception:
    pipeline_endpoint = PipelineEndpoint.publish(
        workspace=workspace,
        name=pipeline_name,
        pipeline=published_pipeline,
        description=f"Pipeline Endpoint for {pipeline_name}",
    )

# TODO: cleanup older pipeline endpoints(?)

# --- add a schedule for the pipeline (if told to do so)
# note: this is a sample schedule which runs time-based.
# there is also the option to trigger the pipeline based on changes.
# details at https://github.com/Azure/MachineLearningNotebooks/blob/4e7b3784d50e81c313c62bcdf9a330194153d9cd/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb
if schedule:
    recurrence = ScheduleRecurrence(frequency="Day", interval=2, hours=[22],
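# The snippet above is cut off mid-call. A hedged sketch of how the time-based
# schedule setup typically continues; the trailing recurrence arguments, the
# schedule name, and the experiment name are assumptions, not from the original.
from azureml.pipeline.core.schedule import Schedule, ScheduleRecurrence

recurrence = ScheduleRecurrence(frequency="Day", interval=2, hours=[22], minutes=[0])
schedule = Schedule.create(
    workspace=workspace,
    name=f"{pipeline_name}-schedule",
    pipeline_id=published_pipeline.id,
    experiment_name=pipeline_name,
    recurrence=recurrence,
    description="Time-based schedule for the published pipeline")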
import os

import azureml.core
from azureml.core import Workspace
from azureml.pipeline.core import Pipeline, PublishedPipeline, PipelineEndpoint

print(f'Azure ML SDK version: {azureml.core.VERSION}')

endpoint_name = "training-pipeline-endpoint"
pipeline_id = os.getenv('PIPELINE_ID')

# Connect to the workspace
ws = Workspace.from_config()
print(f'WS name: {ws.name}')
print(f'Region: {ws.location}')
print(f'Subscription id: {ws.subscription_id}')
print(f'Resource group: {ws.resource_group}')
print(f'Pipeline ID: {pipeline_id}')

published_pipeline = PublishedPipeline.get(workspace=ws, id=pipeline_id)
print(f'Published Pipeline: {published_pipeline}')

# Check if PipelineEndpoint already exists
if any(pe.name == endpoint_name for pe in PipelineEndpoint.list(ws)):
    print(f'Pipeline Endpoint with name {endpoint_name} already exists, will add pipeline to it')
    pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
    pipeline_endpoint.add(published_pipeline)
    # Set it to default, as we already tested it beforehand!
    pipeline_endpoint.set_default(published_pipeline)
else:
    print(f'Will create Pipeline Endpoint with name {endpoint_name}')
    pipeline_endpoint = PipelineEndpoint.publish(workspace=ws,
                                                 name=endpoint_name,
                                                 pipeline=published_pipeline,
                                                 description="New Training Pipeline Endpoint")
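# A hedged follow-up sketch: disable previously published pipelines that share
# the same name so only the newest version stays active. The name-based filter
# is an assumption about how duplicates should be identified.
for old in PublishedPipeline.list(ws):
    if old.name == published_pipeline.name and old.id != published_pipeline.id:
        print(f'Disabling older published pipeline {old.id}')
        old.disable()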
def main():

    # Environment variables
    env = Env()

    # Azure ML workspace
    aml_workspace = Workspace.get(
        name=env.workspace_name,
        subscription_id=env.subscription_id,
        resource_group=env.resource_group,
    )
    logger.info(f"Azure ML workspace: {aml_workspace}")

    # Azure ML compute cluster
    aml_compute = get_compute(aml_workspace, env.compute_name)
    logger.info(f"Azure ML compute cluster: {aml_compute}")

    # Azure ML environment
    environment = Environment(name=env.aml_env_name)
    conda_dep = CondaDependencies(
        conda_dependencies_file_path="./local_development/dev_dependencies.yml"
    )
    environment.docker.enabled = True
    environment.docker.base_image = env.acr_image
    environment.docker.base_image_registry.address = env.acr_address
    environment.docker.base_image_registry.username = env.acr_username
    environment.docker.base_image_registry.password = env.acr_password
    environment.python.conda_dependencies = conda_dep

    run_config = RunConfiguration()
    run_config.environment = environment

    # List of pipeline steps
    step_list = list()
    first_step = PythonScriptStep(
        name="first_step",
        compute_target=aml_compute,
        source_directory=env.sources_directory_train,
        script_name=env.first_step_script_path,
        outputs=[],
        arguments=[
            "--input_dataset_name", env.input_dataset_name,
            "--waves_dataset_name", env.waves_dataset_name
        ],
        runconfig=run_config)
    step_list.append(first_step)

    # Build pipeline
    pipeline = Pipeline(workspace=aml_workspace, steps=step_list)
    pipeline.validate()
    logger.info(f"Built pipeline {pipeline}")

    # Publish pipeline
    published_pipeline = pipeline.publish(
        env.pipeline_name,
        description=env.pipeline_name,
        version=datetime.utcnow().isoformat())
    try:
        pipeline_endpoint = PipelineEndpoint.get(
            workspace=aml_workspace, name=env.pipeline_endpoint_name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=aml_workspace,
            name=env.pipeline_endpoint_name,
            pipeline=published_pipeline,
            description=env.pipeline_endpoint_name)
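# Both main() functions above rely on an Env helper that is not shown in these
# snippets. A minimal hypothetical sketch of what it might look like, reading
# its values from environment variables; the attribute and variable names are
# assumptions only.
import os
from dataclasses import dataclass, field


@dataclass
class Env:
    workspace_name: str = field(default_factory=lambda: os.environ["WORKSPACE_NAME"])
    subscription_id: str = field(default_factory=lambda: os.environ["SUBSCRIPTION_ID"])
    resource_group: str = field(default_factory=lambda: os.environ["RESOURCE_GROUP"])
    compute_name: str = field(default_factory=lambda: os.environ["COMPUTE_NAME"])
    pipeline_name: str = field(default_factory=lambda: os.environ["PIPELINE_NAME"])
    pipeline_endpoint_name: str = field(default_factory=lambda: os.environ["PIPELINE_ENDPOINT_NAME"])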