def publish_automl_pipeline(ws, pipeline, build_id):
    published_pipeline = pipeline.publish(name=PIPELINE_NAME,
                                          description=build_id,
                                          version=build_id)

    try:
        pipeline_endpoint = PipelineEndpoint.get(workspace=ws,
                                                 name=PIPELINE_ENDPOINT_NAME)
        print("pipeline endpoint exists, add a version")
        pipeline_endpoint.add_default(published_pipeline)
    except Exception:
        print("publish a new pipeline endpoint")
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=ws,
            name=PIPELINE_ENDPOINT_NAME,
            pipeline=published_pipeline,
            description='NYCtaxi_automl_training_pipeline_endpoint')

    print(f'Published pipeline: {published_pipeline.name}')
    print(f' version: {published_pipeline.version}')
    print(f'Pipeline endpoint: {pipeline_endpoint.name}')
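    # Azure DevOps logging commands: register these values as pipeline variables for downstream tasks.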
    print('##vso[task.setvariable variable=PIPELINE_ENDPOINT_NAME;]{}'.format(
        pipeline_endpoint.name))
    print('##vso[task.setvariable variable=PIPELINE_ENDPOINT_DEFAULT_VER;]{}'.
          format(pipeline_endpoint.default_version))
    print('##vso[task.setvariable variable=PUBLISHED_PIPELINE_VERSION;]{}'.
          format(published_pipeline.version))
    return pipeline_endpoint
Example #2
def add_endpoint(ws: Workspace, pipeline: PublishedPipeline,
                 endpoint_name: str) -> PipelineEndpoint:
    endpoint_list = [p.name for p in PipelineEndpoint.list(ws)]
    endpoint = None
    if endpoint_name in endpoint_list:
        # endpoint already exists, so add the pipeline as its new default version
        endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
        endpoint.add_default(pipeline)
    else:
        # endpoint does not exist, so publish a new one
        endpoint = PipelineEndpoint.publish(
            workspace=ws,
            name=endpoint_name,
            pipeline=pipeline,
            description="Seer Pipeline Endpoint")
    return endpoint
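A minimal usage sketch for the helper above, assuming an authenticated workspace config and a previously published pipeline (the pipeline id and endpoint name here are hypothetical):

from azureml.core import Workspace
from azureml.pipeline.core import PublishedPipeline

ws = Workspace.from_config()
# hypothetical id of a pipeline published earlier in the run
pipeline = PublishedPipeline.get(workspace=ws, id="<published-pipeline-id>")
endpoint = add_endpoint(ws, pipeline, endpoint_name="seer-endpoint")
print(endpoint.name, endpoint.default_version)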
Example #3
def publish_pipeline(workspace: Workspace,
                     steps: List,
                     name: str,
                     description: str = "") -> Tuple[str, PipelineEndpoint]:

    published_pipeline = Pipeline(workspace=workspace,
                                  steps=steps).publish(name)

    try:
        pipeline_endpoint = PipelineEndpoint.get(workspace, name=name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace,
            name=name,
            pipeline=published_pipeline,
            description=description)

    return published_pipeline.id, pipeline_endpoint
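A minimal usage sketch, assuming a workspace connection and a previously built list of steps (the step and names are illustrative):

from azureml.core import Workspace

ws = Workspace.from_config()
pipeline_id, endpoint = publish_pipeline(workspace=ws,
                                         steps=[train_step],  # hypothetical PythonScriptStep
                                         name="training-pipeline",
                                         description="Training pipeline endpoint")
print(pipeline_id, endpoint.default_version)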
print("Azure ML SDK version:", azureml.core.VERSION)

parser = argparse.ArgumentParser("publish_to_pipeline_endpoint")
parser.add_argument("--pipeline_id", type=str, help="Id of the published pipeline that should get added to the Pipeline Endpoint", required=True)
parser.add_argument("--pipeline_endpoint_name", type=str, help="Name of the Pipeline Endpoint that the the pipeline should be added to", required=True)
parser.add_argument("--pipeline_endpoint_description", type=str, help="Description for the Pipeline Endpoint", default="Pipeline Endpoint", required=False)
args = parser.parse_args()
print(f'Arguments: {args}')

print('Connecting to workspace')
ws = Workspace.from_config()
print(f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}')

endpoint_name = args.pipeline_endpoint_name
pipeline_description = args.pipeline_endpoint_description
pipeline_id = args.pipeline_id
published_pipeline = PublishedPipeline.get(workspace=ws, id=pipeline_id)

# Add tested published pipeline to pipeline endpoint
try:
    pl_endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
    pl_endpoint.add_default(published_pipeline)
    print(f'Added pipeline {pipeline_id} to Pipeline Endpoint with name {endpoint_name}')
except Exception:
    print(f'Will create new Pipeline Endpoint with name {endpoint_name} with pipeline {pipeline_id}')
    pl_endpoint = PipelineEndpoint.publish(workspace=ws,
                                           name=endpoint_name,
                                           pipeline=published_pipeline,
                                           description=pipeline_description)
Example #5
def main():
    """Build pipeline."""
    # Environment variables
    env = Env()

    # Azure ML workspace
    aml_workspace = Workspace.get(
        name=env.workspace_name,
        subscription_id=env.subscription_id,
        resource_group=env.resource_group,
    )
    logger.info(f"Azure ML workspace: {aml_workspace}")

    # Azure ML compute cluster
    aml_compute = get_compute(aml_workspace, env.compute_name)
    logger.info(f"Aazure ML compute cluster: {aml_compute}")

    # Azure ML environment
    environment = Environment(name=env.aml_env_name)
    conda_dep = CondaDependencies(
        conda_dependencies_file_path="./local_development/dev_dependencies.yml"
    )
    environment.python.conda_dependencies = conda_dep

    run_config = RunConfiguration()
    run_config.environment = environment

    # Pipeline Data
    preparation_pipelinedata = PipelineData("preparation_pipelinedata",
                                            is_directory=True).as_dataset()
    extraction_pipelinedata = PipelineData("extraction_pipelinedata",
                                           is_directory=True)
    training_pipelinedata = PipelineData("training_pipelinedata",
                                         is_directory=True)

    # List of pipeline steps
    step_list = list()
    preparation_step = PythonScriptStep(
        name="preparation-step",
        compute_target=aml_compute,
        source_directory=env.sources_directory_train,
        script_name=env.preparation_step_script_path,
        outputs=[preparation_pipelinedata],
        arguments=[
            "--input_path", env.input_dir, "--output_path",
            preparation_pipelinedata, "--datastore_name",
            env.blob_datastore_name
        ],
        runconfig=run_config)

    step_list.append(preparation_step)

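    # ParallelRunConfig controls how the extraction step is fanned out across the compute cluster.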
    parallel_run_config = ParallelRunConfig(
        source_directory=env.sources_directory_train,
        entry_script=env.extraction_step_script_path,
        mini_batch_size=env.mini_batch_size,
        error_threshold=env.error_threshold,
        output_action="append_row",
        environment=environment,
        compute_target=aml_compute,
        node_count=env.node_count,
        run_invocation_timeout=env.run_invocation_timeout,
        process_count_per_node=env.process_count_per_node,
        append_row_file_name="extraction_output.txt")

    extraction_step = ParallelRunStep(
        name="extraction-step",
        inputs=[preparation_pipelinedata],
        output=extraction_pipelinedata,
        arguments=["--output_dir", extraction_pipelinedata],
        parallel_run_config=parallel_run_config)
    step_list.append(extraction_step)

    training_step = PythonScriptStep(
        name="traning-step",
        compute_target=aml_compute,
        source_directory=env.sources_directory_train,
        script_name=env.training_step_script_path,
        inputs=[extraction_pipelinedata],
        outputs=[training_pipelinedata],
        arguments=[
            "--input_dir", extraction_pipelinedata, "--output_dir",
            training_pipelinedata
        ],
        runconfig=run_config)

    step_list.append(training_step)

    # Build pipeline
    pipeline = Pipeline(workspace=aml_workspace, steps=step_list)
    pipeline.validate()
    logger.info(f"Built pipeline {pipeline}")

    # Publish pipeline
    published_pipeline = pipeline.publish(
        env.pipeline_name,
        description=env.pipeline_name,
        version=datetime.utcnow().isoformat())
    try:
        pipeline_endpoint = PipelineEndpoint.get(
            workspace=aml_workspace, name=env.pipeline_endpoint_name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=aml_workspace,
            name=env.pipeline_endpoint_name,
            pipeline=published_pipeline,
            description=env.pipeline_endpoint_name)
Example #6
published_pipeline = pipeline.publish(
    # name=pipeline_name, description=pipeline_description, version={...some version...}
    name=pipeline_name,
    description=pipeline_description,
)
print(f"Newly published pipeline id: {published_pipeline.id}")

try:
    pipeline_endpoint = PipelineEndpoint.get(workspace=workspace,
                                             name=pipeline_name)
    pipeline_endpoint.add(published_pipeline)
except Exception:
    pipeline_endpoint = PipelineEndpoint.publish(
        workspace=workspace,
        name=pipeline_name,
        pipeline=published_pipeline,
        description=f"Pipeline Endpoint for {pipeline_name}",
    )

# TODO: cleanup older pipeline endpoints(?)

# --- add a schedule for the pipeline (if told to do so)
# note: this is a sample schedule which runs time-based.
#       there is also the option to trigger the pipeline based on changes.
#       details at https://github.com/Azure/MachineLearningNotebooks/blob/4e7b3784d50e81c313c62bcdf9a330194153d9cd/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb
if schedule:
    recurrence = ScheduleRecurrence(frequency="Day",
                                    interval=2,
                                    hours=[22],
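The snippet above is cut off mid-call in the source. A minimal sketch of how the time-based schedule is typically completed, reusing workspace, pipeline_name, and published_pipeline from above (the experiment name and times are illustrative; Schedule and ScheduleRecurrence come from azureml.pipeline.core.schedule):

from azureml.pipeline.core.schedule import Schedule, ScheduleRecurrence

# run every second day at 22:30 (illustrative times)
recurrence = ScheduleRecurrence(frequency="Day",
                                interval=2,
                                hours=[22],
                                minutes=[30])
schedule = Schedule.create(workspace=workspace,
                           name=f"{pipeline_name}-schedule",  # hypothetical name
                           pipeline_id=published_pipeline.id,
                           experiment_name=pipeline_name,
                           recurrence=recurrence)
print(f"Created schedule: {schedule.id}")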
Example #7
from azureml.pipeline.core import Pipeline, PublishedPipeline, PipelineEndpoint

print(f'Azure ML SDK version: {azureml.core.VERSION}')

endpoint_name = "training-pipeline-endpoint"
pipeline_id = os.getenv('PIPELINE_ID')

# Connect to the workspace
ws = Workspace.from_config()
print(f'WS name: {ws.name}')
print(f'Region: {ws.location}')
print(f'Subscription id: {ws.subscription_id}')
print(f'Resource group: {ws.resource_group}')

print(f'Pipeline ID: {pipeline_id}')
published_pipeline = PublishedPipeline.get(workspace=ws, id=pipeline_id)
print(f'Published Pipeline: {published_pipeline}')

# Check if PipelineEndpoint already exists
if any(pe.name == endpoint_name for pe in PipelineEndpoint.list(ws)):
    print(f'Pipeline Endpoint with name {endpoint_name} already exists, will add pipeline to it')
    pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name=endpoint_name)
    pipeline_endpoint.add(published_pipeline)
    # Set it to default, as we already tested it beforehand!
    pipeline_endpoint.set_default(published_pipeline)
else:
    print(f'Will create Pipeline Endpoint with name {endpoint_name}')
    pipeline_endpoint = PipelineEndpoint.publish(workspace=ws,
                                                 name=endpoint_name,
                                                 pipeline=published_pipeline,
                                                 description="New Training Pipeline Endpoint")
Example #8
def main():
    # Environment variables
    env = Env()

    # Azure ML workspace
    aml_workspace = Workspace.get(
        name=env.workspace_name,
        subscription_id=env.subscription_id,
        resource_group=env.resource_group,
    )
    logger.info(f"Azure ML workspace: {aml_workspace}")

    # Azure ML compute cluster
    aml_compute = get_compute(aml_workspace, env.compute_name)
    logger.info(f"Aazure ML compute cluster: {aml_compute}")

    # Azure ML environment
    environment = Environment(name=env.aml_env_name)
    conda_dep = CondaDependencies(
        conda_dependencies_file_path="./local_development/dev_dependencies.yml"
    )
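    # Use a custom Docker base image pulled from a private Azure Container Registry.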
    environment.docker.enabled = True
    environment.docker.base_image = env.acr_image
    environment.docker.base_image_registry.address = env.acr_address
    environment.docker.base_image_registry.username = env.acr_username
    environment.docker.base_image_registry.password = env.acr_password
    environment.python.conda_dependencies = conda_dep

    run_config = RunConfiguration()
    run_config.environment = environment

    # List of pipeline steps
    step_list = list()
    first_step = PythonScriptStep(name="first_step",
                                  compute_target=aml_compute,
                                  source_directory=env.sources_directory_train,
                                  script_name=env.first_step_script_path,
                                  outputs=[],
                                  arguments=[
                                      "--input_dataset_name",
                                      env.input_dataset_name,
                                      "--waves_dataset_name",
                                      env.waves_dataset_name
                                  ],
                                  runconfig=run_config)

    step_list.append(first_step)

    # Build pipeline
    pipeline = Pipeline(workspace=aml_workspace, steps=step_list)
    pipeline.validate()
    logger.info(f"Built pipeline {pipeline}")

    # Publish pipeline
    published_pipeline = pipeline.publish(
        env.pipeline_name,
        description=env.pipeline_name,
        version=datetime.utcnow().isoformat())
    try:
        pipeline_endpoint = PipelineEndpoint.get(
            workspace=aml_workspace, name=env.pipeline_endpoint_name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=aml_workspace,
            name=env.pipeline_endpoint_name,
            pipeline=published_pipeline,
            description=env.pipeline_endpoint_name)