Example #1
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline, Schedule, ScheduleRecurrence

ws = Workspace.from_config()
exp = Experiment(ws, "MaxFreezerTemperatureExceededPipeline")
# list() returns PublishedPipeline objects; Schedule.create expects the ID
pipeline_id = PublishedPipeline.list(ws)[0].id

schedule = Schedule.create(
    ws,
    name="four_updates_per_day",
    description="runs the pipeline every 6 hours",
    pipeline_id=pipeline_id,
    recurrence=ScheduleRecurrence(
        frequency="Hour",
        interval=6,
        start_time=None,  # run instantly
        time_zone=None,  # default UTC
    ),
    experiment_name=exp.name,
)

# Schedule.list(ws)
# schedule = Schedule.list(ws)[0]
# schedule.get_last_pipeline_run()
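
# A hedged follow-up sketch (not in the original example): fetch the schedules
# attached to this pipeline and disable them again, mirroring Example #6.
for s in Schedule.list(ws, pipeline_id=pipeline_id):
    s.disable(wait_for_provisioning=True)
    print("Schedule", s.id, "is now", Schedule.get(ws, s.id).status)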
Example #2

import argparse

from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline
# Assumed import: in the MLOpsPython template, Env wraps the environment
# variables (workspace name, subscription ID, build ID, ...).
from ml_service.util.env_variables import Env


def main():
    parser = argparse.ArgumentParser("register")
    parser.add_argument("--aml_pipeline_name",
                        type=str,
                        help="Name of a the aml pipeline to retrieve ID from")
    parser.add_argument("--output_pipeline_id_file",
                        type=str,
                        default="preprocessing_pipeline_id.txt",
                        help="Name of a file to write pipeline ID to")
    parser.add_argument(
        "--skip_preprocessing_execution",
        action="store_true",
        help=("Do not trigger the execution. "
              "Use this in Azure DevOps when using a server job to trigger"))
    args = parser.parse_args()

    e = Env()

    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    # TODO: delete latest_version logic
    latest_version = 0
    latest_pipe = None
    for p in pipelines:
        if p.name == args.aml_pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)
            elif int(p.version) > latest_version:
                latest_version = int(p.version)
                latest_pipe = p

    if len(matched_pipes) == 0 and latest_version > 0:
        matched_pipes.append(latest_pipe)

    if len(matched_pipes) > 1:
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}.")
    elif len(matched_pipes) == 0:
        raise KeyError(
            f"Unable to find a published pipeline for build {e.build_id}")
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        if args.output_pipeline_id_file is not None:
            with open(args.output_pipeline_id_file, "w") as out_file:
                out_file.write(published_pipeline.id)

        if not args.skip_preprocessing_execution:
            tags = {"BuildId": e.build_id}
            if e.build_uri is not None:
                tags["BuildUri"] = e.build_uri
            experiment = Experiment(workspace=aml_workspace,
                                    name=e.experiment_name + "_preprocess")
            run = experiment.submit(published_pipeline, tags=tags)

            print("Pipeline run initiated ", run.id)
Example #3
print("Blobstore's name: {}".format(def_blob_store.name))

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name, def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name, prefix="prednet/data/video")
datasets = []
for blob in generator:
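    # blob names look like "prednet/data/video/<dataset>/...", so index 3
    # picks out the dataset folder name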
    dataset = blob.name.split('/')[3]
    if dataset not in datasets and dataset.startswith("UCSD") and not dataset.endswith("txt"):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
# (PublishedPipeline.get_all() is deprecated in favor of list())
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_pipeline(dataset, ws, config)
Example #4
from azureml.pipeline.core import PublishedPipeline
from azureml.core.experiment import Experiment
from azureml.core import Workspace

workspace = Workspace.from_config()

published_pipeline_id = ""
is_debug = True
debug_relay_connection_name = "test"

if published_pipeline_id is None or published_pipeline_id == "":
    raise ValueError("Initialize published_pipeline_id")

pipeline_parameters = {"is_debug": is_debug}
if is_debug:
    if debug_relay_connection_name == "":
        raise ValueError("Hybrid connection name cannot be empty!")

    pipeline_parameters.update(
        {"debug_relay_connection_name": debug_relay_connection_name})

experiment = Experiment(workspace, "Pipeline_debug_experiment")
published_pipeline = PublishedPipeline.get(workspace=workspace,
                                           id=published_pipeline_id)
experiment.submit(published_pipeline, pipeline_parameters=pipeline_parameters)
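
# Hedged note (not in the original): the pipeline_parameters passed to
# experiment.submit() only take effect if the pipeline was published with
# matching PipelineParameter objects; a minimal sketch of that definition:
from azureml.pipeline.core import PipelineParameter

is_debug_param = PipelineParameter(name="is_debug", default_value=False)
relay_param = PipelineParameter(name="debug_relay_connection_name",
                                default_value="")
# These would then be wired into a pipeline step as script arguments.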
Example #5
# Assumed imports for this snippet (the original omits them):
import requests

from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication
from azureml.pipeline.core import PublishedPipeline

cli_auth = AzureCliAuthentication()

##------------- Get Workspace

subscriptionId = "<your subscription id>"  # make this a parameter
resourceGroup = "<your resource group>"  # make this a parameter
workspaceName = "<your ml workspace name>"  # make this a parameter

ws = Workspace(subscriptionId, resourceGroup, workspaceName, auth=cli_auth)

##------------- Run Published pipeline using REST endpoint

aad_token = cli_auth.get_authentication_header()
published_pipeline_id = "ab0691a9-438f-416b-a146-5c7660d1be11"  # Replace this with the published pipeline id
published_pipeline = PublishedPipeline.get(ws, published_pipeline_id)
rest_endpoint = published_pipeline.endpoint
print("Rest endpoint: " + rest_endpoint)

response = requests.post(rest_endpoint,
                         headers=aad_token,
                         json={
                             "ExperimentName": "quality_prediction_gb",
                             "RunSource": "SDK",
                             "ParameterAssignments": {
                                 "modelName": "quality_gbm_model.pkl",
                                 "datasetName": "qualitydataset",
                                 "datasetStorePath": "/inputdata/train.csv"
                             }
                         })
print(response)
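
# A short follow-up sketch (assumed, based on the documented response shape):
# fail fast on HTTP errors and pull the submitted run's ID from the JSON body.
response.raise_for_status()
run_id = response.json().get("Id")
print("Submitted pipeline run:", run_id)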
Example #6

def disable_pipeline(pipeline_name="", dry_run=True):
    from azureml.pipeline.core import PublishedPipeline
    from azureml.pipeline.core.schedule import Schedule

    if dry_run:
        print("Dry run: only printing what would be done")
    else:
        print("Disabling pipelines")

    ws = get_workspace()

    # Get all published pipeline objects in the workspace
    all_pub_pipelines = PublishedPipeline.list(ws)

    # Iterate over the published pipelines and process every one that
    # matches the requested name (or all "prednet" pipelines if no name
    # was given):
    print("Published pipelines found in the workspace:")
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.startswith("prednet") and (
            pub_pipeline.name == pipeline_name or pipeline_name == ""
        ):
            print("Found pipeline:", pub_pipeline.name, pub_pipeline.id)
            pub_pipeline_id = pub_pipeline.id
            schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)

            # Iterate over this pipeline's schedules and disable each one
            # (when not a dry run):
            print(
                "Found these schedules for the pipeline id {}:".format(
                    pub_pipeline_id
                )
            )
            for schedule in schedules:
                print(schedule.name, schedule.id)
                if not dry_run:
                    schedule_id = schedule.id
                    print(
                        "Schedule id to be used for schedule "
                        "operations: {}".format(
                            schedule_id
                        )
                    )
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Using schedule with id: {}".format(
                            fetched_schedule.id
                        )
                    )
                    fetched_schedule.disable(wait_for_provisioning=True)
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Disabled schedule {}. New status is: {}".format(
                            fetched_schedule.id, fetched_schedule.status
                        )
                    )

            if not dry_run:
                print("Disabling pipeline")
                pub_pipeline.disable()
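
# Hedged usage example (the pipeline name is a placeholder): preview first,
# then actually disable.
disable_pipeline(pipeline_name="prednet_UCSDped1", dry_run=True)
disable_pipeline(pipeline_name="prednet_UCSDped1", dry_run=False)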
Example #7
def pipeline():
    return PublishedPipeline.get(workspace=ws, id=pipeline_id)
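
# This snippet reads like a pytest fixture; a hedged self-contained variant
# (the decorator and the ws/pipeline_id fixtures are assumptions):
#
# @pytest.fixture
# def pipeline(ws, pipeline_id):
#     return PublishedPipeline.get(workspace=ws, id=pipeline_id)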
Example #8
# Assumed context, as in Example #3: ws, def_blob_store, and
# build_prednet_pipeline are defined earlier in the original script.
from azure.storage.blob import BlockBlobService
from azureml.pipeline.core import PublishedPipeline

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")
datasets = []
for blob in generator:
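    # blob names look like "prednet/data/raw_data/<dataset>/...", so index 3
    # picks out the dataset folder name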
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_prednet_pipeline(dataset, ws)
Example #9
import azureml.core
from azureml.core import Workspace
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

ws = Workspace.from_config()
print(ws.get_details())

from azureml.pipeline.core import PublishedPipeline, Schedule

old_pipes = PublishedPipeline.list(ws)

for old_pipe in old_pipes:
    old_schedules = Schedule.list(ws, pipeline_id=old_pipe.id)
    for schedule in old_schedules:
        schedule.disable(wait_for_provisioning=True)

    old_pipe.disable()

ds = ws.get_default_datastore()

params = {
    '--data_path': ds.path('data'),
    '--analyze': '',
    '--load_open': '',
    '--load_closed': '',