Example #1
def main():
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME")+"-AML-WS"
    resource_group = os.environ.get("BASE_NAME")+"-AML-RG"
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    experiment_name = os.environ.get("EXPERIMENT_NAME")
    model_name = os.environ.get("MODEL_NAME")
    app_id = os.environ.get('SP_APP_ID')
    app_secret = os.environ.get('SP_APP_SECRET')
    release_id = os.environ.get('RELEASE_RELEASEID')
    build_id = os.environ.get('BUILD_BUILDID')
    storageacctname = os.environ.get('STORAGE_ACCT_NAME')
    storageacctkey = os.environ.get('STORAGE_ACCT_KEY')
    containername = os.environ.get('STORAGE_BLOB_NAME')

    service_principal = ServicePrincipalAuthentication(
            tenant_id=tenant_id,
            service_principal_id=app_id,
            service_principal_password=app_secret)

    aml_workspace = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
        auth=service_principal
        )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.version == build_id:
            matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(f"Multiple active pipelines are published for build {build_id}.")  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(f"Unable to find a published pipeline for this build {build_id}")  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]

    pipeline_parameters = {
            "model_name": model_name, 
            "release_id": release_id,
            "storageacctname": storageacctname,
            "storageacctkey": storageacctkey,
            "containername": containername
        }

    response = published_pipeline.submit(
        aml_workspace,
        experiment_name,
        pipeline_parameters)

    run_id = response.id
    print("Pipeline run initiated ", run_id)
Example #2
def main():

    parser = argparse.ArgumentParser("register")
    parser.add_argument("--output_pipeline_id_file",
                        type=str,
                        default="pipeline_id.txt",
                        help="Name of a file to write pipeline ID to")
    parser.add_argument(
        "--skip_train_execution",
        action="store_true",
        help=("Do not trigger the execution. "
              "Use this in Azure DevOps when using a server job to trigger"))
    args = parser.parse_args()

    e = Env()

    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}."
        )  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            f"Unable to find a published pipeline for this build {e.build_id}"
        )  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        if args.output_pipeline_id_file is not None:
            with open(args.output_pipeline_id_file, "w") as out_file:
                out_file.write(published_pipeline.id)

        if not args.skip_train_execution:
            pipeline_parameters = {"model_name": e.model_name}
            tags = {"BuildId": e.build_id}
            if e.build_uri is not None:
                tags["BuildUri"] = e.build_uri
            experiment = Experiment(workspace=aml_workspace,
                                    name=e.experiment_name)
            run = experiment.submit(published_pipeline,
                                    tags=tags,
                                    pipeline_parameters=pipeline_parameters)

            print("Pipeline run initiated ", run.id)
Example #3
def main():
    load_dotenv()
    workspace_name = os.environ.get("WORKSPACE_NAME")
    resource_group = os.environ.get("RESOURCE_GROUP_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    experiment_name = os.environ.get("EXPERIMENT_NAME")
    model_name = os.environ.get("MODEL_NAME")
    ckpt_path = os.environ.get("MODEL_CHECKPOINT_PATH")
    app_id = os.environ.get('SP_APP_ID')
    app_secret = os.environ.get('SP_APP_SECRET')
    build_id = os.environ.get('BUILD_BUILDID')
    datastore = os.environ.get('EPIS_DATASTORE')
    container_name = os.environ.get('EPIS_CONTAINER')

    service_principal = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=app_id,
        service_principal_password=app_secret)

    aml_workspace = Workspace.get(name=workspace_name,
                                  subscription_id=subscription_id,
                                  resource_group=resource_group,
                                  auth=service_principal)

    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.version == build_id:
            matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            f"Multiple active pipelines are published for build {build_id}.")
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            f"Unable to find a published pipeline for this build {build_id}")
    else:
        published_pipeline = matched_pipes[0]

    pipeline_parameters = {
        "model_name": model_name,
        "ckpt_path": ckpt_path,
        "datastore": datastore,
        "storage_container": container_name
    }

    response = published_pipeline.submit(aml_workspace, experiment_name,
                                         pipeline_parameters)

    run_id = response.id
    print("Pipeline run initiated ", run_id)
Example #4
def main():
    e = Env()
    service_principal = ServicePrincipalAuthentication(
        tenant_id=e.tenant_id,
        service_principal_id=e.app_id,
        service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group,
                                  auth=service_principal)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}."
        )  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            f"Unable to find a published pipeline for this build {e.build_id}"
        )  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        os.environ['amlpipeline_id'] = published_pipeline.id
        savePIDcmd = 'echo "export AMLPIPELINE_ID=$amlpipeline_id" >tmp.sh'
        os.system(savePIDcmd)

        # Set this to True for local development or
        # if not using Azure DevOps pipeline execution task
        skip_train_execution = True
        if not skip_train_execution:
            pipeline_parameters = {"model_name": e.model_name}
            response = published_pipeline.submit(aml_workspace,
                                                 e.experiment_name,
                                                 pipeline_parameters)

            run_id = response.id
            print("Pipeline run initiated ", run_id)
Example #5
def get_pipeline(workspace,
                 env,
                 pipeline_id=None):
    if pipeline_id is not None:
        scoring_pipeline = PublishedPipeline.get(workspace, id=pipeline_id)
    else:
        pipeline_list = PublishedPipeline.list(workspace)
        scoring_pipeline = [
            pl for pl in pipeline_list if pl.name == env.scoring_pipeline_name
        ]
        if len(scoring_pipeline) == 0:
            raise ValueError('no available pipeline to download!')
        else:
            scoring_pipeline = scoring_pipeline[0]

    return scoring_pipeline
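A brief usage sketch for the helper above; workspace, env, and known_id are placeholders for a Workspace, an Env-style settings object, and a published pipeline GUID:

# Fetch by explicit ID when one is known (e.g. passed from a previous job):
pipeline = get_pipeline(workspace, env, pipeline_id=known_id)
# Otherwise fall back to a lookup by env.scoring_pipeline_name:
pipeline = get_pipeline(workspace, env)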
Example #6
def main():
    parser = argparse.ArgumentParser('trigger_pipeline')
    arg = parser.add_argument
    arg('--output-write-file',
        type=str,
        default='pipeline_id_recorder',
        help='the text file to write the pipeline ID to')
    arg('--skip-train-exc',
        action='store_true',
        help='option to skip train execution')
    args = parser.parse_args()

    e = ENV()
    print(e.build_id)
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)

    published_pipeline = PublishedPipeline.list(aml_workspace)
    matched_pipes = []
    for pipe in published_pipeline:
        if pipe.name == e.pipeline_name:
            if pipe.version == e.build_id:
                matched_pipes.append(pipe)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception('there should be only one published pipeline')
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise Exception('no pipeline is published on the provided workspace!')
    else:
        published_pipeline = matched_pipes[0]
        print(f'published pipeline id is {published_pipeline.id}')

        if args.output_write_file is not None:
            with open(args.output_write_file, 'w') as output_file:
                output_file.write(published_pipeline.id)

        if args.skip_train_exc is False:
            pipeline_param = {'model_name': e.model_name}
            tags = {'build_id': e.build_id}
            if e.build_uri is not None:
                tags['build_uri'] = e.build_uri
            exp = Experiment(workspace=aml_workspace, name=e.experiment_name)
            run = exp.submit(published_pipeline,
                             tags=tags,
                             pipeline_parameters=pipeline_param)
            print(
                f'pipeline {published_pipeline.id} initiated,run id: {run.id}')
Example #7
def startup():
    import azureml.core
    from azureml.core import Run, Workspace, Experiment
    from azureml.pipeline.core import PublishedPipeline
    from azureml.core import Datastore, Dataset
    import pandas as pd
    print("SDK version:", azureml.core.VERSION)
    pd.set_option('display.max_colwidth', 120)

    workspace = Workspace.from_config()

    ds = workspace.get_default_datastore()

    #target_column_name = 'volume'
    #time_column_name = 'date'
    #time_series_id_column_names = 'team_tag'

    experiment_name = 'azure-stackoverflow-classifier'
    experiment = Experiment(workspace, name=experiment_name)
    train = pd.read_csv('./data/train.csv',
                        names=['ID', 'IssueTitle', 'Label'])

    try:

        run = Run(experiment,
                  'azure-stackoverflow-classifier_1592684426_3767f390')
        hd_run = Run(experiment, 'HD_ddfd3027-4b17-4afd-a42f-cec512ec544b')
        aks_service = workspace.webservices['stackoverflow-classifier']

        pipelines = PublishedPipeline.list(workspace)
        published_pipeline = pipelines[0]

    except Exception:
        print(
            "demo not initialized ... to speed up the demo, after you have "
            "run through the demo script all the way, set the values for the "
            "Run, HD_Run and AKS Service to fetch from existing entities "
            "instead of running realtime"
        )
        run = ""
        hd_run = ""
        aks_service = ""
        published_pipeline = ""

    stackoverflow_dataset, raw_dataset, azure_support_volume_timeseries_train, azure_support_volume_timeseries_test = register_data(
    )

    return ds, run, hd_run, aks_service, published_pipeline, stackoverflow_dataset, raw_dataset, train, azure_support_volume_timeseries_train, azure_support_volume_timeseries_test
Example #8
from datetime import datetime as dt
from typing import Union

import numpy as np
from azureml.core import Workspace
from azureml.pipeline.core import PublishedPipeline


def find_pipeline_by_name(
        aml_workspace: Workspace,
        pipeline_name: str) -> Union[PublishedPipeline, None]:
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipelines = list(
        filter(lambda p: p.name == pipeline_name, pipelines))
    date_matched_pipelines = [
        dt.strptime(pipeline.version, "%Y-%m-%dT%H:%M:%S.%f")
        for pipeline in matched_pipelines
    ]
    matched_pipelines = [
        matched_pipelines[idx] for idx in np.argsort(date_matched_pipelines)
    ]

    if matched_pipelines:
        return matched_pipelines[-1]
    return None
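This helper sorts matches by parsing each pipeline's version as a timestamp and returns the newest, so it assumes pipelines were published with an ISO-format version string rather than a build ID. An illustrative call (the pipeline name is hypothetical):

pipeline = find_pipeline_by_name(aml_workspace, "training-pipeline")
if pipeline is None:
    raise LookupError("no published pipeline named 'training-pipeline'")
print("latest version:", pipeline.version)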
Example #9
def disable_pipeline(pipeline_name="", dry_run=True):
    from azureml.pipeline.core import PublishedPipeline
    from azureml.pipeline.core.schedule import Schedule

    if dry_run:
        print("Dry run: only printing what would be done")
    else:
        print("Disabling pipelines")

    ws = get_workspace()

    # Get all published pipeline objects in the workspace
    all_pub_pipelines = PublishedPipeline.list(ws)

    # We will iterate through the list of published pipelines and
    # use the last ID in the list for Schedule operations:
    print("Published pipelines found in the workspace:")
    for pub_pipeline in all_pub_pipelines:
        if (pub_pipeline.name.startswith("prednet")
                and pub_pipeline.name == pipeline_name or pipeline_name == ""):
            print("Found pipeline:", pub_pipeline.name, pub_pipeline.id)
            pub_pipeline_id = pub_pipeline.id
            schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)

            # We will iterate through the list of schedules and
            # use the last ID in the list for further operations:
            print("Found these schedules for the pipeline id {}:".format(
                pub_pipeline_id))
            for schedule in schedules:
                print(schedule.name, schedule.id)
                if not dry_run:
                    schedule_id = schedule.id
                    print("Schedule id to be used for schedule "
                          "operations: {}".format(schedule_id))
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print("Using schedule with id: {}".format(
                        fetched_schedule.id))
                    fetched_schedule.disable(wait_for_provisioning=True)
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print("Disabled schedule {}. New status is: {}".format(
                        fetched_schedule.id, fetched_schedule.status))

            if not dry_run:
                print("Disabling pipeline")
                pub_pipeline.disable()
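Example #9 also relies on a get_workspace() helper that is not shown on this page. A minimal sketch, assuming it resolves the workspace from a local config.json the way Workspace.from_config() is used in Examples #7 and #13; the real helper may authenticate differently (for example with a service principal):

from azureml.core import Workspace


def get_workspace():
    # Assumed implementation: load the workspace from config.json,
    # mirroring the Workspace.from_config() calls elsewhere on this page.
    return Workspace.from_config()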
Example #10
def get_pipeline(pipeline_id, ws: Workspace, env: Env):
    if pipeline_id is not None:
        scoringpipeline = PublishedPipeline.get(ws, pipeline_id)
    else:
        pipelines = PublishedPipeline.list(ws)
        scoringpipelinelist = [
            pl for pl in pipelines if pl.name == env.scoring_pipeline_name
        ]  # noqa E501

        if len(scoringpipelinelist) == 0:
            raise Exception("No pipeline found matching name: {}".format(
                env.scoring_pipeline_name))
        else:
            # latest published
            scoringpipeline = scoringpipelinelist[0]

    return scoringpipeline
Example #11
def main():
    e = Env()
    service_principal = ServicePrincipalAuthentication(
            tenant_id=e.tenant_id,
            service_principal_id=e.app_id,
            service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
        auth=service_principal
        )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.version == e.build_id:
            matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(f"Multiple active pipelines are published for build {e.build_id}.")  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]

    pipeline_parameters = {"model_name": e.model_name}

    response = published_pipeline.submit(
        aml_workspace,
        e.experiment_name,
        pipeline_parameters)

    run_id = response.id
    print("Pipeline run initiated ", run_id)
Example #12
def getExistingPipeline(workspace, pipeline_name):
    '''
        Look for and return an existing azureml.pipeline.core.PublishedPipeline instance based on name.

        PARAMS:
            workspace               : azureml.core.Workspace    : Existing AMLS Workspace
            pipeline_name           : string                    : Name of the published pipeline to find.

        RETURNS:
            azureml.pipeline.core.PublishedPipeline if found, None otherwise
    '''
    return_pipeline = None

    pipelines = PublishedPipeline.list(workspace)
    if len(pipelines) > 0:
        for pipe in pipelines:
            if pipe.name == pipeline_name:
                return_pipeline = pipe
                break

    return return_pipeline
Example #13
def clean_azml_workspace(ctx):
    """
    [WARNING] Only use in test-only workspace. Remove or disable all compute clusters, published pipelines, published pipeline endpoints and schedules from Azure ML workspace.
    """

    ws = Workspace.from_config()

    # remove compute clusters
    for _, compute in ws.compute_targets.items():
        if not compute.provisioning_state == "Deleting":
            compute.delete()

    # deactivate schedules
    for s in Schedule.list(ws):
        s.disable()

    # disable pipeline endpoints
    for pe in PipelineEndpoint.list(ws):
        pe.disable()

    # disable published pipelines
    for p in PublishedPipeline.list(ws):
        p.disable()
Example #14
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline, Schedule, ScheduleRecurrence

ws = Workspace.from_config()
exp = Experiment(ws,
                 "MaxFreezerTemperatureExceededPipeline",
                 _create_in_cloud=True)
pipeline_id = PublishedPipeline.list(ws)[0].id

schedule = Schedule.create(
    ws,
    name="four_updates_per_day",
    description="runs the pipeline every 6 hours",
    pipeline_id=pipeline_id,
    recurrence=ScheduleRecurrence(
        frequency="Hour",
        interval=6,
        start_time=None,  # run instantly
        time_zone=None,  # default UTC
    ),
    experiment_name=exp.name,
)

# Schedule.list(ws)
# schedule = Schedule.list(ws)[0]
# schedule.get_last_pipeline_run()
Example #15
# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")
datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_prednet_pipeline(dataset, ws)
Example #16
import azureml.core
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

from azureml.core import Workspace

ws = Workspace.from_config()
print(ws.get_details())

from azureml.pipeline.core import PublishedPipeline, Schedule

old_pipes = PublishedPipeline.list(ws)

for old_pipe in old_pipes:
    old_schedules = Schedule.list(ws, pipeline_id=old_pipe.id)
    for schedule in old_schedules:
        schedule.disable(wait_for_provisioning=True)

    old_pipe.disable()

ds = ws.get_default_datastore()

params = {
    '--data_path': ws.get_default_datastore().path('data'),
    '--analyze': '',
    '--load_open': '',
    '--load_closed': '',