Example #1
def build_pipeline(dataset, ws, config):
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    base_dir = '.'
        
    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)
    
    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_create.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)
    
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:  # requires: from azureml.exceptions import ComputeTargetException
        print("creating new compute target")
        
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                                    max_nodes=4,
                                                                    idle_seconds_before_scaledown=1800)    
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name, provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
        
    # use get_status() to get a detailed status for the current cluster. 
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                                    max_nodes=10,
                                                                    idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout. 
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster. 
    try:
        print(gpu_compute_target.get_status().serialize())
    except BaseException as e:
        print("Could not get status of compute target.")
        print(e)

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(
        conda_packages=["py-opencv=3.4.2"],
        pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D',
        pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3",
                      "requests==2.21.0", "sklearn", "pandas==0.24.2",
                      "azureml-sdk", "numpy==1.16.2", "pillow==6.0.0"])
    
    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="video_data",
        path_on_datastore=os.path.join("prednet", "data", "video", dataset))
    print("DataReference object created")
        
    # Intermediate data passed between the pipeline steps.
    raw_data = PipelineData("raw_video_frames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames", datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py", 
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    print("video_decode step created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py", 
        arguments=["--input_data", raw_data, "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target, 
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    data_prep.run_after(video_decoding)

    print("data_prep step created")


    # configure access to ACR for pulling our custom docker image
    acr = ContainerRegistry()
    acr.address = config['acr_address']
    acr.username = config['acr_username']
    acr.password = config['acr_password']
    
    est = Estimator(source_directory=script_folder,
                    compute_target=gpu_compute_target,
                    entry_script='train.py', 
                    use_gpu=True,
                    node_count=1,
                    custom_docker_image = "wopauli_1.8-gpu:1",
                    image_registry_details=acr,
                    user_managed=True
                    )

    ps = RandomParameterSampling(
        {
            '--batch_size': choice(1, 2, 4, 8),
            '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
            '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"), #, "48, 96"),
            '--learning_rate': loguniform(-6, -1),
            '--lr_decay': loguniform(-9, -1),
            '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
            '--transfer_learning': choice("True", "False")
        }
    )

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10)

    hdc = HyperDriveConfig(estimator=est, 
                            hyperparameter_sampling=ps, 
                            policy=policy, 
                            primary_metric_name='val_loss', 
                            primary_metric_goal=PrimaryMetricGoal.MINIMIZE, 
                            max_total_runs=10,
                            max_concurrent_runs=5, 
                            max_duration_minutes=60*6
                            )

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=[
            '--data-folder', preprocessed_data, 
            '--remote_execution',
            '--dataset', dataset
            ],
        inputs=[preprocessed_data],
        metrics_output = data_metrics,
        allow_reuse=True
    )
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=cpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.']
    )
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws, steps=[video_decoding, data_prep, hd_step, registration_step])
    print ("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete") 

    pipeline_name = 'prednet_' + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)
    

    schedule = Schedule.create(workspace=ws, name=pipeline_name + "_sch",
                            pipeline_id=published_pipeline.id, 
                            experiment_name=pipeline_name,
                            datastore=def_blob_store,
                            wait_for_provisioning=True,
                            description="Datastore scheduler for Pipeline" + pipeline_name,
                            path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                            polling_interval=1
                            )

    return pipeline_name
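
For context, here is a minimal sketch of how build_pipeline might be called. The config keys mirror the ones the function reads above; Workspace.from_config() assumes the usual config.json is present, and the dataset name, compute names, and registry values are illustrative placeholders.

# Hypothetical driver for build_pipeline (illustrative values only).
from azureml.core import Workspace

if __name__ == "__main__":
    ws = Workspace.from_config()  # assumes an Azure ML config.json next to the script
    config = {
        "cpu_compute": "cpu-cluster",
        "gpu_compute": "gpu-cluster",
        "acr_address": "<registry>.azurecr.io",
        "acr_username": "<acr-username>",
        "acr_password": "<acr-password>",
    }
    pipeline_name = build_pipeline("UCSDped1", ws, config)
    print("published and scheduled pipeline:", pipeline_name)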
Example #2
print("training step created")

# Define Pipeline
pipeline = Pipeline(workspace=ws, steps=[train_xception])
print("Pipeline is built")

# Validate Pipeline
pipeline.validate()
print("Validation complete")

pipeline_name = 'kd_train_the_teacher'
# We need to disable (delete) previously published pipelines, because we can't have two published pipelines with the same name
from utils.azure import disable_pipeline
disable_pipeline(pipeline_name=pipeline_name, prefix='', dry_run=False)

# Publish Pipeline
published_pipeline = pipeline.publish(name=pipeline_name)
print("Pipeline is built")

# Put the pipeline on a schedule
schedule = Schedule.create(workspace=ws,
                           name=pipeline_name + "_sch",
                           pipeline_id=published_pipeline.id,
                           experiment_name=pipeline_name,
                           datastore=def_blob_store,
                           wait_for_provisioning=True,
                           description="Datastore scheduler for Pipeline" +
                           pipeline_name,
                           path_on_datastore=path_on_datastore,
                           polling_interval=60)
def disable_pipeline(pipeline_name="", prefix="", dry_run=True):
    # note: prefix is accepted so the call shown above works; matching below is by name
    from azureml.pipeline.core import PublishedPipeline
    from azureml.pipeline.core.schedule import Schedule

    if dry_run:
        print("Dry run: only printing what would be done")
    else:
        print("Disabling pipelines")

    ws = get_workspace()

    # Get all published pipeline objects in the workspace
    all_pub_pipelines = PublishedPipeline.list(ws)

    # We will iterate through the list of published pipelines and
    # use the last ID in the list for Schedule operations:
    print("Published pipelines found in the workspace:")
    for pub_pipeline in all_pub_pipelines:
        if (
            pub_pipeline.name.startswith("prednet")
            and pub_pipeline.name == pipeline_name
            or pipeline_name == ""
        ):
            print("Found pipeline:", pub_pipeline.name, pub_pipeline.id)
            pub_pipeline_id = pub_pipeline.id
            schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)

            # We will iterate through the list of schedules and
            # use the last ID in the list for further operations:
            print(
                "Found these schedules for the pipeline id {}:".format(
                    pub_pipeline_id
                )
            )
            for schedule in schedules:
                print(schedule.name, schedule.id)
                if not dry_run:
                    schedule_id = schedule.id
                    print(
                        "Schedule id to be used for schedule "
                        "operations: {}".format(
                            schedule_id
                        )
                    )
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Using schedule with id: {}".format(
                            fetched_schedule.id
                        )
                    )
                    fetched_schedule.disable(wait_for_provisioning=True)
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Disabled schedule {}. New status is: {}".format(
                            fetched_schedule.id, fetched_schedule.status
                        )
                    )

            if not dry_run:
                print("Disabling pipeline")
                pub_pipeline.disable()
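
The disable_pipeline function above relies on a get_workspace() helper that is not part of this example. A minimal sketch of what it could look like, assuming the standard config.json-based authentication flow:

# Hypothetical get_workspace() helper, assuming a config.json written by
# Workspace.write_config() (or downloaded from the Azure portal) is available on disk.
from azureml.core import Workspace


def get_workspace():
    return Workspace.from_config()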
Example #4
# --- add a schedule for the pipeline (if told to do so)
# note: this is a sample schedule which runs time-based.
#       there is also the option to trigger the pipeline based on changes.
#       details at https://github.com/Azure/MachineLearningNotebooks/blob/4e7b3784d50e81c313c62bcdf9a330194153d9cd/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb
if schedule:
    recurrence = ScheduleRecurrence(frequency="Day",
                                    interval=2,
                                    hours=[22],
                                    minutes=[30])
    schedule = Schedule.create(
        workspace=workspace,
        name="Every-Other-Day-At-10-30-PM",
        pipeline_id=published_pipeline.id,
        experiment_name=pipeline_name,
        recurrence=recurrence,
        wait_for_provisioning=True,
        description="A sample schedule which runs every other day at 10:30pm.",
    )

# --- trigger pipeline endpoint if we have been told to do so
if trigger_after_publish:
    print(f"Triggering pipeline endpoint '{pipeline_name}' (as configured)...")
    pipeline_run = Experiment(workspace,
                              pipeline_name).submit(pipeline_endpoint)
    # pipeline_run.wait_for_completion()

# --- Done
print("Done.")
Example #5
published_pipeline = pipeline.publish(name=pipeline_name)

print("pipeline id: ", published_pipeline.id)

datastore = ws.get_default_datastore()

with open("placeholder.txt", "w") as f:
    f.write("This is just a placeholder to ensure "
            "that this path exists in the blobstore.\n")

datastore.upload_files(
    [os.path.join(os.getcwd(), "placeholder.txt")],
    target_path="prednet/data/raw_data/",
)

schedule = Schedule.create(
    workspace=ws,
    name=pipeline_name + "_sch",
    pipeline_id=published_pipeline.id,
    experiment_name="prednet_master",
    datastore=datastore,
    wait_for_provisioning=True,
    description="Datastore scheduler for Pipeline" + pipeline_name,
    path_on_datastore="prednet/data/raw_data",
    polling_interval=5,
)

print("Created schedule with id: {}".format(schedule.id))

published_pipeline.submit(ws, published_pipeline.name)
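
Because the schedule above polls prednet/data/raw_data, a run is triggered whenever new files appear under that path. A small sketch of that trigger, with an illustrative file name:

# Hypothetical trigger: dropping a new file under the watched path causes the
# datastore schedule above to start a pipeline run on its next poll.
datastore.upload_files(
    [os.path.join(os.getcwd(), "new_raw_data_batch.txt")],  # illustrative file
    target_path="prednet/data/raw_data/",
)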
Example #6
    def createPipeline(self):
        '''
            A pipeline is a series of steps, but those steps also require DataReference objects so
            that the pipeline knows where to get data from and where to deposit outputs.

            In this step, if a PublishedPipeline with the given name already exists, a new pipeline is not created.

            If one is created, a new docker container is generated in the ACR instance associated with this
            AMLS workspace.
        '''
        self.publishedPipeline = getExistingPipeline(
            self.workspace, self.programArguments.pipeline_name, self.job_log)

        if self.publishedPipeline:
            print("Found existing pipeline - ",
                  self.programArguments.pipeline_name)
            if self.job_log:
                self.job_log.addInfo("{} - PublishedPipeline {} exists".format(
                    self.job_log.lastStep(),
                    self.programArguments.pipeline_name))
        else:
            print("Creating  pipeline - ", self.programArguments.pipeline_name)
            if self.job_log:
                self.job_log.addInfo(
                    "{} - Creating PublishedPipeline {}".format(
                        self.job_log.lastStep(),
                        self.programArguments.pipeline_name))

            print("Creating pipeline steps .....")
            self._createPipelineSteps()
            self.pipeLine = Pipeline(workspace=self.workspace,
                                     steps=self.pipelineStep)
            self.pipeLine.validate()

            print("Publishing pipeline .....")
            self.publishedPipeline = self.pipeLine.publish(
                name=self.programArguments.pipeline_name,
                description="Dummy Pipeline")
            '''
                Now we schedule it. This step on its own will create the AMLS experiment tied to this
                service.

                Unlike with the RTS example, no model is created in this step.

                Next we generate the schedule recurrence, i.e. when this pipeline should run, and finally create
                the schedule by identifying the published pipeline that is being requested.
            '''
            print("Scheduling pipeline .....")
            experiment_name = "exp_" + self.programArguments.pipeline_name
            recurrence = ScheduleRecurrence(
                frequency=self.programArguments.schedule_frequency,
                interval=self.programArguments.schedule_interval)

            self.Schedule = Schedule.create(
                workspace=self.workspace,
                name="{}_sched".format(self.programArguments.pipeline_name),
                pipeline_id=self.publishedPipeline.id,
                experiment_name=experiment_name,
                recurrence=recurrence,
                description="Pipeline schedule for {}".format(
                    self.programArguments.pipeline_name),
            )
        '''
            Print out what we know of the pipeline, in particular its status and endpoint.
        '''
        print("Pipeline : ", self.publishedPipeline.name)
        print("Pipeline Endpoint: ", self.publishedPipeline.endpoint)
        print("Pipeline Status: ", self.publishedPipeline.status)
Example #7
def build_prednet_pipeline(dataset, ws):
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    base_dir = "."

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = "./scripts"
    os.makedirs(script_folder)

    shutil.copytree(os.path.join(base_dir, "models"),
                    os.path.join(base_dir, script_folder, "models"))
    shutil.copy(os.path.join(base_dir, "train.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "data_preparation.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_prednet.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "batch_scoring.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "train_clf.py"), script_folder)
    shutil.copy(os.path.join(base_dir, "register_clf.py"), script_folder)

    cpu_compute_name = args.cpu_compute_name
    cpu_compute_target = AmlCompute(ws, cpu_compute_name)
    print("found existing compute target: %s" % cpu_compute_name)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = args.gpu_compute_name

    gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
    print(gpu_compute_target.get_status().serialize())

    env = Environment.get(ws, "prednet")

    # Runconfigs
    runconfig = RunConfiguration()
    runconfig.environment = env
    print("PipelineData object created")

    # DataReference to where raw data is stored.
    raw_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="raw_data",
        path_on_datastore=os.path.join("prednet", "data", "raw_data"),
    )
    print("DataReference object created")

    # Intermediate data passed between the pipeline steps.
    preprocessed_data = PipelineData("preprocessed_data",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    hd_child_cwd = PipelineData("prednet_model_path", datastore=def_blob_store)
    # prednet_path = PipelineData("outputs", datastore=def_blob_store)
    scored_data = PipelineData("scored_data", datastore=def_blob_store)
    model_path = PipelineData("model_path", datastore=def_blob_store)

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name="prepare_data",
        script_name="data_preparation.py",
        arguments=[
            "--raw_data",
            raw_data,
            "--preprocessed_data",
            preprocessed_data,
            "--dataset",
            dataset,
        ],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    # data_prep.run_after(video_decoding)

    print("data_prep step created")

    est = Estimator(
        source_directory=script_folder,
        compute_target=gpu_compute_target,
        entry_script="train.py",
        node_count=1,
        environment_definition=env,
    )

    ps = BayesianParameterSampling({
        "--batch_size":
        choice(1, 2, 4, 10),
        "--filter_sizes":
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        "--stack_sizes":
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
        "--learning_rate":
        uniform(1e-6, 1e-3),
        "--lr_decay":
        uniform(1e-9, 1e-2),
        "--freeze_layers":
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
        # "--fine_tuning": choice("True", "False"),
    })

    hdc = HyperDriveConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        primary_metric_name="val_loss",
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=3,
        max_concurrent_runs=3,
        max_duration_minutes=60 * 6,
    )

    train_prednet = HyperDriveStep(
        "train_w_hyperdrive",
        hdc,
        estimator_entry_script_arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--remote_execution",
            "--dataset",
            dataset,
        ],
        inputs=[preprocessed_data],
        outputs=[hd_child_cwd],
        metrics_output=data_metrics,
        allow_reuse=True,
    )
    train_prednet.run_after(data_prep)

    register_prednet = PythonScriptStep(
        name="register_prednet",
        script_name="register_prednet.py",
        arguments=[
            "--data_metrics",
            data_metrics,
        ],
        compute_target=cpu_compute_target,
        inputs=[data_metrics, hd_child_cwd],
        source_directory=script_folder,
        allow_reuse=True,
    )
    register_prednet.run_after(train_prednet)

    batch_scoring = PythonScriptStep(
        name="batch_scoring",
        script_name="batch_scoring.py",
        arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--scored_data",
            scored_data,
            "--dataset",
            dataset,
            # "--prednet_path",
            # prednet_path
        ],
        compute_target=gpu_compute_target,
        inputs=[preprocessed_data],
        outputs=[scored_data],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    batch_scoring.run_after(register_prednet)

    train_clf = PythonScriptStep(
        name="train_clf",
        script_name="train_clf.py",
        arguments=[
            "--preprocessed_data", preprocessed_data, "--scored_data",
            scored_data, "--model_path", model_path
        ],
        compute_target=cpu_compute_target,
        inputs=[preprocessed_data, scored_data],
        outputs=[model_path],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    train_clf.run_after(batch_scoring)

    register_clf = PythonScriptStep(
        name="register_clf",
        script_name="register_clf.py",
        arguments=["--model_path", model_path],
        inputs=[model_path],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        allow_reuse=True,
        runconfig=runconfig,
    )
    register_clf.run_after(train_clf)

    pipeline = Pipeline(
        workspace=ws,
        steps=[
            data_prep,
            train_prednet,
            register_prednet,
            batch_scoring,
            train_clf,
            register_clf,
        ],
    )
    pipeline.validate()

    pipeline_name = "prednet_" + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    _ = Schedule.create(
        workspace=ws,
        name=pipeline_name + "_sch",
        pipeline_id=published_pipeline.id,
        experiment_name=pipeline_name,
        datastore=def_blob_store,
        wait_for_provisioning=True,
        description="Datastore scheduler for Pipeline" + pipeline_name,
        path_on_datastore=os.path.join("prednet/data/raw_data", dataset,
                                       "Train"),
        polling_interval=60 * 24,
    )

    published_pipeline.submit(ws, pipeline_name)
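
build_prednet_pipeline reads args.cpu_compute_name and args.gpu_compute_name from a module-level args object that is not shown. A minimal argparse sketch under that assumption (default cluster names are illustrative):

# Hypothetical module-level argument parsing assumed by the function above.
import argparse

parser = argparse.ArgumentParser(description="Build and schedule the prednet pipeline.")
parser.add_argument("--cpu_compute_name", default="cpu-cluster")
parser.add_argument("--gpu_compute_name", default="gpu-cluster")
args = parser.parse_args()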
Example #8
deploy_model = PythonScriptStep(script_name="src/deploy_model.py",
                                compute_target=compute_target,
                                runconfig=aml_run_config)

steps = [train_model, deploy_model]
step_seq = StepSequence(steps=steps)
pipeline = Pipeline(workspace=ws, steps=step_seq)

pp = pipeline.publish(
    name="TitanicDeploymentPipeline",
    description="Training and deployment pipeline for our titanic API.",
    version="1.0")

# We can also set up a trigger based schedule using the Datastore class - https://docs.microsoft.com/en-us/azure/machine-learning/how-to-schedule-pipelines#create-a-time-based-schedule
recurrence = ScheduleRecurrence(frequency="Month",
                                interval=1,
                                start_time='2020-11-01T00:00:00')
recurring_schedule = Schedule.create(ws,
                                     name="TitanicRetrainingSchedule",
                                     description="Once a month training",
                                     pipeline_id=pp.id,
                                     experiment_name=exp_name,
                                     recurrence=recurrence)

run = pp.submit(ws, experiment_name=exp_name)

run_id = run.id
exp = Experiment(ws, exp_name)
r = Run(exp, run_id)
r.get_details()

# (tail of a truncated PythonScriptStep definition for the submit_pipelines step;
#  the preceding arguments are not shown in this example)
    allow_reuse=False,
    hash_paths=["."])
print("pipeline submit step created")

submit_pipelines.run_after(build_pipelines)

pipeline = Pipeline(workspace=ws, steps=[build_pipelines, submit_pipelines])
print("Pipeline created")

pipeline.validate()
print("Validation complete")

pipeline_name = 'prednet_master'
published_pipeline = pipeline.publish(name=pipeline_name)
print("pipeline id: ", published_pipeline.id)

datastore = ws.get_default_datastore()

schedule = Schedule.create(workspace=ws,
                           name=pipeline_name + "_sch",
                           pipeline_id=published_pipeline.id,
                           experiment_name='Schedule_Run',
                           datastore=datastore,
                           wait_for_provisioning=True,
                           description="Datastore scheduler for Pipeline" +
                           pipeline_name)

print("Created schedule with id: {}".format(schedule.id))

published_pipeline.submit(ws, published_pipeline.name)
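
If this schedule needs to be paused later, the same Schedule API used in Example #2 can list and disable it. A short sketch reusing ws and published_pipeline from above:

# Hypothetical cleanup: find and disable the schedule(s) created for this pipeline.
from azureml.pipeline.core.schedule import Schedule

for sched in Schedule.list(ws, pipeline_id=published_pipeline.id):
    print("disabling schedule", sched.id)
    sched.disable(wait_for_provisioning=True)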