def main(name, vm_size, nodes):
    ws = Workspace.from_config()
    try:
        # Reuse the compute cluster if it already exists
        compute_cluster = ComputeTarget(ws, name)
    except ComputeTargetException:
        # Otherwise provision a new AmlCompute cluster
        compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                               min_nodes=1,
                                                               max_nodes=nodes)
        compute_cluster = ComputeTarget.create(ws, name, compute_config)
        compute_cluster.wait_for_completion(show_output=True)
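# A minimal usage sketch for the helper above, assuming the imports below and a local
# config.json for Workspace.from_config(); the cluster name and VM size are placeholders.
from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

if __name__ == "__main__":
    # Provision (or reuse) a cluster named "cpu-cluster" with up to 4 nodes.
    main(name="cpu-cluster", vm_size="STANDARD_D2_V2", nodes=4)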
def main(workspace):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name="aml-intTest"
    )

    # Loading script parameters
    print("Loading script parameters")
    script_params = {
        "--kernel": "linear",
        "--penalty": 0.9
    }

    # Creating experiment config
    print("Creating experiment config")
    estimator = Estimator(
        source_directory="./tests/train/train_with_python_config",
        entry_script="train.py",
        script_params=script_params,
        compute_target=compute_target,
        conda_dependencies_file="environment.yml"
    )
    return estimator
def prepare_remote_compute(ws):
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    compute_min_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1)
    compute_max_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4)

    # This example uses a CPU VM. For a GPU VM, set the SKU to STANDARD_NC6.
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. Using it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # Create the cluster
        compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

        # Can poll for a minimum number of nodes and for a specific timeout.
        # If no min node count is provided, the scale settings of the cluster are used.
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of the current AmlCompute status, use get_status()
        print(compute_target.get_status().serialize())
    return compute_target
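# A minimal sketch of overriding the cluster name and SKU read by prepare_remote_compute
# above through environment variables; the values and the local config.json for
# Workspace.from_config() are assumptions for illustration.
import os
from azureml.core import Workspace

os.environ["AML_COMPUTE_CLUSTER_NAME"] = "gpucluster"
os.environ["AML_COMPUTE_CLUSTER_SKU"] = "STANDARD_NC6"  # GPU SKU, as noted in the helper

ws = Workspace.from_config()
compute_target = prepare_remote_compute(ws)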
def main(workspace, inputs):
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name=inputs["compute"]
    )

    # Create a new runconfig object
    run_config = RunConfiguration()

    # Enable Docker
    run_config.environment.docker.enabled = True

    # Set the Docker base image to the default CPU-based image
    run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

    # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
    run_config.environment.python.user_managed_dependencies = False

    # Specify the CondaDependencies object
    run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

    # For this step, we use yet another source_directory
    step = PythonScriptStep(name=inputs["step_name"],
                            script_name=inputs["train_script"],
                            compute_target=compute_target,
                            source_directory=inputs["source_directory"],
                            runconfig=run_config,
                            allow_reuse=True)
    return step
def main(workspace):
    # Load compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name="mycluster"
    )

    # Load script parameters
    print("Loading script parameters")
    script_params = {
        "--kernel": "linear",
        "--penalty": 1.0
    }

    # Create experiment config
    print("Creating experiment config")
    estimator = Estimator(
        source_directory="code/train",
        entry_script="train.py",
        script_params=script_params,
        compute_target=compute_target,
        pip_packages=["azureml-dataprep[pandas,fuse]", "scikit-learn", "pandas", "matplotlib"]
    )
    return estimator
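# A minimal sketch showing how the estimator returned by the function above could be
# submitted as a run; the experiment name "train-svm" and the local workspace config
# are assumptions for illustration.
from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
estimator = main(ws)
run = Experiment(ws, "train-svm").submit(estimator)
run.wait_for_completion(show_output=True)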
def main(workspace, inputs):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(
        workspace=workspace,
        name=inputs["compute"]
    )

    step = PythonScriptStep(name=inputs["step_name"],
                            script_name=inputs["train_script"],
                            compute_target=compute_target,
                            source_directory=inputs["source_directory"],
                            allow_reuse=True)
    return step
def main(workspace, inputs):
    # Loading compute target
    print("Loading compute target")
    compute_target = ComputeTarget(workspace=workspace, name=inputs["compute"])

    # Loading script parameters
    print("Loading script parameters")
    script_params = {"--kernel": "linear", "--penalty": 0.9}

    # Creating experiment config
    print("Creating experiment config")
    estimator = Estimator(source_directory=inputs["source_directory"],
                          entry_script=inputs["train_script"],
                          script_params=script_params,
                          compute_target=compute_target,
                          conda_dependencies_file="environment.yml")
    return estimator
def _load_compute_target(workspace, backend_config):
    '''
    Returns the ComputeTarget object associated with the user's workspace
    and the name of the target compute.

    :param workspace: AzureML Workspace object
    :param backend_config: dictionary containing the target compute name
    :return ComputeTarget: AzureML ComputeTarget object
    '''
    target_name = backend_config[COMPUTE]
    try:
        compute = ComputeTarget(workspace=workspace, name=target_name)  # pylint: disable = abstract-class-instantiated
        _logger.info(
            _CONSOLE_MSG.format(
                "Found existing cluster {}, using it.".format(target_name)))
    except ComputeTargetException as e:
        raise ComputeTargetException(e)
    return compute
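# A minimal sketch of calling _load_compute_target above; the workspace config, the
# cluster name "train-cluster", and reusing the module-level COMPUTE key constant are
# assumptions for illustration.
from azureml.core import Workspace

ws = Workspace.from_config()
backend_config = {COMPUTE: "train-cluster"}  # COMPUTE is the key used by the helper above
compute = _load_compute_target(ws, backend_config)
print(compute.name)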
def prepare_remote_compute(ws, compute_name, compute_min_nodes=0, compute_max_nodes=4,
                           compute_vm_size='STANDARD_D2_V2'):
    """
    :param ws: azureml Workspace instance
    :param compute_name: String with name for compute target
    :param compute_min_nodes: minimum number of nodes
    :param compute_max_nodes: maximum number of nodes
    :param compute_vm_size: vm size for compute target
    :return:
    """
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name + ' of size: ' +
                  compute_target.vm_size + '. Using it. ')
            print('For a different size create a new target with different name!')
            # TODO: Handle case if compute_name exists, but is not active!
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=compute_vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # Create the cluster
        compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

        # Can poll for a minimum number of nodes and for a specific timeout.
        # If no min node count is provided, the scale settings of the cluster are used.
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of the current AmlCompute status, use get_status()
        print(compute_target.get_status().serialize())
    return compute_target
def main(experiment, environment, dataset):
    workspace = Workspace.from_config()
    experiment = Experiment(workspace, experiment)
    compute_target = ComputeTarget(workspace, environment)

    # Use the root of the solution as source folder for the run.
    root_folder = Path(__file__).parent.parent

    # Provide each of the datasets to the estimator as a named input.
    # You can access these from within the training script.
    datasets = [Dataset.get_by_name(workspace, ds).as_named_input(ds) for ds in dataset]

    estimator = SKLearn(
        source_directory=root_folder,
        entry_script='customer_churn/train.py',
        conda_dependencies_file='conda_dependencies.yml',
        compute_target=compute_target,
        inputs=datasets
    )

    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)
    service.wait_for_deployment(show_output=True)
    return service


# Authentication via service principal
ws = Workspace.get(
    args.ws,
    ServicePrincipalAuthentication(
        tenant_id=os.getenv('tenant_id'),
        service_principal_id=os.getenv('service_principal_id'),
        service_principal_password=os.getenv('service_principal_password')),
    subscription_id=os.getenv('subscription_id'),
    resource_group=args.rg)

model = Model(ws, "new_model")
deployment_target = ComputeTarget(ws, args.aksname)
img = create_image_config("score.py", "scoringenv.yml")
servicename = args.servicename

try:
    service = Webservice(ws, servicename)
except Exception as e:
    print(e)
    service = None

if service:
    print("Updating existing service with new image...")
    try:
        # Create new image
        service = update_service(service, [model], img, ws)
    except Exception as e:
def main():
    # Get our configs
    with open("ptgnn/authentication.json") as jsonFile:
        authData = json.load(jsonFile)[args.auth_cluster]

    # Copy the convertCorpus script here. Done so we don't upload the corpus to Azure,
    # or keep a copy of the script in here. (It's weird, I know. It works and has a purpose though.)
    convertCorpusLocation = Path("../convertCorpusForML.py")
    convertCorpusAzureLocation = Path("./convertCorpusForML.py")
    shutil.copy(convertCorpusLocation, convertCorpusAzureLocation)

    # Grab the authentication data from the JSON file
    subID = authData["subID"]  # Get from Azure Portal; used for billing
    resGroup = authData["resGroup"]  # Name of the resource group
    wsName = authData["wsName"]  # Name of the workspace, which is the collection of compute clusters + experiments
    computeName = authData["computeName"]  # Name of the compute cluster
    datastoreName = authData["datastoreName"]

    # Get the workspace, the compute target and the datastore
    ws = Workspace.get(wsName, subscription_id=subID, resource_group=resGroup)
    computeTarget = ComputeTarget(ws, computeName)
    datastore = Datastore(ws, name=datastoreName)

    # Download the entire corpus to the compute target. Save the DataReference obj here.
    # as_mount is also possible, but slows things down due to network opening of files.
    corpus_location = datastore.path(args.aml_location).as_download()
    output_location = "./"

    # The files that will be uploaded for usage by our script (everything in the azure folder)
    source_directory = "."

    # Params for the script
    params = {
        "--corpus_location": corpus_location,
        "--output_folder": output_location,
        "--aml": "",
        "--training_percent": args.training_percent,
        "--validation_percent": args.validation_percent,
        "-c": ""
    }
    if args.log_num is not None:
        params["-l"] = args.log_num
        tags = {"logs": str(args.log_num)}
    else:
        tags = {"logs": "MAX"}
    if args.statement_generation:
        params["-s"] = ""
        tags["generationType"] = "Statement"
    else:
        tags["generationType"] = "Severity"

    # Set up the estimator object. Note the inputs element: it tells Azure that
    # corpus_location in params will be a DataReference object.
    est = Estimator(source_directory=source_directory,
                    compute_target=computeTarget,
                    entry_script='convertCorpusForML.py',
                    script_params=params,
                    inputs=[corpus_location],
                    conda_packages=["pip"],
                    pip_packages=["azureml-core", "tqdm", "numpy", "protobuf"],
                    use_docker=True,
                    use_gpu=False)

    # Start the experiment
    run = Experiment(ws, args.exp_name).submit(config=est, tags=tags)

    # Remove the copy of convertCorpus (remember, don't question this)
    convertCorpusAzureLocation.unlink()

    # Print out the portal URL
    # print("Portal URL: ", run.get_portal_url())

    # This will stream everything that the compute target does.
    print("Experiment started. Remember: you can exit out of this program, but the experiment will still run on Azure!")
    run.wait_for_completion(show_output=True)
from azureml.core import Workspace, Datastore, Dataset, ScriptRunConfig, ComputeTarget, Experiment
from azureml.data.datapath import DataPath
from azureml.train.sklearn import SKLearn
from azureml.train.estimator import Estimator

# Multi-tenant authentication with my account
from azureml.core.authentication import InteractiveLoginAuthentication
int_auth = InteractiveLoginAuthentication(tenant_id='your_tenant_id')

ws = Workspace.from_config(auth=int_auth)
print(ws.name)

dataset = Dataset.get_by_name(workspace=ws, name='demo_wines_live')

# Point to compute target
comp = ComputeTarget(ws, name='compute-instance-demo')

# Estimator with SKLearn by default + azureml-sdk package
est = SKLearn(
    source_directory='./scripts',
    entry_script='train.py',
    compute_target=comp,
    inputs=[dataset.as_named_input('train')],  # readable from the script
    pip_packages=['azureml-sdk', 'pyarrow>=0.12.0']
)

exp = Experiment(workspace=ws, name='submitted_wine')
run = exp.submit(est)
run.wait_for_completion(show_output=True)

#%%
%%writefile ./scripts/train.py
def create_experiment_config(workspace):
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets of first step
    print("Setting up datasets")
    data_prep_input = Dataset.get_by_name(
        workspace=workspace,
        name=data_prep_settings.get("dataset_input_name", None)).as_named_input(
            data_prep_settings.get("dataset_input_name", None)).as_mount()
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get("datastore_output_name",
                                                        "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register the intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Load compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(
        workspace=workspace,
        name=data_prep_settings.get("compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []),
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ###############################################
    ### Creating data model train Pipeline Step ###
    ###############################################

    # Load settings
    print("Loading settings")
    model_train_step_path = os.path.join("steps", "model_train")
    with open(os.path.join(model_train_step_path, "step.json")) as f:
        model_train_settings = json.load(f)
    hyperparameter_sampling_settings = model_train_settings.get(
        "hyperparameter_sampling", {})

    # Setup datasets of second step
    print("Setting up datasets")
    model_train_input = data_prep_output.as_named_input(
        name=model_train_settings.get("dataset_input_name", None))
    model_train_output = PipelineData(
        name=model_train_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=model_train_settings.get("datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register the intermediate dataset
    #model_train_output.register(
    #    name=model_train_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    model_train_dependencies = CondaDependencies.create(
        pip_packages=model_train_settings.get("pip_packages", []),
        conda_packages=model_train_settings.get("conda_packages", []),
        python_version=model_train_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    model_train_run_config = RunConfiguration(
        conda_dependencies=model_train_dependencies,
        framework=model_train_settings.get("framework", "Python"))

    # Load compute target
    print("Loading ComputeTarget")
    model_train_compute_target = ComputeTarget(
        workspace=workspace,
        name=model_train_settings.get("compute_target_name", None))

    # Create distributed training backend
    print("Creating distributed training backend")
    distributed_training_backend = get_distributed_backend(
        backend_name=model_train_settings.get("distributed_backend", None))

    # Create Estimator for training
    print("Creating Estimator for training")
    model_train_estimator = Estimator(
        source_directory=model_train_step_path,
        entry_script=model_train_settings.get("script_name", None),
        environment_variables=model_train_settings.get("parameters", None),
        compute_target=model_train_compute_target,
        node_count=model_train_settings.get("node_count", None),
        distributed_training=distributed_training_backend,
        conda_packages=model_train_settings.get("conda_packages", None),
        pip_packages=model_train_settings.get("pip_packages", None),
    )

    try:
        # Create parameter sampling
        print("Creating Parameter Sampling")
        parameter_dict = {}
        parameters = hyperparameter_sampling_settings.get(
            "parameters", {}) if "parameters" in hyperparameter_sampling_settings else {}
        for parameter_name, parameter_details in parameters.items():
            parameter_distr = get_parameter_distribution(
                distribution=parameter_details.get("distribution", None),
                **parameter_details.get("settings", {}))
            parameter_dict[f"--{parameter_name}"] = parameter_distr
        model_train_ps = get_parameter_sampling(
            sampling_method=hyperparameter_sampling_settings.get("method", None),
            parameter_dict=parameter_dict)

        # Get policy definition
        policy_settings = hyperparameter_sampling_settings.get("policy", {})
        kwargs = {
            key: value
            for key, value in policy_settings.items()
            if key not in ["policy_method", "evaluation_interval", "delay_evaluation"]
        }

        # Create termination policy
        print("Creating early termination policy")
        model_train_policy = get_policy(
            policy_method=policy_settings.get("method", ""),
            evaluation_interval=policy_settings.get("evaluation_interval", None),
            delay_evaluation=policy_settings.get("delay_evaluation", None),
            **kwargs)

        # Create HyperDriveConfig
        print("Creating HyperDriveConfig")
        model_train_hyperdrive_config = HyperDriveConfig(
            estimator=model_train_estimator,
            hyperparameter_sampling=model_train_ps,
            policy=model_train_policy,
            primary_metric_name=hyperparameter_sampling_settings.get(
                "primary_metric", None),
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE
            if "min" in hyperparameter_sampling_settings.get("primary_metric_goal", None)
            else PrimaryMetricGoal.MAXIMIZE,
            max_total_runs=hyperparameter_sampling_settings.get("max_total_runs", 1),
            max_concurrent_runs=hyperparameter_sampling_settings.get("max_concurrent_runs", 1),
            max_duration_minutes=hyperparameter_sampling_settings.get("max_duration_minutes", None))

        # Create HyperDriveStep
        print("Creating HyperDriveStep")
        model_train = HyperDriveStep(
            name=model_train_settings.get("step_name", None),
            hyperdrive_config=model_train_hyperdrive_config,
            estimator_entry_script_arguments=model_train_settings.get("arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))
    except Exception:
        print("Not all required parameters specified for HyperDrive step")

        # Create EstimatorStep
        print("Creating EstimatorStep")
        model_train = EstimatorStep(
            name=model_train_settings.get("step_name", None),
            estimator=model_train_estimator,
            estimator_entry_script_arguments=model_train_settings.get("arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            compute_target=model_train_compute_target,
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[model_train],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
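# A minimal sketch of running the training pipeline built by create_experiment_config
# above; the experiment name "training-pipeline" and the local workspace config are
# assumptions for illustration.
from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
pipeline = create_experiment_config(ws)
pipeline_run = Experiment(ws, "training-pipeline").submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)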
print("Azure ML SDK Version: ", azureml.core.VERSION) ws = Workspace.from_config() print("Resource group: ", ws.resource_group) print("Location: ", ws.location) print("Workspace name: ", ws.name) from azureml.core.webservice import Webservice for web_svc in Webservice.list(ws): print("Deleting web service", web_svc.name, "...") web_svc.delete() from azureml.core import ComputeTarget for target in ComputeTarget.list(ws): print("Deleting compute target", target.name, "...") target.delete() from azureml.core import Image for img in Image.list(ws): print("Deleting image", img.id, "...") img.delete() from azureml.core.model import Model for model in Model.list(ws): print("Deleting model", model.id, "...") model.delete()
# Find workspace using connection parameters
aml_workspace = Workspace.get(subscription_id=args.subscription_id,
                              resource_group=args.resource_group,
                              name=args.base_name + "ws")

# Load yaml and store it as a dictionary
with open("variables.yml", "r") as f:
    yaml_loaded = yaml.safe_load(f)['variables']
variables = {}
for d in yaml_loaded:
    variables[d['name']] = d['value']

# Check if compute cluster exists. If not, create one.
try:
    compute_target = ComputeTarget(aml_workspace,
                                   variables["AML_COMPUTE_CLUSTER_CPU_SKU"])
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=variables['AML_COMPUTE_CLUSTER_SIZE'],
        vm_priority=variables['AML_CLUSTER_PRIORITY'],
        min_nodes=variables['AML_CLUSTER_MIN_NODES'],
        max_nodes=variables['AML_CLUSTER_MAX_NODES'],
        idle_seconds_before_scaledown="300")
    compute_target = ComputeTarget.create(
        aml_workspace,
        variables["AML_COMPUTE_CLUSTER_CPU_SKU"],
        compute_config)

# Create environment from conda_dependencies.yml for runconfig
environment = Environment(name="myenv")
conda_dep = CondaDependencies(
auth = ServicePrincipalAuthentication(
    tenant_id=auth_config["tenant_id"],
    service_principal_id=auth_config["service_principal_id"],
    service_principal_password=os.environ["SP_SECRET"],
)

ws = Workspace(
    subscription_id=auth_config["subscription_id"],
    resource_group=auth_config["resource_group"],
    workspace_name=auth_config["workspace_name"],
    auth=auth,
)

# Usually the cluster already exists, so we just fetch it
compute_target = next(
    (m for m in ComputeTarget.list(ws) if m.name == compute["name"]), None
)

# Specify the compute environment and register it for use in scoring
env = Environment("component-condition")
env.docker.enabled = True
cd = CondaDependencies.create(
    conda_packages=[
        "tensorflow=2.0.0",
        "pandas",
        "numpy",
        "matplotlib"
    ],
    pip_packages=[
        "azureml-mlflow==1.5.0",
        "azureml-defaults==1.5.0"
try:
    config_path = os.path.join(this_script_dir, "config.json")
    workspace = Workspace.from_config(config_path, auth=interactive_auth)
except Exception as ex:
    print(f"Cannot get a workspace: {ex}")
    exit()

print('Workspace name: ' + workspace.name,
      'Azure region: ' + workspace.location,
      'Subscription id: ' + workspace.subscription_id,
      'Resource group: ' + workspace.resource_group,
      sep='\n')

# Getting an Azure ML Compute Target
try:
    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2', max_nodes=1)

    # Create the cluster
    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min node count is provided, it uses the scale settings for the cluster.
    compute_target.wait_for_completion(show_output=True,
                                       min_node_count=None,
                                       timeout_in_minutes=20)
def create_experiment_config(workspace):
    ########################################
    ### Creating data load Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_load_step_path = os.path.join("steps", "data_load")
    with open(os.path.join(data_load_step_path, "step.json")) as f:
        data_load_settings = json.load(f)

    # Setup of pipeline parameters
    print("Setting up pipeline parameters")
    data_load_environment = PipelineParameter(name="environment",
                                              default_value="golden")
    data_load_start_date = PipelineParameter(name="start_date",
                                             default_value="2019-01-01")
    data_load_end_date = PipelineParameter(name="end_date",
                                           default_value="2019-01-31")
    data_load_system = PipelineParameter(name="system",
                                         default_value="PAX 1")
    data_load_platform = PipelineParameter(name="platform",
                                           default_value="Atlantis")

    # Load compute target
    print("Loading ComputeTarget")
    data_load_compute_target = ComputeTarget(
        workspace=workspace,
        name=data_load_settings.get("compute_target_name", None))

    # Create Databricks step
    print("Creating Step")
    data_load = DatabricksStep(
        name=data_load_settings.get("step_name", None),
        existing_cluster_id=data_load_settings.get("existing_cluster_id", None),
        inputs=[],
        outputs=[],
        compute_target=data_load_compute_target,
        notebook_path=data_load_settings.get("notebook_path", None),
        notebook_params={
            "environment": data_load_environment,
            "start_date": data_load_start_date,
            "end_date": data_load_end_date,
            "system": data_load_system,
            "platform": data_load_platform
        },
        run_name=data_load_settings.get("step_name", None),
        allow_reuse=data_load_settings.get("allow_reuse", True),
        version=data_load_settings.get("version", None),
    )

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[data_load],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
def create_experiment_config(workspace):
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets - create a PipelineParameter for dynamic pipeline input
    print("Setting up datasets with dynamic input")
    data_prep_input_path = DataPath(
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get("datastore_input_name",
                                                        "workspaceblobstore")),
        path_on_datastore="golden/Atlantis/PAX1/15-Mar-2020-23-37-50-279971/PAX1.parquet/")
    data_prep_input_path_pipeline_parameter = PipelineParameter(
        name="input_path",
        default_value=data_prep_input_path)
    data_prep_input = (data_prep_input_path_pipeline_parameter,
                       DataPathComputeBinding(mode="mount"))
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get("datastore_output_name",
                                                        "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register the intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Load compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(
        workspace=workspace,
        name=data_prep_settings.get("compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []) +
        ["--input-datapath", data_prep_input],
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ############################################
    ### Creating inference Parallel Run Step ###
    ############################################

    # Load settings
    print("Loading settings")
    batch_inference_step_path = os.path.join("steps", "batch_inference")
    with open(os.path.join(batch_inference_step_path, "step.json")) as f:
        batch_inference_settings = json.load(f)

    # Setup datasets of second step
    print("Setting up datasets")
    batch_inference_input = data_prep_output.as_named_input(
        name=batch_inference_settings.get("dataset_input_name", None))
    batch_inference_output = PipelineData(
        name=batch_inference_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=batch_inference_settings.get("datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register the intermediate dataset
    #batch_inference_output.register(
    #    name=batch_inference_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    batch_inference_dependencies = CondaDependencies.create(
        pip_packages=batch_inference_settings.get("pip_packages", []),
        conda_packages=batch_inference_settings.get("conda_packages", []),
        python_version=batch_inference_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    batch_inference_run_config = RunConfiguration(
        conda_dependencies=batch_inference_dependencies,
        framework=batch_inference_settings.get("framework", "Python"))

    # Load compute target
    print("Loading ComputeTarget")
    batch_inference_compute_target = ComputeTarget(
        workspace=workspace,
        name=batch_inference_settings.get("compute_target_name", None))

    # Create python step
    print("Creating Step")
    batch_inference = PythonScriptStep(
        name=batch_inference_settings.get("step_name", None),
        script_name=batch_inference_settings.get("script_name", None),
        arguments=batch_inference_settings.get("arguments", []),
        compute_target=batch_inference_compute_target,
        runconfig=batch_inference_run_config,
        inputs=[batch_inference_input],
        outputs=[batch_inference_output],
        params=batch_inference_settings.get("parameters", []),
        source_directory=batch_inference_step_path,
        allow_reuse=batch_inference_settings.get("allow_reuse", True),
        version=batch_inference_settings.get("version", None),
    )

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[batch_inference],
        description="Batch Inference Pipeline",
    )

    return pipeline
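# A minimal sketch of publishing the batch-inference pipeline built above so it can be
# triggered later via REST or a schedule; the pipeline name and the local workspace
# config are assumptions for illustration.
from azureml.core import Workspace

ws = Workspace.from_config()
pipeline = create_experiment_config(ws)
published = pipeline.publish(name="batch-inference-pipeline",
                             description="Batch Inference Pipeline")
print(published.endpoint)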