def load_module(workspace, namespace, name, yaml_file_path):
    """Load a module by namespace and name, registering it from YAML on failure.

    :param workspace: workspace passed through to ``Module.load`` / ``Module.register``.
    :param namespace: namespace passed to ``Module.load``.
    :param name: module name passed to ``Module.load``; also used in the
        progress messages printed by this function.
    :param yaml_file_path: passed as ``yaml_file`` to ``Module.register`` when
        loading fails.
    :return: whatever ``Module.load`` returns, or the result of
        ``Module.register`` if loading raised.
    """
    try:
        # Keep the try body to the single call that is expected to fail.
        module_func = Module.load(workspace=workspace, namespace=namespace, name=name)
    except Exception:
        # Any ordinary load failure is treated as "not registered yet".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of triggering a registration.
        print('not found the module of {}, register it now...'.format(name))
        return Module.register(workspace=workspace, yaml_file=yaml_file_path)
    print('found the module of {}'.format(name))
    return module_func
def test_module_from_func(self):
    """Build a module from ``fasttext_evaluation``, run it with
    ``use_docker=True`` and expect the status ``'Completed'``."""
    # This test calls fasttext_evaluation from cmd line arguments.
    module_factory = Module.from_func(self.workspace, fasttext_evaluation)
    instance = module_factory()

    input_kwargs = self.prepare_inputs()
    instance.set_inputs(**input_kwargs)

    run_status = instance.run(use_docker=True)
    self.assertEqual(run_status, 'Completed', msg='Module run failed.')
def test_module_from_func(self):
    """Build a module from ``mpi_module``, set its inputs and parameters,
    run it with ``use_docker=True`` and expect the status ``'Completed'``."""
    # This test calls mpi_module from cmd line arguments.
    module_factory = Module.from_func(self.workspace, mpi_module)
    instance = module_factory()

    input_kwargs = self.prepare_inputs()
    instance.set_inputs(**input_kwargs)
    parameter_kwargs = self.prepare_parameters()
    instance.set_parameters(**parameter_kwargs)

    run_status = instance.run(use_docker=True)
    self.assertEqual(run_status, 'Completed', msg='Module run failed.')
def test_module_from_func(self):
    """Build a module from ``compare_two_models``, set its inputs and
    parameters, run it with ``use_docker=True`` using ``self.base_path`` as
    the working directory, and expect the status ``'Completed'``."""
    # This test calls compare_two_models from cmd line arguments.
    module_factory = Module.from_func(self.workspace, compare_two_models)
    instance = module_factory()

    input_kwargs = self.prepare_inputs()
    instance.set_inputs(**input_kwargs)
    parameter_kwargs = self.prepare_parameters()
    instance.set_parameters(**parameter_kwargs)

    run_status = instance.run(working_dir=str(self.base_path), use_docker=True)
    self.assertEqual(run_status, 'Completed', msg='Module run failed.')
def test_relative(self):
    """Run ``basic_module`` with a relative ``input_dir`` while the working
    directory is switched to two levels above this file, and expect the
    status ``'Completed'``."""
    module_factory = Module.from_func(self.workspace, basic_module)
    instance = module_factory()

    base_dir = Path(__file__).parent.parent
    with _change_working_dir(base_dir):
        # The input path is given relative to the directory switched to above.
        instance.set_inputs(input_dir='data/basic_module/inputs/input_dir')
        instance.set_parameters(str_param='local_test')
        run_status = instance.run(use_docker=False)

    self.assertEqual(run_status, 'Completed', msg='Module run failed.')
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(workspace, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: try: mpi_train_module_func = Module.load( workspace, namespace="microsoft.com/azureml/samples", name="Hello World MPI Job") except: mpi_train_module_func = Module.register( workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml')) from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') mpi_train = mpi_train_module_func(input_path=blob_input_data, string_parameter="test1") mpi_train.runsettings.configure(node_count=2, process_count_per_node=2) print(mpi_train.runsettings.node_count)
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2", min_nodes = 1, max_nodes = 4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # modules try: ejoin_module_func = Module.load(ws, namespace='microsoft.com/bing', name='ejoin') eselect_module_func = Module.load(ws, namespace='microsoft.com/bing', name='eselect') except: ejoin_module_func = Module.register(ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml')) eselect_module_func = Module.register(ws, os.path.join('modules', 'eselect', 'amlmodule.yaml')) training_data_name = "Titanic.tsv" if training_data_name not in ws.datasets: print('Registering a training dataset for sample pipeline ...') train_data = Dataset.File.from_files(path=['https://desginerdemo.blob.core.windows.net/demo/titanic.tsv']) train_data.register(workspace = ws, name = training_data_name, description = 'Training data (just for illustrative purpose)') print('Registerd') else: train_data = ws.datasets[training_data_name]
# --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- from azureml.core import Workspace from azureml.pipeline.wrapper import Module, dsl ws = Workspace.from_config() execute_python_script_module = Module.load(ws, namespace='azureml', name='Execute Python Script') @dsl.pipeline(name='external sub0 graph', description='sub0') def external_sub_pipeline0(input): module1 = execute_python_script_module( # should be pipeline input dataset1=input, ) module2 = execute_python_script_module( dataset1=module1.outputs.result_dataset, ) return module2.outputs
# register anonymous modules import os from azureml.pipeline.wrapper._module_registration import _load_anonymous_module local_module = _load_anonymous_module(ws, yaml_file=os.path.join( 'modules', 'hello_world', 'module_spec.yaml')) github_yaml = "https://github.com/sherry1989/sample_modules/blob/master/3_basic_module/basic_module.yaml" github_module = _load_anonymous_module(ws, yaml_file=github_yaml) hello_world_module_id = local_module.module_version_id basic_module_id = github_module.module_version_id # In[ ]: # get modules hello_world_anonymous = Module.load(ws, id=hello_world_module_id) basic_module_anonymous = Module.load(ws, id=basic_module_id) # In[ ]: # get dataset from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path automobile_price_data_raw = get_global_dataset_by_path( ws, 'automobile_price_data_raw', 'GenericCSV/Automobile_price_data_(Raw)') # In[ ]: # define pipeline @dsl.pipeline(name='module_SDK_test Run 8575', description='test local module',
print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # Module select_columns_in_dataset = Module.load(ws, namespace='azureml', name='Select Columns in Dataset') clean_missing_data = Module.load(ws, namespace='azureml', name='Clean Missing Data') split_data = Module.load(ws, namespace='azureml', name='Split Data') join_data = Module.load(ws, namespace='azureml', name='Join Data') # Dataset try: dset = Dataset.get_by_name(ws, 'Automobile_price_data_(Raw)') except Exception: global_datastore = Datastore(ws, name="azureml_globaldatasets") dset = Dataset.File.from_files(global_datastore.path('GenericCSV/Automobile_price_data_(Raw)')) dset.register(workspace=ws, name='Automobile_price_data_(Raw)', create_new_version=True) blob_input_data = dset
aml_compute = AmlCompute(workspace, aml_compute_target) print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(workspace, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) try: mpi_train_module_func = Module.load( workspace, namespace="microsoft.com/azureml/samples", name="Hello World MPI Job") except: mpi_train_module_func = Module.register( workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml')) from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') mpi_train = mpi_train_module_func(input_path=blob_input_data, string_parameter="test1") mpi_train.runsettings.configure(node_count=2, process_count_per_node=2) print(mpi_train.runsettings.node_count)
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(workspace, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # load datasets github_yaml = "https://github.com/sherry1989/sample_modules/blob/master/3_basic_module/basic_module.yaml" github_module = Module.from_yaml(workspace, yaml_file=github_yaml) blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') hello_world = Module.from_yaml(workspace, yaml_file=os.path.join('modules', 'hello_world', 'module_spec.yaml')) hello_world_demo1 = Module.from_yaml(workspace, yaml_file=os.path.join( 'modules', 'hello_world', 'module_replacement_demo1.yaml')) hello_world_demo2 = Module.from_yaml(workspace, yaml_file=os.path.join( 'modules', 'hello_world',
provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # modules try: ejoin_module_func = Module.load(ws, namespace='microsoft.com/bing', name='ejoin') eselect_module_func = Module.load(ws, namespace='microsoft.com/bing', name='eselect') except: ejoin_module_func = Module.register( ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml')) eselect_module_func = Module.register( ws, os.path.join('modules', 'eselect', 'amlmodule.yaml')) join_data_module_func = Module.load(ws, namespace='azureml', name='Join Data') train_svd_recommender_module_func = Module.load(ws, namespace='azureml', name='Train SVD Recommender')
aml_compute = AmlCompute(ws, aml_compute_target) print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: join_data_module_func = Module.load(ws, namespace='azureml', name='Join Data') execute_python_script_module_func = Module.load(ws, namespace='azureml', name='Execute Python Script') remove_duplicate_rows_module_func = Module.load(ws, namespace='azureml', name='Remove Duplicate Rows') split_data_module_func = Module.load(ws, namespace='azureml', name='Split Data') train_svd_recommender_module_func = Module.load(ws, namespace='azureml', name='Train SVD Recommender') select_columns_module_func = Module.load(ws, namespace='azureml', name='Select Columns in Dataset')
from datetime import datetime from azureml.core import Workspace, Dataset from azureml.pipeline.wrapper import Module, dsl from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path # In[ ]: ws = Workspace.from_config() #ws = Workspace.get(name='itp-pilot', subscription_id='4aaa645c-5ae2-4ae9-a17a-84b9023bc56a', resource_group='itp-pilot-ResGrp') print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n') # In[ ]: # Module modulefunc = Module.from_yaml(ws, yaml_file=os.path.join('modules', 'noop', '1in2out.spec.yaml')) # Dataset data = get_global_dataset_by_path(ws, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') # In[ ]: @dsl.pipeline( name='A huge pipeline composed with nodes 1 in 2 outs', description='A sample', default_compute_target='aml-compute' # 'k80-16-a' ) def cell_division():
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: try: train_module_func = Module.load(ws, namespace='microsoft.com/aml/samples', name='Train') except: train_module_func = Module.register( ws, os.path.join('modules', 'train-score-eval', 'train.yaml')) try: score_module_func = Module.load(ws, namespace='microsoft.com/aml/samples', name='Score') except: score_module_func = Module.register( ws, os.path.join('modules', 'train-score-eval', 'score.yaml')) try: eval_module_func = Module.load(ws,