from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path

# Resolve the shared sample CSV from the workspace's global datastore.
blob_input_data = get_global_dataset_by_path(
    workspace,
    'Automobile_price_data',
    'GenericCSV/Automobile_price_data_(Raw)')

# Instantiate the MPI training module and tune its distributed run settings.
mpi_train = mpi_train_module_func(
    input_path=blob_input_data,
    string_parameter="test1")
mpi_train.runsettings.configure(node_count=2, process_count_per_node=2)
print(mpi_train.runsettings.node_count)
# Run settings also accept plain attribute assignment.
mpi_train.runsettings.node_count = 1

# In[ ]:

# Wrap the single module into a pipeline targeting the AML compute cluster.
test_pipeline = Pipeline(
    nodes=[mpi_train],
    name="test mpi",
    default_compute_target='aml-compute')

# In[ ]:

# Validate the graph; any problems are returned rather than raised.
errors = test_pipeline.validate()

# In[ ]:

# Submit to the service and block until the run finishes.
run = test_pipeline.submit(experiment_name='mpi_test')
run.wait_for_completion()

# In[ ]:

# Persist the pipeline as a reusable draft.
pipeline_draft = test_pipeline.save(experiment_name='module_SDK_mpi_test')
# Configure the join module's input port to mount the dataset instead of
# downloading it.
ejoin.inputs.leftinput.configure(mode='mount')
print(ejoin.inputs.leftinput.mode)

# Configure outputs: mount-write the join result to a user-owned blob store.
ejoin.outputs.ejoin_output.configure(
    output_mode='mount',
    datastore=Datastore(ws, name="myownblob"))
print(ejoin.outputs.ejoin_output.output_mode)
print(ejoin.outputs.ejoin_output.datastore.name)

# Chain a column-selection module onto the join output.
eselect = eselect_module_func(
    columns='Survived;Name;Sex;Age',
    input=ejoin.outputs.ejoin_output)

# Assemble both modules into a pipeline on the AML compute cluster.
pipeline = Pipeline(
    nodes=[ejoin, eselect],
    outputs=eselect.outputs,
    default_compute_target='aml-compute')

# In[ ]:

# Check the graph for wiring/parameter problems.
pipeline.validate()

# In[ ]:

# Submit the pipeline as an experiment run.
run = pipeline.submit(experiment_name='module_SDK_test')
# NOTE(review): collapsed notebook-export cell; this fragment is TRUNCATED at
# both edges — it opens inside an if/else whose `if` header (presumably a
# dataset-existence check; verify against the preceding cell) is outside this
# view, and it ends mid-call in the final `Pipeline(...)` construction.
# Visible logic: register (or look up) the training dataset, then compose
# nested pipelines: pipeline1 wraps module1→module2, pipeline2 wraps
# pipeline1→module3→module4, and module5 joins train_data with pipeline2's
# result. The misspelled 'Registerd' is a runtime string — flagged only, not
# changed here.
train_data.register( workspace=ws, name=training_data_name, description='Training data (just for illustrative purpose)') print('Registerd') else: train_data = ws.datasets[training_data_name] print('Training dataset found in workspace') # In[ ]: module1 = execute_python_script_module(dataset1=global_input_data, ) module2 = execute_python_script_module( dataset1=module1.outputs.result_dataset, ) pipeline1 = Pipeline(nodes=[module2, module1], outputs=module2.outputs, name="p1", default_compute_target='aml-compute') module3 = execute_python_script_module( dataset1=pipeline1.outputs.result_dataset, ) module4 = execute_python_script_module( dataset1=module3.outputs.result_dataset, ) pipeline2 = Pipeline(nodes=[module3, module4, pipeline1], outputs=module4.outputs, name="p2") module5 = execute_python_script_module( dataset1=train_data, dataset2=pipeline2.outputs.result_dataset) pipeline = Pipeline(nodes=[pipeline2, module5], outputs=module5.outputs,
# NOTE(review): collapsed notebook-export cell; TRUNCATED at the start — the
# opening tokens are the tail of a call (presumably a Module registration
# from the 'mpi_module' spec YAML; confirm against the preceding cell).
# Visible logic: resolve the sample dataset, build/configure the MPI training
# module, wrap it in a pipeline and validate (no submit here), then a new
# notebook section begins: imports, workspace bootstrap via
# Workspace.from_config(), and a diagnostic print of workspace identity.
workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml')) from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') mpi_train = mpi_train_module_func(input_path=blob_input_data, string_parameter="test1") mpi_train.runsettings.configure(node_count=2, process_count_per_node=2) print(mpi_train.runsettings.node_count) mpi_train.runsettings.node_count = 1 test_pipeline = Pipeline(nodes=[mpi_train], name="test mpi", default_compute_target='aml-compute') test_pipeline.validate() # In[ ]: import json from azureml.core import Workspace, Dataset from azureml.pipeline.wrapper import Module, dsl from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path from external_sub_pipeline import external_sub_pipeline0 ws = Workspace.from_config() print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n') # Module
# NOTE(review): collapsed notebook-export cell; TRUNCATED at the end — the
# final `join_data = join_data_module_func(` call is cut mid-argument-list.
# This cell deliberately builds a faulty pipeline (missing 'rightcolumns'
# and 'columns' parameters, per the inline comments) to demonstrate what
# pipeline.validate() reports, then starts a second negative example
# ("Type mismatch & Invalid range"). The escaped JSON literal is a runtime
# column-selection rule string and must not be reformatted.
# steps ejoin = ejoin_module_func().set_parameters( leftcolumns='m:query;querId', # missing 'rightcolumns' parameter leftkeys='m:query', rightkeys='m:Query', jointype='HashInner').set_inputs(left_input=input1, right_input=input2) eselect = eselect_module_func( # missing 'columns' parameter input=ejoin.outputs.ejoin_output) # pipeline pipeline = Pipeline(nodes=[ejoin, eselect], outputs=eselect.outputs, default_compute_target="aml-compute") # In[ ]: graph = pipeline.validate() graph # In[ ]: # Type mismatch & Invalid range join_data = join_data_module_func( dataset1=movie_ratings_data, dataset2=imdb_movie_titles_data, comma_separated_case_sensitive_names_of_join_key_columns_for_l= "{\"isFilter\":true,\"rules\":[{\"exclude\":false,\"ruleType\":\"ColumnNames\",\"columns\":[\"MovieId\"]}]}",
# NOTE(review): collapsed notebook-export cell; TRUNCATED at the start — the
# opening tokens are the remaining keyword arguments of a `set_parameters`
# call on an ejoin module begun outside this view. Visible logic: finish
# configuring the join, chain a column-selection module, assemble both into
# a named pipeline ('module sdk test draft'), then submit it; the validate
# step is commented out and marked TODO in the original.
rightcolumns='Market', leftkeys='m:query', rightkeys='m:Query', jointype='HashInner' ).set_inputs( left_input=input1, right_input=input2 ) eselect = eselect_module_func( columns='m:query;Market', input=ejoin.outputs.ejoin_output ) # pipeline pipeline = Pipeline(nodes=[ejoin, eselect], outputs=eselect.outputs, name='module sdk test draft', default_compute_target='aml-compute') # In[ ]: # Graph/module validation and visualization with .validate() function # pipeline.validate() #TODO # In[ ]: run = pipeline.submit( experiment_name='module_SDK_test' )