Beispiel #1
0
def load_module(workspace, namespace, name, yaml_file_path):
    """Return the named module, registering it from YAML when loading fails.

    Args:
        workspace: Workspace passed to ``Module.load`` and ``Module.register``.
        namespace: Namespace used for the ``Module.load`` lookup.
        name: Module name used for the lookup and in the progress messages.
        yaml_file_path: Spec file passed to ``Module.register`` as
            ``yaml_file`` when the lookup fails.

    Returns:
        The result of ``Module.load``, or the result of ``Module.register``
        when the load raised.
    """
    try:
        module_func = Module.load(workspace=workspace, namespace=namespace, name=name)
    except Exception:
        # Any load error is treated as "not registered yet". Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of triggering a registration.
        print('not found the module of {}, register it now...'.format(name))
        return Module.register(workspace=workspace, yaml_file=yaml_file_path)
    print('found the module of {}'.format(name))
    return module_func
Beispiel #2
0
except:
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(workspace, aml_compute_target,
                                       provisioning_config)
    aml_compute.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

# In[ ]:

# Reuse the "Hello World MPI Job" module when it can be loaded; otherwise
# register it from the local module spec.
try:
    mpi_train_module_func = Module.load(
        workspace,
        namespace="microsoft.com/azureml/samples",
        name="Hello World MPI Job")
except Exception:
    # Exception (not a bare except) so that Ctrl-C / SystemExit are not
    # mistaken for a failed lookup and do not trigger a registration.
    mpi_train_module_func = Module.register(
        workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml'))

from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path
# Fetch the input dataset through get_global_dataset_by_path.
blob_input_data = get_global_dataset_by_path(
    workspace,
    'Automobile_price_data',
    'GenericCSV/Automobile_price_data_(Raw)',
)

# Instantiate the MPI module with its inputs, then set its run settings.
mpi_train = mpi_train_module_func(
    input_path=blob_input_data,
    string_parameter="test1",
)
mpi_train.runsettings.configure(
    node_count=2,
    process_count_per_node=2,
)

# Echo the node count stored on the run settings.
print(mpi_train.runsettings.node_count)
except:
    print("Creating new compute target: {}".format(aml_compute_target))
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = 1, 
                                                                max_nodes = 4)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)


# In[ ]:


# modules
# Each module gets its own load-or-register fallback, so a failed lookup of
# one module does not force a re-registration of the other.
try:
    ejoin_module_func = Module.load(ws, namespace='microsoft.com/bing', name='ejoin')
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    ejoin_module_func = Module.register(ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml'))

try:
    eselect_module_func = Module.load(ws, namespace='microsoft.com/bing', name='eselect')
except Exception:
    eselect_module_func = Module.register(ws, os.path.join('modules', 'eselect', 'amlmodule.yaml'))
    
# Reuse the workspace dataset when it is already registered; otherwise create
# it from the public file and register it.
training_data_name = "Titanic.tsv"
if training_data_name in ws.datasets:
    train_data = ws.datasets[training_data_name]
else:
    print('Registering a training dataset for sample pipeline ...')
    train_data = Dataset.File.from_files(path=['https://desginerdemo.blob.core.windows.net/demo/titanic.tsv'])
    # Keep the object returned by register() so that both branches leave
    # `train_data` bound to the registered dataset.
    train_data = train_data.register(workspace=ws,
                                     name=training_data_name,
                                     description='Training data (just for illustrative purpose)')
    print('Registerd')
Beispiel #4
0
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from azureml.core import Workspace
from azureml.pipeline.wrapper import Module, dsl

# Workspace handle built by Workspace.from_config().
ws = Workspace.from_config()

# Module function for 'Execute Python Script' from the 'azureml' namespace.
execute_python_script_module = Module.load(
    ws,
    namespace='azureml',
    name='Execute Python Script',
)


# Sub-pipeline that chains two 'Execute Python Script' steps: the first step
# receives the pipeline input, the second consumes the first step's
# `result_dataset`, and the second step's outputs are returned.
# NOTE(review): the parameter name `input` shadows the builtin; it is left
# unchanged because it is part of the signature callers (and presumably the
# dsl decorator) see -- confirm before renaming.
@dsl.pipeline(name='external sub0 graph', description='sub0')
def external_sub_pipeline0(input):
    module1 = execute_python_script_module(
        # should be pipeline input
        dataset1=input,
    )
    # Feed the first step's result dataset into a second instance of the module.
    module2 = execute_python_script_module(
        dataset1=module1.outputs.result_dataset,
    )
    # The sub-pipeline exposes the outputs of the last step.
    return module2.outputs
Beispiel #5
0
# register anonymous modules
import os
from azureml.pipeline.wrapper._module_registration import _load_anonymous_module
# Anonymous module built from the spec file in the local modules folder.
local_module = _load_anonymous_module(
    ws,
    yaml_file=os.path.join('modules', 'hello_world', 'module_spec.yaml'),
)
hello_world_module_id = local_module.module_version_id

# Anonymous module built from a spec referenced by URL.
github_yaml = "https://github.com/sherry1989/sample_modules/blob/master/3_basic_module/basic_module.yaml"
github_module = _load_anonymous_module(ws, yaml_file=github_yaml)
basic_module_id = github_module.module_version_id

# In[ ]:

# get modules
# Load both anonymous modules by their version ids, in the original order.
hello_world_anonymous, basic_module_anonymous = [
    Module.load(ws, id=module_version_id)
    for module_version_id in (hello_world_module_id, basic_module_id)
]

# In[ ]:

# get dataset
from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path
# Input dataset for the pipeline, fetched via get_global_dataset_by_path.
automobile_price_data_raw = get_global_dataset_by_path(
    ws,
    'automobile_price_data_raw',
    'GenericCSV/Automobile_price_data_(Raw)',
)

# In[ ]:


# define pipeline
@dsl.pipeline(name='module_SDK_test Run 8575',
              description='test local module',
Beispiel #6
0
    print("Found existing compute target: {}".format(aml_compute_target))
except:
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                                min_nodes=1,
                                                                max_nodes=4)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)


# In[ ]:


# Module
# Load the four modules from the 'azureml' namespace, in the original order.
(
    select_columns_in_dataset,
    clean_missing_data,
    split_data,
    join_data,
) = [
    Module.load(ws, namespace='azureml', name=builtin_name)
    for builtin_name in (
        'Select Columns in Dataset',
        'Clean Missing Data',
        'Split Data',
        'Join Data',
    )
]


# Dataset
# Use the dataset registered under this name when the lookup succeeds;
# otherwise build it from the "azureml_globaldatasets" datastore and register it.
try:
    dset = Dataset.get_by_name(ws, 'Automobile_price_data_(Raw)')
except Exception:
    global_datastore = Datastore(ws, name="azureml_globaldatasets")
    dset = Dataset.File.from_files(global_datastore.path('GenericCSV/Automobile_price_data_(Raw)'))
    # Keep the object returned by register() so that both paths leave `dset`
    # bound to the registered dataset.
    dset = dset.register(workspace=ws,
                         name='Automobile_price_data_(Raw)',
                         create_new_version=True)
blob_input_data = dset
Beispiel #7
0
    aml_compute = AmlCompute(workspace, aml_compute_target)
    print("Found existing compute target: {}".format(aml_compute_target))
except:
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(workspace, aml_compute_target,
                                       provisioning_config)
    aml_compute.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

# Reuse the "Hello World MPI Job" module when it can be loaded; otherwise
# register it from the local module spec.
try:
    mpi_train_module_func = Module.load(
        workspace,
        namespace="microsoft.com/azureml/samples",
        name="Hello World MPI Job")
except Exception:
    # Exception (not a bare except) so that Ctrl-C / SystemExit are not
    # mistaken for a failed lookup and do not trigger a registration.
    mpi_train_module_func = Module.register(
        workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml'))

from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path
# Fetch the input dataset through get_global_dataset_by_path.
blob_input_data = get_global_dataset_by_path(
    workspace,
    'Automobile_price_data',
    'GenericCSV/Automobile_price_data_(Raw)',
)

# Instantiate the MPI module with its inputs, then set its run settings.
mpi_train = mpi_train_module_func(
    input_path=blob_input_data,
    string_parameter="test1",
)
mpi_train.runsettings.configure(
    node_count=2,
    process_count_per_node=2,
)

# Echo the node count stored on the run settings.
print(mpi_train.runsettings.node_count)
Beispiel #8
0
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target,
                                       provisioning_config)
    aml_compute.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

# In[ ]:

# modules

# Each module gets its own load-or-register fallback, so a failed lookup of
# one module does not force a re-registration of the other.
try:
    ejoin_module_func = Module.load(ws,
                                    namespace='microsoft.com/bing',
                                    name='ejoin')
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    ejoin_module_func = Module.register(
        ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml'))

try:
    eselect_module_func = Module.load(ws,
                                      namespace='microsoft.com/bing',
                                      name='eselect')
except Exception:
    eselect_module_func = Module.register(
        ws, os.path.join('modules', 'eselect', 'amlmodule.yaml'))

# Load the two modules from the 'azureml' namespace, in the original order.
join_data_module_func, train_svd_recommender_module_func = [
    Module.load(ws, namespace='azureml', name=builtin_name)
    for builtin_name in ('Join Data', 'Train SVD Recommender')
]
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("Found existing compute target: {}".format(aml_compute_target))
except:
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target,
                                       provisioning_config)
    aml_compute.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

# In[ ]:

# Load the six modules from the 'azureml' namespace, in the original order.
(
    join_data_module_func,
    execute_python_script_module_func,
    remove_duplicate_rows_module_func,
    split_data_module_func,
    train_svd_recommender_module_func,
    select_columns_module_func,
) = [
    Module.load(ws, namespace='azureml', name=builtin_name)
    for builtin_name in (
        'Join Data',
        'Execute Python Script',
        'Remove Duplicate Rows',
        'Split Data',
        'Train SVD Recommender',
        'Select Columns in Dataset',
    )
]
except:
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target,
                                       provisioning_config)
    aml_compute.wait_for_completion(show_output=True,
                                    min_node_count=None,
                                    timeout_in_minutes=20)

# In[ ]:

# Reuse the 'Train' sample module when it can be loaded; otherwise register
# it from the local spec file.
try:
    train_module_func = Module.load(ws,
                                    namespace='microsoft.com/aml/samples',
                                    name='Train')
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
    # instead of triggering a registration.
    train_module_func = Module.register(
        ws, os.path.join('modules', 'train-score-eval', 'train.yaml'))

# Reuse the 'Score' sample module when it can be loaded; otherwise register
# it from the local spec file.
try:
    score_module_func = Module.load(ws,
                                    namespace='microsoft.com/aml/samples',
                                    name='Score')
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
    # instead of triggering a registration.
    score_module_func = Module.register(
        ws, os.path.join('modules', 'train-score-eval', 'score.yaml'))

try:
    eval_module_func = Module.load(ws,