Example #1
import os

import ruamel.yaml
from azureml.core.runconfig import RunConfiguration

# get_run_config_dir_name and COMPUTECONTEXT_EXTENSION are internal azureml
# helpers, assumed to be importable in the original module.


def _write_compute_run_config(source_directory, compute_target_object, compute_yaml):
    """
    :param source_directory:
    :type source_directory: str
    :param compute_target_object:
    :type compute_target_object: azureml.core.compute_target.AbstractComputeTarget
    :param compute_yaml:
    :type compute_yaml: dict
    :return:
    """
    from azureml.core.compute_target import _BatchAITarget
    # Writing the target.compute file.
    run_config_dir_name = get_run_config_dir_name(source_directory)
    file_path = os.path.join(source_directory, run_config_dir_name,
                             compute_target_object.name + COMPUTECONTEXT_EXTENSION)
    with open(file_path, 'w') as outfile:
        ruamel.yaml.dump(compute_yaml, outfile, default_flow_style=False)

    # This creates a run config and writes it in the aml_config/<compute_target_name>.runconfig file
    run_config_object = RunConfiguration()
    run_config_object.target = compute_target_object

    if compute_target_object.type == _BatchAITarget._BATCH_AI_TYPE:
        run_config_object.environment.docker.enabled = True

    run_config_object.framework = compute_target_object._default_framework

    run_config_object.save(name=compute_target_object.name, path=source_directory)
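For context, a minimal sketch of how this helper might be invoked. The stand-in compute target and the YAML payload below are assumptions; the real callers live elsewhere in the azureml SDK.

from types import SimpleNamespace

# Hypothetical stand-in exposing only the attributes the helper reads.
fake_target = SimpleNamespace(name="my-cluster", type="amlcompute",
                              _default_framework="Python")

# Writes my-cluster.compute into the run config directory and a matching
# my-cluster.runconfig via RunConfiguration.save().
_write_compute_run_config(
    source_directory=".",
    compute_target_object=fake_target,
    compute_yaml={"type": "amlcompute"},
)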
Example #2
def _create_default_run_configs(project_directory, compute_target_dict):
    """
    Creates a local.runconfig and docker.runconfig for a project.
    :return: None
    """
    from azureml.core.runconfig import RunConfiguration
    from types import SimpleNamespace

    # Mock a project object: RunConfiguration historically required a Project,
    # but only its project_directory field is read here.
    project_object = SimpleNamespace(project_directory=project_directory)

    # Creating a local runconfig.
    local_run_config = RunConfiguration()
    local_run_config.save(name="local", path=project_directory)

    # Creating a docker runconfig.
    docker_run_config = RunConfiguration()
    docker_run_config.environment.docker.enabled = True
    docker_run_config.save(name="docker", path=project_directory)

    for compute_target_name, compute_target in compute_target_dict.items():
        # Creating a compute runconfig.
        compute_config = RunConfiguration()
        if compute_target.type == 'HDInsight':
            compute_config.framework = "PySpark"
        else:
            compute_config.framework = "Python"
            compute_config.environment.docker.enabled = True
        compute_config.target = compute_target_name
        compute_config.save(name=compute_target_name, path=project_directory)
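A minimal usage sketch; the compute target objects are stand-ins, since the function above only inspects their type attribute.

from types import SimpleNamespace

targets = {
    "spark-cluster": SimpleNamespace(type="HDInsight"),  # PySpark, no Docker
    "cpu-cluster": SimpleNamespace(type="amlcompute"),   # Python, Docker enabled
}

# Produces local.runconfig, docker.runconfig, and one runconfig per target.
_create_default_run_configs("./my_project", targets)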
Example #3
import os

from azureml.core import Dataset
from azureml.core.runconfig import RunConfiguration

# `workspace`, `cd` (a CondaDependencies object), `compute_target`, and the
# `load_data` helper are assumed to be defined earlier in the full script.

# Define dataset names
input_name_train = 'newsgroups_train'
input_name_test = 'newsgroups_test'

# Retrieve datasets
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Runconfig
amlcompute_run_config = RunConfiguration(
    script="train.py",
    conda_dependencies=cd,
    framework='Python',
)

amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test)
}

amlcompute_run_config.save(
    path=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                      "RunConfig/", "runconfig_fullmodel.yml"),
    name='sklearn',
    separate_environment_yaml=True,
)
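A saved configuration can be reloaded and submitted later. A minimal sketch, assuming save() above wrote RunConfig/runconfig_fullmodel.yml relative to the working directory and that workspace is the same Workspace object; the experiment name is an arbitrary choice.

from azureml.core import Experiment, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration

# Reload the configuration from the file written by save() above.
run_config = RunConfiguration.load(path="RunConfig/runconfig_fullmodel.yml")

src = ScriptRunConfig(source_directory=".", script="train.py",
                      run_config=run_config)
run = Experiment(workspace, "sklearn-train").submit(src)
run.wait_for_completion(show_output=True)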
Example #4
from azureml.core.runconfig import DataReferenceConfiguration

# Retrieve datasets (names and setup carry over from Example #3)
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Runconfig
amlcompute_run_config = RunConfiguration(conda_dependencies=cd,
                                         script="data_validation.py")

amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test)
}
amlcompute_run_config.data_references = {
    "baseline_profile": DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='baseline_profile',
    ),
}
amlcompute_run_config.save(
    path=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                      "RunConfig/", "runconfig_data_validation.yml"),
    name='datavalidationsubset',
    separate_environment_yaml=True,
)
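The load_data helper is never shown in these snippets. One plausible reconstruction, assuming it wraps a Dataset as a named input that is downloaded onto the compute; Data.create and the consumption-config methods are azureml-core APIs, but the helper itself is a guess:

from azureml.core.runconfig import Data

def load_data(dataset, input_name):
    # Hypothetical: expose the dataset to the run under input_name,
    # downloading it to the compute before the script starts.
    return Data.create(dataset.as_named_input(input_name).as_download())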
Example #5
input_name_train = 'newsgroups_raw_subset_train'

# Retrieve dataset
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)

# Runconfig
amlcompute_run_config = RunConfiguration(conda_dependencies=cd,
                                         script="create_historic_profile.py")

amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
}
amlcompute_run_config.data_references = {
    'historic_profile': DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='historic_profile',
    ),
}

amlcompute_run_config.save(
    path=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                      "RunConfig/", "runconfig_data_profiling.yml"),
    name='dataprofiling',
    separate_environment_yaml=True,
)