import os

import ruamel.yaml

from azureml.core.runconfig import RunConfiguration

# get_run_config_dir_name and COMPUTECONTEXT_EXTENSION are module-level helpers
# from the surrounding SDK module.


def _write_compute_run_config(source_directory, compute_target_object, compute_yaml):
    """Write the compute target's .compute file and a matching .runconfig file.

    :param source_directory: The project's source directory.
    :type source_directory: str
    :param compute_target_object: The compute target to write config files for.
    :type compute_target_object: azureml.core.compute_target.AbstractComputeTarget
    :param compute_yaml: The compute target configuration to serialize.
    :type compute_yaml: dict
    :return: None
    """
    from azureml.core.compute_target import _BatchAITarget

    # Write the <compute_target_name>.compute file.
    run_config_dir_name = get_run_config_dir_name(source_directory)
    file_path = os.path.join(source_directory, run_config_dir_name,
                             compute_target_object.name + COMPUTECONTEXT_EXTENSION)
    with open(file_path, 'w') as outfile:
        ruamel.yaml.dump(compute_yaml, outfile, default_flow_style=False)

    # Create a run config and write it to aml_config/<compute_target_name>.runconfig.
    run_config_object = RunConfiguration()
    run_config_object.target = compute_target_object

    # Batch AI targets always run inside Docker.
    if compute_target_object.type == _BatchAITarget._BATCH_AI_TYPE:
        run_config_object.environment.docker.enabled = True

    run_config_object.framework = compute_target_object._default_framework
    run_config_object.save(name=compute_target_object.name, path=source_directory)
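# For context: a .runconfig written by the helper above round-trips through
# RunConfiguration.load. A minimal sketch, assuming a hypothetical project at
# ./my_project with an attached target named "my-cluster":
#
#     loaded = RunConfiguration.load(path="./my_project", name="my-cluster")
#     print(loaded.framework, loaded.target)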
def _create_default_run_configs(project_directory, compute_target_dict):
    """
    Creates a local.runconfig and a docker.runconfig for a project, plus one
    runconfig per attached compute target.

    :param project_directory: The project's root directory.
    :type project_directory: str
    :param compute_target_dict: Attached compute targets, keyed by target name.
    :type compute_target_dict: dict
    :return: None
    """
    from azureml.core.runconfig import RunConfiguration

    # Mock a project object: RunConfiguration historically required a Project,
    # but only reads its project_directory field, so any attribute-bearing
    # object (here, a function) works as a stand-in.
    project_object = empty_function
    project_object.project_directory = project_directory

    # Create a local runconfig.
    local_run_config = RunConfiguration()
    local_run_config.save(name="local", path=project_directory)

    # Create a docker runconfig.
    docker_run_config = RunConfiguration()
    docker_run_config.environment.docker.enabled = True
    docker_run_config.save(name="docker", path=project_directory)

    # Create one runconfig per attached compute target. HDInsight targets run
    # PySpark; everything else runs plain Python.
    for compute_target_name, compute_target in compute_target_dict.items():
        compute_config = RunConfiguration()
        if compute_target.type == 'HDInsight':
            compute_config.framework = "PySpark"
        else:
            compute_config.framework = "Python"
        compute_config.environment.docker.enabled = True
        compute_config.target = compute_target_name
        compute_config.save(name=compute_target_name, path=project_directory)
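# Usage sketch for the helper above (the directory and target names are
# hypothetical): this writes local.runconfig, docker.runconfig, and one
# <target_name>.runconfig per attached target into the project's config folder:
#
#     _create_default_run_configs("./my_project", {"my-hdi": my_hdi_target})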
import os

from azureml.core import Dataset
from azureml.core.runconfig import RunConfiguration

# workspace, cd (CondaDependencies), compute_target, and load_data are defined
# elsewhere in the surrounding script.

# Define dataset names.
input_name_train = 'newsgroups_train'
input_name_test = 'newsgroups_test'

# Retrieve datasets.
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Build the run configuration for the full model training step.
amlcompute_run_config = RunConfiguration(
    script="train.py",
    conda_dependencies=cd,
    framework='Python',
)
amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test),
}

amlcompute_run_config.save(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_fullmodel.yml",
    ),
    name='sklearn',
    separate_environment_yaml=True,
)
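# load_data is a helper defined earlier in the surrounding script. A plausible
# sketch of it, assuming the run config's data dict takes
# azureml.core.runconfig.Data entries built from a DatasetConsumptionConfig
# (this is an assumption, not the repo's actual helper):

from azureml.core.runconfig import Data


def load_data(dataset, name):
    # Mount the registered dataset under the given input name (assumed behavior).
    return Data.create(dataset.as_named_input(name).as_mount())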
import os

from azureml.core import Dataset
from azureml.core.runconfig import DataReferenceConfiguration, RunConfiguration

# workspace, cd, compute_target, load_data, and the input names are defined
# elsewhere in the surrounding script.

# Retrieve datasets.
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Build the run configuration for the data validation step.
amlcompute_run_config = RunConfiguration(
    conda_dependencies=cd,
    script="data_validation.py",
)
amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test),
}
# Download the stored baseline profile from the workspace blob datastore.
amlcompute_run_config.data_references = {
    "baseline_profile": DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='baseline_profile',
    ),
}

amlcompute_run_config.save(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_data_validation.yml",
    ),
    name='datavalidationsubset',
    separate_environment_yaml=True,
)
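# The data reference above only downloads what already exists on the datastore.
# A hedged sketch of staging the baseline profile beforehand, assuming
# workspaceblobstore is the workspace's default datastore and that a local
# ./baseline_profile folder exists (hypothetical path):

datastore = workspace.get_default_datastore()
datastore.upload(
    src_dir='baseline_profile',
    target_path='baseline_profile',
    overwrite=True,
)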
import os

from azureml.core import Dataset
from azureml.core.runconfig import DataReferenceConfiguration, RunConfiguration

# workspace, cd, compute_target, and load_data are defined elsewhere in the
# surrounding script.

input_name_train = 'newsgroups_raw_subset_train'

# Retrieve dataset.
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)

# Build the run configuration for the data profiling step.
amlcompute_run_config = RunConfiguration(
    conda_dependencies=cd,
    script="create_historic_profile.py",
)
amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
}
# Download the previously stored historic profile from the workspace blob datastore.
amlcompute_run_config.data_references = {
    'historic_profile': DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='historic_profile',
    ),
}

amlcompute_run_config.save(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_data_profiling.yml",
    ),
    name='dataprofiling',
    separate_environment_yaml=True,
)
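# The saved YAML run configs are consumed at submission time. A minimal sketch,
# assuming the older SDK's ScriptRunConfig(run_config=...) style used in these
# snippets and a hypothetical experiment name:

from azureml.core import Experiment, ScriptRunConfig

run_config = RunConfiguration.load(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_data_profiling.yml",
    ),
)
src = ScriptRunConfig(source_directory='.', run_config=run_config)
run = Experiment(workspace, 'data-profiling').submit(src)
run.wait_for_completion(show_output=True)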