Example #1
# Inner helper from azureml-core that validates a run's inputs/outputs and
# normalizes them into Data / OutputData entries. The names `workspace`,
# `run_config`, `input_data`, and `output_data` (along with the dataset
# classes and UserErrorException) are captured from the enclosing scope.
def update_io(inputs, outputs):
    for key, value in inputs.items():
        if isinstance(value, _Dataset):
            raise UserErrorException(
                "Dataset cannot be used without providing a name for the run. Please provide "
                "a name by calling the as_named_input instance method on dataset."
            )
        elif isinstance(value, DatasetConsumptionConfig):
            value.dataset._ensure_saved(workspace)
            inputs[key] = Data.create(value)
            input_data.append(value)

            # Record datasets consumed as "latest" so their mount
            # validation is skipped at runtime
            if value.dataset._consume_latest:
                env_vars = run_config.environment.environment_variables
                if _SKIP_VALIDATE_DATASETS not in env_vars:
                    env_vars[_SKIP_VALIDATE_DATASETS] = value.name
                else:
                    env_vars[_SKIP_VALIDATE_DATASETS] = ",".join(
                        [env_vars[_SKIP_VALIDATE_DATASETS], value.name])
        elif isinstance(value, Data):
            input_data.append(value)
        else:
            raise UserErrorException("{} cannot be used as input.".format(
                type(value).__name__))
    for key, value in outputs.items():
        if isinstance(value, OutputDatasetConfig):
            outputs[key] = output_data[key] = value._to_output_data()
        elif isinstance(value, OutputData):
            output_data[key] = value
        else:
            raise UserErrorException("{} cannot be used as output.".format(
                type(value).__name__))

Example #2
# Builds a runconfig Data entry that mounts the given dataset under the
# provided input name. Assumes Data, DataLocation, and RunDataset are
# imported from azureml.core.runconfig.
def load_data(dataset, input_name):
    data = Data(
        data_location=DataLocation(dataset=RunDataset(dataset_id=dataset.id)),
        create_output_directories=False,
        mechanism='mount',
        environment_variable_name=input_name,
        overwrite=True)
    return data
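
A minimal usage sketch for load_data, assuming a registered `dataset` and a ScriptRunConfig `src` as in the last example below (both names are placeholders):

input_name = 'training_data'  # hypothetical input name
src.run_config.data = {input_name: load_data(dataset, input_name)}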
Example #3
def _to_input_config(config):
    # Rebuild a Data object from its serialized dictionary representation.
    from azureml.core.runconfig import Data, DataLocation, Dataset
    data_location_json = config.get("DataLocation", None)
    dataset_json = data_location_json.get(
        "Dataset", None) if data_location_json else None
    dataset_id = dataset_json.get("Id") if dataset_json else None
    dataset_name = dataset_json.get("Name") if dataset_json else None
    dataset_version = dataset_json.get("Version") if dataset_json else None
    dataset = Dataset(dataset_id=dataset_id,
                      dataset_name=dataset_name,
                      dataset_version=dataset_version)
    data_location = DataLocation(dataset=dataset)
    create_output_directories = config.get("CreateOutputDirectories",
                                           False)
    # Default to an empty string so a missing "Mechanism" key does not
    # raise AttributeError on .lower()
    mechanism = config.get("Mechanism", "").lower()
    environment_variable_name = config.get("EnvironmentVariableName", None)
    path_on_compute = config.get("PathOnCompute", None)
    overwrite = config.get("Overwrite", False)
    return Data(data_location=data_location,
                create_output_directories=create_output_directories,
                mechanism=mechanism,
                environment_variable_name=environment_variable_name,
                path_on_compute=path_on_compute,
                overwrite=overwrite)
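
For illustration, a hedged sketch of the dictionary shape _to_input_config expects; the key names mirror the lookups in the function above, and the concrete values are invented:

sample_config = {  # hypothetical serialized Data section
    "DataLocation": {
        "Dataset": {"Id": "abc-123", "Name": "mnist", "Version": "1"}
    },
    "CreateOutputDirectories": False,
    "Mechanism": "Mount",
    "EnvironmentVariableName": "mnist_input",
    "Overwrite": False
}
data = _to_input_config(sample_config)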

Example #4
# End-to-end sketch: assumes `dataset`, `input_name`, and `conda_env` are
# already defined, and that ScriptRunConfig, Data, DataLocation, RunDataset,
# and get_run_config_from_script_run have been imported from the azureml
# SDK (azureml.core, azureml.core.runconfig, azureml.core.script_run_config).
compute_name = 'cpu-cluster'

# Define the script run config
src = ScriptRunConfig(
    source_directory='scripts',
    script='train.py',
    arguments=[
        '--data-folder',
        'DatasetConsumptionConfig:{}'.format(input_name)
    ])

# Define the data section of the runconfig
src.run_config.data = {
    input_name: Data(
        data_location=DataLocation(
            dataset=RunDataset(dataset_id=dataset.id)),
        create_output_directories=False,
        mechanism='mount',
        environment_variable_name=input_name,
        overwrite=False
    )
}
# Set other parameters for the run
src.run_config.framework = 'python'
src.run_config.environment = conda_env
src.run_config.target = compute_name
src.run_config.node_count = 4

# Save the run configuration as .azureml/mnist.runconfig
get_run_config_from_script_run(src).save(name='mnist.runconfig')
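
To actually launch the run, the configuration can be submitted through an Experiment. A minimal sketch, assuming an existing Workspace handle `ws`:

from azureml.core import Experiment

experiment = Experiment(workspace=ws, name='mnist')  # `ws` is assumed to exist
run = experiment.submit(src)
run.wait_for_completion(show_output=True)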