def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
                               test_output_dirs: OutputFolderForTests) -> None:
    """
    Check the HyperDrive configuration built by `HyperDriveTestModel.get_hyperdrive_config`.

    The maximum number of total runs must equal the number of cross validation splits when that
    number is positive, and HYPERDRIVE_TOTAL_RUNS otherwise. If the model performs cross validation,
    the cross validation sampler must offer one choice per split index; if not, the configuration
    must have the same attributes as the dummy parameter search configuration.

    :param number_of_cross_validation_splits: Number of cross validation splits to set on the model.
    :param test_output_dirs: Test output folders; the root folder is used as the source directory.
    """
    model = HyperDriveTestModel()
    model.number_of_cross_validation_splits = number_of_cross_validation_splits
    run_config = ScriptRunConfig(
        source_directory=str(test_output_dirs.root_dir),
        script=str(Path("something.py")),
        arguments=["foo"],
        compute_target="Local",
    )
    hd_config = model.get_hyperdrive_config(run_config=run_config)

    if number_of_cross_validation_splits > 0:
        expected_total_runs = number_of_cross_validation_splits
    else:
        expected_total_runs = HYPERDRIVE_TOTAL_RUNS
    assert hd_config._max_total_runs == expected_total_runs

    if not model.perform_cross_validation:
        # Without cross validation, the model must fall back to the dummy parameter search.
        actual_attributes = vars(model.get_hyperdrive_config(run_config))
        expected_attributes = vars(_create_dummy_hyperdrive_param_search_config(run_config))
        assert actual_attributes == expected_attributes
        return

    # With cross validation, the sampler must choose among all split indices.
    sampler = model.get_cross_validation_hyperdrive_sampler()
    split_indices = list(range(number_of_cross_validation_splits))
    assert sampler._parameter_space == {CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: choice(split_indices)}
def submit_experiment_run(self, wait_for_completion=True) -> Run:
    """
    Submit the configured training script as an experiment run.

    The script ``self.train_py`` inside ``self.source_directory`` is submitted to the experiment
    named ``self.experiment_name``, using the run configuration returned by
    ``get_local_run_configuration()``. Before submission, the result of ``self._get_file_md5``
    for the script is stored in ``self.image_tags["train_py_hash"]``.

    :param wait_for_completion: When true, call ``run.wait_for_completion`` after submitting.
        If that raises ActivityFailedException, the run details are printed and the
        exception is re-raised.
    :return: The submitted run.
    :raises AssertionError: If a required attribute is unset or the script file does not exist.
    """
    assert self.source_directory
    assert self.train_py
    # NOTE(review): run_configuration is required here but the submission below uses
    # get_local_run_configuration() instead - confirm this is intended.
    assert self.run_configuration
    assert self.experiment_name

    # Build the script path once with os.path.join instead of concatenating with "/".
    script_path = os.path.join(self.source_directory, self.train_py)
    assert os.path.isfile(script_path), (
        f"The file {self.train_py} could not be found at "
        f"{self.source_directory}")

    src = ScriptRunConfig(
        source_directory=self.source_directory,
        script=self.train_py,
        arguments=self.args,
        run_config=get_local_run_configuration(),
    )
    self.image_tags["train_py_hash"] = self._get_file_md5(script_path)

    # This object itself is passed as the workspace of the experiment.
    exp = Experiment(workspace=self, name=self.experiment_name)
    run = exp.submit(src)
    if wait_for_completion:
        try:
            run.wait_for_completion(show_output=self.show_output)
        except ActivityFailedException:
            print(run.get_details())
            # Bare raise keeps the original traceback intact.
            raise
    return run
def load_runconfig_yaml(runconfig_yaml_file):
    """
    Load a runconfig YAML file and wrap it in a ScriptRunConfig.

    If the GITHUB_WORKSPACE environment variable is set, its value is inserted into
    ``sys.path`` at position 1 first. The source directory of the script run config is the
    folder containing the YAML file, or the parent of that folder when the folder is named
    ``.azureml`` or ``aml_config``.

    :param runconfig_yaml_file: Path of the runconfig YAML file.
    :return: The ScriptRunConfig, or None if a TypeError or FileNotFoundError was raised
        while loading (the error is printed).
    """
    root = os.environ.get("GITHUB_WORKSPACE")
    if root is not None:
        # Only touch sys.path when the variable is set; formatting an unset value
        # would insert the literal string "None" as a search path.
        print("::debug::Adding root to system path")
        sys.path.insert(1, root)

    try:
        run_config = RunConfiguration().load(path=runconfig_yaml_file)

        # Setting source directory for script run config
        source_directory = os.path.dirname(runconfig_yaml_file)
        if os.path.basename(source_directory) in (".azureml", "aml_config"):
            source_directory = os.path.dirname(source_directory)

        # defining scriptrunconfig
        run_config = ScriptRunConfig(source_directory=source_directory,
                                     run_config=run_config)
    except (TypeError, FileNotFoundError) as exception:
        print(
            f"Error when loading runconfig yaml definition your repository (Path: /{runconfig_yaml_file}): {exception}"
        )
        run_config = None
    return run_config
def scale_up(self, workers=1):
    """
    Scale up the number of workers.

    Submits ``workers`` child runs of ``setup/start_worker.py`` (relative to ``self.abs_path``)
    under ``self.run`` and appends each child run to ``self.workers_list``.

    :param workers: Number of child runs to submit.
    """
    worker_run_config = RunConfiguration()
    worker_run_config.target = self.compute_target
    worker_run_config.environment = self.environment_definition

    # The scheduler address is read from the "scheduler" metric of the parent run.
    scheduler_address = self.run.get_metrics()["scheduler"]
    worker_arguments = [
        f"--scheduler_ip_port={scheduler_address}",
        f"--use_gpu={self.use_gpu}",
        f"--n_gpus_per_node={self.n_gpus_per_node}",
        f"--worker_death_timeout={self.worker_death_timeout}",
    ]
    worker_script_config = ScriptRunConfig(
        source_directory=os.path.join(self.abs_path, "setup"),
        script="start_worker.py",
        arguments=worker_arguments,
        run_config=worker_run_config,
    )

    for _ in range(workers):
        self.workers_list.append(
            self.run.submit_child(worker_script_config, tags=self.tags))
def load_runconfig_yaml(runconfig_yaml_file):
    """
    Load a runconfig YAML file and wrap it in a ScriptRunConfig.

    The source directory is the folder containing the YAML file, or the parent of that
    folder when the folder is named ``.azureml`` or ``aml_config``.

    :param runconfig_yaml_file: Path of the runconfig YAML file.
    :return: The ScriptRunConfig, or None if a TypeError, FileNotFoundError or
        UserErrorException was raised while loading (a debug message is printed).
    """
    try:
        loaded_config = RunConfiguration().load(path=runconfig_yaml_file)

        # Work out the source directory for the script run config.
        yaml_folder = os.path.dirname(runconfig_yaml_file)
        if os.path.basename(yaml_folder) in (".azureml", "aml_config"):
            source_directory = os.path.dirname(yaml_folder)
        else:
            source_directory = yaml_folder

        return ScriptRunConfig(source_directory=source_directory,
                               run_config=loaded_config)
    except (TypeError, FileNotFoundError, UserErrorException) as exception:
        print(
            f"::debug::Error when loading runconfig yaml definition your repository (Path: /{runconfig_yaml_file}): {exception}"
        )
        return None
def perform_run(experiment,
                script,
                source_directory,
                environment=None,
                compute_target=None,
                trainsets=None,
                testsets=None,
                parameters=None,
                distributed_job_config=None):
    """
    Build a ScriptRunConfig for ``script`` and submit it to ``experiment``.

    :param experiment: Experiment object whose ``submit`` method is called.
    :param script: Entry script passed to ScriptRunConfig.
    :param source_directory: Source directory passed to ScriptRunConfig.
    :param environment: Environment for the run. When None, an Environment named
        "user-managed-env" with user-managed Python dependencies is created.
    :param compute_target: Compute target passed through to ScriptRunConfig unchanged.
    :param trainsets: Training sets forwarded to ``_create_args``; defaults to an empty list.
    :param testsets: Test sets forwarded to ``_create_args``; defaults to an empty list.
    :param parameters: Parameters forwarded to ``_create_args``; defaults to an empty dict.
    :param distributed_job_config: Distributed job configuration passed to ScriptRunConfig.
    :return: The run returned by ``experiment.submit``.
    """
    if environment is None:
        environment = Environment("user-managed-env")
        environment.python.user_managed_dependencies = True

    # Create fresh containers on every call: mutable default arguments are shared
    # between calls and would leak state if a callee ever modified them.
    trainsets = [] if trainsets is None else trainsets
    testsets = [] if testsets is None else testsets
    parameters = {} if parameters is None else parameters

    args = _create_args(trainsets, testsets, parameters)

    # compute_target is forwarded as given; it is None unless the caller supplies one.
    src = ScriptRunConfig(source_directory=source_directory,
                          compute_target=compute_target,
                          script=script,
                          arguments=args,
                          environment=environment,
                          distributed_job_config=distributed_job_config)
    return experiment.submit(config=src)
def Azure_ML_experiment():
    """
    Submit ``train.py`` from the current working directory to the
    'aftershock-model-train' experiment and print the file names of the run.

    Uses the module-level workspace ``ws`` and the compute target returned by
    ``create_compute_resource(ws)``.
    """
    # Experiment that collects the training runs.
    experiment = Experiment(workspace=ws, name='aftershock-model-train')

    # Compute resource on which the script will run.
    compute_target = create_compute_resource(ws)

    # Environment with the packages required by the training script.
    training_env = Environment('aftershock-env-01')
    training_env.python.conda_dependencies = CondaDependencies.create(
        pip_packages=['azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults'],
        conda_packages=['scikit-learn==0.24.2'],
    )

    script_config = ScriptRunConfig(
        source_directory=os.getcwd(),
        script='train.py',
        arguments=[],
        compute_target=compute_target,
        environment=training_env,
    )

    # Submit the train script to the experiment.
    run = experiment.submit(config=script_config)
    print(run.get_file_names())
def run_rolling_forecast(test_experiment,
                         compute_target,
                         train_run,
                         test_dataset,
                         target_column_name,
                         inference_folder='./forecast'):
    """
    Submit ``forecasting_script.py`` from ``inference_folder`` as a run of ``test_experiment``.

    The file ``outputs/model.pkl`` of ``train_run`` is first downloaded into
    ``inference_folder``, and the environment of ``train_run`` is reused for the new run.
    The new run is tagged with the id and selected properties of ``train_run``.

    :param test_experiment: Experiment to which the run is submitted.
    :param compute_target: Compute target for the run.
    :param train_run: Training run providing the model file, environment and tag values.
    :param test_dataset: Dataset passed to the script as a named input.
    :param target_column_name: Value of the ``--target_column_name`` script argument.
    :param inference_folder: Folder used as the source directory of the run.
    :return: The submitted run.
    """
    train_run.download_file('outputs/model.pkl', inference_folder + '/model.pkl')

    inference_env = train_run.get_environment()
    script_arguments = [
        '--target_column_name', target_column_name,
        '--test_dataset', test_dataset.as_named_input(test_dataset.name),
    ]
    config = ScriptRunConfig(source_directory=inference_folder,
                             script='forecasting_script.py',
                             arguments=script_arguments,
                             compute_target=compute_target,
                             environment=inference_env)

    training_properties = train_run.properties
    run_tags = {
        'training_run_id': train_run.id,
        'run_algorithm': training_properties['run_algorithm'],
        'valid_score': training_properties['score'],
        'primary_metric': training_properties['primary_metric'],
    }
    run = test_experiment.submit(config, tags=run_tags)

    run.log("run_algorithm", run.tags['run_algorithm'])
    return run
def test_get_hyperdrive_config(number_of_cross_validation_splits: int,
                               test_output_dirs: OutputFolderForTests) -> None:
    """
    Testing that the hyperdrive config returned for the lightning container is right for
    submitting to AzureML.

    Note that because the function get_hyperdrive_config now lives in the super class
    WorkflowParams, it is also tested for other aspects of functionality by a test of the
    same name in Tests.ML.test_model_config_base.

    :param number_of_cross_validation_splits: Number of cross validation splits set on the container.
    :param test_output_dirs: Test output folders; the root folder is used as the source directory.
    """
    container = DummyContainerWithAzureDataset()
    container.number_of_cross_validation_splits = number_of_cross_validation_splits
    run_config = ScriptRunConfig(
        source_directory=str(test_output_dirs.root_dir),
        script=str(Path("something.py")),
        arguments=["foo"],
        compute_target="EnormousCluster",
    )

    if number_of_cross_validation_splits != 0:
        hd_config = container.get_hyperdrive_config(run_config=run_config)
        assert isinstance(hd_config, HyperDriveConfig)
        return

    # The error should be thrown by
    # InnerEye.ML.lightning_container.LightningContainer.get_parameter_search_hyperdrive_config
    # since number_of_cross_validation_splits == 0 implies a parameter search hyperdrive config and
    # not a cross validation one.
    with pytest.raises(NotImplementedError) as not_implemented_error:
        container.get_hyperdrive_config(run_config=run_config)
    error_message = str(not_implemented_error.value)
    assert 'Parameter search is not implemented' in error_message
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.

    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The name of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run. If `azure_config.hyperdrive` is set, this is the result of passing the
        script run through `source_config.hyperdrive_config_func`.
    """
    dataset_consumptions = create_dataset_consumptions(azure_config,
                                                       all_azure_dataset_ids,
                                                       all_dataset_mountpoints)

    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    relative_entry_script = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {relative_entry_script} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")

    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    else:
        max_run_duration = None

    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(script=relative_entry_script,
                                  arguments=source_config.script_params)
    run_config.environment = get_or_create_python_environment(azure_config,
                                                              source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration

    # Multi-node jobs are configured as MPI jobs.
    if azure_config.num_nodes > 1:
        mpi_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = mpi_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = mpi_config.node_count

    if len(dataset_consumptions) > 0:
        run_config.data = {consumption.name: consumption for consumption in dataset_consumptions}

    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    blob_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME)
    run_config.source_directory_data_store = blob_store.name

    script_run_config = ScriptRunConfig(source_directory=str(source_config.root_folder),
                                        run_config=run_config)
    if not azure_config.hyperdrive:
        return script_run_config
    return source_config.hyperdrive_config_func(script_run_config)  # type: ignore
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.

    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
        string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run. If `azure_config.hyperdrive` is set, this is the result of passing the
        script run through `source_config.hyperdrive_config_func`.
    :raises ValueError: If a dataset ID is given but no dataset could be found or created for it.
    """
    dataset_consumption = None
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        # The dataset is either mounted or downloaded, depending on the Azure configuration.
        if azure_config.use_dataset_mount:
            dataset_consumption = named_input.as_mount()
        else:
            dataset_consumption = named_input.as_download()

    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    relative_entry_script = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {relative_entry_script} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")

    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    else:
        max_run_duration = None

    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(script=relative_entry_script,
                                  arguments=source_config.script_params)
    run_config.environment = get_or_create_python_environment(azure_config,
                                                              source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration

    # Multi-node jobs are configured as MPI jobs.
    if azure_config.num_nodes > 1:
        mpi_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = mpi_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = mpi_config.node_count

    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}

    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    blob_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME)
    run_config.source_directory_data_store = blob_store.name

    script_run_config = ScriptRunConfig(source_directory=str(source_config.root_folder),
                                        run_config=run_config)
    if not azure_config.hyperdrive:
        return script_run_config
    return source_config.hyperdrive_config_func(script_run_config)  # type: ignore
def run_local_compute_experiment(ws,
                                 experiment_name,
                                 entry_script,
                                 source_directory=None):
    """
    Submit ``entry_script`` with a user-managed Python environment and wait for completion.

    :param ws: Workspace in which the experiment is created.
    :param experiment_name: Name of the experiment.
    :param entry_script: Script passed to ScriptRunConfig.
    :param source_directory: Source directory of the run. When None (the default), the
        working directory at the time of the call is used.
    """
    # Resolve the default at call time: a default of os.getcwd() in the signature is
    # evaluated only once, when the function is defined, and goes stale if the
    # process changes directory afterwards.
    if source_directory is None:
        source_directory = os.getcwd()

    # Edit a run configuration property on the fly.
    run_local = RunConfiguration()
    run_local.environment.python.user_managed_dependencies = True

    exp = Experiment(workspace=ws, name=experiment_name)
    src = ScriptRunConfig(source_directory=source_directory,
                          script=entry_script,
                          run_config=run_local)
    run = exp.submit(src)
    run.wait_for_completion(show_output=True)
def main():
    """
    Minimum sample to use the Observability logger in Azure ML Run or alone.

    Submits ``train.py`` to the 'test_logger' experiment (without a compute target when the
    module-level ``run_on_local`` flag is set, on 'train-cluster' otherwise), then calls the
    logger directly and sleeps for 30 seconds.
    """
    pwd = sys.path[0]

    # Submit an Azure ML Run which uses the logger
    aml_ws = Workspace.from_config()
    aml_exp = Experiment(aml_ws, 'test_logger')
    aml_env = Environment.from_conda_specification(
        'test_logger_env', f'{pwd}/conda_dependency.yml')

    run_settings = {
        'source_directory': pwd,
        'script': 'train.py',
        'environment': aml_env,
    }
    # if aml_cluster isn't specified, it'll run locally but
    # will still log to AML and AppInsights
    if not run_on_local:
        # setting aicxn does not work when running local
        # because AML add "\" to ";" in the cxn string,
        # making the cxn string invalid.
        aicxn = 'APPLICATIONINSIGHTS_CONNECTION_STRING'
        aml_env.environment_variables[aicxn] = os.environ[aicxn]
        run_settings['compute_target'] = aml_ws.compute_targets['train-cluster']
    aml_exp.submit(ScriptRunConfig(**run_settings))

    # Use the logger directly
    logger.log("Shouldn't log INFO if default severity is WARNING")
    logger.log("Run into ERROR", severity=Severity.ERROR)
    logger.log_metric(name="metric1_no_parent", value=100)
    logger.log_metric(name="metric2_with_parent", value=200, log_parent=True)
    try:
        raise Exception("Run into EXCEPTION")
    except Exception as ex:
        logger.exception(ex)

    # allow time for appinsights exporter to send metrics
    time.sleep(30)
def run(self, project_uri, entry_point, params, version, backend_config,
        tracking_uri, experiment_id):
    """
    Fetch an MLflow project, build a ScriptRunConfig from its entry point command and
    submit it to an AzureML experiment.

    :param project_uri: URI of the project, passed to ``fetch_and_validate_project``.
    :param entry_point: Entry point of the project to run.
    :param params: Parameters for the entry point.
    :param version: Project version, passed to ``fetch_and_validate_project``.
    :param backend_config: Backend options, or None. Keys read here are PROJECT_USE_CONDA,
        STREAM_OUTPUT and COMPUTE. A missing PROJECT_USE_CONDA key is added to the given
        mapping with value True.
    :param tracking_uri: Tracking URI; swapped with ``experiment_id`` when the two appear
        to have been passed in reversed order.
    :param experiment_id: Identifier used to load the AzureML experiment.
    :return: An AzureMLSubmittedRun wrapping the submitted run.
    :raises ExecutionException: If fetching or loading the project fails.
    """
    # doing this handling bc positional argument fix not in mlflow <= 1.10.0 release
    # https://github.com/mlflow/mlflow/issues/3138
    if _AZUREML_URI not in tracking_uri and _AZUREML_URI in experiment_id:
        tracking_uri, experiment_id = experiment_id, tracking_uri

    # use_conda value from mlflow.project.run call propagated to backend_config
    # release after 1.10.0, so if use_conda key not in backend config, assume to be True
    # if the user hasn't passed a backend_config and has set backend="azureml", assume
    # that it's a local run using local conda environment (i.e. use_conda = False)
    if backend_config is None:
        backend_config = {PROJECT_USE_CONDA: False}
    else:
        backend_config.setdefault(PROJECT_USE_CONDA, True)
    use_conda = backend_config[PROJECT_USE_CONDA]
    stream_output = backend_config.get(STREAM_OUTPUT, True)
    compute = backend_config.get(COMPUTE)

    try:
        work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
        mlproject = load_project(work_dir)
    except ExecutionException as e:
        raise ExecutionException(e)

    # process mlflow parameters into a format usable for AzureML ScriptRunConfig
    command_args = list(get_entry_point_command(mlproject, entry_point, params, None))

    # components for launching an AzureML ScriptRun
    workspace = load_azure_workspace()
    experiment = _load_azure_experiment(workspace, experiment_id)

    # TODO: mlflow system tag mlflow.source.name is null after the switch from script, args to command
    src = ScriptRunConfig(source_directory=work_dir, command=command_args)

    # in case customer sets target to local
    runs_remotely = compute and compute not in (_LOCAL, _LOCAL.upper())
    if runs_remotely:
        remote_environment = _load_remote_environment(mlproject)
        registered_env = remote_environment.register(workspace=workspace)
        cpu_cluster = _load_compute_target(workspace, backend_config)
        src.run_config.target = cpu_cluster.name
        src.run_config.environment = registered_env
    else:
        src.run_config.environment = _load_local_environment(mlproject, use_conda)

    submitted_run = experiment.submit(config=src)
    submitted_message = "AzureML-Mlflow {} Experiment submitted".format(experiment.name)
    _logger.info(_CONSOLE_MSG.format(submitted_message))
    return AzureMLSubmittedRun(submitted_run, stream_output)
def main(workspace):
    """
    Build the ScriptRunConfig for ``train.py`` in the current directory on "amlcompute3".

    For more information see: https://github.com/Azure/aml-run

    :param workspace: Workspace supplied by the caller; not read by this function.
    :return: The ScriptRunConfig.
    """
    logging.info("run_config")
    script_run_config = ScriptRunConfig(
        source_directory=".",
        script="train.py",
        compute_target="amlcompute3",
    )
    return script_run_config
def submit_run(aml_interface):
    """
    Submit ``train.py`` from this module's folder to the AML experiment, wait for the run
    to complete and print its metrics.

    :param aml_interface: Object providing the workspace and ``get_compute_target``.
    """
    workspace = aml_interface.workspace
    experiment = Experiment(workspace, AML_EXPERIMENT_NAME)

    script_config = ScriptRunConfig(source_directory=__here__, script='train.py')
    script_config.run_config.target = aml_interface.get_compute_target(
        AML_COMPUTE_NAME, 'STANDARD_D2_V2')
    script_config.run_config.environment = Environment.get(
        aml_interface.workspace, AML_ENV_NAME)

    print("Submitting Run")
    run = experiment.submit(config=script_config)
    run.wait_for_completion(show_output=True)
    print(run.get_metrics())
def submit(experiment_name: str, kernal: str, penalty: float):
    """
    Run ``train.py`` from the current directory in the given experiment, wait for it to
    finish and register the resulting model as 'covid-tweets-analyis'.

    :param experiment_name: Name of the experiment to log to.
    :param kernal: Value of the ``--kernel`` script argument.
    :param penalty: Value of the ``--penalty`` script argument.
    """
    print("This notebook was created using version 1.0.83 of the Azure ML SDK")
    print("You are using version", azureml.core.VERSION, "of the SDK")

    # Get a reference to the workspace. Be sure to download the config.json
    # from your workspace and place in the parent folder.
    ws = Workspace.from_config()
    print('Loaded workspace', ws.name)

    # Reference the experiment
    experiment = Experiment(workspace=ws, name=experiment_name)
    print('Logging to experiment', experiment_name)

    # Create the RunConfiguration that will be used
    script_run_config = ScriptRunConfig(
        source_directory='.',
        script='train.py',
        arguments=[
            '--output-dir', "outputs",
            '--kernel', kernal,
            '--penalty', penalty,
        ],
    )

    # As we will run locally we can use our existing python environment
    python_settings = script_run_config.run_config.environment.python
    python_settings.user_managed_dependencies = True

    # Submit the experiment to get a run and wait for completion
    run = experiment.submit(script_run_config)
    print('Submitted please wait...')
    run.wait_for_completion(show_output=True)

    # register the trained model
    model = run.register_model(
        model_name='covid-tweets-analyis',
        model_path='outputs/model/covid-tweets-analyis.joblib')

    print('Run number:', run.number)
    print('Run id:', run.id)
    print("Run details are available at:", run.get_portal_url())
    print("Model: {} v{}".format(model.name, model.version))

    # Warn when the run was not made from a clean git checkout.
    if 'azureml.git.dirty' not in run.properties:
        print('WARNNG: To ensure reproducability you should be using git!')
    elif run.properties['azureml.git.dirty']:
        print("WARNNG: You have uncomitted changes. To ensure "
              "reproducability check in your code before you train.")
def _submit_profile(dataset_profile_config_object, workspace, experiment_name):
    """Start Profile execution with the given config on the given workspace.

    :param dataset_profile_config_object: Profile config; its ``_dataset`` and
        ``_compute_target`` attributes are read.
    :param workspace: Workspace in which the dataset is saved and the run is looked up.
    :param experiment_name: Experiment name sent with the profile request and used for
        the local submission.
    :return: A DatasetProfileRun built from the workspace, the dataset and the run.
    """
    dataset = dataset_profile_config_object._dataset
    target = dataset_profile_config_object._compute_target
    # A ComputeTarget object is passed to the service by its name.
    target_name = target.name if isinstance(target, ComputeTarget) else target

    run_id = 'dataset_' + str(uuid.uuid4())
    saved_dataset_id = dataset._ensure_saved(workspace)
    action_dto = _restclient(workspace).dataset.generate_profile_with_preview(
        workspace.subscription_id,
        workspace.resource_group,
        workspace.name,
        id=saved_dataset_id,
        compute_target=target_name,
        experiment_name=experiment_name,
        run_id=run_id,
        custom_headers=_custom_headers)

    if dataset_profile_config_object._compute_target == _LOCAL_COMPUTE:
        # Local compute: submit the profile script from a temporary folder, using the
        # current Python interpreter and the generated run id.
        with tempfile.TemporaryDirectory() as temp_dir:
            bundled_script = os.path.join(os.path.dirname(__file__), '_profile_run_script.py')
            copyfile(bundled_script, os.path.join(temp_dir, 'profile_run_script.py'))

            run_local = RunConfiguration()
            run_local.environment.python.user_managed_dependencies = True
            run_local.environment.python.interpreter_path = sys.executable

            script_config = ScriptRunConfig(
                source_directory=temp_dir,
                script="profile_run_script.py",
                arguments=[action_dto.dataset_id, action_dto.action_id, saved_dataset_id],
                run_config=run_local)
            experiment = Experiment(workspace, experiment_name)
            experiment.submit(script_config, run_id=run_id)
    else:
        # Otherwise use the experiment name and run id returned by the service.
        experiment = Experiment(workspace, action_dto.experiment_name)
        run_id = action_dto.run_id

    run = get_run(experiment, run_id)
    return DatasetProfileRun(workspace, dataset, run)
def run_script_in_cloud():
    """
    Submit ``src/azure/modelling.py`` to the 'day1-experiment-hello' experiment on the
    'cpu-cluster' compute target and print the portal URL of the run.

    The environment is created from ``../../requirements.txt``.
    """
    workspace = Workspace.from_config()
    experiment = Experiment(workspace=workspace, name='day1-experiment-hello')

    script_config = ScriptRunConfig(
        source_directory="../../",
        script='src/azure/modelling.py',
        compute_target='cpu-cluster',
    )
    script_config.run_config.environment = Environment.from_pip_requirements(
        name="env", file_path="../../requirements.txt")

    run = experiment.submit(script_config)
    print(run.get_portal_url())
def get_script_config(self,
                      script_folder,
                      entry_script,
                      script_params,
                      compute_target,
                      channels=None,
                      conda_packages=None,
                      pip_packages=None):
    """
    Build a ScriptRunConfig for ``entry_script`` in ``script_folder``.

    :param script_folder: Source directory of the run.
    :param entry_script: Script to execute.
    :param script_params: Arguments passed to the script.
    :param compute_target: Compute target forwarded to ``__get_run_config``.
    :param channels: Forwarded to ``__get_run_config``.
    :param conda_packages: Forwarded to ``__get_run_config``.
    :param pip_packages: Forwarded to ``__get_run_config``.
    :return: The ScriptRunConfig.
    """
    return ScriptRunConfig(
        source_directory=script_folder,
        script=entry_script,
        arguments=script_params,
        run_config=self.__get_run_config(compute_target, channels,
                                         conda_packages, pip_packages),
    )
def main(config, log):
    """
    Submit the script run described by ``config`` and log the portal URL of the run.

    :param config: Configuration object; its ``workspace``, ``environment``, ``run_config``,
        ``compute`` and ``experiment`` attributes are read.
    :param log: Logger used to report the run URL.
    """
    workspace = Workspace(**config.workspace)

    # Docker-enabled environment built from the conda specification, on the default GPU image.
    environment = Environment.from_conda_specification(**config.environment)
    environment.docker.enabled = True
    environment.docker.base_image = DEFAULT_GPU_IMAGE

    script_config = ScriptRunConfig(**config.run_config)
    script_config.run_config.environment = environment
    script_config.run_config.target = workspace.compute_targets[config.compute]

    run = Experiment(workspace=workspace, name=config.experiment).submit(script_config)
    aml_url = run.get_portal_url()
    log.info(f'Run URL: {aml_url}')
def executeAction(self, experiment, project_folder, training_Script,
                  cpu_cluster, github_SHA):
    """
    Submit ``training_Script`` from ``project_folder`` to ``experiment``.

    The run uses a Docker-enabled environment named "myenv" with the conda package
    scikit-learn, targets ``cpu_cluster`` by name, and is tagged with the GitHub SHA.

    :param experiment: Experiment to which the run is submitted.
    :param project_folder: Source directory of the run.
    :param training_Script: Script to execute.
    :param cpu_cluster: Compute target; only its ``name`` is used.
    :param github_SHA: Value stored under the "github_SHA" tag of the run.
    """
    # TODO: take environment as input from user for reusing it, may be useful when resubmitting the experiment.
    run_environment = Environment("myenv")
    run_environment.docker.enabled = True
    run_environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['scikit-learn'])

    script_config = ScriptRunConfig(source_directory=project_folder,
                                    script=training_Script)
    run_settings = script_config.run_config
    # Set compute target to the one created in previous step
    run_settings.target = cpu_cluster.name
    # Set environment
    run_settings.environment = run_environment

    run = experiment.submit(config=script_config, tags={"github_SHA": github_SHA})
def PrepareAzureScript():
    """
    Create Script Run Config.

    Stores a ScriptRunConfig for ``model.py`` in the module-level ``az_config``. It reads
    the module-level ``az_workspace``, ``az_dataset`` and ``az_computetarget``.
    """
    global az_config
    global az_computetarget

    # Use an Azure curated environment to create the docker container
    # (an earlier candidate was 'AzureML-PyTorch-1.6-GPU').
    curated_env_name = 'AzureML-Pytorch1.7-Cuda11-OpenMpi4.1.0-py36'
    pytorch_env = Environment.get(
        workspace=az_workspace, name=curated_env_name,
    ).clone(new_name='pytorch-1.6-gpu')

    # Alternative: build the docker container from the conda environment used on the
    # local machine. Export it with
    #   conda env export -n [name of environment] -f [filename.yml]
    # place the yml file in the ./azureml directory, and load it with
    #   Environment.from_conda_specification(name='AzurePytorch',
    #                                        file_path='./.azureml/AzurePytorch.yml')

    # Arguments passed to the training script. They have to be parsed in the training
    # script, for example with argparse:
    #   parser.add_argument("--data-folder", type=str, dest="data_folder", ...)
    #   parser.add_argument("--num-epochs", type=int, dest="num_epochs", ...)
    script_arguments = [
        '--data-folder', az_dataset.as_named_input('data').as_mount(),
        '--num-epochs', 50000,
    ]

    # Script Run Config defines the wrapper for the python scripts and will be used to
    # create the Docker container. The source directory is the local location of the
    # scripts, which will be built into the docker container.
    az_config = ScriptRunConfig(
        source_directory="./scripts",
        script='model.py',
        compute_target=az_computetarget,
        environment=pytorch_env,
        arguments=script_arguments,
    )
def submit_run(aml_interface):
    """
    Submit ``train.py`` from this module's folder to the AML experiment, wait for the run
    to complete and log its metrics.

    :param aml_interface: Object providing the workspace and ``get_compute_target``.
    """
    workspace = aml_interface.workspace
    experiment = Experiment(workspace, AML_EXPERIMENT_NAME)

    script_config = ScriptRunConfig(source_directory=__here__, script='train.py')
    script_config.run_config.target = aml_interface.get_compute_target(
        AML_COMPUTE_NAME, 'STANDARD_D1_V2')
    script_config.run_config.environment = Environment.get(
        aml_interface.workspace, AML_ENVIRONMENT_NAME)

    logger.info(f"Submitting Run to {AML_COMPUTE_NAME}@{AML_EXPERIMENT_NAME}")
    run = experiment.submit(config=script_config)
    run.wait_for_completion(show_output=True)
    logger.info(run.get_metrics())
    logger.info(f"Finished Run on {AML_COMPUTE_NAME}@{AML_EXPERIMENT_NAME}")
def test_perf(perf_test_configuration, workspace, request, wheel_file):
    """
    Generate the performance test script for this test case and run it.

    With a workspace, the script is submitted to the 'cpu-cluster' compute target of that
    workspace. Without one, the script is run in a local ``python`` subprocess whose output
    is echoed line by line.

    :param perf_test_configuration: Test configuration; its attributes become the run tags.
    :param workspace: Workspace for a remote run, or a falsy value to run locally.
    :param request: pytest request object; the node name identifies the test case.
    :param wheel_file: Wheel file passed to ``configure_environment``.
    :raises subprocess.CalledProcessError: If the local run exits with a non-zero code.
    """
    test_case_name = request.node.name
    print(f"Starting with test case {test_case_name}")

    script_name = determine_script_name(request.node.name)
    generate_script(request, perf_test_configuration, script_name,
                    SCRIPT_DIRECTORY, workspace)

    if workspace:
        # run remotely on Azure ML Cluster
        from azureml.core import Experiment, RunConfiguration, ScriptRunConfig

        experiment = Experiment(workspace=workspace, name=EXPERIMENT_NAME)
        run_config = RunConfiguration()
        run_config.target = workspace.compute_targets['cpu-cluster']

        requirements_file = os.path.join("fairlearn", "requirements.txt")
        environment = configure_environment(workspace,
                                            wheel_file=wheel_file,
                                            requirements_file=requirements_file)
        run_config.environment = environment
        environment.register(workspace=workspace)

        script_run_config = ScriptRunConfig(source_directory=SCRIPT_DIRECTORY,
                                            script=script_name,
                                            run_config=run_config)
        print("submitting run")
        experiment.submit(config=script_run_config,
                          tags=perf_test_configuration.__dict__)
        print("submitted run")
    else:
        # run locally
        print(f"starting local run: {request.node.name}")
        cmd = ["python", f"{SCRIPT_DIRECTORY}/{script_name}"]
        popen = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        # Echo the child's output until readline signals end of stream.
        while True:
            stdout_line = popen.stdout.readline()
            if stdout_line == "":
                break
            print(stdout_line, end="")
        popen.stdout.close()

        return_code = popen.wait()
        if return_code:
            raise subprocess.CalledProcessError(return_code, cmd)
        print(f"completed local run: {request.node.name}")
def main():
    """
    Hello on Azure machine learning.

    Submits ``hello.py`` from the current directory to the "hello-experiment" experiment on
    the "cpu-cluster" compute target, prints the portal URL and waits for completion.
    Authentication is interactive, using the tenant from the TENANT_ID environment variable.
    """
    tenant_id = os.getenv("TENANT_ID")
    interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)
    work_space = Workspace.from_config(auth=interactive_auth)
    experiment = Experiment(workspace=work_space, name="hello-experiment")

    script_config = ScriptRunConfig(
        source_directory=".",
        script="hello.py",
        compute_target="cpu-cluster",
    )

    # Disabled override of the snapshot size limit, kept for reference:
    # azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000
    run = experiment.submit(script_config)
    print(run.get_portal_url())
    run.wait_for_completion(show_output=True)
def run_remote_inference(
    test_experiment,
    compute_target,
    train_run,
    test_dataset,
    target_column_name,
    inference_folder="./forecast",
):
    """
    Submit ``forecasting_script.py`` as a run of ``test_experiment``.

    ``inference_folder`` is created if needed and filled with a copy of
    ``forecasting_script.py`` and the ``outputs/model.pkl`` file of ``train_run``. The
    environment of ``train_run`` is reused, and the new run is tagged with the id and
    selected properties of ``train_run``.

    :param test_experiment: Experiment to which the run is submitted.
    :param compute_target: Compute target for the run.
    :param train_run: Training run providing the model file, environment and tag values.
    :param test_dataset: Dataset passed to the script as a named input.
    :param target_column_name: Value of the ``--target_column_name`` script argument.
    :param inference_folder: Local folder used as the source directory of the run.
    :return: The submitted run.
    """
    # Create local directory to copy the model.pkl and forecsting_script.py files into.
    # These files will be uploaded to and executed on the compute instance.
    os.makedirs(inference_folder, exist_ok=True)
    shutil.copy("forecasting_script.py", inference_folder)

    local_model_path = os.path.join(inference_folder, "model.pkl")
    train_run.download_file("outputs/model.pkl", local_model_path)

    inference_env = train_run.get_environment()
    script_arguments = [
        "--target_column_name", target_column_name,
        "--test_dataset", test_dataset.as_named_input(test_dataset.name),
    ]
    config = ScriptRunConfig(
        source_directory=inference_folder,
        script="forecasting_script.py",
        arguments=script_arguments,
        compute_target=compute_target,
        environment=inference_env,
    )

    training_properties = train_run.properties
    run_tags = {
        "training_run_id": train_run.id,
        "run_algorithm": training_properties["run_algorithm"],
        "valid_score": training_properties["score"],
        "primary_metric": training_properties["primary_metric"],
    }
    run = test_experiment.submit(config, tags=run_tags)

    run.log("run_algorithm", run.tags["run_algorithm"])
    return run
def test_perf(perf_test_configuration, workspace, request, wheel_file):
    """
    Generate the performance test script for this test case and submit it to the
    default CPU compute target of the workspace.

    :param perf_test_configuration: Test configuration; its attributes become the run tags.
    :param workspace: Workspace used for the experiment, compute target and environment.
    :param request: pytest request object; the node name identifies the test case.
    :param wheel_file: Wheel file passed to ``configure_environment``.
    """
    test_case_name = request.node.name
    print("Starting with test case {}".format(test_case_name))

    script_name = determine_script_name(request.node.name)
    generate_script(request, perf_test_configuration, script_name, SCRIPT_DIRECTORY)

    experiment = Experiment(workspace=workspace, name=EXPERIMENT_NAME)
    run_config = RunConfiguration()
    run_config.target = workspace.get_default_compute_target(type='cpu')

    environment = configure_environment(workspace, wheel_file=wheel_file)
    run_config.environment = environment
    environment.register(workspace=workspace)

    script_run_config = ScriptRunConfig(
        source_directory=SCRIPT_DIRECTORY,
        script=script_name,
        run_config=run_config,
    )
    print("submitting run")
    experiment.submit(config=script_run_config,
                      tags=perf_test_configuration.__dict__)
    print("submitted run")
def submit():
    """
    Run ``train.py`` from the current directory on the "goazurego" cluster, creating the
    cluster if it does not exist, and wait for the run to complete.

    :return: True once the run has completed.
    """
    # define workspace
    workspace = Workspace.from_config()

    # create compute if it does not already exist
    cluster_name = "goazurego"
    try:
        target = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        # create a configuration
        provisioning = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2", max_nodes=2, min_nodes=0)
        target = ComputeTarget.create(workspace, cluster_name, provisioning)
        target.wait_for_completion(show_output=True)
    else:
        print(f"Found existing cluster - {cluster_name}.")

    # use the curated tensorflow 1.15 environment
    tf_env = Environment.get(workspace=workspace,
                             name="AzureML-TensorFlow-1.15-Inference-CPU")

    # create script run configuration
    script_config = ScriptRunConfig(
        source_directory=".",
        script="train.py",
        compute_target=target,
        environment=tf_env,
    )
    script_config.run_config.target = target

    # create an experiment
    experiment = Experiment(workspace=workspace, name="pycon-experiment")

    # run experiment
    run = experiment.submit(config=script_config)
    run.wait_for_completion(show_output=True)
    return True
def run_remote_inference(test_experiment,
                         compute_target,
                         train_run,
                         test_dataset,
                         target_column_name,
                         inference_folder='./forecast'):
    """
    Submit ``forecasting_script.py`` as a run of ``test_experiment``.

    ``inference_folder`` is created if needed and filled with a copy of
    ``forecasting_script.py`` and the ``outputs/model.pkl`` file of ``train_run``. The
    environment of ``train_run`` is reused, and the new run is tagged with the id and
    selected properties of ``train_run``.

    :param test_experiment: Experiment to which the run is submitted.
    :param compute_target: Compute target for the run.
    :param train_run: Training run providing the model file, environment and tag values.
    :param test_dataset: Dataset passed to the script as a named input.
    :param target_column_name: Value of the ``--target_column_name`` script argument.
    :param inference_folder: Local folder used as the source directory of the run.
    :return: The submitted run.
    """
    # Create local directory to copy the model.pkl and forecsting_script.py files into.
    # These files will be uploaded to and executed on the compute instance.
    os.makedirs(inference_folder, exist_ok=True)
    shutil.copy('forecasting_script.py', inference_folder)

    model_destination = os.path.join(inference_folder, 'model.pkl')
    train_run.download_file('outputs/model.pkl', model_destination)

    inference_env = train_run.get_environment()
    named_test_input = test_dataset.as_named_input(test_dataset.name)
    config = ScriptRunConfig(
        source_directory=inference_folder,
        script='forecasting_script.py',
        arguments=[
            '--target_column_name', target_column_name,
            '--test_dataset', named_test_input,
        ],
        compute_target=compute_target,
        environment=inference_env,
    )

    source_properties = train_run.properties
    tags = {
        'training_run_id': train_run.id,
        'run_algorithm': source_properties['run_algorithm'],
        'valid_score': source_properties['score'],
        'primary_metric': source_properties['primary_metric'],
    }
    run = test_experiment.submit(config, tags=tags)

    run.log("run_algorithm", run.tags['run_algorithm'])
    return run