def cancel_all_runs(exp_name, run_id=None):
    ws = get_workspace()
    exp = Experiment(ws, exp_name)
    if run_id:
        r = get_run(experiment=exp, run_id=run_id, rehydrate=True)
        # check the returned run type and status
        print(type(r), r.get_status())
        # a run can be cancelled as long as it has not completed or failed
        # (note: the terminal status reported by the SDK is 'Completed')
        if r.get_status() not in ['Completed', 'Failed']:
            r.cancel()
    else:
        # if you don't know the run id, list all runs under the experiment
        # and cancel every run, child run, and grandchild run that is still active
        for r in exp.get_runs():
            run = get_run(experiment=exp, run_id=r.id, rehydrate=True)
            for c in run.get_children():
                for gc in c.get_children():
                    if gc.get_status() in ("Running", "Queued"):
                        print(gc.id, gc.get_status())
                        gc.cancel()
                if c.get_status() in ("Running", "Queued"):
                    print(c.id, c.get_status())
                    c.cancel()
            if r.get_status() in ("Running", "Queued"):
                print(r.id, r.get_status())
                r.cancel()
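# A minimal usage sketch for cancel_all_runs. The experiment name and run id
# below are hypothetical, and get_workspace() is assumed to be provided by the
# surrounding module (e.g. built from Workspace.from_config()).
if __name__ == "__main__":
    # cancel one specific run if it is still active
    cancel_all_runs("my-experiment", run_id="my-experiment_1612345678_abcdef12")
    # or sweep the experiment and cancel everything Running or Queued, children included
    cancel_all_runs("my-experiment")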
def fetch_run_for_experiment(experiment_to_recover: Experiment,
                             run_id_or_number: str) -> Run:
    """
    :param experiment_to_recover: an experiment
    :param run_id_or_number: a string representing the Run ID or Run Number
        of one of the runs of the experiment
    :return: the run matching run_id_or_number; raises an exception if not found
    """
    # materialize the generator so it can be scanned here and again when
    # building the error message below
    available_runs = list(experiment_to_recover.get_runs())
    try:
        run_number = int(run_id_or_number)
        for run in available_runs:
            if run.number == run_number:
                return run
    except ValueError:
        # raised if run_id_or_number does not represent a number
        pass
    try:
        return get_run(experiment=experiment_to_recover,
                       run_id=run_id_or_number,
                       rehydrate=True)
    except Exception:
        available_ids = ", ".join([run.id for run in available_runs])
        raise Exception(
            "Run {} not found for experiment: {}. Available runs are: {}".format(
                run_id_or_number, experiment_to_recover.name, available_ids))
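# A short usage sketch for fetch_run_for_experiment; the workspace is loaded
# from a local config.json and the experiment name and ids are hypothetical.
from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
exp = Experiment(ws, "my-experiment")
run_by_number = fetch_run_for_experiment(exp, "42")  # matched via run.number
run_by_id = fetch_run_for_experiment(exp, "my-experiment_1612345678_abcdef12")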
def get_producing_run(self):
    """Return the experiment Run object of type `Run` that produced this profile.

    :return: The submitted experiment run for this profile run. See
        https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.run(class)
        for more information on run.
    :rtype: azureml.core.Run
    """
    from azureml.core import Experiment, get_run
    experiment = Experiment(self._workspace, self._experiment_name)
    return get_run(experiment, self._run_id)
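# A hedged usage sketch: assuming `profile_run` is an instance of the class
# this method belongs to, the producing run can then be inspected like any
# other azureml.core.Run.
producing_run = profile_run.get_producing_run()
print(producing_run.id, producing_run.get_status())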
def _submit_profile(dataset_profile_config_object, workspace, experiment_name):
    """Start profile execution with the given config on the given workspace.

    :param dataset_profile_config_object: the profile configuration holding the
        dataset to profile and the compute target to profile it on
    :param workspace: the workspace to submit the profile run to
    :param experiment_name: the name of the experiment to submit under
    :return: a DatasetProfileRun wrapping the submitted run
    """
    dataset = dataset_profile_config_object._dataset
    # the compute target may be a ComputeTarget object or a plain name string
    compute_target = dataset_profile_config_object._compute_target
    if isinstance(compute_target, ComputeTarget):
        compute_target = compute_target.name
    run_id = 'dataset_' + str(uuid.uuid4())
    saved_dataset_id = dataset._ensure_saved(workspace)
    action_dto = _restclient(workspace).dataset.generate_profile_with_preview(
        workspace.subscription_id,
        workspace.resource_group,
        workspace.name,
        id=saved_dataset_id,
        compute_target=compute_target,
        experiment_name=experiment_name,
        run_id=run_id,
        custom_headers=_custom_headers)
    if dataset_profile_config_object._compute_target == _LOCAL_COMPUTE:
        # local compute: copy the profile script to a temp dir and submit it
        # as a ScriptRunConfig against the local Python interpreter
        with tempfile.TemporaryDirectory() as temp_dir:
            script = os.path.join(temp_dir, 'profile_run_script.py')
            copyfile(
                os.path.join(os.path.dirname(__file__),
                             '_profile_run_script.py'), script)
            run_local = RunConfiguration()
            run_local.environment.python.user_managed_dependencies = True
            run_local.environment.python.interpreter_path = sys.executable
            script_config = ScriptRunConfig(source_directory=temp_dir,
                                            script="profile_run_script.py",
                                            arguments=[
                                                action_dto.dataset_id,
                                                action_dto.action_id,
                                                saved_dataset_id
                                            ],
                                            run_config=run_local)
            experiment = Experiment(workspace, experiment_name)
            experiment.submit(script_config, run_id=run_id)
    else:
        # remote compute: the service has already queued the run; rehydrate it
        experiment = Experiment(workspace, action_dto.experiment_name)
        run_id = action_dto.run_id
    run = get_run(experiment, run_id)
    return DatasetProfileRun(workspace, dataset, run)
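# A hedged sketch of invoking the helper directly. `profile_config` is assumed
# to be whatever object the surrounding module builds with `_dataset` and
# `_compute_target` attributes; in the public SDK such a config is normally
# constructed for you rather than by hand, and the experiment name here is
# hypothetical.
from azureml.core import Workspace

ws = Workspace.from_config()
profile_run = _submit_profile(profile_config, ws, "dataset-profiles")
print(profile_run)  # DatasetProfileRun wrapping the submitted run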
def status(args):
    ws = get_workspace()
    log = read_log_file()
    if log is None:
        print("No log files found. Exiting!")
        exit(0)
    experiments = log['experiments']
    if args.experiment_name in experiments:
        exp = Experiment(ws, name=log['aml_experiment_name'])
        print("Experiment: {}".format(args.experiment_name))
        print(experiments[args.experiment_name]['description'])
        # with -j, only the named jobs are shown and a portal link is added
        show_all = True
        if args.j:
            show_all = False
            to_print = [["Name", "Status", "Link"]]
        else:
            to_print = [["Name", "Status"]]
        for run, name in experiments[args.experiment_name]['ids']:
            if show_all:
                run = get_run(exp, run)
                details = run.get_details()
                to_print.append([run.tags['name'], details['status']])
            elif name in args.j:
                run = get_run(exp, run)
                details = run.get_details()
                to_print.append([
                    run.tags['name'], details['status'],
                    run.get_portal_url()
                ])
                to_print.append(["", "", ""])
        pretty_print(to_print)
    else:
        print("Experiment not found")
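# get_workspace, read_log_file, and pretty_print are helpers from the
# surrounding module. As an illustration only, a minimal column-aligned
# pretty_print could look like this (not the project's actual implementation):
def pretty_print(rows):
    n_cols = max(len(row) for row in rows)
    # pad every cell to the widest entry in its column
    widths = [max(len(str(row[i])) for row in rows if i < len(row))
              for i in range(n_cols)]
    for row in rows:
        print("  ".join(str(cell).ljust(widths[i]) for i, cell in enumerate(row)))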
def fetch_run_for_experiment(experiment_to_recover: Experiment,
                             run_id: str) -> Run:
    """
    :param experiment_to_recover: an experiment
    :param run_id: a string representing the Run ID of one of the runs of
        the experiment
    :return: the run matching run_id; raises an exception if not found
    """
    try:
        return get_run(experiment=experiment_to_recover,
                       run_id=run_id,
                       rehydrate=True)
    except Exception:
        available_runs = experiment_to_recover.get_runs()
        available_ids = ", ".join([run.id for run in available_runs])
        raise Exception(
            "Run {} not found for experiment: {}. Available runs are: {}".format(
                run_id, experiment_to_recover.name, available_ids))
    config = json.load(f)
    if not config["run_id"]:
        raise Exception(
            'No new model to register: the new model does not perform better '
            'than the production model')
except:
    print('No new model to register: the new model does not perform better '
          'than the production model')
    # raise Exception('No new model to register: the new model does not perform better')
    sys.exit(0)

run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

try:
    run = get_run(experiment=exp, run_id=run_id, rehydrate=True)
except:
    print(
        "Did you replace the run_id in the script with that of your hyperdrive run?"
    )
    raise

# children = run.get_children()
# best_metric = 1.0
# best_run_id = ""
# for child in children:
#     status = child.get_status()
#     if status == 'Completed':
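# A hedged sketch of the best-child selection the commented block above points
# at: the metric name 'val_loss' and the lower-is-better rule are assumptions,
# not taken from the original script.
best_run_id = ""
best_metric = float("inf")
for child in run.get_children():
    if child.get_status() != 'Completed':
        continue
    metrics = child.get_metrics()
    val_loss = metrics.get('val_loss')  # hypothetical metric name
    if isinstance(val_loss, list):      # repeated logging yields a list
        val_loss = val_loss[-1]
    if val_loss is not None and val_loss < best_metric:
        best_metric = val_loss
        best_run_id = child.id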
def get_profile_runs(self, workspace=None):
    """Return previous profile runs associated with this or the same dataset in the workspace.

    :param workspace: The workspace where the profile run was submitted.
        Defaults to the workspace of this dataset. Required if the dataset is
        not associated with a workspace. See
        https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace
        for more information on workspaces.
    :type workspace: azureml.core.Workspace
    :return: iterator object of type azureml.core.Run.
    :rtype: iter(azureml.core.Run)
    """
    workspace = self._ensure_workspace(workspace)
    from azureml._restclient.models import ActionRequestDto
    request_dto = ActionRequestDto(
        action_type=_ACTION_TYPE_PROFILE,
        saved_dataset_id=self._ensure_saved(workspace),
        arguments={
            'generate_preview': 'True',
            'row_count': '1000'
        })
    # page through all matching profile actions
    continuation_token = None
    paginated_action_dto_list = []
    index = 0
    while index == 0 or continuation_token is not None:
        paginated_action_dto = _restclient(
            workspace).dataset.list_actions_from_request(
                workspace.subscription_id,
                workspace.resource_group,
                workspace.name,
                dataset_id=_LEGACY_DATASET_ID,
                request=request_dto,
                count=1000,
                custom_headers=_custom_headers,
                continuation_token=continuation_token)
        index += 1
        paginated_action_dto_list.extend(paginated_action_dto.value)
        continuation_token = paginated_action_dto.continuation_token
    if not paginated_action_dto_list:
        raise AzureMLException(
            'Unable to find any run information. Please submit a new profile run.')
    run_list = []
    for item in paginated_action_dto_list:
        # This is done to ensure backward compatibility: earlier versions did
        # not persist the run_id for local runs, so older runs have no run_id.
        if item.run_id is None:
            continue
        from azureml.core import Experiment, get_run
        experiment = Experiment(workspace, item.experiment_name)
        try:
            run_list.append(get_run(experiment, item.run_id))
        except Exception:
            # skip runs that can no longer be rehydrated
            pass
    return iter(run_list)
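# A short usage sketch, assuming `dataset` is an instance of the class this
# method belongs to and is associated with (or passed) a workspace:
for profile_run in dataset.get_profile_runs():
    print(profile_run.id, profile_run.get_status())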
def command_wrapper(*args, **kwargs):
    # Diagnostics
    command_logger = kwargs.get('logger')
    if command_logger is None:
        # The az cli framework takes control of the loggers that start with a
        # `cli.` namespace. Their output level is set according to the
        # --debug, --verbose, --only-show-errors flags.
        #
        # On the other hand, for loggers without a `cli.` namespace, the cli
        # framework sets the level to CRITICAL by default, regardless of the
        # --verbose, --only-show-errors flags.
        #
        # Related code can be referenced here:
        # https://github.com/microsoft/knack/blob/fe3bf5d3a79a3dd2ce5ddb0c38d93843a3380f6f/knack/log.py#L179
        #
        # Here we try to get the logger from the az cli framework first.
        # If that fails (e.g. when debugging locally via the `azml` command,
        # which may not have the `knack` package installed), fall back to the
        # default logger.
        try:
            from knack.log import get_logger
            command_logger = get_logger(function.__module__)
        except ImportError:
            command_logger = logging.getLogger(function.__module__)
        kwargs['logger'] = command_logger
    command_logger.debug("Invoked {} with args {} and kwargs {}".format(
        function.__name__, args, kwargs))

    def _get(kwargs, arg, default=None):
        argname = arg.function_arg_name
        pop = argname in command_auto_args_only
        ret = kwargs.pop(argname, default) if pop else kwargs.get(argname, default)
        command_logger.debug("Popping auto argument %s: %s => %s", argname, pop, ret)
        return ret

    # Call cli-agnostic correlation setter
    set_correlation_id()

    # Auto load workspace if declared
    if AutoArg.Workspace in command_auto_args or AutoArg.Experiment in command_auto_args or \
            AutoArg.Run in command_auto_args:
        sub_id = _get(kwargs, argument.SUBSCRIPTION_ID)
        rg_name = _get(kwargs, argument.RESOURCE_GROUP_NAME)
        ws_name = _get(kwargs, argument.WORKSPACE_NAME)
        proj_path = _get(kwargs, argument.PROJECT_PATH, ".")
        command_logger.debug(
            "Hydrating auto arg workspace from "
            "subscription %s, "
            "resource_group %s, "
            "workspace name %s, "
            "project path %s", sub_id, rg_name, ws_name, proj_path)
        workspace = get_workspace_or_default(subscription_id=sub_id,
                                             resource_group=rg_name,
                                             workspace_name=ws_name,
                                             auth=None,
                                             project_path=proj_path,
                                             logger=command_logger)
        if AutoArg.Experiment in command_auto_args or AutoArg.Run in command_auto_args:
            experiment_name = _get(kwargs, argument.EXPERIMENT_NAME)
            command_logger.debug("Hydrating auto arg experiment %s", experiment_name)
            experiment = _get_experiment_or_default(workspace=workspace,
                                                    experiment_name=experiment_name,
                                                    project_path=proj_path,
                                                    logger=command_logger)
            if AutoArg.Run in command_auto_args:
                run_id = _get(kwargs, argument.RUN_ID_OPTION)
                command_logger.debug("Hydrating auto arg run %s", run_id)
                run = get_run(experiment, run_id)
        if AutoArg.Workspace in command_auto_args:
            kwargs[AutoArg.Workspace] = workspace
        if AutoArg.Experiment in command_auto_args:
            kwargs[AutoArg.Experiment] = experiment
        if AutoArg.Run in command_auto_args:
            kwargs[AutoArg.Run] = run

    # Handle common -t output for all commands
    output_metadata_file_path = _get(kwargs, argument.OUTPUT_METADATA_FILE)

    # Call the underlying command function
    command_logger.debug("Calling %s with args %s and kwargs %s",
                         function.__name__, args, kwargs)
    retval = function(*args, **kwargs)
    if output_metadata_file_path:
        _write_output_metadata_file(retval, output_metadata_file_path, command_logger)
    return retval
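# A hedged sketch of how a command decorated by the factory that produces
# command_wrapper might look. The decorator name `aml_command` is an
# assumption about the surrounding module, shown only to illustrate how the
# hydrated workspace/experiment/run objects arrive as kwargs keyed by the
# AutoArg values:
#
# @aml_command(auto_args=[AutoArg.Workspace, AutoArg.Experiment, AutoArg.Run])
# def show_run(workspace=None, experiment=None, run=None, logger=None):
#     logger.debug("Inspecting run %s in experiment %s", run.id, experiment.name)
#     return {"run_id": run.id, "status": run.get_status()}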