def update(self,
           task_id,
           workspace=None,
           command=None,
           command_list=None,
           interactive=False):
    """Update the stored metadata of an existing task

    Parameters
    ----------
    task_id : str
        id of the task to update
    workspace : optional
        written to the task under `workspace` when it is not None
    command : str, optional
        command as a single string; it is split with `shlex.split` to fill
        in `command_list` when no list is given
    command_list : list, optional
        command as a list of strings; when non-empty it takes precedence and
        `command` is rebuilt by joining the items with single spaces
    interactive : bool, optional
        only written to the task when truthy (default is False)

    Returns
    -------
    whatever `self.dal.task.update` returns for the assembled dictionary

    Raises
    ------
    RequiredArgumentMissing
        if `task_id` is empty or None
    """
    if not task_id:
        raise RequiredArgumentMissing(
            __("error", "controller.task.delete.arg", "id"))

    # Keep the string form and the list form of the command in agreement,
    # the list wins when both are supplied
    if command_list:
        command = " ".join(command_list)
    elif command:
        command_list = shlex.split(command)

    validate(
        "update_task", {
            "workspace": workspace,
            "command": command,
            "command_list": command_list,
            "interactive": interactive
        })

    # Only fields that were actually provided end up in the update
    changes = {'id': task_id}
    nullable_fields = (
        ('workspace', workspace),
        ('command', command),
        ('command_list', command_list),
    )
    for field_name, field_value in nullable_fields:
        if field_value is not None:
            changes[field_name] = field_value
    if interactive:
        changes['interactive'] = interactive
    return self.dal.task.update(changes)
def stop(self, **kwargs):
    """Stop a single run (`id`) or every run (`all`) and echo the outcome

    Parameters
    ----------
    **kwargs
        must contain exactly one truthy value among `id` and `all`;
        `mutually_exclusive` enforces this and raises otherwise

    Returns
    -------
    the result of `TaskController.stop`, or False if any exception was
    raised while stopping
    """
    self.task_controller = TaskController()
    input_dict = {}
    mutually_exclusive(["id", "all"], kwargs, input_dict)

    stop_single = "id" in input_dict
    stop_everything = "all" in input_dict

    # Announce what is about to be stopped
    if stop_single:
        self.cli_helper.echo(__("info", "cli.run.stop", input_dict['id']))
    elif stop_everything:
        self.cli_helper.echo(__("info", "cli.run.stop.all"))
    else:
        raise RequiredArgumentMissing()

    try:
        if stop_single:
            result = self.task_controller.stop(task_id=input_dict['id'])
            if result:
                self.cli_helper.echo(
                    __("info", "cli.run.stop.success", input_dict['id']))
            else:
                self.cli_helper.echo(
                    __("error", "cli.run.stop", input_dict['id']))
        if stop_everything:
            result = self.task_controller.stop(all=input_dict['all'])
            if result:
                self.cli_helper.echo(__("info", "cli.run.stop.all.success"))
            else:
                self.cli_helper.echo(__("error", "cli.run.stop.all"))
        return result
    except Exception:
        # Any failure is reported to the user and turned into a False result
        if stop_single:
            self.cli_helper.echo(
                __("error", "cli.run.stop", input_dict['id']))
        if stop_everything:
            self.cli_helper.echo(__("error", "cli.run.stop.all"))
        return False
def mutually_exclusive(mutually_exclusive_args, input_dictionary,
                       output_dictionary):
    """
    Copy the one argument that was supplied, out of a set of mutually
    exclusive arguments, from the input dictionary into the output
    dictionary. The output dictionary is mutated in place.

    An argument counts as supplied only when its value in the input
    dictionary is truthy.

    Parameters
    ----------
    mutually_exclusive_args : list
        arg names to search for in input dictionary
    input_dictionary : dict
        input dictionary of arguments and values to search through
    output_dictionary : dict
        dictionary that receives every supplied argument

    Raises
    ------
    MutuallyExclusiveArguments
        if more than one of the arguments is supplied
    RequiredArgumentMissing
        if none of the arguments is supplied and the list of names is not
        empty
    """
    supplied = [
        name for name in mutually_exclusive_args
        if input_dictionary.get(name, None)
    ]
    # Supplied values are copied before the checks below, so the output
    # dictionary is filled in even when an error is raised
    for name in supplied:
        output_dictionary[name] = input_dictionary[name]

    supplied_count = len(supplied)
    if supplied_count == 0 and len(mutually_exclusive_args) > 0:
        raise RequiredArgumentMissing()
    if supplied_count > 1:
        raise MutuallyExclusiveArguments(
            __("error", "util.misc_functions.mutually_exclusive",
               ' '.join(mutually_exclusive_args)))
    return
def query(self, collection, query_params, sort_key=None, sort_order=None):
    """Return the normalized entities of a collection that match a query

    Parameters
    ----------
    collection : str
        name of the collection passed to `self.backend.filter`
    query_params : dict
        filter parameters; a non-None `id` entry is sent to the backend
        under the key `pk` instead. The caller's dictionary is left
        untouched.
    sort_key : str, optional
        attribute to sort by; must be given together with `sort_order`
    sort_order : str, optional
        either 'ascending' or 'descending'; must be given together with
        `sort_key`

    Returns
    -------
    list
        `normalize_entity` applied to a copy of the attributes of every
        matching item

    Raises
    ------
    InvalidArgumentType
        if both sort arguments are given but `sort_order` is not one of
        'ascending' or 'descending'
    RequiredArgumentMissing
        if only one of `sort_key` and `sort_order` is given
    """
    self.__reload()

    # Work on a copy so the id -> pk rename never leaks back to the caller
    filter_params = dict(query_params)
    if filter_params.get('id', None) is not None:
        filter_params['pk'] = filter_params.pop('id')

    # The two sort arguments only make sense as a pair
    if (sort_key is None) != (sort_order is None):
        raise RequiredArgumentMissing()

    if sort_key is None:
        matches = self.backend.filter(collection, filter_params)
    else:
        if sort_order == 'ascending':
            direction = queryset.QuerySet.ASCENDING
        elif sort_order == 'descending':
            direction = queryset.QuerySet.DESCENDING
        else:
            raise InvalidArgumentType()
        matches = self.backend.filter(collection, filter_params).sort(
            sort_key, direction)

    return [normalize_entity(item.attributes.copy()) for item in matches]
def stop(self, task_id=None, all=False, status="STOPPED"):
    """Stop and remove run for the task and update task object statuses

    Parameters
    ----------
    task_id : str, optional
        id for the task you would like to stop
    all : bool, optional
        if specified, will stop all tasks within project
    status : str, optional
        status written to every stopped task (default is "STOPPED")

    Returns
    -------
    return_code : bool
        system return code of the stop

    Raises
    ------
    RequiredArgumentMissing
        if neither `task_id` nor `all` is given
    TooManyArgumentsFound
        if both `task_id` and `all` are given
    """
    # Truthiness is used (rather than `is None` / `is False`) so that an
    # empty task id cannot slip through and leave nothing to stop
    if not task_id and not all:
        raise RequiredArgumentMissing(
            __("error", "controller.task.stop.arg.missing", "id"))
    if task_id and all:
        raise TooManyArgumentsFound()

    if task_id:
        try:
            task_obj = self.get(task_id)
        except DoesNotExist:
            # Give the database a moment and retry once
            time.sleep(1)
            task_obj = self.get(task_id)
        task_match_string = "datmo-task-" + self.model.id + "-" + task_id
        kwargs = {'match_string': task_match_string}

        # Get the snapshots from the task to find its environment
        before_snapshot_id = task_obj.before_snapshot_id
        after_snapshot_id = task_obj.after_snapshot_id
        if not before_snapshot_id and not after_snapshot_id:
            # TODO: remove...for now database may not be in sync. no task
            # that has run can have NO before_snapshot_id
            time.sleep(1)
            task_obj = self.get(task_id)
            # Re-read the ids, otherwise the refreshed task is never used
            before_snapshot_id = task_obj.before_snapshot_id
            after_snapshot_id = task_obj.after_snapshot_id

        # Prefer the after snapshot, fall back to the before snapshot
        if after_snapshot_id:
            after_snapshot_obj = self.snapshot.get(after_snapshot_id)
            kwargs['environment_id'] = after_snapshot_obj.environment_id
        elif before_snapshot_id:
            before_snapshot_obj = self.snapshot.get(before_snapshot_id)
            kwargs['environment_id'] = before_snapshot_obj.environment_id
        return_code = self.environment.stop(**kwargs)
    else:
        return_code = self.environment.stop(all=True)

    # Set stopped task statuses only if the stop reported success
    if return_code:
        if task_id:
            self.dal.task.update({"id": task_id, "status": status})
        else:
            for task_obj in self.dal.task.query({}):
                self.dal.task.update({"id": task_obj.id, "status": status})
    return return_code
def update(self,
           snapshot_id,
           config=None,
           stats=None,
           message=None,
           label=None,
           visible=None):
    """Update the stored metadata of an existing snapshot

    Parameters
    ----------
    snapshot_id : str
        id of the snapshot to update
    config, stats, message, label, visible : optional
        new values for the fields of the same name; a field is only
        written when its value is not None

    Returns
    -------
    whatever `self.dal.snapshot.update` returns for the assembled dictionary

    Raises
    ------
    RequiredArgumentMissing
        if `snapshot_id` is empty or None
    """
    if not snapshot_id:
        raise RequiredArgumentMissing(
            __("error", "controller.snapshot.delete.arg", "snapshot_id"))

    validate(
        "update_snapshot", {
            "config": config,
            "stats": stats,
            "message": message,
            "label": label,
            "visible": visible
        })

    # Start from the id and add only the fields the caller provided
    changes = {'id': snapshot_id}
    provided_fields = (
        ('config', config),
        ('stats', stats),
        ('message', message),
        ('label', label),
        ('visible', visible),
    )
    for field_name, field_value in provided_fields:
        if field_value is not None:
            changes[field_name] = field_value
    return self.dal.snapshot.update(changes)
def delete(self, task_id):
    """Stop the run of a task and then delete the task

    The delete is attempted even when the stop reports failure.

    Parameters
    ----------
    task_id : str
        id of the task to stop and delete

    Returns
    -------
    truthy only when both the stop and the delete report success

    Raises
    ------
    RequiredArgumentMissing
        if `task_id` is empty or None
    """
    if task_id:
        was_stopped = self.stop(task_id)
        was_deleted = self.dal.task.delete(task_id)
        return was_stopped and was_deleted
    raise RequiredArgumentMissing(
        __("error", "controller.task.delete.arg", "id"))
def create(self, dictionary):
    """Create Task object

    The task is attached to the current model (`self.model.id`) and the
    current session (`self.current_session.id`).

    Parameters
    ----------
    dictionary : dict
        command : str
            full command used; required and must not be None

    Returns
    -------
    Task
        object entity for Task (datmo.core.entity.task.Task)

    Raises
    ------
    RequiredArgumentMissing
        if a required argument is absent from `dictionary` or is None
    """
    create_dict = {
        "model_id": self.model.id,
        "session_id": self.current_session.id
    }

    # Every required argument must be present with a non-None value
    for required_arg in ("command", ):
        value = dictionary.get(required_arg)
        if value is None:
            raise RequiredArgumentMissing(
                __("error", "controller.task.create.arg", required_arg))
        create_dict[required_arg] = value

    return self.dal.task.create(Task(create_dict))
def stop(self, run_id=None, match_string=None, environment_id=None,
         all=False):
    """Stop the trace of running environment

    Parameters
    ----------
    run_id : str, optional
        stop environment with specific run id
        (default is None, which means it is not used)
    match_string : str, optional
        stop environment with a string to match the environment name
        (default is None, which means it is not used)
    environment_id : str, optional
        environment object id; accepted for callers that pass it but not
        read anywhere in this method
    all : bool, optional
        stop all environments of the enclosing project

    Notes
    -----
    Exactly one of `run_id`, `match_string` and `all` must be provided,
    if several are given or none are given the function will error

    Returns
    -------
    bool
        result reported by the environment driver, True if success

    Raises
    ------
    RequiredArgumentMissing
        if none of `run_id`, `match_string` and `all` is given
    TooManyArgumentsFound
        if more than one of them is given
    """
    self.environment_driver.init()

    selectors_given = [bool(run_id), bool(match_string),
                       bool(all)].count(True)
    if selectors_given == 0:
        raise RequiredArgumentMissing()
    if selectors_given > 1:
        raise TooManyArgumentsFound()

    # Exactly one selector is set from here on
    driver = self.environment_driver
    if run_id:
        # Stop the instance (e.g. container) using the environment driver
        return driver.stop(run_id, force=True)
    if match_string:
        # Stop all tasks matching the string given
        return driver.stop_remove_containers_by_term(
            term=match_string, force=True)
    # Stop all tasks associated within the enclosed project
    project_term = "datmo-task-" + self.model.id
    return driver.stop_remove_containers_by_term(
        term=project_term, force=True)
def delete(self, **kwargs):
    """Delete the task behind a run and echo the outcome

    Parameters
    ----------
    **kwargs
        id : str
            id of the task to delete; required

    Returns
    -------
    the result of `TaskController.delete`, or False if any exception was
    raised while deleting

    Raises
    ------
    RequiredArgumentMissing
        if `id` is absent or empty
    """
    self.task_controller = TaskController()
    task_id = kwargs.get("id", None)
    if not task_id:
        raise RequiredArgumentMissing()
    self.cli_helper.echo(__("info", "cli.run.delete", task_id))

    try:
        # Delete the task for the run
        outcome = self.task_controller.delete(task_id)
        if outcome:
            self.cli_helper.echo(
                __("info", "cli.run.delete.success", task_id))
        return outcome
    except Exception:
        # Any failure is reported to the user and turned into a False result
        self.cli_helper.echo(__("error", "cli.run.delete", task_id))
        return False
def stop(self, task_id=None, all=False):
    """Stop and remove run for the task and update task object statuses

    Parameters
    ----------
    task_id : str, optional
        id for the task you would like to stop
    all : bool, optional
        if specified, will stop all tasks within project

    Returns
    -------
    return_code : bool
        system return code of the stop

    Raises
    ------
    RequiredArgumentMissing
        if `task_id` is None and `all` is False
    TooManyArgumentsFound
        if both `task_id` and `all` are given
    """
    if task_id is None and all is False:
        raise RequiredArgumentMissing(
            __("error", "controller.task.stop.arg.missing", "id"))
    if task_id and all:
        raise TooManyArgumentsFound()

    if task_id:
        # Looked up only to verify that the task exists
        self.dal.task.get_by_id(task_id)
        container_name = "datmo-task-" + self.model.id + "-" + task_id
        stopped = self.environment.stop(match_string=container_name)
    if all:
        stopped = self.environment.stop(all=True)

    # Mark the affected tasks as STOPPED only when the stop succeeded
    if stopped:
        if task_id:
            self.dal.task.update({"id": task_id, "status": "STOPPED"})
        if all:
            for stopped_task in self.dal.task.query({}):
                self.dal.task.update({
                    "id": stopped_task.id,
                    "status": "STOPPED"
                })
    return stopped
def stop(self, task_id):
    """Stop and remove the run that belongs to a task

    Parameters
    ----------
    task_id : str
        id for the task you would like to stop

    Returns
    -------
    return_code : bool
        system return code of the stop, as reported by
        `self.environment.stop` for the task's `run_id`

    Raises
    ------
    RequiredArgumentMissing
        if `task_id` is empty or None
    """
    if not task_id:
        raise RequiredArgumentMissing(
            __("error", "controller.task.stop.arg", "id"))
    # The environment is stopped by the run id stored on the task
    stored_task = self.dal.task.get_by_id(task_id)
    return self.environment.stop(stored_task.run_id)
def delete(self, snapshot_id):
    """Remove a snapshot from the data access layer

    Parameters
    ----------
    snapshot_id : str
        id for the snapshot to remove

    Returns
    -------
    bool
        result of `self.dal.snapshot.delete`, True if success

    Raises
    ------
    RequiredArgumentMissing
        if the provided snapshot_id is empty or None
    """
    if snapshot_id:
        return self.dal.snapshot.delete(snapshot_id)
    raise RequiredArgumentMissing(
        __("error", "controller.snapshot.delete.arg", "snapshot_id"))
def init(self, name, description):
    """Initialize the project: model, drivers and current session

    Creates the model when none exists yet, otherwise updates its name and
    description. Each of the code, file and environment drivers is
    initialized if it reports that it is not initialized. A new model gets
    a new "default" session marked as current; for an existing model
    without a current session the stored "default" session is marked as
    current.

    Parameters
    ----------
    name : str
        name of the project; required
    description : str
        description of the project

    Returns
    -------
    bool
        True if success

    Raises
    ------
    RequiredArgumentMissing
        if `name` is empty or None
    SessionDoesNotExistException
        if an existing model has no current session and no "default"
        session can be found
    """
    # Error if name is not given
    if not name:
        raise RequiredArgumentMissing(
            __("error", "controller.project.init.arg", "name"))

    # Create the Model, is it new or update?
    is_new_model = False
    if not self.model:
        _ = self.dal.model.create(
            Model({
                "name": name,
                "description": description
            }))
        is_new_model = True
    else:
        self._model = self.dal.model.update({
            "id": self.model.id,
            "name": name,
            "description": description
        })

    # Initialize Code Manager if needed
    if not self.code_driver.is_initialized:
        self.code_driver.init()

    # Initialize File Manager if needed
    if not self.file_driver.is_initialized:
        self.file_driver.init()

    # Initialize Environment Manager if needed
    if not self.environment_driver.is_initialized:
        self.environment_driver.init()

    # TODO: Add in project template files

    # Create and set current session
    if is_new_model:
        # Create new default session
        _ = self.dal.session.create(
            Session({
                "name": "default",
                "model_id": self.model.id,
                "current": True
            }))
    elif not self.current_session:
        default_sessions = self.dal.session.query({
            "name": "default",
            "model_id": self.model.id
        })
        if not default_sessions:
            raise SessionDoesNotExistException(
                __("error", "controller.project.init"))
        # query returns a list of matches, so the session to update is its
        # first element (reading `.id` off the list itself would fail)
        self.dal.session.update({
            "id": default_sessions[0].id,
            "current": True
        })
    return True
def run(self, task_id, snapshot_dict=None, task_dict=None):
    """Run a task with parameters. If dictionary specified, create a new task with new run parameters.
    Snapshot objects are created before and after the task to keep track of the state. During the run,
    you can access task outputs using environment variable DATMO_TASK_DIR or `/task` which points to
    location for the task files. Create config.json, stats.json and any weights or any file such
    as graphs and visualizations within that directory for quick access

    Parameters
    ----------
    task_id : str
        id for the task you would like to run
    snapshot_dict : dict
        set of parameters to create a snapshot (see SnapshotController for details.
        default is None, which means dictionary with `visible` False will be added to
        hide auto-generated snapshot) NOTE: `visible` False will always be False regardless
        of whether the user provides another value for `visible`.
    task_dict : dict
        set of parameters to characterize the task run
        (default is None, which translate to {}, see datmo.core.entity.task.Task for more details on inputs)

    Returns
    -------
    Task
        the Task object which completed its run with updated parameters

    Raises
    ------
    RequiredArgumentMissing
        if none of command, command_list or interactive is available from
        `task_dict` or the stored task
    TaskRunError
        if the task already has a status, or if there is any error in
        creating files for the task
    TaskNoCommandGiven
        if no command can be determined and the task is not interactive
    """
    # Ensure visible=False is present in the snapshot dictionary
    if not snapshot_dict:
        snapshot_dict = {"visible": False}
    else:
        snapshot_dict['visible'] = False

    if not task_dict:
        task_dict = {}

    # Obtain Task to run
    task_obj = self.dal.task.get_by_id(task_id)

    # Ensure that at least 1 of command, command_list, or interactive is present in task_dict
    important_task_args = ["command", "command_list", "interactive"]
    if not task_dict.get('command', task_obj.command) and \
        not task_dict.get('command_list', task_obj.command_list) and \
        not task_dict.get('interactive', task_obj.interactive):
        raise RequiredArgumentMissing(
            __("error", "controller.task.run.arg",
               " or ".join(important_task_args)))

    # A task may only be run once: any existing status means it already ran
    if task_obj.status is None:
        task_obj.status = "RUNNING"
    else:
        raise TaskRunError(
            __("error", "cli.run.run.already_running", task_obj.id))

    # Create Task directory for user during run
    task_dirpath = os.path.join(".datmo", "tasks", task_obj.id)
    try:
        _ = self.file_driver.create(task_dirpath, directory=True)
    except Exception:
        raise TaskRunError(
            __("error", "controller.task.run", task_dirpath))

    # Create the before snapshot prior to execution
    before_snapshot_dict = snapshot_dict.copy()
    before_snapshot_dict[
        'message'] = "autogenerated snapshot created before task %s is run" % task_obj.id
    before_snapshot_obj = self.snapshot.create(before_snapshot_dict)

    # Update the task with pre-execution parameters, prefer list first then look for string command
    # List command will overwrite a string command if given
    if task_dict.get('command_list', task_obj.command_list):
        task_dict['command'] = " ".join(
            task_dict.get('command_list', task_obj.command_list))
    else:
        if task_dict.get('command', task_obj.command):
            task_dict['command_list'] = shlex.split(
                task_dict.get('command', task_obj.command))
        elif not task_dict.get('interactive', task_obj.interactive):
            # If it's not interactive then there is not expected task
            raise TaskNoCommandGiven()

    validate("create_task", task_dict)

    task_obj = self.dal.task.update({
        "id":
            task_obj.id,
        "before_snapshot_id":
            task_dict.get('before_snapshot_id', before_snapshot_obj.id),
        "command":
            task_dict.get('command', task_obj.command),
        "command_list":
            task_dict.get('command_list', task_obj.command_list),
        "gpu":
            task_dict.get('gpu', False),
        "mem_limit":
            task_dict.get('mem_limit', None),
        "workspace":
            task_dict.get('workspace', None),
        "interactive":
            task_dict.get('interactive', task_obj.interactive),
        "detach":
            task_dict.get('detach', task_obj.detach),
        "ports":
            task_dict.get('ports', task_obj.ports),
        "task_dirpath":
            task_dict.get('task_dirpath', task_dirpath),
        "log_filepath":
            task_dict.get('log_filepath',
                          os.path.join(task_dirpath, "task.log")),
        "start_time":
            task_dict.get('start_time', datetime.utcnow()),
        "status":
            task_obj.status
    })

    # Copy over files from the before_snapshot file collection to task dir
    file_collection_obj = \
        self.dal.file_collection.get_by_id(before_snapshot_obj.file_collection_id)
    self.file_driver.copytree(
        os.path.join(self.home, file_collection_obj.path),
        os.path.join(self.home, task_obj.task_dirpath))

    return_code, run_id, logs = 0, None, None

    try:
        # Set the parameters set in the task
        if task_obj.detach and task_obj.interactive:
            raise TaskInteractiveDetachError(
                __("error", "controller.task.run.args.detach.interactive"))

        environment_run_options = {
            "command": task_obj.command_list,
            "ports": [] if task_obj.ports is None else task_obj.ports,
            "name": "datmo-task-" + self.model.id + "-" + task_obj.id,
            "volumes": {
                os.path.join(self.home, task_obj.task_dirpath): {
                    'bind': '/task/',
                    'mode': 'rw'
                },
                self.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "mem_limit": task_obj.mem_limit,
            "workspace": task_obj.workspace,
            "gpu": task_obj.gpu,
            "detach": task_obj.detach,
            "stdin_open": task_obj.interactive,
            "tty": task_obj.interactive,
            "api": False
        }

        # Run environment via the helper function
        return_code, run_id, logs = \
            self._run_helper(before_snapshot_obj.environment_id,
                             environment_run_options,
                             os.path.join(self.home, task_obj.log_filepath))
    except Exception as e:
        return_code = 1
        # `logs` is still None when the failure happened before the run
        # produced any output, so start from an empty string in that case
        logs = (logs or "") + "Error running task: %s" % str(e)
    finally:
        # Everything below, including the return, stays inside `finally`
        # so the task is always finalized with an after snapshot and status

        # Create the after snapshot after execution is completed with new paths
        after_snapshot_dict = snapshot_dict.copy()
        after_snapshot_dict[
            'message'] = "autogenerated snapshot created after task %s is run" % task_obj.id

        # Add in absolute paths from running task directory
        absolute_task_dir_path = os.path.join(self.home,
                                              task_obj.task_dirpath)
        absolute_paths = []
        for item in os.listdir(absolute_task_dir_path):
            path = os.path.join(absolute_task_dir_path, item)
            if os.path.isfile(path) or os.path.isdir(path):
                absolute_paths.append(path)
        after_snapshot_dict.update({
            "paths": absolute_paths,
            "environment_id": before_snapshot_obj.environment_id,
        })
        after_snapshot_obj = self.snapshot.create(after_snapshot_dict)

        # (optional) Remove temporary task directory path
        # Update the task with post-execution parameters
        end_time = datetime.utcnow()
        duration = (end_time - task_obj.start_time).total_seconds()
        update_task_dict = {
            "id": task_obj.id,
            "after_snapshot_id": after_snapshot_obj.id,
            "logs": logs,
            "status": "SUCCESS" if return_code == 0 else "FAILED",
            # "results": task_obj.results, # TODO: update during run
            "end_time": end_time,
            "duration": duration
        }
        if logs is not None:
            update_task_dict["results"] = self._parse_logs_for_results(
                logs)
        if run_id is not None:
            update_task_dict["run_id"] = run_id
        return self.dal.task.update(update_task_dict)
def delete(self, snapshot_id):
    """Delete the snapshot with the given id

    Parameters
    ----------
    snapshot_id : str
        id of the snapshot to delete

    Returns
    -------
    whatever `self.dal.snapshot.delete` returns for the id

    Raises
    ------
    RequiredArgumentMissing
        if `snapshot_id` is empty or None
    """
    id_missing = not snapshot_id
    if id_missing:
        raise RequiredArgumentMissing(
            __("error", "controller.snapshot.delete.arg", "snapshot_id"))
    snapshot_store = self.dal.snapshot
    return snapshot_store.delete(snapshot_id)
def create(self, dictionary):
    """Create snapshot object

    Parameters
    ----------
    dictionary : dict

        for each of the 5 key components, this function will search for
        one of the variables below starting from the top. The default
        described for each component applies when none of its variables
        is given.

        code :
            code_id : str, optional
                code reference associated with the snapshot; if not
                provided will look to inputs below for code creation
            commit_id : str, optional
                commit id provided by the user if already available

            Default
            -------
            commits will be taken and code created via the CodeController
            and are added to the snapshot at the time of snapshot creation

        environment :
            environment_id : str, optional
                id for environment used to create snapshot
            environment_paths : list, optional
                list of absolute or relative filepaths and/or dirpaths to
                collect with destination names
                (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")

            Default
            -------
            default environment files will be searched and environment will
            be created with the EnvironmentController and added to the
            snapshot at the time of snapshot creation

        file_collection :
            file_collection_id : str, optional
                file collection associated with the snapshot
            paths : list, optional
                list of absolute or relative filepaths and/or dirpaths to
                collect with destination names
                (e.g. "/path/to/file:hello", "/path/to/file2", "/path/to/dir:newdir")

            Default
            -------
            paths will be considered empty ([]), and the
            FileCollectionController will create a blank FileCollection
            that is empty.

        config :
            config : dict, optional
                key, value pairs of configurations
            config_filepath : str, optional
                absolute filepath to configuration parameters file
            config_filename : str, optional
                name of file with configuration parameters

            Default
            -------
            config will be considered empty ({}) and saved to the snapshot

        stats :
            stats : dict, optional
                key, value pairs of metrics and statistics
            stats_filepath : str, optional
                absolute filepath to stats parameters file
            stats_filename : str, optional
                name of file with metrics and statistics.

            Default
            -------
            stats will be considered empty ({}) and saved to the snapshot

        the remaining arguments are looked up directly in the input
        dictionary

        message : str
            long description of snapshot; required
        session_id : str, optional
            session id within which snapshot is created
            NOTE(review): this method always uses the current session id
            and does not read `session_id` from the dictionary itself -
            confirm whether an override is applied elsewhere
        task_id : str, optional
            task id associated with snapshot
        label : str, optional
            short description of snapshot
        visible : bool, optional
            True if visible to user via list command else False

    Returns
    -------
    datmo.core.entity.snapshot.Snapshot
        snapshot object with all relevant parameters; when a snapshot with
        the same model, code, environment, file collection, config and
        stats already exists, that one is returned instead of a new one

    Raises
    ------
    RequiredArgumentMissing
        if required arguments are not given by the user
    FileIOError
        if files are not present or there is an error in File IO
    """
    # Validate Inputs
    create_dict = {
        "model_id": self.model.id,
        "session_id": self.current_session.id,
    }
    validate("create_snapshot", dictionary)

    # Message must be present
    if "message" not in dictionary:
        raise RequiredArgumentMissing(
            __("error", "controller.snapshot.create.arg", "message"))
    create_dict['message'] = dictionary['message']

    # Code, environment, file, config and stats setup, in that order
    component_setups = (
        self._code_setup,
        self._env_setup,
        self._file_setup,
        self._config_setup,
        self._stats_setup,
    )
    for setup_component in component_setups:
        setup_component(dictionary, create_dict)

    # If snapshot object with required args already exists, return it
    # DO NOT create a new snapshot with the same required arguments
    identity_keys = ("model_id", "code_id", "environment_id",
                     "file_collection_id", "config", "stats")
    existing_snapshots = self.dal.snapshot.query(
        {key: create_dict[key]
         for key in identity_keys})
    if existing_snapshots:
        return existing_snapshots[0]

    # Optional args for Snapshot entity
    for optional_arg in ("task_id", "label", "visible"):
        if optional_arg in dictionary:
            create_dict[optional_arg] = dictionary[optional_arg]

    # Create snapshot and return
    return self.dal.snapshot.create(Snapshot(create_dict))
def delete(self, task_id):
    """Delete the task with the given id

    Parameters
    ----------
    task_id : str
        id of the task to delete

    Returns
    -------
    whatever `self.dal.task.delete` returns for the id

    Raises
    ------
    RequiredArgumentMissing
        if `task_id` is empty or None
    """
    if task_id:
        return self.dal.task.delete(task_id)
    raise RequiredArgumentMissing(
        __("error", "controller.task.delete.arg", "id"))