def transmit_artifacts(self, id):
    artifacts = (self.info[id][self.artifacts]
                 if self.artifacts in self.info[id].keys() else {})
    task = self.call_func(
        'Task.get_task', id,
        lambda id_: Task.get_task(project_name=PROJECT_NAME, task_name=id_),
        self.get_run_name_by_id(id))
    # Every supported artifact type is uploaded the same way, so a single
    # membership check replaces the identical per-type branches.
    for artifact_type, items in artifacts.items():
        if artifact_type in ("folder", "text", "dataframe", "image",
                             "dictionary", "storage-server"):
            for name, obj in items:
                task.upload_artifact(name=name, artifact_object=obj)
def _verify_node(self, node):
    # type: (Node) -> bool
    """
    Raise ValueError on verification errors

    :return: Return True iff the specific node is verified
    """
    if not node.base_task_id:
        raise ValueError("Node '{}', base_task_id is empty".format(node.name))

    if not self._default_execution_queue and not node.queue:
        raise ValueError("Node '{}' missing execution queue, "
                         "no default queue defined and no specific node queue defined".format(node.name))

    task = Task.get_task(task_id=node.base_task_id)
    if not task:
        raise ValueError("Node '{}', base_task_id={} is invalid".format(node.name, node.base_task_id))

    pattern = self._step_ref_pattern

    for v in node.parameters.values():
        if isinstance(v, str):
            for g in pattern.findall(v):
                self.__verify_step_reference(node, g)

    return True
def clone_and_queue(template_task: str, queue: str) -> Task:
    github_payload = os.getenv('GITHUB_EVENT_PATH')
    with open(github_payload, 'r') as f:
        payload = json.load(f)

    task = Task.get_task(task_id=template_task)
    # Clone the task to pipe to. This creates a task with status Draft whose parameters can be modified.
    cloned_task = Task.clone(source_task=task, name=f"{template_task} cloned task from Github")

    script_commit = payload.get("comment", {}).get("body", "").partition(" ")[2]
    selected_type, _, selected_value = script_commit.partition(" ")
    if selected_type and selected_value:
        data_script = cloned_task.data.script
        if selected_type == "branch":
            data_script.branch = selected_value
            data_script.tag = ""
            data_script.version_num = ""
        elif selected_type == "tag":
            data_script.branch = ""
            data_script.tag = selected_value
            data_script.version_num = ""
        elif selected_type == "commit":
            data_script.branch = ""
            data_script.tag = ""
            data_script.version_num = selected_value
        else:
            raise Exception(
                f"You must supply branch, tag or commit as the type, not {selected_type}"
            )
        print(f"Changing training script head to {selected_type} {selected_value}")
        # noinspection PyProtectedMember
        cloned_task._update_script(script=data_script)

    Task.enqueue(cloned_task.id, queue_name=queue)

    owner, repo = payload.get("repository", {}).get("full_name", "").split("/")
    if owner and repo:
        gh = login(token=os.getenv("GITHUB_TOKEN"))
        if gh:
            issue = gh.issue(owner, repo, payload.get("issue", {}).get("number"))
            if issue:
                issue.create_comment(
                    f"New task, id:{cloned_task.id} is in queue {queue}")
            else:
                print(f'Can not comment on issue {payload.get("issue", {}).get("number")}')
        else:
            print(f"Can not log in to GitHub with token {os.getenv('GITHUB_TOKEN')}")
    return cloned_task
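# Hypothetical invocation sketch (not part of the source above): in a GitHub Actions
# workflow the template task id and queue name would typically come from the job
# environment; the variable names TEMPLATE_TASK_ID and EXECUTION_QUEUE are assumptions,
# imports are as in the snippet above.
if __name__ == "__main__":
    new_task = clone_and_queue(
        template_task=os.getenv("TEMPLATE_TASK_ID", ""),
        queue=os.getenv("EXECUTION_QUEUE", "default"),
    )
    print(f"Enqueued cloned task: {new_task.id}")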
def transmit_metrics(self, id):
    task = self.call_func(
        'Task.get_task', id,
        lambda id_: Task.get_task(project_name=PROJECT_NAME, task_name=id_),
        self.get_run_name_by_id(id))
    logger = task.get_logger()
    metrics = self.get_metrics(id)
    for graph_name, series_name, table in metrics:
        for p in table:
            logger.report_scalar(graph_name, series_name,
                                 iteration=p[0], value=float(p[1]))
    task.completed()
def transmit_information(self, id):
    parameters = self.get_params(id)
    general_information = self.get_general_information(id)
    artifact = self.get_artifact(id)
    tags = self.get_tags(id)
    task = self.call_func(
        'Task.get_task', id,
        lambda id_: Task.get_task(project_name=PROJECT_NAME, task_name=id_),
        self.get_run_name_by_id(id))
    task_values = self.call_func(
        'task.export_task', id,
        lambda _: task.export_task(),
        self.get_run_name_by_id(id))

    task_values["comment"] = tags.get("note.content", "")
    task_values["hyperparams"]["Args"] = parameters
    task_values["started"] = general_information["started"]
    task_values["completed"] = general_information["completed"]
    task_values["script"]["branch"] = tags.get("source.git.branch", self.branch)
    task_values["script"]["repository"] = tags.get("source.git.repoURL", "")
    task_values["script"]["version_num"] = tags.get("source.git.commit", "")
    task_values["script"]["entry_point"] = tags["entry_point"]
    task_values["script"]["working_dir"] = tags["working_dir"]
    if "project.env" in tags:
        task_values["script"]["requirements"][tags["project.env"]] = artifact.get("requirements", "")
    task_values["user"] = tags["user"]

    self.call_func('task.update_task', id,
                   lambda _task_values: task.update_task(_task_values),
                   task_values)
    if tags["VALUETAG"]:
        self.call_func(
            'task.connect_configuration', id,
            lambda _dict: task.connect_configuration(_dict, name="MLflow Tags"),
            tags["VALUETAG"])
def read(self):
    self.thread_id = threading.current_thread().ident
    for id, path in self.paths:
        self.info[id] = {}
        self.call_func(
            'read_general_information', id,
            lambda id_, path_: self.read_general_information(id_, path_),
            id, path)
        self.call_func(
            'read_tags', id,
            lambda id_, path_: self.read_tags(id_, path_),
            id, path + self.tags)
        if "runName" in self.info[id][self.tags].keys():
            self.ID_to_Name[id] = self.info[id][self.tags]["runName"]
        if self.project_exist:
            task = self.call_func(
                'Task.get_task', id,
                lambda id_: Task.get_task(project_name=PROJECT_NAME, task_name=id_),
                self.get_run_name_by_id(id))
            if task:
                task_tags = (task.data.system_tags
                             if hasattr(task.data, 'system_tags') else task.data.tags)
                if ARCHIVED_TAG not in task_tags:
                    del self.info[id]
                    self.msgs['FAILED'].append(
                        'task ' + id + ' already exists; to migrate it again, '
                        'archive it in Allegro Trains first')
                    self.pbar.update(1)
                    continue
        self.call_func(
            'read_artifacts', id,
            lambda id_, path_: self.read_artifacts(id_, path_),
            id, path + self.artifacts)
        self.call_func(
            'read_metrics', id,
            lambda id_, path_: self.read_metrics(id_, path_),
            id, path + self.metrics)
        self.call_func(
            'read_params', id,
            lambda id_, path_: self.read_params(id_, path_),
            id, path + self.params)
def trains(self,
           x: data_type,
           y: data_type = None,
           x_cv: data_type = None,
           y_cv: data_type = None,
           *,
           trains_config: Dict[str, Any] = None,
           keep_task_open: bool = False,
           queue: str = None) -> "Wrapper":
    # without a trains_config there is nothing to log to, fall back to a plain fit
    if trains_config is None:
        return self.fit(x, y, x_cv, y_cv)
    # init trains
    project_name = trains_config.get("project_name")
    task_name = trains_config.get("task_name")
    if queue is None:
        task = Task.init(**trains_config)
        cloned_task = None
    else:
        task = Task.get_task(project_name=project_name, task_name=task_name)
        cloned_task = Task.clone(source_task=task, parent=task.id)
    # before loop
    self._verbose_level = 6
    self._data_config["verbose_level"] = 6
    self._before_loop(x, y, x_cv, y_cv)
    self.pipeline.use_tqdm = False
    copied_config = shallow_copy_dict(self.config)
    if queue is not None:
        cloned_task.set_parameters(copied_config)
        Task.enqueue(cloned_task.id, queue)
        return self
    # loop
    task.connect(copied_config)
    global trains_logger
    trains_logger = task.get_logger()
    self._loop()
    if not keep_task_open:
        task.close()
        trains_logger = None
    return self
def get_task_stats(task_id):
    task = Task.get_task(task_id=task_id)
    if task:
        task_status = task.get_status()
        # Try to get the task stats
        if task_status in ["completed", "published", "publishing", "in_progress", "stopped", "failed"]:
            table = create_comment_output(task, task_status)
            if table:
                return f"Results\n\n{table}\n\n" \
                       f"You can view full task results [here]({task.get_output_log_web_page()})"
            if task_status == "failed":
                return f"No data for FAILED task {task_id}, " \
                       f"you can view full task results [here]({task.get_output_log_web_page()})"
            return f"No data yet... You can view full task results [here]({task.get_output_log_web_page()})"
        # Update the user about the task status, no stats can be fetched yet
        elif task_status in ["created", "queued", "unknown"]:
            return f"Task is in {task_status} status, no stats yet."
    return f"Can not find task {task_id}.\n\n"
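# Hypothetical usage sketch (not part of the source above): fetch the task id from the
# environment and print the generated stats comment; the TASK_ID variable name is an assumption.
import os

if __name__ == "__main__":
    print(get_task_stats(os.getenv("TASK_ID", "")))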
# Connecting TRAINS
task = Task.init(project_name='Hyper-Parameter Optimization',
                 task_name='Automatic Hyper-Parameter Optimization',
                 task_type=Task.TaskTypes.optimizer,
                 reuse_last_task_id=False)

# experiment template to optimize in the hyper-parameter optimization
args = {
    'template_task_id': None,
    'run_as_service': False,
}
args = task.connect(args)

# Get the template task experiment that we want to optimize
if not args['template_task_id']:
    args['template_task_id'] = Task.get_task(
        project_name='examples', task_name='Keras HP optimization base').id

# Example use case:
an_optimizer = HyperParameterOptimizer(
    # This is the experiment we want to optimize
    base_task_id=args['template_task_id'],
    # here we define the hyper-parameters to optimize
    hyper_parameters=[
        UniformIntegerParameterRange('layer_1', min_value=128, max_value=512, step_size=128),
        UniformIntegerParameterRange('layer_2', min_value=128, max_value=512, step_size=128),
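        # The excerpt is truncated above; what follows is a hedged continuation sketch,
        # not verbatim source: metric names and values are illustrative, and the argument
        # names follow the trains HyperParameterOptimizer API as best understood.
    ],
    # the objective metric we want to maximize
    objective_metric_title='val_acc',
    objective_metric_series='val_acc',
    objective_metric_sign='max',
    # limit the number of experiments running at once
    max_number_of_concurrent_tasks=2,
    # the execution queue the cloned experiments are sent to
    execution_queue='default',
)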
def _patched_parse_args(original_parse_fn, self, args=None, namespace=None):
    current_task = PatchArgumentParser._current_task
    # if we are running remotely, we always have a task id,
    # so we better patch the argparser as soon as possible.
    if not current_task:
        from ..config import running_remotely, get_remote_task_id
        if running_remotely():
            # this will cause the current_task() to set PatchArgumentParser._current_task
            from trains import Task
            # noinspection PyBroadException
            try:
                current_task = Task.get_task(task_id=get_remote_task_id())
            except Exception:
                pass

    # automatically connect to current task:
    if current_task:
        from ..config import running_remotely

        if PatchArgumentParser._calling_current_task:
            # if we are here and running remotely by now we should try to parse the arguments
            if original_parse_fn:
                PatchArgumentParser._add_last_parsed_args(
                    original_parse_fn(self, args=args, namespace=namespace))
            return PatchArgumentParser._last_parsed_args[-1]

        PatchArgumentParser._calling_current_task = True
        # Store last instance and result
        PatchArgumentParser._add_last_arg_parser(self)
        parsed_args = None
        # parse if we are running in dev mode
        if not running_remotely() and original_parse_fn:
            parsed_args = original_parse_fn(self, args=args, namespace=namespace)
            PatchArgumentParser._add_last_parsed_args(parsed_args)

        # noinspection PyBroadException
        try:
            # sync to/from task
            # noinspection PyProtectedMember
            current_task._connect_argparse(
                self, args=args, namespace=namespace,
                parsed_args=parsed_args[0] if isinstance(parsed_args, tuple) else parsed_args)
        except Exception:
            pass

        # sync back and parse
        if running_remotely() and original_parse_fn:
            # if we are running python2 check if we have subparsers,
            # if we do we need to patch the args, because there is no default subparser
            if PY2:
                import itertools

                def _get_sub_parsers_defaults(subparser, prev=[]):
                    actions_grp = [a._actions for a in subparser.choices.values()] if isinstance(
                        subparser, _SubParsersAction) else [subparser._actions]
                    sub_parsers_defaults = [[subparser]] if hasattr(
                        subparser, 'default') and subparser.default else []
                    for actions in actions_grp:
                        sub_parsers_defaults += [
                            _get_sub_parsers_defaults(a, prev) for a in actions
                            if isinstance(a, _SubParsersAction) and hasattr(a, 'default') and a.default
                        ]
                    return list(itertools.chain.from_iterable(sub_parsers_defaults))

                sub_parsers_defaults = _get_sub_parsers_defaults(self)
                if sub_parsers_defaults:
                    if args is None:
                        # args default to the system args
                        import sys as _sys
                        args = _sys.argv[1:]
                    else:
                        args = list(args)
                    # make sure we append the subparsers
                    for a in sub_parsers_defaults:
                        if a.default not in args:
                            args.append(a.default)

            PatchArgumentParser._add_last_parsed_args(
                original_parse_fn(self, args=args, namespace=namespace))
        else:
            PatchArgumentParser._add_last_parsed_args(parsed_args or {})

        PatchArgumentParser._calling_current_task = False
        return PatchArgumentParser._last_parsed_args[-1]

    # Store last instance and result
    PatchArgumentParser._add_last_arg_parser(self)
    PatchArgumentParser._add_last_parsed_args(
        {} if not original_parse_fn else original_parse_fn(self, args=args, namespace=namespace))
    return PatchArgumentParser._last_parsed_args[-1]
def __setstate__(self, state: str) -> None:
    self._rank = 0
    self._trains = None
    if state:
        self._trains = Task.get_task(task_id=state)
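# A minimal companion sketch (not from the source): __setstate__ above rebuilds the
# logger from a task id, which suggests the matching __getstate__ pickles only that id.
def __getstate__(self):
    # serialize just the backing task id; the Task object itself is not picklable
    return self._trains.id if self._trains else None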
# In this example we pass the next task's name as a parameter
param['next_task_name'] = 'Toy Base Task'
# This is a parameter name in the next task we want to change
param['param_name'] = 'Example_Param'
# This is the parameter value in the next task we want to change
param['param_name_new_value'] = 3
# The queue where we want the template task (clone) to be sent to
param['execution_queue_name'] = 'default'

# Simulate the work of a Task
print('Processing....')
sleep(2.0)
print('Done processing :)')

# Get a reference to the task to pipe to.
next_task = Task.get_task(project_name=task.get_project_name(),
                          task_name=param['next_task_name'])

# Clone the task to pipe to. This creates a task with status Draft whose parameters can be modified.
cloned_task = Task.clone(source_task=next_task, name='Auto generated cloned task')

# Get the original parameters of the Task, modify the value of one parameter,
# and set the parameters in the next Task
cloned_task_parameters = cloned_task.get_parameters()
cloned_task_parameters[param['param_name']] = param['param_name_new_value']
cloned_task.set_parameters(cloned_task_parameters)

# Enqueue the Task for execution. The enqueued Task must already exist in the trains platform
print('Enqueue next step in pipeline to queue: {}'.format(param['execution_queue_name']))
Task.enqueue(cloned_task.id, queue_name=param['execution_queue_name'])
# Connecting TRAINS task = Task.init(project_name="examples", task_name="pipeline step 3 train model") # Arguments args = { 'dataset_task_id': 'REPLACE_WITH_DATASET_TASK_ID', } task.connect(args) # only create the task, we will actually execute it later task.execute_remotely() print('Retrieving Iris dataset') dataset_task = Task.get_task(task_id=args['dataset_task_id']) X_train = dataset_task.artifacts['X_train'].get() X_test = dataset_task.artifacts['X_test'].get() y_train = dataset_task.artifacts['y_train'].get() y_test = dataset_task.artifacts['y_test'].get() print('Iris dataset loaded') model = LogisticRegression(solver='liblinear', multi_class='auto') model.fit(X_train, y_train) joblib.dump(model, 'model.pkl', compress=True) loaded_model = joblib.load('model.pkl') result = loaded_model.score(X_test, y_test) print('model trained & stored')
# (can be integrated with 'bayesian-optimization' 'hpbandster' etc.)
space = {
    'batch_size': lambda: sample([64, 96, 128, 160, 192], 1)[0],
    'layer_1': lambda: sample(range(128, 512, 32), 1)[0],
    'layer_2': lambda: sample(range(128, 512, 32), 1)[0],
}

# number of random samples to test from 'space'
total_number_of_experiments = 3

# execution queue to add experiments to
execution_queue_name = 'default'

# Select the base template task
# Notice we can be more imaginative and use a task_id, which eliminates the need for a project name
template_task = Task.get_task(project_name='examples', task_name='Keras AutoML base')

for i in range(total_number_of_experiments):
    # clone the template task into a new, write-enabled task (where we can change parameters)
    cloned_task = Task.clone(source_task=template_task,
                             name=template_task.name + ' {}'.format(i),
                             parent=template_task.id)

    # get the original template parameters
    cloned_task_parameters = cloned_task.get_parameters()

    # override with random samples from the grid
    for k in space.keys():
        cloned_task_parameters[k] = space[k]()

    # put back into the new cloned task
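    # The excerpt ends here; a plausible continuation (a sketch, not verbatim from the
    # source) sets the sampled parameters on the draft clone and enqueues it:
    cloned_task.set_parameters(cloned_task_parameters)

    # enqueue the clone for execution on the chosen queue
    print('Enqueue experiment {} to queue: {}'.format(i, execution_queue_name))
    Task.enqueue(cloned_task.id, queue_name=execution_queue_name)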
def add_step(
        self,
        name,  # type: str
        base_task_id=None,  # type: Optional[str]
        parents=None,  # type: Optional[Sequence[str]]
        parameter_override=None,  # type: Optional[Mapping[str, Any]]
        execution_queue=None,  # type: Optional[str]
        time_limit=None,  # type: Optional[float]
        base_task_project=None,  # type: Optional[str]
        base_task_name=None,  # type: Optional[str]
):
    # type: (...) -> bool
    """
    Add a step to the pipeline execution DAG.
    Each step must have a unique name (this name will later be used to address the step)

    :param str name: Unique name of the step. For example `stage1`
    :param str base_task_id: The Task ID to use for the step. Each time the step is executed,
        the base Task is cloned, then the cloned task will be sent for execution.
    :param list parents: Optional list of parent nodes in the DAG.
        The current step in the pipeline will be sent for execution only after all the parent nodes
        have been executed successfully.
    :param dict parameter_override: Optional parameter overriding dictionary.
        The dict values can reference a previously executed step using the following form '${step_name}'
        Examples:
            Artifact access
                parameter_override={'Args/input_file': '${stage1.artifacts.mydata.url}' }
            Model access (last model used)
                parameter_override={'Args/input_file': '${stage1.models.output.-1.url}' }
            Parameter access
                parameter_override={'Args/input_file': '${stage3.parameters.Args/input_file}' }
            Task ID
                parameter_override={'Args/input_file': '${stage3.id}' }
    :param str execution_queue: Optional, the queue to use for executing this specific step.
        If not provided, the task will be sent to the default execution queue, as defined on the class
    :param float time_limit: Default None, no time limit.
        Step execution time limit, if exceeded the Task is aborted and the pipeline is stopped and marked failed.
    :param str base_task_project: If base_task_id is not given, use the base_task_project and
        base_task_name combination to retrieve the base_task_id to use for the step.
    :param str base_task_name: If base_task_id is not given, use the base_task_project and
        base_task_name combination to retrieve the base_task_id to use for the step.

    :return: True if successful
    """
    # when running remotely do nothing, we will deserialize ourselves when we start
    if self._task and not self._task.running_locally() and self._task.is_main_task():
        return True

    if name in self._nodes:
        raise ValueError('Node named \'{}\' already exists in the pipeline dag'.format(name))

    if not base_task_id:
        if not base_task_project or not base_task_name:
            raise ValueError('Either base_task_id or base_task_project/base_task_name must be provided')
        base_task = Task.get_task(project_name=base_task_project, task_name=base_task_name)
        if not base_task:
            raise ValueError('Could not find base_task_project={} base_task_name={}'.format(
                base_task_project, base_task_name))
        base_task_id = base_task.id

    self._nodes[name] = self.Node(
        name=name, base_task_id=base_task_id, parents=parents or [],
        queue=execution_queue, timeout=time_limit,
        parameters=parameter_override or {})
    return True
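# A minimal usage sketch for add_step() above (illustrative, not from the source):
# the import path, controller constructor arguments, and the project/task names are
# assumptions; the '${stage_data.id}' reference syntax follows the docstring above.
from trains.automation.controller import PipelineController

pipe = PipelineController(default_execution_queue='default', add_pipeline_tags=False)
pipe.add_step(name='stage_data',
              base_task_project='examples',
              base_task_name='pipeline step 1 dataset artifact')
pipe.add_step(name='stage_train',
              parents=['stage_data'],
              base_task_project='examples',
              base_task_name='pipeline step 3 train model',
              # pass the previous step's task id into the training step's arguments
              parameter_override={'Args/dataset_task_id': '${stage_data.id}'})
pipe.start()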
def __parse_step_reference(self, step_ref_string):
    """
    return the adjusted value for "${step...}"
    :param step_ref_string: reference string of the form "${step_name.type.value}"
    :return: str with value
    """
    parts = step_ref_string[2:-1].split('.')
    if len(parts) < 2:
        raise ValueError("Could not parse reference '{}'".format(step_ref_string))
    prev_step = parts[0]
    input_type = parts[1].lower()
    if prev_step not in self._nodes or not self._nodes[prev_step].job:
        raise ValueError("Could not parse reference '{}', step {} could not be found".format(
            step_ref_string, prev_step))
    if input_type not in ('artifacts', 'parameters', 'models', 'id'):
        raise ValueError("Could not parse reference '{}', type {} not valid".format(step_ref_string, input_type))
    if input_type != 'id' and len(parts) < 3:
        raise ValueError("Could not parse reference '{}', missing fields in {}".format(step_ref_string, parts))

    task = self._nodes[prev_step].job.task if self._nodes[prev_step].job \
        else Task.get_task(task_id=self._nodes[prev_step].executed)
    task.reload()
    if input_type == 'artifacts':
        # fix \. to use . in artifacts
        artifact_path = ('.'.join(parts[2:])).replace('\\.', '\\_dot_\\')
        artifact_path = artifact_path.split('.')

        obj = task.artifacts
        for p in artifact_path:
            p = p.replace('\\_dot_\\', '.')
            if isinstance(obj, dict):
                obj = obj.get(p)
            elif hasattr(obj, p):
                obj = getattr(obj, p)
            else:
                raise ValueError("Could not locate artifact {} on previous step {}".format(
                    '.'.join(parts[1:]), prev_step))
        return str(obj)
    elif input_type == 'parameters':
        step_params = task.get_parameters()
        param_name = '.'.join(parts[2:])
        if param_name not in step_params:
            raise ValueError("Could not locate parameter {} on previous step {}".format(
                '.'.join(parts[1:]), prev_step))
        return step_params.get(param_name)
    elif input_type == 'models':
        model_type = parts[2].lower()
        if model_type not in ('input', 'output'):
            raise ValueError("Could not locate model {} on previous step {}".format(
                '.'.join(parts[1:]), prev_step))
        try:
            model_idx = int(parts[3])
            model = task.models[model_type][model_idx]
        except Exception:
            raise ValueError("Could not locate model {} on previous step {}, index {} is invalid".format(
                '.'.join(parts[1:]), prev_step, parts[3]))
        return str(getattr(model, parts[4]))
    elif input_type == 'id':
        return task.id
    return None
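# Worked example (illustrative) of the parsing performed above:
ref = '${stage1.artifacts.mydata.url}'
parts = ref[2:-1].split('.')                        # ['stage1', 'artifacts', 'mydata', 'url']
prev_step, input_type = parts[0], parts[1].lower()  # 'stage1', 'artifacts'
# the remaining parts walk task.artifacts['mydata'].url on the referenced step's task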
    'layer_1': lambda: sample(range(*params['layer_1']), 1)[0],
    'layer_2': lambda: sample(range(*params['layer_2']), 1)[0],
}

# number of random samples to test from 'space'
params['total_number_of_experiments'] = 3

# execution queue to add experiments to
params['execution_queue_name'] = 'default'

# experiment template to optimize with random parameter search
params['experiment_template_name'] = 'Keras HP optimization base'

# Select the base template task
# Notice we can be more imaginative and use a task_id, which eliminates the need for a project name
template_task = Task.get_task(project_name='examples',
                              task_name=params['experiment_template_name'])

for i in range(params['total_number_of_experiments']):
    # clone the template task into a new, write-enabled task (where we can change parameters)
    cloned_task = Task.clone(source_task=template_task,
                             name=template_task.name + ' {}'.format(i),
                             parent=template_task.id)

    # get the original template parameters
    cloned_task_parameters = cloned_task.get_parameters()

    # override with random samples from the grid
    for k in space.keys():
        cloned_task_parameters[k] = space[k]()

    # put back into the new cloned task