def _wait_for_mapper_tasks(self, flow, step_name):
    """
    Wait for the secondary (mapper) tasks of a multinode UBF step to finish
    cleanly and produce their output before the main task exits.

    If the main task finished first, the secondary nodes would be terminated
    immediately, and possibly prematurely.

    Parameters
    ----------
    flow : FlowSpec
        The running flow; `flow._control_mapper_tasks` holds the expected
        mapper task pathspecs.
    step_name : str
        Name of the step whose tasks are being awaited.

    Returns
    -------
    bool
        True once every mapper task has finished.

    Raises
    ------
    Exception
        If the mapper tasks do not all finish within the timeout.
    """
    from metaflow import Step  # avoid circular dependency

    TIMEOUT = 600
    deadline = time.time() + TIMEOUT
    print("Waiting for batch secondary tasks to finish")
    while time.time() < deadline:
        time.sleep(2)
        try:
            step_path = "%s/%s/%s" % (flow.name, current.run_id, step_name)
            tasks = list(Step(step_path))
            if len(tasks) == len(flow._control_mapper_tasks):
                # `task.finished` is unreliable here; `finished_at` is only
                # set once a task has actually completed.
                if all(task.finished_at is not None for task in tasks):
                    return True
                print(
                    "Waiting for all parallel tasks to finish. Finished: {}/{}".format(
                        len(tasks),
                        len(flow._control_mapper_tasks),
                    )
                )
        except Exception as ex:
            # Best-effort polling: the metadata service may lag behind task
            # creation, so transient lookup failures are expected. Surface
            # them for debuggability instead of swallowing silently.
            print("Transient error while polling mapper tasks: %s" % ex)
    raise Exception(
        "Batch secondary workers did not finish in %s seconds" % TIMEOUT)
def execute(cls,
            message=None,
            keys=None,
            existing_keys=None,
            stream_output=None,
            invalidate_cache=False,
            **kwargs):
    """
    Resolve the DAG for a run and return it keyed by the requested
    'dag:result' cache key.

    Parameters
    ----------
    message : dict
        Must contain 'flow_id' and 'run_number' identifying the run.
    keys : list of str
        Requested cache keys; exactly one is expected to start with
        'dag:result'.
    existing_keys : dict, optional
        Previously cached keys (unused here). NOTE: the original signature
        used a mutable default ({}), which is shared across calls; None is
        the safe sentinel.
    stream_output : callable, optional
        Callback used by `streamed_errors` to report failures to the client.
    invalidate_cache : bool
        Unused here; part of the executor interface.

    Returns
    -------
    dict
        {result_key: JSON-serialized DAG}
    """
    if existing_keys is None:
        existing_keys = {}

    results = {}
    flow_id = message['flow_id']
    run_number = message['run_number']
    result_key = [key for key in keys if key.startswith('dag:result')][0]

    with streamed_errors(stream_output):
        run = Run("{}/{}".format(flow_id, run_number))
        param_step = Step("{}/_parameters".format(run.pathspec))
        try:
            # Prefer the graph info artifact recorded at run time.
            dag = DataArtifact("{}/_graph_info".format(
                param_step.task.pathspec)).data
        except MetaflowNotFound:
            # Older runs may lack the _graph_info artifact; fall back to
            # generating the DAG from the run itself.
            dag = generate_dag(run)

    results[result_key] = json.dumps(dag)

    return results
def resolve_task_from_pathspec(flow_name, pathspec):
    """
    Resolve a task object for the pathspec query given on the CLI.

    Args:
        flow_name : (str) : name of flow
        pathspec (str) : can be `stepname` / `runid/stepname` / `runid/stepname/taskid`

    Returns:
        metaflow.Task | None
    """
    from metaflow import Flow, Step, Task
    from metaflow.exception import MetaflowNotFound

    # The pathspec can arrive in several shapes; branch on the segment count.
    parts = pathspec.split("/")
    num_parts = len(parts)
    task, run_id = None, None
    resolving_from = "task_pathspec"

    if num_parts == 1:
        # Bare step name: look it up inside the flow's latest run.
        resolving_from = "stepname"
        latest_run = Flow(flow_name).latest_run
        if latest_run is not None:
            run_id = latest_run.pathspec
            try:
                task = latest_run[pathspec].task
            except KeyError:
                pass
    elif num_parts == 2:
        # <runid>/<stepname>
        namespace(None)
        resolving_from = "step_pathspec"
        try:
            task = Step("/".join([flow_name, pathspec])).task
        except MetaflowNotFound:
            pass
    elif num_parts == 3:
        # <runid>/<stepname>/<taskid>
        namespace(None)
        resolving_from = "task_pathspec"
        try:
            task = Task("/".join([flow_name, pathspec]))
        except MetaflowNotFound:
            pass
    else:
        # Any other shape is an invalid pathspec format.
        raise CommandException(
            msg=
            "The PATHSPEC argument should be of the form 'stepname' Or '<runid>/<stepname>' Or '<runid>/<stepname>/<taskid>'"
        )

    if task is None:
        # The query was well-formed but no matching task exists.
        raise TaskNotFoundException(pathspec, resolving_from, run_id=run_id)

    return task
def get_step(self, run: Run, step_name: str) -> Step:
    """
    Return the metaflow `Step` object identified by the given `Run` and
    `step_name`.

    :param run: metaflow.Run
    :param step_name: step name
    :return: metaflow.client.Step
    """
    # Assemble the "<flow>/<run>/<step>" pathspec the Step client expects.
    pathspec = "/".join([run._object['flow_id'], str(run.id), step_name])
    return Step(pathspec)
def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]):
    """
    Fetch parameter artifacts for a run using the Metaflow Client.

    Parameters
    ----------
    pathspec : str
        Run pathspec: "FlowId/RunNumber"
    stream_output : Callable[[object], None]
        Stream output callable from execute() that accepts a JSON serializable object.
        Used for generic messaging.

        Errors can be streamed to cache client using `stream_output` in combination with
        the error_event_msg helper. This way failures won't be cached for individual
        artifacts, thus making it necessary to retry fetching during next attempt.
        (Will add significant overhead/delay).

        Stream error example:
        stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))

    Returns
    -------
    False if the _parameters step could not be found (so the result is not
    cached — parameters might become available later); otherwise
    [True, values] where `values` maps artifact names to their data or an
    error string.
    """
    try:
        with streamed_errors(stream_output):
            step = Step("{}/_parameters".format(pathspec))
    except Exception:
        # NOTE: return false in order not to cache this
        # since parameters might be available later
        return False

    # Internal-only artifacts excluded from the results:
    #   - names prefixed with an underscore (_)
    #   - 'name' and 'script_name'
    _EXCLUDED = ('name', 'script_name')

    values = {}
    for artifact_name, artifact in step.task.artifacts._asdict().items():
        if artifact_name.startswith('_') or artifact_name in _EXCLUDED:
            continue
        try:
            if artifact.size < MAX_S3_SIZE:
                values[artifact_name] = artifact.data
            else:
                values[artifact_name] = "Artifact too large: {} bytes".format(
                    artifact.size)
        except Exception as ex:
            # Record the failure per-artifact instead of failing the batch.
            values[artifact_name] = str(ex)

    return [True, values]