def get_log_content(task: Task, logtype: str):
    # NOTE: this re-implements some of the client logic from _load_log(self, stream)
    # for backwards compatibility of different log types.
    # Necessary due to the client not exposing a stdout/stderr property that would
    # contain the optional timestamps.
    stream = 'stderr' if logtype == STDERR else 'stdout'
    log_location = task.metadata_dict.get('log_location_%s' % stream)
    if log_location:
        return [
            (None, line)
            for line in task._load_log_legacy(log_location, stream).split("\n")
        ]
    else:
        return [
            (_datetime_to_epoch(datetime), line)
            for datetime, line in task.loglines(stream)
        ]
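# Illustrative usage sketch (not part of the original module; the pathspec below is
# hypothetical): get_log_content returns (timestamp, line) tuples, where timestamp
# is None for legacy log locations and an epoch value for timestamped logs.
def _print_log_example():
    from metaflow import Task
    task = Task("HelloFlow/12/start/34")  # hypothetical pathspec
    for timestamp, line in get_log_content(task, 'stdout'):
        prefix = "" if timestamp is None else "[{}] ".format(timestamp)
        print(prefix + line)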
def check_results(self, flow, checker):
    run = checker.get_run()
    if run is None:
        # very basic sanity check for CLI
        for step in flow:
            checker.assert_artifact(step.name, 'step_name', step.name)
            checker.assert_artifact(step.name, 'project_names', {'current_singleton'})
    else:
        from metaflow import Task
        task_data = run.data.task_data
        for pathspec, uuid in task_data.items():
            assert_equals(Task(pathspec).data.uuid, uuid)
        for step in run:
            for task in step:
                assert_equals(task.data.step_name, step.id)
                pathspec = '/'.join(task.pathspec.split('/')[-4:])
                assert_equals(task.data.uuid, task_data[pathspec])
        assert_equals(run.data.project_names, {'current_singleton'})
        assert_equals(run.data.branch_names, {'user.tester'})
        assert_equals(run.data.project_flow_names,
                      {'current_singleton.user.tester.CurrentSingletonTestFlow'})
        assert_equals(run.data.is_production, {False})
        assert_equals(run.data.flow_names, {run.parent.id})
        assert_equals(run.data.run_ids, {run.id})
        assert_equals(run.data.origin_run_ids, {None})
        assert_equals(run.data.namespaces, {'user:tester'})
        assert_equals(run.data.usernames, {'tester'})
def execute(cls, message=None, keys=None, existing_keys={}, stream_output=None,
            invalidate_cache=False, **kwargs):
    results = {}
    # params
    task_dict = message['task']
    attempt = int(task_dict.get('attempt_id', 0))
    limit = message['limit']
    page = message['page']
    logtype = message['logtype']
    reverse = message['reverse_order']
    output_raw = message['raw_log']
    pathspec = pathspec_for_task(task_dict)

    # keys
    log_key = log_cache_id(task_dict, logtype)
    result_key = log_result_id(task_dict, logtype, limit, page, reverse, output_raw)

    previous_log_file = existing_keys.get(log_key, None)
    previous_log_size = json.loads(previous_log_file).get(
        "log_size", None) if previous_log_file else None

    log_size_changed = False  # keep track of whether we loaded new content

    with streamed_errors(stream_output):
        task = Task(pathspec, attempt=attempt)

        # check if the log has grown since last time.
        current_size = get_log_size(task, logtype)
        log_size_changed = previous_log_size is None or previous_log_size != current_size

        if log_size_changed:
            content = get_log_content(task, logtype)
            results[log_key] = json.dumps({
                "log_size": current_size,
                "content": content
            })
        else:
            results = {**existing_keys}

    if log_size_changed or result_key not in existing_keys:
        results[result_key] = json.dumps(
            paginated_result(
                json.loads(results[log_key])["content"],
                page, limit, reverse, output_raw
            )
        )

    return results
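# Sketch of the request message execute() consumes, inferred from the keys read
# above. The field names inside "task" are assumptions based on the service's
# task schema; only attempt_id is read from it directly here, the rest is
# consumed by pathspec_for_task and the cache-key helpers.
EXAMPLE_LOG_MESSAGE = {
    "task": {
        "flow_id": "HelloFlow",   # assumed schema, hypothetical values
        "run_number": 12,
        "step_name": "start",
        "task_id": 34,
        "attempt_id": 0,
    },
    "limit": 100,             # log lines per page
    "page": 1,
    "logtype": "stdout",      # or "stderr"
    "reverse_order": False,
    "raw_log": False,         # passed through to paginated_result
}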
def step_all(self):
    from metaflow import current, Task
    # Resolve the Run object through the client: Task -> Step -> Run.
    run = Task(current.pathspec).parent.parent
    for i in range(7):
        tag = str(i)
        run.add_tag(tag)
        assert tag in run.user_tags
        run.remove_tag(tag)
        assert tag not in run.user_tags
def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]):
    """
    Fetch data using Metaflow Client.

    Parameters
    ----------
    pathspec : str
        Task pathspec with attempt id as last component:
            "FlowId/RunNumber/StepName/TaskId/0"
    stream_output : Callable[[object], None]
        Stream output callable from execute() that accepts a JSON serializable
        object. Used for generic messaging.

        Errors can be streamed to the cache client using `stream_output` in
        combination with the error_event_msg helper. This way failures won't be
        cached for individual artifacts, making it necessary to retry fetching
        during the next attempt. (Will add significant overhead/delay.)

        Stream error example:
            stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))
    """
    try:
        pathspec_without_attempt, attempt_id = unpack_pathspec_with_attempt_id(
            pathspec)
        task = Task(pathspec_without_attempt, attempt=attempt_id)
    except MetaflowNotFound:
        return False  # Skip cache persist if Task cannot be found

    if '_task_ok' not in task:
        # Skip cache persist if the _task_ok artifact cannot be found
        return False

    values = {}
    for artifact_name in ['_task_ok', '_foreach_stack']:
        if artifact_name in task:
            artifact = task[artifact_name]
            if artifact.size < MAX_S3_SIZE:
                values[artifact_name] = artifact.data
            else:
                return [
                    False,
                    'artifact-too-large',
                    "{}: {} bytes".format(artifact.pathspec, artifact.size)
                ]

    return [True, values]
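# Sketch of handling fetch_data's three possible return shapes (the cache-action
# class that owns this method is not shown in this excerpt, so `result` stands in
# for its return value): False when the task or its _task_ok artifact is missing,
# [False, 'artifact-too-large', details] when an artifact exceeds MAX_S3_SIZE,
# and [True, values] on success.
def _handle_artifact_result(result):
    if result is False:
        return None  # nothing cached; fetching can be retried on the next attempt
    ok, *rest = result
    if not ok:
        event, details = rest  # 'artifact-too-large', "<pathspec>: <n> bytes"
        raise ValueError("{}: {}".format(event, details))
    return rest[0]  # {'_task_ok': ..., '_foreach_stack': ...}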
def get_run_output_data(self):
    st = time.time()
    task_wrapper = Task(self._run.end_task.pathspec)
    return_dataset = {}
    # Collect every artifact of the run's end task, keyed by artifact name.
    for data in task_wrapper.artifacts:
        print(data.pathspec)
        wrapper = {
            "data": data.data,
            "artifact_name": data.path_components[-1],
            "finished_at": data.finished_at
        }
        return_dataset[wrapper['artifact_name']] = wrapper
    end = time.time()
    print(end - st)  # crude timing of the artifact download
    return return_dataset
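# Brief usage sketch (hypothetical consumer): the returned dict maps each artifact
# name of the run's end task to a wrapper holding its data and completion time.
def _summarize_outputs(return_dataset):
    for name, wrapper in return_dataset.items():
        print("{} (finished at {}): {!r}".format(
            name, wrapper["finished_at"], wrapper["data"]))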
def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]):
    """
    Fetch data using Metaflow Client.

    Parameters
    ----------
    pathspec : str
        Task pathspec: "FlowId/RunNumber/StepName/TaskId"
    stream_output : Callable[[object], None]
        Stream output callable from execute() that accepts a JSON serializable
        object. Used for generic messaging.

        Errors can be streamed to the cache client using `stream_output` in
        combination with the error_event_msg helper. This way failures won't be
        cached for individual artifacts, making it necessary to retry fetching
        during the next attempt. (Will add significant overhead/delay.)

        Stream error example:
            stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))
    """
    def _card_item(card):
        return {
            "id": card.id,
            "type": card.type,
            "html": card.get()
        }

    try:
        with streamed_errors(stream_output):
            task = Task("{}".format(pathspec))
            cards = {card.hash: _card_item(card) for card in get_cards(task)}
    except Exception:
        # NOTE: return False in order not to cache this,
        # since cards might become available later
        return False

    return [True, cards]
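# Sketch of consuming the result (the owning cache-action class is not shown, so
# `result` stands in for fetch_data's return value): on success the second element
# maps each card's hash to its id, type, and rendered HTML.
def _list_cards(result):
    if result is False:
        return  # not cached; cards may still become available on a later attempt
    _, cards = result
    for card_hash, card in cards.items():
        print(card_hash, card["id"], card["type"], len(card["html"]))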