def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order, filters=None, *args):
    """Fetch an object, or a listing of its sub-objects, from the service.

    Parameters
    ----------
    obj_type : str
        Type of the parent object. 'root' means the listing URL has no
        parent path prefix.
    obj_order : int
        Number of leading entries of ``args`` passed to
        ``ServiceMetadataProvider._obj_path`` to build the object URL.
    sub_type : str
        'self' to fetch the object itself, 'metadata' to fetch its metadata,
        anything else to list children at ``/<sub_type>s``.
    sub_order : int
        Not used by this implementation.
    filters : dict, optional
        Forwarded to ``MetadataProvider._apply_filter``.
    *args
        Path components identifying the object.

    Returns
    -------
    For ``sub_type == 'self'``: the object, or None if the service answers
    404 or the filters reject it. Otherwise: the filtered response of the
    listing request, or None if the service answers 404.

    Raises
    ------
    ServiceException
        Re-raised for any service error other than a 404.
    """
    # Special handling of self: a single object is requested
    if sub_type == 'self':
        url = ServiceMetadataProvider._obj_path(*args[:obj_order])
        try:
            obj = cls._request(None, url)
        except ServiceException as ex:
            if ex.http_code == 404:
                return None
            raise
        matches = MetadataProvider._apply_filter([obj], filters)
        # Indexing [0] unconditionally would raise IndexError when the
        # filters reject the object; report it as "not found" instead.
        return matches[0] if matches else None

    # For the other types, we locate all the objects we need to find and
    # return them
    if obj_type != 'root':
        url = ServiceMetadataProvider._obj_path(*args[:obj_order])
    else:
        url = ''
    if sub_type != 'metadata':
        url += '/%ss' % sub_type
    else:
        url += '/metadata'
    try:
        response = cls._request(None, url)
    except ServiceException as ex:
        if ex.http_code == 404:
            return None
        raise
    return MetadataProvider._apply_filter(response, filters)
def _get_object_internal(
    cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
):
    """Fetch an object, or a listing of its sub-objects, from the service.

    Parameters
    ----------
    obj_type : str
        Type of the parent object. "root" means the listing URL has no
        parent path prefix.
    obj_order : int
        Number of leading entries of ``args`` passed to
        ``ServiceMetadataProvider._obj_path`` to build the object URL.
    sub_type : str
        "self" to fetch the object itself, "metadata" to fetch its metadata,
        anything else to list children.
    sub_order : int
        Not used by this implementation.
    filters : dict or None
        Forwarded to ``MetadataProvider._apply_filter``.
    attempt : int or None
        Specific attempt to look at. When not None, the service must be at
        least version 2.0.6.
    *args
        Path components identifying the object.

    Returns
    -------
    For ``sub_type == "self"``: the object, or None if the service answers
    404 or the filters reject it. Otherwise: the filtered response of the
    listing request, or None if the service answers 404.

    Raises
    ------
    ServiceException
        If ``attempt`` is given but the service is too old, or for any
        service error other than a 404.
    """
    if attempt is not None:
        # The capability check is done once and cached on the class.
        if cls._supports_attempt_gets is None:
            version = cls._version(None)
            cls._supports_attempt_gets = version is not None and LooseVersion(
                version
            ) >= LooseVersion("2.0.6")
        if not cls._supports_attempt_gets:
            raise ServiceException(
                "Getting specific attempts of Tasks or Artifacts requires "
                "the metaflow service to be at least version 2.0.6. Please "
                "upgrade your service"
            )

    if sub_type == "self":
        if obj_type == "artifact":
            # Special case with the artifacts; we add the attempt
            url = ServiceMetadataProvider._obj_path(
                *args[:obj_order], attempt=attempt
            )
        else:
            url = ServiceMetadataProvider._obj_path(*args[:obj_order])
        try:
            # _request returns a pair; only the first element is used here.
            v, _ = cls._request(None, url, "GET")
        except ServiceException as ex:
            if ex.http_code == 404:
                return None
            raise
        matches = MetadataProvider._apply_filter([v], filters)
        # Indexing [0] unconditionally would raise IndexError when the
        # filters reject the object; report it as "not found" instead.
        return matches[0] if matches else None

    # For the other types, we locate all the objects we need to find and
    # return them
    if obj_type != "root":
        url = ServiceMetadataProvider._obj_path(*args[:obj_order])
    else:
        url = ""
    if sub_type == "metadata":
        url += "/metadata"
    elif sub_type == "artifact" and obj_type == "task" and attempt is not None:
        url += "/attempt/%s/artifacts" % attempt
    else:
        url += "/%ss" % sub_type
    try:
        v, _ = cls._request(None, url, "GET")
    except ServiceException as ex:
        if ex.http_code == 404:
            return None
        raise
    return MetadataProvider._apply_filter(v, filters)
def _persist_tags_for_run(flow_id, run_id, tags, system_tags):
    """Rewrite the on-disk ``_self.json`` record of a run with new tags.

    Parameters
    ----------
    flow_id : str
        Name of the flow owning the run.
    run_id : str
        Identifier of the run whose record is rewritten.
    tags, system_tags
        Tag collections written into the regenerated run record.

    Raises
    ------
    MetaflowInternalError
        If the run's ``_self.json`` file is not present on disk.
    """
    run_meta_dir = LocalMetadataProvider._create_and_get_metadir(
        flow_name=flow_id, run_id=run_id
    )
    self_path = os.path.join(run_meta_dir, "_self.json")
    # Only an existing run may be re-tagged; never create one from scratch.
    if not os.path.isfile(self_path):
        raise MetaflowInternalError(
            msg="Could not verify Run existence on disk - missing %s" % self_path
        )
    run_record = MetadataProvider._run_to_json_static(
        flow_id, run_id=run_id, tags=tags, sys_tags=system_tags
    )
    LocalMetadataProvider._save_meta(
        run_meta_dir, {"_self": run_record}, allow_overwrite=True
    )
def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order, filters=None, *args):
    """Read an object, or a listing of its sub-objects, from local metadata.

    Parameters
    ----------
    obj_type : str
        Type of the parent object. 'artifact' is remapped to a lookup of
        artifacts under the parent task.
    obj_order : int
        Number of leading entries of ``args`` that identify the parent.
    sub_type : str
        'self', 'artifact', 'metadata', or the type of children to list.
    sub_order : int
        Depth of the requested sub-objects; also selects the artifact name
        in ``args`` when one is given.
    filters : dict, optional
        Forwarded to ``MetadataProvider._apply_filter`` (only for 'self'
        and for child listings).
    *args
        Path components identifying the object.

    Returns
    -------
    'self': the object read from ``_self.json``, or None when it is missing
    or rejected by the filters. 'artifact': a list of artifact records, or
    the record itself when exactly one matches. 'metadata': a list of the
    ``sysmeta_*`` records. Otherwise: the filtered list of child objects.
    """
    from metaflow.datastore.local import LocalDataStore

    if obj_type == 'artifact':
        # Artifacts are actually part of the tasks in the filesystem
        obj_type = 'task'
        sub_type = 'artifact'
        sub_order = obj_order
        obj_order = obj_order - 1

    # Special handling of self, artifact, and metadata
    if sub_type == 'self':
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return None
        self_file = os.path.join(meta_path, '_self.json')
        if not os.path.isfile(self_file):
            return None
        matches = MetadataProvider._apply_filter(
            [LocalMetadataProvider._read_json_file(self_file)], filters)
        # Indexing [0] unconditionally would raise IndexError when the
        # filters reject the object; report it as "not found" instead.
        return matches[0] if matches else None

    if sub_type == 'artifact':
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        result = []
        if meta_path is None:
            return result
        attempt_done_files = os.path.join(meta_path, 'sysmeta_attempt-done_*')
        attempts_done = sorted(glob.iglob(attempt_done_files))
        if attempts_done:
            # The attempt number comes from the last attempt-done record
            # (by sorted file name).
            successful_attempt = int(LocalMetadataProvider._read_json_file(
                attempts_done[-1])['value'])
            which_artifact = '*'
            if len(args) >= sub_order:
                which_artifact = args[sub_order - 1]
            artifact_files = os.path.join(
                meta_path,
                '%d_artifact_%s.json' % (successful_attempt, which_artifact))
            result = [LocalMetadataProvider._read_json_file(path)
                      for path in glob.iglob(artifact_files)]
        # A single match is returned unwrapped rather than as a list.
        if len(result) == 1:
            return result[0]
        return result

    if sub_type == 'metadata':
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return []
        files = os.path.join(meta_path, 'sysmeta_*')
        return [LocalMetadataProvider._read_json_file(path)
                for path in glob.iglob(files)]

    # For the other types, we locate all the objects we need to find and
    # return them
    obj_path = LocalMetadataProvider._make_path(
        *args[:obj_order], create_on_absent=False)
    if obj_path is None:
        return []
    # One wildcard directory level per step between parent and children.
    skip_dirs = '*/' * (sub_order - obj_order)
    all_meta = os.path.join(obj_path, skip_dirs, LocalDataStore.METADATA_DIR)
    result = []
    for meta_path in glob.iglob(all_meta):
        self_file = os.path.join(meta_path, '_self.json')
        if os.path.isfile(self_file):
            result.append(LocalMetadataProvider._read_json_file(self_file))
    return MetadataProvider._apply_filter(result, filters)
def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args):
    """Read an object, or a listing of its sub-objects, from local metadata.

    Objects below a run (steps, tasks, artifacts) have their "tags" and
    "system_tags" replaced by those of the ancestral run, read through
    ``LocalMetadataProvider.get_object``.

    Parameters
    ----------
    obj_type : str
        One of "root", "flow", "run", "step", "task", "artifact".
        "artifact" is remapped to a lookup of artifacts under the task.
    obj_order : int
        Number of leading entries of ``args`` that identify the parent; must
        agree with ``obj_type`` according to ``ObjectOrder``.
    sub_type : str
        "self", "artifact", "metadata", or the type of children to list;
        must agree with ``sub_order`` according to ``ObjectOrder``.
    sub_order : int
        Depth of the requested sub-objects.
    filters : dict or None
        Forwarded to ``MetadataProvider._apply_filter`` (only for "self"
        and for child listings).
    attempt : int or None
        Attempt whose artifacts are read; when None, the attempt is taken
        from the last ``sysmeta_attempt-done_*`` record.
    *args
        Path components identifying the object.

    Returns
    -------
    "self": the object, or None when it is missing or rejected by the
    filters. "artifact": a list of artifact records, or the record itself
    when exactly one matches. "metadata": a list of the ``sysmeta_*``
    records. Otherwise: the filtered list of child objects.

    Raises
    ------
    MetaflowInternalError
        On an invalid or inconsistent type/order combination, when the
        ancestral run cannot be found, or when a run id deduced from a
        metadata path disagrees with ``args``.
    """
    # This is guaranteed by MetaflowProvider.get_object(), sole intended caller
    if obj_type in ("metadata", "self"):
        raise MetaflowInternalError(msg="Type %s is not allowed" % obj_type)

    if obj_type not in ("root", "flow", "run", "step", "task", "artifact"):
        raise MetaflowInternalError(msg="Unexpected object type %s" % obj_type)

    from metaflow.datastore.local_storage import LocalStorage

    if obj_type == "artifact":
        # Artifacts are actually part of the tasks in the filesystem
        # E.g. we get here for (obj_type, sub_type) == (artifact, self)
        obj_type = "task"
        sub_type = "artifact"
        sub_order = obj_order
        obj_order = obj_order - 1

    # From here on obj_type is one of root/flow/run/step/task: it was
    # validated above and "artifact" has just been remapped to "task", so no
    # further membership checks on obj_type are needed.

    if obj_type != ObjectOrder.order_to_type(obj_order):
        raise MetaflowInternalError(
            "Object type order mismatch %s %s"
            % (obj_type, ObjectOrder.order_to_type(obj_order)))
    if sub_type != ObjectOrder.order_to_type(sub_order):
        raise MetaflowInternalError(
            "Sub type order mismatch %s %s"
            % (sub_type, ObjectOrder.order_to_type(sub_order)))

    RUN_ORDER = ObjectOrder.type_to_order("run")

    # Special handling of self, artifact, and metadata
    if sub_type == "self":
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return None
        self_file = os.path.join(meta_path, "_self.json")
        if not os.path.isfile(self_file):
            return None
        matches = MetadataProvider._apply_filter(
            [LocalMetadataProvider._read_json_file(self_file)], filters)
        # Indexing [0] unconditionally would raise IndexError when the
        # filters reject the object; report it as "not found" instead.
        if not matches:
            return None
        obj = matches[0]

        # For non-descendants of a run, we are done
        if obj_order <= RUN_ORDER:
            return obj

        if obj_type not in ("step", "task"):
            raise MetaflowInternalError(
                msg="Unexpected object type %s" % obj_type)
        run = LocalMetadataProvider.get_object(
            "run", "self", {}, None, *args[:RUN_ORDER]  # *[flow_id, run_id]
        )
        if not run:
            raise MetaflowInternalError(
                msg="Could not find run %s" % str(args[:RUN_ORDER]))

        obj["tags"] = run.get("tags", [])
        obj["system_tags"] = run.get("system_tags", [])
        return obj

    if sub_type == "artifact":
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        result = []
        if meta_path is None:
            return result

        successful_attempt = attempt
        if successful_attempt is None:
            # No explicit attempt: use the one recorded in the last
            # attempt-done record (by sorted file name).
            attempt_done_files = os.path.join(meta_path, "sysmeta_attempt-done_*")
            attempts_done = sorted(glob.iglob(attempt_done_files))
            if attempts_done:
                successful_attempt = int(
                    LocalMetadataProvider._read_json_file(
                        attempts_done[-1])["value"])
        if successful_attempt is not None:
            which_artifact = "*"
            if len(args) >= sub_order:
                which_artifact = args[sub_order - 1]
            artifact_files = os.path.join(
                meta_path,
                "%d_artifact_%s.json" % (successful_attempt, which_artifact),
            )
            for path in glob.iglob(artifact_files):
                result.append(LocalMetadataProvider._read_json_file(path))

        # We are getting artifacts. We should overlay with ancestral run's tags
        run = LocalMetadataProvider.get_object(
            "run", "self", {}, None, *args[:RUN_ORDER]  # *[flow_id, run_id]
        )
        if not run:
            raise MetaflowInternalError(
                msg="Could not find run %s" % str(args[:RUN_ORDER]))
        for obj in result:
            obj["tags"] = run.get("tags", [])
            obj["system_tags"] = run.get("system_tags", [])

        # A single match is returned unwrapped rather than as a list.
        if len(result) == 1:
            return result[0]
        return result

    if sub_type == "metadata":
        result = []
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return result
        files = os.path.join(meta_path, "sysmeta_*")
        for path in glob.iglob(files):
            result.append(LocalMetadataProvider._read_json_file(path))
        return result

    # For the other types, we locate all the objects we need to find and
    # return them
    if sub_type not in ("flow", "run", "step", "task"):
        raise MetaflowInternalError(msg="unexpected sub type %s" % sub_type)

    obj_path = LocalMetadataProvider._make_path(
        *args[:obj_order], create_on_absent=False)
    result = []
    if obj_path is None:
        return result
    # One wildcard directory level per step between parent and children.
    skip_dirs = "*/" * (sub_order - obj_order)
    all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)
    SelfInfo = collections.namedtuple("SelfInfo", ["filepath", "run_id"])
    self_infos = []
    # First pass: collect and sanity-check every candidate before reading any
    # of them.
    for meta_path in glob.iglob(all_meta):
        self_file = os.path.join(meta_path, "_self.json")
        if not os.path.isfile(self_file):
            continue
        run_id = None
        # flow and run do not need info from ancestral run
        if sub_type in ("step", "task"):
            run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir(
                meta_path, sub_type)
            # obj_type IS run, or more granular than run, let's do sanity
            # check vs args
            if obj_order >= RUN_ORDER:
                if run_id != args[RUN_ORDER - 1]:
                    raise MetaflowInternalError(
                        msg="Unexpected run id %s deduced from meta path"
                        % run_id)
        self_infos.append(SelfInfo(filepath=self_file, run_id=run_id))

    # Second pass: read each object and overlay the ancestral run's tags.
    for self_info in self_infos:
        obj = LocalMetadataProvider._read_json_file(self_info.filepath)
        if self_info.run_id:
            flow_id_from_args = args[0]
            run = LocalMetadataProvider.get_object(
                "run",
                "self",
                {},
                None,
                flow_id_from_args,
                self_info.run_id,
            )
            if not run:
                raise MetaflowInternalError(
                    msg="Could not find run %s, %s"
                    % (flow_id_from_args, self_info.run_id))
            obj["tags"] = run.get("tags", [])
            obj["system_tags"] = run.get("system_tags", [])
        result.append(obj)

    return MetadataProvider._apply_filter(result, filters)
def _get_object_internal(
    cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
):
    """Read an object, or a listing of its sub-objects, from local metadata.

    Parameters
    ----------
    obj_type : str
        Type of the parent object. "artifact" is remapped to a lookup of
        artifacts under the parent task.
    obj_order : int
        Number of leading entries of ``args`` that identify the parent.
    sub_type : str
        "self", "artifact", "metadata", or the type of children to list.
    sub_order : int
        Depth of the requested sub-objects; also selects the artifact name
        in ``args`` when one is given.
    filters : dict or None
        Forwarded to ``MetadataProvider._apply_filter`` (only for "self"
        and for child listings).
    attempt : int or None
        Attempt whose artifacts are read; when None, the attempt is taken
        from the last ``sysmeta_attempt-done_*`` record.
    *args
        Path components identifying the object.

    Returns
    -------
    "self": the object read from ``_self.json``, or None when it is missing
    or rejected by the filters. "artifact": a list of artifact records, or
    the record itself when exactly one matches. "metadata": a list of the
    ``sysmeta_*`` records. Otherwise: the filtered list of child objects.
    """
    from metaflow.datastore.local_storage import LocalStorage

    if obj_type == "artifact":
        # Artifacts are actually part of the tasks in the filesystem
        obj_type = "task"
        sub_type = "artifact"
        sub_order = obj_order
        obj_order = obj_order - 1

    # Special handling of self, artifact, and metadata
    if sub_type == "self":
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return None
        self_file = os.path.join(meta_path, "_self.json")
        if not os.path.isfile(self_file):
            return None
        matches = MetadataProvider._apply_filter(
            [LocalMetadataProvider._read_json_file(self_file)], filters
        )
        # Indexing [0] unconditionally would raise IndexError when the
        # filters reject the object; report it as "not found" instead.
        return matches[0] if matches else None

    if sub_type == "artifact":
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        result = []
        if meta_path is None:
            return result

        successful_attempt = attempt
        if successful_attempt is None:
            # No explicit attempt: use the one recorded in the last
            # attempt-done record (by sorted file name).
            attempt_done_files = os.path.join(meta_path, "sysmeta_attempt-done_*")
            attempts_done = sorted(glob.iglob(attempt_done_files))
            if attempts_done:
                successful_attempt = int(
                    LocalMetadataProvider._read_json_file(attempts_done[-1])[
                        "value"
                    ]
                )
        if successful_attempt is not None:
            which_artifact = "*"
            if len(args) >= sub_order:
                which_artifact = args[sub_order - 1]
            artifact_files = os.path.join(
                meta_path,
                "%d_artifact_%s.json" % (successful_attempt, which_artifact),
            )
            result = [
                LocalMetadataProvider._read_json_file(path)
                for path in glob.iglob(artifact_files)
            ]
        # A single match is returned unwrapped rather than as a list.
        if len(result) == 1:
            return result[0]
        return result

    if sub_type == "metadata":
        meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
        if meta_path is None:
            return []
        files = os.path.join(meta_path, "sysmeta_*")
        return [
            LocalMetadataProvider._read_json_file(path)
            for path in glob.iglob(files)
        ]

    # For the other types, we locate all the objects we need to find and
    # return them
    obj_path = LocalMetadataProvider._make_path(
        *args[:obj_order], create_on_absent=False
    )
    if obj_path is None:
        return []
    # One wildcard directory level per step between parent and children.
    skip_dirs = "*/" * (sub_order - obj_order)
    all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)
    result = []
    for meta_path in glob.iglob(all_meta):
        self_file = os.path.join(meta_path, "_self.json")
        if os.path.isfile(self_file):
            result.append(LocalMetadataProvider._read_json_file(self_file))
    return MetadataProvider._apply_filter(result, filters)