Example #1
0
    def _get_object_internal(cls,
                             obj_type,
                             obj_order,
                             sub_type,
                             sub_order,
                             filters=None,
                             *args):
        """Fetch one object, or a collection of sub-objects, from the service.

        When ``sub_type`` is ``'self'`` the object addressed by the first
        ``obj_order`` entries of ``args`` is requested and the single
        filtered result is returned. Otherwise the collection
        ``<object path>/<sub_type>s`` (or ``<object path>/metadata``) is
        requested, using an empty object path when ``obj_type`` is
        ``'root'``, and the filtered response is returned.

        ``sub_order`` is accepted but not used by this implementation.

        Returns ``None`` when the request raises a ``ServiceException`` whose
        ``http_code`` is 404; any other ``ServiceException`` propagates.
        """
        wants_self = sub_type == 'self'

        # 'self' lookups always address a concrete object; collection lookups
        # start from an empty path when hanging directly off the root.
        if wants_self or obj_type != 'root':
            url = ServiceMetadataProvider._obj_path(*args[:obj_order])
        else:
            url = ''

        if not wants_self:
            url += '/metadata' if sub_type == 'metadata' else '/%ss' % sub_type

        try:
            response = cls._request(None, url)
            if wants_self:
                # NOTE(review): [0] assumes _apply_filter keeps the single
                # element; confirm what happens when the filters reject it.
                return MetadataProvider._apply_filter([response], filters)[0]
            return MetadataProvider._apply_filter(response, filters)
        except ServiceException as ex:
            if ex.http_code != 404:
                raise
            return None
Example #2
0
    def _get_object_internal(
        cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
    ):
        """Fetch one object, or a collection of sub-objects, from the service.

        If ``attempt`` is given, the service version (``cls._version(None)``)
        must be at least 2.0.6; the outcome of that check is stored on
        ``cls._supports_attempt_gets`` so it is only computed once.

        When ``sub_type`` is ``"self"`` the object addressed by the first
        ``obj_order`` entries of ``args`` is requested (artifacts additionally
        pass ``attempt`` to the path builder) and the single filtered result
        is returned. Otherwise a collection URL is built under the object
        path (empty for ``"root"``) and the filtered response is returned.

        ``sub_order`` is accepted but not used by this implementation.

        Returns ``None`` when the request raises a ``ServiceException`` whose
        ``http_code`` is 404; any other ``ServiceException`` propagates.
        Raises ``ServiceException`` when ``attempt`` is requested against a
        service that is too old.
        """
        if attempt is not None:
            attempts_ok = cls._supports_attempt_gets
            if attempts_ok is None:
                service_version = cls._version(None)
                attempts_ok = service_version is not None and LooseVersion(
                    service_version
                ) >= LooseVersion("2.0.6")
                cls._supports_attempt_gets = attempts_ok
            if not attempts_ok:
                raise ServiceException(
                    "Getting specific attempts of Tasks or Artifacts requires "
                    "the metaflow service to be at least version 2.0.6. Please "
                    "upgrade your service"
                )

        is_self = sub_type == "self"

        # Build the path of the addressed object.
        if is_self and obj_type == "artifact":
            # Special case with the artifacts; we add the attempt
            url = ServiceMetadataProvider._obj_path(
                *args[:obj_order], attempt=attempt
            )
        elif is_self or obj_type != "root":
            url = ServiceMetadataProvider._obj_path(*args[:obj_order])
        else:
            url = ""

        # Collection lookups append the name of the collection to that path.
        if not is_self:
            if sub_type == "metadata":
                suffix = "/metadata"
            elif (
                sub_type == "artifact"
                and obj_type == "task"
                and attempt is not None
            ):
                suffix = "/attempt/%s/artifacts" % attempt
            else:
                suffix = "/%ss" % sub_type
            url += suffix

        try:
            payload, _ = cls._request(None, url, "GET")
            if is_self:
                # NOTE(review): [0] assumes _apply_filter keeps the single
                # element; confirm what happens when the filters reject it.
                return MetadataProvider._apply_filter([payload], filters)[0]
            return MetadataProvider._apply_filter(payload, filters)
        except ServiceException as ex:
            if ex.http_code != 404:
                raise
            return None
Example #3
0
 def _persist_tags_for_run(flow_id, run_id, tags, system_tags):
     """Rewrite the ``_self`` record of a run with the given tags.

     The run's metadata directory is obtained through
     ``LocalMetadataProvider._create_and_get_metadir``. The run must already
     have a ``_self.json`` file there; its ``_self`` entry is then saved again
     (overwrite allowed) from ``MetadataProvider._run_to_json_static``.

     Raises:
         MetaflowInternalError: if ``_self.json`` is missing for the run.
     """
     run_meta_dir = LocalMetadataProvider._create_and_get_metadir(
         flow_name=flow_id, run_id=run_id)
     self_json_path = os.path.join(run_meta_dir, "_self.json")

     # Refuse to write tags for a run that has no record on disk.
     if not os.path.isfile(self_json_path):
         raise MetaflowInternalError(
             msg="Could not verify Run existence on disk - missing %s" %
             self_json_path)

     run_record = MetadataProvider._run_to_json_static(
         flow_id, run_id=run_id, tags=tags, sys_tags=system_tags)
     LocalMetadataProvider._save_meta(
         run_meta_dir, {"_self": run_record}, allow_overwrite=True)
Example #4
0
    def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order, filters=None, *args):
        """Read one object, or a list of sub-objects, from the local metadata tree.

        Behaviour by ``sub_type`` (after an ``obj_type`` of ``'artifact'`` has
        been remapped to "artifacts of the enclosing task"):

        * ``'self'``: the filtered content of the object's ``_self.json``,
          or ``None`` if the metadata directory or the file is missing.
        * ``'artifact'``: the artifact records of the attempt named in the
          last (in sorted order) ``sysmeta_attempt-done_*`` file. A single
          match is returned bare, otherwise a list (possibly empty).
          ``filters`` is not applied.
        * ``'metadata'``: every ``sysmeta_*`` record of the object, as a
          list. ``filters`` is not applied.
        * anything else: the filtered ``_self.json`` records found
          ``sub_order - obj_order`` directory levels below the object.
        """
        from metaflow.datastore.local import LocalDataStore

        if obj_type == 'artifact':
            # Artifacts are actually part of the tasks in the filesystem
            obj_type, sub_type = 'task', 'artifact'
            sub_order, obj_order = obj_order, obj_order - 1

        if sub_type == 'self':
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return None
            self_path = os.path.join(meta_dir, '_self.json')
            if not os.path.isfile(self_path):
                return None
            record = LocalMetadataProvider._read_json_file(self_path)
            return MetadataProvider._apply_filter([record], filters)[0]

        if sub_type == 'artifact':
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return []
            done_markers = sorted(
                glob.iglob(os.path.join(meta_dir, 'sysmeta_attempt-done_*')))
            if not done_markers:
                return []
            # The last marker (in sorted order) names the attempt to read.
            last_marker = LocalMetadataProvider._read_json_file(done_markers[-1])
            attempt_id = int(last_marker['value'])
            # A specific artifact name is used when args reaches that deep,
            # otherwise every artifact of the attempt is matched.
            artifact_name = args[sub_order - 1] if len(args) >= sub_order else '*'
            artifact_glob = os.path.join(
                meta_dir, '%d_artifact_%s.json' % (attempt_id, artifact_name))
            artifacts = [LocalMetadataProvider._read_json_file(path)
                         for path in glob.iglob(artifact_glob)]
            return artifacts[0] if len(artifacts) == 1 else artifacts

        if sub_type == 'metadata':
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return []
            return [LocalMetadataProvider._read_json_file(path)
                    for path in glob.iglob(os.path.join(meta_dir, 'sysmeta_*'))]

        # For the other types, we locate all the objects we need to find and return them
        base_path = LocalMetadataProvider._make_path(*args[:obj_order], create_on_absent=False)
        if base_path is None:
            return []
        wildcard_levels = '*/' * (sub_order - obj_order)
        meta_glob = os.path.join(base_path, wildcard_levels, LocalDataStore.METADATA_DIR)
        self_paths = (os.path.join(meta_dir, '_self.json')
                      for meta_dir in glob.iglob(meta_glob))
        records = [LocalMetadataProvider._read_json_file(path)
                   for path in self_paths if os.path.isfile(path)]
        return MetadataProvider._apply_filter(records, filters)
Example #5
0
    def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order,
                             filters, attempt, *args):
        """Read one object, or a list of sub-objects, from the local metadata tree.

        Step, task and artifact records are overlaid with the ``tags`` and
        ``system_tags`` of their ancestral run before being returned.

        Args:
            obj_type: one of "root", "flow", "run", "step", "task" or
                "artifact". "artifact" is remapped to "artifacts of the
                enclosing task".
            obj_order: order of ``obj_type``; must agree with
                ``ObjectOrder.order_to_type``.
            sub_type: "self", "artifact", "metadata", or one of "flow",
                "run", "step", "task" for a listing.
            sub_order: order of ``sub_type``; must agree with
                ``ObjectOrder.order_to_type``.
            filters: forwarded to ``MetadataProvider._apply_filter`` (used by
                the "self" and listing branches only).
            attempt: attempt whose artifacts are read; when ``None`` the
                attempt named in the last (in sorted order)
                ``sysmeta_attempt-done_*`` file is used.
            *args: path components of the object (flow id, run id, ...).

        Returns:
            * "self": the filtered ``_self.json`` record, or ``None`` when the
              metadata directory or the file is missing.
            * "artifact": a single record when exactly one matches, otherwise
              a list (possibly empty).
            * "metadata": a list of every ``sysmeta_*`` record.
            * listing: the filtered list of ``_self.json`` records found
              ``sub_order - obj_order`` directory levels below the object.

        Raises:
            MetaflowInternalError: on an invalid object/sub type, a
                type/order mismatch, a run id deduced from disk that
                disagrees with ``args``, or an ancestral run that cannot be
                found.
        """
        # This is guaranteed by MetaflowProvider.get_object(), sole intended caller
        if obj_type in ("metadata", "self"):
            raise MetaflowInternalError(msg="Type %s is not allowed" %
                                        obj_type)

        if obj_type not in ("root", "flow", "run", "step", "task", "artifact"):
            raise MetaflowInternalError(msg="Unexpected object type %s" %
                                        obj_type)

        from metaflow.datastore.local_storage import LocalStorage

        if obj_type == "artifact":
            # Artifacts are actually part of the tasks in the filesystem
            # E.g. we get here for (obj_type, sub_type) == (artifact, self)
            obj_type = "task"
            sub_type = "artifact"
            sub_order = obj_order
            obj_order = obj_order - 1

        # From here on obj_type is one of root/flow/run/step/task (validated
        # above, and "artifact" was just remapped), so the branches below do
        # not re-validate it.

        if obj_type != ObjectOrder.order_to_type(obj_order):
            raise MetaflowInternalError(
                "Object type order mismatch %s %s" %
                (obj_type, ObjectOrder.order_to_type(obj_order)))
        if sub_type != ObjectOrder.order_to_type(sub_order):
            raise MetaflowInternalError(
                "Sub type order mismatch %s %s" %
                (sub_type, ObjectOrder.order_to_type(sub_order)))

        RUN_ORDER = ObjectOrder.type_to_order("run")

        def _ancestral_run_from_args():
            # Fetch the run addressed by the leading [flow_id, run_id] of args.
            run = LocalMetadataProvider.get_object(
                "run",
                "self",
                {},
                None,
                *args[:RUN_ORDER]  # *[flow_id, run_id]
            )
            if not run:
                raise MetaflowInternalError(msg="Could not find run %s" %
                                            str(args[:RUN_ORDER]))
            return run

        def _overlay_run_tags(target, run):
            # Copy the run-level tags onto a descendant record.
            target["tags"] = run.get("tags", [])
            target["system_tags"] = run.get("system_tags", [])

        # Special handling of self, artifact, and metadata
        if sub_type == "self":
            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_path is None:
                return None
            self_file = os.path.join(meta_path, "_self.json")
            if not os.path.isfile(self_file):
                return None
            # NOTE(review): [0] assumes _apply_filter keeps the single
            # element; confirm what happens when the filters reject it.
            obj = MetadataProvider._apply_filter(
                [LocalMetadataProvider._read_json_file(self_file)],
                filters)[0]

            # For non-descendants of a run, we are done
            if obj_order <= RUN_ORDER:
                return obj

            if obj_type not in ("step", "task"):
                raise MetaflowInternalError(
                    msg="Unexpected object type %s" % obj_type)
            _overlay_run_tags(obj, _ancestral_run_from_args())
            return obj

        if sub_type == "artifact":
            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_path is None:
                return []

            result = []
            successful_attempt = attempt
            if successful_attempt is None:
                attempt_done_files = os.path.join(meta_path,
                                                  "sysmeta_attempt-done_*")
                attempts_done = sorted(glob.iglob(attempt_done_files))
                if attempts_done:
                    successful_attempt = int(
                        LocalMetadataProvider._read_json_file(
                            attempts_done[-1])["value"])
            if successful_attempt is not None:
                # A specific artifact name is used when args reaches that
                # deep, otherwise every artifact of the attempt is matched.
                which_artifact = "*"
                if len(args) >= sub_order:
                    which_artifact = args[sub_order - 1]
                artifact_files = os.path.join(
                    meta_path,
                    "%d_artifact_%s.json" %
                    (successful_attempt, which_artifact),
                )
                result = [LocalMetadataProvider._read_json_file(path)
                          for path in glob.iglob(artifact_files)]

            # We are getting artifacts. We should overlay with ancestral run's tags.
            # The run is looked up (and required to exist) even when no
            # artifact was found, so a missing run is always reported.
            run = _ancestral_run_from_args()
            for obj in result:
                _overlay_run_tags(obj, run)

            if len(result) == 1:
                return result[0]
            return result

        if sub_type == "metadata":
            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_path is None:
                return []
            files = os.path.join(meta_path, "sysmeta_*")
            return [LocalMetadataProvider._read_json_file(path)
                    for path in glob.iglob(files)]

        # For the other types, we locate all the objects we need to find and return them
        if sub_type not in ("flow", "run", "step", "task"):
            raise MetaflowInternalError(msg="unexpected sub type %s" %
                                        sub_type)
        obj_path = LocalMetadataProvider._make_path(*args[:obj_order],
                                                    create_on_absent=False)
        if obj_path is None:
            return []
        skip_dirs = "*/" * (sub_order - obj_order)
        all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)

        # First pass: collect (self file, run id) pairs and sanity-check the
        # run ids before any record is read.
        self_infos = []
        for meta_path in glob.iglob(all_meta):
            self_file = os.path.join(meta_path, "_self.json")
            if not os.path.isfile(self_file):
                continue
            run_id = None
            # flow and run do not need info from ancestral run
            if sub_type in ("step", "task"):
                run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir(
                    meta_path, sub_type)
                # obj_type IS run, or more granular than run, let's do sanity check vs args
                if obj_order >= RUN_ORDER and run_id != args[RUN_ORDER - 1]:
                    raise MetaflowInternalError(
                        msg="Unexpected run id %s deduced from meta path" %
                        run_id)
            self_infos.append((self_file, run_id))

        # Second pass: read the records. Each ancestral run is fetched at
        # most once; records of the same run share that run's tag lists, as
        # the artifact branch above already does.
        # NOTE(review): caching assumes get_object is a plain read - confirm.
        result = []
        runs_by_id = {}
        for self_file, run_id in self_infos:
            obj = LocalMetadataProvider._read_json_file(self_file)
            if run_id:
                run = runs_by_id.get(run_id)
                if run is None:
                    flow_id_from_args = args[0]
                    run = LocalMetadataProvider.get_object(
                        "run",
                        "self",
                        {},
                        None,
                        flow_id_from_args,
                        run_id,
                    )
                    if not run:
                        raise MetaflowInternalError(
                            msg="Could not find run %s, %s" %
                            (flow_id_from_args, run_id))
                    runs_by_id[run_id] = run
                _overlay_run_tags(obj, run)
            result.append(obj)

        return MetadataProvider._apply_filter(result, filters)
    def _get_object_internal(
        cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
    ):
        """Read one object, or a list of sub-objects, from the local metadata tree.

        Behaviour by ``sub_type`` (after an ``obj_type`` of ``"artifact"`` has
        been remapped to "artifacts of the enclosing task"):

        * ``"self"``: the filtered content of the object's ``_self.json``,
          or ``None`` if the metadata directory or the file is missing.
        * ``"artifact"``: the artifact records of ``attempt``, or, when
          ``attempt`` is ``None``, of the attempt named in the last (in
          sorted order) ``sysmeta_attempt-done_*`` file. A single match is
          returned bare, otherwise a list (possibly empty). ``filters`` is
          not applied.
        * ``"metadata"``: every ``sysmeta_*`` record of the object, as a
          list. ``filters`` is not applied.
        * anything else: the filtered ``_self.json`` records found
          ``sub_order - obj_order`` directory levels below the object.
        """
        from metaflow.datastore.local_storage import LocalStorage

        if obj_type == "artifact":
            # Artifacts are actually part of the tasks in the filesystem
            obj_type, sub_type = "task", "artifact"
            sub_order, obj_order = obj_order, obj_order - 1

        if sub_type == "self":
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return None
            self_path = os.path.join(meta_dir, "_self.json")
            if not os.path.isfile(self_path):
                return None
            record = LocalMetadataProvider._read_json_file(self_path)
            return MetadataProvider._apply_filter([record], filters)[0]

        if sub_type == "artifact":
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return []

            chosen_attempt = attempt
            if chosen_attempt is None:
                # No explicit attempt: use the one named by the last
                # (in sorted order) attempt-done marker, if there is one.
                done_markers = sorted(
                    glob.iglob(os.path.join(meta_dir, "sysmeta_attempt-done_*"))
                )
                if done_markers:
                    last_marker = LocalMetadataProvider._read_json_file(
                        done_markers[-1]
                    )
                    chosen_attempt = int(last_marker["value"])
            if chosen_attempt is None:
                return []

            # A specific artifact name is used when args reaches that deep,
            # otherwise every artifact of the attempt is matched.
            artifact_name = args[sub_order - 1] if len(args) >= sub_order else "*"
            artifact_glob = os.path.join(
                meta_dir,
                "%d_artifact_%s.json" % (chosen_attempt, artifact_name),
            )
            artifacts = [
                LocalMetadataProvider._read_json_file(path)
                for path in glob.iglob(artifact_glob)
            ]
            return artifacts[0] if len(artifacts) == 1 else artifacts

        if sub_type == "metadata":
            meta_dir = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_dir is None:
                return []
            return [
                LocalMetadataProvider._read_json_file(path)
                for path in glob.iglob(os.path.join(meta_dir, "sysmeta_*"))
            ]

        # For the other types, we locate all the objects we need to find and return them
        base_path = LocalMetadataProvider._make_path(
            *args[:obj_order], create_on_absent=False
        )
        if base_path is None:
            return []
        wildcard_levels = "*/" * (sub_order - obj_order)
        meta_glob = os.path.join(base_path, wildcard_levels, LocalStorage.METADATA_DIR)
        self_paths = (
            os.path.join(meta_dir, "_self.json") for meta_dir in glob.iglob(meta_glob)
        )
        records = [
            LocalMetadataProvider._read_json_file(path)
            for path in self_paths
            if os.path.isfile(path)
        ]
        return MetadataProvider._apply_filter(records, filters)