コード例 #1
0
ファイル: util.py プロジェクト: knowledgehacker/metaflow
def compress_list(lst,
                  separator=',',
                  rangedelim=':',
                  zlibmarker='!',
                  zlibmin=500):
    """
    Encode a list of strings as one compact string.

    The result takes one of three forms:

      1. the items joined with `separator`;
      2. the longest common prefix of the items, then `rangedelim`, then
         the remaining suffixes joined with `separator`;
      3. `zlibmarker` followed by the base64 text of form 1 or 2 after it
         has been zlib-compressed twice. This form is chosen when the
         plain encoding is `zlibmin` characters or longer.

    Raises MetaflowInternalError if any item contains `separator`,
    `rangedelim` or `zlibmarker`.
    """
    reserved = (separator, rangedelim, zlibmarker)
    offenders = [item for item in lst if any(tok in item for tok in reserved)]
    if offenders:
        raise MetaflowInternalError("Item '%s' includes a delimiter character "
                                    "so it can't be compressed" % offenders[0])

    prefix = longest_common_prefix(lst)
    if prefix and len(lst) >= 2:
        # Form 2: shared prefix followed by the per-item suffixes.
        cut = len(prefix)
        suffixes = separator.join(item[cut:] for item in lst)
        encoded = rangedelim.join((prefix, suffixes))
    else:
        # Form 1: nothing to factor out, emit a plain separated list.
        encoded = separator.join(lst)

    if len(encoded) < zlibmin:
        return encoded

    # Form 3: a single zlib pass over a typical list of suffixes still
    # leaves plenty of redundancy, so compressing *twice* helps a lot.
    packed = zlib.compress(zlib.compress(to_bytes(encoded)))
    return zlibmarker + base64.b64encode(packed).decode('utf-8')
コード例 #2
0
ファイル: local.py プロジェクト: zillow/metaflow
 def _deduce_run_id_from_meta_dir(meta_dir_path, sub_type):
     """
     Recover the run id from the path of a metadata directory.

     The number of directory levels between `sub_type` and "run" is taken
     from ObjectOrder; the path is walked up that many levels plus one
     (for the metadata directory itself) and the last component of what
     remains is returned as the run id.

     Returns None when `sub_type` is ordered before "run". Raises
     MetaflowInternalError when the resulting path component is empty.
     """
     depth_below_run = (ObjectOrder.type_to_order(sub_type) -
                        ObjectOrder.type_to_order("run"))
     if depth_below_run < 0:
         return None

     run_dir = meta_dir_path
     # +1 to account for ../_meta
     for _hop in range(depth_below_run + 1):
         run_dir = os.path.dirname(run_dir)

     run_id = os.path.basename(run_dir)
     if run_id:
         return run_id
     raise MetaflowInternalError(
         "Failed to deduce run_id from meta dir %s" % meta_dir_path)
コード例 #3
0
ファイル: local.py プロジェクト: zillow/metaflow
 def _persist_tags_for_run(flow_id, run_id, tags, system_tags):
     """
     Rewrite the stored "_self" record of a run with the given tags.

     The run's metadata directory is obtained through
     LocalMetadataProvider._create_and_get_metadir. If it holds no
     "_self.json" file, MetaflowInternalError is raised; otherwise a record
     built by MetadataProvider._run_to_json_static from `tags` and
     `system_tags` is saved under the "_self" key with overwriting allowed.
     """
     run_meta_dir = LocalMetadataProvider._create_and_get_metadir(
         flow_name=flow_id, run_id=run_id)
     run_self_file = os.path.join(run_meta_dir, "_self.json")
     if not os.path.isfile(run_self_file):
         raise MetaflowInternalError(
             msg="Could not verify Run existence on disk - missing %s" %
             run_self_file)

     run_record = MetadataProvider._run_to_json_static(
         flow_id, run_id=run_id, tags=tags, sys_tags=system_tags)
     LocalMetadataProvider._save_meta(
         run_meta_dir, {"_self": run_record}, allow_overwrite=True)
コード例 #4
0
    def __init__(self,
                 pathspec=None,
                 _object=None,
                 _parent=None,
                 _namespace_check=True):
        """
        Build the object either from a pathspec or from an already-fetched
        record.

        When `pathspec` is truthy it is split on '/' and the components are
        handed to self._get_object() to obtain the record; otherwise
        `_object` is used as the record directly. The id, creation time and
        tag set are then derived from that record.

        Raises MetaflowInternalError when self._NAME is not a known type and
        MetaflowNamespaceMismatch when `_namespace_check` is set and the
        object is not in the current namespace.
        """
        self._metaflow = Metaflow()
        self._parent = _parent
        self._path_components = None

        if pathspec:
            components = pathspec.split('/')
            self.id = components[-1]
            self._pathspec = pathspec
            self._object = self._get_object(*components)
        else:
            self._object = _object
            self._pathspec = pathspec

        # Name of the record field holding the id for each object type.
        id_fields = {
            'flow': 'flow_id',
            'task': 'task_id',
            'run': 'run_number',
            'step': 'step_name',
            'artifact': 'name',
        }
        id_field = id_fields.get(self._NAME)
        if id_field is None:
            raise MetaflowInternalError(msg="Unknown type: %s" % self._NAME)
        self.id = str(self._object[id_field])

        # 'ts_epoch' is divided by 1000 before being handed to gmtime(),
        # i.e. it is treated as milliseconds.
        epoch_seconds = self._object['ts_epoch'] // 1000
        self._created_at = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                         time.gmtime(epoch_seconds))

        # Missing or empty tag fields count as "no tags".
        system_tags = self._object.get('system_tags') or []
        user_tags = self._object.get('tags') or []
        self._tags = frozenset(chain(system_tags, user_tags))

        if _namespace_check and not self.is_in_namespace():
            raise MetaflowNamespaceMismatch(current_namespace)
コード例 #5
0
    def loglines(self, stream, as_unicode=True):
        """
        Yield (utc_timestamp, logline) tuples for the given stream.

        With as_unicode=True (the default) each logline is passed through
        to_unicode(); with as_unicode=False it is yielded as stored.

        Raises MetaflowInternalError when the datastore type recorded in the
        metadata ('ds-type') is not present in DATASTORES.
        """
        from metaflow.mflog.mflog import merge_logs
        from metaflow.mflog import LOG_SOURCES
        from metaflow.datastore import DATASTORES

        ds_type = self.metadata_dict.get('ds-type')
        ds_root = self.metadata_dict.get('ds-root')

        datastore_cls = DATASTORES.get(ds_type)
        if datastore_cls is None:
            raise MetaflowInternalError('Datastore %s was not found' % ds_type)
        # Note: this sets the root on the datastore class itself, not on an
        # instance.
        datastore_cls.datastore_root = ds_root

        # A task can fail before any metadata has been recorded; in that
        # case 'attempt' is absent and we fall back to attempt 0.
        #
        # FIXME: Technically this is the latest *recorded* attempt. Logs may
        # exist for a newer attempt that just failed to record metadata. The
        # logic could be made more robust so that the latest available log
        # is always returned.
        task = self._object
        datastore = datastore_cls(
            task['flow_id'],
            run_id=str(task['run_number']),
            step_name=task['step_name'],
            task_id=str(task['task_id']),
            mode='r',
            attempt=int(self.metadata_dict.get('attempt', 0)),
            allow_unsuccessful=True)

        blobs = [blob for _, blob in datastore.load_logs(LOG_SOURCES, stream)]
        for entry in merge_logs(blobs):
            if as_unicode:
                text = to_unicode(entry.msg)
            else:
                text = entry.msg
            yield entry.utc_tstamp, text
コード例 #6
0
ファイル: metadata.py プロジェクト: tobias-gp/metaflow
    def get_object(cls, obj_type, sub_type, filters=None, *args):
        '''Validate an (obj_type, sub_type) request and dispatch it.

        The known types are ranked, from outermost to innermost, as
        'root', 'flow', 'run', 'step', 'task', 'artifact', 'metadata',
        'self'. A request is accepted when:

            - obj_type is one of 'root' through 'artifact';
            - sub_type is ranked strictly after obj_type;
            - 'metadata' is requested only for obj_type 'task'.

        Examples of accepted combinations:

            - all flows: obj_type 'root', sub_type 'flow'
            - all tasks: obj_type 'root', sub_type 'task'
            - all artifacts of a task: obj_type 'task', sub_type 'artifact'
            - one specific flow: obj_type 'flow', sub_type 'self'

        Parameters
        ----------
        obj_type : string
            One of 'root', 'flow', 'run', 'step', 'task' or 'artifact'
        sub_type : string
            Any known type ranked after obj_type, including 'metadata'
            (task only) and 'self'
        filters : dict
            Forwarded unchanged to cls._get_object_internal. Per the
            provider's documentation: a dictionary with keys 'any_tags',
            'tags' and 'system_tags'; filters are ANDed together.
        *args
            Forwarded unchanged to cls._get_object_internal.

        Return
        ------
            object or list :
                Whatever cls._get_object_internal returns for the request

        Raises
        ------
            MetaflowInternalError :
                If either type is unknown or the combination is not allowed
        '''
        hierarchy = ('root', 'flow', 'run', 'step', 'task', 'artifact',
                     'metadata', 'self')
        rank = {kind: position for position, kind in enumerate(hierarchy)}

        obj_rank = rank.get(obj_type)
        sub_rank = rank.get(sub_type)

        if obj_rank is None:
            raise MetaflowInternalError(msg='Cannot find type %s' % obj_type)
        # 'metadata' and 'self' are only meaningful as sub types.
        if obj_rank > rank['artifact']:
            raise MetaflowInternalError(msg='Type %s is not allowed' %
                                        obj_type)

        if sub_rank is None:
            raise MetaflowInternalError(msg='Cannot find subtype %s' %
                                        sub_type)

        # The sub type has to sit strictly below the object type.
        if obj_rank >= sub_rank:
            raise MetaflowInternalError(msg='Subtype %s not allowed for %s' %
                                        (sub_type, obj_type))

        # Metadata is always only at the task level
        if sub_type == 'metadata' and obj_type != 'task':
            raise MetaflowInternalError(
                msg='Metadata can only be retrieved at the task level')

        return cls._get_object_internal(obj_type, obj_rank, sub_type,
                                        sub_rank, filters, *args)
コード例 #7
0
def resolve_token(
    name, token_prefix, obj, authorize, given_token, generate_new_token, is_project
):
    """
    Work out which production token a deployment of `name` should use.

    Steps, in order:
      1. look up the previous deployment via
         StepFunctions.get_existing_deployment;
      2. if a previous token exists, require that the caller is either the
         user who deployed it or presents that token (through `authorize`
         or, when `authorize` is None, the token returned by load_token);
      3. pick the token: `given_token` if set, a newly generated one if there
         is no previous token or `generate_new_token` is set, otherwise the
         previous token.

    Progress and instructions are reported through obj.echo(). The chosen
    token is persisted with store_token() and returned.

    Raises IncorrectProductionToken when authorization fails,
    MetaflowException for unsupported option combinations and
    MetaflowInternalError when a first token cannot be generated.
    """
    namespace_prefix = "production:"

    def without_prefix(raw):
        # Tokens are accepted with or without the leading "production:".
        if raw.startswith(namespace_prefix):
            return raw[len(namespace_prefix):]
        return raw

    # 1) retrieve the previous deployment, if one exists
    existing = StepFunctions.get_existing_deployment(name)
    if existing is not None:
        prev_user, prev_token = existing
    else:
        obj.echo(
            "It seems this is the first time you are deploying *%s* to "
            "AWS Step Functions." % name
        )
        prev_token = None

    # 2) authorize this deployment
    if prev_token is not None:
        if authorize is None:
            presented = load_token(token_prefix)
        else:
            presented = without_prefix(authorize)

        # the user who deployed the previous version may re-deploy even
        # without the token
        deployed_by_someone_else = prev_user != get_username()
        if deployed_by_someone_else and presented != prev_token:
            obj.echo(
                "There is an existing version of *%s* on AWS Step Functions "
                "which was deployed by the user *%s*." % (name, prev_user)
            )
            obj.echo(
                "To deploy a new version of this flow, you need to use the "
                "same production token that they used. "
            )
            obj.echo(
                "Please reach out to them to get the token. Once you have "
                "it, call this command:"
            )
            obj.echo("    step-functions create --authorize MY_TOKEN", fg="green")
            obj.echo(
                'See "Organizing Results" at docs.metaflow.org for more '
                "information about production tokens."
            )
            raise IncorrectProductionToken(
                "Try again with the correct production token."
            )

    # 3) do we need a new token or should we use the existing token?
    if given_token:
        if is_project:
            # @project tokens rely on a known prefix, so a custom token
            # with an arbitrary prefix cannot be accepted
            raise MetaflowException(
                "--new-token is not supported for @projects. "
                "Use --generate-new-token to create a new token."
            )
        token = without_prefix(given_token)
        obj.echo("")
        obj.echo("Using the given token, *%s*." % token)
    elif prev_token is None or generate_new_token:
        token = new_token(token_prefix, prev_token)
        if token is None:
            if prev_token is None:
                raise MetaflowInternalError(
                    "We could not generate a new token. This is unexpected. "
                )
            raise MetaflowException(
                "--generate-new-token option is not supported after using "
                "--new-token. Use --new-token to make a new namespace."
            )
        obj.echo("")
        obj.echo("A new production token generated.")
    else:
        token = prev_token

    obj.echo("")
    obj.echo("The namespace of this production flow is")
    obj.echo("    production:%s" % token, fg="green")
    obj.echo(
        "To analyze results of this production flow add this line in your "
        "notebooks:"
    )
    obj.echo('    namespace("production:%s")' % token, fg="green")
    obj.echo(
        "If you want to authorize other people to deploy new versions of "
        "this flow to AWS Step Functions, they need to call"
    )
    obj.echo("    step-functions create --authorize %s" % token, fg="green")
    obj.echo("when deploying this flow to AWS Step Functions for the first time.")
    obj.echo(
        'See "Organizing Results" at https://docs.metaflow.org/ for more '
        "information about production tokens."
    )
    obj.echo("")

    store_token(token_prefix, token)
    return token
コード例 #8
0
    def _request(
        cls,
        monitor,
        path,
        method,
        data=None,
        retry_409_path=None,
        return_raw_resp=False,
    ):
        """
        Send a request to the metadata service, retrying transient failures.

        Up to METADATA_SERVICE_NUM_RETRIES attempts are made. An attempt is
        retried when the HTTP call raises or when the service answers 503;
        between attempts the method sleeps 2 ** attempt_index seconds.

        Parameters
        ----------
        monitor : object or None
            When truthy, each HTTP call runs inside monitor.measure(...) and
            each failed call inside monitor.count(...).
        path : str
            Request path; leading slashes are dropped before it is joined
            onto cls.INFO.
        method : str
            One of "GET", "PATCH" or "POST".
        data : object, optional
            Sent as the JSON body of POST and PATCH requests.
        retry_409_path : str, optional
            Path fetched with GET when a request carrying `data` is
            answered with 409 (conflict).
        return_raw_resp : bool
            When True, the response object is returned as soon as the HTTP
            call itself succeeds, whatever its status code.

        Returns
        -------
        tuple
            (response, True) when return_raw_resp is set;
            (decoded JSON, True) for status codes below 300;
            (result of the GET on retry_409_path, False) or (None, False)
            for a 409 answer to a request that carried `data`.

        Raises
        ------
        MetaflowException
            If cls.INFO is None or `method` is not supported.
        ServiceException
            For any other status code except 503, or when every attempt was
            answered with 503.
        Exception
            Whatever the HTTP call raised on the final attempt.
        """
        if cls.INFO is None:
            raise MetaflowException(
                "Missing Metaflow Service URL. "
                "Specify with METAFLOW_SERVICE_URL environment variable"
            )
        supported_methods = ("GET", "PATCH", "POST")
        if method not in supported_methods:
            raise MetaflowException(
                "Only these methods are supported: %s, but got %s"
                % (supported_methods, method)
            )
        # NOTE(review): os.path.join uses the platform's path separator, so
        # on Windows this would put a backslash into the URL - confirm
        # whether that platform needs to be supported here.
        url = os.path.join(cls.INFO, path.lstrip("/"))
        metric = "metaflow.service_metadata.%s" % method.lower()
        last_attempt = METADATA_SERVICE_NUM_RETRIES - 1

        def send():
            # Issue the HTTP call for `method`; only POST/PATCH carry a body.
            if method == "GET":
                return requests.get(url, headers=METADATA_SERVICE_HEADERS)
            if method == "POST":
                return requests.post(
                    url, headers=METADATA_SERVICE_HEADERS, json=data
                )
            if method == "PATCH":
                return requests.patch(
                    url, headers=METADATA_SERVICE_HEADERS, json=data
                )
            raise MetaflowInternalError("Unexpected HTTP method %s" % (method,))

        def failure(response):
            # Build the detailed error for a response with a bad status code.
            return ServiceException(
                "Metadata request (%s) failed (code %s): %s"
                % (path, response.status_code, response.text),
                response.status_code,
                response.text,
            )

        # Bound up front so the code after the loop is well-defined even if
        # the retry count is configured as 0.
        resp = None
        for i in range(METADATA_SERVICE_NUM_RETRIES):
            try:
                if monitor:
                    with monitor.measure(metric):
                        resp = send()
                else:
                    resp = send()
            except MetaflowInternalError:
                raise
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit propagate immediately instead of being
                # swallowed until the last attempt.
                if monitor:
                    with monitor.count("metaflow.service_metadata.failed_request"):
                        if i == last_attempt:
                            raise
                else:
                    if i == last_attempt:
                        raise
                resp = None
            else:
                if return_raw_resp:
                    return resp, True
                if resp.status_code < 300:
                    return resp.json(), True
                if resp.status_code == 409 and data is not None:
                    # a special case: the post fails due to a conflict
                    # this could occur when we missed a success response
                    # from the first POST request but the request
                    # actually went though, so a subsequent POST
                    # returns 409 (conflict) or we end up with a
                    # conflict while running on AWS Step Functions
                    # instead of retrying the post we retry with a get since
                    # the record is guaranteed to exist
                    if retry_409_path:
                        v, _ = cls._request(monitor, retry_409_path, "GET")
                        return v, False
                    return None, False
                if resp.status_code != 503:
                    raise failure(resp)
            # Back off before the next attempt only; there is nothing to
            # wait for once the final attempt has failed.
            if i < last_attempt:
                time.sleep(2 ** i)

        # A requests Response is falsy for 4xx/5xx status codes, so the
        # response must be compared against None explicitly; otherwise the
        # status code and body of a final 503 would be dropped.
        if resp is not None:
            raise failure(resp)
        raise ServiceException("Metadata request (%s) failed" % path)
コード例 #9
0
    def get_object(cls, obj_type, sub_type, filters, attempt, *args):
        """Validate an (obj_type, sub_type) request and dispatch it.

        Types are ranked through ObjectOrder.type_to_order(). A request is
        accepted when:

            - obj_type is known and ranked before 'metadata';
            - sub_type is known and ranked strictly after obj_type;
            - 'metadata' is requested only for obj_type 'task'.

        Examples of combinations described by the provider documentation:

            - all flows: obj_type 'root', sub_type 'flow'
            - all tasks: obj_type 'root', sub_type 'task'
            - all artifacts of a task: obj_type 'task', sub_type 'artifact'
            - one specific flow: obj_type 'flow', sub_type 'self'

        Parameters
        ----------
        obj_type : string
            The type of the object the request starts from
        sub_type : string
            What to return relative to that object, e.g. a nested type,
            'metadata' (task only) or 'self'
        filters : dict
            Forwarded unchanged to cls._get_object_internal. Per the
            provider's documentation: a dictionary with keys 'any_tags',
            'tags' and 'system_tags'; filters are ANDed together.
        attempt : int or None
            None, or a value that int() converts to an integer >= 0. It is
            forwarded (as an int) to cls._get_object_internal; when it is
            not None and sub_type is 'metadata', the result is additionally
            passed through
            MetadataProvider._reconstruct_metadata_for_attempt.
        *args
            Forwarded unchanged to cls._get_object_internal.

        Return
        ------
            object or list :
                Depending on the call, the type of object return varies

        Raises
        ------
            MetaflowInternalError :
                If either type is unknown or the combination is not allowed
            ValueError :
                If attempt is a negative number or a value int() rejects
                with ValueError
        """
        obj_rank = ObjectOrder.type_to_order(obj_type)
        sub_rank = ObjectOrder.type_to_order(sub_type)

        if obj_rank is None:
            raise MetaflowInternalError(msg="Cannot find type %s" % obj_type)
        # 'metadata' and anything ranked after it only make sense as sub types.
        if obj_rank >= ObjectOrder.type_to_order("metadata"):
            raise MetaflowInternalError(msg="Type %s is not allowed" %
                                        obj_type)

        if sub_rank is None:
            raise MetaflowInternalError(msg="Cannot find subtype %s" %
                                        sub_type)

        # The sub type has to sit strictly below the object type.
        if obj_rank >= sub_rank:
            raise MetaflowInternalError(msg="Subtype %s not allowed for %s" %
                                        (sub_type, obj_type))

        # Metadata is always only at the task level
        if sub_type == "metadata" and obj_type != "task":
            raise MetaflowInternalError(
                msg="Metadata can only be retrieved at the task level")

        attempt_int = None
        if attempt is not None:
            try:
                attempt_int = int(attempt)
                if attempt_int < 0:
                    # Caught just below and re-raised with the generic
                    # message.
                    raise ValueError("Attempt can only be positive")
            except ValueError:
                raise ValueError("Attempt can only be a positive integer")

        found = cls._get_object_internal(obj_type, obj_rank, sub_type,
                                         sub_rank, filters, attempt_int,
                                         *args)
        # Only metadata fetched for one specific attempt needs post-processing.
        if attempt_int is not None and sub_type == "metadata":
            return MetadataProvider._reconstruct_metadata_for_attempt(
                found, attempt_int)
        return found
コード例 #10
0
ファイル: local.py プロジェクト: zillow/metaflow
    def _get_object_internal(cls, obj_type, obj_order, sub_type, sub_order,
                             filters, attempt, *args):
        """
        Read objects from the local metadata directory tree.

        Four request shapes are handled, selected by `sub_type`:

          - "self": the `_self.json` record of the object addressed by
            `args[:obj_order]`, or None when its metadata directory or
            file is missing. Records of steps and tasks get the "tags" and
            "system_tags" of their run.
          - "artifact": the `<attempt>_artifact_<name>.json` records of a
            task, overlaid with the run's tags. When `attempt` is None the
            attempt is read from the last `sysmeta_attempt-done_*` file.
            A single match is returned as an object, otherwise a list.
          - "metadata": every `sysmeta_*` record in the object's metadata
            directory. `attempt` is not consulted in this branch.
          - "flow" / "run" / "step" / "task": all `_self.json` records found
            `sub_order - obj_order` directory levels below the object, with
            run tags overlaid for steps and tasks and `filters` applied.

        An `obj_type` of "artifact" is rewritten on entry to
        (obj_type="task", sub_type="artifact").

        Raises MetaflowInternalError when the types and orders passed in are
        inconsistent or when a run that an object belongs to is not found.
        """
        # This is guaranteed by MetaflowProvider.get_object(), sole intended caller
        if obj_type in ("metadata", "self"):
            raise MetaflowInternalError(msg="Type %s is not allowed" %
                                        obj_type)

        if obj_type not in ("root", "flow", "run", "step", "task", "artifact"):
            raise MetaflowInternalError(msg="Unexpected object type %s" %
                                        obj_type)

        from metaflow.datastore.local_storage import LocalStorage

        if obj_type == "artifact":
            # Artifacts are actually part of the tasks in the filesystem
            # E.g. we get here for (obj_type, sub_type) == (artifact, self)
            obj_type = "task"
            sub_type = "artifact"
            sub_order = obj_order
            obj_order = obj_order - 1

        # Sanity checks: the (possibly rewritten) types must agree with the
        # orders that were passed alongside them.
        if obj_type != ObjectOrder.order_to_type(obj_order):
            raise MetaflowInternalError(
                "Object type order mismatch %s %s" %
                (obj_type, ObjectOrder.order_to_type(obj_order)))
        if sub_type != ObjectOrder.order_to_type(sub_order):
            raise MetaflowInternalError(
                "Sub type order mismatch %s %s" %
                (sub_type, ObjectOrder.order_to_type(sub_order)))

        # Also used below as the number of leading `args` that identify a
        # run (see the `args[:RUN_ORDER]` slices).
        RUN_ORDER = ObjectOrder.type_to_order("run")

        if obj_type not in ("root", "flow", "run", "step", "task"):
            raise MetaflowInternalError(msg="Unexpected object type %s" %
                                        obj_type)

        # Special handling of self, artifact, and metadata
        if sub_type == "self":
            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_path is None:
                return None
            self_file = os.path.join(meta_path, "_self.json")
            if os.path.isfile(self_file):
                # NOTE(review): the [0] below raises IndexError if
                # _apply_filter can return an empty list for a record that
                # does not match `filters` - confirm against _apply_filter.
                obj = MetadataProvider._apply_filter(
                    [LocalMetadataProvider._read_json_file(self_file)],
                    filters)[0]
                # For non-descendants of a run, we are done

                if obj_order <= RUN_ORDER:
                    return obj

                if obj_type not in ("step", "task"):
                    raise MetaflowInternalError(
                        msg="Unexpected object type %s" % obj_type)
                # Steps and tasks take their tags from the run they belong
                # to, overwriting whatever the record itself held.
                run = LocalMetadataProvider.get_object(
                    "run",
                    "self",
                    {},
                    None,
                    *args[:RUN_ORDER]  # *[flow_id, run_id]
                )
                if not run:
                    raise MetaflowInternalError(msg="Could not find run %s" %
                                                str(args[:RUN_ORDER]))

                obj["tags"] = run.get("tags", [])
                obj["system_tags"] = run.get("system_tags", [])
                return obj
            return None

        if sub_type == "artifact":
            if obj_type not in ("root", "flow", "run", "step", "task"):
                raise MetaflowInternalError(msg="Unexpected object type %s" %
                                            obj_type)

            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            result = []
            if meta_path is None:
                return result

            # Use the requested attempt if one was given; otherwise take it
            # from the last "attempt-done" record found on disk.
            successful_attempt = attempt
            if successful_attempt is None:
                attempt_done_files = os.path.join(meta_path,
                                                  "sysmeta_attempt-done_*")
                # NOTE(review): sorted() orders the paths lexicographically;
                # confirm that the file naming makes the last entry the most
                # recent attempt.
                attempts_done = sorted(glob.iglob(attempt_done_files))
                if attempts_done:
                    successful_attempt = int(
                        LocalMetadataProvider._read_json_file(
                            attempts_done[-1])["value"])
            if successful_attempt is not None:
                # Match every artifact of the attempt unless `args` names a
                # specific one.
                which_artifact = "*"
                if len(args) >= sub_order:
                    which_artifact = args[sub_order - 1]
                artifact_files = os.path.join(
                    meta_path,
                    "%d_artifact_%s.json" %
                    (successful_attempt, which_artifact),
                )
                for obj in glob.iglob(artifact_files):
                    result.append(LocalMetadataProvider._read_json_file(obj))

            # We are getting artifacts. We should overlay with ancestral run's tags
            run = LocalMetadataProvider.get_object(
                "run",
                "self",
                {},
                None,
                *args[:RUN_ORDER]  # *[flow_id, run_id]
            )
            if not run:
                raise MetaflowInternalError(msg="Could not find run %s" %
                                            str(args[:RUN_ORDER]))
            for obj in result:
                obj["tags"] = run.get("tags", [])
                obj["system_tags"] = run.get("system_tags", [])

            # Exactly one match is returned unwrapped; zero or several
            # matches are returned as a list.
            if len(result) == 1:
                return result[0]
            return result

        if sub_type == "metadata":
            # artifact is not expected because if obj_type=artifact on function entry, we transform to =task
            if obj_type not in ("root", "flow", "run", "step", "task"):
                raise MetaflowInternalError(msg="Unexpected object type %s" %
                                            obj_type)
            result = []
            meta_path = LocalMetadataProvider._get_metadir(*args[:obj_order])
            if meta_path is None:
                return result
            # Every sysmeta_* file is returned; `attempt` and `filters` are
            # not applied here.
            files = os.path.join(meta_path, "sysmeta_*")
            for obj in glob.iglob(files):
                result.append(LocalMetadataProvider._read_json_file(obj))
            return result

        # For the other types, we locate all the objects we need to find and return them
        if obj_type not in ("root", "flow", "run", "step", "task"):
            raise MetaflowInternalError(msg="Unexpected object type %s" %
                                        obj_type)
        if sub_type not in ("flow", "run", "step", "task"):
            raise MetaflowInternalError(msg="unexpected sub type %s" %
                                        sub_type)
        obj_path = LocalMetadataProvider._make_path(*args[:obj_order],
                                                    create_on_absent=False)
        result = []
        if obj_path is None:
            return result
        # One "*/" per directory level between the object and the requested
        # sub type, followed by the metadata directory name.
        skip_dirs = "*/" * (sub_order - obj_order)
        all_meta = os.path.join(obj_path, skip_dirs, LocalStorage.METADATA_DIR)
        SelfInfo = collections.namedtuple("SelfInfo", ["filepath", "run_id"])
        self_infos = []
        # First pass: collect the _self.json paths and, for steps and tasks,
        # the run id deduced from each path.
        for meta_path in glob.iglob(all_meta):
            self_file = os.path.join(meta_path, "_self.json")
            if not os.path.isfile(self_file):
                continue
            run_id = None
            # flow and run do not need info from ancestral run
            if sub_type in ("step", "task"):
                run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir(
                    meta_path, sub_type)
                # obj_type IS run, or more granular than run, let's do sanity check vs args
                if obj_order >= RUN_ORDER:
                    if run_id != args[RUN_ORDER - 1]:
                        raise MetaflowInternalError(
                            msg="Unexpected run id %s deduced from meta path" %
                            run_id)
            self_infos.append(SelfInfo(filepath=self_file, run_id=run_id))

        # Second pass: read each record and overlay the tags of its run.
        # Note that the run is looked up once per record, even when several
        # records share the same run id.
        for self_info in self_infos:
            obj = LocalMetadataProvider._read_json_file(self_info.filepath)
            if self_info.run_id:
                flow_id_from_args = args[0]
                run = LocalMetadataProvider.get_object(
                    "run",
                    "self",
                    {},
                    None,
                    flow_id_from_args,
                    self_info.run_id,
                )
                if not run:
                    raise MetaflowInternalError(
                        msg="Could not find run %s, %s" %
                        (flow_id_from_args, self_info.run_id))
                obj["tags"] = run.get("tags", [])
                obj["system_tags"] = run.get("system_tags", [])
            result.append(obj)

        return MetadataProvider._apply_filter(result, filters)
コード例 #11
0
ファイル: tag_cli.py プロジェクト: zillow/metaflow
def tag_list(
    obj,
    run_id,
    hide_system_tags,
    list_all,
    my_runs,
    group_by_tag,
    group_by_run,
    flat,
    arg_run_id,
):
    """Print the tags of a single run or of every run of the current flow.

    The run to inspect may be given through ``run_id`` or ``arg_run_id``
    (but not both). When neither is given and neither ``list_all`` nor
    ``my_runs`` is set, all runs are listed.

    Tags are collected into two dictionaries (system tags and all tags)
    keyed either by run pathspec (``group_by_run``), by tag
    (``group_by_tag``), or by a single comma-joined list of every matched
    pathspec (no grouping). With ``flat`` the tags of that single group are
    echoed one per line through ``obj.echo``; otherwise the two dictionaries
    are handed to ``_print_tags_for_runs_by_groups``.

    Raises ``CommandException`` for conflicting options or when the flow
    cannot be found, and ``MetaflowInternalError`` if ``flat`` is requested
    but the tags did not collapse into exactly one group.
    """
    _set_current(obj)

    no_run_given = run_id is None and arg_run_id is None
    if no_run_given and not (list_all or my_runs):
        # Nothing was selected explicitly: fall back to listing everything.
        list_all = True

    # Option validation; the order decides which message wins on conflicts.
    if list_all and my_runs:
        raise CommandException(
            "Option --all cannot be used together with --my-runs.")
    if not (run_id is None or arg_run_id is None):
        raise CommandException(
            "Specify a run either using --run-id or as an argument but not both"
        )
    if arg_run_id is not None:
        run_id = arg_run_id
    if group_by_run and group_by_tag:
        raise CommandException(
            "Option --group-by-tag cannot be used with --group-by-run")
    if flat and (group_by_run or group_by_tag):
        raise CommandException(
            "Option --flat cannot be used with any --group-by-* option")

    system_groups = {}
    tag_groups = {}

    def _record_run(a_run):
        # Fold one run's tags into the two grouping dictionaries.
        if group_by_run:
            if hide_system_tags:
                tag_groups[a_run.pathspec] = a_run.tags - a_run.system_tags
            else:
                system_groups[a_run.pathspec] = a_run.system_tags
                tag_groups[a_run.pathspec] = a_run.tags
            return

        if group_by_tag:
            for user_tag in a_run.tags - a_run.system_tags:
                tag_groups.setdefault(user_tag, []).append(a_run.pathspec)
            if not hide_system_tags:
                for sys_tag in a_run.system_tags:
                    system_groups.setdefault(sys_tag,
                                             []).append(a_run.pathspec)
            return

        # No grouping: accumulate everything under the placeholder key "_".
        if hide_system_tags:
            tag_groups.setdefault("_", set()).update(
                a_run.tags.difference(a_run.system_tags))
            return
        system_groups.setdefault("_", set()).update(a_run.system_tags)
        tag_groups.setdefault("_", set()).update(a_run.tags)

    if list_all or my_runs:
        # Restrict to the caller's namespace only for --my-runs.
        namespace(resolve_identity() if my_runs else None)
        try:
            flow = Flow(pathspec=obj.flow.name)
        except MetaflowNotFound:
            raise CommandException(
                "Cannot list tags because the flow %s has never been run." %
                (obj.flow.name, ))
        selected_runs = flow.runs()
    else:
        selected_runs = [_get_client_run_obj(obj, run_id, None)]

    pathspecs = []
    for run in selected_runs:
        _record_run(run)
        pathspecs.append(run.pathspec)

    if not (group_by_run or group_by_tag):
        # Re-key the placeholder group by the list of all matched runs so
        # the pathspecs can be printed alongside the tags if needed.
        joined_key = ",".join(pathspecs)
        system_groups[joined_key] = system_groups.get("_", set())
        tag_groups[joined_key] = tag_groups.get("_", set())
        system_groups.pop("_", None)
        tag_groups.pop("_", None)

    if flat:
        if len(tag_groups) != 1:
            raise MetaflowInternalError("Failed to flatten tag set")
        # Exactly one group exists; echo its tags one per line.
        for tag in next(iter(tag_groups.values())):
            obj.echo(tag)
        return

    _print_tags_for_runs_by_groups(obj, system_groups, tag_groups,
                                   group_by_tag)
コード例 #12
0
ファイル: core.py プロジェクト: sappier/metaflow
    def __init__(
        self,
        pathspec=None,
        attempt=None,
        _object=None,
        _parent=None,
        _namespace_check=True,
    ):
        """Build a client object from a pathspec or from pre-fetched metadata.

        Parameters
        ----------
        pathspec : str, optional
            "/"-separated path of the object. When given, the metadata is
            loaded with ``self._get_object(*components)`` and ``self.id`` is
            the last path component.
        attempt : int or str, optional
            Attempt to pin. Only accepted when ``self._NAME`` is ``"task"``
            or ``"artifact"``; must convert to an integer in
            ``[0, MAX_ATTEMPTS)``.
        _object : mapping, optional
            Already-fetched metadata, used only when no pathspec is given.
            ``self.id`` is then derived from the field matching
            ``self._NAME``.
        _parent : optional
            Stored unchanged on ``self._parent``.
        _namespace_check : bool
            When true, ``self.is_in_namespace()`` must hold.

        Raises
        ------
        MetaflowNotFound
            If ``attempt`` is given for an unsupported object type, is not
            convertible to an integer, or is out of range.
        MetaflowInternalError
            If no pathspec is given and ``self._NAME`` is not a known type.
        MetaflowNamespaceMismatch
            If the namespace check is enabled and fails.
        """
        self._metaflow = Metaflow()
        self._parent = _parent
        self._path_components = None
        self._attempt = attempt

        if self._attempt is not None:
            if self._NAME not in ["task", "artifact"]:
                raise MetaflowNotFound(
                    "Attempts can only be specified for Task or DataArtifact")
            try:
                self._attempt = int(self._attempt)
            except (TypeError, ValueError):
                # int() raises TypeError for non-string, non-numeric input
                # (e.g. a list); report it the same way as a bad string.
                raise MetaflowNotFound("Attempt can only be an integer")

            if self._attempt < 0:
                raise MetaflowNotFound("Attempt can only be non-negative")
            elif self._attempt >= MAX_ATTEMPTS:
                raise MetaflowNotFound("Attempt can only be smaller than %d" %
                                       MAX_ATTEMPTS)
            # NOTE: It is possible that no attempt exists but we can't
            # distinguish between "attempt will happen" and "no such
            # attempt exists".

        if pathspec:
            ids = pathspec.split("/")

            # The id the caller asked for is authoritative; it must not be
            # replaced by a field of the fetched metadata record.
            self.id = ids[-1]
            self._pathspec = pathspec
            self._object = self._get_object(*ids)
        else:
            self._object = _object
            self._pathspec = pathspec

            # Without a pathspec the id can only come from the metadata.
            if self._NAME in ("flow", "task"):
                self.id = str(self._object[self._NAME + "_id"])
            elif self._NAME == "run":
                self.id = str(self._object["run_number"])
            elif self._NAME == "step":
                self.id = str(self._object["step_name"])
            elif self._NAME == "artifact":
                self.id = str(self._object["name"])
            else:
                raise MetaflowInternalError(msg="Unknown type: %s" %
                                            self._NAME)

        # "ts_epoch" is divided by 1000 before conversion, i.e. it is
        # treated as a millisecond timestamp.
        self._created_at = datetime.fromtimestamp(self._object["ts_epoch"] /
                                                  1000.0)

        # Missing or None tag fields count as empty.
        self._tags = frozenset(
            chain(
                self._object.get("system_tags") or [],
                self._object.get("tags") or []))

        if _namespace_check and not self.is_in_namespace():
            raise MetaflowNamespaceMismatch(current_namespace)