コード例 #1
0
    def modify_next_group(
        self,
        stack: "StageStack",
    ) -> List[str]:
        """Compute the grouping columns in effect after this group stage.

        The grouping of the directly preceding stack (if there is one) is
        fetched first and then extended according to the name of the top
        stage of ``stack`` with ``self.PREFIX`` stripped:

        * ``ALL``: nothing is added; a non-empty preceding grouping is
          rejected because it would have no effect.
        * ``BIN``: the pseudo column ``__bin__`` is added.
        * anything else: the remaining name itself is added.

        Args:
            stack: Stage stack whose top stage is this group stage.

        Returns:
            A new list of grouping column names.

        Raises:
            YmpStageError: If the top stage name is not matched by this
                stage, or if ``ALL`` follows another grouping.
        """
        name = stack.stage_names[-1]
        if not self.match(name):
            raise YmpStageError(f"Internal Error: {name} not a group?")

        # Fetch the grouping of the directly preceding stack. The result is
        # copied so that appending below never modifies a list that is still
        # owned by the previous stage.
        prev_stack = stack.prev_stack
        if prev_stack is not None:
            group = list(prev_stack.stage.modify_next_group(prev_stack) or [])
        else:
            group = []

        group_name = name[len(self.PREFIX):]
        if group_name == "ALL":
            if group:
                raise YmpStageError(
                    "Regrouping to ALL means previous group statement has no effect"
                )
        elif group_name == "BIN":
            group.append("__bin__")
        else:
            group.append(group_name)
        return group
コード例 #2
0
    def __init__(self, path, stage=None):
        """Set up the stage stack described by the dot-separated ``path``.

        Args:
            path: Dot-separated stage names. The first name is used to look
                up the project in the configuration.
            stage: Optional stage object to use as top stage. It must match
                the last name in ``path``. If omitted, the top stage is
                looked up with ``find_stage``.

        Raises:
            YmpStageError: If no project is found for the first stage name,
                or if ``stage`` does not match the top stage name.
        """
        self.name = path
        self.stage_names = path.split(".")
        self.stages = [find_stage(name) for name in self.stage_names]

        cfg = ymp.get_config()

        # determine project
        # NOTE(review): the original only caught IndexError; a mapping-style
        # lookup would report a missing project as KeyError, so both are
        # handled here -- confirm which one cfg.projects raises.
        try:
            self.project = cfg.projects[self.stage_names[0]]
        except (IndexError, KeyError) as exc:
            raise YmpStageError(
                f"No project for stage stack {path} found") from exc

        # determine top stage
        below_top = self.stage_names[:-1]
        top_stage = self.stage_names[-1]
        if stage:
            if not stage.match(top_stage):
                raise YmpStageError(
                    f"Internal error: {top_stage} not matched by {stage}")
        else:
            stage = find_stage(top_stage)
        self.stage = stage

        # determine grouping
        self.group = getattr(stage, "group", None)
        if below_top and below_top[-1].startswith("group_"):
            # Split only on the first underscore so that column names which
            # themselves contain underscores are kept intact.
            self.group = [below_top[-1].split("_", 1)[1]]

        # collect inputs
        self.prevs = self.resolve_prevs()

        if self.group is None:
            # Ordered, de-duplicated union of the groups of all input stacks.
            groups = list(
                dict.fromkeys(group
                              for p in reversed(list(self.prevs.values()))
                              for group in p.group))
            self.group = self.project.minimize_variables(groups)

        log.info("Stage stack %s using column %s", self, self.group)
        # Log, per providing stack, which input types are taken from it.
        prevmap = {}
        for typ, stack in self.prevs.items():
            prevmap.setdefault(str(stack), []).append(typ)
        for stack_name, types in prevmap.items():
            ftypes = ", ".join(types).replace("/{sample}", "*")
            # Use the short stage name unless it is ambiguous within this path.
            title = stack_name.split(".")[-1]
            if self.stage_names.count(title) != 1:
                title = stack_name
            log.info(".. from %s: %s", title, ftypes)
コード例 #3
0
 def resolve_prevs(self):
     """Resolve the inputs of the top stage.

     Asks the top stage for its inputs and passes them, together with the
     stage, to ``_do_resolve_prevs`` (with ``exclude_self=True``).

     Returns:
         Whatever ``_do_resolve_prevs`` returned.

     Raises:
         YmpStageError: If the inputs collection is still non-empty after
             resolution (presumably ``_do_resolve_prevs`` removes every
             input it could resolve -- confirm against its implementation).
     """
     pending = self.stage.get_inputs()
     resolved = self._do_resolve_prevs(self.stage, pending, exclude_self=True)
     if not pending:
         return resolved
     raise YmpStageError(self._format_missing_input_error(pending))
コード例 #4
0
ファイル: stack.py プロジェクト: epruesse/ymp
    def target(self, args, kwargs):
        """
        Determines the IDs for a given input data type and output ID
        (replaces "{:target:}").

        Args:
            args: Passed through unchanged to ``self.prev``.
            kwargs: Must contain ``'wc'`` with a ``target`` attribute naming
                the current output target. ``'rule'`` is optional and only
                used for diagnostics.

        Returns:
            The IDs obtained from the providing stack's ``get_ids``.

        Raises:
            YmpStageError: If ``get_ids`` returns an empty list.
        """
        # Find stage stack from which input should be requested.
        # (not sure why the below causes a false positive in pylint)
        prev_stack = self.prev(args, kwargs)  # pylint: disable=not-callable
        # Find name of current output target
        cur_target = kwargs['wc'].target

        if self.debug:
            rulename = getattr(kwargs.get('rule'), 'name', 'N/A')
            log.error("input ids for %s", self)
            log.warning("  rule %s", rulename)
            log.warning("  from stack %s", prev_stack)
        cols = self.group
        vals = cur_target
        # An empty grouping with target 'ALL' means "no restriction".
        if cols == [] and vals == 'ALL':
            cols = vals = None

        ids = prev_stack.get_ids(prev_stack.group, cols, vals)

        if ids == []:
            rulename = getattr(kwargs.get('rule'), 'name', 'N/A')
            # Report the original target and grouping: ``cols``/``vals`` may
            # have been reset to None above, and joining None would raise a
            # TypeError that hides this error.
            raise YmpStageError(
                f"Internal Error: Failed to find inputs\n\n"
                f"Context:\n"
                f"  In stack '{self}' rule '{rulename}'\n"
                f"  Building '{cur_target}' "
                f"(grouped on '{','.join(self.group or [])}')\n"
                f"  Seeking input from '{prev_stack}' "
                f"(grouped on '{','.join(prev_stack.group or [])}')"
                f"\n")

        return ids
コード例 #5
0
 def minimize_variables(self, groups):
     """Reduce a list of grouping columns to at most one column.

     Steps, each applied only while more than one column remains:

     1. drop the pseudo column ``'ALL'``,
     2. let ``self.data.groupby_dedup`` reduce the list.

     An empty (or otherwise falsy) ``groups`` is replaced by
     ``[self.idcol]``.

     Returns:
         The reduced list of columns.

     Raises:
         YmpStageError: If more than one column is left after both steps.
     """
     candidates = groups if groups else [self.idcol]
     if len(candidates) <= 1:
         return candidates

     # FIXME: lowercase?
     candidates = [name for name in candidates if name != 'ALL']
     if len(candidates) <= 1:
         return candidates

     candidates = self.data.groupby_dedup(candidates)
     if len(candidates) > 1:
         raise YmpStageError(
             f"multi-idx grouping not implemented (groups={candidates})")
     return candidates
コード例 #6
0
def find_stage(name):
    """Return the stage object responsible for the stage name ``name``.

    Lookup order:

    1. names starting with ``group_`` yield a new ``GroupBy`` object,
    2. names starting with ``ref_`` are looked up in ``cfg.ref`` (with the
       prefix removed),
    3. project names from ``cfg.projects``,
    4. pipeline names from ``cfg.pipelines``,
    5. the first stage in the ``Stage`` registry whose ``match`` accepts
       the name.

    Raises:
        YmpStageError: If a ``ref_`` name refers to an unknown reference or
            if nothing matches ``name``.
    """
    cfg = ymp.get_config()
    registry = Stage.get_registry()

    if name.startswith("group_"):
        return GroupBy(name)
    if name.startswith("ref_"):
        refname = name[4:]
        if refname in cfg.ref:
            return cfg.ref[refname]
        # Report the name that was searched for; indexing cfg.ref with the
        # missing key here would fail before the error could be raised.
        raise YmpStageError(f"Unknown reference '{refname}'")
    if name in cfg.projects:
        return cfg.projects[name]
    if name in cfg.pipelines:
        return cfg.pipelines[name]
    for stage in registry.values():
        if stage.match(name):
            return stage
    raise YmpStageError(f"Unknown stage '{name}'")
コード例 #7
0
ファイル: stage.py プロジェクト: epruesse/ymp
 def bin(self, _args=None, kwargs=None):
     """
     Dynamic ID for splitting stages

     Reads ``kwargs['rule']`` and ``kwargs['item']``. The item, with
     ``".{:bin:}"`` removed, is registered as an output of this stage via
     ``register_inout`` and the returned suffix is recorded in
     ``self.checkpoints`` under the rule's name.

     Raises:
         YmpStageError: If the rule is not a checkpoint.
         RemoveValue: Always, once registration is done (presumably telling
             the caller to drop the placeholder -- confirm with the
             expansion code).
     """
     current_rule = kwargs['rule']
     if not current_rule.is_checkpoint:
         raise YmpStageError("Only checkpoints may use '{:bin:}'")

     stripped_item = kwargs['item'].replace(".{:bin:}", "")
     suffix = self.register_inout("this", self._outputs, stripped_item)

     recorded = self.checkpoints.setdefault(current_rule.name, set())
     recorded.add(suffix)
     raise RemoveValue()
コード例 #8
0
 def get_ids(self, groups, match_groups=None, match_value=None):
     """Determine the IDs for the grouping ``groups``.

     Args:
         groups: List of grouping column names for which IDs are wanted.
         match_groups: Optional list of grouping column names the result
             is restricted by.
         match_value: Value belonging to ``match_groups``.

     Returns:
         * ``'ALL'`` if ``groups`` is ``['ALL']``,
         * ``match_value`` if ``groups`` equals ``match_groups``,
         * otherwise, if the first group is a column of ``self.data``, the
           result of ``self.data.get`` (when a restriction other than
           ``['ALL']`` is given) or ``self.data.column``,
         * as a fallback, the single group name itself.

     Raises:
         YmpStageError: If nothing was found and ``groups`` does not
             consist of exactly one name.
     """
     ids = None
     if groups == ['ALL']:
         ids = 'ALL'
     elif groups == match_groups:
         ids = match_value
     # Guard against an empty ``groups`` so that case reaches the
     # YmpStageError below instead of failing with an IndexError.
     elif groups and groups[0] in self.data.columns():
         if match_groups and match_groups != ['ALL']:
             ids = self.data.get(match_groups[0], match_value, groups[0])
         else:
             ids = self.data.column(groups[0])
     if not ids:
         if len(groups) == 1:
             ids = groups[0]
         else:
             raise YmpStageError(
                 f"no ids for {groups} {match_groups} {match_value}")
     return ids
コード例 #9
0
ファイル: stack.py プロジェクト: epruesse/ymp
    def __init__(self, path):
        """Build the stage stack for the dot-separated stage path ``path``.

        Looks up every stage named in the path, obtains the stack below the
        top stage via ``self.instance``, determines the project from the
        first name, resolves the inputs of the top stage and finally lets
        the top stage decide on the grouping.

        Raises:
            YmpStageError: If the first name in ``path`` is not a project.
        """
        names = path.split(".")

        #: Name of stack, i.e. its full path
        self.name = path
        #: Names of stages on stack
        self.stage_names = names
        #: Stages on stack
        self.stages = [find_stage(stage_name) for stage_name in names]
        #: Top Stage
        self.stage = self.stages[-1]
        #: Top Stage Name
        self.stage_name = names[-1]
        #: Stage below top stage or None if first in stack
        self.prev_stage = None
        #: Stack below the top stage or None if first in stack
        self.prev_stack = None
        if len(self.stages) > 1:
            self.prev_stage = self.stages[-2]
            self.prev_stack = self.instance(".".join(names[:-1]))

        cfg = ymp.get_config()

        #: Project on which stack operates
        #: This is needed for grouping variables currently.
        self.project = cfg.projects.get(names[0])
        if not self.project:
            raise YmpStageError(f"No project for stage stack {path} found")

        #: Mapping of each input type required by the stage of this stack
        #: to the prefix stack providing it.
        self.prevs = self.resolve_prevs()

        # Gather all previous groups: walk the providing stacks in reverse
        # order and keep the first occurrence of every group name.
        seen = {}
        for provider in reversed(list(self.prevs.values())):
            for column in provider.group:
                seen.setdefault(column, None)
        groups = list(seen)

        project_groups, other_groups = self.project.minimize_variables(groups)
        #: Grouping in effect for this StageStack. An empty list groups into
        #: one pseudo target, 'ALL'.
        self.group: List[str] = (
            self.stage.get_group(self, project_groups + other_groups)
        )