예제 #1
0
    def resources(self):
        """Return the aggregated resources of this group as a ``Resources``.

        The aggregate is computed on first access and cached in
        ``self._resources``. ``_nodes`` is fixed to 1 and never summed.
        For every level of ``self.toposorted`` the integer resources of
        the jobs in that level are summed up. The per-level sums are then
        either added together (local execution or a pipe group) or
        reduced to their maximum over all levels.
        """
        if self._resources is None:
            # alias the cache so that it is filled in place
            totals = self._resources = defaultdict(int)
            totals["_nodes"] = 1
            is_pipe_group = any([member.is_pipe for member in self.jobs])
            # each level holds siblings that can be executed in parallel
            for level in self.toposorted:
                level_sums = defaultdict(int)
                for member in level:
                    try:
                        member_resources = member.resources
                    except FileNotFoundError:
                        # Evaluating the resources hit a missing file. This
                        # is caused by an inner job which needs files that
                        # are created by the same group, so the job can
                        # only be ignored for now.
                        continue
                    for key, amount in member_resources.items():
                        if key == "_nodes":
                            continue
                        level_sums[key] += amount

                for key, amount in level_sums.items():
                    if key == "_nodes":
                        continue
                    if self.dag.workflow.run_local or is_pipe_group:
                        # in case of local execution, this must be a group
                        # of jobs that are connected with pipes and have to
                        # run simultaneously
                        totals[key] += amount
                    else:
                        # keep the largest requirement seen so far
                        totals[key] = max(totals.get(key, 0), amount)

        return Resources(fromdict=self._resources)
예제 #2
0
    def expand_resources(self, wildcards, input, attempt):
        """Evaluate the resources of this rule for a concrete job.

        Callable resources are evaluated through
        ``self.apply_input_function`` with ``wildcards``, ``input`` and
        ``attempt``. ``_cores`` is evaluated first; its result is then
        offered as ``threads`` to the callables of all other resources.
        Every value is capped by the entry of the same name in
        ``self.workflow.global_resources`` if such an entry exists.

        Returns:
            A ``Resources`` object built from the evaluated values.

        Raises:
            WorkflowError: if a resource callable does not return an int.
        """
        resources = dict()

        def apply(name, res, threads=None):
            # Evaluate one resource and cap it by the global limit.
            if callable(res):
                aux = {"threads": threads} if threads is not None else dict()
                # NOTE(review): assumes apply_input_function only hands over
                # the auxiliary keyword arguments the callable accepts --
                # confirm against its implementation.
                res = self.apply_input_function(res,
                                                wildcards,
                                                input=input,
                                                attempt=attempt,
                                                **aux)
                if not isinstance(res, int):
                    raise WorkflowError(
                        "Resources function did not return int.")
            res = min(self.workflow.global_resources.get(name, res), res)
            return res

        threads = apply("_cores", self.resources["_cores"])
        resources["_cores"] = threads

        for name, res in self.resources.items():
            if name != "_cores":
                # Forward the evaluated thread count; previously it was
                # computed but never passed, which left the ``threads``
                # handling inside ``apply`` unreachable.
                resources[name] = apply(name, res, threads=threads)
        resources = Resources(fromdict=resources)
        return resources
예제 #3
0
 def resources(self):
     """Return the aggregated resources of all jobs in this group.

     The result is computed once and cached in ``self._resources``.
     Values are summed over the jobs if the workflow runs locally or if
     any job has an output flagged as ``pipe``; otherwise the maximum
     over all jobs is kept for each resource.
     """
     if self._resources is not None:
         return Resources(fromdict=self._resources)

     self._resources = defaultdict(int)
     has_pipe = any([
         any([is_flagged(out, "pipe") for out in member.output])
         for member in self.jobs
     ])
     for member in self.jobs:
         try:
             member_resources = member.resources
         except FileNotFoundError:
             # Resource evaluation hit a missing file. This is caused by
             # an inner job that needs files created by the same group;
             # such jobs can only be ignored for now.
             continue
         for key, amount in member_resources.items():
             if not (self.dag.workflow.run_local or has_pipe):
                 # keep the largest requirement over all jobs
                 previous = self._resources.get(key, amount)
                 self._resources[key] = max(previous, amount)
                 continue
             # in case of local execution, this must be a group of jobs
             # that are connected with pipes and have to run simultaneously
             self._resources[key] += amount
     return Resources(fromdict=self._resources)
예제 #4
0
    def expand_resources(self, wildcards, input, attempt):
        """Evaluate the resources of this rule for a concrete job.

        Callable resources are evaluated through
        ``self.apply_input_function`` with ``wildcards``, ``input``,
        ``attempt`` and the rule name. ``_cores`` is evaluated first and
        its result is offered as ``threads`` to the callables of all other
        resources. Every value is capped by the entry of the same name in
        ``self.workflow.global_resources`` if such an entry exists.

        Returns:
            A ``Resources`` object built from the evaluated values.

        Raises:
            WorkflowError: if a resource callable does not return an int.
        """
        resources = dict()

        def apply(name, res, threads=None):
            # Evaluate one resource and cap it by the global limit.
            if callable(res):
                aux = dict(rulename=self.name)
                # Compare against None explicitly: a plain truthiness test
                # would silently drop a thread count of 0 instead of
                # forwarding it to the callable.
                if threads is not None:
                    aux["threads"] = threads
                res = self.apply_input_function(
                    res,
                    wildcards,
                    input=input,
                    attempt=attempt,
                    incomplete_checkpoint_func=lambda e: 0,
                    **aux)
                if not isinstance(res, int):
                    raise WorkflowError(
                        "Resources function did not return int.")
            res = min(self.workflow.global_resources.get(name, res), res)
            return res

        threads = apply("_cores", self.resources["_cores"])
        resources["_cores"] = threads

        for name, res in self.resources.items():
            if name != "_cores":
                resources[name] = apply(name, res, threads=threads)
        resources = Resources(fromdict=resources)
        return resources
예제 #5
0
    def expand_resources(self, wildcards, input, attempt):
        """Evaluate the resources of this rule for a concrete job.

        ``_cores`` is evaluated first, limited by
        ``self.workflow.max_threads`` when that is set, and then offered
        as ``threads`` to the callables of all remaining resources.

        Returns:
            A ``Resources`` object built from the evaluated values.

        Raises:
            InputFunctionException: if a resource callable raises.
            WorkflowError: if a value is neither int, float nor str, or if
                its type differs from the matching global resource.
        """

        def apply(name, res, threads=None):
            # Evaluate a single resource, validate it and apply the
            # global constraint of the same name, if any.
            if callable(res):
                extra = {"rulename": self.name}
                if threads is not None:
                    extra["threads"] = threads
                try:
                    res, _ = self.apply_input_function(
                        res,
                        wildcards,
                        input=input,
                        attempt=attempt,
                        incomplete_checkpoint_func=lambda e: 0,
                        raw_exceptions=True,
                        **extra,
                    )
                except BaseException as err:
                    # everything raised by the callable is wrapped
                    raise InputFunctionException(err,
                                                 rule=self,
                                                 wildcards=wildcards)

            if isinstance(res, float):
                # floats are rounded to the nearest integer
                res = int(round(res))

            if not isinstance(res, (int, str)):
                raise WorkflowError(
                    f"Resource {name} is neither int, float(would be rounded to nearest int), or str.",
                    rule=self,
                )

            limit = self.workflow.global_resources.get(name)
            if limit is None:
                # no global constraint for this resource
                return res

            if not isinstance(res, TBDString) and type(res) != type(limit):
                if isinstance(limit, int):
                    limit_type = "an int"
                else:
                    limit_type = type(limit)
                raise WorkflowError(
                    f"Resource {name} is of type {type(res).__name__} but global resource constraint "
                    f"defines {limit_type} with value {limit}. "
                    "Resources with the same name need to have the same types (int, float, or str are allowed).",
                    rule=self,
                )
            # only integer resources are capped by the global value
            return min(limit, res) if isinstance(res, int) else res

        threads = apply("_cores", self.resources["_cores"])
        if self.workflow.max_threads is not None:
            threads = min(threads, self.workflow.max_threads)

        expanded = {"_cores": threads}
        for name, res in self.resources.items():
            if name == "_cores":
                continue
            expanded[name] = apply(name, res, threads=threads)
        return Resources(fromdict=expanded)
예제 #6
0
 def expand_resources(self, wildcards, input):
     """Evaluate the resources of this rule for a concrete job.

     Callable resources are evaluated through
     ``self.apply_input_function`` and must return an int, otherwise a
     ``WorkflowError`` is raised. Every value is capped by the entry of
     the same name in ``self.workflow.global_resources`` if one exists.
     The result is returned as a ``Resources`` object.
     """
     expanded = dict()
     for name, spec in self.resources.items():
         value = spec
         if callable(spec):
             value = self.apply_input_function(spec, wildcards, input=input)
             if not isinstance(value, int):
                 raise WorkflowError(
                     "Resources function did not return int.")
         # fall back to the value itself if there is no global limit
         limit = self.workflow.global_resources.get(name, value)
         expanded[name] = min(limit, value)
     return Resources(fromdict=expanded)
예제 #7
0
    def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
        """Set up the job for ``rule`` within ``dag``.

        Wildcards are obtained from ``rule.get_wildcards(targetfile)`` and
        used to expand the rule's input, output, params, log, benchmark
        and dependencies. Resources are evaluated (callables are called
        with the wildcards and must return an int) and capped by the
        workflow's global resources. Finally the flagged input and output
        files are collected and the job hash is computed from the rule
        and its output files.

        Raises:
            ValueError: if a resource callable does not return an int.
        """
        self.rule = rule
        self.dag = dag
        self.targetfile = targetfile

        self.wildcards_dict = self.rule.get_wildcards(targetfile)
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        if format_wildcards is None:
            self._format_wildcards = self.wildcards
        else:
            self._format_wildcards = Wildcards(fromdict=format_wildcards)

        expanded = rule.expand_wildcards(self.wildcards_dict)
        (self.input, self.output, self.params, self.log, self.benchmark,
         self.ruleio, self.dependencies) = expanded

        self.resources_dict = {}
        for name, res in rule.resources.items():
            if callable(res):
                res = res(self.wildcards)
                if not isinstance(res, int):
                    raise ValueError("Callable for resources must return int")
            # without a global limit the value is compared with itself
            limit = self.rule.workflow.global_resources.get(name, res)
            self.resources_dict[name] = min(limit, res)

        self.threads = self.resources_dict["_cores"]
        self.resources = Resources(fromdict=self.resources_dict)
        self.shadow_dir = None
        self._inputsize = None

        self.dynamic_output = set()
        self.dynamic_input = set()
        self.temp_output = set()
        self.protected_output = set()
        self.touch_output = set()
        self.subworkflow_input = dict()

        # classify each output file by the flags of its rule-level item
        for out in self.output:
            rule_item = self.ruleio[out]
            if rule_item in self.rule.dynamic_output:
                self.dynamic_output.add(out)
            if rule_item in self.rule.temp_output:
                self.temp_output.add(out)
            if rule_item in self.rule.protected_output:
                self.protected_output.add(out)
            if rule_item in self.rule.touch_output:
                self.touch_output.add(out)

        for inp in self.input:
            rule_item = self.ruleio[inp]
            if rule_item in self.rule.dynamic_input:
                self.dynamic_input.add(inp)
            if rule_item in self.rule.subworkflow_input:
                self.subworkflow_input[inp] = (
                    self.rule.subworkflow_input[rule_item])

        # the hash combines the rule with all output files; the former
        # ``True or ...`` guard was always true and is therefore dropped
        self._hash = self.rule.__hash__()
        for out in self.output:
            self._hash ^= out.__hash__()
예제 #8
0
    def expand_resources(self, wildcards, input, attempt):
        """Evaluate the resources of this rule for a concrete job.

        Callable resources are evaluated through
        ``self.apply_input_function``. ``_cores`` is evaluated first and
        its result is offered as ``threads`` to the callables of all other
        resources. Integer values are capped by the entry of the same name
        in ``self.workflow.global_resources`` if one exists.

        Returns:
            A ``Resources`` object built from the evaluated values.

        Raises:
            InputFunctionException: if a resource callable raises anything
                other than a ``FileNotFoundError`` for one of ``input``.
            WorkflowError: if a resource callable returns neither int nor
                str.
        """
        resources = dict()

        def apply(name, res, threads=None):
            # Evaluate one resource and cap integers by the global limit.
            if callable(res):
                aux = dict(rulename=self.name)
                # Compare against None explicitly: a plain truthiness test
                # would silently drop a thread count of 0 instead of
                # forwarding it to the callable.
                if threads is not None:
                    aux["threads"] = threads
                try:
                    try:
                        res, _ = self.apply_input_function(
                            res,
                            wildcards,
                            input=input,
                            attempt=attempt,
                            incomplete_checkpoint_func=lambda e: 0,
                            raw_exceptions=True,
                            **aux)
                    except FileNotFoundError as e:
                        # Resources can depend on input files. Since expansion can happen during dryrun,
                        # where input files are not yet present, we need to skip such resources and
                        # mark them as [TBD].
                        if e.filename in input:
                            # use zero for resource if it cannot yet be determined
                            res = TBDInt(0)
                        else:
                            # re-raise unchanged; the outer handler wraps it
                            raise
                except BaseException as e:
                    # BaseException already covers Exception, so everything
                    # raised by the callable is wrapped here.
                    raise InputFunctionException(e,
                                                 rule=self,
                                                 wildcards=wildcards)

                if not isinstance(res, int) and not isinstance(res, str):
                    raise WorkflowError(
                        "Resources function did not return int or str.",
                        rule=self)
            if isinstance(res, int):
                # without a global limit the value is compared with itself
                global_res = self.workflow.global_resources.get(name, res)
                if global_res is not None:
                    res = min(global_res, res)
            return res

        threads = apply("_cores", self.resources["_cores"])
        resources["_cores"] = threads

        for name, res in self.resources.items():
            if name != "_cores":
                resources[name] = apply(name, res, threads=threads)
        resources = Resources(fromdict=resources)
        return resources
예제 #9
0
파일: rules.py 프로젝트: mw55309/snakemake
    def expand_resources(self, wildcards, input, attempt):
        """Evaluate the resources of this rule for a concrete job.

        Callable resources are evaluated through
        ``self.apply_input_function``. Floats are rounded to the nearest
        integer, and integer values are capped by the entry of the same
        name in ``self.workflow.global_resources`` if one exists.
        ``_cores`` is evaluated first, limited by
        ``self.workflow.max_threads`` when that is set, and then offered
        as ``threads`` to the callables of all other resources.

        Returns:
            A ``Resources`` object built from the evaluated values.

        Raises:
            InputFunctionException: if a resource callable raises.
            WorkflowError: if a value is neither int, float nor str.
        """
        resources = dict()

        def apply(name, res, threads=None):
            # Evaluate one resource, validate it and cap integers.
            if callable(res):
                aux = dict(rulename=self.name)
                if threads is not None:
                    aux["threads"] = threads
                try:
                    res, _ = self.apply_input_function(
                        res,
                        wildcards,
                        input=input,
                        attempt=attempt,
                        incomplete_checkpoint_func=lambda e: 0,
                        raw_exceptions=True,
                        **aux)
                except BaseException as e:
                    # BaseException already covers Exception, so everything
                    # raised by the callable is wrapped here.
                    raise InputFunctionException(e,
                                                 rule=self,
                                                 wildcards=wildcards)

            if isinstance(res, float):
                # round to integer
                res = int(round(res))

            if not isinstance(res, int) and not isinstance(res, str):
                # message typo fixed: "rouded" -> "rounded"
                raise WorkflowError(
                    "Resources function did not return int, float (floats are "
                    "rounded to the nearest integer), or str.",
                    rule=self,
                )
            if isinstance(res, int):
                # without a global limit the value is compared with itself
                global_res = self.workflow.global_resources.get(name, res)
                if global_res is not None:
                    res = min(global_res, res)
            return res

        threads = apply("_cores", self.resources["_cores"])
        if self.workflow.max_threads is not None:
            threads = min(threads, self.workflow.max_threads)
        resources["_cores"] = threads

        for name, res in self.resources.items():
            if name != "_cores":
                resources[name] = apply(name, res, threads=threads)
        resources = Resources(fromdict=resources)
        return resources
예제 #10
0
    def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
        """Set up the job for ``rule`` within ``dag``.

        Wildcards are obtained from ``rule.get_wildcards(targetfile)`` and
        used to expand the rule's input, output, params, log, benchmark
        and dependencies. The rule's resources are capped by the
        workflow's global resources. Finally the flagged input and output
        files are collected and the job hash is computed from the rule
        and its output files.
        """
        self.rule = rule
        self.dag = dag
        self.targetfile = targetfile

        self.wildcards_dict = self.rule.get_wildcards(targetfile)
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        if format_wildcards is None:
            self._format_wildcards = self.wildcards
        else:
            self._format_wildcards = Wildcards(fromdict=format_wildcards)

        (self.input, self.output, self.params, self.log, self.benchmark,
         self.ruleio,
         self.dependencies) = rule.expand_wildcards(self.wildcards_dict)

        # build the capped resources first, then publish them in one step
        capped = {}
        for name, res in rule.resources.items():
            limit = self.rule.workflow.global_resources.get(name, res)
            capped[name] = min(limit, res)
        self.resources_dict = capped
        self.threads = self.resources_dict["_cores"]
        self.resources = Resources(fromdict=self.resources_dict)
        self._inputsize = None

        self.dynamic_output = set()
        self.dynamic_input = set()
        self.temp_output = set()
        self.protected_output = set()
        self.touch_output = set()
        self.subworkflow_input = dict()

        # rule attribute name -> set on this job collecting matching files
        output_flags = (
            ("dynamic_output", self.dynamic_output),
            ("temp_output", self.temp_output),
            ("protected_output", self.protected_output),
            ("touch_output", self.touch_output),
        )
        for out in self.output:
            rule_item = self.ruleio[out]
            for attr, flagged in output_flags:
                if rule_item in getattr(self.rule, attr):
                    flagged.add(out)

        for inp in self.input:
            rule_item = self.ruleio[inp]
            if rule_item in self.rule.dynamic_input:
                self.dynamic_input.add(inp)
            if rule_item in self.rule.subworkflow_input:
                self.subworkflow_input[inp] = (
                    self.rule.subworkflow_input[rule_item])

        # the hash combines the rule with all output files; the former
        # ``True or ...`` guard was always true and is therefore dropped
        self._hash = self.rule.__hash__()
        for out in self.output:
            self._hash ^= out.__hash__()
예제 #11
0
 def resources(self):
     """Return the aggregated resources of all jobs in this group.

     The result is computed once and cached in ``self._resources``.
     Values are summed over the jobs if the workflow runs locally or if
     any job has an output flagged as ``pipe``; otherwise the maximum
     over all jobs is kept for each resource.
     """
     if self._resources is not None:
         return Resources(fromdict=self._resources)

     self._resources = defaultdict(int)
     has_pipe = any([
         any([is_flagged(out, "pipe") for out in member.output])
         for member in self.jobs
     ])
     for member in self.jobs:
         for key, amount in member.resources.items():
             if not (self.dag.workflow.run_local or has_pipe):
                 # take the maximum over all jobs
                 previous = self._resources.get(key, amount)
                 self._resources[key] = max(previous, amount)
                 continue
             # in case of local execution, this must be a group of jobs
             # that are connected with pipes and have to run simultaneously
             self._resources[key] += amount
     return Resources(fromdict=self._resources)
예제 #12
0
파일: jobs.py 프로젝트: mw55309/snakemake
    def resources(self):
        """Return the aggregated resources of this group as a ``Resources``.

        The aggregate is computed on first access and cached in
        ``self._resources``. ``_nodes`` is fixed to 1 and never summed.
        For every level of ``self.toposorted`` the integer resources of
        the jobs in that level are summed up. The per-level sums are then
        either added together (local execution or a pipe group) or
        reduced to their maximum over all levels. ``TBDString`` values
        are skipped, and every other non-integer value has to be equal
        for all jobs that define it.

        Raises:
            WorkflowError: if jobs of the group require different values
                for the same string resource.
        """
        if self._resources is not None:
            return Resources(fromdict=self._resources)

        def check_string_resource(res, value1, value2):
            # string resources cannot be merged, they have to agree
            if value1 != value2:
                raise WorkflowError(
                    "Failed to group jobs together. Resource {} "
                    "is a string but not all group jobs require the same value. "
                    "Observed: {} != {}.".format(res, value1, value2))

        # alias the cache so that it is filled in place
        totals = self._resources = defaultdict(int)
        totals["_nodes"] = 1
        is_pipe_group = any([member.is_pipe for member in self.jobs])
        # each level holds siblings that can be executed in parallel
        for level in self.toposorted:
            level_resources = defaultdict(int)
            for member in level:
                try:
                    member_resources = member.resources
                except FileNotFoundError:
                    # Evaluating the resources hit a missing file. This is
                    # caused by an inner job which needs files that are
                    # created by the same group, so the job can only be
                    # ignored for now.
                    continue
                for key, amount in member_resources.items():
                    if isinstance(amount, int):
                        if key != "_nodes":
                            level_resources[key] += amount
                        continue
                    if isinstance(amount, TBDString):
                        # values that are not yet determined are left out
                        continue
                    # string resources must agree within the level
                    if key in level_resources:
                        check_string_resource(key, level_resources[key],
                                              amount)
                    else:
                        level_resources[key] = amount

            for key, amount in level_resources.items():
                if isinstance(amount, int):
                    if key == "_nodes":
                        continue
                    if self.dag.workflow.run_local or is_pipe_group:
                        # in case of local execution, this must be a group
                        # of jobs that are connected with pipes and have to
                        # run simultaneously
                        totals[key] += amount
                    else:
                        # keep the largest requirement seen so far
                        totals[key] = max(totals.get(key, 0), amount)
                    continue
                if isinstance(amount, TBDString):
                    # values that are not yet determined are left out
                    continue
                # string resources must agree across all levels
                if key in totals:
                    check_string_resource(key, totals[key], amount)
                else:
                    totals[key] = amount

        return Resources(fromdict=self._resources)