def resources(self):
    if self._resources is None:
        self._resources = defaultdict(int)
        self._resources["_nodes"] = 1
        pipe_group = any(job.is_pipe for job in self.jobs)
        # iterate over siblings that can be executed in parallel
        for siblings in self.toposorted:
            sibling_resources = defaultdict(int)
            for job in siblings:
                try:
                    job_resources = job.resources
                except FileNotFoundError:
                    # Skip job if resource evaluation leads to a file not found error.
                    # This will be caused by an inner job, which needs files created by
                    # the same group. All we can do is to ignore such jobs for now.
                    continue
                for res, value in job_resources.items():
                    if res != "_nodes":
                        sibling_resources[res] += value
            for res, value in sibling_resources.items():
                if res != "_nodes":
                    if self.dag.workflow.run_local or pipe_group:
                        # in case of local execution, this must be a
                        # group of jobs that are connected with pipes
                        # and have to run simultaneously
                        self._resources[res] += value
                    else:
                        # take the maximum with previous values
                        self._resources[res] = max(self._resources.get(res, 0), value)
    return Resources(fromdict=self._resources)
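# A minimal, standalone sketch of the aggregation scheme used in the group
# `resources` property above (hypothetical data, no Snakemake imports): within
# each toposorted level, the resources of sibling jobs that may run in
# parallel are summed; across levels, which run one after another, the
# maximum is taken.
from collections import defaultdict

def aggregate_group_resources(toposorted_levels):
    """toposorted_levels: list of levels, each a list of per-job resource dicts."""
    combined = defaultdict(int)
    combined["_nodes"] = 1
    for siblings in toposorted_levels:
        level = defaultdict(int)
        for job_resources in siblings:
            for res, value in job_resources.items():
                if res != "_nodes":
                    level[res] += value  # siblings run in parallel: sum
        for res, value in level.items():
            # levels run serially: the group only needs the peak
            combined[res] = max(combined.get(res, 0), value)
    return dict(combined)

# Two parallel jobs needing 4 GB each, followed by one job needing 6 GB:
# the group peak is max(4000 + 4000, 6000) = 8000 MB.
print(aggregate_group_resources([[{"mem_mb": 4000}, {"mem_mb": 4000}], [{"mem_mb": 6000}]]))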
def expand_resources(self, wildcards, input, attempt):
    resources = dict()

    def apply(name, res, threads=None):
        if callable(res):
            aux = {"threads": threads} if threads is not None else dict()
            res = self.apply_input_function(
                res, wildcards, input=input, attempt=attempt, **aux
            )
            if not isinstance(res, int):
                raise WorkflowError("Resources function did not return int.")
        res = min(self.workflow.global_resources.get(name, res), res)
        return res

    threads = apply("_cores", self.resources["_cores"])
    resources["_cores"] = threads
    for name, res in self.resources.items():
        if name != "_cores":
            resources[name] = apply(name, res)
    resources = Resources(fromdict=resources)
    return resources
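# Minimal sketch of how a callable resource is evaluated by `apply` above
# (a simplified stand-in for apply_input_function; the names here are
# hypothetical): the callable receives the wildcards plus optional keyword
# context such as input, attempt, and threads.
def evaluate_resource(res, wildcards, input, attempt, threads=None):
    if callable(res):
        kwargs = {"input": input, "attempt": attempt}
        if threads is not None:
            kwargs["threads"] = threads
        res = res(wildcards, **kwargs)
    if not isinstance(res, int):
        raise TypeError("Resources function did not return int.")
    return res

# double the memory request on each retry attempt
mem = evaluate_resource(
    lambda wildcards, input, attempt: 4000 * attempt, {}, [], attempt=2
)
print(mem)  # 8000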
def resources(self):
    if self._resources is None:
        self._resources = defaultdict(int)
        pipe_group = any(
            any(is_flagged(o, "pipe") for o in job.output) for job in self.jobs
        )
        for job in self.jobs:
            try:
                job_resources = job.resources
            except FileNotFoundError:
                # Skip job if resource evaluation leads to a file not found error.
                # This will be caused by an inner job, which needs files created by
                # the same group. All we can do is to ignore such jobs for now.
                continue
            for res, value in job_resources.items():
                if self.dag.workflow.run_local or pipe_group:
                    # in case of local execution, this must be a
                    # group of jobs that are connected with pipes
                    # and have to run simultaneously
                    self._resources[res] += value
                else:
                    # take the maximum over all jobs
                    self._resources[res] = max(self._resources.get(res, value), value)
    return Resources(fromdict=self._resources)
def expand_resources(self, wildcards, input, attempt):
    resources = dict()

    def apply(name, res, threads=None):
        if callable(res):
            aux = dict(rulename=self.name)
            if threads:
                aux["threads"] = threads
            res = self.apply_input_function(
                res,
                wildcards,
                input=input,
                attempt=attempt,
                incomplete_checkpoint_func=lambda e: 0,
                **aux,
            )
            if not isinstance(res, int):
                raise WorkflowError("Resources function did not return int.")
        res = min(self.workflow.global_resources.get(name, res), res)
        return res

    threads = apply("_cores", self.resources["_cores"])
    resources["_cores"] = threads
    for name, res in self.resources.items():
        if name != "_cores":
            resources[name] = apply(name, res, threads=threads)
    resources = Resources(fromdict=resources)
    return resources
def expand_resources(self, wildcards, input, attempt):
    resources = dict()

    def apply(name, res, threads=None):
        if callable(res):
            aux = dict(rulename=self.name)
            if threads is not None:
                aux["threads"] = threads
            try:
                res, _ = self.apply_input_function(
                    res,
                    wildcards,
                    input=input,
                    attempt=attempt,
                    incomplete_checkpoint_func=lambda e: 0,
                    raw_exceptions=True,
                    **aux,
                )
            except (Exception, BaseException) as e:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)

        if isinstance(res, float):
            # round to integer
            res = int(round(res))

        if not isinstance(res, int) and not isinstance(res, str):
            raise WorkflowError(
                f"Resource {name} is neither int, float (would be rounded to "
                "the nearest int), nor str.",
                rule=self,
            )
        global_res = self.workflow.global_resources.get(name)
        if global_res is not None:
            if not isinstance(res, TBDString) and type(res) != type(global_res):
                global_type = (
                    "an int" if isinstance(global_res, int) else type(global_res)
                )
                raise WorkflowError(
                    f"Resource {name} is of type {type(res).__name__} but global "
                    f"resource constraint defines {global_type} with value "
                    f"{global_res}. Resources with the same name must have the "
                    "same type (int, float, or str are allowed).",
                    rule=self,
                )
            if isinstance(res, int):
                res = min(global_res, res)
        return res

    threads = apply("_cores", self.resources["_cores"])
    if self.workflow.max_threads is not None:
        threads = min(threads, self.workflow.max_threads)
    resources["_cores"] = threads
    for name, res in self.resources.items():
        if name != "_cores":
            resources[name] = apply(name, res, threads=threads)
    resources = Resources(fromdict=resources)
    return resources
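# A minimal sketch (hypothetical values, not the Snakemake API) of the coercion
# and capping rules in `apply` above: float results are rounded to the nearest
# int, and int resources are capped at the matching global constraint.
def cap_resource(name, res, global_resources):
    if isinstance(res, float):
        res = int(round(res))  # floats are rounded to the nearest int
    if not isinstance(res, (int, str)):
        raise ValueError(f"Resource {name} must be int, float, or str.")
    global_res = global_resources.get(name)
    if global_res is not None and isinstance(res, int):
        res = min(global_res, res)  # never exceed the global constraint
    return res

# With a global limit of 16000 MB, a requested 32000.4 MB is capped to 16000.
print(cap_resource("mem_mb", 32000.4, {"mem_mb": 16000}))  # -> 16000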
def expand_resources(self, wildcards, input):
    resources = dict()
    for name, res in self.resources.items():
        if callable(res):
            res = self.apply_input_function(res, wildcards, input=input)
            if not isinstance(res, int):
                raise WorkflowError("Resources function did not return int.")
        res = min(self.workflow.global_resources.get(name, res), res)
        resources[name] = res
    resources = Resources(fromdict=resources)
    return resources
def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
    self.rule = rule
    self.dag = dag
    self.targetfile = targetfile
    self.wildcards_dict = self.rule.get_wildcards(targetfile)
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    self._format_wildcards = (
        self.wildcards
        if format_wildcards is None
        else Wildcards(fromdict=format_wildcards)
    )

    (
        self.input,
        self.output,
        self.params,
        self.log,
        self.benchmark,
        self.ruleio,
        self.dependencies,
    ) = rule.expand_wildcards(self.wildcards_dict)

    self.resources_dict = {}
    for name, res in rule.resources.items():
        if callable(res):
            res = res(self.wildcards)
            if not isinstance(res, int):
                raise ValueError("Callable for resources must return int")
        self.resources_dict[name] = min(
            self.rule.workflow.global_resources.get(name, res), res
        )
    self.threads = self.resources_dict["_cores"]
    self.resources = Resources(fromdict=self.resources_dict)
    self.shadow_dir = None
    self._inputsize = None

    self.dynamic_output, self.dynamic_input = set(), set()
    self.temp_output, self.protected_output = set(), set()
    self.touch_output = set()
    self.subworkflow_input = dict()
    for f in self.output:
        f_ = self.ruleio[f]
        if f_ in self.rule.dynamic_output:
            self.dynamic_output.add(f)
        if f_ in self.rule.temp_output:
            self.temp_output.add(f)
        if f_ in self.rule.protected_output:
            self.protected_output.add(f)
        if f_ in self.rule.touch_output:
            self.touch_output.add(f)
    for f in self.input:
        f_ = self.ruleio[f]
        if f_ in self.rule.dynamic_input:
            self.dynamic_input.add(f)
        if f_ in self.rule.subworkflow_input:
            self.subworkflow_input[f] = self.rule.subworkflow_input[f_]

    self._hash = self.rule.__hash__()
    # NOTE: `True or` makes this condition always true, so output hashes
    # are mixed in unconditionally (the dynamic_output check is disabled).
    if True or not self.dynamic_output:
        for o in self.output:
            self._hash ^= o.__hash__()
def expand_resources(self, wildcards, input, attempt):
    resources = dict()

    def apply(name, res, threads=None):
        if callable(res):
            aux = dict(rulename=self.name)
            if threads:
                aux["threads"] = threads
            try:
                try:
                    res, _ = self.apply_input_function(
                        res,
                        wildcards,
                        input=input,
                        attempt=attempt,
                        incomplete_checkpoint_func=lambda e: 0,
                        raw_exceptions=True,
                        **aux,
                    )
                except FileNotFoundError as e:
                    # Resources can depend on input files. Since expansion can
                    # happen during dryrun, where input files are not yet
                    # present, we need to skip such resources and mark them
                    # as [TBD].
                    if e.filename in input:
                        # use zero for resource if it cannot yet be determined
                        res = TBDInt(0)
                    else:
                        raise e
            except (Exception, BaseException) as e:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)

        if not isinstance(res, int) and not isinstance(res, str):
            raise WorkflowError(
                "Resources function did not return int or str.", rule=self
            )
        if isinstance(res, int):
            global_res = self.workflow.global_resources.get(name, res)
            if global_res is not None:
                res = min(global_res, res)
        return res

    threads = apply("_cores", self.resources["_cores"])
    resources["_cores"] = threads
    for name, res in self.resources.items():
        if name != "_cores":
            resources[name] = apply(name, res, threads=threads)
    resources = Resources(fromdict=resources)
    return resources
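# Standalone sketch of the dryrun fallback above (this TBDInt is a stand-in
# for Snakemake's internal placeholder, which likewise subclasses int): if a
# resource function fails because a declared input file does not exist yet,
# the value is recorded as a to-be-determined zero instead of aborting.
import os

class TBDInt(int):
    """Placeholder for a resource value that cannot be determined yet."""

    def __str__(self):
        return "<TBD>"

def eval_resource(func, input_files):
    try:
        return func(input_files)
    except FileNotFoundError as e:
        if e.filename in input_files:
            # input not present (e.g. during dryrun): defer the decision
            return TBDInt(0)
        raise

res = eval_resource(lambda files: os.path.getsize(files[0]) // 1024, ["missing.txt"])
print(res, isinstance(res, int))  # <TBD> True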
def expand_resources(self, wildcards, input, attempt):
    resources = dict()

    def apply(name, res, threads=None):
        if callable(res):
            aux = dict(rulename=self.name)
            if threads is not None:
                aux["threads"] = threads
            try:
                res, _ = self.apply_input_function(
                    res,
                    wildcards,
                    input=input,
                    attempt=attempt,
                    incomplete_checkpoint_func=lambda e: 0,
                    raw_exceptions=True,
                    **aux,
                )
            except (Exception, BaseException) as e:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)

        if isinstance(res, float):
            # round to integer
            res = int(round(res))

        if not isinstance(res, int) and not isinstance(res, str):
            raise WorkflowError(
                "Resources function did not return int, float (floats are "
                "rounded to the nearest integer), or str.",
                rule=self,
            )
        if isinstance(res, int):
            global_res = self.workflow.global_resources.get(name, res)
            if global_res is not None:
                res = min(global_res, res)
        return res

    threads = apply("_cores", self.resources["_cores"])
    if self.workflow.max_threads is not None:
        threads = min(threads, self.workflow.max_threads)
    resources["_cores"] = threads
    for name, res in self.resources.items():
        if name != "_cores":
            resources[name] = apply(name, res, threads=threads)
    resources = Resources(fromdict=resources)
    return resources
def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
    self.rule = rule
    self.dag = dag
    self.targetfile = targetfile
    self.wildcards_dict = self.rule.get_wildcards(targetfile)
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    self._format_wildcards = (
        self.wildcards
        if format_wildcards is None
        else Wildcards(fromdict=format_wildcards)
    )

    (
        self.input,
        self.output,
        self.params,
        self.log,
        self.benchmark,
        self.ruleio,
        self.dependencies,
    ) = rule.expand_wildcards(self.wildcards_dict)

    self.resources_dict = {
        name: min(self.rule.workflow.global_resources.get(name, res), res)
        for name, res in rule.resources.items()
    }
    self.threads = self.resources_dict["_cores"]
    self.resources = Resources(fromdict=self.resources_dict)
    self._inputsize = None

    self.dynamic_output, self.dynamic_input = set(), set()
    self.temp_output, self.protected_output = set(), set()
    self.touch_output = set()
    self.subworkflow_input = dict()
    for f in self.output:
        f_ = self.ruleio[f]
        if f_ in self.rule.dynamic_output:
            self.dynamic_output.add(f)
        if f_ in self.rule.temp_output:
            self.temp_output.add(f)
        if f_ in self.rule.protected_output:
            self.protected_output.add(f)
        if f_ in self.rule.touch_output:
            self.touch_output.add(f)
    for f in self.input:
        f_ = self.ruleio[f]
        if f_ in self.rule.dynamic_input:
            self.dynamic_input.add(f)
        if f_ in self.rule.subworkflow_input:
            self.subworkflow_input[f] = self.rule.subworkflow_input[f_]

    self._hash = self.rule.__hash__()
    # NOTE: `True or` makes this condition always true, so output hashes
    # are mixed in unconditionally (the dynamic_output check is disabled).
    if True or not self.dynamic_output:
        for o in self.output:
            self._hash ^= o.__hash__()
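# Sketch of the identity hash built at the end of __init__ above, with
# hypothetical stand-ins for rule and file objects: the rule's hash is
# combined with the hashes of all output files via XOR, so the result is
# independent of the order in which outputs are listed.
def job_hash(rule_name, outputs):
    h = hash(rule_name)
    for o in outputs:
        h ^= hash(o)  # XOR is commutative: output order does not matter
    return h

assert job_hash("align", ["a.bam", "b.bam"]) == job_hash("align", ["b.bam", "a.bam"])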
def resources(self):
    if self._resources is None:
        self._resources = defaultdict(int)
        pipe_group = any(
            any(is_flagged(o, "pipe") for o in job.output) for job in self.jobs
        )
        for job in self.jobs:
            for res, value in job.resources.items():
                if self.dag.workflow.run_local or pipe_group:
                    # in case of local execution, this must be a
                    # group of jobs that are connected with pipes
                    # and have to run simultaneously
                    self._resources[res] += value
                else:
                    # take the maximum over all jobs
                    self._resources[res] = max(self._resources.get(res, value), value)
    return Resources(fromdict=self._resources)
def resources(self):
    if self._resources is None:

        def check_string_resource(res, value1, value2):
            if value1 != value2:
                raise WorkflowError(
                    "Failed to group jobs together. Resource {} "
                    "is a string but not all group jobs require the same value. "
                    "Observed: {} != {}.".format(res, value1, value2)
                )

        self._resources = defaultdict(int)
        self._resources["_nodes"] = 1
        pipe_group = any(job.is_pipe for job in self.jobs)
        # iterate over siblings that can be executed in parallel
        for siblings in self.toposorted:
            sibling_resources = defaultdict(int)
            for job in siblings:
                try:
                    job_resources = job.resources
                except FileNotFoundError:
                    # Skip job if resource evaluation leads to a file not found
                    # error. This will be caused by an inner job, which needs
                    # files created by the same group. All we can do is to
                    # ignore such jobs for now.
                    continue
                for res, value in job_resources.items():
                    if isinstance(value, int):
                        if res != "_nodes":
                            sibling_resources[res] += value
                    elif isinstance(value, TBDString):
                        # we omit TBDs
                        continue
                    else:
                        # all string resources must be the same for all group jobs
                        if res in sibling_resources:
                            check_string_resource(res, sibling_resources[res], value)
                        else:
                            sibling_resources[res] = value
            for res, value in sibling_resources.items():
                if isinstance(value, int):
                    if res != "_nodes":
                        if self.dag.workflow.run_local or pipe_group:
                            # in case of local execution, this must be a
                            # group of jobs that are connected with pipes
                            # and have to run simultaneously
                            self._resources[res] += value
                        else:
                            # take the maximum with previous values
                            self._resources[res] = max(
                                self._resources.get(res, 0), value
                            )
                elif isinstance(value, TBDString):
                    # we omit TBDs
                    continue
                else:
                    # all string resources must be the same for all group jobs
                    if res in self._resources:
                        check_string_resource(res, self._resources[res], value)
                    else:
                        self._resources[res] = value
    return Resources(fromdict=self._resources)
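# Standalone sketch of the string-resource rule enforced by
# check_string_resource above (hypothetical names): int resources are
# combined numerically, while string resources (e.g. a cluster partition
# name) must agree across all jobs in the group.
def merge_string_resource(merged, name, value):
    if name in merged and merged[name] != value:
        raise ValueError(
            f"Resource {name} is a string but not all group jobs require the "
            f"same value. Observed: {merged[name]} != {value}."
        )
    merged[name] = value

merged = {}
merge_string_resource(merged, "partition", "gpu")
merge_string_resource(merged, "partition", "gpu")  # fine: same value
try:
    merge_string_resource(merged, "partition", "cpu")  # conflict -> error
except ValueError as e:
    print(e)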