def rule2job(self, targetrule):
    """Generate a new job from a given rule."""
    if targetrule.has_wildcards():
        raise WorkflowError(
            "Target rules may not contain wildcards. Please specify concrete "
            "files or a rule without wildcards.")
    return Job(targetrule, self)
def file2jobs(self, targetfile):
    jobs = [
        Job(rule, self, targetfile=targetfile)
        for rule in self.rules
        if rule.is_producer(targetfile)
    ]
    if not jobs:
        raise MissingRuleException(targetfile)
    return jobs
def update_dynamic(self, job):
    """Update the DAG by evaluating the output of the given job that
    contains dynamic output files."""
    dynamic_wildcards = job.dynamic_wildcards
    if not dynamic_wildcards:
        # this happens e.g. in dryrun if output is not yet present
        return

    depending = list(
        filter(lambda job_: not self.finished(job_),
               self.bfs(self.depending, job)))
    newrule, non_dynamic_wildcards = job.rule.dynamic_branch(
        dynamic_wildcards, input=False)
    self.specialize_rule(job.rule, newrule)

    # no targetfile needed for job
    newjob = Job(newrule, self, format_wildcards=non_dynamic_wildcards)
    self.replace_job(job, newjob)
    for job_ in depending:
        if job_.dynamic_input:
            newrule_ = job_.rule.dynamic_branch(dynamic_wildcards)
            if newrule_ is not None:
                self.specialize_rule(job_.rule, newrule_)
                if not self.dynamic(job_):
                    logger.debug("Updating job {}.".format(job_))
                    newjob_ = Job(newrule_, self,
                                  targetfile=job_.targetfile)

                    unexpected_output = self.reason(
                        job_).missing_output.intersection(
                            newjob.existing_output)
                    if unexpected_output:
                        logger.warning(
                            "Warning: the following output files of rule {} were not "
                            "present when the DAG was created:\n{}".format(
                                newjob_.rule, unexpected_output))

                    self.replace_job(job_, newjob_)
    return newjob
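# update_dynamic above visits every unfinished job downstream of `job` via a
# breadth-first search over the "depending" relation. A minimal standalone
# sketch of a BFS of that shape (the real self.bfs and self.depending are not
# shown here and may differ in detail):
from collections import deque

def bfs_sketch(neighbors, start):
    """Yield start and every node reachable via neighbors(), breadth-first."""
    seen = {start}
    queue = deque([start])
    while queue:
        node = queue.popleft()
        yield node
        for nxt in neighbors(node):
            if nxt not in seen:
                seen.add(nxt)
                queue.append(nxt)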
def file2jobs(self, targetfile):
    rules = self.output_index.match(targetfile)
    jobs = []
    exceptions = list()
    for rule in rules:
        if rule.is_producer(targetfile):
            try:
                jobs.append(Job(rule, self, targetfile=targetfile))
            except InputFunctionException as e:
                exceptions.append(e)
    if not jobs:
        if exceptions:
            raise exceptions[0]
        raise MissingRuleException(targetfile)
    return jobs
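# This variant of file2jobs narrows the candidate rules with an output index
# before calling is_producer(), and it defers InputFunctionException: the
# exception is only re-raised if no candidate rule yields a job at all. A
# minimal standalone sketch of that "try every candidate, fail only if all
# fail" pattern (the names and the ValueError stand-in are illustrative only):
def first_successes(candidates, build):
    results, errors = [], []
    for candidate in candidates:
        try:
            results.append(build(candidate))
        except ValueError as err:  # stand-in for InputFunctionException
            errors.append(err)
    if not results and errors:
        raise errors[0]
    return results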
def update_dynamic(self, job):
    dynamic_wildcards = job.dynamic_wildcards
    if not dynamic_wildcards:
        # this happens e.g. in dryrun if output is not yet present
        return

    depending = list(
        filter(lambda job_: not self.finished(job_),
               self.bfs(self.depending, job)))
    newrule, non_dynamic_wildcards = job.rule.dynamic_branch(
        dynamic_wildcards, input=False)
    self.replace_rule(job.rule, newrule)

    # no targetfile needed for job
    newjob = Job(newrule, self, format_wildcards=non_dynamic_wildcards)
    self.replace_job(job, newjob)
    for job_ in depending:
        if job_.dynamic_input:
            newrule_ = job_.rule.dynamic_branch(dynamic_wildcards)
            if newrule_ is not None:
                self.replace_rule(job_.rule, newrule_)
                if not self.dynamic(job_):
                    logger.debug("Updating job {}.".format(job_))
                    newjob_ = Job(newrule_, self,
                                  targetfile=job_.targetfile)

                    unexpected_output = self.reason(
                        job_).missing_output.intersection(
                            newjob.existing_output)
                    if unexpected_output:
                        raise UnexpectedOutputException(newjob_.rule,
                                                        unexpected_output)

                    self.replace_job(job_, newjob_)
    return newjob
def collect_potential_dependencies(self, job):
    dependencies = defaultdict(list)
    # use a set to circumvent multiple jobs for the same file
    # if user specified it twice
    file2jobs = self.file2jobs
    for file in set(job.input):
        # omit the file if it comes from a subworkflow
        if file in job.subworkflow_input:
            continue
        try:
            if file in job.dependencies:
                jobs = [Job(job.dependencies[file], self, targetfile=file)]
            else:
                jobs = file2jobs(file)
            dependencies[file].extend(jobs)
        except MissingRuleException:
            # no rule produces this file, so it is not recorded as a dependency
            pass
    return dependencies
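# collect_potential_dependencies maps each input file of a job to every job
# that could produce it, silently skipping files no rule produces. A minimal
# standalone illustration of that accumulation pattern, with made-up file
# names and producer lists:
from collections import defaultdict

def potential_producers(input_files, producers_of):
    dependencies = defaultdict(list)
    for file in set(input_files):  # set(): ignore duplicated input files
        producers = producers_of.get(file)
        if producers:  # like MissingRuleException: skip files without producers
            dependencies[file].extend(producers)
    return dependencies

# Example: "b.txt" has no known producer and therefore gets no entry.
# potential_producers(["a.txt", "b.txt", "a.txt"], {"a.txt": ["map_a_job"]})
# -> {"a.txt": ["map_a_job"]}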
def rule2job(self, targetrule):
    return Job(targetrule, self)
def inputfiles(self):
    # we consider all input files, also of not running jobs
    return Job.files(self.dag.jobs, "input")
def outputfiles(self):
    # we only look at output files that will be updated
    return Job.files(self.dag.needrun_jobs, "output")
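# inputfiles and outputfiles above both delegate to Job.files(jobs, attr). A
# minimal sketch of how such an aggregator could look (an assumption for
# illustration, not the actual Job.files implementation):
from itertools import chain

def files_sketch(jobs, attr):
    """Chain together the per-job file lists named by attr ("input"/"output")."""
    return chain.from_iterable(getattr(job, attr) for job in jobs)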