def depends_on(self, jobs):
    """Add ``jobs`` as dependencies, accepting only prebuild jobs.

    ``jobs`` is flattened with ``ppg.util.flatten_jobs`` and each resulting
    job is registered individually through ``ppg.Job.depends_on``.

    Raises:
        ppg.JobContractError: if a job has no ``is_prebuild`` attribute or
            the attribute is falsy. Jobs that came before the offending one
            in the flattened sequence have already been registered by then.

    Returns:
        ``self``, so calls can be chained.
    """
    for dependency in ppg.util.flatten_jobs(jobs):
        if hasattr(dependency, "is_prebuild") and dependency.is_prebuild:
            # Call the base implementation explicitly with a single job.
            ppg.Job.depends_on(self, dependency)
            continue
        raise ppg.JobContractError(
            "%s depended on a non-prebuild dependency %s - not supported"
            % (self, dependency)
        )
    return self
def align(self, aligner, genome, aligner_parameters, name=None):
    """Create the alignment job for this sample and wrap it in an AlignedSample.

    The BAM file is written to
    ``results/aligned/<aligner.name>_<aligner.version>/<genome.name>/<self.name>/<self.name>.bam``;
    the directory is created immediately (when this method is called).

    Args:
        aligner: object providing ``name``, ``version`` and ``align_job(...)``.
        genome: object providing ``name`` and ``build_index(aligner)``.
        aligner_parameters: passed to ``aligner.align_job``; any falsy value
            is replaced by an empty dict.
        name: optional prefix for the AlignedSample's name; ``self.name`` is
            used when this is None. It does not influence the output path.

    Raises:
        ppg.JobContractError: if none of the alignment job's prerequisites is
            a ``ppg.ParameterInvariant``.

    Returns:
        An ``AlignedSample`` named ``<name or self.name>_<aligner.name>``.
    """
    from .lanes import AlignedSample

    aligner_dir_name = "%s_%s" % (aligner.name, aligner.version)
    output_dir = (
        Path("results") / "aligned" / aligner_dir_name / genome.name / self.name
    )
    output_dir.mkdir(parents=True, exist_ok=True)
    bam_path = output_dir / (self.name + ".bam")

    reads_job = self.prepare_input()
    index_job = genome.build_index(aligner)

    first_reads = reads_job.filenames[0]
    # A second input file is only handed over for paired samples.
    second_reads = reads_job.filenames[1] if self.is_paired else None
    # Index jobs may expose an output_path; otherwise fall back to their
    # first filename.
    if hasattr(index_job, "output_path"):
        index_location = index_job.output_path
    else:
        index_location = index_job.filenames[0]
    effective_parameters = aligner_parameters if aligner_parameters else {}

    alignment_job = aligner.align_job(
        first_reads,
        second_reads,
        index_location,
        bam_path,
        effective_parameters,
    )
    # No ParameterInvariant is added here on purpose: attaching one for the
    # aligner parameters is the aligner's job. We only verify that it did.
    alignment_job.depends_on(reads_job, index_job)
    has_parameter_invariant = any(
        isinstance(prerequisite, ppg.ParameterInvariant)
        for prerequisite in alignment_job.prerequisites
    )
    if not has_parameter_invariant:
        raise ppg.JobContractError(
            "aligner (%s).align_job should have added a parameter invariant for aligner parameters"
            % aligner
        )

    return AlignedSample(
        f"{self.name if name is None else name}_{aligner.name}",
        alignment_job,
        genome,
        self.is_paired,
        self.vid,
        output_dir,
        aligner=aligner,
    )
def add_annotator(self, anno):
    """Register ``anno`` on ``self.ddf`` and return its annotation job entry.

    The annotator is stored under ``anno.get_cache_name()``. If that name is
    already present in ``self.ddf.annotators``, the existing entries are kept
    and nothing is overwritten. In either case ``anno`` is applied to every
    entry of ``self.ddf.children`` via ``+=``.

    Raises:
        ppg.JobContractError: if the global pipegraph is already running.
        ValueError: if the cache name contains ``/``, ``?`` or ``*``, or is
            longer than 60 characters.

    Returns:
        ``self.ddf.anno_jobs[cache_name]``.
    """
    if ppg.util.global_pipegraph.running:
        raise ppg.JobContractError(
            "Can not add_annotator in a running pipegraph"
            " - the annotator structure get's fixed when a "
            "pipegraph is run, you can't add to it in e.g. a "
            "JobGeneratingJob"
        )

    ddf = self.ddf
    # NOTE(review): annotators lacking a columns property are presumably
    # rejected inside get_cache_name -- confirm against its implementation.
    cache_name = anno.get_cache_name()

    has_forbidden_char = any(char in cache_name for char in ("/", "?", "*"))
    if has_forbidden_char or len(cache_name) > 60:
        raise ValueError(
            "annotator.column_names[0] not suitable as a cache_name (was %s), add cache_name property"
            % repr(cache_name)
        )

    if cache_name not in ddf.annotators:
        ddf.annotators[cache_name] = anno
        ddf.anno_jobs[cache_name] = self.get_anno_dependency_callback(anno)

    # Hand the annotator to every child as well; what `+=` does on a child
    # is defined by the child's type (presumably it registers the annotator).
    for child in ddf.children:
        child += anno

    return ddf.anno_jobs[cache_name]