def _make_source_file_name(target, label, src_suffix, tgt_suffix, src_label=None):
    """Derive a source file name from a target file name.

    The target suffix is exchanged for the source suffix and one
    occurrence of ``label`` is then removed, both through ``rreplace``
    (defined elsewhere; presumably it replaces starting from the right
    -- confirm).

    :param target: target file name
    :param label: label to strip from the name; nothing is stripped if falsy
    :param src_suffix: suffix of the source file; ``None`` disables the suffix swap
    :param tgt_suffix: suffix of the target file, or a tuple/list whose
      first element is the suffix that ``target`` carries
    :param src_label: optional source label, re-appended right before ``src_suffix``

    :return: source file name
    """
    # A sequence of suffixes is represented by its first element
    if isinstance(tgt_suffix, (tuple, list)):
        tgt_suffix = tgt_suffix[0]

    name = target
    if tgt_suffix and src_suffix is not None:
        if not src_label:
            name = rreplace(name, tgt_suffix, src_suffix, 1)
        else:
            # The source label is a "diff" relative to the target name:
            # drop it if already there, then append it exactly once.
            stem = rreplace(name, tgt_suffix, "", 1)
            name = rreplace(stem, src_label, "", 1) + src_label + src_suffix

    if not label:
        return name

    hits = name.count(label)
    if hits > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(label, name))
    elif hits == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(label, name))
    return rreplace(name, label, "", 1)
def _make_source_file_name(target_cls, source_cls, diff_label=None):
    """Build the source file name for ``source_cls`` from ``target_cls.target``.

    The target suffix, the target label and the optional ``diff_label``
    are removed from the target name; the source label (if any) and the
    source suffix are then appended.

    :param target_cls: object exposing ``target``, ``suffix`` and ``label``
    :param source_cls: callable whose instances expose ``label`` and ``suffix``
    :param diff_label: extra label to strip from the target name

    :return: source file name
    """
    src_label = source_cls().label
    tgt_suffix = target_cls.suffix
    src_suffix = source_cls().suffix

    # Collapse a non-empty sequence of suffixes to its first entry
    if isinstance(tgt_suffix, (tuple, list)) and len(tgt_suffix) > 0:
        tgt_suffix = tgt_suffix[0]
    if isinstance(src_suffix, (tuple, list)) and len(src_suffix) > 0:
        src_suffix = src_suffix[0]

    # Strip the target suffix, then the target label and the diff label
    name = target_cls.target
    if tgt_suffix:
        name = rreplace(name, tgt_suffix, "", 1)
    name = rreplace(name, target_cls.label, "", 1)
    if diff_label:
        name = rreplace(name, str(diff_label), "", 1)

    if not src_label:
        return name + str(src_suffix)

    # The source label is a "diff" relative to the target name: remove
    # it if present, then append it once together with the suffix.
    name = rreplace(name, str(src_label), "", 1) + str(src_label) + str(src_suffix)
    if name.count(str(src_label)) > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(src_label, name))
    elif name.count(src_label) == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(src_label, name))
    return name
def _make_source_file_name(self):
    """Turn this task's target name into the name of its source file.

    The target suffix is exchanged for ``self.source_suffix`` and one
    occurrence of ``self.label`` is removed from the name.  The result
    is what the required task should be given as target, so this is
    meant to be called from ``requires``.

    :return: string
    """
    # A tuple of target suffixes is represented by its first element
    tgt_suffix = self.target_suffix
    if isinstance(tgt_suffix, tuple):
        tgt_suffix = tgt_suffix[0]

    name = self.target
    if tgt_suffix and self.source_suffix:
        name = rreplace(name, tgt_suffix, self.source_suffix, 1)

    if self.label:
        hits = name.count(self.label)
        if hits > 1:
            logger.warn("label '{}' found multiple times in target '{}'; this could be intentional".format(self.label, name))
        elif hits == 0:
            logger.warn("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(self.label, name))
        name = rreplace(name, self.label, "", 1)
    return name
def _make_source_file_name(self, parent_cls, diff_label=None, add_label=None):
    """Build the target name of a parent task (the "source") from self.target.

    The parent's suffix and label are read from an instance of
    ``parent_cls``.  ``diff_label`` covers the case where the parent
    sits several steps up in the workflow, so that more labels than
    ``self.label`` separate the two file names; this remains an
    unresolved wart of the naming scheme.

    When ``self.target`` is a tuple or list, each call uses the entry at
    ``self._target_iter`` and advances that counter.

    :param parent_cls: parent class
    :param diff_label: the "difference" in labels between self and
      parent.  E.g. if self.target=file.merge.sort.recal.bam depends on
      a task with output file.merge.bam and self.label=.recal, the
      difference (.sort) has to be given here.
    :param add_label: label to add to the parent source, e.g. a read suffix

    :return: parent task target name (source)
    """
    src_label = parent_cls().label
    tgt_suffix = self.sfx()
    src_suffix = parent_cls().sfx()

    if isinstance(self.target, (tuple, list)):
        target = self.target[self._target_iter]
        self._target_iter += 1
    else:
        target = self.target

    # Collapse a non-empty sequence of suffixes to its first entry
    if isinstance(tgt_suffix, (tuple, list)) and len(tgt_suffix) > 0:
        tgt_suffix = tgt_suffix[0]
    if isinstance(src_suffix, (tuple, list)) and len(src_suffix) > 0:
        src_suffix = src_suffix[0]

    # Strip the target suffix, then the own label and the diff label
    source = rreplace(target, tgt_suffix, "", 1) if tgt_suffix else target
    if self.label:
        source = rreplace(source, self.label, "", 1)
    if diff_label:
        source = rreplace(source, str(diff_label), "", 1)
    if add_label:
        source = source + add_label

    if not src_label:
        return source + str(src_suffix)

    # The source label is a "diff" relative to the target name: remove
    # it if present, then append it once together with the suffix.
    source = rreplace(source, str(src_label), "", 1) + str(src_label) + str(src_suffix)
    if source.count(str(src_label)) > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(src_label, source))
    elif source.count(src_label) == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(src_label, source))
    return source
def args(self):
    """Assemble the command-line arguments from the two .sai inputs.

    The fastq targets are derived from the .sai file names by swapping
    ``self.source_suffix`` for ".fastq.gz".  If ``self.read_group`` is
    unset it is filled in (and stored on the task) from the first .sai
    file name.

    :return: list of arguments, ending with ">" and the task output
    """
    sai1 = self.input()[0]
    sai2 = self.input()[1]
    fastq1, fastq2 = [
        luigi.LocalTarget(rreplace(sai.fn, self.source_suffix, ".fastq.gz", 1))
        for sai in (sai1, sai2)
    ]
    if not self.read_group:
        # ID and SM are both the .sai file name without ".sai".
        # The platform should be configured elsewhere.
        sample = sai1.fn.replace(".sai", "")
        fields = [
            "@RG",
            "ID:{}".format(sample),
            "SM:{}".format(sample),
            "PL:{}".format(self.platform),
        ]
        self.read_group = "-r \"{}\"".format("\t".join(fields))
    return [self.read_group, self.bwaref, sai1, sai2, fastq1, fastq2, ">", self.output()]
def _make_source_file_name(self, parent_cls, diff_label=None, add_label=None):
    """Build the target name of a parent task (the "source") from self.target.

    The parent's suffix and label are read from an instance of
    ``parent_cls``.  ``diff_label`` covers the case where the parent
    sits several steps up in the workflow, so that more labels than
    ``self.label`` separate the two file names; this remains an
    unresolved wart of the naming scheme.

    When ``self.target`` is a tuple or list, each call uses the entry at
    ``self._target_iter`` and advances that counter.

    :param parent_cls: parent class
    :param diff_label: the "difference" in labels between self and
      parent.  E.g. if self.target=file.merge.sort.recal.bam depends on
      a task with output file.merge.bam and self.label=.recal, the
      difference (.sort) has to be given here.
    :param add_label: label to add to the parent source, e.g. a read suffix

    :return: parent task target name (source)
    """
    src_label = parent_cls().label
    tgt_suffix = self.sfx()
    src_suffix = parent_cls().sfx()

    if isinstance(self.target, (tuple, list)):
        target = self.target[self._target_iter]
        self._target_iter += 1
    else:
        target = self.target

    # Collapse a non-empty sequence of suffixes to its first entry
    if isinstance(tgt_suffix, (tuple, list)) and len(tgt_suffix) > 0:
        tgt_suffix = tgt_suffix[0]
    if isinstance(src_suffix, (tuple, list)) and len(src_suffix) > 0:
        src_suffix = src_suffix[0]

    # Strip the target suffix, then the own label and the diff label
    source = rreplace(target, tgt_suffix, "", 1) if tgt_suffix else target
    if self.label:
        source = rreplace(source, self.label, "", 1)
    if diff_label:
        source = rreplace(source, str(diff_label), "", 1)
    if add_label:
        source = source + add_label

    if not src_label:
        return source + str(src_suffix)

    # The source label is a "diff" relative to the target name: remove
    # it if present, then append it once together with the suffix.
    source = rreplace(source, str(src_label), "", 1) + str(src_label) + str(src_suffix)
    if source.count(str(src_label)) > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(src_label, source))
    elif source.count(src_label) == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(src_label, source))
    return source
def requires(self):
    """Return the parent task this task depends on.

    For the ResyncMatesJobTask parent a single target fans out into a
    read1/read2 pair: the mate's file name is obtained by swapping the
    read suffixes, and ``self.is_read1`` records which mate the source
    name refers to.

    The read1 suffix is looked up as a plain substring.  It used to be
    handed to ``re.search`` as a pattern, so a suffix containing regex
    metacharacters (e.g. ".1") could match unrelated file names, while
    ``rreplace`` is called with the same suffix as literal text.

    :return: parent task instance
    """
    cls = self.set_parent_task()
    source = self._make_source_file_name()
    # Ugly hack for 1 -> 2 dependency: works but should be dealt with otherwise
    if str(fullclassname(cls)) in ["ratatosk.lib.utils.misc.ResyncMatesJobTask"]:
        if self.read1_suffix in source:
            self.is_read1 = True
            fq1 = source
            fq2 = rreplace(source, self.read1_suffix, self.read2_suffix, 1)
        else:
            self.is_read1 = False
            fq1 = rreplace(source, self.read2_suffix, self.read1_suffix, 1)
            fq2 = source
        return cls(target=[fq1, fq2])
    return cls(target=source)
def requires(self):
    """Return the parent tasks, optionally one per reference sequence.

    With ``self.split_by == "chromosome"`` the reference names are read
    from the bam file that is the source of the parent task or, if that
    file does not exist, from the ".dict" file next to ``self.ref``.
    One parent task is then required per reference, with targets laid
    out as ``{base}-split/{base}-{ref}{ext}``.  The ``{base}-split``
    directory is created as a side effect.

    Reference names are parsed as everything up to the next whitespace
    after ``SN:``.  The earlier character class ``[a-zA-z0-9]+`` cut
    names short at characters such as "." or "-".

    :return: list of parent task instances; empty if neither the bam
      file nor the reference exists
    :raises ValueError: if an @SQ line in the dict file has no SN field
    """
    cls = self.parent()[0]
    bamcls = self.parent()[0]().parent()[0]
    source = self.source()[0]
    if self.split_by != "chromosome":
        return [cls(target=source)]

    # Partition sources by chromosome. The references come from the
    # source bam file, i.e. the source to the parent task.
    bamfile = rreplace(source, self.sfx(), bamcls().sfx(), 1)
    if os.path.exists(bamfile):
        samfile = pysam.Samfile(bamfile, "rb")
        try:
            refs = samfile.references
        finally:
            # Release the file handle even if reading the header fails
            samfile.close()
    elif os.path.exists(os.path.expanduser(self.ref)):
        dictfile = os.path.expanduser(os.path.splitext(self.ref)[0] + ".dict")
        refs = []
        with open(dictfile) as fh:
            for line in fh:
                if not line.startswith("@SQ"):
                    continue
                match = re.search(r'SN:(\S+)', line)
                if match is None:
                    raise ValueError("no SN field in @SQ line of '{}': {!r}".format(dictfile, line))
                refs.append(match.group(1))
    else:
        return []

    outdir = "{base}-split".format(base=os.path.splitext(self.target)[0])
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    prefix = os.path.splitext(os.path.basename(self.target))[0]
    ext = self.sfx()
    split_targets = [
        os.path.join(outdir, "{base}-{ref}{ext}".format(base=prefix, ref=chr_ref, ext=ext))
        for chr_ref in refs
    ]
    return [cls(target=tgt, target_region=chr_ref) for tgt, chr_ref in izip(split_targets, refs)]
def output(self):
    """Return the output targets of the task.

    With a tuple of suffixes, one target is produced per suffix by
    replacing the first suffix in ``self.target`` with each suffix in
    turn; otherwise the single target is ``self.target`` itself.

    :return: list of luigi.LocalTarget
    """
    if not isinstance(self.suffix, tuple):
        return [luigi.LocalTarget(self.target)]
    targets = []
    for sfx in self.suffix:
        targets.append(luigi.LocalTarget(rreplace(self.target, self.suffix[0], sfx, 1)))
    return targets
def _make_source_file_name(self, parent_cls):
    """Build the parent's target name from a split target.

    Assumes the pattern {base}-split/{base}-{ref}{ext}, as in
    CombineVariants: the "-split" directory level is dropped, the label
    is removed and the trailing "-{ref}{ext}" part is cut off before the
    parent suffix is appended.

    The pieces left of the last "-" are re-joined with "-".  They used
    to be joined with "", which silently deleted every hyphen from
    directory and base names.

    FIX ME: well, generalize -- a reference name that itself contains
    "-" is still not handled.

    :param parent_cls: parent class; its ``sfx()`` supplies the suffix

    :return: parent task target name (source)
    """
    flattened = os.path.join(os.path.dirname(os.path.dirname(self.target)),
                             os.path.basename(self.target))
    parts = rreplace(flattened, self.label, "", 1).split("-")
    return "-".join(parts[0:-1]) + parent_cls().sfx()
def _make_paired_source_file_names(self):
    """Construct the source file names for a list of targets.

    For every entry of ``self.target`` the target suffix is exchanged
    for ``self.source_suffix`` and, if ``self.label`` is set, one
    occurrence of the label is removed.

    Fixes over the previous implementation, which
      * appended to the list it was iterating over (``self.target``
        itself), so it never terminated without a label and modified
        the task's target;
      * threw away the suffix-swapped name, so only the label was removed;
      * called ``str.count`` with a falsy label.

    :return: list of source file names, one per target, in order
    """
    # A tuple of target suffixes is represented by its first element
    tgt_suffix = self.target_suffix
    if isinstance(tgt_suffix, tuple):
        tgt_suffix = tgt_suffix[0] if tgt_suffix else None

    sources = []
    for target in self.target:
        source = target
        if tgt_suffix and self.source_suffix:
            source = rreplace(source, tgt_suffix, self.source_suffix, 1)
        if self.label:
            hits = source.count(self.label)
            if hits > 1:
                logger.warn("label '{}' found multiple times in target '{}'; this could be intentional".format(self.label, source))
            elif hits == 0:
                logger.warn("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(self.label, source))
            source = rreplace(source, self.label, "", 1)
        sources.append(source)
    return sources
def _make_source_file_name(self, parent_cls):
    """Build the parent's target name from a split target.

    Assumes the pattern {base}-split/{base}-{ref}{ext}, as in
    CombineVariants: the "-split" directory level is dropped, the label
    is removed and the trailing "-{ref}{ext}" part is cut off before the
    parent suffix is appended.

    The pieces left of the last "-" are re-joined with "-".  They used
    to be joined with "", which silently deleted every hyphen from
    directory and base names.

    FIX ME: well, generalize -- a reference name that itself contains
    "-" is still not handled.

    :param parent_cls: parent class; its ``sfx()`` supplies the suffix

    :return: parent task target name (source)
    """
    flattened = os.path.join(os.path.dirname(os.path.dirname(self.target)),
                             os.path.basename(self.target))
    parts = rreplace(flattened, self.label, "", 1).split("-")
    return "-".join(parts[0:-1]) + parent_cls().sfx()
def requires(self):
    """Return the parent tasks this task depends on.

    For a ResyncMates first parent, the single source fans out into a
    read1/read2 pair: the mate's name is obtained by swapping the read
    suffixes, and ``self.is_read1`` records which mate the source is.
    Any further parents are paired one-to-one with the remaining
    sources.

    :return: list of parent task instances
    :raises ValueError: if the source is neither a read1 nor a read2
      file.  Previously this case failed later with an unbound-variable
      error on ``fq1``.
    """
    cls = self.parent()[0]
    source = self.source()[0]
    # Ugly hack for 1 -> 2 dependency: works but should be dealt with otherwise
    if str(fullclassname(cls)) in ["ratatosk.lib.utils.misc.ResyncMates"]:
        rt = determine_read_type(source, self.read1_suffix, self.read2_suffix)
        if rt == 1:
            self.is_read1 = True
            fq1 = source
            fq2 = rreplace(source, self.read1_suffix, self.read2_suffix, 1)
        elif rt == 2:
            self.is_read1 = False
            fq1 = rreplace(source, self.read2_suffix, self.read1_suffix, 1)
            fq2 = source
        else:
            raise ValueError("cannot determine read type of '{}' from suffixes '{}' and '{}'".format(
                source, self.read1_suffix, self.read2_suffix))
        retval = [cls(target=[fq1, fq2])]
    else:
        retval = [cls(target=source)]
    if len(self.parent()) > 1:
        # Distinct loop names so the first parent/source are not shadowed
        retval += [extra_cls(target=extra_source)
                   for extra_cls, extra_source in izip(self.parent()[1:], self.source()[1:])]
    return retval
def _make_source_file_name(target, label, src_suffix, tgt_suffix, src_label=None):
    """Derive a source file name from a target file name.

    The target suffix is exchanged for the source suffix and one
    occurrence of ``label`` is then removed, both through ``rreplace``
    (defined elsewhere; presumably it replaces starting from the right
    -- confirm).

    :param target: target file name
    :param label: label to strip from the name; nothing is stripped if falsy
    :param src_suffix: suffix of the source file; ``None`` disables the suffix swap
    :param tgt_suffix: suffix of the target file, or a tuple/list whose
      first element is the suffix that ``target`` carries
    :param src_label: optional source label, re-appended right before ``src_suffix``

    :return: source file name
    """
    # A sequence of suffixes is represented by its first element
    if isinstance(tgt_suffix, (tuple, list)):
        tgt_suffix = tgt_suffix[0]

    name = target
    if tgt_suffix and src_suffix is not None:
        if not src_label:
            name = rreplace(name, tgt_suffix, src_suffix, 1)
        else:
            # The source label is a "diff" relative to the target name:
            # drop it if already there, then append it exactly once.
            stem = rreplace(name, tgt_suffix, "", 1)
            name = rreplace(stem, src_label, "", 1) + src_label + src_suffix

    if not label:
        return name

    hits = name.count(label)
    if hits > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(label, name))
    elif hits == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(label, name))
    return rreplace(name, label, "", 1)
def requires(self):
    """Return the parent tasks plus a Tabix index task for the first source.

    Each parent class is paired with the source at the same position.
    The index target is the first source with the vcf parent's suffix
    replaced by the Tabix suffix.

    :return: list of task instances
    """
    vcfcls = self.parent()[0]
    indexcls = ratatosk.lib.variation.tabix.Tabix
    tasks = []
    for parent, src in izip(self.parent(), self.source()):
        tasks.append(parent(target=src))
    index_target = rreplace(self.source()[0], vcfcls().suffix, indexcls().suffix, 1)
    tasks.append(indexcls(target=index_target, parent_task=fullclassname(vcfcls)))
    return tasks
def args(self):
    """Assemble the command-line arguments for trimming one read file.

    The adapter sequence is ``self.fiveprime`` when the input is a
    read 2 file and ``self.threeprime`` otherwise.  The redirect target
    is the input path with the parent's first suffix replaced by this
    task's label plus second suffix.

    :return: list of arguments
    """
    cls = self.parent()[0]
    seq = self.threeprime
    read_type = determine_read_type(self.input()[0].path, self.read1_suffix, self.read2_suffix)
    if read_type == 2:
        seq = self.fiveprime
    cmd = ["-a", seq, self.input()[0], "-o", self.output(), ">"]
    redirect = rreplace(self.input()[0].path, str(cls().suffix[0]),
                        self.label + self.suffix[1], 1)
    cmd.append(redirect)
    return cmd
def collect_sample_runs(task):
    """Collect the sample runs belonging to the sample of ``task.target``.

    Intended for use with MergeSamFiles, hence a list of targets is
    returned.  The sample is taken to be the name of the directory
    holding the target, and the runs are looked up two directory levels
    above the target.

    :param task: current task

    :return: list of bam files for each sample run in a flowcell directory
    """
    logging.debug("Collecting sample runs for {}".format(task.target))
    sample_dir = os.path.dirname(task.target)
    sample_runs = target_generator(os.path.dirname(sample_dir),
                                   sample=[os.path.basename(sample_dir)])
    bam_list = []
    for run in sample_runs:
        # Remove run[0] from the target name, swap label+suffix for the
        # source suffix and prepend run[2] to the resulting base name.
        stripped = task.target.replace(run[0], "")
        source = rreplace(stripped, "{}{}".format(task.label, task.target_suffix),
                          task.source_suffix, 1)
        bam_list.append(run[2] + os.path.basename(source))
    logging.debug("Generated target bamfile list {}".format(bam_list))
    return bam_list
def _get_read_group(self):
    """Return the read group string for the aligner.

    A configured ``self.read_group`` is returned as is.  Otherwise an
    @RG line is built whose ID is the first input path stripped of the
    parent suffix and of ``self.add_label[0]``.  SM defaults to the ID
    and is replaced by the sample id of the first global target whose
    "sample_run" prefix the ID starts with.

    :return: read group string; tabs are escaped when ``self.pipe`` is set
    """
    if self.read_group:
        return self.read_group

    from ratatosk import backend
    cls = self.parent()[0]
    sai1 = self.input()[0]
    without_suffix = rreplace(sai1.path, cls().sfx(), "", 1)
    rgid = rreplace(without_suffix, self.add_label[0], "", 1)
    smid = rgid
    # Get sample information if present in global vars. Note that this
    # requires backend.__global_vars__["targets"] to be set. This is
    # not perfect but works for now.
    for tgt in backend.__global_vars__.get("targets", []):
        if smid.startswith(tgt.prefix("sample_run")):
            smid = tgt.sample_id()
            break
    # The platform should be configured elsewhere
    fields = [
        "@RG",
        "ID:{}".format(rgid),
        "SM:{}".format(smid),
        "PL:{}".format(self.platform),
    ]
    rg = "\"{}\"".format("\t".join(fields))
    if self.pipe:
        return rg.replace("\t", "\\t")
    return rg
def requires(self):
    """Task requirements.

    Every parent class is paired with the source at the same position,
    and a samtools Index task is added for the first source.  This
    covers the common case, so subclasses rarely need to reimplement it.

    :return: list of task instances
    """
    bamcls = self.parent()[0]
    indexcls = ratatosk.lib.tools.samtools.Index
    tasks = []
    for parent, src in izip(self.parent(), self.source()):
        tasks.append(parent(target=src))
    # Index target: first source with the bam suffix swapped for the index suffix
    index_target = rreplace(self.source()[0], bamcls().sfx(), indexcls().sfx(), 1)
    tasks.append(indexcls(target=index_target, parent_task=fullclassname(bamcls)))
    return tasks
def requires(self):
    """Task requirements.

    Requires the bam parent for the first source, a CombineVariants
    task writing "CombinedVariants.vcf" into ``self.outdir``, and a
    samtools Index task for the first source.

    :return: list of task instances
    """
    bamcls = self.parent()[0]
    indexcls = ratatosk.lib.tools.samtools.Index
    bam_task = bamcls(target=self.source()[0])
    combine_task = CombineVariants(target=os.path.join(self.outdir, "CombinedVariants.vcf"))
    # Index target: first source with the bam suffix swapped for the index suffix
    index_target = rreplace(self.source()[0], bamcls().sfx(), indexcls().sfx(), 1)
    index_task = indexcls(target=index_target, parent_task=fullclassname(bamcls))
    return [bam_task, combine_task, index_task]
def generic_collect_sample_runs(task):
    """Collect the sample runs belonging to the sample of ``task.target``.

    Intended for use with MergeSamFiles, hence a list of targets is
    returned.  The sample is taken to be the name of the directory
    holding the target, and the runs are looked up two directory levels
    above the target.  Duplicate names are removed by passing through a
    set, so the order of the result is not defined.

    :param task: current task

    :return: list of bam files for each sample run in a flowcell directory
    """
    logging.debug("Collecting sample runs for {}".format(task.target))
    sample_dir = os.path.dirname(task.target)
    sample_runs = generic_target_generator(os.path.dirname(sample_dir),
                                           sample=[os.path.basename(sample_dir)])
    src_suffix = task.parent()[0]().suffix
    unique_bams = {
        x.prefix("sample_run") + os.path.basename(
            rreplace(task.target.replace(x.sample_id(), ""),
                     "{}{}".format(task.label, task.suffix), src_suffix, 1))
        for x in sample_runs
    }
    bam_list = list(unique_bams)
    logging.debug("Generated target bamfile list {}".format(bam_list))
    return bam_list
def _make_source_file_name(target_cls, source_cls, diff_label=None):
    """Build the source file name for ``source_cls`` from ``target_cls.target``.

    The target suffix, the target label and the optional ``diff_label``
    are removed from the target name; the source label (if any) and the
    source suffix are then appended.

    :param target_cls: object exposing ``target``, ``suffix`` and ``label``
    :param source_cls: callable whose instances expose ``label`` and ``suffix``
    :param diff_label: extra label to strip from the target name

    :return: source file name
    """
    src_label = source_cls().label
    tgt_suffix = target_cls.suffix
    src_suffix = source_cls().suffix

    # Collapse a non-empty sequence of suffixes to its first entry
    if isinstance(tgt_suffix, (tuple, list)) and len(tgt_suffix) > 0:
        tgt_suffix = tgt_suffix[0]
    if isinstance(src_suffix, (tuple, list)) and len(src_suffix) > 0:
        src_suffix = src_suffix[0]

    # Strip the target suffix, then the target label and the diff label
    name = target_cls.target
    if tgt_suffix:
        name = rreplace(name, tgt_suffix, "", 1)
    name = rreplace(name, target_cls.label, "", 1)
    if diff_label:
        name = rreplace(name, str(diff_label), "", 1)

    if not src_label:
        return name + str(src_suffix)

    # The source label is a "diff" relative to the target name: remove
    # it if present, then append it once together with the suffix.
    name = rreplace(name, str(src_label), "", 1) + str(src_label) + str(src_suffix)
    if name.count(str(src_label)) > 1:
        print("label '{}' found multiple times in target '{}'; this could be intentional".format(src_label, name))
    elif name.count(src_label) == 0:
        print("label '{}' not found in target '{}'; are you sure your target is correctly formatted?".format(src_label, name))
    return name
def args(self):
    """Assemble the command-line arguments for PrintReads.

    :return: list of arguments
    :raises Exception: if no reference (``self.ref``) is configured
    """
    # The meaning of the input option (-I) depends on whether we
    # recalibrate or not, which is inconsistent. Requiring both
    # IndelRealigner and BaseRecalibrator(parent_task=IndelRealigner)
    # so that PrintReads runs on a bam file that has recalibration
    # output reportedly breaks the dependencies; instead the task input
    # is passed as -BQSR and the bam name is derived from it.
    # TODO: sort this out - is the above statement really true?
    if not self.recalibrate:
        retval = ["-I", self.input(), "-o", self.output()]
    else:
        inputfile = rreplace(self.input().fn, self.source_suffix,
                             InputBamFile.target_suffix.default, 1)
        retval = ["-BQSR", self.input(), "-o", self.output(), "-I", inputfile]
    if not self.ref:
        raise Exception("need reference for PrintReads")
    retval.append(" -R {}".format(self.ref))
    return retval
def organize_sample_runs(task, cls):
    """List the per-flowcell source bam files for the target of ``task``.

    Relies on the folder structure sample/fc1, sample/fc2 etc.: every
    sub-directory of the target's directory whose name ends in "XX" is
    taken to be a flowcell holding exactly one sample run.

    :param task: current task
    :param cls: unused

    :return: list of bam file names, one per flowcell directory
    """
    logging.debug("Organizing samples for {}".format(task.target))
    targetdir = os.path.dirname(task.target)
    bam_list = []
    for fc in os.listdir(targetdir):
        fc_dir = os.path.join(targetdir, fc)
        if os.path.isdir(fc_dir) and fc_dir.endswith("XX"):
            logging.debug("Looking in directory {}".format(fc))
            # This assumes only one sample run per flowcell
            source = rreplace(task.target, "{}{}".format(task.label, task.target_suffix),
                              task.source_suffix, 1)
            bam_list.append(os.path.join(fc_dir, os.path.basename(source)))
    logging.debug("Generated target bamfile list {}".format(bam_list))
    return bam_list
def requires(self):
    """Return the parent tasks, optionally one per reference sequence.

    With ``self.split_by == "chromosome"`` the reference names are read
    from the bam file that is the source of the parent task or, if that
    file does not exist, from the ".dict" file next to ``self.ref``.
    One parent task is then required per reference, with targets laid
    out as ``{base}-split/{base}-{ref}{ext}``.  The ``{base}-split``
    directory is created as a side effect.

    Reference names are parsed as everything up to the next whitespace
    after ``SN:``.  The earlier character class ``[a-zA-z0-9]+`` cut
    names short at characters such as "." or "-".

    :return: list of parent task instances; empty if neither the bam
      file nor the reference exists
    :raises ValueError: if an @SQ line in the dict file has no SN field
    """
    cls = self.parent()[0]
    bamcls = self.parent()[0]().parent()[0]
    source = self.source()[0]
    if self.split_by != "chromosome":
        return [cls(target=source)]

    # Partition sources by chromosome. The references come from the
    # source bam file, i.e. the source to the parent task.
    bamfile = rreplace(source, self.sfx(), bamcls().sfx(), 1)
    if os.path.exists(bamfile):
        samfile = pysam.Samfile(bamfile, "rb")
        try:
            refs = samfile.references
        finally:
            # Release the file handle even if reading the header fails
            samfile.close()
    elif os.path.exists(os.path.expanduser(self.ref)):
        dictfile = os.path.expanduser(os.path.splitext(self.ref)[0] + ".dict")
        refs = []
        with open(dictfile) as fh:
            for line in fh:
                if not line.startswith("@SQ"):
                    continue
                match = re.search(r'SN:(\S+)', line)
                if match is None:
                    raise ValueError("no SN field in @SQ line of '{}': {!r}".format(dictfile, line))
                refs.append(match.group(1))
    else:
        return []

    outdir = "{base}-split".format(base=os.path.splitext(self.target)[0])
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    prefix = os.path.splitext(os.path.basename(self.target))[0]
    ext = self.sfx()
    split_targets = [
        os.path.join(outdir, "{base}-{ref}{ext}".format(base=prefix, ref=chr_ref, ext=ext))
        for chr_ref in refs
    ]
    return [cls(target=tgt, target_region=chr_ref) for tgt, chr_ref in izip(split_targets, refs)]
def collect_sample_runs(task):
    """Collect the sample runs belonging to the sample of ``task.target``.

    Intended for use with MergeSamFiles, hence a list of targets is
    returned.  The sample is the name of the directory holding the
    target.  Runs come from the global "targets" list when it is set
    and non-empty, otherwise from ``target_generator_handler``.
    Duplicate names are removed by passing through a set, so the order
    of the result is not defined.

    :param task: current task

    :return: list of bam files for each sample run in a flowcell directory
    """
    logging.debug("Collecting sample runs for {}".format(task.target))
    sample_dir = os.path.dirname(task.target)
    sample = os.path.basename(sample_dir)
    if backend.__global_vars__.get("targets", None):
        sample_runs = []
        for candidate in backend.__global_vars__.get("targets"):
            if candidate.sample_id() == sample:
                sample_runs.append(candidate)
    else:
        sample_runs = target_generator_handler(os.path.dirname(sample_dir), sample=[sample])
    src_suffix = task.parent()[0]().sfx()
    unique_bams = {
        x.prefix("sample_run") + os.path.basename(
            rreplace(task.target.replace(x.sample_id(), ""),
                     "{}{}".format(task.label, task.suffix), src_suffix, 1))
        for x in sample_runs
    }
    bam_list = list(unique_bams)
    logging.debug("Generated target bamfile list {}".format(bam_list))
    return bam_list
def organize_sample_runs(task):
    """List the per-flowcell source bam files for the target of ``task``.

    Relies on the folder structure sample/fc1, sample/fc2 etc.: every
    sub-directory of the target's directory whose name ends in "XX" is
    taken to be a flowcell holding exactly one sample run.  This should
    possibly be a configurable function.

    :param task: current task

    :return: list of bam file names, one per flowcell directory
    """
    # NB: this is a pain to get right, hence the generous debug output
    logging.debug("Organizing samples for {}".format(task.target))
    targetdir = os.path.dirname(task.target)
    bam_list = []
    for fc in os.listdir(targetdir):
        fc_dir = os.path.join(targetdir, fc)
        if os.path.isdir(fc_dir) and fc_dir.endswith("XX"):
            logging.debug("Looking in directory {}".format(fc))
            # This assumes only one sample run per flowcell
            source = rreplace(task.target, "{}{}".format(task.label, task.target_suffix),
                              task.source_suffix, 1)
            bam_list.append(os.path.join(fc_dir, os.path.basename(source)))
    logging.debug("Generated target bamfile list {}".format(bam_list))
    return bam_list
def requires(self):
    """Return the parent task plus an IndexBam task for its output.

    The index target is the source name with ``self.source_suffix``
    replaced by ".bai".

    :return: list of two task instances
    """
    cls = self.set_parent_task()
    source = self._make_source_file_name()
    parent_task = cls(target=source)
    index_task = ratatosk.lib.tools.samtools.IndexBam(
        target=rreplace(source, self.source_suffix, ".bai", 1),
        parent_task=fullclassname(cls))
    return [parent_task, index_task]
def requires(self):
    """Return one BwaAln task per read of the pair.

    Each .sai target is the source file name with the read suffix
    inserted in front of ``self.source_suffix``.

    :return: list of two BwaAln task instances (read 1, read 2)
    """
    # From target name, generate sai1, sai2, fastq1, fastq2.
    # The source name is recomputed for each read, as before.
    sai_targets = [
        rreplace(self._make_source_file_name(), self.source_suffix,
                 read_suffix + self.source_suffix, 1)
        for read_suffix in (self.read1_suffix, self.read2_suffix)
    ]
    return [BwaAln(target=sai_targets[0]), BwaAln(target=sai_targets[1])]
def requires(self):
    """Return the parent task, a bam index task and a RealignerTargetCreator task.

    The index target is the source with ``self.source_suffix`` replaced
    by ".bai"; the intervals target is the source with ".bam" replaced
    by ".intervals".

    :return: list of three task instances
    """
    cls = self.set_parent_task()
    source = self._make_source_file_name()
    parent_task = cls(target=source)
    index_task = ratatosk.lib.tools.samtools.IndexBam(
        target=rreplace(source, self.source_suffix, ".bai", 1),
        parent_task="ratatosk.lib.tools.gatk.InputBamFile")
    intervals_task = ratatosk.lib.tools.gatk.RealignerTargetCreator(
        target=rreplace(source, ".bam", ".intervals", 1))
    return [parent_task, index_task, intervals_task]
def args(self):
    """Assemble the command-line arguments from the two .sai inputs.

    The fastq targets are derived from the input paths by swapping the
    parent's suffix for the suffix of the parent's own parent.  Exactly
    two inputs are expected.

    :return: list of arguments, ending with ">" and the task output
    """
    cls = self.parent()[0]
    parent_cls = cls().parent()[0]
    fastqs = []
    for sai in self.input():
        fastq_path = rreplace(sai.path, cls().suffix, parent_cls().sfx(), 1)
        fastqs.append(luigi.LocalTarget(fastq_path))
    (fastq1, fastq2) = fastqs
    return ["-r", self._get_read_group(), self.bwaref,
            self.input()[0].path, self.input()[1].path,
            fastq1, fastq2, ">", self.output()]
def requires(self):
    """Return the four metrics tasks computed for ``self.target``.

    Each target is ``self.target`` plus the suffix of the metrics task.
    For HsMetricsNonDup the DuplicationMetrics label is removed from
    the target name first.

    :return: list of task instances
    """
    insert_task = InsertMetrics(target=self.target + str(InsertMetrics().suffix))
    hs_task = HsMetrics(target=self.target + str(HsMetrics().suffix))
    nondup_base = rreplace(self.target, str(DuplicationMetrics().label), "", 1)
    hs_nondup_task = HsMetricsNonDup(target=nondup_base + str(HsMetrics().suffix))
    align_task = AlignmentMetrics(target=self.target + str(AlignmentMetrics().suffix))
    return [insert_task, hs_task, hs_nondup_task, align_task]
def args(self):
    """Return the first input and the output prefix as arguments.

    The prefix is the output path with ``self.suffix`` removed, wrapped
    in a luigi.LocalTarget.

    :return: list ``[input, output_prefix]``
    """
    prefix_path = rreplace(self.output().path, self.suffix, "", 1)
    output_prefix = luigi.LocalTarget(prefix_path)
    first_input = self.input()[0]
    return [first_input, output_prefix]
def output(self):
    """Return the two output targets of the task.

    The first is ``self.target``; the second is the same name with the
    first suffix replaced by the second one.

    :return: list of two luigi.LocalTarget
    """
    primary = luigi.LocalTarget(self.target)
    secondary_name = rreplace(self.target, self.suffix[0], self.suffix[1], 1)
    secondary = luigi.LocalTarget(secondary_name)
    return [primary, secondary]
def args(self):
    """Assemble the INPUT=/OUTPUT=/METRICS_FILE= arguments.

    The metrics file name is the output path with label plus first
    suffix replaced by the second suffix.

    :return: list of arguments
    """
    cmd = ["INPUT=", self.input()[0], "OUTPUT=", self.output(), "METRICS_FILE="]
    own_ending = "{}{}".format(self.label, self.suffix[0])
    metrics_file = rreplace(self.output().path, own_ending, self.suffix[1], 1)
    cmd.append(metrics_file)
    return cmd
def args(self):
    """Assemble the command-line arguments for trimming one read file.

    The adapter sequence is ``self.fiveprime`` when the input is a
    read 2 file and ``self.threeprime`` otherwise.  The redirect target
    is the input path with the parent's first suffix replaced by this
    task's label plus second suffix.

    :return: list of arguments
    """
    cls = self.parent()[0]
    seq = self.threeprime
    read_type = determine_read_type(self.input()[0].path, self.read1_suffix, self.read2_suffix)
    if read_type == 2:
        seq = self.fiveprime
    cmd = ["-a", seq, self.input()[0], "-o", self.output(), ">"]
    redirect = rreplace(self.input()[0].path, str(cls().suffix[0]),
                        self.label + self.suffix[1], 1)
    cmd.append(redirect)
    return cmd
def args(self):
    """Return the first input and the output prefix as arguments.

    The prefix is the output path with ``self.suffix`` removed, wrapped
    in a luigi.LocalTarget.

    :return: list ``[input, output_prefix]``
    """
    prefix_path = rreplace(self.output().path, self.suffix, "", 1)
    output_prefix = luigi.LocalTarget(prefix_path)
    first_input = self.input()[0]
    return [first_input, output_prefix]
def requires(self):
    """Return the four metrics tasks computed for ``self.target``.

    Each target is ``self.target`` plus the default target suffix of
    the metrics task (the first one for InsertMetrics).  For
    HsMetricsNonDup the default DuplicationMetrics label is removed
    from the target name first.

    :return: list of task instances
    """
    insert_task = InsertMetrics(
        target=self.target + str(InsertMetrics.target_suffix.default[0]))
    hs_task = HsMetrics(target=self.target + str(HsMetrics.target_suffix.default))
    nondup_base = rreplace(self.target, str(DuplicationMetrics.label.default), "", 1)
    hs_nondup_task = HsMetricsNonDup(
        target=nondup_base + str(HsMetrics.target_suffix.default))
    align_task = AlignmentMetrics(
        target=self.target + str(AlignmentMetrics.target_suffix.default))
    return [insert_task, hs_task, hs_nondup_task, align_task]
def requires(self):
    """Task requirements.

    Every parent class is paired with the source at the same position,
    and a samtools Index task is added for the first source.  This
    covers the common case, so subclasses rarely need to reimplement it.

    :return: list of task instances
    """
    bamcls = self.parent()[0]
    indexcls = ratatosk.lib.tools.samtools.Index
    tasks = []
    for parent, src in izip(self.parent(), self.source()):
        tasks.append(parent(target=src))
    # Index target: first source with the bam suffix swapped for the index suffix
    index_target = rreplace(self.source()[0], bamcls().sfx(), indexcls().sfx(), 1)
    tasks.append(indexcls(target=index_target, parent_task=fullclassname(bamcls)))
    return tasks
def requires(self):
    """Task requirements.

    Requires the bam parent for the first source, a CombineVariants
    task writing "CombinedVariants.vcf" into ``self.outdir``, and a
    samtools Index task for the first source.

    :return: list of task instances
    """
    bamcls = self.parent()[0]
    indexcls = ratatosk.lib.tools.samtools.Index
    bam_task = bamcls(target=self.source()[0])
    combine_task = CombineVariants(target=os.path.join(self.outdir, "CombinedVariants.vcf"))
    # Index target: first source with the bam suffix swapped for the index suffix
    index_target = rreplace(self.source()[0], bamcls().sfx(), indexcls().sfx(), 1)
    index_task = indexcls(target=index_target, parent_task=fullclassname(bamcls))
    return [bam_task, combine_task, index_task]
def requires(self):
    """Return a Bgzip task for the first source and a Tabix task indexing it.

    The index target is the first source with the Bgzip suffix replaced
    by the Tabix suffix.

    :return: list of two task instances
    """
    zipcls = ratatosk.lib.variation.tabix.Bgzip
    indexcls = ratatosk.lib.variation.tabix.Tabix
    zip_task = zipcls(target=self.source()[0])
    index_target = rreplace(self.source()[0], zipcls().sfx(), indexcls().sfx(), 1)
    index_task = indexcls(target=index_target, parent_task=fullclassname(zipcls))
    return [zip_task, index_task]
def output(self):
    """Return the output targets of the task.

    With a tuple of suffixes, one target is produced per suffix by
    replacing the first suffix in ``self.target`` with each suffix in
    turn; otherwise the single target is ``self.target`` itself.

    :return: list of luigi.LocalTarget
    """
    if not isinstance(self.suffix, tuple):
        return [luigi.LocalTarget(self.target)]
    targets = []
    for sfx in self.suffix:
        targets.append(luigi.LocalTarget(rreplace(self.target, self.suffix[0], sfx, 1)))
    return targets