def _mapdamage_model(self, config, destination, prefix, files_and_nodes):
    # Generates basic plots / table files
    plot = self._mapdamage_plot(config=config,
                                destination=destination,
                                prefix=prefix,
                                files_and_nodes=files_and_nodes)

    # Builds model of post-mortem DNA damage
    model = MapDamageModelNode.customize(reference=prefix["Reference"],
                                         directory=destination,
                                         dependencies=plot)
    apply_options(model.command, self.options["mapDamage"])

    return model.build_node()
def _mapdamage_rescale(self, config, destination, prefix, files_and_nodes):
    # Builds model of post-mortem DNA damage
    model = self._mapdamage_model(config=config,
                                  destination=destination,
                                  prefix=prefix,
                                  files_and_nodes=files_and_nodes)

    # Rescales BAM quality scores using model built above
    input_files = files_and_nodes.keys()
    output_filename = self.folder + ".rescaled.bam"
    scale = MapDamageRescaleNode.customize(config=config,
                                           reference=prefix["Reference"],
                                           input_files=input_files,
                                           output_file=output_filename,
                                           directory=destination,
                                           dependencies=model)
    apply_options(scale.command, self.options["mapDamage"])
    scale = scale.build_node()

    # Grab indexing and validation nodes, required by ROIs and GATK
    index_required = bool(prefix.get("RegionsOfInterest")) \
        or self.options["Features"]["RealignedBAM"]
    validate = index_and_validate_bam(config=config,
                                      prefix=prefix,
                                      node=scale,
                                      create_index=index_required)

    return {output_filename: validate}, (model,)
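# The damage model node is returned alongside the rescaled BAM so that the
# caller can track it as a separate dependency. A usage sketch (hypothetical;
# the actual call site is not part of this excerpt):
#   files_and_nodes, model_nodes = \
#       self._mapdamage_rescale(config, destination, prefix, files_and_nodes)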
def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
    rmdup_cls = {"collapsed": FilterCollapsedBAMNode,
                 "normal": MarkDuplicatesNode}

    # The "mark" strategy flags duplicates instead of removing them
    keep_duplicates = False
    if isinstance(strategy, types.StringTypes) \
            and (strategy.lower() == "mark"):
        keep_duplicates = True

    # Indexing is required if we wish to calculate per-region statistics,
    index_required = (bool(prefix.get("RegionsOfInterest")) or
                      # or if we wish to run GATK, but only if we don't
                      # use a downstream rescaled BAM as input for GATK
                      (self.options["Features"]["RealignedBAM"] and
                       self.options["Features"]["mapDamage"] != 'rescale'))

    results = {}
    for (key, files_and_nodes) in bams.items():
        output_filename = self.folder + ".rmdup.%s.bam" % key
        node = rmdup_cls[key](config=config,
                              input_bams=files_and_nodes.keys(),
                              output_bam=output_filename,
                              keep_dupes=keep_duplicates,
                              dependencies=files_and_nodes.values())
        validated_node = index_and_validate_bam(config=config,
                                                prefix=prefix,
                                                node=node,
                                                create_index=index_required)

        results[key] = {output_filename: validated_node}

    return results
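# Usage sketch for _remove_pcr_duplicates (hypothetical inputs; the real call
# site assembles `bams` from per-library BAM nodes). Passing strategy="mark"
# flags duplicates (keep_dupes=True) instead of filtering them out:
#   bams = {"normal": {"lib.sorted.bam": sort_node},
#           "collapsed": {"lib.collapsed.bam": collapse_node}}
#   results = self._remove_pcr_duplicates(config, prefix, bams,
#                                         strategy="mark")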
def _build_realigned_bam(self, config, prefix, bams):
    output_filename = os.path.join(
        self.folder,
        "%s.%s.realigned.bam" % (self.target, prefix["Name"]))
    intervals_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".intervals")
    validated_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".realigned.validated")

    # Identify intervals that may require local realignment around indels
    trainer = gatk.GATKIndelTrainerNode(config=config,
                                        reference=prefix["Reference"],
                                        infiles=bams.keys(),
                                        outfile=intervals_filename,
                                        threads=config.gatk_max_threads,
                                        dependencies=self.datadup_check)
    # Realign reads over the identified intervals
    aligner = gatk.GATKIndelRealignerNode(config=config,
                                          reference=prefix["Reference"],
                                          infiles=bams.keys(),
                                          intervals=intervals_filename,
                                          outfile=output_filename,
                                          dependencies=trainer)
    validated_node = index_and_validate_bam(config=config,
                                            prefix=prefix,
                                            node=aligner,
                                            log_file=validated_filename)

    return {output_filename: validated_node}
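# This presumably follows the usual two-step GATK indel-realignment protocol:
# the trainer node builds the intervals file, and the realigner node rewrites
# the BAM over those intervals. Illustrative call (hypothetical; the
# dispatching code is not shown in this excerpt):
#   realigned = self._build_realigned_bam(config, prefix, bams=merged_bams)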
def _build_raw_bam(self, config, prefix, files_and_bams):
    output_filename = os.path.join(
        self.folder, "%s.%s.bam" % (self.target, prefix["Name"]))
    validated_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".validated")

    # Merge the per-library BAMs into a single, unmodified ("raw") BAM
    node = MergeSamFilesNode(config=config,
                             input_bams=files_and_bams.keys(),
                             output_bam=output_filename,
                             dependencies=self.datadup_check)
    validated_node = index_and_validate_bam(config=config,
                                            prefix=prefix,
                                            node=node,
                                            log_file=validated_filename)

    return {output_filename: validated_node}
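# For, e.g., self.target = "sample" and prefix name "hg19", the paths built
# above are (illustrative):
#   <self.folder>/sample.hg19.bam        -- merged, raw BAM
#   <self.folder>/sample/hg19.validated  -- validation log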
def _finalize_nodes(self, config, prefix, parameters, node):
    self._set_rg_tags(node.commands["convert"])

    # Discard reads below the configured minimum mapping quality
    min_quality = self.options["Aligners"]["BWA"]["MinQuality"]
    node.commands["convert"].set_option('-q', min_quality)

    # Discard unmapped reads (SAM flag 0x4), if requested
    if self.options["Aligners"]["BWA"]["FilterUnmappedReads"]:
        node.commands["convert"].set_option('-F', "0x4")

    index_required = self._is_indexing_required(prefix)
    validated_node = index_and_validate_bam(config=config,
                                            prefix=prefix,
                                            node=node.build_node(),
                                            create_index=index_required)

    return validated_node
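# _is_indexing_required() is referenced above but not defined in this excerpt.
# A minimal sketch of what it plausibly computes, mirroring the inline check
# in _mapdamage_rescale above (an assumption, not the actual implementation):
def _is_indexing_required(self, prefix):
    # An index is needed for per-region (ROI) statistics, and by GATK
    # when indel realignment is enabled
    return bool(prefix.get("RegionsOfInterest")) \
        or self.options["Features"]["RealignedBAM"]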