Ejemplo n.º 1
0
    def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
        """Create one duplicate-handling node for every entry in `bams`.

        `bams` maps a kind ("collapsed" or "normal") to a dict of
        {input filename: node}. "collapsed" entries are processed with
        FilterCollapsedBAMNode and "normal" entries with MarkDuplicatesNode;
        each resulting node is wrapped by index_and_validate_bam.

        If `strategy` is the string "mark" (in any case), the nodes are
        created with keep_dupes=True, otherwise with keep_dupes=False.

        Returns a dict mapping each kind to {output filename: wrapped node}.
        """
        node_classes = {
            "collapsed": FilterCollapsedBAMNode,
            "normal": MarkDuplicatesNode,
        }

        keep_duplicates = (isinstance(strategy, types.StringTypes)
                           and strategy.lower() == "mark")

        if prefix.get("RegionsOfInterest"):
            # Per-region statistics require an indexed BAM
            index_required = True
        else:
            # GATK also requires an index, but only when it is not fed a
            # downstream rescaled BAM instead of this one
            features = self.options["Features"]
            index_required = (features["RealignedBAM"]
                              and not self.options["RescaleQualities"])

        deduplicated = {}
        for (kind, inputs) in bams.items():
            destination = self.folder + (".rmdup.%s.bam" % kind)
            node_cls = node_classes[kind]
            rmdup_node = node_cls(config=config,
                                  input_bams=inputs.keys(),
                                  output_bam=destination,
                                  keep_dupes=keep_duplicates,
                                  dependencies=inputs.values())

            deduplicated[kind] = {
                destination: index_and_validate_bam(
                    config, prefix, rmdup_node,
                    create_index=index_required),
            }

        return deduplicated
Ejemplo n.º 2
0
    def _mapdamage_rescale(self, config, destination, prefix, files_and_nodes):
        """Build the mapDamage plot -> model -> rescale chain of nodes.

        The plot node comes from self._mapdamage_plot; the model node depends
        on it, and the rescale node depends on the model node. Both the model
        and the rescale commands receive the "mapDamage" options. The rescale
        node is finally wrapped by index_and_validate_bam.

        Returns a 2-tuple: {rescaled BAM filename: validation node} and a
        1-tuple containing the model node.
        """
        output_filename = self.folder + ".rescaled.bam"

        # Basic plots / table files, which the model below depends upon
        plot_node = self._mapdamage_plot(config=config,
                                         destination=destination,
                                         prefix=prefix,
                                         files_and_nodes=files_and_nodes)

        # Model of post-mortem DNA damage
        model_builder = MapDamageModelNode.customize(
            reference=prefix["Reference"],
            directory=destination,
            dependencies=plot_node)
        apply_options(model_builder.command, self.options["mapDamage"])
        model_node = model_builder.build_node()

        # Rescaling of BAM quality scores, using the model built above
        rescale_builder = MapDamageRescaleNode.customize(
            config=config,
            reference=prefix["Reference"],
            input_files=files_and_nodes.keys(),
            output_file=output_filename,
            directory=destination,
            dependencies=model_node)
        apply_options(rescale_builder.command, self.options["mapDamage"])
        rescale_node = rescale_builder.build_node()

        # Indexing and validation nodes are required by ROIs and GATK
        needs_index = (bool(prefix.get("RegionsOfInterest"))
                       or self.options["Features"]["RealignedBAM"])
        validated_node = index_and_validate_bam(config, prefix, rescale_node,
                                                create_index=needs_index)

        return {output_filename: validated_node}, (model_node,)
Ejemplo n.º 3
0
    def _build_realigned_bam(self, config, prefix, bams):
        """Build the GATK indel-realignment nodes for the BAMs in `bams`.

        A GATKIndelTrainerNode writes an intervals file for the input BAMs
        (the keys of `bams`); a GATKIndelRealignerNode, depending on the
        trainer, reads that file and writes the realigned BAM. The realigner
        is then wrapped by index_and_validate_bam, using a dedicated log file.

        Returns {realigned BAM filename: validated node}.
        """
        prefix_name = prefix["Name"]

        # The BAM is placed directly in self.folder; the auxiliary files are
        # placed in the per-target sub-folder
        realigned_bam = os.path.join(
            self.folder, "%s.%s.realigned.bam" % (self.target, prefix_name))
        intervals_path = os.path.join(
            self.folder, self.target, prefix_name + ".intervals")
        validation_log = os.path.join(
            self.folder, self.target, prefix_name + ".realigned.validated")

        training_node = gatk.GATKIndelTrainerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            outfile=intervals_path,
            threads=config.gatk_max_threads,
            dependencies=self.datadup_check)

        realigner_node = gatk.GATKIndelRealignerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            intervals=intervals_path,
            outfile=realigned_bam,
            dependencies=training_node)

        checked_node = index_and_validate_bam(config=config,
                                              prefix=prefix,
                                              node=realigner_node,
                                              log_file=validation_log)

        return {realigned_bam: checked_node}
Ejemplo n.º 4
0
    def _mapdamage_rescale(self, config, destination, prefix, files_and_nodes):
        """Create the node that rescales BAM quality scores with mapDamage.

        The model node is obtained from self._mapdamage_model and serves as
        the dependency of a MapDamageRescaleNode, whose command receives the
        "mapDamage" options. The rescale node is then wrapped by
        index_and_validate_bam.

        Returns a 2-tuple: {rescaled BAM filename: validation node} and a
        1-tuple containing the model node.
        """
        model_node = self._mapdamage_model(config=config,
                                           destination=destination,
                                           prefix=prefix,
                                           files_and_nodes=files_and_nodes)

        # Rescale BAM quality scores using the model obtained above
        rescaled_bam = self.folder + ".rescaled.bam"
        builder = MapDamageRescaleNode.customize(
            config=config,
            reference=prefix["Reference"],
            input_files=files_and_nodes.keys(),
            output_file=rescaled_bam,
            directory=destination,
            dependencies=model_node)
        apply_options(builder.command, self.options["mapDamage"])
        rescale_node = builder.build_node()

        # Indexing and validation nodes are required by ROIs and GATK
        if prefix.get("RegionsOfInterest"):
            index_required = True
        else:
            index_required = self.options["Features"]["RealignedBAM"]

        validated_node = index_and_validate_bam(config=config,
                                                prefix=prefix,
                                                node=rescale_node,
                                                create_index=index_required)

        return {rescaled_bam: validated_node}, (model_node,)
Ejemplo n.º 5
0
    def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
        """Create one duplicate-handling node for every entry in `bams`.

        `bams` maps a kind ("collapsed" or "normal") to a dict of
        {input filename: node}; the kind selects between
        FilterCollapsedBAMNode and MarkDuplicatesNode. Every node is wrapped
        by index_and_validate_bam.

        The nodes are created with keep_dupes=True only if `strategy` is the
        string "mark" (in any case).

        Returns a dict mapping each kind to {output filename: wrapped node}.
        """
        node_by_kind = {
            "collapsed": FilterCollapsedBAMNode,
            "normal": MarkDuplicatesNode,
        }

        is_string = isinstance(strategy, types.StringTypes)
        keep_duplicates = is_string and (strategy.lower() == "mark")

        # An index is required to calculate per-region statistics ...
        index_required = bool(prefix.get("RegionsOfInterest"))
        if not index_required:
            # ... or to run GATK, unless GATK is given a downstream rescaled
            # BAM as its input instead
            features = self.options["Features"]
            index_required = (features["RealignedBAM"]
                              and features["mapDamage"] != 'rescale')

        nodes_by_kind = {}
        for (kind, inputs) in bams.items():
            rmdup_filename = self.folder + (".rmdup.%s.bam" % kind)
            node_cls = node_by_kind[kind]
            rmdup_node = node_cls(config=config,
                                  input_bams=inputs.keys(),
                                  output_bam=rmdup_filename,
                                  keep_dupes=keep_duplicates,
                                  dependencies=inputs.values())

            checked_node = index_and_validate_bam(config=config,
                                                  prefix=prefix,
                                                  node=rmdup_node,
                                                  create_index=index_required)
            nodes_by_kind[kind] = {rmdup_filename: checked_node}

        return nodes_by_kind
Ejemplo n.º 6
0
    def _build_realigned_bam(self, config, prefix, bams):
        """Return {realigned BAM filename: validated node} for `bams`.

        Two GATK nodes are chained: GATKIndelTrainerNode produces an
        intervals file from the input BAMs (the keys of `bams`), and
        GATKIndelRealignerNode uses that file to write the realigned BAM.
        The realigner node is passed to index_and_validate_bam together with
        the path of a validation log file.
        """
        bam_name = "%s.%s.realigned.bam" % (self.target, prefix["Name"])
        output_filename = os.path.join(self.folder, bam_name)
        intervals_filename = os.path.join(self.folder,
                                          self.target,
                                          prefix["Name"] + ".intervals")
        log_filename = os.path.join(self.folder,
                                    self.target,
                                    prefix["Name"] + ".realigned.validated")

        # Step 1: determine the intervals to be realigned
        find_intervals = gatk.GATKIndelTrainerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            outfile=intervals_filename,
            threads=config.gatk_max_threads,
            dependencies=self.datadup_check)

        # Step 2: realign the BAMs using the intervals from step 1
        realign = gatk.GATKIndelRealignerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            intervals=intervals_filename,
            outfile=output_filename,
            dependencies=find_intervals)

        return {
            output_filename: index_and_validate_bam(config=config,
                                                    prefix=prefix,
                                                    node=realign,
                                                    log_file=log_filename),
        }
Ejemplo n.º 7
0
    def _build_raw_bam(self, config, prefix, files_and_bams):
        """Merge the input BAMs into a single BAM and validate the result.

        The keys of `files_and_bams` are merged by a MergeSamFilesNode, which
        depends on self.datadup_check; the merge node is then passed to
        index_and_validate_bam together with the path of a ".validated" file.

        Returns {merged BAM filename: validated node}.
        """
        merged_bam = os.path.join(
            self.folder, "%s.%s.bam" % (self.target, prefix["Name"]))
        validation_path = os.path.join(
            self.folder, self.target, prefix["Name"] + ".validated")

        merge_node = MergeSamFilesNode(config=config,
                                       input_bams=files_and_bams.keys(),
                                       output_bam=merged_bam,
                                       dependencies=self.datadup_check)

        # All four arguments are deliberately passed by position
        checked_node = index_and_validate_bam(
            config, prefix, merge_node, validation_path)

        return {merged_bam: checked_node}
Ejemplo n.º 8
0
    def _build_raw_bam(self, config, prefix, files_and_bams):
        """Return {merged BAM filename: validated node} for the input BAMs.

        A MergeSamFilesNode (depending on self.datadup_check) merges the keys
        of `files_and_bams` into one BAM located in self.folder; the node is
        then wrapped by index_and_validate_bam, which is given a ".validated"
        log file located in the per-target sub-folder.
        """
        prefix_name = prefix["Name"]
        bam_path = os.path.join(
            self.folder, "%s.%s.bam" % (self.target, prefix_name))
        log_path = os.path.join(
            self.folder, self.target, prefix_name + ".validated")

        merged = MergeSamFilesNode(config=config,
                                   input_bams=files_and_bams.keys(),
                                   output_bam=bam_path,
                                   dependencies=self.datadup_check)

        return {
            bam_path: index_and_validate_bam(config=config,
                                             prefix=prefix,
                                             node=merged,
                                             log_file=log_path),
        }
Ejemplo n.º 9
0
    def _finalize_nodes(self, config, prefix, parameters, node):
        """Apply the final options to `node`, build it, and validate it.

        The "convert" command of `node` is given read-group tags (through
        self._set_rg_tags), the BWA "MinQuality" setting as '-q', and, if
        "FilterUnmappedReads" is enabled, '-F 0x4'. The node is then built
        and wrapped by index_and_validate_bam.

        `parameters` is accepted but not used by this method.

        Returns the node produced by index_and_validate_bam.
        """
        self._set_rg_tags(node.commands["convert"])

        bwa_options = self.options["Aligners"]["BWA"]
        node.commands["convert"].set_option('-q', bwa_options["MinQuality"])

        if bwa_options["FilterUnmappedReads"]:
            # NOTE(review): 0x4 is presumably the SAM "unmapped" flag, so
            # '-F 0x4' should exclude unmapped reads -- confirm against the
            # tool invoked by the "convert" command
            node.commands["convert"].set_option('-F', "0x4")

        needs_index = self._is_indexing_required(prefix)
        built_node = node.build_node()

        return index_and_validate_bam(config=config,
                                      prefix=prefix,
                                      node=built_node,
                                      create_index=needs_index)