コード例 #1
0
    def __init__(self, config, prefix, node, log_file=None):
        """Wrap `node` in a meta-node that also validates its BAM output.

        The input file and its index status are obtained from
        `self._get_input_file(node)`. If no index is reported, a
        BAMIndexNode is inserted between `node` and the validation step.

        Arguments:
          config   -- Passed unchanged to ValidateBAMNode.customize.
          prefix   -- Currently unused; only needed by the disabled
                      reference (MD tag) check, see the FIXME below.
          node     -- The node to validate; its dependencies become the
                      dependencies of the meta-node.
          log_file -- Passed to ValidateBAMNode.customize as `output_log`.
        """
        bam_file, indexed = self._get_input_file(node)
        upstream = node.dependencies

        # The validation step must run after indexing, if indexing is needed;
        # otherwise it depends directly on the wrapped node.
        members = [node]
        last_step = node
        if not indexed:
            last_step = BAMIndexNode(infile=bam_file, dependencies=node)
            members.append(last_step)

        params = ValidateBAMNode.customize(config=config,
                                           input_bam=bam_file,
                                           output_log=log_file,
                                           dependencies=last_step)

        # FIXME: Checking MD tags against the reference sequence (i.e. passing
        #        prefix["Reference"] through the "R" option) is disabled due
        #        to issues with Picard/Samtools disagreeing, and for backwards
        #        compatibility.

        ignored_checks = (
            # Ignored since we filter out misses and low-quality hits during
            # mapping, which leads to a large proportion of missing mates for
            # PE reads.
            "MATE_NOT_FOUND",
            # Ignored due to high rate of false positives for lanes with few
            # hits, where high-quality reads may cause ValidateSamFile to
            # mis-identify the qualities.
            "INVALID_QUALITY_FORMAT",
        )
        for check in ignored_checks:
            params.command.add_option("IGNORE", check, sep="=")
        members.append(params.build_node())

        # Re-use the description of the wrapped node, minus its first
        # character, behind the "<w/Validation: " prefix.
        label = "<w/Validation: " + str(node)[1:]
        MetaNode.__init__(self,
                          description=label,
                          subnodes=members,
                          dependencies=upstream)
コード例 #2
0
ファイル: nodes.py プロジェクト: health1987/paleomix
    def __init__(self, config, prefix, node, log_file=None):
        """Create a meta-node running `node` followed by BAM validation.

        Sub-nodes are, in order: `node` itself, a BAMIndexNode (only when
        `self._get_input_file(node)` reports that no index exists), and the
        node built from ValidateBAMNode.customize. The meta-node takes over
        the dependencies of `node`.

        Arguments:
          config   -- Forwarded to ValidateBAMNode.customize.
          prefix   -- Not used at present (see the FIXME about MD tags).
          node     -- Node whose BAM file is to be validated.
          log_file -- Forwarded as `output_log` to ValidateBAMNode.customize.
        """
        input_bam, is_indexed = self._get_input_file(node)
        outer_dependencies = node.dependencies

        # Holds zero or one nodes; indexing is only added when required.
        index_nodes = []
        if not is_indexed:
            index_nodes.append(BAMIndexNode(infile=input_bam,
                                            dependencies=node))
        validation_dependency = index_nodes[0] if index_nodes else node

        builder = ValidateBAMNode.customize(config=config,
                                            input_bam=input_bam,
                                            output_log=log_file,
                                            dependencies=validation_dependency)
        options = builder.command

        # FIXME: Check of MD tags against the reference sequence (via the "R"
        #   option and prefix["Reference"]) is disabled due to issues with
        #   Picard/Samtools disagreeing, and backwards compatibility. See the
        #   discussion at
        #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639

        # Ignored since we may filter out misses and low-quality hits during
        # mapping, which leads to a large proportion of missing PE mates.
        options.add_option("IGNORE", "MATE_NOT_FOUND", sep="=")
        # Ignored due to high rate of false positives for lanes with few hits,
        # where high-quality reads may cause mis-identification of qualities
        options.add_option("IGNORE", "INVALID_QUALITY_FORMAT", sep="=")

        validator = builder.build_node()

        # The wrapped node's description is embedded without its first
        # character.
        description = "<w/Validation: " + str(node)[1:]
        MetaNode.__init__(self,
                          description=description,
                          subnodes=[node] + index_nodes + [validator],
                          dependencies=outer_dependencies)
コード例 #3
0
ファイル: gatk.py プロジェクト: health1987/paleomix
    def __init__(self, config, reference, infiles, outfile, intervals=None,
                 dependencies=()):
        """Chain an indel trainer node and an indel realigner node.

        The trainer writes the intervals file, which the realigner then reads
        while producing `outfile`; the realigner depends on the trainer.

        Arguments:
          config       -- Passed to both sub-nodes.
          reference    -- Passed to both sub-nodes.
          infiles      -- One or more input files; coerced to a tuple.
          outfile      -- Output of the realigner node.
          intervals    -- Path of the intervals file; if not given (or empty),
                          defaults to `outfile` + ".intervals".
          dependencies -- Dependencies of the trainer and of the meta-node.
        """
        intervals = intervals or (outfile + ".intervals")
        infiles = safe_coerce_to_tuple(infiles)

        # Arguments that are identical for both sub-nodes.
        shared = {"config": config,
                  "reference": reference,
                  "infiles": infiles}

        trainer = _IndelTrainerNode(outfile=intervals,
                                    dependencies=dependencies,
                                    **shared)
        aligner = _IndelRealignerNode(intervals=intervals,
                                      outfile=outfile,
                                      dependencies=trainer,
                                      **shared)

        summary = (len(infiles), outfile)
        MetaNode.__init__(self,
                          description="<GATK Indel Realigner: %i files -> '%s'>" % summary,
                          subnodes=[trainer, aligner],
                          dependencies=dependencies)
コード例 #4
0
ファイル: sequences.py プロジェクト: health1987/paleomix
    def __init__(self, input_files, destination, filter_by, dependencies=()):
        """Create one FilterSingletonsNode per input file and group them.

        Arguments:
          input_files  -- Mapping of input filename to the node which the
                          filtering of that file depends on.
          destination  -- Passed to fileutils.reroot_path together with each
                          input filename to obtain the output filename.
          filter_by    -- Anything accepted by dict(); a single copy is
                          shared by all created nodes.
          dependencies -- Dependencies of the meta-node itself.
        """
        # Copy once, so that the sub-nodes do not hold on to the caller's
        # object.
        filter_by = dict(filter_by)

        # items() rather than the Python 2-only iteritems(); the pairs are
        # visited in the same order, and this also works on Python 3.
        subnodes = [
            FilterSingletonsNode(
                input_file=filename,
                output_file=fileutils.reroot_path(destination, filename),
                filter_by=filter_by,
                dependencies=node)
            for (filename, node) in input_files.items()
        ]

        description = "<FilterSingleton: %i files -> '%s'>" \
            % (len(subnodes), destination)
        MetaNode.__init__(self,
                          description=description,
                          subnodes=subnodes,
                          dependencies=dependencies)
コード例 #5
0
ファイル: sequences.py プロジェクト: UMNPonyClub/paleomix
    def __init__(self, input_files, destination, filter_by, dependencies=()):
        """Group per-file singleton filtering nodes into a single meta-node.

        For each (filename, node) pair in `input_files`, a
        FilterSingletonsNode is created which reads `filename`, depends on
        `node`, and writes to the path returned by
        fileutils.reroot_path(destination, filename).

        Arguments:
          input_files  -- Mapping of input filename to its upstream node.
          destination  -- Used to derive each output path; also shown in the
                          description of the meta-node.
          filter_by    -- Converted to a dict and handed to every sub-node.
          dependencies -- Dependencies of the meta-node.
        """
        shared_filter = dict(filter_by)

        filter_nodes = []
        # dict.iteritems() only exists on Python 2; items() behaves the same
        # for this loop and is available on both Python 2 and 3.
        for (source_path, upstream) in input_files.items():
            target_path = fileutils.reroot_path(destination, source_path)
            filter_node = FilterSingletonsNode(input_file=source_path,
                                               output_file=target_path,
                                               filter_by=shared_filter,
                                               dependencies=upstream)
            filter_nodes.append(filter_node)

        MetaNode.__init__(self,
                          description="<FilterSingleton: %i files -> '%s'>"
                          % (len(filter_nodes), destination),
                          subnodes=filter_nodes,
                          dependencies=dependencies)
コード例 #6
0
ファイル: mafft.py プロジェクト: schae234/pypeline
    def __init__(self, rootdir, sequences, preset = "auto", subnodes = (), dependencies = ()):
        """Create one MAFFTNode per sequence and group them in a meta-node.

        For every name in `sequences`, a MAFFTNode is created which reads
        os.path.join(rootdir, name) + ".fasta" and writes the same path with
        the extension ".afa".

        Arguments:
          rootdir      -- Folder that is joined with each sequence name.
          sequences    -- Iterable of sequence names (without extension).
          preset       -- Passed unchanged to every MAFFTNode.
          subnodes     -- Additional node(s) to include in the meta-node
                          besides the generated MAFFT nodes. A single
                          (non-iterable) node is accepted as well.
          dependencies -- Passed to every MAFFTNode and to the meta-node.
        """
        # Previously the 'subnodes' argument was overwritten with an empty
        # list, silently discarding whatever the caller passed in. Keep the
        # caller's nodes and add the generated ones after them.
        try:
            all_subnodes = list(subnodes)
        except TypeError:
            # Not iterable; treat the value as a single node.
            all_subnodes = [subnodes]

        for sequence in sequences:
            prefix = os.path.join(rootdir, sequence)
            all_subnodes.append(MAFFTNode(infile=prefix + ".fasta",
                                          outfile=prefix + ".afa",
                                          preset=preset,
                                          dependencies=dependencies))

        description = "<MAFFTAlignSequences (%s): In '%s'>" % (preset, rootdir)
        MetaNode.__init__(self,
                          description=description,
                          subnodes=all_subnodes,
                          dependencies=dependencies)
コード例 #7
0
ファイル: gatk.py プロジェクト: UMNPonyClub/paleomix
    def __init__(self, config, reference, infiles, outfile, intervals=None, dependencies=()):
        """Set up indel realignment as a two-step meta-node.

        Step one (_IndelTrainerNode) produces the intervals file from the
        input files; step two (_IndelRealignerNode) depends on step one and
        uses that intervals file to produce `outfile`.

        Arguments:
          config       -- Handed to both steps.
          reference    -- Handed to both steps.
          infiles      -- Input file(s); converted with safe_coerce_to_tuple.
          outfile      -- Destination of the realigner step.
          intervals    -- Location of the intervals file. A false value
                          (the default, None) selects outfile + ".intervals".
          dependencies -- Dependencies of the first step and of the meta-node.
        """
        if intervals:
            intervals_path = intervals
        else:
            intervals_path = outfile + ".intervals"

        bam_files = safe_coerce_to_tuple(infiles)

        training_step = _IndelTrainerNode(
            config=config,
            reference=reference,
            infiles=bam_files,
            outfile=intervals_path,
            dependencies=dependencies,
        )
        # Depends on the training step, since it consumes the intervals file.
        realign_step = _IndelRealignerNode(
            config=config,
            reference=reference,
            intervals=intervals_path,
            infiles=bam_files,
            outfile=outfile,
            dependencies=training_step,
        )

        file_count = len(bam_files)
        MetaNode.__init__(
            self,
            description="<GATK Indel Realigner: %i files -> '%s'>" % (file_count, outfile),
            subnodes=[training_step, realign_step],
            dependencies=dependencies,
        )