Python swap_ext 예제들, pypeline.common.fileutils.swap_ext Python 예제들

예제 #1

0

파일 보기

파일: picard.py 프로젝트: CarlesV/paleomix

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  keep_dupes=False, dependencies=()):
        """Assemble the command-builder for running MarkDuplicates.jar.

        The metrics file defaults to `output_bam` with its extension swapped
        for ".metrics" when `output_metrics` is not given. Unless `keep_dupes`
        is set, the REMOVE_DUPLICATES option is enabled (non-fixed).

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "MarkDuplicates.jar")
        builder = AtomicJavaCmdBuilder(jar_path,
                                       jre_options=config.jre_options)

        # A .bai index is requested, as many other programs require one
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        builder.add_multiple_options("I", input_bams, sep="=")

        if not keep_dupes:
            # Duplicates are dropped from the output by default to save space
            builder.set_option("REMOVE_DUPLICATES", "True",
                               sep="=", fixed=False)

        if not output_metrics:
            output_metrics = swap_ext(output_bam, ".metrics")

        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": output_metrics,
            "CHECK_JAR": _picard_version(config, jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #2

0

파일 보기

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  keep_dupes=False, dependencies=()):
        """Assemble the Picard "MarkDuplicates" command-builder.

        The metrics file defaults to `output_bam` with its extension swapped
        for ".metrics" when `output_metrics` is not given. Unless `keep_dupes`
        is set, the REMOVE_DUPLICATES option is enabled (non-fixed).

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "MarkDuplicates")

        # A .bai index is requested, as many other programs require one
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        builder.add_multiple_options("I", input_bams, sep="=")

        if not keep_dupes:
            # Duplicates are dropped from the output by default to save space
            builder.set_option("REMOVE_DUPLICATES", "True",
                               sep="=", fixed=False)

        if not output_metrics:
            output_metrics = swap_ext(output_bam, ".metrics")

        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": output_metrics,
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #3

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  dependencies=()):
        """Assemble the command-builder for running MarkDuplicates.jar.

        Each input BAM is added as an "I" option referring to its own
        IN_BAM_xx keyword. The metrics file defaults to `output_bam` with its
        extension swapped for ".metrics" when `output_metrics` is not given.

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "MarkDuplicates.jar")
        builder = AtomicJavaCmdBuilder(config, jar_path)

        # A .bai index is requested, as many other programs require one
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

        for index, bam_fname in enumerate(safe_coerce_to_tuple(input_bams)):
            bam_key = "IN_BAM_%02i" % index
            builder.add_option("I", "%%(%s)s" % bam_key, sep="=")
            builder.set_kwargs(**{bam_key: bam_fname})

        # Duplicates are dropped from the output by default to save space
        builder.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

        metrics_fname = output_metrics or swap_ext(output_bam, ".metrics")
        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": metrics_fname,
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #4

0

파일 보기

파일: phylo.py 프로젝트: CarlesV/paleomix

def _build_examl_bootstraps(options, phylo, destination, input_alignment,
                            input_partition, dependencies):
    """Create the nodes for the ExaML bootstrap replicates.

    The number of replicates is read from phylo["ExaML"]["Bootstraps"]; for
    each one a PHYLIPBootstrapNode, an ExaMLParserNode and the result of
    _examl_nodes are chained together, using files located in the
    "bootstraps" sub-folder of `destination`.

    Returns the result of _build_rerooted_trees for a MetaNode wrapping all
    replicates, or None if no replicates were created.
    """
    replicate_count = phylo["ExaML"]["Bootstraps"]
    alignment_template = os.path.join(destination, "bootstraps",
                                      "bootstrap.%04i.phy")

    replicates = []
    for replicate in xrange(replicate_count):
        alignment = alignment_template % (replicate,)
        resampling = PHYLIPBootstrapNode(input_alignment=input_alignment,
                                         input_partition=input_partition,
                                         output_alignment=alignment,
                                         seed=random.randint(1, 2**32 - 1),
                                         dependencies=dependencies)

        binary = swap_ext(alignment, ".binary")
        # The "%s" placeholder is left in place; the resulting string is
        # handed on as the output template
        output_template = swap_ext(alignment, ".%s")
        parser = ExaMLParserNode(input_alignment=alignment,
                                 input_partition=input_partition,
                                 output_file=binary,
                                 dependencies=resampling)

        examl = _examl_nodes(options=options,
                             settings=phylo,
                             input_alignment=alignment,
                             input_partitions=input_partition,
                             input_binary=binary,
                             output_template=output_template,
                             dependencies=parser)
        replicates.append(examl)

    if not replicates:
        return None

    meta = MetaNode(description="Bootstraps",
                    subnodes=replicates,
                    dependencies=dependencies)
    return _build_rerooted_trees(meta, phylo["RootTreesOn"])

예제 #5

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  dependencies=()):
        """Assemble the command-builder for running MarkDuplicates.jar.

        Each input BAM is added as an "I" option referring to its own
        IN_BAM_xx keyword. The metrics file defaults to `output_bam` with its
        extension swapped for ".metrics" when `output_metrics` is not given.

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "MarkDuplicates.jar")
        builder = AtomicJavaCmdBuilder(config, jar_path)

        # A .bai index is requested, as many other programs require one
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

        for index, bam_fname in enumerate(safe_coerce_to_tuple(input_bams)):
            bam_key = "IN_BAM_%02i" % index
            builder.add_option("I", "%%(%s)s" % bam_key, sep="=")
            builder.set_kwargs(**{bam_key: bam_fname})

        # Duplicates are dropped from the output by default to save space
        builder.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

        metrics_fname = output_metrics or swap_ext(output_bam, ".metrics")
        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": metrics_fname,
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #6

0

파일 보기

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  keep_dupes=False, dependencies=()):
        """Assemble the command-builder for running MarkDuplicates.jar.

        The metrics file defaults to `output_bam` with its extension swapped
        for ".metrics" when `output_metrics` is not given. Unless `keep_dupes`
        is set, the REMOVE_DUPLICATES option is enabled (non-fixed).

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "MarkDuplicates.jar")
        builder = AtomicJavaCmdBuilder(jar_path,
                                       jre_options=config.jre_options)

        # A .bai index is requested, as many other programs require one
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        builder.add_multiple_options("I", input_bams, sep="=")

        if not keep_dupes:
            # Duplicates are dropped from the output by default to save space
            builder.set_option("REMOVE_DUPLICATES", "True",
                               sep="=", fixed=False)

        if not output_metrics:
            output_metrics = swap_ext(output_bam, ".metrics")

        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": output_metrics,
            "CHECK_JAR": _picard_version(config, jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #7

0

파일 보기

    def __init__(self, config, input_bams, pipename="input.bam"):
        """Prepare the commands and keywords needed to provide BAM input.

        config     -- object providing jar_root, temp_root and jre_options.
        input_bams -- one or more BAM filenames; normalised using
                      safe_coerce_to_tuple and stored in `self.files`.
        pipename   -- name stored in `self.pipe` and used as TEMP_IN_BAM.

        With more than one input file, a MergeSamFiles command writing to
        `pipename` is stored in `self.commands`. With a single file no command
        is created; instead the file and its ".bai" counterpart are recorded
        in `self.kwargs`.
        """
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}
        if len(self.files) > 1:
            jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
            params = AtomicJavaCmdBuilder(jar=jar_file,
                                          temp_root=config.temp_root,
                                          jre_options=config.jre_options)

            params.set_option("SO", "coordinate", sep="=", fixed=False)
            params.set_option("CREATE_INDEX", "False", sep="=")
            params.set_option("COMPRESSION_LEVEL", 0, sep="=")
            params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            # Use the normalised sequence rather than the raw argument, so
            # that a one-shot iterable is not iterated a second time (which
            # would leave the merge command without any input files)
            params.add_multiple_options("I", self.files, sep="=")

            params.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                              TEMP_OUT_BAM=self.pipe)

            self.commands = [params.finalize()]
        else:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")

예제 #8

0

파일 보기

파일: raxml.py 프로젝트: schae234/pypeline

    def customize(cls,
                  input_alignment,
                  input_partition,
                  output_alignment,
                  dependencies=()):
        """Assemble the raxmlHPC command-builder for one bootstrap alignment.

        input_alignment  -- registered as IN_ALIGNMENT.
        input_partition  -- registered as IN_PARTITION.
        output_alignment -- registered as OUT_ALIGNMENT; the OUT_INFO file is
                            derived from it by swapping in ".info".
        dependencies     -- accepted but not used by this method itself.

        Returns a dict holding the builder under "command".
        """
        command = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

        # Read and (in the case of empty columns) reduce input
        command.set_option("-f", "j")
        # Output files are saved with a .Pypeline postfix, and subsequently renamed
        command.set_option("-n", "Pypeline")
        # Model required, but not used
        command.set_option("-m", "GTRGAMMA")
        # Set random seed for bootstrap generation. May be set to a fixed value
        # to allow replicability. The seed is drawn from [1, 2**31 - 1]; the
        # previous int(random.random() * 2**31 - 1) could yield 0 or -1, which
        # is not a usable (positive) seed.
        command.set_option("-b", random.randint(1, 2**31 - 1), fixed=False)
        # Generate a single bootstrap alignment (makes growing the number of bootstraps easier).
        command.set_option("-N", 1, fixed=False)

        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        # In addition, it may be necessary to remove the .reduced files if created
        command.set_option("-s", "input.alignment")
        command.set_option("-q", "input.partition")

        command.set_kwargs(IN_ALIGNMENT=input_alignment,
                           IN_PARTITION=input_partition,
                           OUT_ALIGNMENT=output_alignment,
                           OUT_INFO=fileutils.swap_ext(output_alignment,
                                                       ".info"))

        return {"command": command}

예제 #9

0

파일 보기

파일: picard.py 프로젝트: CarlesV/paleomix

    def __init__(self, config, input_bams, pipename="input.bam"):
        """Prepare the commands and keywords needed to provide BAM input.

        config     -- object providing jar_root, temp_root and jre_options.
        input_bams -- one or more BAM filenames; normalised using
                      safe_coerce_to_tuple and stored in `self.files`.
        pipename   -- name stored in `self.pipe` and used as TEMP_IN_BAM.

        With more than one input file, a MergeSamFiles command writing to
        `pipename` is stored in `self.commands`. With a single file no command
        is created; instead the file and its ".bai" counterpart are recorded
        in `self.kwargs`.
        """
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}
        if len(self.files) > 1:
            jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
            params = AtomicJavaCmdBuilder(jar=jar_file,
                                          temp_root=config.temp_root,
                                          jre_options=config.jre_options)

            params.set_option("SO", "coordinate", sep="=", fixed=False)
            params.set_option("CREATE_INDEX", "False", sep="=")
            params.set_option("COMPRESSION_LEVEL", 0, sep="=")
            params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            # Use the normalised sequence rather than the raw argument, so
            # that a one-shot iterable is not iterated a second time (which
            # would leave the merge command without any input files)
            params.add_multiple_options("I", self.files, sep="=")

            params.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                              TEMP_OUT_BAM=self.pipe)

            self.commands = [params.finalize()]
        else:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")

예제 #10

0

파일 보기

파일: raxml.py 프로젝트: schae234/pypeline

    def customize(cls, input_alignment, input_partition, output_alignment, dependencies = ()):
        """Assemble the raxmlHPC command-builder for one bootstrap alignment.

        input_alignment  -- registered as IN_ALIGNMENT.
        input_partition  -- registered as IN_PARTITION.
        output_alignment -- registered as OUT_ALIGNMENT; the OUT_INFO file is
                            derived from it by swapping in ".info".
        dependencies     -- accepted but not used by this method itself.

        Returns a dict holding the builder under "command".
        """
        command = AtomicCmdBuilder("raxmlHPC", set_cwd = True)

        # Read and (in the case of empty columns) reduce input
        command.set_option("-f", "j")
        # Output files are saved with a .Pypeline postfix, and subsequently renamed
        command.set_option("-n", "Pypeline")
        # Model required, but not used
        command.set_option("-m", "GTRGAMMA")
        # Set random seed for bootstrap generation. May be set to a fixed value
        # to allow replicability. The seed is drawn from [1, 2**31 - 1]; the
        # previous int(random.random() * 2**31 - 1) could yield 0 or -1, which
        # is not a usable (positive) seed.
        command.set_option("-b", random.randint(1, 2**31 - 1), fixed = False)
        # Generate a single bootstrap alignment (makes growing the number of bootstraps easier).
        command.set_option("-N", 1, fixed = False)

        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        # In addition, it may be necessary to remove the .reduced files if created
        command.set_option("-s", "input.alignment")
        command.set_option("-q", "input.partition")

        command.set_kwargs(IN_ALIGNMENT      = input_alignment,
                           IN_PARTITION      = input_partition,

                           OUT_ALIGNMENT     = output_alignment,
                           OUT_INFO          = fileutils.swap_ext(output_alignment, ".info"))

        return {"command" : command}

예제 #11

0

파일 보기

파일: gatk.py 프로젝트: health1987/paleomix

    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        """Set up a node running GATK IndelRealigner alongside samtools calmd.

        The two commands are wrapped in ParallelCmds; the calmd command reads
        a temporary file named after the basename of `outfile`, and has its
        STDOUT directed to that name with ".calmd" appended.
        """
        self._basename = os.path.basename(outfile)

        infiles = safe_coerce_to_tuple(infiles)
        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(gatk_jar,
                                         jre_options=config.jre_options)

        # Options are applied in the listed order
        for option in (("-T", "IndelRealigner"),
                       ("-R", "%(IN_REFERENCE)s"),
                       ("-targetIntervals", "%(IN_INTERVALS)s"),
                       ("-o", "%(OUT_BAMFILE)s"),
                       ("--bam_compression", 0),
                       ("--disable_bam_indexing",)):
            realigner.set_option(*option)
        _set_input_files(realigner, infiles)

        realigner_kwargs = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "IN_INTERVALS": intervals,
            "OUT_BAMFILE": outfile,
            "CHECK_GATK": _get_gatk_version_check(config),
        }
        realigner.set_kwargs(**realigner_kwargs)

        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        description = "<Indel Realigner (aligning): %s -> %r>" \
            % (describe_files(infiles), outfile)
        commands = ParallelCmds([realigner.finalize(), calmd])
        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)

예제 #12

0

파일 보기

파일: depthhist.py 프로젝트: schae234/pypeline

    def __init__(self, config, target_name, input_files, output_file,
                 intervals_file = None, print_stats = False,
                 max_contigs = _MAX_CONTIGS, dependencies = ()):
        """Set up the depth-histogram node and register its requirements.

        Input files of the node are the BAM files, their ".bai" counterparts
        and, if given, the intervals file. Executables and auxiliary files
        are collected from the commands in the first element returned by
        concatenate_input_bams, in addition to the bedtools executable
        selected by the presence of an intervals file.
        """
        self._target_name = target_name
        self._input_files = safe_coerce_to_tuple(input_files)
        self._output_file = output_file
        self._intervals   = intervals_file
        self._print_stats = print_stats
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        required_files = list(self._input_files)
        required_files.extend(swap_ext(fname, ".bai")
                              for fname in self._input_files)
        if intervals_file:
            required_files.append(intervals_file)

        if intervals_file:
            executables = ["coverageBed"]
        else:
            executables = ["genomeCoverageBed"]

        auxiliary_files = []
        merge_commands = concatenate_input_bams(config, self._input_files)[0]
        for merge_command in merge_commands:
            executables.extend(merge_command.executables)
            auxiliary_files.extend(merge_command.auxiliary_files)

        description = "<DepthHistogram: %s -> '%s'>" \
            % (describe_files(self._input_files), self._output_file)
        Node.__init__(self,
                      description     = description,
                      input_files     = required_files,
                      output_files    = self._output_file,
                      dependencies    = dependencies,
                      executables     = executables,
                      auxiliary_files = auxiliary_files)

예제 #13

0

파일 보기

파일: gatk.py 프로젝트: schae234/pypeline

    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies = ()):
        """Set up a node running GATK IndelRealigner alongside samtools calmd.

        The two commands are wrapped in ParallelCmds; the calmd command reads
        a temporary file named after the basename of `outfile`, and has its
        STDOUT directed to that name with ".calmd" appended.
        """
        self._basename = os.path.basename(outfile)

        infiles = safe_coerce_to_tuple(infiles)
        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(config, gatk_jar)

        # Options are applied in the listed order
        for option in (("-T", "IndelRealigner"),
                       ("-R", "%(IN_REFERENCE)s"),
                       ("-targetIntervals", "%(IN_INTERVALS)s"),
                       ("-o", "%(OUT_BAMFILE)s"),
                       ("--bam_compression", 0),
                       ("--disable_bam_indexing",)):
            realigner.set_option(*option)
        _set_input_files(realigner, infiles)

        realigner_kwargs = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "IN_INTERVALS": intervals,
            "OUT_BAMFILE": outfile,
        }
        realigner.set_kwargs(**realigner_kwargs)

        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd")

        description = "<Indel Realign: %i file(s) -> '%s'>" \
            % (len(infiles), outfile)

        commands = ParallelCmds([realigner.finalize(), calmd])
        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)

예제 #14

0

파일 보기

파일: makefile.py 프로젝트: UMNPonyClub/paleomix

    def _collect_subsets(roi, subset, path):
        """Read the names file for a subset of a region and record its content.

        The names file is located by swapping the extension of the region's
        BED file for `subset` + ".names". Blank lines and lines starting with
        "#" are skipped. On success, the filename and the set of names are
        stored under `subset` in the region's "SubsetFiles" and "Sequences".

        Raises MakefileError if the region is unknown, if the names file does
        not exist, or if it lists names not among the region's sequences.
        """
        if roi not in subsets_by_regions:
            raise MakefileError("Subset of unknown region (%r) requested at %r" % (roi, path))

        region = subsets_by_regions[roi]
        roi_fname = swap_ext(region["BED"], subset + ".names")
        if not os.path.isfile(roi_fname):
            template = ("Subset file does not exist for Regions Of Interest:\n" "  Region = %r\n  Subset = %r\n  Path   = %r")
            raise MakefileError(template % (roi, subset, roi_fname))

        with open(roi_fname) as handle:
            stripped_lines = (line.strip() for line in handle)
            sequences = set(name for name in stripped_lines
                            if name and not name.startswith("#"))

        # The complete set of names for a region is stored under the None key
        unknown_names = sorted(sequences - region["Sequences"][None])
        if unknown_names:
            header = (
                "Unknown sequences in subset file:\n"
                "  File   = %r\n  Region = %r\n  Subset = %r\n"
                "  Unknown sequence names ="
            ) % (roi_fname, roi, subset)
            # At most five names are listed, followed by an ellipsis
            if len(unknown_names) > 5:
                unknown_names = unknown_names[:5] + ["..."]
            raise MakefileError("\n    - ".join([header] + unknown_names))

        region["SubsetFiles"][subset] = (roi_fname,)
        region["Sequences"][subset] = frozenset(sequences)

예제 #15

0

파일 보기

파일: makefile.py 프로젝트: KHanghoj/epiPALEOMIX

    def _collect_subsets(roi, subset, path):
        """Read the names file for a subset of a region and record its content.

        The names file is located by swapping the extension of the region's
        BED file for `subset` + ".names". Blank lines and lines starting with
        "#" are skipped. On success, the filename and the set of names are
        stored under `subset` in the region's "SubsetFiles" and "Sequences".

        Raises MakefileError if the region is unknown, if the names file does
        not exist, or if it lists names not among the region's sequences.
        """
        if roi not in subsets_by_regions:
            raise MakefileError(
                "Subset of unknown region (%r) requested at %r" % (roi, path))

        region = subsets_by_regions[roi]
        roi_fname = swap_ext(region["BED"], subset + ".names")
        if not os.path.isfile(roi_fname):
            template = ("Subset file does not exist for Regions Of Interest:\n"
                        "  Region = %r\n  Subset = %r\n  Path   = %r")
            raise MakefileError(template % (roi, subset, roi_fname))

        with open(roi_fname) as handle:
            stripped_lines = (line.strip() for line in handle)
            sequences = set(name for name in stripped_lines
                            if name and not name.startswith("#"))

        # The complete set of names for a region is stored under the None key
        unknown_names = sorted(sequences - region["Sequences"][None])
        if unknown_names:
            header = ("Unknown sequences in subset file:\n"
                      "  File   = %r\n  Region = %r\n  Subset = %r\n"
                      "  Unknown sequence names =") \
                      % (roi_fname, roi, subset)
            # At most five names are listed, followed by an ellipsis
            if len(unknown_names) > 5:
                unknown_names = unknown_names[:5] + ["..."]
            raise MakefileError("\n    - ".join([header] + unknown_names))

        region["SubsetFiles"][subset] = (roi_fname, )
        region["Sequences"][subset] = frozenset(sequences)

예제 #16

0

파일 보기

파일: common.py 프로젝트: UMNPonyClub/paleomix

def parse_arguments(argv, ext):
    """Parse the command-line arguments shared by the BAM statistics tools.

    argv -- list of argument strings handed to ArgumentParser.parse_args.
    ext  -- extension used for the default output filename; with dots
            stripped it is also used in the program name and help text.

    Returns the parsed namespace, in which `outfile` and `target_name` have
    been filled in with defaults where missing, and to which the attribute
    `get_readgroup_func` has been added based on --ignore-readgroups.

    If the output file already exists and --overwrite-output was not given,
    parser.error is called. The check is skipped when the output is "-",
    since that value denotes STDOUT rather than a file.
    """
    parser = argparse.ArgumentParser(prog="paleomix %s" % (ext.strip("."),))

    parser.add_argument("infile", metavar="BAM",
                        help="Filename of a sorted BAM file. If set to '-' "
                             "the file is read from STDIN.")
    parser.add_argument("outfile", metavar="OUTPUT", nargs='?',
                        help="Filename of output table; defaults to name of "
                             "the input BAM with a '%s' extension. If "
                             "set to '-' the table is printed to STDOUT."
                             % (ext,))
    parser.add_argument("--target-name", default=None,
                        help="Name used for 'Target' column; defaults to the "
                             "filename of the BAM file.")
    parser.add_argument("--regions-file", default=None, dest="regions_fpath",
                        help="BED file containing regions of interest; %s "
                             "is calculated only for these grouping by the "
                             "name used in the BED file, or the contig name "
                             "if no name has been specified for a record."
                             % (ext.strip("."),))
    parser.add_argument('--max-contigs', default=100, type=int,
                        help="The maximum number of contigs allowed in a BAM "
                             "file. If this number is exceeded, the entire "
                             "set of contigs is aggregated into one pseudo-"
                             "contig named '<Genome>'. This is done to "
                             "limit table sizes [default: %(default)s]")
    parser.add_argument('--ignore-readgroups',
                        default=False, action="store_true",
                        help="Ignore readgroup information in reads, and only "
                             "provide aggregated statistics; this is required "
                             "if readgroup information is missing or partial "
                             "[default: %(default)s]")
    parser.add_argument('--overwrite-output',
                        default=False, action="store_true",
                        help="Overwrite output file if it exists; by "
                             "default, the script will terminate if the file "
                             "already exists.")

    args = parser.parse_args(argv)
    if not args.outfile:
        args.outfile = swap_ext(args.infile, ext)

    if args.ignore_readgroups:
        args.get_readgroup_func = _get_readgroup_ignored
    else:
        args.get_readgroup_func = _get_readgroup

    if not args.target_name:
        if args.infile == "-":
            args.target_name = "<STDIN>"
        else:
            args.target_name = os.path.basename(args.infile)

    # "-" means that the table is printed to STDOUT; a file that happens to be
    # named "-" must therefore not prevent the script from running
    writes_to_file = args.outfile != "-"
    if writes_to_file and os.path.exists(args.outfile) \
            and not args.overwrite_output:
        parser.error("Destination filename already exists (%r); use option "
                     "--overwrite-output to allow overwriting of this file."
                     % (args.outfile,))

    return args

예제 #17

0

파일 보기

파일: gatk.py 프로젝트: schae234/pypeline

def _set_input_files(command, input_files):
    """Add one "-I" option per input file and register the related keywords.

    For the file at position N, the option refers to the IN_BAMFILE_NN
    keyword, and a IN_BAIFILE_NN keyword is registered for the filename with
    its extension swapped for ".bai".
    """
    file_kwargs = {}
    for index, bam_fname in enumerate(input_files):
        bam_key = "IN_BAMFILE_%02i" % index
        bai_key = "IN_BAIFILE_%02i" % index

        command.add_option("-I", "%%(%s)s" % bam_key)
        file_kwargs[bam_key] = bam_fname
        file_kwargs[bai_key] = swap_ext(bam_fname, ".bai")

    command.set_kwargs(**file_kwargs)

예제 #18

0

파일 보기

파일: gatk.py 프로젝트: health1987/paleomix

def _set_input_files(command, input_files):
    """Add one "-I" option per input file and register the related keywords.

    For the file at position N, the option refers to the IN_BAMFILE_NN
    keyword, and a IN_BAIFILE_NN keyword is registered for the filename with
    its extension swapped for ".bai".
    """
    file_kwargs = {}
    for index, bam_fname in enumerate(input_files):
        bam_key = "IN_BAMFILE_%02i" % index
        bai_key = "IN_BAIFILE_%02i" % index

        command.add_option("-I", "%%(%s)s" % bam_key)
        file_kwargs[bam_key] = bam_fname
        file_kwargs[bai_key] = swap_ext(bam_fname, ".bai")

    command.set_kwargs(**file_kwargs)

예제 #19

0

파일 보기

    def customize(cls, config, input_bam, output_log=None, dependencies=()):
        """Assemble the Picard "ValidateSamFile" command-builder.

        STDOUT of the command is registered as `output_log`, which defaults
        to `input_bam` with its extension swapped for ".validated".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "ValidateSamFile")
        builder.set_option("I", "%(IN_BAM)s", sep="=")

        if not output_log:
            output_log = swap_ext(input_bam, ".validated")
        builder.set_kwargs(**{"IN_BAM": input_bam, "OUT_STDOUT": output_log})

        return dict(command=builder, dependencies=dependencies)

예제 #20

0

파일 보기

    def customize(cls, config, reference, dependencies=()):
        """Assemble the Picard "CreateSequenceDictionary" command-builder.

        The "R" option refers to TEMP_OUT_REF, which is set to the basename
        of `reference`; the output dictionary is `reference` with its
        extension swapped for ".dict".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "CreateSequenceDictionary")

        builder.set_option("R", "%(TEMP_OUT_REF)s", sep="=")
        builder.set_option("O", "%(OUT_DICT)s", sep="=")

        file_kwargs = {
            "IN_REF": reference,
            "TEMP_OUT_REF": os.path.basename(reference),
            "OUT_DICT": swap_ext(reference, ".dict"),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #21

0

파일 보기

파일: raxml.py 프로젝트: schae234/pypeline

    def _teardown(self, config, temp):
        """Rename the RAxML output files, and remove the input files in `temp`.

        "RAxML_info.Pypeline" and "input.alignment.BS0" are moved to the
        names derived from the output alignment, after which the
        "input.alignment" and "input.partition" files are deleted and the
        CommandNode teardown is run.
        """
        info_source = os.path.join(temp, "RAxML_info.Pypeline")
        info_target = os.path.join(
            temp, fileutils.swap_ext(self._output_alignment, ".info"))
        fileutils.move_file(info_source, info_target)

        bootstrap_source = os.path.join(temp, "input.alignment.BS0")
        bootstrap_target = os.path.join(temp, self._output_alignment)
        fileutils.move_file(bootstrap_source, bootstrap_target)

        for leftover in ("input.alignment", "input.partition"):
            os.remove(os.path.join(temp, leftover))

        CommandNode._teardown(self, config, temp)

예제 #22

0

파일 보기

    def __init__(self, infile, dependencies = ()):
        """Set up a node running "samtools index" on `infile`.

        The index is written to `infile` with its extension swapped for
        ".bai".
        """
        index_call = ["samtools", "index", "%(IN_BAM)s", "%(OUT_BAI)s"]
        index_kwargs = {
            "IN_BAM": infile,
            "OUT_BAI": swap_ext(infile, ".bai"),
            "CHECK_SAM": SAMTOOLS_VERSION,
        }
        index_command = AtomicCmd(index_call, **index_kwargs)

        description = "<BAMIndex: '%s'>" % (infile,)
        CommandNode.__init__(self,
                             description=description,
                             command=index_command,
                             dependencies=dependencies)

예제 #23

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def _setup(self, config, temp_root):
        """Create the input pipe, or symlinks to the single input BAM.

        With several input files a FIFO is created at the pipe location in
        `temp_root`; with exactly one file, the pipe location and the same
        location with ".bai" appended are instead made symlinks to the file
        and to its ".bai" counterpart.
        """
        CommandNode._setup(self, config, temp_root)

        pipe_path = os.path.join(temp_root, self._bam_input.pipe)
        bam_files = self._bam_input.files
        if len(bam_files) > 1:
            os.mkfifo(pipe_path)
            return

        (bam_fname,) = bam_files
        # Link targets are expressed relative to the current working directory
        cwd = os.getcwd()
        os.symlink(os.path.join(cwd, bam_fname), pipe_path)

        bai_fname = os.path.join(cwd, swap_ext(bam_fname, ".bai"))
        os.symlink(bai_fname, pipe_path + ".bai")

예제 #24

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def customize(cls, config, input_bam, output_log=None, dependencies=()):
        """Assemble the Picard "ValidateSamFile" command-builder.

        STDOUT of the command is registered as `output_log`, which defaults
        to `input_bam` with its extension swapped for ".validated".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "ValidateSamFile")
        builder.set_option("I", "%(IN_BAM)s", sep="=")

        if not output_log:
            output_log = swap_ext(input_bam, ".validated")
        builder.set_kwargs(**{"IN_BAM": input_bam, "OUT_STDOUT": output_log})

        return dict(command=builder, dependencies=dependencies)

예제 #25

0

파일 보기

    def __init__(self, input_file, target_name, output_file=None,
                 intervals_file=None, max_contigs=_MAX_CONTIGS,
                 dependencies=()):
        """Set up the coverage node for a single BAM file.

        The output defaults to `input_file` with its extension swapped for
        ".coverage"; both the BAM and its ".bai" counterpart are registered
        as input files of the node.
        """
        if not output_file:
            output_file = swap_ext(input_file, ".coverage")

        self._target_name = target_name
        self._input_file = input_file
        self._output_file = output_file
        self._intervals_file = intervals_file
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        description = "<Coverage: '%s' -> '%s'>" \
            % (input_file, self._output_file)
        required_files = (input_file, swap_ext(input_file, ".bai"))
        Node.__init__(self,
                      description=description,
                      input_files=required_files,
                      output_files=self._output_file,
                      dependencies=dependencies)

예제 #26

0

파일 보기

    def _setup(self, config, temp_root):
        """Create the input pipe, or symlinks to the single input BAM.

        With several input files a FIFO is created at the pipe location in
        `temp_root`; with exactly one file, the pipe location and the same
        location with ".bai" appended are instead made symlinks to the file
        and to its ".bai" counterpart.
        """
        CommandNode._setup(self, config, temp_root)

        pipe_path = os.path.join(temp_root, self._bam_input.pipe)
        bam_files = self._bam_input.files
        if len(bam_files) > 1:
            os.mkfifo(pipe_path)
            return

        (bam_fname,) = bam_files
        # Link targets are expressed relative to the current working directory
        cwd = os.getcwd()
        os.symlink(os.path.join(cwd, bam_fname), pipe_path)

        bai_fname = os.path.join(cwd, swap_ext(bam_fname, ".bai"))
        os.symlink(bai_fname, pipe_path + ".bai")

예제 #27

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def customize(cls, config, reference, dependencies=()):
        """Assemble the Picard "CreateSequenceDictionary" command-builder.

        The "R" option refers to TEMP_OUT_REF, which is set to the basename
        of `reference`; the output dictionary is `reference` with its
        extension swapped for ".dict".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "CreateSequenceDictionary")

        builder.set_option("R", "%(TEMP_OUT_REF)s", sep="=")
        builder.set_option("O", "%(OUT_DICT)s", sep="=")

        file_kwargs = {
            "IN_REF": reference,
            "TEMP_OUT_REF": os.path.basename(reference),
            "OUT_DICT": swap_ext(reference, ".dict"),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #28

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bam, output_log=None, dependencies=()):
        """Assemble the command-builder for running ValidateSamFile.jar.

        STDOUT of the command is registered as `output_log`, which defaults
        to `input_bam` with its extension swapped for ".validated".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "ValidateSamFile.jar")
        builder = AtomicJavaCmdBuilder(config, jar_path)
        builder.set_option("I", "%(IN_BAM)s", sep="=")

        log_fname = output_log or swap_ext(input_bam, ".validated")
        file_kwargs = {
            "IN_BAM": input_bam,
            "OUT_STDOUT": log_fname,
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #29

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bam, output_log = None, dependencies = ()):
        """Assemble the command-builder for running ValidateSamFile.jar.

        STDOUT of the command is registered as `output_log`, which defaults
        to `input_bam` with its extension swapped for ".validated".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_path = os.path.join(config.jar_root, "ValidateSamFile.jar")
        builder = AtomicJavaCmdBuilder(config, jar_path)
        builder.set_option("I", "%(IN_BAM)s", sep = "=")

        log_fname = output_log or swap_ext(input_bam, ".validated")
        file_kwargs = {
            "IN_BAM": input_bam,
            "OUT_STDOUT": log_fname,
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command = builder, dependencies = dependencies)

예제 #30

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, reference, dependencies=()):
        """Assemble the command-builder for CreateSequenceDictionary.jar.

        The output dictionary is `reference` with its extension swapped for
        ".dict".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_name = "CreateSequenceDictionary.jar"
        jar_path = os.path.join(config.jar_root, jar_name)
        builder = AtomicJavaCmdBuilder(config, jar_path)

        builder.set_option("R", "%(IN_REF)s", sep="=")
        builder.set_option("O", "%(OUT_DICT)s", sep="=")

        file_kwargs = {
            "IN_REF": reference,
            "OUT_DICT": swap_ext(reference, ".dict"),
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #31

0

파일 보기

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Assemble the Picard "MergeSamFiles" command-builder.

        The merged BAM is written to `output_bam`, with index creation
        enabled; the index is registered as `output_bam` with its extension
        swapped for ".bai".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "MergeSamFiles")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("CREATE_INDEX", "True", sep="=")
        builder.set_option("SO", "coordinate", sep="=", fixed=False)
        builder.add_multiple_options("I", input_bams, sep="=")

        output_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
        }
        builder.set_kwargs(**output_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #32

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, reference, dependencies = ()):
        """Assemble the command-builder for CreateSequenceDictionary.jar.

        The output dictionary is `reference` with its extension swapped for
        ".dict".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        jar_name = "CreateSequenceDictionary.jar"
        jar_path = os.path.join(config.jar_root, jar_name)
        builder = AtomicJavaCmdBuilder(config, jar_path)

        builder.set_option("R", "%(IN_REF)s", sep = "=")
        builder.set_option("O", "%(OUT_DICT)s", sep = "=")

        file_kwargs = {
            "IN_REF": reference,
            "OUT_DICT": swap_ext(reference, ".dict"),
            "CHECK_JAR": _picard_version(jar_path),
        }
        builder.set_kwargs(**file_kwargs)

        return dict(command = builder, dependencies = dependencies)

예제 #33

0

파일 보기

파일: raxml.py 프로젝트: schae234/pypeline

    def _teardown(self, config, temp):
        """Rename the RAxML output files, and remove the input files in `temp`.

        "RAxML_info.Pypeline" and "input.alignment.BS0" are moved to the
        names derived from the output alignment, after which the
        "input.alignment" and "input.partition" files are deleted and the
        CommandNode teardown is run.
        """
        info_source = os.path.join(temp, "RAxML_info.Pypeline")
        info_target = os.path.join(
            temp, fileutils.swap_ext(self._output_alignment, ".info"))
        fileutils.move_file(info_source, info_target)

        bootstrap_source = os.path.join(temp, "input.alignment.BS0")
        bootstrap_target = os.path.join(temp, self._output_alignment)
        fileutils.move_file(bootstrap_source, bootstrap_target)

        for leftover in ("input.alignment", "input.partition"):
            os.remove(os.path.join(temp, leftover))

        CommandNode._teardown(self, config, temp)

예제 #34

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Assemble the Picard "MergeSamFiles" command-builder.

        The merged BAM is written to `output_bam`, with index creation
        enabled; the index is registered as `output_bam` with its extension
        swapped for ".bai".

        Returns a dict holding the builder ("command") and the unchanged
        `dependencies` ("dependencies").
        """
        builder = picard_command(config, "MergeSamFiles")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("CREATE_INDEX", "True", sep="=")
        builder.set_option("SO", "coordinate", sep="=", fixed=False)
        builder.add_multiple_options("I", input_bams, sep="=")

        output_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
        }
        builder.set_kwargs(**output_kwargs)

        return dict(command=builder, dependencies=dependencies)

예제 #35

0

파일 보기

def _build_examl_bootstraps(options, phylo, destination, input_alignment,
                            input_partition, dependencies):
    """Build the nodes for the ExaML bootstrap replicates.

    The number of replicates is read from phylo["ExaML"]["Bootstraps"]. For
    each replicate, a PHYLIPBootstrapNode writes a resampled alignment to
    'DESTINATION/bootstraps/bootstrap.NNNN.phy' using a random seed, an
    ExaMLParserNode converts that alignment to a '.binary' file, and
    _examl_nodes() is invoked on the result.

    Returns the value of _build_rerooted_trees() for a MetaNode wrapping all
    replicates, or None if no replicates were requested.
    """
    replicate_count = phylo["ExaML"]["Bootstraps"]
    alignment_template = os.path.join(destination, "bootstraps",
                                      "bootstrap.%04i.phy")

    replicates = []
    for replicate_idx in xrange(replicate_count):
        alignment = alignment_template % (replicate_idx, )
        seed = random.randint(1, 2**32 - 1)
        resampled = PHYLIPBootstrapNode(input_alignment=input_alignment,
                                        input_partition=input_partition,
                                        output_alignment=alignment,
                                        seed=seed,
                                        dependencies=dependencies)

        binary = swap_ext(alignment, ".binary")
        # The '%s' is left in place, yielding a template for output filenames
        output_template = swap_ext(alignment, ".%s")
        parsed = ExaMLParserNode(input_alignment=alignment,
                                 input_partition=input_partition,
                                 output_file=binary,
                                 dependencies=resampled)

        examl = _examl_nodes(options=options,
                             settings=phylo,
                             input_alignment=alignment,
                             input_partitions=input_partition,
                             input_binary=binary,
                             output_template=output_template,
                             dependencies=parsed)
        replicates.append(examl)

    if not replicates:
        return None

    meta = MetaNode(description="Bootstraps",
                    subnodes=replicates,
                    dependencies=dependencies)
    return _build_rerooted_trees(meta, phylo["RootTreesOn"])

예제 #36

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  keep_dupes=False, dependencies=()):
        """Prepare the Picard 'MarkDuplicates' command for the given BAMs.

        A '.bai' index is requested alongside the output BAM. Unless
        'keep_dupes' is true, REMOVE_DUPLICATES is enabled (with fixed=False).
        If 'output_metrics' is not given, the metrics file is named after
        'output_bam' with a '.metrics' extension.

        Returns a dict with the command builder under "command" and the
        'dependencies' argument, unchanged, under "dependencies".
        """
        markdup = picard_command(config, "MarkDuplicates")

        # The .bai index is needed by many downstream programs
        markdup.set_option("CREATE_INDEX", "True", sep="=")

        markdup.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        markdup.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        markdup.add_multiple_options("I", input_bams, sep="=")

        remove_dupes = not keep_dupes
        if remove_dupes:
            # Dropping duplicates from the output saves disk-space
            markdup.set_option("REMOVE_DUPLICATES", "True",
                               sep="=", fixed=False)

        if not output_metrics:
            output_metrics = swap_ext(output_bam, ".metrics")

        index_path = swap_ext(output_bam, ".bai")
        markdup.set_kwargs(OUT_BAM=output_bam,
                           OUT_BAI=index_path,
                           OUT_METRICS=output_metrics)

        return dict(command=markdup, dependencies=dependencies)

예제 #37

0

파일 보기

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Assemble the MergeSamFiles.jar command for the given BAMs.

        The jar is located under 'config.jar_root' and run with the JRE
        options from 'config.jre_options'. The command receives the OUTPUT,
        CREATE_INDEX and SO (set to 'coordinate', with fixed=False) options,
        plus the input BAMs under the 'I' option.

        Returns a dict with the command builder under "command" and the
        'dependencies' argument, unchanged, under "dependencies".
        """
        jar_path = os.path.join(config.jar_root, "MergeSamFiles.jar")
        merge = AtomicJavaCmdBuilder(jar_path,
                                     jre_options=config.jre_options)

        merge.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        merge.set_option("CREATE_INDEX", "True", sep="=")
        merge.set_option("SO", "coordinate", sep="=", fixed=False)
        merge.add_multiple_options("I", input_bams, sep="=")

        index_path = swap_ext(output_bam, ".bai")
        merge.set_kwargs(OUT_BAM=output_bam,
                         OUT_BAI=index_path,
                         CHECK_JAR=_picard_version(config, jar_path))

        return dict(command=merge, dependencies=dependencies)

예제 #38

0

파일 보기

파일: picard.py 프로젝트: CarlesV/paleomix

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Build the parameters used to merge BAMs with MergeSamFiles.jar.

        The jar path is derived from 'config.jar_root', and the builder is
        given 'config.jre_options'. Options are set for the output BAM, for
        index creation, and for coordinate sort order (the latter with
        fixed=False); every file in 'input_bams' is added as an 'I' option.

        Returns a dict containing the builder ("command") and the supplied
        'dependencies' ("dependencies").
        """
        picard_jar = os.path.join(config.jar_root, "MergeSamFiles.jar")
        builder = AtomicJavaCmdBuilder(picard_jar,
                                       jre_options=config.jre_options)

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("CREATE_INDEX", "True", sep="=")
        builder.set_option("SO", "coordinate", sep="=", fixed=False)
        builder.add_multiple_options("I", input_bams, sep="=")

        output_index = swap_ext(output_bam, ".bai")
        jar_check = _picard_version(config, picard_jar)
        builder.set_kwargs(OUT_BAM=output_bam,
                           OUT_BAI=output_index,
                           CHECK_JAR=jar_check)

        result = {"command": builder}
        result["dependencies"] = dependencies
        return result

예제 #39

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Assemble the MergeSamFiles.jar command for the given BAMs.

        The output BAM and its '.bai' index are registered first; each input
        BAM is then added as a separate 'I' option bound to a numbered key
        (IN_BAM_01, IN_BAM_02, ...). The SO option is set to 'coordinate'
        (with fixed=False) after the inputs.

        Returns a dict with the command builder under "command" and the
        'dependencies' argument, unchanged, under "dependencies".
        """
        jar_path = os.path.join(config.jar_root, "MergeSamFiles.jar")
        merge = AtomicJavaCmdBuilder(config, jar_path)

        merge.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        merge.set_option("CREATE_INDEX", "True", sep="=")
        index_path = swap_ext(output_bam, ".bai")
        merge.set_kwargs(OUT_BAM=output_bam,
                         OUT_BAI=index_path,
                         CHECK_JAR=_picard_version(jar_path))

        for number, bam in enumerate(input_bams, start=1):
            key = "IN_BAM_%02i" % number
            # Yields a placeholder such as '%(IN_BAM_01)s' for this key
            merge.add_option("I", "%%(%s)s" % key, sep="=")
            merge.set_kwargs(**{key: bam})

        merge.set_option("SO", "coordinate", sep="=", fixed=False)

        return dict(command=merge, dependencies=dependencies)

예제 #40

0

파일 보기

파일: gatk.py 프로젝트: UMNPonyClub/paleomix

    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Set up a node running GATK's 'RealignerTargetCreator'.

        'infiles' is coerced to a tuple and attached to the command through
        _set_input_files(). The reference, its '.dict' file, the output
        intervals file, and a GATK version check are registered with the
        command before it is finalized and passed to CommandNode.__init__.
        """
        infiles = safe_coerce_to_tuple(infiles)
        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(gatk_jar,
                                       jre_options=config.jre_options)

        gatk_options = (("-T", "RealignerTargetCreator"),
                        ("-R", "%(IN_REFERENCE)s"),
                        ("-o", "%(OUT_INTERVALS)s"))
        for flag, value in gatk_options:
            builder.set_option(flag, value)

        _set_input_files(builder, infiles)
        reference_dict = fileutils.swap_ext(reference, ".dict")
        builder.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=reference_dict,
                           OUT_INTERVALS=outfile,
                           CHECK_GATK=_get_gatk_version_check(config))

        summary = (describe_files(infiles), outfile)
        CommandNode.__init__(
            self,
            description="<Indel Realigner (training): %s -> %r>" % summary,
            command=builder.finalize(),
            dependencies=dependencies)

예제 #41

0

파일 보기

파일: picard.py 프로젝트: schae234/pypeline

    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Build the parameters used to merge BAMs with MergeSamFiles.jar.

        Options for the output BAM and index creation are set first, along
        with the output filenames and a jar version check. Every input BAM is
        then added as its own 'I' option, referring to a numbered key
        (IN_BAM_01, IN_BAM_02, ...), and finally the SO option is set to
        'coordinate' (with fixed=False).

        Returns a dict containing the builder ("command") and the supplied
        'dependencies' ("dependencies").
        """
        picard_jar = os.path.join(config.jar_root, "MergeSamFiles.jar")
        builder = AtomicJavaCmdBuilder(config, picard_jar)

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("CREATE_INDEX", "True", sep="=")
        builder.set_kwargs(OUT_BAM=output_bam,
                           OUT_BAI=swap_ext(output_bam, ".bai"),
                           CHECK_JAR=_picard_version(picard_jar))

        for position, bam_path in enumerate(input_bams, 1):
            bam_key = "IN_BAM_%02i" % position
            placeholder = "%(" + bam_key + ")s"
            builder.add_option("I", placeholder, sep="=")
            builder.set_kwargs(**{bam_key: bam_path})

        builder.set_option("SO", "coordinate", sep="=", fixed=False)

        result = {"command": builder}
        result["dependencies"] = dependencies
        return result

예제 #42

0

파일 보기

파일: statistics.py 프로젝트: schae234/pypeline

def _build_coverage_nodes_cached(files_and_nodes, target_name, aoi_name, aoi_filename, cache):
    output_ext = ".coverage"
    if aoi_name:
        output_ext = ".%s.coverage" % aoi_name

    coverages = {}
    for (input_filename, node) in files_and_nodes.iteritems():
        output_filename = swap_ext(input_filename, output_ext)

        cache_key = (aoi_filename, input_filename)
        if cache_key not in cache:
            cache[cache_key] = CoverageNode(input_file     = input_filename,
                                            output_file    = output_filename,
                                            target_name    = target_name,
                                            intervals_file = aoi_filename,
                                            dependencies   = node)

        coverages[output_filename] = cache[cache_key]
    return coverages

예제 #43

0

파일 보기

파일: gatk.py 프로젝트: schae234/pypeline

    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Set up a node running GATK's 'RealignerTargetCreator'.

        'infiles' is coerced to a tuple and attached to the command through
        _set_input_files(). The reference, its '.dict' file, and the output
        intervals file are registered with the command before it is finalized
        and passed to CommandNode.__init__.
        """
        infiles = safe_coerce_to_tuple(infiles)
        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(config, gatk_jar)

        gatk_options = (("-T", "RealignerTargetCreator"),
                        ("-R", "%(IN_REFERENCE)s"),
                        ("-o", "%(OUT_INTERVALS)s"))
        for flag, value in gatk_options:
            builder.set_option(flag, value)

        _set_input_files(builder, infiles)
        reference_dict = fileutils.swap_ext(reference, ".dict")
        builder.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=reference_dict,
                           OUT_INTERVALS=outfile)

        summary = (len(infiles), outfile)
        CommandNode.__init__(
            self,
            description="<Train Indel Realigner: %i file(s) -> '%s'>" % summary,
            command=builder.finalize(),
            dependencies=dependencies)

예제 #44

0

파일 보기

파일: gatk.py 프로젝트: schae234/pypeline

    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Create the command node that trains the GATK indel realigner.

        The command runs 'RealignerTargetCreator' from GenomeAnalysisTK.jar
        (located under 'config.jar_root') with 'reference' as '-R' and
        'outfile' as '-o'. The reference's '.dict' file is registered as an
        additional input, and the input files are added through
        _set_input_files().
        """
        infiles = safe_coerce_to_tuple(infiles)
        jar_path = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

        realigner = AtomicJavaCmdBuilder(config, jar_path)
        realigner.set_option("-T", "RealignerTargetCreator")
        realigner.set_option("-R", "%(IN_REFERENCE)s")
        realigner.set_option("-o", "%(OUT_INTERVALS)s")
        _set_input_files(realigner, infiles)

        file_kwargs = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "OUT_INTERVALS": outfile,
        }
        realigner.set_kwargs(**file_kwargs)

        num_files = len(infiles)
        label = "<Train Indel Realigner: %i file(s) -> '%s'>" \
            % (num_files, outfile)
        CommandNode.__init__(self,
                             description=label,
                             command=realigner.finalize(),
                             dependencies=dependencies)

예제 #45

0

파일 보기

def _build_coverage_nodes_cached(files_and_nodes, target_name, aoi_name,
                                 aoi_filename, cache):
    output_ext = ".coverage"
    if aoi_name:
        output_ext = ".%s.coverage" % aoi_name

    coverages = {}
    for (input_filename, node) in files_and_nodes.iteritems():
        output_filename = swap_ext(input_filename, output_ext)

        cache_key = (aoi_filename, input_filename)
        if cache_key not in cache:
            cache[cache_key] = CoverageNode(input_file=input_filename,
                                            output_file=output_filename,
                                            target_name=target_name,
                                            intervals_file=aoi_filename,
                                            dependencies=node)

        coverages[output_filename] = cache[cache_key]
    return coverages

예제 #46

0

파일 보기

파일: gatk.py 프로젝트: health1987/paleomix

    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Create the command node that trains the GATK indel realigner.

        The command runs 'RealignerTargetCreator' from GenomeAnalysisTK.jar
        (located under 'config.jar_root') using 'config.jre_options', with
        'reference' as '-R' and 'outfile' as '-o'. The reference's '.dict'
        file and a GATK version check are registered with the command, and
        the input files are added through _set_input_files().
        """
        infiles = safe_coerce_to_tuple(infiles)
        jar_path = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

        realigner = AtomicJavaCmdBuilder(jar_path,
                                         jre_options=config.jre_options)
        realigner.set_option("-T", "RealignerTargetCreator")
        realigner.set_option("-R", "%(IN_REFERENCE)s")
        realigner.set_option("-o", "%(OUT_INTERVALS)s")
        _set_input_files(realigner, infiles)

        dict_path = fileutils.swap_ext(reference, ".dict")
        version_check = _get_gatk_version_check(config)
        realigner.set_kwargs(IN_REFERENCE=reference,
                             IN_REF_DICT=dict_path,
                             OUT_INTERVALS=outfile,
                             CHECK_GATK=version_check)

        label = "<Indel Realigner (training): %s -> %r>" \
            % (describe_files(infiles), outfile)
        CommandNode.__init__(self,
                             description=label,
                             command=realigner.finalize(),
                             dependencies=dependencies)

예제 #47

0

파일 보기

    def __init__(self,
                 config,
                 target_name,
                 input_files,
                 output_file,
                 intervals_file=None,
                 print_stats=False,
                 max_contigs=_MAX_CONTIGS,
                 dependencies=()):
        """Set up a depth-histogram node for one or more BAM files.

        The node's inputs are the BAM files, the '.bai' file named after each
        of them, and the intervals file if one was given. The required
        executable is 'coverageBed' when an intervals file is used and
        'genomeCoverageBed' otherwise; executables and auxiliary files of the
        commands returned by concatenate_input_bams() are added to these.
        """
        self._target_name = target_name
        self._input_files = safe_coerce_to_tuple(input_files)
        self._output_file = output_file
        self._intervals = intervals_file
        self._print_stats = print_stats
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        bam_indices = [swap_ext(bam, ".bai") for bam in self._input_files]
        required_files = list(self._input_files) + bam_indices
        if intervals_file:
            required_files.append(intervals_file)

        if intervals_file:
            required_executables = ["coverageBed"]
        else:
            required_executables = ["genomeCoverageBed"]

        extra_files = []
        merge_commands = concatenate_input_bams(config, self._input_files)[0]
        for merge_command in merge_commands:
            required_executables.extend(merge_command.executables)
            extra_files.extend(merge_command.auxiliary_files)

        label = "<DepthHistogram: %s -> '%s'>" \
            % (describe_files(self._input_files), self._output_file)
        Node.__init__(self,
                      description=label,
                      input_files=required_files,
                      output_files=self._output_file,
                      dependencies=dependencies,
                      executables=required_executables,
                      auxiliary_files=extra_files)

예제 #48

0

파일 보기

    def __init__(self, config, input_bams, pipename="input.bam"):
        """Describe how one or more BAM files are made available at a pipe.

        With more than one input file, a Picard 'MergeSamFiles' command is
        prepared that writes an uncompressed, coordinate-sorted merge of the
        inputs to 'pipename'. With a single file no command is needed; the
        file and its '.bai' index are instead recorded in 'self.kwargs'.
        """
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}

        if len(self.files) <= 1:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")
            return

        merge = picard_command(config, "MergeSamFiles")

        merge.set_option("SO", "coordinate", sep="=", fixed=False)
        merge.set_option("CREATE_INDEX", "False", sep="=")
        merge.set_option("COMPRESSION_LEVEL", 0, sep="=")
        merge.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        merge.add_multiple_options("I", input_bams, sep="=")

        merge.set_kwargs(TEMP_OUT_BAM=self.pipe)

        self.commands = [merge.finalize()]

예제 #49

0

파일 보기

파일: picard.py 프로젝트: UMNPonyClub/paleomix

    def __init__(self, config, input_bams, pipename="input.bam"):
        """Prepare the commands needed to read 'input_bams' via 'pipename'.

        If several BAM files are given, 'self.commands' holds a single
        finalized Picard 'MergeSamFiles' command writing to the pipe, with
        index creation disabled and compression level 0. If only one file is
        given, 'self.commands' stays empty and that file, together with the
        '.bai' file named after it, is added to 'self.kwargs'.
        """
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}

        needs_merge = len(self.files) > 1
        if needs_merge:
            builder = picard_command(config, "MergeSamFiles")

            merge_options = (("CREATE_INDEX", "False"),
                             ("COMPRESSION_LEVEL", 0),
                             ("OUTPUT", "%(TEMP_OUT_BAM)s"))
            builder.set_option("SO", "coordinate", sep="=", fixed=False)
            for option, value in merge_options:
                builder.set_option(option, value, sep="=")
            builder.add_multiple_options("I", input_bams, sep="=")

            builder.set_kwargs(TEMP_OUT_BAM=self.pipe)

            self.commands = [builder.finalize()]
        else:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")

예제 #50

0

파일 보기

def build_sampling_nodes(options, genotyping, sample, regions, dependencies):
    """Build the nodes generating a FASTA file for 'sample' by sampling.

    The FASTA filename is taken from regions["Genotypes"][sample]. A pileup
    ('*.pileup.bgz') is generated from the sample's BAM file (the realigned
    BAM if regions["Realigned"] is set) for the regions padded by
    genotyping["Padding"], tabix indexed, and passed to SampleRegionsNode,
    which writes the FASTA file. The FASTA file is finally indexed.

    Returns a tuple containing only the FASTA indexing node.
    """
    fasta_path = regions["Genotypes"][sample]
    pileup_path = swap_ext(fasta_path, ".pileup.bgz")

    padded_bed, region_nodes = build_regions_nodes(regions,
                                                   genotyping["Padding"],
                                                   dependencies)

    bam_path = os.path.join(options.samples_root,
                            "%s.%s.bam" % (sample, regions["Prefix"]))
    if regions["Realigned"]:
        bam_path = add_postfix(bam_path, ".realigned")
    index_node = build_bam_index_node(bam_path)

    pileup_builder = GenotypeRegionsNode.customize(
        pileup_only=True,
        reference=regions["FASTA"],
        bedfile=padded_bed,
        infile=bam_path,
        outfile=pileup_path,
        nbatches=options.samtools_max_threads,
        dependencies=region_nodes + (index_node, ))
    apply_samtools_options(pileup_builder.command, genotyping["MPileup"],
                           "--mpileup-argument")
    pileup_node = pileup_builder.build_node()

    pileup_index = TabixIndexNode(infile=pileup_path,
                                  preset="pileup",
                                  dependencies=pileup_node)

    sampler = SampleRegionsNode(infile=pileup_path,
                                bedfile=regions["BED"],
                                outfile=fasta_path,
                                dependencies=pileup_index)

    fasta_index = FastaIndexNode(infile=fasta_path, dependencies=sampler)

    return (fasta_index, )

예제 #51

0

파일 보기

파일: genotype.py 프로젝트: health1987/paleomix

def build_sampling_nodes(options, genotyping, sample, regions, dependencies):
    """Construct the node chain that samples a FASTA sequence for 'sample'.

    Steps, in order of dependency:
        1. Padded regions are built using genotyping["Padding"], and the
           sample's BAM ('SAMPLE.PREFIX.bam' under options.samples_root, with
           a '.realigned' postfix if regions["Realigned"] is set) is indexed.
        2. A pileup is written to the FASTA filename with its extension
           swapped for '.pileup.bgz', and indexed with tabix.
        3. SampleRegionsNode generates the FASTA file from the pileup.
        4. The FASTA file is indexed.

    Returns a single-element tuple holding the node of the final step.
    """
    output_fasta = regions["Genotypes"][sample]
    output_pileup = swap_ext(output_fasta, ".pileup.bgz")

    pad_size = genotyping["Padding"]
    slop_bed, slop_nodes = build_regions_nodes(regions, pad_size,
                                               dependencies)

    bam_name = "%s.%s.bam" % (sample, regions["Prefix"])
    input_bam = os.path.join(options.samples_root, bam_name)
    if regions["Realigned"]:
        input_bam = add_postfix(input_bam, ".realigned")
    bam_index = build_bam_index_node(input_bam)

    mpileup = GenotypeRegionsNode.customize(
        pileup_only=True,
        reference=regions["FASTA"],
        bedfile=slop_bed,
        infile=input_bam,
        outfile=output_pileup,
        nbatches=options.samtools_max_threads,
        dependencies=slop_nodes + (bam_index,))
    apply_samtools_options(mpileup.command, genotyping["MPileup"],
                           "--mpileup-argument")
    mpileup = mpileup.build_node()

    tabix_node = TabixIndexNode(infile=output_pileup,
                                preset="pileup",
                                dependencies=mpileup)

    sampling_node = SampleRegionsNode(infile=output_pileup,
                                      bedfile=regions["BED"],
                                      outfile=output_fasta,
                                      dependencies=tabix_node)

    faidx_node = FastaIndexNode(infile=output_fasta,
                                dependencies=sampling_node)

    return (faidx_node,)

예제 #52

0

파일 보기

파일: common.py 프로젝트: KHanghoj/epiPALEOMIX

def parse_arguments(argv, ext):
    """Parse the command-line arguments shared by the BAM statistics tools.

    Parameters:
        argv: List of command-line arguments to parse.
        ext: Extension given to the default output filename; with leading
             and trailing dots stripped, it is also used to name the tool in
             the program name and in the help text.

    Returns the argparse namespace with the following post-processing:
        outfile: Defaults to the input filename with its extension swapped
                 for 'ext' if no output filename was given.
        get_readgroup_func: Set to _get_readgroup_ignored if the option
                            --ignore-readgroups was used, else _get_readgroup.
        target_name: Defaults to '<STDIN>' if the input is '-', otherwise to
                     the basename of the input file.

    Terminates via parser.error() if the output file already exists and
    --overwrite-output was not specified; no such check is made when the
    output is '-' (STDOUT).
    """
    parser = argparse.ArgumentParser(prog="paleomix %s" % (ext.strip("."), ))

    parser.add_argument("infile",
                        metavar="BAM",
                        help="Filename of a sorted BAM file. If set to '-' "
                        "the file is read from STDIN.")
    parser.add_argument("outfile",
                        metavar="OUTPUT",
                        nargs='?',
                        help="Filename of output table; defaults to name of "
                        "the input BAM with a '%s' extension. If "
                        "set to '-' the table is printed to STDOUT." % (ext, ))
    parser.add_argument("--target-name",
                        default=None,
                        help="Name used for 'Target' column; defaults to the "
                        "filename of the BAM file.")
    parser.add_argument("--regions-file",
                        default=None,
                        dest="regions_fpath",
                        help="BED file containing regions of interest; %s "
                        "is calculated only for these grouping by the "
                        "name used in the BED file, or the contig name "
                        "if no name has been specified for a record." %
                        (ext.strip("."), ))
    parser.add_argument('--max-contigs',
                        default=100,
                        type=int,
                        help="The maximum number of contigs allowed in a BAM "
                        "file. If this number is exceeded, the entire "
                        "set of contigs is aggregated into one pseudo-"
                        "contig named '<Genome>'. This is done to "
                        "limit table sizes [default: %(default)s]")
    parser.add_argument('--ignore-readgroups',
                        default=False,
                        action="store_true",
                        help="Ignore readgroup information in reads, and only "
                        "provide aggregated statistics; this is required "
                        "if readgroup information is missing or partial "
                        "[default: %(default)s]")
    parser.add_argument('--overwrite-output',
                        default=False,
                        action="store_true",
                        help="Overwrite output file if it exists; by "
                        "default, the script will terminate if the file "
                        "already exists.")

    args = parser.parse_args(argv)
    if not args.outfile:
        args.outfile = swap_ext(args.infile, ext)

    if args.ignore_readgroups:
        args.get_readgroup_func = _get_readgroup_ignored
    else:
        args.get_readgroup_func = _get_readgroup

    if not args.target_name:
        if args.infile == "-":
            args.target_name = "<STDIN>"
        else:
            args.target_name = os.path.basename(args.infile)

    # An output of '-' means STDOUT, so a file that merely happens to be
    # named '-' must not block the run.
    writes_to_stdout = (args.outfile == "-")
    if not writes_to_stdout and os.path.exists(args.outfile) \
            and not args.overwrite_output:
        parser.error("Destination filename already exists (%r); use option "
                     "--overwrite-output to allow overwriting of this file." %
                     (args.outfile, ))

    return args

예제 #53

0

파일 보기

파일: fileutils_test.py 프로젝트: schae234/pypeline

def test_swap_ext__empty_ext_vs_empty_ext():
    # A name without an extension, given an empty extension, is unchanged
    observed = swap_ext("name", "")
    assert_equal(observed, "name")

예제 #54

0

파일 보기

파일: fileutils_test.py 프로젝트: schae234/pypeline

def test_swap_ext__dot_ext_vs_dot_ext():
    # A trailing dot swapped for a bare "." leaves just the stem
    observed = swap_ext("name.", ".")
    assert_equal(observed, "name")

예제 #55

0

파일 보기

파일: fileutils_test.py 프로젝트: schae234/pypeline

def test_swap_ext__multiple__has_ext_vs_empty_ext():
    # Only the last of several extensions is removed by an empty extension
    observed = swap_ext("name.foo.bar", "")
    assert_equal(observed, "name.foo")

예제 #56

0

파일 보기

파일: genotype.py 프로젝트: health1987/paleomix

def build_genotyping_nodes_cached(options, genotyping, sample, regions,
                                  dependencies):
    """Carries out genotyping, filtering of calls, and indexing of files for a
    given sample and prefix. If the option 'GenotypeEntirePrefix' is enabled,
    the BAM is genotyped once, and each set of RegionsOfInterest simply extract
    the relevant regions during construction of the consensus sequence.

    Parameters:
        options: An options object (c.f. pypeline.tools.phylo_pipeline.config).
        genotyping: Genotyping options defined for a specific set of areas of
                    interest, corresponding to Genotyping:NAME in the makefile.
        sample: The name of the sample to be genotyped.
        regions: A dictionary for a 'RegionsOfInterest' from the makefile.
        dependencies: Dependencies that must be met before genotyping starts.

    Returns a tuple containing the filename of the filtered and tabix-indexed
    VCF file, and the top-level node generating this file. Multiple calls for
    the same BAM and prefix will return the same VCF and nodes if the option
    for 'GenotypeEntirePrefix' is enabled, otherwise each ROI is genotyped
    individually.

    Output files are generated in ./results/PROJECT/genotyping. If the option
    for 'GenotypeEntirePrefix' is enabled, the following files are generated:
        SAMPLE.PREFIX.vcf.bgz: Unfiltered calls for variant/non-variant sites.
        SAMPLE.PREFIX.vcf.pileup.bgz: Pileup of sites containing SNPs.
        SAMPLE.PREFIX.vcf.pileup.bgz.tbi: Tabix index of the pileup.
        SAMPLE.PREFIX.filtered.vcf.bgz: Variant calls filtered with vcf_filter.
        SAMPLE.PREFIX.filtered.vcf.bgz.tbi: Tabix index for the filtered VCF.

    If 'GenotypeEntirePrefix' is not enabled for a given ROI, the following
    files are generated for that ROI (see descriptions above):
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz.tbi
        SAMPLE.PREFIX.ROI.vcf.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz.tbi

    In addition, the following files are generated for each set of
    RegionsOfInterest (ROI), regardless of the 'GenotypeEntirePrefix' option:
        SAMPLE.PREFIX.ROI.CDS.fasta: FASTA sequence of each feature in the ROI.
        SAMPLE.PREFIX.ROI.CDS.fasta.fai: FASTA index generated using SAMTools.

    """
    prefix, bam_path, bed_path, dependencies \
        = build_genotyping_bedfile_nodes(options, genotyping, sample, regions,
                                         dependencies)

    # Results are shared between calls for the same BAM and output prefix
    cache_key = (bam_path, prefix)
    if cache_key in _VCF_CACHE:
        return _VCF_CACHE[cache_key]

    raw_vcf = swap_ext(prefix, ".vcf.bgz")
    pileup_path = swap_ext(prefix, ".vcf.pileup.bgz")
    filtered_vcf = swap_ext(prefix, ".filtered.vcf.bgz")

    # 1. Call samtools mpileup | bcftools view on the bam
    caller = GenotypeRegionsNode.customize(
        reference=regions["FASTA"],
        bedfile=bed_path,
        infile=bam_path,
        outfile=raw_vcf,
        nbatches=options.samtools_max_threads,
        dependencies=dependencies)

    caller.command.add_option("--mpileup-argument",
                              "-f=%s" % (regions["FASTA"],), sep="=")
    apply_samtools_options(caller.command, genotyping["MPileup"],
                           "--mpileup-argument")
    apply_samtools_options(caller.command, genotyping["BCFTools"],
                           "--bcftools-argument")
    caller_node = caller.build_node()

    # 2. Collect pileups of sites with SNPs, to allow proper filtering by
    #    frequency of the minor allele, as only the major non-ref allele is
    #    counted in the VCF (c.f. field DP4).
    piler = VCFPileupNode.customize(reference=regions["FASTA"],
                                    infile_bam=bam_path,
                                    infile_vcf=raw_vcf,
                                    outfile=pileup_path,
                                    dependencies=caller_node)
    apply_samtools_options(piler.command, genotyping["MPileup"],
                           "--mpileup-argument")
    piler_node = piler.build_node()

    pileup_index = TabixIndexNode(infile=pileup_path,
                                  preset="pileup",
                                  dependencies=piler_node)

    # 3. Filter all sites using the 'vcf_filter' command
    vcf_filter = VCFFilterNode.customize(infile=raw_vcf,
                                         pileup=pileup_path,
                                         outfile=filtered_vcf,
                                         regions=regions,
                                         dependencies=pileup_index)
    vcf_filter = _apply_vcf_filter_options(vcf_filter, genotyping, sample)

    # 4. Tabix index. This allows random-access to the VCF file when building
    #    the consensus FASTA sequence later in the pipeline.
    vcf_index = TabixIndexNode(infile=filtered_vcf,
                               preset="vcf",
                               dependencies=vcf_filter)

    _VCF_CACHE[cache_key] = (filtered_vcf, vcf_index)
    return filtered_vcf, vcf_index

예제 #57

0

파일 보기

    def _setup(self, _config, temp):
        """Symlink the input BAM and its index into the temp folder.

        The link to the BAM is placed at the path returned by reroot_path();
        the link to the index (the BAM filename with its extension swapped
        for '.bai') is placed at that same path with '.bai' appended.
        """
        source_bam = os.path.abspath(self._input_file)
        source_bai = swap_ext(source_bam, ".bai")
        linked_bam = reroot_path(temp, source_bam)
        linked_bai = linked_bam + ".bai"

        os.symlink(source_bam, linked_bam)
        os.symlink(source_bai, linked_bai)

예제 #58

0

파일 보기

파일: fileutils_test.py 프로젝트: schae234/pypeline

def test_swap_ext__empty_ext_vs_empty_ext():
    # With no extension on either side, the filename comes back as given
    expected = "name"
    assert_equal(swap_ext("name", ""), expected)

예제 #59

0

파일 보기

파일: fileutils_test.py 프로젝트: schae234/pypeline

def test_swap_ext__has_ext_vs_dot_ext():
    # Swapping an existing extension for a bare "." strips the extension
    observed = swap_ext("name.foo", ".")
    assert_equal(observed, "name")