예제 #1
0
 def __init__(self, input_files, output_file, dependencies=()):
     """Register the node's files and dependencies with the base Node.

     The description is built from describe_files(input_files); the input
     files, output file and dependencies are forwarded unchanged.
     """
     summary = describe_files(input_files)
     label = "<Detect Input Duplication: %s>" % (summary)
     Node.__init__(self,
                   description=label,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
예제 #2
0
    def __init__(self, config, reference, intervals, infiles, outfile, dependencies=()):
        """Set up a GATK IndelRealigner command run together with 'samtools calmd'.

        Both commands are wrapped in ParallelCmds and handed to
        CommandNode.__init__, along with a description naming the input
        files and the output file.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(gatk_jar, jre_options=config.jre_options)
        # Options are applied in this fixed order; the last one takes no value
        gatk_options = (
            ("-T", "IndelRealigner"),
            ("-R", "%(IN_REFERENCE)s"),
            ("-targetIntervals", "%(IN_INTERVALS)s"),
            ("-o", "%(OUT_BAMFILE)s"),
            ("--bam_compression", 0),
            ("--disable_bam_indexing",),
        )
        for option in gatk_options:
            realigner.set_option(*option)
        _set_input_files(realigner, infiles)

        reference_dict = fileutils.swap_ext(reference, ".dict")
        gatk_check = _get_gatk_version_check(config)
        realigner.set_kwargs(
            IN_REFERENCE=reference,
            IN_REF_DICT=reference_dict,
            IN_INTERVALS=intervals,
            OUT_BAMFILE=outfile,
            CHECK_GATK=gatk_check,
        )

        calmd_call = ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(
            calmd_call,
            TEMP_IN_BAM=self._basename,
            IN_REF=reference,
            TEMP_OUT_STDOUT=self._basename + ".calmd",
            CHECK_VERSION=SAMTOOLS_VERSION,
        )

        label = "<Indel Realigner (aligning): %s -> %r>" % (describe_files(infiles), outfile)
        pipeline = ParallelCmds([realigner.finalize(), calmd])
        CommandNode.__init__(self, description=label, command=pipeline, dependencies=dependencies)
예제 #3
0
    def __init__(self, config, target_name, input_files, output_file, intervals_file = None, print_stats = False, max_contigs = _MAX_CONTIGS, dependencies = ()):
        """Record the histogram settings and register files/executables with Node.

        The registered input files are the given input files, a '.bai'
        counterpart for each (via swap_ext) and, if given, the intervals
        file. Executables and auxiliary files are collected from the
        commands returned by concatenate_input_bams.
        """
        self._target_name = target_name
        self._input_files = safe_coerce_to_tuple(input_files)
        self._output_file = output_file
        self._intervals = intervals_file
        self._print_stats = print_stats
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        required_files = list(self._input_files)
        required_files += [swap_ext(bam_file, ".bai")
                           for bam_file in self._input_files]
        if intervals_file:
            required_files.append(intervals_file)

        # The executable used depends on whether intervals were supplied
        if intervals_file:
            executables = ["coverageBed"]
        else:
            executables = ["genomeCoverageBed"]

        auxiliary_files = []
        cat_commands = concatenate_input_bams(config, self._input_files)[0]
        for cat_command in cat_commands:
            executables.extend(cat_command.executables)
            auxiliary_files.extend(cat_command.auxiliary_files)

        label = "<DepthHistogram: %s -> '%s'>" \
            % (describe_files(self._input_files), self._output_file)
        Node.__init__(self,
                      description=label,
                      input_files=required_files,
                      output_files=self._output_file,
                      dependencies=dependencies,
                      executables=executables,
                      auxiliary_files=auxiliary_files)
예제 #4
0
 def __init__(self, input_files, output_file, dependencies=()):
     """Initialise the base Node for this duplication check.

     describe_files(input_files) supplies the text used in the node
     description; all other arguments are passed straight through.
     """
     files_text = describe_files(input_files)
     description = "<Detect Input Duplication: %s>" % (files_text)
     Node.__init__(
         self,
         description=description,
         input_files=input_files,
         output_files=output_file,
         dependencies=dependencies)
예제 #5
0
파일: gatk.py 프로젝트: health1987/paleomix
    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        """Configure GATK IndelRealigner plus a 'samtools calmd' command.

        The finalized GATK command and the calmd command are combined with
        ParallelCmds and passed to CommandNode.__init__.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        jar_path = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(jar_path, jre_options=config.jre_options)

        # (option,) or (option, value) pairs, applied in order
        for arguments in (("-T", "IndelRealigner"),
                          ("-R", "%(IN_REFERENCE)s"),
                          ("-targetIntervals", "%(IN_INTERVALS)s"),
                          ("-o", "%(OUT_BAMFILE)s"),
                          ("--bam_compression", 0),
                          ("--disable_bam_indexing",)):
            builder.set_option(*arguments)
        _set_input_files(builder, infiles)

        dict_file = fileutils.swap_ext(reference, ".dict")
        version_check = _get_gatk_version_check(config)
        builder.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=dict_file,
                           IN_INTERVALS=intervals,
                           OUT_BAMFILE=outfile,
                           CHECK_GATK=version_check)

        calmd_argv = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_argv,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        files_text = describe_files(infiles)
        description = "<Indel Realigner (aligning): %s -> %r>" \
            % (files_text, outfile)
        commands = ParallelCmds([builder.finalize(), calmd])
        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
예제 #6
0
 def __init__(self, parameters):
     """Finalize the prepared command and initialise the PicardNode base.

     The description is derived from parameters.input_bams; dependencies
     are taken from parameters.dependencies.
     """
     summary = describe_files(parameters.input_bams)
     label = "<MarkDuplicates: %s>" % (summary,)
     finalized = parameters.command.finalize()
     PicardNode.__init__(self,
                         command=finalized,
                         description=label,
                         dependencies=parameters.dependencies)
예제 #7
0
 def __init__(self, parameters):
     """Initialise PicardNode with the finalized MarkDuplicates command.

     parameters supplies the input BAMs (used only for the description),
     the command builder and the dependencies.
     """
     files_text = describe_files(parameters.input_bams)
     description = "<MarkDuplicates: %s>" % (files_text,)
     command = parameters.command.finalize()
     PicardNode.__init__(
         self,
         command=command,
         description=description,
         dependencies=parameters.dependencies)
예제 #8
0
    def __init__(self, config, target_name, input_files, output_file,
                 regions_file=None, dependencies=()):
        """Assemble the 'depths' command together with the BAM input commands.

        Raises ValueError if a regions file is given while bam_input.files
        holds more than one file.
        """
        bam_input = MultiBAMInput(config, input_files)
        has_multiple_inputs = len(bam_input.files) > 1
        if has_multiple_inputs and regions_file:
            message = "DepthHistogram for regions require single, " \
                "indexed input BAM file."
            raise ValueError(message)

        depths = factory.new("depths")
        for placeholder in ("%(TEMP_IN_BAM)s", "%(OUT_FILE)s"):
            depths.add_value(placeholder)
        depths.set_option("--target-name", target_name)
        depths.set_kwargs(OUT_FILE=output_file)
        bam_input.setup(depths)

        # The regions option is only added when a regions file was supplied
        if regions_file:
            depths.set_option('--regions-file', '%(IN_REGIONS)s')
            depths.set_kwargs(IN_REGIONS=regions_file)

        pipeline = ParallelCmds(bam_input.commands + [depths.finalize()])
        label = "<DepthHistogram: %s -> '%s'>" \
            % (describe_files(bam_input.files), output_file)
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=pipeline,
            description=label,
            dependencies=dependencies)
예제 #9
0
 def __init__(self, input_files, output_file, offset, dependencies=()):
     """Store the offset and initialise the base Node.

     The description is built from describe_files(input_files); files and
     dependencies are forwarded unchanged.
     """
     self._offset = offset
     summary = describe_files(input_files)
     label = "<Validate FASTQ Files: %s>" % (summary)
     Node.__init__(self,
                   description=label,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
예제 #10
0
 def __init__(self, input_files, output_file, offset, dependencies=()):
     """Remember the offset, then set up the underlying Node.

     input_files, output_file and dependencies are handed to Node.__init__
     as-is; input_files also provides the description text.
     """
     self._offset = offset
     files_text = describe_files(input_files)
     description = "<Validate FASTQ Files: %s>" % (files_text)
     Node.__init__(
         self,
         description=description,
         input_files=input_files,
         output_files=output_file,
         dependencies=dependencies)
예제 #11
0
    def __init__(self, parameters):
        """Build the paired-end AdapterRemoval command set.

        Raises CmdError if the number of mate 1 and mate 2 input files
        differ. The set of zip commands depends on whether
        parameters.version equals VERSION_15.
        """
        self._version = parameters.version
        self._basename = parameters.basename

        num_mate_1 = len(parameters.input_files_1)
        num_mate_2 = len(parameters.input_files_2)
        if num_mate_1 != num_mate_2:
            message = "Number of mate 1 files differ from mate 2 files: %i != %i"
            raise CmdError(message % (num_mate_1, num_mate_2))

        def _zip(extension):
            # Zip command for the output file carrying this extension
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      extension)

        unicat_1 = _build_unicat_command(parameters.input_files_1,
                                         "uncompressed_input_1")
        unicat_2 = _build_unicat_command(parameters.input_files_2,
                                         "uncompressed_input_2")
        zip_mate_1 = _zip(".pair1.truncated")
        zip_mate_2 = _zip(".pair2.truncated")
        zip_discarded = _zip(".discarded")
        adapterrm = parameters.command.finalize()

        if parameters.version == VERSION_15:
            extra_extensions = (".collapsed",
                                ".collapsed.truncated",
                                ".singleton.truncated")
        else:
            extra_extensions = (".singleton.aln.truncated",
                                ".singleton.unaln.truncated")

        # Opening of pipes block, so the order of these commands is dependent
        # upon the order of file-opens in atomiccmd and the programs themselves.
        ordered = [adapterrm, zip_mate_1, zip_mate_2]
        for extension in extra_extensions:
            ordered.append(_zip(extension))
        ordered.extend([zip_discarded, unicat_1, unicat_2])
        commands = ParallelCmds(ordered)

        pairs_text = fileutils.describe_files(parameters.input_files_1)
        pairs_text = pairs_text.replace("file", "pair")
        description = "<PE_AdapterRM: %s -> '%s.*'>" \
            % (pairs_text, parameters.output_prefix)

        CommandNode.__init__(self,
                             command=commands,
                             description=description,
                             dependencies=parameters.dependencies)
예제 #12
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Initialise the base Node and check that it has one output file.

        The description is built from describe_files(input_files).
        """
        summary = describe_files(input_files)
        label = "<Validate FASTA Files: %s>" % (summary)
        Node.__init__(self,
                      description=label,
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)

        # Exactly one output file is expected after initialisation
        assert len(self.output_files) == 1, self.output_files
예제 #13
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Set up the underlying Node for FASTA validation.

        After Node.__init__ returns, asserts that self.output_files holds
        exactly one entry.
        """
        files_text = describe_files(input_files)
        description = "<Validate FASTA Files: %s>" % (files_text)
        Node.__init__(
            self,
            description=description,
            input_files=input_files,
            output_files=output_file,
            dependencies=dependencies)

        assert len(self.output_files) == 1, self.output_files
예제 #14
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Store the output path and initialise the base Node.

        The description names both the input files (via describe_files) and
        the output file.
        """
        self._output_file = output_file

        summary = describe_files(input_files)
        label = "<MergeCoverage: '%s' -> '%s'>" % (summary, self._output_file)
        Node.__init__(self,
                      description=label,
                      input_files=input_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
예제 #15
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Remember the output file and set up the underlying Node.

        input_files and dependencies are forwarded unchanged; the stored
        output path is used for both the description and output_files.
        """
        self._output_file = output_file

        files_text = describe_files(input_files)
        description = "<MergeCoverage: '%s' -> '%s'>" \
            % (files_text, self._output_file)
        Node.__init__(self,
                      description=description,
                      input_files=input_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
예제 #16
0
파일: nodes.py 프로젝트: schae234/pypeline
    def __init__(self, config, input_bams, output_bam, dependencies = ()):
        """Run 'bam_rmdup_collapsed --remove-duplicates' after the cat commands.

        concatenate_input_bams supplies the cat commands and the object
        passed as IN_STDIN; output_bam is passed as OUT_STDOUT.
        """
        cat_cmds, cat_obj = concatenate_input_bams(config, input_bams)
        rmdup_call = ["bam_rmdup_collapsed", "--remove-duplicates"]
        rmdup = AtomicCmd(rmdup_call,
                          IN_STDIN=cat_obj,
                          OUT_STDOUT=output_bam)

        pipeline = ParallelCmds(cat_cmds + [rmdup])
        label = "<FilterCollapsedBAM: %s>" % (describe_files(input_bams),)
        CommandNode.__init__(self,
                             command=pipeline,
                             description=label,
                             dependencies=dependencies)
예제 #17
0
    def __init__(self, config, input_bams, output_bam, dependencies=()):
        """Combine the BAM cat commands with a duplicate-removing filter.

        The filter is 'bam_rmdup_collapsed --remove-duplicates', given the
        concatenation object as IN_STDIN and output_bam as OUT_STDOUT.
        """
        cat_cmds, cat_obj = concatenate_input_bams(config, input_bams)
        filter_argv = ["bam_rmdup_collapsed", "--remove-duplicates"]
        filter_cmd = AtomicCmd(filter_argv,
                               IN_STDIN=cat_obj,
                               OUT_STDOUT=output_bam)

        commands = ParallelCmds(cat_cmds + [filter_cmd])
        files_text = describe_files(input_bams)
        description = "<FilterCollapsedBAM: %s>" % (files_text,)
        CommandNode.__init__(
            self,
            command=commands,
            description=description,
            dependencies=dependencies)
예제 #18
0
    def __init__(self, parameters):
        """Attach the BAM input to the prepared command and finalize it.

        The BAM input commands are followed by the finalized command in a
        single ParallelCmds, which is passed to MultiBAMInputNode.__init__.
        """
        bam_input = MultiBAMInput(parameters.config, parameters.input_files)
        bam_input.setup(parameters.command)
        mapdamage = parameters.command.finalize()

        label = "<mapDamage (plots): %s -> '%s'>" % (
            describe_files(parameters.input_files),
            parameters.output_directory,
        )
        pipeline = ParallelCmds(bam_input.commands + [mapdamage])
        MultiBAMInputNode.__init__(self,
                                   bam_input=bam_input,
                                   command=pipeline,
                                   description=label,
                                   dependencies=parameters.dependencies)
예제 #19
0
    def __init__(self, parameters):
        """Set up the plotting command on top of a MultiBAMInput.

        parameters provides the config, input files, command builder,
        output directory (used in the description) and dependencies.
        """
        bam_input = MultiBAMInput(parameters.config, parameters.input_files)
        bam_input.setup(parameters.command)
        finalized = parameters.command.finalize()

        files_text = describe_files(parameters.input_files)
        description = "<mapDamage (plots): %s -> '%s'>" \
            % (files_text, parameters.output_directory)
        commands = ParallelCmds(bam_input.commands + [finalized])
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=commands,
            description=description,
            dependencies=parameters.dependencies)
예제 #20
0
    def __init__(self, config, input_files, output_file, dependencies=()):
        """Build the 'duphist' command on top of a MultiBAMInput.

        output_file is passed to the command as OUT_STDOUT. The BAM input
        commands and the finalized duphist command are combined with
        ParallelCmds.
        """
        bam_input = MultiBAMInput(config, input_files)

        builder = factory.new("duphist")
        builder.add_value("%(TEMP_IN_BAM)s")
        builder.set_kwargs(OUT_STDOUT=output_file)
        bam_input.setup(builder)
        duphist = builder.finalize()

        pipeline = ParallelCmds(bam_input.commands + [duphist])
        label = "<DuplicateHistogram: %s -> %r>" \
            % (describe_files(input_files), output_file)
        MultiBAMInputNode.__init__(self,
                                   bam_input=bam_input,
                                   command=pipeline,
                                   description=label,
                                   dependencies=dependencies)
예제 #21
0
파일: newick.py 프로젝트: CarlesV/paleomix
    def __init__(self, main_tree_files, support_tree_files, output_file, dependencies = ()):
        """Store the tree files and initialise the base Node.

        Both file arguments are coerced to tuples; their concatenation is
        registered as the node's input files. The description only mentions
        the main tree files.
        """
        self._output_file = output_file
        self._main_tree_files = safe_coerce_to_tuple(main_tree_files)
        self._support_tree_files = safe_coerce_to_tuple(support_tree_files)
        all_tree_files = self._main_tree_files + self._support_tree_files

        label = "<NewickSupport: %s>" % (describe_files(main_tree_files),)
        Node.__init__(self,
                      description=label,
                      input_files=all_tree_files,
                      output_files=output_file,
                      dependencies=dependencies)
예제 #22
0
    def __init__(self, parameters):
        """Store the directory and build the rescale command pipeline.

        The BAM input commands are followed by the finalized command in a
        single ParallelCmds, which is passed to MultiBAMInputNode.__init__.
        """
        self._directory = parameters.directory
        bam_input = MultiBAMInput(parameters.config, parameters.input_files)
        bam_input.setup(parameters.command)
        rescale = parameters.command.finalize()

        label = "<mapDamage (rescale): %s -> %r>" % (
            describe_files(parameters.input_files),
            parameters.output_file,
        )
        pipeline = ParallelCmds(bam_input.commands + [rescale])
        MultiBAMInputNode.__init__(self,
                                   bam_input=bam_input,
                                   command=pipeline,
                                   description=label,
                                   dependencies=parameters.dependencies)
예제 #23
0
    def __init__(self, parameters):
        """Set up the rescale command on top of a MultiBAMInput.

        parameters provides the directory (stored on the instance), config,
        input files, command builder, output file (used in the description)
        and dependencies.
        """
        self._directory = parameters.directory
        bam_input = MultiBAMInput(parameters.config, parameters.input_files)
        bam_input.setup(parameters.command)
        finalized = parameters.command.finalize()

        files_text = describe_files(parameters.input_files)
        description = "<mapDamage (rescale): %s -> %r>" \
            % (files_text, parameters.output_file)
        commands = ParallelCmds(bam_input.commands + [finalized])
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=commands,
            description=description,
            dependencies=parameters.dependencies)
예제 #24
0
    def __init__(self, parameters):
        """Build the single-end AdapterRemoval command set.

        A cat command for the inputs, zip commands for the '.truncated' and
        '.discarded' outputs and the finalized AdapterRemoval command are
        combined with ParallelCmds.
        """
        self._quality_offset = parameters.quality_offset
        self._basename = parameters.basename

        def _zip(extension):
            # Zip command for the output file carrying this extension
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      extension)

        cat_input = _build_cat_command(parameters.input_files, "uncompressed_input")
        zip_truncated = _zip(".truncated")
        zip_discarded = _zip(".discarded")
        adapterrm = parameters.command.finalize()

        pipeline = ParallelCmds([adapterrm, zip_discarded, zip_truncated, cat_input])
        label = "<AdapterRM (SE): %s -> '%s.*'>" \
            % (fileutils.describe_files(parameters.input_files),
               parameters.output_prefix)
        CommandNode.__init__(self,
                             command=pipeline,
                             description=label,
                             dependencies=parameters.dependencies)
예제 #25
0
    def __init__(self, parameters):
        """Build the single-end AdapterRemoval command set.

        A unicat command for the inputs, zip commands for the '.truncated'
        and '.discarded' outputs and the finalized AdapterRemoval command
        are combined with ParallelCmds.
        """
        self._basename = parameters.basename

        def _zip(extension):
            # Zip command for the output file carrying this extension
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      extension)

        unicat = _build_unicat_command(parameters.input_files, "uncompressed_input")
        zip_truncated = _zip(".truncated")
        zip_discarded = _zip(".discarded")
        adapterrm = parameters.command.finalize()

        # Opening of pipes block, so the order of these commands is dependent
        # upon the order of file-opens in atomiccmd and the programs themselves.
        pipeline = ParallelCmds([adapterrm, zip_discarded, zip_truncated, unicat])
        label = "<SE_AdapterRM: %s -> '%s.*'>" \
            % (fileutils.describe_files(parameters.input_files),
               parameters.output_prefix)
        CommandNode.__init__(self,
                             command=pipeline,
                             description=label,
                             dependencies=parameters.dependencies)
예제 #26
0
    def __init__(self, config, input_bams, output_bam, keep_dupes=True, dependencies=()):
        """Build the 'rmdup_collapsed' command on top of a MultiBAMInput.

        output_bam is passed to the command as OUT_STDOUT. The
        '--remove-duplicates' option is added only when keep_dupes is false.
        """
        bam_input = MultiBAMInput(config, input_bams)

        rmdup = factory.new("rmdup_collapsed")
        rmdup.add_value("%(TEMP_IN_BAM)s")
        rmdup.set_kwargs(OUT_STDOUT=output_bam)
        bam_input.setup(rmdup)

        if not keep_dupes:
            rmdup.set_option("--remove-duplicates")

        finalized = rmdup.finalize()
        pipeline = ParallelCmds(bam_input.commands + [finalized])
        label = "<FilterCollapsedBAM: %s>" % (describe_files(bam_input.files),)
        MultiBAMInputNode.__init__(self,
                                   bam_input=bam_input,
                                   command=pipeline,
                                   description=label,
                                   dependencies=dependencies)
예제 #27
0
파일: newick.py 프로젝트: CarlesV/paleomix
    def __init__(self, tree_files, output_file, taxa = (), dependencies = ()):
        """Store the rerooting settings and initialise the base Node.

        The description states "midpoint" when no taxa are given; otherwise
        it shows the repr of the sorted taxa joined by "', '".
        """
        self._output_file = output_file
        self._tree_files = safe_coerce_to_tuple(tree_files)
        self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

        if self._reroot_on_taxa:
            taxa_names = sorted(self._reroot_on_taxa)
            reroot_on = repr("', '".join(taxa_names))
        else:
            reroot_on = "midpoint"

        label = "<NewickReroot (on %s): %s>" \
            % (reroot_on, describe_files(tree_files))

        Node.__init__(self,
                      description=label,
                      input_files=self._tree_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
예제 #28
0
    def __init__(self, config, input_files, output_file, dependencies=()):
        """Set up the 'duphist' command and its BAM input commands.

        The command is created through factory.new, given output_file as
        OUT_STDOUT, attached to the MultiBAMInput and then finalized.
        """
        bam_input = MultiBAMInput(config, input_files)

        duphist_builder = factory.new("duphist")
        duphist_builder.add_value('%(TEMP_IN_BAM)s')
        duphist_builder.set_kwargs(OUT_STDOUT=output_file)
        bam_input.setup(duphist_builder)
        finalized = duphist_builder.finalize()

        commands = ParallelCmds(bam_input.commands + [finalized])

        files_text = describe_files(input_files)
        description = "<DuplicateHistogram: %s -> %r>" % (files_text, output_file)
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=commands,
            description=description,
            dependencies=dependencies)
예제 #29
0
    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Set up a GATK RealignerTargetCreator command.

        The finalized command is handed to CommandNode.__init__ along with
        a description naming the input files and the output file.
        """
        infiles = safe_coerce_to_tuple(infiles)
        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(gatk_jar, jre_options=config.jre_options)

        gatk_options = (
            ("-T", "RealignerTargetCreator"),
            ("-R", "%(IN_REFERENCE)s"),
            ("-o", "%(OUT_INTERVALS)s"),
        )
        for flag, value in gatk_options:
            builder.set_option(flag, value)

        _set_input_files(builder, infiles)
        reference_dict = fileutils.swap_ext(reference, ".dict")
        gatk_check = _get_gatk_version_check(config)
        builder.set_kwargs(
            IN_REFERENCE=reference,
            IN_REF_DICT=reference_dict,
            OUT_INTERVALS=outfile,
            CHECK_GATK=gatk_check,
        )

        label = "<Indel Realigner (training): %s -> %r>" % (describe_files(infiles), outfile)
        finalized = builder.finalize()
        CommandNode.__init__(self, description=label, command=finalized, dependencies=dependencies)
예제 #30
0
    def __init__(self, parameters):
        """Assemble the single-end AdapterRemoval commands.

        Stores the quality offset and basename, then combines the finalized
        AdapterRemoval command with the zip and cat commands in ParallelCmds.
        """
        self._quality_offset = parameters.quality_offset
        self._basename = parameters.basename

        output_format = parameters.output_format
        output_prefix = parameters.output_prefix

        cat_cmd = _build_cat_command(parameters.input_files, "uncompressed_input")
        truncated_cmd = _build_zip_command(output_format, output_prefix, ".truncated")
        discarded_cmd = _build_zip_command(output_format, output_prefix, ".discarded")
        adapterrm_cmd = parameters.command.finalize()

        commands = ParallelCmds([adapterrm_cmd, discarded_cmd, truncated_cmd, cat_cmd])
        files_text = fileutils.describe_files(parameters.input_files)
        description = "<AdapterRM (SE): %s -> '%s.*'>" \
            % (files_text, parameters.output_prefix)
        CommandNode.__init__(
            self,
            command=commands,
            description=description,
            dependencies=parameters.dependencies)
예제 #31
0
    def __init__(self, config, input_bams, output_bam, keep_dupes=True,
                 dependencies=()):
        """Set up the 'rmdup_collapsed' command and its BAM input commands.

        When keep_dupes is false, '--remove-duplicates' is added to the
        command before it is finalized.
        """
        bam_input = MultiBAMInput(config, input_bams)

        filter_builder = factory.new("rmdup_collapsed")
        filter_builder.add_value("%(TEMP_IN_BAM)s")
        filter_builder.set_kwargs(OUT_STDOUT=output_bam)
        bam_input.setup(filter_builder)

        if not keep_dupes:
            filter_builder.set_option("--remove-duplicates")

        filter_cmd = filter_builder.finalize()
        commands = ParallelCmds(bam_input.commands + [filter_cmd])
        files_text = describe_files(bam_input.files)
        description = "<FilterCollapsedBAM: %s>" % (files_text,)
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=commands,
            description=description,
            dependencies=dependencies)
예제 #32
0
파일: gatk.py 프로젝트: health1987/paleomix
    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        """Configure GATK RealignerTargetCreator and initialise CommandNode.

        reference and its '.dict' counterpart (via fileutils.swap_ext) are
        passed as inputs; outfile is passed as OUT_INTERVALS.
        """
        infiles = safe_coerce_to_tuple(infiles)
        jar_path = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        target_creator = AtomicJavaCmdBuilder(jar_path,
                                              jre_options=config.jre_options)

        for flag, value in (("-T", "RealignerTargetCreator"),
                            ("-R", "%(IN_REFERENCE)s"),
                            ("-o", "%(OUT_INTERVALS)s")):
            target_creator.set_option(flag, value)

        _set_input_files(target_creator, infiles)
        dict_file = fileutils.swap_ext(reference, ".dict")
        version_check = _get_gatk_version_check(config)
        target_creator.set_kwargs(IN_REFERENCE=reference,
                                  IN_REF_DICT=dict_file,
                                  OUT_INTERVALS=outfile,
                                  CHECK_GATK=version_check)

        files_text = describe_files(infiles)
        description = "<Indel Realigner (training): %s -> %r>" \
            % (files_text, outfile)
        command = target_creator.finalize()
        CommandNode.__init__(self,
                             description=description,
                             command=command,
                             dependencies=dependencies)
예제 #33
0
    def __init__(self, config, target_name, input_files, output_file, regions_file=None, dependencies=()):
        """Set up the 'depths' command on top of a MultiBAMInput.

        Raises ValueError when a regions file is combined with more than one
        file in bam_input.files.
        """
        bam_input = MultiBAMInput(config, input_files)
        too_many_inputs = len(bam_input.files) > 1
        if too_many_inputs and regions_file:
            raise ValueError(
                "DepthHistogram for regions require single, indexed input BAM file."
            )

        depths_builder = factory.new("depths")
        depths_builder.add_value("%(TEMP_IN_BAM)s")
        depths_builder.add_value("%(OUT_FILE)s")
        depths_builder.set_option("--target-name", target_name)
        depths_builder.set_kwargs(OUT_FILE=output_file)
        bam_input.setup(depths_builder)

        # Regions are optional; the option and file are added as a pair
        if regions_file:
            depths_builder.set_option("--regions-file", "%(IN_REGIONS)s")
            depths_builder.set_kwargs(IN_REGIONS=regions_file)

        commands = ParallelCmds(bam_input.commands + [depths_builder.finalize()])
        files_text = describe_files(bam_input.files)
        description = "<DepthHistogram: %s -> '%s'>" % (files_text, output_file)
        MultiBAMInputNode.__init__(
            self,
            bam_input=bam_input,
            command=commands,
            description=description,
            dependencies=dependencies,
        )
예제 #34
0
    def __init__(self,
                 config,
                 target_name,
                 input_files,
                 output_file,
                 intervals_file=None,
                 print_stats=False,
                 max_contigs=_MAX_CONTIGS,
                 dependencies=()):
        """Store the histogram settings and initialise the base Node.

        Besides the input files themselves, a '.bai' file per input (via
        swap_ext) and the optional intervals file are registered as inputs.
        Executables and auxiliary files of the commands returned by
        concatenate_input_bams are registered as well.
        """
        self._target_name = target_name
        self._input_files = safe_coerce_to_tuple(input_files)
        self._output_file = output_file
        self._intervals = intervals_file
        self._print_stats = print_stats
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        index_files = [swap_ext(bam_file, ".bai")
                       for bam_file in self._input_files]
        node_inputs = list(self._input_files) + index_files
        if intervals_file:
            node_inputs.append(intervals_file)

        # The executable used depends on whether intervals were supplied
        if intervals_file:
            executables = ["coverageBed"]
        else:
            executables = ["genomeCoverageBed"]

        auxiliary_files = []
        for cat_cmd in concatenate_input_bams(config, self._input_files)[0]:
            executables.extend(cat_cmd.executables)
            auxiliary_files.extend(cat_cmd.auxiliary_files)

        files_text = describe_files(self._input_files)
        description = "<DepthHistogram: %s -> '%s'>" \
            % (files_text, self._output_file)
        Node.__init__(self,
                      description=description,
                      input_files=node_inputs,
                      output_files=self._output_file,
                      dependencies=dependencies,
                      executables=executables,
                      auxiliary_files=auxiliary_files)
예제 #35
0
    def __init__(self, parameters):
        """Assemble the paired-end AdapterRemoval commands.

        Raises CmdError when input_files_1 and input_files_2 differ in
        length. Which additional zip commands are built depends on whether
        parameters.version equals VERSION_15.
        """
        self._version = parameters.version
        self._basename = parameters.basename

        count_1 = len(parameters.input_files_1)
        count_2 = len(parameters.input_files_2)
        if count_1 != count_2:
            raise CmdError("Number of mate 1 files differ from mate 2 files: %i != %i"
                           % (count_1, count_2))

        def _zip_for(extension):
            # Zip command for the output file carrying this extension
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      extension)

        zcat_1 = _build_unicat_command(parameters.input_files_1, "uncompressed_input_1")
        zcat_2 = _build_unicat_command(parameters.input_files_2, "uncompressed_input_2")
        zip_1 = _zip_for(".pair1.truncated")
        zip_2 = _zip_for(".pair2.truncated")
        zip_discarded = _zip_for(".discarded")
        adapterrm = parameters.command.finalize()

        if parameters.version == VERSION_15:
            version_extensions = [".collapsed",
                                  ".collapsed.truncated",
                                  ".singleton.truncated"]
        else:
            version_extensions = [".singleton.aln.truncated",
                                  ".singleton.unaln.truncated"]

        # Opening of pipes block, so the order of these commands is dependent
        # upon the order of file-opens in atomiccmd and the programs themselves.
        sequence = [adapterrm, zip_1, zip_2]
        sequence += [_zip_for(extension) for extension in version_extensions]
        sequence += [zip_discarded, zcat_1, zcat_2]
        commands = ParallelCmds(sequence)

        files_text = fileutils.describe_files(parameters.input_files_1)
        description = "<PE_AdapterRM: %s -> '%s.*'>" \
            % (files_text.replace("file", "pair"), parameters.output_prefix)

        CommandNode.__init__(self,
                             command=commands,
                             description=description,
                             dependencies=parameters.dependencies)
예제 #36
0
    def __init__(self, parameters):
        """Assemble the single-end AdapterRemoval commands.

        Stores the basename, then combines the finalized AdapterRemoval
        command with the zip and unicat commands in ParallelCmds.
        """
        self._basename = parameters.basename

        output_format = parameters.output_format
        output_prefix = parameters.output_prefix

        unicat_cmd = _build_unicat_command(parameters.input_files, "uncompressed_input")
        truncated_cmd = _build_zip_command(output_format, output_prefix, ".truncated")
        discarded_cmd = _build_zip_command(output_format, output_prefix, ".discarded")
        adapterrm_cmd = parameters.command.finalize()

        # Opening of pipes block, so the order of these commands is dependent
        # upon the order of file-opens in atomiccmd and the programs themselves.
        commands = ParallelCmds([adapterrm_cmd, discarded_cmd, truncated_cmd, unicat_cmd])
        files_text = fileutils.describe_files(parameters.input_files)
        description = "<SE_AdapterRM: %s -> '%s.*'>" \
            % (files_text, parameters.output_prefix)
        CommandNode.__init__(
            self,
            command=commands,
            description=description,
            dependencies=parameters.dependencies)
예제 #37
0
def test_describe_files__iterable():
    # describe_files is handed an iterator rather than a sequence
    path_iter = iter(("/var/foo/bar", "/var/foo/foo"))
    expected = "2 files in '/var/foo'"
    assert_equal(describe_files(path_iter), expected)
예제 #38
0
def test_describe_files__no_files():
    # An empty tuple is expected to be described as "No files"
    observed = describe_files(())
    assert_equal(observed, "No files")
예제 #39
0
def test_describe_files__same_path_abs__1_differences():
    # Same directory; the filenames differ in their last character only
    first, second = "/var/foo/faz", "/var/foo/fao"
    observed = describe_files((first, second))
    assert_equal(observed, "'/var/foo/fa?'")
예제 #40
0
def test_describe_files__same_path_rel():
    # Two relative paths sharing the directory 'var/foo'
    relative_paths = ("var/foo/bar", "var/foo/foo")
    expected = "2 files in 'var/foo'"
    assert_equal(describe_files(relative_paths), expected)
예제 #41
0
def test_describe_files__single_file():
    # A single path is expected to be described by its repr
    only_path = "/var/foo/bar"
    observed = describe_files((only_path,))
    assert_equal(observed, repr(only_path))
예제 #42
0
def test_describe_files__same_path_abs__3_differences():
    # Same directory; the filenames differ in all three characters
    first, second = "/var/foo/bar", "/var/foo/foo"
    observed = describe_files((first, second))
    assert_equal(observed, "2 files in '/var/foo'")
예제 #43
0
def test_describe_files__different_paths_rel():
    # Relative paths located in different directories
    relative_paths = ("var/foo/bar", "var/bar/foo")
    expected = "2 files"
    assert_equal(describe_files(relative_paths), expected)
예제 #44
0
def test_describe_files__no_files():
    # No paths at all
    no_paths = ()
    expected = "No files"
    assert_equal(describe_files(no_paths), expected)
예제 #45
0
def test_describe_files__iterable():
    # The paths are supplied through iter(), not as a tuple
    paths = ("/var/foo/bar", "/var/foo/foo")
    observed = describe_files(iter(paths))
    assert_equal(observed, "2 files in '/var/foo'")
예제 #46
0
def test_describe_files__different_paths_rel():
    # 'var/foo' and 'var/bar' are distinct directories
    first, second = "var/foo/bar", "var/bar/foo"
    observed = describe_files((first, second))
    assert_equal(observed, "2 files")
예제 #47
0
def test_describe_files__same_path_rel():
    # Both relative paths live in 'var/foo'
    first, second = "var/foo/bar", "var/foo/foo"
    observed = describe_files((first, second))
    assert_equal(observed, "2 files in 'var/foo'")
예제 #48
0
def test_describe_files__single_file():
    # One-element tuple: the description should equal repr() of the path
    single = "/var/foo/bar"
    expected = repr(single)
    assert_equal(describe_files((single, )), expected)