def __init__(self, config, reference, intervals, infiles, outfile, dependencies=()):
    """Set up a node that runs GATK IndelRealigner together with 'samtools calmd'.

    Arguments:
      config       -- object providing 'jar_root' and 'jre_options'.
      reference    -- path of the reference; a '.dict' file next to it is
                      registered as an additional input.
      intervals    -- path passed to '-targetIntervals'.
      infiles      -- one or more input files (coerced to a tuple).
      outfile      -- path of the BAM written by IndelRealigner.
      dependencies -- nodes this node depends on.
    """
    self._basename = os.path.basename(outfile)
    infiles = safe_coerce_to_tuple(infiles)

    gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    realigner = AtomicJavaCmdBuilder(gatk_jar, jre_options=config.jre_options)
    valued_options = (
        ("-T", "IndelRealigner"),
        ("-R", "%(IN_REFERENCE)s"),
        ("-targetIntervals", "%(IN_INTERVALS)s"),
        ("-o", "%(OUT_BAMFILE)s"),
        ("--bam_compression", 0),
    )
    for option, value in valued_options:
        realigner.set_option(option, value)
    realigner.set_option("--disable_bam_indexing")
    _set_input_files(realigner, infiles)

    reference_dict = fileutils.swap_ext(reference, ".dict")
    realigner.set_kwargs(IN_REFERENCE=reference,
                         IN_REF_DICT=reference_dict,
                         IN_INTERVALS=intervals,
                         OUT_BAMFILE=outfile,
                         CHECK_GATK=_get_gatk_version_check(config))

    # calmd reads a temporary file named after the output's basename and
    # writes its result to '<basename>.calmd' in the temporary folder.
    calmd_call = ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
    calmd = AtomicCmd(calmd_call,
                      TEMP_IN_BAM=self._basename,
                      IN_REF=reference,
                      TEMP_OUT_STDOUT=self._basename + ".calmd",
                      CHECK_VERSION=SAMTOOLS_VERSION)

    description = "<Indel Realigner (aligning): %s -> %r>" \
        % (describe_files(infiles), outfile)
    both_commands = ParallelCmds([realigner.finalize(), calmd])

    CommandNode.__init__(self,
                         description=description,
                         command=both_commands,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Create the variant-recalibration node from customized parameters.

    Finalizes the 'VariantRecal' command builder and runs it wrapped in a
    ParallelCmds. The description shows the basename of
    'parameters.model_name'.
    """
    commands = [parameters.commands['VariantRecal'].finalize()]
    # The closing '>' makes the description a well-formed '<...>' label;
    # it was previously left unterminated.
    description = "<Variant Recalibrator: {}>".format(
        os.path.basename(parameters.model_name))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the RAxMLReduce node from customized parameters.

    The builder's 'paths' are kept on the node as '_kwargs' before the
    builder is finalized into the command to run.
    """
    self._kwargs = parameters.command.paths

    finalized = parameters.command.finalize()
    endpoints = (parameters.input_alignment, parameters.output_alignment)
    description = "<RAxMLReduce: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         command=finalized,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the variant-filter node from customized parameters.

    Finalizes the 'Filter' command builder and runs it wrapped in a
    ParallelCmds. The description shows the basename of
    'parameters.outfile'.
    """
    commands = [parameters.commands['Filter'].finalize()]
    # The closing '>' makes the description a well-formed '<...>' label;
    # it was previously left unterminated.
    description = "<Variant Filter: {}>".format(
        os.path.basename(parameters.outfile))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, config, d_make, bedn, mappa, unique, dependencies=()):
    """Create a node that runs 'intersectmappabed.py' on one registered bed file.

    The bed file stored under d_make.bedfiles[bedn] is the input. The output
    is written to '<name>_MappaOnly<ext>' inside config.temp_local, and the
    d_make.bedfiles[bedn] entry is replaced with that output path.

    Arguments:
      config       -- object providing 'temp_local'.
      d_make       -- object with a mutable 'bedfiles' mapping (modified here).
      bedn         -- key of the bed file in d_make.bedfiles.
      mappa        -- path passed to the script as the mappability input.
      unique       -- value passed to the script as str(unique).
      dependencies -- nodes this node depends on.
    """
    source_bed = d_make.bedfiles[bedn]
    stem, suffix = os.path.splitext(os.path.basename(source_bed))
    target_bed = os.path.join(config.temp_local,
                              "{}_MappaOnly{}".format(stem, suffix))
    # Replace the registered bed file with the output of this node.
    d_make.bedfiles[bedn] = target_bed

    script = os.path.join(PREFIX, "intersectmappabed.py")
    argv = ["python", script,
            "%(IN_BED)s", "%(IN_MAPPA)s", str(unique), "%(OUT_DEST)s"]
    intersect = AtomicCmd(argv,
                          IN_BED=source_bed,
                          IN_MAPPA=mappa,
                          OUT_DEST=target_bed,
                          CHECK_VERSION=PYTHON_VERSION)

    description = ("<CLEANBEDFILES: '%s' -> '%s', Uniqueness: '%s'>"
                   % (source_bed, target_bed, unique))
    CommandNode.__init__(self,
                         command=intersect,
                         description=description,
                         dependencies=dependencies)
def __init__(self, infile, outfile, genome, from_start=0, from_end=0,
             strand_relative=False, dependencies=()):
    """Create a node that runs 'slopBed' on a file, writing to 'outfile'.

    Arguments:
      infile          -- file passed to slopBed via '-i'.
      outfile         -- file receiving slopBed's standard output.
      genome          -- file passed to slopBed via '-g'.
      from_start      -- value passed via '-l'; a float additionally
                         enables '-pct'.
      from_end        -- value passed via '-r'; must be of the same type
                         as 'from_start'.
      strand_relative -- if true, '-s' is added to the call.
      dependencies    -- nodes this node depends on.

    Raises:
      ValueError -- if 'from_start' and 'from_end' differ in type.
    """
    if type(from_start) != type(from_end):
        raise ValueError(
            "'from_start' and 'from_end' should be of same type!")

    call = [
        "slopBed", "-i", "%(IN_FILE)s", "-g", "%(IN_GENOME)s", "-l",
        str(from_start), "-r",
        str(from_end)
    ]

    if strand_relative:
        call.append("-s")
    # isinstance rather than an exact type match, so that subclasses of
    # float also enable '-pct' instead of silently dropping it.
    if isinstance(from_start, float):
        call.append("-pct")

    command = AtomicCmd(call,
                        IN_FILE=infile,
                        IN_GENOME=genome,
                        OUT_STDOUT=outfile)

    description = "<SlopBed: '%s' -> '%s'>" % (infile, outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=command,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Create the BAM-merging node from customized parameters.

    The description reports the number of input BAMs and the output path;
    the command is the finalized 'parameters.command' builder.
    """
    bam_count = len(parameters.input_bams)
    description = "<Merge BAMs: %i file(s) -> '%s'>" \
        % (bam_count, parameters.output_bam)
    merge_command = parameters.command.finalize()

    CommandNode.__init__(self,
                         description=description,
                         command=merge_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the variant-merge node from customized parameters.

    Finalizes the 'merge' command builder and runs it wrapped in a
    ParallelCmds.
    """
    commands = [parameters.commands['merge'].finalize()]
    # The closing '>' makes the description a well-formed '<...>' label;
    # it was previously left unterminated.
    description = "<Variant Merge Node>"
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the MarkDuplicates node from customized parameters.

    The description summarizes 'parameters.input_bams' via describe_files;
    the command is the finalized 'parameters.command' builder.
    """
    inputs_summary = describe_files(parameters.input_bams)
    description = "<MarkDuplicates: %s>" % (inputs_summary,)
    markdup_command = parameters.command.finalize()

    CommandNode.__init__(self,
                         description=description,
                         command=markdup_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the samtools variant-calling node from customized parameters.

    Finalizes the 'pileup' and 'bcftools' command builders (in that order)
    and runs them together in a ParallelCmds. The description shows the
    basename of 'parameters.outfile'.
    """
    commands = [parameters.commands[key].finalize()
                for key in ('pileup', 'bcftools')]
    # The closing '>' makes the description a well-formed '<...>' label;
    # it was previously left unterminated.
    description = "<Samtools VariantCaller : {}>".format(
        os.path.basename(parameters.outfile))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, config, reference, input_files, output_directory, dependencies):
    """Create a node that runs 'mapDamage --no-stats' on concatenated BAMs.

    The input BAMs are concatenated with concatenate_input_bams and fed to
    mapDamage on standard input. mapDamage writes into the node's temporary
    directory, and the files listed below are registered as outputs inside
    'output_directory'.
    """
    cat_cmds, cat_obj = concatenate_input_bams(config, input_files)

    # Output keyword -> filename registered inside 'output_directory'.
    output_names = {
        "OUT_FREQ_3p": "3pGtoA_freq.txt",
        "OUT_FREQ_5p": "5pCtoT_freq.txt",
        "OUT_COMP_USER": "dnacomp.txt",
        "OUT_PLOT_FRAG": "Fragmisincorporation_plot.pdf",
        "OUT_PLOT_LEN": "Length_plot.pdf",
        "OUT_LENGTH": "lgdistribution.txt",
        "OUT_MISINCORP": "misincorporation.txt",
        "OUT_LOG": "Runtime_log.txt",
    }
    output_paths = dict((key, os.path.join(output_directory, filename))
                        for key, filename in output_names.items())

    call = ["mapDamage", "--no-stats",
            "-n", _MAPDAMAGE_MAX_READS,
            "-i", "-",
            "-d", "%(TEMP_DIR)s",
            "-r", reference]
    cmd_map = AtomicCmd(call,
                        IN_STDIN=cat_obj,
                        CHECK_VERSION=MAPDAMAGE_VERSION,
                        **output_paths)

    description = "<mapDamage: %i file(s) -> '%s'>" % (len(input_files),
                                                       output_directory)
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(cat_cmds + [cmd_map]),
                         dependencies=dependencies)
def __init__(self, config, reference, input_files, output_file, dependencies):
    """Create a node that runs mapDamage twice: once plain, then '--rescale-only'.

    Each stage gets its own concatenation of the input BAMs (from
    concatenate_input_bams) on standard input and runs as a ParallelCmds;
    the two stages are chained with SequentialCmds. The second stage
    writes the rescaled BAM to 'output_file'.
    """
    # Arguments shared by both mapDamage invocations.
    shared_args = ["-n", _MAPDAMAGE_MAX_READS,
                   "-i", "-",
                   "-d", "%(TEMP_DIR)s",
                   "-r", reference]

    # Stage 1: plain mapDamage run.
    train_cat_cmds, train_stdin = concatenate_input_bams(config, input_files)
    cmd_map = AtomicCmd(["mapDamage"] + shared_args,
                        IN_STDIN=train_stdin,
                        CHECK_VERSION=MAPDAMAGE_VERSION)
    train_cmds = ParallelCmds(train_cat_cmds + [cmd_map])

    # Stage 2: rescale-only run producing the output BAM.
    scale_cat_cmds, scale_stdin = concatenate_input_bams(config, input_files)
    scale_call = (["mapDamage", "--rescale-only"] + shared_args
                  + ["--rescale-out", "%(OUT_BAM)s"])
    cmd_scale = AtomicCmd(scale_call,
                          IN_STDIN=scale_stdin,
                          OUT_BAM=output_file,
                          CHECK_VERSION=MAPDAMAGE_VERSION)
    rescale_cmds = ParallelCmds(scale_cat_cmds + [cmd_scale])

    description = "<mapDamageRescale: %i file(s) -> '%s'>" % (
        len(input_files), output_file)
    CommandNode.__init__(self,
                         description=description,
                         command=SequentialCmds([train_cmds, rescale_cmds]),
                         dependencies=dependencies)
def __init__(self, parameters):
    """Create the mapDamage model node from customized parameters.

    'parameters.directory' is kept on the node as '_directory' and shown
    (via repr) in the description.
    """
    directory = parameters.directory
    self._directory = directory

    description = "<mapDamage (model): %r>" % (directory,)
    model_command = parameters.command.finalize()

    CommandNode.__init__(self,
                         description=description,
                         command=model_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the Bowtie2 indexing node from customized parameters.

    The description shows the input file and the output prefix; the
    command is the finalized 'parameters.command' builder.
    """
    index_command = parameters.command.finalize()
    endpoints = (parameters.input_file, parameters.prefix)
    description = "<Bowtie2 Index '%s' -> '%s.*'>" % endpoints

    CommandNode.__init__(self,
                         description=description,
                         command=index_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the BuildRegions node from customized parameters.

    The description shows 'parameters.infile' and 'parameters.outfile';
    the command is the finalized 'parameters.command' builder.
    """
    regions_command = parameters.command.finalize()
    endpoints = (parameters.infile, parameters.outfile)
    description = "<BuildRegions: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         command=regions_command,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the SNP-list generator node from customized parameters.

    Finalizes the 'Snp' command builder and runs it wrapped in a
    ParallelCmds.
    """
    snp_command = parameters.commands['Snp'].finalize()
    wrapped = ParallelCmds([snp_command])

    CommandNode.__init__(self,
                         command=wrapped,
                         description="<SNP List Generator Node>",
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the genotyping node from customized parameters.

    Finalizes the 'pileup', 'genotype' and 'bgzip' command builders, in
    that order, and runs them together in a ParallelCmds.
    """
    finalized = []
    for stage in ("pileup", "genotype", "bgzip"):
        finalized.append(parameters.commands[stage].finalize())

    endpoints = (parameters.infile, parameters.outfile)
    description = "<Genotyper: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         command=ParallelCmds(finalized),
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the paired-end AdapterRemoval node from customized parameters.

    Besides the finalized AdapterRemoval command, one unicat command per
    mate and one zip command per output stream are created; which output
    streams exist depends on 'parameters.version'. All commands run
    together in a ParallelCmds.

    Raises:
      CmdError -- if the number of mate 1 and mate 2 files differ.
    """
    self._version = parameters.version
    self._basename = parameters.basename

    n_mate_1 = len(parameters.input_files_1)
    n_mate_2 = len(parameters.input_files_2)
    if n_mate_1 != n_mate_2:
        raise CmdError("Number of mate 1 files differ from mate 2 files: %i != %i"
                       % (n_mate_1, n_mate_2))

    def _zip(extension):
        # Zip command for the output stream with the given extension.
        return _build_zip_command(parameters.output_format,
                                  parameters.output_prefix,
                                  extension)

    zcat_pair_1 = _build_unicat_command(parameters.input_files_1,
                                        "uncompressed_input_1")
    zcat_pair_2 = _build_unicat_command(parameters.input_files_2,
                                        "uncompressed_input_2")
    zip_pair_1 = _zip(".pair1.truncated")
    zip_pair_2 = _zip(".pair2.truncated")
    zip_discarded = _zip(".discarded")
    adapterrm = parameters.command.finalize()

    if parameters.version == VERSION_15:
        version_extensions = (".collapsed",
                              ".collapsed.truncated",
                              ".singleton.truncated")
    else:
        version_extensions = (".singleton.aln.truncated",
                              ".singleton.unaln.truncated")

    # Opening of pipes block, so the order of these commands is dependent
    # upon the order of file-opens in atomiccmd and the programs themselves.
    ordered = [adapterrm, zip_pair_1, zip_pair_2]
    for extension in version_extensions:
        ordered.append(_zip(extension))
    ordered.extend((zip_discarded, zcat_pair_1, zcat_pair_2))
    commands = ParallelCmds(ordered)

    pairs_summary = fileutils.describe_files(parameters.input_files_1)
    description = "<PE_AdapterRM: %s -> '%s.*'>" \
        % (pairs_summary.replace("file", "pair"), parameters.output_prefix)

    CommandNode.__init__(self,
                         description=description,
                         command=commands,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the RAxML parsimony-tree node from customized parameters.

    The input alignment, input partitions and output tree paths are kept
    on the node; the command is the finalized 'parameters.command' builder.
    """
    self._input_alignment = parameters.input_alignment
    self._input_partitions = parameters.input_partitions
    self._output_tree = parameters.output_tree

    tree_command = parameters.command.finalize()
    endpoints = (parameters.input_alignment, parameters.output_tree)
    description = "<RAxMLParsimonyTree: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         description=description,
                         command=tree_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the VCF pileup node from customized parameters.

    Finalizes the 'unicat' and 'pileup' command builders, in that order,
    and runs them together in a ParallelCmds.
    """
    finalized = []
    for stage in ("unicat", "pileup"):
        finalized.append(parameters.commands[stage].finalize())

    endpoints = (parameters.in_bam, parameters.outfile)
    description = "<VCFPileup: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         command=ParallelCmds(finalized),
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, infile, dependencies = ()):
    """Create a node that runs 'samtools index' on a BAM file.

    The index is written to the input path with its extension swapped
    for '.bai'.
    """
    index_path = swap_ext(infile, ".bai")
    index_call = ["samtools", "index", "%(IN_BAM)s", "%(OUT_BAI)s"]
    cmd_index = AtomicCmd(index_call,
                          IN_BAM=infile,
                          OUT_BAI=index_path,
                          CHECK_SAM=SAMTOOLS_VERSION)

    description = "<BAMIndex: '%s'>" % (infile,)
    CommandNode.__init__(self,
                         command=cmd_index,
                         description=description,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Create the Parsimonator node from customized parameters.

    The resolved (real) path of the input alignment is stored in
    '_symlinks', and the basename of the output tree in '_output_tree'.
    """
    alignment_path = os.path.realpath(parameters.input_alignment)
    self._symlinks = [alignment_path]
    self._output_tree = os.path.basename(parameters.output_tree)

    parsimonator_command = parameters.command.finalize()
    endpoints = (parameters.input_alignment, parameters.output_tree)
    description = "<Parsimonator: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         description=description,
                         command=parsimonator_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the RAxML bootstrap node from customized parameters.

    The input alignment and partition paths are kept on the node, along
    with the basename of the output alignment.
    """
    self._input_alignment = parameters.input_alignment
    self._input_partition = parameters.input_partition
    self._output_alignment = os.path.basename(parameters.output_alignment)

    bootstrap_command = parameters.command.finalize()
    endpoints = (parameters.input_alignment, parameters.output_alignment)
    description = "<RAxMLBootstrap: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         description=description,
                         command=bootstrap_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the MAFFT node from customized parameters.

    'parameters.output_file' is kept on the node as '_output_file'. The
    description shows the algorithm, input file and output file.
    """
    self._output_file = parameters.output_file

    summary = (parameters.algorithm,
               parameters.input_file,
               parameters.output_file)
    description = "<MAFFTNode (%s): '%s' -> '%s'>" % summary
    mafft_command = parameters.command.finalize()

    CommandNode.__init__(self,
                         description=description,
                         command=mafft_command,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the EXaML node from customized parameters.

    The basename of 'parameters.output_template' is kept on the node as
    '_template'; 'parameters.threads' is forwarded to CommandNode and
    shown in the description.
    """
    self._template = os.path.basename(parameters.output_template)

    examl_command = parameters.command.finalize()
    summary = (parameters.threads,
               parameters.input_binary,
               parameters.output_template)
    description = "<EXaML (%i thread(s)): '%s' -> '%s'>" % summary

    CommandNode.__init__(self,
                         description=description,
                         command=examl_command,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create a VCF pileup node from customized parameters.

    'parameters.infile_vcf' is kept on the node as '_in_vcf'; the command
    is the finalized 'parameters.command' builder.
    """
    vcf_path = parameters.infile_vcf
    self._in_vcf = vcf_path

    pileup_command = parameters.command.finalize()
    description = "<VCFPileup: '%s' -> '%s'>" % (vcf_path, parameters.outfile)

    CommandNode.__init__(self,
                         command=pileup_command,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the paired-end BWA node from customized parameters.

    The BWA prefix is checked first via _check_bwa_prefix. The command
    builders are then finalized in the order given by 'parameters.order'
    and run together in a ParallelCmds.
    """
    _check_bwa_prefix(parameters.prefix)

    finalized = []
    for key in parameters.order:
        finalized.append(parameters.commands[key].finalize())
    command = ParallelCmds(finalized)

    summary = (parameters.threads, parameters.input_file_1)
    description = "<PE_BWA (%i threads): '%s'>" % summary

    CommandNode.__init__(self,
                         description=description,
                         command=command,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, infile, dependencies=()):
    """Create a node that runs 'samtools faidx' on a FASTA file.

    samtools is invoked on a temporary file named after the basename of
    'infile'; the '<infile>.fai' file is registered as the output.
    """
    self._infile = infile

    temp_fasta = os.path.basename(infile)
    index_path = infile + ".fai"
    cmd_faidx = AtomicCmd(["samtools", "faidx", "%(TEMP_IN_FASTA)s"],
                          TEMP_IN_FASTA=temp_fasta,
                          IN_FASTA=infile,
                          OUT_TBI=index_path,
                          CHECK_SAM=SAMTOOLS_VERSION)

    description = "<FastaIndex: '%s'>" % (infile,)
    CommandNode.__init__(self,
                         command=cmd_faidx,
                         description=description,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Create the EXaML parser node from customized parameters.

    The resolved (real) paths of the input alignment and partition are
    stored in '_symlinks', and the basename of the output file in
    '_output_file'.
    """
    alignment_path = os.path.realpath(parameters.input_alignment)
    partition_path = os.path.realpath(parameters.input_partition)
    self._symlinks = [alignment_path, partition_path]
    self._output_file = os.path.basename(parameters.output_file)

    parser_command = parameters.command.finalize()
    endpoints = (parameters.input_alignment, parameters.output_file)
    description = "<EXaMLParser: '%s' -> '%s'>" % endpoints

    CommandNode.__init__(self,
                         description=description,
                         command=parser_command,
                         dependencies=parameters.dependencies)
def __init__(self, config, input_bams, output_bam, dependencies=()):
    """Create a node that pipes concatenated BAMs through 'bam_rmdup_collapsed'.

    The input BAMs are concatenated with concatenate_input_bams and fed to
    'bam_rmdup_collapsed --remove-duplicates' on standard input; its
    standard output is written to 'output_bam'.
    """
    cat_cmds, cat_obj = concatenate_input_bams(config, input_bams)

    rmdup_call = ["bam_rmdup_collapsed", "--remove-duplicates"]
    filteruniq = AtomicCmd(rmdup_call,
                           IN_STDIN=cat_obj,
                           OUT_STDOUT=output_bam)
    pipeline = ParallelCmds(cat_cmds + [filteruniq])

    inputs_summary = describe_files(input_bams)
    description = "<FilterCollapsedBAM: %s>" % (inputs_summary,)

    CommandNode.__init__(self,
                         description=description,
                         command=pipeline,
                         dependencies=dependencies)