def _do_test_parallel_commands__ready_two(first, second, result):
    """Check ParallelCmds.ready() for a pair of commands in given states."""
    # The first command must be polled at least once; no call-count is
    # enforced on the second, matching the original expectations.
    mock_a = flexmock(AtomicCmd(["ls"]))
    mock_a.should_receive('ready').and_return(first).at_least.once
    mock_b = flexmock(AtomicCmd(["ls"]))
    mock_b.should_receive('ready').and_return(second)

    parallel = ParallelCmds([mock_a, mock_b])
    assert_equal(parallel.ready(), result)
def __init__(self, config, reference, input_files, output_file, dependencies):
    """Two-pass mapDamage node: pass 1 trains a damage model, pass 2
    rescales base qualities and writes the rescaled BAM to 'output_file'.
    """
    # Pass 1 (training): concatenate the input BAMs and stream them into
    # mapDamage, which writes its model files into the temp directory.
    cat_cmds, cat_obj = concatenate_input_bams(config, input_files)
    cmd_map = AtomicCmd(["mapDamage",
                         "-n", _MAPDAMAGE_MAX_READS,
                         "-i", "-",
                         "-d", "%(TEMP_DIR)s",
                         "-r", reference],
                        IN_STDIN=cat_obj,
                        CHECK_VERSION=MAPDAMAGE_VERSION)
    train_cmds = ParallelCmds(cat_cmds + [cmd_map])

    # Pass 2 (rescaling): re-read the inputs; '-d %(TEMP_DIR)s' points at
    # the same temp directory, so the model from pass 1 is presumably
    # picked up here -- confirm against CommandNode temp-dir semantics.
    cat_cmds, cat_obj = concatenate_input_bams(config, input_files)
    cmd_scale = AtomicCmd(["mapDamage", "--rescale-only",
                           "-n", _MAPDAMAGE_MAX_READS,
                           "-i", "-",
                           "-d", "%(TEMP_DIR)s",
                           "-r", reference,
                           "--rescale-out", "%(OUT_BAM)s"],
                          IN_STDIN=cat_obj,
                          OUT_BAM=output_file,
                          CHECK_VERSION=MAPDAMAGE_VERSION)
    rescale_cmds = ParallelCmds(cat_cmds + [cmd_scale])

    description = "<mapDamageRescale: %i file(s) -> '%s'>" % (
        len(input_files), output_file)

    # The two passes run back-to-back; within each pass, the cat command
    # and mapDamage are connected by a pipe and must run in parallel.
    CommandNode.__init__(self,
                         command=SequentialCmds([train_cmds, rescale_cmds]),
                         description=description,
                         dependencies=dependencies)
def test_parallel_commands__join_before_run():
    """join() before run() must not touch the children; results are None."""
    def _make_mock(value):
        # join() must never be forwarded to an un-started command.
        mock = flexmock(AtomicCmd("true"))
        mock.should_receive('join').and_return([value]).never
        return mock

    parallel = ParallelCmds([_make_mock(value)
                             for value in reversed(range(3))])
    assert_equal(parallel.join(), [None, None, None])
def test_parallel_commands__run():
    """run() is forwarded, with the temp dir, to every wrapped command."""
    def _expect_run():
        mock = flexmock(AtomicCmd(["ls"]))
        mock.should_receive('run').with_args("xTMPx").once
        return mock

    parallel = ParallelCmds([_expect_run() for _ in range(3)])
    parallel.run("xTMPx")
def __init__(self, parameters):
    """Node running the samtools 'pileup' and 'bcftools' commands as a
    parallel (piped) pair to call variants into parameters.outfile.
    """
    commands = [parameters.commands[key].finalize()
                for key in ('pileup', 'bcftools')]
    # Fixed: description was missing its closing '>', unlike every other
    # node description in this file.
    description = "<Samtools VariantCaller : {}>".format(
        os.path.basename(parameters.outfile))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, config, target_name, input_files, output_file,
             regions_file=None, dependencies=()):
    """Node building a depth histogram for one or more BAMs.

    Raises ValueError if a regions file is combined with multiple input
    BAMs, since region-limited runs require a single, indexed BAM.
    """
    bam_input = MultiBAMInput(config, input_files)
    if len(bam_input.files) > 1 and regions_file:
        raise ValueError("DepthHistogram for regions require single, "
                         "indexed input BAM file.")

    # Build the 'depths' tool invocation; the BAM itself is supplied via
    # the TEMP_IN_BAM key, wired up by bam_input.setup() below.
    builder = factory.new("depths")
    builder.add_value("%(TEMP_IN_BAM)s")
    builder.add_value("%(OUT_FILE)s")
    builder.set_option("--target-name", target_name)
    builder.set_kwargs(OUT_FILE=output_file)
    bam_input.setup(builder)

    if regions_file:
        builder.set_option('--regions-file', '%(IN_REGIONS)s')
        builder.set_kwargs(IN_REGIONS=regions_file)

    # Any helper commands needed to provide the input BAM run alongside
    # the depths command.
    command = ParallelCmds(bam_input.commands + [builder.finalize()])
    description = "<DepthHistogram: %s -> '%s'>" \
        % (describe_files(bam_input.files), output_file)
    MultiBAMInputNode.__init__(self,
                               bam_input=bam_input,
                               command=command,
                               description=description,
                               dependencies=dependencies)
def __init__(self, parameters):
    """Node running the finalized 'Filter' command to filter variants."""
    commands = [parameters.commands['Filter'].finalize()]
    # Fixed: description was missing its closing '>', unlike every other
    # node description in this file.
    description = "<Variant Filter: {}>".format(
        os.path.basename(parameters.outfile))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Node running the finalized 'VariantRecal' command to build a
    variant recalibration model (parameters.model_name)."""
    commands = [parameters.commands['VariantRecal'].finalize()]
    # Fixed: description was missing its closing '>', unlike every other
    # node description in this file.
    description = "<Variant Recalibrator: {}>".format(
        os.path.basename(parameters.model_name))
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Node running the finalized 'merge' command to merge variants."""
    commands = [parameters.commands['merge'].finalize()]
    # Fixed: description was missing its closing '>', unlike every other
    # node description in this file.
    description = "<Variant Merge Node>"
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(commands),
                         dependencies=parameters.dependencies)
def __init__(self, config, reference, input_files, output_directory,
             dependencies):
    """Runs mapDamage ('--no-stats') on a set of BAMs, collecting the
    damage statistics, tables, and plots in 'output_directory'."""
    # The input BAMs are concatenated and streamed into mapDamage on
    # stdin; cat_cmds must run alongside the mapDamage command.
    cat_cmds, cat_obj = concatenate_input_bams(config, input_files)
    cmd_map = AtomicCmd(
        ["mapDamage", "--no-stats",
         "-n", _MAPDAMAGE_MAX_READS,
         "-i", "-",
         "-d", "%(TEMP_DIR)s",
         "-r", reference],
        IN_STDIN=cat_obj,
        # Files expected to be produced by mapDamage; registered via the
        # OUT_* keys (presumably mapping them from the temp dir into
        # 'output_directory' -- see AtomicCmd for exact semantics).
        OUT_FREQ_3p=os.path.join(output_directory, "3pGtoA_freq.txt"),
        OUT_FREQ_5p=os.path.join(output_directory, "5pCtoT_freq.txt"),
        OUT_COMP_USER=os.path.join(output_directory, "dnacomp.txt"),
        OUT_PLOT_FRAG=os.path.join(output_directory,
                                   "Fragmisincorporation_plot.pdf"),
        OUT_PLOT_LEN=os.path.join(output_directory, "Length_plot.pdf"),
        OUT_LENGTH=os.path.join(output_directory, "lgdistribution.txt"),
        OUT_MISINCORP=os.path.join(output_directory,
                                   "misincorporation.txt"),
        OUT_LOG=os.path.join(output_directory, "Runtime_log.txt"),
        CHECK_VERSION=MAPDAMAGE_VERSION)

    description = "<mapDamage: %i file(s) -> '%s'>" % (len(input_files),
                                                       output_directory)

    CommandNode.__init__(self,
                         command=ParallelCmds(cat_cmds + [cmd_map]),
                         description=description,
                         dependencies=dependencies)
def __init__(self, config, reference, intervals, infiles, outfile,
             dependencies=()):
    """GATK IndelRealigner node: realigns reads around the given target
    intervals and post-processes the result with 'samtools calmd'."""
    self._basename = os.path.basename(outfile)

    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    command = AtomicJavaCmdBuilder(jar_file,
                                   jre_options=config.jre_options)
    command.set_option("-T", "IndelRealigner")
    command.set_option("-R", "%(IN_REFERENCE)s")
    command.set_option("-targetIntervals", "%(IN_INTERVALS)s")
    command.set_option("-o", "%(OUT_BAMFILE)s")
    # The intermediate BAM is re-processed by calmd below, so compression
    # and indexing are disabled here.
    command.set_option("--bam_compression", 0)
    command.set_option("--disable_bam_indexing")
    _set_input_files(command, infiles)
    command.set_kwargs(IN_REFERENCE=reference,
                       IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                       IN_INTERVALS=intervals,
                       OUT_BAMFILE=outfile,
                       CHECK_GATK=_get_gatk_version_check(config))

    # NOTE(review): calmd reads the realigned BAM by its basename in the
    # node's temp dir and writes '<basename>.calmd'; presumably a
    # teardown step (not visible here) swaps these files -- confirm
    # against the rest of this class.
    calmd = AtomicCmd(["samtools", "calmd", "-b",
                       "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                      TEMP_IN_BAM=self._basename,
                      IN_REF=reference,
                      TEMP_OUT_STDOUT=self._basename + ".calmd",
                      CHECK_VERSION=SAMTOOLS_VERSION)

    description = "<Indel Realigner (aligning): %s -> %r>" \
        % (describe_files(infiles), outfile)
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds([command.finalize(), calmd]),
                         dependencies=dependencies)
def __init__(self, parameters):
    """Genotyping node running a pileup | genotype | bgzip pipeline."""
    finalized = []
    for key in ("pileup", "genotype", "bgzip"):
        finalized.append(parameters.commands[key].finalize())

    description = "<Genotyper: '%s' -> '%s'>" \
        % (parameters.infile, parameters.outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(finalized),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Node running the finalized 'Snp' command to produce a SNP list."""
    snp_command = parameters.commands['Snp'].finalize()
    CommandNode.__init__(self,
                         description="<SNP List Generator Node>",
                         command=ParallelCmds([snp_command]),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Paired-end AdapterRemoval node: trims adapters from matched sets
    of mate-1/mate-2 files and recompresses each output category.

    Raises CmdError if the number of mate-1 and mate-2 files differ.
    """
    self._version = parameters.version
    self._basename = parameters.basename
    if len(parameters.input_files_1) != len(parameters.input_files_2):
        raise CmdError("Number of mate 1 files differ from mate 2 files: %i != %i" \
            % (len(parameters.input_files_1), len(parameters.input_files_2)))

    # Commands decompressing the mate-1 / mate-2 inputs (see
    # _build_unicat_command for how the data is made available).
    zcat_pair_1 = _build_unicat_command(parameters.input_files_1,
                                        "uncompressed_input_1")
    zcat_pair_2 = _build_unicat_command(parameters.input_files_2,
                                        "uncompressed_input_2")
    # Commands recompressing each category of output reads.
    zip_pair_1 = _build_zip_command(parameters.output_format,
                                    parameters.output_prefix,
                                    ".pair1.truncated")
    zip_pair_2 = _build_zip_command(parameters.output_format,
                                    parameters.output_prefix,
                                    ".pair2.truncated")
    zip_discarded = _build_zip_command(parameters.output_format,
                                       parameters.output_prefix,
                                       ".discarded")
    adapterrm = parameters.command.finalize()

    commands = [adapterrm, zip_pair_1, zip_pair_2]
    # The set (and names) of collapsed/singleton output files differs
    # between AdapterRemoval versions.
    if parameters.version == VERSION_15:
        zip_aln = _build_zip_command(parameters.output_format,
                                     parameters.output_prefix,
                                     ".collapsed")
        zip_aln_trunc = _build_zip_command(parameters.output_format,
                                           parameters.output_prefix,
                                           ".collapsed.truncated")
        zip_unaligned = _build_zip_command(parameters.output_format,
                                           parameters.output_prefix,
                                           ".singleton.truncated")
        commands += [zip_aln, zip_aln_trunc, zip_unaligned]
    else:
        zip_aln = _build_zip_command(parameters.output_format,
                                     parameters.output_prefix,
                                     ".singleton.aln.truncated")
        zip_unaligned = _build_zip_command(parameters.output_format,
                                           parameters.output_prefix,
                                           ".singleton.unaln.truncated")
        commands += [zip_aln, zip_unaligned]
    commands += [zip_discarded, zcat_pair_1, zcat_pair_2]

    # Opening of pipes blocks, so the order of these commands is dependent
    # upon the order of file-opens in atomiccmd and the programs
    # themselves; do not reorder casually.
    commands = ParallelCmds(commands)

    description = "<PE_AdapterRM: %s -> '%s.*'>" \
        % (fileutils.describe_files(parameters.input_files_1).replace("file", "pair"),
           parameters.output_prefix)

    CommandNode.__init__(self,
                         command=commands,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Node piping a BAM (via 'unicat') into a samtools pileup."""
    keys = ("unicat", "pileup")
    finalized = [parameters.commands[name].finalize() for name in keys]

    description = "<VCFPileup: '%s' -> '%s'>" \
        % (parameters.in_bam, parameters.outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds(finalized),
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Paired-end BWA alignment node; sub-commands run as one pipeline."""
    _check_bwa_prefix(parameters.prefix)

    # Finalize the sub-commands in the order dictated by the parameters.
    pipeline = []
    for key in parameters.order:
        pipeline.append(parameters.commands[key].finalize())

    description = "<PE_BWA (%i threads): '%s'>" \
        % (parameters.threads, parameters.input_file_1)

    CommandNode.__init__(self,
                         command=ParallelCmds(pipeline),
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Bowtie2 alignment node for SE or PE reads."""
    pipeline = [parameters.commands[key].finalize()
                for key in parameters.order]

    # A second input file implies a paired-end run.
    if parameters.input_file_2:
        aln_type = "PE"
    else:
        aln_type = "SE"

    description = "<Bowtie2 (%s, %i threads): '%s'>" \
        % (aln_type, parameters.threads, parameters.input_file_1)

    CommandNode.__init__(self,
                         command=ParallelCmds(pipeline),
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, config, input_bams, output_bam, dependencies=()):
    """Removes PCR duplicates among collapsed reads in the input BAMs."""
    cat_cmds, cat_obj = concatenate_input_bams(config, input_bams)

    # The concatenated BAM stream is piped straight into the filter.
    rmdup = AtomicCmd(["bam_rmdup_collapsed", "--remove-duplicates"],
                      IN_STDIN=cat_obj,
                      OUT_STDOUT=output_bam)

    description = "<FilterCollapsedBAM: %s>" % (
        describe_files(input_bams), )

    CommandNode.__init__(self,
                         command=ParallelCmds(cat_cmds + [rmdup]),
                         description=description,
                         dependencies=dependencies)
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Cleans a BAM: filters reads (samtools view), rewrites read-groups
    (Picard AddOrReplaceReadGroups), and updates tags (samtools calmd).

    The three commands are connected via pipes and run in parallel.
    """
    flt_params = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                  IN_BAM=input_bam,
                                  OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        # Drop reads below the mapping-quality cutoff.
        flt_params.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        # Drop unmapped reads (SAM flag 0x4).
        flt_params.set_option("-F", "0x4", sep="")
    flt_params.add_value("%(IN_BAM)s")

    jar_params = picard.picard_command(config, "AddOrReplaceReadGroups")
    jar_params.set_option("INPUT", "/dev/stdin", sep="=")
    # Output is written to a named pipe, since the JVM may, in some cases,
    # emit warning messages to stdout, resulting in a malformed BAM.
    jar_params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    jar_params.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # Ensure that the BAM is sorted; this is required by the pipeline, and
    # needs to be done before calling calmd (avoiding pathologic runtimes).
    jar_params.set_option("SORT_ORDER", "coordinate", sep="=")

    # All tags are overwritten; ID is set since the default (e.g. '1')
    # causes problems with pysam due to type inference (is read as a length
    # 1 string, but written as a character).
    for tag in ("ID", "SM", "LB", "PU", "PL"):
        jar_params.set_option(tag, tags[tag], sep="=")

    jar_params.set_kwargs(IN_STDIN=flt_params,
                          TEMP_OUT_BAM="bam.pipe")

    calmd = AtomicCmdBuilder(["samtools", "calmd", "-b",
                              "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                             IN_REF=reference,
                             TEMP_IN_BAM="bam.pipe",
                             OUT_STDOUT=output_bam)

    commands = [cmd.finalize() for cmd in (flt_params, jar_params, calmd)]
    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(commands),
                        description=description,
                        dependencies=dependencies)
def __init__(self, parameters):
    """mapDamage plotting node taking one or more BAMs as input."""
    bam_input = MultiBAMInput(parameters.config, parameters.input_files)
    bam_input.setup(parameters.command)
    plot_cmd = parameters.command.finalize()

    description = "<mapDamage (plots): %s -> '%s'>" \
        % (describe_files(parameters.input_files),
           parameters.output_directory)

    MultiBAMInputNode.__init__(self,
                               bam_input=bam_input,
                               command=ParallelCmds(bam_input.commands
                                                    + [plot_cmd]),
                               description=description,
                               dependencies=parameters.dependencies)
def __init__(self, parameters):
    """mapDamage rescaling node; writes a rescaled BAM to output_file."""
    self._directory = parameters.directory

    bam_input = MultiBAMInput(parameters.config, parameters.input_files)
    bam_input.setup(parameters.command)
    rescale_cmd = parameters.command.finalize()

    description = "<mapDamage (rescale): %s -> %r>" \
        % (describe_files(parameters.input_files),
           parameters.output_file)

    MultiBAMInputNode.__init__(self,
                               bam_input=bam_input,
                               command=ParallelCmds(bam_input.commands
                                                    + [rescale_cmd]),
                               description=description,
                               dependencies=parameters.dependencies)
def __init__(self, parameters):
    """BWA-SW alignment node for SE or PE reads."""
    pipeline = ParallelCmds(
        [parameters.commands[key].finalize() for key in parameters.order])

    # A second input file implies a paired-end run.
    if parameters.input_file_2:
        description = "<PE_BWASW (%i threads): '%s', '%s' -> '%s'>" \
            % (parameters.threads, parameters.input_file_1,
               parameters.input_file_2, parameters.output_file)
    else:
        description = "<BWASW (%i threads): '%s' -> '%s'>" \
            % (parameters.threads, parameters.input_file_1,
               parameters.output_file)

    CommandNode.__init__(self,
                         command=pipeline,
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, config, input_files, output_file, dependencies=()):
    """Generates a histogram of PCR duplicates from one or more BAMs."""
    bam_input = MultiBAMInput(config, input_files)

    builder = factory.new("duphist")
    builder.add_value('%(TEMP_IN_BAM)s')
    builder.set_kwargs(OUT_STDOUT=output_file)
    bam_input.setup(builder)

    description = "<DuplicateHistogram: %s -> %r>" \
        % (describe_files(input_files), output_file)

    MultiBAMInputNode.__init__(self,
                               bam_input=bam_input,
                               command=ParallelCmds(bam_input.commands
                                                    + [builder.finalize()]),
                               description=description,
                               dependencies=dependencies)
def __init__(self, parameters):
    """Single-end BWA alignment node."""
    _check_bwa_prefix(parameters.prefix)

    pipeline = [parameters.commands[key].finalize()
                for key in parameters.order]
    description = _get_node_description(name="BWA",
                                        algorithm="SE",
                                        input_files_1=(parameters.input_file, ),
                                        input_files_2=(),
                                        prefix=parameters.prefix,
                                        threads=parameters.threads)

    CommandNode.__init__(self,
                         command=ParallelCmds(pipeline),
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Bowtie2 alignment node; algorithm label reflects SE/PE mode."""
    # A second input file implies a paired-end run.
    algorithm = "PE" if parameters.input_file_2 else "SE"
    description = _get_node_description(name="Bowtie2",
                                        algorithm=algorithm,
                                        input_files_1=parameters.input_file_1,
                                        input_files_2=parameters.input_file_2,
                                        prefix=parameters.prefix,
                                        threads=parameters.threads)

    pipeline = [parameters.commands[key].finalize()
                for key in parameters.order]

    CommandNode.__init__(self,
                         command=ParallelCmds(pipeline),
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """BWA backtrack alignment node; algorithm label reflects SE/PE mode."""
    _check_bwa_prefix(parameters.prefix)
    algorithm = parameters.algorithm.upper()
    algorithm += "_PE" if parameters.input_file_2 else "_SE"
    description = _get_node_description(name="BWA",
                                        algorithm=algorithm,
                                        input_files_1=parameters.input_file_1,
                                        input_files_2=parameters.input_file_2,
                                        prefix=parameters.prefix)

    # NOTE(review): 'itervalues' is Python 2 only; dict iteration order
    # determines the order passed to ParallelCmds here (other nodes in
    # this file use an explicit 'parameters.order') -- verify this is
    # intentional.
    command = ParallelCmds([cmd.finalize()
                            for cmd in parameters.commands.itervalues()])
    CommandNode.__init__(self,
                         command=command,
                         description=description,
                         threads=parameters.threads,
                         dependencies=parameters.dependencies)
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Cleans a BAM: filters reads, rewrites read-groups via the Picard
    AddOrReplaceReadGroups jar, and updates tags with samtools calmd.

    The three commands are connected via pipes and run in parallel.
    """
    call = ["samtools", "view", "-bu"]
    if min_mapq > 0:
        # Drop reads below the mapping-quality cutoff.
        call.append("-q%i" % min_mapq)
    if filter_unmapped:
        # Drop unmapped reads (SAM flag 0x4).
        call.append("-F0x4")
    call.append("%(IN_BAM)s")

    flt = AtomicCmd(call,
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    params = AtomicJavaCmdBuilder(jar=jar_file,
                                  jre_options=config.jre_options)
    params.set_option("INPUT", "/dev/stdin", sep="=")
    # Output goes to a named pipe ('bam.pipe') rather than stdout --
    # presumably to avoid stray JVM messages corrupting the BAM stream,
    # as noted for the sibling cleanup node in this file.
    params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    params.set_option("COMPRESSION_LEVEL", "0", sep="=")
    params.set_option("SORT_ORDER", "coordinate", sep="=")
    for tag in ("SM", "LB", "PU", "PL"):
        params.set_option(tag, tags[tag], sep="=")
    params.set_kwargs(IN_STDIN=flt,
                      TEMP_OUT_BAM="bam.pipe")
    annotate = params.finalize()

    calmd = AtomicCmd(["samtools", "calmd", "-b",
                       "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                      IN_REF=reference,
                      TEMP_IN_BAM="bam.pipe",
                      OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds([flt, annotate, calmd]),
                        description=description,
                        dependencies=dependencies)
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, dependencies=()):
    """Cleans a BAM: always drops unmapped reads and reads below
    'min_mapq', rewrites read-groups, and updates tags via calmd.

    All three commands are connected stdout-to-stdin and run in parallel.
    """
    # '-F0x4' drops unmapped reads unconditionally in this variant.
    flt = AtomicCmd(["samtools", "view", "-bu", "-F0x4",
                     "-q%i" % min_mapq, "%(IN_BAM)s"],
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    params.set_option("INPUT", "/dev/stdin", sep="=")
    params.set_option("OUTPUT", "/dev/stdout", sep="=")
    # QUIET suppresses Picard chatter; output here goes to stdout, so any
    # extra output would end up in the BAM stream.
    params.set_option("QUIET", "true", sep="=")
    params.set_option("COMPRESSION_LEVEL", "0", sep="=")

    # NOTE(review): 'iteritems' is Python 2 only. "PG"/"Target"/"PU_cur"
    # tags are deliberately skipped; "PU_src" is mapped to "PU".
    for (tag, value) in sorted(tags.iteritems()):
        if tag not in ("PG", "Target", "PU_src", "PU_cur"):
            params.set_option(tag, value, sep="=")
        elif tag == "PU_src":
            params.set_option("PU", value, sep="=")

    params.set_kwargs(IN_STDIN=flt,
                      OUT_STDOUT=AtomicCmd.PIPE)
    annotate = params.finalize()

    calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                      IN_REF=reference,
                      IN_STDIN=annotate,
                      OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)
    CommandNode.__init__(self,
                         command=ParallelCmds([flt, annotate, calmd]),
                         description=description,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Single-end AdapterRemoval node; trims adapters and recompresses
    the kept and discarded reads."""
    self._quality_offset = parameters.quality_offset
    self._basename = parameters.basename

    uncompress = _build_cat_command(parameters.input_files,
                                    "uncompressed_input")
    recompress_kept = _build_zip_command(parameters.output_format,
                                         parameters.output_prefix,
                                         ".truncated")
    recompress_dropped = _build_zip_command(parameters.output_format,
                                            parameters.output_prefix,
                                            ".discarded")
    trim = parameters.command.finalize()

    description = "<AdapterRM (SE): %s -> '%s.*'>" \
        % (fileutils.describe_files(parameters.input_files),
           parameters.output_prefix)

    # Command order is preserved from the original; opening of pipes may
    # block, so it should not be changed casually.
    CommandNode.__init__(self,
                         command=ParallelCmds([trim,
                                               recompress_dropped,
                                               recompress_kept,
                                               uncompress]),
                         description=description,
                         dependencies=parameters.dependencies)
def test_pformat__sets__nested():
    """pformat() of a SequentialCmds containing a ParallelCmds renders the
    full nested structure, with <NN> ids cross-referencing piped streams.

    The expected string is byte-exact; the per-command temp-file names
    embed id(...) of each AtomicCmd, hence the .format() at the end.
    """
    # cmd_1 | cmd_2 are piped; cmd_3 is independent.
    cmd_1 = AtomicCmd(("echo", "foo"),
                      OUT_STDOUT=AtomicCmd.PIPE)
    cmd_2 = AtomicCmd("gzip", IN_STDIN=cmd_1)
    cmd_3 = AtomicCmd("sha1sum")
    set_1 = ParallelCmds((cmd_1, cmd_2))
    set_2 = SequentialCmds((set_1, cmd_3))
    assert_equal(pformat(set_2),
                 ("<Sequential commands:\n"
                  "  - Parallel commands:\n"
                  "      - <00> Command = ['echo', 'foo']\n"
                  "             STDOUT  = <01>\n"
                  "             STDERR* = '${{TEMP_DIR}}/pipe_echo_{cmd_1_id}.stderr'\n"
                  "      - <01> Command = ['gzip']\n"
                  "             STDIN   = <00>\n"
                  "             STDOUT* = '${{TEMP_DIR}}/pipe_gzip_{cmd_2_id}.stdout'\n"
                  "             STDERR* = '${{TEMP_DIR}}/pipe_gzip_{cmd_2_id}.stderr'\n"
                  "  - <02> Command = ['sha1sum']\n"
                  "         STDOUT* = '${{TEMP_DIR}}/pipe_sha1sum_{cmd_3_id}.stdout'\n"
                  "         STDERR* = '${{TEMP_DIR}}/pipe_sha1sum_{cmd_3_id}.stderr'>") \
                 .format(cmd_1_id = id(cmd_1),
                         cmd_2_id = id(cmd_2),
                         cmd_3_id = id(cmd_3)))
def _do_test_parallel_commands__ready_single(value):
    """ParallelCmds with one command mirrors that command's ready()."""
    mock = flexmock(AtomicCmd(["ls"]))
    mock.should_receive('ready').and_return(value).at_least.once

    parallel = ParallelCmds([mock])
    assert_equal(parallel.ready(), value)
def test_parallel_commands__join_after_run(temp_folder):
    """After run(), join() returns one exit-code per wrapped command."""
    children = [AtomicCmd("true") for _ in range(3)]
    parallel = ParallelCmds(children)
    parallel.run(temp_folder)
    assert_equal(parallel.join(), [0, 0, 0])
def test_parallel_commands__join_failure_3(temp_folder):
    """Mixed termination: two commands killed by SIGTERM, one exits 1."""
    parallel = ParallelCmds(_setup_mocks_for_failure(True, True, False))
    parallel.run(temp_folder)
    assert_equal(parallel.join(), ['SIGTERM', 'SIGTERM', 1])