def customize(cls, reference, infile, outfile, regions=None, dependencies=()):
    """Build the 'samtools mpileup | bcftools view | bgzip' command chain.

    The three builders are connected through pipes: mpileup feeds
    'bcftools view', which feeds bgzip, and bgzip writes its STDOUT to
    'outfile'. 'outfile' must end with ".vcf.bgz" (case-insensitive).
    When 'regions' is truthy it is handed to mpileup via the '-l' option.

    Returns a dict with the single key "commands", mapping the names
    "pileup", "genotype" and "bgzip" to the corresponding builders.
    'dependencies' is accepted but not referenced in this function body.
    """
    # NOTE(review): this check disappears when Python is run with -O.
    assert outfile.lower().endswith(".vcf.bgz")

    mpileup_cmd = AtomicCmdBuilder(
        ["samtools", "mpileup"],
        IN_REFERENCE=reference,
        IN_BAMFILE=infile,
        IN_REGIONS=regions,
        OUT_STDOUT=AtomicCmd.PIPE,
        CHECK_SAM=SAMTOOLS_VERSION,
    )
    # Uncompressed output
    mpileup_cmd.set_option("-u")
    mpileup_cmd.set_option("-f", "%(IN_REFERENCE)s")
    mpileup_cmd.add_value("%(IN_BAMFILE)s")
    if regions:
        mpileup_cmd.set_option("-l", "%(IN_REGIONS)s")

    # "-" makes 'bcftools view' read the piped mpileup output from STDIN
    bcftools_cmd = AtomicCmdBuilder(
        ["bcftools", "view"],
        IN_STDIN=mpileup_cmd,
        OUT_STDOUT=AtomicCmd.PIPE,
    )
    bcftools_cmd.add_value("-")

    bgzip_cmd = AtomicCmdBuilder(
        ["bgzip"],
        IN_STDIN=bcftools_cmd,
        OUT_STDOUT=outfile,
    )

    return {
        "commands": {
            "pileup": mpileup_cmd,
            "genotype": bcftools_cmd,
            "bgzip": bgzip_cmd,
        },
    }
def customize(cls, input_file, output_file, algorithm="auto", dependencies=()):
    """Build the command for the preset selected by 'algorithm'.

    The base call is looked up in _PRESETS using the lower-cased
    'algorithm' name. 'input_file' is appended as the sole positional
    value and the command's STDOUT is directed to 'output_file'.

    Returns a dict with the builder under "command" and the unchanged
    'dependencies' under "dependencies".
    """
    preset_call = _PRESETS[algorithm.lower()]

    builder = AtomicCmdBuilder(preset_call)
    builder.add_value("%(IN_FASTA)s")
    builder.set_kwargs(
        IN_FASTA=input_file,
        OUT_STDOUT=output_file,
        CHECK_VERSION=MAFFT_VERSION,
    )

    return {
        "command": builder,
        "dependencies": dependencies,
    }
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up the 'samtools view | AddOrReplaceReadGroups | calmd' node.

    'input_bam' is read by 'samtools view -bu', optionally filtered with
    '-q<min_mapq>' (when 'min_mapq' is truthy) and '-F0x4' (when
    'filter_unmapped' is truthy). The result is piped to Picard's
    AddOrReplaceReadGroups, whose output goes through the temporary file
    "bam.pipe" into 'samtools calmd -b', which writes 'output_bam'.

    'tags' must provide the keys "ID", "SM", "LB", "PU" and "PL". The
    three finalized commands are wrapped in ParallelCmds and passed to
    PicardNode.__init__ together with 'dependencies'.
    """
    view_cmd = AtomicCmdBuilder(
        ("samtools", "view", "-bu"),
        IN_BAM=input_bam,
        OUT_STDOUT=AtomicCmd.PIPE,
    )
    if min_mapq:
        view_cmd.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        view_cmd.set_option("-F", "0x4", sep="")
    view_cmd.add_value("%(IN_BAM)s")

    picard_cmd = picard.picard_command(config, "AddOrReplaceReadGroups")
    picard_cmd.set_option("INPUT", "/dev/stdin", sep="=")
    # The JVM may in some cases print warnings to stdout, which would
    # corrupt a BAM written there; a named pipe is used for output instead.
    picard_cmd.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    picard_cmd.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # The pipeline requires a sorted BAM, and sorting has to happen before
    # calmd is run (this avoids pathologic runtimes).
    picard_cmd.set_option("SORT_ORDER", "coordinate", sep="=")

    # Every tag is overwritten. ID is set explicitly because the default
    # (e.g. '1') trips up pysam's type inference: it is read as a length-1
    # string but written back as a character.
    for tag_name in ("ID", "SM", "LB", "PU", "PL"):
        picard_cmd.set_option(tag_name, tags[tag_name], sep="=")

    picard_cmd.set_kwargs(IN_STDIN=view_cmd, TEMP_OUT_BAM="bam.pipe")

    calmd_cmd = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam,
    )

    # Finalize in pipeline order: filter, read-group rewrite, calmd.
    finalized = []
    for builder in (view_cmd, picard_cmd, calmd_cmd):
        finalized.append(builder.finalize())

    node_description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(finalized),
                        description=node_description,
                        dependencies=dependencies)
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Build the 'unicat | vcf_create_pileup' command pair.

    'unicat' is run on 'in_vcf' and its output is piped to
    'vcf_create_pileup', which is given the output path 'outfile', the
    BAM file 'in_bam' and the reference through the '-f' option. A
    companion output 'outfile' + ".tbi" is registered alongside
    'outfile'.

    Returns a dict with the single key "commands", mapping "unicat" and
    "pileup" to their builders. 'dependencies' is accepted but not
    referenced in this function body.
    """
    unicat_cmd = AtomicCmdBuilder(
        ["unicat", "%(IN_VCF)s"],
        IN_VCF=in_vcf,
        OUT_STDOUT=AtomicCmd.PIPE,
    )

    pileup_cmd = AtomicCmdBuilder(
        ["vcf_create_pileup", "%(OUT_PILEUP)s"],
        IN_REF=reference,
        IN_BAM=in_bam,
        IN_STDIN=unicat_cmd,
        OUT_PILEUP=outfile,
        OUT_TBI=outfile + ".tbi",
    )
    pileup_cmd.add_value("%(IN_BAM)s")
    pileup_cmd.set_option("-f", "%(IN_REF)s")

    return {
        "commands": {
            "unicat": unicat_cmd,
            "pileup": pileup_cmd,
        },
    }
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Create builders for 'unicat' piped into 'vcf_create_pileup'.

    The first command runs 'unicat' on 'in_vcf' and writes to a pipe.
    The second command reads that pipe on STDIN and receives, in order,
    the output path ('outfile'), the BAM ('in_bam') as a positional
    value, and the reference via '-f'. 'outfile' + ".tbi" is declared
    as an additional output file.

    Returns {"commands": {"unicat": <builder>, "pileup": <builder>}}.
    'dependencies' is part of the signature but is not referenced in
    this function body.
    """
    vcf_reader = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                  IN_VCF=in_vcf,
                                  OUT_STDOUT=AtomicCmd.PIPE)

    index_path = outfile + ".tbi"
    pileup_builder = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                                      IN_REF=reference,
                                      IN_BAM=in_bam,
                                      IN_STDIN=vcf_reader,
                                      OUT_PILEUP=outfile,
                                      OUT_TBI=index_path)
    pileup_builder.add_value("%(IN_BAM)s")
    pileup_builder.set_option("-f", "%(IN_REF)s")

    builders = {"unicat": vcf_reader, "pileup": pileup_builder}
    return {"commands": builders}
def test_builder__finalize__calls_atomiccmd():
    """finalize() must instantiate AtomicCmd with the built call and kwargs."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {
        "IN_FILE": "/in/file",
        "OUT_FILE": "/out/file",
        "set_cwd": True,
    }
    # Filled in by the mock; stays empty if AtomicCmd is never constructed.
    invocations = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        builder.finalize()

    assert invocations
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Create the BAM cleanup node (filter, re-tag and sort, calmd).

    Three commands are built and run together via ParallelCmds:

      1. 'samtools view -bu' on 'input_bam', with '-q<min_mapq>' added
         when 'min_mapq' is truthy and '-F0x4' added when
         'filter_unmapped' is truthy.
      2. Picard AddOrReplaceReadGroups reading /dev/stdin, writing
         coordinate-sorted output with compression level 0 to the
         temporary file "bam.pipe", and setting the read-group tags
         ID, SM, LB, PU and PL from 'tags'.
      3. 'samtools calmd -b' reading "bam.pipe" and 'reference', with
         STDOUT written to 'output_bam'.
    """
    read_filter = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                   IN_BAM=input_bam,
                                   OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        read_filter.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        read_filter.set_option("-F", "0x4", sep="")
    read_filter.add_value("%(IN_BAM)s")

    retagger = picard.picard_command(config, "AddOrReplaceReadGroups")
    retagger.set_option("INPUT", "/dev/stdin", sep="=")
    # Write to a named pipe rather than stdout: the JVM sometimes emits
    # warning messages on stdout, which would yield a malformed BAM.
    retagger.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    retagger.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # Sorting is required by the pipeline and must precede calmd, which
    # otherwise can show pathologic runtimes.
    retagger.set_option("SORT_ORDER", "coordinate", sep="=")

    # All tags are replaced; ID in particular, since its default value
    # (e.g. '1') is mishandled by pysam's type inference (read as a
    # length-1 string, written as a character).
    required_tags = ("ID", "SM", "LB", "PU", "PL")
    for key in required_tags:
        retagger.set_option(key, tags[key], sep="=")

    retagger.set_kwargs(IN_STDIN=read_filter, TEMP_OUT_BAM="bam.pipe")

    md_tagger = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam)

    pipeline = (read_filter, retagger, md_tagger)
    commands = [stage.finalize() for stage in pipeline]

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(commands),
                        description=description,
                        dependencies=dependencies)
def test_builder__finalize__calls_atomiccmd():
    """Check that finalize() constructs AtomicCmd with the expected input.

    AtomicCmd is replaced in the builder module by a mock whose
    constructor verifies the positional and keyword arguments and records
    that it was called.
    """
    call_log = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            # Exactly one positional argument: the assembled call
            command = ["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"]
            assert_equal(args, (command,))

            # Both the constructor kwargs and those added via set_kwargs
            assert_equal(kwargs, {"IN_FILE": "/in/file",
                                  "OUT_FILE": "/out/file",
                                  "set_cwd": True})
            call_log.append(True)

    target = "pypeline.atomiccmd.builder.AtomicCmd"
    with Monkeypatch(target, _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")
        builder.finalize()

    assert call_log
def __init__(self, d_bam, dependencies):
    """Set up the Rscript command that produces the GC model files.

    The script 'model_gc.R' (located next to the 'gccorrect' module) is
    run with the following arguments, in order: the destination text
    file, the destination PDF plot, and one input path per node in
    'dependencies'.

    Side effect: d_bam.opts['BamInfo']['--GCmodel'] is set to the
    destination text file path.

    Parameters:
      d_bam        -- object providing 'bam_output', 'bam_name',
                      'bam_temp_local' and a nested 'opts' mapping.
      dependencies -- iterable of nodes exposing 'output_files'; also
                      passed on to CommandNode.__init__.
    """
    aux_r = os.path.join(os.path.dirname(gccorrect.__file__), 'model_gc.R')

    dest = os.path.join(d_bam.bam_output,
                        '%s_GC_Model.txt' % (str(d_bam.bam_name)))
    plot_dest = os.path.splitext(dest)[0] + '.pdf'

    # The output_files of each dependency are concatenated without any
    # separator; presumably every node has exactly one output file --
    # TODO confirm against the nodes passed in by callers.
    infiles = [''.join(node.output_files) for node in dependencies]

    builder = AtomicCmdBuilder(("Rscript", "%(AUX_R)s"))
    builder.add_value('%(OUT_FILEPATH)s')
    builder.add_value('%(OUT_PLOT)s')
    builder.add_multiple_values(infiles)
    builder.set_kwargs(AUX_R=aux_r,
                       OUT_FILEPATH=dest,
                       OUT_PLOT=plot_dest,
                       CHECK_VERSION=Rscript_VERSION)
    cmd = builder.finalize()

    # Record the model location on the shared options object.
    d_bam.opts['BamInfo']['--GCmodel'] = dest

    # Fix: the description previously lacked its closing '>', unlike the
    # "<Name: ... >" form used by other node descriptions.
    description = "<CreateGCModel: '%s' -> '%s'>" % (d_bam.bam_temp_local,
                                                     dest)
    CommandNode.__init__(self,
                         description=description,
                         command=cmd,
                         dependencies=dependencies)
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Build the command chain that post-processes piped alignments.

    The chain is 'safeSAM2BAM' -> ['samtools fixmate'] -> 'samtools sort'
    -> 'samtools calmd -b', each stage reading the previous stage on
    STDIN. The fixmate stage is only created when 'run_fixmate' is true.
    The final calmd stage uses 'reference' and writes its STDOUT to
    'output_file'.

    Returns a tuple (order, commands): 'order' is the list of stage names
    in pipeline order and 'commands' maps each stage name to its builder.
    """
    to_bam = AtomicCmdBuilder("safeSAM2BAM")
    to_bam.set_option("--flag-as-sorted")
    # Remove misses
    to_bam.set_option("-F", "0x4", sep="", fixed=False)
    to_bam.set_kwargs(
        IN_STDIN=stdin,
        OUT_STDOUT=AtomicCmd.PIPE,
        CHECK_PYSAM=PYSAM_VERSION,
        CHECK_SAMTOOLS=SAMTOOLS_VERSION,
    )

    fixmate = None
    if run_fixmate:
        fixmate = AtomicCmdBuilder(
            ("samtools", "fixmate", "-", "-"),
            IN_STDIN=to_bam,
            OUT_STDOUT=AtomicCmd.PIPE,
            CHECK_SAMTOOLS=SAMTOOLS_VERSION,
        )

    sorter = AtomicCmdBuilder(("samtools", "sort"))
    # Output to STDOUT on completion
    sorter.set_option("-o")
    sorter.add_value("-")
    sorter.add_value("%(TEMP_OUT_BAM)s")
    sorter.set_kwargs(
        # Read from fixmate when it exists, otherwise from the converter
        IN_STDIN=fixmate or to_bam,
        OUT_STDOUT=AtomicCmd.PIPE,
        TEMP_OUT_BAM="sorted",
        CHECK_SAM=SAMTOOLS_VERSION,
    )

    md_tagger = AtomicCmdBuilder(("samtools", "calmd"))
    md_tagger.add_value("-")
    md_tagger.add_value("%(IN_REF)s")
    # Output BAM
    md_tagger.set_option("-b")
    md_tagger.set_kwargs(
        IN_REF=reference,
        IN_STDIN=sorter,
        OUT_STDOUT=output_file,
        CHECK_SAM=SAMTOOLS_VERSION,
    )

    order = ["convert", "sort", "calmd"]
    commands = {
        "convert": to_bam,
        "sort": sorter,
        "calmd": md_tagger,
    }
    if run_fixmate:
        # fixmate runs directly after the conversion step
        order.insert(1, "fixmate")
        commands["fixmate"] = fixmate

    return order, commands
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Create builders for the convert / (fixmate) / sort / calmd stages.

    Every stage reads its predecessor on STDIN and, except for the last,
    writes to a pipe. 'stdin' feeds the first stage ('safeSAM2BAM');
    the last stage ('samtools calmd -b') takes 'reference' and writes
    its STDOUT to 'output_file'. A 'samtools fixmate' stage is placed
    after the conversion only when 'run_fixmate' is true.

    Returns (order, commands), where 'order' lists the stage names
    ("convert", optionally "fixmate", "sort", "calmd") in execution
    order and 'commands' is a dict from stage name to builder.
    """
    convert_cmd = AtomicCmdBuilder("safeSAM2BAM")
    convert_cmd.set_option("--flag-as-sorted")
    convert_cmd.set_option("-F", "0x4", sep="", fixed=False)  # Remove misses
    convert_cmd.set_kwargs(IN_STDIN=stdin,
                           OUT_STDOUT=AtomicCmd.PIPE,
                           CHECK_PYSAM=PYSAM_VERSION,
                           CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    fixmate_cmd = None
    if run_fixmate:
        fixmate_cmd = AtomicCmdBuilder(("samtools", "fixmate", "-", "-"),
                                       IN_STDIN=convert_cmd,
                                       OUT_STDOUT=AtomicCmd.PIPE,
                                       CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    sort_cmd = AtomicCmdBuilder(("samtools", "sort"))
    sort_cmd.set_option("-o")  # Output to STDOUT on completion
    sort_cmd.add_value("-")
    sort_cmd.add_value("%(TEMP_OUT_BAM)s")
    sort_cmd.set_kwargs(IN_STDIN=fixmate_cmd or convert_cmd,
                        OUT_STDOUT=AtomicCmd.PIPE,
                        TEMP_OUT_BAM="sorted",
                        CHECK_SAM=SAMTOOLS_VERSION)

    calmd_cmd = AtomicCmdBuilder(("samtools", "calmd"))
    calmd_cmd.add_value("-")
    calmd_cmd.add_value("%(IN_REF)s")
    calmd_cmd.set_option("-b")  # Output BAM
    calmd_cmd.set_kwargs(IN_REF=reference,
                         IN_STDIN=sort_cmd,
                         OUT_STDOUT=output_file,
                         CHECK_SAM=SAMTOOLS_VERSION)

    stage_order = ["convert", "sort", "calmd"]
    stages = {"convert": convert_cmd, "sort": sort_cmd, "calmd": calmd_cmd}
    if run_fixmate:
        stage_order.insert(1, "fixmate")
        stages["fixmate"] = fixmate_cmd

    return stage_order, stages
def test_builder__add_value__two_values():
    """Two add_value() calls append both values to the call, in order."""
    values = ("%(IN_FILE)s", "%(OUT_FILE)s")

    builder = AtomicCmdBuilder("ls")
    for value in values:
        builder.add_value(value)

    expected_call = ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"]
    assert_equal(builder.call, expected_call)