def customize(cls, input_alignment, input_partition,
              output_alignment, output_partition, dependencies=()):
    """Build the 'raxmlHPC -f c' call that reads and reduces an alignment.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partition  -- Registered with the builder as IN_PARTITION.
    output_alignment -- Registered with the builder as OUT_ALIGNMENT.
    output_partition -- Registered with the builder as OUT_PARTITION.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    raxml = AtomicCmdBuilder("raxmlHPC")

    options = (
        # Read and (in the case of empty columns) reduce the input
        ("-f", "c"),
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # A model is required, but it is not used
        ("-m", "GTRGAMMA"),
        # Makes sure that output is written to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Sequences and partitions are read through symlinks, to prevent
        # *.reduced files from being created outside of the temp folder;
        # it may also be necessary to remove .reduced files, if created.
        ("-s", "%(TEMP_IN_ALIGNMENT)s"),
        ("-q", "%(TEMP_IN_PARTITION)s"),
    )
    for flag, value in options:
        raxml.set_option(flag, value)

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_IN_ALIGNMENT": "RAxML_alignment",
        "TEMP_IN_PARTITION": "RAxML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_ALIGNMENT": output_alignment,
        "OUT_PARTITION": output_partition,
        "CHECK_VERSION": RAXML_VERSION,
    }
    raxml.set_kwargs(**files)

    return {"command": raxml}
def _get_common_parameters(version):
    """Create an AdapterRemoval builder with the options shared by all runs.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               attached to the command.

    Raises CmdError for any other value of 'version'. A deprecation warning
    is printed at most once (tracked by _DEPRECATION_WARNING_PRINTED) when
    the selected check reports a version below 2.0.

    Returns the (un-finalized) AtomicCmdBuilder."""
    global _DEPRECATION_WARNING_PRINTED

    known_versions = (
        (VERSION_14, _VERSION_14_CHECK),
        (VERSION_15, _VERSION_15_CHECK),
    )
    for candidate, requirement in known_versions:
        if version == candidate:
            break
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # Trim Ns at read ends
    builder.set_option("--trimns", fixed=False)
    # Trim low quality scores
    builder.set_option("--trimqualities", fixed=False)

    try:
        # The version is only queried if no warning has been printed yet
        if not _DEPRECATION_WARNING_PRINTED:
            if requirement.version < (2, 0):
                import pypeline.ui as ui

                messages = (
                    "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                    " Upgrading to 2.1.x is strongly adviced!\n",
                    " Download the newest version of AdapterRemoval at ",
                    " https://github.com/MikkelSchubert/adapterremoval\n",
                )
                for message in messages:
                    ui.print_warn(message)

                _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # A VersionRequirementError raised above is ignored here; the
        # builder is returned regardless.
        pass

    return builder
def customize(cls, input_alignment, input_partition,
              output_alignment, output_partition, dependencies=()):
    """Build the 'raxmlHPC -f c' call that reads and reduces an alignment.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partition  -- Registered with the builder as IN_PARTITION.
    output_alignment -- Registered with the builder as OUT_ALIGNMENT.
    output_partition -- Registered with the builder as OUT_PARTITION.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_IN_ALIGNMENT": "RAXML_alignment",
        "TEMP_IN_PARTITION": "RAXML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_ALIGNMENT": output_alignment,
        "OUT_PARTITION": output_partition,
    }

    options = [
        # Read and (in the case of empty columns) reduce the input
        ("-f", "c"),
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # A model is required, but it is not used
        ("-m", "GTRGAMMA"),
        # Makes sure that output is written to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Sequences and partitions are read through symlinks, to prevent
        # *.reduced files from being created outside of the temp folder;
        # it may also be necessary to remove .reduced files, if created.
        ("-s", "%(TEMP_IN_ALIGNMENT)s"),
        ("-q", "%(TEMP_IN_PARTITION)s"),
    ]

    raxml = AtomicCmdBuilder("raxmlHPC")
    for flag, value in options:
        raxml.set_option(flag, value)
    raxml.set_kwargs(**files)

    return {"command": raxml}
def customize(cls, reference, infiles, outfile, options, dependencies=()):
    """Build the 'samtools mpileup | bcftools view' variant-calling commands.

    Arguments:
    reference    -- Reference passed to mpileup via '-f' (IN_REFERENCE).
    infiles      -- Iterable of BAM paths, each added to the mpileup call.
    outfile      -- Destination of the bcftools output; must end in '.vcf'
                    (case-insensitive; checked with an assert).
    options      -- Not referenced by this function.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding both builders under "commands", keyed
    "pileup" and "bcftools"."""
    assert outfile.lower().endswith('.vcf')

    # Pileup command; writes to a pipe read by bcftools
    mpileup = AtomicCmdBuilder(['samtools', 'mpileup'],
                               IN_REFERENCE=reference,
                               OUT_STDOUT=AtomicCmd.PIPE,
                               CHECK_SAM=SAMTOOLS_VERSION)
    mpileup.set_option('-u')  # Uncompressed output
    # NOTE(review): the region is hard-coded; confirm that this is intended
    mpileup.set_option('-r', 'chrUn2:1-19214051')
    mpileup.set_option('-f', "%(IN_REFERENCE)s")  # Reference sequence
    for bam_path in infiles:
        mpileup.add_option(bam_path)

    # Variant caller command; reads the pileup from STDIN
    caller = AtomicCmdBuilder(['bcftools', 'view'],
                              IN_STDIN=mpileup,
                              OUT_STDOUT=outfile)
    caller_flags = (
        '-v',  # Output potential variant sites
        '-c',  # SNP calling
        '-g',  # Call genotypes at variant sites
        '-',   # STDIN
    )
    for flag in caller_flags:
        caller.set_option(flag)

    commands = {}
    commands["pileup"] = mpileup
    commands["bcftools"] = caller
    return {"commands": commands}
def _get_common_parameters(version):
    """Create an AdapterRemoval builder with the options shared by all runs.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               attached to the command.

    Raises CmdError for any other value of 'version'. A deprecation warning
    is printed at most once (tracked by _DEPRECATION_WARNING_PRINTED) when
    the selected check reports a version below 2.0.

    Returns the (un-finalized) AtomicCmdBuilder."""
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    else:
        if version != VERSION_15:
            raise CmdError("Unknown version: %s" % version)
        requirement = _VERSION_15_CHECK

    adapterrm = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    for flag in (
        "--trimns",         # Trim Ns at read ends
        "--trimqualities",  # Trim low quality scores
    ):
        adapterrm.set_option(flag, fixed=False)

    try:
        # The version is only queried if no warning has been printed yet
        if not _DEPRECATION_WARNING_PRINTED \
                and requirement.version < (2, 0):
            import pypeline.ui as ui

            warn = ui.print_warn
            warn("\nWARNING: AdapterRemoval v1.5.x is deprecated;")
            warn(" Upgrading to 2.1.x is strongly adviced!\n")
            warn(" Download the newest version of AdapterRemoval at ")
            warn(" https://github.com/MikkelSchubert/adapterremoval\n")

            _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # A VersionRequirementError raised above is ignored here; the
        # builder is returned regardless.
        pass

    return adapterrm
def customize(cls, infile, intervals, outfile, dependencies=()):
    """Build the 'bam_sample_regions' command.

    Arguments:
    infile       -- Passed via '--genotype' (registered as IN_PILEUP).
    intervals    -- Passed via '--intervals' (registered as IN_INTERVALS).
    outfile      -- Destination of the command's STDOUT.
    dependencies -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    files = {
        "IN_PILEUP": infile,
        "IN_INTERVALS": intervals,
        "OUT_STDOUT": outfile,
    }
    sampler = AtomicCmdBuilder(["bam_sample_regions"], **files)

    for flag, value in (("--genotype", "%(IN_PILEUP)s"),
                        ("--intervals", "%(IN_INTERVALS)s")):
        sampler.set_option(flag, value)

    return {"command": sampler}
def customize(cls, infile, intervals, outfile, dependencies=()):
    """Build the 'bam_sample_regions' command.

    Arguments:
    infile       -- Passed via '--genotype' (registered as IN_PILEUP).
    intervals    -- Passed via '--intervals' (registered as IN_INTERVALS).
    outfile      -- Destination of the command's STDOUT.
    dependencies -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    sampler = AtomicCmdBuilder(
        ["bam_sample_regions"],
        IN_PILEUP=infile,
        IN_INTERVALS=intervals,
        OUT_STDOUT=outfile,
    )

    options = [
        ("--genotype", "%(IN_PILEUP)s"),
        ("--intervals", "%(IN_INTERVALS)s"),
    ]
    for flag, value in options:
        sampler.set_option(flag, value)

    result = {"command": sampler}
    return result
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up the 'samtools view | AddOrReplaceReadGroups | calmd' node.

    Arguments:
    config          -- Passed on to picard.picard_command.
    reference       -- Reference given to 'samtools calmd' (IN_REF).
    input_bam       -- BAM read by 'samtools view' (IN_BAM).
    output_bam      -- Destination of the calmd output (OUT_STDOUT).
    tags            -- Mapping providing the "ID", "SM", "LB", "PU" and
                       "PL" read-group values.
    min_mapq        -- If non-zero, passed to 'samtools view' as '-q'.
    filter_unmapped -- If true, 'samtools view' is run with '-F0x4'.
    dependencies    -- Passed on to PicardNode.__init__."""
    # Step 1: (optionally) filter the input, emitting uncompressed BAM
    view = AtomicCmdBuilder(("samtools", "view", "-bu"),
                            IN_BAM=input_bam,
                            OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        view.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        view.set_option("-F", "0x4", sep="")
    view.add_value("%(IN_BAM)s")

    # Step 2: replace the read-groups and sort the records
    retag = picard.picard_command(config, "AddOrReplaceReadGroups")
    picard_options = (
        ("INPUT", "/dev/stdin"),
        # Output is written to a named pipe, since the JVM may, in some
        # cases, emit warning messages to stdout, resulting in a
        # malformed BAM.
        ("OUTPUT", "%(TEMP_OUT_BAM)s"),
        ("COMPRESSION_LEVEL", "0"),
        # Ensure that the BAM is sorted; this is required by the pipeline,
        # and needs to be done before calling calmd (avoiding pathologic
        # runtimes).
        ("SORT_ORDER", "coordinate"),
    )
    for key, value in picard_options:
        retag.set_option(key, value, sep="=")

    # All tags are overwritten; ID is set since the default (e.g. '1')
    # causes problems with pysam due to type inference (is read as a
    # length 1 string, but written as a character).
    for tag_name in ("ID", "SM", "LB", "PU", "PL"):
        retag.set_option(tag_name, tags[tag_name], sep="=")

    retag.set_kwargs(IN_STDIN=view, TEMP_OUT_BAM="bam.pipe")

    # Step 3: run calmd on the records read from the pipe
    calmd_call = ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
    calmd = AtomicCmdBuilder(calmd_call,
                             IN_REF=reference,
                             TEMP_IN_BAM="bam.pipe",
                             OUT_STDOUT=output_bam)

    finalized = [view.finalize(), retag.finalize(), calmd.finalize()]
    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)

    PicardNode.__init__(self,
                        command=ParallelCmds(finalized),
                        description=description,
                        dependencies=dependencies)
def customize(cls, input_alignment, input_partition, output_file,
              dependencies=()):
    """Build the 'examlParser' command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_file     -- Filename for the output binary sequence.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    parser = AtomicCmdBuilder("examlParser", set_cwd=True)

    fixed_options = (
        ("-s", "%(TEMP_OUT_ALN)s"),
        ("-q", "%(TEMP_OUT_PART)s"),
        # Output file will be named output.binary, and placed in the CWD
        ("-n", "output"),
    )
    for flag, value in fixed_options:
        parser.set_option(flag, value)

    # Substitution model
    parser.set_option("-m", "DNA", fixed=False)

    files = {
        # Auto-delete: Symlinks
        "TEMP_OUT_PART": os.path.basename(input_partition),
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # Input files; these are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # Final output file; this is not created directly
        "OUT_BINARY": output_file,
        "CHECK_EXAML": PARSER_VERSION,
    }
    parser.set_kwargs(**files)

    return {"command": parser}
def customize(cls, input_alignment, input_partition, output_file,
              dependencies=()):
    """Build the 'examlParser' command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_file     -- Filename for the output binary sequence.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    partition_link = os.path.basename(input_partition)
    alignment_link = os.path.basename(input_alignment)

    parser = AtomicCmdBuilder("examlParser", set_cwd=True)
    parser.set_option("-s", "%(TEMP_OUT_ALN)s")
    parser.set_option("-q", "%(TEMP_OUT_PART)s")
    # Output file will be named output.binary, and placed in the CWD
    parser.set_option("-n", "output")
    # Substitution model
    parser.set_option("-m", "DNA", fixed=False)

    files = dict(
        # Auto-delete: Symlinks
        TEMP_OUT_PART=partition_link,
        TEMP_OUT_ALN=alignment_link,
        # Input files; these are not used directly
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partition,
        # Final output file; this is not created directly
        OUT_BINARY=output_file,
    )
    parser.set_kwargs(**files)

    return {"command": parser}
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Build the 'unicat | vcf_create_pileup' commands.

    Arguments:
    reference    -- Passed to vcf_create_pileup via '-f' (IN_REF).
    in_bam       -- BAM file added as a value to vcf_create_pileup (IN_BAM).
    in_vcf       -- VCF read by unicat (IN_VCF).
    outfile      -- Pileup output (OUT_PILEUP); 'outfile + ".tbi"' is
                    registered as OUT_TBI.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding both builders under "commands", keyed
    "unicat" and "pileup"."""
    reader = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=in_vcf,
                              OUT_STDOUT=AtomicCmd.PIPE)

    pileup_files = {
        "IN_REF": reference,
        "IN_BAM": in_bam,
        "IN_STDIN": reader,
        "OUT_PILEUP": outfile,
        "OUT_TBI": outfile + ".tbi",
    }
    pileup = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                              **pileup_files)
    pileup.add_value("%(IN_BAM)s")
    pileup.set_option("-f", "%(IN_REF)s")

    commands = {"unicat": reader, "pileup": pileup}
    return {"commands": commands}
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Build the 'unicat | vcf_create_pileup' commands.

    Arguments:
    reference    -- Passed to vcf_create_pileup via '-f' (IN_REF).
    in_bam       -- BAM file added as a value to vcf_create_pileup (IN_BAM).
    in_vcf       -- VCF read by unicat (IN_VCF).
    outfile      -- Pileup output (OUT_PILEUP); 'outfile + ".tbi"' is
                    registered as OUT_TBI.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding both builders under "commands", keyed
    "unicat" and "pileup"."""
    tabix_index = outfile + ".tbi"

    # Reads the VCF and writes it to a pipe
    vcf_reader = AtomicCmdBuilder(
        ["unicat", "%(IN_VCF)s"],
        IN_VCF=in_vcf,
        OUT_STDOUT=AtomicCmd.PIPE,
    )

    # Consumes the piped VCF and writes the pileup
    pileup_builder = AtomicCmdBuilder(
        ["vcf_create_pileup", "%(OUT_PILEUP)s"],
        IN_REF=reference,
        IN_BAM=in_bam,
        IN_STDIN=vcf_reader,
        OUT_PILEUP=outfile,
        OUT_TBI=tabix_index,
    )
    pileup_builder.add_value("%(IN_BAM)s")
    pileup_builder.set_option("-f", "%(IN_REF)s")

    return {
        "commands": {
            "unicat": vcf_reader,
            "pileup": pileup_builder,
        },
    }
def customize(cls, reference, infile, outfile, filters, options,
              dependencies=()):
    """Build the 'vcf_qual_percentile' filtering command.

    Arguments:
    reference    -- Not referenced by this function.
    infile       -- Input VCF (IN_VCF); also added to the call as-is.
    outfile      -- Output VCF, passed via '--out' (OUT_VCF).
    filters      -- Mapping of option names to values; each pair is added
                    to the command.
    options      -- Object whose 'makefile' mapping must contain the key
                    'vcf_percentile_threshold'.
    dependencies -- Not referenced while building the command.

    Returns a dictionary holding the builder under "commands", keyed
    "Filter"."""
    # NOTE(review): the threshold is looked up, but the resulting value is
    # not used below; confirm whether it was meant to be passed on.
    percentile = str(options.makefile['vcf_percentile_threshold'])

    vcf_filter = AtomicCmdBuilder(['vcf_qual_percentile'],
                                  IN_VCF=infile,
                                  OUT_VCF=outfile)

    for option, value in filters.items():
        vcf_filter.add_option(option, value)

    vcf_filter.set_option('--out', '%(OUT_VCF)s')
    vcf_filter.add_option(infile)

    commands = {'Filter': vcf_filter}
    return {'commands': commands}
def _get_common_parameters(version):
    """Create an AdapterRemoval builder with the options shared by all runs.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               attached to the command.

    Raises CmdError for any other value of 'version'.

    Returns the (un-finalized) AtomicCmdBuilder."""
    known_versions = (
        (VERSION_14, _VERSION_14_CHECK),
        (VERSION_15, _VERSION_15_CHECK),
    )
    for candidate, requirement in known_versions:
        if version == candidate:
            break
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    overridable_flags = (
        "--trimns",         # Trim Ns at read ends
        "--trimqualities",  # Trim low quality scores
    )
    for flag in overridable_flags:
        builder.set_option(flag, fixed=False)

    return builder
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up the 'samtools view | AddOrReplaceReadGroups | calmd' node.

    Arguments:
    config          -- Passed on to picard.picard_command.
    reference       -- Reference given to 'samtools calmd' (IN_REF).
    input_bam       -- BAM read by 'samtools view' (IN_BAM).
    output_bam      -- Destination of the calmd output (OUT_STDOUT).
    tags            -- Mapping providing the "ID", "SM", "LB", "PU" and
                       "PL" read-group values.
    min_mapq        -- If non-zero, passed to 'samtools view' as '-q'.
    filter_unmapped -- If true, 'samtools view' is run with '-F0x4'.
    dependencies    -- Passed on to PicardNode.__init__."""
    pipe_name = "bam.pipe"

    # Filtering of the input; emits uncompressed BAM records to a pipe
    bam_filter = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                  IN_BAM=input_bam,
                                  OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        bam_filter.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        bam_filter.set_option("-F", "0x4", sep="")
    bam_filter.add_value("%(IN_BAM)s")

    read_groups = picard.picard_command(config, "AddOrReplaceReadGroups")
    read_groups.set_option("INPUT", "/dev/stdin", sep="=")
    # Output is written to a named pipe, since the JVM may, in some cases,
    # emit warning messages to stdout, resulting in a malformed BAM.
    read_groups.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    read_groups.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # Ensure that the BAM is sorted; this is required by the pipeline, and
    # needs to be done before calling calmd (avoiding pathologic runtimes).
    read_groups.set_option("SORT_ORDER", "coordinate", sep="=")

    # All tags are overwritten; ID is set since the default (e.g. '1')
    # causes problems with pysam due to type inference (is read as a length
    # 1 string, but written as a character).
    for key in ("ID", "SM", "LB", "PU", "PL"):
        read_groups.set_option(key, tags[key], sep="=")

    read_groups.set_kwargs(IN_STDIN=bam_filter, TEMP_OUT_BAM=pipe_name)

    md_tagger = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM=pipe_name,
        OUT_STDOUT=output_bam)

    builders = (bam_filter, read_groups, md_tagger)
    commands = []
    for builder in builders:
        commands.append(builder.finalize())

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(commands),
                        description=description,
                        dependencies=dependencies)
def customize(cls, pileup, infile, outfile, interval, dependencies=()):
    """Build the 'unicat | vcf_filter | bgzip' commands for filtering a VCF.

    Arguments:
    pileup       -- Passed to vcf_filter via '--pileup' (IN_PILEUP).
    infile       -- VCF read by unicat (IN_VCF).
    outfile      -- Destination of the bgzip output (OUT_STDOUT).
    interval     -- Mapping; the optional "Homozygous Contigs" entry lists
                    the contigs passed using '--homozygous-chromosome'.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding the builders under "commands", keyed
    "unicat", "filter" and "bgzip"."""
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=infile,
                              OUT_STDOUT=AtomicCmd.PIPE)

    vcffilter = AtomicCmdBuilder(["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
                                 IN_PILEUP=pileup,
                                 IN_STDIN=unicat,
                                 OUT_STDOUT=AtomicCmd.PIPE)

    # The option is repeated once per contig, and must therefore be added
    # with add_option; set_option only allows a key to be set once, and
    # raises AtomicCmdBuilderError if a fixed option is set again, which
    # would happen for any interval with more than one homozygous contig.
    for contig in interval.get("Homozygous Contigs", ()):
        vcffilter.add_option("--homozygous-chromosome", contig)

    bgzip = AtomicCmdBuilder(["bgzip"],
                             IN_STDIN=vcffilter,
                             OUT_STDOUT=outfile)

    return {"commands": {"unicat": unicat,
                         "filter": vcffilter,
                         "bgzip": bgzip}}
def customize(cls, reference, infile, outfile, regions=None,
              dependencies=()):
    """Build the 'samtools mpileup | bcftools view | bgzip' commands.

    Arguments:
    reference    -- Reference passed to mpileup via '-f' (IN_REFERENCE).
    infile       -- BAM file added to the mpileup call (IN_BAMFILE).
    outfile      -- Destination of the bgzip output; must end in
                    '.vcf.bgz' (case-insensitive; checked with an assert).
    regions      -- Registered as IN_REGIONS; if true, also passed to
                    mpileup via '-l'.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding the builders under "commands", keyed
    "pileup", "genotype" and "bgzip"."""
    assert outfile.lower().endswith(".vcf.bgz")

    mpileup_files = {
        "IN_REFERENCE": reference,
        "IN_BAMFILE": infile,
        "IN_REGIONS": regions,
        "OUT_STDOUT": AtomicCmd.PIPE,
        "CHECK_SAM": SAMTOOLS_VERSION,
    }
    mpileup = AtomicCmdBuilder(["samtools", "mpileup"], **mpileup_files)
    mpileup.set_option("-u")  # Uncompressed output
    mpileup.set_option("-f", "%(IN_REFERENCE)s")
    mpileup.add_value("%(IN_BAMFILE)s")
    if regions:
        mpileup.set_option("-l", "%(IN_REGIONS)s")

    # Reads the pileup from STDIN ("-")
    caller = AtomicCmdBuilder(["bcftools", "view"],
                              IN_STDIN=mpileup,
                              OUT_STDOUT=AtomicCmd.PIPE)
    caller.add_value("-")

    compressor = AtomicCmdBuilder(["bgzip"],
                                  IN_STDIN=caller,
                                  OUT_STDOUT=outfile)

    commands = {
        "pileup": mpileup,
        "genotype": caller,
        "bgzip": compressor,
    }
    return {"commands": commands}
def _get_common_parameters(version):
    """Create an AdapterRemoval builder with the default shared options.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               attached to the command.

    Raises CmdError for any other value of 'version'. All options are set
    with fixed=False.

    Returns the (un-finalized) AtomicCmdBuilder."""
    known_versions = (
        (VERSION_14, _VERSION_14_CHECK),
        (VERSION_15, _VERSION_15_CHECK),
    )
    for candidate, requirement in known_versions:
        if version == candidate:
            break
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    defaults = (
        # Allow 1/3 mismatches in the aligned region
        ("--mm", 3),
        # Minimum length of trimmed reads
        ("--minlength", 25),
        # Trim Ns at read ends
        ("--trimns",),
        # Trim low quality scores
        ("--trimqualities",),
        # Offset of quality scores
        ("--qualitybase", 33),
    )
    for arguments in defaults:
        builder.set_option(*arguments, fixed=False)

    return builder
def _get_common_parameters(version):
    """Create an AdapterRemoval builder with the default shared options.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               attached to the command.

    Raises CmdError for any other value of 'version'. All options are set
    with fixed=False.

    Returns the (un-finalized) AtomicCmdBuilder."""
    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    else:
        if version != VERSION_15:
            raise CmdError("Unknown version: %s" % version)
        requirement = _VERSION_15_CHECK

    adapterrm = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    valued_defaults = (
        # Allow 1/3 mismatches in the aligned region
        ("--mm", 3),
        # Minimum length of trimmed reads
        ("--minlength", 25),
    )
    for flag, value in valued_defaults:
        adapterrm.set_option(flag, value, fixed=False)

    for flag in (
        "--trimns",         # Trim Ns at read ends
        "--trimqualities",  # Trim low quality scores
    ):
        adapterrm.set_option(flag, fixed=False)

    # Offset of quality scores
    adapterrm.set_option("--qualitybase", 33, fixed=False)

    return adapterrm
def customize(cls, options, infile, interval, outfile, padding,
              dependencies=()):
    """Build the 'bam_genotype_regions' command.

    Arguments:
    options      -- Object providing 'intervals_root', the folder holding
                    the BED file for the interval.
    infile       -- VCF passed via '--genotype' (IN_VCFFILE); the file
                    'infile + ".tbi"' is registered as IN_TABIX.
    interval     -- Mapping with "Genome" and "Name" entries (used to name
                    the BED file), and optional "Protein coding" and
                    "Indels" entries.
    outfile      -- Destination of the command's STDOUT.
    padding      -- Not referenced by this function.
    dependencies -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    bed_name = "{Genome}.{Name}".format(**interval) + ".bed"
    bed_path = os.path.join(options.intervals_root, bed_name)

    files = {
        "IN_VCFFILE": infile,
        "IN_TABIX": infile + ".tbi",
        "IN_INTERVALS": bed_path,
        "OUT_STDOUT": outfile,
    }
    genotyper = AtomicCmdBuilder(["bam_genotype_regions"], **files)

    for flag, value in (("--genotype", "%(IN_VCFFILE)s"),
                        ("--intervals", "%(IN_INTERVALS)s")):
        genotyper.set_option(flag, value)

    if interval.get("Protein coding"):
        genotyper.set_option("--whole-codon-indels-only")

    # Indels are ignored unless the interval has a true "Indels" entry
    if not interval.get("Indels"):
        genotyper.set_option("--ignore-indels")

    return {"command": genotyper}
def customize(cls, pileup, infile, outfile, interval, dependencies=()):
    """Build the 'unicat | vcf_filter | bgzip' commands for filtering a VCF.

    Arguments:
    pileup       -- Passed to vcf_filter via '--pileup' (IN_PILEUP).
    infile       -- VCF read by unicat (IN_VCF).
    outfile      -- Destination of the bgzip output (OUT_STDOUT).
    interval     -- Mapping; the optional "Homozygous Contigs" entry lists
                    the contigs passed using '--homozygous-chromosome'.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding the builders under "commands", keyed
    "unicat", "filter" and "bgzip"."""
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=infile,
                              OUT_STDOUT=AtomicCmd.PIPE)

    vcffilter = AtomicCmdBuilder(
        ["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
        IN_PILEUP=pileup,
        IN_STDIN=unicat,
        OUT_STDOUT=AtomicCmd.PIPE)

    # The option is repeated once per contig, and must therefore be added
    # with add_option; set_option only allows a key to be set once, and
    # raises AtomicCmdBuilderError if a fixed option is set again, which
    # would happen for any interval with more than one homozygous contig.
    for contig in interval.get("Homozygous Contigs", ()):
        vcffilter.add_option("--homozygous-chromosome", contig)

    bgzip = AtomicCmdBuilder(["bgzip"],
                             IN_STDIN=vcffilter,
                             OUT_STDOUT=outfile)

    return {
        "commands": {
            "unicat": unicat,
            "filter": vcffilter,
            "bgzip": bgzip,
        }
    }
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Build the commands that turn piped aligner output into a final BAM.

    The chain is 'safeSAM2BAM [| samtools fixmate] | samtools sort |
    samtools calmd', with each command reading the STDOUT of the previous.

    Arguments:
    stdin       -- Used as IN_STDIN of the first command (safeSAM2BAM).
    output_file -- Destination of the calmd output (OUT_STDOUT).
    reference   -- Reference given to 'samtools calmd' (IN_REF).
    run_fixmate -- If true, 'samtools fixmate' is run before sorting.

    Returns a tuple (order, commands), where 'order' lists the command
    names in pipeline order and 'commands' maps names to builders."""
    to_bam = AtomicCmdBuilder("safeSAM2BAM")
    to_bam.set_option("--flag-as-sorted")
    to_bam.set_option("-F", "0x4", sep="", fixed=False)  # Remove misses
    to_bam.set_kwargs(IN_STDIN=stdin,
                      OUT_STDOUT=AtomicCmd.PIPE,
                      CHECK_PYSAM=PYSAM_VERSION,
                      CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    mate_fixer = None
    if run_fixmate:
        mate_fixer = AtomicCmdBuilder(("samtools", "fixmate", "-", "-"),
                                      IN_STDIN=to_bam,
                                      OUT_STDOUT=AtomicCmd.PIPE,
                                      CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    # The sorter reads from fixmate if present, otherwise from safeSAM2BAM
    sort_input = mate_fixer if mate_fixer else to_bam

    sorter = AtomicCmdBuilder(("samtools", "sort"))
    sorter.set_option("-o")  # Output to STDOUT on completion
    for value in ("-", "%(TEMP_OUT_BAM)s"):
        sorter.add_value(value)
    sorter.set_kwargs(IN_STDIN=sort_input,
                      OUT_STDOUT=AtomicCmd.PIPE,
                      TEMP_OUT_BAM="sorted",
                      CHECK_SAM=SAMTOOLS_VERSION)

    md_tagger = AtomicCmdBuilder(("samtools", "calmd"))
    for value in ("-", "%(IN_REF)s"):
        md_tagger.add_value(value)
    md_tagger.set_option("-b")  # Output BAM
    md_tagger.set_kwargs(IN_REF=reference,
                         IN_STDIN=sorter,
                         OUT_STDOUT=output_file,
                         CHECK_SAM=SAMTOOLS_VERSION)

    commands = {"convert": to_bam, "sort": sorter, "calmd": md_tagger}
    if run_fixmate:
        order = ["convert", "fixmate", "sort", "calmd"]
        commands["fixmate"] = mate_fixer
    else:
        order = ["convert", "sort", "calmd"]

    return order, commands
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Build the commands that turn piped aligner output into a final BAM.

    The chain is 'safeSAM2BAM [| samtools fixmate] | samtools sort |
    samtools calmd', with each command reading the STDOUT of the previous.

    Arguments:
    stdin       -- Used as IN_STDIN of the first command (safeSAM2BAM).
    output_file -- Destination of the calmd output (OUT_STDOUT).
    reference   -- Reference given to 'samtools calmd' (IN_REF).
    run_fixmate -- If true, 'samtools fixmate' is run before sorting.

    Returns a tuple (order, commands), where 'order' lists the command
    names in pipeline order and 'commands' maps names to builders."""
    order = ["convert"]
    commands = {}

    sam_to_bam = AtomicCmdBuilder("safeSAM2BAM")
    sam_to_bam.set_option("--flag-as-sorted")
    sam_to_bam.set_option("-F", "0x4", sep="", fixed=False)  # Remove misses
    sam_to_bam.set_kwargs(**{
        "IN_STDIN": stdin,
        "OUT_STDOUT": AtomicCmd.PIPE,
        "CHECK_PYSAM": PYSAM_VERSION,
        "CHECK_SAMTOOLS": SAMTOOLS_VERSION,
    })
    commands["convert"] = sam_to_bam

    fixmate = None
    if run_fixmate:
        fixmate = AtomicCmdBuilder(("samtools", "fixmate", "-", "-"),
                                   IN_STDIN=sam_to_bam,
                                   OUT_STDOUT=AtomicCmd.PIPE,
                                   CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    bam_sort = AtomicCmdBuilder(("samtools", "sort"))
    bam_sort.set_option("-o")  # Output to STDOUT on completion
    bam_sort.add_value("-")
    bam_sort.add_value("%(TEMP_OUT_BAM)s")
    bam_sort.set_kwargs(**{
        # Reads from fixmate if present, otherwise from safeSAM2BAM
        "IN_STDIN": fixmate or sam_to_bam,
        "OUT_STDOUT": AtomicCmd.PIPE,
        "TEMP_OUT_BAM": "sorted",
        "CHECK_SAM": SAMTOOLS_VERSION,
    })
    commands["sort"] = bam_sort

    bam_calmd = AtomicCmdBuilder(("samtools", "calmd"))
    bam_calmd.add_value("-")
    bam_calmd.add_value("%(IN_REF)s")
    bam_calmd.set_option("-b")  # Output BAM
    bam_calmd.set_kwargs(**{
        "IN_REF": reference,
        "IN_STDIN": bam_sort,
        "OUT_STDOUT": output_file,
        "CHECK_SAM": SAMTOOLS_VERSION,
    })
    commands["calmd"] = bam_calmd

    if run_fixmate:
        order.append("fixmate")
        commands["fixmate"] = fixmate
    order.extend(("sort", "calmd"))

    return order, commands
def customize(cls, options, infile, interval, outfile, padding,
              dependencies=()):
    """Build the 'bam_genotype_regions' command.

    Arguments:
    options      -- Object providing 'intervals_root', the folder holding
                    the BED file for the interval.
    infile       -- VCF passed via '--genotype' (IN_VCFFILE); the file
                    'infile + ".tbi"' is registered as IN_TABIX.
    interval     -- Mapping with "Genome" and "Name" entries (used to name
                    the BED file), and optional "Protein coding" and
                    "Indels" entries.
    outfile      -- Destination of the command's STDOUT.
    padding      -- Not referenced by this function.
    dependencies -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    interval_name = "{Genome}.{Name}".format(**interval)
    bed_file = os.path.join(options.intervals_root, interval_name + ".bed")

    genotyper = AtomicCmdBuilder(
        ["bam_genotype_regions"],
        IN_VCFFILE=infile,
        IN_TABIX=infile + ".tbi",
        IN_INTERVALS=bed_file,
        OUT_STDOUT=outfile,
    )
    genotyper.set_option("--genotype", "%(IN_VCFFILE)s")
    genotyper.set_option("--intervals", "%(IN_INTERVALS)s")

    protein_coding = interval.get("Protein coding")
    if protein_coding:
        genotyper.set_option("--whole-codon-indels-only")

    # Indels are ignored unless the interval has a true "Indels" entry
    include_indels = interval.get("Indels")
    if not include_indels:
        genotyper.set_option("--ignore-indels")

    return {"command": genotyper}
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Build the 'parsimonator' command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    output_tree     -- Filename for the output newick tree.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    parsimonator = AtomicCmdBuilder("parsimonator", set_cwd=True)

    for flag, value in (("-s", "%(TEMP_OUT_ALN)s"), ("-n", "output")):
        parsimonator.set_option(flag, value)

    # Random seed for the stepwise addition process
    seed = int(random.random() * 2**31 - 1)
    parsimonator.set_option("-p", seed, fixed=False)

    files = {
        # Auto-delete: Symlinks
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # Input files; these are not used directly
        "IN_ALIGNMENT": input_alignment,
        # Final output file; this is not created directly
        "OUT_TREE": output_tree,
    }
    parsimonator.set_kwargs(**files)

    return {"command": parsimonator}
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Build the 'parsimonator' command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    output_tree     -- Filename for the output newick tree.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    builder = AtomicCmdBuilder("parsimonator", set_cwd=True)
    builder.set_option("-s", "%(TEMP_OUT_ALN)s")
    builder.set_option("-n", "output")

    # Random seed for the stepwise addition process
    random_seed = int(random.random() * 2**31 - 1)
    builder.set_option("-p", random_seed, fixed=False)

    alignment_link = os.path.basename(input_alignment)
    builder.set_kwargs(
        # Auto-delete: Symlinks
        TEMP_OUT_ALN=alignment_link,
        # Input files; these are not used directly
        IN_ALIGNMENT=input_alignment,
        # Final output file; this is not created directly
        OUT_TREE=output_tree,
    )

    result = {"command": builder}
    return result
def customize(cls, input_alignment, input_partitions, output_tree,
              dependencies=()):
    """Build the 'raxmlHPC -y' call computing a parsimony starting tree.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partitions -- Registered with the builder as IN_PARTITION.
    output_tree      -- Registered with the builder as OUT_TREE.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    raxml = AtomicCmdBuilder("raxmlHPC")

    # Compute a randomized parsimony starting tree
    raxml.set_option("-y")

    for flag, value in (
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # A model is required, but it is not used
        ("-m", "GTRGAMMA"),
        # Makes sure that output is written to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
    ):
        raxml.set_option(flag, value)

    # Random seed; may be set to a fixed value to allow replicability.
    seed = int(random.random() * 2**31 - 1)
    raxml.set_option("-p", seed, fixed=False)

    # Sequences and partitions are read through symlinks, to prevent
    # *.reduced files from being created outside of the temp folder
    raxml.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
    raxml.set_option("-q", "%(TEMP_OUT_PARTITION)s")

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partitions,
        # TEMP_OUT_ is used to automatically remove these files
        "TEMP_OUT_ALIGNMENT": "RAxML_alignment",
        "TEMP_OUT_PARTITION": "RAxML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_TREE": output_tree,
        "CHECK_VERSION": RAXML_VERSION,
    }
    raxml.set_kwargs(**files)

    return {"command": raxml}
def customize(self, reference='', sra_infile='', wdir='/tmp',
              dependencies=()):
    ''' Build the fastq-dump and AdapterRemoval commands for an SRA file.

    Arguments:
    reference    -- Not referenced by this function.
    sra_infile   -- Path of the SRA file; the names of the FASTQ files are
                    derived from its basename.
    wdir         -- Folder in which the uncompressed '_1.fastq' and
                    '_2.fastq' inputs of AdapterRemoval are expected.
    dependencies -- Not referenced while building the commands.

    Returns a dictionary holding both builders under 'commands', keyed
    'fastq_dump' and 'adapter_rm'. '''
    sra_name = os.path.basename(sra_infile)
    read_stem = sra_name.replace(".sra", '')

    # ------------------------------------------
    # Dump SRA file into fastq format
    # ------------------------------------------
    fastq_dump = AtomicCmdBuilder(
        ['fastq-dump', '%(IN_SRA)s'],
        IN_SRA=os.path.expanduser(sra_infile),
        OUT_FASTQ1=sra_name.replace('.sra', '_1.fastq.gz'),
        OUT_FASTQ2=sra_name.replace('.sra', '_2.fastq.gz'))
    fastq_dump.set_option('--split-files')

    # ------------------------------------------
    # Remove Adapters
    # ------------------------------------------
    adapter_files = {
        "TEMP_IN_READS_1": os.path.join(wdir, read_stem + "_1.fastq"),
        "TEMP_IN_READS_2": os.path.join(wdir, read_stem + "_2.fastq"),
        "TEMP_OUT_BASENAME": sra_name,
        "TEMP_OUT_LINK_PAIR1": 'pair_1',
        "TEMP_OUT_LINK_PAIR2": 'pair_2',
        "TEMP_OUT_LINK_ALN": 'aligned',
        "TEMP_OUT_LINK_ALN_TRUNC": 'truncated',
        "TEMP_OUT_LINK_UNALN": 'unaligned',
        "TEMP_OUT_LINK_DISC": 'discarded',
    }
    adapter_rm = AtomicCmdBuilder(['AdapterRemoval'], **adapter_files)

    overridable = (
        # Allow 1/3 mismatches in the aligned region
        ("--mm", 3),
        # Minimum length of trimmed reads
        ("--minlength", 25),
        # Trim Ns at read ends
        ("--trimns",),
        # Trim low quality scores
        ("--trimqualities",),
        # Offset of quality scores
        ("--qualitybase", 33),
    )
    for arguments in overridable:
        adapter_rm.set_option(*arguments, fixed=False)

    adapter_rm.set_option('--collapse')

    fixed = (
        # Uncompressed mate 1 and 2 reads
        ("--file1", "%(TEMP_IN_READS_1)s"),
        ("--file2", "%(TEMP_IN_READS_2)s"),
        # Prefix for output files; ensures that all end up in temp folder
        ("--basename", "%(TEMP_OUT_BASENAME)s"),
        # Output files are explicitly specified, to ensure a well defined
        # order. A difference in the order in which files are opened can
        # cause a deadlock, due to the use of named pipes.
        ("--output1", "%(TEMP_OUT_LINK_PAIR1)s"),
        ("--output2", "%(TEMP_OUT_LINK_PAIR2)s"),
        ("--outputcollapsed", "%(TEMP_OUT_LINK_ALN)s"),
        ("--outputcollapsedtruncated", "%(TEMP_OUT_LINK_ALN_TRUNC)s"),
        ("--singleton", "%(TEMP_OUT_LINK_UNALN)s"),
        ("--discarded", "%(TEMP_OUT_LINK_DISC)s"),
    )
    for flag, value in fixed:
        adapter_rm.set_option(flag, value)

    commands = {
        'fastq_dump': fastq_dump,
        'adapter_rm': adapter_rm,
    }
    return {'commands': commands}
def customize(cls, input_alignment, input_partition, output_alignment,
              dependencies=()):
    """Build the 'raxmlHPC -f j' call generating one bootstrap alignment.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partition  -- Registered with the builder as IN_PARTITION.
    output_alignment -- Registered as OUT_ALIGNMENT; the same path with
                        the extension swapped for ".info" is registered
                        as OUT_INFO.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    raxml = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    for flag, value in (
        # NOTE(review): '-f j' was previously described as reading and
        # reducing the input; per the RAxML manual it is expected to
        # generate bootstrapped alignments -- confirm.
        ("-f", "j"),
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # A model is required, but it is not used
        ("-m", "GTRGAMMA"),
    ):
        raxml.set_option(flag, value)

    # Random seed for bootstrap generation; may be set to a fixed value to
    # allow replicability.
    seed = int(random.random() * 2**31 - 1)
    raxml.set_option("-b", seed, fixed=False)
    # Generate a single bootstrap alignment (makes growing the number of
    # bootstraps easier).
    raxml.set_option("-N", 1, fixed=False)

    # Sequences and partitions are read through symlinks, to prevent
    # *.reduced files from being created outside of the temp folder; it
    # may also be necessary to remove .reduced files, if created.
    raxml.set_option("-s", "input.alignment")
    raxml.set_option("-q", "input.partition")

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "OUT_ALIGNMENT": output_alignment,
        "OUT_INFO": fileutils.swap_ext(output_alignment, ".info"),
    }
    raxml.set_kwargs(**files)

    return {"command": raxml}
def test_builder__set_option__overwrite_fixed():
    # The option is set without passing 'fixed'; the second attempt to set
    # the same key is expected to raise AtomicCmdBuilderError.
    find_cmd = AtomicCmdBuilder("find")
    find_cmd.set_option("-name", "*.txt")

    overwrite = find_cmd.set_option
    assert_raises(AtomicCmdBuilderError, overwrite, "-name", "*.bat")
def customize(cls, input_alignment, input_partition, output_template,
              threads=1, dependencies=()):
    """Build the RAxML rapid bootstrapping ('-f a') command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_template -- A template string used to construct final filenames.
                       Should consist of a full path, including a single
                       '%s', which is replaced with the variable part of
                       RAxML output files (e.g. 'info', 'bestTree', ...).
                       Example destination: '/disk/project/SN013420.RAxML.%s'
                       Example output: '/disk/project/SN013420.RAxML.bestTree'
    threads         -- If greater than 1, 'raxmlHPC-PTHREADS' is run with
                       '-T threads'; otherwise 'raxmlHPC' is used.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    if threads > 1:
        raxml = AtomicCmdBuilder("raxmlHPC-PTHREADS")
        raxml.set_option("-T", threads)
    else:
        raxml = AtomicCmdBuilder("raxmlHPC")

    for flag, value in (
        # Perform rapid bootstrapping
        ("-f", "a"),
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # Makes sure that output is written to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Sequences and partitions are read through symlinks, to prevent
        # *.reduced files from being created outside of the temp folder;
        # it may also be necessary to remove .reduced files, if created.
        ("-s", "%(TEMP_OUT_ALN)s"),
        ("-q", "%(TEMP_OUT_PART)s"),
    ):
        raxml.set_option(flag, value)

    partition_link = os.path.basename(input_partition)
    alignment_link = os.path.basename(input_alignment)
    files = {
        # Auto-delete: Symlinks and .reduced files that RAxML may generate
        "TEMP_OUT_PART": partition_link,
        "TEMP_OUT_PART_R": partition_link + ".reduced",
        "TEMP_OUT_ALN": alignment_link,
        "TEMP_OUT_ALN_R": alignment_link + ".reduced",
        # Input files; these are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # Final output files; these are not created directly
        "OUT_INFO": output_template % "info",
        "OUT_BESTTREE": output_template % "bestTree",
        "OUT_BOOTSTRAP": output_template % "bootstrap",
        "OUT_BIPART": output_template % "bipartitions",
        "OUT_BIPARTLABEL": output_template % "bipartitionsBranchLabels",
    }
    raxml.set_kwargs(**files)

    # Use the GTRGAMMAI model of NT substitution by default
    raxml.set_option("-m", "GTRGAMMAI", fixed=False)
    # Enable rapid bootstrapping and set the random seed; may be set to a
    # fixed value to allow replicability.
    bootstrap_seed = int(random.random() * 2**31 - 1)
    raxml.set_option("-x", bootstrap_seed, fixed=False)
    # Random seed for parsimony inference; may be set to a fixed value to
    # allow replicability.
    parsimony_seed = int(random.random() * 2**31 - 1)
    raxml.set_option("-p", parsimony_seed, fixed=False)
    # Terminate bootstrapping upon convergence, rather than after a fixed
    # number of repetitions
    raxml.set_option("-N", "autoMRE", fixed=False)

    return {"command": raxml}
def customize(cls, input_alignment, input_partition, template, start=0,
              bootstraps=50, dependencies=()):
    """Build the 'raxmlHPC -f j' call generating bootstrap alignments.

    Arguments:
    input_alignment -- Registered with the builder as IN_ALIGNMENT.
    input_partition -- Registered with the builder as IN_PARTITION.
    template        -- Passed to cls._bootstraps to obtain the filenames
                       of the bootstrap alignments.
    start           -- Passed to cls._bootstraps.
    bootstraps      -- Number passed to RAxML via '-N' (as an int), and
                       also passed to cls._bootstraps.
    dependencies    -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    raxml = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    for flag, value in (
        # NOTE(review): '-f j' was previously described as reading and
        # reducing the input; per the RAxML manual it is expected to
        # generate bootstrapped alignments -- confirm.
        ("-f", "j"),
        # Output files get a .Pypeline postfix, and are renamed afterwards
        ("-n", "Pypeline"),
        # A model is required, but it is not used
        ("-m", "GTRGAMMA"),
    ):
        raxml.set_option(flag, value)

    # Random seed for bootstrap generation; may be set to a fixed value to
    # allow replicability.
    seed = int(random.random() * 2**31 - 1)
    raxml.set_option("-b", seed, fixed=False)
    # Value given to '-N'; taken from the 'bootstraps' argument
    raxml.set_option("-N", int(bootstraps), fixed=False)

    # Sequences and partitions are read through symlinks, to prevent
    # *.reduced files from being created outside of the temp folder; it
    # may also be necessary to remove .reduced files, if created.
    raxml.set_option("-s", "input.alignment")
    raxml.set_option("-q", "input.partition")

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_OUT_INF": "RAxML_info.Pypeline",
        "TEMP_OUT_ALN": "input.alignment",
        "TEMP_OUT_PAR": "input.partition",
        "CHECK_VERSION": RAXML_VERSION,
    }
    # One OUT_BS_* entry per (_, filename) pair yielded by _bootstraps
    files.update(
        ("OUT_BS_%03i" % index, filename)
        for index, (_, filename)
        in enumerate(cls._bootstraps(template, bootstraps, start)))

    raxml.set_kwargs(**files)

    return {"command": raxml}
def test_builder__set_option():
    # An option set with a value is expected to appear in the call as the
    # key followed by the value.
    find_cmd = AtomicCmdBuilder("find")
    find_cmd.set_option("-name", "*.txt")

    expected_call = ["find", "-name", "*.txt"]
    assert_equal(find_cmd.call, expected_call)
def customize(cls, input_alignment, input_partition, output_alignment,
              dependencies=()):
    """Build the 'raxmlHPC -f j' call generating one bootstrap alignment.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partition  -- Registered with the builder as IN_PARTITION.
    output_alignment -- Registered as OUT_ALIGNMENT; the same path with
                        the extension swapped for ".info" is registered
                        as OUT_INFO.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    info_file = fileutils.swap_ext(output_alignment, ".info")

    builder = AtomicCmdBuilder("raxmlHPC", set_cwd=True)
    # NOTE(review): '-f j' was previously described as reading and reducing
    # the input; per the RAxML manual it is expected to generate
    # bootstrapped alignments -- confirm.
    builder.set_option("-f", "j")
    # Output files get a .Pypeline postfix, and are renamed afterwards
    builder.set_option("-n", "Pypeline")
    # A model is required, but it is not used
    builder.set_option("-m", "GTRGAMMA")

    overridable = (
        # Random seed for bootstrap generation; may be set to a fixed
        # value to allow replicability.
        ("-b", int(random.random() * 2**31 - 1)),
        # Generate a single bootstrap alignment (makes growing the number
        # of bootstraps easier).
        ("-N", 1),
    )
    for flag, value in overridable:
        builder.set_option(flag, value, fixed=False)

    # Sequences and partitions are read through symlinks, to prevent
    # *.reduced files from being created outside of the temp folder; it
    # may also be necessary to remove .reduced files, if created.
    for flag, value in (("-s", "input.alignment"),
                        ("-q", "input.partition")):
        builder.set_option(flag, value)

    builder.set_kwargs(IN_ALIGNMENT=input_alignment,
                       IN_PARTITION=input_partition,
                       OUT_ALIGNMENT=output_alignment,
                       OUT_INFO=info_file)

    return {"command": builder}
def customize(cls, input_alignment, input_partitions, output_tree,
              dependencies=()):
    """Build the 'raxmlHPC -y' call computing a parsimony starting tree.

    Arguments:
    input_alignment  -- Registered with the builder as IN_ALIGNMENT.
    input_partitions -- Registered with the builder as IN_PARTITION.
    output_tree      -- Registered with the builder as OUT_TREE.
    dependencies     -- Not referenced while building the command.

    Returns a dictionary holding the builder under the key "command"."""
    builder = AtomicCmdBuilder("raxmlHPC")

    # Compute a randomized parsimony starting tree
    builder.set_option("-y")
    # Output files get a .Pypeline postfix, and are renamed afterwards
    builder.set_option("-n", "Pypeline")
    # A model is required, but it is not used
    builder.set_option("-m", "GTRGAMMA")
    # Makes sure that output is written to the temporary directory
    builder.set_option("-w", "%(TEMP_DIR)s")

    # Random seed; may be set to a fixed value to allow replicability.
    random_seed = int(random.random() * 2**31 - 1)
    builder.set_option("-p", random_seed, fixed=False)

    # Sequences and partitions are read through symlinks, to prevent
    # *.reduced files from being created outside of the temp folder
    for flag, value in (("-s", "%(TEMP_OUT_ALIGNMENT)s"),
                        ("-q", "%(TEMP_OUT_PARTITION)s")):
        builder.set_option(flag, value)

    builder.set_kwargs(
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partitions,
        # TEMP_OUT_ is used to automatically remove these files
        TEMP_OUT_ALIGNMENT="RAxML_alignment",
        TEMP_OUT_PARTITION="RAxML_partitions",
        TEMP_OUT_INFO="RAxML_info.Pypeline",
        OUT_TREE=output_tree,
        CHECK_VERSION=RAXML_VERSION,
    )

    result = {"command": builder}
    return result
def customize(cls, input_alignment, input_partition, output_template, threads=1, dependencies=()):
    """Build the RAxML rapid-bootstrapping command ("-f a").

    Arguments:
    input_alignment  -- An alignment file in a format readable by RAxML.
    input_partition  -- A set of partitions in a format readable by RAxML.
    output_template  -- A template string used to construct final filenames.
                        Should consist of a full path, including a single
                        '%s', which is replaced with the variable part of
                        RAxML output files (e.g. 'info', 'bestTree', ...).
                        Example destination: '/disk/project/SN013420.RAxML.%s'
                        Example output:      '/disk/project/SN013420.RAxML.bestTree'
    threads          -- If greater than 1, "raxmlHPC-PTHREADS" is invoked with
                        "-T threads"; otherwise plain "raxmlHPC" is used.
    dependencies     -- Not referenced in this function body (presumably
                        consumed by the caller / decorator; confirm there).

    Returns a dict holding the (still customizable) builder under "command".
    """
    if threads > 1:
        command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
        command.set_option("-T", threads)
        version = RAXML_PTHREADS_VERSION
    else:
        command = AtomicCmdBuilder("raxmlHPC")
        version = RAXML_VERSION

    # Perform rapid bootstrapping
    command.set_option("-f", "a")
    # Output files are saved with a .Pypeline postfix, and subsequently renamed
    command.set_option("-n", "Pypeline")
    # Ensures that output is saved to the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")
    # Symlink to sequence and partitions, to prevent the creation of *.reduced
    # files outside temp folder. In addition, it may be necessary to remove
    # the .reduced files if created.
    command.set_option("-s", "%(TEMP_OUT_ALN)s")
    command.set_option("-q", "%(TEMP_OUT_PART)s")

    alignment_name = os.path.basename(input_alignment)
    partition_name = os.path.basename(input_partition)
    command.set_kwargs(
        # Auto-delete: Symlinks and .reduced files that RAxML may generate
        TEMP_OUT_PART=partition_name,
        TEMP_OUT_PART_R=partition_name + ".reduced",
        TEMP_OUT_ALN=alignment_name,
        TEMP_OUT_ALN_R=alignment_name + ".reduced",

        # Input files; not passed to RAxML directly (the -s / -q options
        # above refer to the TEMP_OUT_* names instead)
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partition,

        # Final output files, are not created directly
        OUT_INFO=output_template % "info",
        OUT_BESTTREE=output_template % "bestTree",
        OUT_BOOTSTRAP=output_template % "bootstrap",
        OUT_BIPART=output_template % "bipartitions",
        OUT_BIPARTLABEL=output_template % "bipartitionsBranchLabels",

        CHECK_VERSION=version,
    )

    # Use the GTRGAMMAI model of NT substitution by default
    command.set_option("-m", "GTRGAMMAI", fixed=False)
    # Enable Rapid Bootstrapping and set random seed. May be set to a fixed
    # value to allow replicability. randint keeps the default strictly
    # positive; a seed of 0 or -1 is presumably rejected by RAxML (confirm
    # against the manual).
    command.set_option("-x", random.randint(1, 2**31 - 1), fixed=False)
    # Set random seed for parsimony inference. May be set to a fixed value to
    # allow replicability.
    command.set_option("-p", random.randint(1, 2**31 - 1), fixed=False)
    # Terminate bootstrapping upon convergence, rather than after a fixed
    # number of repetitions
    command.set_option("-N", "autoMRE", fixed=False)

    return {"command": command}
def customize(cls, input_alignment, input_partition, template, start=0, bootstraps=50, dependencies=()):
    """Build the RAxML command that generates a batch of bootstrap alignments.

    Arguments:
    input_alignment -- Registered with the builder as IN_ALIGNMENT.
    input_partition -- Registered with the builder as IN_PARTITION.
    template        -- Forwarded to cls._bootstraps, which yields the output
                       filenames (exact format is defined by that helper).
    start           -- Forwarded to cls._bootstraps.
    bootstraps      -- Number of replicates; passed to RAxML as "-N" after
                       conversion to int, and forwarded to cls._bootstraps.
    dependencies    -- Not referenced in this function body (presumably
                       consumed by the caller / decorator; confirm there).

    Returns a dict holding the (still customizable) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    # Generate bootstrapped alignment files from the input alignment
    command.set_option("-f", "j")
    # Output files are saved with a .Pypeline postfix, and subsequently renamed
    command.set_option("-n", "Pypeline")
    # Model required, but not used
    command.set_option("-m", "GTRGAMMA")
    # Set random seed for bootstrap generation. May be set to a fixed value to
    # allow replicability. randint keeps the default strictly positive; a seed
    # of 0 or -1 is presumably rejected by RAxML (confirm against the manual).
    command.set_option("-b", random.randint(1, 2**31 - 1), fixed=False)
    # Number of bootstrap alignments to generate in this invocation
    command.set_option("-N", int(bootstraps), fixed=False)

    # Symlink to sequence and partitions, to prevent the creation of *.reduced
    # files outside temp folder. In addition, it may be necessary to remove
    # the .reduced files if created.
    command.set_option("-s", "input.alignment")
    command.set_option("-q", "input.partition")

    bootstrap_files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_OUT_INF": "RAxML_info.Pypeline",
        "TEMP_OUT_ALN": "input.alignment",
        "TEMP_OUT_PAR": "input.partition",
        "CHECK_VERSION": RAXML_VERSION,
    }

    # Register one OUT_BS_nnn key per filename produced by cls._bootstraps;
    # only the second element of each yielded pair is used here.
    for (index, (_, filename)) in enumerate(cls._bootstraps(template, bootstraps, start)):
        bootstrap_files["OUT_BS_%03i" % index] = filename

    command.set_kwargs(**bootstrap_files)

    return {"command": command}
def test_builder__pop_option__missing_key():
    # Only "-size" is set; popping the different key "-isize" must raise
    # a KeyError.
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-size", 0)

    missing_key = "-isize"
    assert_raises(KeyError, cmd.pop_option, missing_key)
def test_builder__set_option__overwrite():
    # An option first set with fixed=False may be set again; the resulting
    # call must contain only the most recent value.
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt", fixed=False)
    cmd.set_option("-name", "*.bat")

    expected_call = ["find", "-name", "*.bat"]
    assert_equal(cmd.call, expected_call)
def test_builder__set_option__overwrite():
    # Setting "-name" a second time (the first was non-fixed) replaces the
    # earlier "*.txt" value rather than adding a second occurrence.
    find_builder = AtomicCmdBuilder("find")
    for value, kwargs in (("*.txt", {"fixed": False}), ("*.bat", {})):
        find_builder.set_option("-name", value, **kwargs)

    assert_equal(find_builder.call, ["find", "-name", "*.bat"])