def test_requirement__same_obj_if_name_differ():
    """Requirements that differ only in 'name' must be one shared object.

    An explicitly given name is expected to replace the current one, while
    a later construction without a name is expected to leave it untouched.
    """
    # Without an explicit name, the name is expected to be the call itself
    unnamed = versions.Requirement("echo", "", versions.GE(1))
    assert_equal(unnamed.name, "echo")

    # An explicit name is visible on the (shared) object
    named_foo = versions.Requirement("echo", "", versions.GE(1), name="foo")
    assert_equal(named_foo.name, "foo")
    assert_is(unnamed, named_foo)

    # A second explicit name replaces the first one
    named_bar = versions.Requirement("echo", "", versions.GE(1), name="bar")
    assert_equal(named_bar.name, "bar")
    assert_is(named_foo, named_bar)

    # Constructing without a name again must not reset the name; the check
    # goes through 'named_bar', which is asserted to be the same object below
    unnamed_again = versions.Requirement("echo", "", versions.GE(1))
    assert_equal(named_bar.name, "bar")
    assert_is(named_bar, unnamed_again)
def picard_command(config, command):
    """Return an AtomicJavaCmdBuilder set up to run a Picard tools command.

    The JAR is located by joining 'config.jar_root' with _PICARD_JAR. A single
    version requirement is created per JAR path and stored in
    _PICARD_VERSION_CACHE; it is attached to the returned builder as
    CHECK_JAR. The builder has 'command' set as an option, and nothing else.
    """
    jar_path = os.path.join(config.jar_root, _PICARD_JAR)

    if jar_path not in _PICARD_VERSION_CACHE:
        version_cmd = AtomicJavaCmdBuilder(jar_path,
                                           temp_root=config.temp_root,
                                           jre_options=config.jre_options)

        # Arbitrary command, since just '--version' does not work
        version_cmd.set_option("MarkDuplicates")
        version_cmd.set_option("--version")

        _PICARD_VERSION_CACHE[jar_path] = versions.Requirement(
            name="Picard tools",
            call=version_cmd.finalized_call,
            search=r"^(\d+)\.(\d+)",
            checks=versions.GE(1, 124))

    builder = AtomicJavaCmdBuilder(jar_path,
                                   temp_root=config.temp_root,
                                   jre_options=config.jre_options,
                                   CHECK_JAR=_PICARD_VERSION_CACHE[jar_path])
    builder.set_option(command)

    return builder
def _check_paleomix_version():
    """Return a requirement that the 'paleomix' executable at _PALEOMIX_PATH
    reports exactly the version given by pypeline.__version_info__.

    The version is read from the output of '<_PALEOMIX_PATH> help'.
    """
    # The path must have been resolved before this function is called
    assert _PALEOMIX_PATH is not None

    return versions.Requirement(
        call=[_PALEOMIX_PATH, "help"],
        search=r"v(\d+)\.(\d+)\.(\d+)",
        checks=versions.EQ(*pypeline.__version_info__),
        priority=100)
def _picard_version(jar_file):
    """Return the version requirement for the Picard JAR 'jar_file'.

    Requirements are created on first use and stored in
    _PICARD_VERSION_CACHE, keyed by the path of the JAR.
    """
    if jar_file in _PICARD_VERSION_CACHE:
        return _PICARD_VERSION_CACHE[jar_file]

    java_call = ("java", "-client", "-jar", jar_file, "--version")
    _PICARD_VERSION_CACHE[jar_file] = versions.Requirement(
        call=java_call,
        search=r"^(\d+)\.(\d+)",
        checks=versions.GE(1, 82))

    return _PICARD_VERSION_CACHE[jar_file]
def _get_java_version(cls, version):
    """Return a requirement that the JRE is at least 'version'.

    'version' is a sequence of values convertible to int. Requirements are
    created on first use and stored in JAVA_VERSIONS, keyed by the version
    as a tuple of ints. 'cls' is not used.
    """
    version = tuple(int(field) for field in version)
    if version in JAVA_VERSIONS:
        return JAVA_VERSIONS[version]

    # One numeric group per requested field, separated by '.' or '_'
    fields = [r"(\d+)"] * len(version)
    pattern = r'java version "%s' % (r"[\._]".join(fields), )

    JAVA_VERSIONS[version] = versions.Requirement(
        call=["java", "-Djava.awt.headless=true", "-version"],
        name="JAVA Runtime Environment",
        search=pattern,
        checks=versions.GE(*version),
        priority=10)

    return JAVA_VERSIONS[version]
def _picard_version(config, jar_file):
    """Return the version requirement for the Picard JAR 'jar_file'.

    The version is queried by running the JAR with '--version', using the
    temp_root and jre_options taken from 'config'. Requirements are created
    on first use and stored in _PICARD_VERSION_CACHE, keyed by 'jar_file'.
    """
    if jar_file in _PICARD_VERSION_CACHE:
        return _PICARD_VERSION_CACHE[jar_file]

    version_cmd = AtomicJavaCmdBuilder(jar_file,
                                       temp_root=config.temp_root,
                                       jre_options=config.jre_options)
    version_cmd.add_value("--version")

    # The requirement is named after the specific JAR file
    _PICARD_VERSION_CACHE[jar_file] = versions.Requirement(
        name="Picard " + os.path.basename(jar_file),
        call=version_cmd.finalized_call,
        search=r"^(\d+)\.(\d+)",
        checks=versions.GE(1, 82))

    return _PICARD_VERSION_CACHE[jar_file]
def _get_gatk_version_check(config):
    """Return the version-check object for "GenomeAnalysisTK.jar", as found
    in config.jar_root.

    Any version is accepted; running the check merely ensures that the JAR
    can be executed at all, which may fail if the JRE is outdated. Checks
    are stored in _GATK_VERSION, keyed by the path of the JAR.
    """
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    if jar_file in _GATK_VERSION:
        return _GATK_VERSION[jar_file]

    version_cmd = AtomicJavaCmdBuilder(jar_file,
                                       temp_root=config.temp_root,
                                       jre_options=config.jre_options)
    version_cmd.add_value("--version")

    # Any version is fine; for now just catch old JREs
    _GATK_VERSION[jar_file] = versions.Requirement(
        name="GenomeAnalysisTK",
        call=version_cmd.finalized_call,
        search=r"^(\d+)\.(\d+)",
        checks=versions.Any())

    return _GATK_VERSION[jar_file]
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # from pypeline.node import CommandNode from pypeline.atomiccmd.command import AtomicCmd import pypeline.common.versions as versions BEDTOOLS_VERSION = versions.Requirement( call=("bedtools", "--version"), search=r"bedtools v?(\d+)\.(\d+)\.(\d+)", checks=versions.GE(2, 15, 0)) class SlopBedNode(CommandNode): def __init__(self, infile, outfile, genome, from_start=0, from_end=0, strand_relative=False, dependencies=()): if type(from_start) != type(from_end): raise ValueError( "'from_start' and 'from_end' should be of same type!")
# import os import re import random import pypeline.common.fileutils as fileutils import pypeline.common.versions as versions from pypeline.node import CommandNode from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters RAXML_VERSION = versions.Requirement(call=("raxmlHPC", "-version"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(7, 3, 2)) RAXML_PTHREADS_VERSION = versions.Requirement( call=("raxmlHPC-PTHREADS", "-version"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(7, 3, 2)) class RAxMLReduceNode(CommandNode): @create_customizable_cli_parameters def customize(cls, input_alignment, input_partition, output_alignment, output_partition, dependencies=()):
ParallelCmds from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.nodes.validation import \ check_fastq_files import pypeline.common.utilities as utilities import pypeline.common.fileutils as fileutils import pypeline.common.versions as versions import pypeline.tools.factory as factory VERSION_14 = "1.4" _VERSION_14_CHECK = versions.Requirement(call=("AdapterRemoval", "--version"), search=r"ver. (\d+)\.(\d+)", checks=versions.EQ(1, 4)) VERSION_15 = "1.5+" _VERSION_15_CHECK = versions.Requirement(call=("AdapterRemoval", "--version"), search=r"ver. (\d+)\.(\d+)\.(\d+)", checks=versions.GE(1, 5, 0)) class SE_AdapterRemovalNode(CommandNode): @create_customizable_cli_parameters def customize(cls, input_files, output_prefix, output_format="bz2", quality_offset=33,
def test_requirement__new_obj_if_search_differ():
    """Requirements with different 'search' patterns must be distinct
    objects, even if the call and the checks are the same."""
    with_pattern = versions.Requirement("echo", r"(\d+)", versions.LT(1))
    without_pattern = versions.Requirement("echo", "", versions.LT(1))

    assert_is_not(with_pattern, without_pattern)
#!/usr/bin/env python from __future__ import print_function from pypeline.node import CommandNode, Node from pypeline.atomiccmd.command import AtomicCmd from pypeline.atomiccmd.builder import AtomicCmdBuilder from pypeline.common.fileutils import move_file, reroot_path import pypeline.common.versions as versions from epiomix.tools import gccorrect import os Rscript_VERSION = versions.Requirement(call=("Rscript", "--version"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(2, 15, 3)) GC_NAME = '_GCcorrect' class GccorrectNode(Node): def __init__(self, d_bam, rl, halfresolution, dependencies=()): self.halfresolution = halfresolution self.dest = os.path.join(d_bam.bam_temp_local, d_bam.bam_name + GC_NAME + '_' + str(rl)) self.rl, self.d_bam = rl, d_bam description = ( "<Gccorrect: '%s' window length: '%s' based on chromosome '%s' >" % (self.dest, rl, self.d_bam.opts['GCcorrect']['--ChromUsed'])) Node.__init__(self, description=description, input_files=self.d_bam.baminfo["BamPath"],
AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.atomiccmd.sets import ParallelCmds from pypeline.nodes.samtools import SAMTOOLS_VERSION from pypeline.common.fileutils import \ describe_paired_files, \ missing_files import pypeline.common.versions as versions import pypeline.tools.factory as factory BWA_VERSION = versions.Requirement(call=("bwa", ), search=r"Version: (\d+)\.(\d+)\.(\d+)", checks=versions.Or(versions.EQ(0, 5, 9), versions.EQ(0, 5, 10), versions.EQ(0, 6, 2), versions.GE(0, 7, 9))) BWA_VERSION_07x = versions.Requirement(call=("bwa", ), search=r"Version: (\d+)\.(\d+)\.(\d+)", checks=versions.GE(0, 7, 9)) class BWAIndexNode(CommandNode): @create_customizable_cli_parameters def customize(cls, input_file, prefix=None, dependencies=()): prefix = prefix if prefix else input_file params = _get_bwa_template(("bwa", "index"), prefix, iotype="OUT",
CommandNode, \ NodeError from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.common.fileutils import \ reroot_path from pypeline.common.formats.msa import \ MSA, \ MSAError import pypeline.common.versions as versions MAFFT_VERSION = versions.Requirement(call = ("mafft", "--version"), search = r"v(\d+)\.(\d+)", checks = versions.GE(7, 0)) # Presets mainly taken from # http://mafft.cbrc.jp/alignment/software/algorithms/algorithms.html _PRESETS = { "mafft" : ["mafft"], "auto" : ["mafft", "--auto"], "fft-ns-1" : ["mafft-fftns", "--retree", 1], "fft-ns-2" : ["mafft-fftns"], "fft-ns-i" : ["mafft-fftnsi"], "nw-ns-i" : ["mafft-nwnsi"], "l-ins-i" : ["mafft-linsi"], "e-ins-i" : ["mafft-einsi"], "g-ins-i" : ["mafft-ginsi"],
#!/usr/bin/env python import os import pypeline.common.versions as versions from pypeline.common.fileutils import move_file, reroot_path from pypeline.node import CommandNode, Node from pypeline.atomiccmd.command import AtomicCmd from pypeline.atomiccmd.sets import ParallelCmds from epiomix.tools import splitbedfiles, \ merge_datafiles PYTHON_VERSION = versions.Requirement(call=("python", "--version"), search=r"Python (\d+)\.(\d+)\.(\d+)", checks=versions.GE(2, 7, 3)) PREFIX = os.path.dirname(splitbedfiles.__file__) class CleanFilesNode(CommandNode): def __init__(self, config, d_make, bedn, mappa, unique, dependencies=()): inbedfile = d_make.bedfiles[bedn] basename, extension = os.path.splitext(os.path.basename(inbedfile)) bname = "{}_MappaOnly{}".format(basename, extension) dest = os.path.join(config.temp_local, bname) d_make.bedfiles[bedn] = dest call1 = [ "python", os.path.join(PREFIX, "intersectmappabed.py"), "%(IN_BED)s", "%(IN_MAPPA)s",
from pypeline.node import CommandNode from pypeline.atomiccmd.command import AtomicCmd from pypeline.common.fileutils import reroot_path, swap_ext import pypeline.common.versions as versions _VERSION_REGEX = r"Version: (\d+)\.(\d+)(?:\.(\d+))?" # v0.2.0 was the pre-release version of v1.0, and lacks required features _COMMON_CHECK = versions.And(versions.GE(0, 1, 18), versions.LT(0, 2, 0)) SAMTOOLS_VERSION = versions.Requirement(call=("samtools",), search=_VERSION_REGEX, checks=_COMMON_CHECK) BCFTOOLS_VERSION \ = versions.Requirement(call=("bcftools",), search=_VERSION_REGEX, checks=_COMMON_CHECK) TABIX_VERSION = versions.Requirement(call=("tabix",), search=_VERSION_REGEX, checks=versions.GE(0, 2, 5)) def samtools_compatible_wbu_mode(): """Returns a writing mode for Pysam compatible with the current version of samtools; uncompressed output from Pysam 0.8.x cannot be read by older
from pypeline.atomiccmd.command import AtomicCmd from pypeline.atomiccmd.builder import AtomicJavaCmdBuilder,AtomicJava7CmdBuilder from pypeline.atomiccmd.sets import ParallelCmds from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.nodes.samtools import GenotypeNode, TabixIndexNode, FastaIndexNode, MPileupNode import pypeline.common.versions as versions from pypeline.common.utilities import safe_coerce_to_tuple SAMTOOLS_VERSION = versions.Requirement( call = ("samtools",), search = b"Version: (\d+)\.(\d+)\.(\d+)", checks = versions.GE(0, 1, 18) ) class SnpListNode(CommandNode): @create_customizable_cli_parameters def customize(cls, groups, prefix, options, dependencies = ()): # Merge the VCF files merge_vcf = AtomicCmdBuilder(['vcf_merge'], OUT_VCF = "merged.vcf") for group in groups: vcf_file = os.path.join(options.makefile['RecalDir'], 'gatk.{}.{}.raw.recal_final.vcf'.format(group,prefix) ) merge_vcf.add_option("-i",vcf_file) merge_vcf.add_option("-o", '%(OUT_VCF)s')
def new(command, *args, **kwargs): """Returns AtomicCmdBuilder setup to call the tools accessible through the 'paleomix' command-line tool. This builder adds executable / version checks for the specified command, but does not add any arguments. Thus, calling new with the argument "cat" produces the equivalent of ["paleomix", "cat"]. """ if command in _SPECIAL_COMMANDS: return _SPECIAL_COMMANDS[command](*args, **kwargs) return _build_generic_command(command) _VERSION_EQ = versions.EQ(*pypeline.__version_info__) VERSION_PALEOMIX = versions.Requirement(call=[_PALEOMIX_PATH, "help"], search=r"v(\d+)\.(\d+)\.(\d+)", checks=_VERSION_EQ, priority=100) def _build_generic_command(argument): """Returns a AtomicCmdBuilder for a regular 'paleomix ...' command.""" return AtomicCmdBuilder([_PALEOMIX_PATH, argument], CHECK_PALEOMIX=VERSION_PALEOMIX) def _build_cat_command(): """Returns a AtomicCmdBuilder for the 'paleomix cat' command.""" return AtomicCmdBuilder([_PALEOMIX_PATH, "cat"], EXEC_GZIP="gzip", EXEC_BZIP="bzip2", EXEC_CAT="cat",
def test_requirement__obj_is_cached_for_same_values():
    """Constructing a Requirement twice with the same call, search and
    checks must yield the very same object."""
    first = versions.Requirement("echo", "", versions.LT(1))
    second = versions.Requirement("echo", "", versions.LT(1))

    assert_is(first, second)
import pypeline.common.fileutils as fileutils import pypeline.common.versions as versions from pypeline.node import CommandNode from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ AtomicMPICmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.nodegraph import \ FileStatusCache EXAML_VERSION = versions.Requirement(call=("examl", "-version"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(1, 0, 5)) PARSER_VERSION = versions.Requirement(call=("examlParser", "-h"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(1, 0, 2)) class ExaMLParserNode(CommandNode): @create_customizable_cli_parameters def customize(cls, input_alignment, input_partition, output_file, dependencies=()): """
def test_requirement__new_obj_if_checks_differ():
    """Requirements with different 'checks' must be distinct objects, even
    if the call and the search pattern are the same."""
    at_least_one = versions.Requirement("echo", "", versions.GE(1))
    less_than_one = versions.Requirement("echo", "", versions.LT(1))

    assert_is_not(at_least_one, less_than_one)
from pypeline.node import CommandNode, NodeError from pypeline.atomiccmd.command import AtomicCmd from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters from pypeline.atomiccmd.sets import ParallelCmds from pypeline.nodes.bwa import _process_output, _get_max_threads import pypeline.common.versions as versions BOWTIE2_VERSION = versions.Requirement(call=("bowtie2", "--version"), search=r"version (\d+)\.(\d+)\.(\d+)", checks=versions.GE(2, 0, 0)) class Bowtie2IndexNode(CommandNode): @create_customizable_cli_parameters def customize(cls, input_file, prefix=None, dependencies=()): prefix = prefix if prefix else input_file params = _bowtie2_template(("bowtie2-build"), prefix, iotype="OUT", IN_FILE=input_file, TEMP_OUT_PREFIX=os.path.basename(prefix), CHECK_VERSION=BOWTIE2_VERSION) params.add_value("%(IN_FILE)s") # Destination prefix, in temp folder
def test_requirement_highest_priority_retained():
    """Re-requesting a Requirement with a lower priority must return the
    same object, and must not lower its priority."""
    high_priority = versions.Requirement("echo", "", versions.LT(1),
                                         priority=5)
    assert_equal(high_priority.priority, 5)

    low_priority = versions.Requirement("echo", "", versions.LT(1),
                                        priority=0)
    assert_is(high_priority, low_priority)
    assert_equal(low_priority.priority, 5)
from pypeline.node import \ NodeError, \ CommandNode from pypeline.atomiccmd.sets import \ ParallelCmds from pypeline.nodes.picard import \ MultiBAMInput, \ MultiBAMInputNode from pypeline.atomiccmd.builder import \ AtomicCmdBuilder, \ use_customizable_cli_parameters, \ create_customizable_cli_parameters MAPDAMAGE_VERSION = versions.Requirement(call=("mapDamage", "--version"), search=r"(\d+)\.(\d+).(\d+)", checks=versions.GE(2, 0, 1)) RSCRIPT_VERSION = versions.Requirement(call=("Rscript", "--version"), search=r"(\d+)\.(\d+).(\d+)", checks=versions.GE(2, 15, 1)) class MapDamagePlotNode(MultiBAMInputNode): @create_customizable_cli_parameters def customize(self, config, reference, input_files, output_directory, title="mapDamage",
from pypeline.node import CommandNode, MetaNode from pypeline.atomiccmd.command import AtomicCmd from pypeline.atomiccmd.sets import ParallelCmds, SequentialCmds from pypeline.atomiccmd.builder import AtomicJavaCmdBuilder from pypeline.nodes.picard import ValidateBAMNode, concatenate_input_bams from pypeline.nodes.samtools import BAMIndexNode from pypeline.common.fileutils import describe_files import pypeline.common.versions as versions # Number of reads to sample when running mapDamage _MAPDAMAGE_MAX_READS = 100000 MAPDAMAGE_VERSION = versions.Requirement(call=("mapDamage", "--version"), search=r"(\d+)\.(\d+)[\.-](\d+)", pprint="{0}.{1}.{2}", checks=versions.GE(2, 0, 45)) class MapDamageNode(CommandNode): def __init__(self, config, reference, input_files, output_directory, dependencies): cat_cmds, cat_obj = concatenate_input_bams(config, input_files) cmd_map = AtomicCmd( [ "mapDamage", "--no-stats", "-n", _MAPDAMAGE_MAX_READS, "-i", "-", "-d", "%(TEMP_DIR)s", "-r", reference ], IN_STDIN=cat_obj, OUT_FREQ_3p=os.path.join(output_directory, "3pGtoA_freq.txt"),
from pypeline.node import CommandNode, NodeError
from pypeline.atomiccmd.command import AtomicCmd
from pypeline.atomiccmd.builder import \
    AtomicCmdBuilder, \
    use_customizable_cli_parameters, \
    create_customizable_cli_parameters
from pypeline.atomiccmd.sets import ParallelCmds
from pypeline.nodes.samtools import SAMTOOLS_VERSION

import pypeline.common.versions as versions


# Accepted versions of BWA: v0.5.9 up to (but excluding) v0.6.0, or v0.7.5
# and later; the version is read from the output of running 'bwa' by itself
BWA_VERSION = versions.Requirement(
    call=("bwa", ),
    search=r"Version: (\d+)\.(\d+)\.(\d+)",
    checks=versions.Or(
        versions.And(versions.GE(0, 5, 9),
                     versions.LT(0, 6, 0)),
        versions.GE(0, 7, 5)))


# Required by safeSam2Bam for 'PG' tagging support / known good version
# Cannot be a lambda due to need to be able to pickle function
def _get_pysam_version():
    """Return the version string of the 'pysam' module.

    The module is only imported when this function is called.
    """
    pysam_module = __import__("pysam")
    return pysam_module.__version__


# The 'call' is a function rather than a command-line, and the version is
# extracted from the string it returns
PYSAM_VERSION = versions.Requirement(
    name="module 'pysam'",
    call=_get_pysam_version,
    search=r"(\d+)\.(\d+)\.(\d+)",
    checks=versions.GE(0, 7, 4))