Esempio n. 1
0
 def _do_test_builder__pop_option(setter):
     """Shared check: popping "-size" drops that option and its value.

     `setter` is called as setter(builder, key[, value], fixed=False) to
     register three options on a "find" builder before "-size" is popped.
     """
     cmd = AtomicCmdBuilder("find")
     for option in (("-empty",), ("-size", "1"), ("-name", "*.txt")):
         setter(cmd, *option, fixed=False)

     cmd.pop_option("-size")
     assert_equal(cmd.call, ["find", "-empty", "-name", "*.txt"])
Esempio n. 2
0
def _get_common_parameters(version):
    """Return an AtomicCmdBuilder for AdapterRemoval with the shared options.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               that is passed to the builder as CHECK_VERSION.

    Raises CmdError for any other value of `version`. The first time the
    selected check reports a version below (2, 0), a deprecation warning is
    printed and the module-level _DEPRECATION_WARNING_PRINTED flag is set so
    that the warning is not repeated.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    elif version == VERSION_15:
        requirement = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # Trim Ns at read ends
    builder.set_option("--trimns", fixed=False)
    # Trim low quality scores
    builder.set_option("--trimqualities", fixed=False)

    if not _DEPRECATION_WARNING_PRINTED:
        try:
            if requirement.version < (2, 0):
                import pypeline.ui as ui
                warning_lines = (
                    "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                    "         Upgrading to 2.1.x is strongly adviced!\n",
                    "         Download the newest version of AdapterRemoval at ",
                    "         https://github.com/MikkelSchubert/adapterremoval\n",
                )
                for line in warning_lines:
                    ui.print_warn(line)

                _DEPRECATION_WARNING_PRINTED = True
        except versions.VersionRequirementError:
            # A failed version lookup only suppresses the warning here; the
            # builder is still returned.
            pass

    return builder
Esempio n. 3
0
def _bowtie2_template(call, prefix, iotype = "IN", **kwargs):
    """Create a builder for `call` that registers the six .bt2 files.

    For every postfix, the key "<iotype>_PREFIX_<POSTFIX>" (postfix in upper
    case) is set to "<prefix>.<postfix>". Any extra keyword arguments are
    forwarded to the AtomicCmdBuilder constructor.
    """
    index_postfixes = ("1.bt2", "2.bt2", "3.bt2", "4.bt2",
                       "rev.1.bt2", "rev.2.bt2")

    builder = AtomicCmdBuilder(call, **kwargs)
    for postfix in index_postfixes:
        index_key = "%s_PREFIX_%s" % (iotype, postfix.upper())
        index_path = prefix + "." + postfix
        builder.set_kwargs(**{index_key: index_path})

    return builder
Esempio n. 4
0
 def _do_test_builder__pop_option(setter):
     """Verify that pop_option("-size") removes both "-size" and its value.

     `setter` registers each option on the builder; it receives the builder,
     the key, an optional value, and fixed=False.
     """
     expected_call = ["find", "-empty", "-name", "*.txt"]

     find_cmd = AtomicCmdBuilder("find")
     setter(find_cmd, "-empty", fixed=False)
     setter(find_cmd, "-size", "1", fixed=False)
     setter(find_cmd, "-name", "*.txt", fixed=False)

     find_cmd.pop_option("-size")

     assert_equal(find_cmd.call, expected_call)
Esempio n. 5
0
def test_builder__set_kwargs__after_finalize():
    """set_kwargs must raise once finalize() was called, keeping old kwargs."""
    echo_cmd = AtomicCmdBuilder("echo")
    echo_cmd.set_kwargs(IN_PATH="/a/b/")
    echo_cmd.finalize()

    assert_raises(AtomicCmdBuilderError,
                  echo_cmd.set_kwargs, OUT_PATH="/dst/file")
    # The rejected call must not have added OUT_PATH
    assert_equal(echo_cmd.kwargs, {"IN_PATH": "/a/b/"})
Esempio n. 6
0
def _bowtie2_template(call, prefix, iotype="IN", **kwargs):
    """Return an AtomicCmdBuilder for `call` with the .bt2 files registered.

    Each of the six postfixes yields one kwarg named
    "<iotype>_PREFIX_<POSTFIX>" whose value is "<prefix>.<postfix>"; other
    keyword arguments are passed straight to AtomicCmdBuilder.
    """
    cmd = AtomicCmdBuilder(call, **kwargs)

    postfixes = ["%s.bt2" % (part,) for part in ("1", "2", "3", "4")]
    postfixes += ["rev.1.bt2", "rev.2.bt2"]
    for postfix in postfixes:
        file_kwarg = {
            "%s_PREFIX_%s" % (iotype, postfix.upper()): prefix + "." + postfix,
        }
        cmd.set_kwargs(**file_kwarg)

    return cmd
Esempio n. 7
0
def test_builder__set__kwargs__overwriting():
    """Setting an already-set kwarg must raise and keep the first value."""
    echo_cmd = AtomicCmdBuilder("echo")
    echo_cmd.set_kwargs(IN_PATH="/a/b/")

    assert_raises(AtomicCmdBuilderError,
                  echo_cmd.set_kwargs, IN_PATH="/dst/file")

    original_kwargs = {"IN_PATH": "/a/b/"}
    assert_equal(echo_cmd.kwargs, original_kwargs)
Esempio n. 8
0
    def customize(cls, infile, intervals, outfile, dependencies = ()):
        """Set up the 'bam_sample_regions' command.

        Arguments:
        infile     -- Registered as IN_PILEUP and passed via --genotype.
        intervals  -- Registered as IN_INTERVALS and passed via --intervals.
        outfile    -- Registered as OUT_STDOUT.

        `dependencies` is accepted but not used in this body. Returns a dict
        holding the builder under the "command" key.
        """
        builder = AtomicCmdBuilder(["bam_sample_regions"],
                                   IN_PILEUP=infile,
                                   IN_INTERVALS=intervals,
                                   OUT_STDOUT=outfile)

        placeholders = (("--genotype", "%(IN_PILEUP)s"),
                        ("--intervals", "%(IN_INTERVALS)s"))
        for option, placeholder in placeholders:
            builder.set_option(option, placeholder)

        return {"command": builder}
Esempio n. 9
0
def test_builder__add_multiple_options_with_sep():
    """With sep="=", each key/placeholder pair becomes a single argument."""
    ls_cmd = AtomicCmdBuilder("ls")
    returned = ls_cmd.add_multiple_options("-i", ("file_a", "file_b"),
                                           sep="=")

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(ls_cmd.kwargs, expected_kwargs)

    expected_call = ["ls", "-i=%(IN_FILE_01)s", "-i=%(IN_FILE_02)s"]
    assert_equal(ls_cmd.call, expected_call)
Esempio n. 10
0
def test_builder__add_multiple_values_with_template():
    """A custom template decides the kwarg names used for each value."""
    ls_cmd = AtomicCmdBuilder("ls")
    returned = ls_cmd.add_multiple_values(("file_a", "file_b"),
                                          template="OUT_BAM_%i")

    expected_kwargs = {"OUT_BAM_1": "file_a", "OUT_BAM_2": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(ls_cmd.kwargs, expected_kwargs)

    expected_call = ["ls", "%(OUT_BAM_1)s", "%(OUT_BAM_2)s"]
    assert_equal(ls_cmd.call, expected_call)
Esempio n. 11
0
def test_builder__add_multiple_values():
    """Without a template, values are registered as IN_FILE_01, IN_FILE_02."""
    ls_cmd = AtomicCmdBuilder("ls")
    returned = ls_cmd.add_multiple_values(("file_a", "file_b"))

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(ls_cmd.kwargs, expected_kwargs)

    expected_call = ["ls", "%(IN_FILE_01)s", "%(IN_FILE_02)s"]
    assert_equal(ls_cmd.call, expected_call)
Esempio n. 12
0
def test_builder__add_multiple_values_with_template():
    """add_multiple_values(template="OUT_BAM_%i") numbers keys from 1."""
    filenames = ("file_a", "file_b")
    wanted_kwargs = {"OUT_BAM_1": "file_a", "OUT_BAM_2": "file_b"}
    wanted_call = ["ls", "%(OUT_BAM_1)s", "%(OUT_BAM_2)s"]

    cmd = AtomicCmdBuilder("ls")
    result = cmd.add_multiple_values(filenames, template="OUT_BAM_%i")

    # The returned mapping and the builder state must agree
    for observed in (result, cmd.kwargs):
        assert_equal(observed, wanted_kwargs)
    assert_equal(cmd.call, wanted_call)
Esempio n. 13
0
def test_builder__add_multiple_values():
    """add_multiple_values returns and stores the default IN_FILE_NN keys."""
    filenames = ("file_a", "file_b")
    wanted_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    wanted_call = ["ls", "%(IN_FILE_01)s", "%(IN_FILE_02)s"]

    cmd = AtomicCmdBuilder("ls")
    result = cmd.add_multiple_values(filenames)

    # The returned mapping and the builder state must agree
    for observed in (result, cmd.kwargs):
        assert_equal(observed, wanted_kwargs)
    assert_equal(cmd.call, wanted_call)
Esempio n. 14
0
def test_builder__add_multiple_options_with_template_fixed():
    """Repeating add_multiple_options with the same key/values must raise."""
    filenames = ("file_a", "file_b")

    ls_cmd = AtomicCmdBuilder("ls")
    returned = ls_cmd.add_multiple_options("-i", filenames)

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(ls_cmd.kwargs, expected_kwargs)

    # Second, identical call is expected to be rejected
    assert_raises(AtomicCmdBuilderError,
                  ls_cmd.add_multiple_options, "-i", filenames)
Esempio n. 15
0
def test_builder__add_multiple_options_with_sep():
    """add_multiple_options(sep="=") emits "-i=<placeholder>" arguments."""
    filenames = ("file_a", "file_b")
    wanted_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    wanted_call = ["ls", "-i=%(IN_FILE_01)s", "-i=%(IN_FILE_02)s"]

    cmd = AtomicCmdBuilder("ls")
    result = cmd.add_multiple_options("-i", filenames, sep="=")

    # The returned mapping and the builder state must agree
    for observed in (result, cmd.kwargs):
        assert_equal(observed, wanted_kwargs)
    assert_equal(cmd.call, wanted_call)
Esempio n. 16
0
def test_builder__add_multiple_options_multiple_times():
    """Successive add_multiple_options calls continue the key numbering."""
    ls_cmd = AtomicCmdBuilder("ls")

    first = ls_cmd.add_multiple_options("-i", ("file_a",))
    assert_equal(first, {"IN_FILE_01": "file_a"})

    second = ls_cmd.add_multiple_options("-i", ("file_b",))
    assert_equal(second, {"IN_FILE_02": "file_b"})

    # Both calls accumulate in the builder
    assert_equal(ls_cmd.kwargs,
                 {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"})
    assert_equal(ls_cmd.call,
                 ["ls", "-i", "%(IN_FILE_01)s", "-i", "%(IN_FILE_02)s"])
Esempio n. 17
0
def test_builder__add_multiple_options_with_template_fixed():
    """A second add_multiple_options("-i", ...) with equal values raises."""
    filenames = ("file_a", "file_b")
    wanted_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}

    cmd = AtomicCmdBuilder("ls")
    result = cmd.add_multiple_options("-i", filenames)

    for observed in (result, cmd.kwargs):
        assert_equal(observed, wanted_kwargs)

    assert_raises(AtomicCmdBuilderError,
                  cmd.add_multiple_options, "-i", filenames)
Esempio n. 18
0
def test_builder__add_option__overwrite():
    """add_option appends repeated keys instead of replacing earlier ones."""
    find_cmd = AtomicCmdBuilder("find")
    for option in (("-name", "*.txt"), ("-or",), ("-name", "*.bat")):
        find_cmd.add_option(*option)

    expected_call = ["find", "-name", "*.txt", "-or", "-name", "*.bat"]
    assert_equal(find_cmd.call, expected_call)
Esempio n. 19
0
def _get_bwa_template(call, prefix, iotype = "IN", **kwargs):
    """Create a builder for `call` that registers the BWA index files.

    One kwarg named "<iotype>_PREFIX_<EXT>" is set to "<prefix>.<ext>" for
    each extension. When BWA_VERSION reports a version below (0, 6, 0), the
    "rbwt", "rpac" and "rsa" extensions are registered as well. Remaining
    keyword arguments are forwarded to AtomicCmdBuilder.
    """
    index_exts = ["amb", "ann", "bwt", "pac", "sa"]
    try:
        if BWA_VERSION.version < (0, 6, 0):
            index_exts += ["rbwt", "rpac", "rsa"]
    except versions.VersionRequirementError:
        pass # Ignored here, handled elsewhere

    builder = AtomicCmdBuilder(call, **kwargs)
    for ext in index_exts:
        index_key = "%s_PREFIX_%s" % (iotype, ext.upper())
        index_path = prefix + "." + ext
        builder.set_kwargs(**{index_key: index_path})

    return builder
Esempio n. 20
0
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    """Return an AtomicCmdBuilder for `call` with BWA index files attached.

    The files "<prefix>.<ext>" are registered under the keys
    "<iotype>_PREFIX_<EXT>". The three reverse-index extensions are only
    included if BWA_VERSION.version compares below (0, 6, 0); extra keyword
    arguments are passed on to AtomicCmdBuilder.
    """
    legacy_exts = ()
    try:
        if BWA_VERSION.version < (0, 6, 0):
            legacy_exts = ("rbwt", "rpac", "rsa")
    except versions.VersionRequirementError:
        pass  # Ignored here, handled elsewhere

    cmd = AtomicCmdBuilder(call, **kwargs)
    for ext in ("amb", "ann", "bwt", "pac", "sa") + legacy_exts:
        file_kwarg = {
            "%s_PREFIX_%s" % (iotype, ext.upper()): prefix + "." + ext,
        }
        cmd.set_kwargs(**file_kwarg)

    return cmd
Esempio n. 21
0
    def customize(cls, reference, in_bam, in_vcf, outfile, dependencies = ()):
        """Set up 'unicat' feeding 'vcf_create_pileup'.

        Arguments:
        reference -- Registered as IN_REF and passed via -f.
        in_bam    -- Registered as IN_BAM and appended as a value.
        in_vcf    -- Registered as IN_VCF; read by unicat.
        outfile   -- Registered as OUT_PILEUP; outfile + ".tbi" is
                     registered as OUT_TBI.

        `dependencies` is accepted but not used in this body. Returns a dict
        with both builders under "commands" ("unicat" and "pileup").
        """
        cat_cmd = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                   IN_VCF=in_vcf,
                                   OUT_STDOUT=AtomicCmd.PIPE)

        # The pileup command takes the output of unicat as its stdin
        pileup_cmd = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                                      IN_REF=reference,
                                      IN_BAM=in_bam,
                                      IN_STDIN=cat_cmd,
                                      OUT_PILEUP=outfile,
                                      OUT_TBI=outfile + ".tbi")
        pileup_cmd.add_value("%(IN_BAM)s")
        pileup_cmd.set_option("-f", "%(IN_REF)s")

        commands = {"unicat": cat_cmd}
        commands["pileup"] = pileup_cmd
        return {"commands": commands}
Esempio n. 22
0
    def customize(self,
                  config,
                  reference,
                  input_files,
                  output_file,
                  directory,
                  dependencies=()):
        """Set up the 'mapDamage --rescale-only' command.

        Arguments:
        reference   -- Registered as IN_REFERENCE and passed via -r.
        output_file -- Registered as OUT_BAM and passed via --rescale-out.

        config, input_files, directory and dependencies are returned
        unchanged alongside the builder. The call references
        %(TEMP_IN_BAM)s, which is not set here.
        NOTE(review): presumably TEMP_IN_BAM is supplied by the caller that
        receives the returned parameters -- confirm.
        """
        call = ["mapDamage", "--rescale-only"]
        call += ["-i", "%(TEMP_IN_BAM)s"]
        call += ["-d", "%(TEMP_DIR)s"]
        call += ["-r", "%(IN_REFERENCE)s"]
        call += ["--rescale-out", "%(OUT_BAM)s"]

        rescale_cmd = AtomicCmdBuilder(
            call,
            IN_REFERENCE=reference,
            TEMP_OUT_LOG="Runtime_log.txt",
            TEMP_OUT_CSV="Stats_out_MCMC_correct_prob.csv",
            OUT_BAM=output_file,
            CHECK_VERSION=MAPDAMAGE_VERSION)

        parameters = {"command": rescale_cmd}
        parameters["config"] = config
        parameters["input_files"] = input_files
        parameters["directory"] = directory
        parameters["dependencies"] = dependencies
        return parameters
Esempio n. 23
0
    def customize(self, reference, directory, dependencies=()):
        """Set up the 'mapDamage --stats-only' command.

        Arguments:
        reference    -- Registered as IN_REFERENCE and passed via -r.
        directory    -- Folder in which the OUT_* files are registered.
        dependencies -- Returned unchanged alongside the builder.

        Files registered with a TEMP_OUT_ prefix are given as bare file
        names, while the OUT_* files are joined onto `directory`.
        """
        command = AtomicCmdBuilder(
            [
                "mapDamage", "--stats-only", "-r", "%(IN_REFERENCE)s", "-d",
                "%(TEMP_DIR)s"
            ],
            IN_REFERENCE=reference,
            TEMP_OUT_FREQ_3p="3pGtoA_freq.txt",
            TEMP_OUT_FREQ_5p="5pCtoT_freq.txt",
            # Was a masked "******" placeholder, which is not a usable file
            # name; "dnacomp.txt" is the composition table written by
            # mapDamage next to the other files listed here.
            TEMP_OUT_COMP_USER="dnacomp.txt",
            TEMP_OUT_MISINCORP="misincorporation.txt",
            TEMP_OUT_LOG="Runtime_log.txt",
            TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
            TEMP_OUT_STDERR="pipe_mapDamage.stderr",
            OUT_COMP_GENOME=os.path.join(directory, "dnacomp_genome.csv"),
            OUT_MCMC_PROBS=os.path.join(directory,
                                        "Stats_out_MCMC_correct_prob.csv"),
            OUT_MCMC_HIST=os.path.join(directory, "Stats_out_MCMC_hist.pdf"),
            OUT_MCMC_ITER=os.path.join(directory, "Stats_out_MCMC_iter.csv"),
            OUT_MCMC_ITERSUM=os.path.join(directory,
                                          "Stats_out_MCMC_iter_summ_stat.csv"),
            OUT_MCMC_POSTPRED=os.path.join(directory,
                                           "Stats_out_MCMC_post_pred.pdf"),
            OUT_MCMC_TRACE=os.path.join(directory, "Stats_out_MCMC_trace.pdf"),
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)

        return {"command": command, "dependencies": dependencies}
Esempio n. 24
0
def _build_cat_command():
    """Return an AtomicCmdBuilder for the 'paleomix cat' command.

    The gzip, bzip2 and cat executables are registered as EXEC_* entries,
    and VERSION_PALEOMIX is registered as CHECK_PALEOMIX.
    """
    executables = {
        "EXEC_GZIP": "gzip",
        "EXEC_BZIP": "bzip2",
        "EXEC_CAT": "cat",
    }

    return AtomicCmdBuilder([_PALEOMIX_PATH, "cat"],
                            CHECK_PALEOMIX=VERSION_PALEOMIX,
                            **executables)
Esempio n. 25
0
def _get_common_parameters(version):
    """Return an AtomicCmdBuilder for AdapterRemoval with the shared options.

    Arguments:
    version -- Either VERSION_14 or VERSION_15; selects the version check
               that is passed to the builder as CHECK_VERSION.

    Raises CmdError for any other value of `version`.
    """
    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    elif version == VERSION_15:
        requirement = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    # "--trimns" trims Ns at read ends; "--trimqualities" trims low quality
    # scores. Both are set with fixed=False.
    for flag in ("--trimns", "--trimqualities"):
        builder.set_option(flag, fixed=False)

    return builder
Esempio n. 26
0
    def __init__(self,
                 config,
                 reference,
                 input_bam,
                 output_bam,
                 tags,
                 min_mapq=0,
                 filter_unmapped=False,
                 dependencies=()):
        """Build the 'samtools view | AddOrReplaceReadGroups | calmd' node.

        Arguments:
        config          -- Passed to picard.picard_command.
        reference       -- Registered as IN_REF for 'samtools calmd'.
        input_bam       -- Registered as IN_BAM for 'samtools view'.
        output_bam      -- Receives the stdout of 'samtools calmd'.
        tags            -- Mapping providing the ID, SM, LB, PU and PL values.
        min_mapq        -- If truthy, passed to 'samtools view' as -q.
        filter_unmapped -- If truthy, 'samtools view' is run with -F0x4.
        dependencies    -- Forwarded to PicardNode.__init__.
        """
        view_cmd = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                    IN_BAM=input_bam,
                                    OUT_STDOUT=AtomicCmd.PIPE)
        if min_mapq:
            view_cmd.set_option("-q", min_mapq, sep="")
        if filter_unmapped:
            view_cmd.set_option("-F", "0x4", sep="")
        view_cmd.add_value("%(IN_BAM)s")

        picard_cmd = picard.picard_command(config, "AddOrReplaceReadGroups")
        picard_options = (
            ("INPUT", "/dev/stdin"),
            # Output is written to a named pipe, since the JVM may, in some
            # cases, emit warning messages to stdout, resulting in a
            # malformed BAM.
            ("OUTPUT", "%(TEMP_OUT_BAM)s"),
            ("COMPRESSION_LEVEL", "0"),
            # Ensure that the BAM is sorted; this is required by the
            # pipeline, and needs to be done before calling calmd (avoiding
            # pathologic runtimes).
            ("SORT_ORDER", "coordinate"),
        )
        for key, value in picard_options:
            picard_cmd.set_option(key, value, sep="=")

        # All tags are overwritten; ID is set since the default (e.g. '1')
        # causes problems with pysam due to type inference (is read as a
        # length 1 string, but written as a character).
        for tag in ("ID", "SM", "LB", "PU", "PL"):
            picard_cmd.set_option(tag, tags[tag], sep="=")

        picard_cmd.set_kwargs(IN_STDIN=view_cmd, TEMP_OUT_BAM="bam.pipe")

        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd_cmd = AtomicCmdBuilder(calmd_call,
                                     IN_REF=reference,
                                     TEMP_IN_BAM="bam.pipe",
                                     OUT_STDOUT=output_bam)

        finalized = []
        for builder in (view_cmd, picard_cmd, calmd_cmd):
            finalized.append(builder.finalize())

        description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
        PicardNode.__init__(self,
                            command=ParallelCmds(finalized),
                            description=description,
                            dependencies=dependencies)
Esempio n. 27
0
    def customize(cls, pileup, infile, outfile, interval, dependencies = ()):
        """Set up the 'unicat | vcf_filter | bgzip' pipeline.

        Arguments:
        pileup   -- Registered as IN_PILEUP and passed via --pileup.
        infile   -- Registered as IN_VCF; read by unicat.
        outfile  -- Receives the stdout of bgzip.
        interval -- Mapping; the optional "Homozygous Contigs" entry lists
                    contigs passed via --homozygous-chromosome.

        `dependencies` is accepted but not used in this body. Returns a dict
        with the three builders under "commands".
        """
        unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                  IN_VCF     = infile,
                                  OUT_STDOUT = AtomicCmd.PIPE)

        vcffilter = AtomicCmdBuilder(["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
                                     IN_PILEUP = pileup,
                                     IN_STDIN     = unicat,
                                     OUT_STDOUT   = AtomicCmd.PIPE)
        for contig in interval.get("Homozygous Contigs", ()):
            # add_option rather than set_option: the option is repeated once
            # per contig, and set_option refuses to redefine a fixed option,
            # which made any interval with two or more contigs fail.
            vcffilter.add_option("--homozygous-chromosome", contig)

        bgzip = AtomicCmdBuilder(["bgzip"],
                                 IN_STDIN     = vcffilter,
                                 OUT_STDOUT   = outfile)

        return {"commands" : {"unicat" : unicat,
                              "filter" : vcffilter,
                              "bgzip"  : bgzip}}
Esempio n. 28
0
    def customize(cls, options, infile, interval, outfile, padding, dependencies = ()):
        """Set up the 'bam_genotype_regions' command.

        Arguments:
        options  -- Provides intervals_root, the folder of the .bed files.
        infile   -- Registered as IN_VCFFILE; infile + ".tbi" as IN_TABIX.
        interval -- Mapping with "Genome" and "Name" (used to locate the
                    .bed file) and optional "Protein coding" / "Indels".
        outfile  -- Registered as OUT_STDOUT.

        `padding` and `dependencies` are accepted but not used in this body.
        Returns a dict holding the builder under the "command" key.
        """
        bed_name = "{Genome}.{Name}".format(**interval) + ".bed"
        bed_path = os.path.join(options.intervals_root, bed_name)

        builder = AtomicCmdBuilder(["bam_genotype_regions"],
                                   IN_VCFFILE=infile,
                                   IN_TABIX=infile + ".tbi",
                                   IN_INTERVALS=bed_path,
                                   OUT_STDOUT=outfile)
        builder.set_option("--genotype", "%(IN_VCFFILE)s")
        builder.set_option("--intervals", "%(IN_INTERVALS)s")

        protein_coding = interval.get("Protein coding")
        if protein_coding:
            builder.set_option("--whole-codon-indels-only")

        keep_indels = interval.get("Indels")
        if not keep_indels:
            builder.set_option("--ignore-indels")

        return {"command": builder}
Esempio n. 29
0
def test_builder__finalize__calls_atomiccmd():
    """finalize() must construct AtomicCmd from the built call and kwargs."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {"IN_FILE": "/in/file",
                       "OUT_FILE": "/out/file",
                       "set_cwd": True}
    calls = []

    class _AtomicCmdMock:
        # Stand-in for AtomicCmd that checks its constructor arguments
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            calls.append(True)

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        echo_cmd = AtomicCmdBuilder("echo", set_cwd=True)
        echo_cmd.add_option("-out", "%(OUT_FILE)s")
        echo_cmd.add_value("%(IN_FILE)s")
        echo_cmd.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        echo_cmd.finalize()
        # Fails if the mock constructor was never invoked
        assert calls
Esempio n. 30
0
def _get_common_parameters(version):
    """Create the AdapterRemoval builder shared by all invocations.

    Arguments:
    version -- VERSION_14 or VERSION_15; anything else raises CmdError.

    The matching version check is attached as CHECK_VERSION. If that check
    reports a version below (2, 0) and no warning was printed before, a
    deprecation warning is printed via pypeline.ui and the module-level
    _DEPRECATION_WARNING_PRINTED flag is set.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        check = _VERSION_14_CHECK
    elif version == VERSION_15:
        check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    cmd = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=check)
    for flag in (
            "--trimns",  # Trim Ns at read ends
            "--trimqualities",  # Trim low quality scores
    ):
        cmd.set_option(flag, fixed=False)

    if _DEPRECATION_WARNING_PRINTED:
        return cmd

    try:
        if check.version < (2, 0):
            import pypeline.ui as ui
            indent = "         "
            ui.print_warn("\nWARNING: AdapterRemoval v1.5.x is deprecated;")
            ui.print_warn(indent + "Upgrading to 2.1.x is strongly adviced!\n")
            ui.print_warn(
                indent + "Download the newest version of AdapterRemoval at ")
            ui.print_warn(
                indent + "https://github.com/MikkelSchubert/adapterremoval\n")

            _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # A failed version lookup only suppresses the warning here
        pass

    return cmd
Esempio n. 31
0
def test_builder__finalize__calls_atomiccmd():
    """Check the arguments AtomicCmdBuilder.finalize hands to AtomicCmd."""
    invocations = []

    class _AtomicCmdMock:
        # Replaces AtomicCmd for the duration of the Monkeypatch block
        def __init__(self, *args, **kwargs):
            wanted_call = ["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"]
            assert_equal(args, (wanted_call, ))

            wanted_kwargs = {"set_cwd": True}
            wanted_kwargs["IN_FILE"] = "/in/file"
            wanted_kwargs["OUT_FILE"] = "/out/file"
            assert_equal(kwargs, wanted_kwargs)

            invocations.append(True)

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        cmd = AtomicCmdBuilder("echo", set_cwd=True)
        cmd.add_option("-out", "%(OUT_FILE)s")
        cmd.add_value("%(IN_FILE)s")
        cmd.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        cmd.finalize()
        assert invocations
Esempio n. 32
0
    def customize(cls,
                  options,
                  infile,
                  interval,
                  outfile,
                  padding,
                  dependencies=()):
        """Build the parameters for running 'bam_genotype_regions'.

        The .bed file is looked up as "<Genome>.<Name>.bed" (both taken from
        `interval`) inside options.intervals_root. `infile` and its ".tbi"
        companion are registered as inputs and `outfile` receives stdout.
        "--whole-codon-indels-only" is set if interval["Protein coding"] is
        truthy; "--ignore-indels" is set unless interval["Indels"] is truthy.

        `padding` and `dependencies` are accepted but not used in this body.
        """
        bed_file = os.path.join(options.intervals_root,
                                "{Genome}.{Name}".format(**interval) + ".bed")

        cmd = AtomicCmdBuilder(["bam_genotype_regions"],
                               IN_VCFFILE=infile,
                               IN_TABIX=infile + ".tbi",
                               IN_INTERVALS=bed_file,
                               OUT_STDOUT=outfile)

        for option, placeholder in (("--genotype", "%(IN_VCFFILE)s"),
                                    ("--intervals", "%(IN_INTERVALS)s")):
            cmd.set_option(option, placeholder)

        if interval.get("Protein coding"):
            cmd.set_option("--whole-codon-indels-only")
        if not interval.get("Indels"):
            cmd.set_option("--ignore-indels")

        return {"command": cmd}
Esempio n. 33
0
 def customize(cls, reference, infile, outfile, filters, options, dependencies = ()):
     """Set up the 'vcf_qual_percentile' command.

     Arguments:
     infile  -- Registered as IN_VCF and appended to the call.
     outfile -- Registered as OUT_VCF and passed via --out.
     filters -- Mapping of option name to value; each pair is added as is.
     options -- Provides makefile['vcf_percentile_threshold'].

     `reference` and `dependencies` are accepted but not used in this body.
     Returns a dict with the builder under 'commands' / 'Filter'.
     """
     # NOTE(review): this value is never used below; the lookup is kept so
     # that a missing makefile key still fails here -- confirm intent.
     percentile = str(options.makefile['vcf_percentile_threshold'])

     # filter reads
     filter_cmd = AtomicCmdBuilder(['vcf_qual_percentile'],
                                   IN_VCF=infile,
                                   OUT_VCF=outfile)
     for option, value in filters.items():
         filter_cmd.add_option(option, value)
     filter_cmd.set_option('--out', '%(OUT_VCF)s')
     # NOTE(review): the raw path is appended via add_option instead of a
     # '%(IN_VCF)s' value -- confirm that this is intended.
     filter_cmd.add_option(infile)

     commands = {'Filter': filter_cmd}
     return {'commands': commands}
Esempio n. 34
0
    def customize(cls, input_alignment, output_tree, dependencies=()):
        """Set up the 'parsimonator' command (run with set_cwd=True).

        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        output_tree      -- Filename for the output newick tree.

        `dependencies` is accepted but not used in this body. Returns a dict
        holding the builder under the "command" key.
        """
        builder = AtomicCmdBuilder("parsimonator", set_cwd=True)
        builder.set_option("-s", "%(TEMP_OUT_ALN)s")
        builder.set_option("-n", "output")

        # Random seed for the stepwise addition process
        seed = int(random.random() * 2**31 - 1)
        builder.set_option("-p", seed, fixed=False)

        file_kwargs = {
            # Auto-delete: Symlinks
            "TEMP_OUT_ALN": os.path.basename(input_alignment),
            # Input files, are not used directly
            "IN_ALIGNMENT": input_alignment,
            # Final output file, are not created directly
            "OUT_TREE": output_tree,
        }
        builder.set_kwargs(**file_kwargs)

        return {"command": builder}
Esempio n. 35
0
    def customize(cls, infile, intervals, outfile, dependencies=()):
        """Build the parameters for running 'bam_sample_regions'.

        `infile` is registered as IN_PILEUP (passed via --genotype),
        `intervals` as IN_INTERVALS (passed via --intervals), and `outfile`
        receives stdout. `dependencies` is not used in this body.
        """
        sample_cmd = AtomicCmdBuilder(["bam_sample_regions"],
                                      IN_PILEUP=infile,
                                      IN_INTERVALS=intervals,
                                      OUT_STDOUT=outfile)

        sample_cmd.set_option("--genotype", "%(IN_PILEUP)s")
        sample_cmd.set_option("--intervals", "%(IN_INTERVALS)s")

        parameters = {}
        parameters["command"] = sample_cmd
        return parameters
Esempio n. 36
0
def test_builder__set_kwargs__after_finalize():
    """A finalized builder rejects set_kwargs and keeps its kwargs as is."""
    kwargs_before = {"IN_PATH": "/a/b/"}

    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(**kwargs_before)
    cmd.finalize()

    assert_raises(AtomicCmdBuilderError, cmd.set_kwargs, OUT_PATH="/dst/file")
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
Esempio n. 37
0
    def customize(cls, input_file, output_file, algorithm = "auto", dependencies = ()):
        """Set up the alignment command for the selected preset.

        Arguments:
        input_file   -- Registered as IN_FASTA and appended as a value.
        output_file  -- Registered as OUT_STDOUT.
        algorithm    -- Name looked up (lower-cased) in _PRESETS to obtain
                        the call.
        dependencies -- Returned unchanged alongside the builder.
        """
        preset_call = _PRESETS[algorithm.lower()]

        builder = AtomicCmdBuilder(preset_call)
        builder.add_value("%(IN_FASTA)s")
        builder.set_kwargs(IN_FASTA=input_file,
                           OUT_STDOUT=output_file,
                           CHECK_VERSION=MAFFT_VERSION)

        parameters = {"command": builder}
        parameters["dependencies"] = dependencies
        return parameters
Esempio n. 38
0
    def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
        """Build the parameters for 'unicat | vcf_create_pileup'.

        unicat reads `in_vcf` and pipes it to vcf_create_pileup, which is
        given `in_bam` as a value and `reference` via -f, and for which
        `outfile` and outfile + ".tbi" are registered as outputs.
        `dependencies` is not used in this body.
        """
        vcf_reader = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                      IN_VCF=in_vcf,
                                      OUT_STDOUT=AtomicCmd.PIPE)

        pileup_kwargs = {
            "IN_REF": reference,
            "IN_BAM": in_bam,
            "IN_STDIN": vcf_reader,
            "OUT_PILEUP": outfile,
            "OUT_TBI": outfile + ".tbi",
        }
        pileup = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                                  **pileup_kwargs)
        pileup.add_value("%(IN_BAM)s")
        pileup.set_option("-f", "%(IN_REF)s")

        return {"commands": {"unicat": vcf_reader, "pileup": pileup}}
Esempio n. 39
0
    def customize(cls, input_alignment, output_tree, dependencies = ()):
        """Build the parameters for running 'parsimonator'.

        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        output_tree      -- Filename for the output newick tree.

        The command is created with set_cwd=True and a random value for -p.
        `dependencies` is not used in this body.
        """
        cmd = AtomicCmdBuilder("parsimonator", set_cwd=True)

        for option, value in (("-s", "%(TEMP_OUT_ALN)s"), ("-n", "output")):
            cmd.set_option(option, value)
        # Random seed for the stepwise addition process
        cmd.set_option("-p", int(random.random() * 2**31 - 1), fixed=False)

        # Auto-delete: Symlinks
        alignment_name = os.path.basename(input_alignment)
        cmd.set_kwargs(TEMP_OUT_ALN=alignment_name,
                       # Input files, are not used directly
                       IN_ALIGNMENT=input_alignment,
                       # Final output file, are not created directly
                       OUT_TREE=output_tree)

        return {"command": cmd}
Esempio n. 40
0
    def customize(self,
                  config,
                  reference,
                  input_files,
                  output_directory,
                  title="mapDamage",
                  dependencies=()):
        """Set up the 'mapDamage --no-stats' command.

        Arguments:
        reference        -- Registered as IN_REFERENCE and passed via -r.
        output_directory -- Folder in which all OUT_* files are registered.
        title            -- Passed to mapDamage via -t.

        config, input_files and dependencies are returned unchanged
        alongside the builder. The call references %(TEMP_IN_BAM)s, which
        is not set here.
        NOTE(review): presumably TEMP_IN_BAM is supplied by the caller that
        receives the returned parameters -- confirm.
        """
        def _in_output_dir(filename):
            return os.path.join(output_directory, filename)

        call = ["mapDamage", "--no-stats"]
        # Prevent references with many contigs from using excessive
        # amounts of memory, at the cost of per-contig statistics:
        call.append("--merge-reference-sequences")
        call += ["-t", title]
        call += ["-i", "%(TEMP_IN_BAM)s"]
        call += ["-d", "%(TEMP_DIR)s"]
        call += ["-r", "%(IN_REFERENCE)s"]

        plot_cmd = AtomicCmdBuilder(
            call,
            IN_REFERENCE=reference,
            OUT_FREQ_3p=_in_output_dir("3pGtoA_freq.txt"),
            OUT_FREQ_5p=_in_output_dir("5pCtoT_freq.txt"),
            OUT_COMP_USER=_in_output_dir("dnacomp.txt"),
            OUT_PLOT_FRAG=_in_output_dir("Fragmisincorporation_plot.pdf"),
            OUT_PLOT_LEN=_in_output_dir("Length_plot.pdf"),
            OUT_LENGTH=_in_output_dir("lgdistribution.txt"),
            OUT_MISINCORP=_in_output_dir("misincorporation.txt"),
            OUT_LOG=_in_output_dir("Runtime_log.txt"),
            TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
            TEMP_OUT_STDERR="pipe_mapDamage.stderr",
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)

        parameters = {"command": plot_cmd}
        parameters["config"] = config
        parameters["input_files"] = input_files
        parameters["dependencies"] = dependencies
        return parameters
Esempio n. 41
0
    def __init__(self, config, reference, input_bam, output_bam, tags,
                 min_mapq=0, filter_unmapped=False, dependencies=()):
        """Create the node that filters, re-tags and runs calmd on a BAM.

        Three commands are run as ParallelCmds: 'samtools view' on
        `input_bam` (with -q `min_mapq` and/or -F0x4 if requested), Picard
        AddOrReplaceReadGroups (built from `config`, using the ID, SM, LB,
        PU and PL entries of `tags`), and 'samtools calmd' against
        `reference`, whose stdout is written to `output_bam`.
        `dependencies` is forwarded to PicardNode.__init__.
        """
        samtools_view = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                         IN_BAM=input_bam,
                                         OUT_STDOUT=AtomicCmd.PIPE)

        if min_mapq:
            samtools_view.set_option("-q", min_mapq, sep="")
        if filter_unmapped:
            samtools_view.set_option("-F", "0x4", sep="")

        samtools_view.add_value("%(IN_BAM)s")

        read_groups = picard.picard_command(config, "AddOrReplaceReadGroups")
        read_groups.set_option("INPUT", "/dev/stdin", sep="=")
        # Output is written to a named pipe, since the JVM may, in some cases,
        # emit warning messages to stdout, resulting in a malformed BAM.
        read_groups.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        read_groups.set_option("COMPRESSION_LEVEL", "0", sep="=")
        # Ensure that the BAM is sorted; this is required by the pipeline, and
        # needs to be done before calling calmd (avoiding pathologic runtimes).
        read_groups.set_option("SORT_ORDER", "coordinate", sep="=")

        # All tags are overwritten; ID is set since the default (e.g. '1')
        # causes problems with pysam due to type inference (is read as a length
        # 1 string, but written as a character).
        for tag_name in ("ID", "SM", "LB", "PU", "PL"):
            tag_value = tags[tag_name]
            read_groups.set_option(tag_name, tag_value, sep="=")

        pipe_name = "bam.pipe"
        read_groups.set_kwargs(IN_STDIN=samtools_view,
                               TEMP_OUT_BAM=pipe_name)

        samtools_calmd = AtomicCmdBuilder(
            ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
            IN_REF=reference,
            TEMP_IN_BAM=pipe_name,
            OUT_STDOUT=output_bam)

        pipeline = (samtools_view, read_groups, samtools_calmd)
        commands = [builder.finalize() for builder in pipeline]

        PicardNode.__init__(
            self,
            command=ParallelCmds(commands),
            description="<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam),
            dependencies=dependencies)
Esempio n. 42
0
    def customize(cls, pileup, infile, outfile, regions, dependencies=()):
        """Set up the 'cat | vcf_filter | bgzip' pipeline.

        Arguments:
        pileup  -- Registered as IN_PILEUP and passed via --pileup.
        infile  -- Registered as IN_VCF; read by the 'cat' command.
        outfile -- Receives the stdout of bgzip.
        regions -- Mapping; regions["HomozygousContigs"] lists the contigs
                   passed via --homozygous-chromosome.

        `dependencies` is accepted but not used in this body. Returns a dict
        with the three builders under "commands".
        """
        reader = factory.new("cat")
        reader.add_value("%(IN_VCF)s")
        reader.set_kwargs(IN_VCF=infile, OUT_STDOUT=AtomicCmd.PIPE)

        vcf_filter = factory.new("vcf_filter")
        vcf_filter.add_option("--pileup", "%(IN_PILEUP)s")
        # One --homozygous-chromosome option is added per contig
        for contig_name in regions["HomozygousContigs"]:
            vcf_filter.add_option("--homozygous-chromosome", contig_name)
        vcf_filter.set_kwargs(IN_PILEUP=pileup,
                              IN_STDIN=reader,
                              OUT_STDOUT=AtomicCmd.PIPE)

        compressor = AtomicCmdBuilder(["bgzip"],
                                      IN_STDIN=vcf_filter,
                                      OUT_STDOUT=outfile)

        commands = {"cat": reader}
        commands["filter"] = vcf_filter
        commands["bgzip"] = compressor
        return {"commands": commands}
Esempio n. 43
0
def _get_common_parameters(version):
    """Create the AdapterRemoval builder shared by all invocations.

    Arguments:
    version -- VERSION_14 or VERSION_15; anything else raises CmdError.

    The matching version check is attached as CHECK_VERSION, and the
    "--trimns" and "--trimqualities" options are set with fixed=False.
    """
    if version == VERSION_14:
        check = _VERSION_14_CHECK
    elif version == VERSION_15:
        check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    adapterremoval = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=check)

    # Trim Ns at read ends
    adapterremoval.set_option("--trimns", fixed=False)
    # Trim low quality scores
    adapterremoval.set_option("--trimqualities", fixed=False)

    return adapterremoval
Esempio n. 44
0
    def customize(cls, pileup, infile, outfile, interval, dependencies=()):
        """Set up the 'unicat | vcf_filter | bgzip' pipeline.

        Arguments:
        pileup   -- Registered as IN_PILEUP and passed via --pileup.
        infile   -- Registered as IN_VCF; read by unicat.
        outfile  -- Receives the stdout of bgzip.
        interval -- Mapping; the optional "Homozygous Contigs" entry lists
                    contigs passed via --homozygous-chromosome.

        `dependencies` is accepted but not used in this body. Returns a dict
        with the three builders under "commands".
        """
        unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                  IN_VCF=infile,
                                  OUT_STDOUT=AtomicCmd.PIPE)

        vcffilter = AtomicCmdBuilder(
            ["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
            IN_PILEUP=pileup,
            IN_STDIN=unicat,
            OUT_STDOUT=AtomicCmd.PIPE)
        for contig in interval.get("Homozygous Contigs", ()):
            # add_option rather than set_option: the option is repeated once
            # per contig, and set_option refuses to redefine a fixed option,
            # which made any interval with two or more contigs fail.
            vcffilter.add_option("--homozygous-chromosome", contig)

        bgzip = AtomicCmdBuilder(["bgzip"],
                                 IN_STDIN=vcffilter,
                                 OUT_STDOUT=outfile)

        return {
            "commands": {
                "unicat": unicat,
                "filter": vcffilter,
                "bgzip": bgzip
            }
        }
Esempio n. 45
0
def test_builder__set_option__overwrite():
    # An option set with fixed=False may be replaced by a later set_option
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt", fixed=False)
    cmd.set_option("-name", "*.bat")

    expected_call = ["find", "-name", "*.bat"]
    assert_equal(cmd.call, expected_call)
Esempio n. 46
0
def test_builder__set_kwargs__called_twice():
    # Keyword arguments from successive set_kwargs calls are accumulated
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(OUT_PATH = "/dst/file")
    cmd.set_kwargs(IN_PATH = "/a/b/")

    assert_equal(cmd.kwargs, {"IN_PATH" : "/a/b/", "OUT_PATH" : "/dst/file"})
Esempio n. 47
0
def test_builder__set_option():
    # A key / value option is appended to the call after the executable
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt")

    expected_call = ["find", "-name", "*.txt"]
    assert_equal(cmd.call, expected_call)
Esempio n. 48
0
def test_builder__set_option__overwrite():
    # The first value is explicitly non-fixed, so the second call replaces it
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt", fixed = False)
    cmd.set_option("-name", "*.bat")

    expected_call = ["find", "-name", "*.bat"]
    assert_equal(cmd.call, expected_call)
Esempio n. 49
0
def test_builder__set_option__overwrite_fixed():
    # Options are fixed unless otherwise specified; overwriting one must fail
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt")

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_option, "-name", "*.bat")
Esempio n. 50
0
    def customize(cls, input_alignment, input_partition, template, start = 0, bootstraps = 50, dependencies = ()):
        """
        Builds a 'raxmlHPC' command (run with set_cwd enabled) for which one
        output file is registered per entry returned by cls._bootstraps.

        Arguments:
        input_alignment  -- Alignment file; registered as an input file.
        input_partition  -- Partition file; registered as an input file.
        template         -- Passed to cls._bootstraps to generate output filenames.
        start            -- Passed to cls._bootstraps along with 'bootstraps'.
        bootstraps       -- Value (converted to int) used for the '-N' option.
        dependencies     -- Not used when building the command."""
        builder = AtomicCmdBuilder("raxmlHPC", set_cwd = True)

        # Algorithm selection
        builder.set_option("-f", "j")
        # Run name; output files are saved with a .Pypeline postfix, and subsequently renamed
        builder.set_option("-n", "Pypeline")
        # A model is required, but is not used
        builder.set_option("-m", "GTRGAMMA")

        # Random seed for bootstrap generation; the option is not fixed, and
        # may be replaced with a fixed value to allow replicability.
        seed = int(random.random() * 2**31 - 1)
        builder.set_option("-b", seed, fixed = False)
        # Number of bootstrap alignments; may likewise be overridden
        builder.set_option("-N", int(bootstraps), fixed = False)

        # The sequences and partitions are accessed via symlinks, to prevent the
        # creation of *.reduced files outside of the temp folder. In addition, it
        # may be necessary to remove the .reduced files if created.
        builder.set_option("-s", "input.alignment")
        builder.set_option("-q", "input.partition")

        files = dict(IN_ALIGNMENT  = input_alignment,
                     IN_PARTITION  = input_partition,
                     TEMP_OUT_INF  = "RAxML_info.Pypeline",
                     TEMP_OUT_ALN  = "input.alignment",
                     TEMP_OUT_PAR  = "input.partition",
                     CHECK_VERSION = RAXML_VERSION)

        # One numbered output file per bootstrap
        numbered = enumerate(cls._bootstraps(template, bootstraps, start))
        files.update(("OUT_BS_%03i" % index, filename)
                     for (index, (_, filename)) in numbered)
        builder.set_kwargs(**files)

        return {"command" : builder}
Esempio n. 51
0
    def customize(cls, input_alignment, input_partition, output_template, threads = 1, dependencies = ()):
        """
        Builds the RAxML command ('-f a') used to infer a tree with bootstrap
        support; 'raxmlHPC-PTHREADS' is used if more than one thread is requested.

        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_template  -- A template string used to construct final filenames. Should consist
                            of a full path, including a single '%s', which is replaced with the
                            variable part of RAxML output files (e.g. 'info', 'bestTree', ...).
                            Example destination: '/disk/project/SN013420.RAxML.%s'
                            Example output:      '/disk/project/SN013420.RAxML.bestTree'
        threads          -- Number of threads; values above 1 select the PTHREADS
                            executable and are passed using the '-T' option.
        dependencies     -- Not used when building the command."""

        use_pthreads = threads > 1
        if use_pthreads:
            executable, version = "raxmlHPC-PTHREADS", RAXML_PTHREADS_VERSION
        else:
            executable, version = "raxmlHPC", RAXML_VERSION

        builder = AtomicCmdBuilder(executable)
        if use_pthreads:
            builder.set_option("-T", threads)

        # Perform rapid bootstrapping
        builder.set_option("-f", "a")
        # Run name; output files are saved with a .Pypeline postfix, and subsequently renamed
        builder.set_option("-n", "Pypeline")
        # Ensures that output is saved to the temporary directory
        builder.set_option("-w", "%(TEMP_DIR)s")
        # The sequences and partitions are accessed via symlinks, to prevent the
        # creation of *.reduced files outside of the temp folder. In addition, it
        # may be necessary to remove the .reduced files if created.
        builder.set_option("-s", "%(TEMP_OUT_ALN)s")
        builder.set_option("-q", "%(TEMP_OUT_PART)s")

        partition_name = os.path.basename(input_partition)
        alignment_name = os.path.basename(input_alignment)

        files = {
            # Auto-delete: Symlinks and .reduced files that RAxML may generate
            "TEMP_OUT_PART"   : partition_name,
            "TEMP_OUT_PART_R" : partition_name + ".reduced",
            "TEMP_OUT_ALN"    : alignment_name,
            "TEMP_OUT_ALN_R"  : alignment_name + ".reduced",

            # Input files; these are not used directly by the command
            "IN_ALIGNMENT"    : input_alignment,
            "IN_PARTITION"    : input_partition,

            # Final output files; these are not created directly by the command
            "OUT_INFO"        : output_template % "info",
            "OUT_BESTTREE"    : output_template % "bestTree",
            "OUT_BOOTSTRAP"   : output_template % "bootstrap",
            "OUT_BIPART"      : output_template % "bipartitions",
            "OUT_BIPARTLABEL" : output_template % "bipartitionsBranchLabels",

            "CHECK_VERSION"   : version,
        }
        builder.set_kwargs(**files)

        # The following options are not fixed, and may therefore be overridden:
        # Use the GTRGAMMAI model of NT substitution by default
        builder.set_option("-m", "GTRGAMMAI", fixed = False)
        # Enable rapid bootstrapping and set the random seed; may be set to a
        # fixed value to allow replicability.
        bootstrap_seed = int(random.random() * 2**31 - 1)
        builder.set_option("-x", bootstrap_seed, fixed = False)
        # Random seed for parsimony inference; may be set to a fixed value to
        # allow replicability.
        parsimony_seed = int(random.random() * 2**31 - 1)
        builder.set_option("-p", parsimony_seed, fixed = False)
        # Terminate bootstrapping upon convergence, rather than after a fixed number of repetitions
        builder.set_option("-N", "autoMRE", fixed = False)

        return {"command"         : builder}
Esempio n. 52
0
    def customize(cls, input_alignment, input_partitions, output_tree, dependencies = ()):
        """
        Builds a 'raxmlHPC' command ('-y') computing a randomized parsimony
        starting tree.

        Arguments:
        input_alignment  -- Alignment file; registered as an input file.
        input_partitions -- Partitions file; registered as an input file.
        output_tree      -- Registered as the output file (OUT_TREE).
        dependencies     -- Not used when building the command."""
        builder = AtomicCmdBuilder("raxmlHPC")

        # Compute a randomized parsimony starting tree
        builder.set_option("-y")
        # Run name; output files are saved with a .Pypeline postfix, and subsequently renamed
        builder.set_option("-n", "Pypeline")
        # A model is required, but is not used
        builder.set_option("-m", "GTRGAMMA")
        # Ensures that output is saved to the temporary directory
        builder.set_option("-w", "%(TEMP_DIR)s")
        # Random seed; the option is not fixed, and may be replaced with a
        # fixed value to allow replicability.
        seed = int(random.random() * 2**31 - 1)
        builder.set_option("-p", seed, fixed = False)

        # The sequences and partitions are accessed via symlinks, to prevent the
        # creation of *.reduced files outside of the temp folder.
        builder.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
        builder.set_option("-q", "%(TEMP_OUT_PARTITION)s")

        files = {
            "IN_ALIGNMENT"       : input_alignment,
            "IN_PARTITION"       : input_partitions,

            # TEMP_OUT_ is used to automatically remove these files
            "TEMP_OUT_ALIGNMENT" : "RAxML_alignment",
            "TEMP_OUT_PARTITION" : "RAxML_partitions",
            "TEMP_OUT_INFO"      : "RAxML_info.Pypeline",

            "OUT_TREE"           : output_tree,

            "CHECK_VERSION"      : RAXML_VERSION,
        }
        builder.set_kwargs(**files)

        return {"command" : builder}
Esempio n. 53
0
    def customize(cls, input_alignment, input_partition, output_file, dependencies = ()):
        """
        Builds an 'examlParser' command, which is run with set_cwd enabled.

        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_file      -- Filename for the output binary sequence.
        dependencies     -- Not used when building the command."""

        builder = AtomicCmdBuilder("examlParser", set_cwd = True)

        builder.set_option("-s", "%(TEMP_OUT_ALN)s")
        builder.set_option("-q", "%(TEMP_OUT_PART)s")
        # The output file will be named output.binary, and placed in the CWD
        builder.set_option("-n", "output")

        # Substitution model; not fixed, and may therefore be overridden
        builder.set_option("-m", "DNA", fixed = False)

        files = {
            # Auto-delete: Symlinks
            "TEMP_OUT_PART" : os.path.basename(input_partition),
            "TEMP_OUT_ALN"  : os.path.basename(input_alignment),

            # Input files; these are not used directly by the command
            "IN_ALIGNMENT"  : input_alignment,
            "IN_PARTITION"  : input_partition,

            # Final output file; this is not created directly by the command
            "OUT_BINARY"    : output_file,

            "CHECK_EXAML"   : PARSER_VERSION,
        }
        builder.set_kwargs(**files)

        return {"command" : builder}
Esempio n. 54
0
    def customize(cls, input_alignment, input_partition, output_alignment, output_partition, dependencies = ()):
        """
        Builds a 'raxmlHPC' command ('-f c') which reads and (in the case of
        empty columns) reduces the input.

        Arguments:
        input_alignment  -- Alignment file; registered as an input file.
        input_partition  -- Partition file; registered as an input file.
        output_alignment -- Registered as an output file (OUT_ALIGNMENT).
        output_partition -- Registered as an output file (OUT_PARTITION).
        dependencies     -- Not used when building the command."""
        builder = AtomicCmdBuilder("raxmlHPC")

        # Read and (in the case of empty columns) reduce input
        builder.set_option("-f", "c")
        # Run name; output files are saved with a .Pypeline postfix, and subsequently renamed
        builder.set_option("-n", "Pypeline")
        # A model is required, but is not used
        builder.set_option("-m", "GTRGAMMA")
        # Ensures that output is saved to the temporary directory
        builder.set_option("-w", "%(TEMP_DIR)s")

        # The sequences and partitions are accessed via symlinks, to prevent the
        # creation of *.reduced files outside of the temp folder. In addition, it
        # may be necessary to remove the .reduced files if created.
        builder.set_option("-s", "%(TEMP_IN_ALIGNMENT)s")
        builder.set_option("-q", "%(TEMP_IN_PARTITION)s")

        files = {
            "IN_ALIGNMENT"      : input_alignment,
            "IN_PARTITION"      : input_partition,

            "TEMP_IN_ALIGNMENT" : "RAxML_alignment",
            "TEMP_IN_PARTITION" : "RAxML_partitions",
            "TEMP_OUT_INFO"     : "RAxML_info.Pypeline",

            "OUT_ALIGNMENT"     : output_alignment,
            "OUT_PARTITION"     : output_partition,
            "CHECK_VERSION"     : RAXML_VERSION,
        }
        builder.set_kwargs(**files)

        return {"command" : builder}
Esempio n. 55
0
def test_builder__kwargs__set_cwd():
    # The 'set_cwd' constructor argument is exposed through 'kwargs'
    cmd = AtomicCmdBuilder(["ls"], set_cwd=True)

    expected_kwargs = {"set_cwd": True}
    assert_equal(cmd.kwargs, expected_kwargs)
Esempio n. 56
0
def test_builder__set_option():
    # Setting an option adds both the key and the value to the call
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt")

    expected_call = ["find", "-name", "*.txt"]
    assert_equal(cmd.call, expected_call)
Esempio n. 57
0
def test_builder__set__kwargs__overwriting():
    # Overwriting an existing keyword argument must fail ...
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH = "/a/b/")
    assert_raises(AtomicCmdBuilderError,
                  cmd.set_kwargs, IN_PATH = "/dst/file")

    # ... and must leave the original value in place
    assert_equal(cmd.kwargs, {"IN_PATH" : "/a/b/"})
Esempio n. 58
0
def test_builder__set_option__overwrite_fixed():
    # Options are fixed unless otherwise specified; overwriting one must fail
    cmd = AtomicCmdBuilder("find")
    cmd.set_option("-name", "*.txt")

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_option, "-name", "*.bat")
Esempio n. 59
0
def test_builder__finalize__returns_singleton():
    # Repeated calls to finalize must return the very same object
    cmd = AtomicCmdBuilder("echo")
    first = cmd.finalize()
    second = cmd.finalize()
    assert first is second