Beispiel #1
0
    def get_version(self):
        """Return the combined manta and strelka version string.

        Each executable (``self.path_manta``, ``self.path_strelka``) is
        run with ``--version``; the stripped outputs are joined with a
        hyphen, manta first.
        """
        versions = []
        for template in ("{self.path_manta} --version",
                         "{self.path_strelka} --version"):
            command = template.format(self=self)
            versions.append(E.run(command, return_stdout=True).strip())
        return "-".join(versions)
Beispiel #2
0
def cleanup(signal=None, frame=None):
    """Unmount and remove the arvados mount point recorded in ``P.PARAMS``.

    The ``(signal, frame)`` signature matches what handlers installed
    with ``signal.signal`` receive; neither argument is used.

    Nothing happens if ``P.PARAMS`` has no truthy ``"mount_point"``
    entry. Otherwise the mount point is unmounted with ``fusermount -u``,
    its directory is removed and the entry is reset to ``None``.
    """
    mountpoint = P.PARAMS.get("mount_point", None)
    if not mountpoint:
        return

    E.run("fusermount -u {}".format(mountpoint))
    E.debug("finally: unmounted arvados at {}".format(mountpoint))
    try:
        shutil.rmtree(mountpoint)
    except OSError as ex:
        # A failed directory removal must not abort the clean-up: the
        # file system has already been unmounted at this point.
        E.warn("could not delete mountpoint {}: {}".format(mountpoint, ex))
    # Always clear the entry so that a repeated call does not try to
    # unmount the same location again.
    P.PARAMS["mount_point"] = None
Beispiel #3
0
    def arvados_cleanup(signal=None, frame=None):
        """Unmount the arvados mount point stored in ``P.PARAMS``, if any.

        ``signal`` and ``frame`` are accepted but not used. After the
        unmount the mount directory is removed (failures are logged and
        ignored) and ``P.PARAMS["mount_point"]`` is reset to ``None``.
        """
        mount_dir = P.PARAMS.get("mount_point", None)
        if not mount_dir:
            return

        E.debug("unmounting arvados at {}".format(mount_dir))
        E.run("fusermount -u {}".format(mount_dir))
        E.debug("unmounted arvados at {}".format(mount_dir))
        try:
            shutil.rmtree(mount_dir)
        except OSError:
            # ignore errors - arvados issue (read-only file system)?
            message = "failure while removing mountpoint {} - ignored"
            E.warn(message.format(mount_dir))
        P.PARAMS["mount_point"] = None
Beispiel #4
0
 def get_version(self):
     """Check that vcf-concat is callable and return a placeholder version.

     Runs ``self.path`` with ``-h`` and inspects what ``E.run`` returns
     for stderr. No version is parsed from the output, so the result is
     always the string ``"unknown"``.

     Raises:
         ValueError: if the usage line is missing from the output.
     """
     usage = E.run("{self.path} -h".format(self=self),
                   return_stderr=True).strip()
     if "vcf-concat [OPTIONS]" not in usage:
         raise ValueError("vcf-concat not found")
     return "unknown"
Beispiel #5
0
    def test_workflow_make(self):
        """Run ``daisy benchmark-simple ... make all`` and check the outcome.

        The command must exit with status 0 and all files expected for
        the tool/metric combinations must be present. On failure the
        assertion message carries stderr and the workflow log, if one
        was written.
        """
        log_path = os.path.join(self.work_dir, "output.log")
        config_path = os.path.join(os.path.dirname(__file__), self.filename)
        command_template = ("daisy "
                            "benchmark-simple "
                            "--debug "
                            "--log={} "
                            "--config-file={} "
                            "--work-dir={} "
                            "--local "
                            "make all")
        command = command_template.format(log_path, config_path,
                                          self.work_dir)

        process = E.run(command, return_popen=True)
        _, stderr = process.communicate()

        if not os.path.exists(log_path):
            log_text = "no log data available at {}".format(log_path)
        else:
            with open(log_path) as log_handle:
                log_text = log_handle.read()

        failure_message = "stderr = {}, log={}".format(stderr, log_text)
        self.assertEqual(process.returncode, 0, msg=failure_message)

        expected = itertools.product(self.expected_tools,
                                     self.expected_metrics)
        self.check_files_present(expected, aliases=self.aliases)
Beispiel #6
0
 def get_version(self):
     """Check that pbsim is callable and return a placeholder version.

     Runs ``self.path`` without arguments and inspects what ``E.run``
     returns for stderr. No version is parsed, so the result is always
     the string ``"unknown"``.

     Raises:
         ValueError: if the usage line is missing from the output.
     """
     usage = E.run("{self.path}".format(self=self),
                   return_stderr=True).strip()
     if "USAGE: pbsim" not in usage:
         raise ValueError("pbsim not found")
     return "unknown"
Beispiel #7
0
 def get_version(self):
     """Return the bedtools version followed by the gat version.

     The bedtools part comes from
     ``run_metric_bedtools_intersection.get_version``; the gat part is
     parsed from the stdout of ``<gat_path> --version``. Both are joined
     with a single space.
     """
     bedtools_version = run_metric_bedtools_intersection.get_version(self)
     command = "{self.gat_path} --version 2> /dev/null".format(self=self)
     output = E.run(command, return_stdout=True).strip()
     match = re.search(r"gat-run.py version: (\S+):", output)
     return "{} {}".format(bedtools_version, match.groups()[0])
Beispiel #8
0
 def get_version(self):
     """Return the QUAST version parsed from ``<path> --version``.

     The text returned by ``E.run`` for stderr is searched for
     ``QUAST <version>``.

     Raises:
         ValueError: if the output is empty or contains "not found".
     """
     output = E.run("{self.path} --version".format(self=self),
                    return_stderr=True).strip()
     if not output or "not found" in output:
         raise ValueError("QUAST not found at/as {}: {}".format(
             self.path, output))
     return re.search("QUAST (.+)", output).groups()[0]
Beispiel #9
0
 def get_version(self):
     """Return the BBMap version parsed from ``<path> -version``.

     The text returned by ``E.run`` for stderr is searched for
     ``BBMap version <version>``.

     Raises:
         ValueError: if the output is empty or contains "not found".
     """
     output = E.run("{self.path} -version".format(self=self),
                    return_stderr=True).strip()
     if not output or "not found" in output:
         raise ValueError("bbmap not found at/as {}: {}".format(
             self.path, output))
     return re.search(r"BBMap version (\S+)", output).groups()[0]
Beispiel #10
0
def compress(infile):
    '''Compress *infile* by running ``gzip -f`` on it.

    The command is logged at debug level before execution. Returns
    whatever ``E.run`` returns for the command.
    '''
    command = "gzip -f %(infile)s" % {"infile": infile}
    E.debug("executing statement '%s'" % command)
    return E.run(command)
Beispiel #11
0
 def get_version(self):
     """Return the Delly version parsed from the executable's output.

     ``self.path`` is run without arguments and with errors ignored; the
     stdout text is searched for ``Delly (Version: <version>)``.

     Raises:
         ValueError: if the command produced no output.
     """
     help_string = E.run("{self.path} ".format(**locals()),
                         return_stdout=True,
                         on_error="ignore").strip()
     if help_string:
         # Raw string: "\(" and "\S" are invalid escape sequences in a
         # plain literal and trigger a warning on recent Python versions.
         return re.search(r"Delly \(Version: (\S+)\)",
                          help_string).groups()[0]
     else:
         raise ValueError("delly not found at/as {}".format(self.path))
    def test_task_library_contains_user_tasks(self):
        """The task listing includes the tasks from the user task library.

        ``DAISY_TASKLIBRARY`` is exported to ``self.library`` in the same
        shell command that lists the tasks.
        """
        command = ("export DAISY_TASKLIBRARY={} && "
                   "daisy run-task --list-tasks -v 0").format(self.library)
        listed = E.run(command, return_stdout=True).splitlines()

        self.assertGreater(len(listed), 0)
        for task_name in ("tool_my_tasklibrary_cat",
                          "metric_my_tasklibrary_count"):
            self.assertTrue(task_name in listed)
Beispiel #13
0
def get_version():
    """Return a ``VersionData`` with the caller's code location and git hash.

    The version is the stripped stdout of ``git rev-parse HEAD`` run in
    the caller's directory. Errors are ignored and git's stderr is
    discarded, so the version may be an empty string.
    """
    # get script that has called P.main()
    caller_module = get_caller(1)
    code_location = os.path.abspath(os.path.dirname(caller_module.__file__))
    # try git for runs from repository
    git_output = E.run("git rev-parse HEAD 2> /dev/null",
                       cwd=code_location,
                       return_stdout=True,
                       on_error="ignore")
    return VersionData(code_location=code_location,
                       version=git_output.strip())
 def get_version(self):
     """Return the RTG Tools version parsed from ``<path> version``.

     The command is run with errors ignored and stderr discarded; its
     stdout is searched for ``Product: RTG Tools <version>``.

     Raises:
         ValueError: if the output is empty or contains "not found".
     """
     command = "{self.path} version 2> /dev/null".format(self=self)
     output = E.run(command,
                    return_stdout=True,
                    on_error="ignore").strip()
     if not output or "not found" in output:
         raise ValueError("rtg not found at/as {}: {}".format(
             self.path, output))
     return re.search(r"Product: RTG Tools (\S+)", output).groups()[0]
Beispiel #15
0
def mount_file(fn):
    """Yield a usable path for *fn*, mounting arvados when required.

    If *fn* starts with ``arv=``, arvados is mounted read-only on a fresh
    temporary directory and the yielded path is the mount point joined
    with the remainder of *fn*. Any other *fn* is yielded unchanged.

    The unmount and directory removal run in a ``finally`` clause, so
    they also happen when the consumer raises while the generator is
    suspended at the ``yield``.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager``
    where it is defined - the decorator is not visible here; confirm.
    """
    if not fn.startswith("arv="):
        yield fn
        return

    arvados_options = "--read-only"
    mountpoint = tempfile.mkdtemp(suffix="_arvados_keep")
    E.debug("mount_file: mounting arvados at {}".format(mountpoint))
    E.run("arv-mount {} {}".format(arvados_options, mountpoint))
    try:
        # fn[4:] strips the "arv=" prefix
        yield mountpoint + "/" + fn[4:]
    finally:
        E.run("fusermount -u {}".format(mountpoint))
        E.debug("mount_file: unmounted arvados at {}".format(mountpoint))
        try:
            shutil.rmtree(mountpoint)
        except OSError as ex:
            E.warn("could not delete mountpoint {}: {}".format(
                mountpoint, str(ex)))
Beispiel #16
0
 def get_version(self):
     """Check that lumpy is callable; version parsing is not implemented.

     ``self.path`` is run without arguments and with errors ignored.

     Raises:
         ValueError: if the command produced no stdout output.
         NotImplementedError: in every other case - extracting the
             version from the output is not implemented.
     """
     help_string = E.run("{self.path} ".format(**locals()),
                         return_stdout=True,
                         on_error="ignore").strip()
     # lumpy express without arguments ends in error
     if not help_string:
         raise ValueError("lumpy not found at/as {}".format(self.path))
     # The previous implementation had an unreachable regex search for
     # "lumpy (Version: <version>)" after this raise; it never ran.
     raise NotImplementedError(
         "parsing the lumpy version is not implemented")
Beispiel #17
0
def get_conda_environment_directory(env_name):
    """Return the directory of the conda environment *env_name*.

    The environments are read from the output of ``conda env list``,
    using the executable named in the ``CONDA_EXE`` environment variable
    when it is set.

    Raises:
        IOError: if *env_name* is not among the listed environments.
    """
    conda_exe = os.environ.get("CONDA_EXE")
    if conda_exe is None:
        command = "conda env list"
    else:
        command = "{} env list".format(conda_exe)
    listing = E.run(command, return_stdout=True).strip()

    environments = {}
    for row in listing.splitlines():
        if row.startswith("#"):
            continue
        columns = re.split(" +", row)
        # Rows have either two columns (name, path) or three columns
        # (name, one extra column, path); the path is always the last.
        if len(columns) in (2, 3):
            environments[columns[0]] = columns[-1]

    if env_name not in environments:
        raise IOError("conda environment {} does not exist, found {}".format(
            env_name, sorted(environments)))
    return environments[env_name]
Beispiel #18
0
    def get_resource_usage(self, job_id, retval, hostname):
        """Query ``sacct`` for *job_id* and parse the last output line.

        The requested fields are the values of
        ``self.map_drmaa2benchmark_data``. A warning is logged when the
        output does not consist of exactly two lines. *hostname* is not
        used. Returns the result of ``self.parse_accounting_data``.
        """
        # delay to help with sync'ing of book-keeping
        time.sleep(5)
        fields = ",".join(self.map_drmaa2benchmark_data.values())
        statement = "sacct --noheader --units=K --parsable2 --format={} -j {} ".format(
            fields, job_id)

        lines = E.run(statement, return_stdout=True).splitlines()
        n_lines = len(lines)
        if n_lines != 2:
            E.warn("expected 2 lines in {}, but got {}".format(statement, n_lines))

        return self.parse_accounting_data(lines[-1], retval)
    def test_task_library_default_tasks_can_be_executed(self):
        """``metric_filestat`` runs successfully with a user library set.

        The task is run locally on this test file with
        ``DAISY_TASKLIBRARY`` pointing at ``self.library``; the process
        must exit with status 0.
        """
        source_path = os.path.abspath(__file__)
        target_path = os.path.join(self.work_dir, "out")
        task_env = os.environ.copy()
        task_env["DAISY_TASKLIBRARY"] = self.library
        command = ("daisy run-task "
                   "--local "
                   "--task=metric_filestat "
                   "--input-file={} "
                   "--output-file={}").format(source_path, target_path)

        process = E.run(command, env=task_env, return_popen=True)
        _, stderr = process.communicate()
        self.assertEqual(process.returncode, 0,
                         msg="stderr = {}".format(stderr))
Beispiel #20
0
    def test_workflow_show(self):
        """``daisy benchmark-simple ... show all`` exits with status 0."""
        log_path = os.path.join(self.work_dir, "output.log")
        config_path = os.path.join(os.path.dirname(__file__), self.filename)
        command_template = ("daisy benchmark-simple "
                            "--debug "
                            "--log={} "
                            "--config-file={} "
                            "--work-dir={} "
                            "--local "
                            "show all")
        command = command_template.format(log_path, config_path,
                                          self.work_dir)

        process = E.run(command, return_popen=True)
        _, stderr = process.communicate()

        self.assertEqual(process.returncode, 0,
                         msg="stderr = {}".format(stderr))
Beispiel #21
0
def getBigwigSummary(bigwig_file):
    '''Return a summary of the contents of *bigwig_file*.

    The summary is obtained from the bigWigInfo UCSC utility. Each
    non-empty output line is split at ``:``; the first part becomes a
    field name of the returned ``BigwigInfo`` named tuple and the second
    part, with commas removed, is converted with ``iotools.str2val``.
    '''
    command = "bigWigInfo %(bigwig_file)s" % {"bigwig_file": bigwig_file}
    output = E.run(command, return_stdout=True)

    rows = [line.split(":") for line in output.split("\n") if line != ""]
    summary_type = collections.namedtuple("BigwigInfo",
                                          [row[0] for row in rows])

    values = [iotools.str2val(row[1].strip().replace(",", ""))
              for row in rows]
    return summary_type(*values)
    def test_task_library_user_metric_can_be_executed(self):
        """The user-defined count metric runs and reports the line count.

        ``metric_my_tasklibrary_count`` is run locally on this test file;
        the first line of its output must equal the number of lines in
        the input.
        """
        source_path = os.path.abspath(__file__)
        target_path = os.path.join(self.work_dir, "out")
        task_env = os.environ.copy()
        task_env["DAISY_TASKLIBRARY"] = self.library
        command = ("daisy run-task "
                   "--local "
                   "--task=metric_my_tasklibrary_count "
                   "--input-file={} "
                   "--input-slot=data "
                   "--output-file={}").format(source_path, target_path)

        process = E.run(command, env=task_env, return_popen=True)
        _, stderr = process.communicate()
        self.assertEqual(process.returncode, 0,
                         msg="stderr = {}".format(stderr))

        with iotools.open_file(source_path) as source, \
                iotools.open_file(target_path) as result:
            source_lines = source.readlines()
            result_lines = result.readlines()
        self.assertEqual(len(source_lines), int(result_lines[0]))
Beispiel #23
0
def bigwig(infile, contig_sizes):
    '''convert infile to bigwig file

    The output file name is *infile* with a trailing ``.wig`` replaced
    by ``.bigwig``, or with ``.bigwig`` appended otherwise.

    *contig_sizes* is a mapping whose items are written, tab-separated
    and one per line, to a temporary file that is passed to
    ``wigToBigWig`` as the chromosome sizes file. The temporary file is
    removed even if the conversion raises.
    '''

    if infile.endswith(".wig"):
        outfile = infile[:-4] + ".bigwig"
    else:
        outfile = infile + ".bigwig"

    tmp, filename_sizes = tempfile.mkstemp()
    try:
        # Wrap the descriptor in a text-mode file object: os.write()
        # only accepts bytes on Python 3 and would raise TypeError for
        # the str built here.
        with os.fdopen(tmp, "w") as sizes_file:
            sizes_file.write("\n".join(
                "\t".join(map(str, item)) for item in contig_sizes.items()))

        statement = "wigToBigWig " \
                    "-clip %(infile)s %(filename_sizes)s %(outfile)s " % locals()

        E.debug("executing statement '%s'" % statement)

        # NOTE(review): infile is deleted when E.run returns a truthy
        # value. If E.run returns the exit status, that means the input
        # is removed only on failure - confirm the intended condition.
        if E.run(statement):
            os.unlink(infile)
    finally:
        os.unlink(filename_sizes)
Beispiel #24
0
 def get_version(self):
     """Return the lastal version parsed from ``<path_lastal> -V``.

     The version is everything following ``lastal `` on the matching
     line of the command's stdout.
     """
     command = "{self.path_lastal} -V".format(self=self)
     output = E.run(command, return_stdout=True).strip()
     match = re.search("lastal (.+)", output)
     return match.groups()[0]
 def test_task_library_contains_default_only(self):
     """The task listing is non-empty and lacks the user library tasks."""
     listed = E.run("daisy run-task --list-tasks -v 0",
                    return_stdout=True).splitlines()
     self.assertGreater(len(listed), 0)
     for task_name in ("tool_my_tasklibrary_cat",
                       "metric_my_tasklibrary_count"):
         self.assertFalse(task_name in listed)
Beispiel #26
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads alignments from a BAM stream (``args.stdin``) and writes the
    reads to two gzip-compressed fastq files. Output file names are
    taken from the positional arguments when given and are otherwise
    derived from ``args.output_filename_pattern``.

    Reads are first written as tab-separated name/sequence/quality
    records to temporary files, then sorted by read name and converted
    to fastq by a shell pipeline. For paired data, a read name seen for
    only one mate gets a placeholder record (``N`` bases, ``B``
    qualities) in the other file.

    The temporary directory is removed before returning, including when
    no reads were found.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.ArgumentParser(description=__doc__)

    parser.add_argument("--version", action='version', version="1.0")

    parser.set_defaults()

    # add common options (-h/--help, ...) and parse command line
    (args, unknown) = E.start(parser,
                              argv=argv,
                              add_output_options=True,
                              unknowns=True)

    # choose output file names: positional arguments take precedence
    # over names built from the output filename pattern
    if len(unknown) == 1:
        fastqfile1 = unknown[0]
        fastqfile2 = args.output_filename_pattern % "2"
    elif len(unknown) == 2:
        fastqfile1, fastqfile2 = unknown
    else:
        fastqfile1 = args.output_filename_pattern % "1"
        fastqfile2 = args.output_filename_pattern % "2"

    # only output compressed data
    if not fastqfile1.endswith(".gz"):
        fastqfile1 += ".gz"
    if not fastqfile2.endswith(".gz"):
        fastqfile2 += ".gz"

    if args.stdin != sys.stdin:
        samfile = pysam.AlignmentFile(args.stdin.name, "rb")
    else:
        samfile = pysam.AlignmentFile("-", "rb")

    tmpdir = tempfile.mkdtemp()

    outtemp1 = os.path.join(tmpdir, "pair1.gz")
    outtemp2 = os.path.join(tmpdir, "pair2.gz")

    outstream1 = iotools.open_file(outtemp1, "w")
    outstream2 = iotools.open_file(outtemp2, "w")

    E.info('writing fastq files to temporary directory %s' % tmpdir)

    # read names written to the first / second temporary file
    found1, found2 = set(), set()
    # query length of the first read seen for each mate; used as the
    # length of placeholder records
    read1_qlen, read2_qlen = 0, 0

    c = E.Counter()
    for read in samfile.fetch(until_eof=True):
        c.input += 1
        if not read.is_paired:
            outstream1.write("\t".join((read.qname, read.seq, read.qual)) +
                             "\n")
            found1.add(read.qname)
            if not read1_qlen:
                read1_qlen = read.qlen
            c.unpaired += 1
        elif read.is_read1:
            outstream1.write("\t".join((read.qname, read.seq, read.qual)) +
                             "\n")
            found1.add(read.qname)
            if not read1_qlen:
                read1_qlen = read.qlen
            c.output1 += 1
        elif read.is_read2:
            # second mates are written only once per read name
            if read.qname not in found2:
                outstream2.write("\t".join((read.qname, read.seq, read.qual)) +
                                 "\n")
                found2.add(read.qname)
                if not read2_qlen:
                    read2_qlen = read.qlen
                c.output2 += 1

    if c.unpaired == 0 and c.output1 == 0 and c.output2 == 0:
        E.warn("no reads were found")
        # Release the temporary resources before leaving early;
        # otherwise the directory stays on disk and both streams
        # remain open.
        outstream1.close()
        outstream2.close()
        shutil.rmtree(tmpdir)
        return

    # sort the tab-separated records by read name and emit them as
    # four-line fastq entries
    sort_statement = '''gunzip < %s
    | sort -k1,1
    | awk '{printf("@%%s\\n%%s\\n+\\n%%s\\n", $1,$2,$3)}'
    | gzip > %s'''

    if c.output1 == 0 and c.output2 == 0:
        # single end data:
        outstream1.close()
        outstream2.close()
        E.info("sorting fastq files")
        E.run(sort_statement % (outtemp1, fastqfile1))

    else:
        # paired end data: add placeholder records so that both files
        # contain the same set of read names
        for qname in found2.difference(found1):
            outstream1.write("\t".join((qname, "N" * read1_qlen,
                                        "B" * read1_qlen)) + "\n")
            c.extra1 += 1

        for qname in found1.difference(found2):
            outstream2.write("\t".join((qname, "N" * read2_qlen,
                                        "B" * read2_qlen)) + "\n")
            c.extra2 += 1

        E.info("%s" % str(c))

        outstream1.close()
        outstream2.close()

        E.info("sorting fastq files")
        E.run(sort_statement % (outtemp1, fastqfile1))
        E.run(sort_statement % (outtemp2, fastqfile2))

    shutil.rmtree(tmpdir)

    # write footer and output benchmark information.
    E.stop()
 def get_version(self):
     """Return the version parsed from the executable's usage output.

     ``self.path`` is run without arguments; the text returned by
     ``E.run`` for stderr is searched for ``Version: <version>``.
     """
     help_string = E.run("{self.path}".format(**locals()),
                         return_stderr=True).strip()
     # Raw string: "\S" is an invalid escape sequence in a plain literal
     # and triggers a warning on recent Python versions.
     return re.search(r"Version: (\S+)", help_string).groups()[0]
 def get_version(self):
     """Return the stripped stdout of ``java -jar <path> --version``."""
     command = "java -jar {self.path} --version".format(self=self)
     version_text = E.run(command, return_stdout=True)
     return version_text.strip()
 def get_version(self):
     """Return the FastQC version parsed from ``<path> --version``.

     The command's stdout is searched for ``FastQC <version>``.
     """
     command = "{self.path} --version".format(self=self)
     output = E.run(command, return_stdout=True).strip()
     match = re.search(r"FastQC (\S+)", output)
     return match.groups()[0]
Beispiel #30
0
def redirect2mounts(config,
                    mountpoint=None,
                    debug=None,
                    mount_write=False,
                    substitute_only=False,
                    always_mount=False):
    """redirect filenames in dictionary config to a mount-point.

    Mount points in the config are indicated by the `arv=` prefix. If
    no option in config requires mounting, no mounting will be done and
    the method returns None.

    With ``mount_write=True`` the config is not inspected: arvados is
    mounted in read-write mode unless a mountpoint was supplied.

    Read-only mounting is currently disabled: whenever a new read-only
    mount would be needed, the config values are still substituted
    in-place and then NotImplementedError is raised.

    :param config: dictionary with config values. Will be modified in-place.
    :param mountpoint: if given, paths will be substituted by mountpoint. If None,
        a new mountpoint will be created.
    :param debug: if given, mount in debug mode and save log to filename.
    :param mount_write: if True, mount in --read-write mode.
    :param substitute_only: if True, only perform substitution, do not mount anything
        even if mountpoint is None.
    :param always_mount: if True, always mount, no matter if arv= prefix is present.

    :return: the mountpoint

    :raises NotImplementedError: if a read-only mount would have to be
        created (arvados support disabled).

    """
    if mount_write:
        arvados_options = ["--disable-event-listening"]
        if debug:
            arvados_options.append(" --debug --logfile={}".format(debug))
        arvados_options.append("--read-write")
        arvados_options = " ".join(arvados_options)
        if not mountpoint:
            mountpoint = P.get_temp_dir() + "/"
            E.info("redirect2mounts: mounting arvados at {} with --read-write".
                   format(mountpoint))
            E.run("arv-mount {} {}".format(arvados_options, mountpoint))
            E.info("redirect2mounts: arvados mounted at {} with --read-write".
                   format(mountpoint))
    else:
        do_mount = False
        if always_mount:
            mountpoint = P.get_temp_dir() + "/"
            do_mount = True

        for d, key, value in IOTools.nested_iter(config):
            if not isinstance(value, str) or "arv=" not in value:
                continue
            if substitute_only and mountpoint is None:
                continue
            if not mountpoint:
                mountpoint = P.get_temp_dir() + "/"
                do_mount = True
            # Plain string replacement: re.sub would interpret
            # backslashes in the mount path as escape sequences.
            d[key] = value.replace("arv=", mountpoint)

        if do_mount:
            # Read-only mounting is disabled. The code that used to
            # follow this raise (running arv-mount with --read-only on
            # the mountpoint) was unreachable and has been removed.
            raise NotImplementedError("arvados support disabled")

    return mountpoint