def get_version(self):
    """Return the combined manta/strelka version as 'manta-strelka'."""
    manta_version = E.run(
        "{self.path_manta} --version".format(**locals()),
        return_stdout=True).strip()
    strelka_version = E.run(
        "{self.path_strelka} --version".format(**locals()),
        return_stdout=True).strip()
    return "-".join([manta_version, strelka_version])
def cleanup(signal=None, frame=None):
    """Unmount arvados keep and remove the mount directory.

    Usable as a signal handler (hence the optional ``signal`` and
    ``frame`` arguments). Clears ``P.PARAMS["mount_point"]`` afterwards
    so a repeated call is a no-op.
    """
    mountpoint = P.PARAMS.get("mount_point", None)
    if mountpoint:
        E.run("fusermount -u {}".format(mountpoint))
        E.debug("finally: unmounted arvados at {}".format(mountpoint))
        try:
            shutil.rmtree(mountpoint)
        except OSError:
            # consistent with arvados_cleanup(): removal can fail on a
            # read-only file system - warn and continue.
            E.warn("failure while removing mountpoint {} - ignored".format(
                mountpoint))
        P.PARAMS["mount_point"] = None
def arvados_cleanup(signal=None, frame=None):
    """Unmount arvados keep and remove the mount directory.

    Usable as a signal handler; clears ``P.PARAMS["mount_point"]`` so a
    second invocation does nothing.
    """
    mountpoint = P.PARAMS.get("mount_point", None)
    if not mountpoint:
        return
    E.debug("unmounting arvados at {}".format(mountpoint))
    E.run("fusermount -u {}".format(mountpoint))
    E.debug("unmounted arvados at {}".format(mountpoint))
    try:
        shutil.rmtree(mountpoint)
    except OSError:
        # ignore errors - arvados issue (read-only file system)?
        E.warn("failure while removing mountpoint {} - ignored".format(
            mountpoint))
    P.PARAMS["mount_point"] = None
def get_version(self):
    """Return version of vcf-concat (tool reports no version number)."""
    usage = E.run("{self.path} -h".format(**locals()),
                  return_stderr=True).strip()
    if "vcf-concat [OPTIONS]" not in usage:
        raise ValueError("vcf-concat not found")
    return "unknown"
def test_workflow_make(self):
    """Run 'make all' of the simple benchmark workflow and check outputs."""
    logfile = os.path.join(self.work_dir, "output.log")
    config = os.path.join(os.path.dirname(__file__), self.filename)
    statement = (
        "daisy "
        "benchmark-simple "
        "--debug "
        "--log={} "
        "--config-file={} "
        "--work-dir={} "
        "--local "
        "make all".format(logfile, config, self.work_dir))
    proc = E.run(statement, return_popen=True)
    stdout, stderr = proc.communicate()
    # include the workflow log in the failure message if available
    if os.path.exists(logfile):
        with open(logfile) as inf:
            logdata = inf.read()
    else:
        logdata = "no log data available at {}".format(logfile)
    self.assertEqual(proc.returncode, 0,
                     msg="stderr = {}, log={}".format(stderr, logdata))
    self.check_files_present(
        itertools.product(self.expected_tools, self.expected_metrics),
        aliases=self.aliases)
def get_version(self):
    """Return version of pbsim (tool reports no version number)."""
    usage = E.run("{self.path}".format(**locals()),
                  return_stderr=True).strip()
    if "USAGE: pbsim" not in usage:
        raise ValueError("pbsim not found")
    return "unknown"
def get_version(self):
    """Return combined bedtools and gat versions.

    Raises AttributeError if the gat version string cannot be parsed.
    """
    version_bedtools = run_metric_bedtools_intersection.get_version(self)
    help_string = E.run(
        "{self.gat_path} --version 2> /dev/null".format(**locals()),
        return_stdout=True).strip()
    # escape the '.' in the script name so it does not match an
    # arbitrary character
    gat_version = re.search(
        r"gat-run\.py version: (\S+):", help_string).groups()[0]
    return "{} {}".format(version_bedtools, gat_version)
def get_version(self):
    """Return the QUAST version string.

    Raises ValueError if QUAST cannot be run.
    """
    help_string = E.run("{self.path} --version".format(**locals()),
                        return_stderr=True).strip()
    if not help_string or "not found" in help_string:
        raise ValueError("QUAST not found at/as {}: {}".format(
            self.path, help_string))
    return re.search("QUAST (.+)", help_string).groups()[0]
def get_version(self):
    """Return the BBMap version string.

    Raises ValueError if bbmap cannot be run.
    """
    help_text = E.run("{self.path} -version".format(**locals()),
                      return_stderr=True).strip()
    if not help_text or "not found" in help_text:
        raise ValueError("bbmap not found at/as {}: {}".format(
            self.path, help_text))
    return re.search(r"BBMap version (\S+)", help_text).groups()[0]
def compress(infile):
    '''gzip ``infile`` in place (gzip -f replaces it with infile.gz).'''
    statement = "gzip -f {}".format(infile)
    E.debug("executing statement '{}'".format(statement))
    return E.run(statement)
def get_version(self):
    """Return the delly version string.

    Raises ValueError if delly cannot be run.
    """
    help_string = E.run("{self.path} ".format(**locals()),
                        return_stdout=True,
                        on_error="ignore").strip()
    if not help_string:
        raise ValueError("delly not found at/as {}".format(self.path))
    # raw string so that \( and \S are regex escapes rather than
    # (invalid) string escape sequences
    return re.search(r"Delly \(Version: (\S+)\)",
                     help_string).groups()[0]
def test_task_library_contains_user_tasks(self):
    """Tasks from the user task library appear in --list-tasks output."""
    listing = E.run("export DAISY_TASKLIBRARY={} && "
                    "daisy run-task --list-tasks -v 0".format(self.library),
                    return_stdout=True)
    task_names = listing.splitlines()
    self.assertGreater(len(task_names), 0)
    self.assertTrue("tool_my_tasklibrary_cat" in task_names)
    self.assertTrue("metric_my_tasklibrary_count" in task_names)
def get_version():
    """Return VersionData for the script that called P.main().

    The version is the current git commit of the repository the caller
    lives in; empty when the checkout information is unavailable.
    """
    # get script that has called P.main()
    code_location = os.path.abspath(
        os.path.dirname(get_caller(1).__file__))
    # try git for runs from repository; discard stderr and ignore
    # failures for installs outside a git checkout
    stdout = E.run("git rev-parse HEAD 2> /dev/null",
                   cwd=code_location,
                   return_stdout=True,
                   on_error="ignore").strip()
    return VersionData(code_location=code_location,
                       version=stdout)
def get_version(self):
    """Return the RTG Tools version string.

    Raises ValueError if rtg cannot be run.
    """
    help_string = E.run(
        "{self.path} version 2> /dev/null".format(**locals()),
        return_stdout=True,
        on_error="ignore").strip()
    if not help_string or "not found" in help_string:
        raise ValueError("rtg not found at/as {}: {}".format(
            self.path, help_string))
    return re.search(r"Product: RTG Tools (\S+)",
                     help_string).groups()[0]
def mount_file(fn):
    """Yield a local path for ``fn``, mounting arvados keep if required.

    If ``fn`` carries an ``arv=`` prefix, arvados keep is mounted
    read-only in a fresh temporary directory and the path inside the
    mount is yielded; the mount is removed afterwards. Cleanup now runs
    in a ``finally`` block so it also happens when the consumer raises
    (resolves the previous "exception-safe clean-up" TODO). Plain
    filenames are yielded unchanged.
    """
    # NOTE(review): a dead assignment previously also listed
    # --disable-event-listening; only --read-only was ever in effect.
    arvados_options = "--read-only"
    if fn.startswith("arv="):
        mountpoint = tempfile.mkdtemp(suffix="_arvados_keep")
        E.debug("mount_file: mounting arvados at {}".format(mountpoint))
        E.run("arv-mount {} {}".format(arvados_options, mountpoint))
        try:
            yield mountpoint + "/" + fn[4:]
        finally:
            E.run("fusermount -u {}".format(mountpoint))
            E.debug("mount_file: unmounted arvados at {}".format(mountpoint))
            try:
                shutil.rmtree(mountpoint)
            except OSError as ex:
                # best-effort removal - arvados may leave the directory
                # read-only
                E.warn("could not delete mountpoint {}: {}".format(
                    mountpoint, str(ex)))
    else:
        yield fn
def get_version(self):
    """Return the lumpy version - currently not implemented.

    lumpy express invoked without arguments exits with an error, so
    there is no reliable version output to parse.

    Raises ValueError when lumpy produced no output at all, and
    NotImplementedError otherwise.
    """
    help_string = E.run("{self.path} ".format(**locals()),
                        return_stdout=True,
                        on_error="ignore").strip()
    if not help_string:
        raise ValueError("lumpy not found at/as {}".format(self.path))
    # lumpy express without arguments ends in error; the former
    # re.search() parse that followed the raise was unreachable dead
    # code and has been removed.
    raise NotImplementedError()
def get_conda_environment_directory(env_name):
    """Return the installation directory of conda environment ``env_name``.

    Parses ``conda env list`` output (active environments carry an
    extra '*' column). Raises IOError when no environment of that name
    exists.
    """
    if "CONDA_EXE" in os.environ:
        listing = E.run("{} env list".format(os.environ["CONDA_EXE"]),
                        return_stdout=True).strip()
    else:
        listing = E.run("conda env list", return_stdout=True).strip()

    env_map = {}
    for line in listing.splitlines():
        # skip comment header
        if line.startswith("#"):
            continue
        columns = re.split(" +", line)
        if len(columns) == 2:
            env_map[columns[0]] = columns[1]
        elif len(columns) == 3:
            # middle column is the '*' marking the active environment
            env_map[columns[0]] = columns[2]

    if env_name not in env_map:
        raise IOError("conda environment {} does not exist, found {}".format(
            env_name, sorted(env_map.keys())))
    return env_map[env_name]
def get_resource_usage(self, job_id, retval, hostname):
    """Return accounting data for ``job_id`` queried from slurm's sacct."""
    # delay to help with sync'ing of book-keeping
    time.sleep(5)
    fields = ",".join(self.map_drmaa2benchmark_data.values())
    statement = (
        "sacct --noheader --units=K --parsable2 "
        "--format={} -j {} ".format(fields, job_id))
    lines = E.run(statement, return_stdout=True).splitlines()
    if len(lines) != 2:
        E.warn("expected 2 lines in {}, but got {}".format(
            statement, len(lines)))
    # the last line holds the batch step's accounting record
    return self.parse_accounting_data(lines[-1], retval)
def test_task_library_default_tasks_can_be_executed(self):
    """A built-in task runs successfully with the user library active."""
    infile = os.path.abspath(__file__)
    outfile = os.path.join(self.work_dir, "out")
    env = os.environ.copy()
    env["DAISY_TASKLIBRARY"] = self.library
    statement = (
        "daisy run-task "
        "--local "
        "--task=metric_filestat "
        "--input-file={} "
        "--output-file={}".format(infile, outfile))
    proc = E.run(statement, env=env, return_popen=True)
    stdout, stderr = proc.communicate()
    self.assertEqual(proc.returncode, 0,
                     msg="stderr = {}".format(stderr))
def test_workflow_show(self):
    """'show all' of the simple benchmark workflow exits cleanly."""
    logfile = os.path.join(self.work_dir, "output.log")
    config = os.path.join(os.path.dirname(__file__), self.filename)
    statement = (
        "daisy benchmark-simple "
        "--debug "
        "--log={} "
        "--config-file={} "
        "--work-dir={} "
        "--local "
        "show all".format(logfile, config, self.work_dir))
    proc = E.run(statement, return_popen=True)
    stdout, stderr = proc.communicate()
    self.assertEqual(proc.returncode, 0,
                     msg="stderr = {}".format(stderr))
def getBigwigSummary(bigwig_file):
    '''return summary of bigwig contents.

    This method uses the bigWigInfo UCSC utility
    '''
    raw = E.run("bigWigInfo {}".format(bigwig_file), return_stdout=True)

    # each output line is "field: value"
    rows = [line.split(":") for line in raw.split("\n") if line != ""]
    BigwigInfo = collections.namedtuple(
        "BigwigInfo", [row[0] for row in rows])

    def conv(value):
        # strip thousands separators before converting to int/float
        return iotools.str2val(re.sub(",", "", value.strip()))

    return BigwigInfo(*[conv(row[1]) for row in rows])
def test_task_library_user_metric_can_be_executed(self):
    """The user-defined count metric runs and reports the input line count."""
    infile = os.path.abspath(__file__)
    outfile = os.path.join(self.work_dir, "out")
    env = os.environ.copy()
    env["DAISY_TASKLIBRARY"] = self.library
    statement = (
        "daisy run-task "
        "--local "
        "--task=metric_my_tasklibrary_count "
        "--input-file={} "
        "--input-slot=data "
        "--output-file={}".format(infile, outfile))
    proc = E.run(statement, env=env, return_popen=True)
    stdout, stderr = proc.communicate()
    self.assertEqual(proc.returncode, 0,
                     msg="stderr = {}".format(stderr))
    # the metric's output is the number of lines in the input file
    with iotools.open_file(infile) as inf1, iotools.open_file(
            outfile) as inf2:
        data1 = inf1.readlines()
        data2 = inf2.readlines()
    self.assertEqual(len(data1), int(data2[0]))
def bigwig(infile, contig_sizes):
    '''convert ``infile`` (wiggle format) to a bigwig file.

    ``contig_sizes`` maps contig name to size and is written to a
    temporary tab-separated file for wigToBigWig. On success, the
    original ``infile`` and the temporary sizes file are removed.
    '''
    if infile.endswith(".wig"):
        outfile = infile[:-4] + ".bigwig"
    else:
        outfile = infile + ".bigwig"

    tmp, filename_sizes = tempfile.mkstemp()
    # os.write operates on a raw file descriptor and requires bytes;
    # passing str raised TypeError under python 3
    os.write(tmp, "\n".join(
        ["\t".join(map(str, x)) for x in contig_sizes.items()]).encode())
    os.close(tmp)

    statement = "wigToBigWig " \
        "-clip %(infile)s %(filename_sizes)s %(outfile)s " % locals()
    E.debug("executing statement '%s'" % statement)
    if E.run(statement):
        os.unlink(infile)
        os.unlink(filename_sizes)
def get_version(self):
    """Return the lastal version string."""
    output = E.run("{self.path_lastal} -V".format(**locals()),
                   return_stdout=True).strip()
    return re.search("lastal (.+)", output).groups()[0]
def test_task_library_contains_default_only(self):
    """Without DAISY_TASKLIBRARY set, user tasks are absent from listing."""
    listing = E.run("daisy run-task --list-tasks -v 0",
                    return_stdout=True)
    task_names = listing.splitlines()
    self.assertGreater(len(task_names), 0)
    self.assertFalse("tool_my_tasklibrary_cat" in task_names)
    self.assertFalse("metric_my_tasklibrary_count" in task_names)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a BAM stream and writes reads out as two gzipped, name-sorted
    fastq files (read1 and read2). Output filenames come from the
    positional arguments, with missing ones filled in from
    ``args.output_filename_pattern``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.ArgumentParser(description=__doc__)

    parser.add_argument("--version", action='version', version="1.0")

    parser.set_defaults()

    # add common options (-h/--help, ...) and parse command line
    (args, unknown) = E.start(parser,
                              argv=argv,
                              add_output_options=True,
                              unknowns=True)

    # do sth
    # derive output filenames: explicit positional arguments win,
    # missing ones come from the output filename pattern
    if len(unknown) == 1:
        fastqfile1 = unknown[0]
        fastqfile2 = args.output_filename_pattern % "2"
    elif len(unknown) == 2:
        fastqfile1, fastqfile2 = unknown
    else:
        fastqfile1 = args.output_filename_pattern % "1"
        fastqfile2 = args.output_filename_pattern % "2"

    # only output compressed data
    if not fastqfile1.endswith(".gz"):
        fastqfile1 += ".gz"
    if not fastqfile2.endswith(".gz"):
        fastqfile2 += ".gz"

    # read from a named file if stdin was redirected, else from stdin
    if args.stdin != sys.stdin:
        samfile = pysam.AlignmentFile(args.stdin.name, "rb")
    else:
        samfile = pysam.AlignmentFile("-", "rb")

    tmpdir = tempfile.mkdtemp()

    outtemp1 = os.path.join(tmpdir, "pair1.gz")
    outtemp2 = os.path.join(tmpdir, "pair2.gz")

    outstream1 = iotools.open_file(outtemp1, "w")
    outstream2 = iotools.open_file(outtemp2, "w")

    E.info('writing fastq files to temporary directory %s' % tmpdir)

    found1, found2 = set(), set()
    # remember the first observed read length per mate, used later to
    # pad missing mates with dummy records
    read1_qlen, read2_qlen = 0, 0

    c = E.Counter()
    for read in samfile.fetch(until_eof=True):
        c.input += 1
        if not read.is_paired:
            # unpaired reads are written to the first file
            outstream1.write(
                "\t".join((read.qname, read.seq, read.qual)) + "\n")
            found1.add(read.qname)
            if not read1_qlen:
                read1_qlen = read.qlen
            c.unpaired += 1
        elif read.is_read1:
            outstream1.write(
                "\t".join((read.qname, read.seq, read.qual)) + "\n")
            found1.add(read.qname)
            if not read1_qlen:
                read1_qlen = read.qlen
            c.output1 += 1
        elif read.is_read2:
            # NOTE(review): only read2 is de-duplicated by qname here;
            # duplicate read1 records pass through - confirm intended
            if read.qname not in found2:
                outstream2.write(
                    "\t".join((read.qname, read.seq, read.qual)) + "\n")
                found2.add(read.qname)
                if not read2_qlen:
                    read2_qlen = read.qlen
                c.output2 += 1

    if c.unpaired == 0 and c.output1 == 0 and c.output2 == 0:
        E.warn("no reads were found")
        return

    # sort the intermediate tab-separated records by read name and
    # reformat them into 4-line fastq records
    sort_statement = '''gunzip < %s | sort -k1,1 | awk '{printf("@%%s\\n%%s\\n+\\n%%s\\n", $1,$2,$3)}' | gzip > %s'''

    if c.output1 == 0 and c.output2 == 0:
        # single end data:
        outstream1.close()
        outstream2.close()
        E.info("sorting fastq files")
        E.run(sort_statement % (outtemp1, fastqfile1))
    else:
        # paired end data
        # pad pairs where only one mate was seen with dummy records so
        # that both output files stay in sync
        for qname in found2.difference(found1):
            outstream1.write("\t".join((qname,
                                        "N" * read1_qlen,
                                        "B" * read1_qlen)) + "\n")
            c.extra1 += 1

        for qname in found1.difference(found2):
            outstream2.write("\t".join((qname,
                                        "N" * read2_qlen,
                                        "B" * read2_qlen)) + "\n")
            c.extra2 += 1

        E.info("%s" % str(c))

        outstream1.close()
        outstream2.close()

        E.info("sorting fastq files")
        E.run(sort_statement % (outtemp1, fastqfile1))
        E.run(sort_statement % (outtemp2, fastqfile2))

    shutil.rmtree(tmpdir)

    # write footer and output benchmark information.
    E.stop()
def get_version(self):
    """Return the version the tool reports on stderr as 'Version: X'."""
    help_string = E.run("{self.path}".format(**locals()),
                        return_stderr=True).strip()
    # raw string so that \S is a regex escape rather than an (invalid)
    # string escape sequence
    return re.search(r"Version: (\S+)", help_string).groups()[0]
def get_version(self):
    """Return the version reported by the jar at ``self.path``."""
    output = E.run(
        "java -jar {self.path} --version".format(**locals()),
        return_stdout=True)
    return output.strip()
def get_version(self):
    """Return the FastQC version string."""
    output = E.run("{self.path} --version".format(**locals()),
                   return_stdout=True).strip()
    return re.search(r"FastQC (\S+)", output).groups()[0]
def redirect2mounts(config,
                    mountpoint=None,
                    debug=None,
                    mount_write=False,
                    substitute_only=False,
                    always_mount=False):
    """redirect filenames in dictionary config to a mount-point.

    Mount points in the config are indicated by the `arv=` prefix. If
    no option in config requires mounting, no mounting will be done and
    the method returns None.

    :param config: dictionary with config values. Will be modified
        in-place.
    :param mountpoint: if given, paths will be substituted by
        mountpoint. If None, a new mountpoint will be created.
    :param debug: if given, mount in debug mode and save log to
        filename.
    :param mount_write: if True, mount in --read-write mode.
    :param substitute_only: if True, only perform substitution, do not
        mount anything even if mountpoint is None.
    :param always_mount: if True, always mount, no matter if arv=
        prefix is present.

    :return: the mountpoint
    """
    # NOTE(review): indentation reconstructed from flattened source;
    # the " ".join must live inside the mount_write branch for the
    # list .append("--read-only") in the else branch to be valid -
    # confirm against version control history.
    arvados_options = ["--disable-event-listening"]
    if debug:
        arvados_options.append(" --debug --logfile={}".format(debug))

    if mount_write:
        # read-write mode: mount immediately so the caller can write
        arvados_options.append("--read-write")
        arvados_options = " ".join(arvados_options)
        if not mountpoint:
            mountpoint = P.get_temp_dir() + "/"
            E.info("redirect2mounts: mounting arvados at {} with --read-write".
                   format(mountpoint))
            E.run("arv-mount {} {}".format(arvados_options, mountpoint))
            E.info("redirect2mounts: arvados mounted at {} with --read-write".
                   format(mountpoint))
    else:
        # read-only mode: defer mounting until a config value needs it
        arvados_options.append("--read-only")

    if always_mount:
        mountpoint = P.get_temp_dir() + "/"
        do_mount = True
    else:
        do_mount = False

    # substitute the arv= prefix in all string config values; a new
    # mountpoint is created lazily on first use unless substitute_only
    for d, key, value in IOTools.nested_iter(config):
        if isinstance(value, str):
            if "arv=" in value:
                if substitute_only and mountpoint is None:
                    continue
                if not mountpoint:
                    mountpoint = P.get_temp_dir() + "/"
                    do_mount = True
                d[key] = re.sub("arv=", mountpoint, value)

    if do_mount:
        # deferred mounting is currently disabled; the code below the
        # raise is intentionally unreachable
        raise NotImplementedError("arvados support disabled")
        # if not arvados.have_arvados():
        #     raise ValueError(
        #         "config file requires arvados access, but arvados not available")
        arvados_options = " ".join(arvados_options)
        E.debug("redirect2mounts: mounting arvados at {} with options {}".
                format(mountpoint, arvados_options))
        E.run("arv-mount {} {}".format(arvados_options, mountpoint))
        E.debug(
            "redirect2mounts: arvados mounted at {}".format(mountpoint))

    return mountpoint