def expand_generators(config):
    """Expand generator expressions in option lists.

    A generator expression is valid Python syntax and has the
    following form::

      options: generate=["--chrom={}".format(x) for x in [1,2,3,4,5]]

    Every string value that starts with ``generate=`` is evaluated.
    If the value lives inside a list, the evaluated items are appended
    to that list and the original entry is removed afterwards;
    otherwise the entry is replaced by the evaluation result.

    :param config: nested configuration as traversed by
        ``IOTools.nested_iter``. Modified in-place.
    :return: the modified ``config``.
    :raises ValueError: if the expression is not valid Python syntax.
    """
    to_delete = []
    for d, key, value in IOTools.nested_iter(config):
        if not isinstance(value, str) or not value.startswith("generate="):
            continue

        expression = re.sub(r"^generate=\s*", "", value)

        # Strip one level of surrounding single quotes. Both ends must
        # be quoted, otherwise a trailing character would be cut off.
        if (len(expression) >= 2
                and expression.startswith("'")
                and expression.endswith("'")):
            expression = expression[1:-1]

        # SECURITY: eval() executes arbitrary code contained in the
        # configuration. Only use with trusted configuration files.
        try:
            argument_list = eval(expression)
        except SyntaxError as ex:
            raise ValueError(
                "error occured while evaluating generator "
                "expression {}: {}".format(expression, ex)) from ex

        if isinstance(d, list):
            # Append the expanded items and remember the placeholder
            # entry so that it can be removed once iteration is done.
            d.extend(argument_list)
            to_delete.append((d, key))
        else:
            d[key] = argument_list

    # Delete in reverse order of discovery so that removing one entry
    # does not shift the position of entries still to be removed.
    for d, key in reversed(to_delete):
        del d[key]

    return config
def __call__(self, infiles, outfile, only_info=False):
    """Run the tool on ``infiles`` and store meta and benchmark data.

    :param infiles: nested structure of input values. Modified
        in-place when ``self.mountpoint`` is set: occurrences of the
        mountpoint in string values are replaced by the ``arv=`` prefix.
    :param outfile: output filename; handed to ``save_meta``,
        ``build_params``, ``run`` and ``save_benchmark``.
    :param only_info: ignored. The effective value is derived from the
        presence of ``"only_info"`` in ``P.PARAMS`` (see note below).
    """
    # NOTE: extras not implemented in ruffus 2.6.3, thus
    # use parameter:
    only_info = "only_info" in P.PARAMS

    if self.mountpoint:
        # revert mount redirection for arvados to allow redirection
        # on individual cluster nodes
        for d, key, value in IOTools.nested_iter(infiles):
            # The mountpoint is a literal path, not a regular
            # expression, hence a plain string replacement. Non-string
            # leaves cannot contain a path and are left untouched.
            if isinstance(value, str):
                d[key] = value.replace(self.mountpoint, "arv=")

    self.instantiate_input(infiles)
    self.save_meta(outfile, output_file=outfile)

    if only_info:
        E.warn("only_info - meta information has been updated")
        return

    params = self.build_params(output_file=outfile)
    benchmark = self.run(outfile, as_namedtuple(params))
    self.save_benchmark(outfile, benchmark)
def main(argv=sys.argv):
    """Match existing result directories to the current configuration.

    Reads ``tool.info`` from every ``*.dir`` directory in the current
    working directory, builds the set of result directories implied by
    ``benchmark.yml`` and, for each existing directory, picks the new
    directory sharing the largest number of properties (input files,
    tool name, options). The existing directory is then renamed to (or,
    with ``--link``, symlinked from) the new name. A table with the
    columns ``old``, ``new`` and ``matching`` is written to
    ``options.stdout``.

    Existing directories with no match or with several equally good
    matches are skipped with a warning.

    :param argv: command line arguments.
    :raises ValueError: if the target directory already exists. The
        check is performed in dry-run mode as well.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-n", "--dry-run", dest="dry_run", action="store_true",
        help="only show what will be done, don't do it [%default]")

    parser.add_option(
        "-l", "--link", dest="link", action="store_true",
        help="link instead of rename [%default]")

    parser.set_defaults(dry_run=False, link=False)

    options, _args = E.start(parser, argv)

    config = P.get_parameters("benchmark.yml")

    # collect (directory, meta information) of existing results
    old_data = []
    for info_path in glob.glob("*.dir/tool.info"):
        old_dir = os.path.dirname(info_path)
        old_data.append((old_dir, toolkit.read_data(info_path)))

    tool_functions = workflow.build_tool_functions(map_tool_to_runner, config)

    config_files = workflow.expand_globs(config["input"])
    input_combos = workflow.build_combinations(config_files)

    # map each (property, value) pair to the result directories having it
    map_property_to_dir = collections.defaultdict(list)

    for toolf, input_files in itertools.product(tool_functions, input_combos):
        # create a copy of the task function and give it its unique name
        # by mangling it with the input_files
        taskf = copy.copy(toolf)
        taskf.register_input(input_files)
        result_dir = os.path.basename(taskf.__name__ + ".dir")
        for _, x, y in IOTools.nested_iter(taskf.input_files):
            map_property_to_dir[(x, y)].append(result_dir)
        map_property_to_dir[("name", taskf.name)].append(result_dir)
        for x, y in taskf._option_dict.items():
            map_property_to_dir[(x, y)].append(result_dir)

    # match by input_files
    options.stdout.write("\t".join(("old", "new", "matching")) + "\n")

    for old_dir, old_info in old_data:
        targets = []
        for _, x, y in IOTools.nested_iter(old_info["input_files"]):
            targets.extend(map_property_to_dir.get((x, y), ()))
        for x, y in old_info.items():
            try:
                # .get() avoids inserting empty entries into the
                # defaultdict for properties that are not present.
                targets.extend(map_property_to_dir.get((x, y), ()))
            except TypeError:
                # unhashable values (lists, dicts) can not be looked up
                pass

        if not targets:
            # max() of an empty sequence would raise ValueError
            E.warn("no match for {}, ignored".format(old_dir))
            continue

        counts = collections.Counter(targets)
        max_count = max(counts.values())
        max_count_items = [
            name for name, count in counts.items() if count == max_count]

        if len(max_count_items) > 1:
            E.warn("multiple matches for {}, ignored".format(old_dir))
            continue

        new_dir = max_count_items[0]
        options.stdout.write(
            "\t".join(map(str, (old_dir, new_dir, max_count))) + "\n")

        if os.path.exists(new_dir):
            raise ValueError("directory {} already exists".format(new_dir))

        if options.dry_run:
            continue

        if options.link:
            os.symlink(old_dir, new_dir)
        else:
            os.rename(old_dir, new_dir)

    E.stop()
def redirect2mounts(config,
                    mountpoint=None,
                    debug=None,
                    mount_write=False,
                    substitute_only=False,
                    always_mount=False):
    """Redirect filenames in the dictionary ``config`` to a mount-point.

    Paths that live on a mount are marked in the config by the ``arv=``
    prefix. In read-only mode, if no value in the config carries that
    prefix (and ``always_mount`` is not set), nothing is mounted and
    the ``mountpoint`` argument is returned unchanged (None by
    default).

    :param config: dictionary with config values. Will be modified
        in-place.
    :param mountpoint: if given, paths will be substituted by
        mountpoint. If None, a new mountpoint will be created.
    :param debug: if given, mount in debug mode and save log to
        filename.
    :param mount_write: if True, mount in --read-write mode.
    :param substitute_only: if True, only perform substitution, do not
        mount anything even if mountpoint is None.
    :param always_mount: if True, always mount, no matter if the
        ``arv=`` prefix is present.
    :return: the mountpoint
    :raises NotImplementedError: in read-only mode whenever a mount
        would be required.
    """
    # NOTE(review): the nesting below was reconstructed from a source
    # with collapsed whitespace - confirm against the original layout.
    mount_flags = ["--disable-event-listening"]
    if debug:
        mount_flags.append(" --debug --logfile={}".format(debug))

    if mount_write:
        mount_flags.append("--read-write")
        flag_string = " ".join(mount_flags)
        if not mountpoint:
            mountpoint = P.get_temp_dir() + "/"
        E.info(
            "redirect2mounts: mounting arvados at {} with --read-write".format(
                mountpoint))
        E.run("arv-mount {} {}".format(flag_string, mountpoint))
        E.info(
            "redirect2mounts: arvados mounted at {} with --read-write".format(
                mountpoint))
        return mountpoint

    # read-only mode: substitute prefixes and track if a mount is needed
    mount_flags.append("--read-only")
    needs_mount = False
    if always_mount:
        mountpoint = P.get_temp_dir() + "/"
        needs_mount = True

    for container, key, value in IOTools.nested_iter(config):
        if not isinstance(value, str) or "arv=" not in value:
            continue
        if substitute_only and mountpoint is None:
            continue
        if not mountpoint:
            mountpoint = P.get_temp_dir() + "/"
            needs_mount = True
        container[key] = re.sub("arv=", mountpoint, value)

    if needs_mount:
        raise NotImplementedError("arvados support disabled")
        # Everything below is unreachable while arvados support is
        # disabled; kept for reference.
        # if not arvados.have_arvados():
        #     raise ValueError(
        #         "config file requires arvados access, but arvados not available")
        flag_string = " ".join(mount_flags)
        E.debug(
            "redirect2mounts: mounting arvados at {} with options {}".format(
                mountpoint, flag_string))
        E.run("arv-mount {} {}".format(flag_string, mountpoint))
        E.debug(
            "redirect2mounts: arvados mounted at {}".format(mountpoint))

    return mountpoint
def expand_globs(config, is_test=False):
    """Detect and expand glob expressions in the input section.

    A glob expression is any filename that contains a '*'. Multiple
    glob expressions can be combined on the same line by a ','.

    A "find" expression is detected starting with 'find'. These
    expressions will be evaluated in a shell and the results inserted
    into the dictionary.

    If a filename starts with "file=", the contents of the file
    following the "=" are read and inserted. Multiple files can be
    separated by a ','.

    If a glob or find expression is evaluated to nothing, an exception
    is raised unless ``is_test`` is set. In that case, two files will
    be returned called "test1" and "test2".

    Values whose key is a string ending in ``_regex`` and non-string
    values are left untouched.

    :param config: nested configuration as traversed by
        ``IOTools.nested_iter``. Modified in-place.
    :param is_test: if True, insert placeholder filenames instead of
        raising when an expression expands to nothing.
    :return: the modified ``config``.
    :raises ValueError: if a value expands to nothing and ``is_test``
        is not set.
    """
    # NOTE(review): the nesting below was reconstructed from a source
    # with collapsed whitespace - confirm against the original layout.
    for d, key, value in IOTools.nested_iter(config):
        if not isinstance(value, str):
            continue
        if isinstance(key, str) and key.endswith("_regex"):
            continue

        if value.startswith("find"):
            # NOTE: the value is executed as a shell command; only use
            # with trusted configuration files.
            try:
                data = E.run(value, return_stdout=True)
            except Exception as e:
                # A failing command may still have produced output
                # (e.g. find hitting unreadable directories). Errors
                # that carry no output are passed on unchanged.
                data = getattr(e, "output", None)
                if data is None:
                    raise
            if isinstance(data, bytes):
                data = data.decode("utf-8")
            d[key] = [x for x in data.split("\n") if x]
        elif "*" in value:
            if "," in value:
                d[key] = [
                    path
                    for pattern in value.split(",")
                    for path in glob.glob(pattern.strip())]
            else:
                d[key] = glob.glob(value)
        elif value.startswith("file="):
            # split only at the first '=' so that filenames containing
            # '=' are not truncated
            filenames = [
                x.strip() for x in value.split("=", 1)[1].split(",")]
            paths = []
            for fn in filenames:
                with IOTools.open_file(fn) as inf:
                    paths.extend(x.strip() for x in inf if x.strip())
            d[key] = paths

        if d[key]:
            continue

        if not is_test:
            raise ValueError(
                "expression '{}' expanded to nothing".format(value))

        # insert some random files for testing purposes:
        if "*" in value:
            # replace glob expressions, using the first one only
            value = re.sub(r",.*", "", value)
            d[key] = [
                re.sub(r"[*]", "test1", value),
                re.sub(r"[*]", "test2", value)]
        elif "bam" in value:
            d[key] = ["test1.bam", "test2.bam"]
        elif "vcf" in value:
            d[key] = ["test1.vcf.gz", "test2.vcf.gz"]
        else:
            d[key] = ["test1.txt", "test2.txt"]

    return config