def call(self, *args, standalone_mode=False, **kwargs):
    """Call into YMP CLI

    ``standalone_mode`` defaults to False so that exceptions are
    passed through rather than caught and converted by Click.

    Side effects: records the equivalent shell command in ``cmd.sh``
    and the CLI output in ``out.log`` (both in the current directory).

    Returns:
        The ``click.testing.Result`` of the invocation.
    Raises:
        Whatever exception the CLI raised, unless ``standalone_mode``
        is True.
    """
    if not self.initialized:
        # change path to USER ymp config (default ~/.ymp/ymp.yml)
        # so that settings there do not interfere with tests
        ymp.config.ConfigMgr.CONF_USER_FNAME = "ymp_user.yml"
        # force reload
        ymp.get_config().unload()
    if not os.path.exists("cmd.sh"):
        with open("cmd.sh", "w") as f:
            f.write("#!/bin/bash -x\n")
    argstr = " ".join(shlex.quote(arg) for arg in args)
    # BUG FIX: open in append mode — mode "w" truncated the file and
    # destroyed the shebang line written just above (and any commands
    # recorded by earlier calls)
    with open("cmd.sh", "a") as f:
        f.write(f"PATH={os.environ['PATH']} ymp {argstr} \"$@\"\n")
    result = self.runner.invoke(self.main, args, **kwargs,
                                standalone_mode=standalone_mode)
    with open("out.log", "w") as f:
        f.write(result.output)
    if result.exception and not standalone_mode:
        raise result.exception
    return result
def target(request, project_dir):
    """Yield one formatted target per configured project.

    The parametrized template (``request.param``) is expanded for every
    project found in the YMP config inside ``project_dir``; the full
    list is also written to ``target.txt`` for later inspection.
    """
    with project_dir.as_cwd():
        log.info("Switched to directory {}".format(project_dir))
        # drop any stale config, then load fresh from this directory
        ymp.get_config().unload()
        config = ymp.get_config()
        all_targets = [
            request.param.format(project)
            for project in config.projects
        ]
        with open("target.txt", "w") as outfile:
            outfile.write("\n".join(all_targets))
        yield from all_targets
def install(
        conda_prefix, conda_env_spec, dry_run, reinstall,
        no_spec, no_archive, fresh, envnames):
    "Install conda software environments"
    if conda_env_spec is not None:
        # override the configured environment spec search path
        ymp.get_config().conda.env_specs = conda_env_spec
    if fresh:
        # "fresh" implies a full rebuild from package lists
        reinstall = no_spec = no_archive = True
    envs = get_envs(envnames)
    need_install = sum(1 for env in envs.values() if not env.installed)
    already_installed = len(envs) - need_install
    if reinstall or already_installed == 0:
        log.warning(f"Creating {len(envs)} environments.")
    else:
        log.warning("Creating %i environments (%i already installed)",
                    need_install, already_installed)
    for env in envs.values():
        # without --reinstall, skip environments that already exist
        if reinstall or not env.installed:
            if conda_prefix:
                env.set_prefix(conda_prefix)
            env.create(dry_run, reinstall=reinstall, nospec=no_spec,
                       noarchive=no_archive)
def call(self, *args, standalone_mode=False, **kwargs):
    """Call into YMP CLI

    ``standalone_mode`` defaults to False so that exceptions are
    passed through rather than caught and converted by Click.

    Side effects: records the equivalent shell command in ``cmd.sh``
    and the CLI output in ``out.log`` (both in the current directory).
    """
    import shlex  # local import: only needed for the debug script
    if not self.initialized:
        # point the user config away from ~/.ymp/ymp.yml so personal
        # settings do not interfere with tests
        ymp.config.ConfigMgr.CONF_USER_FNAME = "ymp_user.yml"
        cfg = ymp.get_config()
        cfg.reload()
    if not os.path.exists("cmd.sh"):
        with open("cmd.sh", "w") as f:
            f.write("#!/bin/bash -x\n")
    # BUG FIX: open in append mode — mode "w" truncated the file and
    # destroyed the shebang written just above. Also shell-quote the
    # arguments so the recorded command is replayable.
    argstr = " ".join(shlex.quote(arg) for arg in args)
    with open("cmd.sh", "a") as f:
        f.write(f"PATH={os.environ['PATH']} ymp {argstr} \"$@\"\n")
    result = self.runner.invoke(self.main, args, **kwargs,
                                standalone_mode=standalone_mode)
    with open("out.log", "w") as f:
        f.write(result.output)
    if result.exception and not standalone_mode:
        raise result.exception
    return result
def test_graph_complete(target, project, invoker):
    """Build the DAG for *target* and sanity-check the resulting graph."""
    cfg = ymp.get_config()
    res = invoker.call("make", "-qq", "--dag", target)
    # Snakemake can't be quieted in version 4.7, and pytest can't be
    # told to ignore stderr. We work around this by removing the
    # first line if it is the spurious Snakemake log message
    if res.output.startswith("Building DAG of jobs..."):
        _, output = res.output.split("\n", 1)
    else:
        output = res.output
    assert output.startswith("digraph")
    # keep the dot graph around to simplify debugging test failures
    with open("dat.dot", "w") as out:
        out.write(output)
    # parse graphviz dot text into a networkx digraph
    # NOTE(review): g.node[...] is the pre-2.x networkx API — verify the
    # pinned networkx version still supports it
    g = DiGraph(AGraph(output))
    n_runs = len(cfg[project].runs)
    # nodes with no incoming edges are workflow entry points
    n_start_nodes = len([1 for node, degree in g.in_degree() if degree == 0])
    log.info("\nTesting start-nodes ({}) >= runs ({})"
             "".format(n_start_nodes, n_runs))
    # assert n_start_nodes >= n_runs
    n_symlinks = len([
        1
        for node, degree in g.in_degree()
        if g.node[node]['label'].startswith('symlink_raw_reads')
    ])
    log.info("Testing symlinks ({}) == 2 * runs ({})"
             "".format(n_symlinks, n_runs))
    assert n_symlinks == len(cfg[project].fq_names)
def test_get_all_targets(demo_dir):
    """Check pipeline target expansion (stack name plus reference files)."""
    # extend the demo config with an included pipeline definition
    with open("ymp.yml", "a") as f:
        f.write("include: test.yml")
    # NOTE(review): the indentation inside this YAML literal looks
    # collapsed — verify the nesting levels against the original file
    with open("test.yml", "w") as f:
        f.write("pipelines:\n"
                " test_pipe:\n"
                " stages:\n"
                " - trim_bbmap\n"
                " - ref_phiX\n")
    # force re-read of the modified config
    ymp.get_config().unload()
    pipe = ymp.get_config().pipelines.test_pipe
    stack = StageStack("toy.test_pipe")
    assert pipe.get_all_targets(stack) == [
        "toy.test_pipe",
        "references/phiX/ALL.fasta.gz"
    ]
    assert pipe.get_ids(stack, []) == ["ALL"]
def __init__(self, name, cfg):
    """Initialize reference stage from its config entry.

    Args:
        name: bare reference name; the stage is named ``ref_<name>``
        cfg: a mapping describing one resource, or a sequence of such
            mappings

    Raises:
        YmpConfigError: if ``cfg`` has the wrong shape or the base
            "references" stage is not registered.
    """
    super().__init__("ref_" + name, cfg)
    #: Files provided by the reference. Keys are the file names
    #: within ymp ("target.extension"), symlinked into dir.ref/ref_name/ and
    #: values are the path to the reference file from workspace root.
    self.files: Dict[str, str] = {}
    # downloaded archives to unpack (filled by add_resource)
    self.archives = []
    # ids ("sample" names) collected from resources
    self._ids: Set[str] = set()
    self._outputs = None
    import ymp
    self.dir = os.path.join(ymp.get_config().dir.references, name)

    if isinstance(cfg, Mapping):
        self.add_resource(cfg)
    elif isinstance(cfg, Sequence) and not isinstance(cfg, str):
        for item in cfg:
            self.add_resource(item)
    else:
        # BUG FIX: error message was ungrammatical ("must list or ...")
        raise YmpConfigError(
            cfg, "Reference config must be list or key-value mapping")

    # Copy rules defined in primary references stage
    stage_references = Stage.get_registry().get("references")
    if not stage_references:
        raise YmpConfigError(
            cfg, "Reference base stage not found. Main rules not loaded?")
    self.rules = stage_references.rules.copy()
def _get_content(self):
    """Return the environment definition as bytes.

    Dynamic environments are rendered from configuration; when a
    workflow is active it is recorded and the parent implementation is
    used; otherwise the on-disk file is returned verbatim.
    """
    if self.dynamic:
        return self._get_dynamic_contents().encode("utf-8")
    cfg = ymp.get_config()
    if cfg.workflow:
        self.workflow = cfg.workflow
        return super()._get_content()
    # BUG FIX: use a context manager so the file handle is closed
    # deterministically (previously left to the garbage collector)
    with open(self.file, "rb") as fdes:
        return fdes.read()
def complete(cls, ctx, incomplete): """Try to complete incomplete command This is executed on tab or tab-tab from the shell Args: ctx: click context object incomplete: last word in command line up until cursor Returns: list of words incomplete can be completed to """ # Turn of logging! log = ctx.ensure_object(Log) log.mod_level(10) result: list = [] stack, _, tocomplete = incomplete.rpartition(".") debug("complete(stack={},incomplete={})", stack, tocomplete) if not stack: cfg = ymp.get_config() options = cfg.projects.keys() result += (o for o in options if o.startswith(tocomplete)) result += (o + "." for o in options if o.startswith(tocomplete)) else: from ymp.stage import StageStack try: stackobj = StageStack.get(stack) except YmpStageError as e: debug(e.format_message().replace("{", "{{").replace("}", "}}")) return [] debug("stacko = {}", repr(stack)) options = stackobj.complete(tocomplete) debug("options = {}", options) # reduce items sharing prefix before "_" prefixes = {} for option in options: prefix = option.split("_", 1)[0] group = prefixes.setdefault(prefix, []) group.append(option) if len(prefixes) == 1: extensions = options else: extensions = [] for prefix, group in prefixes.items(): if len(group) > 1: extensions.append(prefix + "_") else: extensions.append(group[0]) result += ('.'.join((stack, ext)) for ext in extensions) result += ('.'.join((stack, ext)) + "." for ext in extensions if not ext[-1] == "_") debug("res={}", result) return result
def run(self, command):
    """Execute command in environment

    Activates this environment in a shell, then runs the given command
    (a sequence of words) inside it.

    Returns exit code of command run.
    """
    cfg = ymp.get_config()
    cmdline = "source activate {}; {}".format(self.path, " ".join(command))
    log.debug("Running: %s", cmdline)
    completed = subprocess.run(cmdline, shell=True, executable=cfg.shell)
    return completed.returncode
def get_registry(cls, clean=False):
    """
    Return all objects of this class registered with current workflow
    """
    import ymp
    config = ymp.get_config()
    # registry lives in the config cache, keyed by subclass name;
    # loading it ensures the global workflow exists first
    return config.cache.get_cache(
        cls.__name__,
        loadfunc=ExpandableWorkflow.ensure_global_workflow,
        clean=clean,
    )
def test_fqfiles(project_dir, fq_names):
    """Verify per-project fastq name counts against expected values.

    ``fq_names`` holds the expected counts in the order of ``attrs``.
    """
    attrs = ("fq_names", "pe_fq_names", "se_fq_names",
             "fwd_pe_fq_names", "rev_pe_fq_names", "fwd_fq_names")
    with project_dir.as_cwd():
        cfg = ymp.get_config()
        cfg.reload()
        for dataset in cfg.projects.values():
            for expected, attr in zip(fq_names, attrs):
                assert len(getattr(dataset, attr)) == expected
def add_resource(self, rsc):
    """Register one resource from the reference configuration.

    Depending on the resource ``type`` (default ``fasta``), entries are
    added to ``self.files``, and archives are queued for unpacking.

    Args:
        rsc: key-value mapping with at least a ``url`` field

    Raises:
        YmpConfigError: if ``rsc`` is not a mapping, lacks ``url``,
            or has an unknown ``type``.
    """
    if not isinstance(rsc, Mapping):
        raise YmpConfigError(
            rsc, "Reference resource config must be a key-value mapping")

    # IDIOM FIX: "x not in y" instead of "not x in y"
    if "url" not in rsc:
        raise YmpConfigError(rsc, "Reference resource must have 'url' field")

    maybeurl = str(rsc["url"])
    import ymp
    local_path = make_local_path(ymp.get_config(), maybeurl)
    # if make_local_path did not rewrite it, the value was a plain path
    isurl = local_path != maybeurl
    if not isurl:
        local_path = rsc.get_path("url")

    type_name = rsc.get('type', 'fasta').lower()
    if 'id' in rsc:
        self._ids.add(rsc['id'])

    if type_name in ("fasta", "fastp"):
        # single compressed sequence file
        self.files[f"ALL.{type_name}.gz"] = local_path
    elif type_name in ("gtf", "snp", "tsv", "csv"):
        # single uncompressed table/annotation file
        self.files[f"ALL.{type_name}"] = local_path
    elif type_name == 'dir':
        # archive to download and unpack; member list from 'files'
        archive = Archive(name=self.name, dirname=self.dir,
                          tar=local_path, url=maybeurl,
                          files=rsc['files'],
                          strip=rsc.get('strip_components', 0))
        self.files.update(archive.get_files())
        self.archives.append(archive)
    elif type_name == 'dirx':
        # pre-extracted directory; map target names to contained files
        self.files.update({
            key: os.path.join(local_path, val)
            for key, val in rsc.get('files', {}).items()
        })
    elif type_name == 'path':
        # local directory scanned with 'match' regexes; each regex must
        # define a named group "sample"
        self.dir = local_path.rstrip("/")
        try:
            filenames = os.listdir(local_path)
        except FileNotFoundError:
            log.error("Directory %s required by %s %s does not exist",
                      local_path, self.__class__.__name__, self.name)
            filenames = []
        for filename in filenames:
            for regex in rsc.get('match', []):
                match = re.fullmatch(regex, filename)
                if not match:
                    continue
                self._ids.add(match.group('sample'))
                self.files[filename] = os.path.join(local_path, filename)
    else:
        raise YmpConfigError(rsc, f"Unknown type {type_name}", key="type")
def run(self, command):
    """Execute command in environment

    Wraps the command (a sequence of words) with Snakemake's conda
    activation shell snippet and runs it.

    Returns exit code of command run.
    """
    cmdline = snakemake_conda.Conda().shellcmd(
        self.path, " ".join(command))
    cfg = ymp.get_config()
    log.debug("Running: %s", cmdline)
    completed = subprocess.run(cmdline, shell=True, executable=cfg.shell)
    return completed.returncode
def _get_dynamic_contents(self):
    """Render the conda environment definition YAML from config.

    Returns:
        YAML document (str) with name, dependencies and channels.
    """
    # BUG FIX: the previous `cfg = ymp.get_config()` binding was never
    # used; the call is kept in case loading the config has side effects
    ymp.get_config()
    defaults = {
        'name': self.name,
        'dependencies': self.packages,
        'channels': self.channels,
    }
    yaml = YAML(typ='rt')
    yaml.default_flow_style = False
    buf = io.StringIO()
    yaml.dump(defaults, buf)
    return buf.getvalue()
def __init__(self, name, cfg):
    """Initialize project dataset; requires a KEY_DATA entry in cfg.

    Args:
        name: project name
        cfg: project configuration mapping

    Raises:
        YmpConfigError: if the data key is missing from the config.
    """
    super().__init__(name, cfg)
    # read-pair suffixes (e.g. R1/R2) from the global config
    self.pairnames = ymp.get_config().pairnames
    # lazily determined from the data source on first access
    self.fieldnames = None
    self._data = None
    self._source_cfg = None
    self._idcol = None
    # optional barcode column name
    self.bccol = cfg.get(self.KEY_BCCOL)

    if self.KEY_DATA not in self.cfg:
        raise YmpConfigError(
            self.cfg, "Missing key '{}'".format(self.KEY_DATA))
def install(conda_prefix, conda_env_spec, dry_run, force, envnames):
    "Install conda software environments"
    if conda_env_spec is not None:
        # override the configured environment spec search path
        ymp.get_config().conda.env_specs = conda_env_spec
    environments = get_envs(envnames)
    log.warning(f"Creating {len(environments)} environments.")
    for environment in environments.values():
        if conda_prefix:
            environment.set_prefix(conda_prefix)
        environment.create(dry_run, force)
def _download_files(self, urls, md5s):
    """Download package files into the archive folder, verifying md5s.

    Raises:
        YmpWorkflowError: if any download fails.
    """
    from ymp.download import FileDownloader
    if not op.exists(self.archive_file):
        os.makedirs(self.archive_file)
    cfg = ymp.get_config()
    downloader = FileDownloader(alturls=cfg.conda.alturls)
    if downloader.get(urls, self.archive_file, md5s):
        return
    # a partially downloaded archive folder is left in place;
    # removing it (shutil.rmtree) was considered but not enabled
    raise YmpWorkflowError(
        f"Unable to create environment {self.name}, "
        f"because downloads failed. See log for details.")
def start_snakemake(kwargs):
    """Execute Snakemake with given parameters and targets

    Fixes paths of kwargs['targets'] to be relative to YMP root.

    Args:
        kwargs: CLI options collected by click; renamed/filtered before
            being passed to snakemake.snakemake()

    Returns:
        Whatever snakemake.snakemake() returns (truthy on success).

    Raises:
        YmpException: if the current directory is outside the YMP root.
    """
    cfg = ymp.get_config()
    root_path = cfg.root
    cur_path = os.path.abspath(os.getcwd())
    if not cur_path.startswith(root_path):
        raise YmpException("internal error - CWD moved out of YMP root?!")
    # keep only the part below the root; targets are re-rooted with it
    cur_path = cur_path[len(root_path):]

    # translate renamed arguments to snakemake synopsis
    # (None values are YMP-only options dropped entirely)
    arg_map = {
        'immediate': 'immediate_submit',
        'wrapper': 'jobscript',
        'scriptname': 'jobname',
        'cluster_cores': 'nodes',
        'snake_config': 'config',
        'drmaa': None,
        'sync': None,
        'sync_arg': None,
        'command': None,
        'args': None,
        'nohup': None
    }
    kwargs = {arg_map.get(key, key): value
              for key, value in kwargs.items()
              if arg_map.get(key, key) is not None}
    kwargs['workdir'] = root_path

    # our debug flag sets a new excepthook handler, so we use this
    # to decide whether snakemake should run in debug mode
    if sys.excepthook.__module__ != "sys":
        log.warning(
            "Custom excepthook detected. Having Snakemake open stdin "
            "inside of run: blocks")
        kwargs['debug'] = True

    # map our logging level to snakemake logging level
    if log.getEffectiveLevel() > logging.WARNING:
        kwargs['quiet'] = True
    if log.getEffectiveLevel() < logging.WARNING:
        kwargs['verbose'] = True
    kwargs['use_conda'] = True

    # make target paths relative to the YMP root
    if 'targets' in kwargs:
        kwargs['targets'] = [os.path.join(cur_path, t)
                             for t in kwargs['targets']]

    log.debug("Running snakemake.snakemake with args: %s", kwargs)
    import snakemake
    return snakemake.snakemake(ymp._snakefile, **kwargs)
def submit(profile, **kwargs):
    """Build target(s) on cluster

    The parameters for cluster execution are drawn from layered
    profiles. YMP includes base profiles for the "torque" and "slurm"
    cluster engines.

    Args:
        profile: name of the cluster profile to layer on top of the
            default profile (falls back to cfg.cluster.profile)
        kwargs: command line options; non-None values are layered on
            top of the profile settings

    Raises:
        click.UsageError: if no submission command is configured or
            the configured command is not executable.
    """
    cfg = ymp.get_config()

    # The cluster config uses profiles, which are assembled by layering
    # the default profile, the selected profile and additional command
    # line parameters.
    config = cfg.cluster.profiles.default
    profile_name = profile or cfg.cluster.profile
    if profile_name:
        config.add_layer(profile_name,
                         cfg.cluster.profiles[profile_name])
    cli_params = {key: value
                  for key, value in kwargs.items()
                  if value is not None}
    config.add_layer("<shell arguments>", cli_params)

    # prepare cluster command
    if config.command is None:
        raise click.UsageError(
            "No cluster submission command configured. "
            "Please check the manual on how to configure YMP for your "
            "cluster")
    # NOTE(review): config.args.values() must support '+' with a list
    # (i.e. return a list-like) — verify against the config class
    cmd = config.command.split() + config.args.values()
    if config.drmaa:
        param = 'drmaa'
        cmd[0] = ''
    elif config.sync:
        param = 'cluster_sync'
        cmd.append(config.sync_arg)
    else:
        param = 'cluster'

    if cmd[0] and not shutil.which(cmd[0]):
        # BUG FIX: message read "is does not exist"; grammar corrected
        raise click.UsageError(
            f"The configured cluster submission command '{cmd[0]}' "
            f"does not exist or is not executable. "
            f"Please check your cluster configuration.")

    config[param] = cfg.expand(" ".join(cmd))

    rval = start_snakemake(config)
    if not rval:
        sys.exit(1)
def __init__(self, path, stage=None):
    """Build a stage stack from a dotted stage path.

    Args:
        path: dotted sequence of stage names, e.g. "proj.trim.map";
            the first component must name a configured project
        stage: optional Stage object for the last component; resolved
            via find_stage() if not given

    Raises:
        YmpStageError: if the leading component is not a known project
            or the given stage does not match the path's last component.
    """
    self.name = path
    self.stage_names = path.split(".")
    self.stages = [find_stage(name) for name in self.stage_names]
    cfg = ymp.get_config()

    # determine project
    try:
        self.project = cfg.projects[self.stage_names[0]]
    except (KeyError, IndexError):
        # BUG FIX: a missing project raises KeyError from the mapping
        # lookup; previously only IndexError was caught, so the intended
        # YmpStageError was never raised. Leftover log.error("here")
        # debug output removed.
        raise YmpStageError(f"No project for stage stack {path} found")

    # determine top stage
    stage_names = copy.copy(self.stage_names)
    top_stage = stage_names.pop()
    if stage:
        if not stage.match(top_stage):
            raise YmpStageError(
                f"Internal error: {top_stage} not matched by {stage}")
    if not stage:
        stage = find_stage(top_stage)
    self.stage = stage

    # determine grouping; an explicit "group_<col>" component overrides
    self.group = getattr(stage, "group", None)
    if stage_names and stage_names[-1].startswith("group_"):
        self.group = [stage_names.pop().split("_")[1]]

    # collect inputs
    self.prevs = self.resolve_prevs()
    if self.group is None:
        # inherit grouping columns from previous stages, most recent
        # first, deduplicated in order
        groups = list(
            dict.fromkeys(group
                          for p in reversed(list(self.prevs.values()))
                          for group in p.group))
        self.group = self.project.minimize_variables(groups)

    # NOTE(review): placement of this logging relative to the `if`
    # above reconstructed from collapsed source — verify
    log.info("Stage stack %s using column %s", self, self.group)
    # report which stack provides which input file types
    prevmap = dict()
    for typ, stack in self.prevs.items():
        prevmap.setdefault(str(stack), []).append(typ)
    for stack, typ in prevmap.items():
        ftypes = ", ".join(typ).replace("/{sample}", "*")
        title = stack.split(".")[-1]
        if self.stage_names.count(title) != 1:
            title = stack
        log.info(f".. from {title}: {ftypes}")
def get(cls, path, stage=None):
    """
    Cached access to StageStack

    Args:
        path: Stage path
        stage: Stage object at head of stack (currently unused; kept
            for interface compatibility)
    """
    cache = ymp.get_config().cache.get_cache(
        cls.__name__, itemloadfunc=StageStack)
    stack = cache[path]
    # log each distinct stack only once per session
    if stack not in cls.used_stacks:
        cls.used_stacks.add(stack)
        log.info("Stage stack %s using column %s", stack, stack.group)
    return stack
def instance(cls, path):
    """
    Cached access to StageStack

    Args:
        path: Stage path
    """
    cache = ymp.get_config().cache.get_cache(
        cls.__name__, itemloadfunc=StageStack)
    stack = cache[path]
    # show details for each distinct stack only once per session
    if stack not in cls.used_stacks:
        cls.used_stacks.add(stack)
        stack.show_info()
    return stack
def project(name, yes):
    """Create a new, empty project entry in the YMP config."""
    config = ymp.get_config()._config
    if not name:
        name = click.prompt("Please enter a name for the new project",
                            type=str)
    # ask before clobbering an existing project (unless --yes given)
    if name in config.projects and not yes:
        click.confirm("Project '{}' already configured. "
                      "Do you want to overwrite this project?"
                      "".format(name), abort=True)
    config.projects[name] = {'data': None}
    log.warning("Saving config")
    config.save()
def __init__(self, name, cfg):
    """Initialize reference stage ``ref_<name>`` from resource list."""
    super().__init__("ref_" + name, cfg)
    self.files = {}
    self.archives = []
    self.group = []
    self._outputs = None
    import ymp
    config = ymp.get_config()
    self.dir = os.path.join(config.dir.references, name)

    for resource in cfg:
        # a plain string is shorthand for {'url': <string>}
        if isinstance(resource, str):
            resource = {'url': resource}
        self.add_files(resource, make_local_path(config, resource['url']))
    if not self.group:
        self.group = ["ALL"]
def complete(self, incomplete):
    """Return candidate stage names that could extend this stack.

    Candidates are group_* options from the project's variables,
    ref_* options from configured references, and registered stages
    that form a valid stack when appended.
    """
    registry = Stage.get_registry()
    cfg = ymp.get_config()
    matches = []
    # group_<column> options from the project's variables
    matches.extend(
        opt for opt in ("group_" + var for var in self.project.variables)
        if opt.startswith(incomplete))
    # ref_<name> options from configured references
    matches.extend(
        opt for opt in ("ref_" + ref for ref in cfg.ref)
        if opt.startswith(incomplete))
    for stage in registry.values():
        for name in (stage.name, stage.altname):
            if name and name.startswith(incomplete):
                try:
                    # only offer stages that resolve to a valid stack
                    self.get(".".join((self.path, name)))
                except YmpStageError:
                    continue
                matches.append(name)
    return matches
def show(ctx, prop, source):
    """
    Show configuration properties

    ``prop`` is a dotted path with optional slice/index syntax, e.g.
    ``projects.toy.data[0]`` or ``cluster.profiles[0:2]``.
    """
    if not prop:
        show_help(ctx)
    log.debug(f"querying prop {prop}")
    obj = ymp.get_config()
    # walk the dotted path, applying any "[...]" index/slice suffixes
    while prop:
        key, _, prop = prop.partition(".")
        key, _, slice_str = key.partition("[")
        obj = getattr(obj, key)
        if not slice_str:
            continue
        # "[a][b]" and "[a,b]" are normalized to a comma list; the
        # trailing "]" is dropped via slice_str[:-1]
        for subslice_str in slice_str[:-1].replace("][", ",").split(","):
            try:
                # "start:stop:step" with empty parts mapping to None
                indices = [int(part) if part else None
                           for part in subslice_str.split(":")]
                if len(indices) > 3:
                    log.warning(f"Malformed slice string '{slice_str}'")
                    indices = indices[:3]
                if len(indices) == 1:
                    # plain integer index
                    obj = obj[indices[0]]
                else:
                    obj = obj[slice(*indices)]
            except ValueError:
                # not numeric: treat as a string key lookup
                obj = obj[subslice_str]
    # render the resolved object as readably as possible
    if hasattr(obj, "to_yaml"):
        output = obj.to_yaml(source)
    elif isinstance(obj, str):
        output = obj
    elif isinstance(obj, Sequence):
        output = pformat([str(x) for x in obj], width=200)
    elif isinstance(obj, Mapping):
        output = pformat({k: pformat(v) for k,v in obj.items()})
    else:
        output = str(obj)
    click.echo(output)
def complete(cls, _ctx, incomplete): """Try to complete incomplete command This is executed on tab or tab-tab from the shell Args: ctx: click context object incomplete: last word in command line up until cursor Returns: list of words incomplete can be completed to """ # errlog = open("err.txt", "a") errlog = open("/dev/null", "a") errlog.write("\nincomplete={}\n".format(incomplete)) cache = Cache.get_cache("completion") query_stages = incomplete.split(".") errlog.write("stages={}\n".format(query_stages)) options: list = [] if len(query_stages) == 1: # expand projects cfg = ymp.get_config() options = cfg.projects.keys() else: # expand stages if 'stages' in cache: stages = cache['stages'] else: from ymp.snakemake import load_workflow from ymp.stage import Stage load_workflow() stages = cache['stages'] = list(Stage.get_registry().keys()) options = stages options = [o for o in options if o.startswith(query_stages[-1])] prefix = ".".join(query_stages[:-1]) if prefix: prefix += "." errlog.write("prefix={}\n".format(prefix)) options = [prefix + o + cont for o in options for cont in ("/", ".")] errlog.write("options={}\n".format(options)) errlog.close() return options
def _get_env_from_spec(self):
    """Parses conda spec file

    Conda spec files contain a list of URLs pointing to the packages
    comprising the environment. Each URL may have an md5 sum appended
    as "anchor" using "#". Comments are placed at the top in lines
    beginning with "#" and a single line "@EXPLICIT" indicates the
    type of the file.

    Returns:
        urls: list of URLs
        files: list of file names
        md5s: list of md5 sums
    """
    cfg = ymp.get_config()
    # search each configured spec dir; "BUILTIN:" prefixed paths are
    # resolved inside YMP's bundled env directory
    for spec_path in cfg.conda.env_specs.get_paths():
        if spec_path.startswith("BUILTIN:"):
            spec_path = spec_path.replace("BUILTIN:", "")
            spec_path = op.join(ymp._env_dir, spec_path)
        # prefer the platform-specific subdirectory over the generic one
        for path in (op.join(spec_path, cfg.platform), spec_path):
            spec_file = op.join(path, self.name + ".txt")
            log.debug("Trying %s", spec_file)
            if op.exists(spec_file):
                log.info("Using %s", spec_file)
                break
        else:
            continue
        break
    else:
        # no spec file found anywhere
        return [], [], []

    log.debug("Using env spec '%s'", spec_file)
    with open(spec_file) as sf:
        # NOTE(review): lines keep their trailing newline, so the md5
        # values below include "\n", and a line without "#" would raise
        # IndexError — verify downstream strips/handles this
        urls = [
            line
            for line in sf
            if line and line[0] != "@" and line[0] != "#"
        ]
    md5s = [url.split("#")[1] for url in urls]
    files = [url.split("#")[0].split("/")[-1] for url in urls]
    return urls, files, md5s
def find_stage(name):
    """Resolve a stage name to its Stage/Project/Pipeline/Reference.

    Resolution order: ``group_*`` grouping pseudo-stages, ``ref_*``
    references, projects, pipelines, then registered stages.

    Args:
        name: single stage name (one dotted-path component)

    Raises:
        YmpStageError: if the name matches nothing known.
    """
    cfg = ymp.get_config()
    registry = Stage.get_registry()
    if name.startswith("group_"):
        return GroupBy(name)
    if name.startswith("ref_"):
        refname = name[4:]
        if refname in cfg.ref:
            return cfg.ref[refname]
        # BUG FIX: previously interpolated cfg.ref[refname], which
        # raised KeyError here (refname is known absent) and masked
        # the intended YmpStageError
        raise YmpStageError(f"Unknown reference '{refname}'")
    if name in cfg.projects:
        return cfg.projects[name]
    if name in cfg.pipelines:
        return cfg.pipelines[name]
    for stage in registry.values():
        if stage.match(name):
            return stage
    raise YmpStageError(f"Unknown stage '{name}'")