def get_dir_digest(path, pm=None):
    """
    Generate a MD5 digest that reflects just the contents of the
    files in the selected directory.

    :param str path: path to the directory to digest
    :param pypiper.PipelineManager pm: a pipeline object, optional.
        The subprocess module will be used if not provided
    :return str: a digest, e.g. a3c46f201a3ce7831d85cf4a125aa334, or
        None if the digest could not be calculated without a pipeline manager
    :raise OSError: if the md5sum command line tool is not available
    """
    if not is_command_callable("md5sum"):
        raise OSError(
            "md5sum command line tool is required for asset digest calculation. \n"
            "Install and try again, e.g on macOS: 'brew install md5sha1sum'")
    # hash every file (excluding build-stats artifacts), sort by file path for
    # a stable order, then hash the concatenated list of per-file digests.
    # '\\;' yields a literal backslash-semicolon (the 'find -exec' terminator);
    # a bare '\;' is an invalid Python escape and triggers a SyntaxWarning.
    cmd = "cd {}; find . -type f -not -path './" + BUILD_STATS_DIR + \
          "*' -exec md5sum {{}} \\; | sort -k 2 | awk '{{print $1}}' | md5sum"
    if isinstance(pm, pypiper.PipelineManager):
        x = pm.checkprint(cmd.format(path))
    else:
        try:
            from subprocess import check_output
            x = check_output(cmd.format(path), shell=True).decode("utf-8")
        except Exception as e:
            # best-effort: log and return None rather than propagate
            _LOGGER.warning("{}: could not calculate digest for '{}'".format(
                e.__class__.__name__, path))
            return
    return str(sub(r'\W+', '', x))  # strips non-alphanumeric
def _check_tgz(path, output, asset_name):
    """
    Check if file exists and tar it.
    If gzipping is requested, the availability of pigz software is checked and used.

    :param str path: path to the file to be tarred
    :param str output: path to the result file
    :param str asset_name: name of the asset
    :raise OSError: if the file/directory meant to be archived does not exist
    """
    if not os.path.exists(path):
        raise OSError("Entity '{}' does not exist".format(path))
    # since the genome directory structure changed (added tag layer) in
    # refgenie >= 0.7.0 we need to perform some extra file manipulation before
    # archiving to make the produced archive compatible with new and old
    # versions of refgenie CLI. The difference is that refgenie < 0.7.0
    # requires the asset to be archived with the asset-named enclosing dir,
    # but with no tag-named directory as this concept did not exist back then.
    # 1) move the asset files into an asset-named dir
    stage = "cd {p}; mkdir {an}; mv `find . -type f -not -path './_refgenie_build*'` {an} 2>/dev/null; "
    # 2) tar gzip, preferring pigz when available
    if is_command_callable("pigz"):
        archive = "tar -cvf - {an} | pigz > {o}; "
    else:
        archive = "tar -cvzf {o} {an}; "
    # 3) move the files back to the tag-named dir and drop the asset-named dir
    restore = "mv {an}/* .; rm -r {an}"
    command = (stage + archive + restore).format(p=path, o=output, an=asset_name)
    _LOGGER.debug("command: {}".format(command))
    run(command, shell=True)
def _check_tgz_legacy(path, output, asset_name, genome_name, alias): """ NOTE: This is a duplication of the _check_tgz function, kept separate as in the future this step will be simply removed. Check if file exists and tar it. If gzipping is requested, the availability of pigz software is checked and used. :param str path: path to the file to be tarred :param str output: path to the result file :param str asset_name: name of the asset :raise OSError: if the file/directory meant to be archived does not exist """ # TODO: remove in the future if isinstance(alias, str): alias = [alias] for a in alias: if os.path.exists(path): aliased_output = replace_str_in_obj(output, x=genome_name, y=a) cmd = "rsync -rvL --exclude '_refgenie_build' {p}/ {p}/{an}/; " command = cmd.format(p=path, o=output, an=asset_name) _LOGGER.debug("command: {}".format(command)) run(command, shell=True) swap_names_in_tree(os.path.join(path, asset_name), a, genome_name) # tar gzip the new dir cmd = "cd {p}; " + ("tar -cvf - {an} | pigz > {oa}; " if is_command_callable("pigz") else "tar -cvzf {oa} {an}; ") # remove the new dir cmd += "rm -r {p}/{an}; " command = cmd.format(p=path, oa=aliased_output, an=asset_name) _LOGGER.debug(f"command: {command}") run(command, shell=True) else: raise OSError(f"Entity '{path}' does not exist")
def test_extant_folder_isnt_callable(tmpdir, get_arg):
    """ A directory path can't be callable. """
    target = get_arg(tmpdir.strpath)
    print("p: {}".format(target))
    # ensure the directory exists before asserting on its callability
    if not os.path.exists(target):
        os.makedirs(target)
    assert os.path.exists(target)
    assert not is_command_callable(target)
def determine_uncallable(
        commands,
        transformations=((lambda f: isinstance(f, str) and os.path.isfile(
            expandpath(f)) and expandpath(f).endswith(".jar"),
                          lambda f: "java -jar {}".format(expandpath(f))), ),
        accumulate=False):
    """
    Determine which commands are not callable.

    :param Iterable[str] | str commands: commands to check for callability
    :param Iterable[(function(str) -> bool, function(str) -> str)]
        transformations: pairs in which first element is a predicate and
        second is a transformation to apply to the input if the predicate
        is satisfied
    :param bool accumulate: whether to accumulate transformations (more
        than one possible per command)
    :return list[(str, str)]: collection of commands that appear uncallable;
        each element is a pair in which the first element is the original
        'command' and the second is what was actually assessed for
        callability.
    :raise TypeError: if transformations are provided but the argument is a
        string or is non-Iterable
    :raise Exception: if accumulation of transformation is False but the
        collection of transformations is unordered
    """
    commands = [commands] if isinstance(commands, str) else commands
    if transformations:
        # normalize to a collection of (predicate, transform) pairs,
        # whether a Mapping or a plain iterable was given
        trans = transformations.values() if isinstance(
            transformations, Mapping) else transformations
        if not isinstance(transformations, Iterable) or isinstance(transformations, str) or \
                not all(map(lambda func_pair: isinstance(func_pair, tuple) and len(func_pair) == 2, trans)):
            raise TypeError(
                "Transformations argument should be a collection of pairs; got "
                "{} ({})".format(transformations,
                                 type(transformations).__name__))
        if accumulate:
            def finalize(cmd):
                # apply every matching transformation, in order; iterate the
                # normalized pairs (not the raw argument) so a Mapping input
                # works instead of iterating its keys
                for p, t in trans:
                    if p(cmd):
                        cmd = t(cmd)
                return cmd
        else:
            if not isinstance(transformations, (tuple, list)):
                raise Exception(
                    "If transformations are unordered, non-accumulation of "
                    "effects may lead to nondeterministic behavior.")

            def finalize(cmd):
                # apply only the first matching transformation
                for p, t in trans:
                    if p(cmd):
                        return t(cmd)
                return cmd
    else:
        finalize = lambda cmd: cmd
    return [(orig, used) for orig, used in
            map(lambda c: (c, finalize(c)), commands)
            if not is_command_callable(used)]
def build_submission_bundles(self, protocol, priority=True):
    """
    Create pipelines to submit for each sample of a particular protocol.

    With the argument (flag) to the priority parameter, there's control
    over whether to submit pipeline(s) from only one of the project's
    known pipeline locations with a match for the protocol, or whether to
    submit pipelines created from all locations with a match for the
    protocol.

    :param str protocol: name of the protocol/library for which to
        create pipeline(s)
    :param bool priority: to only submit pipeline(s) from the first of the
        pipelines location(s) (indicated in the project config file) that
        has a match for the given protocol; optional, default True
    :return Iterable[(PipelineInterface, type, str, str)]:
    :raises AssertionError: if there's a failure in the attempt to
        partition an interface's pipeline scripts into disjoint subsets of
        those already mapped and those not yet mapped
    """
    if not priority:
        raise NotImplementedError(
            "Currently, only prioritized protocol mapping is supported "
            "(i.e., pipeline interfaces collection is a prioritized list, "
            "so only the first interface with a protocol match is used.)")

    # Pull out the collection of interfaces (potentially one from each of
    # the locations indicated in the project configuration file) as a
    # sort of pool of information about possible ways in which to submit
    # pipeline(s) for sample(s) of the indicated protocol.
    pifaces = self.interfaces.get_pipeline_interface(protocol)
    if not pifaces:
        raise PipelineInterfaceConfigError(
            "No interfaces for protocol: {}".format(protocol))

    # convert to a list, in the future we might allow to match multiple
    # NOTE(review): this condition looks inverted -- a str value is kept
    # as-is (and would then be iterated character by character below),
    # while any other value (including a list) is wrapped in a list;
    # confirm against get_pipeline_interface's return type
    pifaces = pifaces if isinstance(pifaces, str) else [pifaces]

    job_submission_bundles = []
    new_jobs = []
    _LOGGER.debug(
        "Building pipelines matched by protocol: {}".format(protocol))
    for pipe_iface in pifaces:
        # Determine how to reference the pipeline and where it is.
        path = pipe_iface["path"]
        # skip interfaces whose pipeline script is neither an extant path
        # nor a callable command
        if not (os.path.exists(path) or is_command_callable(path)):
            _LOGGER.warning("Missing pipeline script: {}".format(path))
            continue

        # Add this bundle to the collection of ones relevant for the
        # current PipelineInterface.
        new_jobs.append(pipe_iface)
    job_submission_bundles.append(new_jobs)
    # flatten the (currently single-element) bundle collection
    return list(itertools.chain(*job_submission_bundles))
def test_extant_file_requires_exec_for_callability(
        tmpdir, filepath, setup, exp_exe):
    """ Filepath is callable iff it has exec bit. """
    target = filepath(tmpdir.strpath)
    # the file must not pre-exist; create it fresh, then apply the
    # permission setup under test
    assert not os.path.exists(target)
    _mkfile(target)
    assert os.path.isfile(target)
    setup(target)
    # callability must track the executable bit exactly
    assert os.access(target, os.X_OK) is exp_exe
    assert is_command_callable(target) is exp_exe
def check_command(self, cmd):
    """
    Determine whether it appears that a command may be run.

    :param str cmd: command to check for runnability
    :raise OSError: if it's possible to verify that running given command
        would fail
    """
    if not is_command_callable(cmd):
        raise OSError("{} is not callable".format(cmd))
def _check_tgz(path, output):
    """
    Check if file exists and tar it.
    If gzipping is requested, the pigz software is used if available.

    :param str path: path to the file to be tarred
    :param str output: path to the result file
    :raise OSError: if the file/directory meant to be archived does not exist
    """
    if not os.path.exists(path):
        raise OSError(f"Entity '{path}' does not exist")
    parent_dir, tag_name = os.path.split(path)
    # exclude the _refgenie_build dir; it may change digests
    if is_command_callable("pigz"):
        archive = "-cvf - {tn} | pigz > {o}"
    else:
        archive = "-cvzf {o} {tn}"
    command = ("tar --exclude '_refgenie_build' -C {p} " + archive).format(
        p=parent_dir, o=output, tn=tag_name)
    _LOGGER.info(f"command: {command}")
    run(command, shell=True)
def test_missing_path_isnt_callable(tmpdir, relpath):
    """ A filepath that doesn't exist (and isn't on PATH) isn't callable. """
    candidate = os.path.join(tmpdir.strpath, relpath)
    # the path must be absent for the negative-callability assertion to hold
    assert not os.path.exists(candidate)
    assert not is_command_callable(candidate)
def bulker_activate(bulker_config, cratelist, echo=False, strict=False,
                    prompt=True):
    """
    Activates a given crate.

    :param yacman.YacAttMap bulker_config: The bulker configuration object.
    :param list cratelist: a list of cratevars objects, which are dicts with
        values for 'namespace', 'crate', and 'tag'.
    :param bool echo: Should we just echo the new PATH to create? Otherwise,
        the function will create a new shell and replace the current process
        with it.
    :param bool strict: Should we wipe out the PATH, such that the returned
        environment contains strictly only commands listed in the bulker
        manifests?
    :param bool prompt: whether to export the bulker-specific shell prompt
        (BULKERPROMPT / PS1) variables
    """
    # activating is as simple as adding a crate folder to the PATH env var.
    # NOTE(review): this aliases (does not copy) the live environment, so
    # assignments to new_env below mutate os.environ directly -- confirm
    # that is intended
    new_env = os.environ
    # resolve the shell to spawn: configured shell_path, else $SHELL
    if hasattr(bulker_config.bulker, "shell_path"):
        shellpath = os.path.expandvars(bulker_config.bulker.shell_path)
    else:
        shellpath = os.path.expandvars("$SHELL")
    if not is_command_callable(shellpath):
        bashpath = "/bin/bash"
        _LOGGER.warning(
            "Specified shell is not callable: '{}'. Using {}.".format(
                shellpath, bashpath))
        # NOTE(review): this shell_list is unconditionally overwritten by the
        # basename checks below, which still inspect the non-callable
        # shellpath -- confirm intended
        shell_list = [bashpath, bashpath]
    # resolve the shell RC file: configured shell_rc, else a per-shell default
    if hasattr(bulker_config.bulker, "shell_rc"):
        shell_rc = os.path.expandvars(bulker_config.bulker.shell_rc)
    else:
        if os.path.basename(shellpath) == "bash":
            shell_rc = "$HOME/.bashrc"
        elif os.path.basename(shellpath) == "zsh":
            shell_rc = "$HOME/.zshrc"
        else:
            # NOTE(review): shell_rc is left unassigned on this path; the
            # later reads of shell_rc (echo/export below) would raise
            # NameError for shells other than bash/zsh -- confirm
            _LOGGER.warning("No shell RC specified shell")
    # build the argv for the replacement shell process
    # (first element is the executable, second is argv[0])
    if os.path.basename(shellpath) == "bash":
        shell_list = [shellpath, shellpath, "--noprofile"]
    elif os.path.basename(shellpath) == "zsh":
        shell_list = [shellpath, shellpath]
    else:
        bashpath = "/bin/bash"
        _LOGGER.warning(
            "Shell must be bash or zsh. Specified shell was: '{}'. Using {}.".
            format(shellpath, bashpath))
        shell_list = [bashpath, bashpath, "--noprofile"]

    newpath = get_new_PATH(bulker_config, cratelist, strict)
    # We can use lots of them. Use the last one for naming the session.
    name = "{namespace}/{crate}".format(namespace=cratelist[-1]["namespace"],
                                        crate=cratelist[-1]["crate"])
    _LOGGER.debug("Newpath: {}".format(newpath))
    # resolve the shell prompt: configured shell_prompt, else a per-shell
    # built-in default
    if hasattr(bulker_config.bulker, "shell_prompt"):
        ps1 = bulker_config.bulker.shell_prompt
    else:
        if os.path.basename(shellpath) == "bash":
            ps1 = "\\u@\\b:\\w\\a\\$ "
            # With color:
            ps1 = "\\[\\033[01;93m\\]\\b|\\[\\033[00m\\]\\[\\033[01;34m\\]\\w\\[\\033[00m\\]\\$ "
        elif os.path.basename(shellpath) == "zsh":
            ps1 = "%F{226}%b|%f%F{blue}%~%f %# "
        else:
            # NOTE(review): ps1 may be unassigned here if no shell_prompt is
            # configured and the shell isn't bash/zsh -- the .replace calls
            # below would then raise NameError; confirm
            _LOGGER.warning(
                "No built-in custom prompt for shells other than bash or zsh")
    # \b is our bulker-specific code that we populate with the crate
    # registry path
    ps1 = ps1.replace("\\b", name)  # for bash
    ps1 = ps1.replace("%b", name)  # for zsh
    _LOGGER.debug(ps1)
    if echo:
        # just print export statements for the caller's shell to eval
        print("export BULKERCRATE=\"{}\"".format(name))
        print("export BULKERPATH=\"{}\"".format(newpath))
        print("export BULKERSHELLRC=\"{}\"".format(shell_rc))
        if prompt:
            print("export BULKERPROMPT=\"{}\"".format(ps1))
            print("export PS1=\"{}\"".format(ps1))
        print("export PATH={}".format(newpath))
        return
    else:
        _LOGGER.debug("Shell list: {}".format(shell_list))
        new_env["BULKERCRATE"] = name
        new_env["BULKERPATH"] = newpath
        if prompt:
            new_env["BULKERPROMPT"] = ps1
        new_env["BULKERSHELLRC"] = shell_rc
        if strict:
            # carry over only the explicitly whitelisted environment variables
            for k in bulker_config.bulker.envvars:
                new_env[k] = os.environ.get(k, "")
        if os.path.basename(shellpath) == "bash":
            # point bash at the bulker rcfile (strict or normal variant)
            if strict:
                rcfile = mkabs(bulker_config.bulker.rcfile_strict,
                               os.path.dirname(bulker_config._file_path))
            else:
                rcfile = mkabs(bulker_config.bulker.rcfile,
                               os.path.dirname(bulker_config._file_path))
            shell_list.append("--rcfile")
            shell_list.append(rcfile)
            _LOGGER.debug("rcfile: {}".format(rcfile))
            _LOGGER.debug(shell_list)
        if os.path.basename(shellpath) == "zsh":
            # zsh reads startup files from ZDOTDIR rather than an --rcfile
            # flag; point it at a bulker-provided startup folder
            # NOTE(review): both branches derive the folder from
            # rcfile_strict's directory -- presumably both startup folders
            # live alongside it; confirm
            if strict:
                rcfolder = mkabs(
                    os.path.join(
                        os.path.dirname(bulker_config.bulker.rcfile_strict),
                        "zsh_start_strict"),
                    os.path.dirname(bulker_config._file_path))
            else:
                rcfolder = mkabs(
                    os.path.join(
                        os.path.dirname(bulker_config.bulker.rcfile_strict),
                        "zsh_start"),
                    os.path.dirname(bulker_config._file_path))
            new_env["ZDOTDIR"] = rcfolder
            _LOGGER.debug("ZDOTDIR: {}".format(new_env["ZDOTDIR"]))
        _LOGGER.debug(new_env)
        #os.execv(shell_list[0], shell_list[1:])
        # replace the current process with the activated shell
        os.execve(shell_list[0], shell_list[1:], env=new_env)
def bulker_load(manifest, cratevars, bcfg, exe_jinja2_template,
                shell_jinja2_template, crate_path=None, build=False,
                force=False):
    """
    Load a manifest into a crate: write executable and shell wrapper scripts
    for each command, link callable host commands, and record the crate in
    the bulker configuration.

    :param manifest: object with a .manifest attribute holding commands,
        host_commands and imports -- presumably a yacman map; confirm
    :param dict cratevars: dict with 'namespace', 'crate' and 'tag' keys
    :param yacman.YacAttMap bcfg: the bulker configuration object; written
        back to disk at the end
    :param exe_jinja2_template: jinja2 template rendered per command into the
        crate's executable wrappers
    :param shell_jinja2_template: jinja2 template rendered per command into
        the crate's shell ("_"-prefixed) wrappers
    :param str crate_path: where to put the crate; defaults to
        default_crate_folder/namespace/crate/tag
    :param build: if truthy, rendered as a build script template and executed
        via os.system for each command -- NOTE(review): declared like a flag
        but used as a template object (build.render); confirm
    :param bool force: overwrite an already-loaded manifest without prompting
    """
    manifest_name = cratevars['crate']
    # We store them in folder: namespace/crate/version
    if not crate_path:
        crate_path = os.path.join(bcfg.bulker.default_crate_folder,
                                  cratevars['namespace'], manifest_name,
                                  cratevars['tag'])
    # relative crate paths are resolved against the config file's location
    if not os.path.isabs(crate_path):
        crate_path = os.path.join(os.path.dirname(bcfg._file_path),
                                  crate_path)

    _LOGGER.debug("Crate path: {}".format(crate_path))
    _LOGGER.debug("cratevars: {}".format(cratevars))
    # Update the config file; create the nested namespace/crate maps on demand
    if not bcfg.bulker.crates:
        bcfg.bulker.crates = {}
    if not hasattr(bcfg.bulker.crates, cratevars['namespace']):
        bcfg.bulker.crates[cratevars['namespace']] = yacman.YacAttMap({})
    if not hasattr(bcfg.bulker.crates[cratevars['namespace']],
                   cratevars['crate']):
        bcfg.bulker.crates[cratevars['namespace']][
            cratevars['crate']] = yacman.YacAttMap({})
    if hasattr(bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']],
               cratevars['tag']):
        # this namespace/crate/tag was loaded before; confirm overwrite
        _LOGGER.debug(bcfg.bulker.crates[cratevars['namespace']][
            cratevars['crate']].to_dict())
        if not (force or query_yes_no(
                "That manifest has already been loaded. Overwrite?")):
            return
        else:
            bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']][str(
                cratevars['tag'])] = crate_path
            # wipe the existing crate folder so stale executables don't linger
            _LOGGER.warning(
                "Removing all executables in: {}".format(crate_path))
            try:
                shutil.rmtree(crate_path)
            except:
                _LOGGER.error(
                    "Error removing crate at {}. Did your crate path change? Remove it manually."
                    .format(crate_path))
    else:
        bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']][str(
            cratevars['tag'])] = crate_path

    # Now make the crate
    # First add any imports
    mkdir(crate_path, exist_ok=True)
    if hasattr(manifest.manifest, "imports") and manifest.manifest.imports:
        for imp in manifest.manifest.imports:
            imp_cratevars = parse_registry_path(imp)
            imp_crate_path = os.path.join(bcfg.bulker.default_crate_folder,
                                          imp_cratevars['namespace'],
                                          imp_cratevars['crate'],
                                          imp_cratevars['tag'])
            if not os.path.isabs(imp_crate_path):
                imp_crate_path = os.path.join(os.path.dirname(bcfg._file_path),
                                              imp_crate_path)
            if not os.path.exists(imp_crate_path):
                _LOGGER.error("Can't import crate '{}' from '{}'".format(
                    imp, imp_crate_path))
                # Recursively load any non-existant imported crates.
                imp_manifest, imp_cratevars = load_remote_registry_path(
                    bcfg, imp, None)
                _LOGGER.debug(imp_manifest)
                _LOGGER.debug(imp_cratevars)
                bulker_load(imp_manifest, imp_cratevars, bcfg,
                            exe_jinja2_template, shell_jinja2_template,
                            crate_path=None, build=build, force=force)
            _LOGGER.info("Importing crate '{}' from '{}'.".format(
                imp, imp_crate_path))
            # copy the imported crate's wrappers into this crate
            copy_tree(imp_crate_path, crate_path)

    # should put this in a function
    def host_tool_specific_args(bcfg, pkg, hosttool_arg_key):
        # Look up extra per-host, per-tool arguments (or excluded volumes)
        # configured under bulker.tool_args for this package's image;
        # returns "" when nothing is configured.
        _LOGGER.debug("Arg key: '{}'".format(hosttool_arg_key))
        # Here we're parsing the *image*, not the crate registry path.
        imvars = parse_registry_path_image(pkg['docker_image'])
        _LOGGER.debug(imvars)
        try:
            amap = bcfg.bulker.tool_args[imvars['namespace']][imvars['image']]
            # prefer a tag-specific entry, falling back to the default one
            if imvars['tag'] != 'default' and hasattr(amap, imvars['tag']):
                string = amap[imvars['tag']][hosttool_arg_key]
            else:
                string = amap.default[hosttool_arg_key]
            _LOGGER.debug(string)
            return string
        except:
            # best-effort lookup: any missing level means "no extra args"
            _LOGGER.debug("No host/tool args found.")
            return ""

    cmdlist = []
    cmd_count = 0
    if hasattr(manifest.manifest, "commands") and manifest.manifest.commands:
        for pkg in manifest.manifest.commands:
            _LOGGER.debug(pkg)
            pkg.update(bcfg.bulker)  # Add terms from the bulker config
            pkg = copy.deepcopy(
                yacman.YacAttMap(pkg))  # (otherwise it's just a dict)
            # We have to deepcopy it so that changes we make to pkg aren't
            # reflected in bcfg.
            if pkg.container_engine == "singularity" and \
                    "singularity_image_folder" in pkg:
                # derive the singularity image location from the docker image
                # name and the (possibly config-relative) image folder
                pkg["singularity_image"] = os.path.basename(
                    pkg["docker_image"])
                pkg["namespace"] = os.path.dirname(pkg["docker_image"])
                if os.path.isabs(pkg["singularity_image_folder"]):
                    sif = pkg["singularity_image_folder"]
                else:
                    sif = os.path.join(os.path.dirname(bcfg._file_path),
                                       pkg["singularity_image_folder"])
                pkg["singularity_fullpath"] = os.path.join(
                    sif, pkg["namespace"], pkg["singularity_image"])
                mkdir(os.path.dirname(pkg["singularity_fullpath"]),
                      exist_ok=True)
            command = pkg["command"]
            path = os.path.join(crate_path, command)
            _LOGGER.debug("Writing {cmd}".format(cmd=path))
            cmdlist.append(command)

            # Add any host-specific tool-specific args
            hosttool_arg_key = "{engine}_args".format(
                engine=bcfg.bulker.container_engine)
            hts = host_tool_specific_args(bcfg, pkg, hosttool_arg_key)
            _LOGGER.debug("Adding host-tool args: {}".format(hts))
            if hasattr(pkg, hosttool_arg_key):
                pkg[hosttool_arg_key] += " " + hts
            else:
                pkg[hosttool_arg_key] = hts

            # Remove any excluded volumes from the package
            exclude_vols = host_tool_specific_args(bcfg, pkg,
                                                   "exclude_volumes")
            _LOGGER.debug("Volume list: {}".format(pkg["volumes"]))
            _LOGGER.debug("pkg: {}".format(pkg))
            if len(exclude_vols) > 0:
                for item in exclude_vols:
                    _LOGGER.debug("Excluding volume: '{}'".format(item))
                    try:
                        pkg["volumes"].remove(item)
                    except:
                        # volume not present; nothing to exclude
                        pass
                _LOGGER.debug("Volume list: {}".format(pkg["volumes"]))
            else:
                _LOGGER.debug("No excluded volumes")

            # write the executable wrapper and mark it executable
            with open(path, "w") as fh:
                fh.write(exe_jinja2_template.render(pkg=pkg))
                os.chmod(path, 0o755)

            # shell commands ("_"-prefixed interactive variant)
            path_shell = os.path.join(crate_path, "_" + command)
            _LOGGER.debug(
                "Writing shell command: '{cmd}'".format(cmd=path_shell))
            with open(path_shell, "w") as fh:
                fh.write(shell_jinja2_template.render(pkg=pkg))
                os.chmod(path_shell, 0o755)

            if build:
                buildscript = build.render(pkg=pkg)
                x = os.system(buildscript)
                if x != 0:
                    _LOGGER.error(
                        "------ Error building. Build script used: ------")
                    _LOGGER.error(buildscript)
                    _LOGGER.error(
                        "------------------------------------------------")
                _LOGGER.info("Container available at: {cmd}".format(
                    cmd=pkg["singularity_fullpath"]))

    # host commands
    host_cmdlist = []
    if hasattr(manifest.manifest,
               "host_commands") and manifest.manifest.host_commands:
        _LOGGER.info("Populating host commands")
        for cmd in manifest.manifest.host_commands:
            _LOGGER.debug(cmd)
            if not is_command_callable(cmd):
                _LOGGER.warning("Requested host command is not callable and "
                                "therefore not created: '{}'".format(cmd))
                continue
            # link the host executable straight into the crate
            local_exe = find_executable(cmd)
            path = os.path.join(crate_path, cmd)
            host_cmdlist.append(cmd)
            os.symlink(local_exe, path)
            # The old way: TODO: REMOVE THIS
            if False:
                populated_template = LOCAL_EXE_TEMPLATE.format(cmd=local_exe)
                with open(path, "w") as fh:
                    fh.write(populated_template)
                os.chmod(path, 0o755)

    cmd_count = len(cmdlist)
    host_cmd_count = len(host_cmdlist)
    if cmd_count < 1 and host_cmd_count < 1:
        # nothing was written; clean up the empty crate folder(s) and bail
        _LOGGER.error("No commands provided. Crate not created.")
        os.rmdir(crate_path)
        crate_path_parent = os.path.dirname(crate_path)
        if not os.listdir(crate_path_parent):
            os.rmdir(crate_path_parent)
        sys.exit(1)

    rp = "{namespace}/{crate}:{tag}".format(namespace=cratevars['namespace'],
                                            crate=cratevars['crate'],
                                            tag=cratevars['tag'])
    _LOGGER.info("Loading manifest: '{rp}'."
                 " Activate with 'bulker activate {rp}'.".format(rp=rp))
    if cmd_count > 0:
        _LOGGER.info("Commands available: {}".format(", ".join(cmdlist)))
    if host_cmd_count > 0:
        _LOGGER.info("Host commands available: {}".format(
            ", ".join(host_cmdlist)))

    # persist the updated crate registry
    bcfg.write()
def bulker_init(config_path, template_config_path, container_engine=None):
    """
    Initialize a config file.

    :param str config_path: path to bulker configuration file to
        create/initialize
    :param str template_config_path: path to bulker configuration file to
        copy FROM
    :param str container_engine: container engine to configure ("docker" or
        "singularity"); when None, the first callable of the two is guessed.
        NOTE(review): if neither engine is callable this stays None and is
        written to the config as-is -- confirm that is acceptable
    """
    if not config_path:
        _LOGGER.error("You must specify a file path to initialize.")
        return
    if not template_config_path:
        _LOGGER.error("You must specify a template config file path.")
        return
    if not container_engine:
        check_engines = ["docker", "singularity"]
        for engine in check_engines:
            if is_command_callable(engine):
                _LOGGER.info("Guessing container engine is {}.".format(engine))
                container_engine = engine
                break  # it's a priority list, stop at the first found engine
    # proceed unless the file exists and the user declines to overwrite
    if config_path and not (os.path.exists(config_path) and
                            not query_yes_no("Exists. Overwrite?")):
        # dcc.write(config_path)
        # Init should *also* write the templates.
        dest_folder = os.path.dirname(config_path)
        dest_templates_dir = os.path.join(dest_folder, TEMPLATE_SUBDIR)
        # templates_subdir = TEMPLATE_SUBDIR
        # copy the template folder alongside the new config file
        copy_tree(os.path.dirname(template_config_path), dest_templates_dir)
        new_template = os.path.join(dest_folder,
                                    os.path.basename(template_config_path))
        bulker_config = yacman.YacAttMap(filepath=template_config_path,
                                         writable=True)
        _LOGGER.debug("Engine used: {}".format(container_engine))
        bulker_config.bulker.container_engine = container_engine
        # point the config at the engine-specific wrapper templates
        if bulker_config.bulker.container_engine == "docker":
            bulker_config.bulker.executable_template = os.path.join(
                TEMPLATE_SUBDIR, DOCKER_EXE_TEMPLATE)
            bulker_config.bulker.shell_template = os.path.join(
                TEMPLATE_SUBDIR, DOCKER_SHELL_TEMPLATE)
            bulker_config.bulker.build_template = os.path.join(
                TEMPLATE_SUBDIR, DOCKER_BUILD_TEMPLATE)
        elif bulker_config.bulker.container_engine == "singularity":
            bulker_config.bulker.executable_template = os.path.join(
                TEMPLATE_SUBDIR, SINGULARITY_EXE_TEMPLATE)
            bulker_config.bulker.shell_template = os.path.join(
                TEMPLATE_SUBDIR, SINGULARITY_SHELL_TEMPLATE)
            bulker_config.bulker.build_template = os.path.join(
                TEMPLATE_SUBDIR, SINGULARITY_BUILD_TEMPLATE)
        # rc files are engine-independent
        bulker_config.bulker.rcfile = os.path.join(TEMPLATE_SUBDIR,
                                                   RCFILE_TEMPLATE)
        bulker_config.bulker.rcfile_strict = os.path.join(
            TEMPLATE_SUBDIR, RCFILE_STRICT_TEMPLATE)
        bulker_config.write(config_path)
        # copyfile(template_config_path, new_template)
        # os.rename(new_template, config_path)
        _LOGGER.info("Wrote new configuration file: {}".format(config_path))
    else:
        _LOGGER.warning(
            "Can't initialize, file exists: {} ".format(config_path))