def update_packages(self, config_file):
    """
    Parse data from a divvy configuration file.

    Given a divvy configuration file, this function will update (not
    overwrite) existing compute packages with the newly parsed values.
    It does not affect any currently active settings.

    :param str config_file: path to file with new divvy configuration data
    """
    env_settings = parse_config_file(config_file)
    loaded_packages = env_settings[NEW_COMPUTE_KEY]
    # Anchor each package's submission template to the config file's
    # directory so relative template paths resolve regardless of CWD.
    for pkg in loaded_packages.values():
        # isinstance instead of `type(...) is dict`: idiomatic, and also
        # accepts dict subclasses produced by the YAML parse.
        if isinstance(pkg, dict):
            template = pkg.get("submission_template")
            # Guard against a missing/null template entry, which would
            # previously crash os.path.isabs.
            if template is not None and not os.path.isabs(template):
                pkg["submission_template"] = os.path.join(
                    os.path.dirname(config_file), template)
    if self.compute_packages is None:
        self.compute_packages = PathExAttMap(loaded_packages)
    else:
        # Merge rather than replace, so previously loaded packages survive.
        self.compute_packages.add_entries(loaded_packages)
    _LOGGER.debug("Available divvy packages: {}".format(', '.join(
        self.list_compute_packages())))
    self.config_file = config_file
def reset_active_settings(self):
    """
    Clear out current compute settings.

    Replaces the active `compute` map with a fresh, empty one; loaded
    compute packages are untouched.

    :return bool: success flag
    """
    self.compute = PathExAttMap()
    return True
def test_basic_construction(tmpdir, from_file, bundled_piface):
    """ PipelineInterface constructor accepts a Mapping or a filepath. """
    if from_file:
        conf_path = tmpdir.join("pipe-iface-conf.yaml").strpath
        with open(conf_path, 'w') as f:
            yaml.safe_dump(bundled_piface, f)
        pipe_iface_config = conf_path
    else:
        pipe_iface_config = bundled_piface
    iface = PipelineInterface(pipe_iface_config)
    # Both top-level sections must be present.
    assert PL_KEY in iface, "Missing pipeline key ({})".format(PL_KEY)
    assert PROTOMAP_KEY in iface, \
        "Missing protocol mapping key: ({})".format(PROTOMAP_KEY)
    # File-based construction records provenance; Mapping-based does not.
    assert iface.pipe_iface_file == (pipe_iface_config if from_file else None)
    if from_file:
        assert iface.pipelines_path == tmpdir.strpath
    else:
        assert iface.pipelines_path is None
    # Contents of both sections should round-trip through construction.
    assert bundled_piface[PL_KEY] == remove_piface_requirements(iface[PL_KEY])
    assert PathExAttMap(bundled_piface[PROTOMAP_KEY]) == iface[PROTOMAP_KEY]
    # Different access modes should agree with one another.
    assert iface.pipelines == iface[PL_KEY]
    assert list(iface.pipelines.keys()) == iface.pipeline_names
def html_select(choices, caravel=False):
    """
    Create argument for type parameter on argparse.ArgumentParser.add_argument.

    :param list[object] choices: collection of valid argument provisions
        via to a particular CLI option
    :param bool caravel: whether this is being used in the caravel context
    :return callable: argument to the type parameter of an
        argparse.ArgumentParser's add_argument method.
    :raise TypeError: if choices is not a list
    """
    if not isinstance(choices, list):
        raise TypeError(
            "Argument to choices parameter must be list, got {}.".format(
                type(choices)))
    element_spec = {
        "element_type": "select",
        "element_args": {"option": choices}}
    caravel_data = PathExAttMap(element_spec)

    def fun(x=None, caravel_data=caravel_data, caravel=caravel):
        # Default-argument binding freezes the values at definition time.
        return caravel_data if caravel else x

    return fun
def get_project_outputs(project, schema):
    """
    Get project level outputs with path-like attributes populated with
    project attributes

    :param peppy.Project project: project whose attribute values fill the
        path templates
    :param Iterable[dict] schema: parsed schema(s); a list, possibly with
        imported schemas preceding the main one
    :return attmap.PathExAttMap: mapping with populated path-like attributes
    :raise EidoSchemaInvalidError: if the schema lacks a properties section
    """
    from attmap import PathExAttMap
    # if not any([isinstance(project, Project),
    #             issubclass(type(project), Project)]):
    #     raise TypeError("Can only populate paths in peppy.Project "
    #                     "objects or it subclasses")
    # NOTE(review): the code takes the LAST schema, but the original comment
    # said "first"; presumably imports precede the main schema in the list,
    # making [-1] the main one -- confirm intent.
    schema = schema[-1]
    if PROP_KEY not in schema:
        raise EidoSchemaInvalidError("Schema is missing properties section.")
    res = {}
    s = schema[PROP_KEY]
    path_sects = _get_path_sect_keys(s, keys=PATH_LIKE)
    for ps in path_sects:
        res[ps] = s[ps]
        for p in PATH_LIKE:
            try:
                # Fill template placeholders with the project's attributes.
                res[ps][p] = s[ps][p].format(**dict(project.items()))
            except Exception as e:
                # Deliberately best-effort: a section without this path-like
                # key, or a missing project attribute, is just skipped.
                _LOGGER.debug("Caught exception: {}.\n Could not populate {} "
                              "path".format(p, str(e)))
    return PathExAttMap(res)
def test_genome_folder_is_pwd_if_no_folder_key_and_raw_entries_passed(
        self, ro_rgc):
    """ Without a folder key, the genome folder defaults to the CWD. """
    entries = {k: v for k, v in ro_rgc.items() if k != CFG_FOLDER_KEY}
    new_rgc = RefGenConf(entries=PathExAttMap(entries))
    assert new_rgc[CFG_FOLDER_KEY] == os.getcwd()
def test_does_not_need_all_sample_independent_data(
        self, sections, basic_project_data, sample_independent_data):
    """ Subset of all known independent data that's present is grabbed. """
    proj = PathExAttMap(sample_independent_data)
    observed = grab_project_data(proj)
    expected = {name: value
                for name, value in basic_project_data.items()
                if name in sections}
    compare_mappings(expected, observed)
def activate_package(self, package_name):
    """
    Activates a compute package.

    This copies the computing attributes from the configuration file into
    the `compute` attribute, where the class stores current compute
    settings.

    :param str package_name: name for non-resource compute bundle, the name
        of a subsection in an environment configuration file
    :return bool: success flag for attempt to establish compute settings
    """
    _LOGGER.info("Activating compute package '{}'".format(package_name))
    known = self.compute_packages
    if not (package_name and known and package_name in known):
        # Scenario in which environment and environment compute are
        # both present--but don't evaluate to True--is fairly harmless.
        _LOGGER.debug("Environment = {}".format(self.compute_packages))
        return False
    if self.compute is None:
        # First activation: start from an empty settings map.
        _LOGGER.debug("Creating Project compute")
        self.compute = PathExAttMap()
    _LOGGER.debug("Adding entries for package_name '%s'", package_name)
    self.compute.add_entries(known[package_name])
    # Submission-template absolutization is handled at update time.
    return True
def html_checkbox(caravel=False, checked=False):
    """
    Create argument for type parameter on argparse.ArgumentParser.add_argument.

    :param bool caravel: whether this is being used in the caravel context
    :param bool checked: whether to add a particular key-value entry to a
        collection used by caravel
    :return callable: argument to the type parameter of an
        argparse.ArgumentParser's add_argument method.
    """
    caravel_data = \
        PathExAttMap({"element_type": "checkbox", "element_args": {}})
    if checked:
        caravel_data.add_entries({"element_args": {"checked": True}})

    def fun(x=None, caravel_data=caravel_data, caravel=caravel):
        # SECURITY NOTE(review): eval() on a user-provided string executes
        # arbitrary code; presumably x is only ever "True"/"False" here --
        # consider ast.literal_eval or an explicit comparison instead.
        return caravel_data if caravel else eval(x)

    return fun
def _mv_to_modifiers(map, k_from, k_to):
    """
    Move the sections from the root of the mapping to the
    sample_modifiers section. Some of the target sections may be
    multi-layer (encoded as list in the reference mapping)

    :param Mapping map: object to move sections within
    :param str k_from: key of the section to move
    :param str k_to: key of the sample_modifiers subsection to move to
    """
    # NOTE(review): parameter name 'map' shadows the builtin; kept as-is
    # since renaming would change the visible signature.
    mv_msg = "Section '{}' moved to sample_modifiers.{}"
    if k_from in map:
        # TODO: determine whether we want to support the implications
        # reformatting or drop the old cfg format altogether
        if k_from in ["implied_attributes", "implied_columns"]:
            raise NotImplementedError(
                "The attribute implications section ({}) follows the "
                "old format. Reformatting is not implemented. Edit the "
                "config file manually (add '{}.{}') to comply with PEP "
                "2.0.0 specification: http://pep.databio.org/en/latest/"
                "specification/#sample-modifier-imply".format(
                    k_from, SAMPLE_MODS_KEY, IMPLIED_KEY))
        # Ensure the sample_modifiers section exists before moving into it.
        map.setdefault(SAMPLE_MODS_KEY, PathExAttMap())
        if isinstance(k_to, list):
            # Two-level destination: sample_modifiers.<k_to[0]>.<k_to[1]>
            if k_to[0] in map[SAMPLE_MODS_KEY]:
                if k_to[1] not in map[SAMPLE_MODS_KEY][k_to[0]]:
                    map[SAMPLE_MODS_KEY][k_to[0]].\
                        setdefault(k_to[1], PathExAttMap())
                else:
                    # Destination already populated; leave it untouched.
                    return
            else:
                map[SAMPLE_MODS_KEY].setdefault(
                    k_to[0], PathExAttMap())
            map[SAMPLE_MODS_KEY][k_to[0]][k_to[1]] = map[k_from]
            del map[k_from]
            _LOGGER.debug(mv_msg.format(k_from, ".".join(k_to)))
        else:
            # Single-level destination; only move if not already present.
            if k_to not in map[SAMPLE_MODS_KEY]:
                map[SAMPLE_MODS_KEY][k_to] = map[k_from]
                del map[k_from]
                _LOGGER.debug(mv_msg.format(k_from, k_to))
def made_genome_config_file(temp_genome_config_file):
    """ Make the test session's genome config file. """
    genome_folder = os.path.dirname(temp_genome_config_file)
    # Top-level key-value header lines of the YAML config.
    header_lines = [
        "{}: {}".format(CFG_FOLDER_KEY, genome_folder),
        "{}: {}".format(CFG_SERVERS_KEY,
                        "http://staging.refgenomes.databio.org/"),
        "{}: {}".format(CFG_VERSION_KEY, package_version),
        "{}:".format(CFG_GENOMES_KEY)]
    # Genome entries, indented one level under the genomes key.
    genome_lines = [" " + l
                    for l in PathExAttMap(CONF_DATA).get_yaml_lines()]
    with open(temp_genome_config_file, 'w') as f:
        f.write("\n".join(header_lines + genome_lines))
    return temp_genome_config_file
def test_grabs_only_sample_independent_data(
        self, sample_independent_data, extra_data):
    """ Only Project data defined as Sample-independent is retrieved. """
    # Build the input: the independent data with each extra merged in.
    data = copy.deepcopy(sample_independent_data)
    for extra in extra_data:
        data.update(extra)
    # Convert to the correct argument type for this test case.
    proj = PathExAttMap(data)
    # The extras must be filtered out; only independent data survives.
    observed = grab_project_data(proj)
    compare_mappings(sample_independent_data, observed)
def html_range(caravel=False, min_val=0, max_val=10, step=1, value=0):
    """
    Create argument for type parameter on argparse.ArgumentParser.add_argument.

    :param bool caravel: whether this is being used in the caravel context
    :param min_val: value stored under the element's "min" key
    :param max_val: value stored under the element's "max" key
    :param step: increment between values; fractional steps imply float
        parsing of input, whole steps imply int parsing
    :param value: value stored under the element's "value" key
    :return callable: argument to the type parameter of an
        argparse.ArgumentParser's add_argument method.
    """
    caravel_data = PathExAttMap({
        "element_type": "range",
        "element_args": {"min": min_val,
                         "max": max_val,
                         "step": step,
                         "value": value}})
    # Choose the parser once, outside the returned closure.
    cast = float if step < 1 else int

    def fun(x=None, caravel_data=caravel_data, caravel=caravel):
        return caravel_data if caravel else cast(x)

    return fun
class NullValueHelperTests:
    """ Tests of accuracy of null value arbiter. """

    _DATA = {"a": 1, "b": [2]}

    @pytest.fixture(
        params=[lambda d: dict(d), lambda d: PathExAttMap().add_entries(d)])
    def kvs(self, request):
        """ For test cases provide KV pair map of parameterized type."""
        return request.param(self._DATA)

    def test_missing_key_neither_null_nor_non_null(self, kvs):
        """ A key not in a mapping has neither null nor non-null value. """
        absent = "new_key"
        assert absent not in kvs
        assert not has_null_value(absent, kvs)
        assert not non_null_value(absent, kvs)

    @pytest.mark.parametrize("coll", [list(), set(), tuple(), dict()])
    def test_empty_collection_is_null(self, coll, kvs):
        """ A key with an empty collection instance as its value is null. """
        key = "empty"
        assert key not in kvs
        kvs[key] = coll
        assert has_null_value(key, kvs)
        assert not non_null_value(key, kvs)

    def test_none_is_null(self, kvs):
        """ A key with None as value is null. """
        key = "nv"
        assert key not in kvs
        kvs[key] = None
        assert has_null_value(key, kvs)
        assert not non_null_value(key, kvs)

    @pytest.mark.parametrize("k", _DATA.keys())
    def test_non_nulls(self, k, kvs):
        """ Keys with non-None atomic or nonempty collection are non-null. """
        assert k in kvs
        assert non_null_value(k, kvs)
def parse_config_file(self, cfg_path, amendments=None):
    """
    Parse provided yaml config file and check required fields exist.

    :param str cfg_path: path to the config file to read and parse
    :param Iterable[str] amendments: Name of amendments to activate
    :raises KeyError: if config file lacks required section(s)
    :raises OSError: if the config file path does not exist
    :raises MissingAmendmentError: if a requested amendment is absent
    """
    # Lazily create the config section on first parse.
    if CONFIG_KEY not in self:
        self[CONFIG_KEY] = PathExAttMap()
    if not os.path.exists(cfg_path):
        raise OSError(
            "Project config file path does not exist: {}".format(cfg_path))
    with open(cfg_path, 'r') as conf_file:
        config = yaml.safe_load(conf_file)
    assert isinstance(config, Mapping), \
        "Config file parse did not yield a Mapping; got {} ({})".\
        format(config, type(config))
    _LOGGER.debug("Raw ({}) config data: {}".format(cfg_path, config))
    # recursively import configs
    if PROJ_MODS_KEY in config and CFG_IMPORTS_KEY in config[PROJ_MODS_KEY]\
            and config[PROJ_MODS_KEY][CFG_IMPORTS_KEY]:
        # Make import paths absolute before recursing into them.
        _make_sections_absolute(
            config[PROJ_MODS_KEY], [CFG_IMPORTS_KEY], cfg_path)
        _LOGGER.info(
            "Importing external Project configurations: {}".format(
                ", ".join(config[PROJ_MODS_KEY][CFG_IMPORTS_KEY])))
        for i in config[PROJ_MODS_KEY][CFG_IMPORTS_KEY]:
            _LOGGER.debug("Processing external config: {}".format(i))
            if os.path.exists(i):
                # Imported configs are parsed first, so this file's own
                # entries (added below) overwrite theirs.
                self.parse_config_file(cfg_path=i)
            else:
                _LOGGER.warning("External Project configuration does not"
                                " exist: {}".format(i))

    self[CONFIG_KEY].add_entries(config)
    # Parse yaml into the project.config attributes
    _LOGGER.debug("Adding attributes: {}".format(", ".join(config)))
    # Overwrite any config entries with entries in the amendments
    amendments = [amendments] if isinstance(amendments, str) else amendments
    if amendments:
        for amendment in amendments:
            c = self[CONFIG_KEY]
            if PROJ_MODS_KEY in c and AMENDMENTS_KEY in c[PROJ_MODS_KEY] \
                    and c[PROJ_MODS_KEY][AMENDMENTS_KEY] is not None:
                _LOGGER.debug(
                    "Adding entries for amendment '{}'".format(amendment))
                try:
                    amends = c[PROJ_MODS_KEY][AMENDMENTS_KEY][amendment]
                except KeyError:
                    raise MissingAmendmentError(
                        amendment, c[PROJ_MODS_KEY][AMENDMENTS_KEY])
                _LOGGER.debug("Updating with: {}".format(amends))
                self[CONFIG_KEY].add_entries(amends)
                _LOGGER.info("Using amendments: {}".format(amendment))
            else:
                raise MissingAmendmentError(amendment)
        # Record which amendments are active only after all succeeded.
        self[ACTIVE_AMENDMENTS_KEY] = amendments

    # determine config version and reformat it, if needed
    self[CONFIG_KEY][CONFIG_VERSION_KEY] = ".".join(self._get_cfg_v())

    # here specify cfg sections that may need expansion
    relative_vars = [CFG_SAMPLE_TABLE_KEY, CFG_SUBSAMPLE_TABLE_KEY]
    _make_sections_absolute(self[CONFIG_KEY], relative_vars, cfg_path)
def test_settings_nonempty(self, dcc):
    """ Test if get_active_package produces a nonempty PathExAttMap object """
    # The active package must differ from an empty map.
    assert dcc.get_active_package() != PathExAttMap()
def test_update_packages(self, dcc, config_file):
    """ Test updating does not produce empty compute packages """
    dcc.update_packages(config_file)
    loaded = dcc.compute_packages
    assert loaded != PathExAttMap()
# Remote test assets: genome -> {asset name -> archive extension}.
REMOTE_ASSETS = {
    "rCRSd": {"bowtie2_index": ".tgz", "fasta": ".tgz"},
    "mouse_chrM2x": {"bowtie2_index": ".tgz", "fasta": ".tgz"}}

# One (genome, asset, tag) request per remote asset, all tagged "default".
REQUESTS = [(genome, asset, "default")
            for genome, ext_by_asset in REMOTE_ASSETS.items()
            for asset in ext_by_asset]

URL_BASE = "https://raw.githubusercontent.com/databio/refgenieserver/master/files"


def _bind_to_path(kvs):
    """ Wrap each value of the given pairs as a path mapping. """
    return [(key, lift_into_path_pair(val)) for key, val in kvs]


def lift_into_path_pair(name):
    """ Wrap a raw name in a single-entry path mapping. """
    return {"path": name}


# Genome config entries: genome -> assets section with path-wrapped data.
CONF_DATA = [
    (genome, {CFG_ASSETS_KEY: PathExAttMap(_bind_to_path(data))})
    for genome, data in [("hg38", HG38_DATA),
                         ("mm10", MM10_DATA),
                         ("rCRSd", MITO_DATA)]]


def bind_to_assets(data):
    """ Nest the given data under the assets key. """
    return {CFG_ASSETS_KEY: data}


def get_conf_genomes():
    """
    Get the collection of reference genome assembly names used in test data.

    :return list[str]: collection of test data reference genome assembly names
    """
    return [genome for genome, _ in CONF_DATA]
# NOTE(review): this closing brace terminates a mapping literal (presumably
# REMOTE_ASSETS) whose opening lines fall outside this chunk.
}

# One (genome, asset, tag) request per remote asset, all with default tag.
REQUESTS = [(g, a, "default") for g, ext_by_asset in REMOTE_ASSETS.items()
            for a in ext_by_asset]

URL_BASE = "https://raw.githubusercontent.com/databio/refgenieserver/master/files"


def _bind_to_path(kvs):
    # Wrap each value of the given pairs as a path mapping.
    return [(k, lift_into_path_pair(v)) for k, v in kvs]


def lift_into_path_pair(name):
    # Wrap a raw name in a single-entry path mapping.
    return {"path": name}


# Genome config entries: genome -> assets section with path-wrapped data.
CONF_DATA = [(g, {
    CFG_ASSETS_KEY: PathExAttMap(_bind_to_path(data))
}) for g, data in [("hg38", HG38_DATA), ("mm10", MM10_DATA),
                   ("rCRSd", MITO_DATA)]]


def bind_to_assets(data):
    # Nest the given data under the assets key.
    return {CFG_ASSETS_KEY: data}


def get_conf_genomes():
    """
    Get the collection of reference genome assembly names used in test data.

    :return list[str]: collection of test data reference genome assembly names
    """
    return list(list(zip(*CONF_DATA))[0])
class ComputingConfiguration(PathExAttMap):
    """
    Represents computing configuration objects.

    The ComputingConfiguration class provides a computing configuration object
    that is an *in memory* representation of a `divvy` computing configuration
    file. This object has various functions to allow a user to activate,
    modify, and retrieve computing configuration files, and use these values
    to populate job submission script templates.

    :param str config_file: YAML file specifying computing package data (The
        `DIVCFG` file).
    :param type no_env_error: type of exception to raise if divvy settings
        can't be established, optional; if null (the default), a warning
        message will be logged, and no exception will be raised.
    :param type no_compute_exception: type of exception to raise if compute
        settings can't be established, optional; if null (the default), a
        warning message will be logged, and no exception will be raised.
    """

    def __init__(self, config_file=None,
                 no_env_error=None, no_compute_exception=None):
        super(ComputingConfiguration, self).__init__()
        # Mapping of package name -> compute settings; filled in by
        # update_packages below.
        self.compute_packages = None
        if config_file:
            if os.path.isfile(config_file):
                self.config_file = config_file
            else:
                _LOGGER.error(
                    "Config file path isn't a file: {}".format(config_file))
                raise IOError(config_file)
        else:
            # No explicit file: fall back to the first config found via
            # environment variables, then to the bundled default config.
            _LOGGER.debug("No local config file was provided")
            _LOGGER.debug(
                "Checking this set of environment variables: {}".format(
                    self.compute_env_var))
            divcfg_env_var, divcfg_file = get_first_env_var(
                self.compute_env_var) or ["", ""]
            if os.path.isfile(divcfg_file):
                _LOGGER.debug("Found global config file in {}: {}".format(
                    divcfg_env_var, divcfg_file))
                self.config_file = divcfg_file
            else:
                _LOGGER.info(
                    "Using default config file, no global config file provided in environment "
                    "variable(s): {}".format(str(self.compute_env_var)))
                self.config_file = self.default_config_file
        try:
            self.update_packages(self.config_file)
        except Exception as e:
            # Best-effort load; missing attributes are reported below via
            # _handle_missing_env_attrs.
            _LOGGER.error("Can't load config file '%s'",
                          str(self.config_file))
            _LOGGER.error(str(type(e).__name__) + str(e))
        self._handle_missing_env_attrs(
            self.config_file, when_missing=no_env_error)

        # Initialize default compute settings.
        _LOGGER.debug("Establishing project compute settings")
        self.compute = None
        self.activate_package(DEFAULT_COMPUTE_RESOURCES_NAME)

        # Either warn or raise exception if the compute is null.
        if self.compute is None:
            message = "Failed to establish compute settings."
            if no_compute_exception:
                no_compute_exception(message)
            else:
                _LOGGER.warning(message)
        else:
            _LOGGER.debug("Compute: %s", str(self.compute))

    @property
    def compute_env_var(self):
        """
        Environment variable through which to access compute settings.

        :return list[str]: names of candidate environment variables, for
            which value may be path to compute settings file; first found
            is used.
        """
        return COMPUTE_SETTINGS_VARNAME

    @property
    def default_config_file(self):
        """
        Path to default compute environment settings file.

        :return str: Path to default compute settings file
        """
        return os.path.join(
            self.templates_folder, "default_compute_settings.yaml")

    @property
    def template(self):
        """
        Get the currently active submission template.

        :return str: submission script content template for current state
        """
        with open(self.compute.submission_template, 'r') as f:
            return f.read()

    @property
    def templates_folder(self):
        """
        Path to folder with default submission templates.

        :return str: path to folder with default submission templates
        """
        return os.path.join(os.path.dirname(__file__), "submit_templates")

    def activate_package(self, package_name):
        """
        Activates a compute package.

        This copies the computing attributes from the configuration file into
        the `compute` attribute, where the class stores current compute
        settings.

        :param str package_name: name for non-resource compute bundle, the
            name of a subsection in an environment configuration file
        :return bool: success flag for attempt to establish compute settings
        """
        # Hope that environment & environment compute are present.
        _LOGGER.info("Activating compute package '{}'".format(package_name))
        if package_name and self.compute_packages \
                and package_name in self.compute_packages:
            # Augment compute, creating it if needed.
            if self.compute is None:
                _LOGGER.debug("Creating Project compute")
                self.compute = PathExAttMap()
                _LOGGER.debug("Adding entries for package_name '%s'",
                              package_name)
            self.compute.add_entries(self.compute_packages[package_name])
            # Ensure submission template is absolute.
            # This is handled at update.
            # if not os.path.isabs(self.compute.submission_template):
            #     try:
            #         self.compute.submission_template = os.path.join(
            #             os.path.dirname(self.config_file),
            #             self.compute.submission_template)
            #     except AttributeError as e:
            #         # Environment and environment compute should at least
            #         # have been set as null-valued attributes, so execution
            #         # here is an error.
            #         _LOGGER.error(str(e))
            return True
        else:
            # Scenario in which environment and environment compute are
            # both present--but don't evaluate to True--is fairly harmless.
            _LOGGER.debug("Environment = {}".format(self.compute_packages))
        return False

    def clean_start(self, package_name):
        """
        Clear current active settings and then activate the given package.

        :param str package_name: name of the resource package to activate
        :return bool: success flag
        """
        self.reset_active_settings()
        return self.activate_package(package_name)

    def get_active_package(self):
        """
        Returns settings for the currently active compute package

        :return PathExAttMap: data defining the active compute package
        """
        return self.compute

    def list_compute_packages(self):
        """
        Returns a list of available compute packages.

        :return set[str]: names of available compute packages
        """
        return set(self.compute_packages.keys())

    def reset_active_settings(self):
        """
        Clear out current compute settings.

        :return bool: success flag
        """
        self.compute = PathExAttMap()
        return True

    def update_packages(self, config_file):
        """
        Parse data from divvy configuration file.

        Given a divvy configuration file, this function will update (not
        overwrite) existing compute packages with existing values. It does
        not affect any currently active settings.

        :param str config_file: path to file with new divvy configuration
            data
        """
        env_settings = parse_config_file(config_file)
        loaded_packages = env_settings[NEW_COMPUTE_KEY]
        for key, value in loaded_packages.items():
            # NOTE(review): isinstance would be the idiomatic check here.
            if type(loaded_packages[key]) is dict:
                for key2, value2 in loaded_packages[key].items():
                    if key2 == "submission_template":
                        # Anchor relative template paths to the config
                        # file's directory.
                        if not os.path.isabs(loaded_packages[key][key2]):
                            loaded_packages[key][key2] = os.path.join(
                                os.path.dirname(config_file),
                                loaded_packages[key][key2])
        if self.compute_packages is None:
            self.compute_packages = PathExAttMap(loaded_packages)
        else:
            # Merge rather than replace previously loaded packages.
            self.compute_packages.add_entries(loaded_packages)
        _LOGGER.debug("Available divvy packages: {}".format(', '.join(
            self.list_compute_packages())))
        self.config_file = config_file

    def write_script(self, output_path, extra_vars=None):
        """
        Given currently active settings, populate the active template to
        write a submission script.

        :param str output_path: Path to file to write as submission script
        :param Iterable[Mapping] extra_vars: A list of Dict objects with
            key-value pairs with which to populate template fields. These
            will override any values in the currently active compute
            package.
        :return str: Path to the submission script file
        """
        from copy import deepcopy
        # Copy so that populating the template never mutates the active
        # compute package.
        variables = deepcopy(self.compute)
        if extra_vars:
            if not isinstance(extra_vars, list):
                extra_vars = [extra_vars]
            # Reversed so that earlier entries in extra_vars win.
            for kvs in reversed(extra_vars):
                variables.update(kvs)
        _LOGGER.info("Writing script to {}".format(
            os.path.abspath(output_path)))
        return write_submit_script(output_path, self.template, variables)

    def _handle_missing_env_attrs(self, config_file, when_missing):
        """ Default environment settings aren't required; warn, though. """
        missing_env_attrs = \
            [attr for attr in [NEW_COMPUTE_KEY, "config_file"]
             if getattr(self, attr, None) is None]
        if not missing_env_attrs:
            return
        message = "'{}' lacks environment attributes: {}". \
            format(config_file, missing_env_attrs)
        if when_missing is None:
            _LOGGER.warning(message)
        else:
            when_missing(message)