def parse(self, value, config):
    '''
    Build the symbol table from built-in defaults, applying any overrides
    found under 'HMM_symbols' in config.

    The value argument is not used here.  Keys in the config section that
    do not correspond to a known symbol are reported with a warning and
    ignored; known keys replace the default and are reported at debug
    level.  Every symbol left unset in the config (all of them when the
    section is absent) is reported with a warning.

    Returns the resulting dict mapping symbol name to symbol.
    '''
    symbols = dict(
        match='+',
        mismatch='-',
        unknown='?',
        unsequenced='n',
        gap='-',
        unaligned='?',
        masked='x',
    )

    overrides = get_nested(config, 'HMM_symbols')
    if overrides is None:
        # no section in config: same as a section that sets nothing
        overrides = {}

    for name, symbol in overrides.items():
        if name in symbols:
            symbols[name] = symbol
            log.debug(
                f"Overwriting default symbol for {name} with '{symbol}'")
            continue
        log.warning("Unused symbol in configuration: "
                    f"{name} -> '{symbol}'")

    for name, symbol in symbols.items():
        if name in overrides:
            continue
        log.warning(f'Symbol for {name} unset in config, '
                    f"using default '{symbol}'")

    return symbols
def find_strains(self, test_strains: List[str] = None):
    '''
    Helper method to get strains supplied in config, or from test_strains

    Stores test_strains on self.test_strains and sets self.strains to a
    sorted list of strain names.

    If 'strains' is present in self.config, the sorted unique values are
    used directly.  Otherwise each entry of test_strains is treated as a
    path template containing {strain} and {chrom} placeholders: matching
    files are found with glob and the strain / chromosome values are
    extracted from each file name.

    Raises ValueError when
      - no strains are in the config and test_strains is None,
      - no files match any of the templates, or
      - a discovered strain lacks one of self.chromosomes.
    '''
    strains = get_nested(self.config, 'strains')
    self.test_strains = test_strains

    if strains is not None:
        # strains set in config
        self.strains = sorted(set(strains))
        return

    if test_strains is None:
        err = ('Unable to find strains in config and '
               'no test_strains provided')
        # not inside an exception handler, so log.error rather than
        # log.exception (which would append an empty traceback)
        log.error(err)
        raise ValueError(err)

    # try to build strains from wildcards in test_strains
    strains = {}
    for test_strain in test_strains:
        # find matching files
        strain_glob = test_strain.format(strain='*', chrom='*')
        log.info(f'searching for {strain_glob}')

        # NOTE: the template text around the placeholders is used as a
        # regular expression as-is (it is not escaped)
        pattern = test_strain.format(
            strain='(?P<strain>.*?)',
            chrom='(?P<chrom>[^_]*?)')

        for fname in glob.iglob(strain_glob):
            # extract wildcard matches
            match = re.match(pattern, fname)
            if match:
                log.debug(
                    f'matched with {match.group("strain", "chrom")}')
                strain, chrom = match.group('strain', 'chrom')
                strains.setdefault(strain, set()).add(chrom)

    if not strains:
        err = ('Found no chromosome sequence files '
               f'in {test_strains}')
        log.error(err)
        raise ValueError(err)

    # check if requested chromosomes are within the list of chroms
    chrom_set = set(self.chromosomes)
    for strain, chroms in strains.items():
        if not chrom_set.issubset(chroms):
            # report one (arbitrary) missing chromosome
            not_found = chrom_set.difference(chroms).pop()
            err = (f'Strain {strain} is missing chromosomes. '
                   f'Unable to find chromosome \'{not_found}\'')
            log.error(err)
            raise ValueError(err)

    self.strains = sorted(strains)
def get_interval_states(self) -> List:
    '''
    Return one label per state, taken from the reference state (if any)
    followed by the known states in the analysis params.

    A state's 'interval_name' is used when it has one, otherwise its
    'name'.
    '''
    reference = get_nested(self.config, 'analysis_params.reference')
    # a single reference state becomes a one element list
    states = [] if reference is None else [reference]

    known = get_nested(self.config, 'analysis_params.known_states')
    if known is not None:
        states = states + known

    labels = []
    for state in states:
        if 'interval_name' in state:
            labels.append(state['interval_name'])
        else:
            labels.append(state['name'])
    return labels
def test_get_nested():
    ''' Check get_nested on flat, dotted, missing and None inputs '''
    # (container, key, expected); expected None means the lookup misses
    cases = [
        ({'a': 1}, 'a', 1),
        ({'a': 1}, 'b', None),
        ({'a': {'b': 2}}, 'a.b', 2),
        ({'a': {'b': 2}}, 'a.c', None),
        ({'a': {'b': {'c': 3}}}, 'a.b.c', 3),
        (None, 'key', None),
    ]

    for container, key, expected in cases:
        result = get_nested(container, key)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def parse(self, value, config=None):
    '''
    Resolve value, falling back to the config entry at self.config_path.

    When self.nullable is set, a falsy value is replaced by whatever
    get_nested finds at self.config_path in config; a truthy value is
    kept.  Otherwise the result of validate(...) is used, called with
    the message 'No <name> provided' (presumably raised or reported by
    validate when nothing is found -- confirm against validate).

    If self.wildcards is set, the resolved value is passed to
    check_wildcards together with self.wildcards.

    config defaults to an empty dict; a fresh one is created per call
    so no dict object is shared between calls.

    Returns the resolved value.
    '''
    if config is None:
        config = {}

    if self.nullable:
        if not value:
            value = get_nested(config, self.config_path)
    else:
        value = validate(config,
                         self.config_path,
                         f'No {self.name} provided',
                         value)

    if self.wildcards:
        check_wildcards(value, self.wildcards)

    return value
def get_states(self) -> Tuple[List, List]:
    '''
    Return (known_states, unknown_states) as lists of state names read
    from the analysis params.

    The reference name, when present, is the first known state, followed
    by the 'name' of each entry in known_states.  Unknown states are the
    'name' of each entry in unknown_states.  Missing sections give empty
    lists.
    '''
    known_states = []

    reference = get_nested(self.config, 'analysis_params.reference.name')
    if reference is not None:
        known_states.append(reference)

    known = get_nested(self.config, 'analysis_params.known_states')
    if known is not None:
        known_states.extend([state['name'] for state in known])

    unknown = get_nested(self.config, 'analysis_params.unknown_states')
    if unknown is None:
        unknown_states = []
    else:
        unknown_states = [state['name'] for state in unknown]

    return known_states, unknown_states
def _set_strains(self, test_strains: str = ''):
    '''
    Determine the test strain templates and hand them to find_strains.

    A non-empty test_strains argument is wrapped in a single element
    list; otherwise 'paths.test_strains' is read from the config.  Each
    template (when there are any) is checked for the 'strain,chrom'
    wildcards before find_strains is called.
    '''
    if test_strains:
        # need to support list for test strains
        templates = [test_strains]
    else:
        templates = get_nested(self.config, 'paths.test_strains')

    if templates is not None:
        for template in templates:
            check_wildcards(template, 'strain,chrom')

    self.find_strains(templates)
def get(self, key: str):
    '''
    Look up a nested key in the underlying config dictionary.

    Delegates to get_nested, so the result is None when any key along
    the path is not in the dict.
    '''
    found = get_nested(self.config, key)
    return found