def parse(self, value, config):
        '''
        Build the symbol table, starting from the built-in defaults and
        replacing any entry that the 'HMM_symbols' section of config
        provides.

        value is accepted for interface compatibility and is not read.
        Config entries with an unrecognised name are reported and ignored;
        every default that config leaves untouched is reported as well.
        Returns the resulting name -> symbol dictionary.
        '''
        symbols = dict(
            match='+',
            mismatch='-',
            unknown='?',
            unsequenced='n',
            gap='-',
            unaligned='?',
            masked='x',
        )

        overrides = get_nested(config, 'HMM_symbols')
        if overrides is None:
            # a missing section behaves like an empty one:
            # nothing to apply and every default is reported below
            overrides = {}

        for name, symbol in overrides.items():
            if name in symbols:
                symbols[name] = symbol
                log.debug(
                    f"Overwriting default symbol for {name} with '{symbol}'")
                continue

            log.warning("Unused symbol in configuration: "
                        f"{name} -> '{symbol}'")

        # report each default that the config did not replace
        for name, symbol in symbols.items():
            if name in overrides:
                continue
            log.warning(f'Symbol for {name} unset in config, '
                        f"using default '{symbol}'")

        return symbols
    def find_strains(self, test_strains: List[str] = None):
        '''
        Helper method to get strains supplied in config, or from test_strains

        Stores test_strains on self.test_strains and sets self.strains to a
        sorted list of unique strain names.  Strains listed under the
        'strains' config key take priority.  Otherwise every entry of
        test_strains is treated as a filename template containing {strain}
        and {chrom} fields: the template is globbed with '*' in both fields
        and each file found is matched back against the template to recover
        its strain and chromosome.

        Raises ValueError if neither source of strains is available, if no
        file matches any template, or if a discovered strain lacks one of
        the chromosomes in self.chromosomes.
        '''
        strains = get_nested(self.config, 'strains')
        self.test_strains = test_strains

        if strains is not None:  # strains set in config
            self.strains = sorted(set(strains))
            return

        if test_strains is None:
            err = ('Unable to find strains in config and '
                   'no test_strains provided')
            # log.error rather than log.exception: no exception is being
            # handled at this point, so there is no traceback to attach
            log.error(err)
            raise ValueError(err)

        # try to build strains from wildcards in test_strains
        strains = {}
        for test_strain in test_strains:
            # find matching files
            strain_glob = test_strain.format(strain='*', chrom='*')
            log.info(f'searching for {strain_glob}')

            # the same template with named groups in place of the wildcards
            # NOTE(review): literal parts of the template are not escaped,
            # so regex metacharacters in the path keep their regex meaning
            strain_regex = test_strain.format(strain='(?P<strain>.*?)',
                                              chrom='(?P<chrom>[^_]*?)')

            for fname in glob.iglob(strain_glob):
                # extract wildcard matches.  The whole filename must match:
                # with a prefix match the lazy groups stop as early as
                # possible, so a template ending in a wildcard would
                # capture an empty string
                match = re.fullmatch(strain_regex, fname)
                if match is None:
                    continue

                strain, chrom = match.group('strain', 'chrom')
                log.debug(f'matched with {(strain, chrom)}')
                strains.setdefault(strain, set()).add(chrom)

        if not strains:
            err = ('Found no chromosome sequence files '
                   f'in {test_strains}')
            log.error(err)
            raise ValueError(err)

        # check if requested chromosomes are within the list of chroms
        chrom_set = set(self.chromosomes)
        for strain, chroms in strains.items():
            missing = chrom_set.difference(chroms)
            if missing:
                # only one of the missing chromosomes is named in the error
                not_found = missing.pop()
                err = (f'Strain {strain} is missing chromosomes. '
                       f'Unable to find chromosome \'{not_found}\'')
                log.error(err)
                raise ValueError(err)

        self.strains = sorted(strains)
    def get_interval_states(self) -> List:
        '''
        Collect one label per state, reference first and known states after.

        The label is the state's 'interval_name' when it has one and its
        'name' otherwise.  A missing reference or a missing list of known
        states contributes nothing to the result.
        '''
        reference = get_nested(self.config, 'analysis_params.reference')
        known = get_nested(self.config, 'analysis_params.known_states')

        # the reference is a single state entry, the known states a list
        states = ([] if reference is None else [reference]) \
            + ([] if known is None else known)

        labels = []
        for state in states:
            if 'interval_name' in state:
                labels.append(state['interval_name'])
            else:
                labels.append(state['name'])

        return labels
Ejemplo n.º 4
0
def test_get_nested():
    '''
    get_nested follows dot-separated keys through nested dictionaries and
    yields None when a key is absent or the container itself is None
    '''
    # (container, dotted key, expected result)
    cases = [
        ({'a': 1}, 'a', 1),
        ({'a': 1}, 'b', None),
        ({'a': {'b': 2}}, 'a.b', 2),
        ({'a': {'b': 2}}, 'a.c', None),
        ({'a': {'b': {'c': 3}}}, 'a.b.c', 3),
        (None, 'key', None),
    ]

    for container, key, expected in cases:
        found = get_nested(container, key)
        if expected is None:
            # missing entries must be None itself, not merely equal to it
            assert found is None
        else:
            assert found == expected
    def parse(self, value, config=None):
        '''
        Resolve the value of this argument from the supplied value and
        the config, then return it.

        When self.nullable is set, a falsy value is replaced by whatever
        get_nested finds at self.config_path, which may be None.  Otherwise
        the choice is delegated to validate, which receives the config, the
        config path, the message 'No <name> provided' and the value
        (presumably raising with that message when nothing is found --
        confirm against validate).

        If self.wildcards is set, the resolved value is passed through
        check_wildcards before being returned.

        config defaults to an empty dictionary; None is accepted and
        treated the same way.
        '''
        # a fresh dict per call instead of a mutable default argument that
        # would be shared between every call and every instance
        if config is None:
            config = {}

        if self.nullable:
            if not value:
                value = get_nested(config, self.config_path)

        else:
            value = validate(config, self.config_path,
                             f'No {self.name} provided', value)

        if self.wildcards:
            check_wildcards(value, self.wildcards)

        return value
    def get_states(self) -> Tuple[List, List]:
        '''
        Read the state names out of the analysis params.

        Returns a pair of lists: the known state names, led by the
        reference name when one is configured, and the unknown state
        names.  Sections missing from the config yield no names.
        '''
        reference = get_nested(self.config, 'analysis_params.reference.name')
        known_states = [] if reference is None else [reference]

        known = get_nested(self.config, 'analysis_params.known_states')
        if known is not None:
            known_states.extend(state['name'] for state in known)

        unknown = get_nested(self.config, 'analysis_params.unknown_states')
        if unknown is None:
            unknown_states = []
        else:
            unknown_states = [state['name'] for state in unknown]

        return known_states, unknown_states
    def _set_strains(self, test_strains: str = ''):
        '''
        Determine the strain filename templates and hand them to
        find_strains.

        A non-empty test_strains argument is used as the only template;
        otherwise the templates are read from 'paths.test_strains' in the
        config.  Every template is checked for the strain and chrom
        wildcards before find_strains is called.
        '''
        if test_strains:
            # find_strains works on a list, so wrap the single template
            templates = [test_strains]
        else:
            templates = get_nested(self.config, 'paths.test_strains')

        # the config lookup may come back empty handed, nothing to check then
        for template in ([] if templates is None else templates):
            check_wildcards(template, 'strain,chrom')

        self.find_strains(templates)
 def get(self, key: str):
     '''
     Look up key in the underlying config dictionary by delegating to
     get_nested, and return whatever it finds.
     '''
     config = self.config
     return get_nested(config, key)