Esempio n. 1
0
    def set_parameters(self, args):
        """Validate and set parameters.

        Copies every command-line argument onto this object, resolves the
        input file(s), creates the output directory, fills in values from
        the configuration and normalizes them (booleans, percentages).

        Parameters
        ----------
        args : argparse.Namespace or similar
            command-line arguments; must be an object supported by
            ``vars()`` (i.e., having ``__dict__``), not a plain dict

        Raises
        ------
        ValueError
            found invalid parameter(s)
        """
        # load arguments
        for key, val in vars(args).items():
            setattr(self, key, val)

        # check input directory and data
        if self.input:
            if isfile(self.input):
                # single file: map its Id to the file path
                self.input_map = {file2id(self.input): self.input}
            elif isdir(self.input):
                # directory: map each Id to the full path of its tsv file
                self.input_map = {
                    k: join(self.input, v)
                    for k, v in id2file_map(self.input, ext='tsv').items()
                }
            else:
                raise ValueError(
                    'Invalid input data file or directory: {}.'.format(
                        self.input))
            if not self.input_map:
                raise ValueError('No input data are found under: {}.'.format(
                    self.input))

        # check / create output directory
        makedirs(self.output, exist_ok=True)
        self.prev_map = id2file_map(self.output, 'tsv')

        # load configurations
        get_config(self, 'evalue', 'analyze.evalue', float)
        for key in ('maxhits', 'identity', 'coverage'):
            get_config(self, key, 'analyze.{}'.format(key))
        # configuration keys of the two groups below carry no underscores
        for key in ('input_cov', 'self_rank', 'close_size', 'distal_top'):
            get_config(self, key, 'grouping.{}'.format(key.replace('_', '')))
        for key in ('weighted', 'outliers', 'orphans', 'bandwidth', 'bw_steps',
                    'low_part', 'noise', 'fixed', 'silhouette', 'self_low'):
            get_config(self, key, 'predict.{}'.format(key.replace('_', '')))

        # convert boolean values
        for key in ('weighted', 'orphans', 'self_low'):
            setattr(self, key, arg2bool(getattr(self, key, None)))

        # convert fractions to percentages
        # (falsy values such as None or 0 are left untouched)
        for metric in ('input_cov', 'noise', 'fixed', 'distal_top'):
            val = getattr(self, metric)
            if val and val < 1:
                setattr(self, metric, val * 100)

        # convert distal top to a factor to save compute
        self.match_th = 1 - self.distal_top / 100

        # force coverage >= 50 to ensure that candidates are sequential
        # (a missing coverage value is treated as 0 and thus rejected)
        if (self.input_cov or 0) < 50:
            raise ValueError('Taxonomy coverage for auto-inference must be at '
                             'least 50%.')
Esempio n. 2
0
    def set_parameters(self, args):
        """Workflow for validating and setting arguments.

        Parameters
        ----------
        args : argparse.Namespace or similar
            command-line arguments; must be an object supported by
            ``vars()`` (i.e., having ``__dict__``), not a plain dict

        Raises
        ------
        ValueError
            If the temporary directory does not exist or a required
            executable cannot be found.

        Notes
        -----
        The default protocol is applied before executables and threads are
        checked, because it overrides ``compile`` and those checks depend on
        the final value of ``compile``.
        """
        # load arguments
        for key, val in vars(args).items():
            setattr(self, key, val)

        # load configurations
        for key in ('capital', 'block', 'latin'):
            get_config(self, key, f'taxonomy.{key}')
        for key in ('retries', 'delay', 'timeout'):
            get_config(self, key, f'download.{key}')
        for key in ('diamond', 'makeblastdb'):
            get_config(self, key, f'program.{key}')
        for key in ('threads', 'tmpdir'):
            get_config(self, key, f'local.{key}')

        # convert boolean values
        for key in ('capital', 'latin'):
            setattr(self, key, arg2bool(getattr(self, key, None)))

        # make temporary directory
        if not self.tmpdir:
            self.tmpdir = mkdtemp()
            self.mkdtemp = True  # mark for cleanup
        if not isdir(self.tmpdir):
            raise ValueError(f'Invalid temporary directory: {self.tmpdir}.')

        # default protocol
        # must precede the executable / thread checks below: it forces
        # compile = 'diamond', which those checks need to see
        if self.default:
            print('The default protocol is selected for database building.')
            print('The program will download all protein sequences of NCBI '
                  'RefSeq genomes of bacteria, archaea, fungi and protozoa, '
                  'keep one genome per species, plus all NCBI-defined '
                  'reference and representative genomes.')
            self.cats = 'microbe'
            self.sample = 1
            self.rank = 'species'
            self.reference = True
            self.representative = True
            self.compile = 'diamond'

        # check local executables
        for key, exe in {'blast': 'makeblastdb', 'diamond': 'diamond'}.items():
            if self.compile in (key, 'both'):
                # fall back to the bare program name (resolved via PATH)
                if getattr(self, exe) is None:
                    setattr(self, exe, exe)
                if which(getattr(self, exe)) is None:
                    raise ValueError(
                        f'Invalid {exe} executable: {getattr(self, exe)}.')

        # determine number of CPUs to use
        if self.compile in ('diamond', 'both') and not self.threads:
            # cpu_count may return None when the count is undeterminable
            self.threads = cpu_count() or 1

        makedirs(self.output, exist_ok=True)
Esempio n. 3
0
    def test_arg2bool(self):
        """Check arg2bool on true-like, false-like and invalid inputs."""
        # inputs expected to be interpreted as true
        for truthy in (True, 'True', 'Yes', 'y', '1'):
            self.assertTrue(arg2bool(truthy))

        # inputs expected to be interpreted as false
        for falsy in (None, False, 'False', 'No', 'n', '0'):
            self.assertFalse(arg2bool(falsy))

        # any other string should be rejected with a fixed message
        with self.assertRaises(ValueError) as caught:
            arg2bool('test')
        self.assertEqual(str(caught.exception), 'Boolean value expected.')