def set_parameters(self, args):
    """Validate and set parameters.

    Copies every command-line argument onto the instance, resolves the input
    file(s), creates the output directory, fills in settings from the
    configuration, and normalizes their types and units.

    Parameters
    ----------
    args : namespace
        command-line arguments; must be an object accepted by ``vars()``
        (e.g. ``argparse.Namespace``), not a plain dict

    Raises
    ------
    ValueError
        found invalid parameter(s)
    """
    # load arguments
    for key, val in vars(args).items():
        setattr(self, key, val)

    # check input directory and data
    if self.input:
        if isfile(self.input):
            # single file: map its ID to the file itself
            self.input_map = {file2id(self.input): self.input}
        elif isdir(self.input):
            # directory: map each ID to the full path of its tsv file
            self.input_map = {
                k: join(self.input, v)
                for k, v in id2file_map(self.input, ext='tsv').items()}
        else:
            raise ValueError(
                'Invalid input data file or directory: {}.'.format(
                    self.input))
        if not self.input_map:
            raise ValueError('No input data are found under: {}.'.format(
                self.input))

    # check / create output directory
    makedirs(self.output, exist_ok=True)
    self.prev_map = id2file_map(self.output, 'tsv')

    # load configurations
    get_config(self, 'evalue', 'analyze.evalue', float)
    for key in ('maxhits', 'identity', 'coverage'):
        get_config(self, key, 'analyze.{}'.format(key))
    # configuration keys are the attribute names without underscores
    for key in ('input_cov', 'self_rank', 'close_size', 'distal_top'):
        get_config(self, key, 'grouping.{}'.format(key.replace('_', '')))
    for key in ('weighted', 'outliers', 'orphans', 'bandwidth', 'bw_steps',
                'low_part', 'noise', 'fixed', 'silhouette', 'self_low'):
        get_config(self, key, 'predict.{}'.format(key.replace('_', '')))

    # convert boolean values
    for key in ('weighted', 'orphans', 'self_low'):
        setattr(self, key, arg2bool(getattr(self, key, None)))

    # convert fractions to percentages
    # (a non-zero value below 1 is taken to be a fraction)
    for metric in ('input_cov', 'noise', 'fixed', 'distal_top'):
        val = getattr(self, metric)
        if val and val < 1:
            setattr(self, metric, val * 100)

    # convert distal top to a factor to save compute
    self.match_th = 1 - self.distal_top / 100

    # force coverage >= 50 to ensure that candidates are sequential
    # (a missing coverage value counts as 0 and is rejected as well)
    if (self.input_cov or 0) < 50:
        raise ValueError('Taxonomy coverage for auto-inference must be at '
                         'least 50%.')
def set_parameters(self, args):
    """Workflow for validating and setting arguments.

    Copies every command-line argument onto the instance, fills in settings
    from the configuration, applies the default protocol if requested, then
    validates the temporary directory and the external programs required by
    the chosen compile mode.

    Parameters
    ----------
    args : namespace
        command-line arguments; must be an object accepted by ``vars()``
        (e.g. ``argparse.Namespace``)

    Raises
    ------
    ValueError
        the temporary directory does not exist, or an executable required
        by the compile mode cannot be found
    """
    # load arguments
    for key, val in vars(args).items():
        setattr(self, key, val)

    # load configurations
    for key in ('capital', 'block', 'latin'):
        get_config(self, key, f'taxonomy.{key}')
    for key in ('retries', 'delay', 'timeout'):
        get_config(self, key, f'download.{key}')
    for key in ('diamond', 'makeblastdb'):
        get_config(self, key, f'program.{key}')
    for key in ('threads', 'tmpdir'):
        get_config(self, key, f'local.{key}')

    # convert boolean values
    for key in ('capital', 'latin'):
        setattr(self, key, arg2bool(getattr(self, key, None)))

    # default protocol
    # This must be applied before the checks below: it selects the compile
    # mode, and both the executable check and the thread detection depend
    # on that mode.
    if self.default:
        print('The default protocol is selected for database building.')
        print('The program will download all protein sequences of NCBI '
              'RefSeq genomes of bacteria, archaea, fungi and protozoa, '
              'keep one genome per species, plus all NCBI-defined '
              'reference and representative genomes.')
        self.cats = 'microbe'
        self.sample = 1
        self.rank = 'species'
        self.reference = True
        self.representative = True
        self.compile = 'diamond'

    # make temporary directory
    if not self.tmpdir:
        self.tmpdir = mkdtemp()
        self.mkdtemp = True  # mark for cleanup
    if not isdir(self.tmpdir):
        raise ValueError(f'Invalid temporary directory: {self.tmpdir}.')

    # check local executables
    for mode, exe in (('blast', 'makeblastdb'), ('diamond', 'diamond')):
        if self.compile not in (mode, 'both'):
            continue
        # fall back to the bare program name, to be looked up by `which`
        if getattr(self, exe) is None:
            setattr(self, exe, exe)
        if which(getattr(self, exe)) is None:
            raise ValueError(
                f'Invalid {exe} executable: {getattr(self, exe)}.')

    # determine number of CPUs to use
    # (fall back to one thread if the CPU count cannot be determined)
    if self.compile in ('diamond', 'both') and not self.threads:
        self.threads = cpu_count() or 1

    makedirs(self.output, exist_ok=True)
def test_arg2bool(self):
    """Check which inputs arg2bool accepts as true, false, or rejects."""
    # inputs that must evaluate to true
    for truthy in (True, 'True', 'Yes', 'y', '1'):
        self.assertTrue(arg2bool(truthy))

    # inputs that must evaluate to false
    for falsy in (None, False, 'False', 'No', 'n', '0'):
        self.assertFalse(arg2bool(falsy))

    # any other string is rejected
    with self.assertRaises(ValueError) as ctx:
        arg2bool('test')
    expected = 'Boolean value expected.'
    self.assertEqual(str(ctx.exception), expected)