コード例 #1
0
    def test_dict_from_param(self):
        """Exercise ``dict_from_param`` on each kind of input used here.

        Covers blank values, a ready-made dict, an inline ``key:value``
        string, a tab-separated mapping file, and the error messages raised
        for a malformed string and a malformed file.
        """
        expected = {'a': '1', 'b': '2', 'c': '3'}

        # blank values give an empty dict
        for blank in (None, ''):
            self.assertEqual(dict_from_param(blank), {})

        # a dict comes back equal to the input
        self.assertEqual(dict_from_param({'a': 1}), {'a': 1})

        # comma-separated "key:value" string
        self.assertDictEqual(dict_from_param('a:1,b:2,c:3'), expected)

        # a string that is neither a mapping nor a file is rejected
        with self.assertRaises(ValueError) as caught:
            dict_from_param('test')
        self.assertEqual(str(caught.exception),
                         'Invalid dictionary string: "test".')

        # mapping file: one "key<TAB>value" pair per line
        path = join(self.tmpdir, 'test.txt')
        with open(path, 'w') as fh:
            fh.write(''.join('{}\t{}\n'.format(key, val)
                             for key, val in expected.items()))
        self.assertDictEqual(dict_from_param(path), expected)

        # mapping file whose lines have no separator is rejected;
        # one bad line is written per expected entry
        with open(path, 'w') as fh:
            fh.write('test\n' * len(expected))
        with self.assertRaises(ValueError) as caught:
            dict_from_param(path)
        self.assertEqual(str(caught.exception),
                         f'Invalid dictionary file: "{path}".')
        remove(path)
コード例 #2
0
ファイル: analyze.py プロジェクト: sarah872/HGTector
    def assign_taxonomy(self):
        """Assign a TaxID to every input genome and locate their LCA.

        User-supplied TaxIDs in ``self.input_tax`` are parsed with
        ``dict_from_param`` and checked against ``self.taxdump``. Genomes in
        ``self.data`` that are still unassigned get a TaxID from
        ``self.infer_genome_tax``. The taxonomy database is then refined and
        the lowest common ancestor of all assigned TaxIDs is stored in
        ``self.lca``. Progress is reported with ``print``.

        Raises
        ------
        ValueError
            If the user-supplied taxonomy cannot be parsed and there is more
            than one genome; if a user-supplied TaxID is absent from
            ``self.taxdump``; or if a TaxID cannot be inferred for a genome.
        """
        # user-defined TaxIDs of input genomes
        if not self.input_tax:
            self.input_tax = {}
        else:
            try:
                self.input_tax = dict_from_param(self.input_tax)
            except ValueError:
                if len(self.data) > 1:
                    raise ValueError('Invalid input taxonomy format.')
                # single-sample analysis: the raw value is taken as the
                # TaxID of the only genome
                only_sample = max(self.data.keys())
                self.input_tax = {only_sample: self.input_tax}
            print('User-specified TaxIDs of input genomes:')
            for sample, taxid in sorted(self.input_tax.items()):
                if taxid not in self.taxdump:
                    # TODO: read from both temp and master taxdump
                    message = ('TaxID {} is not present in taxonomy '
                               'database.')
                    raise ValueError(message.format(taxid))
                name = self.taxdump[taxid]['name']
                print('  {}: {} ({}).'.format(sample, taxid, name))

        # genomes still lacking a TaxID get one inferred
        pending = sorted(
            sample for sample in self.data if sample not in self.input_tax)
        if pending:
            print('Auto-inferring plausible taxIds for input genomes based on '
                  'taxonomy of search results...')
            for sample in pending:
                try:
                    inferred = self.infer_genome_tax(
                        self.data[sample], self.taxdump, self.input_cov)
                    taxid, coverage = inferred
                    self.input_tax[sample] = taxid
                except ValueError:
                    message = ('Cannot auto-infer taxonomy for {}. '
                               'Please specify manually.')
                    raise ValueError(message.format(sample))
                # NOTE(review): "{:2g}" sets a minimum width of 2, not a
                # precision - confirm that this is the intended format
                name = self.taxdump[taxid]['name']
                print('  {}: {} ({}) (covering {:2g}% best hits).'.format(
                    sample, taxid, name, coverage))

        # refine taxonomy database
        print('Refining taxonomy database...')
        taxids = self.sum_taxids()
        refine_taxdump(taxids, self.taxdump)
        add_children(self.taxdump)
        retained = len(self.taxdump)
        print('Done. Retained {} taxa.'.format(retained))

        # lowest common ancestor (LCA) of all genomes
        self.lca = find_lca(self.input_tax.values(), self.taxdump)
        description = describe_taxon(self.lca, self.taxdump)
        print('All input genomes belong to {} ({}).'.format(
            self.lca, description))