Esempio n. 1
0
    def test_infer_close_group(self):
        """Exercise `Analyze.infer_close_group` in three scenarios.

        1. Single self taxon, no size limit.
        2. Same self taxon, with a minimum close group size of 5.
        3. Two self taxa, no size limit.
        """
        me = Analyze()
        me.taxdump = taxdump_from_text(taxdump_proteo)
        add_children(me.taxdump)
        me.groups = {}

        def clade(tid):
            # a taxon followed by everything beneath it
            return [tid] + get_descendants(tid, me.taxdump)

        # scenario 1: close group is parent of LCA of self group
        me.self_tax = ['562']  # E. coli
        me.groups['self'] = set(clade('562'))
        me.close_tax = None
        me.close_size = None
        me.infer_close_group()
        self.assertListEqual(me.close_tax, ['561'])  # Escherichia
        self.assertSetEqual(me.groups['close'], {'561', '2580236'})

        # scenario 2: close group must have at least 5 taxa
        me.close_tax = None
        me.groups['close'] = None
        me.close_size = 5
        me.infer_close_group()
        self.assertListEqual(me.close_tax, ['543'])  # Enterobacteriaceae
        expected = {'543', '620', '622', '570', '548', '561', '2580236'}
        self.assertSetEqual(me.groups['close'], expected)

        # scenario 3: close group is LCA of multiple self groups
        me.self_tax = ['561', '620']  # Escherichia and Shigella
        me.groups['self'] = set().union(*[clade(x) for x in me.self_tax])
        me.close_tax = None
        me.groups['close'] = None
        me.close_size = None
        me.infer_close_group()
        self.assertListEqual(me.close_tax, ['543'])  # Enterobacteriaceae
        expected = {'543', '570', '548'}
        self.assertSetEqual(me.groups['close'], expected)
Esempio n. 2
0
    def define_groups(self):
        """Define the "self" and "close" groups, in that order.

        Notes
        -----
        For each group, the `<group>_tax` attribute is either normalized via
        `list_from_param` (when already set to a truthy value) or filled in by
        calling `infer_<group>_group`. `groups[<group>]` is then populated
        with the top-level taxIds and all their descendants, unless the
        inference step has already stored it. The self group is subtracted
        from the close group when the latter is built here. A short report is
        printed for each group.
        """
        self.groups = {}
        for key in ('self', 'close'):
            attr = '{}_tax'.format(key)
            given = getattr(self, attr)

            if given:
                # user-defined group
                setattr(self, attr, list_from_param(given))
                print('User-defined {} group:'.format(key))
            else:
                # auto-infer group
                infer = getattr(self, 'infer_{}_group'.format(key))
                infer()
                print('Auto-inferred {} group:'.format(key))

            # re-read: either branch above may have replaced the attribute
            tops = getattr(self, attr)

            # collect taxIds that belong to group (skipped when the inference
            # step already stored them)
            if key not in self.groups:
                clades = []
                for top in tops:
                    clades.append([top] + get_descendants(top, self.taxdump))
                self.groups[key] = set().union(*clades)

                # subtract self group from close group
                if key == 'close':
                    self.groups['close'] = self.groups['close'].difference(
                        self.groups['self'])

            # report group content
            for top in tops:
                label = describe_taxon(top, self.taxdump)
                print('  {} ({})'.format(top, label))
            size = len(self.groups[key])
            print('{} group has {} taxa.'.format(key.capitalize(), size))
Esempio n. 3
0
    def infer_close_group(self):
        """Infer close group automatically.

        Notes
        -----
        Starting at the LCA of `self_tax`, climb the taxonomy one level at a
        time until the clade under the current taxon, minus the self group,
        is non-empty and (when `close_size` is set) has at least `close_size`
        members. Climbing also stops when a taxon is its own parent or its
        parent is '0'.

        Assigns:
        1. `close_tax`: one-element list holding the taxId where the climb
           stopped.
        2. `groups['close']`: taxIds under that taxon, excluding the self
           group.
        """
        # start from the LCA of self group
        node = find_lca(self.self_tax, self.taxdump)
        while True:

            # candidate close group: clade under node, minus self group
            clade = set([node] + get_descendants(node, self.taxdump))
            members = clade.difference(self.groups['self'])

            # stop once non-empty and the size limit (if any) is met
            if members:
                if not self.close_size or len(members) >= self.close_size:
                    break

            # move up one level, unless already at the top
            parent = self.taxdump[node]['parent']
            if parent in (node, '0'):
                break
            node = parent

        self.close_tax = [node]
        self.groups['close'] = members
Esempio n. 4
0
    def define_groups(self):
        """Define the "self" and "close" groups.

        Notes
        -----
        Assign these attributes:
        1. `self_tax`: top-level taxId(s) of the self group.
        2. `close_tax`: top-level taxId(s) of the close group.
        3. `groups` (keys: self, close): all taxIds under each group.

        No "distal" entry is created by this method. A group whose
        `<group>_tax` attribute is empty is inferred by calling
        `infer_<group>_group`; otherwise the given value is normalized with
        `list_from_param`. A summary of each group is printed.
        """
        self.groups = {}
        for name in ('self', 'close'):
            attr = f'{name}_tax'
            preset = getattr(self, attr)

            if not preset:
                # auto-infer group
                getattr(self, f'infer_{name}_group')()
                print(f'Auto-inferred {name} group:')
            else:
                # user-defined group
                setattr(self, attr, list_from_param(preset))
                print(f'User-defined {name} group:')

            # top-level taxIds as they stand after the step above
            tops = getattr(self, attr)

            # collect taxIds that belong to group, unless inference already
            # filled them in
            if name not in self.groups:
                clades = [[tid] + get_descendants(tid, self.taxdump)
                          for tid in tops]
                self.groups[name] = set().union(*clades)

                # subtract self group from close group
                if name == 'close':
                    self.groups['close'] = self.groups['close'].difference(
                        self.groups['self'])

            # report group content
            for tid in tops:
                desc = describe_taxon(tid, self.taxdump)
                print(f'  {tid} ({desc})')
            size = len(self.groups[name])
            print(f'{name.capitalize()} group has {size} taxa.')
Esempio n. 5
0
 def test_get_descendants(self):
     """Check the descendants listed for the Asgard group taxon."""
     tree = taxdump_from_text(taxdump_archaea)
     add_children(tree)
     expected = ['1655434', '1655637', '1538547']
     # '1935183' is the Asgard group
     self.assertListEqual(get_descendants('1935183', tree), expected)
Esempio n. 6
0
 def test_get_descendants(self):
     """Check the descendants listed for the Asgard group taxon."""
     # shallow copy: a new top-level dict whose values are still the
     # fixture's own objects
     tree = dict(taxdump_archaea.items())
     add_children(tree)
     expected = ['1655434', '1655637', '1538547']
     # '1935183' is the Asgard group
     self.assertListEqual(get_descendants('1935183', tree), expected)