def _run(obj):
    """Check obj.reduce_annotations under several AnnoOptions settings.

    Prints a banner for obj.name, then reduces obj.associations with the
    default options and with every keep_ND/keep_NOT combination. After each
    reduction obj.hms is called with a label and the previous timer value;
    the value it returns is used as the next timer value.

    The assertions verify that:
      * excluding 'IEA' equals including every other code in evobj.code2nt
      * including the 'High_Throughput' group equals including its codes
      * the default reduction has as many items as keep_ND=False/keep_NOT=False
      * keeping both ND and NOT leaves the number of associations unchanged
      * dropping only NOT (or only ND) removes exactly chk.num_nots
        (or chk.num_nds) items
    """
    #pylint: disable=superfluous-parens
    banner = '\nTESTING: {T} --------------------------------------------------'
    print(banner.format(T=obj.name))
    chk = _Chk(obj)
    obj.chk_qualifiers()
    if obj.name != 'id2gos':
        assert chk.num_nots != 0, '{NAME} HAS 0 NOT Qualifiers'.format(NAME=obj.name)

    assc = obj.associations
    num_assc = len(assc)
    tic = timeit.default_timer()
    _ev = obj.evobj

    # Default options
    assc_dflt = obj.reduce_annotations(assc, AnnoOptions(_ev))
    tic = obj.hms('Default', tic)

    # Every keep_ND/keep_NOT combination: (timer label, keep_ND, keep_NOT)
    combos = (
        ('{N:6,} assc_nd0_not0', False, False),
        ('{N:6,} assc_nd0_not1', False, True),
        ('{N:6,} assc_nd1_not0', True, False),
        ('{N:6,} assc_nd1_not1', True, True),
    )
    reduced = []
    for label, keep_nd, keep_not in combos:
        opts = AnnoOptions(_ev, keep_ND=keep_nd, keep_NOT=keep_not)
        annos = obj.reduce_annotations(assc, opts)
        tic = obj.hms(label.format(N=len(annos)), tic)
        reduced.append(annos)
    assc_nd0_not0, assc_nd0_not1, assc_nd1_not0, assc_nd1_not1 = reduced

    reduce_fn = obj.reduce_annotations

    # Excluding IEA must match including everything except IEA
    codes_inc = set(_ev.code2nt.keys()) - {'IEA'}
    by_exclude = reduce_fn(assc, AnnoOptions(obj.evobj, ev_exclude={'IEA'}))
    by_include = reduce_fn(assc, AnnoOptions(obj.evobj, ev_include=codes_inc))
    assert by_exclude == by_include

    # Including a group name must match including that group's codes
    codes_inc = _ev.grp2codes['High_Throughput']
    by_group = reduce_fn(assc, AnnoOptions(obj.evobj, ev_include={'High_Throughput'}))
    by_codes = reduce_fn(assc, AnnoOptions(obj.evobj, ev_include=codes_inc))
    assert by_group == by_codes

    assert len(assc_dflt) == len(assc_nd0_not0)
    assert len(assc_nd1_not1) == num_assc

    # if obj.name in {'gaf', 'id2gos', 'gpad', 'id2go'}:
    msg = '{N}: ACT({A:,}) != EXP({E:,})'
    actual = len(assc_nd1_not0)
    expected = num_assc - chk.num_nots
    assert actual == expected, msg.format(N=obj.name, A=actual, E=expected)

    actual = len(assc_nd0_not1)
    expected = num_assc - chk.num_nds
    assert actual == expected, msg.format(N=obj.name, A=actual, E=expected)
def _get_id2gos(self, associations, **kws):
    """Reduce the given associations and return them as a dict.

    The keyword arguments are forwarded, together with self.evobj, to
    AnnoOptions. The direction of the returned dict follows
    options.b_geneid2gos: the result of get_dbid2goids when it is true,
    otherwise the result of get_goid2dbids.
    """
    options = AnnoOptions(self.evobj, **kws)
    # The default reduction removes the annotations listed below.
    # For all options, see goatools/anno/opts.py:
    #   * Evidence_Code == ND -> No biological Data available
    #   * Qualifiers contain NOT
    reduced = self.reduce_annotations(associations, options)
    if options.b_geneid2gos:
        return self.get_dbid2goids(reduced)
    return self.get_goid2dbids(reduced)
def get_taxid2asscs(self, taxids=None, **kws):
    """Return reduced associations for each taxid, indexed in both directions.

    For every taxid produced by self._get_taxids(taxids), the annotations in
    self.taxid2asscs[taxid] are reduced using AnnoOptions(self.evobj, **kws).
    The result is a nested defaultdict where result[taxid]['ID2GOs'] holds
    the output of get_dbid2goids and result[taxid]['GO2IDs'] holds the
    output of get_goid2dbids for the reduced annotations.
    """
    # WAS: get_annotations_taxid2dct
    options = AnnoOptions(self.evobj, **kws)
    result = cx.defaultdict(lambda: cx.defaultdict(lambda: cx.defaultdict(set)))
    for taxid in self._get_taxids(taxids):
        reduced = self.reduce_annotations(self.taxid2asscs[taxid], options)
        per_taxid = result[taxid]
        per_taxid['ID2GOs'] = self.get_dbid2goids(reduced)
        per_taxid['GO2IDs'] = self.get_goid2dbids(reduced)
    return result
def _get_id2gos(self, associations, **kws):
    """Reduce the given associations and return them as a dict.

    The keyword arguments are forwarded, together with self.evobj, to
    AnnoOptions. The direction of the returned dict follows
    options.b_geneid2gos: the result of get_dbid2goids when it is true,
    otherwise the result of get_goid2dbids.

    NOTE: The dict is returned as-is; no error is raised here if it is empty.
    """
    options = AnnoOptions(self.evobj, **kws)
    # The default reduction removes the annotations listed below.
    # For all options, see goatools/anno/opts.py:
    #   * Evidence_Code == ND -> No biological Data available
    #   * Qualifiers contain NOT
    reduced = self.reduce_annotations(associations, options)
    if options.b_geneid2gos:
        id2ids = self.get_dbid2goids(reduced)
    else:
        id2ids = self.get_goid2dbids(reduced)
    return id2ids
def _get_id2gos(self, ntannos_usr, propagate_counts=False, relationships=None, prt=sys.stdout, **kws):
    """Reduce the given annotations and return them as a dict.

    The extra keyword arguments are forwarded, together with self.evobj, to
    AnnoOptions. The reduced annotations are always converted with
    get_dbid2goids, which also receives propagate_counts, relationships and
    prt. When options.b_geneid2gos is true that dict is returned directly;
    otherwise it is passed through self._get_goid2dbids first.

    NOTE: The dict is returned as-is; no error is raised here if it is empty.
    """
    options = AnnoOptions(self.evobj, **kws)
    # The default reduction removes the annotations listed below.
    # For all options, see goatools/anno/opts.py:
    #   * Evidence_Code == ND -> No biological Data available
    #   * Qualifiers contain NOT
    ntannos_reduced = self.reduce_annotations(ntannos_usr, options)
    dbid2goids = self.get_dbid2goids(
        ntannos_reduced, propagate_counts, relationships, prt)
    if not options.b_geneid2gos:
        return self._get_goid2dbids(dbid2goids)
    return dbid2goids
def _tst_ns2(obj, idx):
    """Test functions which use ns2 functions.

    The annotations returned by obj.get_ns2ntsanno() are reduced one namespace
    at a time with ev_include=INC_GOOD and converted with obj.get_dbid2goids.
    The per-namespace results are then compared, gene by gene, against the
    output of obj.get_ns2assc called with the same keyword arguments.

    Args:
        obj: annotation object under test
        idx: value printed in the per-namespace summary line
    """
    # ALL annotations for a species
    nts_all = obj.get_associations()
    num_nts_all = len(nts_all)
    num_nts_act = 0

    # Separate ALL annotations into BP MF CC
    ns2ntsanno = obj.get_ns2ntsanno()
    assert set(ns2ntsanno).issubset({'BP', 'MF', 'CC'}), ns2ntsanno.keys()

    # Reduce annotations using only the evidence codes in INC_GOOD
    ns2anno_exp = {}
    kws = {'ev_include': INC_GOOD}
    for nspc, nts_orig in sorted(ns2ntsanno.items()):
        opt = AnnoOptions(obj.evobj, **kws)
        nts_redu = obj.reduce_annotations(nts_orig, opt)
        num_nts_orig = len(nts_orig)
        # Check that only current namespace is seen on namedtuples.
        # Every namedtuple is inspected: filtering on nt.NS == nspc first
        # would hide any namedtuple that belongs to another namespace.
        ns_seen = {nt.NS for nt in nts_orig}
        assert ns_seen == {nspc}, '{NS}: UNEXPECTED NAMESPACES: {SEEN}'.format(
            NS=nspc, SEEN=ns_seen)
        num_nts_act += num_nts_orig
        num_nts_redu = len(nts_redu)
        # pylint: disable=line-too-long
        print('{OPT} {IDX}) ns2ntanno {NS} {ALL:7,}=Loaded -> {N:7,} -> {R:7,} annos: {TYPE}'.format(
            OPT=opt,
            IDX=idx,
            NS=nspc,
            ALL=num_nts_all,
            N=num_nts_orig,
            R=num_nts_redu,
            TYPE=obj.get_desc()))
        ns2anno_exp[nspc] = obj.get_dbid2goids(nts_redu)
        assert num_nts_all >= num_nts_orig
        # For 'id2gos' objects the reduction must leave the count unchanged;
        # for every other object it must remove at least one annotation
        if obj.name == 'id2gos':
            assert num_nts_orig == num_nts_redu
        else:
            assert num_nts_orig > num_nts_redu
    assert num_nts_all >= num_nts_act

    # Compare step-by-step transformation with top-level function, id2gos
    ns2anno_act = obj.get_ns2assc(**kws)
    for nspc, anno_exp in ns2anno_exp.items():
        anno_act = ns2anno_act[nspc]
        assert set(anno_exp.keys()) == set(anno_act.keys())
        for geneid, gos_exp in anno_exp.items():
            gos_act = anno_act[geneid]
            assert gos_exp == gos_act
def read_ncbi_gene2go(fin_gene2go, taxids=None, **kws):
    """Read NCBI's gene2go. Return gene2go data for user-specified taxids.

    A Gene2GoReader is created from fin_gene2go and taxids, and every keyword
    argument is forwarded to AnnoOptions.

    When 'taxid2asscs' is not among the keyword arguments and the reader
    holds exactly one taxid, the result of get_annotations_dct for that taxid
    is returned. Otherwise the per-taxid data from get_annotations_taxid2dct
    is obtained, used to fill the caller's 'taxid2asscs' object (if one was
    given), and the result of get_id2gos_all on that data is returned.
    """
    obj = Gene2GoReader(fin_gene2go, taxids)
    opt = AnnoOptions(**kws)
    wants_taxid2asscs = 'taxid2asscs' in kws

    # Shortcut: a single taxid and no request for detailed associations
    if not wants_taxid2asscs and len(obj.taxid2asscs) == 1:
        taxid = next(iter(obj.taxid2asscs))
        return obj.get_annotations_dct(taxid, opt)

    # Optional detailed associations split by taxid and having both ID2GOs & GO2IDs
    # e.g., taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    t2asscs_ret = obj.get_annotations_taxid2dct(opt)
    if wants_taxid2asscs:
        obj.fill_taxid2asscs(kws['taxid2asscs'], t2asscs_ret)
    return obj.get_id2gos_all(t2asscs_ret)