def test_semantic_similarity(usr_assc=None):
    """Check GAF annotation namespaces against the namespaces of their GO terms.

    Every annotation in each association file is looked up in the GO DAG:
      * GO IDs missing from the DAG are collected and reported through
        ``_prt_not_found``.
      * Annotations whose namespace (``NS2NAMESPACE[nta.NS]``) differs from
        the namespace of the GO term are collected per file and written
        through ``_wr_errs`` to 'namespace_errors.txt'.

    Args:
        usr_assc: Optional name of a single association file. When given,
            only that file is examined; otherwise all files in
            ``ASSOCIATIONS`` except the two UniProt "all" files are used.
    """
    not_these = {'goa_uniprot_all.gaf', 'goa_uniprot_all_noiea.gaf'}
    assc_names = sorted(ASSOCIATIONS.difference(not_these))
    go2obj = get_go2obj()
    # http://current.geneontology.org/annotations/
    if usr_assc is not None:
        assc_names = [usr_assc]
    not_found = set()
    gaf2errs = cx.defaultdict(list)
    for assc_name in assc_names:
        # Download the association file only if it is not already on disk
        fin_gaf = os.path.join(REPO, assc_name)
        if not os.path.exists(fin_gaf):
            dnld_annotation(fin_gaf)
        annoobj = GafReader(fin_gaf)
        for nta in annoobj.associations:
            # GO IDs which are absent from the DAG cannot be namespace-checked
            if nta.GO_ID not in go2obj:
                not_found.add(nta.GO_ID)
                continue
            # Record annotations whose namespace disagrees with the GO term
            if NS2NAMESPACE.get(nta.NS) != go2obj[nta.GO_ID].namespace:
                gaf2errs[assc_name].append(nta)
    print('{HMS} {N} Associations'.format(HMS=_hms(TIC), N=len(assc_names)))
    if not_found:
        _prt_not_found(not_found)
    if gaf2errs:
        _wr_errs('namespace_errors.txt', gaf2errs, go2obj)
def test_semantic_similarity(usr_assc=None):
    """Build TermCounts for GAF association files and print count summaries.

    For each association file, the file is downloaded if it is not already
    on disk, all ID-to-GO associations are read, and a ``TermCounts`` object
    is created. If any GO terms were counted, the aspect counts are printed
    and ``prt_info`` is called with no cutoff, with half of the largest
    count, and with a tenth of the largest count. Association files which
    yield no associations are reported through ``_prt_not_found``.

    Args:
        usr_assc: Optional name of a single association file. When given,
            only that file is examined; otherwise all files in
            ``ASSOCIATIONS`` except the two UniProt "all" files are used.
    """
    not_these = {'goa_uniprot_all.gaf', 'goa_uniprot_all_noiea.gaf'}
    associations = sorted(ASSOCIATIONS.difference(not_these))
    go2obj = get_go2obj()
    # http://current.geneontology.org/annotations/
    if usr_assc is not None:
        associations = [usr_assc]
    not_found = set()
    for assc_name in associations:
        # Per-association start time, reported beside the module-level TIC
        tic = timeit.default_timer()
        # Download the association file only if it is not already on disk
        fin_gaf = os.path.join(REPO, assc_name)
        if not os.path.exists(fin_gaf):
            dnld_annotation(fin_gaf)
        annoobj = GafReader(fin_gaf)
        assc_gene2gos = annoobj.get_id2gos('all')
        if not assc_gene2gos:
            not_found.add(assc_name)
            continue

        # Initialize the counts of each GO term.
        tcntobj = TermCounts(go2obj, assc_gene2gos)
        go_cnt = tcntobj.gocnts.most_common()

        if go_cnt:
            print("{ASSC}".format(ASSC=assc_name))
            print(tcntobj.aspect_counts)
            # most_common() is sorted by descending count: first is largest
            gocnt_max = go_cnt[0][1]
            prt_info(tcntobj, go_cnt, None)
            prt_info(tcntobj, go_cnt, gocnt_max / 2.0)
            prt_info(tcntobj, go_cnt, gocnt_max / 10.0)
            print("{HMS} {hms} {ASSC}\n".format(ASSC=assc_name, HMS=_hms(TIC), hms=_hms(tic)))
    print('{HMS} {N} Associations'.format(HMS=_hms(TIC), N=len(associations)))
    if not_found:
        _prt_not_found(not_found)
def test_termcnt_init():
    """Check that two ways of loading the human GPAD annotations agree.

    The result of ``_run_full`` (all annotations loaded together) must equal
    the result of ``_run_each`` (annotations loaded one namespace at a time).
    """
    obo_path = os.path.join(REPO, 'go-basic.obo')
    dag = get_godag(obo_path)

    gpad_path = os.path.join(REPO, 'goa_human.gpad')
    dnld_annotation(gpad_path)

    # All annotations (BP, MF, CC) loaded in a single pass
    result_all_at_once = _run_full(gpad_path, dag)

    # Annotations loaded separately for each of BP, MF, CC
    result_per_namespace = _run_each(gpad_path, dag)

    # Both loading methods must produce the same result
    assert result_all_at_once == result_per_namespace
def test_find_enrichment(run_all=False):
    """Run each enrichment command from ``_get_cmds`` and require exit status 0.

    Args:
        run_all: The commands are only run when this is true; otherwise a
            reminder is printed and nothing else happens.
    """
    if not run_all:
        print('RUN THIS TEST WITH AN ARGUMENT')
        return

    # Make sure the ontology and the human GAF annotations are available
    obo_path = join(REPO, 'go-basic.obo')
    get_godag(obo_path, optional_attrs={'relationship'}, loading_bar=None)
    gaf_path = join(REPO, 'goa_human.gaf')
    dnld_annotation(gaf_path)

    banner = '------------------- TEST {I} ------------------------------------'
    for num, command in enumerate(_get_cmds()):
        print(banner.format(I=num))
        print('CMD: {CMD}'.format(CMD=command))
        exit_status = system(command)
        assert exit_status == 0
    print("TEST PASSED")
def test_tcntobj_relationships(prt=sys.stdout):
    """Compare TermCounts built with and without GO relationships loaded.

    Args:
        prt: Stream handed to ``download_go_basic_obo``.
    """
    obo_path = os.path.join(REPO, "go-basic.obo")
    gpad_path = os.path.join(REPO, 'goa_human.gpad')
    download_go_basic_obo(obo_path, prt, loading_bar=None)
    dnld_annotation(gpad_path)

    # Two DAGs: one without and one with the optional 'relationship' attribute
    dag_plain = GODag(obo_path)
    dag_rels = GODag(obo_path, optional_attrs=['relationship'])

    # The annotations are read once, against the DAG without relationships
    reader = GpadReader(gpad_path, godag=dag_plain)

    # One TermCounts per namespace, relationships not used
    counts_plain = {}
    for nspc in NSS:
        counts_plain[nspc] = TermCounts(dag_plain, reader.get_id2gos(nspc))

    # One TermCounts per namespace, with RELS passed to TermCounts
    counts_rels = {}
    for nspc in NSS:
        counts_rels[nspc] = TermCounts(dag_rels, reader.get_id2gos(nspc), RELS)

    _chk_pass_fail(counts_plain, counts_rels)
def get_anno_fullname(fin_anno):
    """Return the path of an annotation file inside REPO.

    ``dnld_annotation`` is called on the joined path before it is returned.

    Args:
        fin_anno: Annotation filename, joined onto ``REPO``.

    Returns:
        The result of ``join(REPO, fin_anno)``.
    """
    anno_path = join(REPO, fin_anno)
    dnld_annotation(anno_path)
    return anno_path