def go_terms_with_ec_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text, evidence code) occurrences for each paper.

    Can be used with SP & GOA data.

    Args:
        papers: mapping of paper key (a PMID, per the original comment) to an
            iterable of records; every record must provide the keys
            'go_id' and 'go_ec'.
        outpath: unused; kept so existing callers keep working.
        top: unused; kept so existing callers keep working.

    Returns:
        A dict of the form
        {paper key: {(GO ID, GO term text, evidence code): occurrences}}.
        A paper without any records gets no entry. The term text is ''
        when neither name lookup succeeds.
    """
    # NOTE(review): this file defines go_terms_with_ec_per_paper a second
    # time further down; the later definition is the one that stays bound.
    go_ec_count = {}
    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                go_ec = rec['go_ec']
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        # Single pre-formatted argument: prints the same text
                        # under both the Python 2 statement and the Python 3
                        # function form of print.
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                gokey = (go_id, name, go_ec)
                # Create the per-paper dict lazily so papers with no records
                # stay absent from the result.
                paper_counts = go_ec_count.setdefault(p, {})
                paper_counts[gokey] = paper_counts.get(gokey, 0) + 1
    finally:
        # Release the connection even when a lookup raises something other
        # than the IndexError handled above.
        go_con.close()
    return go_ec_count
def go_terms_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text) occurrences for each paper.

    Can be used with SP & GOA data.

    Args:
        papers: mapping of paper key (a PMID, per the original comment) to an
            iterable of records; every record must provide the key 'go_id'.
        outpath: unused; kept so existing callers keep working.
        top: unused; kept so existing callers keep working.

    Returns:
        A dict of the form
        {paper key: {(GO ID, GO term text): occurrences}}.
        A paper without any records gets no entry. The term text is ''
        when neither name lookup succeeds.
    """
    # NOTE(review): this file defines go_terms_per_paper a second time
    # further down; the later definition is the one that stays bound.
    go_count = {}
    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        # Single pre-formatted argument: prints the same text
                        # under both the Python 2 statement and the Python 3
                        # function form of print.
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                gokey = (go_id, name)
                # Create the per-paper dict lazily so papers with no records
                # stay absent from the result.
                paper_counts = go_count.setdefault(p, {})
                paper_counts[gokey] = paper_counts.get(gokey, 0) + 1
    finally:
        # Release the connection even when a lookup raises something other
        # than the IndexError handled above.
        go_con.close()
    return go_count
def go_terms_with_ec_per_paper(papers, outpath=None, top=20):
    """Tally how often each (GO ID, GO term text, evidence code) tuple occurs per paper.

    Can be used with SP & GOA data.

    Args:
        papers: mapping of paper key (a PMID, per the original comment) to an
            iterable of records; every record must provide the keys
            'go_id' and 'go_ec'.
        outpath: unused; accepted only for interface compatibility.
        top: unused; accepted only for interface compatibility.

    Returns:
        A dict of the form
        {paper key: {(GO ID, GO term text, evidence code): occurrences}}.
        Papers that have no records do not appear. The term text is ''
        when both the name lookup and the synonym lookup fail.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file and is overridden by it.
    go_ec_count = {}
    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                go_ec = rec['go_ec']
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        # One pre-formatted argument keeps the emitted text
                        # identical whether print is the Python 2 statement
                        # or the Python 3 function.
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                gokey = (go_id, name, go_ec)
                # The inner dict is created on first use, so a paper with no
                # records never gets an (empty) entry.
                per_paper = go_ec_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # Always close the connection, including when an unexpected
        # exception escapes the loop.
        go_con.close()
    return go_ec_count
def go_terms_per_paper(papers, outpath=None, top=20):
    """Tally how often each (GO ID, GO term text) pair occurs per paper.

    Can be used with SP & GOA data.

    Args:
        papers: mapping of paper key (a PMID, per the original comment) to an
            iterable of records; every record must provide the key 'go_id'.
        outpath: unused; accepted only for interface compatibility.
        top: unused; accepted only for interface compatibility.

    Returns:
        A dict of the form
        {paper key: {(GO ID, GO term text): occurrences}}.
        Papers that have no records do not appear. The term text is ''
        when both the name lookup and the synonym lookup fail.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file and is overridden by it.
    go_count = {}
    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        # One pre-formatted argument keeps the emitted text
                        # identical whether print is the Python 2 statement
                        # or the Python 3 function.
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                gokey = (go_id, name)
                # The inner dict is created on first use, so a paper with no
                # records never gets an (empty) entry.
                per_paper = go_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # Always close the connection, including when an unexpected
        # exception escapes the loop.
        go_con.close()
    return go_count
def top_ontology(papers, outpath=None, top=20):
    """Count GO ID annotations over all papers and optionally write reports.

    Can be used with SP & GOA data.
    Note (from the original docstring): this function is currently identical
    to top_go_terms().

    Args:
        papers: mapping of paper key to an iterable of records; every record
            must provide the key 'go_id'.
        outpath: when truthy, path of a tab-delimited report to write. A
            second file, named like outpath's file name prefixed with
            'hist_' and placed in the same directory, receives a histogram.
        top: number of most frequent GO IDs written to outpath (the tail of
            the ascending list). NOTE: top=0 selects every term, because
            slicing from -0 starts at the beginning.

    Returns:
        A list of (count, GO ID) tuples sorted ascending, covering every GO
        ID seen (not only the top ones).

    Files written when outpath is given:
        outpath: one line per top term, "<count>\\t<GO ID>\\t<term name>".
        hist file: one line per distinct count,
            "<number of GO IDs with that count>\\t<count>", sorted ascending.
    """
    # NOTE(review): this file defines top_ontology a second time further
    # down; the later definition is the one that stays bound.
    import os  # local import: the module's import block is not visible here

    go_count = {}
    for p in papers:
        for rec in papers[p]:
            go_id = rec['go_id']
            go_count[go_id] = go_count.get(go_id, 0) + 1
    top_go = sorted((count, go_id) for go_id, count in go_count.items())

    if outpath:
        # How many GO IDs share each annotation count.
        go_hist = {}
        for count, _go_id in top_go:
            go_hist[count] = go_hist.get(count, 0) + 1
        go_hist_list = sorted(
            (n_terms, count) for count, n_terms in go_hist.items())

        # Prefix only the file name: prefixing the whole path would point
        # into a non-existent 'hist_<dir>' directory for any outpath that
        # contains a directory component.
        out_dir, out_name = os.path.split(outpath)
        hist_path = os.path.join(out_dir, "hist_%s" % out_name)

        go_con = mysqlConnect()
        try:
            go_cur = go_con.cursor()
            with open(outpath, "w") as f:
                for count, go_id in top_go[-top:]:
                    name = gu.go_acc_to_name(go_id, go_cur)
                    f.write("%d\t%s\t%s\n" % (count, go_id, name))
            with open(hist_path, "w") as fhist:
                for h in go_hist_list:
                    fhist.write("%d\t%d\n" % h)
        finally:
            # Close the connection even if a lookup or a write fails.
            go_con.close()
    return top_go
def top_ontology(papers, outpath=None, top=20):
    """Rank GO IDs by how often they are annotated; optionally write reports.

    Can be used with SP & GOA data.
    Note (from the original docstring): this function is currently identical
    to top_go_terms().

    Args:
        papers: mapping of paper key to an iterable of records; every record
            must provide the key 'go_id'.
        outpath: when truthy, path of a tab-delimited report to write. A
            histogram is written next to it, under outpath's file name
            prefixed with 'hist_'.
        top: number of most frequent GO IDs written to outpath (taken from
            the end of the ascending list). NOTE: top=0 selects every term,
            because slicing from -0 starts at the beginning.

    Returns:
        A list of (count, GO ID) tuples in ascending order, covering every
        GO ID seen (not only the top ones).

    Files written when outpath is given:
        outpath: one line per top term, "<count>\\t<GO ID>\\t<term name>".
        hist file: one line per distinct count,
            "<number of GO IDs with that count>\\t<count>", sorted ascending.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file and is overridden by it.
    import os  # local import: the module's import block is not visible here

    go_count = {}
    for p in papers:
        for rec in papers[p]:
            go_id = rec['go_id']
            go_count[go_id] = go_count.get(go_id, 0) + 1
    top_go = sorted((count, go_id) for go_id, count in go_count.items())

    if outpath:
        # Histogram: number of GO IDs observed for each annotation count.
        go_hist = {}
        for count, _go_id in top_go:
            go_hist[count] = go_hist.get(count, 0) + 1
        go_hist_list = sorted(
            (n_terms, count) for count, n_terms in go_hist.items())

        # Put the 'hist_' prefix on the file name only; prefixing the full
        # path breaks as soon as outpath contains a directory component.
        out_dir, out_name = os.path.split(outpath)
        hist_path = os.path.join(out_dir, "hist_%s" % out_name)

        go_con = mysqlConnect()
        try:
            go_cur = go_con.cursor()
            with open(outpath, "w") as f:
                for count, go_id in top_go[-top:]:
                    name = gu.go_acc_to_name(go_id, go_cur)
                    f.write("%d\t%s\t%s\n" % (count, go_id, name))
            with open(hist_path, "w") as fhist:
                for h in go_hist_list:
                    fhist.write("%d\t%d\n" % h)
        finally:
            # The connection is released on every exit path; the with-blocks
            # above do the same for both files.
            go_con.close()
    return top_go