def redundant_annotations(go_papers_dict):
    """Find GO annotations within a paper that are related by ancestry.

    Every pair of annotations of the same paper is compared when both carry
    the same 'sp_id' but different 'go_id' values. gu.is_ancestor() is asked
    about the pair in both argument orders, and one member of each related
    pair is marked for removal.

    Parameters:
        go_papers_dict: dict mapping a paper id (pmid) to a list of
            annotation dicts; each dict must have 'go_id' and 'sp_id' keys.

    Returns a 3-tuple (ancestors_found, to_remove, gpd_leaves_only):
        ancestors_found: dict mapping (pmid, go_id_a, go_id_b) to the number
            of annotation pairs for which gu.is_ancestor(go_id_a, go_id_b,
            cursor) was true.
        to_remove: dict mapping pmid to the set of list indices marked for
            removal; papers with nothing marked are absent.
        gpd_leaves_only: dict with the same keys as go_papers_dict. Papers
            with nothing marked map to their original list object; the
            others map to a new list without the marked indices.

    The GO database connection opened here is always closed, even when a
    lookup raises.
    """
    ancestors_found = {}
    to_remove = {}
    go_con, go_cur = gu.open_go()
    try:
        for pmid, annotations in go_papers_dict.items():
            if len(annotations) < 2:
                continue
            # Visit each unordered pair once, with j < i.
            for i in range(1, len(annotations)):
                go_id_1 = annotations[i]['go_id']
                for j in range(i):
                    go_id_2 = annotations[j]['go_id']
                    if go_id_1 == go_id_2:
                        continue
                    # Only annotations of the same protein are compared.
                    if annotations[i]['sp_id'] != annotations[j]['sp_id']:
                        continue
                    # NOTE(review): which argument gu.is_ancestor() treats as
                    # the ancestor is not visible here; the index dropped in
                    # each branch is kept exactly as in the original code.
                    if gu.is_ancestor(go_id_1, go_id_2, go_cur):
                        key = (pmid, go_id_1, go_id_2)
                        ancestors_found[key] = ancestors_found.get(key, 0) + 1
                        to_remove.setdefault(pmid, set()).add(j)
                    elif gu.is_ancestor(go_id_2, go_id_1, go_cur):
                        key = (pmid, go_id_2, go_id_1)
                        ancestors_found[key] = ancestors_found.get(key, 0) + 1
                        to_remove.setdefault(pmid, set()).add(i)
    finally:
        # Release the connection even if a query or a key lookup failed.
        go_con.close()

    gpd_leaves_only = {}
    for pmid, annotations in go_papers_dict.items():
        dropped = to_remove.get(pmid)
        if dropped is None:
            # Nothing marked: reuse the caller's list (no copy is made).
            gpd_leaves_only[pmid] = annotations
        else:
            gpd_leaves_only[pmid] = [
                annotation
                for index, annotation in enumerate(annotations)
                if index not in dropped
            ]
    return ancestors_found, to_remove, gpd_leaves_only
def redundant_annotations(go_papers_dict):
    """Find GO annotations within a paper that are related by ancestry.

    Every pair of annotations of the same paper is compared when both carry
    the same 'sp_id' but different 'go_id' values. gu.is_ancestor() is asked
    about the pair in both argument orders, and one member of each related
    pair is marked for removal.

    Parameters:
        go_papers_dict: dict mapping a paper id (pmid) to a list of
            annotation dicts; each dict must have 'go_id' and 'sp_id' keys.

    Returns a 3-tuple (ancestors_found, to_remove, gpd_leaves_only):
        ancestors_found: dict mapping (pmid, go_id_a, go_id_b) to the number
            of annotation pairs for which gu.is_ancestor(go_id_a, go_id_b,
            cursor) was true.
        to_remove: dict mapping pmid to the set of list indices marked for
            removal; papers with nothing marked are absent.
        gpd_leaves_only: dict with the same keys as go_papers_dict. Papers
            with nothing marked map to their original list object; the
            others map to a new list without the marked indices.

    The GO database connection opened here is always closed, even when a
    lookup raises.
    """
    ancestors_found = {}
    to_remove = {}
    go_con, go_cur = gu.open_go()
    try:
        for pmid, annotations in go_papers_dict.items():
            if len(annotations) < 2:
                continue
            # Visit each unordered pair once, with j < i.
            for i in range(1, len(annotations)):
                go_id_1 = annotations[i]['go_id']
                for j in range(i):
                    go_id_2 = annotations[j]['go_id']
                    if go_id_1 == go_id_2:
                        continue
                    # Only annotations of the same protein are compared.
                    if annotations[i]['sp_id'] != annotations[j]['sp_id']:
                        continue
                    # NOTE(review): which argument gu.is_ancestor() treats as
                    # the ancestor is not visible here; the index dropped in
                    # each branch is kept exactly as in the original code.
                    if gu.is_ancestor(go_id_1, go_id_2, go_cur):
                        key = (pmid, go_id_1, go_id_2)
                        ancestors_found[key] = ancestors_found.get(key, 0) + 1
                        to_remove.setdefault(pmid, set()).add(j)
                    elif gu.is_ancestor(go_id_2, go_id_1, go_cur):
                        key = (pmid, go_id_2, go_id_1)
                        ancestors_found[key] = ancestors_found.get(key, 0) + 1
                        to_remove.setdefault(pmid, set()).add(i)
    finally:
        # Release the connection even if a query or a key lookup failed.
        go_con.close()

    gpd_leaves_only = {}
    for pmid, annotations in go_papers_dict.items():
        dropped = to_remove.get(pmid)
        if dropped is None:
            # Nothing marked: reuse the caller's list (no copy is made).
            gpd_leaves_only[pmid] = annotations
        else:
            gpd_leaves_only[pmid] = [
                annotation
                for index, annotation in enumerate(annotations)
                if index not in dropped
            ]
    return ancestors_found, to_remove, gpd_leaves_only
#!/usr/bin/env python import GO.go_utils as gu import getopt import sys opts, args = getopt.getopt(sys.argv[1:],'i:f:o:', ['infile=','field=','outfile=']) gocon, goc = gu.open_go(user="******", passwd="mingus", db="MyGO") infile = None go_acc_field = 3 # starts at 1, not 0! outfile = sys.stdout for o, a in opts: if o in ('-i','--infile'): infile = a elif o in ('-o','--outfile'): outfile = open(a,"w") elif o in ('-f','--field'): go_acc_field = int(a) print "infile", infile if infile: for inline in file(infile): sp_id = inline.strip().split()[3] go_acc = inline.strip().split()[go_acc_field-1] go_level = gu.go_level(go_acc,goc) try: go_term_type = gu.go_acc_to_term_type(go_acc, goc) except IndexError: outfile.write("%s\t%s\t%.1f\t%s\n" % (go_acc, "NOTERMTYPE", go_level, sp_id)) continue