예제 #1
0
def go_terms_with_ec_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text, evidence code) annotations for each paper.

    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID (PMID) to an iterable of annotation
            records; every record is indexed by 'go_id' and 'go_ec'.
        outpath: unused; kept so the signature matches the other
            per-paper/top-N helpers.
        top: unused; kept for the same reason as outpath.

    Returns:
        A dict shaped as
        go_ec_count[PMID] = {(GO ID, GO term text, Ev code): occurrences}.
        Papers without any record do not appear in the result.  The term
        text is '' when neither the accession nor the synonym lookup
        finds the GO ID.
    """
    go_ec_count = {}
    # Each distinct GO ID is resolved once; repeated annotations reuse the
    # cached text instead of hitting the database again.  A side effect is
    # that the "problem with GO ID" message is printed once per ID.
    names = {}

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                go_ec = rec['go_ec']
                if go_id not in names:
                    try:
                        name = gu.go_acc_to_name(go_id, go_cur)
                    except IndexError:
                        # sometimes the GO ID given is actually a synonym
                        try:
                            name = gu.go_acc_to_synonym_name(go_id, go_cur)
                        except IndexError:
                            # sometimes it just doesn't work
                            # single-argument form prints the same text
                            # under Python 2 and Python 3
                            print("problem with GO ID %s" % (go_id,))
                            name = ''
                    names[go_id] = name
                gokey = (go_id, names[go_id], go_ec)
                per_paper = go_ec_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # release the connection even when a lookup or a record access fails
        go_con.close()
    return go_ec_count
예제 #2
0
def go_terms_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text) annotations for each paper.

    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID (PMID) to an iterable of annotation
            records; every record is indexed by 'go_id'.
        outpath: unused; kept so the signature matches the other
            per-paper/top-N helpers.
        top: unused; kept for the same reason as outpath.

    Returns:
        A dict shaped as
        go_count[PMID] = {(GO ID, GO term text): occurrences}.
        Papers without any record do not appear in the result.  The term
        text is '' when neither the accession nor the synonym lookup
        finds the GO ID.
    """
    go_count = {}
    # Each distinct GO ID is resolved once; repeated annotations reuse the
    # cached text instead of hitting the database again.  A side effect is
    # that the "problem with GO ID" message is printed once per ID.
    names = {}

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                if go_id not in names:
                    try:
                        name = gu.go_acc_to_name(go_id, go_cur)
                    except IndexError:
                        # sometimes the GO ID given is actually a synonym
                        try:
                            name = gu.go_acc_to_synonym_name(go_id, go_cur)
                        except IndexError:
                            # sometimes it just doesn't work
                            # single-argument form prints the same text
                            # under Python 2 and Python 3
                            print("problem with GO ID %s" % (go_id,))
                            name = ''
                    names[go_id] = name
                gokey = (go_id, names[go_id])
                per_paper = go_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # release the connection even when a lookup or a record access fails
        go_con.close()
    return go_count
예제 #3
0
def go_terms_with_ec_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text, evidence code) annotations for each paper.

    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID (PMID) to an iterable of annotation
            records; every record is indexed by 'go_id' and 'go_ec'.
        outpath: unused; kept so the signature matches the other
            per-paper/top-N helpers.
        top: unused; kept for the same reason as outpath.

    Returns:
        A dict shaped as
        go_ec_count[PMID] = {(GO ID, GO term text, Ev code): occurrences}.
        Papers without any record do not appear in the result.  The term
        text is '' when neither the accession nor the synonym lookup
        finds the GO ID.
    """
    go_ec_count = {}
    # Each distinct GO ID is resolved once; repeated annotations reuse the
    # cached text instead of hitting the database again.  A side effect is
    # that the "problem with GO ID" message is printed once per ID.
    names = {}

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                go_ec = rec['go_ec']
                if go_id not in names:
                    try:
                        name = gu.go_acc_to_name(go_id, go_cur)
                    except IndexError:
                        # sometimes the GO ID given is actually a synonym
                        try:
                            name = gu.go_acc_to_synonym_name(go_id, go_cur)
                        except IndexError:
                            # sometimes it just doesn't work
                            # single-argument form prints the same text
                            # under Python 2 and Python 3
                            print("problem with GO ID %s" % (go_id,))
                            name = ''
                    names[go_id] = name
                gokey = (go_id, names[go_id], go_ec)
                per_paper = go_ec_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # release the connection even when a lookup or a record access fails
        go_con.close()
    return go_ec_count
예제 #4
0
def go_terms_per_paper(papers, outpath=None, top=20):
    """Count (GO ID, GO term text) annotations for each paper.

    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID (PMID) to an iterable of annotation
            records; every record is indexed by 'go_id'.
        outpath: unused; kept so the signature matches the other
            per-paper/top-N helpers.
        top: unused; kept for the same reason as outpath.

    Returns:
        A dict shaped as
        go_count[PMID] = {(GO ID, GO term text): occurrences}.
        Papers without any record do not appear in the result.  The term
        text is '' when neither the accession nor the synonym lookup
        finds the GO ID.
    """
    go_count = {}
    # Each distinct GO ID is resolved once; repeated annotations reuse the
    # cached text instead of hitting the database again.  A side effect is
    # that the "problem with GO ID" message is printed once per ID.
    names = {}

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        for p in papers:
            for rec in papers[p]:
                go_id = rec['go_id']
                if go_id not in names:
                    try:
                        name = gu.go_acc_to_name(go_id, go_cur)
                    except IndexError:
                        # sometimes the GO ID given is actually a synonym
                        try:
                            name = gu.go_acc_to_synonym_name(go_id, go_cur)
                        except IndexError:
                            # sometimes it just doesn't work
                            # single-argument form prints the same text
                            # under Python 2 and Python 3
                            print("problem with GO ID %s" % (go_id,))
                            name = ''
                    names[go_id] = name
                gokey = (go_id, names[go_id])
                per_paper = go_count.setdefault(p, {})
                per_paper[gokey] = per_paper.get(gokey, 0) + 1
    finally:
        # release the connection even when a lookup or a record access fails
        go_con.close()
    return go_count
예제 #5
0
def top_ontology(papers, outpath=None, top=20):
    """Rank GO IDs by how often they are annotated in the analysis set.

    Note: this function is currently identical to top_go_terms().
    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID to an iterable of annotation records;
            every record is indexed by 'go_id'.
        outpath: optional path of a tab-delimited report.  When given, two
            files are written: outpath itself, holding
            "count<TAB>GO ID<TAB>term text" for the `top` most frequent
            GO IDs (ascending by count), and a companion file whose name
            is the file name of outpath prefixed with "hist_", placed in
            the same directory, holding
            "number of GO IDs<TAB>annotation count" lines.
        top: how many of the most frequent GO IDs go into outpath.
            Values <= 0 write no rows.

    Returns:
        A list of (count, GO ID) tuples sorted ascending, i.e. the most
        frequently annotated GO IDs come last.
    """
    import os.path  # local import: this block cannot see the file's import section

    go_count = {}
    for p in papers:
        for rec in papers[p]:
            go_id = rec['go_id']
            go_count[go_id] = go_count.get(go_id, 0) + 1
    top_go = sorted((count, go_id) for go_id, count in go_count.items())
    if not outpath:
        return top_go

    # Histogram: how many GO IDs share each annotation count.
    go_hist = {}
    for count, _ in top_go:
        go_hist[count] = go_hist.get(count, 0) + 1
    go_hist_list = sorted((n_ids, count) for count, n_ids in go_hist.items())

    # top_go[-0:] would be the whole list, so guard non-positive values.
    selected = top_go[-top:] if top > 0 else []

    # Prefix only the file name: "hist_" + "dir/file" would point into a
    # directory that most likely does not exist.
    out_dir, out_name = os.path.split(outpath)
    hist_path = os.path.join(out_dir, "hist_%s" % out_name)

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        with open(outpath, "w") as f:
            for count, go_id in selected:
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                f.write("%d\t%s\t%s\n" % (count, go_id, name))
    finally:
        # release the connection even when a lookup or a write fails
        go_con.close()

    with open(hist_path, "w") as fhist:
        for h in go_hist_list:
            fhist.write("%d\t%d\n" % h)
    return top_go
예제 #6
0
def top_ontology(papers, outpath=None, top=20):
    """Rank GO IDs by how often they are annotated in the analysis set.

    Note: this function is currently identical to top_go_terms().
    Can be used with SP & GOA data.

    Parameters:
        papers: mapping of paper ID to an iterable of annotation records;
            every record is indexed by 'go_id'.
        outpath: optional path of a tab-delimited report.  When given, two
            files are written: outpath itself, holding
            "count<TAB>GO ID<TAB>term text" for the `top` most frequent
            GO IDs (ascending by count), and a companion file whose name
            is the file name of outpath prefixed with "hist_", placed in
            the same directory, holding
            "number of GO IDs<TAB>annotation count" lines.
        top: how many of the most frequent GO IDs go into outpath.
            Values <= 0 write no rows.

    Returns:
        A list of (count, GO ID) tuples sorted ascending, i.e. the most
        frequently annotated GO IDs come last.
    """
    import os.path  # local import: this block cannot see the file's import section

    go_count = {}
    for p in papers:
        for rec in papers[p]:
            go_id = rec['go_id']
            go_count[go_id] = go_count.get(go_id, 0) + 1
    top_go = sorted((count, go_id) for go_id, count in go_count.items())
    if not outpath:
        return top_go

    # Histogram: how many GO IDs share each annotation count.
    go_hist = {}
    for count, _ in top_go:
        go_hist[count] = go_hist.get(count, 0) + 1
    go_hist_list = sorted((n_ids, count) for count, n_ids in go_hist.items())

    # top_go[-0:] would be the whole list, so guard non-positive values.
    selected = top_go[-top:] if top > 0 else []

    # Prefix only the file name: "hist_" + "dir/file" would point into a
    # directory that most likely does not exist.
    out_dir, out_name = os.path.split(outpath)
    hist_path = os.path.join(out_dir, "hist_%s" % out_name)

    go_con = mysqlConnect()
    try:
        go_cur = go_con.cursor()
        with open(outpath, "w") as f:
            for count, go_id in selected:
                try:
                    name = gu.go_acc_to_name(go_id, go_cur)
                except IndexError:
                    # sometimes the GO ID given is actually a synonym
                    try:
                        name = gu.go_acc_to_synonym_name(go_id, go_cur)
                    except IndexError:
                        # sometimes it just doesn't work
                        print("problem with GO ID %s" % (go_id,))
                        name = ''
                f.write("%d\t%s\t%s\n" % (count, go_id, name))
    finally:
        # release the connection even when a lookup or a write fails
        go_con.close()

    with open(hist_path, "w") as fhist:
        for h in go_hist_list:
            fhist.write("%d\t%d\n" % h)
    return top_go