def _announce_step(message):
    """Print *message*, then the current clock() reading, and flush stdout."""
    # A parenthesised single-argument print behaves exactly like the print
    # statement under Python 2, so the emitted text is unchanged.
    print(message)
    print(clock())
    sys.stdout.flush()


# Each announcement below is emitted *before* the step it names, so the clock
# reading marks the start of that step.
all_tt_count = sp_tools.term_types_all_papers(
    papersExpLO_dict
)  # takes a really long time

_announce_step("cPikleDump: save that all_tt_count for later")
sp_tools.cPickleDump(all_tt_count, "Uniprot-Bias/all_tt_countLO.pik")

_announce_step("go_terms_with_ec_per_paper: Create a dict that counts up how many times a specific (GO ID, GO Term Text, EvCode) tuple occurs for each paper")
go_ec_count = sp_tools.go_terms_with_ec_per_paper(
    papersExpLO_dict,
    top=top,
)  # this takes a bit of time too

_announce_step("ev_codes_all_papers: Calculate the number of times a paper gives a certain experimental evidence code.")
allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExpLO_dict)

_announce_step("sort_papers_prots: Sort the dictionary papers_prots according to the number of proteins annotated by a particular paper (PMID).")
sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict)

_announce_step("print_paper_per_prots: print out the results of the the top papers per proteins. Final Ouptfile: allExpPaperLeavesOnlyInfoTop50.<date>.tsv")
sp_tools.print_paper_per_prots_go(
    papers_annots2_dict,
    all_tt_count,
    go_ec_count,
    allEvCodes_dict,
    sortedProtsPerPaper_tuple,
    finalOutputFile,
    top=top,
)

# The closing message is followed by a clock reading but, as in the original
# sequence, no explicit flush.
print("all done")
print(clock())
# Top-50 report for the experimental paper set.
#
# NOTE(review): the line breaks of this excerpt have been lost -- several
# statements and their inline comments are run together on the single physical
# line below.  It is kept verbatim; restore one statement per line before
# relying on it.
#
# Read as separate statements, the line:
#   * sets top = 50;
#   * calls sp_tools.top_papers_dict, term_types_all_papers (marked "takes a
#     really long time"), go_terms_with_ec_per_paper, ev_codes_all_papers and
#     sort_papers_prots on papersExp_dict / papers_protsExp_dict;
#   * passes all of those results to sp_tools.print_paper_per_prots_go together
#     with the name "allExpPaperInfoTop50_lo.txt" (presumably the output file --
#     confirm against sp_tools).
#
# Everything after "# Not experimental" opens a triple-quoted string that holds
# the same sequence for the non-experimental pickles, i.e. disabled code.  The
# closing delimiter of that string is not on this line.
top = 50 papers_annots2_dict = sp_tools.top_papers_dict(papersExp_dict, papers_protsExp_dict, top=top) print "really long time... (1)" all_tt_count = sp_tools.term_types_all_papers( papersExp_dict) #takes a really long time print "...done" go_ec_count = sp_tools.go_terms_with_ec_per_paper( papersExp_dict, top=top) # this takes a bit of time too allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExp_dict) sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict) sp_tools.print_paper_per_prots_go(papers_annots2_dict, all_tt_count, go_ec_count, allEvCodes_dict, sortedProtsPerPaper_tuple, "allExpPaperInfoTop50_lo.txt", top=top) # Not experimental """papersNoExp_handle = open('goa-pickles/goa_not_exp_papers.pik', 'rb') papersNoExp_dict = cPickle.load(papersNoExp_handle) papers_protsNoExp_handle = open('goa-pickles/goa_not_exp_papers_prots.pik', 'rb') papers_protsNoExp_dict = cPickle.load(papers_protsNoExp_handle) # going for the top fifty papers top = 50 papers_annots2_dict_no = sp_tools.top_papers_dict(papersNoExp_dict, papers_protsNoExp_dict, top=top) all_tt_count_no = sp_tools.term_types_all_papers(papersNoExp_dict) #takes a really long time go_ec_count_no = sp_tools.go_terms_with_ec_per_paper(papersNoExp_dict, top=top) # this takes a bit of time too
# Load the experimental pickles and produce the top-50 report.
#
# NOTE(review): the line breaks of this excerpt have been lost -- the statements
# and inline comments below are run together on one physical line, which is
# kept verbatim.  Restore one statement per line before relying on it.
#
# Read as separate statements, the line:
#   * opens 'goa_exp_papers_lo.pik' and 'goa_exp_papers_prots_lo.pik' in 'rb'
#     mode and loads each with cPickle.load into papersExp_dict and
#     papers_protsExp_dict;
#   * sets top = 50 and calls sp_tools.top_papers_dict, term_types_all_papers
#     (marked "takes a really long time"), go_terms_with_ec_per_paper,
#     ev_codes_all_papers and sort_papers_prots on those dicts;
#   * passes the results to sp_tools.print_paper_per_prots_go together with the
#     name "allExpPaperInfoTop50_lo.txt" (presumably the output file -- confirm
#     against sp_tools).
#
# NOTE(review): neither file handle is closed within this excerpt, and
# cPickle.load must only ever be given pickles from a trusted source.
#
# Everything after "# Not experimental" opens a triple-quoted string holding
# the equivalent sequence for the non-experimental pickles, i.e. disabled code.
# The excerpt stops in the middle of the final print_paper_per_prots_go call,
# before the string's closing delimiter.
#Experimental papersExp_handle = open('goa_exp_papers_lo.pik', 'rb') papersExp_dict = cPickle.load(papersExp_handle) papers_protsExp_handle = open('goa_exp_papers_prots_lo.pik', 'rb') papers_protsExp_dict = cPickle.load(papers_protsExp_handle) # going for the top fifty papers top = 50 papers_annots2_dict = sp_tools.top_papers_dict(papersExp_dict, papers_protsExp_dict, top=top) print "really long time... (1)" all_tt_count = sp_tools.term_types_all_papers(papersExp_dict) #takes a really long time print "...done" go_ec_count = sp_tools.go_terms_with_ec_per_paper(papersExp_dict, top=top) # this takes a bit of time too allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExp_dict) sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict) sp_tools.print_paper_per_prots_go(papers_annots2_dict, all_tt_count, go_ec_count, allEvCodes_dict, sortedProtsPerPaper_tuple, "allExpPaperInfoTop50_lo.txt", top=top) # Not experimental """papersNoExp_handle = open('goa-pickles/goa_not_exp_papers.pik', 'rb') papersNoExp_dict = cPickle.load(papersNoExp_handle) papers_protsNoExp_handle = open('goa-pickles/goa_not_exp_papers_prots.pik', 'rb') papers_protsNoExp_dict = cPickle.load(papers_protsNoExp_handle) # going for the top fifty papers top = 50 papers_annots2_dict_no = sp_tools.top_papers_dict(papersNoExp_dict, papers_protsNoExp_dict, top=top) all_tt_count_no = sp_tools.term_types_all_papers(papersNoExp_dict) #takes a really long time go_ec_count_no = sp_tools.go_terms_with_ec_per_paper(papersNoExp_dict, top=top) # this takes a bit of time too allEvCodes_dict_no = sp_tools.ev_codes_all_papers(papersNoExp_dict) sortedProtsPerPaper_tuple_no = sp_tools.sort_papers_prots(papers_protsNoExp_dict) sp_tools.print_paper_per_prots_go(papers_annots2_dict_no, all_tt_count_no, go_ec_count_no, allEvCodes_dict_no,
# --- Species annotations -----------------------------------------------------
# count all the species annotations
all_taxonID_dict = sp_tools.count_all_annotations_taxonIDs(
    papersTaxExp_dict
)

# --- Top GO codes ------------------------------------------------------------
# `top` is 500 for this call only; it is reset to 50 for the paper report.
top = 500
ec_go_code_count = sp_tools.count_top_go_terms_per_ecode_all_entries(
    papersExp_dict,
    "sortedECGO.txt",
    top,
)

# --- Top fifty papers, first pass --------------------------------------------
top = 50
papers_annots2_dict = sp_tools.top_papers_dict(
    papersExp_dict,
    papers_protsExp_dict,
    top=top,
)
all_tt_count = sp_tools.term_types_all_papers(
    papersExp_dict
)  # takes a really long time
go_ec_count = sp_tools.go_terms_with_ec_per_paper(
    papersExp_dict,
    top=top,
)  # this takes a bit of time too
allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExp_dict)
sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict)
sp_tools.print_paper_per_prots_go(
    papers_annots2_dict,
    all_tt_count,
    go_ec_count,
    allEvCodes_dict,
    sortedProtsPerPaper_tuple,
    "allExpPaperInfoTop50.txt",
    top=top,
)

# --- Top fifty papers, second pass -------------------------------------------
# NOTE(review): this repeats the first pass call for call and passes the same
# "allExpPaperInfoTop50.txt" name; the only addition is the sort_go_ec_count
# step.  If the sp_tools helpers have no side effects the first pass looks
# redundant (and term_types_all_papers is slow) -- confirm before removing.
top = 50
papers_annots2_dict = sp_tools.top_papers_dict(
    papersExp_dict,
    papers_protsExp_dict,
    top=top,
)
all_tt_count = sp_tools.term_types_all_papers(
    papersExp_dict
)  # takes a really long time
go_ec_count = sp_tools.go_terms_with_ec_per_paper(
    papersExp_dict,
    top=top,
)  # this takes a bit of time too
GO_EC_Count_collect_tuple = sp_tools.sort_go_ec_count(go_ec_count)
allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExp_dict)
sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict)
sp_tools.print_paper_per_prots_go(
    papers_annots2_dict,
    all_tt_count,
    go_ec_count,
    allEvCodes_dict,
    sortedProtsPerPaper_tuple,
    "allExpPaperInfoTop50.txt",
    top=top,
)