# Going for the list of papers that annotate the most proteins.
# ``top`` designates how far down that list we go.
top = 50


def _log_step(message):
    """Print *message*, then the current ``clock()`` reading, and flush stdout.

    Single-argument ``print(...)`` emits the same text as the Python 2
    ``print`` statement, so the log output is unchanged.
    """
    print(message)
    print(clock())
    sys.stdout.flush()


_log_step("top_papers_dict: Get all the PMID info for the top papers")
papers_annots2_dict = sp_tools.top_papers_dict(
    papersExpLO_dict, papers_protsExp_dict, top=top)

_log_step("term_types_all_papers: Count up all the terms types for each paper")
# Takes a really long time.
all_tt_count = sp_tools.term_types_all_papers(papersExpLO_dict)

_log_step("cPikleDump: save that all_tt_count for later")
sp_tools.cPickleDump(all_tt_count, "Uniprot-Bias/all_tt_countLO.pik")

_log_step("go_terms_with_ec_per_paper: Create a dict that counts up how many times a specific (GO ID, GO Term Text, EvCode) tuple occurs for each paper")
# This takes a bit of time too.
go_ec_count = sp_tools.go_terms_with_ec_per_paper(papersExpLO_dict, top=top)

_log_step("ev_codes_all_papers: Calculate the number of times a paper gives a certain experimental evidence code.")
allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExpLO_dict)

_log_step("sort_papers_prots: Sort the dictionary papers_prots according to the number of proteins annotated by a particular paper (PMID).")
sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict)
import cPickle
import sp_tools
from time import clock
import sys
import datetime

# usage: python ./print_and_pickle_all_ncbi_paper_info.py > AllNCBIPaperInfo.log 2>&1
#
# Loads two pre-pickled dictionaries, passes them to
# sp_tools.all_NCBI_paper_info_dict, and pickles the result to
# Uniprot-Bias/ncbi_paper_info.pik. Each step prints a label followed by the
# current clock() reading and flushes stdout so the redirected log stays
# current while the script runs.

# Load up the pre-pickled data. The ``with`` blocks close each input file as
# soon as it has been unpickled instead of leaving the handles open for the
# rest of the run.
with open('Uniprot-Bias/goa_exp_papers.pik', 'rb') as papersExp_handle:
    papersExp_dict = cPickle.load(papersExp_handle)

with open('Uniprot-Bias/goa_exp_papers_prots.pik', 'rb') as papers_protsExp_handle:
    papers_protsExp_dict = cPickle.load(papers_protsExp_handle)

# Single-argument print(...) emits the same text as the Python 2 print
# statement, so the log output is unchanged.
print("all_NCBI_paper_info_dict: get all the pubmed info we want")
print(clock())
sys.stdout.flush()
ncbi_paper_dict = sp_tools.all_NCBI_paper_info_dict(
    papersExp_dict, papers_protsExp_dict, outpath=None, delim="\t", top=None)

print("cPikleDump: save that ncbi_paper_dict for later")
print(clock())
sys.stdout.flush()
sp_tools.cPickleDump(ncbi_paper_dict, "Uniprot-Bias/ncbi_paper_info.pik")

print("Done!")
print(clock())
sys.stdout.flush()