# Work with the papers that annotate the most proteins; `top` sets how far
# down that ranked list we go.
#
# NOTE(review): in the visible source, `sys`, `clock` and `sp_tools` are only
# imported further down the file, `papers_protsExp_dict` is only loaded
# further down, and `papersExpLO_dict` is not assigned anywhere visible.
# Confirm where this section is meant to run before relying on it.
top = 50


def _announce_step(message):
    """Print `message`, then the current clock() reading, and flush stdout."""
    # A parenthesised single argument prints exactly like the Python 2
    # statement form (`print message`).
    print(message)
    print(clock())
    sys.stdout.flush()


_announce_step("top_papers_dict: Get all the PMID info for the top papers")
papers_annots2_dict = sp_tools.top_papers_dict(
    papersExpLO_dict, papers_protsExp_dict, top=top)

_announce_step("term_types_all_papers: Count up all the terms types for each paper")
# This step takes a really long time.
all_tt_count = sp_tools.term_types_all_papers(papersExpLO_dict)

# Persist the term-type counts so the slow step above need not be repeated.
_announce_step("cPikleDump: save that all_tt_count for later")
sp_tools.cPickleDump(all_tt_count, "Uniprot-Bias/all_tt_countLO.pik")

_announce_step("go_terms_with_ec_per_paper: Create a dict that counts up how many times a specific (GO ID, GO Term Text, EvCode) tuple occurs for each paper")
# This step takes a bit of time too.
go_ec_count = sp_tools.go_terms_with_ec_per_paper(papersExpLO_dict, top=top)

_announce_step("ev_codes_all_papers: Calculate the number of times a paper gives a certain experimental evidence code.")
allEvCodes_dict = sp_tools.ev_codes_all_papers(papersExpLO_dict)

_announce_step("sort_papers_prots: Sort the dictionary papers_prots according to the number of proteins annotated by a particular paper (PMID).")
sortedProtsPerPaper_tuple = sp_tools.sort_papers_prots(papers_protsExp_dict)
import cPickle
import sp_tools
from time import clock
import sys
import datetime


# Usage: python ./print_and_pickle_all_ncbi_paper_info.py > AllNCBIPaperInfo.log 2>&1

# Load the pre-pickled data. Each file is read inside a `with` block so its
# handle is closed as soon as the pickle has been loaded (the handles were
# previously left open for the whole run).
# NOTE: unpickling can execute arbitrary code -- only load trusted .pik files.
with open('Uniprot-Bias/goa_exp_papers.pik', 'rb') as papersExp_handle:
    papersExp_dict = cPickle.load(papersExp_handle)
with open('Uniprot-Bias/goa_exp_papers_prots.pik', 'rb') as papers_protsExp_handle:
    papers_protsExp_dict = cPickle.load(papers_protsExp_handle)


# Each stage prints a label and the current clock() reading, then flushes
# stdout so progress shows up promptly when output is redirected to a log.
# A parenthesised single argument prints exactly like the Python 2 statement
# form (`print "..."`).
print("all_NCBI_paper_info_dict: get all the pubmed info we want")
print(clock())
sys.stdout.flush()
ncbi_paper_dict = sp_tools.all_NCBI_paper_info_dict(
    papersExp_dict, papers_protsExp_dict, outpath=None, delim="\t", top=None)

# Persist the collected paper info for later runs.
print("cPikleDump: save that ncbi_paper_dict for later")
print(clock())
sys.stdout.flush()
sp_tools.cPickleDump(ncbi_paper_dict, "Uniprot-Bias/ncbi_paper_info.pik")

print("Done!")
print(clock())
sys.stdout.flush()