def rank(filename, criterion, num):
    """Rank the authors found in a CSV file and return the best ``num``.

    The file is parsed with ``csv.reader`` and passed to
    ``Analyze.dictauthor``; every author in the resulting mapping is then
    scored according to ``criterion``:

    * ``'Citations'`` / ``'NumPapers'``: the value stored under that key.
    * ``'Co-Authors'``: the length of the collection stored under that key.
    * ``'Citation Rate'``: ``Citations / NumPapers``.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.
        criterion: One of the four criterion strings listed above.
        num: Maximum number of authors to return.

    Returns:
        A list of ``[score, author]`` lists ordered from highest score to
        lowest; equal scores are ordered by comparing the author keys.
        Fewer than ``num`` entries are returned when the file holds fewer
        authors. An unknown ``criterion`` prints an error message and
        returns ``[]`` without opening the file.
    """
    criteria = ['Citations', 'NumPapers', 'Co-Authors', 'Citation Rate']
    if criterion not in criteria:
        # Build the message from the list so it cannot drift out of sync
        # with the criteria that are actually accepted.
        print('Error: criterion can only be one of the following strings: '
              + '/'.join(criteria))
        return []

    # Read-only access is sufficient; newline='' lets the csv module do its
    # own newline handling. The context manager closes the file even if
    # parsing raises.
    with open(filename, 'r', encoding='utf-8', newline='') as f1:
        authordict = Analyze.dictauthor(csv.reader(f1))

    def score(stats):
        if criterion == 'Co-Authors':
            return len(stats[criterion])
        if criterion == 'Citation Rate':
            # NOTE(review): assumes every author has NumPapers > 0 -- confirm
            # against Analyze.dictauthor.
            return stats['Citations'] / stats['NumPapers']
        return stats[criterion]

    # Negated scores make an ascending sort yield the highest score first
    # while keeping ties in ascending author order.
    ranked = sorted(
        (-score(stats), author) for author, stats in authordict.items()
    )

    bestofcriterion = []
    for negated, author in ranked:
        # Stop at num entries; running out of authors simply ends the loop
        # instead of raising.
        if len(bestofcriterion) >= num:
            break
        bestofcriterion.append([-negated, author])
    return bestofcriterion
def idauthor(filename, rawlabel):
    """Load the author data in ``filename`` and resolve ``rawlabel``.

    The CSV file is parsed with ``csv.reader`` and turned into an author
    mapping by ``Analyze.dictauthor``; the lookup itself is delegated to
    ``Analyze.idauthor``. Per the original author's note, that helper finds
    the closest match to ``rawlabel`` and confirms/returns the actual label;
    the exact matching rules live in ``Analyze.idauthor``.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.
        rawlabel: The (possibly inexact) author label to look up.

    Returns:
        Whatever ``Analyze.idauthor(authordict, rawlabel)`` returns.
    """
    # Read-only access is sufficient; the context manager closes the file
    # even if parsing raises.
    with open(filename, 'r', encoding='utf-8', newline='') as f1:
        authordict = Analyze.dictauthor(csv.reader(f1))
    return Analyze.idauthor(authordict, rawlabel)
def pltCitRate(filename):
    """Plot average citations per paper against paper count, per author.

    The CSV file is parsed with ``csv.reader`` and turned into an author
    mapping by ``Analyze.dictauthor``. One red point per author is drawn
    with ``pyp`` and the figure is shown, then closed.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A ``numpy`` array of shape ``(2, number_of_authors)``: row 0 holds
        each author's ``NumPapers`` and row 1 the corresponding
        ``Citations / NumPapers``.
    """
    # Read-only access is sufficient; the context manager closes the file
    # even if parsing raises.
    with open(filename, 'r', encoding='utf-8', newline='') as f1:
        authordict = Analyze.dictauthor(csv.reader(f1))

    data = numpy.zeros((2, len(authordict)))
    for i, stats in enumerate(authordict.values()):
        data[0, i] = stats['NumPapers']
        # NOTE(review): assumes every author has NumPapers > 0 -- confirm
        # against Analyze.dictauthor.
        data[1, i] = stats['Citations'] / stats['NumPapers']

    pyp.plot(data[0], data[1], 'ro')
    pyp.xlabel('Number of papers published')
    pyp.ylabel('Average citations per paper')
    pyp.show()
    pyp.close()
    return data