예제 #1
0
def find_cites(author):
   """
   Collect the per-year citation counts needed to plot an author's data set.

   The papers of the author are fetched with
   get_realauthor_data(author, 'bibrec_id'). For every paper, each record
   returned by get_cited_by() is inspected: the first '269__C' field value of
   the citing record is searched with year_re and, when it matches, the
   counter of that year is incremented. Citing records without a '269__C'
   value, or whose value does not match year_re, are ignored.

   Afterwards every year between the first and the last citation year that
   has no entry yet is added with a count of zero, and the lifetime total is
   computed.

   Returned values:
   1. year_dict: a dictionary keyed by year (int) whose values are the number
      of citations that occurred in that year; empty if no dated citation
      was found
   2. start_year: an integer holding the year of the first citation, used
      for calculating points to plot; 0 if no dated citation was found
   3. lifetime_cites: a float holding the total number of citations counted
      in year_dict, used to scale the final data set

   Side effect: prints the author and, when nothing was found, the line
   "# Author has no citations".
   """

   # A single-argument parenthesised print emits the same text under the
   # Python 2 print statement and the Python 3 print function.
   print("# Author: %s" % (author,))
   papers = get_realauthor_data(author, 'bibrec_id')

   year_dict = {}
   # Default for the no-citation case. Previously start_year was only bound
   # inside the branch below, so the return raised UnboundLocalError for an
   # author without dated citations.
   start_year = 0
   lifetime_cites = 0

   for paper in papers:
      # paper[1] is handed on as the record id -- presumably the bibrec id
      # requested above; confirm against get_realauthor_data.
      for cite in get_cited_by(int(paper[1])):
         fieldvalues_yearlist = get_fieldvalues(cite, '269__C')
         if not fieldvalues_yearlist:
            continue
         year_match = year_re.search(fieldvalues_yearlist[0])
         if not year_match:
            continue
         cite_year = int(year_match.group())
         year_dict[cite_year] = year_dict.get(cite_year, 0) + 1

   if year_dict:
      start_year = min(year_dict)
      end_year = max(year_dict)

      # Fill the gaps so that every year in the range has an entry.
      for year in range(start_year, end_year + 1):
         year_dict.setdefault(year, 0)
      lifetime_cites = sum(year_dict.values())
   else:
      print("# Author has no citations")

   return year_dict, start_year, float(lifetime_cites)
예제 #2
0
def find_citesb(author):
   """
   Collect citation counts grouped by paper year and by citation year.

   This plays the same role as find_cites but builds a nested dictionary.
   The original notes say it is used when the user wants to plot citations
   from the past five years to papers published in the past five years; no
   filtering by year happens in this function itself, so that restriction
   must be applied by the caller.

   The papers of the author are fetched with
   get_realauthor_data(author, 'bibrec_id'). The publication year of a paper
   is taken from its first '269__C' field value, searched with year_re;
   papers without a matching value are skipped entirely. For the remaining
   papers, each record returned by get_cited_by() is dated the same way and
   counted; citing records without a matching year are ignored.

   Returned values:
   1. year_dict: a dictionary keyed by paper year (int). Each value is
      another dictionary keyed by citation year (int) whose values are the
      number of citations.
      Ex: {paper year: {citation year: count}}
      A paper year only appears once at least one dated citation was found.
   2. start_year: an integer holding the smallest paper year in year_dict;
      0 if no dated citation was found
   3. end_year: an integer holding the latest year in which one of the
      author's papers was cited; 0 if no dated citation was found
   4. lifetime_cites: a float holding the total number of citations counted
      in year_dict

   Side effect: prints the author and, when nothing was found, the line
   "# Author has no citations".
   """

   # A single-argument parenthesised print emits the same text under the
   # Python 2 print statement and the Python 3 print function.
   print("# Author: %s" % (author,))
   papers = get_realauthor_data(author, 'bibrec_id')

   year_dict = {}
   lifetime_cites = 0
   # Defaults for the no-citation case. start_year used to be bound only
   # inside the branch below, so the return raised UnboundLocalError for an
   # author without dated citations; 0 mirrors the end_year default.
   start_year = 0
   end_year = 0

   for paper in papers:
      # paper[1] is used as the record id -- presumably the bibrec id
      # requested above; confirm against get_realauthor_data.
      record_id = int(paper[1])

      paper_yearlist = get_fieldvalues(record_id, '269__C')
      if not paper_yearlist:
         continue
      paper_year_match = year_re.search(paper_yearlist[0])
      if not paper_year_match:
         continue
      paper_year = int(paper_year_match.group())

      for cite in get_cited_by(record_id):
         fieldvalues_yearlist = get_fieldvalues(cite, '269__C')
         if not fieldvalues_yearlist:
            continue
         cite_year_match = year_re.search(fieldvalues_yearlist[0])
         if not cite_year_match:
            continue
         cite_year = int(cite_year_match.group())

         end_year = max(end_year, cite_year)

         # The inner dictionary is created lazily, so a paper year is only
         # recorded once it has at least one dated citation.
         cites_by_year = year_dict.setdefault(paper_year, {})
         cites_by_year[cite_year] = cites_by_year.get(cite_year, 0) + 1

   if year_dict:
      start_year = min(year_dict)
      lifetime_cites = sum(
         sum(cites_by_year.values()) for cites_by_year in year_dict.values()
      )
   else:
      print("# Author has no citations")

   return year_dict, start_year, end_year, float(lifetime_cites)